 |
|
 |
I understand that this article is somewhat old. For those who would like full Unicode support, you can use this:
(It is also much easier to understand. There's no need to scrimp on memory and speed in this case.)
// Escapes a path or URL, one TCHAR code unit at a time:
//   - '\\' is rewritten as '/'
//   - '/', ':', '.', '_' and alphanumerics below 0x100 are copied unchanged
//   - any other code unit below 0x100 becomes '%' plus two lowercase hex digits
//   - code units >= 0x100 (only possible in Unicode builds) become a decimal
//     "&#N;" reference
//
// pStrURL: NUL-terminated input; must not be NULL (asserted in debug builds).
// Returns the escaped text as a new CString.
CString EscapeURL(LPCTSTR pStrURL)
{
    ASSERT(pStrURL);

    CString strOut;
    const int nLn = lstrlen(pStrURL);
    for (int i = 0; i < nLn; i++)
    {
        const TCHAR z = pStrURL[i];

        // Classify and format the unsigned value of the code unit. In ANSI/MBCS
        // builds TCHAR is a plain char, which is signed by default with MSVC:
        // bytes >= 0x80 would be negative, which is out of range for the
        // character-classification routines and sign-extends under ">>",
        // yielding "%fff" instead of "%ff".
        const unsigned int code =
            static_cast<unsigned int>(z) & (sizeof(TCHAR) == 1 ? 0xFFu : 0xFFFFu);

        if (z == _T('\\'))
        {
            strOut += _T('/');
        }
        else if (z == _T('/') || z == _T(':') || z == _T('.') || z == _T('_') ||
                 (code < 0x100 && _istalnum(code)))
        {
            strOut += z;
        }
        else if (code < 0x100)
        {
            // High nibble, then low nibble.
            strOut.AppendFormat(_T("%%%x%x"), code >> 4, code & 0xf);
        }
        else
        {
            strOut.AppendFormat(_T("&#%u;"), code);
        }
    }
    return strOut;
}
|
|
|
|
 |
|
 |
why does URLEncode1 have
if (!isalnum(*pInTmp) && !isalnum(*pInTmp)) ?
Is this an error, or is it supposed to be a double check in case the program is run on a computer without ECC?
|
|
|
|
 |
|
 |
<pre><code> CString URLEncode(CString sIn) { CString sOut; const int nLen = sIn.GetLength() + 1; register LPTSTR pOutTmp = NULL; LPTSTR pOutBuf = NULL; register LPTSTR pInTmp = NULL; LPTSTR pInBuf =(LPTSTR)sIn.GetBuffer(nLen); //BYTE b = 0; //alloc out buffer pOutBuf = (LPTSTR)sOut.GetBuffer(nLen * 3 - 2);//new BYTE [nLen * 3]; if(pOutBuf) { pInTmp = pInBuf; pOutTmp = pOutBuf; // do encoding while (*pInTmp) { if(isalnum(*pInTmp)) *pOutTmp++ = *pInTmp; else if(isspace(*pInTmp) && ((*pInTmp!='\n') && (*pInTmp!='\r'))) *pOutTmp++ = '+'; else { *pOutTmp++ = '%'; *pOutTmp++ = toHex(*pInTmp>>4); *pOutTmp++ = toHex(*pInTmp%16); } pInTmp++; } *pOutTmp = '\0'; //sOut=pOutBuf; //delete [] pOutBuf; sOut.ReleaseBuffer(); } sIn.ReleaseBuffer(); return sOut; } </code></pre>
|
|
|
|
 |
|
 |
// URL-encodes sIn by appending to a CString instead of writing into a raw buffer.
//
// NOTE(review): the posted code lost everything between "i <" and ">>4" (the
// loop header and the first branches). Those parts are reconstructed here on
// the assumption that they mirror URLEncode: alphanumerics copied, whitespace
// other than CR/LF written as '+', everything else written as '%' plus two
// toHex() characters. Confirm against the author's original.
CString URLEncode2(CString sIn)
{
    CString sOut;
    const int nLen = sIn.GetLength();
    // do encoding
    for (int i = 0; i < nLen; i++)
    {
        // Unsigned byte value, so bytes >= 0x80 are not negative when
        // classified or shifted.
        const unsigned char ic = static_cast<unsigned char>(sIn.GetAt(i));
        if (isalnum(ic))
        {
            sOut.AppendChar(_TCHAR(ic));
        }
        else if (isspace(ic) && ic != '\n' && ic != '\r')
        {
            sOut.AppendChar(_T('+'));
        }
        else
        {
            sOut.AppendChar(_T('%'));
            sOut.AppendChar(_TCHAR(toHex((BYTE)(ic>>4))));
            sOut.AppendChar(_TCHAR(toHex((BYTE)(ic%16))));
        }
    }
    // No explicit terminator is appended: CString maintains its own, and
    // AppendChar(_T('\0')) would add an embedded NUL and inflate the length by one.
    return sOut;
}
I think that it works.
Tell me if you have problems.
|
|
|
|
 |
|
 |
I had a bug in the last post.
// Returns the uppercase hexadecimal digit ('0'-'9', 'A'-'F') for the low
// nibble of x. Bits above the low nibble are ignored, so the result is always
// a valid hex digit even when the caller passes a value greater than 15.
inline TCHAR toHex(const BYTE &x)
{
    // Table lookup rather than 'A' + x - 10, which assumes the letters A-F are
    // contiguous in the execution character set.
    static const TCHAR kHexDigits[] = _T("0123456789ABCDEF");
    return kHexDigits[x & 0x0F];
}
// URL-encodes sIn by appending to a CString instead of writing into a raw buffer.
//
// NOTE(review): the posted code lost everything between "i <" and ">>4" (the
// loop header and the first branches). Those parts are reconstructed here on
// the assumption that they mirror URLEncode: alphanumerics copied, whitespace
// other than CR/LF written as '+', everything else written as '%' plus two
// toHex() characters. Confirm against the author's original.
CString URLEncode2(CString sIn)
{
    CString sOut;
    const int nLen = sIn.GetLength();
    // do encoding
    for (int i = 0; i < nLen; i++)
    {
        // Unsigned byte value, so bytes >= 0x80 are not negative when
        // classified or shifted.
        const unsigned char ic = static_cast<unsigned char>(sIn.GetAt(i));
        if (isalnum(ic))
        {
            sOut.AppendChar(static_cast<TCHAR>(ic));
        }
        else if (isspace(ic) && ic != '\n' && ic != '\r')
        {
            sOut.AppendChar(_T('+'));
        }
        else
        {
            sOut.AppendChar(_T('%'));
            sOut.AppendChar(toHex((BYTE)(ic>>4)));
            sOut.AppendChar(toHex((BYTE)(ic%16)));
        }
    }
    // No explicit terminator is appended: CString maintains its own, and
    // AppendChar(_T('\0')) would add an embedded NUL and inflate the length by one.
    return sOut;
}
|
|
|
|
 |
|
 |
Could anyone post a Unicode version of URLEncode?
|
|
|
|
 |
|
 |
great code, thanks
only thing I was missing is support for linefeeds (CR LF)
As a workaround, I modified:
// do encoding
// Reads characters through pInTmp until a NUL is reached and writes the
// encoded output through pOutTmp.
while (*pInTmp)
{
// Alphanumeric characters are copied unchanged.
if(isalnum(*pInTmp))
*pOutTmp++ = *pInTmp;
else
// Whitespace is written as '+', except LF and CR, which now fall through
// to the percent-encoding branch below.
if( isspace(*pInTmp) &&
((*pInTmp!='\n') && // new
(*pInTmp!='\r')) ) // new
*pOutTmp++ = '+';
else
{
// Everything else is written as '%' followed by toHex() of (*pInTmp>>4) and
// of (*pInTmp%16) - presumably one hex digit each; toHex is defined elsewhere.
*pOutTmp++ = '%';
*pOutTmp++ = toHex(*pInTmp>>4);
*pOutTmp++ = toHex(*pInTmp%16);
}
pInTmp++;
}
that way, both '\r' and '\n' (CR LF) will be converted
properly to %0D%0A.
apart from that, thanks again.
|
|
|
|
 |
|
 |
in function urlencode1:
if (!isalnum(*pInTmp) && !isalnum(*pInTmp))
it should be:
if (!isalnum(*pInTmp) && !isspace(*pInTmp))
yes ?
|
|
|
|
 |
|
 |
How do I convert a string that has been encoded for transmission in a URL back into a decoded string?
|
|
|
|
 |
|
 |
// guess rafaelcn already found answer, but
// I use something like this

// Helper to convert %XX to chars: returns the character encoded by the two
// hex digits at in[0] and in[1]. Upper- and lowercase digits are accepted.
// The caller must make sure both characters really are hex digits.
inline char x2c(unsigned char* in)
{
    int value = (in[0] >= 'A') ? ((in[0] & 0xdf) - 'A') + 10 : (in[0] - '0');
    value *= 16;
    value += (in[1] >= 'A') ? ((in[1] & 0xdf) - 'A') + 10 : (in[1] - '0');
    return static_cast<char>(value);
}

// True when c is one of 0-9, a-f or A-F.
static inline bool isUrlHexDigit(unsigned char c)
{
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}

// Do the work: decodes a URL-encoded string.
//
// in:          the encoded text.
// plusAsSpace: when true (the default) a '+' is decoded to a space, which
//              reverses encoders that write spaces as '+'; pass false to keep
//              '+' literal.
// Returns the decoded bytes. A '%' that is not followed by two hex digits
// (including a '%' at the very end of the input) is copied through unchanged
// instead of being read past the end of the string. Decoded NUL bytes ("%00")
// are kept in the result rather than truncating it.
std::string URLdecode(const std::string& in, bool plusAsSpace = true)
{
    const std::string::size_type len = in.size();

    std::string out;
    out.reserve(len);  // decoding never makes the text longer

    for (std::string::size_type i = 0; i < len; ++i)
    {
        const unsigned char c = static_cast<unsigned char>(in[i]);

        if (c == '%' && len - i > 2 &&
            isUrlHexDigit(static_cast<unsigned char>(in[i + 1])) &&
            isUrlHexDigit(static_cast<unsigned char>(in[i + 2])))
        {
            unsigned char digits[2] = { static_cast<unsigned char>(in[i + 1]),
                                        static_cast<unsigned char>(in[i + 2]) };
            out.push_back(x2c(digits));
            i += 2;  // skip the two digits just consumed
        }
        else if (c == '+' && plusAsSpace)
        {
            out.push_back(' ');
        }
        else
        {
            out.push_back(in[i]);
        }
    }
    return out;
}

// optimize away use of std::string or
// convert to CString/whatever as needed,
// but: The real question now is where you have your URLencoded string from.
// Could it be encoded in UTF-8, as URL strings nowadays tend to be, e.g. if sent
// from browser to server? Hope your App is UTF-8 aware
|
|
|
|
 |
|
 |
It looks to me like those functions are not Unicode aware..
--
Alex Marbus
www.marbus.net
But then again, I could be wrong.
|
|
|
|
 |
|
 |
the hex function is not portable
pamela
|
|
|
|
 |
|
 |
You would have thought you'd have made a bigger deal over the MFC CStrings.
But hey...
|
|
|
|
 |
|
 |
As a learner, I want to add a dialog that verifies whether a user is legitimate before Windows 98's Explorer starts, similar to NT's verification dialog.
But I don't know how to do it. Can anyone help me? Thanks!
|
|
|
|
 |
|
 |
Why not use the standard InternetCanonicalizeUrl function?
|
|
|
|
 |
|
 |
I use the presented functions in a WinSock application. The InternetCanonicalizeUrl function is from wininet.dll. I encode XML data and I need fast functions. My function is faster than the standard InternetCanonicalizeUrl.
|
|
|
|
 |
|
 |
Why should you use an additional library just for one function?
|
|
|
|
 |
|
 |
I had problems with InternetCanonicalizeUrl if the XML file was massive. Maybe the function is only expecting URLs, not form data?
This URLEncode function works great, however
|
|
|
|
 |
|
 |
The line:
pOutBuf = new BYTE [nLen + 3 * k];
Should be:
pOutBuf = new BYTE [nLen + 2 * k];
since we are already allocating one byte for the "original" byte and thus only need two additional bytes for the characters needing encoding.
Also, as someone else has noted, you might as well use .GetBuffer on the output string since that avoids a redundant allocation and memcpy.
Marc
|
|
|
|
 |
|
 |
Hi,
why don't you use
outBuf = (LPBYTE)sOut.GetBuffer(nLen*3)
...
sOut.ReleaseBuffer(reallen);
instead of
outBuf = new BYTE[nLen*3];
...
sOut = outBuf;
delete []outBuf;
This would save memory and a memory copy, which is really slow.
Ciao Matthias
|
|
|
|
 |
|
 |
Hi,
why don't you use
outBuf = (LPBYTE)sOut.GetBuffer(nLen*3)
...
sOut.ReleaseBuffer(reallen);
instead of
outBuf = new BYTE[nLen*3];
...
sOut = outBuf;
delete []outBuf;
This would save memory and a memory copy, which is really slow.
Ciao Matthias
|
|
|
|
 |
|
 |
Heya,
When doing fast conversions like this I usually use the _alloca() routine. It allocates memory on the stack instead of the heap, which usually makes it faster. Also, when the function goes out of scope the stack is freed automatically, and the allocated item with it.
Sample:
pOutBuf = (LPBYTE)_alloca(nLen * 3); //automatically frees when running out of scope
(It is suitable for smaller allocations, since the stack is limited... )
Cheers
/WW
|
|
|
|
 |