Click here to Skip to main content
Email Password   helpLost your password?

URLEncode

I use these functions to prepare POST strings with XML data. The first function, URLEncode1, uses less memory but is slower than the second, URLEncode2. Both functions take a CString as the input parameter and return a CString.

The demo project contains sample usage and measures the execution time of the presented functions.

Source

Helper function

// Maps a nibble value to its upper-case ASCII hexadecimal digit:
// 0-9 become '0'-'9' and 10-15 become 'A'-'F'.
// The argument is not range-checked; values above 15 simply continue past 'F'.
inline BYTE toHex(const BYTE &x)
{
	if (x > 9)
		return x + ('A' - 10);	// 10 -> 'A', 11 -> 'B', ...
	return x + '0';
}

URLEncode1

// URL-encodes sIn for use in a POST string.
//
// Alphanumeric characters are copied unchanged, every character for which
// isspace() is true is written as '+', and every other character is written
// as "%XX", where XX is its value as two upper-case hex digits (see toHex).
//
// This variant reads the input twice: the first pass counts the characters
// that expand to "%XX" so the output buffer can be sized exactly, the second
// pass does the encoding. It therefore uses less memory than URLEncode2 at
// the cost of the extra pass.
//
// NOTE(review): the string data is walked as a sequence of BYTEs, so this
// looks intended for ANSI/MBCS builds (CString of char) only - confirm before
// using it in a _UNICODE build.
CString URLEncode1(CString sIn)
{
    // Read-only view of the input; no GetBuffer/ReleaseBuffer pair is needed
    // because the input is never modified.
    const BYTE* const pInBuf =
        reinterpret_cast<const BYTE*>(static_cast<LPCTSTR>(sIn));

    // Pass 1: count the characters that will be written as "%XX".
    // Whitespace is excluded because it becomes a single '+'.
    int nEscaped = 0;
    for (const BYTE* p = pInBuf; *p; ++p)
    {
        if (!isalnum(*p) && !isspace(*p))
            ++nEscaped;
    }

    // Each escaped character needs two characters more than the original.
    // GetBuffer() reserves room for the terminating null on top of this.
    CString sOut;
    LPBYTE const pOutBuf =
        reinterpret_cast<LPBYTE>(sOut.GetBuffer(sIn.GetLength() + 2 * nEscaped));

    if (pOutBuf)
    {
        LPBYTE pOutTmp = pOutBuf;

        // Pass 2: do the encoding.
        for (const BYTE* p = pInBuf; *p; ++p)
        {
            if (isalnum(*p))
            {
                *pOutTmp++ = *p;
            }
            else if (isspace(*p))
            {
                *pOutTmp++ = '+';
            }
            else
            {
                *pOutTmp++ = '%';
                *pOutTmp++ = toHex(*p >> 4);    // high nibble
                *pOutTmp++ = toHex(*p % 16);    // low nibble
            }
        }

        // The number of characters written is known, so hand it to
        // ReleaseBuffer (which also terminates the string) instead of
        // letting it rescan the buffer.
        sOut.ReleaseBuffer(static_cast<int>(pOutTmp - pOutBuf));
    }
    return sOut;
}

URLEncode2

// URL-encodes sIn for use in a POST string.
//
// Alphanumeric characters are copied unchanged, every character for which
// isspace() is true is written as '+', and every other character is written
// as "%XX", where XX is its value as two upper-case hex digits (see toHex).
//
// This variant makes a single pass over the input and sizes the output buffer
// for the worst case (every character escaped), trading memory for speed
// compared with URLEncode1.
//
// NOTE(review): the string data is walked as a sequence of BYTEs, so this
// looks intended for ANSI/MBCS builds (CString of char) only - confirm before
// using it in a _UNICODE build.
CString URLEncode2(CString sIn)
{
    // Read-only view of the input; no GetBuffer/ReleaseBuffer pair is needed
    // because the input is never modified.
    const BYTE* pIn = reinterpret_cast<const BYTE*>(static_cast<LPCTSTR>(sIn));

    // Worst case: every input character expands to three output characters.
    // GetBuffer() reserves room for the terminating null on top of this.
    CString sOut;
    LPBYTE const pOutBuf =
        reinterpret_cast<LPBYTE>(sOut.GetBuffer(sIn.GetLength() * 3));

    if (pOutBuf)
    {
        LPBYTE pOutTmp = pOutBuf;

        // do encoding
        for (; *pIn; ++pIn)
        {
            if (isalnum(*pIn))
            {
                *pOutTmp++ = *pIn;
            }
            else if (isspace(*pIn))
            {
                *pOutTmp++ = '+';
            }
            else
            {
                *pOutTmp++ = '%';
                *pOutTmp++ = toHex(*pIn >> 4);    // high nibble
                *pOutTmp++ = toHex(*pIn % 16);    // low nibble
            }
        }

        // The number of characters written is known, so hand it to
        // ReleaseBuffer (which also terminates the string) instead of
        // letting it rescan the buffer.
        sOut.ReleaseBuffer(static_cast<int>(pOutTmp - pOutBuf));
    }
    return sOut;
}

Modifications

26.06.2001 - changed out buffer memory allocation (thx 2 Marc Brooks and Matthias)

You must Sign In to use this message board.
 
 
Per page   
 FirstPrevNext
Generalwtf is "if (!isalnum(*pInTmp) && !isalnum(*pInTmp))" ?
s987690
0:34 27 Jul '07  
why does URLEncode1 have
if (!isalnum(*pInTmp) && !isalnum(*pInTmp)) ?

is this an error, something supposed to be a double check, in case program is run on computer without ECC ?


GeneralURLEncode2() Unicode version
mcanti
3:28 2 May '06  
<pre><code>
CString URLEncode(CString sIn)
{
      CString sOut;

    
      const int nLen = sIn.GetLength() + 1;

      register LPTSTR pOutTmp = NULL;
      LPTSTR pOutBuf = NULL;
      register LPTSTR pInTmp = NULL;
      LPTSTR pInBuf =(LPTSTR)sIn.GetBuffer(nLen);
      //BYTE b = 0;
    
      //alloc out buffer
      pOutBuf = (LPTSTR)sOut.GetBuffer(nLen   * 3 - 2);//new BYTE [nLen   * 3];

      if(pOutBuf)
      {
            pInTmp     = pInBuf;
     pOutTmp = pOutBuf;
         
     // do encoding
     while (*pInTmp)
     {
           if(isalnum(*pInTmp))
                 *pOutTmp++ = *pInTmp;
           else
                 if(isspace(*pInTmp) && ((*pInTmp!='\n') && (*pInTmp!='\r')))
                *pOutTmp++ = '+';
          else
          {
                *pOutTmp++ = '%';
                *pOutTmp++ = toHex(*pInTmp>>4);
                *pOutTmp++ = toHex(*pInTmp%16);
          }
           pInTmp++;
     }
     *pOutTmp = '\0';
     //sOut=pOutBuf;
     //delete [] pOutBuf;
     sOut.ReleaseBuffer();
      }
      sIn.ReleaseBuffer();
      return sOut;
}
</code></pre>
GeneralUnicode solution
angelo moscati
4:41 4 Jul '05  
// TCHAR-based URL encoder built on CString::AppendChar.
//
// Alphanumeric characters are copied unchanged, whitespace is written as
// '+', and every other character is written as "%XX" using toHex.
//
// NOTE(review): only the low 8 bits of each shifted/modded value reach
// toHex (explicit BYTE cast), so characters above 0xFF probably do not
// encode meaningfully - confirm the encoding the receiver expects.
CString URLEncode2(CString sIn)
{
    CString sOut;

    const int nLen = sIn.GetLength();

    // do encoding
    for (int i = 0; i < nLen; i++)
    {
        TCHAR c = sIn.GetAt(i);

        if (_istalnum(c))
        {
            sOut.AppendChar(c);
        }
        else if (_istspace(c))
        {
            sOut.AppendChar(_T('+'));
        }
        else
        {
            int ic = _TINT(c);
            sOut.AppendChar(_T('%'));
            sOut.AppendChar(_TCHAR(toHex((BYTE)(ic>>4))));
            sOut.AppendChar(_TCHAR(toHex((BYTE)(ic%16))));
        }
    }

    // No explicit terminator is appended: CString keeps its own, and an
    // appended '\0' would become part of the string and inflate its length.
    return sOut;
}

I think that it works.
Tell me if you have problems.
GeneralRe: Unicode solution
angelo moscati
6:24 4 Jul '05  
I had a bug in the last post.

// Maps a nibble value to its upper-case hexadecimal digit as a TCHAR:
// 0-9 become '0'-'9' and 10-15 become 'A'-'F'.
// The argument is not range-checked.
inline TCHAR toHex(const BYTE &x)
{
    if (x > 9)
        return _T('A') + x - 10;
    return _T('0') + x;
}

// TCHAR-based URL encoder built on CString::AppendChar.
//
// Alphanumeric characters are copied unchanged, whitespace is written as
// '+', and every other character is written as "%XX" using toHex.
//
// NOTE(review): only the low 8 bits of each shifted/modded value reach
// toHex (explicit BYTE cast), so characters above 0xFF probably do not
// encode meaningfully - confirm the encoding the receiver expects.
CString URLEncode2(CString sIn)
{
    CString sOut;

    const int nLen = sIn.GetLength();

    // do encoding
    for (int i = 0; i < nLen; i++)
    {
        TCHAR c = sIn.GetAt(i);

        if (_istalnum(c))
        {
            sOut.AppendChar(c);
        }
        else if (_istspace(c))
        {
            sOut.AppendChar(_T('+'));
        }
        else
        {
            int ic = _TINT(c);
            sOut.AppendChar(_T('%'));
            sOut.AppendChar(toHex((BYTE)(ic>>4)));
            sOut.AppendChar(toHex((BYTE)(ic%16)));
        }
    }

    // No explicit terminator is appended: CString keeps its own, and an
    // appended '\0' would become part of the string and inflate its length.
    return sOut;
}
GeneralNot support UNICODE
chinkuanyeh
22:28 10 Oct '04  
Could anyone post a Unicode version of URLEncode?
GeneralCR LF support
little.mole
11:29 3 Jul '04  
great code, thanks Smile

only thing I was missing is support for linefeeds (CR LF)

As a workaround, I modified:

// do encoding
while (*pInTmp)
{
    if (isalnum(*pInTmp))
    {
        // alphanumerics pass through unchanged
        *pOutTmp++ = *pInTmp;
    }
    else if (isspace(*pInTmp) &&
             ((*pInTmp != '\n') &&   // new
              (*pInTmp != '\r')))    // new
    {
        // whitespace other than CR / LF becomes '+'
        *pOutTmp++ = '+';
    }
    else
    {
        // everything else, now including CR and LF, becomes %XX
        *pOutTmp++ = '%';
        *pOutTmp++ = toHex(*pInTmp>>4);
        *pOutTmp++ = toHex(*pInTmp%16);
    }
    pInTmp++;
}

that way, both '\r' and '\n' (CR LF) will be converted
properly to %0D%0A.

apart from that, thanks again.
Generalmistake?
3m2u
0:02 17 Mar '04  
in function urlencode1:
if (!isalnum(*pInTmp) && !isalnum(*pInTmp))
it should be:
if (!isalnum(*pInTmp) && !isspace(*pInTmp))

yes ?


Generalhow to converts a string that has been encoded for transmission in a URL into a decoded string?
rafaelcn
7:25 24 Mar '03  
how to converts a string that has been encoded for transmission in a URL into a decoded string?
GeneralRe: how to converts a string that has been encoded for transmission in a URL into a decoded string?
little.mole
12:23 3 Jul '04  
// guess rafaelcn already found answer, but
// I use something like this

// Helper to convert the two hex digits of a "%XX" escape to a character.
//
// `in` must point at the first of two readable characters (the ones that
// follow the '%'). Upper- and lower-case digits are accepted. The input is
// not validated: characters that are not hex digits yield an unspecified
// value rather than an error.
inline char x2c ( unsigned char* in)
{
   // Letters are folded to upper case with & 0xdf before subtracting 'A'.
   // The arithmetic is done in int so the high nibble cannot be affected by
   // char narrowing before the final conversion.
   const int hi = ( in[0] >= 'A' ? ((in[0] & 0xdf) - 'A')+10 : (in[0] - '0'));
   const int lo = ( in[1] >= 'A' ? ((in[1] & 0xdf) - 'A')+10 : (in[1] - '0'));
   return static_cast<char>(hi * 16 + lo);
}

// Returns the value (0-15) of a single hexadecimal digit, or -1 if c is not
// a hexadecimal digit.
static int urlDecodeHexValue(unsigned char c)
{
   if (c >= '0' && c <= '9') return c - '0';
   if (c >= 'A' && c <= 'F') return c - 'A' + 10;
   if (c >= 'a' && c <= 'f') return c - 'a' + 10;
   return -1;
}

// Decodes a URL-encoded string.
//
// Every "%XX" escape (XX = two hex digits, either case) is replaced by the
// byte it denotes. A '%' that is not followed by two hex digits - including
// one cut off at the end of the input - is copied through unchanged instead
// of being read past the end of the string.
//
// in          - the encoded text.
// plusAsSpace - when true (the default) a '+' is decoded to a space, which
//               reverses encoders that write spaces as '+'. Pass false to
//               keep '+' as a literal character.
//
// Returns the decoded bytes. No character-set conversion is performed, so
// if the sender encoded UTF-8 the result is UTF-8.
std::string URLdecode( const std::string& in, bool plusAsSpace = true )
{
   const std::string::size_type len = in.size();

   std::string out;
   out.reserve(len);   // decoding never makes the string longer

   for (std::string::size_type i = 0; i < len; ++i)
   {
      const char c = in[i];

      // Only treat '%' as an escape when two more characters are available.
      if ( c == '%' && len - i > 2 )
      {
         const int hi = urlDecodeHexValue(static_cast<unsigned char>(in[i + 1]));
         const int lo = urlDecodeHexValue(static_cast<unsigned char>(in[i + 2]));
         if ( hi >= 0 && lo >= 0 )
         {
            out += static_cast<char>(hi * 16 + lo);
            i += 2;   // skip the two digits just consumed
            continue;
         }
      }

      if ( c == '+' && plusAsSpace )
      {
         out += ' ';
      }
      else
      {
         out += c;
      }
   }
   return out;
}
// optimize away use of std::string or
// convert to CString/whatever as needed,
// but:

The real question now is where your URL-encoded string comes from.
Could it be encoded in UTF-8, as URL strings nowadays tend to be,
e.g. if sent from a browser to a server?

Hope your App is UTF-8 aware Wink
GeneralUnicode?
AlexMarbus
4:11 6 Nov '02  
It looks to me like those functions are not Unicode aware..

--
Alex Marbus
www.marbus.net But then again, I could be wrong.
Generalnot portable
pamela
3:42 7 Oct '01  
the hex function is not portable

pamela
GeneralRe: not portable
Anonymous
10:15 17 May '03  
You would have thought you'd have made a bigger deal over the MFC CStrings.

But hey...
GeneralHow to add a dialog before Windows's explore working
NewLearnXZX
15:48 12 Jul '01  
AS one of learner,I want to add a dialog to verify whether a user is legal before Windows98's Explore working ,which is like NT's verification dialog.
But I don't know how to do,anyone can help me? Thanks! Confused
GeneralWindows already does (most) of this, methinks...
Arnt Witteveen
1:57 4 Jul '01  
Why not use the standard InternetCanonicalizeUrl function?
GeneralRe: Windows already does (most) of this, methinks...
Ryszard Krakowiak
5:35 9 Jul '01  
I use presented functions in WinSock application. InternetCanonicalizeUrl function is from wininet.dll. I encode XML data and I need fast functions. My function is faster than the standard InternetCanonicalizeUrl.
GeneralRe: Windows already does (most) of this, methinks...
wangjj
16:29 23 Jul '01  
Why should you use an additional library just for one function?
GeneralRe: Windows already does (most) of this, methinks...
Anonymous
2:29 29 Aug '02  
I had problems with InternetCanonicalizeUrl if the XML file was massive. Maybe the function is only expecting URLs, not form data?

This URLEncode function works great, however
GeneralMinor correction in UrlEncode1
Marc Brooks
19:14 25 Jun '01  
The line:

pOutBuf = new BYTE [nLen + 3 * k];

Should be:

pOutBuf = new BYTE [nLen + 2 * k];

since we are already allocating one byte for the "original" byte and thus only need two additional bytes for the characters needing encoding.

Also, as someone else has noted, you might as well use .GetBuffer on the output string since that avoids a redundant allocation and memcpy.





Marc
GeneralGetBuffer instead of new
Anonymous
6:08 25 Jun '01  
Hi,

why don't you use
outBuf = (LPBYTE)sOut.GetBuffer(nLen*3)
...
sOut.ReleaseBuffer(reallen);

instead of
outBuf = new BYTE[nLen*3];
...
sOut = outBuf;
delete []outBuf;

This would save memory and a memory copy, which is really slow.

Ciao Matthias


GeneralGetBuffer instead of new
Anonymous
6:08 25 Jun '01  
Hi,

why don't you use
outBuf = (LPBYTE)sOut.GetBuffer(nLen*3)
...
sOut.ReleaseBuffer(reallen);

instead of
outBuf = new BYTE[nLen*3];
...
sOut = outBuf;
delete []outBuf;

This would save memory and a memory copy, which is really slow.

Ciao Matthias


GeneralRe: GetBuffer instead of new
Wictor Wilén
11:59 1 Aug '01  
Heya,

When doing fast conversions like this I usually use the _alloca() routine. It allocates memory on the stack instead of the heap, which usually makes it faster. Also, when the function goes out of scope the stack is freed automatically, and the allocated item with it. Cool

Sample:
pOutBuf = (LPBYTE)_alloca(nLen * 3); //automatically frees when running out of scope

(It is suitable for smaller allocations, since the stack is limited... Wink )

Cheers

/WW




Last Updated 26 Jun 2001 | Advertise | Privacy | Terms of Use | Copyright © CodeProject, 1999-2010