65.9K
CodeProject is changing. Read more.
Home

Working with UTF8 characters...

starIcon
emptyStarIcon
starIcon
emptyStarIconemptyStarIconemptyStarIcon

1.62/5 (8 votes)

Mar 11, 2008

CPOL
viewsIcon

25137

It is not difficult to work with UTF-8 characters anymore...

Introduction

Sometimes we have to parse a string that contains both Unicode and ASCII characters, and in that situation the Encoding functions in .NET are not helpful,
so I have created two useful functions for those situations...

Background

This article requires putting up with some headaches from working with UTF-8 characters. Just kidding...

You should know the string format in .NET and how one character is represented as Unicode in 16 bits (2 bytes).

Using the code

These two functions solve our problem of converting UTF characters to bytes and bytes back to UTF characters.
            
/// <summary>
/// Builds a string from a byte array by turning every byte into the character that has the
/// same numeric value: bytes 0-127 become the matching ASCII characters and bytes 128-255
/// become the characters U+0080..U+00FF.
/// </summary>
/// <remarks>
/// Despite the method name, multi-byte UTF-8 sequences are not combined into single
/// characters; each byte is kept as its own character, so the original byte values can be
/// read back from the resulting string one character at a time.
/// </remarks>
/// <param name="byteVal">The bytes to convert. May be null or empty.</param>
/// <returns>
/// A string with exactly one character per input byte, or an empty string when
/// <paramref name="byteVal"/> is null or empty.
/// </returns>
public static string GetUTF8StringFrombytes(byte[] byteVal)
{
    // Treat a missing buffer like an empty one instead of failing with a
    // NullReferenceException inside the loop.
    if (byteVal == null || byteVal.Length == 0)
    {
        return string.Empty;
    }

    // One character is produced per byte, so the final capacity is known up front.
    StringBuilder sb = new StringBuilder(byteVal.Length);

    for (int i = 0; i < byteVal.Length; i++)
    {
        // Widening the byte to a char gives the same result as UTF-8 decoding a single
        // byte in the 0-127 range and as Convert.ToChar for 128-255, without allocating
        // a temporary one-character string for every byte.
        sb.Append((char)byteVal[i]);
    }

    return sb.ToString();
}



        /// <summary>
        /// Converts a string to bytes. Characters in the range U+0080..U+00FF are written as a
        /// single byte holding the character's numeric value; every other character is written
        /// as its UTF-8 encoding.
        /// </summary>
        /// <param name="strVal">The string to convert. May be null or empty.</param>
        /// <returns>
        /// The resulting bytes, or an empty array when <paramref name="strVal"/> is null or empty.
        /// </returns>
        public static byte[] GetBytesFromUTF8Chars(string strVal)
        {
            // The original test (strVal != string.Empty || strVal != null) was always true,
            // so a null argument crashed on strVal.Length instead of reaching the empty result.
            if (string.IsNullOrEmpty(strVal))
            {
                return new byte[0];
            }

            // Worst-case scratch buffer: a single UTF-16 code unit can need three UTF-8 bytes,
            // so the previous Length * 2 sizing overflowed for characters such as U+4E00.
            byte[] btArr = new byte[Encoding.UTF8.GetMaxByteCount(strVal.Length)];
            int arrIndex = 0;

            for (int i = 0; i < strVal.Length; i++)
            {
                char current = strVal[i];

                // Compare the full 16-bit value. Testing the character after truncating it to a
                // byte made code points like U+0394 look like the raw byte 0x94.
                if (current > 127 && current < 256)
                {
                    btArr[arrIndex] = (byte)current;
                    arrIndex++;
                }
                else
                {
                    // Encode a surrogate pair as one unit; encoding each half separately would
                    // produce two replacement sequences instead of the real 4-byte encoding.
                    int charCount = 1;
                    if (char.IsHighSurrogate(current)
                        && i + 1 < strVal.Length
                        && char.IsLowSurrogate(strVal[i + 1]))
                    {
                        charCount = 2;
                    }

                    // Encode straight into the scratch buffer; the call returns the byte count written.
                    arrIndex += Encoding.UTF8.GetBytes(strVal, i, charCount, btArr, arrIndex);

                    // Skip the low surrogate that was consumed together with the high one.
                    i += charCount - 1;
                }
            }

            // Trim the scratch buffer down to the bytes actually produced.
            byte[] retVal = new byte[arrIndex];
            Array.Copy(btArr, 0, retVal, 0, arrIndex);
            return retVal;
        }

        


Points of Interest

I am just providing a solution that I found while facing this problem, so that others will not have to face it.

History

If any improvements are suggested, they are welcome.