Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version

Detect Encoding for In- and Outgoing Text

, 27 Oct 2009
Detect the encoding of a text without BOM (Byte Order Mask) and choose the best Encoding for persistence or network transport of text
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using href.Utils;
using System.Threading;
using System.IO;

namespace EncodingTest
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            Thread.CurrentThread.CurrentUICulture = System.Globalization.CultureInfo.GetCultureInfo("en-us");
            InitializeComponent();
        }

 
        
        private void button1_Click(object sender, EventArgs e)
        {
            this.listView1.Items.Clear();
            Encoding encForStream = EncodingTools.GetMostEfficientEncodingForStream(testText.Text);
            Encoding encForMime = EncodingTools.GetMostEfficientEncoding(testText.Text);


            ListViewItem encItem = null;
            /*
            
            encItem = GetListItemForEncoding(encForMime);
            encItem.ImageKey = "text_ok";
            this.listView1.Items.Add(encItem);
           */
            
            
            


            Encoding[] encodings = EncodingTools.DetectOutgoingEncodings(testText.Text, EncodingTools.AllEncodings, true);
            foreach (Encoding encoding in encodings)
            {

                encItem = GetListItemForEncoding(encoding);

                if (encoding == encForStream)
                    encItem.StateImageIndex = 2;
                
                if (encoding == encForMime)
                    encItem.ImageKey = "text_ok";
                    

                
                this.listView1.Items.Add(encItem);
            }
            Encoding encodingDefault = EncodingTools.GetMostEfficientEncoding(testText.Text);

            
        }

        private ListViewItem GetListItemForEncoding(Encoding encoding)
        {
            byte[] encodedBytes = encoding.GetBytes(this.testText.Text);

            /* test if all bytes are kept through encoding
            byte[] unicodeBytes = Encoding.Unicode.GetBytes(this.testText.Text);

            
            string decoded = encoding.GetString(encodedBytes);
            byte[] unicodeAfterEncodingByte = Encoding.Unicode.GetBytes(decoded);



            int errors = (int)System.Math.Abs(unicodeBytes.Length - unicodeAfterEncodingByte.Length);
            for(int i=0;i<System.Math.Min(unicodeBytes.Length, unicodeAfterEncodingByte.Length);i++)
            {
                if (unicodeBytes[i] != unicodeAfterEncodingByte[i])
                    errors++;
            }
            */

            ListViewItem encItem = new ListViewItem(encoding.EncodingName);
            encItem.SubItems.Add(encoding.CodePage.ToString());
            encItem.SubItems.Add(encoding.BodyName);
            encItem.SubItems.Add(encodedBytes.Length.ToString());
            encItem.Tag = encoding;
          //  encItem.SubItems.Add(errors.ToString());
            return encItem;
        }

        private void listView1_DoubleClick(object sender, EventArgs e)
        {
            if (this.listView1.SelectedItems.Count != 1)
                return;

            EncodingTestForm testForm = new EncodingTestForm(this.listView1.SelectedItems[0].Tag as Encoding, this.testText.Text);
            testForm.ShowDialog(this);
        }


        /// <summary>
        /// Save the given text to a stream
        /// </summary>
        /// <param name="text">text to save</param>
        /// <param name="path">path to the file</param>
        private void SaveToStream(string text, string path)
        {
            // this is all... detect the encoding
            Encoding enc = EncodingTools.GetMostEfficientEncodingForStream(text);
            // then safe
            using (StreamWriter sw = new StreamWriter(path, false, enc))
                sw.Write(text);
        }

        /// <summary>
        /// Creeates a dummy email
        /// </summary>
        /// <param name="text">body text of the email</param>
        /// <param name="path">path to the new email (should have the extension .eml)</param>
        private void SaveToAsEmail(string text, string path)
        {
            // this is all... detect the encoding
            Encoding enc = EncodingTools.GetMostEfficientEncoding(text);
            // then safe
            using (StreamWriter sw = new StreamWriter(path, false, Encoding.ASCII))
            {
                sw.WriteLine("Subject: test");
                sw.WriteLine("Transfer-Encoding: 7bit");
                sw.WriteLine("Content-Type: text/plain;\r\n\tcharset=\"{0}\"", enc.BodyName);
                sw.WriteLine("Content-Transfer-Encoding: base64"); // should be QP
                sw.WriteLine();
                sw.Write(Convert.ToBase64String(enc.GetBytes(text),Base64FormattingOptions.InsertLineBreaks));
            }
        }


        private void OpenTextFileTest()
        {
            // read the complete file into a string
            string content = EncodingTools.ReadTextFile(@"C:\test\txt");

            // create a StreamReader with the guessed best encoding
            using (StreamReader sr = EncodingTools.OpenTextFile(@"C:\test\txt"))
            {
                string fileContent = sr.ReadToEnd();
            }

            // create a streamReader from a stream
            using (MemoryStream ms = new MemoryStream(
                Encoding.GetEncoding("windows-1252").GetBytes("Some umlauts: ����")))
            {
                using (StreamReader sr = EncodingTools.OpenTextStream(ms))
                {
                    string fileContent = sr.ReadToEnd();
                }
            }
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under A Public Domain dedication

Share

About the Author

Carsten Zeumer
Software Developer (Senior)
Germany Germany
Carsten started programming Basic and Assembler back in the 80’s when he got his first C64. After switching to a x86 based system he started programming in Pascal and C. He started Windows programming with the arrival of Windows 3.0. After working for various internet companies developing a linguistic text analysis and classification software for 25hours communications he is now working as a contractor.
 
Carsten lives in Hamburg, Germany with his wife and five children.

| Advertise | Privacy | Mobile
Web01 | 2.8.140827.1 | Last Updated 27 Oct 2009
Article Copyright 2007 by Carsten Zeumer
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid