Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version

Detect Encoding for In- and Outgoing Text

, 27 Oct 2009 Public Domain
Detect the encoding of a text without a BOM (Byte Order Mark) and choose the best encoding for persistence or network transport of text.
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using href.Utils;

namespace EncodingTest
{
    /// <summary>
    /// Test form that encodes a text with a given encoding and then displays
    /// how the resulting bytes are decoded again, once by a
    /// <see cref="System.IO.StreamReader"/> with byte-order-mark detection
    /// enabled and once by <c>EncodingTools.DetectInputCodepage</c>.
    /// </summary>
    public partial class EncodingTestForm : Form
    {
        private Encoding m_Encoding;
        private string m_TestText;

        /// <summary>
        /// Creates the form and immediately runs the round-trip test.
        /// </summary>
        /// <param name="enc">Encoding used to turn <paramref name="testText"/> into bytes.</param>
        /// <param name="testText">Text to encode; when null or empty no test is run.</param>
        public EncodingTestForm(Encoding enc, string testText)
        {
            InitializeComponent();
            this.m_Encoding = enc;
            this.m_TestText = testText;
            this.DoTest();
        }

        /// <summary>
        /// Encodes the test text, decodes it again with both detection
        /// strategies and writes the decoded text and the name of the
        /// encoding each strategy used into the form's controls.
        /// </summary>
        private void DoTest()
        {
            if (string.IsNullOrEmpty(this.m_TestText))
            {
                return;
            }

            byte[] rawData = BuildRawData(this.m_Encoding, this.m_TestText);

            // First pass: the standard text reader with BOM detection enabled.
            // Both the stream and the reader are disposed once the results
            // have been copied into the controls.
            using (System.IO.MemoryStream input = new System.IO.MemoryStream(rawData, false))
            using (System.IO.StreamReader reader = new System.IO.StreamReader(input, true))
            {
                this.streamReader.Text = reader.ReadToEnd();
                // CurrentEncoding is read after ReadToEnd so it reflects what the reader actually used.
                this.label1.Text = String.Format("StreamReader: {0} / {1}", reader.CurrentEncoding.EncodingName, reader.CurrentEncoding.BodyName);
            }

            // Second pass: the improved detection working on the same bytes.
            Encoding targetEncoding;
            try
            {
                targetEncoding = EncodingTools.DetectInputCodepage(rawData);
            }
            catch (System.Runtime.InteropServices.COMException)
            {
                // Detection failed inside the COM layer; fall back to the
                // system default encoding so something is still displayed.
                targetEncoding = Encoding.Default;
            }

            this.detected.Text = targetEncoding.GetString(rawData);
            this.label2.Text = String.Format("EncodingTools.DetectInputCodepage: {0} / {1}", targetEncoding.EncodingName, targetEncoding.BodyName);
        }

        /// <summary>
        /// Builds the byte sequence under test: the encoding's preamble
        /// (when it is at least two bytes long) followed by the encoded text.
        /// </summary>
        /// <param name="encoding">Encoding that supplies the preamble and encodes the text.</param>
        /// <param name="text">Text to encode.</param>
        /// <returns>The preamble (if any) followed by the encoded text.</returns>
        private static byte[] BuildRawData(Encoding encoding, string text)
        {
            byte[] encoded = encoding.GetBytes(text);
            byte[] preamble = encoding.GetPreamble();

            using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
            {
                // Make sure a preamble was returned
                // and is large enough to contain a BOM.
                if (preamble.Length >= 2)
                {
                    ms.Write(preamble, 0, preamble.Length);
                }

                ms.Write(encoded, 0, encoded.Length);

                // ToArray copies the whole buffer regardless of Position.
                return ms.ToArray();
            }
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under A Public Domain dedication

Share

About the Author

Carsten Zeumer
Software Developer (Senior)
Germany Germany
Carsten started programming Basic and Assembler back in the 80’s when he got his first C64. After switching to a x86 based system he started programming in Pascal and C. He started Windows programming with the arrival of Windows 3.0. After working for various internet companies developing a linguistic text analysis and classification software for 25hours communications he is now working as a contractor.
 
Carsten lives in Hamburg, Germany with his wife and five children.

| Advertise | Privacy | Terms of Use | Mobile
Web02 | 2.8.141220.1 | Last Updated 27 Oct 2009
Article Copyright 2007 by Carsten Zeumer
Everything else Copyright © CodeProject, 1999-2014
Layout: fixed | fluid