Click here to Skip to main content
12,747,719 members (38,859 online)
Click here to Skip to main content


227 bookmarked
Posted 17 Jan 2007

Detect Encoding for In- and Outgoing Text

, 27 Oct 2009 Public Domain
Detect the encoding of text that has no BOM (Byte Order Mark), and choose the best encoding for persistence or network transport of text
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using href.Utils;

namespace EncodingTest
{
    /// <summary>
    /// Test form that encodes a text with a given <see cref="Encoding"/>, then
    /// decodes the resulting bytes twice: once with a BOM-detecting
    /// <see cref="System.IO.StreamReader"/> and once with the encoding returned by
    /// <c>EncodingTools.DetectInputCodepage</c>, showing both results side by side.
    /// </summary>
    public partial class EncodingTestForm : Form
    {
        // Encoding used to turn the test text into bytes.
        private Encoding m_Encoding;

        // Text that is encoded and then decoded again by DoTest.
        private string m_TestText;

        /// <summary>
        /// Creates the form and stores the encoding and text to test.
        /// </summary>
        /// <param name="enc">Encoding used to produce the raw bytes.</param>
        /// <param name="testText">Text to round-trip; may be null or empty.</param>
        public EncodingTestForm(Encoding enc, string testText)
        {
            // NOTE(review): no InitializeComponent()/DoTest() call is visible in this
            // excerpt; the controls used by DoTest are declared in another part of
            // this partial class - confirm against the full project.
            this.m_Encoding = enc;
            this.m_TestText = testText;
        }

        /// <summary>
        /// Encodes the test text (with preamble, if the encoding has one), decodes
        /// it with both approaches and writes the decoded text and the names of the
        /// encodings used into the form's controls.
        /// </summary>
        private void DoTest()
        {
            // Nothing to encode: leave the controls untouched.
            if (string.IsNullOrEmpty(this.m_TestText))
            {
                return;
            }

            using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
            {
                byte[] encoded = this.m_Encoding.GetBytes(this.m_TestText);
                // preamble?
                byte[] preamble = this.m_Encoding.GetPreamble();

                // Make sure a preamble was returned
                // and is large enough to contain a BOM.
                if (preamble.Length >= 2)
                {
                    ms.Write(preamble, 0, preamble.Length);
                }

                ms.Write(encoded, 0, encoded.Length);

                ms.Position = 0;
                // read it using standard text reader
                // (second argument: detect the encoding from byte order marks)
                System.IO.StreamReader tr = new System.IO.StreamReader(ms, true);

                this.streamReader.Text = tr.ReadToEnd();
                this.label1.Text = String.Format("StreamReader: {0} / {1}", tr.CurrentEncoding.EncodingName, tr.CurrentEncoding.BodyName);

                // now the improved test
                ms.Position = 0;
                Encoding targetEncoding;
                byte[] rawData = ms.ToArray();
                try
                {
                    targetEncoding = EncodingTools.DetectInputCodepage(rawData);
                }
                catch (System.Runtime.InteropServices.COMException)
                {
                    // Detection failed with a COM error: fall back to the system default encoding.
                    targetEncoding = Encoding.Default;
                }

                this.detected.Text = targetEncoding.GetString(rawData);
                this.label2.Text = String.Format("EncodingTools.DetectInputCodepage: {0} / {1}", targetEncoding.EncodingName, targetEncoding.BodyName);
            }
        }
    }
}



By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.


This article, along with any associated source code and files, is licensed under A Public Domain dedication


About the Author

Carsten Zeumer
Software Developer (Senior)
Germany Germany
Carsten started programming in BASIC and assembler back in the '80s, when he got his first C64. After switching to an x86-based system, he started programming in Pascal and C. He started Windows programming with the arrival of Windows 3.0. After working for various internet companies, developing linguistic text analysis and classification software for 25hours communications, he is now working as a contractor.

Carsten lives in Hamburg, Germany with his wife and five children.

You may also be interested in...

Permalink | Advertise | Privacy | Terms of Use | Mobile
Web02 | 2.8.170215.1 | Last Updated 27 Oct 2009
Article Copyright 2007 by Carsten Zeumer
Everything else Copyright © CodeProject, 1999-2017
Layout: fixed | fluid