Click here to Skip to main content
15,886,026 members
Articles / Programming Languages / C# 4.0

Detect a written text's language

Rate me:
Please Sign up or sign in to vote.
4.96/5 (75 votes)
21 Oct 2009 · CPOL · 6 min read · 155.6K · 7.7K · 114
An article on how to detect the language of a written text.
#if DIALOGUEMASTER

using System;

namespace DialogueMaster.Babel
{
	/// <summary>
	/// Console driver for the Babel language classifier: builds three models
	/// (small, default, large) from per-language text files, reloads the large
	/// model from disk, classifies a few sample sentences and then classifies
	/// lines typed by the user.
	/// </summary>
	class Class1
	{
		/// <summary>Directory containing the per-language training text files.</summary>
		private const string ResourceDirectory = @"C:\src\DialogueMaster\DialogueMaster.Babel\Resources";

		/// <summary>Directory the model files are written to and read back from.</summary>
		private const string ModelDirectory = @"C:\src\DialogueMaster\DialogueMaster.Babel\models";

		/// <summary>
		/// The main entry point for the application.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		[STAThread]
		static void Main(string[] args)
		{
			// Kept disabled, as in the original:
			// Installer.UninstallCounters();
			// NOTE(review): presumably registers performance counters - confirm against Installer.
			Installer.InstallCounters();

			// Stopwatch is monotonic; DateTime.Now can jump (clock adjustments, DST).
			System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
			System.Console.Out.WriteLine("Reading model");

			// The same model instance is extended and saved three times, so each
			// saved file contains every language added up to that point.
			BabelModel model = new BabelModel();
			AddLanguage(model, "de", "german.txt");
			AddLanguage(model, "en", "english.txt");
			AddLanguage(model, "fr", "french.txt");
			AddLanguage(model, "it", "italian.txt");
			AddLanguage(model, "es", "spanish.txt");
			model.SaveToFile(ModelPath("small.model"));

			AddLanguage(model, "pt", "portuguese.txt");
			AddLanguage(model, "nl", "dutch.txt");
			AddLanguage(model, "sv", "swedish.txt");
			AddLanguage(model, "no", "norwegian.txt");
			AddLanguage(model, "da", "danish.txt");
			model.SaveToFile(ModelPath("default.model"));

			AddLanguage(model, "ru", "russian.txt");
			AddLanguage(model, "el", "greek.txt");
			AddLanguage(model, "tr", "turkish.txt");
			AddLanguage(model, "cs", "czech.txt");
			AddLanguage(model, "pl", "polish.txt");
			AddLanguage(model, "is", "icelandic.txt");
			AddLanguage(model, "fi", "finnish.txt");
			AddLanguage(model, "zh", "chinese.zh.txt");
			model.SaveToFile(ModelPath("large.model"));

			// Reload from disk so the reported time includes the save/load round trip.
			model = BabelModel.LoadFromFile(ModelPath("large.model"));

			timer.Stop();
			System.Console.Out.WriteLine("Read "+model.Count.ToString()+" languages in "+timer.Elapsed.TotalMilliseconds+"ms" );

			// Sample texts. Each one is echoed and classified from the same string,
			// so the printed text is always exactly what the classifier received.
			// The accented characters require this file to be saved in an encoding
			// the compiler reads correctly (e.g. UTF-8).
			ClassifyAndPrint(model, "Das ist ein Test text");
			ClassifyAndPrint(model, "This is a simple test text");
			ClassifyAndPrint(model, "This is a simple test text / Das ist ein Test text");
			ClassifyAndPrint(model, "Il a en outre voulu rassurer les princes de l'Eglise sur son intention de développer le gouvernement collégial de l'Eglise, c'est-à-dire de faire participer les prélats à la prise de décisions.");
			ClassifyAndPrint(model, "Some people think this is a test / Man mag glaube, das es ein test sei / Il a en outre voulu rassurer les princes de l'Eglise sur son intention de développer");
			ClassifyAndPrint(model, "Enkelhet - snabb och enkel orderprocedur, förslag på relaterade produkter i samband med order samt möjlighet att återanvända tidigare inköpslistor");

			System.Console.Out.WriteLine("-------------------------------");
			System.Console.Out.WriteLine("Enter text to classify:");
			while (true)
			{
				string input = System.Console.ReadLine();

				// ReadLine returns null at end of input (Ctrl+Z, closed or redirected
				// stdin). Treat that like an empty line; otherwise the loop would
				// never end and null would be handed to the classifier.
				if (string.IsNullOrEmpty(input))
				{
					break;
				}

				System.Console.Out.WriteLine(model.ClassifyText(input));
			}
		}

		/// <summary>
		/// Adds the training file <paramref name="fileName"/> from
		/// <see cref="ResourceDirectory"/> to <paramref name="model"/> under the
		/// language code <paramref name="languageCode"/>.
		/// </summary>
		private static void AddLanguage(BabelModel model, string languageCode, string fileName)
		{
			model.AddFile(languageCode, System.IO.Path.Combine(ResourceDirectory, fileName));
		}

		/// <summary>
		/// Returns the full path of the model file <paramref name="fileName"/>
		/// inside <see cref="ModelDirectory"/>.
		/// </summary>
		private static string ModelPath(string fileName)
		{
			return System.IO.Path.Combine(ModelDirectory, fileName);
		}

		/// <summary>
		/// Writes <paramref name="text"/> to the console, followed by the result
		/// of classifying that same text with <paramref name="model"/>.
		/// </summary>
		private static void ClassifyAndPrint(BabelModel model, string text)
		{
			System.Console.Out.WriteLine(text);
			System.Console.Out.WriteLine(model.ClassifyText(text));
		}
	}
}
#endif

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer (Senior)
Germany Germany
Carsten started programming in Basic and Assembler back in the '80s, when he got his first C64. After switching to an x86-based system, he started programming in Pascal and C. He started Windows programming with the arrival of Windows 3.0. After working for various internet companies, and developing linguistic text analysis and classification software for 25hours communications, he is now working as a contractor.

Carsten lives in Hamburg, Germany with his wife and five children.

Comments and Discussions