Click here to Skip to main content
15,881,204 members
Articles / Programming Languages / C#

A SoundEx implementation in .NET

Rate me:
Please Sign up or sign in to vote.
4.94/5 (15 votes)
21 Jan 2002 | BSD | 4 min read | 139.2K   1.5K   69
Demonstrates an OO implementation of 4 SoundEx variants in .NET
using System;
using System.Text;

namespace ThunderMain.SoundEx {
	
	/// <summary>
	/// Implements American SoundEx or Miracode algorithm according 
	/// to http://www.nara.gov/genealogy/soundex/soundex.html
	/// Miracode was first used with the 1910 US census
	/// </summary>
	/// <remarks>
	/// A generated code is the upper-cased first character of the input followed
	/// by up to three digits, right-padded with '0' to four characters.
	/// The per-letter digit comes from <c>EncodeChar</c>, which is inherited and
	/// not defined in this class; it is presumed to return an empty string for
	/// letters that carry no code (vowels, h, w, y) - the validation cases in
	/// <see cref="ValidateAlgorithm"/> rely on that.
	/// </remarks>
	internal class MiracodeSoundEx : ISoundEx{

		/// <summary>Length of a complete code: one leading character plus three digits.</summary>
		private const int CodeLength=4;

		/// <summary>Message prefix used by every check in <see cref="ValidateAlgorithm"/>.</summary>
		private const string BrokenMessage="SoundEx Algoritm Broken";

		/// <summary>
		/// Generates the four-character Miracode SoundEx code for a name.
		/// </summary>
		/// <param name="s">The name to encode. May be null or empty.</param>
		/// <returns>
		/// The four-character code, or an empty string when <paramref name="s"/>
		/// is null or empty.
		/// </returns>
		public override string GenerateSoundEx(string s) {
			// Null is handled like the empty string instead of failing with a
			// NullReferenceException.
			if(string.IsNullOrEmpty(s)) {
				return string.Empty;
			}

			StringBuilder output=new StringBuilder(CodeLength);

			// The first character is kept verbatim (upper-cased), never encoded.
			// Invariant casing keeps the result independent of the current culture.
			output.Append(Char.ToUpperInvariant(s[0]));

			// Code of the most recent coded letter that has not been separated
			// from the current position by a vowel. The first character takes
			// part in duplicate detection even though it is not emitted as a digit.
			string previousCode=EncodeChar(output[0]);

			// Stop at a maximum of 4 characters
			for(int i=1; i<s.Length && output.Length<CodeLength; i++) {
				string code=EncodeChar(s[i]);

				if(!string.IsNullOrEmpty(code)) {
					// Ignore duplicated phonetic sounds: emit only when the code
					// differs from that of the preceding coded letter.
					if(code!=previousCode) {
						output.Append(code);
					}
					previousCode=code;
				} else if(IsVowel(s[i])) {
					// Chars separated by a vowel are both encoded, so a vowel
					// clears the remembered code.
					previousCode=string.Empty;
				}
				// Any other uncoded character (h, w, ...) is transparent: it
				// neither emits a digit nor clears previousCode, so equal codes
				// on either side of it collapse into one, while a different
				// code following it is still emitted.
			}

			// Pad with zeros
			int padding=CodeLength-output.Length;
			if(padding>0) {
				output.Append('0', padding);
			}

			return output.ToString();
		}

		/// <summary>
		/// Tells whether a character is one of the vowels a, e, i, o, u (any case).
		/// </summary>
		private static bool IsVowel(char c) {
			switch(Char.ToLowerInvariant(c)) {
				case 'a':
				case 'e':
				case 'i':
				case 'o':
				case 'u':
					return true;
				default:
					return false;
			}
		}

		/// <summary>
		/// Throws when the two strings differ (ordinal comparison, null-safe).
		/// </summary>
		/// <param name="s1">The value actually produced.</param>
		/// <param name="s2">The value that was expected.</param>
		/// <param name="error">Text placed at the start of the exception message.</param>
		/// <exception cref="InvalidOperationException">
		/// <paramref name="s1"/> and <paramref name="s2"/> are not equal.
		/// </exception>
		private void AssertEquals(string s1, string s2, string error) {
			if(!string.Equals(s1, s2, StringComparison.Ordinal)) {
				throw new InvalidOperationException(error + ". Expected " + s2 + " but got " + s1);
			}
		}

		/// <summary>
		/// Self-test: encodes a set of known names and throws if any result
		/// differs from its expected code.
		/// </summary>
		/// <exception cref="InvalidOperationException">A generated code is wrong.</exception>
		public override void ValidateAlgorithm() {
			// Validate the SoundEx algorithm
			// using http://www.nara.gov/genealogy/soundex/soundex.html

			AssertEquals(GenerateSoundEx("Tymczak"),"T522", BrokenMessage);
			AssertEquals(GenerateSoundEx("Ashcraft"),"A261", BrokenMessage);
			AssertEquals(GenerateSoundEx("Pfister"),"P236", BrokenMessage);
			AssertEquals(GenerateSoundEx("Jackson"),"J250", BrokenMessage);
			AssertEquals(GenerateSoundEx("Gutierrez"),"G362", BrokenMessage);
			AssertEquals(GenerateSoundEx("VanDeusen"),"V532", BrokenMessage);
			AssertEquals(GenerateSoundEx("Deusen"),"D250", BrokenMessage);

			// A letter following h or w must still be encoded when its code
			// differs from the letter before the h/w.
			AssertEquals(GenerateSoundEx("Schmidt"),"S530", BrokenMessage);
			AssertEquals(GenerateSoundEx("Wright"),"W623", BrokenMessage);
		}


	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The BSD License


Written By
Web Developer
United Kingdom United Kingdom
Richard Birkby is a software engineer from London, UK, specializing in .NET. Richard has coded for companies of many different sizes, from small venture-capital-funded start-ups to multinational corporations (e.g. Microsoft). When he's not programming, he enjoys driving his sports car or eating curry (although never at the same time!).

Richard helps run CurryPages.com and has several other covert ventures in development. Stay tuned!

Comments and Discussions