|
using System;
using System.Text;
namespace ThunderMain.SoundEx {
/// <summary>
/// Implements the American SoundEx (Miracode) algorithm according
/// to http://www.nara.gov/genealogy/soundex/soundex.html
/// Miracode was first used with the 1910 US census.
/// </summary>
/// <remarks>
/// A generated code is the upper-cased first character of the input
/// followed by up to three digits, right-padded with '0' to four
/// characters. The per-character digits come from EncodeChar, which is
/// not defined in this class (presumably inherited from ISoundEx -
/// confirm against the base type).
/// </remarks>
internal class MiracodeSoundEx : ISoundEx{
    // Total length of a generated code: one leading character plus three digits.
    private const int CodeLength=4;

    // Message prefix reported when a self-test case in ValidateAlgorithm fails.
    private const string BrokenMessage="SoundEx Algoritm Broken";

    /// <summary>
    /// Generates the four-character SoundEx code for <paramref name="s"/>.
    /// </summary>
    /// <param name="s">The name to encode. May be null or empty.</param>
    /// <returns>
    /// The upper-cased first character followed by three digits, or an
    /// empty string when <paramref name="s"/> is null or empty.
    /// </returns>
    public override string GenerateSoundEx(string s) {
        if(s==null || s.Length==0) {
            return string.Empty;
        }

        StringBuilder output=new StringBuilder(CodeLength);

        // The first character is emitted verbatim (upper-cased), never as a
        // digit. Invariant casing keeps the result independent of the
        // current culture (e.g. the Turkish dotted/dotless I).
        output.Append(Char.ToUpperInvariant(s[0]));

        // Code of the most recent consonant that has not been separated from
        // the current position by a vowel. It starts as the code of the first
        // character so that "Pfister" does not encode the F after the P.
        string lastCode=EncodeChar(s[0]);

        // Stop at a maximum of 4 characters
        for(int i=1; i<s.Length && output.Length<CodeLength; i++) {
            switch(Char.ToLowerInvariant(s[i])) {
                case 'a':
                case 'e':
                case 'i':
                case 'o':
                case 'u':
                    // A vowel separates consonants: the next consonant is
                    // encoded even if it repeats the previous code.
                    lastCode=string.Empty;
                    break;
                case 'h':
                case 'w':
                    // H and W are transparent: they are not encoded and do
                    // not separate consonants, so "Ashcraft" keeps S and C
                    // as one sound while "Wright" still encodes the R.
                    break;
                default:
                    string code=EncodeChar(s[i]);
                    if(code==null || code.Length==0) {
                        // Uncoded character (e.g. 'y', punctuation):
                        // treated as transparent, like H and W.
                        break;
                    }
                    // Ignore duplicated phonetic sounds
                    if(code!=lastCode) {
                        output.Append(code);
                    }
                    lastCode=code;
                    break;
            }
        }

        // Pad with zeros
        while(output.Length<CodeLength) {
            output.Append('0');
        }
        return output.ToString();
    }

    /// <summary>
    /// Throws when <paramref name="actual"/> differs from
    /// <paramref name="expected"/>; the comparison is ordinal and null-safe.
    /// </summary>
    /// <param name="actual">The value that was produced.</param>
    /// <param name="expected">The value that was required.</param>
    /// <param name="error">Prefix for the exception message.</param>
    /// <exception cref="InvalidOperationException">
    /// The two values are not equal.
    /// </exception>
    private static void AssertEquals(string actual, string expected, string error) {
        if(actual!=expected) {
            throw new InvalidOperationException(error + ". Expected " + expected + " but got " + actual);
        }
    }

    /// <summary>
    /// Self-test: encodes a fixed set of names and throws if any result
    /// differs from its expected code.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// A generated code does not match the expected value.
    /// </exception>
    public override void ValidateAlgorithm() {
        // Validate the SoundEx algorithm
        // using http://www.nara.gov/genealogy/soundex/soundex.html
        AssertEquals(GenerateSoundEx("Tymczak"),"T522", BrokenMessage);
        AssertEquals(GenerateSoundEx("Ashcraft"),"A261", BrokenMessage);
        AssertEquals(GenerateSoundEx("Pfister"),"P236", BrokenMessage);
        AssertEquals(GenerateSoundEx("Jackson"),"J250", BrokenMessage);
        AssertEquals(GenerateSoundEx("Gutierrez"),"G362", BrokenMessage);
        AssertEquals(GenerateSoundEx("VanDeusen"),"V532", BrokenMessage);
        AssertEquals(GenerateSoundEx("Deusen"),"D250", BrokenMessage);

        // Regression: a consonant that follows H or W must still be encoded
        // when its code differs from the consonant before the H or W.
        AssertEquals(GenerateSoundEx("Ahrens"),"A652", BrokenMessage);
    }
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
Richard Birkby is a software engineer from London, UK, specializing in .NET. Richard has coded for companies of many different sizes, from small venture-capital-funded start-ups to multinational corporations (e.g., Microsoft). When he's not programming, he enjoys driving his sports car or eating curry (although never at the same time!).
Richard helps run CurryPages.com and has several other covert ventures in development. Stay tuned!