Click here to Skip to main content
15,885,155 members
Articles / Programming Languages / VBScript

Implement Phonetic ("Sounds-like") Name Searches with Double Metaphone Part III: VBScript and ASP & Database Solutions

Rate me:
Please Sign up or sign in to vote.
4.67/5 (6 votes)
19 Mar 2007 | 8 min read | 84.4K   1.6K   36
Further describes the COM wrapper around the author's C++ implementation of Double Metaphone, and demonstrates use of this COM wrapper within ASP and VBScript to query a database of names via a web page.
/**
 * DoubleMetaphone.cs
 * 
 * An implementation of Lawrence Phillips' Double Metaphone phonetic matching
 * algorithm, published in C/C++ Users Journal, June, 2000.
 * 
 * This implementation was written by Adam J. Nelson (anelson@nullpointer.net).
 * It is based on the C++ template implementation, also by Adam Nelson.
 * For the latest version of this implementation, implementations
 * in other languages, and links to articles I've written on the use of my various
 * Double Metaphone implementations, see:
 * http://www.nullpointer.net/anelson/
 * 
 * Note that since this impl implements IComparable, it can be used to key associative containers,
 * thereby easily implementing phonetic matching within a simple container.  Examples of this
 * should have been included in the archive from which you obtained this file.
 * 
 * Current Version: 1.0.0
 * Revision History:
 * 	1.0.0 - ajn - First release
 * 
 * This implementation, and optimizations, Copyright (C) 2003, Adam J. Nelson
 * The Double Metaphone algorithm was written by Lawrence Phillips, and is 
 * Copyright (c) 1998, 1999 by Lawrence Philips.
 */
using System;
using System.Text;

namespace nullpointer.Metaphone
{
    /// <summary>Implements the Double Metaphone phonetic matching algorithm published
    ///     by Lawrence Phillips in June 2000 C/C++ Users Journal.
    ///
    ///     Optimized and ported to C# by Adam Nelson (anelson@nullpointer.net).
    ///
    ///     An instance holds the primary and (optional) alternate phonetic keys of one word.
    ///     The keys are computed on construction and again on every call to
    ///     <see cref="computeKeys"/>.</summary>
    public class DoubleMetaphone
    {
        /// <summary>The maximum length of the metaphone keys produced.  4 is the sweet spot.</summary>
        public const int METAPHONE_KEY_LENGTH = 4;

        /// <summary>Blanks appended to the working copy of the word so that the look-ahead
        ///     probes in the rule table (at most a handful of characters past the end of the
        ///     word) never index outside the string.</summary>
        private const String WORD_PADDING = "     ";

        /// <summary>StringBuilders used to construct the keys</summary>
        private readonly StringBuilder m_primaryKey, m_alternateKey;

        /// <summary>Actual keys, populated once the keys have been built</summary>
        private String m_primaryKeyString, m_alternateKeyString;

        /// <summary>Variables to track the key length w/o having to grab the .Length attr</summary>
        private int m_primaryKeyLength, m_alternateKeyLength;

        /// <summary>Working copy of the word (upper-cased and padded), and the original word</summary>
        private String m_word, m_originalWord;

        /// <summary>Length of the unpadded word, and last valid zero-based index into it</summary>
        private int m_length, m_last;

        /// <summary>Flag indicating if an alternate metaphone key was computed for the word</summary>
        private bool m_hasAlternate;

        /// <summary>Default ctor, initializes by computing the keys of an empty string,
        ///     which are both empty strings</summary>
        public DoubleMetaphone() : this("")
        {
        }

        /// <summary>Constructs a new DoubleMetaphone object, and initializes it with
        ///     the metaphone keys for a given word</summary>
        ///
        /// <param name="word">Word with which to initialize the object.  Computes the metaphone keys
        ///     of this word.  A null word is treated like the empty word.</param>
        public DoubleMetaphone(String word)
        {
            //Leave room at the end for writing a bit beyond the length; keys are chopped at the end anyway
            m_primaryKey = new StringBuilder(METAPHONE_KEY_LENGTH + 2);
            m_alternateKey = new StringBuilder(METAPHONE_KEY_LENGTH + 2);

            computeKeys(word);
        }

        /// <summary>The primary metaphone key for the current word</summary>
        public String PrimaryKey
        {
            get { return m_primaryKeyString; }
        }

        /// <summary>The alternate metaphone key for the current word, or null if the current
        ///     word does not have an alternate key by Double Metaphone</summary>
        public String AlternateKey
        {
            get { return m_hasAlternate ? m_alternateKeyString : null; }
        }

        /// <summary>Original word for which the keys were computed, exactly as it was passed in</summary>
        public String Word
        {
            get { return m_originalWord; }
        }

        /// <summary>Static wrapper around the class, enables computation of metaphone keys
        ///     without the caller instantiating the class.</summary>
        ///
        /// <param name="word">Word whose metaphone keys are to be computed</param>
        /// <param name="primaryKey">Ref to var to receive primary metaphone key</param>
        /// <param name="alternateKey">Ref to var to receive alternate metaphone key, or be set to null if
        ///     word has no alternate key by double metaphone</param>
        static public void doubleMetaphone(String word, ref String primaryKey, ref String alternateKey)
        {
            DoubleMetaphone mp = new DoubleMetaphone(word);

            primaryKey = mp.PrimaryKey;
            alternateKey = mp.AlternateKey;
        }

        /// <summary>Sets a new current word for the instance, computing the new word's metaphone
        ///     keys</summary>
        ///
        /// <param name="word">New word to set to current word.  Discards previous metaphone keys,
        ///     and computes new keys for this word.  A null word yields the same (empty) keys as
        ///     the empty word; <see cref="Word"/> still returns the value that was passed.</param>
        public void computeKeys(String word)
        {
            //Discard everything that was computed for the previous word
            m_primaryKey.Length = 0;
            m_alternateKey.Length = 0;

            m_primaryKeyString = "";
            m_alternateKeyString = "";

            m_primaryKeyLength = m_alternateKeyLength = 0;

            m_hasAlternate = false;

            m_originalWord = word;

            //Null is treated as the empty word instead of failing half-way through the reset
            String working = (word == null) ? "" : word;

            m_length = working.Length;

            //Compute last valid index into word
            m_last = m_length - 1;

            //Metaphone is not case sensitive.  The invariant culture is used so that the result
            //does not depend on the current thread culture (e.g. Turkish maps 'i' to U+0130, which
            //none of the 'I' rules below would recognise).
            //Pad with blanks, so word can be over-indexed without fear of exception
            m_word = String.Concat(working.ToUpperInvariant(), WORD_PADDING);

            //Now build the keys
            buildMetaphoneKeys();
        }

        /// <summary>Internal impl of double metaphone algorithm.  Populates m_primaryKey and
        ///     m_alternateKey, chops them at METAPHONE_KEY_LENGTH and stores the results in
        ///     m_primaryKeyString / m_alternateKeyString.  Modified copy-paste of Phillips'
        ///     original code; the order of the rules within each case is significant.</summary>
        private void buildMetaphoneKeys()
        {
            int current = 0;
            if (m_length < 1)
            {
                //Empty word: the keys stay the empty strings set by computeKeys
                return;
            }

            //skip these when at start of word
            if (areStringsAt(0, 2, "GN", "KN", "PN", "WR", "PS"))
            {
                current += 1;
            }

            //Initial 'X' is pronounced 'Z' e.g. 'Xavier'
            if (m_word[0] == 'X')
            {
                addMetaphoneCharacter("S"); //'Z' maps to 'S'
                current += 1;
            }

            ///////////main loop//////////////////////////
            while ((m_primaryKeyLength < METAPHONE_KEY_LENGTH) || (m_alternateKeyLength < METAPHONE_KEY_LENGTH))
            {
                if (current >= m_length)
                {
                    break;
                }

                switch (m_word[current])
                {
                    case 'A':
                    case 'E':
                    case 'I':
                    case 'O':
                    case 'U':
                    case 'Y':
                        if (current == 0)
                        {
                            //all init vowels now map to 'A'
                            addMetaphoneCharacter("A");
                        }
                        current += 1;
                        break;

                    case 'B':
                        //"-mb", e.g", "dumb", already skipped over...
                        addMetaphoneCharacter("P");

                        if (m_word[current + 1] == 'B')
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        break;

                    //LATIN CAPITAL LETTER C WITH CEDILLA; written as an escape so that the
                    //label survives any re-encoding of this source file
                    case '\u00C7':
                        addMetaphoneCharacter("S");
                        current += 1;
                        break;

                    case 'C':
                        //various germanic
                        if ((current > 1)
                            && !isVowel(current - 2)
                            && areStringsAt((current - 1), 3, "ACH")
                            && ((m_word[current + 2] != 'I')
                                && ((m_word[current + 2] != 'E')
                                    || areStringsAt((current - 2), 6, "BACHER", "MACHER"))))
                        {
                            addMetaphoneCharacter("K");
                            current += 2;
                            break;
                        }

                        //special case 'caesar'
                        if ((current == 0) && areStringsAt(current, 6, "CAESAR"))
                        {
                            addMetaphoneCharacter("S");
                            current += 2;
                            break;
                        }

                        //italian 'chianti'
                        if (areStringsAt(current, 4, "CHIA"))
                        {
                            addMetaphoneCharacter("K");
                            current += 2;
                            break;
                        }

                        if (areStringsAt(current, 2, "CH"))
                        {
                            //find 'michael'
                            if ((current > 0) && areStringsAt(current, 4, "CHAE"))
                            {
                                addMetaphoneCharacter("K", "X");
                                current += 2;
                                break;
                            }

                            //greek roots e.g. 'chemistry', 'chorus'
                            if ((current == 0)
                                && (areStringsAt((current + 1), 5, "HARAC", "HARIS")
                                    || areStringsAt((current + 1), 3, "HOR", "HYM", "HIA", "HEM"))
                                && !areStringsAt(0, 5, "CHORE"))
                            {
                                addMetaphoneCharacter("K");
                                current += 2;
                                break;
                            }

                            //germanic, greek, or otherwise 'ch' for 'kh' sound
                            if ((areStringsAt(0, 4, "VAN ", "VON ") || areStringsAt(0, 3, "SCH"))
                                // 'architect but not 'arch', 'orchestra', 'orchid'
                                || areStringsAt((current - 2), 6, "ORCHES", "ARCHIT", "ORCHID")
                                || areStringsAt((current + 2), 1, "T", "S")
                                || ((areStringsAt((current - 1), 1, "A", "O", "U", "E") || (current == 0))
                                    //e.g., 'wachtler', 'wechsler', but not 'tichner'
                                    && areStringsAt((current + 2), 1, "L", "R", "N", "M", "B", "H", "F", "V", "W", " ")))
                            {
                                addMetaphoneCharacter("K");
                            }
                            else if (current > 0)
                            {
                                if (areStringsAt(0, 2, "MC"))
                                {
                                    //e.g., "McHugh"
                                    addMetaphoneCharacter("K");
                                }
                                else
                                {
                                    addMetaphoneCharacter("X", "K");
                                }
                            }
                            else
                            {
                                addMetaphoneCharacter("X");
                            }
                            current += 2;
                            break;
                        }

                        //e.g, 'czerny'
                        if (areStringsAt(current, 2, "CZ") && !areStringsAt((current - 2), 4, "WICZ"))
                        {
                            addMetaphoneCharacter("S", "X");
                            current += 2;
                            break;
                        }

                        //e.g., 'focaccia'
                        if (areStringsAt((current + 1), 3, "CIA"))
                        {
                            addMetaphoneCharacter("X");
                            current += 3;
                            break;
                        }

                        //double 'C', but not if e.g. 'McClellan'
                        if (areStringsAt(current, 2, "CC") && !((current == 1) && (m_word[0] == 'M')))
                        {
                            //'bellocchio' but not 'bacchus'
                            if (areStringsAt((current + 2), 1, "I", "E", "H") && !areStringsAt((current + 2), 2, "HU"))
                            {
                                //'accident', 'accede' 'succeed'
                                if (((current == 1) && (m_word[current - 1] == 'A'))
                                    || areStringsAt((current - 1), 5, "UCCEE", "UCCES"))
                                {
                                    addMetaphoneCharacter("KS");
                                }
                                else
                                {
                                    //'bacci', 'bertucci', other italian
                                    addMetaphoneCharacter("X");
                                }
                                current += 3;
                                break;
                            }
                            else
                            {
                                //Pierce's rule
                                addMetaphoneCharacter("K");
                                current += 2;
                                break;
                            }
                        }

                        if (areStringsAt(current, 2, "CK", "CG", "CQ"))
                        {
                            addMetaphoneCharacter("K");
                            current += 2;
                            break;
                        }

                        if (areStringsAt(current, 2, "CI", "CE", "CY"))
                        {
                            //italian vs. english
                            if (areStringsAt(current, 3, "CIO", "CIE", "CIA"))
                            {
                                addMetaphoneCharacter("S", "X");
                            }
                            else
                            {
                                addMetaphoneCharacter("S");
                            }
                            current += 2;
                            break;
                        }

                        //else
                        addMetaphoneCharacter("K");

                        //name sent in 'mac caffrey', 'mac gregor
                        if (areStringsAt((current + 1), 2, " C", " Q", " G"))
                        {
                            current += 3;
                        }
                        else if (areStringsAt((current + 1), 1, "C", "K", "Q")
                                 && !areStringsAt((current + 1), 2, "CE", "CI"))
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        break;

                    case 'D':
                        if (areStringsAt(current, 2, "DG"))
                        {
                            if (areStringsAt((current + 2), 1, "I", "E", "Y"))
                            {
                                //e.g. 'edge'
                                addMetaphoneCharacter("J");
                                current += 3;
                                break;
                            }
                            else
                            {
                                //e.g. 'edgar'
                                addMetaphoneCharacter("TK");
                                current += 2;
                                break;
                            }
                        }

                        if (areStringsAt(current, 2, "DT", "DD"))
                        {
                            addMetaphoneCharacter("T");
                            current += 2;
                            break;
                        }

                        //else
                        addMetaphoneCharacter("T");
                        current += 1;
                        break;

                    case 'F':
                        if (m_word[current + 1] == 'F')
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        addMetaphoneCharacter("F");
                        break;

                    case 'G':
                        if (m_word[current + 1] == 'H')
                        {
                            if ((current > 0) && !isVowel(current - 1))
                            {
                                addMetaphoneCharacter("K");
                                current += 2;
                                break;
                            }

                            //'ghislane', ghiradelli
                            if (current == 0)
                            {
                                if (m_word[current + 2] == 'I')
                                {
                                    addMetaphoneCharacter("J");
                                }
                                else
                                {
                                    addMetaphoneCharacter("K");
                                }
                                current += 2;
                                break;
                            }

                            //Parker's rule (with some further refinements) - e.g., 'hugh'
                            if (((current > 1) && areStringsAt((current - 2), 1, "B", "H", "D"))
                                //e.g., 'bough'
                                || ((current > 2) && areStringsAt((current - 3), 1, "B", "H", "D"))
                                //e.g., 'broughton'
                                || ((current > 3) && areStringsAt((current - 4), 1, "B", "H")))
                            {
                                current += 2;
                                break;
                            }
                            else
                            {
                                //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                                if ((current > 2)
                                    && (m_word[current - 1] == 'U')
                                    && areStringsAt((current - 3), 1, "C", "G", "L", "R", "T"))
                                {
                                    addMetaphoneCharacter("F");
                                }
                                else if ((current > 0) && m_word[current - 1] != 'I')
                                {
                                    addMetaphoneCharacter("K");
                                }

                                current += 2;
                                break;
                            }
                        }

                        if (m_word[current + 1] == 'N')
                        {
                            if ((current == 1) && isVowel(0) && !isWordSlavoGermanic())
                            {
                                addMetaphoneCharacter("KN", "N");
                            }
                            else if (!areStringsAt((current + 2), 2, "EY")
                                     && (m_word[current + 1] != 'Y') && !isWordSlavoGermanic())
                            {
                                //not e.g. 'cagney'
                                addMetaphoneCharacter("N", "KN");
                            }
                            else
                            {
                                addMetaphoneCharacter("KN");
                            }
                            current += 2;
                            break;
                        }

                        //'tagliaro'
                        if (areStringsAt((current + 1), 2, "LI") && !isWordSlavoGermanic())
                        {
                            addMetaphoneCharacter("KL", "L");
                            current += 2;
                            break;
                        }

                        //-ges-,-gep-,-gel-, -gie- at beginning
                        if ((current == 0)
                            && ((m_word[current + 1] == 'Y')
                                || areStringsAt((current + 1), 2, "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER")))
                        {
                            addMetaphoneCharacter("K", "J");
                            current += 2;
                            break;
                        }

                        // -ger-,  -gy-
                        if ((areStringsAt((current + 1), 2, "ER") || (m_word[current + 1] == 'Y'))
                            && !areStringsAt(0, 6, "DANGER", "RANGER", "MANGER")
                            && !areStringsAt((current - 1), 1, "E", "I")
                            && !areStringsAt((current - 1), 3, "RGY", "OGY"))
                        {
                            addMetaphoneCharacter("K", "J");
                            current += 2;
                            break;
                        }

                        // italian e.g, 'biaggi'
                        if (areStringsAt((current + 1), 1, "E", "I", "Y") || areStringsAt((current - 1), 4, "AGGI", "OGGI"))
                        {
                            //obvious germanic
                            if ((areStringsAt(0, 4, "VAN ", "VON ") || areStringsAt(0, 3, "SCH"))
                                || areStringsAt((current + 1), 2, "ET"))
                            {
                                addMetaphoneCharacter("K");
                            }
                            else if (areStringsAt((current + 1), 4, "IER "))
                            {
                                //always soft if french ending
                                addMetaphoneCharacter("J");
                            }
                            else
                            {
                                addMetaphoneCharacter("J", "K");
                            }
                            current += 2;
                            break;
                        }

                        if (m_word[current + 1] == 'G')
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        addMetaphoneCharacter("K");
                        break;

                    case 'H':
                        //only keep if first & before vowel or btw. 2 vowels
                        if (((current == 0) || isVowel(current - 1))
                            && isVowel(current + 1))
                        {
                            addMetaphoneCharacter("H");
                            current += 2;
                        }
                        else
                        {
                            //also takes care of 'HH'
                            current += 1;
                        }
                        break;

                    case 'J':
                        //obvious spanish, 'jose', 'san jacinto'
                        if (areStringsAt(current, 4, "JOSE") || areStringsAt(0, 4, "SAN "))
                        {
                            if (((current == 0) && (m_word[current + 4] == ' ')) || areStringsAt(0, 4, "SAN "))
                            {
                                addMetaphoneCharacter("H");
                            }
                            else
                            {
                                addMetaphoneCharacter("J", "H");
                            }
                            current += 1;
                            break;
                        }

                        if ((current == 0) && !areStringsAt(current, 4, "JOSE"))
                        {
                            addMetaphoneCharacter("J", "A"); //Yankelovich/Jankelowicz
                        }
                        else if (isVowel(current - 1)
                                 && !isWordSlavoGermanic()
                                 && ((m_word[current + 1] == 'A') || (m_word[current + 1] == 'O')))
                        {
                            //spanish pron. of e.g. 'bajador'
                            addMetaphoneCharacter("J", "H");
                        }
                        else if (current == m_last)
                        {
                            //a blank alternate means: nothing is appended to the alternate key
                            addMetaphoneCharacter("J", " ");
                        }
                        else if (!areStringsAt((current + 1), 1, "L", "T", "K", "S", "N", "M", "B", "Z")
                                 && !areStringsAt((current - 1), 1, "S", "K", "L"))
                        {
                            addMetaphoneCharacter("J");
                        }

                        if (m_word[current + 1] == 'J') //it could happen!
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        break;

                    case 'K':
                        if (m_word[current + 1] == 'K')
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        addMetaphoneCharacter("K");
                        break;

                    case 'L':
                        if (m_word[current + 1] == 'L')
                        {
                            //spanish e.g. 'cabrillo', 'gallegos'
                            if (((current == (m_length - 3))
                                 && areStringsAt((current - 1), 4, "ILLO", "ILLA", "ALLE"))
                                || ((areStringsAt((m_last - 1), 2, "AS", "OS") || areStringsAt(m_last, 1, "A", "O"))
                                    && areStringsAt((current - 1), 4, "ALLE")))
                            {
                                addMetaphoneCharacter("L", " ");
                                current += 2;
                                break;
                            }
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        addMetaphoneCharacter("L");
                        break;

                    case 'M':
                        if ((areStringsAt((current - 1), 3, "UMB")
                             && (((current + 1) == m_last) || areStringsAt((current + 2), 2, "ER")))
                            //'dumb','thumb'
                            || (m_word[current + 1] == 'M'))
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        addMetaphoneCharacter("M");
                        break;

                    case 'N':
                        if (m_word[current + 1] == 'N')
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        addMetaphoneCharacter("N");
                        break;

                    //LATIN CAPITAL LETTER N WITH TILDE; written as an escape so that the
                    //label survives any re-encoding of this source file
                    case '\u00D1':
                        current += 1;
                        addMetaphoneCharacter("N");
                        break;

                    case 'P':
                        if (m_word[current + 1] == 'H')
                        {
                            addMetaphoneCharacter("F");
                            current += 2;
                            break;
                        }

                        //also account for "campbell", "raspberry"
                        if (areStringsAt((current + 1), 1, "P", "B"))
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        addMetaphoneCharacter("P");
                        break;

                    case 'Q':
                        if (m_word[current + 1] == 'Q')
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        addMetaphoneCharacter("K");
                        break;

                    case 'R':
                        //french e.g. 'rogier', but exclude 'hochmeier'
                        if ((current == m_last)
                            && !isWordSlavoGermanic()
                            && areStringsAt((current - 2), 2, "IE")
                            && !areStringsAt((current - 4), 2, "ME", "MA"))
                        {
                            addMetaphoneCharacter("", "R");
                        }
                        else
                        {
                            addMetaphoneCharacter("R");
                        }

                        if (m_word[current + 1] == 'R')
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        break;

                    case 'S':
                        //special cases 'island', 'isle', 'carlisle', 'carlysle'
                        if (areStringsAt((current - 1), 3, "ISL", "YSL"))
                        {
                            current += 1;
                            break;
                        }

                        //special case 'sugar-'
                        if ((current == 0) && areStringsAt(current, 5, "SUGAR"))
                        {
                            addMetaphoneCharacter("X", "S");
                            current += 1;
                            break;
                        }

                        if (areStringsAt(current, 2, "SH"))
                        {
                            //germanic
                            if (areStringsAt((current + 1), 4, "HEIM", "HOEK", "HOLM", "HOLZ"))
                            {
                                addMetaphoneCharacter("S");
                            }
                            else
                            {
                                addMetaphoneCharacter("X");
                            }
                            current += 2;
                            break;
                        }

                        //italian & armenian
                        if (areStringsAt(current, 3, "SIO", "SIA") || areStringsAt(current, 4, "SIAN"))
                        {
                            if (!isWordSlavoGermanic())
                            {
                                addMetaphoneCharacter("S", "X");
                            }
                            else
                            {
                                addMetaphoneCharacter("S");
                            }
                            current += 3;
                            break;
                        }

                        //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                        //also, -sz- in slavic language altho in hungarian it is pronounced 's'
                        if (((current == 0)
                             && areStringsAt((current + 1), 1, "M", "N", "L", "W"))
                            || areStringsAt((current + 1), 1, "Z"))
                        {
                            addMetaphoneCharacter("S", "X");
                            if (areStringsAt((current + 1), 1, "Z"))
                            {
                                current += 2;
                            }
                            else
                            {
                                current += 1;
                            }
                            break;
                        }

                        if (areStringsAt(current, 2, "SC"))
                        {
                            //Schlesinger's rule
                            if (m_word[current + 2] == 'H')
                            {
                                //dutch origin, e.g. 'school', 'schooner'
                                if (areStringsAt((current + 3), 2, "OO", "ER", "EN", "UY", "ED", "EM"))
                                {
                                    //'schermerhorn', 'schenker'
                                    if (areStringsAt((current + 3), 2, "ER", "EN"))
                                    {
                                        addMetaphoneCharacter("X", "SK");
                                    }
                                    else
                                    {
                                        addMetaphoneCharacter("SK");
                                    }
                                    current += 3;
                                    break;
                                }
                                else
                                {
                                    if ((current == 0) && !isVowel(3) && (m_word[3] != 'W'))
                                    {
                                        addMetaphoneCharacter("X", "S");
                                    }
                                    else
                                    {
                                        addMetaphoneCharacter("X");
                                    }
                                    current += 3;
                                    break;
                                }
                            }

                            if (areStringsAt((current + 2), 1, "I", "E", "Y"))
                            {
                                addMetaphoneCharacter("S");
                                current += 3;
                                break;
                            }

                            //else
                            addMetaphoneCharacter("SK");
                            current += 3;
                            break;
                        }

                        //french e.g. 'resnais', 'artois'
                        if ((current == m_last) && areStringsAt((current - 2), 2, "AI", "OI"))
                        {
                            addMetaphoneCharacter("", "S");
                        }
                        else
                        {
                            addMetaphoneCharacter("S");
                        }

                        if (areStringsAt((current + 1), 1, "S", "Z"))
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        break;

                    case 'T':
                        if (areStringsAt(current, 4, "TION"))
                        {
                            addMetaphoneCharacter("X");
                            current += 3;
                            break;
                        }

                        if (areStringsAt(current, 3, "TIA", "TCH"))
                        {
                            addMetaphoneCharacter("X");
                            current += 3;
                            break;
                        }

                        if (areStringsAt(current, 2, "TH")
                            || areStringsAt(current, 3, "TTH"))
                        {
                            //special case 'thomas', 'thames' or germanic
                            if (areStringsAt((current + 2), 2, "OM", "AM")
                                || areStringsAt(0, 4, "VAN ", "VON ")
                                || areStringsAt(0, 3, "SCH"))
                            {
                                addMetaphoneCharacter("T");
                            }
                            else
                            {
                                //the digit zero is the metaphone symbol emitted for 'th'
                                addMetaphoneCharacter("0", "T");
                            }
                            current += 2;
                            break;
                        }

                        if (areStringsAt((current + 1), 1, "T", "D"))
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        addMetaphoneCharacter("T");
                        break;

                    case 'V':
                        if (m_word[current + 1] == 'V')
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        addMetaphoneCharacter("F");
                        break;

                    case 'W':
                        //can also be in middle of word
                        if (areStringsAt(current, 2, "WR"))
                        {
                            addMetaphoneCharacter("R");
                            current += 2;
                            break;
                        }

                        //NOTE: no break after this rule; the rules below still apply
                        if ((current == 0)
                            && (isVowel(current + 1) || areStringsAt(current, 2, "WH")))
                        {
                            //Wasserman should match Vasserman
                            if (isVowel(current + 1))
                            {
                                addMetaphoneCharacter("A", "F");
                            }
                            else
                            {
                                //need Uomo to match Womo
                                addMetaphoneCharacter("A");
                            }
                        }

                        //Arnow should match Arnoff
                        if (((current == m_last) && isVowel(current - 1))
                            || areStringsAt((current - 1), 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY")
                            || areStringsAt(0, 3, "SCH"))
                        {
                            addMetaphoneCharacter("", "F");
                            current += 1;
                            break;
                        }

                        //polish e.g. 'filipowicz'
                        if (areStringsAt(current, 4, "WICZ", "WITZ"))
                        {
                            addMetaphoneCharacter("TS", "FX");
                            current += 4;
                            break;
                        }

                        //else skip it
                        current += 1;
                        break;

                    case 'X':
                        //french e.g. breaux
                        if (!((current == m_last)
                              && (areStringsAt((current - 3), 3, "IAU", "EAU")
                                  || areStringsAt((current - 2), 2, "AU", "OU"))))
                        {
                            addMetaphoneCharacter("KS");
                        }

                        if (areStringsAt((current + 1), 1, "C", "X"))
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        break;

                    case 'Z':
                        //chinese pinyin e.g. 'zhao'
                        if (m_word[current + 1] == 'H')
                        {
                            addMetaphoneCharacter("J");
                            current += 2;
                            break;
                        }
                        else if (areStringsAt((current + 1), 2, "ZO", "ZI", "ZA")
                                 || (isWordSlavoGermanic() && ((current > 0) && m_word[current - 1] != 'T')))
                        {
                            addMetaphoneCharacter("S", "TS");
                        }
                        else
                        {
                            addMetaphoneCharacter("S");
                        }

                        if (m_word[current + 1] == 'Z')
                        {
                            current += 2;
                        }
                        else
                        {
                            current += 1;
                        }
                        break;

                    default:
                        //Anything else (digits, blanks, punctuation, other letters) has no sound
                        current += 1;
                        break;
                }
            }

            //Finally, chop off the keys at the prescribed length (a multi-character
            //code such as "KS" may have pushed a key past the limit)
            if (m_primaryKeyLength > METAPHONE_KEY_LENGTH)
            {
                m_primaryKey.Length = METAPHONE_KEY_LENGTH;
            }

            if (m_alternateKeyLength > METAPHONE_KEY_LENGTH)
            {
                m_alternateKey.Length = METAPHONE_KEY_LENGTH;
            }

            m_primaryKeyString = m_primaryKey.ToString();
            m_alternateKeyString = m_alternateKey.ToString();
        }

        /// <summary>Returns true if m_word is classified as "slavo-germanic" by Phillips' algorithm</summary>
        ///
        /// <returns>true if word contains strings that Lawrence's algorithm considers indicative of
        ///     slavo-germanic origin ('W', 'K', "CZ" or "WITZ"); else false</returns>
        private bool isWordSlavoGermanic()
        {
            //Ordinal searches: the working word is already upper-cased, and a culture-sensitive
            //search could give culture-dependent answers.  "WITZ" needs no scan of its own,
            //since every word containing it also contains 'W'.
            return (m_word.IndexOf('W') != -1)
                || (m_word.IndexOf('K') != -1)
                || (m_word.IndexOf("CZ", StringComparison.Ordinal) != -1);
        }

        /// <summary>Returns true if letter at given position in word is a Roman vowel</summary>
        ///
        /// <param name="pos">Position at which to check for a vowel; may lie outside the word</param>
        ///
        /// <returns>True if pos is inside the (unpadded) word and m_word[pos] is one of
        ///     A, E, I, O, U, Y; else false</returns>
        private bool isVowel(int pos)
        {
            if ((pos < 0) || (pos >= m_length))
            {
                return false;
            }

            switch (m_word[pos])
            {
                case 'A':
                case 'E':
                case 'I':
                case 'O':
                case 'U':
                case 'Y':
                    return true;

                default:
                    return false;
            }
        }

        /// <summary>Appends the given metaphone character(s) to both the primary and the
        ///     alternate key</summary>
        ///
        /// <param name="primaryCharacter">Character(s) to append</param>
        private void addMetaphoneCharacter(String primaryCharacter)
        {
            addMetaphoneCharacter(primaryCharacter, null);
        }

        /// <summary>Appends a metaphone character to the primary, and a possibly different alternate,
        ///     metaphone keys for the word.</summary>
        ///
        /// <param name="primaryCharacter">Primary character(s) to append to the primary key, and, if no
        ///     alternate is given, to the alternate key as well.  May be empty, in which case the
        ///     primary key is left alone.</param>
        /// <param name="alternateCharacter">Alternate character(s) to append to the alternate key.
        ///     null or a zero-length string means "same as primary".  Any non-empty value marks the
        ///     word as having an alternate key; a value starting with a blank additionally means that
        ///     nothing is appended to the alternate key.</param>
        private void addMetaphoneCharacter(String primaryCharacter, String alternateCharacter)
        {
            bool hasPrimary = primaryCharacter.Length > 0;

            if (hasPrimary)
            {
                m_primaryKey.Append(primaryCharacter);
                m_primaryKeyLength += primaryCharacter.Length;
            }

            //Work out what, if anything, goes onto the alternate key
            String forAlternate = null;

            if (alternateCharacter == null)
            {
                //No alternate was passed at all: the alternate key simply mirrors the primary
                if (hasPrimary)
                {
                    forAlternate = primaryCharacter;
                }
            }
            else if (alternateCharacter.Length > 0)
            {
                //A real alternate was provided, so the two keys diverge from here on
                m_hasAlternate = true;
                if (alternateCharacter[0] != ' ')
                {
                    forAlternate = alternateCharacter;
                }
            }
            else if (hasPrimary && (primaryCharacter[0] != ' '))
            {
                //Zero-length alternate: fall back to the primary unless it is a blank
                forAlternate = primaryCharacter;
            }

            if (forAlternate != null)
            {
                m_alternateKey.Append(forAlternate);
                m_alternateKeyLength += forAlternate.Length;
            }
        }

        /// <summary>Tests if any of the strings passed as variable arguments are at the given start
        ///     position and length within word</summary>
        ///
        /// <param name="start">Start position in m_word; may be negative as a result of expressions
        ///     like "current - 2", in which case nothing matches</param>
        /// <param name="length">Length of substring starting at start in m_word to compare to the
        ///     given strings</param>
        /// <param name="strings">params array of zero or more strings for which to search in m_word</param>
        ///
        /// <returns>true if any one string in the strings array was found in m_word at the given position
        ///     and length; false otherwise, including when the requested range does not lie
        ///     inside the padded word</returns>
        private bool areStringsAt(int start, int length, params String[] strings)
        {
            //No string can be present at a negative offset or beyond the padded word
            if ((start < 0) || (length < 0) || (start > m_word.Length - length))
            {
                return false;
            }

            for (int idx = 0; idx < strings.Length; idx++)
            {
                String candidate = strings[idx];

                //Compare in place (ordinal, like String ==) instead of allocating a substring
                //for every probe; a candidate of a different length can never be equal
                if ((candidate != null)
                    && (candidate.Length == length)
                    && (String.CompareOrdinal(m_word, start, candidate, 0, length) == 0))
                {
                    return true;
                }
            }

            return false;
        }
    }
}
					

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
United States United States
My name is Adam Nelson. I've been a professional programmer since 1996, working on everything from database development, early first-generation web applications, modern n-tier distributed apps, high-performance wireless security tools, to my last job as a Senior Consultant at BearingPoint posted in Baghdad, Iraq training Iraqi developers in the wonders of C# and ASP.NET. I am currently an Engineering Director at Dell.

I have a wide range of skills and interests, including cryptography, image processing, computational linguistics, military history, 3D graphics, database optimization, and mathematics, to name a few.

Comments and Discussions