Click here to Skip to main content
15,880,854 members
Articles / Programming Languages / C++

Implement Phonetic ("Sounds-like") Name Searches with Double Metaphone Part I: Introduction & C++ Implementation

Rate me:
Please Sign up or sign in to vote.
4.91/5 (21 votes)
19 Mar 2007CPOL15 min read 147.6K   2.8K   60  
Introduces the Double Metaphone algorithm for phonetic comparison of proper names, and provides a practical C++ implementation for use in the reader's projects.
/**
 * ShortDoubleMetaphone.cs
 * 
 * An implementation of Lawrence Phillips' Double Metaphone phonetic matching
 * algorithm, published in C/C++ Users Journal, June, 2000.  This implementation
 * implements Lawrence's proposed optimization, whereby four-character metaphone keys
 * are represented as four nibbles in an unsigned short.  This dramatically improves
 * storage and search efficiency.
 * 
 * This implementation was written by Adam J. Nelson (anelson@nullpointer.net).
 * It is based on the general C# implementation, also by Adam Nelson.
 * For the latest version of this implementation, implementations
 * in other languages, and links to articles I've written on the use of my various
 * Double Metaphone implementations, see:
 * http://www.nullpointer.net/anelson/
 * 
 * Note that since this impl implements IComparable, it can be used to key associative containers,
 * thereby easily implementing phonetic matching within a simple container.  Examples of this
 * should have been included in the archive from which you obtained this file.
 * 
 * Current Version: 1.0.0
 * Revision History:
 * 	1.0.0 - ajn - First release
 * 
 * This implementation, and optimizations, Copyright (C) 2003, Adam J. Nelson
 * The Double Metaphone algorithm was written by Lawrence Phillips, and is 
 * Copyright (c) 1998, 1999 by Lawrence Philips.
 */
using System;
using System.Text;

namespace nullpointer.Metaphone
{
    /// <summary>Subclass of DoubleMetaphone, Adam Nelson's (anelson@nullpointer.net)
    ///     C# implementation of Lawrence Phillips' Double Metaphone algorithm,
    ///     published in C/C++ Users Journal, June, 2000.
    /// 
    ///     This subclass implements Lawrence's suggested optimization, whereby
    ///     four-letter metaphone keys are represented as four nibbles in an
    ///     unsigned short.  This greatly improves storage and search efficiency.</summary>
	public class ShortDoubleMetaphone : DoubleMetaphone
	{
		// Constants representing the characters in a metaphone key.
		// Each single-character code fits in one nibble (0x0 - 0xF).  The
		// two-character constants (FX, KL, KN, ...) pack the first character
		// into the high nibble and the second into the low nibble.
		public const ushort METAPHONE_A           = 0x01;
		public const ushort METAPHONE_F           = 0x02;
		public const ushort METAPHONE_FX          = ((METAPHONE_F << 4) | METAPHONE_X);
		public const ushort METAPHONE_H           = 0x03;
		public const ushort METAPHONE_J           = 0x04;
		public const ushort METAPHONE_K           = 0x05;
		public const ushort METAPHONE_KL          = ((METAPHONE_K << 4) | METAPHONE_L);
		public const ushort METAPHONE_KN          = ((METAPHONE_K << 4) | METAPHONE_N);
		public const ushort METAPHONE_KS          = ((METAPHONE_K << 4) | METAPHONE_S);
		public const ushort METAPHONE_L           = 0x06;
		public const ushort METAPHONE_M           = 0x07;
		public const ushort METAPHONE_N           = 0x08;
		public const ushort METAPHONE_P           = 0x09;
		public const ushort METAPHONE_S           = 0x0A;
		public const ushort METAPHONE_SK          = ((METAPHONE_S << 4) | METAPHONE_K);
		public const ushort METAPHONE_T           = 0x0B;
		public const ushort METAPHONE_TK          = ((METAPHONE_T << 4) | METAPHONE_K);
		public const ushort METAPHONE_TS          = ((METAPHONE_T << 4) | METAPHONE_S);
		public const ushort METAPHONE_R           = 0x0C;
		public const ushort METAPHONE_X           = 0x0D;
		public const ushort METAPHONE_0           = 0x0E;
		public const ushort METAPHONE_SPACE       = 0x0F;
		public const ushort METAPHONE_NULL        = 0x00;
		
		/// <summary>Sentinel value, used to denote an invalid key</summary>
		public const ushort METAPHONE_INVALID_KEY = 0xffff;
		
		/// The ushort versions of the primary and alternate keys
		private ushort m_primaryShortKey, m_alternateShortKey;
		
		/// <summary>Default ctor.  Runs the base class default ctor and sets both
		///     ushort keys to 0.</summary>
		public ShortDoubleMetaphone() : base()
		{
			m_primaryShortKey = m_alternateShortKey = 0;
		}
		
		/// <summary>Initializes the base class with the given word, then computes
		///     ushort representations of the metaphone keys computed by the
		///     base class</summary>
		/// 
		/// <param name="word">Word for which to compute metaphone keys</param>
		public ShortDoubleMetaphone(String word) : base(word)
		{
			updateShortKeys();
		}
		
		/// <summary>Sets a new current word, computing the string and ushort representations
		///     of the metaphone keys of the given word.
		/// 
		///     Note that this uses the new modifier, which hides the base class
		///     computeKeys rather than overriding it; the base class version is
		///     called explicitly first.  Because this is hiding and not a virtual
		///     override, a call made through a DoubleMetaphone-typed reference
		///     will NOT refresh the ushort keys.</summary>
		/// 
		/// <param name="word">New current word for which to compute metaphone keys</param>
		new public void computeKeys(String word) {
			base.computeKeys(word);
			updateShortKeys();
		}
		
		/// <summary>The primary metaphone key, represented as a ushort</summary>
		public ushort PrimaryShortKey {
			get {
				return m_primaryShortKey;
			}
		}
		
		/// <summary>The alternative metaphone key, or METAPHONE_INVALID_KEY if the current
		///     word has no alternate key by double metaphone</summary>
		public ushort AlternateShortKey {
			get {
				return m_alternateShortKey;
			}
		}
		
		/// <summary>Recomputes both ushort keys from the string keys currently exposed
		///     by the base class.  A null AlternateKey yields METAPHONE_INVALID_KEY.</summary>
		private void updateShortKeys() {
			m_primaryShortKey = metaphoneKeyToShort(this.PrimaryKey);
			m_alternateShortKey = (this.AlternateKey != null)
				? metaphoneKeyToShort(this.AlternateKey)
				: METAPHONE_INVALID_KEY;
		}
		
		/// <summary>Represents a string metaphone key as a ushort, one nibble per
		///     character, with the first character ending up in the most
		///     significant nibble of a four-character key.</summary>
		/// 
		/// <param name="metaphoneKey">String metaphone key.  Must be four chars long; if you change 
		///     METAPHONE_KEY_LENGTH in DoubleMetaphone, this will break.  Length
		///     tests are not performed, for performance reasons.  Must not be null.</param>
		/// 
		/// <returns>ushort representation of the given metaphone key</returns>
		static private ushort metaphoneKeyToShort(String metaphoneKey) {
			ushort result = 0;
			
			for (int idx = 0; idx < metaphoneKey.Length; idx++) {
				// Make room for the next nibble; for keys longer than four
				// characters the oldest nibbles are shifted out of the ushort.
				result <<= 4;
				result |= metaphoneCharToNibble(metaphoneKey[idx]);
			}
			
			return result;
		}
		
		/// <summary>Maps a single metaphone key character to its nibble code.</summary>
		/// 
		/// <param name="metaphoneChar">One character of a metaphone key</param>
		/// 
		/// <returns>The matching METAPHONE_* constant, or METAPHONE_NULL for any
		///     character that is not part of the metaphone alphabet</returns>
		static private ushort metaphoneCharToNibble(Char metaphoneChar) {
			switch (metaphoneChar) {
				case 'A': return METAPHONE_A;
				case 'P': return METAPHONE_P;
				case 'S': return METAPHONE_S;
				case 'K': return METAPHONE_K;
				case 'X': return METAPHONE_X;
				case 'J': return METAPHONE_J;
				case 'T': return METAPHONE_T;
				case 'F': return METAPHONE_F;
				case 'N': return METAPHONE_N;
				case 'H': return METAPHONE_H;
				case 'M': return METAPHONE_M;
				case 'L': return METAPHONE_L;
				case 'R': return METAPHONE_R;
				case ' ': return METAPHONE_SPACE;
				// The Double Metaphone algorithm writes the "th" sound as the
				// digit '0'.  Previously only the NUL character was recognized
				// here, so '0' fell through to METAPHONE_NULL.
				// NOTE(review): base class is not visible from this file; confirm
				// it emits '0' for "th" as the published algorithm does.
				case '0':
				case '\0': // still accepted so earlier behavior is preserved
					return METAPHONE_0;
				default:
					return METAPHONE_NULL; //This should never happen
			}
		}
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Web Developer
United States United States
My name is Adam Nelson. I've been a professional programmer since 1996, working on everything from database development, early first-generation web applications, modern n-tier distributed apps, high-performance wireless security tools, to my last job as a Senior Consultant at BearingPoint posted in Baghdad, Iraq training Iraqi developers in the wonders of C# and ASP.NET. I am currently an Engineering Director at Dell.

I have a wide range of skills and interests, including cryptography, image processing, computational linguistics, military history, 3D graphics, database optimization, and mathematics, to name a few.

Comments and Discussions