Click here to Skip to main content
15,891,942 members
Articles / Programming Languages / C++

Implement Phonetic ("Sounds-like") Name Searches with Double Metaphone Part I: Introduction & C++ Implementation

Rate me:
Please Sign up or sign in to vote.
4.91/5 (21 votes)
19 Mar 2007 | CPOL | 15 min read | 148.7K | 2.8K | 60
Introduces the Double Metaphone algorithm for phonetic comparison of proper names, and provides a practical C++ implementation for use in the reader's projects.
// WordLookupSample.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"

#include "..\MetaphoneLib\DoubleMetaphone.h"

#include <fstream>
#include <iostream>

#include <map> 
#include <set>
#include <string>

// Template argument supplied to every DoubleMetaphone<> instantiation in this file.
// NOTE(review): presumably the maximum length of each generated key -- confirm
// against DoubleMetaphone.h.
#define METAPHONE_KEY_LENGTH 	4

using namespace std;

// Maps a metaphone key to each word that produced it; a multimap because
// several words can be stored under the same key.
typedef std::multimap<string, string> WordMapType;
// Ordered, duplicate-free collection of words returned by a search.
typedef std::set<string> WordListType;

// Adds to matchingWord every word in `words` that is stored under the primary
// or (when present) alternate metaphone key of searchWord.
void phoneticMatch(WordMapType& words, const string& searchWord, WordListType& matchingWord);
// Compares the metaphone keys of the two words and returns 1 (primary/primary),
// 2 (primary/alternate in either direction), 3 (alternate/alternate) or 0 (no match).
int computeMatchLevel(const string& searchWord, const string& candidateWord);

int _tmain(int argc, _TCHAR* argv[])
{
	//Attempt to open the list of names
	ifstream file;
	
	file.open("..\\namelist.txt");
	if (file.fail()) {
		cout << "Unable to open ..\\namelist.txt" << endl;
		return -1;
	}
	
	//Read the contents of the file
	//Each line is a word
	WordMapType wordMap;
	DoubleMetaphone<METAPHONE_KEY_LENGTH> mphone;
	char line[100];
	while (!file.eof()) {
		//Read a word from the file
		file.getline(line, 100);
		
		//Compute the metaphone keys for the word
		mphone.computeKeys(line);
		
		//Add a string object containing the word to the map,
		//with the primary and alternate metaphone keys as map keys
		string word = line;
		wordMap.insert(WordMapType::value_type(string(mphone.getPrimaryKey()), 
											   word));
		if (mphone.getAlternateKey() != NULL) {
			wordMap.insert(WordMapType::value_type(string(mphone.getAlternateKey()), 
												   word));
		}
	}
	file.close();
	
	//wordMap is populated.  Enter the loop that performs searches
	string searchWord;
	
	while (1) {
		cout << "Enter word to search (q to quit): ";
		cin >> searchWord;
		
		if (searchWord.compare("q") == 0) {
			break;
		}
		
		//Declare a set to contain the matches
		WordListType matchingWords;
		
		//Perform the search
		phoneticMatch(wordMap, searchWord, matchingWords);
		
		//Display the results
		for (WordListType::iterator iter = matchingWords.begin();
			  iter != matchingWords.end();
			  iter++) {
			cout << "\tFound: " << (*iter).c_str();
			
			//Display and compute the match level
			cout << " (Match level: " << computeMatchLevel(searchWord, (*iter)) << ")" << endl;
		}
		matchingWords.clear();
	}
}

void phoneticMatch(WordMapType& words, const string& searchWord, WordListType& matchingWords) {
	DoubleMetaphone<METAPHONE_KEY_LENGTH> searchKey(searchWord.c_str());
	string search1 = searchKey.getPrimaryKey();
	cout << "Searching for [" << searchWord.c_str() << "]" << endl;
	
	for (WordMapType::iterator iter = words.lower_bound(search1);
		  iter != words.upper_bound(search1);
		  iter++) {
		matchingWords.insert((*iter).second);
	}
	if (searchKey.getAlternateKey() != NULL) {
		string search2 = searchKey.getAlternateKey();
		for (WordMapType::iterator iter = words.lower_bound(search2);
			  iter != words.upper_bound(search2);
			  iter++) {
			matchingWords.insert((*iter).second);
		}
	}
}

int computeMatchLevel(const string& searchWord, const string& candidateWord) {
	DoubleMetaphone<METAPHONE_KEY_LENGTH> searchWordMphone(searchWord.c_str());
	DoubleMetaphone<METAPHONE_KEY_LENGTH> candidateWordMphone(candidateWord.c_str());
	
	if (strcmp(searchWordMphone.getPrimaryKey(), 
			   candidateWordMphone.getPrimaryKey()) == 0) {
		//Primary-Primary match, that's level 1 (strongest)
		return 1;
	}
	
	if (searchWordMphone.getAlternateKey() != NULL) {
		if (strcmp(searchWordMphone.getAlternateKey(), 
				   candidateWordMphone.getPrimaryKey()) == 0) {
			//Alternate-Primary match, that's level 2 (normal)
			return 2;
		} 
	}
	
	if (candidateWordMphone.getAlternateKey() != NULL) {
		if (strcmp(searchWordMphone.getPrimaryKey(), 
				   candidateWordMphone.getAlternateKey()) == 0) {
			//Primary-Alternate match, that's level 2 (normal)
			return 2;
		} 
	}
	
	if (searchWordMphone.getAlternateKey() != NULL &&
		candidateWordMphone.getAlternateKey() != NULL) {
		if (strcmp(searchWordMphone.getAlternateKey(), 
				   candidateWordMphone.getAlternateKey()) == 0) {
			//Alternate-Alternate match, that's level 3 (minimal)
			return 3;
		} 
	}

	return 0;
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Web Developer
United States
My name is Adam Nelson. I've been a professional programmer since 1996, working on everything from database development, early first-generation web applications, modern n-tier distributed apps, high-performance wireless security tools, to my last job as a Senior Consultant at BearingPoint posted in Baghdad, Iraq training Iraqi developers in the wonders of C# and ASP.NET. I am currently an Engineering Director at Dell.

I have a wide range of skills and interests, including cryptography, image processing, computational linguistics, military history, 3D graphics, database optimization, and mathematics, to name a few.

Comments and Discussions