Click here to Skip to main content
15,891,905 members
Articles / Programming Languages / C++

A Spell Checking Engine

Rate me:
Please Sign up or sign in to vote.
4.88/5 (16 votes)
5 Feb 2001 267.1K   7K   108  
A free spell checking engine for use in your C++ applications. Includes the current US English dictionary.
/*

  Copyright:		2000
  Author:			Matthew T Gullett
  Email:			gullettm@yahoo.com
  Name:				CFPSSpellCheckEngine
  Part of:			Spell Checking Engine  
  Requires:			USMain.dic, [USUser.dic], [USCommon.dic]

  DESCRIPTION
  ----------------------------------------------------------
  This class is designed to implement a phonetic spell
  checking engine with support for US English (and probably
  UK english)  It implements a simple interface for accessing
  spell checking functions.  It is designed to be very fast
  with support for a user dictionary and a common misspelling
  dictionary to improve performance and user perception.  


  INFO:
  ----------------------------------------------------------
  This class is provided -as is-.  No warranty as to the
  function or performance of this class is provided either 
  written or implied.  
  
  You may freely use this code and modify it as necessary,
  as long as this header is unmodified and credit is given
  to the author in the application(s) in which it is
  incorporated.

*/

#include "stdafx.h"
#include "resource.h"
#include "FPSSpellCheckEngine.h"

#include "io.h"
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

#include "PrShtSpellOptions.h"

#define FPSSPELL_CUSTOM_ALGORITHM

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

// Construct an engine with no dictionaries attached.  The dictionary
// objects are allocated later by InitEngine().
CFPSSpellCheckEngine::CFPSSpellCheckEngine()
	: m_pMainDic(NULL)
	, m_pUserDic(NULL)
{
}

// Empty the in-memory word caches and destroy the dictionary objects.
CFPSSpellCheckEngine::~CFPSSpellCheckEngine()
{
	// drop the cached common-misspelling and recent-find entries
	m_CmnUseDic.RemoveAll();
	m_RecentFinds.RemoveAll();

	// deleting a NULL pointer is a no-op, so no explicit test is
	// needed when InitEngine() was never called or failed part-way
	delete m_pMainDic;
	delete m_pUserDic;
}

// Initialize the spell checking engine.
//
// Caches the common-misspelling dictionary, then creates and opens the
// main dictionary followed by the user dictionary.  Must only be called
// on an engine that has not been initialized yet (see the ASSERTs).
//
// Returns FPSSPELLCHECK_ERROR_MEMORY if a dictionary object cannot be
// allocated; otherwise the status reported by the last dictionary
// Open() / CreateNew() call that was made.
int CFPSSpellCheckEngine::InitEngine()
{
	ASSERT(!m_pUserDic);
	ASSERT(!m_pMainDic);
	ASSERT(m_RecentFinds.GetCount() == 0);
	ASSERT(m_CmnUseDic.GetCount() == 0);

	int iStatus = FPSSPELLCHECK_ERROR_NONE;

	// load the common misspelling dictionary into memory
	CacheCommonUseWords();

	// --- main dictionary -------------------------------------------
	try
	{
		m_pMainDic = new CFPSDictionary;
	}
	catch(...)
	{
		m_pMainDic = NULL;
	}
	ASSERT(m_pMainDic);
	if (m_pMainDic == NULL)
		iStatus = FPSSPELLCHECK_ERROR_MEMORY;

	if (IsOK(iStatus))
		iStatus = m_pMainDic->Open(GetOptions().GetMainDic());

	// without a usable main dictionary there is no point in going on
	if (!IsOK(iStatus))
		return iStatus;

	// --- user dictionary -------------------------------------------
	try
	{
		m_pUserDic = new CFPSDictionary;
	}
	catch(...)
	{
		m_pUserDic = NULL;
	}
	ASSERT(m_pUserDic);
	if (m_pUserDic == NULL)
		iStatus = FPSSPELLCHECK_ERROR_MEMORY;

	if (!IsOK(iStatus))
		return iStatus;

	iStatus = m_pUserDic->Open(GetOptions().GetUserDic());

	// The user dictionary is optional and can be created on demand
	// when words are added, so a failed open is not fatal: start a
	// new dictionary instead and report the result of that.
	if (iStatus == CFPSDictionary_ERROR_DIC_OPEN_FAILED)
		iStatus = m_pUserDic->CreateNew();

	return iStatus;
}

// Main lookup function: decide whether a word is spelled correctly and,
// if it is not, collect possible replacements.
//
//   lpszWord        - word to check (NUL terminated)
//   PossibleMatches - always emptied first; receives the suggestions when
//                     the word is not found
//   bSuggest        - when TRUE, ask the main dictionary for suggestions
//                     for a word that was not found.  A hit in the common
//                     misspelling list adds its correction regardless of
//                     this flag.
//
// Returns TRUE if the word is accepted (ignored by the option settings,
// found in the recent-finds cache, the user dictionary or the main
// dictionary), FALSE otherwise.
BOOL CFPSSpellCheckEngine::FindWord(LPCSTR lpszWord, CStringList &PossibleMatches, BOOL bSuggest)
{
	ASSERT(lpszWord);
	ASSERT(AfxIsValidString(lpszWord));
	ASSERT(m_pMainDic);
	ASSERT(m_pUserDic);

	CString strTemp;
	char szWordToFind[200];
	char szUpper[200];

	// clear possible matches
	PossibleMatches.RemoveAll();

	// words excluded by the option settings (numbers, addresses, upper
	// case) are accepted without consulting any dictionary
	if (IsIgnoredWord(lpszWord))
		return TRUE;

	// The working buffers are fixed size.  A word that does not fit
	// (a long URL or path, for instance) is reported as not found with
	// no suggestions instead of being copied past the end of the buffers.
	if (lstrlen(lpszWord) >= static_cast<int>(sizeof(szWordToFind)))
		return FALSE;

	// create 2 trimmed copies: one as typed, one upper case
	strcpy(szWordToFind, lpszWord);
	strcpy(szUpper, lpszWord);

	TrimRight(szWordToFind);
	TrimRight(szUpper);
	CharUpper(szUpper);

	// *******************************************************
	// 1. look in recent finds first
	// Entries are stored under the upper-cased word (see SetAt below
	// and IgnoreWord), so the lookup must use the upper-cased key too.
	if (m_RecentFinds.Lookup(szUpper, strTemp))
		return TRUE;

	// *******************************************************
	// 2. look in user dictionary
	if (m_pUserDic->IsWordInDictionary(szWordToFind))
		return TRUE;

	// *******************************************************
	// 3. look in common misspellings
	if (m_CmnUseDic.Lookup(szUpper, strTemp))
	{
		// known misspelling: offer its stored correction only
		PossibleMatches.AddTail(strTemp);
		return FALSE;
	}

	// *******************************************************
	// 4. look in main dictionary
	if (m_pMainDic->IsWordInDictionary(szWordToFind))
	{
		// remember the hit so the next lookup is answered by step 1
		m_RecentFinds.SetAt(szUpper, lpszWord);
		return TRUE;
	}

	// not found anywhere: find the possible matches for this word
	if (bSuggest)
		m_pMainDic->FindMatches(szWordToFind, PossibleMatches);

	return FALSE;
}

// cache the common mispelled dictionary
void CFPSSpellCheckEngine::CacheCommonUseWords()
{
	CStdioFile fp;
	CString strLine;
	CString strIncorrect;
	CString strCorrect;
	int iPos = 0;

	if (fp.Open(m_Options.GetCommonDic(), CFile::modeRead | CFile::shareExclusive))
	{
		while (fp.ReadString(strLine))
		{
			strLine.TrimLeft();
			strLine.TrimRight();

			iPos = strLine.Find('=');
			if (iPos > 0)
			{
				strIncorrect = strLine.Left(iPos);
				strCorrect = strLine.Mid(iPos+1);

				strIncorrect.TrimLeft();
				strIncorrect.TrimRight();
				strIncorrect.MakeUpper();

				strCorrect.TrimLeft();
				strCorrect.TrimRight();

				m_CmnUseDic.SetAt(strIncorrect, strCorrect);
			}
		}

		fp.Close();
	}
}

// Load the engine options from a configuration file and initialize the
// engine.
//
//   lpszConfigFile - path of the options file; NULL or "" selects
//                    "SpellCheckOptions.cfg" in the directory of the
//                    running module
//
// Returns the value of InitEngine() unchanged.
// NOTE(review): that value is an int status code although this overload
// is declared BOOL - callers should compare it against the
// FPSSPELLCHECK_ERROR_* codes rather than treat it as TRUE/FALSE; confirm
// against the header before changing the signature.
BOOL CFPSSpellCheckEngine::InitEngine(LPCSTR lpszConfigFile)
{
	ASSERT(lpszConfigFile == NULL || AfxIsValidString(lpszConfigFile));

	CString strFile = lpszConfigFile;

	// if no config file is passed in, assume we should use
	// a default config file in the apps path
	if (strFile.IsEmpty())
	{
		// _splitpath requires each output buffer to be at least as
		// large as its _MAX_* constant, so size the buffers from those
		// constants instead of ad-hoc lengths
		char szAppPath[_MAX_PATH] = "";
		char szDrive[_MAX_DRIVE] = "";
		char szDir[_MAX_DIR] = "";

		const DWORD dwLen = ::GetModuleFileName(AfxGetInstanceHandle(), szAppPath, sizeof(szAppPath));

		// only split a path that was retrieved completely; on failure
		// or truncation drive and directory stay empty and the bare
		// default file name is used
		if (dwLen > 0 && dwLen < sizeof(szAppPath))
			_splitpath(szAppPath, szDrive, szDir, NULL, NULL);

		strFile = szDrive;
		strFile += szDir;
		strFile += "SpellCheckOptions.cfg";
	}

	m_Options.LoadOptions(strFile);

	return InitEngine();
}

void CFPSSpellCheckEngine::DisplayOptions()
{
	CPrShtSpellOptions dlg("Spell Checking Options");

	dlg.LoadOptions(this);
	if (dlg.DoModal() == IDOK)
	{
		dlg.SaveOptions(this);

		m_Options.SaveOptions(m_Options.GetOptionsFileName());
	}
}

// Discard the cached common-misspelling entries and read them again
// from the file named in the current options.
void CFPSSpellCheckEngine::ReloadCommonDic()
{
	// empty the cache first so entries that are no longer in the file
	// do not survive the reload
	m_CmnUseDic.RemoveAll();

	CacheCommonUseWords();
}

// Decide whether the option settings exclude a word from spell checking.
//
// The word is scanned once and classified as:
//   - containing a digit ('0'..'9')
//   - consisting only of the upper-case letters 'A'..'Z'
//   - looking like an address (contains ':', '/', '\' or '@')
//
// Returns TRUE if any classification that applies is switched on in the
// options, FALSE otherwise.
BOOL CFPSSpellCheckEngine::IsIgnoredWord(LPCSTR lpszWord)
{
	BOOL bReturn = FALSE;
	BOOL bNumbers = FALSE;
	BOOL bUpperCase = TRUE;
	BOOL bAddress = FALSE;
	const int iLen = lstrlen(lpszWord);

	for (int iPos = 0; iPos < iLen; iPos++)
	{
		const char cThisChar = lpszWord[iPos];

		// anything outside 'A'..'Z' means the word is not all upper
		// case; the range is inclusive, so 'A' and 'Z' themselves count
		// as upper-case letters
		if (cThisChar < 'A' || cThisChar > 'Z')
			bUpperCase = FALSE;
		if (cThisChar >= '0' && cThisChar <= '9')
			bNumbers = TRUE;
		if (cThisChar == ':' || cThisChar == '/' || cThisChar == '\\' || cThisChar == '@')
			bAddress = TRUE;
	}

	if (m_Options.IgnoreAddresses() && bAddress)
		bReturn = TRUE;
	if (m_Options.IgnoreNumbers() && bNumbers)
		bReturn = TRUE;
	if (m_Options.IgnoreUpperCase() && bUpperCase)
		bReturn = TRUE;

	return bReturn;
}

// Add a word to the recent-finds cache.  The entry is keyed by the
// trimmed, upper-cased word and stores the text exactly as passed in.
void CFPSSpellCheckEngine::IgnoreWord(LPCSTR lpszWord)
{
	ASSERT(lpszWord);
	ASSERT(AfxIsValidString(lpszWord));

	// build the cache key
	CString strKey(lpszWord);
	strKey.TrimLeft();
	strKey.TrimRight();
	strKey.MakeUpper();

	m_RecentFinds.SetAt(strKey, lpszWord);
}

// Return the word count reported by the main dictionary.  The main
// dictionary must already exist, i.e. InitEngine() must have created it.
long CFPSSpellCheckEngine::GetRecordCount()
{
	ASSERT(m_pMainDic);

	const long lWordCount = m_pMainDic->GetWordCount();
	return lWordCount;
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
United States United States
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions