// Dictionary.cpp: implementation of the Dictionary classes
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "Dictionary.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
#define NODE_HAS_NEXT 0x01
#define NODE_HAS_ALTERN 0x02
#define REMOVEDWORD_TERMINATOR char(-1)
//////////////////////////////////////////////////////////////////////////////
// //
// CNode //
// //
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// //
// Function : CNode //
// Purpose : Constructor //
// Params : c - Character this node is to represent //
// Returns : N/A //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
CNode::CNode(char c)
{
m_Character = c;
m_pAlternative = NULL;
m_pNext = NULL;
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : ~CNode //
// Purpose : Destructor //
// Params : None //
// Returns : N/A //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
CNode::~CNode()
{
delete m_pAlternative;
delete m_pNext;
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : InsertWord //
// Purpose : Insert the given word into the tree //
// Params : szWord - Word to insert into the tree //
// Returns : None //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
void CNode::InsertWord(LPCSTR szWord)
{
if (m_Character == szWord[0])
{
// This is the right node for this character!
if (szWord[0])
{
// Only carry on if we have not reached the end of the word!
if (m_pNext == NULL)
{
// Create a new "Next" node
m_pNext = new CNode(szWord[1]);
}
m_pNext->InsertWord(&szWord[1]);
}
}
else
{
// This is not the right node, so we need to see if we have an alternative
if (m_pAlternative == NULL)
{
// Create a new alternative node
m_pAlternative = new CNode(szWord[0]);
}
m_pAlternative->InsertWord(szWord);
}
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : IsWordListed //
// Purpose : Determine if a word appears in the tree //
// Params : szWord - Word to check //
// bMatchCase - true if case must be matched, else false //
// match - Enumeration showing if word was found //
// pFinalNode - Pointer to the final node encountered //
// Returns : None //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
void CNode::IsWordListed(LPCSTR szWord, bool bMatchCase, WordMatch & match, bool bIsFirstNode, CNode ** pFinalNode)
{
// If we are collecting nodes encountered, add ourselves to the array
if (pFinalNode) *pFinalNode = this;
// Start out by assuming that we have a perfect match (we may be proved wrong later!)
if (bIsFirstNode) match = eMatchPerfect;
if (m_Character == szWord[0])
{
// Unless we have reached the end, continue matching the rest of the word
if (m_Character != '\0')
{
if (m_pNext)
{
m_pNext->IsWordListed(&szWord[1], bMatchCase, match, false, pFinalNode);
if ((match != eMatchPerfect) && (m_Character >= 'A') && (m_Character <= 'Z') && m_pAlternative && !bMatchCase)
{
// Could be a case thing - see if we have an alternative
WordMatch matchTemp;
m_pAlternative->IsWordListed(szWord, bMatchCase, matchTemp, bIsFirstNode, pFinalNode);
match = CDictionary::GetBestMatch(match, matchTemp);
}
}
else
{
// Something not right here - if the character is not '\0', we should always have a next pointer
ASSERT(0);
match = eMatchInternalError;
}
}
}
else if ((tolower(m_Character) == tolower(szWord[0]) && !bMatchCase))
{
// We have found a match, but with the wrong case. Now, we could have the instance where
// there are two words in the dictionary, with different capitalisation (for example "Bob" and "bob")
// First off, if we still have a perfect match, see if we can get a perfect match from an alternative
if (match == eMatchPerfect)
{
if (m_pAlternative)
{
// Check to see what we can find
m_pAlternative->IsWordListed(szWord, bMatchCase, match, bIsFirstNode, pFinalNode);
}
else
{
// No alternative, hence no match
match = eMatchNone;
}
}
switch (match)
{
case eMatchNone:
case eMatchMixedCase:
case eMatchCapitalisedFirst:
// Carry on with this node, but with an imperfect match
if (m_pNext)
{
// Set the match according to whether or not we are the first node
match = (bIsFirstNode && m_Character == tolower(szWord[0])) ? eMatchCapitalisedFirst : eMatchMixedCase;
// And carry on trying to find a match
m_pNext->IsWordListed(&szWord[1], bMatchCase, match, false, pFinalNode);
}
else
{
// No next node, which is not right
ASSERT(0);
match = eMatchInternalError;
}
break;
default:
// Either we found a perfect match, or the next best thing - a word with only a capitalised first letter
// Or of course we could have hit an internal error. However, basically, we finish here, as we have the
// best result we are going to get.
break;
}
}
else if (m_pAlternative)
{
// This character doesn't match - see if any of the alternatives do
m_pAlternative->IsWordListed(szWord, bMatchCase, match, bIsFirstNode, pFinalNode);
}
else
{
// We have no match for this character - hence, we have no match
match = eMatchNone;
}
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : GetPatternMatchingWords //
// Purpose : Get words in the dictionary that match the given pattern //
// Params : szWord - Pattern to match (? represents a single char) //
// straSuggestions - Reference to array for results //
// strWordSoFar - Word built so far by parent items //
// Returns : None //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
void CNode::GetPatternMatchingWords(LPCSTR szWord, CStringArray & straSuggestions, CString strWordSoFar)
{
if ((m_Character == szWord[0]) || (tolower(m_Character) == tolower(szWord[0])) || (szWord[0] == '?'))
{
// We have a match for this character
if (m_Character == '\0' && szWord[0] == m_Character)
{
// We have reached the end of the word, add it to the suggestions
straSuggestions.Add(strWordSoFar);
}
else
{
if (m_pNext)
{
// We have not reached the end, and we have more node, so continue checking
m_pNext->GetPatternMatchingWords(&szWord[1], straSuggestions, strWordSoFar + m_Character);
}
if (m_pAlternative)
{
// We have not reached the end, and we have more node, so continue checking
m_pAlternative->GetPatternMatchingWords(szWord, straSuggestions, strWordSoFar);
}
}
}
else
{
// Does not match - see if one of the alternatives match
if (m_pAlternative)
m_pAlternative->GetPatternMatchingWords(szWord, straSuggestions, strWordSoFar);
}
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : Serialise //
// Purpose : Serialise the node into or out of a file //
// Params : ar - Reference to archive to / from which we transfer data //
// Returns : None //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
void CNode::Serialise(CArchive & ar)
{
if (ar.IsLoading())
{
// Loading - get the character
ar >> m_Character;
// Get the bitmask indicating what children we have
BYTE byBitmask;
ar >> byBitmask;
// And load the children according to the bitmask
if (byBitmask & NODE_HAS_NEXT)
{
m_pNext = new CNode;
m_pNext->Serialise(ar);
}
if (byBitmask & NODE_HAS_ALTERN)
{
m_pAlternative = new CNode;
m_pAlternative->Serialise(ar);
}
}
else
{
// Saving - save the character
ar << m_Character;
// Create and save a bitmask indicating which children we have
BYTE byBitmask = 0;
if (m_pNext) byBitmask |= NODE_HAS_NEXT;
if (m_pAlternative) byBitmask |= NODE_HAS_ALTERN;
ar << byBitmask;
// Serialise our children
if (m_pNext) m_pNext->Serialise(ar);
if (m_pAlternative) m_pAlternative->Serialise(ar);
}
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : GetWordCount //
// Purpose : Get the word count in the tree //
// Params : nCount - Reference to the count variable //
// Returns : None //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
void CNode::GetWordCount(int & nCount)
{
if (m_Character == '\0') nCount++;
if (m_pNext) m_pNext->GetWordCount(nCount);
if (m_pAlternative) m_pAlternative->GetWordCount(nCount);
}
//////////////////////////////////////////////////////////////////////////////
// //
// CDictionary //
// //
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// //
// Function : CDictionary //
// Purpose : Constructor //
// Params : None //
// Returns : N/A //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
CDictionary::CDictionary()
{
m_pRootNode = NULL;
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : ~CDictionary //
// Purpose : Destructor //
// Params : None //
// Returns : N/A //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
CDictionary::~CDictionary()
{
delete m_pRootNode;
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : InsertWord //
// Purpose : Insert a word into the dictionary //
// Params : szWord - Word to be inserted //
// Returns : None //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
void CDictionary::InsertWord(LPCSTR szWord)
{
ASSERT(szWord);
if (m_pRootNode == NULL)
{
// We have no root node, so start the tree off with the first character of the word
m_pRootNode = new CNode(szWord[0]);
}
// Insert the word into the tree
m_pRootNode->InsertWord(szWord);
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : IsWordListed //
// Purpose : Determines if a word is listed in the dictionary //
// Params : szWord - Word to check //
// bMatchCase - true if case must be matched, else false //
// Returns : Word match enumeration showing if word was found //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
WordMatch CDictionary::IsWordListed(LPCSTR szWord)
{
if (m_pRootNode == NULL) return eMatchNone;
// First off, see if we have an exact case match
WordMatch match;
m_pRootNode->IsWordListed(szWord, true, match, true);
if (match != eMatchPerfect)
{
// Nope, see if we have anything remotely close
m_pRootNode->IsWordListed(szWord, false, match, true);
}
return match;
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : GetSuggestions //
// Purpose : Get suggestions for a given word //
// Params : szWord - Word to suggest for //
// straSuggestions - Reference to array for results //
// bCaseSuggestionsOnly - true to return only suggestions that //
// are the same as the word, but with differing case //
// Returns : Number of suggestions found //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
int CDictionary::GetSuggestions(LPCSTR szWord, CStringArray & straSuggestions, bool bCaseSuggestionsOnly)
{
if (m_pRootNode == NULL) return 0;
// Check that the word is not listed - it is silly to suggest for a valid word!
ASSERT(IsWordListed(szWord) != eMatchPerfect);
if (IsWordListed(szWord) != eMatchPerfect)
{
// Get the suggestions that match the word but with difference cases
m_pRootNode->GetPatternMatchingWords(szWord, straSuggestions);
if (!bCaseSuggestionsOnly)
{
int nWordLength = strlen(szWord), nCount;
char * szBuffer = new char[nWordLength + 2];
// Get the substitution suggestions
// For each character, we consider that it may be wrong and see if there is anything else matching
for (nCount = 0; nCount < nWordLength; nCount++)
{
strcpy(szBuffer, szWord);
szBuffer[nCount] = '?';
m_pRootNode->GetPatternMatchingWords(szBuffer, straSuggestions);
}
// Get the deletion suggestions
// For each character position, we assume that there is a character missing
for (nCount = 0; nCount <= nWordLength; nCount++)
{
memset(szBuffer, 0, nWordLength + 2);
strncpy(szBuffer, szWord, nCount);
strcat(szBuffer, "?");
strcat(szBuffer, &szWord[nCount]);
m_pRootNode->GetPatternMatchingWords(szBuffer, straSuggestions);
}
// Get the insertion suggestions
// For each character , assume that the character is extraneous
for (nCount = 0; nCount < nWordLength; nCount++)
{
memset(szBuffer, 0, nWordLength + 2);
strncpy(szBuffer, szWord, nCount);
strcat(szBuffer, &szWord[nCount + 1]);
if (IsWordListed(szBuffer) != eMatchNone)
straSuggestions.Add(szBuffer);
}
// Get the transposition suggestions
// For each pair of characters, do a swap and see if we can exactly match the string
for (nCount = 0; nCount < nWordLength - 1; nCount++)
{
strcpy(szBuffer, szWord);
char cTemp = szBuffer[nCount];
szBuffer[nCount] = szBuffer[nCount + 1];
szBuffer[nCount + 1] = cTemp;
if (IsWordListed(szBuffer) != eMatchNone)
straSuggestions.Add(szBuffer);
}
delete [] szBuffer;
}
}
// Tidy up the suggestions list
SortSuggestions(straSuggestions);
RemoveDuplicates(straSuggestions);
return straSuggestions.GetSize();
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : PatternMatchWord //
// Purpose : Pattern match a given word (? represents any single char) //
// Params : szWord - Pattern to match //
// straSuggestions - Reference to array for results //
// Returns : Number of suggestions found //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
int CDictionary::PatternMatchWord(LPCSTR szWord, CStringArray & straSuggestions)
{
straSuggestions.RemoveAll();
if (m_pRootNode == NULL) return 0;
m_pRootNode->GetPatternMatchingWords(szWord, straSuggestions);
return straSuggestions.GetSize();
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : LoadDictionary //
// Purpose : Load a previously saved dictionary file //
// Params : strFilename - Filename //
// Returns : true if load was successful, else false //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
bool CDictionary::LoadDictionary(const CString & strFilename)
{
bool bLoaded = false;
if (m_pRootNode)
{
// Remove the existing dictionary tree
delete m_pRootNode;
m_pRootNode = NULL;
}
try
{
// Open the file
CFile fileDictionary(strFilename, CFile::modeRead | CFile::shareDenyWrite);
CArchive archive(&fileDictionary, CArchive::load);
// And load the tree
m_pRootNode = new CNode;
m_pRootNode->Serialise(archive);
bLoaded = true;
}
catch (CFileException * e)
{
e->Delete();
}
catch (CArchiveException * e)
{
e->Delete();
}
return bLoaded;
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : SaveDictionary //
// Purpose : Save the dictionary to file //
// Params : strFilename - Filename //
// Returns : true if saved successfully, else false //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
bool CDictionary::SaveDictionary(const CString & strFilename)
{
bool bSaved = false;
if (m_pRootNode)
{
try
{
// Open the file
CFile fileDictionary(strFilename, CFile::modeCreate | CFile::modeWrite | CFile::shareDenyWrite);
CArchive archive(&fileDictionary, CArchive::store);
// And save the tree
m_pRootNode->Serialise(archive);
bSaved = true;
}
catch (CFileException * e)
{
e->Delete();
}
catch (CArchiveException * e)
{
e->Delete();
}
}
return bSaved;
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : CreateFromList //
// Purpose : Create a dictionary from a text file (one word per line) //
// Params : strFilename - Filename //
// bAppend - true to append to current dictionary, false to //
// start afresh //
// Returns : true if loaded successfully, else false //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
bool CDictionary::CreateFromList(const CString & strFilename, bool bAppend)
{
bool bCreated = false;
if (!bAppend)
{
// We are not appending the list, so delete any content we currently have
delete m_pRootNode;
}
try
{
// Open the file
CStdioFile fileList(strFilename, CFile::modeRead | CFile::shareDenyWrite);
CString strWord;
// And treat each line as a word
while(fileList.ReadString(strWord))
{
InsertWord(strWord);
}
bCreated = true;
}
catch (CFileException * e)
{
e->Delete();
}
return bCreated;
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : SortSuggestions //
// Purpose : Sorts the words in the given array //
// Params : straSuggestions - Reference to CStringArray to sort //
// Returns : None //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
void CDictionary::SortSuggestions(CStringArray & straSuggestions)
{
bool bChanged=true;
while(bChanged)
{
bChanged=false;
for(int nItem = 1; nItem < straSuggestions.GetSize(); nItem++)
{
if(straSuggestions.GetAt(nItem - 1) > straSuggestions.GetAt(nItem))
{
CString strTemp = straSuggestions.GetAt(nItem - 1);
straSuggestions.SetAt(nItem - 1, straSuggestions.GetAt(nItem));
straSuggestions.SetAt(nItem, strTemp);
bChanged=true;
}
}
}
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : RemoveDuplicates //
// Purpose : Removed duplicates from the given array //
// Params : straSuggestions - Reference to CStringArray to process //
// Returns : None //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
void CDictionary::RemoveDuplicates(CStringArray & straSuggestions)
{
for (int nCount = straSuggestions.GetUpperBound(); nCount > 0; nCount--)
{
if (straSuggestions.GetAt(nCount) == straSuggestions.GetAt(nCount - 1))
straSuggestions.RemoveAt(nCount);
}
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : GetWordCount //
// Purpose : Count how many words are contained in the dictionary //
// Params : None //
// Returns : Word in dictionary //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
int CDictionary::GetWordCount()
{
int nWordCount = 0;
if (m_pRootNode)
m_pRootNode->GetWordCount(nWordCount);
return nWordCount;
}
//////////////////////////////////////////////////////////////////////////////
// //
// Function : RemoveWord //
// Purpose : Marks a word as deleted from the dictionary //
// Params : szWord - Word to remove //
// Returns : true if the word is removed, false otherwise //
// Throws : None //
// //
//////////////////////////////////////////////////////////////////////////////
bool CDictionary::RemoveWord(LPCSTR szWord)
{
bool bRemoved = false;
if (m_pRootNode)
{
// First off, go through the tree to find the word, getting the terminating node
// Note that we only remove the word that is an exact case match for the given word
WordMatch match;
CNode * pFinalNode = NULL;
m_pRootNode->IsWordListed(szWord, true, match, true, &pFinalNode);
if (match == eMatchPerfect && pFinalNode)
{
// We found the final node - change it's character to something other than '\0'
pFinalNode->m_Character = REMOVEDWORD_TERMINATOR;
bRemoved = true;
}
}
return bRemoved;
}