|
// Dictionary.h: interface for the CDictionary class.
//
//////////////////////////////////////////////////////////////////////
#if !defined(AFX_DICTIONARY_H__F17F3FC5_2376_4678_A66E_67E9D5C08438__INCLUDED_)
#define AFX_DICTIONARY_H__F17F3FC5_2376_4678_A66E_67E9D5C08438__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
#pragma warning (disable:4786)
#include <map>
#include <list>
#include <stack>
#include <vector>
using namespace std;
//typedefs
typedef multimap<int , CString, greater<int> > MMAP_REVERSE_INT_STR;
#define DICTIONARY_LITE_HEADER_SIZE 100
#define DICTIONARY_LITE_DEFAULT_NO_OF_STR_IN_NODE 2 //5
#define DICTIONARY_LITE_DEFAULT_NO_OF_STR_IN_TRANSITION 2 //6
#define DICTIONARY_LITE_DEFAULT_STR_LEN 10
#define DICTIONARY_LITE_INVALID_OFFSET -1
#define DICTIONARY_LITE_HEADER_INITIAL_CHAR_LEN 2
#define DICTIONARY_LITE_HEADER_PARAM_PART_LENGTH 60
#define DICTIONARY_LITE_HEADER_STAT_PART_LENGTH 38
class CStrList : public list<CString>
{
public:
CStrList() {}
~CStrList() {}
CStrList& operator += (CStrList& oStrList)
{
list<CString>::iterator ite1 = oStrList.begin();
list<CString>::iterator iteEnd1 = oStrList.end();
for( ; ite1 != iteEnd1; ite1++)
push_back(*ite1);
return *this;
}
CString GetConcatenatedString()
{
CString s = "";
list<CString>::iterator ite1 = begin();
list<CString>::iterator iteEnd1 = end();
for( ; ite1 != iteEnd1; ite1++)
s += *ite1;
return s;
}
bool IsEmpty() {return (size() == 0);}
};
class CDictionaryLite
{
private:
// dictionary parameters
int i_StrLen;
int i_NoOfStrInNode;
int i_NoOfStrInTransition;
int i_NoOfNodes;
int i_NodeSize;
int i_TransitionSize;
int i_TransitionMilestoneSize;
CString s_Filename;
CFile m_File;
bool b_KeepFile;
int i_CurrNodeOffset;
//dictionary statistics
int i_DistinctStringCount;
int i_CummulativeStringCount;
//char buffer to hold the header
char a_Header[DICTIONARY_LITE_HEADER_SIZE];
//char buffer to hold current node
char* p_CurrNodeBuff;
//char buffer to hold backup node in memory
char* p_BackupNodeBuff;
//char buffer to hold current transition in memory
char* p_CurrTransitionBuff;
//char buffer to hold current transition milestone in memory
char* p_CurrTransitionMilestone;
//root info
bool b_IsRootFinal;
map<CString, int> map_RootTransitions;
//variables used when retrieving words
stack<map<CString, int> > stk_TransitionMaps;
stack<int> stk_NodeOffsets;
stack<CString> stk_Str;
//Initialize
void Init();
void InitFromFile();
void Finalize();
//methods to read from and write to file
void ReadNode(int iOffset);
void ReadTransition(int iOffset);
void WriteNode(int iOffset);
int AppendNode();
void WriteTransition(int iOffset);
int AppendTransition();
void ReadHeader();
void WriteHeader();
void ReadTransitionMilestoneBlock(int iOffset);
void WriteTransitionMilestoneBlock(int iOffset);
int AppendTransitionMilestone();
//Header manipulation
void ExtractValuesFromHeader();
void WriteValuesToHeader();
//methods regarding the current node (which is in memory)
bool IsCurrNodeFinal();
int GetIDOfCurrNode();
int GetCountOfCurrNode();
int GetNextPartOffsetOfCurrentNode();
int GetDestNodeFromCurrNode(CString str);
void GetTransitionMilestonesFromCurrNode(map<CString, int>& mapTransitions);
void GetAllTransitionsFromCurrentNode(map<CString, int>& mapTransitions);
void SetCurrNodeAsFinal(bool bIsFinal = true);
void SetIDOfCurrNode(int iID);
void SetCountOfCurrNode(int iCount);
void SetDestNodeFromCurrentNode(int iOffset);
void SetTransitionMilestonesOfCurrNode(map<CString, int>& mapTransitions);
void AddTransitionToCurrNode(CString str, int iOffset); //iOffset is the offset of destination node
void AddTransitionToTransitionMilestone(int iTMOffset, CString str, int iOffset);
//methods regarding the current transition (which is in memory)
void GetTransitionsFromCurrTransition(map<CString, int>& mapTransitions);
void WriteTransitionsToCurrTransaction(map<CString, int>& mapTransitions);
//methods regarding transition milestone blocks
void AddStringToTransitionMilestoneBlock(int iOffset, CString str, int iOffsetVal);
void AddStringToTransitionMilestoneBlock(char* pBlock, CString str, int iOffsetVal);
int GetOffsetToNextFromTransitionMilestoneBlock();
void SetOffsetToNextOfTransitionMilestoneBlock(int iOffset);
void GetTransitionsFromCurrTransitionMilestonesBlock(map<CString, int>& mapTransitions);
void SetTransitionsOfCurrTransitionMilestonesBlock(map<CString, int>& mapTransitions);
//other utility functions
void ReadStringIntArrayBlock(map<CString, int>& mapBlock, char* pStart, int iMaxNo);
void WriteStringIntArrayBlock(map<CString, int>& mapBlock, char* pStart, int iMaxNo);
int NewNode();
void LoadRoot();
int GetRootOffset();
//utility functions for word retreival
void FlushStacks();
CStrList GetStringFromStack();
public:
CDictionaryLite(CString sFilename);
CDictionaryLite(CString sFilename, bool bKeepFile, int iStrLen = DICTIONARY_LITE_DEFAULT_STR_LEN, int iNoOfStrInNode = DICTIONARY_LITE_DEFAULT_NO_OF_STR_IN_NODE, int iNoOfStrInTransition = DICTIONARY_LITE_DEFAULT_NO_OF_STR_IN_TRANSITION);
virtual ~CDictionaryLite();
//access methods
int GetStrLen();
int GetNoOfStrInNode();
int GetNoOfStrInTransition();
int GetNodeCount();
int GetDistinctStringCount();
int GetCummulativeStringCount();
void SetStrLen(int iLen);
void SetNoOfStrInNode(int iNo);
void SetNoOfStrInTransition(int iNo);
//utility methods
void AddString(CStrList& oStrList, int iCount = 1);
int GetStringCount(CStrList& oStrList);
bool GetFirstWord(CStrList& oStrList, int& iCount);
bool GetNextWord(CStrList& oStrList, int& iCount);
void WriteToTextFile(CString sFilename);
void WriteToTextFileDescending(CString sFilename);
//debugging
void PrintNodes(CString sFilename);
void AppendTreeToFile(CString sFilename, int iOffset);
};
#endif // !defined(AFX_DICTIONARY_H__F17F3FC5_2376_4678_A66E_67E9D5C08438__INCLUDED_)
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
I am a Sri Lankan Software Engineer with 3 years experience in software development. I have a C background and have experience in MFC. Iam very much interested in AI especially Natural Language Processing. I have had done some research in Sinhala which is my native language.
In my spare time I use to read novels and politics.