Click here to Skip to main content
15,886,199 members
Articles / Desktop Programming / MFC

SmartLexicon

Rate me:
Please Sign up or sign in to vote.
4.93/5 (14 votes)
30 Aug 2006 | GPL3 | 10 min read | 85.9K   3.3K   51
A multilingual dictionary engine with regular expressions support and Web browser integration.
/*  This file is a part of SmartLexicon, a multi-lingual dictionary engine.

    Copyright (C) 2005, Kostas Giannakakis

    SmartLexicon is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    SmartLexicon is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SmartLexicon; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "StdAfx.h"
#include "LexIndexFileDing.h"
#include "Utilities.h"

using namespace std;

// An index entry code packs a source line number and a token position into
// one 32-bit value: bits 8..31 hold the line, bits 0..7 hold the token number.
// Every macro body is fully parenthesized so the expansion can be embedded in
// a larger expression (e.g. followed by '&' or '==') without the surrounding
// operators re-associating with only part of the macro.
#define CONSTRUCT_CODE(line, token) (((token) & 0xFF) | (((line) << 8) & 0xFFFFFF00))
#define GET_LINE_FROM_CODE(code) (((code) >> 8) & 0xFFFFFF)
#define GET_TOKEN_FROM_CODE(code) ((code) & 0xFF)

// Free-function thread entry point passed to AfxBeginThread.
// pParam carries the CLexIndexFileDing instance whose LoadingThreadFn()
// member does the actual work; its result becomes the thread's return value.
static UINT LoadingThreadFn_(LPVOID pParam)
{
	CLexIndexFileDing *indexObject = static_cast<CLexIndexFileDing *>(pParam);
	return indexObject->LoadingThreadFn();
}

// Creates an index object with no loading thread running.
// Slot 0 of the parameter tables is registered as the boolean "Reverse"
// option, and the reverse flag starts out as FALSE.
CLexIndexFileDing::CLexIndexFileDing(void)
{
	// The CLexIndexFileBase default constructor has already run implicitly
	// before this body is entered. The former explicit statement
	// "CLexIndexFileBase::CLexIndexFileBase();" did not initialise this
	// object's base part (at best it built and destroyed an unrelated
	// temporary), so it has been removed.
	threadRunning = FALSE;
	paramType[0] = PARAM_BOOL;
	paramName[0] = _T("Reverse");
	reverse = FALSE;
}

// Starts building a brand-new index for the given source file on a worker
// thread.
//
// aSourceFile  Source dictionary to index; it is used as a
//              CLexSourceFileDing and must report IsReady().
//
// Returns LOADING_FAILED when the source file is missing / not ready or when
// the worker thread could not be created, and LOADING_PENDING once the worker
// thread has been started (the index is then produced by LoadingThreadFn()).
int CLexIndexFileDing::LoadNew(CLexSourceFileBase *aSourceFile)
{
	isReady  = FALSE;
	sourceFile = (CLexSourceFileDing *) aSourceFile;
	if (sourceFile == NULL || !sourceFile->IsReady())
	{
		return(LOADING_FAILED);
	}

	// Get source file's information
	CString sourceName;

	sourceFile->GetName(sourceName);
	sourceFileSize = sourceFile->GetSize();
	lineCount = sourceFile->GetLineCount();

	// Derive the index file name from parameter 0 ("Reverse"):
	// "<source>.ind" for the normal index, "<source>2.ind" for the reverse one.
	CString reverseParam;
	GetParam(0, reverseParam);

	if (reverseParam != _T("TRUE"))
	{
		name = sourceName + _T(".ind");
		reverse = FALSE;
	}
	else
	{
		name = sourceName + _T("2.ind");
		reverse = TRUE;
	}
	sourceFile->SetReverse(reverse);

	stage = 0;

	// Raise the flag before the thread exists, because the worker clears it
	// itself when it finishes.
	threadRunning = TRUE;
	pLoadingThread = AfxBeginThread(LoadingThreadFn_,
                                    (LPVOID) this);
	if (pLoadingThread == NULL)
	{
		// AfxBeginThread returns NULL when the thread could not be created.
		// Without this check the object would claim forever that a load is
		// pending although nothing is running.
		threadRunning = FALSE;
		return(LOADING_FAILED);
	}
	return(LOADING_PENDING);
}

// Opens an existing index file and validates it against the given source file.
//
// aSourceFile  Source dictionary the index belongs to; it is used as a
//              CLexSourceFileDing and must report IsReady().
// aName        Path of the index file to open.
//
// The header stored at the start of the index file must match the source
// file's name (without path), size and line count. Returns LOADING_SUCCESSFUL
// when the index is usable, LOADING_FAILED otherwise. On every failure path
// the index file is closed again.
int CLexIndexFileDing::Load(CLexSourceFileBase *aSourceFile,
							CString aName)
{
	isReady  = FALSE;
	sourceFile = (CLexSourceFileDing *) aSourceFile;
	if (sourceFile == NULL || !sourceFile->IsReady())
	{
		return(LOADING_FAILED);
	}

	// Get source file's information
	CString sourceName;

	sourceFile->GetName(sourceName);
	sourceFileSize = sourceFile->GetSize();
	lineCount = sourceFile->GetLineCount();

	// Check if the index file already exists
	CFileException err;

	name = aName;
	if (!indexFile.Open(name,
						CFile::modeRead | CFile::shareDenyWrite,
						&err))
	{
		return(LOADING_FAILED);
	}

	// Read the header
	const int BUFFER_SIZE = sizeof(DingIndexFileSettings);
	BYTE buffer[BUFFER_SIZE];
	const int count = (int) indexFile.Read(buffer, BUFFER_SIZE);

	if (count != BUFFER_SIZE)
	{
		// Index file is too short to hold a header: not the right format
		indexFile.Close();
		return(LOADING_FAILED);
	}

	// Compare the stored source name with the current one. The comparison
	// reads the raw header bytes from offset 0, exactly as before.
	CString nameNoPath;
	UtilityFunctions::RemovePath(sourceName, nameNoPath);

	int i;	// declared outside the loop because it is inspected afterwards
	for(i = 0; i < HEADER_FILE_NAME_SIZE; i++)
	{
		if (i < nameNoPath.GetLength() && buffer[i] != (BYTE) nameNoPath.GetAt(i))
			break;
	}

	DingIndexFileSettings *readSettings = (DingIndexFileSettings *) buffer;
	int s = readSettings->sourceSize;
	int n = readSettings->sourceLineCount;

	if (!(s == sourceFileSize && n == lineCount && i == HEADER_FILE_NAME_SIZE))
	{
		// Index file was built for a different source: not the right format
		indexFile.Close();
		return(LOADING_FAILED);
	}

	isOpen = TRUE;
	memcpy(&settings, readSettings, sizeof(DingIndexFileSettings));
	reverse = (settings.reverse != 0);
	sourceFile->SetReverse(reverse);

	// Finally load the index file
	if (LoadFromExisting())
	{
		return(LOADING_SUCCESSFUL);
	}

	// The payload is malformed. Release the file handle as well, so that the
	// object is not left with an open file while reporting isOpen == FALSE.
	indexFile.Close();
	isOpen = FALSE;
	return(LOADING_FAILED);
}

// Derives the number of index entries from the length of the already opened
// index file. Everything after the DingIndexFileSettings header must be a
// non-empty sequence of 4-byte entries; otherwise FALSE is returned and no
// member is modified. On success entriesCount is set, isReady becomes TRUE
// and TRUE is returned.
BOOL CLexIndexFileDing::LoadFromExisting()
{
	int payloadBytes = ((int) indexFile.GetLength()) -
					   sizeof(DingIndexFileSettings);

	const BOOL wellFormed = (payloadBytes >= 4) && ((payloadBytes % 4) == 0);
	if (!wellFormed)
	{
		return FALSE;
	}

	entriesCount = payloadBytes / 4;
	TRACE1("Keywords = %d\n", payloadBytes / 4);
	isReady = TRUE;
	return TRUE;
}

int CLexIndexFileDing::GetEntryAt(int index, CString& entry)
{
	const int BUFFER_SIZE = 100;
	BYTE buffer[BUFFER_SIZE];
	CString line, firstHalf;
	unsigned int lineNumber, tokenNumber;

	int count, pos = sizeof(DingIndexFileSettings) + index*sizeof(int);
	indexFile.Seek(pos, CFile::begin);
	count = indexFile.Read(buffer, BUFFER_SIZE);

	unsigned int code = *((unsigned int *) &buffer[0]);
	lineNumber =GET_LINE_FROM_CODE(code);
	tokenNumber =GET_TOKEN_FROM_CODE(code);
	sourceFile->ReadLine(line, lineNumber);

	if (reverse == TRUE)
	{
		// Get second part of the line
		int start = line.Find(_T(" :: "));
		
		if ( start > 0)
			line = line.Mid(start + 4);
	}

	int curPos = 0;
	firstHalf = line.Tokenize(_T(":|;"), curPos);
	curPos = 0;
	for(unsigned int i=0; i<=tokenNumber; i++)
	{
		entry = EntryTokenize(firstHalf, curPos);
		if (entry ==_T(""))
		{
			return -1;
		}
	}

	return(lineNumber);
}

// Returns the next space-separated token of 'firstHalf', starting at
// 'curPos', that passes IsValidEntry() and does not lie inside a
// parenthesised section. A section opens with a token whose first character
// is '(' and closes with a token containing ')'.
//
// firstHalf  Text to scan.
// curPos     In/out tokenizer position; left untouched when 'firstHalf' is
//            empty.
//
// Returns an empty string when no further token qualifies.
CString CLexIndexFileDing::EntryTokenize(const CString& firstHalf, int& curPos)
{
	CString word = _T("");

	if (firstHalf.GetLength() <= 0)
	{
		return word;
	}

	BOOL skipping = FALSE;
	for (word = firstHalf.Tokenize(_T(" "), curPos);
		 word != _T("");
		 word = firstHalf.Tokenize(_T(" "), curPos))
	{
		// An opening parenthesis starts a section that is skipped...
		if (word.GetAt(0) == '(')
		{
			skipping = TRUE;
		}

		if (IsValidEntry(word) && !skipping)
		{
			return word;
		}

		// ...until a token carrying the closing parenthesis has been seen.
		if (word.Find(')') >= 0)
		{
			skipping = FALSE;
		}
	}

	// Tokens exhausted: 'word' is the empty string here.
	return word;
}

// Worker-thread body that builds a new index file for 'sourceFile'.
//
// Stage 0 reads the source line by line, extracts the keywords of each line
// and collects them (with their packed line/token code) in entriesTable.
// Stage 1 writes the header followed by the codes, in table order, to the
// index file 'name' and reopens that file for reading.
//
// The 'stop' flag is polled in both stages; presumably it is raised from
// another thread to cancel the build (NOTE(review): confirm against callers).
//
// Returns 0 on success. On cancellation or failure it returns (UINT) -1,
// removes a partially written index file and leaves isReady FALSE.
// threadRunning is cleared on every exit path.
UINT CLexIndexFileDing::LoadingThreadFn()
{
	CFileException err;
	CString line, entry, firstHalf;
	int tokenNumber;

	isReady = FALSE;
	currentLine = 0;
	stop = FALSE;

	// Start from an empty table so that building an index a second time on
	// the same object does not duplicate the entries of the previous run.
	entriesTable.clear();

	// Read the source file line by line
	sourceFile->SeekToBegin();
	while( sourceFile->ReadLine(line) && !stop && currentLine < lineCount)
	{
		// Lines starting with '#' are skipped
		if (line.GetAt(0) != _T('#'))
		{
			int curPos = 0;

			if (reverse == TRUE)
			{
				// Get second part of the line
				int start = line.Find(_T(" :: "));

				if ( start > 0)
				{
					line = line.Mid(start + 4);
				}
			}

			firstHalf = line.Tokenize(_T(":|;"), curPos);
			if (firstHalf.GetLength() > 0)
			{
				GetLang()->ToLower(firstHalf);
				GetLang()->MakeSimple(firstHalf);

				curPos = 0;
				tokenNumber = 0;
				entry = EntryTokenize(firstHalf, curPos);
				while(entry != _T(""))
				{
					int code = CONSTRUCT_CODE(currentLine,tokenNumber);
					entriesTable.insert(pair<CString, int>(entry, code));
					tokenNumber++;
					entry = EntryTokenize(firstHalf, curPos);
				}
			}
		}
		currentLine++;
	}

	// Fewer entries than 80% of the line count is treated as a failed build
	if (entriesTable.size() < (size_t) ((lineCount*80)/100))
		stop = TRUE;

	sourceFile->SeekToBegin();
	if (!stop &&
		indexFile.Open(name,
					   CFile::modeCreate | CFile::modeWrite,
					   &err))
	{
		// CFile::Write and CFile::Close report errors by throwing a
		// CFileException. It must not escape the thread function, otherwise
		// threadRunning would never be cleared.
		try
		{
			const int TEMP_BUFFER_SIZE = 1024;
			BYTE buffer[TEMP_BUFFER_SIZE];

			// Write header information
			CString nameNoPath, sourceFileName;
			sourceFile->GetName(sourceFileName);
			UtilityFunctions::RemovePath(sourceFileName, nameNoPath);

			for(int i = 0; i<HEADER_FILE_NAME_SIZE; i++)
			{
				if (i>=nameNoPath.GetLength())
					settings.name[i] = 0;
				else
					settings.name[i] = (BYTE) nameNoPath.GetAt(i);
			}
			settings.sourceSize = sourceFile->GetSize();
			settings.sourceLineCount = sourceFile->GetLineCount();
			settings.reverse = reverse;

			indexFile.Write((BYTE *) &settings, sizeof(DingIndexFileSettings));

			// Start second stage, update lineCount and currentLine data
			// members, so that the GetProgress function will work properly.
			stage++;
			lineCount = (int) entriesTable.size();
			entriesCount = lineCount;
			currentLine = 0;

			// Write the codes in table order, batching them through a small
			// buffer to limit the number of Write calls.
			multimap<CString, int>::iterator it;
			int bufSize = 0;
			for(it = entriesTable.begin(); it != entriesTable.end() && !stop; ++it)
			{
				memcpy(&buffer[bufSize], &it->second, sizeof(int));
				bufSize += (int) sizeof(int);
				if (bufSize + (int) sizeof(int) > TEMP_BUFFER_SIZE)
				{
					// No room left for another code: flush
					indexFile.Write(buffer, bufSize);
					bufSize = 0;
				}
				currentLine++;
			}
			if (bufSize > 0)
			{
				indexFile.Write(buffer, bufSize);
			}
			indexFile.Close();
		}
		catch (CFileException *writeError)
		{
			writeError->Delete();
			// Abort closes the file without throwing
			indexFile.Abort();
			stop = TRUE;
		}

		if (stop)
		{
			// Do not leave a truncated index behind: its header would still
			// match the source file, so a later Load() could accept it.
			try
			{
				CFile::Remove(name);
			}
			catch (CFileException *removeError)
			{
				removeError->Delete();
			}
		}
	}
	else
	{
		stop = TRUE;
	}

	if (stop)
	{
		stage = 1;
		lineCount = currentLine = 1;
		threadRunning = FALSE;
		return(-1);
	}

	// Open index file for reading
	isOpen = indexFile.Open(name,
							CFile::modeRead,
							&err);
	isReady = isOpen;

	threadRunning = FALSE;
	return(0);

}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)


Written By
Software Developer (Senior) Self employed
Greece
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions