/* This file is a part of SmartLexicon, a multi-lingual dictionary engine.
Copyright (C) 2005, Kostas Giannakakis
SmartLexicon is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SmartLexicon is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SmartLexicon; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "StdAfx.h"
#include "LexIndexFileDing.h"
#include "Utilities.h"
using namespace std;
// An index entry is stored as one 32-bit code: the low 8 bits hold the
// token number inside the line, the upper 24 bits hold the line number.
// Every macro is fully parenthesised so that it can be used safely inside a
// larger expression (e.g. CONSTRUCT_CODE(l, t) & mask).
#define CONSTRUCT_CODE(line, token) (((token) & 0xFF) | (((line) << 8) & 0xFFFFFF00))
#define GET_LINE_FROM_CODE(code) (((code) >> 8) & 0xFFFFFF)
#define GET_TOKEN_FROM_CODE(code) ((code) & 0xFF)
// Thread entry point handed to AfxBeginThread. pParam carries the
// CLexIndexFileDing object whose LoadingThreadFn() member does the work;
// its result is returned unchanged as the thread's exit value.
static UINT LoadingThreadFn_(LPVOID pParam)
{
    CLexIndexFileDing *indexObject = (CLexIndexFileDing *) pParam;
    return indexObject->LoadingThreadFn();
}
// Default constructor: no loading thread is running, the index is not
// reversed, and parameter slot 0 is declared as the boolean "Reverse" option.
CLexIndexFileDing::CLexIndexFileDing(void)
{
    // The CLexIndexFileBase default constructor runs automatically before
    // this body. Naming it here as a statement does not initialise the base
    // sub-object (it is a stray temporary at best and ill-formed in standard
    // C++), so no explicit call is made.
    threadRunning = FALSE;
    paramType[0] = PARAM_BOOL;
    paramName[0] = _T("Reverse");
    reverse = FALSE;
}
// Starts building a new index for aSourceFile on a background thread.
//
// The index file name is derived from the source file name: ".ind" is
// appended for a normal index and "2.ind" for a reverse index, depending on
// whether parameter 0 ("Reverse") equals "TRUE".
//
// Returns LOADING_FAILED when the source file is missing or not ready, or
// when the worker thread could not be created; otherwise LOADING_PENDING.
int CLexIndexFileDing::LoadNew(CLexSourceFileBase *aSourceFile)
{
    isReady = FALSE;
    sourceFile = (CLexSourceFileDing *) aSourceFile;
    if (sourceFile == NULL || !sourceFile->IsReady())
    {
        return(LOADING_FAILED);
    }

    // Get source file's information
    CString sourceName;
    sourceFile->GetName(sourceName);
    sourceFileSize = sourceFile->GetSize();
    lineCount = sourceFile->GetLineCount();

    // Construct corresponding index file name
    CString str;
    GetParam(0, str);
    if (str != _T("TRUE"))
    {
        name = sourceName + _T(".ind");
        reverse = FALSE;
    }
    else
    {
        name = sourceName + _T("2.ind");
        reverse = TRUE;
    }
    sourceFile->SetReverse(reverse);

    stage = 0;

    // The flag is raised before the thread starts so that the worker, which
    // clears it when it finishes, can never be overtaken by this assignment.
    threadRunning = TRUE;
    pLoadingThread = AfxBeginThread(LoadingThreadFn_,
                                    (LPVOID) this);
    if (pLoadingThread == NULL)
    {
        // No worker exists that would ever clear the flag or finish the
        // load, so report the failure instead of a pending operation.
        threadRunning = FALSE;
        return(LOADING_FAILED);
    }
    return(LOADING_PENDING);
}
// Opens the existing index file aName for the given source file.
//
// The header stored at the start of the index file must match the source
// file: same size, same line count and the same file name (without path).
// On a match the stored settings are adopted, including the reverse flag,
// which is also forwarded to the source file.
//
// Returns LOADING_SUCCESSFUL when the index is usable and LOADING_FAILED
// otherwise. On every failure after the file was opened it is closed again.
int CLexIndexFileDing::Load(CLexSourceFileBase *aSourceFile,
                            CString aName)
{
    isReady = FALSE;
    sourceFile = (CLexSourceFileDing *) aSourceFile;
    if (sourceFile == NULL || !sourceFile->IsReady())
    {
        return(LOADING_FAILED);
    }

    // Get source file's information
    CString sourceName;
    sourceFile->GetName(sourceName);
    sourceFileSize = sourceFile->GetSize();
    lineCount = sourceFile->GetLineCount();

    // Check if the index file already exists
    CFileException err;
    name = aName;
    if (!indexFile.Open(name,
                        CFile::modeRead | CFile::shareDenyWrite,
                        &err))
    {
        return(LOADING_FAILED);
    }

    // Read the header
    const int BUFFER_SIZE = sizeof(DingIndexFileSettings);
    BYTE buffer[BUFFER_SIZE];
    int count = indexFile.Read(buffer, BUFFER_SIZE);
    if (count != BUFFER_SIZE)
    {
        // Index file does not have the right format
        indexFile.Close();
        return(LOADING_FAILED);
    }

    // Copy the raw bytes into a properly aligned structure instead of
    // accessing the byte buffer through a cast pointer.
    DingIndexFileSettings readSettings;
    memcpy(&readSettings, buffer, sizeof(DingIndexFileSettings));

    // Compare the stored name with the source file name. Only the characters
    // covered by the source name are compared; the bytes are taken from the
    // start of the header (assumes the name is the first header field --
    // TODO confirm against DingIndexFileSettings).
    CString nameNoPath;
    UtilityFunctions::RemovePath(sourceName, nameNoPath);
    BOOL nameMatches = TRUE;
    for (int i = 0; i < HEADER_FILE_NAME_SIZE; i++)
    {
        if (i < nameNoPath.GetLength() && buffer[i] != (BYTE) nameNoPath.GetAt(i))
        {
            nameMatches = FALSE;
            break;
        }
    }

    int s = readSettings.sourceSize;
    int n = readSettings.sourceLineCount;
    if (!(s == sourceFileSize && n == lineCount && nameMatches))
    {
        // Index file does not have the right format
        indexFile.Close();
        return(LOADING_FAILED);
    }

    isOpen = TRUE;
    memcpy(&settings, &readSettings, sizeof(DingIndexFileSettings));
    reverse = (settings.reverse != 0);
    sourceFile->SetReverse(reverse);

    // Finally load the index file
    if (!LoadFromExisting())
    {
        // Do not keep a rejected file open: a later Load()/LoadNew() opens
        // indexFile again.
        indexFile.Close();
        isOpen = FALSE;
        return(LOADING_FAILED);
    }
    return(LOADING_SUCCESSFUL);
}
// Validates the payload of the already opened index file and derives the
// number of entries from it. The bytes after the header must form at least
// one, and only complete, 4-byte entries. On success entriesCount is set,
// isReady is raised and TRUE is returned; otherwise FALSE is returned and
// no member is changed.
BOOL CLexIndexFileDing::LoadFromExisting()
{
    const int ENTRY_BYTES = 4;
    const int fileLength = (int) indexFile.GetLength();
    const int size = fileLength - sizeof(DingIndexFileSettings);

    const BOOL wholeEntries = (size >= ENTRY_BYTES) && ((size % ENTRY_BYTES) == 0);
    if (!wholeEntries)
    {
        return FALSE;
    }

    const int keywords = size / ENTRY_BYTES;
    entriesCount = keywords;
    TRACE1("Keywords = %d\n", keywords);
    isReady = TRUE;
    return TRUE;
}
int CLexIndexFileDing::GetEntryAt(int index, CString& entry)
{
const int BUFFER_SIZE = 100;
BYTE buffer[BUFFER_SIZE];
CString line, firstHalf;
unsigned int lineNumber, tokenNumber;
int count, pos = sizeof(DingIndexFileSettings) + index*sizeof(int);
indexFile.Seek(pos, CFile::begin);
count = indexFile.Read(buffer, BUFFER_SIZE);
unsigned int code = *((unsigned int *) &buffer[0]);
lineNumber =GET_LINE_FROM_CODE(code);
tokenNumber =GET_TOKEN_FROM_CODE(code);
sourceFile->ReadLine(line, lineNumber);
if (reverse == TRUE)
{
// Get second part of the line
int start = line.Find(_T(" :: "));
if ( start > 0)
line = line.Mid(start + 4);
}
int curPos = 0;
firstHalf = line.Tokenize(_T(":|;"), curPos);
curPos = 0;
for(unsigned int i=0; i<=tokenNumber; i++)
{
entry = EntryTokenize(firstHalf, curPos);
if (entry ==_T(""))
{
return -1;
}
}
return(lineNumber);
}
// Returns the next indexable word of firstHalf, scanning the space separated
// tokens from position curPos onwards. curPos is advanced by the tokenizer
// so that repeated calls walk through the text.
//
// A token starting with '(' opens a parenthesised section and a token
// containing ')' closes it; tokens of such a section are never returned,
// nor are tokens rejected by IsValidEntry(). An empty string is returned
// when firstHalf is empty or no further word is found.
CString CLexIndexFileDing::EntryTokenize(const CString& firstHalf, int& curPos)
{
    if (firstHalf.GetLength() <= 0)
    {
        return CString(_T(""));
    }

    BOOL skipping = FALSE;
    CString word = firstHalf.Tokenize(_T(" "), curPos);
    for (; word != _T(""); word = firstHalf.Tokenize(_T(" "), curPos))
    {
        if (word.GetAt(0) == '(')
        {
            skipping = TRUE;
        }

        if (IsValidEntry(word) && !skipping)
        {
            return word;
        }

        // The closing token itself is still skipped; only the tokens that
        // follow it become eligible again.
        if (word.Find(')') >= 0)
        {
            skipping = FALSE;
        }
    }

    // Tokenizer exhausted: word is the empty string here.
    return word;
}
// Body of the loading thread started by LoadNew().
//
// Stage 0 reads the source file line by line, extracts the keywords of each
// line and collects them, with a code made of line and token number, in
// entriesTable. Stage 1 writes the header and the codes, in table order, to
// the index file called name, then reopens that file for reading.
//
// The work is abandoned when the stop member becomes set, when fewer entries
// than 80% of the source line count were collected, or when the index file
// cannot be created or written. A partially written index file is removed.
//
// Returns 0 on success and (UINT) -1 on failure. threadRunning is cleared in
// both cases.
UINT CLexIndexFileDing::LoadingThreadFn()
{
    CFileException err;
    CString line, entry, firstHalf;
    int tokenNumber;

    isReady = FALSE;
    currentLine = 0;
    stop = FALSE;

    // Start from an empty table so that running the thread again does not
    // duplicate the entries collected by a previous run.
    entriesTable.clear();

    // Read the source file line by line
    sourceFile->SeekToBegin();
    while (sourceFile->ReadLine(line) && !stop && currentLine < lineCount)
    {
        // Empty lines and lines starting with '#' contribute no entries
        if (line.GetLength() > 0 && line.GetAt(0) != _T('#'))
        {
            int curPos = 0;
            if (reverse == TRUE)
            {
                // Get second part of the line
                int start = line.Find(_T(" :: "));
                if (start > 0)
                {
                    line = line.Mid(start + 4);
                }
            }
            firstHalf = line.Tokenize(_T(":|;"), curPos);
            if (firstHalf.GetLength() > 0)
            {
                GetLang()->ToLower(firstHalf);
                GetLang()->MakeSimple(firstHalf);
                curPos = 0;
                tokenNumber = 0;
                entry = EntryTokenize(firstHalf, curPos);
                // The code has room for 8 bits of token number only; tokens
                // beyond that would wrap around and point at the wrong word,
                // so they are not indexed.
                while (entry != _T("") && tokenNumber <= 0xFF)
                {
                    int code = CONSTRUCT_CODE(currentLine, tokenNumber);
                    entriesTable.insert(pair<CString, int>(entry, code));
                    tokenNumber++;
                    entry = EntryTokenize(firstHalf, curPos);
                }
            }
        }
        currentLine++;
    }

    // Too few entries compared with the number of lines: give up
    if (entriesTable.size() < (size_t) ((lineCount*80)/100))
        stop = TRUE;
    sourceFile->SeekToBegin();

    BOOL fileCreated = FALSE;
    if (!stop &&
        indexFile.Open(name,
                       CFile::modeCreate | CFile::modeWrite,
                       &err))
    {
        fileCreated = TRUE;
        try
        {
            const int TEMP_BUFFER_SIZE = 1024;
            BYTE buffer[TEMP_BUFFER_SIZE];

            // Write header information
            CString nameNoPath, sourceFileName;
            sourceFile->GetName(sourceFileName);
            UtilityFunctions::RemovePath(sourceFileName, nameNoPath);
            for (int i = 0; i < HEADER_FILE_NAME_SIZE; i++)
            {
                if (i >= nameNoPath.GetLength())
                    settings.name[i] = 0;
                else
                    settings.name[i] = (BYTE) nameNoPath.GetAt(i);
            }
            settings.sourceSize = sourceFile->GetSize();
            settings.sourceLineCount = sourceFile->GetLineCount();
            settings.reverse = reverse;
            indexFile.Write((BYTE *) &settings, sizeof(DingIndexFileSettings));

            // Start second stage, update lineCount and currentLine data members,
            // so that the GetProgress function will work properly.
            stage++;
            lineCount = (int) entriesTable.size();
            entriesCount = lineCount;
            currentLine = 0;

            // Write the codes in table order, collecting them in a buffer
            // that is flushed whenever it has no room for another code.
            multimap<CString, int>::iterator it;
            int bufSize = 0;
            for (it = entriesTable.begin(); it != entriesTable.end() && !stop; ++it)
            {
                const int code = it->second;
                memcpy(&buffer[bufSize], &code, sizeof(int));
                bufSize += sizeof(int);
                if (bufSize + (int) sizeof(int) > TEMP_BUFFER_SIZE)
                {
                    indexFile.Write(buffer, bufSize);
                    bufSize = 0;
                }
                currentLine++;
            }
            if (!stop && bufSize > 0)
            {
                indexFile.Write(buffer, bufSize);
            }
            indexFile.Close();
        }
        catch (CFileException *writeError)
        {
            // Writing failed (e.g. disk full). The exception must not leave
            // the thread function; treat it like any other failure.
            writeError->Delete();
            indexFile.Abort();
            stop = TRUE;
        }
    }
    else
    {
        stop = TRUE;
    }

    if (stop)
    {
        if (fileCreated)
        {
            // An incomplete index carries a valid header and would be
            // accepted by Load() later on, so it must not stay on disk.
            try
            {
                CFile::Remove(name);
            }
            catch (CFileException *removeError)
            {
                removeError->Delete();
            }
        }
        stage = 1;
        lineCount = currentLine = 1;
        threadRunning = FALSE;
        return(-1);
    }

    // Open index file for reading
    isOpen = indexFile.Open(name,
                            CFile::modeRead,
                            &err);
    isReady = isOpen;
    threadRunning = FALSE;
    return(0);
}