/*********************************************************************
Copyright (C) 2001 by
Alexander Berthold, alexander-berthold@web.de.
Hoegestr. 54
79108 Freiburg i. Breisgau
Germany
-- This file is part of cxTokenizer --
"cxTokenizer" is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or any later version.
"cxTokenizer" is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with "cxTokenizer"; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330,
Boston, MA 02111-1307 USA
---------------------------------------------------------------
If you find any bugs or if you make other corrections/
enhancements, i'd appreciate if you'd let me know about
that. My email is
alexander-berthold@web.de
If you share this code, do not remove this text.
---------------------------------------------------------------
*********************************************************************/
// cxTokenizer.cpp: implementation of the cxTokenizer class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "cxTokenizerTokenRule.h"
#include "cxTokenizerCharTokenRule.h"
#include "cxTokenizerContextCookie.h"
#include "cxTokenizerContext.h"
#include "cxTokenizerMapData.h"
#include "cxTokenizerMap.h"
#include "cxTokenizerInputStream.h"
#include "cxTokenizer.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
#include "cxTokenizerDiags.cpp"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cxTokenizer::cxTokenizer
PURPOSE: Creates a tokenizer for the given token map and token
         listener. No input stream is pushed by this constructor.
PARAMS:  ptmLexxerMap - token map, must not be NULL
         ptlReceiver  - listener for the output token stream,
                        must not be NULL
*********************************************************************/
cxTokenizer::cxTokenizer(cxTokenizerMap *ptmLexxerMap,
                         cxTokenizerListener *ptlReceiver)
    : m_ptmLexxerMap(ptmLexxerMap)      // tree with the 'path' to the tokens
    , m_ptlReceiver(ptlReceiver)        // receiver of the recognized tokens
{
    // Debug-only sanity checks on the collaborators
    ASSERT(ptmLexxerMap!=NULL);
    ASSERT(ptlReceiver!=NULL);
    ASSERT(ptmLexxerMap->fCheckValid());
    ASSERT(ptlReceiver->fCheckValid());

    // Bring the parser context into a defined state and make this
    // tokenizer its owner
    m_tcContextInfo.vClearAllFlags();
    m_tcContextInfo.vSetTokenizer(this);

    // No substitutor listeners are registered yet
    memset(m_aptslSubstitutors,0,sizeof(m_aptslSubstitutors));

    // Tell the map that a tokenizer starts using it
    m_ptmLexxerMap->vInitNotify(true,this);
}
/*********************************************************************
FUNCTION: cxTokenizer::cxTokenizer
PURPOSE: Creates a tokenizer for the given token map and token
         listener and pushes 'ptisData' as the first input stream.
PARAMS:  ptisData     - initial input stream, must not be NULL
         ptmLexxerMap - token map, must not be NULL
         ptlReceiver  - listener for the output token stream,
                        must not be NULL
*********************************************************************/
cxTokenizer::cxTokenizer(cxTokenizerInputStream *ptisData,
                         cxTokenizerMap *ptmLexxerMap,
                         cxTokenizerListener *ptlReceiver)
{
    // ASSERT if any assumption fails
    ASSERT(ptisData!=NULL);
    ASSERT(ptmLexxerMap!=NULL);
    ASSERT(ptlReceiver!=NULL);
    ASSERT(ptisData->fCheckValid());
    ASSERT(ptmLexxerMap->fCheckValid());
    ASSERT(ptlReceiver->fCheckValid());

    // The input stream ...
    // NOTE(review): the stream is pushed onto the stack only; unlike
    // vPushInputStream() it is not entered into m_setPtisData here -
    // confirm that this is intended.
    m_stkPtisData.push(ptisData);

    // The tree with the 'path' to the tokens
    m_ptmLexxerMap = ptmLexxerMap;
    // The listener for the output token stream
    m_ptlReceiver = ptlReceiver;

    m_tcContextInfo.vClearAllFlags();
    // Set owner of the context. The two-argument constructor does this
    // as well; it was missing here, leaving the context without its
    // tokenizer when this constructor was used.
    m_tcContextInfo.vSetTokenizer(this);

    // Clear the substitutor listener list
    memset(m_aptslSubstitutors,0,sizeof(m_aptslSubstitutors));

    // Initialize the map
    ptmLexxerMap->vInitNotify(true,this);
}
/*********************************************************************
FUNCTION: cxTokenizer::~cxTokenizer
PURPOSE: Notifies the map that this tokenizer shuts down, releases
         all input streams and deletes the token listener if the
         listener asks for it (fShouldDelete()).
*********************************************************************/
cxTokenizer::~cxTokenizer()
{
    // Tell the map that this tokenizer no longer uses it
    m_ptmLexxerMap->vInitNotify(false,this);

    // Empty the stack of open streams first; vCleanupInputStreams()
    // requires an empty stack and deletes the streams that want it
    vDetachInputStream(NULL);
    vCleanupInputStreams();

    // Nothing more to do without a listener
    if(m_ptlReceiver==NULL)
        return;

    ASSERT(m_ptlReceiver->fCheckValid());

    // The listener decides whether the tokenizer owns it
    if(!m_ptlReceiver->fShouldDelete())
        return;

    delete m_ptlReceiver;
    m_ptlReceiver = NULL;
}
/*********************************************************************
FUNCTION: cxTokenizer::vReset
PURPOSE: Drops all input streams, resets the parser context and
         re-initializes the map for this tokenizer.
RETURNS: - void -
*********************************************************************/
void cxTokenizer::vReset()
{
    // Step 1: pop every open input stream, then let the streams that
    // want to be deleted go away
    vDetachInputStream(NULL);
    vCleanupInputStreams();

    // Step 2: sign off from the map ...
    m_ptmLexxerMap->vInitNotify(false,this);

    // Step 3: ... reset the context while signed off ...
    m_tcContextInfo.vReset();

    // Step 4: ... and sign on again
    m_ptmLexxerMap->vInitNotify(true,this);
}
//////////////////////////////////////////////////////////////////////
// Protected internal token recognition operations
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cxTokenizer::fRecognizeNonDelimTokens
PURPOSE: Matches the text found between two delimiting tokens
         against the rules in m_tmdFPRules of the map. Every
         character is appended to the current text of the context
         while it is tested. If the walk ends on a rule that
         reports completion, the token is passed to
         vRegisterToken() (with the rule's result string if the
         rule supplies one).
RETURNS: 'true' if the text is empty or a token was registered,
         'false' if the text could not be resolved.
*********************************************************************/
bool cxTokenizer::fRecognizeNonDelimTokens(std::tstring strTokenText)
{
    // An empty text counts as resolved
    if(strTokenText.length()==0)
        return true;

    // TODO: Extend this function to correctly handle the flag
    // ooptrf_intermediate (currently, only one token is recognized)
    cxTokenizerMapData *ptmdNode = &m_ptmLexxerMap->m_tmdFPRules;
    cxTokenizerMapData *ptmdMatch = NULL;
    bool fComplete = false;
    int nLength = strTokenText.length();
    int nPos;
    TCHAR tcCurrent;

    // Walk down the rule tree, one character per step
    for(nPos=0;nPos<nLength;nPos++)
    {
        tcCurrent = strTokenText[nPos];
        m_tcContextInfo.strGetCurrentText() += tcCurrent;

        ptmdMatch = ptmdNode->ptmdDoesApply(&m_tcContextInfo,tcCurrent,&fComplete,0,nPos);
        if(ptmdMatch==NULL)
            break;      // no rule accepts this character

        ptmdMatch->vApplied(&m_tcContextInfo,tcCurrent,fComplete,0,nPos);
        ptmdNode = ptmdMatch;
    }

    // The walk must have ended on a rule that reported completion
    if(ptmdMatch==NULL || !fComplete)
        return false;

    // The rule may replace the token text by its own result string
    std::tstring strResult;
    if(ptmdMatch->pttrGetRule()->fGetResultString(&m_tcContextInfo,strResult))
        strTokenText = strResult;

    vRegisterToken(strTokenText,ptmdMatch->pttrGetRule(),m_stkPtisData.top());
    return true;
}
/*********************************************************************
FUNCTION: cxTokenizer::vDelimTokenRecognized
PURPOSE: Internally called when a delimeting token
has been recognized. Calls vRecognizeNonDelimTokens()
to analyze the part between the last delimeting token
and this one. Afterwards registers the token using
vRegisterToken().
RETURNS: - void -
*********************************************************************/
void cxTokenizer::vDelimTokenRecognized()
{
// Token has been recognized
cxTokenizerContext::tc_list_type
*plstRuleList;
cxTokenizerContext::cxListEntry
*pteCur;
cxTokenizerMapData *ptmdRule;
cxTokenizerTokenRule *pttrDelimTokenRule;
cxTokenizerContext::rulelist_iterator
it,cit;
std::tstring strDelimToken;
std::tstring strBetweenToken;
std::tstring strPutBack;
// There must be exactly one entry in the list of tokens
plstRuleList =m_tcContextInfo.plstGetTokenRuleList();
ASSERT(plstRuleList!=NULL);
ASSERT(plstRuleList->size()<=1);
if(plstRuleList->empty())
{
if(!fRecognizeNonDelimTokens(m_tcContextInfo.strGetCurrentText()))
{
std::tstring strSubstitute;
if( !fSubstituteUnknownToken(
m_tcContextInfo.strGetCurrentText(),
std::tstring(),
ptiGetInputStream(),
strSubstitute))
vRegisterToken(
m_tcContextInfo.strGetCurrentText(),
NULL,ptiGetInputStream());
else
VERIFY(ptiGetInputStream()->fPutBack(strSubstitute.c_str()));
}
// Clean up the parser context
m_tcContextInfo.vCleanUpAfterTokenRecognition();
return;
}
if(plstRuleList->size()!=1)
throw cxTokenizerException(ERR_UNEXPECTED_TOKEN);
// This one token is the delimeting token
pteCur =*plstRuleList->begin();
ptmdRule =pteCur->ptmdGetLastValidRule();
if(ptmdRule->fIsLeaf()==false)
throw cxTokenizerException(ERR_COMPILER_UNSPECIFIED);
pttrDelimTokenRule =ptmdRule->pttrGetRule();
strDelimToken =m_tcContextInfo.strGetCurrentText().substr(
pteCur->nGetStartPosition(),
pteCur->nGetEndPosition()-pteCur->nGetStartPosition());
strBetweenToken =m_tcContextInfo.strGetCurrentText().substr(
0,
pteCur->nGetStartPosition());
strPutBack =m_tcContextInfo.strGetCurrentText().substr(
pteCur->nGetEndPosition());
std::tstring strTemp;
if(pttrDelimTokenRule->fGetResultString(&m_tcContextInfo,strTemp))
strDelimToken =strTemp;
if(pttrDelimTokenRule->fWantPutBack(&m_tcContextInfo))
{
std::tstring strPutBackFromRule;
pttrDelimTokenRule->vGetPutBackString(&m_tcContextInfo,strPutBackFromRule);
m_stkPtisData.top()->fPutBack(strPutBackFromRule.data());
}
// Now parse the text before the delimeting token
// (can consist of multiple tokens)
m_tcContextInfo.vCleanUpAfterTokenRecognition();
// If the tokens could not be evaluated, maybe there
// is a substition.
if(!fRecognizeNonDelimTokens(strBetweenToken))
{
m_stkPtisData.top()->fPutBack(strPutBack.c_str());
std::tstring strSubstitute;
bool fDone = false;
if( !fSubstituteUnknownToken(
strBetweenToken,
strDelimToken,
ptiGetInputStream(),
strSubstitute))
vRegisterToken(
strBetweenToken,
NULL,ptiGetInputStream()),
fDone = true;
if(!fDone)
{
m_stkPtisData.top()->fPutBack(strSubstitute.c_str());
vMarkAllForDeletionExcept(NULL);
m_tcContextInfo.nDeleteMarkedListEntries();
m_tcContextInfo.strGetCurrentText()="";
return;
}
}
else
m_stkPtisData.top()->fPutBack(strPutBack.c_str());
// Register the delimeting token
vRegisterToken(strDelimToken,pttrDelimTokenRule,m_stkPtisData.top());
// Clean up the parser context
m_tcContextInfo.vCleanUpAfterTokenRecognition();
}
//////////////////////////////////////////////////////////////////////
// Protected post token recognition operations
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cxTokenizer::vRegisterToken
PURPOSE: Forwards a recognized token unchanged to the token
         listener (m_ptlReceiver).
PARAMS:  strTokenText - text of the token
         pttrRule     - rule that produced the token; callers in
                        this file pass NULL for unknown tokens
         ptisStream   - input stream the token was read from
RETURNS: - void -
*********************************************************************/
void cxTokenizer::vRegisterToken(const std::tstring& strTokenText,
                                 cxTokenizerTokenRule* pttrRule,
                                 const cxTokenizerInputStream* ptisStream)
{
    // Debug-only check that the listener is still usable
    ASSERT(m_ptlReceiver->fCheckValid());

    m_ptlReceiver->vRegisterToken(strTokenText,pttrRule,ptisStream);
}
//////////////////////////////////////////////////////////////////////
// Protected helper operations
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cxTokenizer::fSubstituteUnknownToken
PURPOSE: Asks the substitutor listeners, in array order, whether a
         substitution for the given unknown token is available.
         The first listener that returns 'true' ends the search.
PARAMS:  strToken      - text of the unknown token
         strDelim      - text of the delimiting token that followed
                         (callers pass an empty string if none)
         ptisStream    - current input stream
         strSubstitute - passed through to the listeners, which are
                         expected to store the substitute here
RETURNS: 'true' if a listener reported a substitution
*********************************************************************/
bool cxTokenizer::fSubstituteUnknownToken(
    const std::tstring& strToken,
    const std::tstring& strDelim,
    cxTokenizerInputStream* ptisStream,
    std::tstring& strSubstitute)
{
    // The bound has to be tested BEFORE the slot is read; with the
    // tests the other way round a completely filled array was read
    // one element past its end.
    for(int i=0;i<MAX_SUBSTITUTORS && m_aptslSubstitutors[i]!=NULL;i++)
    {
        cxTokenizerSubstitutionListener* cur = m_aptslSubstitutors[i];
        if(cur->fDoSubstituteUnknownToken(&m_tcContextInfo,strToken,strDelim,ptisStream,strSubstitute))
            return true;
    }
    return false;
}
/*********************************************************************
FUNCTION: cxTokenizer::pteFindEarliestCompletedToken
PURPOSE: Searches the rule list of the context for the entry with
         the status 'completed' which started earliest (minimal
         nGetStartPosition()). Among entries with the same start
         position the longest one wins; if they also have the same
         length, the one found first is kept. Entries marked for
         deletion are ignored.
RETURNS: cxTokenizerContext::cxListEntry* - the entry found, or
         NULL if no completed entry exists
*********************************************************************/
cxTokenizerContext::cxListEntry *cxTokenizer::pteFindEarliestCompletedToken()
{
    cxTokenizerContext::tc_list_type *plstRuleList = m_tcContextInfo.plstGetTokenRuleList();
    cxTokenizerContext::rulelist_iterator cit;

    // Best candidate so far. 'pteMREntry==NULL' means "none yet"; no
    // magic start position (formerly 32767) is needed as sentinel, so
    // entries starting at any offset can be found.
    cxTokenizerContext::cxListEntry *pteMREntry = NULL;
    int nMRPos = 0;
    int nMRLen = 0;

    for(cit=plstRuleList->begin();cit!=plstRuleList->end();++cit)
    {
        cxTokenizerContext::cxListEntry *pteCur = *cit;
        if(pteCur->fIsMarkedForDeletion())
            continue;
        if(!pteCur->fIsCompleted())
            continue;

        const int nPos = pteCur->nGetStartPosition();
        const int nLen = pteCur->nGetEndPosition()-nPos;

        const bool fEarlier = (pteMREntry==NULL) || (nPos<nMRPos);
        const bool fSameStartButLonger = (pteMREntry!=NULL) && (nPos==nMRPos) && (nLen>nMRLen);
        if(fEarlier || fSameStartButLonger)
        {
            nMRPos = nPos;
            nMRLen = nLen;
            pteMREntry = pteCur;
        }
    }
    return pteMREntry;
}
/*********************************************************************
FUNCTION: cxTokenizer::pteFindEarliestAppliedToken
PURPOSE: Searches the rule list of the context for the entry for
         which fIsNotApplying() is set and which started earliest
         (minimal nGetStartPosition()). Of several entries with the
         same start position the one found first is kept. Entries
         marked for deletion are ignored.
RETURNS: cxTokenizerContext::cxListEntry* - the entry found, or
         NULL if there is no such entry
*********************************************************************/
cxTokenizerContext::cxListEntry *cxTokenizer::pteFindEarliestAppliedToken()
{
    cxTokenizerContext::tc_list_type *plstRuleList = m_tcContextInfo.plstGetTokenRuleList();
    cxTokenizerContext::rulelist_iterator cit;

    // Best candidate so far. 'pteMREntry==NULL' means "none yet"; no
    // magic start position (formerly 32767) is needed as sentinel, so
    // entries starting at any offset can be found.
    cxTokenizerContext::cxListEntry *pteMREntry = NULL;
    int nMRPos = 0;

    for(cit=plstRuleList->begin();cit!=plstRuleList->end();++cit)
    {
        cxTokenizerContext::cxListEntry *pteCur = *cit;
        if(pteCur->fIsMarkedForDeletion())
            continue;
        if(!pteCur->fIsNotApplying())
            continue;

        const int nPos = pteCur->nGetStartPosition();
        if(pteMREntry==NULL || nPos<nMRPos)
        {
            nMRPos = nPos;
            pteMREntry = pteCur;
        }
    }
    return pteMREntry;
}
/*********************************************************************
FUNCTION: cxTokenizer::vMarkAllForDeletionExcept
PURPOSE: Marks every entry in the rule list of the context for
         deletion, except the entry given as argument. Passing
         NULL marks all entries.
RETURNS: - void -
*********************************************************************/
void cxTokenizer::vMarkAllForDeletionExcept(const cxTokenizerContext::cxListEntry* pEntry)
{
    cxTokenizerContext::tc_list_type *plstEntries = m_tcContextInfo.plstGetTokenRuleList();

    for(cxTokenizerContext::rulelist_iterator itEntry=plstEntries->begin();
        itEntry!=plstEntries->end();
        itEntry++)
    {
        cxTokenizerContext::cxListEntry *pteCandidate = *itEntry;

        // The survivor is skipped, everything else gets marked
        if(pteCandidate==pEntry)
            continue;
        pteCandidate->vMarkForDeletion();
    }
}
/*********************************************************************
FUNCTION: cxTokenizer::fExclusiveModeEntered
PURPOSE: Handles if exclusive mode has been entered
RETURNS: 'true' if ready for going on,
'false' for next pass
*********************************************************************/
bool cxTokenizer::fExclusiveModeEntered(bool fHybrid, cxTokenizerMapData *ptmdNode, TCHAR tcChar, std::tstring& strCurrentText)
{
int len, destlen = 1;
len =strCurrentText.length();
// If we entered exclusive mode by a hybrid rule,
// i.e. a rule composed of a 'token'-part and a computed
// rule as the node of the token part (C++ comments "/*dklsd*/"
// are composed like this)
if(fHybrid)
destlen = ptmdNode->nGetHybridLength();
// Is this the first character?
// If yes, we can proceed. If not, we have to complete
// any open token
if(len!=destlen)
{
// Reset exclusive flag temporarily
// (will be set again next time)
m_tcContextInfo.vSetFlag(tctx_exclusive,false);
TCHAR tcPB;
tcPB =strCurrentText[len-2];
strCurrentText =strCurrentText.substr(0,strCurrentText.length()-2);
m_stkPtisData.top()->fPutBack(tcChar);
m_stkPtisData.top()->fPutBack(tcPB);
vParseCharacter(true);
return false;
}
return true;
}
//////////////////////////////////////////////////////////////////////
// Operations
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cxTokenizer::vPushInputStream
PURPOSE: Pushes the given input stream onto the input stream stack
         and records it in the set of known input streams.
         - A NULL stream is rejected (ASSERT in debug builds).
         - A stream that is currently on the stack is not pushed
           a second time.
         - A stream that was pushed before (it is already in the
           set of known streams) is pushed, but triggers an ASSERT
           in debug builds because reusing a stream is not
           supported.
RETURNS: - void -
*********************************************************************/
void cxTokenizer::vPushInputStream(cxTokenizerInputStream *pxtisInput)
{
    // A NULL entry would be dereferenced later on, at the latest in
    // vCleanupInputStreams(); never let it into stack or set.
    if(pxtisInput==NULL)
    {
        TRACE(_T("cxTokenizer::vPushInputStream() - NULL input stream rejected.\n"));
        ASSERT(FALSE);
        return;
    }

    // Refuse streams which are currently open.
    // NOTE: _Get_c() is a library-specific accessor to the container
    // underlying the stack.
    const stktis_type::container_type& c = m_stkPtisData._Get_c();
    stktis_type::container_type::const_iterator it;
    for(it=c.begin();it!=c.end();++it)
    {
        if( (*it)==pxtisInput )
        {
            TRACE(_T("cxTokenizer::vPushInputStream() - input stream is already in list.\n"));
            return;
        }
    }

    m_stkPtisData.push(pxtisInput);

    // Warn about streams which had been pushed (and detached) before
    if(m_setPtisData.find(pxtisInput)!=m_setPtisData.end())
    {
        TRACE( "Input stream was already in list. This causes problems "
               "because an input stream is reused; the state of "
               "line start positions will be corrupted. Do not "
               "reuse an input stream, create a new one instead.");
        ASSERT(FALSE);
    }
    m_setPtisData.insert(pxtisInput);
}
/*********************************************************************
FUNCTION: cxTokenizer::vDetachInputStream
PURPOSE: Detaches an input stream from the stack of open input
         streams, or all of them if pxtisInput==NULL.
         'pxtisInput' must be m_stkPtisData.top() or NULL to
         succeed; otherwise nothing is detached (ASSERT in debug
         builds). The streams are only popped from the stack here,
         they are not deleted.
RETURNS: - void -
*********************************************************************/
void cxTokenizer::vDetachInputStream(cxTokenizerInputStream *pxtisInput)
{
    // NULL: detach everything
    if(pxtisInput==NULL)
    {
        while(!m_stkPtisData.empty())
            m_stkPtisData.pop();
        return;
    }

    // Only the topmost stream can be detached. The test for an empty
    // stack must also hold in release builds, since top() must not be
    // called on an empty stack.
    if(m_stkPtisData.empty() || m_stkPtisData.top()!=pxtisInput)
    {
        ASSERT(FALSE);
        return;
    }
    m_stkPtisData.pop();
}
/*********************************************************************
FUNCTION: cxTokenizer::vCleanupInputStreams
PURPOSE: Cleans up the internal set of input streams and
deletes them if desired. May only be used if
the stack of open input streams is empty.
RETURNS: - void -
*********************************************************************/
void cxTokenizer::vCleanupInputStreams()
{
ASSERT(m_stkPtisData.empty());
if(!m_stkPtisData.empty()) return;
settis_type::iterator it;
for(it=m_setPtisData.begin();it!=m_setPtisData.end();it++)
{
if( (*it)->fShouldDelete() )
delete (*it);
}
m_setPtisData.clear();
}
/*********************************************************************
FUNCTION: cxTokenizer::vAddSubstitutorListener
PURPOSE: Adds a new token substitutor listener in the first free
         slot of m_aptslSubstitutors. If all MAX_SUBSTITUTORS slots
         are in use the listener is not added (ASSERT in debug
         builds).
RETURNS: - void -
*********************************************************************/
void cxTokenizer::vAddSubstitutorListener(cxTokenizerSubstitutionListener *pxtslSubstitutor)
{
    // Find the first free slot. The bound has to be tested BEFORE the
    // slot is read; with the tests the other way round a completely
    // filled array was read one element past its end.
    int i = 0;
    while(i<MAX_SUBSTITUTORS && m_aptslSubstitutors[i]!=NULL)
        i++;

    // No free slot left
    if(i==MAX_SUBSTITUTORS)
    {
        ASSERT(FALSE);
        return;
    }
    m_aptslSubstitutors[i] = pxtslSubstitutor;
}
/*********************************************************************
FUNCTION: cxTokenizer::vRemoveSubstitutorListener
PURPOSE: Removes a token substitutor listener from
         m_aptslSubstitutors. The listeners behind it move up by
         one slot, so the used slots stay contiguous. If the
         listener is not registered, nothing is changed (ASSERT in
         debug builds).
RETURNS: - void -
*********************************************************************/
void cxTokenizer::vRemoveSubstitutorListener(cxTokenizerSubstitutionListener *pxtslSubstitutor)
{
    // The bound is tested BEFORE the slot is read, see
    // vAddSubstitutorListener()
    for(int i=0;i<MAX_SUBSTITUTORS && m_aptslSubstitutors[i]!=NULL;i++)
    {
        if(m_aptslSubstitutors[i]!=pxtslSubstitutor)
            continue;

        // Close the gap. The loop stops one slot before the end so
        // that [i+1] never leaves the array ...
        for(;i<MAX_SUBSTITUTORS-1 && m_aptslSubstitutors[i]!=NULL;i++)
            m_aptslSubstitutors[i] = m_aptslSubstitutors[i+1];

        // ... and the slot it stopped at becomes (or already is) the
        // terminating NULL. This also clears the last slot when the
        // array was completely filled.
        m_aptslSubstitutors[i] = NULL;
        return;
    }

    // Listener is not registered
    ASSERT(FALSE);
}
/*********************************************************************
FUNCTION: cxTokenizer::dumpList
PURPOSE: Diagnostic helper. Writes one TRACE line per entry of the
         given list: the flags 'completed', 'not applying' and
         'marked for deletion', the start and end position, and
         the address of the entry.
RETURNS: - void -
*********************************************************************/
void cxTokenizer::dumpList(cxTokenizerContext::tc_list_type *plist)
{
    cxTokenizerContext::tc_list_type::const_iterator it;
    for(it=plist->begin();it!=plist->end();++it)
    {
        // The format string used to end in "0x%08lx" without a matching
        // argument (six conversions, five arguments), which is
        // undefined behaviour. The entry's address is now passed for
        // the last conversion; "%p" keeps it correct where pointers are
        // wider than 'long'.
        TRACE("Item: %s, %s, %s, %d, %d, %p\n",
            (*it)->fIsCompleted()?"true":"false",
            (*it)->fIsNotApplying()?"true":"false",
            (*it)->fIsMarkedForDeletion()?"true":"false",
            (*it)->nGetStartPosition(),
            (*it)->nGetEndPosition(),
            static_cast<const void*>(*it));
    }
}
/*********************************************************************
FUNCTION: cxTokenizer::vParseCharacter
PURPOSE: Parses the next character from the input stream.
The character is read from the topmost input stream and
appended to the current text of the context. Then
1. every entry in the rule list of the context which is
still applying is tested against the character,
2. unless exclusive mode is (or was) active, the rules in
m_tmdWIRules of the map are tested for a token starting
at this character,
3. it is decided whether a delimiting token has been
recognized; if so, vDelimTokenRecognized() is called.
If not, and this is the last character, the whole
current text is resolved as non-delimiting token.
PARAMS: fOverrideIsLastChar - if 'true', the character is treated
as the last character even if the stream has not
reached its end
RETURNS: - void -
*********************************************************************/
void cxTokenizer::vParseCharacter(bool fOverrideIsLastChar)
{
TCHAR tcChar;
bool fIsLastChar, fExclusive;
cxTokenizerContext::tc_list_type
*plstRuleList;
// NOTE(review): 'dit' is not used anywhere in this function.
cxTokenizerContext::rulelist_iterator
it,cit,dit;
// Reference to the text held by the context; changes made through
// 'strCurrentText' are changes to the context.
std::tstring& strCurrentText = m_tcContextInfo.strGetCurrentText();
tcChar =m_stkPtisData.top()->tcGetNextCharacter();
// End of stream always counts as last character; otherwise the
// caller decides.
fIsLastChar =m_stkPtisData.top()->fIsEofReached()?true:fOverrideIsLastChar;
strCurrentText +=tcChar;
plstRuleList =m_tcContextInfo.plstGetTokenRuleList();
it =plstRuleList->begin();
// State of the exclusive flag BEFORE any rule has seen this
// character. The rules called below may change the flag in the
// context; comparing both tells whether exclusive mode has just been
// entered or left.
fExclusive =m_tcContextInfo.fIsFlagSet(tctx_exclusive);
// The approach is like this:
// Find the next 'within' token (ignore all characters until this
// token is found). Then, determine the type of the token in between
// the starting position and the position where the 'within' token is
// found
// 1. Check if already found rules still apply
// nCntTotal: entries in the list (incl. those added below)
// nCntToDelete: entries marked for deletion because the character
// did not fit (see the note at vMarkForDeletion() in
// the hybrid branch, which is not counted)
// nCntNotApplying: completed entries which do not go on anymore
// NOTE(review): nCntCompleted is only ever incremented and
// fCompleteRuleGoesOn is never set to 'true' in this function.
int nCntTotal = 0;
int nCntToDelete = 0;
int nCntCompleted = 0;
int nCntNotApplying = 0;
bool fRecognized = false;
bool fCompleteRuleGoesOn = false;
// NOTE(review): entries are inserted with push_front() while this loop
// is running; this presumably relies on tc_list_type keeping iterators
// valid on insertion (as std::list does) - confirm.
for(cit=it;cit!=plstRuleList->end();cit++)
{
bool fSkip = false;
bool fComplete = false;
cxTokenizerContext::cxListEntry
*pteCur = *cit;
cxTokenizerMapData *ptmdRule = pteCur->ptmdGetRule(),
*ptmdRuleNext = NULL;
nCntTotal++;
// Entries which already stopped applying are only counted
if(pteCur->fIsNotApplying())
nCntNotApplying++,fSkip=true;
if(fSkip)
continue;
// Does the rule accept this character? A non-NULL result is the
// next node of the rule.
ptmdRuleNext =ptmdRule->ptmdDoesApply(&m_tcContextInfo,tcChar,&fComplete,pteCur->nGetStartPosition(),strCurrentText.length());
if(ptmdRuleNext!=NULL)
{
ptmdRuleNext->vApplied(&m_tcContextInfo,tcChar,fComplete,pteCur->nGetStartPosition(),strCurrentText.length());
// Hybrid rule?
if(ptmdRuleNext->fIsComputed())
{
// NOTE(review): this declaration shadows the 'fSkip' declared at
// the top of the loop body. The assignment 'fSkip = true' below
// therefore has no effect on the test 'if(!fSkip)' after this
// block, which reads the outer variable (always 'false' at that
// point). Confirm whether the shadowing is intended.
bool fSkip = false;
// Has the 'exclusive access' flag been set?
// (it was not set before this character, but it is set now)
if(!fExclusive && m_tcContextInfo.fIsFlagSet(tctx_exclusive))
{
// 'false' means the characters have been put back and parsed
// by a nested vParseCharacter() call; nothing left to do here.
if(!fExclusiveModeEntered(true,ptmdRuleNext,tcChar,strCurrentText))
return;
// Replace the current entry by a new one which starts where
// the token part of the hybrid rule starts
cxTokenizerContext::cxListEntry
*pteNewEntry = new cxTokenizerContext::cxListEntry(
strCurrentText.length()-
ptmdRuleNext->nGetHybridLength(),
ptmdRuleNext);
if(fComplete)
{
pteNewEntry->vSetCompleted( strCurrentText.length(),
ptmdRuleNext);
nCntCompleted++;
}
// Note: nCntToDelete is not incremented for this entry
pteCur->vMarkForDeletion();
plstRuleList->push_front(pteNewEntry);
nCntTotal++;
fSkip = true;
}
}
if(!fSkip)
{
if(fComplete)
{
if(!m_tcContextInfo.fIsFlagSet(tctx_exclusive) &&
fExclusive)
{
// special case if the exclusive mode has been
// reset to non-exclusive mode
fRecognized = true;
}
// Remember the position at which the rule was complete; the
// rule may still go on with the next character
pteCur->vSetCompleted( strCurrentText.length(),
ptmdRuleNext);
//pteCur->vSetNotApplying();
//nCntNotApplying++;
nCntCompleted++;
}
// Advance the entry to the next node of the rule
pteCur->vSetRule(ptmdRuleNext);
}
}
else
{
// NULL -> this char doesn't fit for this rule anymore
if(pteCur->fIsCompleted())
{
// The rule was complete before: keep it as a candidate
pteCur->vSetNotApplying();
nCntNotApplying++;
}
else
{
// The rule never completed: drop it
pteCur->vMarkForDeletion();
nCntToDelete++;
}
}
}
// Check for new tokens only if in non-exclusive mode
// (neither before this character nor now)
if(!fExclusive && !m_tcContextInfo.fIsFlagSet(tctx_exclusive))
{
// 2. Check if any new rules apply for that character
cxTokenizerMapData *ptmdNewRule;
bool fComplete = false;
ptmdNewRule =m_ptmLexxerMap->m_tmdWIRules.ptmdDoesApply(
&m_tcContextInfo,tcChar,&fComplete,strCurrentText.length(),strCurrentText.length());
if(ptmdNewRule!=NULL)
{
ptmdNewRule->vApplied(&m_tcContextInfo,tcChar,fComplete,strCurrentText.length(),strCurrentText.length());
// Has the 'exclusive access' flag been set?
if(m_tcContextInfo.fIsFlagSet(tctx_exclusive))
{
// 'false' means the characters have been put back and parsed
// by a nested vParseCharacter() call; nothing left to do here.
if(!fExclusiveModeEntered(false,ptmdNewRule,tcChar,strCurrentText))
return;
}
// The new token starts at the character just appended
cxTokenizerContext::cxListEntry
*pteNewEntry = new cxTokenizerContext::cxListEntry(
strCurrentText.length()-1,
ptmdNewRule);
if(fComplete)
{
pteNewEntry->vSetCompleted( strCurrentText.length(),
ptmdNewRule);
nCntCompleted++;
}
plstRuleList->push_front(pteNewEntry);
nCntTotal++;
}
}
// 3. Decide whether a token has been recognized
// At least one rule has been tested?
if(nCntTotal!=0)
{
// Is at least one rule valid?
if(nCntToDelete!=nCntTotal)
{
// Last character found?
if(fIsLastChar)
{
// Search for the rule which started earliest
// (pteMREntry is NULL if no entry is completed; then ALL
// entries are marked for deletion)
cxTokenizerContext::cxListEntry
*pteMREntry = pteFindEarliestCompletedToken();
vMarkAllForDeletionExcept(pteMREntry);
nCntToDelete =plstRuleList->size()-1;
fRecognized =true;
}
// Are all rules which are still valid not applying anymore?
// (hence completed, but the new character doesn't fit to any
// of the remaining rules)?
if( (nCntTotal-nCntToDelete)==nCntNotApplying &&
!fCompleteRuleGoesOn)
{
if( nCntNotApplying==1 )
{
// The remaining rule is the result
fRecognized =true;
}
else
{
// Search for the rule which started earliest
cxTokenizerContext::cxListEntry
*pteMREntry = pteFindEarliestAppliedToken();
vMarkAllForDeletionExcept(pteMREntry);
nCntToDelete =plstRuleList->size()-1;
fRecognized =true;
}
}
}
}
// Delete the rules in the list which are marked for deletion
// VERIFY(m_tcContextInfo.nDeleteMarkedListEntries()==nCntToDelete);
m_tcContextInfo.nDeleteMarkedListEntries();
// Has a rule been recognized?
if(fRecognized)
vDelimTokenRecognized();
else
{
// No delimiting token, but the input ends here: resolve everything
// read so far as non-delimiting token
if(fIsLastChar)
{
// Work on a copy of the text; the context is cleaned up before
// fRecognizeNonDelimTokens() appends the characters to it again
std::tstring strTemp = m_tcContextInfo.strGetCurrentTextConst();
m_tcContextInfo.vCleanUpAfterTokenRecognition();
bool fOk = fRecognizeNonDelimTokens(strTemp);
m_tcContextInfo.vCleanUpAfterTokenRecognition();
m_tcContextInfo.vSetFlag(tctx_exclusive,false);
if(!fOk)
{
// Unknown text: ask the substitutor listeners; if none of them
// substitutes, register the text as unknown token (rule==NULL),
// otherwise put the substitute back into the input stream
std::tstring strSubstitute;
if( !fSubstituteUnknownToken(
strTemp, std::tstring(),
ptiGetInputStream(),
strSubstitute))
vRegisterToken(
strTemp,
NULL,ptiGetInputStream());
else
VERIFY(ptiGetInputStream()->fPutBack(strSubstitute.c_str()));
}
}
}
}