// cooLexxer.cpp: implementation of the cooLexxer class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "cooLexxerTokenRule.h"
#include "cooLexxerCharTokenRule.h"
#include "cooLexxerContextCookie.h"
#include "cooLexxerContext.h"
#include "cooLexxerMapData.h"
#include "cooLexxerMap.h"
#include "cooLexxerInputStream.h"
#include "cooLexxer.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
#include "cooLexxerDiags.cpp"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
cooLexxer::cooLexxer(cooLexxerInputStream *plisData,
                     cooLexxerMap *plmLexxerMap,
                     cooLexxerListener *pllReceiver)
{
    // Debug-build contract, part 1: none of the collaborators may be NULL
    ASSERT(plisData!=NULL);
    ASSERT(plmLexxerMap!=NULL);
    ASSERT(pllReceiver!=NULL);
    // Debug-build contract, part 2: each collaborator reports itself valid
    ASSERT(plisData->fCheckValid());
    ASSERT(plmLexxerMap->fCheckValid());
    ASSERT(pllReceiver->fCheckValid());

    // Begin with a lexer context that has no flags set
    m_lcContextInfo.vClearAllFlags();

    // Remember where the recognized tokens are sent to ...
    this->m_pllReceiver = pllReceiver;
    // ... the rule tree describing the 'path' to each token ...
    this->m_plmLexxerMap = plmLexxerMap;
    // ... and the stream the characters are read from
    this->m_plisData = plisData;
}
cooLexxer::~cooLexxer()
{
    // The input stream decides via fShouldDelete() whether the lexer
    // destroys it. The lexer map is not touched by this destructor.
    if(m_plisData!=NULL)
    {
        ASSERT(m_plisData->fCheckValid());
        const bool fDeleteStream = m_plisData->fShouldDelete();
        if(fDeleteStream)
        {
            delete m_plisData;
            m_plisData = NULL;
        }
    }

    // Same protocol for the token listener
    if(m_pllReceiver!=NULL)
    {
        ASSERT(m_pllReceiver->fCheckValid());
        const bool fDeleteListener = m_pllReceiver->fShouldDelete();
        if(fDeleteListener)
        {
            delete m_pllReceiver;
            m_pllReceiver = NULL;
        }
    }
}
//////////////////////////////////////////////////////////////////////
// Protected internal token recognition operations
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cooLexxer::vRecognizeNonDelimTokens
PURPOSE: Classifies a piece of text that is not a delimiting token
         (the text in front of a delimiting token, or the remaining
         text at the end of the input). The characters are fed one
         by one into the rule tree m_lmdFPRules of the lexer map.
         If every character was accepted and the last step reported
         a complete match, the text is registered together with the
         matching rule; in all other cases it is registered with a
         NULL rule. Empty text is ignored.
RETURNS: - void -
*********************************************************************/
void cooLexxer::vRecognizeNonDelimTokens(std::tstring strTokenText)
{
    // Nothing to classify
    if(strTokenText.empty())
        return;

    // TODO: Extend this function to correctly handle the flag
    // ooptrf_intermediate (currently, only one token is recognized)
    int nChars = strTokenText.length();
    bool fMatchComplete = false;
    cooLexxerMapData *plmdNode = &m_plmLexxerMap->m_lmdFPRules;
    cooLexxerMapData *plmdMatched = NULL;

    // Walk down the rule tree, one character per step
    int nPos = 0;
    while(nPos<nChars)
    {
        TCHAR tcCur = strTokenText[nPos];
        // The context text grows along with the characters examined
        m_lcContextInfo.strGetCurrentText() += tcCur;
        plmdMatched = plmdNode->plmdDoesApply(&m_lcContextInfo,tcCur,&fMatchComplete,0,nPos);
        // No rule accepts this character: stop, the text stays unclassified
        if(plmdMatched==NULL)
            break;
        plmdMatched->vApplied(&m_lcContextInfo,tcCur,fMatchComplete,0,nPos);
        plmdNode = plmdMatched;
        ++nPos;
    }

    // Either a character was rejected or the match is not complete:
    // hand the text on without a rule
    if(plmdMatched==NULL || !fMatchComplete)
    {
        vRegisterToken(strTokenText,NULL);
        return;
    }

    // The rule may supply a replacement for the token text
    std::tstring strResult;
    if(plmdMatched->pltrGetRule()->fGetResultString(&m_lcContextInfo,strResult))
        strTokenText = strResult;
    vRegisterToken(strTokenText,plmdMatched->pltrGetRule());
}
/*********************************************************************
FUNCTION: cooLexxer::vDelimTokenRecognized
PURPOSE: Internally called when a delimiting token has been
         recognized. Splits the current context text into the text
         in front of the token, the token itself and the text
         behind it. The text in front is analyzed by
         vRecognizeNonDelimTokens(), the delimiting token is
         registered using vRegisterToken(), and the text behind it
         is put back into the input stream.
RETURNS: - void -
THROWS:  cooLexxerException(ERR_UNEXPECTED_TOKEN) if the rule list
         is missing or does not hold exactly one entry;
         cooLexxerException(ERR_COMPILER_UNSPECIFIED) if the last
         valid rule of that entry is missing or is not a leaf.
*********************************************************************/
void cooLexxer::vDelimTokenRecognized()
{
    // There must be exactly one entry in the list of tokens
    cooLexxerContext::lc_list_type
        *plstRuleList = m_lcContextInfo.plstGetTokenRuleList();
    ASSERT(plstRuleList!=NULL);
    ASSERT(plstRuleList==NULL || plstRuleList->size()==1);
    // Release builds have no ASSERT: fail with an exception instead of
    // dereferencing a missing list or reading from an empty one
    if(plstRuleList==NULL || plstRuleList->size()!=1)
        throw cooLexxerException(ERR_UNEXPECTED_TOKEN);

    // This one entry describes the delimiting token
    cooLexxerContext::cooListEntry
        *pleCur = *plstRuleList->begin();
    cooLexxerMapData *plmdRule = pleCur->plmdGetLastValidRule();
    if(plmdRule==NULL || plmdRule->fIsLeaf()==false)
        throw cooLexxerException(ERR_COMPILER_UNSPECIFIED);
    cooLexxerTokenRule *pltrDelimTokenRule = plmdRule->pltrGetRule();

    // Cut the context text into three parts. All three are copied before
    // the context is cleaned up below.
    //   [0, start)    text in front of the delimiting token
    //   [start, end)  the delimiting token itself
    //   [end, ...)    characters read past the token
    std::tstring strDelimToken = m_lcContextInfo.strGetCurrentText().substr(
        pleCur->nGetStartPosition(),
        pleCur->nGetEndPosition()-pleCur->nGetStartPosition());
    std::tstring strBetweenToken = m_lcContextInfo.strGetCurrentText().substr(
        0,
        pleCur->nGetStartPosition());
    std::tstring strPutBack = m_lcContextInfo.strGetCurrentText().substr(
        pleCur->nGetEndPosition());

    // The rule may supply a replacement for the token text
    std::tstring strTemp;
    if(pltrDelimTokenRule->fGetResultString(&m_lcContextInfo,strTemp))
        strDelimToken = strTemp;

    // Now parse the text before the delimiting token
    // (can consist of multiple tokens)
    m_lcContextInfo.vCleanUpAfterTokenRecognition();
    vRecognizeNonDelimTokens(strBetweenToken);

    // Register the delimiting token
    vRegisterToken(strDelimToken,pltrDelimTokenRule);

    // Clean up the parser context
    m_lcContextInfo.vCleanUpAfterTokenRecognition();

    // Put back the remaining characters. c_str() is used because it is
    // guaranteed to be NUL-terminated in every C++ standard, whereas
    // data() only gives that guarantee from C++11 on.
    m_plisData->fPutBack(strPutBack.c_str());
}
//////////////////////////////////////////////////////////////////////
// Protected post token recognition operations
//////////////////////////////////////////////////////////////////////
void cooLexxer::vRegisterToken(const std::tstring& strTokenText,
cooLexxerTokenRule* pltrRule)
{
ASSERT(m_pllReceiver->fCheckValid());
m_pllReceiver->vRegisterToken(strTokenText, pltrRule);
}
//////////////////////////////////////////////////////////////////////
// Protected helper operations
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cooLexxer::pleFindEarliestCompletedToken
PURPOSE: Searches the token rule list of the context for the entry
         with the status 'completed' which started earliest
         (minimal nStartPosition). Entries marked for deletion are
         ignored. If several entries share the minimal start
         position, the first one in list order is returned.
RETURNS: cooLexxerContext::cooListEntry* - the entry found, or NULL
         if no entry qualifies
*********************************************************************/
cooLexxerContext::cooListEntry
*cooLexxer::pleFindEarliestCompletedToken()
{
    cooLexxerContext::lc_list_type
        *plstRuleList = m_lcContextInfo.plstGetTokenRuleList();
    cooLexxerContext::cooListEntry
        *pleMREntry = NULL;

    for(cooLexxerContext::rulelist_iterator cit=plstRuleList->begin();
        cit!=plstRuleList->end();
        ++cit)
    {
        cooLexxerContext::cooListEntry
            *pleCur = *cit;
        if(pleCur->fIsMarkedForDeletion())
            continue;
        if(!pleCur->fIsCompleted())
            continue;
        // Compare against the best entry found so far rather than
        // against a fixed upper bound, so that a candidate is found
        // no matter how large its start position is
        if(pleMREntry==NULL
            || pleCur->nGetStartPosition()<pleMREntry->nGetStartPosition())
        {
            pleMREntry = pleCur;
        }
    }
    return pleMREntry;
}
/*********************************************************************
FUNCTION: cooLexxer::pleFindEarliestAppliedToken
PURPOSE: Searches the token rule list of the context for the entry
         with the status 'not applying' (fIsNotApplying() is true)
         which started earliest (minimal nStartPosition). Entries
         marked for deletion are ignored. If several entries share
         the minimal start position, the first one in list order is
         returned.
RETURNS: cooLexxerContext::cooListEntry* - the entry found, or NULL
         if no entry qualifies
*********************************************************************/
cooLexxerContext::cooListEntry
*cooLexxer::pleFindEarliestAppliedToken()
{
    cooLexxerContext::lc_list_type
        *plstRuleList = m_lcContextInfo.plstGetTokenRuleList();
    cooLexxerContext::cooListEntry
        *pleMREntry = NULL;

    for(cooLexxerContext::rulelist_iterator cit=plstRuleList->begin();
        cit!=plstRuleList->end();
        ++cit)
    {
        cooLexxerContext::cooListEntry
            *pleCur = *cit;
        if(pleCur->fIsMarkedForDeletion())
            continue;
        if(!pleCur->fIsNotApplying())
            continue;
        // Compare against the best entry found so far rather than
        // against a fixed upper bound, so that a candidate is found
        // no matter how large its start position is
        if(pleMREntry==NULL
            || pleCur->nGetStartPosition()<pleMREntry->nGetStartPosition())
        {
            pleMREntry = pleCur;
        }
    }
    return pleMREntry;
}
/*********************************************************************
FUNCTION: cooLexxer::vMarkAllForDeletionExcept
PURPOSE: Marks every entry of the context's token rule list for
         deletion, sparing only the entry passed in pEntry. When
         pEntry is NULL (or not part of the list) every entry is
         marked.
RETURNS: - void -
*********************************************************************/
void cooLexxer::vMarkAllForDeletionExcept(const cooLexxerContext::cooListEntry* pEntry)
{
    cooLexxerContext::lc_list_type
        *plstEntries = m_lcContextInfo.plstGetTokenRuleList();
    cooLexxerContext::rulelist_iterator
        itEntry = plstEntries->begin();
    while(itEntry!=plstEntries->end())
    {
        cooLexxerContext::cooListEntry
            *pleCandidate = *itEntry;
        // The survivor is identified by pointer identity
        if(pleCandidate!=pEntry)
        {
            pleCandidate->vMarkForDeletion();
        }
        itEntry++;
    }
}
//////////////////////////////////////////////////////////////////////
// Operations
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cooLexxer::vParseCharacter
PURPOSE: Reads the next character from the input stream, appends it
to the current context text and advances all token rules which are
currently being tracked. If this completes the recognition of a
delimiting token, vDelimTokenRecognized() is called; if the last
character has been processed without such a token, the whole current
text is passed to vRecognizeNonDelimTokens().
PARAMS: fOverrideIsLastChar - used as the 'last character' state
whenever the input stream does not report EOF; the function passes
true when it calls itself to finish an open token.
RETURNS: - void -
*********************************************************************/
void cooLexxer::vParseCharacter(bool fOverrideIsLastChar)
{
TCHAR tcChar;
bool fIsLastChar;
cooLexxerContext::lc_list_type
*plstRuleList;
cooLexxerContext::rulelist_iterator
it,cit,dit;
// Reference into the context: changes made through strCurrentText
// change the context's current text
std::tstring& strCurrentText = m_lcContextInfo.strGetCurrentText();
// Fetch the next character; EOF always counts as 'last character',
// otherwise the caller's override decides
tcChar =m_plisData->tcGetNextCharacter();
fIsLastChar =m_plisData->fIsEofReached()?true:fOverrideIsLastChar;
strCurrentText +=tcChar;
plstRuleList =m_lcContextInfo.plstGetTokenRuleList();
it =plstRuleList->begin();
// The approach is like this:
// Find the next 'within' token (ignore all characters until this
// token is found). Then, determine the type of the token in between
// the starting position and the position where the 'within' token is
// found
// 1. Check if already found rules still apply
// Bookkeeping for the evaluation further below:
// nCntTotal - number of list entries seen (plus a new one from step 2)
// nCntToDelete - entries marked for deletion by this call
// nCntCompleted - entries which reached 'completed' with this character
// nCntNotApplying - entries which are (or just became) 'not applying'
int nCntTotal = 0;
int nCntToDelete = 0;
int nCntCompleted = 0;
int nCntNotApplying = 0;
for(cit=it;cit!=plstRuleList->end();cit++)
{
bool fSkip = false;
bool fComplete = false;
cooLexxerContext::cooListEntry
*pleCur = *cit;
cooLexxerMapData *plmdRule = pleCur->plmdGetRule(),
*plmdRuleNext = NULL;
nCntTotal++;
// Entries which already stopped applying are only counted
if(pleCur->fIsNotApplying())
nCntNotApplying++,fSkip=true;
if(fSkip)
continue;
// Ask the entry's current rule node whether the new character fits
plmdRuleNext =plmdRule->plmdDoesApply(&m_lcContextInfo,tcChar,&fComplete,pleCur->nGetStartPosition(),strCurrentText.length());
if(plmdRuleNext!=NULL)
{
// NOTE(review): vApplied() is invoked on the current node (plmdRule)
// here, while vRecognizeNonDelimTokens() and step 2 below invoke it
// on the node returned by plmdDoesApply() - confirm this is intended
plmdRule->vApplied(&m_lcContextInfo,tcChar,fComplete,pleCur->nGetStartPosition(),strCurrentText.length());
if(fComplete)
{
// Record the current text length as completion position
pleCur->vSetCompleted( strCurrentText.length(),
plmdRuleNext);
nCntCompleted++;
}
// Continue from the returned node with the next character
pleCur->vSetRule(plmdRuleNext);
}
else
{
// NULL -> this char doesn't fit for this rule anymore
if(pleCur->fIsCompleted())
{
// Was completed earlier: keep the entry as a candidate result
pleCur->vSetNotApplying();
nCntNotApplying++;
}
else
{
// Never completed: the entry is of no further use
pleCur->vMarkForDeletion();
nCntToDelete++;
}
}
}
// Check for new tokens only if in non-exclusive mode
if(!m_lcContextInfo.fIsFlagSet(oolctx_exclusive))
{
// 2. Check if any new rules apply for that character
cooLexxerMapData *plmdNewRule;
bool fComplete = false;
plmdNewRule =m_plmLexxerMap->m_lmdWIRules.plmdDoesApply(
&m_lcContextInfo,tcChar,&fComplete,strCurrentText.length(),strCurrentText.length());
if(plmdNewRule!=NULL)
{
plmdNewRule->vApplied(&m_lcContextInfo,tcChar,fComplete,strCurrentText.length(),strCurrentText.length());
// Has the 'exclusive access' flag been set?
// (the flag was clear when this branch was entered, so it can only
// have been set by the calls made since then)
if(m_lcContextInfo.fIsFlagSet(oolctx_exclusive))
{
// Is this the first character?
// If yes, we can proceed. If not, we have to complete
// any open token
int len;
len =strCurrentText.length();
if(len!=1)
{
// Reset exclusive flag temporarily
// (will be set again next time)
m_lcContextInfo.vSetFlag(oolctx_exclusive,false);
// Take the last two characters (the previous one and tcChar) off
// the current text and hand both back to the input stream, tcChar
// first. Presumably the stream returns put-back characters in
// reverse order, so that tcPB is read next - verify against
// cooLexxerInputStream.
TCHAR tcPB;
tcPB =strCurrentText[len-2];
strCurrentText =strCurrentText.substr(0,strCurrentText.length()-2);
m_plisData->fPutBack(tcChar);
m_plisData->fPutBack(tcPB);
// Parse again with 'last character' forced. This call returns
// without running the evaluation and clean-up below.
vParseCharacter(true);
return;
}
}
// Track the new rule; its start position is the index of tcChar
// within the current text
cooLexxerContext::cooListEntry
*pleNewEntry = new cooLexxerContext::cooListEntry(strCurrentText.length()-1,
plmdNewRule);
if(fComplete)
{
pleNewEntry->vSetCompleted( strCurrentText.length(),
plmdNewRule);
nCntCompleted++;
}
plstRuleList->push_front(pleNewEntry);
nCntTotal++;
}
}
// 3. Decide whether a delimiting token has been recognized
bool fRecognized = false;
// More than one rule has been tested?
// (the condition actually tests for 'at least one')
if(nCntTotal!=0)
{
// Is at least one rule valid?
if(nCntToDelete!=nCntTotal)
{
// Last character found?
if(fIsLastChar)
{
// Search for the rule which started earliest
// NOTE(review): if no completed entry exists, pleMREntry is NULL and
// every entry gets marked, yet nCntToDelete is still set to size()-1
// and fRecognized to true; vDelimTokenRecognized() would then
// presumably find an empty list and throw - confirm this is intended
cooLexxerContext::cooListEntry
*pleMREntry = pleFindEarliestCompletedToken();
vMarkAllForDeletionExcept(pleMREntry);
nCntToDelete =plstRuleList->size()-1;
fRecognized =true;
}
// Are all rules which are still valid not applying anymore?
// (hence completed, but the new character doesn't fit to any
// of the remaining rules)?
if( (nCntTotal-nCntToDelete)==nCntNotApplying )
{
if( nCntNotApplying==1 )
{
// The remaining rule is the result
fRecognized =true;
}
else
{
// Search for the rule which started earliest
cooLexxerContext::cooListEntry
*pleMREntry = pleFindEarliestAppliedToken();
vMarkAllForDeletionExcept(pleMREntry);
nCntToDelete =plstRuleList->size()-1;
fRecognized =true;
}
}
}
}
// Delete the rules in the list which are marked for deletion
// (the deletion is performed inside VERIFY, whose expression is
// evaluated in release builds too; only the comparison with the
// expected count is a debug-build check)
VERIFY(m_lcContextInfo.nDeleteMarkedListEntries()==nCntToDelete);
// Has a rule been recognized?
if(fRecognized)
vDelimTokenRecognized();
else
{
// No delimiting token: at the last character the whole current
// text is analyzed for non-delimiting tokens
if(fIsLastChar)
{
// Copy the text first, the clean-up calls operate on the context
std::tstring strTemp = m_lcContextInfo.strGetCurrentTextConst();
m_lcContextInfo.vCleanUpAfterTokenRecognition();
vRecognizeNonDelimTokens(strTemp);
m_lcContextInfo.vCleanUpAfterTokenRecognition();
}
}
}