/*********************************************************************
Copyright (C) 2001 by
Alexander Berthold, alexander-berthold@web.de.
Hoegestr. 54
79108 Freiburg i. Breisgau
Germany
-- This file is part of cxAnalyzer --
"cxAnalyzer" is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or any later version.
"cxAnalyzer" is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with "cxAnalyzer"; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330,
Boston, MA 02111-1307 USA
---------------------------------------------------------------
If you find any bugs or if you make other corrections/
enhancements, i'd appreciate if you'd let me know about
that. My email is
alexander-berthold@web.de
If you share this code, do not remove this text.
---------------------------------------------------------------
*********************************************************************/
// cxAnalyzerMain.cpp: implementation of the cxAnalyzerMain class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "cxAnalyzerTypeMap.h"
#include "cxaToken.h"
#include "cxAnalyzerExpression.h"
#include "cxAnalyzerTree.h"
#include "cxaRuleCacheElement.h"
#include "cxaRuleCache.h"
#include "cxAnalyzerMain.h"
#include "cxAnalyzerException.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
cxAnalyzerMain::cxAnalyzerMain(cxAnalyzerTypeMap *patmTypeMap)
{
ASSERT(patmTypeMap!=NULL);
ASSERT(patmTypeMap->fCheckValid());
m_patmTypeMap =patmTypeMap;
m_fInitialized =false;
m_pacbUnknownToken=NULL;
}
cxAnalyzerMain::~cxAnalyzerMain()
{
atmae_map_type::const_iterator mit;
ae_vec_type::const_iterator vit;
// Delete the cxAnalyzerExpressions
for(vit=m_vecAEToDelete.begin();vit!=m_vecAEToDelete.end();vit++)
delete (*vit);
m_vecAEToDelete.clear();
// Delete the lists of priority-sorted cxAnalyzerExpressions
for(mit=m_mapAtmAEList.begin();mit!=m_mapAtmAEList.end();mit++)
delete (*mit).second;
m_mapAtmAEList.clear();
m_mapAtmImplicit.clear();
}
void cxAnalyzerMain::vReset()
{
m_arcRuleCache.clear();
}
//////////////////////////////////////////////////////////////////////
// Operations
//////////////////////////////////////////////////////////////////////
/********************************************************************
FUNCTION: fLoadFromStream()
PURPOSE: Initializes the cxAnalyzerMain from the stream 'input'.
For a format description, see the appriopriate header
file.
RETURNS: 'true' / 'false'
********************************************************************/
bool cxAnalyzerMain::fLoadFromStream(std::basic_istream<TCHAR> &input)
{
ASSERT(!m_fInitialized);
ASSERT(m_mapAtmAEList.begin()==m_mapAtmAEList.end());
ASSERT(m_vecAEToDelete.begin()==m_vecAEToDelete.end());
ASSERT(m_patmTypeMap->fCheckValid());
m_fInitialized =true;
typedef std::vector<cxAnalyzerExpression*> pae_vec_type;
bool fOkay = true;
std::tstring strLine;
pae_vec_type paevec;
// 1st part - generate cxAnalyzerExpression objects from
// the input stream
while(!input.eof())
{
std::getline(input,strLine);
if(strLine.length()==0)
continue;
if(strLine[0]==_T('\''))
continue;
if(_tcsicmp(strLine.data(),_T("[end]"))==0)
break;
cxAnalyzerExpression *paeTemp;
paeTemp =new cxAnalyzerExpression(m_patmTypeMap);
try {
paeTemp->fInitFromRule(strLine);
}
catch(cxAnalyzerException& e)
{
pae_vec_type::const_iterator it;
for(it=paevec.begin();it!=paevec.end();it++) delete (*it);
delete paeTemp;
if(e.ErrorString()==NULL) e.SetErrorString(strLine.c_str());
throw e;
}
paevec.push_back(paeTemp);
}
pae_vec_type::const_iterator it;
// 2nd part - after all expressions are now assigned
// to ATM-Id's, finish the initialization
for(it=paevec.begin();it!=paevec.end();it++)
{
try {
if(!(*it)->fFinishInit())
fOkay=false;
else
vRegisterAE((*it));
}
catch(cxAnalyzerException& e)
{
for(;it!=paevec.end();it++)
delete (*it);
throw e;
}
}
/*#ifndef ANALYZER_NO_OPTIMIZATION
// 3rd part - extract inherent information in the rules
// to optimize the recognition process later on
for(it=paevec.begin();it!=paevec.end();it++)
{
cxAnalyzerExpression *paeExpr = (*it);
for(int nLen=0; nLen<paeExpr->nGetPatternSize(); nLen++)
fGenerateExclusionInformation(paeExpr,nLen);
}
#endif*/
return fOkay;
}
#ifndef ANALYZER_NO_OPTIMIZATION
/********************************************************************
FUNCTION: fGenerateExclusionInformation()
PURPOSE: Tests for the given length 'n' which rules can safely be ignored if
'n' pattern elements of the rule 'paeSrc' are true and inserts them
in the expressions' exclusion map.
RETURNS: Returns 'true' if at least one rule could be excluded
********************************************************************/
/* bool cxAnalyzerMain::fDoesAnyStartWith(int nAtmID, const cxAnalyzerTypeInfo *patiSrc) const
{
atmae_map_type::const_iterator atmit;
ae_list_type::const_iterator aeit;
const ae_list_type *paeList = NULL;
bool fResult = false;
if(patiSrc->nGetMType()==ATM_MTYPE_RULE)
{ ASSERT(FALSE); return false; };
atmit =m_mapAtmAEList.find(nAtmID);
if(atmit==m_mapAtmAEList.end())
{
ASSERT(FALSE);
return false;
}
paeList =atmit->second;
for(aeit=paeList->begin(); aeit!=paeList->end(); aeit++)
{
const cxAnalyzerExpression *paeOther = (*aeit);
const cxAnalyzerTypeInfo *patiOther;
ASSERT(paeOther!=NULL && paeOther->nGetPatternSize()>0);
patiOther = (*paeOther)[0];
ASSERT(patiOther!=NULL);
if( patiOther->nGetMType()==ATM_MTYPE_RULE &&
patiOther->nGetAtmType()!=nAtmID)
{
if(fDoesAnyStartWith(patiOther->nGetAtmType(), patiSrc))
{ fResult = true; break; };
continue;
}
if( patiOther->nGetAtmType()==patiSrc->nGetAtmType() )
{ fResult = true; break; };
}
return fResult;
}*/
/********************************************************************
FUNCTION: fGenerateExclusionInformation()
PURPOSE: Tests for the given length 'n' which rules can safely be ignored if
'n' pattern elements of the rule 'paeSrc' are true and inserts them
in the expressions' exclusion map.
RETURNS: Returns 'true' if at least one rule could be excluded
********************************************************************/
/* bool cxAnalyzerMain::fGenerateExclusionInformation(
cxAnalyzerExpression *paeExpr,
int nLength)
{
ASSERT(paeExpr!=NULL);
ASSERT(nLength < paeExpr->nGetPatternSize());
bool fResult = false;
// The algorithm here is not complete in the sense that it does not
// recognize all rules which can be excluded, it currently recognizes
// only a subset thererof. To make it complete, one had to implement
// a pre-parser which tests using recursion if there are more rules
// that can be excluded.
atmae_map_type::const_iterator atmit;
ae_list_type::const_iterator aeit;
const ae_list_type *paeList = NULL;
atmit =m_mapAtmAEList.find(paeExpr->nGetAtmType());
if(atmit==m_mapAtmAEList.end())
{
ASSERT(FALSE);
return false;
}
paeList =atmit->second;
for(aeit=paeList->begin(); aeit!=paeList->end(); aeit++)
{
const cxAnalyzerExpression *paeOther = (*aeit);
ASSERT(paeOther!=NULL);
int nPos = 0;
bool fExclude = false;
for(nPos = 0; nPos<nLength && nPos<paeOther->nGetPatternSize(); nPos++)
{
cxAnalyzerTypeInfo *patiSrc = (*paeExpr)[nPos];
cxAnalyzerTypeInfo *patiDest = (*paeOther)[nPos];
if( patiSrc->nGetMType()==ATM_MTYPE_TOKEN &&
patiDest->nGetMType()==ATM_MTYPE_TOKEN &&
patiSrc->nGetAtmType()!=patiDest->nGetAtmType())
{ fExclude = true; break; };
if( patiSrc->nGetMType()==ATM_MTYPE_COMP_TOKEN &&
patiDest->nGetMType()==ATM_MTYPE_COMP_TOKEN)
continue;
if( patiSrc->nGetMType()==ATM_MTYPE_RULE &&
patiDest->nGetMType()==ATM_MTYPE_RULE &&
patiSrc->nGetAtmType()==patiDest->nGetAtmType())
continue;
*/
/* if( patiSrc->nGetMType()!=ATM_MTYPE_RULE &&
patiDest->nGetMType()==ATM_MTYPE_RULE)
{
if(fDoesAnyStartWith(patiDest->nGetAtmType(), patiSrc))
{ fExclude = true; break; };
}
*/
/* break;
}
if(fExclude)
paeExpr->fInsertIntoExclusionMap(nLength, paeOther);
}
return false;
}*/
#endif
/********************************************************************
FUNCTION: vRegisterAE()
PURPOSE: Registers the cxAnalyzerExpression. Inserts it at the
appropriate position into the m_mapAtmAEList, or in the
m_mapAtmImplicit map if it is an implicit expression.
RETURNS: - void -
********************************************************************/
void cxAnalyzerMain::vRegisterAE(cxAnalyzerExpression *paeExpr)
{
ASSERT(m_fInitialized);
ASSERT(m_patmTypeMap->fCheckValid());
ASSERT(paeExpr!=NULL);
ASSERT(paeExpr->fCheckValid());
ASSERT(paeExpr->fUsesAtm(m_patmTypeMap));
int nAtmImplicit;
// Implicit conversion?
// (i.e. a mapping to a primitive type like a simple token
// or a computed token)
if(paeExpr->fIsImplicitExpressionToAtm(&nAtmImplicit))
{
#ifdef _DEBUG
// Verify if it is really a implicit conversion ...
const cxAnalyzerTypeInfo *patiTemp;
patiTemp =m_patmTypeMap->patiGetTypeInfo(nAtmImplicit);
if( (patiTemp->nGetMType()!=ATM_MTYPE_TOKEN) &&
(patiTemp->nGetMType()!=ATM_MTYPE_COMP_TOKEN) &&
(patiTemp->nGetMType()!=ATM_MTYPE_RULE))
ASSERT(FALSE);
#endif
if(paeExpr->fImplicitIgnore())
m_mapAtmImplicit.insert(
atmimp_map_type::value_type(
nAtmImplicit,
atm_imp(paeExpr->nGetAtmType(),paeExpr->nGetIDValue(),NULL)
));
else
m_mapAtmImplicit.insert(
atmimp_map_type::value_type(
nAtmImplicit,
atm_imp(paeExpr->nGetAtmType(),paeExpr->nGetIDValue(),paeExpr)
));
/* // Remember for later deletion
m_vecAEToDelete.push_back(paeExpr);
return;*/
}
// Non implicit conversion; register the rule.
atmae_map_type::iterator it;
int nAtmAEExpr = paeExpr->nGetAtmType();
ae_list_type *paeAtmList = NULL;
it =m_mapAtmAEList.find(nAtmAEExpr);
if(it==m_mapAtmAEList.end())
{
paeAtmList =new ae_list_type;
m_mapAtmAEList.insert(
atmae_map_type::value_type(
nAtmAEExpr,paeAtmList
));
}
else
paeAtmList =(*it).second;
ASSERT(paeAtmList!=NULL);
// Find the right insertion point in 'paeAtmList'
// (defined by length of paeExpr)
// As this function here is only called during setup of the
// analyzer, it is not time critical. Optimizations are left
// out for clarity.
ae_list_type::iterator lit; // for() loop
int nSizePattern;
nSizePattern=paeExpr->nGetPatternSize();
for(lit=paeAtmList->begin();lit!=paeAtmList->end();lit++)
{
if( nSizePattern>=(*lit)->nGetPatternSize() )
break;
}
if(lit!=paeAtmList->end())
paeAtmList->insert(lit,paeExpr);
else
paeAtmList->push_back(paeExpr);
// Remember for later deletion
m_vecAEToDelete.push_back(paeExpr);
}
/********************************************************************
FUNCTION: fIsImplicitRule()
PURPOSE: Tests if the type 'nAtmTypeTestFor' can be 'cast'
to 'nAtmTypeSource'.
RETURNS: 'true' / 'false'
********************************************************************/
bool cxAnalyzerMain::fIsImplicitRule(int nAtmTypeTestFor,
int nAtmTypeSource,
int *pnIDValue, cxAnalyzerExpression** ppaeExpr) const
{
ASSERT(m_fInitialized);
atmimp_map_type::const_iterator it;
it =m_mapAtmImplicit.find(nAtmTypeSource);
if(it==m_mapAtmImplicit.end())
return false;
while( it!=m_mapAtmImplicit.end() &&
(*it).first == nAtmTypeSource)
{
if( (*it).second.nAtmType == nAtmTypeTestFor)
{
if(pnIDValue!=NULL)
(*pnIDValue)=(*it).second.nIDValue;
if(ppaeExpr!=NULL)
{
cxAnalyzerExpression *temp = (*it).second.pExpr;
(*ppaeExpr)=temp;
}
return true;
}
it++;
}
return false;
}
#ifndef ANALYZER_NO_OPTIMIZATION
/********************************************************************
FUNCTION: vInsertIntoRuleCache()
PURPOSE: Tests if the type 'nAtmTypeTestFor' can be 'cast'
to 'nAtmTypeSource'.
RETURNS: 'true' / 'false'
********************************************************************/
void cxAnalyzerMain::vInsertIntoRuleCache(
cxAnalyzerTree *patTree,
int nRuleIDValue, int nAtmTypeFirst,
const cxaToken* patFirstToken, bool fCheckFirstIs,
cxAnalyzerTree::chkpoint_type cpStart,
cxAnalyzerTree::chkpoint_type cpEnd,
cxaTokenStream::const_iterator start,
cxaTokenStream::const_iterator end)
{
#ifdef _DEBUG
const cxaRuleCacheElement *parcExisting = m_arcRuleCache.find(nRuleIDValue,nAtmTypeFirst,patFirstToken,start);
ASSERT(parcExisting==NULL);
#endif
cxaRuleCache::iterator it;
cxaRuleCacheElement *parcElem =
new cxaRuleCacheElement(
nRuleIDValue,nAtmTypeFirst,patFirstToken,fCheckFirstIs,
start,end);
parcElem->vSetContents(patTree,cpStart,cpEnd);
m_arcRuleCache.insert( std::make_pair(parcElem->hvGetHashValue(),parcElem) );
}
#endif
/********************************************************************
FUNCTION: fCheckRule()
PARAMETERS: 'nAtmTestFor'
the type of expression to test for
'patsContext'
the token stream containing 'start'
'nAtmTypeFirstIs'
can be ATM_ID_INVALID if n/a, or an AtmID of the token
preceding 'start' (which may not be equal to *(start-1)).
'patFirstToken'
if 'nAtmTypeFirstIs'!=ATM_ID_INVALID, contains the token
described by nAtmTypeFirstIs
'pascCondition'
Can point to an 'cxaStatusCookie' object. Will if != NULL
contain information on what parsing problem happened.
CAUTION: Consumes a lot of additional time; use only if
needed.
'start'
the first token to start the test with
'pend'
receives the position of the first token after the
expression if the function is successful
'patTree'
if it is needed to 'treeize' the expression, then this
argument takes the 'cxAnalyzerTree' which does that
later on.
PURPOSE: Check if the given position in the token stream matches
the expression 'nAtmTypeTestFor', and determine (if does
match) the next token after the expression.
RETURNS: 'true':
Expression recognized.
- (*pend) contains the position of the
first token after the expression.
- 'patTree' (if !=NULL) has been filled with the
recognized tokens
'false':
Parsing error.
- if 'pascCondition' was !=NULL: contains the error
condition.
********************************************************************/
bool cxAnalyzerMain::fCheckRuleInt( int nAtmTestFor,
const cxaTokenStream* patsContext,
int nAtmTypeFirstIs,
const cxaToken* patFirstToken,
cxaStatusCookie* pascCondition,
cxaTokenStream::const_iterator start,
cxaTokenStream::const_iterator *pend,
cxAnalyzerTree* patTree)
{
ASSERT(m_fInitialized);
ASSERT(patsContext!=NULL);
ASSERT(patsContext->fCheckValid());
bool fCheckFirstIs = (nAtmTypeFirstIs!=ATM_ID_INVALID);
// Is there anything to do?
if(start==patsContext->end() && nAtmTypeFirstIs==ATM_ID_INVALID)
{
if(pascCondition!=NULL)
pascCondition->vSetBrkCause(cxaStatusCookie::brkcause_notokens,NULL);
return false;
}
#ifdef ANALYZER_STATS_ENABLED
if(pascCondition!=NULL)
{
pascCondition->stats.nCurRecursionLevel++;
if( pascCondition->stats.nCurRecursionLevel>
pascCondition->stats.nMaxRecursionLevel)
pascCondition->stats.nMaxRecursionLevel = pascCondition->stats.nCurRecursionLevel;
pascCondition->stats.nRuleCallsTotal++;
}
#endif
// Get the information on the expression to scan for
atmae_map_type::const_iterator ae_it;
ae_it =m_mapAtmAEList.find(nAtmTestFor);
if(ae_it==m_mapAtmAEList.end())
{
ASSERT(FALSE);
throw cxAnalyzerException(ANERR_UNKNOWN_RULE,NULL);
}
const cxAnalyzerTypeInfo *patiTestFor =
m_patmTypeMap->patiGetTypeInfo(nAtmTestFor);
ASSERT(patiTestFor->nGetMType()==ATM_MTYPE_RULE);
ASSERT(patiTestFor->nGetSubType()!=ATM_SUBT_RULE_UNDEF);
cxaTokenStream::const_iterator it;
ae_list_type::iterator lit;
bool fResult = false;
ae_list_type *paeAtmList = (*ae_it).second;
// Iterate over the tokens in the token input stream
for(it=start;fCheckFirstIs || it!=patsContext->end();)
{
int nAtmTypeCur = ATM_ID_INVALID;
const cxaToken* patCurToken = NULL;
bool fDone = false;
// Get the next token ...
if(fCheckFirstIs)
{
nAtmTypeCur =nAtmTypeFirstIs;
patCurToken =patFirstToken;
fCheckFirstIs =false;
}
else
{
patCurToken =&(*it);
nAtmTypeCur =patCurToken->nAtmType;
it++;
}
// If there is a status cookie, remember the state
int nTempCurLength = 0;
if(pascCondition!=NULL)
nTempCurLength = pascCondition->nCurLength;
// Check the rules (they are ordered by length)
for(lit=paeAtmList->begin();lit!=paeAtmList->end();lit++)
{
cxAnalyzerExpression *pExpr = (*lit);
cxaTokenStream::const_iterator temp,temp2;
bool fResultTemp;
// Reinitialize the status cookie if needed
if(pascCondition!=NULL)
pascCondition->nCurLength = nTempCurLength;
// @Optimization:
// A very common pattern is {.x}={.x}{$y}{.x}. If this
// is the case, a one-token read ahead can avoid a
// closer examination of the context.
if( nAtmTypeCur==pExpr->nGetAtmType() &&
pExpr->nGetPatternSize()>=2 &&
(*pExpr)[1]->nGetMType()==ATM_MTYPE_TOKEN)
{
if( (*pExpr)[1]->nGetAtmType()!=it->nGetAtmType())
{
#ifdef ANALYZER_STATS_ENABLED
if(pascCondition!=NULL)
pascCondition->stats.nSkipped++;
#endif
continue;
}
}
#ifndef ANALYZER_NO_OPTIMIZATION
if(pExpr->fIsCacheable())
{
const cxaRuleCacheElement *pelem =
m_arcRuleCache.find(
pExpr->nGetIDValue(),
nAtmTypeCur, patCurToken,it);
if(pelem)
{
pelem->vUnloadTo(patTree,it);
patFirstToken=NULL;
fResult =fDone = true;
fCheckFirstIs = pelem->fIsCheckFirstIs();
nAtmTypeFirstIs=pExpr->nGetAtmType();
if(pend!=NULL)
(*pend) =it;
break;
}
};
cxAnalyzerTree::chkpoint_type cpStart = patTree->cpGetCheckpoint();
#endif
// Perform the test ...
patTree->vBeginTrans();
fResultTemp =pExpr->fCheckComp(this,patsContext,nAtmTypeCur,patCurToken,pascCondition,it,&temp,patTree);
if(fResultTemp)
{
if(pend!=NULL)
(*pend) =temp;
if(pExpr->fIsRightBound(pExpr->nGetAtmType()))
fCheckFirstIs = false;
else
fCheckFirstIs = true;
if(fCheckFirstIs || temp!=patsContext->end())
{
patTree->vCommitTrans();
#ifndef ANALYZER_NO_OPTIMIZATION
if(pExpr->fIsCacheable())
{
vInsertIntoRuleCache(
patTree,pExpr->nGetIDValue(),
nAtmTypeCur,patCurToken,fCheckFirstIs,
cpStart,patTree->cpGetCheckpoint(),
it,temp);
}
#endif
it =temp;
patFirstToken=NULL;
fResult =fDone = true;
nAtmTypeFirstIs=pExpr->nGetAtmType();
break;
}
else
patTree->vRollbackTrans();
}
else
patTree->vRollbackTrans();
}
// No rule found yet?
if(!fDone)
{
int nIDValue;
cxAnalyzerExpression *pExpr = NULL;
// Test if an implicit rule applies
if(fIsImplicitRule(nAtmTestFor,nAtmTypeCur,&nIDValue,&pExpr))
{
if(patTree)
{
if(pExpr!=NULL && !pExpr->fImplicitIgnore())
patTree->vBeginRule(nAtmTestFor,nIDValue,pExpr);
patTree->vLogToken(nAtmTestFor,nIDValue,NULL,patCurToken);
if(pExpr!=NULL && !pExpr->fImplicitIgnore())
patTree->vEndRule();
}
if(pend!=NULL)
(*pend) =it;
fResult = true;
fDone = true;
fCheckFirstIs=true;
nAtmTypeFirstIs=nAtmTestFor;
patFirstToken=NULL;
// Increase the length of the status cookie
if(pascCondition)
pascCondition->nCurLength++;
}
}
// Still no rule found -> bail out?
if(!fDone)
break;
// Is the rule currently being tested a finite rule?
// If so, it can't happen to be continued this way.
if(patiTestFor->nGetSubType()==ATM_SUBT_RULE_FINITE)
break;
}
#ifdef ANALYZER_STATS_ENABLED
if(pascCondition!=NULL)
{
if(fResult)
pascCondition->stats.nSuccessfulRuleCallsTotal++;
pascCondition->stats.nCurRecursionLevel--;
}
#endif
return fResult;
}
bool cxAnalyzerMain::fCheckRule( int nAtmTestFor,
cxaTokenStream* patsContext,
cxaStatusCookie* pascCondition,
cxaTokenStream::const_iterator start,
cxaTokenStream::const_iterator *pend,
cxAnalyzerTree* patTree)
{
if( fCheckRuleInt( nAtmTestFor,patsContext,ATM_ID_INVALID,
NULL,pascCondition,start,pend,patTree) )
return true;
return false;
}