/*********************************************************************
Copyright (C) 2001/2 by
Alexander Berthold, alexander-berthold@web.de.
Hoegestr. 54
79108 Freiburg i. Breisgau
Germany
-- This file is part of cxAnalyzer --
"cxAnalyzer" is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or any later version.
"cxAnalyzer" is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with "cxAnalyzer"; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330,
Boston, MA 02111-1307 USA
---------------------------------------------------------------
If you find any bugs or if you make other corrections/
enhancements, i'd appreciate if you'd let me know about
that. My email is
alexander-berthold@web.de
If you share this code, do not remove this text.
---------------------------------------------------------------
Class: cxAnalyzerMain
Author: Alexander Berthold
Copyright: Alexander Berthold
Date: 2001/12/19
Version: 0.2.01
Purpose: Main class of this package.
- Stores the analyzer rules describing the grammar
(m_mapAtmAEList).
- Stores implicit mappings (m_mapAtmImplicit)
- Provides the functionality to analyze a sequence
of tokens for a given rule (fCheckRule).
Version history:
- 2001/06/01
Optimized fCheckRule. If used for checking for a rule
which is finite, it was still tried to test if the rule
continues.
- 2001/06/02
Added cxaStatusCookie. Intended for providing error
information.
- 2001/06/02
Source labeled version 0.1.06
- 2001/06/12
Fixed some bugs with cxaStatusCookie. Improved accuracy
of returned information.
Version number is now 0.1.07
- 2001/12/16
Improved speed by introducing a rule cache and by minor
other optimizations. Currently, the rule cache caches only successfully
parsed sub-expressions. I will extend it to cache also failed sub-expressions,
because this saves even more time under certain (but probable) circumstances.
- 2001/12/19
Labeled version 0.2.01
ToDo:
- Further optimize fCheckRule:
Currently it is only being differentiated between finite
and non-finite rules. Can be improved by differentiating
between left-only non-finite rules and right-only non-finite
rules.
- Further optimize the code. There is still 30%-50% more speed by
optimizing easily possible.
*********************************************************************/
// cxAnalyzerMain.h: interface for the cxAnalyzerMain class.
//
//////////////////////////////////////////////////////////////////////
#if !defined(AFX_CXANALYZERMAIN_H__CCC86421_3E3E_4694_A24A_2CDC157E268B__INCLUDED_)
#define AFX_CXANALYZERMAIN_H__CCC86421_3E3E_4694_A24A_2CDC157E268B__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
class cxAnalyzerTree;
class cxaStatusCookie
{
// Construction/Destruction
public:
cxaStatusCookie()
{
nMaxLength =0;
nCurLength =0;
lCurPosition=0;
lMaxPosition=0;
nBrkCause =0;
patToken =NULL;
lPosition =0;
#ifdef ANALYZER_STATS_ENABLED
memset( (void*)&stats,0,sizeof(stats));
#endif
};
cxaStatusCookie(int nLeft, int nRight, const cxaToken* patToken)
{
nMaxLength =0;
nCurLength =0;
lCurPosition=nLeft;
lMaxPosition=nRight;
nBrkCause =0;
nBrkAtmType =0;
nBrkIDValue =0;
patToken =patToken;
lPosition =nLeft;
#ifdef ANALYZER_STATS_ENABLED
memset(&stats,0,sizeof(stats));
#endif
}
// Attributes
public:
enum {
brkcause_ok=0,
brkcause_eots,
brkcause_wrongtoken,
brkcause_subrulefailed,
brkcause_notokens,
brkcause_unexpected
};
int nMaxLength; /* Internally used */
int nCurLength; /* Internally used */
long lMaxPosition; /* Internally used */
long lCurPosition; /* Internally used */
int nBrkAtmType; // AtmType of the analyzer expression which hit the end of the token stream
int nBrkIDValue; // IDValue " " " ...
protected:
int nBrkCause; /* parsing error cause */
public:
const cxaToken* patToken; /* token where the error happened */
long lPosition; /* Position in the input stream where the error happened */
long lLastPosition; /* Position in the input stream where the error 'ended' */
#ifdef ANALYZER_STATS_ENABLED
// Statistics
public:
struct
{
int nRuleCallsTotal;
int nSuccessfulRuleCallsTotal;
int nAERuleCallsTotal;
int nSuccessfulAERuleCallsTotal;
int nMaxRecursionLevel;
int nCurRecursionLevel;
int nSkipped;
}
stats;
#endif
// Operations
public:
void vSetBrkCause(int nBrkCause_, const cxaToken* patToken_, long lEndPosition = 0)
{
ASSERT(nBrkCause_!=brkcause_eots);
if(nBrkCause==brkcause_eots) // must be preserved!
return;
if(nCurLength>=nMaxLength)
{
nBrkCause=nBrkCause_;
nMaxLength=nCurLength;
patToken=patToken_;
lPosition=(patToken_!=NULL)?patToken_->nGetTokenOrderID():-1;
lLastPosition=lEndPosition;
if(lCurPosition>lMaxPosition)
lMaxPosition = lCurPosition;
}
}
void vSetBrkCauseEots(int nAtmType, int nIDValue, const cxaToken* _patToken)
{
if(nBrkCause==brkcause_eots)
return;
nBrkCause = brkcause_eots;
nBrkAtmType = nAtmType;
nBrkIDValue = nIDValue;
patToken=_patToken;
lPosition=(patToken!=NULL)?patToken->nGetTokenOrderID():-1;
lLastPosition=-1;
lMaxPosition = lCurPosition;
}
int nGetBrkCause() const { return nBrkCause; };
void vReset()
{
nMaxLength =0;
nCurLength =0;
lCurPosition=0;
lMaxPosition=0;
nBrkCause =0;
patToken =NULL;
lPosition =0;
#ifdef ANALYZER_STATS_ENABLED
memset( (void*)&stats,0,sizeof(stats));
#endif
}
};
/*
Callback class for resolving unknown tokens. The callback method
doesn't need to be highly optimized because the results will be
cached (currently not 100% true, see optimization note above).
*/
class cxAnalyzerUnknownTokenCallback
{
public:
virtual bool nResolveUnknownToken(
cxAnalyzerTree *patTree,
int nAtmTypeExpected,
const cxaToken* patToken,
int *pnAtmTypeRet, int *pnIDValueRet,
void **ppvDataRet) const = 0;
};
class cxAnalyzerMain
{
// Construction/Destruction
public:
cxAnalyzerMain(cxAnalyzerTypeMap *patmTypeMap);
virtual ~cxAnalyzerMain();
// Sub-Class for storing data on implicit mappings (ex.: {.expr}=0:{!number} )
struct atm_imp {
atm_imp(int _nAtmType, int _nIDValue, cxAnalyzerExpression *_pExpr)
{
nAtmType =_nAtmType;
nIDValue =_nIDValue;
pExpr =_pExpr;
}
int nAtmType;
int nIDValue;
cxAnalyzerExpression *pExpr;
};
// Typedefs
public:
typedef std::list<cxAnalyzerExpression*>
ae_list_type;
typedef std::vector<cxAnalyzerExpression*>
ae_vec_type;
typedef std::map<int,ae_list_type*>
atmae_map_type;
typedef std::multimap<int,atm_imp>
atmimp_map_type;
// Attributes
protected:
// Already initialized?
bool m_fInitialized;
// The TypeMap this expression belongs to
cxAnalyzerTypeMap *m_patmTypeMap;
// Implicit mappings
atmimp_map_type m_mapAtmImplicit;
// Map from nAtmID's to cxAnalyzerExpression (length sorted) lists
atmae_map_type m_mapAtmAEList;
// Vector of cxAnalyzerExpression*'s to delete
ae_vec_type m_vecAEToDelete;
// Callback class for unknown tokens
cxAnalyzerUnknownTokenCallback *m_pacbUnknownToken;
#ifndef ANALYZER_NO_OPTIMIZATION
cxaRuleCache m_arcRuleCache;
#endif
// Protected operations
protected:
// Registers the cxAnalyzerExpression.
void vRegisterAE(cxAnalyzerExpression* paeExpr);
#ifndef ANALYZER_NO_OPTIMIZATION
// Insert the rule specified through the tuple 'nAtmTypeFirst',
// 'start' and 'ende' into the rule cache. 'cpStart' and 'cpEnd'
// contain the rule log start and end positions of the rule to
// insert.
void vInsertIntoRuleCache(
cxAnalyzerTree *patTree,
int nRuleIDValue, int nAtmTypeFirst,
const cxaToken* patFirstToken,
bool fCheckFirstIs,
cxAnalyzerTree::chkpoint_type cpStart,
cxAnalyzerTree::chkpoint_type cpEnd,
cxaTokenStream::const_iterator start,
cxaTokenStream::const_iterator end);
#endif
// Check if the given position in the token stream matches
// the expression 'nAtmTypeTestFor', and determine (if it matches)
// the next token after the expression.
bool fCheckRuleInt( int nAtmTypeTestFor,
const cxaTokenStream* patsContext,
int nAtmTypeFirstIs, /*= PTTM_ID_INVALID */
const cxaToken* patFirstToken /* = NULL */,
cxaStatusCookie* pascCondition,
cxaTokenStream::const_iterator start,
cxaTokenStream::const_iterator *pend,
cxAnalyzerTree* patTree);
// Operations
public:
/*** Debugging support ***/
bool fCheckValid() const { return true; };
void vDump() const { ASSERT(FALSE); };
/*** Initialization ***/
bool fLoadFromStream(std::basic_istream<TCHAR>& input);
void vReset();
/*** Raw data access ***/
const ae_vec_type& paevecGetExpressionsVector() const
{ return m_vecAEToDelete; };
/*** Analyzer operations ***/
// Test if 'nAtmTypeTestFor' belongs implictly to the
// expression of type 'nAtmTypeSource'
bool fIsImplicitRule(int nAtmTypeTestFor,
int nAtmTypeSource,
int *pnIDValue,
cxAnalyzerExpression **ppaeExpr = NULL) const;
bool fCheckRule( int nAtmTestFor,
cxaTokenStream* patsContext,
cxaStatusCookie* pascCondition,
cxaTokenStream::const_iterator start,
cxaTokenStream::const_iterator *pend,
cxAnalyzerTree* patTree);
// Virtual member function which is called everytime a ATM_ID_LITERAL
// token is found (and no exact pattern match was possible i.e. the
// patterns' token was not ATM_ID_LITERAL).
virtual bool nResolveUnknownToken( cxAnalyzerTree* patTree,
int nAtmTypeExpected,
const cxaToken* patToken,
int *pnAtmTypeRet, int *pnIDValueRet, void **ppvDataRet) const
{
if(m_pacbUnknownToken!=NULL)
return m_pacbUnknownToken->nResolveUnknownToken(patTree,nAtmTypeExpected,patToken,pnAtmTypeRet,pnIDValueRet,ppvDataRet);
else
return false;
}
void vSetUnknownTokenCallback(cxAnalyzerUnknownTokenCallback* pacbUnknownToken)
{
m_pacbUnknownToken = pacbUnknownToken;
}
#ifndef ANALYZER_NO_OPTIMIZATION
void vClearCache() { m_arcRuleCache.clear(); };
#endif
friend class cxAnalyzer;
friend class cxAnalyzerExpression;
};
#endif // !defined(AFX_CXANALYZERMAIN_H__CCC86421_3E3E_4694_A24A_2CDC157E268B__INCLUDED_)