Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version

Tokenizer and analyzer package supporting precedence prioritized rules

, 1 Jan 2002
A library allowing you to conveniently build a custom tokenizer and analyzer supporting precedence priorized rules
cxtpackagetut_win32vc.zip
cxTokenizer
cxTokenizer.dsp
cxTokenizer.plg
cxtPackage
cxtPackage.plg
cxtPackage.dsp
mathTok
mathTok.plg
mathTok.dsp
simpleCalc
simpleCalc.bmp
simpleCalc.dsp
simpleCalc.plg
tkCommon
cxAnalyzer
cxAnalyzer.dsp
cxAnalyzer.plg
COPYING
cpAbsd.dsw
cxtpackage_win32vc.zip
cxTokenizer.dsp
cxTokenizer.plg
Debug
cxAnalyzer.dsp
cxAnalyzer.plg
cxtPackage.plg
cxtPackage.dsp
COPYING
cpAbsd.dsw
grammaride.zip
stlport_vc645.dll
grammarIDE.exe
pkgcomplete.zip
cxTokenizerMatchTokenRule.inl
cxTokenizer.dsp
cxtPackage.dsp
emptyTestApp
emptyTestApp.clw
res
emptyTestApp.ico
emptyTestApp.dsp
grammarIDE
grammarIDE.clw
res
grammarIDEDoc.ico
icon1.ico
zoomable.ico
Toolbar.bmp
grammarIDE.ico
vssver.scc
grammarIDE.dsp
simpleCalc.bmp
simpleCalc.dsp
vssver.scc
cxaToken.inl
cxAnalyzer.dsp
COPYING
cpAbsd.dsw
pkgsrconly.zip
cxTokenizerMatchTokenRule.inl
cxTokenizer.dsp
cxtPackage.dsp
emptyTestApp.clw
emptyTestApp.ico
emptyTestApp.dsp
vssver.scc
cxaToken.inl
cxAnalyzer.dsp
COPYING
cpAbsd.dsw
/*********************************************************************
	Copyright (C) 2001 by

		Alexander Berthold, alexander-berthold@web.de.
		Hoegestr. 54
		79108 Freiburg i. Breisgau
		Germany

    -- This file is part of cxAnalyzer --

    "cxAnalyzer" is free software; you can redistribute it and/or 
	modify it under the terms of the GNU Lesser General Public 
	License as published by the Free Software Foundation; either 
	version 2 of the License, or any later version.

    "cxAnalyzer" is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
	License along with "cxAnalyzer"; if not, write to the Free 
	Software  Foundation, Inc., 59 Temple Place, Suite 330, 
	Boston, MA  02111-1307  USA

    ---------------------------------------------------------------
      If you find any bugs or if you make other corrections/
	  enhancements, i'd appreciate if you'd let me know about 
	  that. My email is
  
       alexander-berthold@web.de
  
      If you share this code, do not remove this text.
    ---------------------------------------------------------------

*********************************************************************/

// cxAnalyzerExpression.cpp: implementation of the cxAnalyzerExpression class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"

#include "cxAnalyzerException.h"
#include "cxaToken.h"
#include "cxAnalyzerTypeMap.h"
#include "cxAnalyzerExpression.h"
#include "cxAnalyzerTree.h"
#include "cxAnalyzerMain.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

	cxAnalyzerExpression::cxAnalyzerExpression(
							cxAnalyzerTypeMap* patmTypeMap)
		{
		m_nAtmType		=ATM_ID_INVALID;
		m_nPrecPrio		=-1;
		m_patmTypeMap	=patmTypeMap;
		m_fInitializeComplete=false;
		m_pstrInitTemp	=NULL;
		m_apatiPattern	=NULL;
		m_nSizePattern	=0;
		}

	cxAnalyzerExpression::~cxAnalyzerExpression()
		{
		if(m_pstrInitTemp)
			delete m_pstrInitTemp;
		if(m_apatiPattern)
			delete[] m_apatiPattern;
		}

//////////////////////////////////////////////////////////////////////
// Protected operations
//////////////////////////////////////////////////////////////////////

	/********************************************************************
	FUNCTION:	fCheckComp()

  PARAMETERS:	'paHost'
					the cxAnalyzerMain object doing the test
				'patsContext'
					the token stream containing 'start'
				'nAtmTypeFirstIs'
					can be ATM_ID_INVALID if n/a, or an AtmID of the token
					preceding 'start' (which may not be equal to *(start-1)).
				'patFirstToken'
					if 'nAtmTypeFirstIs'!=ATM_ID_INVALID, contains the token
					described by nAtmTypeFirstIs
				'pascCondition'
					Can point to an 'cxaStatusCookie' object. Will if != NULL
					contain information on what parsing problem happened.
					CAUTION: Consumes a lot of additional time; use only if
					needed.
				'start'
					the first token to start the test with
				'pend'
					receives the position of the first token after the
					expression if the function is successful
				'patTree'
					if it is needed to 'treeize' the expression, then this
					argument takes the 'cxAnalyzerTree' which does that
					later on.

	PURPOSE:	Performs a test on the given token stream (patsContext)
				if the pattern of this expression matches the token pattern.

	RETURNS:	'true':
					Expression recognized.
					- (*pend) contains the position of the 
					  first token after the expression.
					- 'patTree' (if !=NULL) has been filled with the
					  recognized tokens
				'false':
					Parsing error.
					- if 'pascCondition' was !=NULL: contains the error
					  condition.
	********************************************************************/
	bool	cxAnalyzerExpression::fCheckComp(
				const cxAnalyzerMain *paHost,
				const cxaTokenStream *patsContext,
				int nAtmTypeFirstIs,
				const cxaToken* patFirstToken,
				cxaStatusCookie* pascCondition,
				cxaTokenStream::const_iterator startpos,
				cxaTokenStream::const_iterator *pend,
				cxAnalyzerTree* patTree) const
		{
        if(patTree)
            {
            patTree->vBeginTrans();
            patTree->vBeginRule(m_nAtmType,nGetIDValue(),this);
            }

		bool				fCheckFirstIs = true;
        bool                fIsFirstToken = true;
        bool                fEndRuleLogged = false;
		int					i;
		cxaTokenStream::const_iterator	
							it, log_it;

		it					=startpos;
        log_it              =patsContext->end();
		for(i=0;i<m_nSizePattern;i++, pascCondition?pascCondition->nCurLength++:0)
			{
            const cxaToken*    patToken = NULL;
			int					nAtmType = ATM_ID_INVALID;
			bool				fDone = false;
            bool                fWasFirstToken = fIsFirstToken;
            fIsFirstToken       =false;

			// Fetch the next 'AtmType' (either from the args or from the next element)
			if(fCheckFirstIs)
				{
				fCheckFirstIs	=false;
				if(nAtmTypeFirstIs!=ATM_ID_INVALID)
					{
					nAtmType		=nAtmTypeFirstIs;
                    patToken        =patFirstToken;
					fDone			=true;
					}
				}
			if(!fDone)
				{
				if(it==patsContext->end())
					{
					if(pascCondition)
						pascCondition->vSetBrkCause(cxaStatusCookie::brkcause_eots,NULL);
					break;
					}

                patToken        =&(*it);
				nAtmType		=(*it).nGetAtmType();
				it++;
				}

			// Compare the patterns
			int					nAtmTypePat, nMTypePat;
			nAtmTypePat			=m_apatiPattern[i]->nGetAtmType();
			nMTypePat			=m_apatiPattern[i]->nGetMType();

			// Already equal?
			if(nAtmTypePat == nAtmType)
                {
                if(patTree) 
                    patTree->vLogToken(patToken);

				continue;
                }

			// Not equal
			// Is 'nAtmTypePat' an expression?
			if(nMTypePat != ATM_MTYPE_RULE)
				{
				if(pascCondition)
					pascCondition->vSetBrkCause(cxaStatusCookie::brkcause_wrongtoken,patToken);
				break;
				}

			// If the first token in the pattern is a 
			// rule (the same rule as the one here)
			// it must be able to be implicitly mapped to
			// the rule.
            if(fWasFirstToken && nAtmTypePat == nGetAtmType())
                {
				if(!paHost->fIsImplicitRule(nAtmTypePat, nAtmType, NULL))
					{
					if(pascCondition)
						pascCondition->vSetBrkCause(cxaStatusCookie::brkcause_wrongtoken,patToken);
					break;
					}

                if(patTree)
                    patTree->vLogToken(patToken);

                continue;
                }
#ifndef RECURSIVE_COMPLETE
            else
                {
			    if(i==(m_nSizePattern-1))
                    {
                    if(paHost->fIsImplicitRule(nAtmTypePat, nAtmType, NULL))
                        {
                        if(patTree)
                            patTree->vLogToken(patToken);

                        continue;
                        }
                    }
                
                }
#endif

			cxaTokenStream::const_iterator	temp;

			// Is a rule available?
            if(patTree)
                {
                patTree->vBeginTrans();
                if(i==(m_nSizePattern-1) && fIsRightBound(nGetAtmType()))
                    {
	                patTree->vLogToken(NULL);
                    fEndRuleLogged=true;
                    patTree->vEndRule();
                    }
                }

            if(!paHost->fCheckRule(nAtmTypePat,patsContext,
                                   nAtmType,patToken,
								   pascCondition,
                                   it,&temp,patTree))
                {
                if(patTree)
                    patTree->vRollbackTrans();

				if(pascCondition)
					pascCondition->vSetBrkCause(cxaStatusCookie::brkcause_subrulefailed,patToken);
				break;
                }
			else
				{
                if(patTree)
                    patTree->vCommitTrans();

				// A rule was available
                if(i==(m_nSizePattern-1))
                    {
                    // This was the last element in the pattern,
                    // and the element was a rule. Since the rule
                    // has already been logged by 'fCheckRule',
                    // in this case the end of _this_ rule
                    // must point to the start of the recently
                    // recognized rule.
                    log_it  =it;
                    it      =temp;
                    }
                else
                    {
                    it		=temp;
                    }
                }
            }
        
        if(i==m_nSizePattern)
            {
            if(patTree)
                {
                if(!fEndRuleLogged)
                    patTree->vEndRule();

                patTree->vCommitTrans();
                }

            (*pend)	=it;
            return true;
            }
        
        if(patTree)
            {
            if(!fEndRuleLogged)
                patTree->vEndRule();

            patTree->vRollbackTrans();
            }

        return false;
        }
    
    
//////////////////////////////////////////////////////////////////////
// Operations
//////////////////////////////////////////////////////////////////////

    bool cxAnalyzerExpression::fCheckValid() const
        {
        if(NULL == this)
            return false;
        
        if(m_nAtmType==ATM_ID_INVALID)
            return false;
        
        if(m_nPrecPrio<0)
            return false;
        
        if(m_fInitializeComplete==false)
            return false;
        
        return true;
        }
    
    bool cxAnalyzerExpression::fInitFromRule(std::tstring strRule)
        {
        // May not be done already
        ASSERT(m_nAtmType==ATM_ID_INVALID);
        ASSERT(m_fInitializeComplete==false);
        
        // This function should use a parser (hehe)
        int				nPosEqual,nSep,nPos0;
        std::tstring	strName;
        std::tstring	strTemp;
        
		nPos0			=0;
        nPosEqual		=strRule.find(_T('='));
		nSep			=strRule.find(_T(':'));
		if(nSep<nPosEqual)
			{
			m_nIDValue		=_ttoi(strRule.substr(0,nSep).data());
			nPos0			=nSep+1;
			}
		else
			m_nIDValue		=0;

        nSep			=strRule.find(_T(':'),nPosEqual+1);
        
        if(nPosEqual==-1 || nSep==-1)
            throw cxAnalyzerException(ANERR_RULEDEF_WRONG);
        
        strName			=strRule.substr(nPos0,nPosEqual-nPos0);
        strTemp			=strRule.substr(nPosEqual+1,nSep-nPosEqual-1);
        
        // Remove surrounding {}
        if(!ctkMisc::fStripOffBraces(strName))
            throw cxAnalyzerException(ANERR_RULEDEF_EXPR_WRONG);
        
        m_nPrecPrio		=_ttoi(strTemp.data());
        m_nAtmType		=m_patmTypeMap->nGetIDFor(strName);
		m_patmTypeMap->vSetCustIDToAtmMapping(m_nIDValue,m_nAtmType);
        m_patmTypeMap->patiSetTypeInfo(m_nAtmType,ATM_MTYPE_RULE,ATM_SUBT_RULE_UNDEF);
        
        // Temporary store the initialization string
        m_pstrInitTemp	=new std::tstring(strRule.substr(nSep+1));
        
        return true;
        }
    
    bool cxAnalyzerExpression::fFinishInit()
        {
        // May not be done already, and fInitFromRule must have been called
        // already.
        ASSERT(m_fInitializeComplete==false);
        ASSERT(m_nAtmType!=ATM_ID_INVALID);
        ASSERT(m_pstrInitTemp!=NULL);
        ASSERT(m_apatiPattern==NULL);
        ASSERT(m_nSizePattern==0);
        
        // Since we use no parser :), replace all single '{' and '}' with
        // '{{' and '}}', to distinguish them from escaped ones ('\{', '\}')
        std::tstring	strRule = ctkMisc::strEscapeBraces(*m_pstrInitTemp);
        
        // Delete temporary string
        //	delete m_pstrInitTemp;
        //	m_pstrInitTemp	=NULL;
        
        // Go on with parsing the expression
        // (The expression is now in the form {{...}}[{{...}}[...]] )
        int				nCurPos;
        int				nLen = strRule.length();
        if(nLen==0)
            throw cxAnalyzerException(ANERR_RULEDEF_EXPR_EMPTY);
        
        std::vector<int>	vecTemp;
        for(nCurPos=0;nCurPos<nLen;)
            {
            if(strRule.substr(nCurPos,2)!=_T("{{"))
                throw cxAnalyzerException(ANERR_RULEDEF_EXPR_WRONG);
            
            int				nEndBrace = strRule.find(_T("}}"),nCurPos);
            int				nAtmType = ATM_ID_INVALID;
            std::tstring	strPart;
            
            if(nEndBrace==-1)
                throw cxAnalyzerException(ANERR_RULEDEF_EXPR_WRONG);
            
            strPart			=ctkMisc::strUnescapeBraces(strRule.substr(nCurPos+2,nEndBrace-nCurPos-2));
			strPart			=ctkMisc::strParseEscapeCharacters(strPart);
            nAtmType		=m_patmTypeMap->nGetIDFor(strPart,false);
            if(nAtmType==ATM_ID_INVALID)
                throw cxAnalyzerException(ANERR_RULEDEF_PART_UNDEFINED);
            
            vecTemp.push_back(nAtmType);
            
            nCurPos			=nEndBrace+2;
            }
        
        // Now store the contents of the 'vecTemp' array in a dynamically
        // allocated 'int[]' (cause the length of the array is not subject
        // to change)
        int				i;
        
        m_nSizePattern	=vecTemp.size();
        m_apatiPattern	=new cxAnalyzerTypeInfo*[m_nSizePattern];
        for(i=0;i<m_nSizePattern;i++)
            m_apatiPattern[i]=m_patmTypeMap->patiGetTypeInfo(vecTemp[i]);
        
        // Almost done ...
        m_fInitializeComplete=true;

		// the only thing left is to update the nSubType value of
		// the cxAnalyzerTypeInfo belonging to our nAtmID if
		// necessary
		cxAnalyzerTypeInfo	*patiCur = 
			m_patmTypeMap->patiGetTypeInfo(m_nAtmType);

		if(fIsLeftBound(m_nAtmType) || fIsRightBound(m_nAtmType))
			{
			if(patiCur->nGetSubType()!=ATM_SUBT_RULE_OPEN)
				patiCur->vSetSubType(ATM_SUBT_RULE_OPEN);
			}
		else
			{
			if(patiCur->nGetSubType()==ATM_SUBT_RULE_UNDEF)
				patiCur->vSetSubType(ATM_SUBT_RULE_FINITE);
			}
        
        return true;
        }
    
    bool	cxAnalyzerExpression::fIsImplicitExpressionToAtm(int *pnAtmType) const
        {
        ASSERT(m_fInitializeComplete);
        ASSERT(pnAtmType!=NULL);
        
        if(m_nSizePattern!=1)
            return false;
        
        if(	(m_apatiPattern[0]->nGetMType()==ATM_MTYPE_TOKEN) ||
            (m_apatiPattern[0]->nGetMType()==ATM_MTYPE_COMP_TOKEN))
            {
            (*pnAtmType)=m_apatiPattern[0]->nGetAtmType();
            return true;
            }
        
        return false;
        }

	bool	cxAnalyzerExpression::fIsLeftBound(int nAtmType) const
		{
		ASSERT(m_nSizePattern>0);

		if(m_apatiPattern[0]->nGetMType()!=ATM_MTYPE_RULE)
			return false;

		if(m_apatiPattern[0]->nGetAtmType()!=nAtmType)
			return false;

		return true;
		}

	bool	cxAnalyzerExpression::fIsRightBound(int nAtmType) const
		{
		ASSERT(m_nSizePattern>0);

		if(m_apatiPattern[m_nSizePattern-1]->nGetMType()!=ATM_MTYPE_RULE)
			return false;

		if(m_apatiPattern[m_nSizePattern-1]->nGetAtmType()!=nAtmType)
			return false;

		return true;
		}
    
    bool	cxAnalyzerExpression::fCheck( 
				const cxAnalyzerMain *paHost,
				const cxaTokenStream *patsContext,
				int nAtmTypeFirstIs,
				const cxaToken* patFirstToken,
				cxaStatusCookie* pascCondition,
				cxaTokenStream::const_iterator start,
				cxaTokenStream::const_iterator *pend,
				cxAnalyzerTree* patTree) const
        {
        if(fCheckComp(paHost,patsContext,nAtmTypeFirstIs,patFirstToken,pascCondition,start,pend,patTree))
            return true;
        
        return false;
		}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here

Share

About the Author

Alexander Berthold
Web Developer
Germany Germany
No Biography provided

| Advertise | Privacy | Mobile
Web04 | 2.8.141022.2 | Last Updated 2 Jan 2002
Article Copyright 2001 by Alexander Berthold
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid