Tokenizer and analyzer package supporting precedence prioritized rules

Alexander Berthold

Rate me:

5.00/5 (4 votes)

1 Jan 2002 · 3 min read

181.7K

2.8K

A library allowing you to conveniently build a custom tokenizer and analyzer supporting precedence-prioritized rules

pkgcomplete.zip
- COMMON.CPP
- COMMON.H
- common_error.h
- common_win32.h
- COPYING
- cpAbsd.dsw
- cpp-grammar.txt
- cxAnalyzer
  - ANALYZER.H
  - cxAnalyzer.dsp
  - cxAnalyzerException.h
  - cxAnalyzerExpression.cpp
  - cxAnalyzerExpression.h
  - cxAnalyzerMain.cpp
  - cxAnalyzerMain.h
  - cxAnalyzerTree.cpp
  - cxAnalyzerTree.h
  - cxAnalyzerTypeMap.cpp
  - cxAnalyzerTypeMap.h
  - cxaParseTree.cpp
  - cxaParseTree.h
  - cxaRuleCache.cpp
  - cxaRuleCache.h
  - cxaRuleCacheElement.cpp
  - cxaRuleCacheElement.h
  - cxaToken.cpp
  - cxaToken.h
  - cxaToken.inl
  - cxaTokenStream.cpp
  - Readme.txt
  - StdAfx.cpp
  - StdAfx.h
- cxTokenizer
- cxtPackage
  - cxtPackage.cpp
  - cxtPackage.dsp
  - cxtPackage.h
  - PACKAGE.H
  - Readme.txt
  - StdAfx.cpp
  - StdAfx.h
  - todo.txt
- DEBUG.CPP
- DEBUG.H
- emptyTestApp
  - emptyTestApp.clw
  - emptyTestApp.cpp
  - emptyTestApp.dsp
  - emptyTestApp.h
  - emptyTestApp.rc
  - emptyTestAppDlg.cpp
  - emptyTestAppDlg.h
  - ReadMe.txt
  - res
    - emptyTestApp.ico
    - emptyTestApp.rc2
  - Resource.h
  - StdAfx.cpp
  - StdAfx.h
- grammarIDE
  - DlgEvaluate.cpp
  - DlgEvaluate.h
  - DlgProperties.cpp
  - DlgProperties.h
  - grammarIDE.clw
  - grammarIDE.cpp
  - grammarIDE.dsp
  - grammarIDE.h
  - grammarIDE.rc
  - grammarIDEDoc.cpp
  - grammarIDEDoc.h
  - grammarIDEView.cpp
  - grammarIDEView.h
  - LeftView.cpp
  - LeftView.h
  - ltrItemTreeWnd.cpp
  - ltrItemTreeWnd.h
  - MainFrm.cpp
  - MainFrm.h
  - PaneEditorWnd.cpp
  - PaneEditorWnd.h
  - propsItemGrammar.cpp
  - propsItemGrammar.h
  - ReadMe.txt
  - res
    - grammarIDE.ico
    - grammarIDE.rc2
    - grammarIDEDoc.ico
    - icon1.ico
    - Toolbar.bmp
    - vssver.scc
    - zoomable.ico
  - Resource.h
  - StdAfx.cpp
  - StdAfx.h
- readme.txt
- sample-grammar.txt
- simpleCalc
  - ReadMe.txt
  - simpleCalc.bmp
  - simpleCalc.cpp
  - simpleCalc.dsp
  - StdAfx.cpp
  - StdAfx.h
- tkCommon
  - ctkCheckValid.h
  - ctkEnumerator.h
  - ctkExternalObjectPointer.h
  - ctkFlagsMixin.h
  - ctkHLinkedList.cpp
  - ctkHLinkedList.h
  - ctkMisc.h
  - ctkSerializable.h
  - tkCommon.h
  - vssver.scc
- vcstl_nowarnings.h
pkgsrconly.zip
- COMMON.CPP
- COMMON.H
- common_error.h
- common_win32.h
- COPYING
- cpAbsd.dsw
- cpp-grammar.txt
- ANALYZER.H
- cxAnalyzer.dsp
- cxAnalyzerException.h
- cxAnalyzerExpression.cpp
- cxAnalyzerExpression.h
- cxAnalyzerMain.cpp
- cxAnalyzerMain.h
- cxAnalyzerTree.cpp
- cxAnalyzerTree.h
- cxAnalyzerTypeMap.cpp
- cxAnalyzerTypeMap.h
- cxaParseTree.cpp
- cxaParseTree.h
- cxaRuleCache.cpp
- cxaRuleCache.h
- cxaRuleCacheElement.cpp
- cxaRuleCacheElement.h
- cxaToken.cpp
- cxaToken.h
- cxaToken.inl
- cxaTokenStream.cpp
- Readme.txt
- StdAfx.cpp
- StdAfx.h
- cxTokenizer.cpp
- cxTokenizer.dsp
- cxTokenizer.h
- cxTokenizerCharTokenRule.cpp
- cxTokenizerCharTokenRule.h
- cxTokenizerCommentTokenRule.cpp
- cxTokenizerCommentTokenRule.h
- cxTokenizerContext.cpp
- cxTokenizerContext.h
- cxTokenizerContextCookie.h
- cxTokenizerContextDiags.cpp
- cxTokenizerDiags.cpp
- cxTokenizerException.cpp
- cxTokenizerException.h
- cxTokenizerInputStream.h
- cxTokenizerMap.cpp
- cxTokenizerMap.h
- cxTokenizerMapData.cpp
- cxTokenizerMapData.h
- cxTokenizerMapDataDiags.cpp
- cxTokenizerMapDiags.cpp
- cxTokenizerMatchTokenRule.h
- cxTokenizerMatchTokenRule.inl
- cxTokenizerNumberTokenRule.cpp
- cxTokenizerNumberTokenRule.h
- cxTokenizerSTLInputStream.cpp
- cxTokenizerSTLInputStream.h
- cxTokenizerStringTokenRule.cpp
- cxTokenizerStringTokenRule.h
- cxTokenizerTextInputStream.cpp
- cxTokenizerTextInputStream.h
- cxTokenizerTokenRule.cpp
- cxTokenizerTokenRule.h
- Readme.txt
- StdAfx.cpp
- StdAfx.h
- tokenizer.h
- tokenizer_error.h
- tokenizer_flags.h
- tokenizerBase.h
- cxtPackage.cpp
- cxtPackage.dsp
- cxtPackage.h
- PACKAGE.H
- Readme.txt
- StdAfx.cpp
- StdAfx.h
- todo.txt
- DEBUG.CPP
- DEBUG.H
- emptyTestApp.clw
- emptyTestApp.cpp
- emptyTestApp.dsp
- emptyTestApp.h
- emptyTestApp.rc
- emptyTestAppDlg.cpp
- emptyTestAppDlg.h
- ReadMe.txt
- emptyTestApp.ico
- emptyTestApp.rc2
- Resource.h
- StdAfx.cpp
- StdAfx.h
- readme.txt
- sample-grammar.txt
- ctkCheckValid.h
- ctkEnumerator.h
- ctkExternalObjectPointer.h
- ctkFlagsMixin.h
- ctkHLinkedList.cpp
- ctkHLinkedList.h
- ctkMisc.h
- ctkSerializable.h
- tkCommon.h
- vssver.scc
- vcstl_nowarnings.h
grammaride.zip
- cpp-grammar.txt
- grammarIDE.exe
- readme.txt
- sample-grammar.txt
- stlport_vc645.dll
cxtpackagetut_win32vc.zip
- common.cpp
- common.h
- common_error.h
- common_win32.h
- COPYING
- cpAbsd.dsw
- analyzer.h
- cxAnalyzer.dsp
- cxAnalyzer.plg
- cxAnalyzerException.h
- cxAnalyzerExpression.cpp
- cxAnalyzerExpression.h
- cxAnalyzerMain.cpp
- cxAnalyzerMain.h
- cxAnalyzerTree.cpp
- cxAnalyzerTree.h
- cxAnalyzerTypeMap.cpp
- cxAnalyzerTypeMap.h
- cxaParseTree.cpp
- cxaParseTree.h
- cxaToken.cpp
- cxaToken.h
- cxaTokenStream.cpp
- Readme.txt
- StdAfx.cpp
- StdAfx.h
- cxTokenizer.cpp
- cxTokenizer.dsp
- cxTokenizer.h
- cxTokenizer.plg
- cxTokenizerCharTokenRule.cpp
- cxTokenizerCharTokenRule.h
- cxTokenizerContext.cpp
- cxTokenizerContext.h
- cxTokenizerContextCookie.h
- cxTokenizerContextDiags.cpp
- cxTokenizerDiags.cpp
- cxTokenizerException.cpp
- cxTokenizerException.h
- cxTokenizerInputStream.h
- cxTokenizerMap.cpp
- cxTokenizerMap.h
- cxTokenizerMapData.cpp
- cxTokenizerMapData.h
- cxTokenizerMapDataDiags.cpp
- cxTokenizerMapDiags.cpp
- cxTokenizerNumberTokenRule.cpp
- cxTokenizerNumberTokenRule.h
- cxTokenizerSTLInputStream.cpp
- cxTokenizerSTLInputStream.h
- cxTokenizerStringTokenRule.cpp
- cxTokenizerStringTokenRule.h
- cxTokenizerTextInputStream.cpp
- cxTokenizerTextInputStream.h
- cxTokenizerTokenRule.cpp
- cxTokenizerTokenRule.h
- Readme.txt
- StdAfx.cpp
- StdAfx.h
- tokenizer.h
- tokenizer_error.h
- tokenizer_flags.h
- tokenizerBase.h
- cxtPackage.cpp
- cxtPackage.dsp
- cxtPackage.h
- cxtPackage.plg
- package.h
- Readme.txt
- StdAfx.cpp
- StdAfx.h
- mathTok
  - mathTok.cpp
  - mathTok.dsp
  - mathTok.plg
  - ReadMe.txt
  - StdAfx.cpp
  - StdAfx.h
  - ReadMe.txt
  - simpleCalc.bmp
  - simpleCalc.cpp
  - simpleCalc.dsp
  - simpleCalc.plg
  - StdAfx.cpp
  - StdAfx.h
  - ctkCheckValid.h
  - ctkExternalObjectPointer.h
  - ctkFlagsMixin.h
  - ctkHLinkedList.cpp
  - ctkHLinkedList.h
  - ctkMisc.h
  - ctkSerializable.h
  - tkCommon.h
cxtpackage_win32vc.zip
- common.cpp
- common.h
- common_error.h
- common_win32.h
- COPYING
- cpAbsd.dsw
- analyzer.h
- cxAnalyzer.dsp
- cxAnalyzer.plg
- cxAnalyzerException.h
- cxAnalyzerExpression.cpp
- cxAnalyzerExpression.h
- cxAnalyzerMain.cpp
- cxAnalyzerMain.h
- cxAnalyzerTree.cpp
- cxAnalyzerTree.h
- cxAnalyzerTypeMap.cpp
- cxAnalyzerTypeMap.h
- cxaParseTree.cpp
- cxaParseTree.h
- cxaToken.cpp
- cxaToken.h
- cxaTokenStream.cpp
- Debug
  - Readme.txt
  - StdAfx.cpp
  - StdAfx.h
  - cxTokenizer.cpp
  - cxTokenizer.dsp
  - cxTokenizer.h
  - cxTokenizer.plg
  - cxTokenizerCharTokenRule.cpp
  - cxTokenizerCharTokenRule.h
  - cxTokenizerContext.cpp
  - cxTokenizerContext.h
  - cxTokenizerContextCookie.h
  - cxTokenizerContextDiags.cpp
  - cxTokenizerDiags.cpp
  - cxTokenizerException.cpp
  - cxTokenizerException.h
  - cxTokenizerInputStream.h
  - cxTokenizerMap.cpp
  - cxTokenizerMap.h
  - cxTokenizerMapData.cpp
  - cxTokenizerMapData.h
  - cxTokenizerMapDataDiags.cpp
  - cxTokenizerMapDiags.cpp
  - cxTokenizerNumberTokenRule.cpp
  - cxTokenizerNumberTokenRule.h
  - cxTokenizerSTLInputStream.cpp
  - cxTokenizerSTLInputStream.h
  - cxTokenizerStringTokenRule.cpp
  - cxTokenizerStringTokenRule.h
  - cxTokenizerTextInputStream.cpp
  - cxTokenizerTextInputStream.h
  - cxTokenizerTokenRule.cpp
  - cxTokenizerTokenRule.h
  - Readme.txt
  - StdAfx.cpp
  - StdAfx.h
  - tokenizer.h
  - tokenizer_error.h
  - tokenizer_flags.h
  - tokenizerBase.h
  - cxtPackage.cpp
  - cxtPackage.dsp
  - cxtPackage.h
  - cxtPackage.plg
  - package.h
  - Readme.txt
  - StdAfx.cpp
  - StdAfx.h
  - ctkCheckValid.h
  - ctkExternalObjectPointer.h
  - ctkFlagsMixin.h
  - ctkHLinkedList.cpp
  - ctkHLinkedList.h
  - ctkMisc.h
  - ctkSerializable.h
  - tkCommon.h

/*********************************************************************
	Copyright (C) 2001 by

		Alexander Berthold, alexander-berthold@web.de.
		Hoegestr. 54
		79108 Freiburg i. Breisgau
		Germany

    -- This file is part of cxAnalyzer --

    "cxAnalyzer" is free software; you can redistribute it and/or 
	modify it under the terms of the GNU Lesser General Public 
	License as published by the Free Software Foundation; either 
	version 2 of the License, or any later version.

    "cxAnalyzer" is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
	License along with "cxAnalyzer"; if not, write to the Free 
	Software  Foundation, Inc., 59 Temple Place, Suite 330, 
	Boston, MA  02111-1307  USA

    ---------------------------------------------------------------
      If you find any bugs or if you make other corrections/
	  enhancements, i'd appreciate if you'd let me know about 
	  that. My email is
  
       alexander-berthold@web.de
  
      If you share this code, do not remove this text.
    ---------------------------------------------------------------

Class:      cxaToken / cxaTokenStream / cxaParseElement /
			cxaParseNode / cxaParseBranch
Author:     Alexander Berthold
Copyright:  Alexander Berthold
Date:       2001/06/12
Version:	0.1.07
Purpose:    - cxaToken:
				Contains data for a token
			- cxaTokenStream:
				Implements cxTokenizerListener. Accepts the output token stream
				from the tokenizer and builds a list from the tokens.
			- cxaParseElement / cxaParseNode / cxaParseBranch
				Derived from the appropriate ctkHLinkedListXXX classes.
				Build the parse tree.
			

Version history:

	-	2001/05/19
		Released the version 0.1.05

	-	2001/06/12
		Current source labeled version 0.1.07

ToDo:
	-	Change cxaTokenStream to use std::deque instead of std::list

	-	Improve vRegisterToken performance (see ToDo in code)

	-	Improve documentation :)

*********************************************************************/

// cxaToken.h: interface for the cxaToken class.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_CXATOKEN_H__0CD5BF48_9045_4916_B780_68258BC9D524__INCLUDED_)
#define AFX_CXATOKEN_H__0CD5BF48_9045_4916_B780_68258BC9D524__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

class 		cxaParseBranch;
class 		cxaParseNode;
class		cxAnalyzerTypeMap;
class		sit_data_t;

class cxaToken
{
// Construction/Destruction
public:
	cxaToken(int _nToid, int _nAtmType, int _nIDValue, LPCTSTR _lpszTokenText)
		{
		vSetTo(_nToid,_nAtmType,_nIDValue,_lpszTokenText);
		}
	cxaToken(const cxaToken& other)
		{
		vSetTo(other.nToid,other.nAtmType,other.nIDValue,other.lpszTokenText);
		}
	cxaToken()
		{
		nToid		=0;
		nAtmType	=0;
        nIDValue    =0;
		lpszTokenText=NULL;
		}
	~cxaToken()
		{
		if(lpszTokenText)
			{
			delete[] lpszTokenText;
			lpszTokenText=NULL;
			}
		}

// Attributes
public:
	// Token order id
	int				nToid;
	// Token identifier (analyzer type map)
	int				nAtmType;
    // Token custom identifier
    int             nIDValue;
	// Token text (if needed)
	TCHAR*			lpszTokenText;

	int				nGetAtmType() const
					{ return nAtmType; };
	int				nGetTokenOrderID() const
					{ return nToid; };

public:
	void			vSetTo(int _nToid, int _nAtmType, int _nIDValue, LPCTSTR _lpszTokenText)
		{
		nToid		=_nToid;
		nAtmType	=_nAtmType;
        nIDValue    =_nIDValue;

		if(_lpszTokenText!=NULL)
			{
			lpszTokenText=new TCHAR[_tcslen(_lpszTokenText)+1];
			_tcscpy(lpszTokenText,_lpszTokenText);
			}
		else
			lpszTokenText=NULL;
		}

	cxaToken&		operator=(const cxaToken& other)
		{
		nToid		=other.nToid;
		nAtmType	=other.nAtmType;
        nIDValue    =other.nIDValue;
		
		if(other.lpszTokenText)
			{
			lpszTokenText=new TCHAR[_tcslen(other.lpszTokenText)+1];
			_tcscpy(lpszTokenText,other.lpszTokenText);
			}
		else
			lpszTokenText=NULL;

		return (*this);
		}
};

// cxaTokenStream: receives the tokenizer's output through the
// cxTokenizerListener interface and is itself the std::list<cxaToken>
// in which the tokens are collected.
class cxaTokenStream
	: public cxTokenizerListener
	, public std::list<cxaToken>
{
public:
	// --- Construction/Destruction ---
	cxaTokenStream(const cxAnalyzerTypeMap* patmMap);
	virtual ~cxaTokenStream();

	// --- Attributes ---
	// Analyzer type map pointer.
	// NOTE(review): presumably the map passed to the constructor and not
	// owned by the stream - confirm in cxaTokenStream.cpp.
	const cxAnalyzerTypeMap		*m_patmMap;

	// --- cxTokenizerListener operations ---
	// Always reports true.
	virtual	bool				fCheckValid() const
		{
		return true;
		}

	// Defined out of line.
	virtual	void				vRegisterToken(	const std::tstring& strTokenText,
												const cxTokenizerTokenRule* pttrRule,
												const cxTokenizerInputStream *ptisStream);

	// Always reports false.
	virtual	bool				fShouldDelete() const
		{
		return false;
		}

	// --- Debugging ---
	void						vDump() const;
};

class	sit_data_t;

// cxaParseElement: common base of the parse tree elements. An element is
// either a node or a branch; the flag given to the constructor is handed
// on to the ctkHLinkedListElement base.
class 	cxaParseElement
	: public ctkHLinkedListElement<cxaParseElement>
{
public:
	// --- Construction/Destruction ---
	cxaParseElement(bool fIsNode)
		: ctkHLinkedListElement<cxaParseElement>(fIsNode)
		{
		}

	// --- Operations ---
	// Downcast helpers. Each one asserts the element kind and then
	// reinterprets 'this' as the requested derived type.
	// NOTE(review): cxaParseBranch / cxaParseNode are only forward-declared
	// at this point, hence reinterpret_cast; this relies on cxaParseElement
	// being located at offset 0 of the derived object - confirm.

	// This element viewed as a branch.
	cxaParseBranch*	papbElem()
		{
		ASSERT(fIsBranch());
		cxaParseBranch* const	papbSelf	=reinterpret_cast<cxaParseBranch*>(this);
		return papbSelf;
		}

	// This element viewed as a branch (const access).
	const cxaParseBranch* papbElem() const
		{
		ASSERT(fIsBranch());
		const cxaParseBranch* const	papbSelf	=reinterpret_cast<const cxaParseBranch*>(this);
		return papbSelf;
		}

	// This element viewed as a node.
	cxaParseNode*	papnElem()
		{
		ASSERT(fIsNode());
		cxaParseNode* const	papnSelf	=reinterpret_cast<cxaParseNode*>(this);
		return papnSelf;
		}

	// This element viewed as a node (const access).
	const cxaParseNode* papnElem() const
		{
		ASSERT(fIsNode());
		const cxaParseNode* const	papnSelf	=reinterpret_cast<const cxaParseNode*>(this);
		return papnSelf;
		}
};

// cxaParseBranch: branch element of the parse tree. Stores the type and
// id values, the precedence priority and the left/right bound flags it
// was constructed with, and deletes its child elements on destruction.
class	cxaParseBranch
	: public cxaParseElement
	, public ctkHLinkedListBranch<cxaParseBranch,cxaParseElement>
{
// Construction/Destruction
public:
	// Stores the given values; the element is registered with the base
	// as "not a node" (cxaParseElement(false)).
	cxaParseBranch(int _nAtmType, int _nIDValue, int _nPrecPrio,
					bool _fLeftBound, bool _fRightBound)
		: cxaParseElement(false)
		, nAtmType(_nAtmType)
		, nIDValue(_nIDValue)
		, nPrecPrio(_nPrecPrio)
		, fLeftBound(_fLeftBound)
		, fRightBound(_fRightBound)
		{
		}

	// Walks the chain starting at papeGetFrontElement() and deletes every
	// element. The successor is fetched before the delete because the
	// current element cannot be queried afterwards.
	~cxaParseBranch()
		{
		for(cxaParseElement *papeVictim = papeGetFrontElement(); papeVictim!=NULL; )
			{
			cxaParseElement	*papeSuccessor = papeVictim->tGetNext();
			delete papeVictim;
			papeVictim = papeSuccessor;
			}
		}

// Attributes
protected:
	int					nAtmType;
	int					nIDValue;
	int					nPrecPrio;
	bool				fLeftBound;
	bool				fRightBound;

// Operations
public:
	// Plain accessors for the values stored by the constructor.
	int					nGetAtmType() const
		{
		return nAtmType;
		}
	int					nGetIDValue() const
		{
		return nIDValue;
		}
	int					nGetPrecPrio() const
		{
		return nPrecPrio;
		}
	bool				fIsLeftBound() const
		{
		return fLeftBound;
		}
	bool				fIsRightBound() const
		{
		return fRightBound;
		}

	// Front/back queries and navigation (defined out of line).
	bool				fIsFrontNULLToken() const;
	bool				fIsBackNULLToken() const;
	cxaParseNode		*papnGetFrontNode() const;
	cxaParseNode		*papnGetBackNode() const;
	cxaParseElement		*papeGetFrontElement() const;
	cxaParseElement		*papeGetBackElement() const;
	cxaParseBranch		*papbGetParent() const;

	// Debug output; nSpaces defaults to 0.
	void				vDump(int nSpaces = 0) const;

// Simple token iteration operations
public:
	// All of these work on caller-provided state in a sit_data_t.
	void				vEnumBegin(sit_data_t* psData) const;
	bool				fHelpNext(sit_data_t* psData, bool &fBreak) const;
	bool				fEnumNext(sit_data_t* psData) const;
	const cxaParseElement *papeEnumGetAt(sit_data_t* psData) const;
	void				vEnumEnd(sit_data_t* psData) const;
};

// cxaParseNode: node element of the parse tree. Holds a pointer to the
// token it stands for; nothing in this class deletes that token.
class	cxaParseNode
	: public cxaParseElement
	, public ctkHLinkedListNode<cxaParseNode,cxaParseElement>
{
// Construction/Destruction
public:
	// Remembers the given token pointer; the element is registered with
	// the base as a node (cxaParseElement(true)).
	cxaParseNode(const cxaToken* _patToken)
		: cxaParseElement(true)
		, patToken(_patToken)
		{
		}

// Attributes
protected:
	const cxaToken*	patToken;

	// Replaces the stored token pointer.
	void				vSetToken(const cxaToken* _patToken)
		{
		patToken = _patToken;
		}

// Operations
public:
	// Returns the stored token pointer.
	const cxaToken		*patGetToken() const
		{
		return patToken;
		}
};

// sit_data_t: caller-provided state object taken by the cxaParseBranch
// enumeration functions (vEnumBegin / fHelpNext / fEnumNext /
// papeEnumGetAt / vEnumEnd).
class	sit_data_t
{
public:
	// Branch pointer.
	// NOTE(review): presumably the branch currently being enumerated -
	// confirm against the vEnumBegin implementation.
	const cxaParseBranch			*papbBranch;
	// Iterator of cxaParseBranch.
	// NOTE(review): presumably the current position within papbBranch -
	// confirm against the fEnumNext implementation.
	cxaParseBranch::const_iterator	pos0;
};

#endif // !defined(AFX_CXATOKEN_H__0CD5BF48_9045_4916_B780_68258BC9D524__INCLUDED_)

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here

Written By

Alexander Berthold

Web Developer

Germany

This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Tokenizer and analyzer package supporting precedence prioritized rules

License

Comments and Discussions