Click here to Skip to main content
15,884,968 members
Articles / Programming Languages / C++

Tokenizer and analyzer package supporting precedence prioritized rules

Rate me:
Please Sign up or sign in to vote.
5.00/5 (4 votes)
1 Jan 2002 · 3 min read · 181.4K   2.8K   54  
A library allowing you to conveniently build a custom tokenizer and analyzer supporting precedence-prioritized rules
/*********************************************************************
	Copyright (C) 2001/2 by

		Alexander Berthold, alexander-berthold@web.de.
		Hoegestr. 54
		79108 Freiburg i. Breisgau
		Germany

    -- This file is part of cxAnalyzer --

    "cxAnalyzer" is free software; you can redistribute it and/or 
	modify it under the terms of the GNU Lesser General Public 
	License as published by the Free Software Foundation; either 
	version 2 of the License, or any later version.

    "cxAnalyzer" is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
	License along with "cxAnalyzer"; if not, write to the Free 
	Software  Foundation, Inc., 59 Temple Place, Suite 330, 
	Boston, MA  02111-1307  USA

    ---------------------------------------------------------------
      If you find any bugs or if you make other corrections/
	  enhancements, i'd appreciate if you'd let me know about 
	  that. My email is
  
       alexander-berthold@web.de
  
      If you share this code, do not remove this text.
    ---------------------------------------------------------------

Class:      cxaToken / cxaTokenStream / cxaParseElement /
			cxaParseNode / cxaParseBranch
Author:     Alexander Berthold
Copyright:  Alexander Berthold
Date:       2001/12/19
Version:	0.2.01
Purpose:    - cxaToken:
				Contains data for a token
			- cxaTokenStream:
				Implements cxTokenizerListener. Accepts the output token stream
				from the tokenizer and builds a list from the tokens.
			- cxaParseElement / cxaParseNode / cxaParseBranch
				Derived from the appropriate ctkHLinkedListXXX classes.
				Contain the parse tree.
			

Version history:

	-	2001/06/12
		Current source labeled version 0.1.07

	-	2001/12/19
		Released the version 0.2.01

ToDo:
	-	Change cxaTokenStream to use std::deque instead of std::list

	-	Improve documentation :)

*********************************************************************/

// cxaToken.h: interface for the cxaToken class.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_CXATOKEN_H__0CD5BF48_9045_4916_B780_68258BC9D524__INCLUDED_)
#define AFX_CXATOKEN_H__0CD5BF48_9045_4916_B780_68258BC9D524__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

// Forward declarations.
// cxaParseBranch and cxaParseNode are defined further down in this header;
// cxaParseElement needs their names for its papbElem()/papnElem() accessors.
class 		cxaParseBranch;
class 		cxaParseNode;
// Not defined in this header; only used through pointers here.
class		cxAnalyzerTypeMap;
// Enumeration helper, defined at the end of this header.
class		sit_data_t;

class cxaToken
{
// Construction/Destruction
public:
	cxaToken(int _nToid, int _nAtmType, int _nIDValue, LPCTSTR _lpszTokenText, void *_pvData)
		{
		vSetTo(_nToid,_nAtmType,_nIDValue,_lpszTokenText,_pvData);
		}
	cxaToken(const cxaToken& other)
		{
		vSetTo(other.nToid,other.nAtmType,other.nIDValue,other.lpszTokenText,other.pvData);
		}
	cxaToken()
		{
		nToid		=0;
		nAtmType	=0;
        nIDValue    =0;
		lpszTokenText=NULL;
		}
	~cxaToken()
		{
		if(lpszTokenText)
			{
			delete[] lpszTokenText;
			lpszTokenText=NULL;
			}
		}

// Attributes
public:
	// Token order id
	int				nToid;
	// Token identifier (analyzer type map)
	int				nAtmType;
    // Token custom identifier
    int             nIDValue;
	// Token text (if needed)
	TCHAR*			lpszTokenText;
	// Additional data
	void			*pvData;

	int				nGetAtmType() const
					{ return nAtmType; };
	int				nGetTokenOrderID() const
					{ return nToid; };

public:
	void			vSetTo(int _nToid, int _nAtmType, int _nIDValue, LPCTSTR _lpszTokenText, void *_pvData)
		{
		nToid		=_nToid;
		nAtmType	=_nAtmType;
        nIDValue    =_nIDValue;
		pvData		=_pvData;

		if(_lpszTokenText!=NULL)
			{
			lpszTokenText=new TCHAR[_tcslen(_lpszTokenText)+1];
			_tcscpy(lpszTokenText,_lpszTokenText);
			}
		else
			lpszTokenText=NULL;
		}

	cxaToken&		operator=(const cxaToken& other)
		{
		nToid		=other.nToid;
		nAtmType	=other.nAtmType;
        nIDValue    =other.nIDValue;
		pvData		=other.pvData;
		
		if(other.lpszTokenText)
			{
			lpszTokenText=new TCHAR[_tcslen(other.lpszTokenText)+1];
			_tcscpy(lpszTokenText,other.lpszTokenText);
			}
		else
			lpszTokenText=NULL;

		return (*this);
		}
};

/*
	cxaTokenStream

	A std::list of cxaToken objects together with a pointer to the analyzer
	type map passed at construction. The file header describes it as the
	receiver of the tokenizer's output token stream; the cxTokenizerListener
	base class is currently commented out, the listener-style virtual
	methods are still declared below.
*/
class cxaTokenStream :	/*public cxTokenizerListener,*/
						public std::list<cxaToken>
{
// Construction/Destruction
public:
	cxaTokenStream(const cxAnalyzerTypeMap* patmMap);
	virtual ~cxaTokenStream();

// Attributes
public:
	// Analyzer type map associated with this stream
	const cxAnalyzerTypeMap		*m_patmMap;

// Listener operations (the original comment calls them "cooLexxerListener Operations")
public:
	virtual	bool				fCheckValid() const	{ return true; }
	virtual	void				vRegisterToken(	const std::tstring& strTokenText, const cxTokenizerTokenRule* pttrRule, int nIDValue, void *pvData,
												const cxTokenizerInputStream *ptisStream);
	virtual	bool				fShouldDelete() const { return false; }

// Operations
public:

	// Debugging
	void						vDump() const;

	// Content enumeration support.
	// Calls enumBegin(), then enumElement(index, text) once per token in
	// list order (index starts at 0), then enumEnd(). The text is a
	// formatted description of the token, limited to 255 characters.
	void						enumerate(ctkSortedEnumerator<LPCTSTR>& enumerator) const
		{
		int				i = 0;

		enumerator.enumBegin();
		for(const_iterator it=begin();it!=end();++it)
			{
			TCHAR		szTemp[256];
			// _T() on the fallback text keeps both branches of the conditional
			// the same character type in UNICODE builds as well.
			_sntprintf(szTemp,255,_T("Token Order ID:%5d, AtmID#:%5d, Text: '%s'"),
				it->nToid, it->nAtmType, it->lpszTokenText!=NULL?it->lpszTokenText:_T("<N/A>"));
			// _sntprintf does not write a terminator when the output is
			// truncated, so terminate explicitly.
			szTemp[255]	=_T('\0');
			enumerator.enumElement(i++,szTemp);
			}
		enumerator.enumEnd();
		}
};

// Forward declaration (repeated; also declared near the top of this header).
// The class itself is defined at the end of this header.
class	sit_data_t;

/*
	cxaParseElement

	Abstract base of the parse tree elements. An element is either a node
	(cxaParseNode) or a branch (cxaParseBranch); which one is passed to the
	ctkHLinkedListElement base at construction.
*/
class 	cxaParseElement :	public ctkHLinkedListElement<cxaParseElement>
{
// Construction/Destruction
public:
	// 'fIsNode' is forwarded to the linked list element base class.
	cxaParseElement(bool fIsNode) : ctkHLinkedListElement<cxaParseElement>(fIsNode)
		{}

	// Elements are deleted through cxaParseElement pointers (see
	// cxaParseBranch's destructor), so the destructor must be virtual for
	// the derived class destructor to run.
	virtual ~cxaParseElement()
		{}

// Operations
public:
	// True if the element carries no token.
	virtual bool			fIsNull() const = 0;
	// Analyzer-type-map type of the element.
	virtual int				nGetAtmType() const = 0;
	// Custom id value of the element.
	virtual int				nGetIDValue() const = 0;
	// Text of the element, may be NULL.
	virtual LPCTSTR			lpszGetElemText() const = 0;
	// Additional data of the element, may be NULL.
	virtual void*			pvGetData() const = 0;

	// Downcasts to the concrete element type. The caller must make sure the
	// element has the matching kind (checked with ASSERT only).
	// reinterpret_cast is used because the derived classes are only forward
	// declared at this point; NOTE(review): this relies on cxaParseElement
	// being located at the start of the derived objects.
	cxaParseBranch*	papbElem()
		{	ASSERT(fIsBranch()); return reinterpret_cast<cxaParseBranch*>(this); }
	const cxaParseBranch* papbElem() const
		{	ASSERT(fIsBranch()); return reinterpret_cast<const cxaParseBranch*>(this); }
	cxaParseNode*	papnElem()
		{	ASSERT(fIsNode()); return reinterpret_cast<cxaParseNode*>(this); }
	const cxaParseNode* papnElem() const
		{	ASSERT(fIsNode()); return reinterpret_cast<const cxaParseNode*>(this); }
};

/*
	cxaParseBranch

	Parse tree element of the 'branch' kind. Stores the analyzer-type-map
	type, the id value, the precedence priority and the left/right bound
	flags given at construction. On destruction it deletes the elements
	linked from its front element.
*/
class	cxaParseBranch :	public cxaParseElement,
							public ctkHLinkedListBranch<cxaParseBranch,cxaParseElement>
{
// Construction/Destruction
public:
	// Stores the given values; the element is registered as a branch
	// (cxaParseElement is constructed with 'false').
	cxaParseBranch(int _nAtmType, int _nIDValue, int _nPrecPrio,
					bool _fLeftBound, bool _fRightBound)
		:	cxaParseElement(false),
			nAtmType(_nAtmType),
			nIDValue(_nIDValue),
			nPrecPrio(_nPrecPrio),
			fLeftBound(_fLeftBound),
			fRightBound(_fRightBound)
		{
		}

	// Deletes every element reachable from the front element via tGetNext().
	~cxaParseBranch()
		{
		if(fIsEmpty())
			return;

		for(cxaParseElement *papeWalk = papeGetFrontElement(); papeWalk!=NULL; )
			{
			// Fetch the successor before the current element goes away.
			cxaParseElement	*papeFollowing = papeWalk->tGetNext();
			delete papeWalk;
			papeWalk = papeFollowing;
			}
		}

// Attributes
protected:
	int					nAtmType;
	int					nIDValue;
	int					nPrecPrio;
	bool				fLeftBound;
	bool				fRightBound;

// Operations
public:
	// cxaParseElement interface: a branch is never null and has neither
	// text nor additional data.
	virtual bool		fIsNull() const			{ return false; }
	virtual int			nGetAtmType() const		{ return nAtmType; }
	virtual int			nGetIDValue() const		{ return nIDValue; }
	virtual LPCTSTR		lpszGetElemText() const	{ return NULL; }
	virtual void*		pvGetData() const		{ return NULL; }

	int					nGetPrecPrio() const	{ return nPrecPrio; }
	bool				fCompareToIDValue(const cxAnalyzerTypeMap* patmMap, int nIDValue) const;
	bool				fIsLeftBound() const	{ return fLeftBound; }
	bool				fIsRightBound() const	{ return fRightBound; }

	bool				fIsFrontNULLToken() const;
	bool				fIsBackNULLToken() const;
	cxaParseNode		*papnGetFrontNode() const;
	cxaParseNode		*papnGetBackNode() const;
	cxaParseElement		*papeGetFrontElement() const;
	cxaParseElement		*papeGetBackElement() const;
	cxaParseBranch		*papbGetParent() const;

	void				vDump(int nSpaces = 0) const;

// Simple token iteration operations (state is kept in a sit_data_t)
public:
	void				vEnumBegin(sit_data_t* psData) const;
	bool				fHelpNext(sit_data_t* psData, bool &fBreak) const;
	bool				fEnumNext(sit_data_t* psData) const;
	const cxaParseElement *papeEnumGetAt(sit_data_t* psData) const;
	void				vEnumEnd(sit_data_t* psData) const;

// extended iteration operations
public:
	inline void	enumerate(ctkSortedEnumerator<LPCTSTR>& enumerator, int nNestLevel = 0, int *pnCounter = NULL) const;
	inline void enumerateRaw(ctkSortedEnumerator<LPCTSTR>& enumerator, int nNestLevel = 0, int *pnCounter = NULL) const;

	friend class cxaParseTree;
};

/*
	cxaParseNode

	Parse tree element of the 'node' kind. Refers to a token through a
	non-owning pointer (the pointer is never deleted here) and answers the
	cxaParseElement queries from that token. A node without a token
	(NULL pointer) reports itself as null and returns 0 / NULL values.
*/
class	cxaParseNode :		public cxaParseElement,
							public ctkHLinkedListNode<cxaParseNode,cxaParseElement>
{
// Construction/Destruction
public:
	// '_patToken' may be NULL; the element is registered as a node
	// (cxaParseElement is constructed with 'true').
	cxaParseNode(const cxaToken* _patToken)
		:	cxaParseElement(true),
			patToken(_patToken)
		{
		}

// Attributes
protected:
	const cxaToken*	patToken;

	// Replaces the referenced token.
	void				vSetToken(const cxaToken* _patToken)
		{
		patToken = _patToken;
		}

// Operations
public:
	const cxaToken		*patGetToken() const
		{
		return patToken;
		}

	virtual bool		fIsNull() const
		{
		return !patToken;
		}

	virtual int			nGetAtmType() const
		{
		if(!patToken)
			return 0;
		return patToken->nGetAtmType();
		}

	virtual int			nGetIDValue() const
		{
		if(!patToken)
			return 0;
		return patToken->nIDValue;
		}

	virtual LPCTSTR		lpszGetElemText() const
		{
		if(!patToken)
			return NULL;
		return patToken->lpszTokenText;
		}

	virtual void*		pvGetData() const
		{
		if(!patToken)
			return NULL;
		return patToken->pvData;
		}
};

/* sit_data_t
	- is a helper class used for enumerating the parse tree with the
	  high-level methods 'vEnumBegin()'/'papeEnumGetAt()'/'fEnumNext()'/
	  'vEnumEnd()' of cxaParseBranch, which all take a 'sit_data_t*'.
	- all members are protected; only cxaParseBranch (declared as friend)
	  can access them, so callers treat an instance as an opaque state
	  object.
	- no constructor is declared, so 'papbBranch' is not initialized by
	  this class itself.
 */
class	sit_data_t
{
protected:
	// presumably the branch currently being enumerated -- confirm in cxaToken.inl
	const cxaParseBranch			*papbBranch;
	// presumably the current position within that branch -- confirm in cxaToken.inl
	cxaParseBranch::const_iterator	pos0;

	friend class cxaParseBranch;
};

#include "cxaToken.inl"

#endif // !defined(AFX_CXATOKEN_H__0CD5BF48_9045_4916_B780_68258BC9D524__INCLUDED_)

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
Germany
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions