Click here to Skip to main content
15,880,608 members
Articles / Programming Languages / C++

Tokenizer and analyzer package supporting precedence prioritized rules

Rate me:
Please Sign up or sign in to vote.
5.00/5 (4 votes)
1 Jan 2002 · 3 min read   180.7K   2.8K   54
A library allowing you to conveniently build a custom tokenizer and analyzer supporting precedence-prioritized rules
/*********************************************************************
	Copyright (C) 2001/2 by

		Alexander Berthold, alexander-berthold@web.de.
		Hoegestr. 54
		79108 Freiburg i. Breisgau
		Germany

    -- This file is part of cxtPackage --

    "cxtPackage" is free software; you can redistribute it and/or 
	modify it under the terms of the GNU Lesser General Public 
	License as published by the Free Software Foundation; either 
	version 2 of the License, or any later version.

    "cxtPackage" is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
	License along with "cxtPackage"; if not, write to the Free 
	Software  Foundation, Inc., 59 Temple Place, Suite 330, 
	Boston, MA  02111-1307  USA

    ---------------------------------------------------------------
      If you find any bugs or if you make other corrections/
	  enhancements, i'd appreciate if you'd let me know about 
	  that. My email is
  
       alexander-berthold@web.de
  
      If you share this code, do not remove this text.
    ---------------------------------------------------------------

Class:      cxtPackage
Author:     Alexander Berthold
Copyright:  Alexander Berthold
Date:       2001/12/19
Version:	0.2.01
Purpose:    Serves as interface to cxTokenizer / cxAnalyzer.
			Provides methods to conveniently create a custom
			tokenizer and analyzer.


Version history:

	-	2001/06/04
		Released first version.

	-	2001/06/12
		Added support for cxaStatusCookie in papbCheckForRule.
		Current source labeled version 0.1.02
		
	-	2001/12/19
		Made some corrections since then due to changes in the
		underlying libraries. Labeled version 0.2.01
		
*********************************************************************/

// cxtPackage.h: interface for the cxtPackage class.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_CXTPACKAGE_H__073D39C0_EDE2_461E_9CA8_BE5E52C98860__INCLUDED_)
#define AFX_CXTPACKAGE_H__073D39C0_EDE2_461E_9CA8_BE5E52C98860__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

// Sentinel results expressed as cxaParseBranch* values instead of real
// branch objects: CXTCFR_VALID is the null pointer and CXTCFR_INVALID is
// the address 1. Neither value refers to an object, so neither may be
// dereferenced -- compare against them only.
// NOTE(review): the CXTCFR prefix suggests these are the special return
// values of cxtPackage::papbCheckForRule*() -- confirm in cxtPackage.cpp.
#define	CXTCFR_VALID		((cxaParseBranch*)0)
#define	CXTCFR_INVALID		((cxaParseBranch*)1)

	// Abstract interface for a custom delimiter recognizer. An object of a
	// derived class is handed to cxtPackage::nReadUntilDelimeter() as a
	// cxtpDelimeterClassBase pointer.
	class	cxtpDelimeterClassBase
		{
		public:
			// Virtual destructor: this is a polymorphic base that is held
			// and passed around by base-class pointer, so destroying a
			// derived object through that pointer must be well defined.
			virtual ~cxtpDelimeterClassBase() {}

			// Called with a token's ID value and its text.
			// Returns true when this token is to be treated as the
			// delimiter, false otherwise. Implementations may keep state
			// between calls (the method is non-const).
			virtual bool fIsDelimeter(int nIDValue, const std::tstring& strTokenText) = 0;

			// Returns true when the recognizer considers itself to be in
			// an error state; the exact meaning is up to the derived class.
			virtual bool fIsInError() const = 0;
		};

	// Delimiter recognizer for a pair of matching "open"/"close" token IDs.
	// It keeps a nesting counter: every open token increments it, every
	// close token decrements it, and the close token that brings the
	// counter to zero is reported as the delimiter.
	class	cxtpMatchingDelimeterClass : public cxtpDelimeterClassBase
		{
		protected:
			// true if the object was constructed with a non-zero counter;
			// this class never changes it afterwards.
			bool			fInitialTokenFound;
			// ID value of the opening token
			int				nIDValueOpen;
			// ID value of the closing token
			int				nIDValueClose;
			// current nesting depth (open tokens seen minus close tokens seen,
			// offset by the initial value passed to the constructor)
			int				nOpenCounter;

		public:
			// _nIDValueOpen / _nIDValueClose: token IDs of the matching pair.
			// _nOpenCounter: initial nesting depth (non-zero if opening
			// tokens are to be counted as already consumed).
			// Initializers are listed in member declaration order.
			cxtpMatchingDelimeterClass(int _nIDValueOpen, int _nIDValueClose, int _nOpenCounter)
				:	fInitialTokenFound(_nOpenCounter!=0),
					nIDValueOpen(_nIDValueOpen),
					nIDValueClose(_nIDValueClose),
					nOpenCounter(_nOpenCounter)
				{
				}

			// Updates the nesting counter for the token with ID 'nIDValue'
			// and returns true exactly when that token is a close token
			// that brings the counter down to zero. The token text is not
			// inspected.
			virtual bool fIsDelimeter(int nIDValue, const std::tstring& strTokenText)
				{
					if(nIDValue==nIDValueOpen)
						++nOpenCounter;

					// anything but a close token can never end the region
					if(nIDValue!=nIDValueClose)
						return false;

					--nOpenCounter;
					return nOpenCounter==0;
				}

			// In error as long as the open/close tokens seen so far do not
			// balance out (counter different from zero).
			virtual bool fIsInError() const
				{
					return nOpenCounter!=0;
				}
		};

class cxtPackage : public cxTokenizerListener
{
// Construction/Destruction
public:
	cxtPackage(std::tstringstream& init);
	virtual ~cxtPackage();

// Attributes
protected:
	// token - map
	cxTokenizerMap			m_xtmTokMap;
	// the tokenizer object
	cxTokenizer				*m_pxtTokenizer;
	// the grammar analyzer map
	cxAnalyzerTypeMap		*m_patmRuleMap;
	// the grammar analyzer object
	cxAnalyzerMain			*m_pamParser;
	// the token stream
	cxaTokenStream			*m_patsTokens;
	// the callback object for unknown tokens (may be NULL)
	const cxAnalyzerUnknownTokenCallback *m_pacbUnknownToken;

// ReadUntilDelimeter - specific attributes
protected:
	// no. of break-id values
	int						m_nCountBreakIDValues;
	// nReadUntilDelimeter:	array of break-id values
	int						*m_pnBreakIDValues;
	// nReadUntilDelimeter:	custom delimeter recognition class
	cxtpDelimeterClassBase	*m_pxtpDelimClass;
	// a delimeting token has been recognized
	bool					m_fBreakRead;
	// no. of tokens read
	int						m_nTokenCount;

// patCheckForRule - specific attributes
protected:
	// the current starting position within the token stream
	cxaTokenStream::const_iterator	m_itCurrentToken;

// Operations
public:
	// preprocessor specific (requires the rule "cpp-preprocessor")
    bool                    fPreprocessorSetIncludePaths(const std::vector<std::tstring&>& vecPaths) { return false; };
	bool					fPreprocessorGetErrors(std::vector<std::tstring>& dest);

	// general
	void					vReset();

	// accessor
	int						nGetCurrentLineNumber() const
							{	
								if(ptisGetInputStream()==NULL) return 0;
								long lCurPos = ptisGetInputStream()->lGetCurrentPosition();
								return ptisGetInputStream()->lGetLineNumberFromPosition(lCurPos); 
							};
	const cxaTokenStream	*patsGetTokenStream() const
							{ return m_patsTokens;	};
	const cxTokenizerInputStream	*ptisGetInputStream() const
							{ return m_pxtTokenizer->ptiGetInputStream(); };
	const cxAnalyzerMain	*pamGetParser() const
							{ return m_pamParser; };
	const cxTokenizer*		pxtGetTokenizer() const
							{ return m_pxtTokenizer; };
	const cxTokenizerMap*	pxtmGetTokenizerMap() const
							{ return &m_xtmTokMap; };
	const cxAnalyzerTypeMap* patmGetAnalyzerMap() const 
							{ return m_patmRuleMap; };

//	bool					fIsImplicitRule(int nAtmTypeTestFor, int nAtmTypeSource) const;

	void					vSetInputStream(cxTokenizerInputStream* pxtis);
	void					vSetStartFromBeginning();
	void					vSetStartFrom(const cxaTokenStream::const_iterator itStartPos);
	void					vFlush(const cxaTokenStream::const_iterator itEndPos);
	void					vFlush();
	void					vSetDelimeterIDs(const int *pnIDValues, int nCount=0);
	void					vSetUnknownTokenCallback(cxAnalyzerUnknownTokenCallback* pacbUnknownToken)
							{	m_pamParser->vSetUnknownTokenCallback(pacbUnknownToken); };

	// action operations
	int						nReadUntilDelimeter();
	int						nReadUntilDelimeter(cxtpDelimeterClassBase *pxtpDelimClass);

	cxaParseBranch*			papbCheckForRule(int nIDValue, cxaTokenStream::const_iterator *pend, cxaStatusCookie* pascCondition=NULL, bool fTestOnly = false);
	cxaParseBranch*			papbCheckForRuleAtm(int nAtmType, cxaTokenStream::const_iterator *pend, cxaStatusCookie* pascCondition=NULL, bool fTestOnly = false);
	void					vRebalance(cxaParseBranch *papbBranch, int nIDValue) const;
	void					vRebalanceAtm(cxaParseBranch *papbBranch, int nAtmType) const;

// cxTokenizerListener operations
public:
	virtual	void			vRegisterToken(	const std::tstring& strTokenText, const cxTokenizerTokenRule* pltrRule,
											const cxTokenizerInputStream *ptisStream);

// ctkCheckValid operations
public:
#ifdef _DEBUG
	virtual bool			fCheckValid() const;
	static bool				fRunDiagnostics();
#else
	virtual bool			fCheckValid() const { return true; };
#endif

// ctkExternalObjectPointer operations
public:
	virtual	bool			fShouldDelete() const { return false;};
};

#endif // !defined(AFX_CXTPACKAGE_H__073D39C0_EDE2_461E_9CA8_BE5E52C98860__INCLUDED_)

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
Germany
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions