Click here to Skip to main content
15,881,852 members
Articles / Programming Languages / C++

Tokenizer and analyzer package supporting precedence prioritized rules

Rate me:
Please Sign up or sign in to vote.
5.00/5 (4 votes)
1 Jan 2002   3 min read   180.8K   2.8K   54  
A library allowing you to conveniently build a custom tokenizer and analyzer supporting precedence-prioritized rules
/*********************************************************************
	Copyright (C) 2001/2 by

		Alexander Berthold, alexander-berthold@web.de.
		Hoegestr. 54
		79108 Freiburg i. Breisgau
		Germany

    -- This file is part of cxAnalyzer --

    "cxAnalyzer" is free software; you can redistribute it and/or 
	modify it under the terms of the GNU Lesser General Public 
	License as published by the Free Software Foundation; either 
	version 2 of the License, or any later version.

    "cxAnalyzer" is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
	License along with "cxAnalyzer"; if not, write to the Free 
	Software  Foundation, Inc., 59 Temple Place, Suite 330, 
	Boston, MA  02111-1307  USA

    ---------------------------------------------------------------
      If you find any bugs or if you make other corrections/
	  enhancements, i'd appreciate if you'd let me know about 
	  that. My email is
  
       alexander-berthold@web.de
  
      If you share this code, do not remove this text.
    ---------------------------------------------------------------

Class:      cxAnalyzerTypeMap
Author:     Alexander Berthold
Copyright:  Alexander Berthold
Date:       2001/12/19
Version:	0.2.01
Purpose:    Stores metadata for the analyzer token / rule IDs.
			

Version history:

	-	2001/05/19
		Released the version 0.1.05

	-	2001/06/01
		Added #DEFINE's for the subtypes ATM_SUBT_RULE_FINITE and
		ATM_SUBT_RULE_OPEN (for closed rules / open rules)

	-	2001/06/12
		Current source labeled version 0.1.07#

	-	2001/12/01
		Added support for element enumeration

	-	2001/12/19
		Labeled 0.2.01

*********************************************************************/

// cxAnalyzerTypeMap.h: interface for the cxAnalyzerTypeMap class.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_CXANALYZERTYPEMAP_H__0FAC41ED_07D2_4FD8_9514_34B34370D8B8__INCLUDED_)
#define AFX_CXANALYZERTYPEMAP_H__0FAC41ED_07D2_4FD8_9514_34B34370D8B8__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

// Forward references
// (this header only uses pointers to these classes in member function
// signatures, so forward declarations are sufficient here)

class cxTokenizerMap;
class cxTokenizerMapData;
class cxTokenizerTokenRule;

// NOTE: negative replacement lists are parenthesized so that the macros
// always expand to a single primary expression, regardless of the tokens
// surrounding the point of use.

// ATM_ID_INVALID - indicates invalid status
#define	ATM_ID_INVALID			0
// ATM_ID_LITERAL - {#literal#}
#define	ATM_ID_LITERAL			(-1)
// ATM_ID_NIL - {#nil#} - indicates start/end of statement, not supported yet
#define ATM_ID_NIL				(-2)


// ATM_MTYPE_TOKEN - Item belongs to the group of tokens
#define	ATM_MTYPE_TOKEN			0
// ATM_MTYPE_COMP_TOKEN - Item belongs to the group of computed tokens, for example strings or numbers
#define	ATM_MTYPE_COMP_TOKEN	1
// ATM_MTYPE_RULE - Item is an analyzer expression
#define	ATM_MTYPE_RULE			2

// ATM_SUBT_RULE_UNDEF - nSubType if MType==ATM_MTYPE_RULE, indicates invalid status
#define	ATM_SUBT_RULE_UNDEF		(-1)
// ATM_SUBT_RULE_FINITE - nSubType if MType==ATM_MTYPE_RULE, finite rule (i.e. no expression of the Atm-Group is left- or rightbound)
#define	ATM_SUBT_RULE_FINITE	0
// ATM_SUBT_RULE_OPEN - nSubType if MType==ATM_MTYPE_RULE, open rule (i.e. one or more expressions of the Atm-Group are left- or rightbound)
#define	ATM_SUBT_RULE_OPEN		1

// Invalid token ID
#define TOKEN_ID_INVALID		(-1)
// ID assigned to reordered branches (see cxaParseTree::vRebalance)
#define TOKEN_ID_REORDER		(-2)

// cxAnalyzerTypeInfo - metadata record for a single analyzer type.
//
// Holds three integer identifiers (analyzer type, main type, sub type) plus
// an opaque user-data pointer. The object never dereferences or frees the
// user-data pointer; whoever attaches it stays responsible for its lifetime.
class cxAnalyzerTypeInfo
{
// Construction
public:
	// nAtmType  - analyzer type ID described by this record
	// nMType    - main type (presumably one of the ATM_MTYPE_* values)
	// nSubType  - sub type (presumably one of the ATM_SUBT_* values)
	//
	// The user-data pointer starts out as NULL, so pvGetData() and the
	// first pvSetData() call never hand out an indeterminate value.
	cxAnalyzerTypeInfo(int nAtmType, int nMType, int nSubType)
		: m_nAtmType(nAtmType)
		, m_nMType(nMType)
		, m_nSubType(nSubType)
		, m_pvData(NULL)
		{
		}

	// Nothing to release: m_pvData is not owned by this object.
	~cxAnalyzerTypeInfo()
		{
		}

// Attributes
protected:
	int				m_nAtmType;		// analyzer type ID
	int				m_nMType;		// main type
	int				m_nSubType;		// sub type, changeable via vSetSubType()
	void			*m_pvData;		// opaque user data, NULL until set

// Operations
public:
	// Returns the analyzer type ID passed to the constructor.
	int				nGetAtmType() const
		{ return m_nAtmType; }
	// Returns the main type passed to the constructor.
	int				nGetMType() const
		{ return m_nMType; }
	// Returns the current sub type.
	int				nGetSubType() const
		{ return m_nSubType; }
	// Returns the attached user data (NULL if none was attached).
	void*			pvGetData()
		{ return m_pvData; }
	const void*		pvGetData() const
		{ return m_pvData; }
	// Attaches new user data and returns the previously attached pointer,
	// so the caller can release or restore it.
	void*			pvSetData(void* pvNewData)
		{
		void *pvPrevious = m_pvData;
		m_pvData = pvNewData;
		return pvPrevious;
		}
	// Replaces the sub type.
	void			vSetSubType(int nSubType)
		{ m_nSubType = nSubType; }
};

// cxAnalyzerTypeMap - stores metadata for the analyzer token / rule IDs.
//
// Keeps several lookup tables keyed by string identifier, by DWORD value,
// by analyzer type ID (AtmID) and by custom ID (CustID). The member
// functions declared here are implemented outside this header; only the
// enumeration helpers are defined inline.
class cxAnalyzerTypeMap 
: public ctkSupportsSortedEnumeration< LPCTSTR >
{
// Construction
public:
	cxAnalyzerTypeMap();
	virtual ~cxAnalyzerTypeMap();

// Attributes
protected:
	// Presumably the next ID to hand out when a new entry is created
	// - confirm against the implementation file.
	int				m_nNextID;

	typedef std::map<std::tstring,int>	
					strmap_type;
	typedef std::map<DWORD,int>			
					dwmap_type;
	typedef std::map<int,cxAnalyzerTypeInfo*>
					atmmap_type;
	typedef std::map<int,int>
					atm_to_id_map_type;
	typedef std::map<int,int>
					id_to_atm_map_type;

	// Mapping from string identifiers to ID's
	strmap_type		m_mapStr;
	// Mapping from DWORD(cxTokenizerTokenRule*) to ID's
	// NOTE(review): a pointer squeezed into a DWORD does not survive on
	// 64-bit targets - confirm how the keys are produced before porting.
	dwmap_type		m_mapDW;
	// Mapping from AtmID to cxAnalyzerTypeInfo
	atmmap_type		m_mapAtmInfo;
	// Mapping from CustID to AtmID
	// (both int->int typedefs are the same type; the names now match the
	// direction of each mapping)
	id_to_atm_map_type m_mapCustIDtoAtmID;
	// ... and vice versa
	atm_to_id_map_type m_mapAtmIDtoCustID;

// Protected operations
protected:
	void			vInitFromTokenizerMapData(const cxTokenizerMapData* plmdRules);

// Operations
public:
	// Always reports a valid state in this base implementation.
	virtual bool	fCheckValid() const { return true; }

	void			vInitFromTokenizerMap(const cxTokenizerMap* pmInit);
	void			vSetCustIDToAtmMapping(int nCustID, int nAtmID);
	int				nGetAtmTypeFor(const cxTokenizerTokenRule* pltrRule) const;
	int				nGetAtmTypeFor(const std::tstring& str, bool fCreate = true);
	int				nGetAtmTypeFor(LPCTSTR lpszString, bool fCreate = true);
	int				nGetAtmTypeFor(int nCustID) const;
	int				nGetCustIDFor(int nAtmType) const;
	int				nNormalizeCustID(int nCustID) const;

	cxAnalyzerTypeInfo*
					patiGetTypeInfo(int nAtmID);
	const cxAnalyzerTypeInfo*
					patiGetTypeInfo(int nAtmID) const;
	cxAnalyzerTypeInfo*
					patiSetTypeInfo(int nID, int nMType, int nSubType);

	// For testing purposes only
	std::tstring	strGetStringForID(int nID) const;
	DWORD			dwGetDWORDForID(int nID) const;
	void			vDump() const;

	// Enumerates the string table; simply forwards all arguments to
	// enumerateSorted().
	void			enumerate(ctkSortedEnumerator<LPCTSTR>& enumerator, int level = 0, int nNestLevel = 0, int *pnCounter = NULL) const
		{
		enumerateSorted(enumerator,level,nNestLevel,pnCounter);
		}

	// Reports every entry of the string table to 'enumerator', ordered by
	// ascending ID, as a line of the form "AtmID#:<id>, string='<name>'".
	//
	// enumerator  - receives enumBegin(), one enumElement() per entry and
	//               enumEnd()
	// level,
	// nNestLevel  - not used by this implementation
	// pnCounter   - running element index, incremented once per reported
	//               entry; when NULL a local counter starting at 0 is used
	//
	// The body is only compiled when STLPORT is defined; otherwise the
	// function does nothing.
	void			enumerateSorted(ctkSortedEnumerator<LPCTSTR>& enumerator, int level = 0, int nNestLevel = 0, int *pnCounter = NULL) const
		{
#ifdef STLPORT
		// Maximum number of characters per formatted line (terminator excluded)
		enum { nMaxChars = 256 };

		int		nCounter = 0;
		TCHAR	szTemp[nMaxChars+1];
		// _sntprintf() does not write a terminator when the output is
		// truncated, so the terminator is preset in the slot directly
		// behind the last character it is allowed to write.
		szTemp[nMaxChars]='\0';

		typedef	std::map<int,const std::tstring,std::less<int> > sortmap_t;
		sortmap_t sort;
		sortmap_t::const_iterator it;
		cxAnalyzerTypeMap::strmap_type::const_iterator it0;
		if(pnCounter==NULL) pnCounter=&nCounter;

		// Invert the string->ID table so that iteration runs in ID order
		for(it0=m_mapStr.begin();it0!=m_mapStr.end();++it0)
			sort.insert( sortmap_t::value_type(it0->second, it0->first) );

		enumerator.enumBegin();
		for(it=sort.begin();it!=sort.end();++it)
			{
			// The count equals the index of the preset terminator, so a
			// truncated line never leaves an uninitialized character
			// between the formatted text and the terminator.
			_sntprintf(szTemp,nMaxChars,_T("AtmID#:%5d, string='%s'"),
				it->first, it->second.c_str() );

			enumerator.enumElement( (*pnCounter)++, szTemp);
			}
		enumerator.enumEnd();
#endif
		}
};

#endif // !defined(AFX_CXANALYZERTYPEMAP_H__0FAC41ED_07D2_4FD8_9514_34B34370D8B8__INCLUDED_)

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
Germany
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions