Click here to Skip to main content
15,884,537 members
Articles / Programming Languages / C++

Tokenizer and analyzer package supporting precedence prioritized rules

Rate me:
Please Sign up or sign in to vote.
5.00/5 (4 votes)
1 Jan 2002   3 min read   181.3K   2.8K   54  
A library allowing you to conveniently build a custom tokenizer and analyzer supporting precedence-prioritized rules
/*********************************************************************
	Copyright (C) 2001 by

		Alexander Berthold, alexander-berthold@web.de.
		Hoegestr. 54
		79108 Freiburg i. Breisgau
		Germany

    -- This file is part of cxTokenizer --

    "cxTokenizer" is free software; you can redistribute it and/or 
	modify it under the terms of the GNU Lesser General Public 
	License as published by the Free Software Foundation; either 
	version 2 of the License, or any later version.

    "cxTokenizer" is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
	License along with "cxTokenizer"; if not, write to the Free 
	Software  Foundation, Inc., 59 Temple Place, Suite 330, 
	Boston, MA  02111-1307  USA

    ---------------------------------------------------------------
      If you find any bugs or if you make other corrections/
	  enhancements, i'd appreciate if you'd let me know about 
	  that. My email is
  
       alexander-berthold@web.de
  
      If you share this code, do not remove this text.
    ---------------------------------------------------------------

Class:      cxTokenizerContext
Author:     Alexander Berthold
Copyright:  Alexander Berthold
Date:       2001/06/12
Version:	0.1.16
Purpose:    This class manages the context of the lexxer.
            - It maintains a list of currently active lexxer rules
            - It stores the cookies of the active lexxer rules


Version history:
	-	2001/05/16
		Fixed problem with std::list<cxListEntry*> destruction and
		STLport 4.0.

	-	2001/05/19
		Renamed class from 'cpLexxerContext' to 'cxTokenizerContext'.

	-	2001/06/12
		Current source labeled version 0.1.16

*********************************************************************/

// cxTokenizerContext.h: interface for the cxTokenizerContext class.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_CXTOKENIZERCONTEXT_H__121468FE_E85D_42BE_8226_FDA6A3D57D52__INCLUDED_)
#define AFX_CXTOKENIZERCONTEXT_H__121468FE_E85D_42BE_8226_FDA6A3D57D52__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

// Forwards
// Both classes are only referenced through pointers in this header
// (rule pointers held by cxListEntry, rule parameters of the cookie
// operations), so forward declarations are sufficient here.
class	cxTokenizerTokenRule;
class	cxTokenizerMapData;

// cxTokenizerContext
//
// Manages the context of the lexxer:
//	- keeps the list of currently active lexxer rules (m_lstTokenRules)
//	- stores the cookies set by the lexxer rules (m_mapCookies)
//	- holds the text currently being processed (m_strCurrentText)
//
// cxTokenizer is a friend and therefore has access to the protected
// local classes and attributes declared here.
class cxTokenizerContext : 
	public ctkFlagsMixin<xtctx_flags>,
	public ctkCheckValid
{
// Construction/Destruction
public:
	cxTokenizerContext();
	virtual ~cxTokenizerContext();

// Local classes
protected:
	// Data class
	//
	// One entry of the active-rule list. It records where the candidate
	// match started, which (sub-)rule is currently being followed and,
	// once completed, where the match ended and which rule was last valid.
	class cxListEntry :
		public ctkCheckValid
	{
	// Construction/Destruction
	public:
		// Creates an entry that is neither completed, nor 'not applying',
		// nor marked for deletion.
		//	_nStartPosition	- start position of the candidate match
		//	_ptmdRule		- rule the entry starts with
		cxListEntry(int _nStartPosition, cxTokenizerMapData *_ptmdRule)
			:	fNotApplying(false),
				fMarkedForDeletion(false),
				nStartPosition(_nStartPosition),
				nEndPosition(-1),			// -1 == not completed yet
				ptmdRule(_ptmdRule),
				ptmdRuleLastValid(NULL)
			{
			}

	// Attributes
	protected:
		bool				fNotApplying;		// Rule not applying anymore
		bool				fMarkedForDeletion;	// Rule is marked for deletion :)
		int					nStartPosition;		// Start position given at construction
		int					nEndPosition;		// End position; -1 until vSetCompleted()
		cxTokenizerMapData	*ptmdRule;			// Current sub-rule
		cxTokenizerMapData	*ptmdRuleLastValid;	// Last valid rule (NULL until completed)

	// Operations
	public:
        /*** Diagnostics ***/
#ifdef _DEBUG
		bool				fCheckValid() const;
#else
		// Release builds perform no validation.
		bool				fCheckValid() const
							{ return true; }
#endif

        /*** Status query ***/
		// true once vSetCompleted() has stored an end position.
		bool				fIsCompleted() const
							{ return (nEndPosition!=-1); }
		// true once vSetNotApplying() has been called.
		bool				fIsNotApplying() const
							{ return fNotApplying; }
		// true once vMarkForDeletion() has been called.
		bool				fIsMarkedForDeletion() const
							{ return fMarkedForDeletion; }
		// Returns the current sub-rule.
		cxTokenizerMapData*	ptmdGetRule() const
							{ return ptmdRule; }
		// Returns the last valid rule (NULL while not completed).
		cxTokenizerMapData*	ptmdGetLastValidRule() const
							{ return ptmdRuleLastValid; }
		// Returns the start position given at construction.
		int					nGetStartPosition() const
							{ return nStartPosition; }
		// Returns the end position, or -1 while not completed.
		int					nGetEndPosition() const
							{ return nEndPosition; }

        /*** Status manipulation ***/
		// Replaces the current sub-rule.
		void				vSetRule(cxTokenizerMapData* _ptmdRule)
							{ ptmdRule=_ptmdRule; }
		// Flags the entry for deletion; the flag is never reset here.
		void				vMarkForDeletion()
							{ fMarkedForDeletion=true; }
		// Flags the rule as not applying anymore; never reset here.
		void				vSetNotApplying()
							{ fNotApplying=true; }
		// Marks the entry as completed.
		//	_nEndPosition		- end position, must not be -1
		//	_ptmdRuleLastValid	- last valid rule, must not be NULL
		// Both preconditions are checked with ASSERT after the values
		// have been stored.
		void				vSetCompleted(int _nEndPosition, cxTokenizerMapData* _ptmdRuleLastValid)
							{
							nEndPosition=_nEndPosition;
							ptmdRuleLastValid=_ptmdRuleLastValid;
							ASSERT(ptmdRuleLastValid!=NULL);
							ASSERT(nEndPosition!=-1);
							}

	};

	// Allocator classes

	// Helper giving the list base class a short name, so that
	// tc_list_type can forward to the base implementations.
	class hlp1 : public std::list<cxListEntry*>	{};

	// List of cxListEntry pointers whose erase()/clear() operations also
	// delete the entries the removed elements point to.
	//
	// NOTE(review): only erase() and clear() delete the entries. No
	// destructor is declared here, so entries still in the list when it is
	// destroyed are presumably released by the owner - confirm against
	// ~cxTokenizerContext().
	class tc_list_type : public hlp1
	{
	public:
		// Deletes the entry 'it' refers to and removes the element.
		// Returns what the base-class erase() returns.
		iterator erase(iterator it)
			{
			delete (*it);
			return hlp1::erase(it);
			}

		// Deletes every entry in [first,last) and removes the elements.
		// Returns what the base-class erase() returns.
		iterator erase(iterator first, iterator last)
			{
			// The iterator has to advance on every pass: each entry must
			// be deleted exactly once, and the loop must terminate.
			for(iterator it=first; it!=last; ++it)
				delete (*it);
			return hlp1::erase(first,last);
			}

		// Deletes all entries and empties the list.
		void clear()
			{
			erase(begin(),end());
			}
	};

	// Helper giving the allocator base class a short name.
	class hlp2 : public std::allocator<std::pair<DWORD,cxTokenizerContextCookie*> >
		{};

	// Allocator for the cookie map which deletes the cookie object when a
	// map element is destroyed through this allocator.
	//
	// NOTE(review): this only has an effect if the container calls
	// destroy() on this very allocator type. Standard libraries that
	// rebind the allocator to their node type or go through
	// std::allocator_traits may never call it, and
	// std::allocator::destroy/pointer are deprecated in C++17 and removed
	// in C++20 - verify cookie cleanup on the toolchain in use.
	class cxTokenizerContextCookie_allocator : public hlp2
	{
	public:
		void destroy(pointer p)
			{
			delete p->second;
			hlp2::destroy(p);
			}
	};

// Typedefs
public:
	// Cookie map: DWORD cookie id -> cookie object.
	typedef std::map<DWORD,cxTokenizerContextCookie*,std::less<DWORD>,cxTokenizerContextCookie_allocator>
					tc_cookiemap_type;
	typedef tc_cookiemap_type::iterator 
					cookiemap_iterator;
	typedef tc_cookiemap_type::const_iterator 
					const_cookiemap_iterator;
	typedef tc_cookiemap_type::value_type
					cookiemap_valuetype;
	typedef	tc_list_type::iterator
					rulelist_iterator;

// Attributes
protected:
	// List of currently used rules
	tc_list_type	m_lstTokenRules;
	// Current text
	std::tstring	m_strCurrentText;
	// The map of cookies (each 'token rule' can set one)
	tc_cookiemap_type m_mapCookies;

// Operations
public:
#ifdef _DEBUG
	virtual	bool	fCheckValid() const;
	static bool		fRunDiagnostics();
#else
	// Release builds perform no validation.
	virtual	bool	fCheckValid() const
					{ return true; }
#endif

	/*** Token rule list operations ***/
	// Returns a pointer to the internal list of active rules
	// (never NULL; the list is a member of this object).
	tc_list_type*
					plstGetTokenRuleList()
					{ return &m_lstTokenRules; }

	/*** Cookie operations ***/
	// The cookie operations are defined out of line. Cookies can be
	// addressed either by the token rule or by a DWORD cookie id.
	// NOTE(review): how a rule maps to its cookie id is not visible in
	// this header - see the implementation file.
	bool			fSetCookie(const cxTokenizerTokenRule* pttrRule,
							   cxTokenizerContextCookie* ptccCookie);
	cxTokenizerContextCookie*
					ptccGetCookie(const cxTokenizerTokenRule* pttrRule) const;
	// Same lookup as ptccGetCookie(pttrRule), returned as pointer-to-const.
	const cxTokenizerContextCookie*
					ptccGetConstCookie(const cxTokenizerTokenRule* pttrRule) const
					{ return ptccGetCookie(pttrRule); }
	cxTokenizerContextCookie*
					ptccGetCookie(DWORD dwCookieID) const;
	bool			fDeleteCookie(cxTokenizerTokenRule* pttrRule);
	bool			fDeleteCookie(DWORD dwCookieID);

	/*** Data access operations ***/
	// Mutable access to the current text.
	std::tstring&	strGetCurrentText()
					{ return m_strCurrentText; }
	// Read-only access to the current text.
	const std::tstring& strGetCurrentTextConst() const
					{ return m_strCurrentText; }
	// Defined out of line.
	int				nDeleteMarkedListEntries();
	void			vCleanUpAfterTokenRecognition();

// Friend declarations
	friend class cxTokenizer;
};

#endif // !defined(AFX_CXTOKENIZERCONTEXT_H__121468FE_E85D_42BE_8226_FDA6A3D57D52__INCLUDED_)

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
Germany
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions