/*********************************************************************
Copyright (C) 2001 by
Alexander Berthold, alexander-berthold@web.de.
Hoegestr. 54
79108 Freiburg i. Breisgau
Germany
-- This file is part of cxTokenizer --
"cxTokenizer" is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or any later version.
"cxTokenizer" is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with "cxTokenizer"; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330,
Boston, MA 02111-1307 USA
---------------------------------------------------------------
If you find any bugs or if you make other corrections/
enhancements, i'd appreciate if you'd let me know about
that. My email is
alexander-berthold@web.de
If you share this code, do not remove this text.
---------------------------------------------------------------
Class: cxTokenizer
Author: Alexander Berthold
Copyright: Alexander Berthold
Date: 2001/06/12
Version: 0.1.16
Purpose: - Main class for the lexical analyzer.
- Scans the input character stream for tokens.
- Calls a call-back class ('cxTokenizerListener') for each
recognized token.
- Uses an incremental scan; should be pretty fast.
- Can also parse computed tokens, like "text" and 1234.
Version history:
- 2001/04/19
First functional version implemented and released (0.1.13).
- 2001/05/11
Minor allocation bug fixed.
- 2001/05/19
Renamed project from 'cpLexxer' to 'cxTokenizer'.
- 2001/06/01
Added vSetInputStream() to dynamically change the source
input stream.
- 2001/06/02
Current source labeled version 0.1.14
- 2001/06/04
Modified cxTokenizerListener interface; added parameter
'ptisStream', which allows querying the current token's position
within the input stream.
- 2001/06/12
Current source labeled version 0.1.16
*********************************************************************/
// cxTokenizer.h: interface for the cxTokenizer class.
//
//////////////////////////////////////////////////////////////////////
#if !defined(AFX_CXTOKENIZER_H__D79EE6C7_40E0_4452_A7A4_20278169F438__INCLUDED_)
#define AFX_CXTOKENIZER_H__D79EE6C7_40E0_4452_A7A4_20278169F438__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
class cxTokenizerInputStream;
// Callback interface for token output. According to the file header, the
// tokenizer calls a cxTokenizerListener for each token it recognizes.
// The interface is abstract: vRegisterToken() is pure virtual and must be
// implemented by a derived class.
// NOTE(review): no destructor is declared here; whether one of the base
// classes supplies a virtual destructor is not visible from this header -
// confirm before deleting listeners through a cxTokenizerListener pointer.
class cxTokenizerListener :
public ctkCheckValid,
public ctkExternalObjectPointer
{
// Operations
public:
// Receives one recognized token.
//   strTokenText - presumably the text of the recognized token (inferred
//                  from the name; confirm against the implementation).
//   pltrRule     - presumably the rule that matched the token; passed as
//                  pointer-to-const. Whether it may be null is not visible
//                  here.
//   ptisStream   - the input stream; per the version history it was added
//                  so that the listener can query the current token's
//                  position within the input stream.
virtual void vRegisterToken( const std::tstring& strTokenText, const cxTokenizerTokenRule* pltrRule,
const cxTokenizerInputStream *ptisStream) = 0;
};
// Main class of the lexical analyzer (see the file header): scans the input
// character stream for tokens and reports each recognized token to a
// cxTokenizerListener.
// Only the two inline members below have their bodies in this header; every
// other member is defined elsewhere, so the comments here describe only what
// the declarations themselves show.
class cxTokenizer
{
// Construction/Destruction
public:
// Constructs a tokenizer from a token map and a listener; this overload
// takes no input stream.
// NOTE(review): presumably the stream is supplied afterwards through
// vSetInputStream() - confirm against the implementation.
cxTokenizer(cxTokenizerMap* ptmLexxerMap,
cxTokenizerListener* ptlReceiver);
// Constructs a tokenizer from an input stream, a token map and a listener.
cxTokenizer(cxTokenizerInputStream* ptisData,
cxTokenizerMap* ptmLexxerMap,
cxTokenizerListener* ptlReceiver);
// Virtual destructor.
// NOTE(review): whether it releases any of the pointers held below is not
// visible from this header.
virtual ~cxTokenizer();
// Attributes
// NOTE(review): the three pointer members are raw pointers and no copy
// constructor / assignment operator is declared; ownership is not visible
// from this header - confirm before copying cxTokenizer objects.
protected:
// Contains the state of the lexxer
cxTokenizerContext m_tcContextInfo;
// The 'tree' containing the tokens
cxTokenizerMap *m_ptmLexxerMap;
// The input stream
cxTokenizerInputStream *m_ptisData;
// The listener to the token output
cxTokenizerListener *m_ptlReceiver;
// ctkCheckValid operations
// NOTE(review): this declaration of cxTokenizer lists no base class, so
// fCheckValid() is introduced here rather than overriding a ctkCheckValid
// member - confirm whether deriving from ctkCheckValid was intended.
public:
#ifdef _DEBUG
// Debug builds (_DEBUG defined): fCheckValid() is defined out of line and
// the static fRunDiagnostics() is additionally available.
virtual bool fCheckValid() const;
static bool fRunDiagnostics();
#else
// Other builds: fCheckValid() is an inline stub that always returns true,
// and fRunDiagnostics() is not declared at all.
virtual bool fCheckValid() const { return true; };
#endif
// Protected operations
protected:
/*** Internal token recognition methods ***/
// A delimiting token has been recognized
void vDelimTokenRecognized();
// A non-delimiting token has been recognized.
// Note: strTokenText is taken by value, so each call copies the string.
// NOTE(review): confirm whether the implementation needs its own copy.
void vRecognizeNonDelimTokens(std::tstring strTokenText);
/*** Post token recognition methods ***/
// Is called by the lexxer after a token has been recognized.
// The parameters mirror cxTokenizerListener::vRegisterToken(), except that
// pttrRule is a pointer to non-const here.
// NOTE(review): presumably forwards the token to m_ptlReceiver - confirm.
void vRegisterToken( const std::tstring& strTokenText, cxTokenizerTokenRule* pttrRule,
const cxTokenizerInputStream* ptisStream);
/*** Helper functions ***/
// Find the longest currently active (completed) rule
// NOTE(review): the comment says "longest" while the function name says
// "Earliest" - confirm which criterion the implementation applies.
cxTokenizerContext::cxListEntry
*pteFindEarliestCompletedToken();
// Find the longest applied, not yet invalidated rule
// NOTE(review): same "longest" vs. "Earliest" mismatch as above - confirm.
cxTokenizerContext::cxListEntry
*pteFindEarliestAppliedToken();
// Mark all active rules except the given one for deletion
void vMarkAllForDeletionExcept(const cxTokenizerContext::cxListEntry* pEntry);
// Operations
public:
// Set the input stream (added in 0.1.14 to allow changing the source input
// stream dynamically, per the version history).
void vSetInputStream(cxTokenizerInputStream* pxtisInput);
// Return the input stream (the stored m_ptisData pointer, unchanged).
cxTokenizerInputStream *ptiGetInputStream() const { return m_ptisData; };
// Parse the next character. 'fOverrideIsLastChar' tells whether the next
// character is to be treated as if it were the last one of the stream;
// it defaults to false.
void vParseCharacter(bool fOverrideIsLastChar = false);
};
#endif // !defined(AFX_CXTOKENIZER_H__D79EE6C7_40E0_4452_A7A4_20278169F438__INCLUDED_)