/*********************************************************************
Copyright (C) 2001 by
Alexander Berthold, alexander-berthold@web.de.
Hoegestr. 54
79108 Freiburg i. Breisgau
Germany
-- This file is part of cxTokenizer --
"cxTokenizer" is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or any later version.
"cxTokenizer" is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with "cxTokenizer"; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330,
Boston, MA 02111-1307 USA
---------------------------------------------------------------
If you find any bugs or if you make other corrections/
enhancements, i'd appreciate if you'd let me know about
that. My email is
alexander-berthold@web.de
If you share this code, do not remove this text.
---------------------------------------------------------------
*********************************************************************/
// cxTokenizerMap.cpp: implementation of the cxTokenizerMap class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "cxTokenizerMapData.h"
#include "cxTokenizerTokenRule.h"
#include "cxTokenizerCharTokenRule.h"
#include "cxTokenizerMap.h"
#include "cxTokenizerContextCookie.h"
#include "cxTokenizerStringTokenRule.h"
#include "cxTokenizerNumberTokenRule.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
#include "cxTokenizerMapDiags.cpp"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Default constructor: only marks the map as not yet initialized.
// No rules are registered here.
cxTokenizerMap::cxTokenizerMap()
    : m_fInitialized(false)
{
}
// Constructs the map and immediately fills it from 'input' by
// delegating to vLoadFromStream(). The "not initialized" flag is set
// before loading because vLoadFromStream() asserts on it.
cxTokenizerMap::cxTokenizerMap(std::basic_istream<TCHAR>& input)
    : m_fInitialized(false)
{
    vLoadFromStream(input);
}
// Destructor: deletes every rule pointer that was recorded in
// m_vecRulesToDelete (filled by fBuildRule() and vLoadFromStream())
// and then empties the vector.
cxTokenizerMap::~cxTokenizerMap()
{
    ttrdel_vec_type::iterator itRule = m_vecRulesToDelete.begin();
    while(itRule != m_vecRulesToDelete.end())
    {
        delete *itRule;
        ++itRule;
    }
    m_vecRulesToDelete.clear();
}
//////////////////////////////////////////////////////////////////////
// Protected operations
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cxTokenizerMap::fRegisterCharRule
PURPOSE:  Registers a character rule below 'tmdRules'.
          ptmdInsert() is called once per character of the rule's
          token string, each time on the node returned by the
          previous call. The rule pointer itself is handed over only
          together with the last character; all earlier characters
          are inserted with a NULL rule.
          An empty token string inserts nothing.
RETURNS:  Always 'true'; failures are reported by exception.
THROWS:   cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER) if
          'pRule' is NULL or if ptmdInsert() returns NULL.
*********************************************************************/
bool cxTokenizerMap::fRegisterCharRule(
    cxTokenizerMapData& tmdRules,
    cxTokenizerCharTokenRule* pRule)
{
    if(pRule == NULL)
        throw cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER);

    const std::tstring strToken = pRule->strGetTokenString();
    const std::tstring::size_type nChars = strToken.length();

    // Node the next character gets inserted into; starts at the root.
    cxTokenizerMapData* ptmdNode = &tmdRules;

    for(std::tstring::size_type nPos = 0; nPos < nChars; ++nPos)
    {
        // Only the final character of the token carries the rule.
        cxTokenizerCharTokenRule* pRuleForNode = NULL;
        if(nPos + 1 == nChars)
            pRuleForNode = pRule;

        cxTokenizerMapData* const ptmdNext =
            ptmdNode->ptmdInsert(strToken[nPos], pRuleForNode);
        if(ptmdNext == NULL)
            throw cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER);

        ptmdNode = ptmdNext;
    }
    return true;
}
/*********************************************************************
FUNCTION: cxTokenizerMap::fRegisterComputedRule
PURPOSE:  Registers a computed (non-character) rule in 'tmdRules'.
          A new cxTokenizerMapData node is allocated, flagged as
          computed, pointed at 'pRule' and inserted into 'tmdRules'
          under the key _T('\0').
RETURNS:  Always 'true'; failures are reported by exception.
THROWS:   cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER) if
          'pRule' is NULL.
*********************************************************************/
bool cxTokenizerMap::fRegisterComputedRule(
    cxTokenizerMapData& tmdRules,
    cxTokenizerTokenRule* pRule)
{
    if(pRule == NULL)
        throw cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER);

    cxTokenizerMapData* const ptmdComputed = new cxTokenizerMapData;
    ptmdComputed->m_fComputed = true;
    ptmdComputed->m_pRule = pRule;

    // NOTE(review): the result of insert() is not inspected. If the
    // container rejects a duplicate _T('\0') key the new node would
    // be orphaned - confirm against cxTokenizerMapData.
    const cxTokenizerMapData::value_type entry(_T('\0'), ptmdComputed);
    tmdRules.insert(entry);
    return true;
}
//////////////////////////////////////////////////////////////////////
// Operations
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cxTokenizerMap::fRegisterRule
PURPOSE:  Registers the given rule in the map.
          Rules with the flag 'ttrf_within' go into m_tmdWIRules,
          all others into m_tmdFPRules. Rules flagged as
          'ttrf_character_rule' are registered character by
          character via fRegisterCharRule(), every other rule via
          fRegisterComputedRule().
RETURNS:  'true' on success.
THROWS:   cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER) if
          'pRule' is NULL.
*********************************************************************/
bool cxTokenizerMap::fRegisterRule(cxTokenizerTokenRule* pRule)
{
    if(NULL == pRule)
        throw cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER);

    cxTokenizerMapData& tmdTarget =
        pRule->fIsFlagSet(ttrf_within) ? m_tmdWIRules : m_tmdFPRules;

    if(pRule->fIsFlagSet(ttrf_character_rule))
    {
        // The flag is what identifies the dynamic type here. A downcast
        // inside a class hierarchy must use static_cast so that any
        // required pointer adjustment is applied; reinterpret_cast
        // would just reuse the address unchanged.
        return cxTokenizerMap::fRegisterCharRule(
            tmdTarget,
            static_cast<cxTokenizerCharTokenRule*>(pRule));
    }

    return cxTokenizerMap::fRegisterComputedRule(tmdTarget, pRule);
}
/*********************************************************************
FUNCTION: cxTokenizerMap::fBuildRule
PURPOSE: Creates a path in the 'map' - tree (...) for
a character token using the text 'strToken'
RETURNS: 'true' on success.
*********************************************************************/
bool cxTokenizerMap::fBuildRule(int nIDValue, std::tstring strToken, bool fSeperator)
{
cxTokenizerCharTokenRule *ptctrRule = NULL;
if(fSeperator)
ptctrRule =new cxTokenizerCharTokenRule(
nIDValue,
((xttr_flags)(ttrf_character_rule | ttrf_within | ttrf_intermediate)),
strToken.data());
else
ptctrRule =new cxTokenizerCharTokenRule(
nIDValue,
((xttr_flags)(ttrf_character_rule | ttrf_seperate | ttrf_not_intermediate)),
strToken.data());
m_vecRulesToDelete.push_back(ptctrRule);
if(!fRegisterRule(ptctrRule))
return false;
return true;
}
/*********************************************************************
FUNCTION: cxTokenizerMap::vLoadFromStream
PURPOSE: Initializes the map from the text stream
RETURNS: - void -
*********************************************************************/
void cxTokenizerMap::vLoadFromStream(std::basic_istream<TCHAR>& input)
{
ASSERT(m_fInitialized==false);
enum mode_type {
mode_invalid,
mode_seperators,
mode_tokens,
mode_rules
};
std::tstring strLine;
mode_type mode = mode_invalid;
while(!input.eof())
{
bool fValid = false;
std::getline(input,strLine);
if(strLine.length()==0)
continue;
if(strLine[0]==_T('\''))
continue;
if(!fValid && _tcsicmp(strLine.data(),_T("[seperators]"))==0)
mode=mode_seperators, fValid=true;
if(!fValid && _tcsicmp(strLine.data(),_T("[tokens]"))==0)
mode=mode_tokens, fValid=true;
if(!fValid && _tcsicmp(strLine.data(),_T("[rules]"))==0)
mode=mode_rules, fValid=true;
if(!fValid && _tcsicmp(strLine.data(),_T("[ends]"))==0)
break;
if(!fValid)
{
std::string::size_type nTemp = strLine.find(':');
int nIDValue = 0;
if(nTemp!=std::string::npos)
{
nIDValue =_tcstoul(strLine.substr(0,nTemp).data(),NULL,10);
strLine =ctkMisc::strParseEscapeCharacters(strLine.substr(nTemp+1));
}
if(mode==mode_seperators)
fValid =fBuildRule(nIDValue,strLine,true);
if(mode==mode_tokens)
fValid =fBuildRule(nIDValue,strLine,false);
if(mode==mode_rules)
{
cxTokenizerTokenRule *pNewRule = NULL;
if(_tcsicmp(strLine.data(),_T("strings"))==0)
pNewRule = new cxTokenizerStringTokenRule(nIDValue,((xttr_flags)(ttrf_computed_rule | ttrf_within | ttrf_intermediate)));
if(_tcsicmp(strLine.data(),_T("numbers"))==0)
pNewRule = new cxTokenizerNumberTokenRule(nIDValue,((xttr_flags)(ttrf_computed_rule | ttrf_seperate | ttrf_not_intermediate)));
m_vecRulesToDelete.push_back(pNewRule);
if(fRegisterRule(pNewRule))
fValid = true;
}
}
ASSERT(fValid);
}
}