/*********************************************************************
Copyright (C) 2001 by
Alexander Berthold, alexander-berthold@web.de.
Hoegestr. 54
79108 Freiburg i. Breisgau
Germany
-- This file is part of cxTokenizer --
"cxTokenizer" is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or any later version.
"cxTokenizer" is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with "cxTokenizer"; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330,
Boston, MA 02111-1307 USA
---------------------------------------------------------------
If you find any bugs or if you make other corrections/
enhancements, i'd appreciate if you'd let me know about
that. My email is
alexander-berthold@web.de
If you share this code, do not remove this text.
---------------------------------------------------------------
*********************************************************************/
// cxTokenizerMap.cpp: implementation of the cxTokenizerMap class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "cxTokenizerMapData.h"
#include "cxTokenizerTokenRule.h"
#include "cxTokenizerCharTokenRule.h"
#include "cxTokenizerMap.h"
#include "cxTokenizerContextCookie.h"
#include "cxTokenizerContext.h"
#include "cxTokenizerStringTokenRule.h"
#include "cxTokenizerNumberTokenRule.h"
#include "cxTokenizerCommentTokenRule.h"
//#include "cxTokenizerPreprocessorTokenRules.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
#include "cxTokenizerMapDiags.cpp"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cxTokenizerMap::cxTokenizerMap
PURPOSE: Default constructor. Creates a map without loading or
registering any rules.
RETURNS: - n/a -
*********************************************************************/
cxTokenizerMap::cxTokenizerMap()
{
// vLoadFromStream() asserts that this flag is still 'false' when it starts.
m_fInitialized =false;
}
/*********************************************************************
FUNCTION: cxTokenizerMap::cxTokenizerMap
PURPOSE: Constructs the map and immediately loads its rules from
         the given text stream (see vLoadFromStream).
PARAMS:  input - stream containing the rule definitions
THROWS:  Whatever vLoadFromStream() throws. Rules that were already
         created before the failure are released first.
*********************************************************************/
cxTokenizerMap::cxTokenizerMap(std::basic_istream<TCHAR>& input)
{
    m_fInitialized = false;
    try
    {
        vLoadFromStream(input);
    }
    catch(...)
    {
        // The destructor is not run for an object whose constructor
        // throws, so the rules collected so far must be freed here;
        // otherwise they would leak.
        ttrdel_vec_type::iterator it;
        for(it = m_vecRulesToDelete.begin(); it != m_vecRulesToDelete.end(); ++it)
            delete (*it);
        m_vecRulesToDelete.clear();
        throw;
    }
}
/*********************************************************************
FUNCTION: cxTokenizerMap::~cxTokenizerMap
PURPOSE: Deletes every rule stored in m_vecRulesToDelete (in
         insertion order) and empties the vector.
RETURNS: - n/a -
*********************************************************************/
cxTokenizerMap::~cxTokenizerMap()
{
    ttrdel_vec_type::iterator itRule = m_vecRulesToDelete.begin();
    while(itRule != m_vecRulesToDelete.end())
    {
        delete (*itRule);
        ++itRule;
    }
    m_vecRulesToDelete.clear();
}
//////////////////////////////////////////////////////////////////////
// Protected operations
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cxTokenizerMap::pttrGetRuleForID
PURPOSE: Linear search through m_vecRulesToDelete for the first
         rule whose nGetIDValue() equals 'nIDValue'.
PARAMS:  nIDValue - ID to look for
RETURNS: Pointer to the first matching rule, or NULL if no rule
         has that ID. The rule stays in m_vecRulesToDelete and is
         deleted by the destructor of this map.
*********************************************************************/
const cxTokenizerTokenRule* cxTokenizerMap::pttrGetRuleForID(int nIDValue) const
{
    ttrdel_vec_type::const_iterator itRule = m_vecRulesToDelete.begin();

    // advance until a rule with the requested ID (or the end) is reached
    while(itRule != m_vecRulesToDelete.end() && !((*itRule)->nGetIDValue() == nIDValue))
        ++itRule;

    if(itRule == m_vecRulesToDelete.end())
        return NULL;
    return (*itRule);
}
/*********************************************************************
FUNCTION: cxTokenizerMap::pttrGetRuleForString
PURPOSE: Linear search through m_vecRulesToDelete for the first
         rule whose strGetTokenString() compares equal to
         'strToken'.
PARAMS:  strToken - token string to look for
RETURNS: Pointer to the first matching rule, or NULL if no rule
         has that token string. The rule stays in
         m_vecRulesToDelete and is deleted by the destructor of
         this map.
*********************************************************************/
const cxTokenizerTokenRule* cxTokenizerMap::pttrGetRuleForString(const std::tstring& strToken) const
{
    ttrdel_vec_type::const_iterator itRule = m_vecRulesToDelete.begin();

    // advance until a rule with the requested token string (or the end) is reached
    while(itRule != m_vecRulesToDelete.end() && !((*itRule)->strGetTokenString() == strToken))
        ++itRule;

    if(itRule == m_vecRulesToDelete.end())
        return NULL;
    return (*itRule);
}
/*********************************************************************
FUNCTION: cxTokenizerMap::fRegisterCharRule
PURPOSE: Inserts one node per character of the rule's token
         string below 'tmdRules' (via ptmdInsert), descending one
         level per character. Only the insert for the last
         character is given the rule; all earlier inserts are
         given NULL.
PARAMS:  tmdRules - tree node to start inserting at
         pRule    - character rule to register, must not be NULL
         pptmdEnd - optional; receives the node reached after the
                    last character
RETURNS: Always 'true'; failures are reported by exception.
THROWS:  cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER) if
         'pRule' is NULL or if ptmdInsert() returns NULL.
NOTE:    For an empty token string nothing is inserted and
         '*pptmdEnd' is set to '&tmdRules'.
*********************************************************************/
bool cxTokenizerMap::fRegisterCharRule(
    cxTokenizerMapData& tmdRules,
    cxTokenizerCharTokenRule* pRule,
    cxTokenizerMapData **pptmdEnd)
{
    if(pRule == NULL)
        throw cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER);

    std::tstring strText = pRule->strGetTokenString();
    const std::tstring::size_type nChars = strText.length();
    cxTokenizerMapData *ptmdNode = &tmdRules;

    for(std::tstring::size_type nPos = 0; nPos < nChars; ++nPos)
    {
        // the rule is attached to the final character only
        cxTokenizerCharTokenRule *pNodeRule = NULL;
        if(nPos + 1 == nChars)
            pNodeRule = pRule;

        cxTokenizerMapData *ptmdChild = ptmdNode->ptmdInsert(strText[nPos], pNodeRule);
        if(ptmdChild == NULL)
            throw cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER);
        ptmdNode = ptmdChild;
    }

    if(pptmdEnd != NULL)
        *pptmdEnd = ptmdNode;
    return true;
}
/*********************************************************************
FUNCTION: cxTokenizerMap::fRegisterComputedRule
PURPOSE: Allocates a new cxTokenizerMapData node that refers to
         'pRule', marks it as computed and inserts it into
         'tmdRules' under the key _T('\0').
PARAMS:  tmdRules - tree node that receives the new entry
         pRule    - rule to register, must not be NULL
RETURNS: Always 'true'; failures are reported by exception.
THROWS:  cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER) if
         'pRule' is NULL.
*********************************************************************/
bool cxTokenizerMap::fRegisterComputedRule(
    cxTokenizerMapData& tmdRules,
    cxTokenizerTokenRule* pRule)
{
    if(pRule == NULL)
        throw cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER);

    cxTokenizerMapData *ptmdNode = new cxTokenizerMapData;
    ptmdNode->m_fComputed = true;
    ptmdNode->m_pRule = pRule;

    // computed rules are all stored under the '\0' key
    cxTokenizerMapData::value_type entry(_T('\0'), ptmdNode);
    tmdRules.insert(entry);
    return true;
}
//////////////////////////////////////////////////////////////////////
// Operations
//////////////////////////////////////////////////////////////////////
/*********************************************************************
FUNCTION: cxTokenizerMap::vInitNotify
PURPOSE: Broadcasts initialization messages to the contained rules
RETURNS: - void -
*********************************************************************/
void cxTokenizerMap::vInitNotify(bool fInit, cxTokenizer *pxTokenizer)
{
m_tmdWIRules.vInitNotify(fInit,pxTokenizer);
m_tmdFPRules.vInitNotify(fInit,pxTokenizer);
}
/*********************************************************************
FUNCTION: cxTokenizerMap::fRegisterRule
PURPOSE: Registers the given rule in one of the two rule trees.
         Rules with the flag ttrf_within go into m_tmdWIRules,
         all others into m_tmdFPRules. Rules with the flag
         ttrf_character_rule are registered character by
         character (fRegisterCharRule), all others as computed
         rules (fRegisterComputedRule).
PARAMS:  pRule - rule to register, must not be NULL. If the flag
                 ttrf_character_rule is set, the object must
                 really be a cxTokenizerCharTokenRule.
RETURNS: 'true' on success.
THROWS:  cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER) if
         'pRule' is NULL; anything the called register function
         throws.
*********************************************************************/
bool cxTokenizerMap::fRegisterRule(cxTokenizerTokenRule* pRule)
{
    if(NULL == pRule)
        throw cxTokenizerException(ERR_UNEXPECTED_NULL_PARAMETER);

    // select the destination tree
    cxTokenizerMapData* ptmdRules = &m_tmdFPRules;
    if(pRule->fIsFlagSet(ttrf_within))
        ptmdRules = &m_tmdWIRules;

    if(!pRule->fIsFlagSet(ttrf_character_rule))
        return cxTokenizerMap::fRegisterComputedRule(*ptmdRules, pRule);

    // Downcast within the class hierarchy: static_cast performs the
    // required pointer adjustment (if any), reinterpret_cast would not.
    return cxTokenizerMap::fRegisterCharRule(
        *ptmdRules,
        static_cast<cxTokenizerCharTokenRule*>(pRule));
}
/*********************************************************************
FUNCTION: cxTokenizerMap::fBuildRule
PURPOSE: Creates a path in the 'map' - tree (...) for
a character token using the text 'strToken'
RETURNS: 'true' on success.
*********************************************************************/
bool cxTokenizerMap::fBuildRule(int nIDValue, const std::tstring strToken, const std::tstring& strInitString, bool fSeperator)
{
cxTokenizerCharTokenRule *ptctrRule = NULL;
if(fSeperator)
ptctrRule =new cxTokenizerCharTokenRule(
nIDValue,
((xttr_flags)(ttrf_character_rule | ttrf_within | ttrf_intermediate)),
strToken.data(), strInitString);
else
ptctrRule =new cxTokenizerCharTokenRule(
nIDValue,
((xttr_flags)(ttrf_character_rule | ttrf_seperate | ttrf_not_intermediate)),
strToken.data(), strInitString);
m_vecRulesToDelete.push_back(ptctrRule);
if(!fRegisterRule(ptctrRule))
return false;
return true;
}
/*********************************************************************
FUNCTION: cxTokenizerMap::fBuildHybridRule
PURPOSE: Creates a path in the 'map' - tree (...) for
a character token using the text 'strToken'
RETURNS: 'true' on success.
*********************************************************************/
bool cxTokenizerMap::fBuildHybridRule(int nIDValue, const std::tstring strToken, const std::tstring& strInitString, bool fSeperator, cxTokenizerTokenRule *pRule)
{
cxTokenizerCharTokenRule *ptctrRule = NULL;
cxTokenizerMapData* ptmdRules = NULL;
cxTokenizerMapData* ptmdEnd = NULL;
cxTokenizerMapData* ptmdTemp = NULL;
if(fSeperator)
ptmdRules =&m_tmdWIRules,
ptctrRule =new cxTokenizerCharTokenRule(
0,
((xttr_flags)(ttrf_character_rule | ttrf_within | ttrf_intermediate)),
strToken.data(), strInitString);
else
ptmdRules =&m_tmdFPRules,
ptctrRule =new cxTokenizerCharTokenRule(
0,
((xttr_flags)(ttrf_character_rule | ttrf_seperate | ttrf_not_intermediate)),
strToken.data(), strInitString);
m_vecRulesToDelete.push_back(ptctrRule);
if(!cxTokenizerMap::fRegisterCharRule(
*ptmdRules,
reinterpret_cast<cxTokenizerCharTokenRule*>(ptctrRule),
&ptmdEnd))
return false;
ASSERT(ptmdEnd!=NULL);
ptmdEnd->m_fComputed=true;
ptmdEnd->m_nHybridLength=strToken.size();
ptmdEnd->m_pRule=pRule;
return true;
}
/*********************************************************************
FUNCTION: cxTokenizerMap::vLoadFromStream
PURPOSE: Initializes the map from the text stream
RETURNS: - void -
*********************************************************************/
void cxTokenizerMap::vLoadFromStream(std::basic_istream<TCHAR>& input)
{
ASSERT(m_fInitialized==false);
enum mode_type {
mode_invalid,
mode_seperators,
mode_tokens,
mode_rules
};
std::tstring strLine,strLine0;
mode_type mode = mode_invalid;
while(!input.eof())
{
bool fValid = false;
std::getline(input,strLine);
if(strLine.length()==0)
continue;
if(strLine[0]==_T('\''))
continue;
strLine0 =strLine;
if(!fValid && _tcsicmp(strLine.data(),_T("[seperators]"))==0)
mode=mode_seperators, fValid=true;
if(!fValid && _tcsicmp(strLine.data(),_T("[tokens]"))==0)
mode=mode_tokens, fValid=true;
if(!fValid && _tcsicmp(strLine.data(),_T("[rules]"))==0)
mode=mode_rules, fValid=true;
if(!fValid && _tcsicmp(strLine.data(),_T("[ends]"))==0)
break;
if(!fValid && _tcsicmp(strLine.data(),_T("[grammar]"))==0)
break;
if(!fValid)
{
std::string::size_type nTemp = strLine.find(':');
int nIDValue = 0;
if(nTemp!=std::string::npos)
{
nIDValue =_tcstoul(strLine.substr(0,nTemp).data(),NULL,10);
strLine =ctkMisc::strParseEscapeCharacters(strLine.substr(nTemp+1));
try {
if(mode==mode_seperators)
fValid =fBuildRule(nIDValue,strLine,strLine0,true);
if(mode==mode_tokens)
fValid =fBuildRule(nIDValue,strLine,strLine0,false);
if(mode==mode_rules)
{
cxTokenizerTokenRule *pNewRule = NULL;
bool fNoRegister = false;
std::tstring strRuleName, strArgs;
std::tstring::size_type pos = strLine.find(',');
// extract arguments (if given)
if(pos!=std::tstring::npos)
strRuleName = strLine.substr(0,pos), strArgs = strLine.substr(pos), strArgs+=',';
else
strRuleName = strLine;
try {
if(_tcsicmp(strRuleName.data(),_T("strings"))==0)
pNewRule = new cxTokenizerStringTokenRule(nIDValue,((xttr_flags)(ttrf_computed_rule | ttrf_within | ttrf_intermediate)), strLine0);
if(_tcsicmp(strRuleName.data(),_T("numbers"))==0)
pNewRule = new cxTokenizerNumberTokenRule(nIDValue,((xttr_flags)(ttrf_computed_rule | ttrf_within | ttrf_intermediate)), strLine0);
if(_tcsicmp(strRuleName.data(),_T("cppmultilinecomments"))==0)
{
if(strArgs.find(",collect,")!=std::tstring::npos)
pNewRule = new cxTokenizerMultiLineCommentTokenRule<true>(nIDValue,((xttr_flags)(ttrf_computed_rule | ttrf_within | ttrf_intermediate)), strLine0);
else
pNewRule = new cxTokenizerMultiLineCommentTokenRule<false>(nIDValue,((xttr_flags)(ttrf_computed_rule | ttrf_within | ttrf_intermediate)), strLine0);
fBuildHybridRule(0,"/*",strLine0,true,pNewRule);
fNoRegister = true;
}
if(_tcsicmp(strRuleName.data(),_T("cppsinglelinecomments"))==0)
{
if(strArgs.find(",collect,")!=std::tstring::npos)
pNewRule = new cxTokenizerSingleLineCommentTokenRule<true>(nIDValue,((xttr_flags)(ttrf_computed_rule | ttrf_within | ttrf_intermediate)), strLine0);
else
pNewRule = new cxTokenizerSingleLineCommentTokenRule<false>(nIDValue,((xttr_flags)(ttrf_computed_rule | ttrf_within | ttrf_intermediate)), strLine0);
fBuildHybridRule(0,"//",strLine0,true,pNewRule);
fNoRegister = true;
}
/* if(_tcsicmp(strRuleName.data(),_T("cpppreprocessor"))==0)
pNewRule = new cxTokenizerPreprocessorTokenRule(nIDValue,((xttr_flags)(ttrf_computed_rule | ttrf_within | ttrf_intermediate)), strLine0);*/
if(pNewRule==NULL)
pNewRule = new cxDummyTokenizerTokenRule(nIDValue,strLine.data());
pNewRule->vSetArguments(strArgs);
}
catch(cxTokenizerException& e)
{
if(pNewRule) delete pNewRule;
pNewRule = NULL;
throw e;
}
catch(...)
{
ASSERT(FALSE);
AfxMessageBox("Unhandled Exception in cxTokenizerMap::vLoadFromStream.");
delete pNewRule; pNewRule=NULL;
}
if(pNewRule!=NULL)
{
m_vecRulesToDelete.push_back(pNewRule);
if(!fNoRegister)
{
if(fRegisterRule(pNewRule))
fValid = true;
}
else
fValid = true;
}
}
}
catch(cxTokenizerException& e)
{
if(e.ErrorString()==NULL)
e.SetErrorString(strLine.c_str());
throw e;
}
}
}
// ASSERT(fValid);
if(!fValid)
throw cxTokenizerException(ERR_INVALID_RULE,strLine.c_str());
}
m_tmdFPRules.vSetQueryStage();
m_tmdWIRules.vSetQueryStage();
}