/* samples for 'PARSING and GRAMMARS'
Author: Martin.Holzherr@infobrain.com
Purpose: demonstrate simple recursive descent parsing techniques
*/
#pragma warning(disable:4786)
#include <cstdlib>
#include <iostream>
#include <cassert>
#include <cstdio>
#include <string>
#include <algorithm>
#include <time.h>
#include <iomanip>
#include <climits>
#include <sstream>
#include "peg_templ.h"
#include "peg_proc.h"
struct C{
C():nNofIntegers(0),nNofIdents(0){}
int nNofIntegers;
int nNofIdents;
};
class SimpleTimer{
public:
SimpleTimer(const char* msg):m_msg(msg),m_start(clock()){}
void SetErr(std::string s){m_err=s;}
~SimpleTimer()
{
m_stop = clock();
std::cout << ">> CPU usage (" << std::setw(35) << m_err+m_msg
<< ") = " << std::setprecision(12)
<< ((double)(m_stop - m_start)/ CLOCKS_PER_SEC)
<< " seconds" << std::endl;
}
private:
time_t m_start, m_stop;
std::string m_msg;
std::string m_err;
};
void ShowOptions()
{
std::cout
<< "-s\t\tshow built in samples" << std::endl
<< "-d <digits>\tExpects parenthized digits, e.g. 1132 or (123) or (((5)))" << std::endl
<< "-e <email>\tExpects an email as input" << std::endl
<< "-r <ident>\tExpects an identifier, uses begend iterator" << std::endl
<< "-p\tperformance test for various PEG implementations" << std::endl
<< std::endl << std::endl
<< "enter one of the options starting with either " << std::endl
<< "-s -d -e -r or -p with parameters as explained above"
<< std::endl;
}
struct peg_begend_iterator{
peg_begend_iterator(const char* p=0,const char* pEnd=0)
:m_p((const unsigned char*)p),
m_pEnd((const unsigned char*)pEnd){}
int operator*()const{
return m_p==m_pEnd?(INT_MIN+1):*m_p;
}
peg_begend_iterator& operator++(){
if(m_p<m_pEnd)++m_p;
return *this;
}
unsigned const char* m_p;
unsigned const char* m_pEnd;
};
typedef const unsigned char* openend_iterator;
bool MatchesDigits(openend_iterator& it)
{
return (*it>='0' && *it<='9'?(++it,true):false)//[0-9]
&& (MatchesDigits(it),true); //Digits?
}
bool MatchesEnclosedDigits(openend_iterator& it)
{ openend_iterator itSave= it;
return
MatchesDigits(it) // Digits
|| // |
(*it=='('?(++it,true):false) // '('
&& MatchesEnclosedDigits(it) // EnclosedDigits
&& (*it==')'?(++it,true):(it=itSave,false));// ')'
}
bool MatchesEMail(openend_iterator& it)
{ // EMailChar: [A-Za-z0-9._%-];
// EMailSuffix: [A-Za-z]{2,4} !EMailChar ;
/* EMail: EMailChar+
'@'
EMailChar
([A-Za-z0-9_%-] |'.'!EMailSuffix)*
'.'
EmailSuffix ;
*/
using namespace peg_templ;
C c;
typedef In<'A','Z','a','z'> Alpha;
typedef In<'A','Z','a','z','0','9'> Alnum;
typedef Or<Alnum,OneOfChars<'.','_','%','-'> > EMailChar;
typedef And<For<2,4,Alpha >,Not<EMailChar > > EMailSuffix;
typedef And<
PlusRepeat<EMailChar>,
Char<'@'>,
PlusRepeat<Or< Or<Alnum,OneOfChars<'_','%','-'> >,
And< Char<'.'>,Not<EMailSuffix> >
>
>,
Char<'.'>,
EMailSuffix
> EMail;
return EMail::Matches(it,&c);
}
template<typename Iterator>
bool MatchesIdentOnly(Iterator& it)
{
using namespace peg_templ;
C c;
typedef And<PlusRepeat<In<'a','z','A','Z'> >,Not<In<1,255> > > FullIdent;
return FullIdent::Matches(it,&c);
}
std::string ShowResult(const char* toDisplay,
const char* itBeg, const char* it,bool bOk)
{
int nPos= it-itBeg;
std::string sRes=
std::string(toDisplay,nPos) + "|" + std::string(toDisplay+nPos);
return ">> " + sRes + (bOk?" : success " : " : fail")+"\n";
}
void Test_MatchesEnclosedDigits(const char* toTest)
{
openend_iterator it= (openend_iterator)toTest;
std::cout
<< ShowResult(toTest,toTest,(const char*)it,MatchesEnclosedDigits(it));
}
void Test_MatchesEMail(const char* toTest)
{
openend_iterator it= (openend_iterator)toTest;
bool bOk= MatchesEMail(it);
std::cout << ShowResult(toTest,toTest,(const char*)it,bOk);
}
void Test_MatchIdentBegEndIterator(const char* toTest)
{
size_t n= strlen(toTest);
peg_begend_iterator it(toTest,toTest+n);
bool bOk= MatchesIdentOnly(it);
std::cout << ShowResult(toTest,toTest,(const char*)it.m_p,bOk);
if( !bOk && n>1){
for( ;n>0;n--){
peg_begend_iterator it1(toTest,toTest+n);
bool bOk= MatchesIdentOnly(it1);
if( bOk ){
std::string sMsg(toTest);
std::ostringstream oss;
oss<<n;
sMsg+= " (only first " + oss.str() + ")";
std::cout << ShowResult(sMsg.c_str(),toTest,(const char*)it1.m_p,bOk);
break;
}
}
}
}
////////////////{ procedural implementation of Expr and related rules for performance test
namespace test_proc_peg{
using namespace peg_proc;
inline bool IsExpr(unsigned const char*& p,C* pC);
inline bool IsSpace(unsigned const char*& p,C* pC)
{ (pC);
while(OneOfChars(p,' ','\t'));
return true;
}
inline bool IsIdent(const unsigned char*& p,C* pC)
{
const unsigned char* pSave;
if( !PEG_OR2(pSave,p,In(p,'a','z','A','Z'),Char(p,'_')) ){return false;}
while( PEG_OR2(pSave,p,In(p,'a','z','A','Z','0','9'),Char(p,'_')) ){}
pC->nNofIdents++;
return true;
}
inline bool IsInteger(const unsigned char*& p,C* pC)
{
if( !In(p,'0','9') ){return false;}
while(In(p,'0','9') ){}
pC->nNofIntegers++;
return true;
}
inline bool IsFactor(const unsigned char*& p,C* pC)
{
const unsigned char* pSave;
return PEG_OR3(pSave,p,
IsIdent(p,pC),
IsInteger(p,pC),
PEG_AND(pSave,p,
Char(p,'(')
&& IsSpace(p,pC)
&& IsExpr(p,pC)
&& IsSpace(p,pC)
&& Char(p,')')
)
);
}
inline bool IsTerm(const unsigned char*& p,C* pC)
{
if( !IsFactor(p,pC) ){return false;}
IsSpace(p,pC);
const unsigned char* pSave;
while(
PEG_AND(pSave,p,
OneOfChars(p,'*','/')
&& IsSpace(p,pC)
&& IsFactor(p,pC)
&& IsSpace(p,pC)
) ){
}
return true;
}
inline bool IsExpr(unsigned const char*& p,C* pC)
{
if( !IsTerm(p,pC) ){ return false; }
IsSpace(p,pC);
const unsigned char* pSave;
while(
PEG_AND(pSave,p,
OneOfChars(p,'+','/')
&& IsSpace(p,pC)
&& IsTerm(p,pC)
&& IsSpace(p,pC)
) ){
}
return true;
}
void ProcPegPerformance(const char* s,size_t nNofExpectedIntsAndIntegers)
{
C c;
SimpleTimer st("procedureal PEG implementation");
const unsigned char* pStart=(const unsigned char*)s;
while( IsExpr(pStart,&c) ){}
if( !( nNofExpectedIntsAndIntegers==c.nNofIntegers
&& nNofExpectedIntsAndIntegers==c.nNofIdents) ){
st.SetErr("EVALUATION ERROR ");
}
}
}
////////////////{ template implementation of Expr and related rules for performance test
namespace test_templ_peg{
using namespace peg_templ;
struct Term;
struct Factor;
typedef Or<In<'a','z','A','Z'>,Char<'_'> > BegChar;
typedef Or<BegChar,In<'0','9'> > SecondChar;
typedef And<BegChar,OptRepeat<SecondChar> > Ident;
typedef PlusRepeat<In<'0','9'> > Integer;
typedef OptRepeat<OneOfChars<' ','\t'> > S;
struct CountIdent{
template<typename It, typename Context>
static bool Matches(It& p,Context* pC)
{
if(Ident::Matches(p,pC) ){
pC->nNofIdents++;
return true;
}else{
return false;
}
}
};
struct CountInteger{
template<typename It, typename Context>
static bool Matches(It& p,Context* pC)
{
if(Integer::Matches(p,pC) ){
pC->nNofIntegers++;
return true;
}else{
return false;
}
}
};
struct Expr{
typedef And<Term,S,OptRepeat<And<OneOfChars<'+','-'>,S,Term,S> > > rule;
template<typename It, typename Context>
static bool Matches(It& p,Context* pC)
{return rule::Matches(p,pC);}
};
struct Term{
typedef And<S,Factor,S,OptRepeat<And<OneOfChars<'*','/'>,S,Factor,S> > >rule;
template<typename It, typename Context>
static bool Matches(It& p,Context* pC)
{return rule::Matches(p,pC);}
};
struct Factor{
typedef Or<CountIdent,CountInteger,And<Char<'('>,S,Expr,S,Char<')'> > > rule;
template<typename It, typename Context>
static bool Matches(It& p,Context* pC)
{ return rule::Matches(p,pC);}
};
////////////////} template implementation of Expr and related rules for performance test
template<typename Iterator>
void TemplPegPerformanceImpl(const char* sTitle, Iterator it,
int nNofExpectedIntsAndIntegers)
{ using namespace peg_templ;
C c;
SimpleTimer st(sTitle);
PlusRepeat<And<Expr,S> >::Matches(it,&c);
if( !( nNofExpectedIntsAndIntegers==c.nNofIntegers
&& nNofExpectedIntsAndIntegers==c.nNofIdents) ){
st.SetErr("EVALUATION ERROR ");
}
}
void TemplPegPerformance(const char* s,size_t nNofExpectedIntsAndIntegers)
{
peg_begend_iterator begEnd(s,s+strlen(s));
TemplPegPerformanceImpl("templated PEG implementation",
(const unsigned char*)s,nNofExpectedIntsAndIntegers);
TemplPegPerformanceImpl("begEnd templated PEG implementation",
begEnd,nNofExpectedIntsAndIntegers);
}
}
////////////////{ hand crafted implementation of Expr and related rules for performance test
namespace test_handcrafted{
inline bool IsExpr(unsigned const char*& p,C* pC);
inline bool IsSpace(unsigned const char*& p,C* pC)
{ (pC);
while(*p==' '||*p=='\t'){++p;}
return true;
}
inline bool IsIdent(const unsigned char*& p,C* pC)
{
if( !(*p>='a' && *p<='z' || *p>='A'&&*p<='Z' || *p=='_') ){return false;}
++p;
while( *p>='a'&&*p<='z' || *p>='A'&&*p<='Z' || *p>='0'&&*p<='9' || *p=='_'){
++p;
}
pC->nNofIdents++;
return true;
}
inline bool IsInteger(const unsigned char*& p,C* pC)
{
if( !(*p>='0' && *p<='9') ){return false;}
++p;
while( *p>='0'&&*p<='9'){
++p;
}
pC->nNofIntegers++;
return true;
}
inline bool IsFactor(const unsigned char*& p,C* pC)
{
if( IsIdent(p,pC) ) {return true;}
if( IsInteger(p,pC) ) {return true;}
const unsigned char* p0=p;
if(*p=='('){
++p;
IsSpace(p,pC);
if( IsExpr(p,pC) ){
IsSpace(p,pC);
if(*p==')'){
++p;
return true;
}
}
}
p= p0;
return false;
}
inline bool IsTerm(const unsigned char*& p,C* pC)
{
if( !IsFactor(p,pC) ){return false;}
IsSpace(p,pC);
while(*p=='*'||*p=='/'){
const unsigned char* p0=p;
++p;
IsSpace(p,pC);
if(IsFactor(p,pC)){
IsSpace(p,pC);
}else{
p=p0;
break;
}
}
return true;
}
inline bool IsExpr(unsigned const char*& p,C* pC)
{
if( !IsTerm(p,pC) ){ return false; }
IsSpace(p,pC);
while(*p=='+'||*p=='-'){
const unsigned char* p0=p;
++p;
IsSpace(p,pC);
if(IsTerm(p,pC)){
IsSpace(p,pC);
}else{
p=p0;
break;
}
}
return true;
}
int HandOptimizedPerformance(const char* s,size_t nNofExpectedIntsAndIntegers)
{
C c;
SimpleTimer st("hand optimized PEG implementation");
const unsigned char* pStart=(const unsigned char*)s;
while( IsExpr(pStart,&c) ){}
if( !( nNofExpectedIntsAndIntegers==c.nNofIntegers
&& nNofExpectedIntsAndIntegers==c.nNofIdents) ){
st.SetErr("EVALUATION ERROR ");
}
return 0;
}
}
void Test_Performance()
{//Expr: Term S (('+'|'-') S Term)* ;
//Term= Factor S (('*'|'/') S Factor S)* ;
//Factor: Ident / Integer / '(' S Expr S ')' ;
//Ident:[a-zA-Z_][a-zA-Z0-9_]* ; Integer: [0-9]+ ;
const char sExpr[]= "132*( firstOccurance + x2*( 1001/N55 )+19 ) ";
char s[1000000]={0};
for(int i=0;i<sizeof(s)/sizeof(sExpr);i++){
memcpy(s+i*(sizeof(sExpr)-1),sExpr,sizeof(sExpr)-1);
}
s[sizeof(s)-1]='\0';
std::cout << ">> parse a 1 MB buffer containing expressions " << std::endl;
test_proc_peg::ProcPegPerformance (s, 3*(sizeof(s)/sizeof(sExpr)));
test_templ_peg::TemplPegPerformance (s, 3*(sizeof(s)/sizeof(sExpr)));
test_handcrafted::HandOptimizedPerformance (s, 3*(sizeof(s)/sizeof(sExpr)));
std::cout << ">> performance tests done" << std::endl;
}
void ShowBuiltInSamples();
void ExecuteOption(const char* sOption)
{
using namespace peg_templ;
C c;
typedef OptRepeat<OneOfChars<' ','\t','\r'> > S;
typedef And<S,Char<'-'> > option;
if( option::Matches(sOption,&c) ){
if( And<Char<'d'>,S>::Matches(sOption,&c)){
Test_MatchesEnclosedDigits(sOption);
}else if ( And<Char<'e'>,S>::Matches(sOption,&c)){
Test_MatchesEMail(sOption);
}else if ( And<Char<'r'>,S>::Matches(sOption,&c)){
Test_MatchIdentBegEndIterator(sOption);
}else if ( And<Char<'p'>,S>::Matches(sOption,&c)){
Test_Performance();
}else if ( And<Char<'s'>,S>::Matches(sOption,&c)){
ShowBuiltInSamples();
}else{
ShowOptions();
}
}else{
std::cout
<< "your line must start with a '-' followed by 'd', 'e', 'r' or 'p'"
<< std::endl;
ShowOptions();
}
}
void SelfExecuteOption(const char* pOption)
{
std::cout << pOption << std::endl;
ExecuteOption(pOption);
std::cout << std::endl;
}
void ShowBuiltInSamples()
{
SelfExecuteOption("-d (((489)))");
SelfExecuteOption("-d (((123))");
SelfExecuteOption("-e support@codeproject.com");
SelfExecuteOption("-e bloom@blo.blo.uk");
SelfExecuteOption("-e byrne@bomm.123");
SelfExecuteOption("-r fashion week");
SelfExecuteOption("-p ");
ExecuteOption("-?");
}
int main(int argc, char *argv[])
{
std::cout << "welcome to the samples of PARSING & GRAMMARS" << std::endl;
ShowOptions();
char buf[1048];
while( std::cin.getline(buf,1048) ){
ExecuteOption(buf);
}
system("PAUSE");
return EXIT_SUCCESS;
}