Click here to Skip to main content
15,881,852 members
Articles / Programming Languages / C++

Tokenizer and analyzer package supporting precedence prioritized rules

Rate me:
Please Sign up or sign in to vote.
5.00/5 (4 votes)
1 Jan 2002 3 min read 180.8K   2.8K   54
A library allowing you to conveniently build a custom tokenizer and analyzer supporting precedence prioritized rules
// simpleCalc.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include "../cxtPackage/package.h"
#include <math.h>
#include <iostream>

// Debug-build boilerplate: when _DEBUG is defined, give this translation
// unit its own THIS_FILE string (the value of __FILE__) and replace every
// later use of `new` with DEBUG_NEW.
// NOTE(review): DEBUG_NEW is not defined in this file - presumably it is
// supplied through stdafx.h; confirm before building without it.
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

// Token IDs shared by the tokenizer setup string in main() and by
// calculate(). The numeric values must stay in sync with the "<id>:<text>"
// entries of that setup string, so each value is spelled out in a comment.
// The *_START / *_END pairs delimit half-open ranges [START, END).
enum
	{
	// Function name tokens
	ID_FUNC_START		= 100,
	ID_FUNC_SIN			= ID_FUNC_START,	// 100
	ID_FUNC_COS,							// 101
	ID_FUNC_TAN,							// 102
	ID_FUNC_SQRT,							// 103
	ID_FUNC_END,							// 104 (one past the last function)

	// Operator tokens
	ID_OP_START			= 200,
	ID_OP_ADD			= ID_OP_START,		// 200
	ID_OP_SUB,								// 201
	ID_OP_MUL,								// 202
	ID_OP_DIV,								// 203
	ID_OP_POW,								// 204
	ID_OP_ENDEXPR,							// 205
	ID_OP_END,								// 206 (one past the last operator)

	// Braces; ID_SPEC_BRACEBEG shares the value of ID_OP_END, which is
	// outside the half-open operator range.
	ID_SPEC_BRACEBEG	= 206,
	ID_SPEC_BRACEEND,						// 207

	// Numeric literal
	ID_NUMBER			= 300,

	ID_EOL				= 999
	};

double	calculate(const cxaParseBranch *papbRes)
	{
	bool			fContinue = true;
	double			dValue = 0.0f, dTemp = 0.0f;
	sit_data_t		sit;
	const cxaToken	*patOperator = NULL;
	const cxaParseElement *papeCurEl = NULL;

	papbRes->vEnumBegin(&sit);
	while(fContinue)
		{
		bool		fComplete = false;

		papeCurEl	=papbRes->papeEnumGetAt(&sit);
		fContinue	=papbRes->fEnumNext(&sit);
		
		if(papeCurEl->fIsBranch())
			{
			dTemp		=calculate(papeCurEl->papbElem());
			fComplete	=true;
			}
		else
			{
			const cxaToken	*pToken;
			
			pToken		=(papeCurEl->papnElem())->patGetToken();

			// Number?
			if(pToken->nIDValue==ID_NUMBER)
				dTemp = atof(pToken->lpszTokenText),
				fComplete = true;

			// Operator?
			if(pToken->nIDValue>=ID_OP_START && pToken->nIDValue<ID_OP_END)
				patOperator = pToken;

			if(pToken->nIDValue>=ID_FUNC_START && pToken->nIDValue<ID_FUNC_END)
				/* Here a function call would go ... */;

			// (expr) appears always as a sub-branch with the tokens
			// '(', {token/branch}, ')'. Thus skipping the brace operators
			// is all we need here.
			if(pToken->nIDValue==ID_SPEC_BRACEBEG)
				continue;

			if(pToken->nIDValue==ID_SPEC_BRACEEND)
				continue;
			}

		// No operator found yet?
		if(patOperator==NULL)
			dValue = dTemp;

		// Operator + argument found?
		if(patOperator && fComplete)
			{
			if(patOperator->nIDValue==ID_OP_ADD)
				dValue += dTemp;
			if(patOperator->nIDValue==ID_OP_SUB)
				dValue -= dTemp;
			if(patOperator->nIDValue==ID_OP_MUL)
				dValue *= dTemp;
			if(patOperator->nIDValue==ID_OP_DIV)
				dValue /= dTemp;
			if(patOperator->nIDValue==ID_OP_POW)
				dValue = pow(dValue,dTemp);
			}
		}
	papbRes->vEnumEnd(&sit);

	return dValue;
	}

// Entry point of the simple calculator sample.
//
// Builds a cxtPackage from an in-memory setup string, then loops: read one
// line from stdin, tokenize it, try to match rule 500 against it and print
// either the value computed by calculate() or a diagnostic derived from the
// status cookie. The loop ends on an empty input line or when
// nReadUntilDelimeter() reports 0.
//
// argc and argv are not used. Always returns 0.
int main(int argc, char* argv[])
	{
	// Tokenizer/Analyzer setup. The "<id>:<text>" entries use the same
	// numeric IDs as the ID_* enum at the top of this file.
	std::tstringstream init(
		"[tokens]\n"
		"100:sin\n"     "101:cos\n"		/* Not currently used yet */
		"102:tan\n"     "103:sqrt\n"
		"[seperators]\n"
		"200:+\n"       "201:-\n"
		"202:*\n"       "203:/\n"
		"204:^\n"       "205:;\n"
		"206:(\n"       "207:)\n"
		"' Whitespace tokens:\n"
		"0: \n"         "0:\t\n"
		"0:\\n\n"       "0:\\r\n"
		"[rules]\n"
		"300:numbers\n"
		"[ends]\n"
		"401:{.expr}=100:{.expr}{$+}{.expr}\n"
		"402:{.expr}=100:{.expr}{$-}{.expr}\n"
		"403:{.expr}=99:{.expr}{$*}{.expr}\n"
		"404:{.expr}=99:{.expr}{$/}{.expr}\n"
		"405:{.expr}=98:{.expr}{$^}{.expr}\n"
		"406:{.expr}=0:{$(}{.expr}{$)}\n"
		"400:{.expr}=0:{!number}\n"
		"500:{.line}=0:{.expr}{$;}\n");

	cxtPackage	pkg(init);

	for(;;)
		{
		std::string	strExpr;

		std::cout << "Enter expression (terminate with a ';') to evaluate or nothing to end:\n";
		std::getline(std::cin,strExpr,std::cin.widen('\n'));
		if(strExpr.length()==0)
			break;

		cxaTokenStream::const_iterator it;
		// c_str() guarantees a terminating NUL; data() does not before
		// C++11. The stream is given no length, so it presumably relies on
		// that terminator.
		cxTokenizerTextInputStream<> tisInput(strExpr.c_str(),false);

		tisInput.vInit();

		pkg.vSetInputStream(&tisInput);
		if(pkg.nReadUntilDelimeter()==0)
			{
			// tisInput is destroyed when the loop is left; detach it first
			// so pkg does not keep the address of a dead stream, exactly as
			// the regular end of an iteration does.
			pkg.vSetInputStream(NULL);
			break;
			}

		pkg.vSetStartFromBeginning();
		cxaStatusCookie ascCondition;
		cxaParseBranch *papbRes = pkg.papbCheckForRule(500,&it,&ascCondition);
		if(papbRes!=NULL)
			{
			// Rule 500 matched: rebalance, dump the tree, print the value.
			pkg.vRebalance(papbRes,400);
			papbRes->vDump();
			std::cout << calculate(papbRes) << "\n";
			delete papbRes;
			}
		else
			{
			std::cout << "The expression could not be evaluated.\n";

			// Translate the break cause into a readable hint.
			switch(ascCondition.nBrkCause)
				{
				case cxaStatusCookie::brkcause_eots:
					std::cout << "The expression was not complete.\n";
					break;
				case cxaStatusCookie::brkcause_wrongtoken:
					std::cout << "Unexpected token was hit.\n";
					break;
				case cxaStatusCookie::brkcause_subrulefailed:
					std::cout << "A nested sub-expression was invalid.\n";
					break;
				case cxaStatusCookie::brkcause_notokens:
					std::cout << "The expression was empty.\n";
					break;
				case cxaStatusCookie::brkcause_unexpected:
					std::cout << "An unexpected error occured.\n";
					break;
				}

			// A position other than -1 lets us show the input up to the
			// point of failure.
			if(ascCondition.lPosition!=-1)
				{
				pkg.patsGetTokenStream()->vDump();
				std::tstring	strLine;
				if(!tisInput.fGetLineAtPosition(ascCondition.lPosition,strLine))
					std::cout << "*** No more error information available.\n";
				else
					{
					long	lPos = tisInput.lGetPosInLine(ascCondition.lPosition);

					std::cout << strLine.substr(0,lPos) << " <-- error\n";
					}
				}
			}

		// Detach the loop-local stream and reset the package before the
		// next line is read.
		pkg.vSetInputStream(NULL);
		pkg.vFlush();
		}

	return 0;
	}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
Germany Germany
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions