Click here to Skip to main content
15,897,187 members
Articles / Programming Languages / C#

General Expression Parser and Evaluator

Rate me:
Please Sign up or sign in to vote.
4.10/5 (12 votes)
26 Jun 2011 | CPOL | 4 min read | 69.4K | 1.8K | 46
A user configurable expression parser and evaluator
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace RpnParser {
  public class MathParser : ITokeniser {
    // Operator tables. These are constant lookup data, so they are shared by all
    // instances (static) and never reassigned (readonly).
    //
    // allOps[i] and precedence[i] describe the same operator: the two arrays must
    // stay the same length and in the same order.
    private static readonly string[] allOps = {
      ",", "(", ")", "!", ":", "?", "||", "&&", "|", "^", "&",
      "=", "==", "<", "<=", ">", ">=", "<<", ">>", "<<=", ">>=", "<>",
      "+", "-", "*", "/", "//", "%", "**",
      ":=", "+=", "-=", "*=", "/=", "//=", "%="
    };
    private static readonly int[] precedence = {
      //,  (  )  !   :  ?  || && |  ^  &
      0, 0, 0, 16, 2, 3, 4, 4, 5, 5, 5,
      //=  == <  <= >  >= << >> <<= >>= <>
      6, 6, 6, 6, 6, 6, 6, 6,  6, 6,  6,
      //+  -  *  /  //  %  **
      7, 7, 8, 8, 8, 8, 9,
      //:= += -= *= /= //= %=
      1, 1, 1, 1, 1, 1,  1
    };
    // Precedence value shared by every assignment operator in the table above.
    private const int assgnPrecedence = 1;
    // Operator groups used to classify a token when an operator object is built.
    // "!=" appears only here (not in allOps).
    private static readonly string[] arithmeticOps = { "+", "-", "*", "/", "//", "%", "**" };
    private static readonly string[] comparisonOps = { "=", "==", "!=", "<", "<=", ">", ">=", "<<", ">>", "<<=", ">>=", "<>" };
    private static readonly string[] logicalOps = { "&&", "||" };
    private static readonly string[] bitwiseOps = { "&", "|", "^" };
    private static readonly string[] condOps = { "?", ":" };
    private static readonly string[] assignOps = { ":=", "+=", "-=", "*=", "/=", "//=", "%=" };

    /// <summary>
    /// Splits an expression into raw token strings and converts each one into a
    /// classified <c>RpnToken</c>.
    /// </summary>
    /// <param name="rpnExpression">The expression text to tokenise.</param>
    /// <returns>The classified tokens in source order. Whitespace-only fragments,
    /// prefix negation markers, signs folded into the following token and the "("
    /// that follows a function name are not emitted as tokens of their own.</returns>
    public List<RpnToken> Tokenise(string rpnExpression) {
      // Precedence given to a "+" or "-" that is used as a unary sign.
      const int unaryPrecedence = 16;

      List<RpnToken> tokens = new List<RpnToken>();
      // Split the expression string into individual tokens
      List<string> tokenList = Split(rpnExpression);

      RpnToken prevToken = null;
      bool addToken = true;
      string affixSign = "";
      bool negate = false;

      for (int stx = 0; stx < tokenList.Count; ++stx) {
        string strToken = tokenList[stx].Trim();
        if (strToken.Length == 0) {
          continue;
        }

        // String literal: collapse doubled delimiters ("" or '') to a single one.
        // Only the text between the opening and closing delimiter is rewritten;
        // rewriting the whole token would pair an escaped quote with a delimiter
        // and lose a character when the literal consists solely of escaped quotes.
        char first = strToken[0];
        if ((first == '\'' || first == '"') && strToken.Length > 3) {
          int close = strToken.LastIndexOf(first);
          if (close > 1) {
            string inner = strToken.Substring(1, close - 1)
                                   .Replace(new string(first, 2), first.ToString());
            strToken = first.ToString() + inner + strToken.Substring(close);
          }
        }

        if (strToken.Length == 1) {
          // check for negation operator
          if ("!^\\".IndexOf(strToken[0]) >= 0 && stx + 1 < tokenList.Count) {
            if (tokenList[stx + 1] != "(") {
              // negation applies to the next token: remember it and drop the marker
              negate = true;
              continue;
            }
            strToken = "!";    // convert to internal negation op code
          }
        } else if (strToken == "<>") {
          // replace external not equal with internal format
          negate = true;
          strToken = "=";
        }

        RpnToken token = new RpnToken(affixSign + strToken);
        affixSign = "";
        EvalToken(token, negate);
        negate = false;

        if (token.IsOperator && (strToken == "+" || strToken == "-")) {
          if (prevToken == null || prevToken.IsOperator || prevToken.StrValue == "(" ||
              prevToken.ElementType == ElementType.CondFalse ||
              prevToken.ElementType == ElementType.CondTrue) {
            // sign at the start of an operand position
            token.Precedence = unaryPrecedence;
          } else if (prevToken.ElementType == ElementType.Function ||
                     prevToken.ElementType == ElementType.Argument) {
            // sign directly after a function name or argument separator:
            // prefix it to the text of the next token instead of emitting it
            addToken = false;
            affixSign = strToken;
          }
        }

        if (token.StrValue == "(") {
          token.ElementType = ElementType.GroupStart;
          // an identifier (starting with a letter, or "#") followed by "(" is a
          // function name; the "(" itself is then not emitted
          if (prevToken != null &&
              prevToken.ElementType == ElementType.Identifier &&
              (Char.IsLetter(prevToken.StrValue, 0) || prevToken.StrValue == "#")) {
            prevToken.ElementType = ElementType.Function;
            addToken = false;
          }
        }
        if (token.StrValue == ")") {
          token.ElementType = ElementType.GroupEnd;
        }

        if (addToken) {
          tokens.Add(token);
          prevToken = token;
        }
        addToken = true;
      }
      return tokens;
    }

    /// <summary>
    /// Cuts an expression into raw token strings: operators, quoted strings and
    /// the text that lies between them.
    /// </summary>
    /// <param name="expression">The expression text.</param>
    /// <returns>The fragments in source order. Text between operators is returned
    /// untrimmed, so fragments may carry or consist of whitespace.</returns>
    private List<String> Split(String expression) {
      List<String> tokenList = new List<String>();
      int expLen = expression.Length;
      int tokenStart = 0;
      bool haveOpr = false;
      bool inString = false;
      char stringDelm = ' ';
      for (int exp = 0; exp < expLen; ++exp) {
        char ch = expression[exp];
        char nextch = ch;
        // An operator was recognised on the previous pass; it ends just before
        // the current character, so emit it now.
        if (!inString && haveOpr) {
          tokenList.Add(expression.Substring(tokenStart, exp - tokenStart));
          tokenStart = exp;
        }
        haveOpr = false;
        switch (ch) {
          // [+\-*/%=<>]{1,2} optionally followed by '=', plus "<>"
          case '+':
          case '-':
          case '*':
          case '/':
          case '%':
          case '<':
          case '>':
          case '=':
            if (!inString) {
              if (exp > tokenStart) {
                tokenList.Add(expression.Substring(tokenStart, exp - tokenStart));
              }
              tokenStart = exp;
              haveOpr = true;
              if (exp + 1 < expLen) {
                nextch = expression[exp + 1];
                if (ch == '<' && nextch == '>') {
                  // "<>" (not equal) is one operator; without this it would be
                  // emitted as the two separate tokens "<" and ">"
                  ++exp;
                } else {
                  // doubled forms: << >> // **
                  if ((ch == '<' || ch == '>' || ch == '/' || ch == '*') && nextch == ch) {
                    ++exp;
                  }
                  // optional trailing '=' : <= >= == += -= *= /= //= %= <<= >>=
                  if (exp + 1 < expLen) {
                    nextch = expression[exp + 1];
                  }
                  if (nextch == '=') {
                    ++exp;
                  }
                }
              }
            }
            break;

          // [(),!^:\?\\]{1}, plus ":="
          case '(':
          case ')':
          case ',':
          case '!':
          case '^':
          case ':':
          case '?':
          case '\\':
            if (!inString) {
              if (exp > tokenStart) {
                tokenList.Add(expression.Substring(tokenStart, exp - tokenStart));
              }
              tokenStart = exp;
              if (ch == ':' && exp + 1 < expLen) {
                nextch = expression[exp + 1];
                if (nextch == '=') {
                  ++exp;
                }
              }
              haveOpr = true;
            }
            break;

          // [&\|]{1,2}
          case '&':
          case '|':
            if (!inString) {
              if (exp > tokenStart) {
                tokenList.Add(expression.Substring(tokenStart, exp - tokenStart));
              }
              tokenStart = exp;
              haveOpr = true;
              if (exp + 1 < expLen) {
                nextch = expression[exp + 1];
                if (ch == nextch) {
                  ++exp;
                }
              }
            }
            break;

          // string [" ']
          case '"':
          case '\'':
            if (inString) {
              if (ch == stringDelm) {
                if (exp + 1 < expLen && expression[exp + 1] == ch) {
                  // doubled delimiter inside a string: skip the pair, stay in the string
                  ++exp;
                } else {
                  inString = false;
                }
              }
            } else {
              if (exp > tokenStart) {
                tokenList.Add(expression.Substring(tokenStart, exp - tokenStart));
              }
              tokenStart = exp;
              inString = true;
              stringDelm = ch;
            }
            break;
        }
      }
      // whatever is left after the last boundary is the final token
      if (tokenStart < expLen) {
        tokenList.Add(expression.Substring(tokenStart));
      }
      return tokenList;
    }


    /// <summary>
    /// Classifies a raw token by its text: string/hex/unicode literal, argument
    /// separator, numeric constant, conditional marker, operator, assignment or
    /// (possibly qualified) identifier.
    /// </summary>
    /// <param name="token">The token to classify; updated in place.</param>
    /// <param name="negate">Stored in <c>token.Negate</c> for every token that is
    /// not a quoted literal.</param>
    private void EvalToken(RpnToken token, bool negate) {
      string strValue = token.StrValue;
      char ch = strValue[0];

      if (ch == '"' || ch == '\'') {
        // a trailing x/X or u/U after the closing delimiter marks a hex or
        // unicode literal string; otherwise it is a plain literal
        ch = strValue[strValue.Length - 1];
        if (ch == 'x' || ch == 'X') {
          HexString(token, strValue.Substring(1, strValue.Length - 3), ElementType.HexLiteral);
        } else if (ch == 'u' || ch == 'U') {
          HexString(token, strValue.Substring(1, strValue.Length - 3), ElementType.UCLiteral);
        } else {
          token.ElementType = ElementType.Literal;
          token.StrValue = strValue.Substring(1, strValue.Length - 2);
        }
        return;
      }

      if (ch == ',' && strValue.Length == 1) {
        // function argument list delimiter
        token.ElementType = ElementType.Argument;
      } else if (Char.IsDigit(ch) || ch == '.' || ch == '-' || ch == '+') {
        // numeric or hex constant
        if (strValue.Length > 2 && ch == '0' && (strValue[1] == 'x' || strValue[1] == 'X')) {
          HexString(token, strValue.Substring(2), ElementType.Constant);
        } else {
          double dbl;
          // Expression text is not localised: parse with the invariant culture so
          // that '.' is always the decimal point regardless of the user's locale.
          if (Double.TryParse(strValue,
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                System.Globalization.CultureInfo.InvariantCulture, out dbl)) {
            token.ConstValue = dbl;
            token.ElementType = ElementType.Constant;
          }
        }
      } else if (strValue.Length == 1 && (ch == '?' || ch == ':')) {
        // conditional statement
        if (ch == '?') {
          token.ElementType = ElementType.CondTrue;
          token.Association = Association.Right;
        } else {
          token.ElementType = ElementType.CondFalse;
        }
      }

      if (token.ElementType == ElementType.Identifier) {
        // token has not been classified yet: check for an operator token
        if (Array.IndexOf(allOps, token.StrValue) >= 0) {
          token.ElementType = ElementType.Operator;
          token.Association = Association.Left;
        } else {
          // not an operator: check for a qualified identifier (qualifier.name)
          int pos = strValue.LastIndexOf('.');
          if (pos >= 0) {
            token.IsQualified = true;
            // pos > 0 (not > 1) so that a one-character qualifier such as the
            // "a" in "a.b" is kept; a leading '.' yields an empty qualifier
            token.Qualifier = pos > 0 ? strValue.Substring(0, pos) : "";
            token.StrValue = strValue.Substring(pos + 1);
          }
        }
      }

      token.Negate = negate;
      if (token.IsOperator) {
        int opIndex = Array.IndexOf<string>(allOps, token.StrValue);
        token.Precedence = precedence[opIndex];
        if (token.StrValue == "**") {
          token.Association = Association.Right;
        }
        if (token.Precedence == assgnPrecedence) {
          // every operator with the assignment precedence is an assignment
          token.ElementType = ElementType.Assignment;
          token.Association = Association.Right;
        }
      }
    }

    /// <summary>
    /// Decodes a run of hex digits into characters and stores the result in the
    /// token. Digits are taken in groups of 4 for <c>ElementType.UCLiteral</c>
    /// and groups of 2 otherwise; when the digit count is not a multiple of the
    /// group width, the first group is the short one.
    /// </summary>
    /// <param name="token">Token to update. Left untouched if any group is not
    /// valid hex.</param>
    /// <param name="hexChar">The hex digits, without prefix, quotes or suffix.</param>
    /// <param name="type">Element type to assign on success. For
    /// <c>ElementType.Constant</c> the token's <c>ConstValue</c> is set to NaN and
    /// <c>StrValue</c> to the decimal text of the value; if the digits do not fit
    /// an Int64 the token becomes a <c>HexLiteral</c> holding the decoded
    /// characters instead.</param>
    private void HexString(RpnToken token, string hexChar, ElementType type) {
      int cw = (type == ElementType.UCLiteral) ? 4 : 2;
      StringBuilder sb = new StringBuilder(100);

      // Width of the first group: the remainder when the length is not a multiple
      // of cw, otherwise a full group. Every group, including the last one, is
      // decoded; empty input simply decodes to an empty string.
      int groupLen = hexChar.Length % cw;
      if (groupLen == 0) {
        groupLen = cw;
      }

      bool valid = true;
      int pos = 0;
      while (valid && pos < hexChar.Length) {
        int hexCh;
        string hex = hexChar.Substring(pos, groupLen);
        if (Int32.TryParse(hex, System.Globalization.NumberStyles.AllowHexSpecifier, null, out hexCh)) {
          sb.Append((char)hexCh);
        } else {
          valid = false;
        }
        pos += groupLen;
        groupLen = cw;   // all groups after the first are full width
      }

      if (!valid) {
        return;
      }

      token.StrValue = sb.ToString();
      token.ElementType = type;
      if (type == ElementType.Constant) {
        token.ConstValue = Double.NaN;
        long temp;
        if (Int64.TryParse(hexChar, System.Globalization.NumberStyles.AllowHexSpecifier, null, out temp)) {
          // culture-independent decimal text
          token.StrValue = temp.ToString(System.Globalization.CultureInfo.InvariantCulture);
        } else {
          token.ElementType = ElementType.HexLiteral;
        }
      }
    }

    /// <summary>
    /// Builds the operator object for an operator token, assigning its group and
    /// type from the token text.
    /// </summary>
    /// <param name="token">The operator token. A negated "=" token is rewritten
    /// in place to "!=" with its negate flag cleared.</param>
    /// <returns>The new operator.</returns>
    /// <exception cref="ApplicationException">The token text belongs to none of
    /// the operator groups and is not "!".</exception>
    public RpnOperator CreateOperator(RpnToken token) {
      // a negated equality is handled as the single operator "!="
      if (token.Negate && token.StrValue == "=") {
        token.StrValue = "!=";
        token.Negate = false;
      }

      RpnOperator result = new RpnOperator(token);
      string text = token.StrValue;

      if (IsArithmeticOperator(token)) {
        result.OpGroup = OperatorGroup.Arithmetic;
        switch (text) {
          case "**":
            result.OpType = OperatorType.Exponent;
            break;
          case "//":
            result.OpType = OperatorType.Remainder;
            break;
          default:
            // single-character operators use their character code as the type
            result.OpType = (OperatorType)((int)text[0]);
            break;
        }
        return result;
      }

      if (IsComparisonOperator(token)) {
        result.OpGroup = OperatorGroup.Comparison;
        result.OpType = (OperatorType)((int)text[0]);
        if (text == "<=" || text == "<<=") {
          result.OpType = OperatorType.CompLessThanEqual;
        } else if (text == ">=" || text == ">>=") {
          result.OpType = OperatorType.CompGreaterEqual;
        }
        if (token.Negate) {
          result.Negate();
          result.StrValue = "!" + result.StrValue;
        }
        return result;
      }

      if (IsLogicalOperator(token)) {
        result.OpGroup = OperatorGroup.Logical;
        result.OpType = (OperatorType)(128 + (int)text[0]);
        return result;
      }

      if (IsBitwiseOperator(token)) {
        result.OpGroup = OperatorGroup.Bitwise;
        result.OpType = (OperatorType)((int)text[0]);
        return result;
      }

      if (IsCondOperator(token)) {
        result.OpGroup = OperatorGroup.Conditional;
        result.OpType = (OperatorType)((int)text[0]);
        return result;
      }

      if (IsAssignOperator(token)) {
        result.OpGroup = OperatorGroup.Assignment;
        result.OpType = (OperatorType)(128 + (int)text[0]);
        if (text[0] == '/' && text[1] == '/') {
          result.OpType = OperatorType.AssignRemainder;
        }
        return result;
      }

      if (text == "!") {
        result.OpGroup = OperatorGroup.Logical;
        result.OpType = OperatorType.Negation;
        return result;
      }

      throw new ApplicationException("Unhandled Operator : " + token.StrValue);
    }

    // True when the token's text is one of the entries in arithmeticOps.
    private bool IsArithmeticOperator(RpnToken token) {
      return Array.IndexOf(arithmeticOps, token.StrValue) >= 0;
    }
    // True when the token's text is one of the entries in comparisonOps.
    private bool IsComparisonOperator(RpnToken token) {
      return Array.IndexOf(comparisonOps, token.StrValue) >= 0;
    }
    // True when the token's text is one of the entries in logicalOps.
    private bool IsLogicalOperator(RpnToken token) {
      return Array.IndexOf(logicalOps, token.StrValue) >= 0;
    }
    // True when the token's text is one of the entries in bitwiseOps.
    private bool IsBitwiseOperator(RpnToken token) {
      return Array.IndexOf(bitwiseOps, token.StrValue) >= 0;
    }
    // True when the token's text is one of the entries in condOps.
    private bool IsCondOperator(RpnToken token) {
      return Array.IndexOf(condOps, token.StrValue) >= 0;
    }
    // True when the token's text is one of the entries in assignOps.
    private bool IsAssignOperator(RpnToken token) {
      return Array.IndexOf(assignOps, token.StrValue) >= 0;
    }

  }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
United States
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions