Click here to Skip to main content
15,898,134 members
Articles / Programming Languages / C#

Parsing - It Is Easy. Base CSharp Classes and Expressions Calculator

Rate me:
Please Sign up or sign in to vote.
3.67/5 (4 votes)
13 Mar 2008 | CPOL | 1 min read | 20.9K | 318 | 18
Provides base C# parsing classes with a demo application and description
// Token.cs:
//
//    Copyright (C) 2007  by Alex Nek
//    alexnek@russinger.com
//    simpleparser.russinger.com
//
//    This file is part of SimpleParser classes.
//
//    SimpleParser is free software:
//    you can redistribute it and/or modify
//    it under the terms of the GNU General Public License as published by
//    the Free Software Foundation, either version 3 of the License, or
//    any later version.
//
//    This program is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU General Public License for more details.
//
//    You should have received a copy of the GNU General Public License
//    along with this program.  If not, see <http://www.gnu.org/licenses/>
//////////////////////////////////////////////////////////////////////
//
//////////////////////////////////////////////////////////////////////
using System;
using System.Globalization;


namespace SimpleParser
{
    /// <summary>
    /// Lexeme storing class.
    /// Holds one token: its class (<see cref="ETokenClass"/>), an integer
    /// token type that extends the class, and the token text.
    /// </summary>
    public class CToken
    {
        #region Single Symbols declaration 
        public const char SYMBOL_ANGLE_BRACKET_OPEN = '<';
        public const char SYMBOL_ANGLE_BRACKET_CLOSE = '>';

        public const char SYMBOL_ROUND_BRACKET_OPEN = '(';
        public const char SYMBOL_ROUND_BRACKET_CLOSE = ')';

        public const char SYMBOL_SQUARE_BRACKET_OPEN = '[';
        public const char SYMBOL_SQUARE_BRACKET_CLOSE = ']';

        public const char SYMBOL_BRACES_OPEN = '{'; //curly bracket
        public const char SYMBOL_BRACES_CLOSE = '}';

        public const char SYMBOL_AT = '@';
        public const char SYMBOL_DOLLAR = '$';
        public const char SYMBOL_COLON = ':';
        public const char SYMBOL_EQUAL = '=';
        public const char SYMBOL_TILDE = '~';

        public const char SYMBOL_EXCLAMATION_MARK = '!';
        public const char SYMBOL_PLUS = '+';
        public const char SYMBOL_MINUS = '-';
        public const char SYMBOL_ASTERISK = '*';
        public const char SYMBOL_SLASH = '/';
        public const char SYMBOL_CIRCUMFLEX_ACCENT = '^';

        public const char SYMBOL_SHARP = '#';
        public const char SYMBOL_PERCENT = '%';
        public const char SYMBOL_AMPERSAND = '&';
        public const char SYMBOL_QUOTATION_MARK = '"';
        public const char SYMBOL_APOSTROPHE = '\'';
        public const char SYMBOL_DOT = '.';
        public const char SYMBOL_COMMA = ',';
        public const char SYMBOL_BACKSLASH = '\\';
        public const char SYMBOL_VERTICAL_LINE = '|';
        public const char SYMBOL_UNDERSCOPE = '_';  //underscore
        // Section sign. Written as an escape so the value does not depend on
        // the encoding the source file is saved in.
        public const char SYMBOL_PARAGRAPH = '\u00A7';
        public const char SYMBOL_QUESTION_MARK = '?';
        #endregion

        #region Enums
        /// <summary>
        /// Token class.
        /// Defines the big groups of tokens.
        /// </summary>
        public enum ETokenClass
        {
            /// <summary>
            /// Initial state, nothing recognized yet
            /// </summary>
            None,

            /// <summary>
            /// Identifier found
            /// </summary>
            Ident,

            /// <summary>
            /// Keyword found.
            /// A keyword is a reserved identifier
            /// </summary>
            Keyword,

            /// <summary>
            /// Symbol combination found.
            /// Could be any combination starting with any character
            /// </summary>
            Combination,

            /// <summary>
            /// Number found.
            /// In this version only positive integers are used
            /// </summary>
            Number,

            /// <summary>
            /// Comment found
            /// </summary>
            Comment,

            /// <summary>
            /// "Unknown" single symbol found which is not part of a combination
            /// </summary>
            Symbol,

            /// <summary>
            /// End of line found.
            /// You must enable the option for returning EOL tokens in the parser;
            /// by default EOL is skipped
            /// </summary>
            EOL,

            /// <summary>
            /// End of file found, no more tokens
            /// </summary>
            EOF,

            /// <summary>
            /// Reserved value marking the end of the enumeration values.
            /// You can use it for starting a new enumeration from the last value
            /// </summary>
            EndOfTokenClassValues
        }

        #region enum TokenType
        /// <summary>
        /// Token type extension.
        /// The token type itself is stored as a plain integer so that
        /// inherited classes can use additional values.
        /// </summary>
        public enum ETokenType
        {
            NullValue = 0,
            EndOfFile = (int)ETokenClass.EOF,
            EndOfLine = (int)ETokenClass.EOL,
            // First value that does not collide with any ETokenClass value
            StartOfTokeTypeValue = (int)ETokenClass.EndOfTokenClassValues,

            IntegerValue = StartOfTokeTypeValue,
            FloatValue,

            /// <summary>
            /// Keyword found
            /// </summary>
            ttKeyword,
            EndOfTokenTypeValue
        }
        #endregion enum TokenType
        #endregion Enums

        #region Class data member
        /// <summary>
        /// Stores the token class
        /// </summary>
        private ETokenClass m_eTokenClass = ETokenClass.None;

        /// <summary>
        /// Token type.
        /// It is useful when you need to expand tokens
        /// </summary>
        private int m_nTokenType = (int)ETokenClass.None;

        /// <summary>
        /// Stores the token value
        /// </summary>
        private string m_sValue = "";
        #endregion

        #region Constructor
        /// <summary>
        /// Token constructor.
        /// Creates a token of class <see cref="ETokenClass.None"/> with an empty value.
        /// </summary>
        public CToken ()
        {
        }
        #endregion

        #region Set Token Class
        /// <summary>
        /// Set the token class.
        /// Pay attention: set the token class first and the token value afterwards,
        /// because the token value is cleared here. The token type is reset to the
        /// integer value of the new class.
        /// </summary>
        /// <param name="TokenClass">Token class</param>
        public void SetTokenClass (ETokenClass TokenClass)
        {
            m_eTokenClass = TokenClass;
            m_nTokenType = (int)TokenClass;
            //Clear old value
            m_sValue = "";
        }
        #endregion

        #region Value transformations
        /// <summary>
        /// Get the token value as an integer.
        /// </summary>
        /// <param name="bHex">true if the token value is in hexadecimal format</param>
        /// <returns>
        /// The parsed integer value, or 0 if the token class is not
        /// <see cref="ETokenClass.Number"/>
        /// </returns>
        /// <exception cref="FormatException">The value is not a valid number</exception>
        /// <exception cref="OverflowException">The value does not fit into an Int32</exception>
        public int GetInteger (bool bHex)
        {
            if (TokenClass != ETokenClass.Number)
            {
                return 0;
            }

            NumberStyles style = bHex ? NumberStyles.HexNumber : NumberStyles.Integer;
            // Token text is machine-readable input, so it must not be
            // interpreted using the culture of the current thread.
            return Int32.Parse(Value, style, CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Get the token value as a double.
        /// The value is parsed with the invariant culture, i.e. '.' is always
        /// the decimal separator regardless of the regional settings.
        /// </summary>
        /// <returns>
        /// The parsed double value, or 0 if the token class is not
        /// <see cref="ETokenClass.Number"/>
        /// </returns>
        /// <exception cref="FormatException">The value is not a valid number</exception>
        public double GetDouble ()
        {
            if (TokenClass != ETokenClass.Number)
            {
                return 0;
            }

            // Without an explicit culture "3.5" is read as 35 on systems
            // where '.' is the group separator.
            return double.Parse(Value, CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Get the token value as a character whose code is the numeric value.
        /// </summary>
        /// <param name="bHex">true if the token value is in hexadecimal format</param>
        /// <returns>
        /// The character with the parsed code if it is in the range 0..255;
        /// otherwise '\0' (also for tokens that are not numbers)
        /// </returns>
        /// <exception cref="FormatException">The value is not a valid number</exception>
        /// <exception cref="OverflowException">The value does not fit into an Int32</exception>
        public char GetCharFromNumber (bool bHex)
        {
            // GetInteger returns 0 for non-number tokens, which maps to '\0'.
            int nValue = GetInteger(bHex);

            // Negative codes (e.g. "-1" or hex "FFFFFFFF") are rejected as well;
            // casting them would produce an unrelated character.
            if (nValue >= 0 && nValue < 256)
            {
                return (char)nValue;
            }
            return '\0';
        }
        #endregion

        #region Properties
        /// <summary>
        /// Token class (get/set).
        /// Setting the token class here changes neither the token value nor the
        /// token type; use <see cref="SetTokenClass"/> to reset them as well.
        /// </summary>
        public ETokenClass TokenClass
        {
            get
            {
                return m_eTokenClass;
            }
            set
            {
                m_eTokenClass = value;
            }
        }

        /// <summary>
        /// Token type (get/set).
        /// Can be used as an extension of the token class
        /// </summary>
        public int TokenType
        {
            get
            {
                return m_nTokenType;
            }
            set
            {
                m_nTokenType = value;
            }
        }

        /// <summary>
        /// Token value (get/set)
        /// </summary>
        public string Value
        {
            get
            {
                return m_sValue;
            }
            set
            {
                m_sValue = value;
            }
        }
        #endregion
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer (Senior)
Germany
I started out with Fortran and went on via Pascal, Delphi, C and C++. Now I'm falling in love with C#.

Comments and Discussions