Click here to Skip to main content
15,891,409 members
Articles / Programming Languages / Visual Basic

A Tiny Parser Generator v1.2

Rate me:
Please Sign up or sign in to vote.
4.94/5 (201 votes)
21 Sep 2010CPOL25 min read 669.8K   17.5K   465  
@TinyPG is a utility that makes it easier to write and try out your own parser/compiler
// Copyright 2008 - 2010 Herre Kuijpers - <herre.kuijpers@gmail.com>
//
// This source file(s) may be redistributed, altered and customized
// by any means PROVIDING the authors name and all copyright
// notices remain intact.
// THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED. USE IT AT YOUR OWN RISK. THE AUTHOR ACCEPTS NO
// LIABILITY FOR ANY DATA DAMAGE/LOSS THAT THIS PRODUCT MAY CAUSE.
//-----------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.CodeDom.Compiler;
using System.Reflection;
using System.Text.RegularExpressions;

using TinyPG.Debug;
using System.Windows.Forms;

namespace TinyPG.Compiler
{
    public class Compiler
    {
        private Grammar Grammar;

        /// <summary>
        /// indicates if the grammar was parsed successfully
        /// </summary>
        public bool IsParsed;

        /// <summary>
        /// indicates if the grammar was compiled successfully
        /// </summary>
        public bool IsCompiled;

        /// <summary>
        /// a string containing the actual generated code for the scanner
        /// </summary>
        public string ScannerCode;

        /// <summary>
        /// a string containing the actual generated code for the parser
        /// </summary>
        public string ParserCode;

        /// <summary>
        /// a string containing the actual generated code for the Parse tree
        /// </summary>
        public string ParseTreeCode;

        /// <summary>
        /// a list of errors that occured during parsing or compiling
        /// </summary>
        public List<string> Errors;

        // the resulting compiled assembly
        private Assembly assembly;


        public Compiler()
        {
            IsCompiled = false;
            Errors = new List<string>();
        }

        public void Compile(Grammar grammar)
        {
            IsParsed = false;
            IsCompiled = false;
            Errors = new List<string>();
            if (grammar == null) throw new ArgumentNullException("Grammar may not be null");

            Grammar = grammar;
            grammar.Compile();
            IsParsed = true;

            ScannerCode = GenerateScanner(Grammar, false);
            ParserCode = GenerateParser(Grammar, false);
            ParseTreeCode = GenerateParseTree(Grammar, false);

            BuildCode();
            if (Errors.Count == 0)
                IsCompiled = true;
        }

        public string GenerateScanner(Grammar Grammar, bool Debug)
        {

            string scanner = File.ReadAllText(Grammar.GetTemplatePath() + @"Scanner.cs");

            int counter = 2;
            StringBuilder tokentype = new StringBuilder();
            StringBuilder regexps = new StringBuilder();
            StringBuilder skiplist = new StringBuilder();

            foreach (TerminalSymbol s in Grammar.SkipSymbols)
            {
                skiplist.AppendLine("\t\t\tSkipList.Add(TokenType." + s.Name + ");");
            }

            // build system tokens
            tokentype.AppendLine("\r\n\t\t\t//Non terminal tokens:");
            tokentype.AppendLine(Helper.Outline("_NONE_", 3, "= 0,", 5));
            tokentype.AppendLine(Helper.Outline("_UNDETERMINED_", 3, "= 1,", 5));

            // build non terminal tokens
            tokentype.AppendLine("\r\n\t\t\t//Non terminal tokens:");
            foreach (Symbol s in Grammar.GetNonTerminals())
            {
                tokentype.AppendLine(Helper.Outline(s.Name, 3, "= " + String.Format("{0:d},", counter), 5));
                counter++;
            }

            // build terminal tokens
            tokentype.AppendLine("\r\n\t\t\t//Terminal tokens:");
            bool first = true;
            foreach (TerminalSymbol s in Grammar.GetTerminals())
            {
                regexps.Append("\t\t\tregex = new Regex(" + s.Expression.ToString() + ", RegexOptions.Compiled);\r\n");
                regexps.Append("\t\t\tPatterns.Add(regex);\r\n");
                regexps.Append("\t\t\tTokens.Add(TokenType." + s.Name + ");\r\n\r\n");

                if (first) first = false;
                else tokentype.AppendLine(",");

                tokentype.Append(Helper.Outline(s.Name, 3, "= " + String.Format("{0:d}", counter), 5));
                counter++;
            }

            scanner = scanner.Replace(@"<%SkipList%>", skiplist.ToString());
            scanner = scanner.Replace(@"<%RegExps%>", regexps.ToString());
            scanner = scanner.Replace(@"<%TokenType%>", tokentype.ToString());

            if (Debug)
            {
                scanner = scanner.Replace(@"<%Namespace%>", "TinyPG.Debug");
                scanner = scanner.Replace(@"<%IToken%>", " : TinyPG.Debug.IToken");
            }
            else
            {
                scanner = scanner.Replace(@"<%Namespace%>", Grammar.Directives["TinyPG"]["Namespace"]);
                scanner = scanner.Replace(@"<%IToken%>", "");
            }

            return scanner;
        }

        public string GenerateParser(Grammar Grammar, bool Debug)
        {

            // generate the parser file
            StringBuilder parsers = new StringBuilder();
            string parser = File.ReadAllText(Grammar.GetTemplatePath() + @"Parser.cs");

            // build non terminal tokens
            foreach (NonTerminalSymbol s in Grammar.GetNonTerminals())
            {
                string method = s.GenerateParseMethod();
                parsers.Append(method);
            }


            if (Debug)
            {
                parser = parser.Replace(@"<%Namespace%>", "TinyPG.Debug");
                parser = parser.Replace(@"<%IParser%>", " : TinyPG.Debug.IParser");
                parser = parser.Replace(@"<%IParseTree%>", "TinyPG.Debug.IParseTree");
                
            }
            else
            {
                parser = parser.Replace(@"<%Namespace%>", Grammar.Directives["TinyPG"]["Namespace"]);
                parser = parser.Replace(@"<%IParser%>", "");
                parser = parser.Replace(@"<%IParseTree%>", "ParseTree");
            }

            parser = parser.Replace(@"<%ParseNonTerminals%>", parsers.ToString());
            return parser;
        }

        public string GenerateParseTree(Grammar Grammar, bool Debug)
        {
            // copy the parse tree file (optionally)
            string parsetree = File.ReadAllText(Grammar.GetTemplatePath() + @"ParseTree.cs");
            
            StringBuilder evalsymbols = new StringBuilder();
            StringBuilder evalmethods = new StringBuilder();

            // build non terminal tokens
            foreach (Symbol s in Grammar.GetNonTerminals())
            {
                evalsymbols.AppendLine("\t\t\t\tcase TokenType." + s.Name + ":");
                evalsymbols.AppendLine("\t\t\t\t\tValue = Eval" + s.Name + "(tree, paramlist);");
                //evalsymbols.AppendLine("\t\t\t\tValue = Token.Text;");
                evalsymbols.AppendLine("\t\t\t\t\tbreak;");

                evalmethods.AppendLine("\t\tprotected virtual object Eval" + s.Name + "(ParseTree tree, params object[] paramlist)");
                evalmethods.AppendLine("\t\t{");
                if (s.CodeBlock != null)
                {
                    // paste user code here
                    evalmethods.AppendLine(FormatCodeBlock(s as NonTerminalSymbol));
                }
                else
                {
                    if (s.Name == "Start") // return a nice warning message from root object.
                        evalmethods.AppendLine("\t\t\treturn \"Could not interpret input; no semantics implemented.\";");
                    else
                        evalmethods.AppendLine("\t\t\tthrow new NotImplementedException();");

                    // otherwise simply not implemented!
                }
                evalmethods.AppendLine("\t\t}\r\n");
            }

            if (Debug)
            {
                parsetree = parsetree.Replace(@"<%Namespace%>", "TinyPG.Debug");
                parsetree = parsetree.Replace(@"<%IParseTree%>", ", TinyPG.Debug.IParseTree");
                parsetree = parsetree.Replace(@"<%IParseNode%>", " : TinyPG.Debug.IParseNode");
                parsetree = parsetree.Replace(@"<%ITokenGet%>", "public IToken IToken { get {return (IToken)Token;} }");

                string inodes = "public List<IParseNode> INodes {get { return nodes.ConvertAll<IParseNode>( new Converter<ParseNode, IParseNode>( delegate(ParseNode n) { return (IParseNode)n; })); }}\r\n\r\n";
                parsetree = parsetree.Replace(@"<%INodesGet%>", inodes);
            }
            else
            {
                parsetree = parsetree.Replace(@"<%Namespace%>", Grammar.Directives["TinyPG"]["Namespace"]);
                parsetree = parsetree.Replace(@"<%IParseTree%>", "");
                parsetree = parsetree.Replace(@"<%IParseNode%>", "");
                parsetree = parsetree.Replace(@"<%ITokenGet%>", "");
                parsetree = parsetree.Replace(@"<%INodesGet%>", "");
            }

            parsetree = parsetree.Replace(@"<%EvalSymbols%>", evalsymbols.ToString());
            parsetree = parsetree.Replace(@"<%VirtualEvalMethods%>", evalmethods.ToString());

            return parsetree;
        }

        /// <summary>
        /// replaces $ variables with a c# statement
        /// the routine also implements some checks to see if $variables are matching with production symbols
        /// errors are added to the Error object.
        /// </summary>
        /// <param name="nts">non terminal and its production rule</param>
        /// <returns>a formated codeblock</returns>
        private string FormatCodeBlock(NonTerminalSymbol nts)
        {
            string codeblock = nts.CodeBlock;
            if (nts == null) return "";

            Regex var = new Regex(@"\$(?<var>[a-zA-Z_0-9]+)(\[(?<index>[^]]+)\])?", RegexOptions.Compiled);

            Symbols symbols = nts.DetermineProductionSymbols();

            
            Match match = var.Match(codeblock);
            while (match.Success)
            {
                Symbol s = symbols.Find(match.Groups["var"].Value);
                if (s == null)
                {
                    Errors.Add("Variable $" + match.Groups["var"].Value + " cannot be matched.");
                    break; // error situation
                }
                string indexer = "0";
                if (match.Groups["index"].Value.Length > 0)
                {
                    indexer = match.Groups["index"].Value;
                }

                string replacement = "this.GetValue(tree, TokenType." + s.Name + ", " + indexer + ")" ;

                codeblock = codeblock.Substring(0, match.Captures[0].Index) + replacement + codeblock.Substring(match.Captures[0].Index + match.Captures[0].Length);
                match = var.Match(codeblock);
            }

            codeblock = "\t\t\t" + codeblock.Replace("\n", "\r\n\t\t");
            return codeblock;
        }

        /// <summary>
        /// once the grammar compiles correctly, the code can be built.
        /// </summary>
        private void BuildCode()
        {
            CompilerResults Result;
            CodeDomProvider provider = new Microsoft.CSharp.CSharpCodeProvider();
            System.CodeDom.Compiler.CompilerParameters compilerparams = new System.CodeDom.Compiler.CompilerParameters();
            compilerparams.GenerateInMemory = true;
            compilerparams.GenerateExecutable = false;
            compilerparams.ReferencedAssemblies.Add("System.dll");

            // reference this assembly to share interfaces (for debugging only)
            string tinypgfile = Application.ExecutablePath;
            compilerparams.ReferencedAssemblies.Add(tinypgfile); 

            string[] sources = new string[3];
            sources[0] = GenerateScanner(Grammar, true); // generate the code with debug interface enabled
            sources[1] = GenerateParser(Grammar, true); // generate the code with debug interface enabled
            sources[2] = GenerateParseTree(Grammar, true); // generate the code with debug interface enabled

            if (sources.Length > 0)
            {
                Result = provider.CompileAssemblyFromSource(compilerparams, sources);

                if (Result.Errors.Count > 0)
                {
                    foreach (CompilerError o in Result.Errors)
                        Errors.Add(o.ErrorText + " on line " + o.Line.ToString());
                }
                else
                    assembly = Result.CompiledAssembly;
            }
        }

        /// <summary>
        /// evaluate the input expression
        /// </summary>
        /// <param name="input">the expression to evaluate with the parser</param>
        /// <returns>the output of the parser/compiler</returns>
        public CompilerResult Run(string input)
        {
            CompilerResult compilerresult = new CompilerResult();
            string output = null;
            if (assembly == null) return null;

            object instance = assembly.CreateInstance("TinyPG.Debug.Scanner");
            Type scanner = instance.GetType();

            object parserinstance = (IParser)assembly.CreateInstance("TinyPG.Debug.Parser", true, BindingFlags.CreateInstance, null, new object[] { instance }, null, null);
            IParser iparser = (IParser) assembly.CreateInstance("TinyPG.Debug.Parser", true, BindingFlags.CreateInstance, null, new object[] { instance }, null, null);
            Type parsertype = parserinstance.GetType();

            object treeinstance = parsertype.InvokeMember("Parse", BindingFlags.InvokeMethod, null, parserinstance, new object[] { input });
            IParseTree itree = treeinstance as IParseTree;// (IParseTree)parsertype.InvokeMember("Parse", BindingFlags.InvokeMethod, null, parserinstance, new object[] { input });
            
            compilerresult.ParseTree = itree;
            Type treetype = treeinstance.GetType();

            //object parsetreeoutput = treetype.InvokeMember("PrintTree", BindingFlags.InvokeMethod, null, treeinstance, null);
            //output += parsetreeoutput.ToString() + "\r\n";

            object parsetreeerrors = treetype.InvokeMember("Errors", BindingFlags.GetField, null, treeinstance, null);
            Type errorstype = parsetreeerrors.GetType();

            List<string> errors = (List<string>)errorstype.InvokeMember("GetAllErrors", BindingFlags.InvokeMethod, null, parsetreeerrors, null);
            if (errors.Count > 0)
            {
                foreach (string err in errors)
                    output += err + "\r\n";
            }
            else
            {
                output += "Parse was successful." + "\r\n";
                output += "Evaluating...";

                // parsing was successful, now try to evaluate... this should really be done on a seperate thread.
                // e.g. if the thread hangs, it will hang the entire application (!)
                try
                {
                    object result = itree.Eval(null);
                    output += "\r\nResult: " + (result==null ? "null" : result.ToString());
                }
                catch (Exception exc)
                {
                    output += "\r\nException occurred: " + exc.Message;
                    output += "\r\nStacktrace: " + exc.StackTrace;
                }

            }
            compilerresult.Output = output.ToString();
            return compilerresult;
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Architect Rubicon
Netherlands Netherlands
Currently Herre Kuijpers is employed at Rubicon. During his career he developed skills with all kinds of technologies, methodologies and programming languages such as c#, ASP.Net, .Net Core, VC++, Javascript, SQL, Agile, Scrum, DevOps, ALM. Currently he fulfills the role of software architect in various projects.

Herre Kuijpers is a very experienced software architect with deep knowledge of software design and development on the Microsoft .Net platform. He has a broad knowledge of Microsoft products and knows how these, in combination with custom software, can be optimally implemented in the often complex environment of the customer.

Comments and Discussions