Click here to Skip to main content
15,881,248 members
Articles / Programming Languages / Visual Basic

A Tiny Parser Generator v1.2

Rate me:
Please Sign up or sign in to vote.
4.94/5 (201 votes)
21 Sep 2010CPOL25 min read 660.3K   17.5K   465  
@TinyPG is a utility that makes it easier to write and try out your own parser/compiler
// Copyright 2008 - 2010 Herre Kuijpers - <herre.kuijpers@gmail.com>
//
// This source file(s) may be redistributed, altered and customized
// by any means PROVIDING the authors name and all copyright
// notices remain intact.
// THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED. USE IT AT YOUR OWN RISK. THE AUTHOR ACCEPTS NO
// LIABILITY FOR ANY DATA DAMAGE/LOSS THAT THIS PRODUCT MAY CAUSE.
//-----------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Globalization;

namespace TinyPG.Compiler
{

    public class GrammarTree : GrammarNode
    {
        public GrammarTree()
        {
        }
    }


    /// <summary>
    /// this class implements the semantics of the parsetree
    /// it will create a Grammar with production rules
    /// </summary>
    public partial class GrammarNode : ParseTree
    {

        public override ParseNode CreateNode(Token token, string text)
        {
            GrammarNode node = new GrammarNode(token, text);
            node.Parent = this;
            return node;
        }

        protected GrammarNode()
        {
        }

        protected GrammarNode(Token token, string text)
        {
            Token = token;
            this.text = text;
            nodes = new List<ParseNode>();
        }

        /// <summary>
        /// EvalStart will first do a semantic check to see if symbols are declared correctly
        /// then it will also check for attributes and parse the directives
        /// after that it will complete the transformation to the grammar tree.
        /// </summary>
        /// <param name="tree"></param>
        /// <param name="paramlist"></param>
        /// <returns></returns>
        protected override object EvalStart(ParseTree tree, params object[] paramlist)
        {
            TerminalSymbol terminal = null;
            bool StartFound = false;
            Grammar g = new Grammar();
            foreach (ParseNode n in Nodes)
            {
                if (n.Token.Type == TokenType.Directive)
                {
                    EvalDirective(tree, new object[] { g, n });
                }
                if (n.Token.Type == TokenType.ExtProduction)
                {

                    if (n.Nodes[n.Nodes.Count-1].Nodes[2].Nodes[0].Token.Type == TokenType.STRING)
                    {
                        try
                        {
                            terminal = new TerminalSymbol(n.Nodes[n.Nodes.Count - 1].Nodes[0].Token.Text, (string)n.Nodes[n.Nodes.Count - 1].Nodes[2].Nodes[0].Token.Text);
                            for (int i = 0; i < n.Nodes.Count - 1; i++)
                            {
                                if (n.Nodes[i].Token.Type == TokenType.Attribute)
                                    EvalAttribute(tree, new object[] {g, terminal, n.Nodes[i]});
                            }

                        }
                        catch (Exception ex)
                        {
                            tree.Errors.Add(new ParseError("regular expression for '" + terminal.Name + "' results in error: " + ex.Message, 0x1020, 0, n.Nodes[0].Token.StartPos, n.Nodes[0].Token.StartPos, n.Nodes[0].Token.EndPos - n.Nodes[0].Token.StartPos));
                        }

                        if (terminal.Name == "Start")
                            tree.Errors.Add(new ParseError("'Start' symbol cannot be a regular expression.", 0x1021, 0, n.Nodes[0].Token.StartPos, n.Nodes[0].Token.StartPos, n.Nodes[0].Token.EndPos - n.Nodes[0].Token.StartPos));

                        if (g.Symbols.Find(terminal.Name) == null)
                            g.Symbols.Add(terminal);
                        else
                            tree.Errors.Add(new ParseError("Terminal already declared: " + terminal.Name, 0x1022, 0, n.Nodes[0].Token.StartPos, n.Nodes[0].Token.StartPos, n.Nodes[0].Token.EndPos - n.Nodes[0].Token.StartPos));

                    }
                    else
                    {
                        NonTerminalSymbol nts = new NonTerminalSymbol(n.Nodes[n.Nodes.Count - 1].Nodes[0].Token.Text);
                        if (g.Symbols.Find(nts.Name) == null)
                            g.Symbols.Add(nts);
                        else
                            tree.Errors.Add(new ParseError("Non terminal already declared: " + nts.Name, 0x1023, 0, n.Nodes[0].Token.StartPos, n.Nodes[0].Token.StartPos, n.Nodes[0].Token.EndPos - n.Nodes[0].Token.StartPos));

                        for (int i = 0; i < n.Nodes.Count - 1; i++)
                        {
                            if (n.Nodes[i].Token.Type == TokenType.Attribute)
                                EvalAttribute(tree, new object[] {g, nts, n.Nodes[i] });
                        }

                        if (nts.Name == "Start")
                            StartFound = true;
                    }
                }
            }

            if (!StartFound)
            {
                tree.Errors.Add(new ParseError("The grammar requires 'Start' to be a production rule.", 0x0024, 0, 0, 0, 0));
                return g;
            }

            foreach (ParseNode n in Nodes)
            {
                if (n.Token.Type == TokenType.ExtProduction)
                {
                    n.Eval(tree, g);
                }
            }

            return g;
        }


        protected override object EvalDirective(ParseTree tree, params object[] paramlist)
        {
            Grammar g = (Grammar) paramlist[0];
            GrammarNode node = (GrammarNode)paramlist[1];
            string name = node.Nodes[1].Token.Text;

            switch (name)
            {
                case "TinyPG":
                case "Parser":
                case "Scanner":
                case "ParseTree":
                case "TextHighlighter":
                    if (g.Directives.Find(name) != null)
                    {
                        tree.Errors.Add(new ParseError("Directive '" + name + "' is already defined", 0x1030, node.Nodes[1]));
                        return null; ;
                    }
                    break;
                default:
                    tree.Errors.Add(new ParseError("Directive '" + name + "' is not supported", 0x1031, node.Nodes[1]));
                    break;
            }

            Directive directive = new Directive(name);
            g.Directives.Add(directive);

            foreach (ParseNode n in node.Nodes)
            {
                if (n.Token.Type == TokenType.NameValue)
                    EvalNameValue(tree, new object[] { g, directive, n });
            }

            return null;
        }

        protected override object EvalNameValue(ParseTree tree, params object[] paramlist)
        {
            Grammar grammer = (Grammar)paramlist[0];
            Directive directive = (Directive)paramlist[1];
            GrammarNode node = (GrammarNode)paramlist[2];

            string key = node.Nodes[0].Token.Text;
            string value = node.Nodes[2].Token.Text.Substring(1, node.Nodes[2].Token.Text.Length-2);
            if (value.StartsWith("\"") )
                value = value.Substring(1);

            directive[key] = value;

            List<string> names = new List<string>(new string[] { "Namespace", "OutputPath", "TemplatePath",  });
            List<string> languages = new List<string>(new string[] { "c#", "cs", "csharp", "vb", "vb.net", "vbnet", "visualbasic" });
            switch (directive.Name)
            {
                case "TinyPG":
                    names.Add("Namespace");
                    names.Add("OutputPath");
                    names.Add("TemplatePath");
                    names.Add("Language");

                    if (key == "TemplatePath")
                        if (grammer.GetTemplatePath() == null)
                            tree.Errors.Add(new ParseError("Template path '" + value + "' does not exist", 0x1060, node.Nodes[2]));

                    if (key == "OutputPath")
                        if (grammer.GetOutputPath() == null)
                            tree.Errors.Add(new ParseError("Output path '" + value + "' does not exist", 0x1061, node.Nodes[2]));

                    if (key == "Language")
                        if (!languages.Contains(value.ToLower(CultureInfo.InvariantCulture)))
                            tree.Errors.Add(new ParseError("Language '" + value + "' is not supported", 0x1062, node.Nodes[2]));
                    break;
                case "Parser":
                case "Scanner":
                case "ParseTree":
                case "TextHighlighter":
                    names.Add("Generate");
                    break;
                default:
                    return null;
            }

            if (!names.Contains(key))
                tree.Errors.Add(new ParseError("Directive attribute '" + key + "' is not supported", 0x1034, node.Nodes[0]));

            return null;
        }

        protected override object EvalExtProduction(ParseTree tree, params object[] paramlist)
        {            
            return Nodes[Nodes.Count - 1].Eval(tree, paramlist);
        }

        protected override object EvalAttribute(ParseTree tree, params object[] paramlist)
        {
            Grammar grammar = (Grammar)paramlist[0];
            Symbol symbol = (Symbol)paramlist[1];
            GrammarNode node = (GrammarNode)paramlist[2];

            if (symbol.Attributes.ContainsKey(node.Nodes[1].Token.Text))
            {
                tree.Errors.Add(new ParseError("Attribute already defined for this symbol: " + node.Nodes[1].Token.Text, 0x1039, node.Nodes[1]));
                return null;
            }

            symbol.Attributes.Add(node.Nodes[1].Token.Text, (object[])EvalParams(tree, new object[] { node }));
            switch (node.Nodes[1].Token.Text)
            {
                case "Skip":
                    if (symbol is TerminalSymbol)
                        grammar.SkipSymbols.Add(symbol);
                    else
                        tree.Errors.Add(new ParseError("Attribute for Non terminal rule not allowed: " + node.Nodes[1].Token.Text, 0x1035, 0, node.Token.StartPos, node.Token.StartPos, node.Token.Length));
                    break;
                case "Color":
                    if (symbol is NonTerminalSymbol)
                        tree.Errors.Add(new ParseError("Attribute for Non terminal rule not allowed: " + node.Nodes[1].Token.Text, 0x1035, 0, node.Token.StartPos, node.Token.StartPos, node.Token.Length));

                    if (symbol.Attributes["Color"].Length != 1 && symbol.Attributes["Color"].Length != 3)
                        tree.Errors.Add(new ParseError("Attribute " + node.Nodes[1].Token.Text + " has too many or missing parameters" , 0x103A, node.Nodes[1]));


                    for (int i = 0; i < symbol.Attributes["Color"].Length; i++ )
                    {
                        if (symbol.Attributes["Color"][i] is string)
                        {
                            tree.Errors.Add(new ParseError("Parameter " + node.Nodes[3].Nodes[i * 2].Nodes[0].Token.Text + " is of incorrect type", 0x103A, node.Nodes[3].Nodes[i * 2].Nodes[0]));
                            break;
                        }
                    }
                    break;
                default:
                    tree.Errors.Add(new ParseError("Attribute not supported: " + node.Nodes[1].Token.Text, 0x1036, node.Nodes[1]));
                    break;
            }

            return symbol;
        }

        protected override object EvalParams(ParseTree tree, params object[] paramlist)
        {
            GrammarNode node = (GrammarNode)paramlist[0];
            if (node.Nodes.Count < 4) return null;
            if (node.Nodes[3].Token.Type != TokenType.Params ) return null;

            GrammarNode parms = (GrammarNode)node.Nodes[3];
            List<object> objects = new List<object>();
            for (int i = 0; i < parms.Nodes.Count; i += 2)
            {
                objects.Add(EvalParam(tree, new object[] { parms.Nodes[i] }));
            }
            return objects.ToArray();
        }

        protected override object EvalParam(ParseTree tree, params object[] paramlist)
        {
            GrammarNode node = (GrammarNode)paramlist[0];
            try
            {
                switch (node.Nodes[0].Token.Type)
                {
                    case TokenType.STRING:
                        return node.Nodes[0].Token.Text;
                    case TokenType.INTEGER:
                        return Convert.ToInt32(node.Nodes[0].Token.Text);
                    case TokenType.HEX:
                        return long.Parse(node.Nodes[0].Token.Text.Substring(2), System.Globalization.NumberStyles.HexNumber);
                    default:
                        tree.Errors.Add(new ParseError("Attribute parameter is not a valid value: " + node.Token.Text, 0x1037, 0, node.Token.StartPos, node.Token.StartPos, node.Token.Length));
                        break;
                }
            }
            catch (Exception)
            {
                tree.Errors.Add(new ParseError("Attribute parameter is not a valid value: " + node.Token.Text, 0x1038, node));
            }
            return null;
        }

        protected override object EvalProduction(ParseTree tree, params object[] paramlist)
        {
            Grammar g = (Grammar)paramlist[0];
            if (Nodes[2].Nodes[0].Token.Type == TokenType.STRING)
            {
                TerminalSymbol term = g.Symbols.Find(Nodes[0].Token.Text) as TerminalSymbol;
                if (term == null)
                    tree.Errors.Add(new ParseError("Symbol '" + Nodes[0].Token.Text + "' is not declared. ", 0x1040, Nodes[0]));
            }
            else
            {
                NonTerminalSymbol nts = g.Symbols.Find(Nodes[0].Token.Text) as NonTerminalSymbol;
                if (nts == null)
                    tree.Errors.Add(new ParseError("Symbol '" + Nodes[0].Token.Text + "' is not declared. ", 0x1041, Nodes[0]));
                Rule r = (Rule)Nodes[2].Eval(tree, g, nts);
                if (nts != null)
                    nts.Rules.Add(r);

                if (Nodes[3].Token.Type == TokenType.CODEBLOCK)
                {
                    string codeblock = Nodes[3].Token.Text;
                    nts.CodeBlock = codeblock;
                    ValidateCodeBlock(tree, nts, Nodes[3]);

                    // beautify the codeblock format
                    codeblock = codeblock.Substring(1, codeblock.Length - 3).Trim();
                    nts.CodeBlock = codeblock;

                }

            }
            return g;
        }

        protected override object EvalRule(ParseTree tree, params object[] paramlist)
        {
            return Nodes[0].Eval(tree, paramlist);
        }

        protected override object EvalSubrule(ParseTree tree, params object[] paramlist)
        {
            if (Nodes.Count == 1) // single symbol
                return Nodes[0].Eval(tree, paramlist);

            Rule choiceRule = new Rule(RuleType.Choice);
            // i+=2 to skip to the | symbols
            for (int i = 0; i < Nodes.Count; i += 2)
            {
                Rule rule = (Rule)Nodes[i].Eval(tree, paramlist);
                choiceRule.Rules.Add(rule);
            }

            return choiceRule;
        }

        protected override object EvalConcatRule(ParseTree tree, params object[] paramlist)
        {
            if (Nodes.Count == 1) // single symbol
                return Nodes[0].Eval(tree, paramlist);

            Rule concatRule = new Rule(RuleType.Concat);
            for (int i = 0; i < Nodes.Count; i++)
            {
                Rule rule = (Rule)Nodes[i].Eval(tree, paramlist);
                concatRule.Rules.Add(rule);
            }

            return concatRule;
        }


        protected override object EvalSymbol(ParseTree tree, params object[] paramlist)
        {
            ParseNode last = Nodes[Nodes.Count - 1];
            if (last.Token.Type == TokenType.UNARYOPER)
            {
                Rule unaryRule;
                string oper = last.Token.Text.Trim();
                if (oper == "*") unaryRule = new Rule(RuleType.ZeroOrMore);
                else if (oper == "+") unaryRule = new Rule(RuleType.OneOrMore);
                else if (oper == "?") unaryRule = new Rule(RuleType.Option);
                else throw new NotImplementedException("unknown unary operator");

                if (Nodes[0].Token.Type == TokenType.BRACKETOPEN)
                {
                    Rule rule = (Rule) Nodes[1].Eval(tree, paramlist);
                    unaryRule.Rules.Add(rule);
                }
                else
                {
                    Grammar g = (Grammar)paramlist[0];
                    if (Nodes[0].Token.Type == TokenType.IDENTIFIER)
                    {

                        Symbol s = g.Symbols.Find(Nodes[0].Token.Text);
                        if (s == null)
                        {
                            tree.Errors.Add(new ParseError("Symbol '" + Nodes[0].Token.Text + "' is not declared. ", 0x1042, Nodes[0]));
                        }
                        Rule r = new Rule(s);
                        unaryRule.Rules.Add(r);
                    }
                }
                return unaryRule;
            }

            if (Nodes[0].Token.Type == TokenType.BRACKETOPEN)
            { 
                // create subrule syntax tree
                return Nodes[1].Eval(tree, paramlist);
            }
            else
            {
                Grammar g = (Grammar)paramlist[0];
                Symbol s = (Symbol)g.Symbols.Find(Nodes[0].Token.Text);
                if (s == null)
                {
                    tree.Errors.Add(new ParseError("Symbol '" + Nodes[0].Token.Text + "' is not declared.", 0x1043, Nodes[0]));
                }
                return new Rule(s);
            }
        }

        /// <summary>
        /// validates whether $ variables are corresponding to valid symbols
        /// errors are added to the tree Error object.
        /// </summary>
        /// <param name="nts">non terminal and its production rule</param>
        /// <returns>a formated codeblock</returns>
        private void ValidateCodeBlock(ParseTree tree, NonTerminalSymbol nts, ParseNode node)
        {
            if (nts == null) return;
            string codeblock = nts.CodeBlock;

            Regex var = new Regex(@"\$(?<var>[a-zA-Z_0-9]+)(\[(?<index>[^]]+)\])?", RegexOptions.Compiled);

            Symbols symbols = nts.DetermineProductionSymbols();


            MatchCollection matches = var.Matches(codeblock);
            foreach (Match match in matches)
            {
                Symbol s = symbols.Find(match.Groups["var"].Value);
                if (s == null)
                {
                    tree.Errors.Add(new ParseError("Variable $" + match.Groups["var"].Value + " cannot be matched.", 0x1016, 0, node.Token.StartPos + match.Groups["var"].Index, node.Token.StartPos + match.Groups["var"].Index, match.Groups["var"].Length));
                    break; // error situation
                }
            }
        }

    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Architect Rubicon
Netherlands Netherlands
Currently Herre Kuijpers is employed at Rubicon. During his career he developed skills with all kinds of technologies, methodologies and programming languages such as c#, ASP.Net, .Net Core, VC++, Javascript, SQL, Agile, Scrum, DevOps, ALM. Currently he fulfills the role of software architect in various projects.

Herre Kuijpers is a very experienced software architect with deep knowledge of software design and development on the Microsoft .Net platform. He has a broad knowledge of Microsoft products and knows how these, in combination with custom software, can be optimally implemented in the often complex environment of the customer.

Comments and Discussions