Click here to Skip to main content
15,886,026 members
Articles / Programming Languages / Visual Basic

A Tiny Parser Generator v1.2

Rate me:
Please Sign up or sign in to vote.
4.94/5 (201 votes)
21 Sep 2010CPOL25 min read 662.5K   17.5K   465  
TinyPG is a utility that makes it easier to write and try out your own parser/compiler.
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using TinyPG;
using TinyPG.Compiler;

namespace TinyPG.CodeGenerators.CSharp
{
    /// <summary>
    /// Code generator that fills in the "Parser.cs" template for a grammar by
    /// emitting one <c>Parse&lt;Name&gt;</c> method per non-terminal symbol.
    /// </summary>
    public class ParserGenerator : ICodeGenerator
    {
        internal ParserGenerator()
        {
        }

        /// <summary>
        /// Name of the template file; it is appended to the grammar's template
        /// path when <see cref="Generate"/> reads the template.
        /// </summary>
        public string FileName
        {
            get { return "Parser.cs"; }
        }

        /// <summary>
        /// Reads the parser template and substitutes its placeholders
        /// (<c>&lt;%Namespace%&gt;</c>, <c>&lt;%IParser%&gt;</c>, <c>&lt;%IParseTree%&gt;</c>,
        /// <c>&lt;%ParseNonTerminals%&gt;</c>).
        /// </summary>
        /// <param name="Grammar">Grammar supplying the template path, the non-terminals and the directives.</param>
        /// <param name="Debug">
        /// When true, the TinyPG.Debug namespace and interfaces are substituted;
        /// otherwise the namespace comes from the grammar's "TinyPG"/"Namespace" directive.
        /// </param>
        /// <returns>The filled-in template text, or null when the grammar has no template path.</returns>
        public string Generate(Grammar Grammar, bool Debug)
        {
            // Resolve the template path once; it is needed for both the guard and the read.
            string templatePath = Grammar.GetTemplatePath();
            if (string.IsNullOrEmpty(templatePath))
            {
                return null;
            }

            // generate the parser file
            StringBuilder parsers = new StringBuilder();
            string parser = File.ReadAllText(templatePath + FileName);

            // build non terminal tokens
            foreach (NonTerminalSymbol s in Grammar.GetNonTerminals())
            {
                parsers.Append(GenerateParseMethod(s));
            }

            if (Debug)
            {
                parser = parser.Replace(@"<%Namespace%>", "TinyPG.Debug");
                parser = parser.Replace(@"<%IParser%>", " : TinyPG.Debug.IParser");
                parser = parser.Replace(@"<%IParseTree%>", "TinyPG.Debug.IParseTree");
            }
            else
            {
                parser = parser.Replace(@"<%Namespace%>", Grammar.Directives["TinyPG"]["Namespace"]);
                parser = parser.Replace(@"<%IParser%>", "");
                parser = parser.Replace(@"<%IParseTree%>", "ParseTree");
            }

            parser = parser.Replace(@"<%ParseNonTerminals%>", parsers.ToString());
            return parser;
        }

        /// <summary>
        /// Generates the method header and body of the <c>Parse&lt;Name&gt;</c>
        /// method for a single non-terminal symbol.
        /// </summary>
        /// <param name="s">The non-terminal whose rules are turned into parsing code.</param>
        /// <returns>The source text of the generated method, followed by a blank line.</returns>
        private string GenerateParseMethod(NonTerminalSymbol s)
        {
            StringBuilder sb = new StringBuilder();
            sb.AppendLine("        private void Parse" + s.Name + "(ParseNode parent)" + Helper.AddComment("NonTerminalSymbol: " + s.Name));
            sb.AppendLine("        {");
            sb.AppendLine("            Token tok;");
            sb.AppendLine("            ParseNode n;");
            sb.AppendLine("            ParseNode node = parent.CreateNode(scanner.GetToken(TokenType." + s.Name + "), \"" + s.Name + "\");");
            sb.AppendLine("            parent.Nodes.Add(node);");
            sb.AppendLine("");

            foreach (Rule rule in s.Rules)
            {
                // Emit the rule currently being iterated. Previously s.Rules[0]
                // was emitted on every pass, so any rule after the first was
                // replaced by a duplicate of the first one.
                sb.AppendLine(GenerateProductionRuleCode(rule, 3));
            }

            sb.AppendLine("            parent.Token.UpdateRange(node.Token);");
            sb.AppendLine("        }" + Helper.AddComment("NonTerminalSymbol: " + s.Name));
            sb.AppendLine();
            return sb.ToString();
        }

        /// <summary>
        /// Generates the rule logic inside the method body, recursing into the
        /// rule's sub-rules.
        /// </summary>
        /// <param name="r">The rule to generate code for.</param>
        /// <param name="indent">Indentation level of the generated code (four spaces per level).</param>
        /// <returns>The generated source text; empty for rule types that are not handled.</returns>
        private string GenerateProductionRuleCode(Rule r, int indent)
        {
            Symbols firsts;
            StringBuilder sb = new StringBuilder();
            string pad = IndentTabs(indent);

            switch (r.Type)
            {
                case RuleType.Terminal:
                    // expecting terminal, so scan it.
                    sb.AppendLine(pad + "tok = scanner.Scan(TokenType." + r.Symbol.Name + ");" + Helper.AddComment("Terminal Rule: " + r.Symbol.Name));
                    sb.AppendLine(pad + "n = node.CreateNode(tok, tok.ToString() );");
                    sb.AppendLine(pad + "node.Token.UpdateRange(tok);");
                    sb.AppendLine(pad + "node.Nodes.Add(n);");
                    sb.AppendLine(pad + "if (tok.Type != TokenType." + r.Symbol.Name + ") {");
                    sb.AppendLine(pad + "    tree.Errors.Add(new ParseError(\"Unexpected token '\" + tok.Text.Replace(\"\\n\", \"\") + \"' found. Expected \" + TokenType." + r.Symbol.Name + ".ToString(), 0x1001, 0, tok.StartPos, tok.StartPos, tok.Length));");
                    sb.AppendLine(pad + "    return;");
                    sb.AppendLine(pad + "}");
                    break;

                case RuleType.NonTerminal:
                    sb.AppendLine(pad + "Parse" + r.Symbol.Name + "(node);" + Helper.AddComment("NonTerminal Rule: " + r.Symbol.Name));
                    break;

                case RuleType.Concat:
                    foreach (Rule rule in r.Rules)
                    {
                        sb.AppendLine();
                        sb.AppendLine(pad + Helper.AddComment("Concat Rule"));
                        sb.Append(GenerateProductionRuleCode(rule, indent));
                    }
                    break;

                case RuleType.ZeroOrMore:
                    firsts = r.GetFirstTerminals();
                    AppendLookAhead(sb, pad, firsts, "ZeroOrMore Rule");
                    AppendFirstsCondition(sb, pad, "while (", firsts);
                    sb.AppendLine(")");
                    sb.AppendLine(pad + "{");

                    AppendSubRules(sb, r, indent + 1);

                    // Refresh the look-ahead token before the loop condition is re-evaluated.
                    AppendLookAhead(sb, pad, firsts, "ZeroOrMore Rule");
                    sb.AppendLine(pad + "}");
                    break;

                case RuleType.OneOrMore:
                    sb.AppendLine(pad + "do {" + Helper.AddComment("OneOrMore Rule"));

                    AppendSubRules(sb, r, indent + 1);

                    firsts = r.GetFirstTerminals();
                    AppendLookAhead(sb, pad + "    ", firsts, "OneOrMore Rule");
                    AppendFirstsCondition(sb, pad, "} while (", firsts);
                    sb.AppendLine(");" + Helper.AddComment("OneOrMore Rule"));
                    break;

                case RuleType.Option:
                    firsts = r.GetFirstTerminals();
                    AppendLookAhead(sb, pad, firsts, "Option Rule");
                    AppendFirstsCondition(sb, pad, "if (", firsts);
                    sb.AppendLine(")");
                    sb.AppendLine(pad + "{");

                    AppendSubRules(sb, r, indent + 1);
                    sb.AppendLine(pad + "}");
                    break;

                case RuleType.Choice:
                    firsts = r.GetFirstTerminals();
                    AppendLookAhead(sb, pad, firsts, "Choice Rule");

                    sb.AppendLine(pad + "switch (tok.Type)");
                    sb.AppendLine(pad + "{" + Helper.AddComment("Choice Rule"));
                    foreach (Rule rule in r.Rules)
                    {
                        // One case label per first terminal of this alternative.
                        foreach (TerminalSymbol s in rule.GetFirstTerminals())
                        {
                            sb.AppendLine(pad + "    case TokenType." + s.Name + ":");
                        }
                        sb.Append(GenerateProductionRuleCode(rule, indent + 2));
                        sb.AppendLine(pad + "        break;");
                    }
                    sb.AppendLine(pad + "    default:");
                    sb.AppendLine(pad + "        tree.Errors.Add(new ParseError(\"Unexpected token '\" + tok.Text.Replace(\"\\n\", \"\") + \"' found.\", 0x0002, 0, tok.StartPos, tok.StartPos, tok.Length));");
                    sb.AppendLine(pad + "        break;");
                    sb.AppendLine(pad + "}" + Helper.AddComment("Choice Rule"));
                    break;

                default:
                    break;
            }

            return sb.ToString();
        }

        // appends the generated code of every sub-rule of r at the given indentation level
        private void AppendSubRules(StringBuilder sb, Rule r, int indent)
        {
            foreach (Rule rule in r.Rules)
            {
                sb.Append(GenerateProductionRuleCode(rule, indent));
            }
        }

        // appends "<pad>tok = scanner.LookAhead(TokenType.A, TokenType.B);" followed by the comment
        private static void AppendLookAhead(StringBuilder sb, string pad, Symbols firsts, string comment)
        {
            sb.Append(pad + "tok = scanner.LookAhead(");
            bool first = true;
            foreach (TerminalSymbol s in firsts)
            {
                sb.Append((first ? "TokenType." : ", TokenType.") + s.Name);
                first = false;
            }
            sb.AppendLine(");" + Helper.AddComment(comment));
        }

        // appends "<pad><opener>tok.Type == TokenType.A" plus one "|| tok.Type == ..." line per
        // further terminal; the caller appends the closing parenthesis
        private static void AppendFirstsCondition(StringBuilder sb, string pad, string opener, Symbols firsts)
        {
            bool first = true;
            foreach (TerminalSymbol s in firsts)
            {
                if (first)
                {
                    sb.Append(pad + opener + "tok.Type == TokenType." + s.Name);
                }
                else
                {
                    sb.Append("\r\n" + pad + "    || tok.Type == TokenType." + s.Name);
                }
                first = false;
            }
        }

        /// <summary>
        /// Returns the indentation for the given level: four spaces per level,
        /// so outlining of the generated code is consistent.
        /// </summary>
        /// <param name="indent">Indentation level; zero or negative yields an empty string.</param>
        /// <returns>A string of <c>indent * 4</c> spaces.</returns>
        public static string IndentTabs(int indent)
        {
            return indent > 0 ? new string(' ', indent * 4) : string.Empty;
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Architect Rubicon
Netherlands
Herre Kuijpers is currently employed at Rubicon. During his career he has developed skills in a wide range of technologies, methodologies, and programming languages, such as C#, ASP.NET, .NET Core, VC++, JavaScript, SQL, Agile, Scrum, DevOps, and ALM. He currently fulfills the role of software architect in various projects.

Herre Kuijpers is a very experienced software architect with deep knowledge of software design and development on the Microsoft .NET platform. He has a broad knowledge of Microsoft products and knows how these, in combination with custom software, can be optimally implemented in the often complex environment of the customer.

Comments and Discussions