Click here to Skip to main content
15,884,010 members
Articles / Programming Languages / Visual Basic

A Tiny Parser Generator v1.2

Rate me:
Please Sign up or sign in to vote.
4.94/5 (201 votes)
21 Sep 2010 | CPOL | 25 min read | 661.8K | 17.5K | 465
@TinyPG is a utility that makes it easier to write and try out your own parser/compiler
// Copyright 2008 - 2010 Herre Kuijpers - <herre.kuijpers@gmail.com>
//
// This source file(s) may be redistributed, altered and customized
// by any means PROVIDING the authors name and all copyright
// notices remain intact.
// THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED. USE IT AT YOUR OWN RISK. THE AUTHOR ACCEPTS NO
// LIABILITY FOR ANY DATA DAMAGE/LOSS THAT THIS PRODUCT MAY CAUSE.
//-----------------------------------------------------------------------
// version 0.4
using System;
using System.Collections.Generic;

namespace TinyPG.Compiler
{
    #region Parser

    /// <summary>
    /// Recursive-descent parser that turns the token stream produced by a
    /// <see cref="Scanner"/> into a <see cref="ParseTree"/>.
    /// </summary>
    /// <remarks>
    /// Each private Parse* method recognises one non-terminal. Read from the code,
    /// the grammar is:
    /// <code>
    /// Start         -> ( WHITESPACE* ExtProduction? )* EOF
    /// ExtProduction -> Attribute* Production
    /// Attribute     -> ATTRIBUTESKIP
    /// Production    -> IDENTIFIER ARROW Rule ( CODEBLOCK | SEMICOLON )
    /// Rule          -> REGEX | Subrule
    /// Subrule       -> Symbol ( BinaryOper Symbol )*
    /// Symbol        -> ( IDENTIFIER | BRACKETOPEN Subrule BRACKETCLOSE ) UNARYOPER?
    /// BinaryOper    -> WHITESPACE | PIPE
    /// </code>
    /// Unexpected tokens are reported by appending a <c>ParseError</c> to the
    /// tree's <c>Errors</c> collection; parsing then carries on.
    /// The sequence of scanner calls (LookAhead / Scan / GetToken) is kept exactly
    /// as generated, including look-aheads that appear redundant, because the
    /// scanner's side effects are not visible from this file.
    /// </remarks>
    public partial class Parser
    {
        private Scanner scanner;
        private ParseTree tree;

        /// <summary>Creates a parser that reads its tokens from <paramref name="scanner"/>.</summary>
        /// <param name="scanner">The scanner used for every subsequent parse.</param>
        public Parser(Scanner scanner)
        {
            this.scanner = scanner;
        }

        /// <summary>Parses <paramref name="input"/> into a newly created tree.</summary>
        /// <param name="input">The text handed to the scanner.</param>
        /// <returns>The new tree, including any errors recorded while parsing.</returns>
        public ParseTree Parse(string input)
        {
            // The two-argument overload stores the tree in the field itself.
            return Parse(input, new ParseTree());
        }

        /// <summary>Parses <paramref name="input"/>, adding the resulting nodes to <paramref name="tree"/>.</summary>
        /// <param name="input">The text handed to the scanner.</param>
        /// <param name="tree">The tree that receives the nodes, the errors and the scanner's skipped tokens.</param>
        /// <returns>The same <paramref name="tree"/> instance.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="tree"/> is null.</exception>
        public ParseTree Parse(string input, ParseTree tree)
        {
            // Fail early and clearly instead of with a NullReferenceException
            // raised after the scanner has already been re-initialised.
            if (tree == null)
            {
                throw new ArgumentNullException("tree");
            }

            scanner.Init(input);

            this.tree = tree;
            ParseStart(tree);
            tree.Skipped = scanner.Skipped;

            return tree;
        }

        /// <summary>
        /// Creates the node for a non-terminal under <paramref name="parent"/>,
        /// attaches it, and returns it so the caller can descend into it.
        /// </summary>
        private ParseNode AddNonTerminalNode(ParseNode parent, TokenType type, string text)
        {
            ParseNode child = parent.CreateNode(scanner.GetToken(type), text);
            parent.Nodes.Add(child);
            return child;
        }

        /// <summary>
        /// Consumes the next token as the terminal <paramref name="expected"/>.
        /// A token of a different type is reported as an error but is still
        /// added to <paramref name="node"/>, exactly like a matching one.
        /// </summary>
        private void ScanTerminal(ParseNode node, TokenType expected)
        {
            Token tok = scanner.Scan();
            if (tok.Type != expected)
            {
                AddUnexpectedTokenError(tok, " Expected " + expected.ToString());
            }

            ParseNode child = node.CreateNode(tok, tok.ToString());
            node.Token.UpdateRange(tok);
            node.Nodes.Add(child);
        }

        /// <summary>
        /// Records an "Unexpected token" error for <paramref name="tok"/>.
        /// <paramref name="detail"/> is appended verbatim after "found." and may be empty.
        /// </summary>
        private void AddUnexpectedTokenError(Token tok, string detail)
        {
            // Line feeds are stripped from the token text before it goes into the message.
            string message = "Unexpected token '" + tok.Text.Replace("\n", "") + "' found." + detail;
            tree.Errors.Add(new ParseError(message, 0, tok.Start, tok.Start, tok.End - tok.Start));
        }

        /// <summary>Start -> ( WHITESPACE* ExtProduction? )* EOF</summary>
        private void ParseStart(ParseNode node) // NonTerminalSymbol: Start
        {
            node = AddNonTerminalNode(node, TokenType.Start, "Start");

            // ZeroOrMore: ( WHITESPACE* ExtProduction? )*
            Token tok = scanner.LookAhead();
            while (tok.Type == TokenType.WHITESPACE
                || tok.Type == TokenType.ATTRIBUTESKIP
                || tok.Type == TokenType.IDENTIFIER)
            {
                // ZeroOrMore: WHITESPACE*
                tok = scanner.LookAhead();
                while (tok.Type == TokenType.WHITESPACE)
                {
                    ScanTerminal(node, TokenType.WHITESPACE);
                    tok = scanner.LookAhead();
                }

                // Option: ExtProduction?
                tok = scanner.LookAhead();
                if (tok.Type == TokenType.ATTRIBUTESKIP
                    || tok.Type == TokenType.IDENTIFIER)
                {
                    ParseExtProduction(node);
                }

                tok = scanner.LookAhead();
            }

            ScanTerminal(node, TokenType.EOF);
        } // NonTerminalSymbol: Start

        /// <summary>ExtProduction -> Attribute* Production</summary>
        private void ParseExtProduction(ParseNode node) // NonTerminalSymbol: ExtProduction
        {
            node = AddNonTerminalNode(node, TokenType.ExtProduction, "ExtProduction");

            // ZeroOrMore: Attribute*
            Token tok = scanner.LookAhead();
            while (tok.Type == TokenType.ATTRIBUTESKIP)
            {
                ParseAttribute(node);
                tok = scanner.LookAhead();
            }

            ParseProduction(node);
        } // NonTerminalSymbol: ExtProduction

        /// <summary>Attribute -> ATTRIBUTESKIP</summary>
        private void ParseAttribute(ParseNode node) // NonTerminalSymbol: Attribute
        {
            node = AddNonTerminalNode(node, TokenType.Attribute, "Attribute");

            ScanTerminal(node, TokenType.ATTRIBUTESKIP);
        } // NonTerminalSymbol: Attribute

        /// <summary>Production -> IDENTIFIER ARROW Rule ( CODEBLOCK | SEMICOLON )</summary>
        private void ParseProduction(ParseNode node) // NonTerminalSymbol: Production
        {
            node = AddNonTerminalNode(node, TokenType.Production, "Production");

            ScanTerminal(node, TokenType.IDENTIFIER);
            ScanTerminal(node, TokenType.ARROW);
            ParseRule(node);

            // Choice: CODEBLOCK | SEMICOLON
            Token tok = scanner.LookAhead();
            switch (tok.Type)
            {
                case TokenType.CODEBLOCK:
                    ScanTerminal(node, TokenType.CODEBLOCK);
                    break;
                case TokenType.SEMICOLON:
                    ScanTerminal(node, TokenType.SEMICOLON);
                    break;
                default:
                    // Neither alternative matches: report the look-ahead token without consuming it.
                    AddUnexpectedTokenError(tok, string.Empty);
                    break;
            }
        } // NonTerminalSymbol: Production

        /// <summary>Rule -> REGEX | Subrule</summary>
        private void ParseRule(ParseNode node) // NonTerminalSymbol: Rule
        {
            node = AddNonTerminalNode(node, TokenType.Rule, "Rule");

            // Choice: REGEX | Subrule
            Token tok = scanner.LookAhead();
            switch (tok.Type)
            {
                case TokenType.REGEX:
                    ScanTerminal(node, TokenType.REGEX);
                    break;
                case TokenType.IDENTIFIER:
                case TokenType.BRACKETOPEN:
                case TokenType.UNARYOPER:
                    ParseSubrule(node);
                    break;
                default:
                    // No alternative matches: report the look-ahead token without consuming it.
                    AddUnexpectedTokenError(tok, string.Empty);
                    break;
            }
        } // NonTerminalSymbol: Rule

        /// <summary>Subrule -> Symbol ( BinaryOper Symbol )*</summary>
        private void ParseSubrule(ParseNode node) // NonTerminalSymbol: Subrule
        {
            node = AddNonTerminalNode(node, TokenType.Subrule, "Subrule");

            ParseSymbol(node);

            // ZeroOrMore: ( BinaryOper Symbol )*
            Token tok = scanner.LookAhead();
            while (tok.Type == TokenType.WHITESPACE
                || tok.Type == TokenType.PIPE)
            {
                ParseBinaryOper(node);
                ParseSymbol(node);
                tok = scanner.LookAhead();
            }
        } // NonTerminalSymbol: Subrule

        /// <summary>Symbol -> ( IDENTIFIER | BRACKETOPEN Subrule BRACKETCLOSE ) UNARYOPER?</summary>
        private void ParseSymbol(ParseNode node) // NonTerminalSymbol: Symbol
        {
            node = AddNonTerminalNode(node, TokenType.Symbol, "Symbol");

            // Choice: IDENTIFIER | BRACKETOPEN Subrule BRACKETCLOSE
            Token tok = scanner.LookAhead();
            switch (tok.Type)
            {
                case TokenType.IDENTIFIER:
                    ScanTerminal(node, TokenType.IDENTIFIER);
                    break;
                case TokenType.BRACKETOPEN:
                    ScanTerminal(node, TokenType.BRACKETOPEN);
                    ParseSubrule(node);
                    ScanTerminal(node, TokenType.BRACKETCLOSE);
                    break;
                default:
                    // No alternative matches: report the look-ahead token without consuming it.
                    AddUnexpectedTokenError(tok, string.Empty);
                    break;
            }

            // Option: UNARYOPER?
            tok = scanner.LookAhead();
            if (tok.Type == TokenType.UNARYOPER)
            {
                ScanTerminal(node, TokenType.UNARYOPER);
            }
        } // NonTerminalSymbol: Symbol

        /// <summary>BinaryOper -> WHITESPACE | PIPE</summary>
        private void ParseBinaryOper(ParseNode node) // NonTerminalSymbol: BinaryOper
        {
            node = AddNonTerminalNode(node, TokenType.BinaryOper, "BinaryOper");

            // Choice: WHITESPACE | PIPE
            Token tok = scanner.LookAhead();
            switch (tok.Type)
            {
                case TokenType.WHITESPACE:
                    ScanTerminal(node, TokenType.WHITESPACE);
                    break;
                case TokenType.PIPE:
                    ScanTerminal(node, TokenType.PIPE);
                    break;
                default:
                    // No alternative matches: report the look-ahead token without consuming it.
                    AddUnexpectedTokenError(tok, string.Empty);
                    break;
            }
        } // NonTerminalSymbol: BinaryOper
    }

    #endregion Parser
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Architect Rubicon
Netherlands
Currently Herre Kuijpers is employed at Rubicon. During his career he has developed skills with all kinds of technologies, methodologies and programming languages, such as C#, ASP.NET, .NET Core, VC++, JavaScript, SQL, Agile, Scrum, DevOps and ALM. Currently he fulfills the role of software architect in various projects.

Herre Kuijpers is a very experienced software architect with deep knowledge of software design and development on the Microsoft .NET platform. He has a broad knowledge of Microsoft products and knows how these, in combination with custom software, can be optimally implemented in the often complex environment of the customer.

Comments and Discussions