Click here to Skip to main content
11,437,816 members (42,980 online)
Click here to Skip to main content
Add your own
alternative version

A Simple Compiler for the Common Language Runtime

, 11 May 2003
An end-to-end example of a bottom up LALR(1) compiler for a fictitious language targeting the Common Language Runtime
compiler_demo.zip
ICSharpCode.TextEditor.dll
Images.bmp
Magic IDE.exe
MagicLibrary.DLL
Sharp.cgt
UtilityLibrary.dll
Bubble Sort
Bubble Sort.shp
Config.dck
mike.exe
Sort.exe
Sort.pdb
Sort.sh
Config.dck
Core.dll
compiler_src.zip
Compiler.sln.old
Compiler.suo
Core
App.ico
bin
Core.csproj.user
Core.suo
Magic IDE
App.ico
bin
Debug
Config.dck
Images.bmp
Sharp.cgt
ICSharpCode.TextEditor.dll
Magic IDE.csproj.user
Magic IDE.suo
MagicLibrary.DLL
UtilityLibrary.dll
/*
Sharp Compiler
Copyright (C) 2003  Michael Bebenita

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/
using System;
using System.Diagnostics;
using System.Collections;
using System.Text;

namespace Core
{
	public delegate void ParserEventHandler (Token token, string message);

	public class Parser
	{
		private Scanner m_Scanner = null;
		private Language m_Language = null;
	
		public event ParserEventHandler Error;

		public Parser(Scanner scanner, Language language)
		{
			if(scanner == null)
				throw new ArgumentNullException("Scanner");
			if(language == null)
				throw new ArgumentNullException("Language");
			m_Scanner = scanner;
			m_Language = language;
		}

		public ParseTreeNode CreateParseTree()
		{
			// In block comment.
			bool inCommentBlock = false;

			// Parser stack.
			Stack stack = new Stack();

			// Tree stack.
			Stack treeStack = new Stack();

			m_Scanner.Reset();
			ParserState currentState = m_Language.ParserStartState;

			// Push start state.
			stack.Push(currentState);

			while(true)
			{
				
				// Get next token.
				Symbol nextSymbol = m_Scanner.PeekNextToken().Symbol;

				// Ignore whitespace.
				if(nextSymbol.Type == SymbolType.Whitespace)
				{
					m_Scanner.GetNextToken();
					continue;
				}

				// Ignore comment blocks
				if(nextSymbol.Type == SymbolType.CommentStart)
				{
					m_Scanner.GetNextToken();
					inCommentBlock = true;
					continue;
				}

				// Ignore comment blocks
				if(nextSymbol.Type == SymbolType.CommentEnd)
				{
					m_Scanner.GetNextToken();
					inCommentBlock = false;
					continue;
				}

				// Ignore stuff inside comments
				if(inCommentBlock)
				{
					m_Scanner.GetNextToken();
					continue;
				}


				Print(stack);

				// Lookup action out of current state.
				Action action = currentState.Find(nextSymbol);

				// Do we have a parser error ? (Entered an invalid state.)
				if(action == null)
				{
					Debug.WriteLine("Error");
					break;
				}

				// Should we shift token and the next state on the stack.
				else if(action.Type == ActionType.Shift)
				{
					stack.Push(m_Scanner.GetNextToken().Symbol);
					stack.Push(action.ParserState);

					// Push terminal onto tree stack.
					treeStack.Push(new ParseTreeNode(nextSymbol.Name,nextSymbol,null));
				}
				else if(action.Type == ActionType.Goto)
				{
					Debug.WriteLine("Goto");
				}
				// Should we reduce ?
				else if(action.Type == ActionType.Reduce)
				{
					// Pop off the stack however many state-symbol pairs as the Production
					// has Right Terminals,Nonterminals.

					int rightItems = action.Production.Right.Length;
					for(int i = 0;i < rightItems;i++)
					{
						stack.Pop();
						stack.Pop();
					}

					// Find the top of the stack.
					ParserState topState = (ParserState)stack.Peek();
					// Push left hand side of the production.
					stack.Push(action.Production.Left);
					// Find next state by looking up the action for the top of the stack
					// on the Left hand side symbol of the production.
					stack.Push(topState.Find(action.Production.Left).ParserState);

					// Create ParseTreeNode
					ParseTreeNode [] children = new ParseTreeNode[rightItems];
					ParseTreeNode node = new ParseTreeNode(action.Production.Left.Name,action.Production.Left,children);
					for(int i = rightItems - 1;i >= 0;i--)
					{
						children[i] = (ParseTreeNode)treeStack.Pop();
						children[i].Parent = node;
					}
					treeStack.Push(node);
				}
				else if(action.Type == ActionType.Accept)
				{
					Debug.WriteLine("Accept");
					return (ParseTreeNode)treeStack.Pop();
				}
				currentState = (ParserState)stack.Peek();
			}

			return null;
		}


		public Module CreateSyntaxTree()
		{
			// Are we currently in a comment block.
			bool inCommentBlock = false;

			// Parser stack.
			Stack stack = new Stack();

			// Syntax stack.
			SyntaxStack syntaxStack = new SyntaxStack();

			m_Scanner.Reset();
			ParserState currentState = m_Language.ParserStartState;

			// Push start state.
			stack.Push(currentState);

			while(true)
			{
				// Get next token.
				Token nextToken = m_Scanner.PeekNextToken();
				Symbol nextSymbol = nextToken.Symbol;

				// Ignore whitespace.
				if(nextSymbol.Type == SymbolType.Whitespace)
				{
					m_Scanner.GetNextToken();
					continue;
				}

				// Ignore comment blocks
				if(nextSymbol.Type == SymbolType.CommentStart)
				{
					m_Scanner.GetNextToken();
					inCommentBlock = true;
					continue;
				}

				// Ignore comment blocks
				if(nextSymbol.Type == SymbolType.CommentEnd)
				{
					m_Scanner.GetNextToken();
					inCommentBlock = false;
					continue;
				}

				// Ignore stuff inside comments
				if(inCommentBlock)
				{
					m_Scanner.GetNextToken();
					continue;
				}

				Print(stack);
				PrintSyntax(syntaxStack.Stack);

				// Lookup action out of current state.
				Action action = currentState.Find(nextSymbol);

				// Do we have a parser error ? (Entered an invalid state.)
				if(action == null)
				{
					StringBuilder message = new StringBuilder("Token Unexpected, expecting [ ");

					for(int x = 0; x < currentState.Actions.Length; x++)
					{
						if(currentState.Actions[x].Symbol.Type == SymbolType.Terminal)
							message.Append(currentState.Actions[x].Symbol.Name + " ");
					}
					message.Append("]");

					if(Error != null)
						Error(nextToken,message.ToString());
					
					return null;
				}

				// Should we shift token and the next state on the stack.
				else if(action.Type == ActionType.Shift)
				{
					Token token = m_Scanner.GetNextToken();
					stack.Push(token.Symbol);
					syntaxStack.Push(token.Text);
					stack.Push(action.ParserState);
				}
				// Should we reduce ?
				else if(action.Type == ActionType.Reduce)
				{
					// Pop off the stack however many state-symbol pairs as the Production
					// has Right Terminals,Nonterminals.

					int rightItems = action.Production.Right.Length;
					for(int i = 0;i < rightItems;i++)
					{
						stack.Pop();
						stack.Pop();
					}

					// Find the top of the stack.
					ParserState topState = (ParserState)stack.Peek();
					// Push left hand side of the production.
					stack.Push(action.Production.Left);
					// Find next state by looking up the action for the top of the stack
					// on the Left hand side symbol of the production.
					stack.Push(topState.Find(action.Production.Left).ParserState);

					// Apply semantic rule.
					Semantics.Apply(action.Production,syntaxStack);

				}
				else if(action.Type == ActionType.Accept)
				{
					Debug.WriteLine("Accept");
					return (Module)syntaxStack.Pop();
				}
				currentState = (ParserState)stack.Peek();
			}

			return null;
		}


		private void PrintSyntax(Stack stack)
		{
			string str = "Syntax : ";
			object [] items = stack.ToArray();

			for(int i = items.Length - 1;i >= 0;i--)
			{
				str += items[i].ToString()+ " ";
			}

			Debug.WriteLine(str);
		}
		private void Print(Stack stack)
		{
			string str = "Stack : ";
			object [] items = stack.ToArray();

			for(int i = items.Length - 1;i >= 0;i--)
			{
				object item = items[i];
				if(item.GetType() == typeof(ParserState))
				{
					str += ((ParserState)item).m_ID.ToString() + " ";
				}
				else if(item.GetType() == typeof(Symbol))
				{
					str += ((Symbol)item).Name + " ";
				}
			}

			Debug.WriteLine(str);
		}
	}

	/// <summary>
	/// Describes a node in a parse tree.
	/// </summary>
	public class ParseTreeNode
	{
		public string Name;

		public ParseTreeNode Parent = null;
		public readonly ParseTreeNode [] Children = null;
		public readonly Symbol Symbol = null;

		public ParseTreeNode(string name, Symbol symbol, ParseTreeNode [] children)
		{
			Name = name;
			Symbol = symbol;
			Children = children;
		}
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here

Share

About the Author

Michael Bebenita
Web Developer
United States United States
Currently a graduate student at UCI.

| Advertise | Privacy | Terms of Use | Mobile
Web04 | 2.8.150506.1 | Last Updated 12 May 2003
Article Copyright 2003 by Michael Bebenita
Everything else Copyright © CodeProject, 1999-2015
Layout: fixed | fluid