Click here to Skip to main content
15,892,737 members
Articles / Programming Languages / C#

A Simple Compiler for the Common Language Runtime

Rate me:
Please Sign up or sign in to vote.
4.89/5 (86 votes)
11 May 20039 min read 295.7K   5.1K   190  
An end-to-end example of a bottom up LALR(1) compiler for a fictitious language targeting the Common Language Runtime
/*
Sharp Compiler
Copyright (C) 2003  Michael Bebenita

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/
using System;

namespace Core
{
	public class State
	{
		private bool m_IsAccepting;
		private Symbol m_Accepts;
		private Edge [] m_Edges;
		private int m_ID;

		/// <summary>
		/// State ID
		/// </summary>
		public int ID
		{
			get{return m_ID;}
			set{m_ID = value;}
		}

		/// <summary>
		/// State is an accepting state.
		/// </summary>
		public bool IsAccepting
		{
			get{return m_IsAccepting;}
			set{m_IsAccepting = value;}
		}

		/// <summary>
		/// The symbol the state accepts if its an accepting state.
		/// </summary>
		public Symbol Accepts
		{
			get{return m_Accepts;}
			set{m_Accepts = value;}
		}

		/// <summary>
		/// Outbound edges.
		/// </summary>
		public Edge[] Edges
		{
			get{return m_Edges;}
			set{m_Edges = value;}
		}

		/// <summary>
		/// Try to move to next state.
		/// </summary>
		public State Move(char c)
		{
			State nextState = new State();
			foreach(Edge edge in m_Edges)
			{
				nextState = edge.Move(c);
				if(nextState != null)
					return nextState;
			}
			return null;
		}
	}

	public class Edge
	{
		private string m_Characters;
		private State m_Target;
		
		/// <summary>
		/// Set of characters that cause an edge transition.
		/// </summary>
		public string Characters
		{
			get{return m_Characters;}
			set{m_Characters = value;}
		}

		/// <summary>
		/// State the edge is pointing to.
		/// </summary>
		public State Target
		{
			get{return m_Target;}
			set{m_Target = value;}
		}

		/// <summary>
		/// Try to move to target state.
		/// </summary>
		/// <param name="c">Character to move on.</param>
		/// <returns>Target state or null if character doesn't exist in the character set.</returns>
		public State Move(char c)
		{
			if(m_Characters.IndexOf(c) != -1)
				return Target;
			else
				return null;
		}
	}

	public enum SymbolType
	{
		Nonterminal = 0,
		Terminal,
		Whitespace,
		/// <summary>
		/// End of source input.
		/// </summary>
		End,
		CommentStart,
		CommentEnd,
		CommentLine,
		Error
	}

	public class Symbol
	{
		protected string m_Name;
		protected SymbolType m_Type;
		protected int m_ID;

		public string Name
		{
			get{return m_Name;}
			set{m_Name = value;}
		}

		public virtual SymbolType Type
		{
			get{return m_Type;}
			set{m_Type = value;}
		}

		public override string ToString()
		{
			if(m_Type == SymbolType.Terminal)
				return m_Name;
			else
				return "<" + m_Name + ">";	
		}
	}

	public enum TokenType
	{
		Eof         = 0,  // (EOF)
		Error       = 1,  // (Error)
		Whitespace  = 2,  // (Whitespace)
		Commentline = 3,  // (Comment Line)
		Identifier  = 4,  // Identifier
	}

	public class Token
	{
		public Symbol Symbol;
		public string Text;
		public int Line;
		public int Column;

		public Token(Symbol symbol)
		{
			Symbol = symbol;
		}

		public override string ToString()
		{
			if(Symbol != null)
				return "Token {" + Symbol.Name + "," + Symbol.Type.ToString() + ",'" + Text + "',[" + Line + "," + Column + "]}";
			else
				return "Token {Error" + "'" + Text + "',[" + Line + "," + Column + "]}";
		}
	}

	public enum ActionType
	{
		Shift = 1,
		Reduce = 2,
		Goto = 3,
		Accept = 4
	}

	public class Action
	{
		private ActionType m_Type;
		private Symbol m_Symbol;

		private ParserState m_ParserState;
		private Production m_Production;

		public ParserState ParserState
		{
			get{return m_ParserState;}
			set{m_ParserState = value;}
		}

		public Production Production
		{
			get{return m_Production;}
			set{m_Production = value;}
		}
		
		public Symbol Symbol
		{
			get{return m_Symbol;}
			set{m_Symbol = value;}
		}

		public ActionType Type
		{
			get{return m_Type;}
			set{m_Type = value;}
		}

	}

	public class ParserState
	{
		public int m_ID;

		private Action [] m_Actions;
	
		public Action Find(Symbol symbol)
		{
			foreach(Action action in m_Actions)
			{
				if(action.Symbol == symbol)
					return action;
			}

			return null;
		}

		public Action [] Actions
		{
			get{return m_Actions;}
			set{m_Actions = value;}
		}
	}

	public class Production
	{
		public int m_ID;
		private Symbol m_Left;
		private Symbol [] m_Right;

		/// <summary>
		/// Left hand side of the production.
		/// </summary>
		public Symbol Left
		{
			get{return m_Left;}
			set{m_Left = value;}
		}

		/// <summary>
		/// Right hand side of the production, terminals and nonterminals.
		/// </summary>
		public Symbol [] Right
		{
			get{return m_Right;}
			set{m_Right = value;}
		}

		public override string ToString()
		{
			string str = "";
			str = m_Left.ToString() + " =>";
			foreach(Symbol symbol in m_Right)
			{	
				str += " " + symbol.ToString();
			}
			return str;
		}
	}
	
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
United States United States
Currently a graduate student at UCI.

Comments and Discussions