Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version

A Simple Compiler for the Common Language Runtime

, 11 May 2003
An end-to-end example of a bottom up LALR(1) compiler for a fictitious language targeting the Common Language Runtime
compiler_demo.zip
ICSharpCode.TextEditor.dll
Images.bmp
Magic IDE.exe
MagicLibrary.DLL
Sharp.cgt
UtilityLibrary.dll
Bubble Sort
Bubble Sort.shp
Config.dck
mike.exe
Sort.exe
Sort.pdb
Sort.sh
Config.dck
Core.dll
compiler_src.zip
Compiler.sln.old
Compiler.suo
Core
App.ico
bin
Core.csproj.user
Core.suo
Magic IDE
App.ico
bin
Debug
Config.dck
Images.bmp
Sharp.cgt
ICSharpCode.TextEditor.dll
Magic IDE.csproj.user
Magic IDE.suo
MagicLibrary.DLL
UtilityLibrary.dll
/*
Sharp Compiler
Copyright (C) 2003  Michael Bebenita

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/
using System;
using System.IO;
using System.Text;
using System.Collections;
using System.Diagnostics;

namespace Core
{
	public class Language
	{
		private State[] m_States = null;
		private Production[] m_Productions = null;
		private ParserState [] m_ParserStates = null;
		private Symbol[] m_Symbols = null;

		private State m_StartState = null;
		private ParserState m_ParserStartState = null;

		public static Language FromFile(string path)
		{
			Language language = new Language();

			Grammar grammar = Grammar.FromFile(path);
		
			//
			// Create Symbols
			//

			language.m_Symbols = new Symbol[grammar.SymbolTable.Length];
			for(int i = 0; i < grammar.SymbolTable.Length; i++)
			{
				Symbol newSymbol = new Symbol();
				newSymbol.Name = grammar.SymbolTable[i].Name;
				newSymbol.Type = (SymbolType)grammar.SymbolTable[i].Kind;
				language.m_Symbols[i] = newSymbol;
			}


			//
			// Create DFA states.
			//

			language.m_States = new State[grammar.DFATable.Length];
			for(int i = 0; i < grammar.DFATable.Length; i++)
			{
				language.m_States[i] = new State();
				language.m_States[i].ID = i;
			}

			//
			// Create Edges and update states.
			//

			for(int i = 0; i < grammar.DFATable.Length; i++)
			{
				// Update states.
				language.m_States[i].IsAccepting = grammar.DFATable[i].IsAccepting;
				if(language.m_States[i].IsAccepting)
					language.m_States[i].Accepts = language.m_Symbols[grammar.DFATable[i].AcceptIndex];
			
				// Create edges.
				language.m_States[i].Edges = new Edge[grammar.DFATable[i].Edges.Length];
				for(int j = 0; j < grammar.DFATable[i].Edges.Length; j++)
				{
					Edge newEdge = new Edge();
					newEdge.Characters = grammar.CharacterSetTable[grammar.DFATable[i].Edges[j].CharacterSetIndex].Characters;
					newEdge.Target = language.m_States[grammar.DFATable[i].Edges[j].TargetIndex];
					language.m_States[i].Edges[j] = newEdge;
				}
		
			}

			//
			// Create productions.
			//

			language.m_Productions = new Production[grammar.RuleTable.Length];
			for(int i = 0;i<grammar.RuleTable.Length;i++)
			{
				Production production = new Production();
				production.m_ID = i;
				production.Left = language.m_Symbols[grammar.RuleTable[i].Nonterminal];
				production.Right = new Symbol[grammar.RuleTable[i].Symbols.Length];
				for(int j = 0; j < grammar.RuleTable[i].Symbols.Length;j++)
				{
					production.Right[j] = language.m_Symbols[grammar.RuleTable[i].Symbols[j]];
				}
				language.m_Productions[i] = production ;
			}

			//
			// Create parser states.
			//

			language.m_ParserStates = new ParserState[grammar.LALRTable.Length];

			for(int i = 0;i<grammar.LALRTable.Length;i++)
				language.m_ParserStates[i] = new ParserState();;

			for(int i = 0;i<grammar.LALRTable.Length;i++)
			{
				ParserState state = language.m_ParserStates[i];
 				state.Actions = new Action[grammar.LALRTable[i].Actions.Length];
				for(int j = 0; j < grammar.LALRTable[i].Actions.Length; j++)
				{
					Action action = new Action();
					action.Symbol = language.m_Symbols[grammar.LALRTable[i].Actions[j].SymbolIndex];
					action.Type = (ActionType)grammar.LALRTable[i].Actions[j].ActionType;
					if((action.Type == ActionType.Shift) || (action.Type == ActionType.Goto))
						action.ParserState = language.m_ParserStates[grammar.LALRTable[i].Actions[j].Target];
					else if(action.Type == ActionType.Reduce)
						action.Production = language.m_Productions[grammar.LALRTable[i].Actions[j].Target];
					state.Actions[j] = action;
				}

				state.m_ID = i;
			}


			language.m_StartState = language.m_States[grammar.InitialDFAState];
			language.m_ParserStartState = language.m_ParserStates[grammar.InitialLALRState];
	
			return language;
		}

		public State[] States
		{
			get{return m_States;}
		}

		public ParserState[] ParserStates
		{
			get{return m_ParserStates;}
		}

		public Production[] Productions
		{
			get{return m_Productions;}
		}

		public Symbol[] Symbols
		{
			get{return m_Symbols;}
		}

		public State StartState
		{
			get{return m_StartState;}
		}

		public ParserState ParserStartState
		{
			get{return m_ParserStartState;}
		}

	
		public void ApplySemantic(Production production, Stack syntaxStack)
		{
			switch(production.m_ID)
			{
				case 0:
					break;
			}

		}

	}

	public class Grammar
	{
		internal class CharacterSet
		{
			public short Index;
			public string Characters;
		}

		internal class Symbol
		{
			public short Index;
			public string Name;
			public short Kind;
		}

		internal class Rule
		{
			public short Index;
			public short Nonterminal;
			public short [] Symbols = null;
		}

		internal class Edge
		{
			public short CharacterSetIndex;
			public short TargetIndex;
		}

		internal class DFA
		{
			public short Index;
			public bool IsAccepting;
			public short AcceptIndex;
			public Edge [] Edges;
		}

		internal class Action
		{
			public short SymbolIndex;
			public short ActionType;
			public short Target;
		}

		internal class LALR
		{
			public short Index;
			public Action [] Actions;
		}

		private static BinaryReader m_Reader = null;
		private static Encoding m_Encoding = null;

		public static Grammar FromFile(string path)
		{
			m_Encoding = new UnicodeEncoding(false, true);
			m_Reader = new BinaryReader(new FileStream(path,FileMode.Open));

			if(ReadString() != "GOLD Parser Tables/v1.0")
			{
				m_Reader.Close();
				throw new Exception("Invalid file format.");
			}

			Grammar grammar = new Grammar();

			int count;

			//
			// Read Records
			//
			
			try
			{
				while(m_Reader.ReadByte() == (byte)'M')
				{
				
					Int16 entries = m_Reader.ReadInt16();

					//
					// Read entry
					//

					m_Reader.ReadByte();
					switch(m_Reader.ReadByte())
					{
						case (byte)'P':
							m_Reader.ReadByte();
							grammar.Name = ReadString();
							m_Reader.ReadByte();
							grammar.Version = ReadString();
							m_Reader.ReadByte();
							grammar.Author = ReadString();
							m_Reader.ReadByte();
							grammar.About = ReadString();
							m_Reader.ReadByte();
							grammar.IsCaseSensitive = !(m_Reader.ReadByte() == 0);
							m_Reader.ReadByte();
							grammar.StartSymbol = m_Reader.ReadInt16();
							break;

						case (byte)'T':
						
							m_Reader.ReadByte();
							grammar.SymbolTable = new Symbol[m_Reader.ReadInt16()];

							m_Reader.ReadByte();
							grammar.CharacterSetTable = new CharacterSet[m_Reader.ReadInt16()];

							m_Reader.ReadByte();
							grammar.RuleTable = new Rule[m_Reader.ReadInt16()];

							m_Reader.ReadByte();
							grammar.DFATable = new DFA[m_Reader.ReadInt16()];

							m_Reader.ReadByte();
							grammar.LALRTable = new LALR[m_Reader.ReadInt16()];

							break;

						case (byte)'C':

						{
							CharacterSet item = new CharacterSet();

							m_Reader.ReadByte();
							item.Index = m_Reader.ReadInt16();

							m_Reader.ReadByte();
							item.Characters = ReadString();

							grammar.CharacterSetTable[item.Index] = item;

						}
						
							break;

						case (byte)'R':

						{
							Rule item = new Rule();

							m_Reader.ReadByte();
							item.Index = m_Reader.ReadInt16();

							m_Reader.ReadByte();
							item.Nonterminal = m_Reader.ReadInt16();

							m_Reader.ReadByte();

							//
							// Read Symbol Indices
							//

							item.Symbols = new short[entries - 4];
							for(int x = 0; x < item.Symbols.Length; x++)
							{
								m_Reader.ReadByte();
								item.Symbols[x] = m_Reader.ReadInt16();
							}

							grammar.RuleTable[item.Index] = item;
						}

							break;

						case (byte)'S':

						{
							Symbol item = new Symbol();

							m_Reader.ReadByte();
							item.Index = m_Reader.ReadInt16();

							m_Reader.ReadByte();
							item.Name = ReadString();

							m_Reader.ReadByte();
							item.Kind = m_Reader.ReadInt16();

							grammar.SymbolTable[item.Index] = item;
						}


							break;

						case (byte)'D':

						
						{
							DFA item = new DFA();

							m_Reader.ReadByte();
							item.Index = m_Reader.ReadInt16();

							m_Reader.ReadByte();
							item.IsAccepting = !(m_Reader.ReadByte() == 0);

							m_Reader.ReadByte();
							item.AcceptIndex = m_Reader.ReadInt16();

							m_Reader.ReadByte();

							//
							// Read Edges
							//

							item.Edges = new Edge[(entries - 5) / 3];

							for(int x = 0; x < item.Edges.Length; x++)
							{									
								Edge edge = new Edge();
								
								m_Reader.ReadByte();
								edge.CharacterSetIndex = m_Reader.ReadInt16();

								m_Reader.ReadByte();
								edge.TargetIndex = m_Reader.ReadInt16();

								m_Reader.ReadByte();

								item.Edges[x] = edge;
							}

							grammar.DFATable[item.Index] = item;
						}

							break;

						case (byte)'L':

						
						{
							LALR item = new LALR();

							m_Reader.ReadByte();
							item.Index = m_Reader.ReadInt16();

							m_Reader.ReadByte();

							//
							// Read Actions
							//

							item.Actions = new Action[(entries - 3)/4];

							for(int x = 0; x < item.Actions.Length; x++)
							{									
								Action action = new Action();
								
								m_Reader.ReadByte();
								action.SymbolIndex = m_Reader.ReadInt16();

								m_Reader.ReadByte();
								action.ActionType = m_Reader.ReadInt16();

								m_Reader.ReadByte();
								action.Target = m_Reader.ReadInt16();

								m_Reader.ReadByte();
								
								item.Actions[x] = action;
							}

							grammar.LALRTable[item.Index] = item;
						}

							break;

						case (byte)'I':
							m_Reader.ReadByte();
							grammar.InitialDFAState = m_Reader.ReadInt16();
							m_Reader.ReadByte();
							grammar.InitialLALRState = m_Reader.ReadInt16();
							break;
					}
				
				}
			}
			catch(Exception x)
			{

			}

			return grammar;

		}

		private static string ReadString()
		{
			int index = 0;
			byte[] buffer = new byte[1024];

			while(true)
			{
				m_Reader.Read(buffer,index,2);
				if(buffer[index] == 0)
					break;
				index += 2;
			}

			return m_Encoding.GetString(buffer,0,index);
		}

		public string Name;
		public string Version;
		public string Author;
		public string About;
		public bool IsCaseSensitive;
		public Int16 StartSymbol;

		internal CharacterSet [] CharacterSetTable = null;
		internal Symbol [] SymbolTable = null;
		internal Rule [] RuleTable = null;
		internal DFA [] DFATable = null;
		internal LALR [] LALRTable = null;


		public short InitialDFAState;
		public short InitialLALRState;

		private Grammar()
		{
			
		}

	}

}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here

Share

About the Author

Michael Bebenita
Web Developer
United States United States
Currently a graduate student at UCI.

| Advertise | Privacy | Terms of Use | Mobile
Web04 | 2.8.150414.1 | Last Updated 12 May 2003
Article Copyright 2003 by Michael Bebenita
Everything else Copyright © CodeProject, 1999-2015
Layout: fixed | fluid