Click here to Skip to main content
15,884,099 members
Articles / Programming Languages / C#

A Simple Compiler for the Common Language Runtime

Rate me:
Please Sign up or sign in to vote.
4.89/5 (86 votes)
11 May 2003 · 9 min read · 295K · 5.1K · 190
An end-to-end example of a bottom-up LALR(1) compiler for a fictitious language targeting the Common Language Runtime.
/*
Sharp Compiler
Copyright (C) 2003  Michael Bebenita

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/
using System;
using System.IO;

namespace Core
{
	/// <summary>
	/// DFA-driven lexical scanner. The whole source file is read into memory once and
	/// tokens are produced on demand by walking the DFA supplied by a <see cref="Language"/>,
	/// always returning the longest lexeme that ends in an accepting state.
	/// </summary>
	public class Scanner
	{
		private string m_Path;
		private char [] m_Buffer;
		// Index of the most recently consumed character; -1 before anything is consumed.
		private int m_Cursor = -1;
		private Language m_Language;
		// Position (1-based after Reset) of the next character to be consumed.
		private int m_Line = 1, m_Column = 1;

		/// <summary>
		/// Creates a scanner over the contents of a source file.
		/// </summary>
		/// <param name="path">Path of the source file to scan.</param>
		/// <param name="language">Language definition providing the DFA start state and symbols.</param>
		/// <exception cref="FileNotFoundException">The file does not exist.</exception>
		/// <exception cref="ArgumentNullException"><paramref name="language"/> is null.</exception>
		public Scanner(string path, Language language)
		{
			if(!File.Exists(path))
				throw new FileNotFoundException("Source file not found.", path);
			if(language == null)
				throw new ArgumentNullException("language");
			m_Path = path;
			m_Language = language;

			// 'using' guarantees the reader is closed even if ReadToEnd throws.
			using(StreamReader reader = File.OpenText(path))
			{
				m_Buffer = reader.ReadToEnd().ToCharArray();
			}

			Reset();
		}

		/// <summary>
		/// Peek at the next token without consuming it.
		/// </summary>
		/// <returns>The next token found.</returns>
		public Token PeekNextToken()
		{
			int saveCursor = m_Cursor;
			int saveColumn = m_Column;
			int saveLine = m_Line;
			try
			{
				return GetNextToken();
			}
			finally
			{
				// Restore the scanner position even if scanning throws.
				m_Cursor = saveCursor;
				m_Column = saveColumn;
				m_Line = saveLine;
			}
		}

		/// <summary>
		/// Get next token and advance the scanner past it.
		/// </summary>
		/// <remarks>
		/// Walks the DFA one character at a time, remembering the last accepting state and the
		/// scanner position at that point. When no further transition exists (or the input ends)
		/// the scanner rewinds to that remembered position, so characters read while looking for a
		/// longer match are not lost.
		/// </remarks>
		/// <returns>
		/// A token for the longest accepted lexeme; a token built from
		/// <c>m_Language.Symbols[0]</c> (end of file) when no input remains; or a token with a
		/// null symbol whose <c>Text</c> is the unrecognised input (always at least one character,
		/// so that repeated calls make progress).
		/// </returns>
		public Token GetNextToken()
		{
			State currentState = m_Language.StartState;
			State lastAcceptingState = null;

			int tokenStart = m_Cursor + 1;
			int tokenStartColumn = m_Column;
			int tokenStartLine = m_Line;

			// Scanner position immediately after the longest accepted lexeme so far.
			int acceptCursor = m_Cursor;
			int acceptColumn = m_Column;
			int acceptLine = m_Line;

			// End of input is detected by position rather than by a NUL sentinel, so the DFA is
			// never asked to move on a character that is not really in the buffer.
			while(m_Cursor + 1 < m_Buffer.Length)
			{
				State nextState = currentState.Move(PeekNextChar());
				if(nextState == null)
					break; // Error state: no transition on this character.

				currentState = nextState;
				GetNextChar();

				if(currentState.IsAccepting)
				{
					lastAcceptingState = currentState;
					acceptCursor = m_Cursor;
					acceptColumn = m_Column;
					acceptLine = m_Line;
				}
			}

			Token result;
			if(lastAcceptingState != null)
			{
				// Give back anything consumed beyond the accepted lexeme.
				m_Cursor = acceptCursor;
				m_Column = acceptColumn;
				m_Line = acceptLine;

				result = new Token(lastAcceptingState.Accepts);
				result.Column = tokenStartColumn;
				result.Line = tokenStartLine;
				result.Text = new string(m_Buffer, tokenStart, m_Cursor + 1 - tokenStart);
			}
			else if(tokenStart >= m_Buffer.Length)
			{
				// Nothing left to read: return an EOF token.
				result = new Token(m_Language.Symbols[0]);
				result.Column = tokenStartColumn;
				result.Line = tokenStartLine;
			}
			else
			{
				// Lexical error. If the very first character had no transition, consume it so
				// the caller does not receive the same error token forever.
				if(m_Cursor + 1 == tokenStart)
					GetNextChar();

				result = new Token(null);
				result.Column = tokenStartColumn;
				result.Line = tokenStartLine;
				result.Text = new string(m_Buffer, tokenStart, m_Cursor + 1 - tokenStart);
			}
			return result;
		}

		/// <summary>
		/// Resets the scanner to the start of the buffer (line 1, column 1).
		/// </summary>
		public void Reset()
		{
			m_Cursor = -1;
			m_Line = m_Column = 1;
		}

		/// <summary>
		/// Returns the character at <paramref name="index"/>, or NUL when the index is outside the buffer.
		/// </summary>
		private char GetChar(int index)
		{
			return (index >= m_Buffer.Length) || (index < 0) ? (char)0 : m_Buffer[index];
		}

		/// <summary>
		/// Consumes and returns the next character, updating the line and column counters.
		/// CR+LF, a lone LF and a lone CR each count as exactly one line break.
		/// </summary>
		private char GetNextChar()
		{
			char nextChar = GetChar(++m_Cursor);

			if(nextChar == (char)13)
			{
				m_Line++;
				// For CR+LF the column is set to 0 so that consuming the LF brings it to 1.
				m_Column = (PeekNextChar() == (char)10) ? 0 : 1;
			}
			else if(nextChar == (char)10)
			{
				if(GetChar(m_Cursor - 1) == (char)13)
				{
					// Second half of CR+LF: the line was already counted at the CR.
					m_Column++;
				}
				else
				{
					m_Line++;
					m_Column = 1;
				}
			}
			else
				m_Column++;

			return nextChar;
		}

		/// <summary>
		/// Returns the next character without consuming it (NUL at end of input).
		/// </summary>
		private char PeekNextChar()
		{
			return GetChar(m_Cursor + 1);
		}

		/// <summary>
		/// Returns the most recently consumed character (NUL if nothing has been consumed).
		/// </summary>
		private char GetCurrentChar()
		{
			return GetChar(m_Cursor);
		}

	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
United States
Currently a graduate student at UCI.

Comments and Discussions