Click here to Skip to main content
12,406,124 members (39,395 online)
Click here to Skip to main content

Stats

243.5K views
4.2K downloads
183 bookmarked
Posted

A Simple Compiler for the Common Language Runtime

Michael Bebenita, 11 May 2003
An end-to-end example of a bottom-up LALR(1) compiler for a fictitious language targeting the Common Language Runtime.
ICSharpCode.TextEditor.dll
Images.bmp
Magic IDE.exe
MagicLibrary.DLL
Sharp.cgt
UtilityLibrary.dll
Bubble Sort
Bubble Sort.shp
Config.dck
mike.exe
Sort.exe
Sort.pdb
Sort.sh
Config.dck
Core.dll
Compiler.sln.old
Compiler.suo
Core
App.ico
bin
Core.csproj.user
Core.suo
Magic IDE
App.ico
bin
Debug
Config.dck
Images.bmp
Sharp.cgt
ICSharpCode.TextEditor.dll
Magic IDE.csproj.user
Magic IDE.suo
MagicLibrary.DLL
UtilityLibrary.dll
/*
Sharp Compiler
Copyright (C) 2003  Michael Bebenita

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/
using System;
using System.IO;

namespace Core
{
	/// <summary>
	/// DFA-driven lexical scanner. The whole source file is read into memory by the
	/// constructor; tokens are then produced on demand by walking the states of the
	/// supplied <see cref="Language"/>, starting from its <c>StartState</c>.
	/// </summary>
	public class Scanner
	{
		private string m_Path;
		private char [] m_Buffer;
		// Index of the last character consumed; -1 means nothing has been consumed yet.
		private int m_Cursor = -1;
		private Language m_Language;
		// Position counters, set to 1 by Reset().
		private int m_Line = 0, m_Column = 0;

		/// <summary>
		/// Creates a scanner over the contents of a source file.
		/// </summary>
		/// <param name="path">Path of the source file to scan.</param>
		/// <param name="language">Language whose DFA and symbol table drive the scanner.</param>
		/// <exception cref="FileNotFoundException"><paramref name="path"/> does not name an existing file.</exception>
		/// <exception cref="ArgumentNullException"><paramref name="language"/> is null.</exception>
		public Scanner(string path, Language language)
		{
			if(!File.Exists(path))
				throw new FileNotFoundException("Source file not found.", path);
			if(language == null)
				throw new ArgumentNullException("language");
			m_Path = path;
			m_Language = language;

			// The using statement closes the reader even when ReadToEnd throws.
			using(StreamReader reader = File.OpenText(path))
			{
				m_Buffer = reader.ReadToEnd().ToCharArray();
			}

			Reset();
		}

		/// <summary>
		/// Peek at the next token without consuming it: the cursor, line and column
		/// are restored to their previous values after the token has been read.
		/// </summary>
		/// <returns>The next token found.</returns>
		public Token PeekNextToken()
		{
			int save = m_Cursor;
			int saveColumn = m_Column;
			int saveLine = m_Line;
			Token token = GetNextToken();
			m_Cursor = save;
			m_Column = saveColumn;
			m_Line = saveLine;
			return token;
		}

		/// <summary>
		/// Get next token. The longest lexeme that reaches an accepting state is
		/// returned and the scanner is left positioned on its last character.
		/// </summary>
		/// <returns>
		/// A token for the first symbol of the language (used here as end of file) when
		/// the next character is NUL and no accepting state has been reached; a token
		/// constructed with null and empty text when the DFA fails before reaching any
		/// accepting state; otherwise a token for the symbol accepted by the last
		/// accepting state. Line and Column always describe the scanner position at
		/// the start of the call.
		/// </returns>
		public Token GetNextToken()
		{
			State currentState = m_Language.StartState;
			State lastAcceptingState = null;
			int tokenStart = m_Cursor + 1;
			int tokenEnd = tokenStart;

			int tokenStartColumn = m_Column;
			int tokenStartLine = m_Line;

			// Scanner position just after the last accepted character. Needed because
			// the DFA may consume further characters before it fails, and those
			// characters belong to the next token, not to this one.
			int acceptCursor = m_Cursor;
			int acceptLine = m_Line;
			int acceptColumn = m_Column;

			Token result = null;

			//
			// Retrieve one character at a time from the source input and walk through the DFA.
			// When we enter an accepting state, remember it (and the position) and keep walking.
			// If we enter an error state (nextState == null) then return the last accepting
			// state's token, or a null token if no accepting state was ever reached.
			//

			while(true)
			{
				// Don't advance the cursor.
				char nextChar = PeekNextChar();

				// Return an EOF token.
				if(nextChar == (char)0 && (lastAcceptingState == null))
				{
					result = new Token(m_Language.Symbols[0]);
					result.Column = tokenStartColumn;
					result.Line = tokenStartLine;
					break;
				}

				// Get next state from current state on the next character.
				State nextState = currentState.Move(nextChar);
				if(nextState == null)
				{
					// Error state: emit what has been recognised so far.
					if(lastAcceptingState == null)
					{
						// Nothing was accepted; tokenEnd never moved, so the text is empty.
						result = new Token(null);
					}
					else
					{
						result = new Token(lastAcceptingState.Accepts);

						// Give back every character consumed after the last accepting
						// state so that the next call starts right behind this lexeme.
						m_Cursor = acceptCursor;
						m_Line = acceptLine;
						m_Column = acceptColumn;
					}
					result.Column = tokenStartColumn;
					result.Line = tokenStartLine;
					result.Text = new string(m_Buffer,tokenStart,tokenEnd - tokenStart);
					break;
				}

				// Move to the next state and consume the character.
				currentState = nextState;
				GetNextChar();

				// Save accepting state if its accepting.
				if(nextState.IsAccepting)
				{
					lastAcceptingState = nextState;
					// m_Cursor now indexes the accepted character; tokenEnd is exclusive.
					tokenEnd = m_Cursor + 1;
					acceptCursor = m_Cursor;
					acceptLine = m_Line;
					acceptColumn = m_Column;
				}
			}
			return result;
		}

		/// <summary>
		/// Resets the scanner to the start of the buffer, at line 1, column 1.
		/// </summary>
		public void Reset()
		{
			m_Cursor = -1;
			m_Line = m_Column = 1;
		}

		/// <summary>
		/// Returns the character at <paramref name="index"/>, or NUL when the index
		/// lies outside the buffer.
		/// </summary>
		private char GetChar(int index)
		{
			return (index >= m_Buffer.Length) || (index < 0) ? (char)0 : m_Buffer[index];
		}

		/// <summary>
		/// Advances the cursor by one character and updates the line and column
		/// counters. CR+LF, a lone LF and a lone CR each count as one line break.
		/// </summary>
		/// <returns>The character consumed, or NUL past the end of the buffer.</returns>
		private char GetNextChar()
		{
			char nextChar = GetChar(++m_Cursor);

			// The CR of a CR+LF pair is counted as an ordinary character; the line
			// break is registered once, when the LF itself is consumed.
			bool lineFeed = nextChar == (char)10;
			bool loneCarriageReturn = (nextChar == (char)13) && (PeekNextChar() != (char)10);

			if(lineFeed || loneCarriageReturn)
			{
				m_Line++;
				m_Column = 1;
			}
			else
				m_Column ++;

			return nextChar;
		}

		/// <summary>
		/// Returns the character after the cursor without consuming it.
		/// </summary>
		private char PeekNextChar()
		{
			return GetChar(m_Cursor + 1);
		}

		/// <summary>
		/// Returns the character at the cursor, i.e. the last one consumed.
		/// </summary>
		private char GetCurrentChar()
		{
			return GetChar(m_Cursor);
		}

	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here

Share

About the Author

Michael Bebenita
Web Developer
United States
Currently a graduate student at UCI.

You may also be interested in...

| Advertise | Privacy | Terms of Use | Mobile
Web01 | 2.8.160726.1 | Last Updated 12 May 2003
Article Copyright 2003 by Michael Bebenita
Everything else Copyright © CodeProject, 1999-2016
Layout: fixed | fluid