Irony - .NET Compiler Construction Kit

Roman Ivantsov

Rate me:

4.97/5 (86 votes)

4 Jan 2008 · MIT · 19 min read

295.8K

3.2K

201

Introduction to Irony - a new technology of parser/compiler construction for .NET.

irony_src.zip
- Irony_src
  - Irony.GrammarExplorer
    - .svn
      - all-wcprops
      - entries
      - format
      - prop-base
      - props
      - text-base
        
        030.Irony.GrammarExplorer.csproj.svn-base
        
        app.config.svn-base
        
        fmGrammarExplorer.cs.svn-base
        
        fmGrammarExplorer.Designer.cs.svn-base
        
        fmGrammarExplorer.resx.svn-base
        
        fmShowException.cs.svn-base
        
        fmShowException.Designer.cs.svn-base
        
        fmShowException.resx.svn-base
        
        License.txt.svn-base
        
        Program.cs.svn-base
      - tmp
        
        prop-base
        
        props
        
        text-base
    - 030.Irony.GrammarExplorer.csproj
    - app.config
    - fmGrammarExplorer.cs
    - fmGrammarExplorer.Designer.cs
    - fmGrammarExplorer.resx
    - fmShowException.cs
    - fmShowException.Designer.cs
    - fmShowException.resx
    - License.txt
    - Program.cs
    - Properties
      - .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        AssemblyInfo.cs.svn-base
        
        Resources.Designer.cs.svn-base
        
        Resources.resx.svn-base
        
        Settings.Designer.cs.svn-base
        
        Settings.settings.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
      - AssemblyInfo.cs
      - Resources.Designer.cs
      - Resources.resx
      - Settings.Designer.cs
      - Settings.settings
  - Irony.Samples
    - .svn
      - all-wcprops
      - entries
      - format
      - prop-base
      - props
      - text-base
        
        020.Irony.Samples.csproj.svn-base
        
        License.txt.svn-base
      - tmp
        
        prop-base
        
        props
        
        text-base
    - 020.Irony.Samples.csproj
    - License.txt
    - OtherGrammars
      - .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        ExpressionGrammar.cs.svn-base
        
        GrammarEx434.cs.svn-base
        
        GrammarEx446.cs.svn-base
        
        GrammarExL514.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
      - ExpressionGrammar.cs
      - GrammarEx434.cs
      - GrammarEx446.cs
      - GrammarExL514.cs
    - Properties
      - .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        AssemblyInfo.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
      - AssemblyInfo.cs
    - Python
      - .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        Python_auth_svn.txt.svn-base
        
        PythonGrammar.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
      - Python_auth_svn.txt
      - PythonGrammar.cs
    - Ruby
      - .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        Ruby_auth.txt.svn-base
        
        RubyGrammar.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
      - Ruby_auth.txt
      - RubyGrammar.cs
    - Scheme
      - .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        SampleAstNodes.cs.svn-base
        
        SchemeGrammar.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
      - SampleAstNodes.cs
      - SchemeGrammar.cs
    - SourceSamples
      - .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        _about.txt.svn-base
        
        99 bottles.py.svn-base
        
        99 bottles.rb.svn-base
        
        99 bottles.scm.svn-base
        
        ExprSample.txt.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
      - _about.txt
      - 99 bottles.py
      - 99 bottles.rb
      - 99 bottles.scm
      - ExprSample.txt
  - Irony
    - .svn
      - all-wcprops
      - entries
      - format
      - prop-base
      - props
      - text-base
        
        010.Irony.csproj.svn-base
        
        Common.cs.svn-base
        
        License.txt.svn-base
      - tmp
        
        prop-base
        
        props
        
        text-base
    - 010.Irony.csproj
    - Common.cs
    - Compiler
      - .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        CompilerContext.cs.svn-base
        
        Enums.cs.svn-base
        
        EventArgs.cs.svn-base
        
        LanguageCompiler.cs.svn-base
        
        SyntaxError.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
      - AST
        
        .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        AstNode.cs.svn-base
        
        GenericNode.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
        
        AstNode.cs
        
        GenericNode.cs
      - CompilerContext.cs
      - Enums.cs
      - EventArgs.cs
      - Grammar
        
        .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        BnfElement.cs.svn-base
        
        BnfExpression.cs.svn-base
        
        Grammar.cs.svn-base
        
        GrammarData.cs.svn-base
        
        GrammarDataBuilder.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
        
        BnfElement.cs
        
        BnfExpression.cs
        
        Grammar.cs
        
        GrammarData.cs
        
        GrammarDataBuilder.cs
      - LanguageCompiler.cs
      - NonTerminals
        
        .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        NonTerminal.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
        
        NonTerminal.cs
      - Parser
        
        .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        Parser.cs.svn-base
        
        ParserStack.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
        
        Parser.cs
        
        ParserStack.cs
      - Scanner
        
        .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        Scanner.cs.svn-base
        
        SourceFile.cs.svn-base
        
        Token.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
        
        Scanner.cs
        
        SourceFile.cs
        
        Token.cs
      - SyntaxError.cs
      - Terminals
        
        .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        _Terminal.cs.svn-base
        
        CharLiteral.cs.svn-base
        
        CommentTerminal.cs.svn-base
        
        ConstantSetTerminal.cs.svn-base
        
        CustomTerminal.cs.svn-base
        
        IdentifierTerminal.cs.svn-base
        
        NumberTerminal.cs.svn-base
        
        RegExBasedTerminal.cs.svn-base
        
        StringLiteral.cs.svn-base
        
        SymbolTerminal.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
        
        _Terminal.cs
        
        CharLiteral.cs
        
        CommentTerminal.cs
        
        ConstantSetTerminal.cs
        
        CustomTerminal.cs
        
        IdentifierTerminal.cs
        
        NumberTerminal.cs
        
        RegExBasedTerminal.cs
        
        StringLiteral.cs
        
        SymbolTerminal.cs
      - TokenFilters
        
        .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        BraceMatchFilter.cs.svn-base
        
        CodeOutlineFilter.cs.svn-base
        
        TokenFilter.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
        
        BraceMatchFilter.cs
        
        CodeOutlineFilter.cs
        
        TokenFilter.cs
    - License.txt
    - Properties
      - .svn
        
        all-wcprops
        
        entries
        
        format
        
        prop-base
        
        props
        
        text-base
        
        AssemblyInfo.cs.svn-base
        
        tmp
        
        prop-base
        
        props
        
        text-base
      - AssemblyInfo.cs
  - Irony_All.sln
  - ReadMe.txt
irony_article.zip
- Irony_article
  - Irony.html
  - irony_exprTree.jpg
  - irony_GrammarExplorer.jpg
  - Irony_src.zip

#region License
/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution. 
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the 
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/
#endregion

using System;
using System.Collections.Generic;
using System.Text;
//using System.Text.RegularExpressions;

namespace Irony.Compiler {

  /// <summary>
  /// Character-level access to a source text buffer: a movable read position,
  /// one character of look-ahead, and extraction of the text between the
  /// token start and the current position.
  /// </summary>
  public interface ISourceStream {
    /// <summary>Current index into <see cref="Text"/>; may be read and assigned.</summary>
    int Position { get; set; }

    /// <summary>Char at <see cref="Position"/>.</summary>
    char CurrentChar { get; }

    /// <summary>Preview of the char at Position + 1.</summary>
    char NextChar { get; }

    /// <summary>Returns the entire text buffer.</summary>
    string Text { get; }

    /// <summary>
    /// Returns the substring from TokenStart.Position till (Position - 1).
    /// </summary>
    string GetLexeme();

    /// <summary>
    /// Location whose Position marks the lower bound of the substring
    /// returned by <see cref="GetLexeme"/>.
    /// </summary>
    SourceLocation TokenStart { get; }

    /// <summary>
    /// End-of-input test. NOTE(review): the SourceFile implementation in this
    /// file returns true when Position is at or beyond the length of Text;
    /// other implementers are presumably expected to match - confirm.
    /// </summary>
    bool EOF();
  }


  #region SourceLocation struct
  /// <summary>
  /// Value type that identifies a point in source text by absolute character
  /// offset together with a line and a column number.
  /// </summary>
  public struct SourceLocation {
    /// <summary>Absolute character offset in the text.</summary>
    public int Position;
    /// <summary>Line number of the location.</summary>
    public int Line;
    /// <summary>Column number of the location within its line.</summary>
    public int Column;

    /// <summary>Creates a location from its three components.</summary>
    /// <param name="position">Absolute character offset.</param>
    /// <param name="line">Line number.</param>
    /// <param name="column">Column number.</param>
    public SourceLocation(int position, int line, int column) {
      this.Position = position;
      this.Line = line;
      this.Column = column;
    }

    /// <summary>
    /// Formats the location as "L{Line}:C{Column}"; Position is not included.
    /// </summary>
    public override string ToString() {
      string lineText = Line.ToString();
      string columnText = Column.ToString();
      return string.Concat("L", lineText, ":C", columnText);
    }
  }//SourceLocation
  #endregion

  #region SourceFile class
  /// <summary>
  /// <see cref="ISourceStream"/> implementation over a string held in memory.
  /// Besides the read position it maintains <see cref="TokenStart"/>, whose
  /// line and column are updated incrementally by SetNextTokenStart.
  /// Line and column numbers are zero-based: a freshly reset instance
  /// reports line 0, column 0.
  /// </summary>
  public class SourceFile : ISourceStream {
    /// <summary>Creates a source file with an explicit tab width.</summary>
    /// <param name="text">Source text to scan; must not be null.</param>
    /// <param name="fileName">Name reported by <see cref="FileName"/>.</param>
    /// <param name="tabWidth">Number of columns one tab character counts for.</param>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public SourceFile(string text, string fileName, int tabWidth): this(text, fileName) {
      _tabWidth = tabWidth;
    }

    /// <summary>Creates a source file with the default tab width (8).</summary>
    /// <param name="text">Source text to scan; must not be null.</param>
    /// <param name="fileName">Name reported by <see cref="FileName"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public SourceFile(string text, string fileName) {
      //Fail fast with a descriptive exception; previously a null text surfaced
      // as a NullReferenceException from inside Reset().
      if (text == null)
        throw new ArgumentNullException("text");
      _text = text;
      _fileName = fileName;
      Reset();
    }

    /// <summary>File name supplied to the constructor.</summary>
    public string FileName {
      get { return _fileName; }
    } string _fileName;

    /// <summary>
    /// Number of columns a tab character counts for when the column of a token
    /// on a new line is computed. Values of 1 or less disable tab adjustment.
    /// </summary>
    public int TabWidth {
      get { return _tabWidth; }
      set { _tabWidth = value; }
    } int _tabWidth = 8;

    /// <summary>
    /// Rewinds to the beginning of the text: Position becomes 0, TokenStart is
    /// zeroed and the cached position of the next line break is recomputed.
    /// </summary>
    public void Reset() {
      Position = 0;
      _tokenStart = new SourceLocation();
      _nextNewLinePosition = _text.IndexOf('\n');
    }

    #region ISourceStream Members
    /// <summary>
    /// Current index into <see cref="Text"/>. Any value may be assigned;
    /// when it lies outside the text, <see cref="CurrentChar"/> becomes '\0'.
    /// </summary>
    public int Position {
      get { return _position; }
      set {
        _position = value;
        //Explicit range test instead of catching the indexer's exception:
        // same '\0' result, but no exception is thrown on every read at EOF.
        if (value >= 0 && value < _text.Length)
          _currentChar = _text[value];
        else
          _currentChar = '\0';
      }
    } int _position;

    /// <summary>Returns true when Position is at or beyond the end of the text.</summary>
    public bool EOF() {
      return _position >= Text.Length;
    }

    /// <summary>Char at Position, or '\0' when Position is outside the text.</summary>
    public char CurrentChar {
      get { return _currentChar; }
    } char _currentChar;

    /// <summary>Preview of the char at Position + 1, or '\0' when that index is outside the text.</summary>
    public char NextChar {
      get {
        //Written as a comparison against (Length - 1) so that the test itself
        // cannot overflow even for extreme Position values.
        if (_position >= -1 && _position < _text.Length - 1)
          return _text[_position + 1];
        return '\0';
      }
    }

    /// <summary>Entire text buffer.</summary>
    public string Text {
      get { return _text; }
    } string _text;

    /// <summary>
    /// Returns the substring from TokenStart.Position till (Position - 1).
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Position precedes TokenStart.Position or lies beyond the end of the text.
    /// </exception>
    public string GetLexeme() {
      string text = _text.Substring(_tokenStart.Position, _position - _tokenStart.Position);
      return text;
    }

    /// <summary>Location of the start of the current token.</summary>
    public SourceLocation TokenStart {
      get { return _tokenStart; }
      internal set { _tokenStart = value; }
    } SourceLocation _tokenStart;

    #endregion

    /// <summary>
    /// Returns up to 30 characters of text starting at the current position.
    /// Never throws: a Position outside the text is clamped to the nearest
    /// valid index first, so an out-of-range Position yields the start of the
    /// text (negative) or an empty string (past the end).
    /// </summary>
    public override string ToString() {
      int start = Math.Min(Math.Max(_position, 0), _text.Length);
      int length = Math.Min(30, _text.Length - start);
      return _text.Substring(start, length);
    }

    private int _nextNewLinePosition; //private field to cache position of next \n character

    /// <summary>
    /// Skips the given whitespace characters starting at the current position
    /// and records the resulting position, line and column in TokenStart.
    /// </summary>
    /// <param name="skipWhitespaceChars">Characters to skip before the token starts.</param>
    /// <remarks>
    /// This is all about source scanning optimization - this seemingly strange code is aimed
    /// at improving performance, so keep this in mind when reading it.
    /// When the new token is on the same line as the previous one, the column is advanced by the
    /// raw character distance (tabs are not expanded). Only when the token lands on a new line are
    /// line breaks counted and tabs between the line start and the token expanded using TabWidth.
    /// </remarks>
    internal void SetNextTokenStart(string skipWhitespaceChars) {
      //The EOF test guarantees termination even if '\0' (the value CurrentChar
      // reports past the end of the text) is listed among the characters to skip.
      while (!EOF() && skipWhitespaceChars.IndexOf(CurrentChar) >= 0)
        Position++;
      int newPosition = this.Position;
      //currently _tokenStart field contains location (pos/line/col) of the last created token.
      // First, check if new position is in the same line; if so, just adjust column and return
      //  Note that this case is not line start, so we do not need to check tab chars (and adjust column)
      //  A negative _nextNewLinePosition means there are no more line breaks in the text.
      if (newPosition <= _nextNewLinePosition || _nextNewLinePosition < 0) {
        _tokenStart.Column += newPosition - _tokenStart.Position;
        _tokenStart.Position = newPosition;
        return;
      }
      //So new position is on new line (beyond _nextNewLinePosition)
      //First count \n chars in the string fragment
      int lineStart = _nextNewLinePosition;
      int nlCount = 1; //we start after old _nextNewLinePosition, so we count one NewLine char
      ScanTextForChar(_text, '\n', lineStart + 1, newPosition - 1, ref nlCount, ref lineStart);
      _tokenStart.Line += nlCount;
      //at this moment lineStart is the index of the '\n' that precedes the line where newPosition is located
      //Calc # of tab chars from lineStart to newPosition to adjust column#
      int tabCount = 0;
      int dummy = 0;
      if (_tabWidth > 1)
        ScanTextForChar(_text, '\t', lineStart, newPosition - 1, ref tabCount, ref dummy);

      //adjust TokenStart with calculated information
      _tokenStart.Position = newPosition;
      _tokenStart.Column = newPosition - lineStart - 1; // "-1" skips the '\n' at lineStart itself
      if (tabCount > 0)
        _tokenStart.Column += (_tabWidth - 1) * tabCount; // "-1" to count for tab char itself

      //finally cache new line
      _nextNewLinePosition = _text.IndexOf('\n', newPosition);
    }

    /// <summary>
    /// Finds every occurrence of <paramref name="ch"/> in text[from..until] (both bounds inclusive).
    /// For each occurrence <paramref name="count"/> is incremented and
    /// <paramref name="lastPosition"/> is set to its index; when nothing is found
    /// both ref arguments are left untouched.
    /// </summary>
    private static void ScanTextForChar(string text, char ch, int from, int until, ref int count, ref int lastPosition) {
      while (from <= until) {
        int next = text.IndexOf(ch, from, until - from + 1);
        if (next < 0) return;
        lastPosition = next;
        count++;
        from = next + 1;
      }
    }

  }//class
  #endregion

}//namespace

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The MIT License

Written By

Roman Ivantsov

Software Developer (Senior) Microsoft

United States

25 years of professional experience. .NET/c#, databases, security.
Currently Senior Security Engineer, Cloud Security, Microsoft

Irony - .NET Compiler Construction Kit

License

Comments and Discussions