Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version

Irony - .NET Compiler Construction Kit

, 4 Jan 2008 MIT
Introduction to Irony - a new technology of parser/compiler construction for .NET.
irony_article.zip
Irony_article
irony_exprTree.jpg
irony_GrammarExplorer.jpg
Irony_src.zip
irony_src.zip
Irony_src
Irony.GrammarExplorer
.svn
all-wcprops
entries
format
prop-base
props
text-base
030.Irony.GrammarExplorer.csproj.svn-base
app.config.svn-base
fmGrammarExplorer.cs.svn-base
fmGrammarExplorer.Designer.cs.svn-base
fmGrammarExplorer.resx.svn-base
fmShowException.cs.svn-base
fmShowException.Designer.cs.svn-base
fmShowException.resx.svn-base
License.txt.svn-base
Program.cs.svn-base
tmp
prop-base
props
text-base
Properties
.svn
all-wcprops
entries
format
prop-base
props
text-base
AssemblyInfo.cs.svn-base
Resources.Designer.cs.svn-base
Resources.resx.svn-base
Settings.Designer.cs.svn-base
Settings.settings.svn-base
tmp
prop-base
props
text-base
Settings.settings
Irony.Samples
.svn
all-wcprops
entries
format
prop-base
props
text-base
020.Irony.Samples.csproj.svn-base
License.txt.svn-base
tmp
prop-base
props
text-base
OtherGrammars
.svn
all-wcprops
entries
format
prop-base
props
text-base
ExpressionGrammar.cs.svn-base
GrammarEx434.cs.svn-base
GrammarEx446.cs.svn-base
GrammarExL514.cs.svn-base
tmp
prop-base
props
text-base
Properties
.svn
all-wcprops
entries
format
prop-base
props
text-base
AssemblyInfo.cs.svn-base
tmp
prop-base
props
text-base
Python
.svn
all-wcprops
entries
format
prop-base
props
text-base
Python_auth_svn.txt.svn-base
PythonGrammar.cs.svn-base
tmp
prop-base
props
text-base
Ruby
.svn
all-wcprops
entries
format
prop-base
props
text-base
Ruby_auth.txt.svn-base
RubyGrammar.cs.svn-base
tmp
prop-base
props
text-base
Scheme
.svn
all-wcprops
entries
format
prop-base
props
text-base
SampleAstNodes.cs.svn-base
SchemeGrammar.cs.svn-base
tmp
prop-base
props
text-base
SourceSamples
.svn
all-wcprops
entries
format
prop-base
props
text-base
_about.txt.svn-base
99 bottles.py.svn-base
99 bottles.rb.svn-base
99 bottles.scm.svn-base
ExprSample.txt.svn-base
tmp
prop-base
props
text-base
99 bottles.rb
99 bottles.scm
Irony
.svn
all-wcprops
entries
format
prop-base
props
text-base
010.Irony.csproj.svn-base
Common.cs.svn-base
License.txt.svn-base
tmp
prop-base
props
text-base
Compiler
.svn
all-wcprops
entries
format
prop-base
props
text-base
CompilerContext.cs.svn-base
Enums.cs.svn-base
EventArgs.cs.svn-base
LanguageCompiler.cs.svn-base
SyntaxError.cs.svn-base
tmp
prop-base
props
text-base
AST
.svn
all-wcprops
entries
format
prop-base
props
text-base
AstNode.cs.svn-base
GenericNode.cs.svn-base
tmp
prop-base
props
text-base
Grammar
.svn
all-wcprops
entries
format
prop-base
props
text-base
BnfElement.cs.svn-base
BnfExpression.cs.svn-base
Grammar.cs.svn-base
GrammarData.cs.svn-base
GrammarDataBuilder.cs.svn-base
tmp
prop-base
props
text-base
NonTerminals
.svn
all-wcprops
entries
format
prop-base
props
text-base
NonTerminal.cs.svn-base
tmp
prop-base
props
text-base
Parser
.svn
all-wcprops
entries
format
prop-base
props
text-base
Parser.cs.svn-base
ParserStack.cs.svn-base
tmp
prop-base
props
text-base
Scanner
.svn
all-wcprops
entries
format
prop-base
props
text-base
Scanner.cs.svn-base
SourceFile.cs.svn-base
Token.cs.svn-base
tmp
prop-base
props
text-base
Terminals
.svn
all-wcprops
entries
format
prop-base
props
text-base
_Terminal.cs.svn-base
CharLiteral.cs.svn-base
CommentTerminal.cs.svn-base
ConstantSetTerminal.cs.svn-base
CustomTerminal.cs.svn-base
IdentifierTerminal.cs.svn-base
NumberTerminal.cs.svn-base
RegExBasedTerminal.cs.svn-base
StringLiteral.cs.svn-base
SymbolTerminal.cs.svn-base
tmp
prop-base
props
text-base
TokenFilters
.svn
all-wcprops
entries
format
prop-base
props
text-base
BraceMatchFilter.cs.svn-base
CodeOutlineFilter.cs.svn-base
TokenFilter.cs.svn-base
tmp
prop-base
props
text-base
Properties
.svn
all-wcprops
entries
format
prop-base
props
text-base
AssemblyInfo.cs.svn-base
tmp
prop-base
props
text-base
#region License
/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution. 
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the 
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/
#endregion

using System;
using System.Collections.Generic;
using System.Text;
//using System.Text.RegularExpressions;

namespace Irony.Compiler {

  public interface ISourceStream {
    int Position { get;set;}
    char CurrentChar { get;} //char at Position
    char NextChar { get;}    //preview char at Position+1

    string Text { get;} //returns entire text buffer
    //returns substring from TokenStart.Position till (Position - 1)
    string GetLexeme();
    SourceLocation TokenStart { get;}
    bool EOF();
  }


  #region SourceLocation struct
  public struct SourceLocation {
    public int Position;
    public int Line;
    public int Column;
    public SourceLocation(int position, int line, int column) {
      Position = position;
      Line = line;
      Column = column;
    }
    public override string ToString() {
      return "L" + Line + ":" + "C" + Column;
    }
  }//SourceLocation
  #endregion

  #region SourceFile class
  public class SourceFile : ISourceStream {
    public SourceFile(string text, string fileName, int tabWidth): this(text, fileName) {
      _tabWidth = tabWidth;
    }
    public SourceFile(string text, string fileName) {
      _text = text;
      _fileName = fileName;
      Reset();
    }

    public string FileName {
      get { return _fileName; }
    } string _fileName;

    public int TabWidth {
      get {return _tabWidth;}
      set {_tabWidth = value;}
    } int  _tabWidth = 8;

    public void Reset() {
      Position = 0;
      _tokenStart = new SourceLocation();
      _nextNewLinePosition = _text.IndexOf('\n');
    }

    #region ISourceFile Members
    public int Position {
      get {return _position; }
      set {
        _position = value;
        try {  _currentChar = _text[_position]; } 
          catch { _currentChar = '\0'; }
      }
    } int _position;

    public bool EOF() {
      return _position >= Text.Length;
    }
    public char CurrentChar {
      get { return _currentChar; }
    } char _currentChar;

    public char NextChar {
      get {
        try {
          return _text[_position + 1];
        } catch { return '\0'; }
      }
    }
    public string Text {
      get { return _text; }
    }  string _text;

    //returns substring from TokenStart.Position till (Position - 1)
    public string GetLexeme() {
      string text = _text.Substring(_tokenStart.Position, _position - _tokenStart.Position);
      return text;
    }
    public SourceLocation TokenStart {
      get {return _tokenStart;}
      internal set { _tokenStart = value; }
    } SourceLocation  _tokenStart;

    #endregion

    public override string ToString() {
      //show just 30 chars from current position
      string result;
      if (Position + 30 < Text.Length)
        result = Text.Substring(Position, 30);
      else
        result = Text.Substring(Position);
      return result;
    }
    
    private int _nextNewLinePosition; //private field to cache position of next \n character
    //This is all about source scanning optimization - this seemingly strange code is aimed at improving perfomance,
    // so keep this in mind when reading it. 
    internal void SetNextTokenStart(string skipWhitespaceChars) {
      while (skipWhitespaceChars.IndexOf(CurrentChar) >= 0)
        Position++;
      int newPosition = this.Position;
      //currently _tokenStart field contains location (pos/line/col) of the last created token. 
      // First, check if new position is in the same line; if so, just adjust column and return
      //  Note that this case is not line start, so we do not need to check tab chars (and adjust column) 
      if (newPosition <= _nextNewLinePosition || _nextNewLinePosition < 0) {
        _tokenStart.Column += newPosition - _tokenStart.Position;
        _tokenStart.Position = newPosition;
        return;
      }
      //So new position is on new line (beyond _nextNewLinePosition)
      //First count \n chars in the string fragment
      int lineStart = _nextNewLinePosition;
      int nlCount = 1; //we start after old _nextNewLinePosition, so we count one NewLine char
      ScanTextForChar(_text, '\n', lineStart + 1, newPosition - 1, ref nlCount, ref lineStart);
      _tokenStart.Line += nlCount;
      //at this moment lineStart is at start of line where newPosition is located 
      //Calc # of tab chars from lineStart to newPosition to adjust column#
      int tabCount = 0;
      int dummy = 0;
      if (_tabWidth > 1)
        ScanTextForChar(_text, '\t', lineStart, newPosition - 1, ref tabCount, ref dummy);

      //adjust TokenStart with calculated information
      _tokenStart.Position = newPosition;
      _tokenStart.Column = newPosition - lineStart - 1;
      if (tabCount > 0)
        _tokenStart.Column += (_tabWidth - 1) * tabCount; // "-1" to count for tab char itself
      
      //finally cache new line
      _nextNewLinePosition = _text.IndexOf('\n', newPosition);
    }

    private static void ScanTextForChar(string text, char ch, int from, int until, ref int count, ref int lastPosition) {
      if (from > until) return;
      while (true) {
        int next = text.IndexOf(ch, from, until - from + 1);
        if (next < 0) return;
        lastPosition = next;
        count++;
        from = next + 1;
      }

    }


  }//class
  #endregion

}//namespace

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The MIT License

Share

About the Author

Roman Ivantsov
Architect Pulsar Informatics, Inc
United States United States
No Biography provided

| Advertise | Privacy | Terms of Use | Mobile
Web02 | 2.8.141223.1 | Last Updated 4 Jan 2008
Article Copyright 2008 by Roman Ivantsov
Everything else Copyright © CodeProject, 1999-2014
Layout: fixed | fluid