Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version

Irony - .NET Compiler Construction Kit

, 4 Jan 2008 MIT
Introduction to Irony - a new technology of parser/compiler construction for .NET.
irony_article.zip
Irony_article
irony_exprTree.jpg
irony_GrammarExplorer.jpg
Irony_src.zip
irony_src.zip
Irony_src
Irony.GrammarExplorer
.svn
all-wcprops
entries
format
prop-base
props
text-base
030.Irony.GrammarExplorer.csproj.svn-base
app.config.svn-base
fmGrammarExplorer.cs.svn-base
fmGrammarExplorer.Designer.cs.svn-base
fmGrammarExplorer.resx.svn-base
fmShowException.cs.svn-base
fmShowException.Designer.cs.svn-base
fmShowException.resx.svn-base
License.txt.svn-base
Program.cs.svn-base
tmp
prop-base
props
text-base
Properties
.svn
all-wcprops
entries
format
prop-base
props
text-base
AssemblyInfo.cs.svn-base
Resources.Designer.cs.svn-base
Resources.resx.svn-base
Settings.Designer.cs.svn-base
Settings.settings.svn-base
tmp
prop-base
props
text-base
Settings.settings
Irony.Samples
.svn
all-wcprops
entries
format
prop-base
props
text-base
020.Irony.Samples.csproj.svn-base
License.txt.svn-base
tmp
prop-base
props
text-base
OtherGrammars
.svn
all-wcprops
entries
format
prop-base
props
text-base
ExpressionGrammar.cs.svn-base
GrammarEx434.cs.svn-base
GrammarEx446.cs.svn-base
GrammarExL514.cs.svn-base
tmp
prop-base
props
text-base
Properties
.svn
all-wcprops
entries
format
prop-base
props
text-base
AssemblyInfo.cs.svn-base
tmp
prop-base
props
text-base
Python
.svn
all-wcprops
entries
format
prop-base
props
text-base
Python_auth_svn.txt.svn-base
PythonGrammar.cs.svn-base
tmp
prop-base
props
text-base
Ruby
.svn
all-wcprops
entries
format
prop-base
props
text-base
Ruby_auth.txt.svn-base
RubyGrammar.cs.svn-base
tmp
prop-base
props
text-base
Scheme
.svn
all-wcprops
entries
format
prop-base
props
text-base
SampleAstNodes.cs.svn-base
SchemeGrammar.cs.svn-base
tmp
prop-base
props
text-base
SourceSamples
.svn
all-wcprops
entries
format
prop-base
props
text-base
_about.txt.svn-base
99 bottles.py.svn-base
99 bottles.rb.svn-base
99 bottles.scm.svn-base
ExprSample.txt.svn-base
tmp
prop-base
props
text-base
99 bottles.rb
99 bottles.scm
Irony
.svn
all-wcprops
entries
format
prop-base
props
text-base
010.Irony.csproj.svn-base
Common.cs.svn-base
License.txt.svn-base
tmp
prop-base
props
text-base
Compiler
.svn
all-wcprops
entries
format
prop-base
props
text-base
CompilerContext.cs.svn-base
Enums.cs.svn-base
EventArgs.cs.svn-base
LanguageCompiler.cs.svn-base
SyntaxError.cs.svn-base
tmp
prop-base
props
text-base
AST
.svn
all-wcprops
entries
format
prop-base
props
text-base
AstNode.cs.svn-base
GenericNode.cs.svn-base
tmp
prop-base
props
text-base
Grammar
.svn
all-wcprops
entries
format
prop-base
props
text-base
BnfElement.cs.svn-base
BnfExpression.cs.svn-base
Grammar.cs.svn-base
GrammarData.cs.svn-base
GrammarDataBuilder.cs.svn-base
tmp
prop-base
props
text-base
NonTerminals
.svn
all-wcprops
entries
format
prop-base
props
text-base
NonTerminal.cs.svn-base
tmp
prop-base
props
text-base
Parser
.svn
all-wcprops
entries
format
prop-base
props
text-base
Parser.cs.svn-base
ParserStack.cs.svn-base
tmp
prop-base
props
text-base
Scanner
.svn
all-wcprops
entries
format
prop-base
props
text-base
Scanner.cs.svn-base
SourceFile.cs.svn-base
Token.cs.svn-base
tmp
prop-base
props
text-base
Terminals
.svn
all-wcprops
entries
format
prop-base
props
text-base
_Terminal.cs.svn-base
CharLiteral.cs.svn-base
CommentTerminal.cs.svn-base
ConstantSetTerminal.cs.svn-base
CustomTerminal.cs.svn-base
IdentifierTerminal.cs.svn-base
NumberTerminal.cs.svn-base
RegExBasedTerminal.cs.svn-base
StringLiteral.cs.svn-base
SymbolTerminal.cs.svn-base
tmp
prop-base
props
text-base
TokenFilters
.svn
all-wcprops
entries
format
prop-base
props
text-base
BraceMatchFilter.cs.svn-base
CodeOutlineFilter.cs.svn-base
TokenFilter.cs.svn-base
tmp
prop-base
props
text-base
Properties
.svn
all-wcprops
entries
format
prop-base
props
text-base
AssemblyInfo.cs.svn-base
tmp
prop-base
props
text-base
#region License
/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution. 
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the 
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/
#endregion

using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;


namespace Irony.Compiler {
  //Parser class implements LALR(1) parser DFM. Its behavior is controlled by a state transition graph
  // contained in _data.States list. Each state contains a dictionary of parser actions indexed by input 
  // element (token or non-terminal node). Parser takes input token stream and produces Abstract Syntax Tree.
  // A nice online overview of compiler construction principles including parsing is the following course:
  //Leonidas Fegaras, Design and Construction of Compilers 
  //http://lambda.uta.edu/cse5317/long/index.html 
  public class Parser {

    #region Constructors
    public Parser(GrammarData data) {
      Data = data;
    }
    #endregion

    #region Properties and fields: Data, Stack, _context, Input, CurrentState, LineCount, TokenCount
    public readonly GrammarData Data;
    public readonly ParserStack Stack = new ParserStack();

    private CompilerContext _context;

    public IEnumerator<Token> Input {
      get {return _input;}
    } IEnumerator<Token> _input;

    public ParserState CurrentState {
      get {return _currentState;}
    } ParserState  _currentState;


    public int LineCount {
      get {return _lineCount;}
    } int  _lineCount;

    public int TokenCount  {
      get {return _tokenCount;}
    } int  _tokenCount;

    #endregion

    #region Events: ParserAction, TokenReceived
    public event EventHandler<ParserActionEventArgs> ActionSelected;
    public event EventHandler<ParserActionEventArgs> ActionConflict;
    public event EventHandler<TokenEventArgs> TokenReceived;
    TokenEventArgs _tokenArgs = new TokenEventArgs(null); //declar as field and reuse it to avoid generating garbage

    protected void OnTokenReceived(Token token) {
      if (TokenReceived == null) return;
      _tokenArgs.Token = token;
      TokenReceived(this, _tokenArgs);
    }
    #endregion

    #region Parsing methods
    private void Reset() {
      Stack.Reset();
      _currentState = Data.InitialState;
      _lineCount = 0;
      _tokenCount = 0;
      _context.Errors.Clear();
    }
    
    private Token GetToken() {
      Token token;
      while (_input.MoveNext()) {
        token = _input.Current;
        _tokenCount++;
        _lineCount = token.Location.Line + 1;
        if (TokenReceived != null)
          OnTokenReceived(token);
        //check token category
        switch (token.Terminal.Category) {
          case TokenCategory.Content:
          case TokenCategory.Outline:
            return token;
          case TokenCategory.Comment:
            continue;
          case TokenCategory.Error:
            ReportError(token);
            Recover();
            continue;
        }//switch
      }//while
      //Normally we never get here. 
      // It might happen if the grammar directly uses EOF token in Root definition (which is not recommended);
      //  in this case we keep returning EOF token, to avoid breaking the parser.
      return new Token(Grammar.Eof, new SourceLocation(0, _lineCount - 1, 0), ""); 
    }//method


    public AstNode Parse(CompilerContext context, IEnumerable<Token> tokenStream) {
      _context = context;
      Reset();
      _input = tokenStream.GetEnumerator();
      Token token = GetToken();
      while (true) {
        if (_currentState == Data.FinalState)
          return Stack[0].Node;
        //Check for EOF
        //if (token.Terminal == Grammar.Eof && Stack.Count == 0) 
        //  yield break;
        //Figure out whether to shift or to reduce
        ActionRecord action = GetAction(token);
        if (ActionSelected != null) //just to improve performance we check it here
          OnActionSelected(_currentState, token, action);
        if (action.HasConflict())
          action = OnActionConflict(_currentState, token, action);
        switch(action.ActionType) {
          case ParserActionType.Operator:
            if (GetActionTypeForOperation(token) == ParserActionType.Shift)
              goto case ParserActionType.Shift;
            else
              goto case ParserActionType.Reduce;

          case ParserActionType.Shift:
            Stack.Push(token, token.Location, _currentState);
            _currentState = action.NewState;
            token = GetToken();
            break;

          case ParserActionType.Reduce:
            ExecuteReduceAction(action, token);
            break;
          
          case ParserActionType.Error:
            //TODO: add better error reporting here
            if (token.Terminal == Grammar.Eof) {
              ReportError(_input.Current.Location, "Unexpected end of file.");
              return null;
            } else
              ReportError(_input.Current.Location, "Syntax error.");
            Recover();
            token = GetToken();
            break;
        }//switch
      }//while
    }//Parse
    #endregion

    #region Error handling
    private void ReportError(SourceLocation location, string message, params object[] args) {
      if (args != null && args.Length > 0)
        message = string.Format(message, args);
      _context.AddError(location, message, _currentState);
    }

    private void ReportError(Token errorToken) {
      _context.AddError(errorToken.Location, errorToken.Text, _currentState);
    }

    private Token Recover() {
      //TODO: implement REAL recovery to some consistent state.
      Token token;
      token = GetToken(); //simply shift to next token
      return token;
    }
    #endregion

    #region event handling
    protected void OnActionSelected(ParserState state, Token input, ActionRecord action) {
      if (ActionSelected != null) {
        ParserActionEventArgs args = new ParserActionEventArgs(state, input, action);
        ActionSelected(this, args);
      }
    }
    protected ActionRecord OnActionConflict(ParserState state, Token input, ActionRecord action) {
      if (ActionConflict != null) {
        ParserActionEventArgs args = new ParserActionEventArgs(state, input, action);
        ActionConflict(this, args);
        return args.Action;
      }
      return action;
    }
    #endregion

    private ActionRecord GetAction(Token token) {
      ActionRecord action = null;
      if ((token.Terminal.MatchMode & TokenMatchMode.ByValue) != 0) {
        if (_currentState.Actions.TryGetValue(token.Text, out action))
          return action;
      }
      if ((token.Terminal.MatchMode & TokenMatchMode.ByType) != 0 &&  
        _currentState.Actions.TryGetValue(token.Terminal.Key, out action))
        return action;
      //return error action singleton
      return ActionRecord.ErrorAction;
    }
    private ParserActionType GetActionTypeForOperation(Token current) {
      OperatorInfo opInfo;
      string op = current.Text;
      if (!Data.Grammar.Operators.TryGetValue(op, out opInfo)) return ParserActionType.Shift;
      for (int i = Stack.Count - 2; i >= 0; i--) {
        Token tkn = Stack[i].Node as Token;
        if (tkn == null) continue;
        string prevOp = tkn.Text;
        OperatorInfo prevOpInfo;
        if (!Data.Grammar.Operators.TryGetValue(prevOp, out prevOpInfo)) continue;
        //if previous operator has the same precedence then use associativity
        if (prevOpInfo.Precedence == opInfo.Precedence) 
          return opInfo.Associativity == Associativity.Left ? ParserActionType.Reduce : ParserActionType.Shift;
        ParserActionType result = prevOpInfo.Precedence > opInfo.Precedence ? ParserActionType.Reduce : ParserActionType.Shift;
        return result;
      }
      return ParserActionType.Shift;
    }

    private void ExecuteReduceAction(ActionRecord action, Token current) {
      ParserState oldState = _currentState;
      int popCnt = action.PopCount;

      //Get new node's child nodes - these are nodes being popped from the stack 
      AstNodeList childNodes = new AstNodeList();
      for (int i = 0; i < action.PopCount; i++) {
        AstNode child = Stack[Stack.Count - popCnt + i].Node;
        Token tkn = child as Token;
        if (tkn != null && Data.PunctuationLookup.ContainsKey(tkn.Text))
          continue; //don't add this node, it is punctuation symbol
        childNodes.Add(child);
      }
      //recover state, location and pop the stack
      SourceLocation location = current.Location;
      if (popCnt > 0) {
        location = Stack[Stack.Count - popCnt].Location;
        _currentState = Stack[Stack.Count - popCnt].State;
        Stack.Pop(popCnt);
      }
      //Create new node
      AstNode node = Data.Grammar.CreateNode(_context, action, location, childNodes);
      // Push node/current state into the stack 
      Stack.Push(node, location, _currentState);
      //switch to new state
      ActionRecord gotoAction;
      if (_currentState.Actions.TryGetValue(action.NonTerminal.Key, out gotoAction)) {
        _currentState = gotoAction.NewState;
      } else 
        //should never happen
        throw new ApplicationException( string.Format("Cannot find transition for input {0}; state: {1}, popped state: {2}", action.NonTerminal, oldState, _currentState));
    }//method

  }//class

}//namespace

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The MIT License

Share

About the Author

Roman Ivantsov
Architect Pulsar Informatics, Inc
United States United States
No Biography provided

| Advertise | Privacy | Terms of Use | Mobile
Web01 | 2.8.1411023.1 | Last Updated 4 Jan 2008
Article Copyright 2008 by Roman Ivantsov
Everything else Copyright © CodeProject, 1999-2014
Layout: fixed | fluid