Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version
Go to top

Irony - .NET Compiler Construction Kit

, 4 Jan 2008
Introduction to Irony - a new technology of parser/compiler construction for .NET.
irony_article.zip
Irony_article
irony_exprTree.jpg
irony_GrammarExplorer.jpg
Irony_src.zip
irony_src.zip
Irony_src
Irony.GrammarExplorer
.svn
all-wcprops
entries
format
prop-base
props
text-base
030.Irony.GrammarExplorer.csproj.svn-base
app.config.svn-base
fmGrammarExplorer.cs.svn-base
fmGrammarExplorer.Designer.cs.svn-base
fmGrammarExplorer.resx.svn-base
fmShowException.cs.svn-base
fmShowException.Designer.cs.svn-base
fmShowException.resx.svn-base
License.txt.svn-base
Program.cs.svn-base
tmp
prop-base
props
text-base
Properties
.svn
all-wcprops
entries
format
prop-base
props
text-base
AssemblyInfo.cs.svn-base
Resources.Designer.cs.svn-base
Resources.resx.svn-base
Settings.Designer.cs.svn-base
Settings.settings.svn-base
tmp
prop-base
props
text-base
Settings.settings
Irony.Samples
.svn
all-wcprops
entries
format
prop-base
props
text-base
020.Irony.Samples.csproj.svn-base
License.txt.svn-base
tmp
prop-base
props
text-base
OtherGrammars
.svn
all-wcprops
entries
format
prop-base
props
text-base
ExpressionGrammar.cs.svn-base
GrammarEx434.cs.svn-base
GrammarEx446.cs.svn-base
GrammarExL514.cs.svn-base
tmp
prop-base
props
text-base
Properties
.svn
all-wcprops
entries
format
prop-base
props
text-base
AssemblyInfo.cs.svn-base
tmp
prop-base
props
text-base
Python
.svn
all-wcprops
entries
format
prop-base
props
text-base
Python_auth_svn.txt.svn-base
PythonGrammar.cs.svn-base
tmp
prop-base
props
text-base
Ruby
.svn
all-wcprops
entries
format
prop-base
props
text-base
Ruby_auth.txt.svn-base
RubyGrammar.cs.svn-base
tmp
prop-base
props
text-base
Scheme
.svn
all-wcprops
entries
format
prop-base
props
text-base
SampleAstNodes.cs.svn-base
SchemeGrammar.cs.svn-base
tmp
prop-base
props
text-base
SourceSamples
.svn
all-wcprops
entries
format
prop-base
props
text-base
_about.txt.svn-base
99 bottles.py.svn-base
99 bottles.rb.svn-base
99 bottles.scm.svn-base
ExprSample.txt.svn-base
tmp
prop-base
props
text-base
99 bottles.rb
99 bottles.scm
Irony
.svn
all-wcprops
entries
format
prop-base
props
text-base
010.Irony.csproj.svn-base
Common.cs.svn-base
License.txt.svn-base
tmp
prop-base
props
text-base
Compiler
.svn
all-wcprops
entries
format
prop-base
props
text-base
CompilerContext.cs.svn-base
Enums.cs.svn-base
EventArgs.cs.svn-base
LanguageCompiler.cs.svn-base
SyntaxError.cs.svn-base
tmp
prop-base
props
text-base
AST
.svn
all-wcprops
entries
format
prop-base
props
text-base
AstNode.cs.svn-base
GenericNode.cs.svn-base
tmp
prop-base
props
text-base
Grammar
.svn
all-wcprops
entries
format
prop-base
props
text-base
BnfElement.cs.svn-base
BnfExpression.cs.svn-base
Grammar.cs.svn-base
GrammarData.cs.svn-base
GrammarDataBuilder.cs.svn-base
tmp
prop-base
props
text-base
NonTerminals
.svn
all-wcprops
entries
format
prop-base
props
text-base
NonTerminal.cs.svn-base
tmp
prop-base
props
text-base
Parser
.svn
all-wcprops
entries
format
prop-base
props
text-base
Parser.cs.svn-base
ParserStack.cs.svn-base
tmp
prop-base
props
text-base
Scanner
.svn
all-wcprops
entries
format
prop-base
props
text-base
Scanner.cs.svn-base
SourceFile.cs.svn-base
Token.cs.svn-base
tmp
prop-base
props
text-base
Terminals
.svn
all-wcprops
entries
format
prop-base
props
text-base
_Terminal.cs.svn-base
CharLiteral.cs.svn-base
CommentTerminal.cs.svn-base
ConstantSetTerminal.cs.svn-base
CustomTerminal.cs.svn-base
IdentifierTerminal.cs.svn-base
NumberTerminal.cs.svn-base
RegExBasedTerminal.cs.svn-base
StringLiteral.cs.svn-base
SymbolTerminal.cs.svn-base
tmp
prop-base
props
text-base
TokenFilters
.svn
all-wcprops
entries
format
prop-base
props
text-base
BraceMatchFilter.cs.svn-base
CodeOutlineFilter.cs.svn-base
TokenFilter.cs.svn-base
tmp
prop-base
props
text-base
Properties
.svn
all-wcprops
entries
format
prop-base
props
text-base
AssemblyInfo.cs.svn-base
tmp
prop-base
props
text-base
#region License
/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution. 
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the 
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/
#endregion

using System;
using System.Collections.Generic;
using System.Text;

namespace Irony.Compiler {

  #region Grammar class
  // Base class for language grammars. A concrete grammar derives from this class, builds its
  // rules out of terminals/non-terminals, assigns Root and optionally registers operators,
  // punctuation symbols and token filters. 
  public abstract class Grammar {

    #region properties: CaseSensitive, WhitespaceChars, Root, TokenFilters
    //Case-sensitivity flag of the language; true by default. 
    // NOTE(review): the flag is consumed outside of this class (presumably by scanner/terminals) - confirm.
    public bool CaseSensitive = true;

    //Whitespace characters; default set is: space, tab, CR, LF, vertical tab. 
    // NOTE(review): presumably these are the chars skipped by the scanner between tokens - confirm.
    public string WhitespaceChars = " \t\r\n\v";

    //Terminals not reachable from the Root (Comment terminal is usually one of them)
    public readonly TerminalList ExtraTerminals = new TerminalList();

    //Punctuation symbols are those that are excluded from arguments of node constructors
    public readonly KeyList PunctuationSymbols = new KeyList();

    //Default node type; if null then GenericNode type is used. 
    // Used by CreateNode when the non-terminal being reduced has no NodeType of its own.
    public Type DefaultNodeType;

    #region Comments
    //  If the following flag is true, the scanner removes all keyword-like terminals (those that start with a letter)
    //  from the list of terminals found in grammar rules. These symbols are treated 
    //  as something else in the input stream and the grammar should include a terminal (usually Identifier)
    //  that would match these keywords. 
    //  For ex., if there is a symbol "begin" used somewhere in grammar rules, it will not be included 
    //  into final list of terminals, and word "begin" in input stream will be represented by the Identifier token.  
    //  The parser will match it to the "begin" symbol specified in the expression 
    //   thus recognizing it as a keyword. 
    //  This is a pure optimization option, to improve scanner performance (terminal lookup by char returns less terminals). 
    //  It is recommended to set it to true for most languages. 
    //  For some languages like PHP that has all variable IDs start with "$" it should be set to false.
    #endregion
    public bool NoKeywordTerminals = true;

    //Root non-terminal of the grammar; FlattenNestedORs() starts its traversal from it.
    public NonTerminal Root {
      get { return _root; }
      set { _root = value; }
    }
    NonTerminal _root;

    //Token filters attached to the grammar; the list is empty by default.
    // NOTE(review): presumably filters are applied to the token stream between scanner and parser - confirm.
    public TokenFilterList TokenFilters = new TokenFilterList();

    #endregion 

    #region Operators handling
    //Table of registered operators, keyed by operator symbol.
    public readonly OperatorInfoTable Operators = new OperatorInfoTable();

    // Registers operator symbols with the given precedence and Left associativity.
    public void RegisterOperators(int precedence, params string[] opSymbols) {
      RegisterOperators(precedence, Associativity.Left, opSymbols);
    }

    // Registers each symbol in opSymbols in the Operators table with the given precedence and associativity.
    // Throws ArgumentNullException if opSymbols is null.
    // Throws ApplicationException if a symbol is already registered; symbols preceding the duplicate 
    // in the same call remain registered. 
    public void RegisterOperators(int precedence, Associativity associativity, params string[] opSymbols) {
      //A params array may still be passed explicitly as null; report it clearly instead of failing inside foreach
      if (opSymbols == null)
        throw new ArgumentNullException("opSymbols");
      foreach (string op in opSymbols) {
        if (Operators.ContainsKey(op))
          throw new ApplicationException("Operator '" + op + "' is registered more than once.");
        Operators[op] = new OperatorInfo(op, precedence, associativity);
      }
    }//method
    #endregion

    #region virtual methods: CreateNode
    // Creates AST node for a reduce action. 
    // Override this method in language grammar if you want a custom node creation mechanism.
    //   context      - compiler context, passed through to node creator/constructor
    //   reduceAction - reduce action; its NonTerminal supplies NodeCreator and NodeType
    //   location     - source location, passed through to node creator/constructor
    //   childNodes   - child nodes of the node being created; null is treated as empty list
    // Returns the created node, the single child node (see "node bubbling" below), 
    //  or null if there are no child nodes.
    public virtual AstNode CreateNode(CompilerContext context, ActionRecord reduceAction, 
                                      SourceLocation location, AstNodeList childNodes) {
      AstNode node;
      //First check and try custom NodeCreator method attached to non-terminal;
      // if it returns null we fall back to the general path
      if (reduceAction.NonTerminal.NodeCreator != null) {
        node = reduceAction.NonTerminal.NodeCreator(context, reduceAction, location, childNodes);
        if (node != null) return node;
      }
      //General node-creation path
      Type nodeType = reduceAction.NonTerminal.NodeType ?? this.DefaultNodeType;
      if (childNodes == null || childNodes.Count == 0) {
        //No children (or no list at all) - create NULL node
        node = null;
      } else if (nodeType == null || nodeType == typeof(GenericNode)) {
        //GenericNode
        if (childNodes.Count == 1) 
          //Node bubbling. For the default case, if no node type is specified (meaning "use GenericNode"),
          // and new node has just one child node (meaning reduce on identity-like production A->B),
          // then we do not create new node but simply return the child node itself. So nodes "bubble-up" to higher
          // levels. This simplifies the default syntax tree. 
          node = childNodes[0];
        else
          node = new GenericNode(context, reduceAction.NonTerminal, location, childNodes);
      } else {
        //Custom node type; the type must have a constructor accepting 
        // (context, non-terminal, location, childNodes)
        node = (AstNode)Activator.CreateInstance(nodeType, context, reduceAction.NonTerminal, location, childNodes);
      }
      return node;      
    }
    #endregion

    #region Static utility methods used in custom grammars: Symbol(), ToElement, WithStar, WithPlus, WithQ
    // Shortcut for SymbolTerminal.GetSymbol(symbol)
    protected static SymbolTerminal Symbol(string symbol) {
      return SymbolTerminal.GetSymbol(symbol);
    }
    // Shortcut for SymbolTerminal.GetSymbol(symbol, name)
    protected static SymbolTerminal Symbol(string symbol, string name) {
      return SymbolTerminal.GetSymbol(symbol, name);
    }
    // Wraps the expression into a new NonTerminal; the string representation of the expression 
    // is used as the non-terminal name. 
    protected static BnfElement ToElement(BnfExpression expression) {
      string name = expression.ToString();
      return new NonTerminal(name, expression);
    }
    // Wraps the expression into an element and applies Star() to it 
    // (presumably "zero or more" - see BnfElement.Star).
    protected static BnfElement WithStar(BnfExpression expression) {
      return ToElement(expression).Star();
    }
    // Wraps the expression into an element and applies Plus() to it 
    // (presumably "one or more" - see BnfElement.Plus).
    protected static BnfElement WithPlus(BnfExpression expression) {
      return ToElement(expression).Plus();
    }
    // Wraps the expression into an element and applies Q() to it 
    // (presumably "optional" - see BnfElement.Q).
    protected static BnfElement WithQ(BnfExpression expression) {
      return ToElement(expression).Q();
    }
    // Creates a token of Grammar.Error terminal at the given location. 
    // The message is run through string.Format only if args are provided, so a message 
    // without arguments may safely contain curly braces. 
    public static Token CreateErrorToken(SourceLocation location, string message, params object[] args) {
      if (args != null && args.Length > 0)
        message = string.Format(message, args);
      return new Token(Grammar.Error, location, message);
    }
    #endregion

    #region Standard terminals: EOF, Empty, NewLine, Indent, Dedent
    // Identifies end of file
    // Note: do not use this symbol in grammar rules. Parser automatically add this symbol 
    // as a lookahead to Root non-terminal
    public static readonly Terminal Eof = new Terminal("_EOF", TokenCategory.Outline);
    // Empty object is used to identify optional element: 
    //    elem.Rule = elem1 | Empty;
    public static readonly Terminal Empty = new Terminal("_Empty", TokenCategory.Outline);
    // Terminal of error tokens - see CreateErrorToken
    public static readonly Terminal Error = new Terminal("_Error", TokenCategory.Error);
    // The following terminals are used in indent-sensitive languages like Python 
    public static readonly Terminal NewLine = new Terminal("_LF", TokenCategory.Outline);
    public static readonly Terminal Indent = new Terminal("_Indent", TokenCategory.Outline);
    public static readonly Terminal Dedent = new Terminal("_Dedent", TokenCategory.Outline);

    // Special terminal for ReservedWord token produced by IdentifierTerminal when lexeme matches one of reserved words. 
    // It is sometimes(not always) necessary to distinguish reserved words from free identifiers in the input stream.
    public static readonly Terminal ReservedWord = new Terminal("ReservedWord", TokenMatchMode.ByValue);
    
    #endregion


    #region Optimization 
    //Not sure if this is useful ----------------------------------------------
    // Walks the grammar starting from Root and embeds nested alternatives into their parents:
    // if an operand of an Alternative expression is a non-terminal whose own expression is 
    // also an Alternative, the operand is replaced in place by that non-terminal's operands. 
    public void FlattenNestedORs() {
      NonTerminalList listInProgress = new NonTerminalList();
      FlattenNestedORs(this.Root, listInProgress);
    }

    // Recursive worker. Children are processed first, then the non-terminal itself. 
    //   nonTerminal    - non-terminal to process; null (including operands that are not non-terminals) is ignored
    //   listInProgress - non-terminals on the current recursion path; used to break cycles in recursive grammars
    private void FlattenNestedORs(NonTerminal nonTerminal, NonTerminalList listInProgress) {
    
      if (nonTerminal == null)
        return;
      //Non-terminal without an expression (rule not assigned) - nothing to traverse or flatten
      if (object.ReferenceEquals(nonTerminal.Expression, null))
        return;
      if (listInProgress.Contains(nonTerminal))
        return; //prevent endless looping due to recurrency
      listInProgress.Add(nonTerminal);
      //process all children
      foreach (IBnfExpression  ichild in nonTerminal.Expression.Operands)
        FlattenNestedORs(ichild as NonTerminal, listInProgress);
      listInProgress.Remove(nonTerminal);
      //See if we need to process this NonTerminal
      if (nonTerminal.Expression.ExpressionType != BnfExpressionType.Alternative)
        return;

      //Go through children and see if we can embed grandchildren directly 
      BnfExpressionList args = nonTerminal.Expression.Operands;
      for (int i = 0; i < args.Count; i++) {
        NonTerminal arg = args[i] as NonTerminal;
        if (arg == null || arg == nonTerminal  //avoid infinite loops 
          || object.ReferenceEquals(arg.Expression, null) //child without a rule - leave as is
          || arg.Expression.ExpressionType != BnfExpressionType.Alternative
          || arg.Expression.Operands.Count == 0)
          continue;
        //Insert child's alternatives directly here. Note that the loop then continues from i + 1, 
        // so the first inserted operand is not re-examined; children were already flattened above. 
        args.RemoveAt(i);
        args.InsertRange(i, arg.Expression.Operands);
      }//for i
       
    }//method

    #endregion
        
  }//class
  #endregion


}//namespace

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The MIT License

Share

About the Author

Roman Ivantsov
Architect Pulsar Informatics, Inc
United States United States
No Biography provided

| Advertise | Privacy | Mobile
Web04 | 2.8.140926.1 | Last Updated 4 Jan 2008
Article Copyright 2008 by Roman Ivantsov
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid