Click here to Skip to main content
15,886,519 members
Articles / Programming Languages / C#

Irony - .NET Compiler Construction Kit

Rate me:
Please Sign up or sign in to vote.
4.97/5 (86 votes)
4 Jan 2008 · MIT · 19 min read · 294.3K   3.2K   201
Introduction to Irony - a new technology of parser/compiler construction for .NET.
#region License
/* **********************************************************************************
 * Copyright (c) Roman Ivantsov
 * This source code is subject to terms and conditions of the MIT License
 * for Irony. A copy of the license can be found in the License.txt file
 * at the root of this distribution. 
 * By using this source code in any fashion, you are agreeing to be bound by the terms of the 
 * MIT License.
 * You must not remove this notice from this software.
 * **********************************************************************************/
#endregion

using System;
using System.Collections.Generic;
using System.Text;
using Irony.Compiler;

namespace Irony.Samples.Ruby {

  // Note that this is a sample grammar, not a production-quality Ruby grammar.
  // It is loosely based on the original grammar from Matz, whose purpose (it seems) was to explain the language syntax to programmers,
  // not to serve directly as input for building parsers.
  // See the Grammar Errors tab in GrammarExplorer for the remaining conflicts.
  // Any help in building a real LALR(1) grammar will be highly appreciated.

  // Sample Irony grammar for a subset of Ruby.
  // The whole grammar is built in the constructor: terminals and non-terminals are declared first,
  // then each non-terminal gets its BNF rule through the Expression property, and finally operator
  // precedence, punctuation symbols and the token filter are registered.
  // Block comments in front of the rules quote the corresponding productions of the reference grammar
  // this sample was derived from; the C# rule that follows may deliberately deviate from them.
  public class RubyGrammar : Grammar {

    // Builds the complete grammar and sets PROGRAM as the grammar root.
    public RubyGrammar() {

      #region Terminals
      //String Literals with single and double-quote start/end symbols
      Terminal StringLiteralDQ = new StringLiteral("StringLiteralDQ");
      Terminal StringLiteralSQ = new StringLiteral("StringLiteralSQ", "'", "'"); //-- implement me!
      //HereDoc and RegExLiteral are plain placeholder terminals for now
      Terminal HereDoc = new Terminal("HereDoc"); //-- implement me!
      Terminal RegExLiteral = new Terminal("RegExLiteral"); //-- implement me!
      IdentifierTerminal IDENTIFIER = new IdentifierTerminal("identifier", "_!?", "_$@");
      //                                                name     extraChars      extraFirstChars 
      //we need to isolate reserved words to avoid ambiguities in grammar 
      IDENTIFIER.AddReservedWords("do", "end", "def", "class", 
                                       "if", "case", "return", "yield", "while", "until");  //and some others...
      Terminal Number = new NumberTerminal("Number");
      Terminal Comment = new CommentTerminal("Comment", "#", "", "");
      ExtraTerminals.Add(Comment); //add comment explicitly to this list as it is not reachable from Root

      //some convenience variables
      Terminal Pipe = Symbol("|");
      Terminal dot = Symbol(".");
      Terminal comma = Symbol(",");
      #endregion

      #region NonTerminals 
      //NT variables names match element names in original grammar 
      NonTerminal PROGRAM = new NonTerminal("PROGRAM");
      NonTerminal COMPSTMT = new NonTerminal("COMPSTMT");
      NonTerminal STMT = new NonTerminal("STMT");
      NonTerminal BLOCK = new NonTerminal("BLOCK");
      NonTerminal EXPR = new NonTerminal("EXPR");
      //NonTerminal CALL = new NonTerminal("CALL");
      NonTerminal COMMAND = new NonTerminal("COMMAND");
      NonTerminal FUNCTION = new NonTerminal("FUNCTION");
      //ARG is merged into EXPR: it is only an alias, so every rule that mentions ARG really refers to EXPR
      NonTerminal ARG = EXPR;// new NonTerminal("ARG");
      NonTerminal PRIMARY = new NonTerminal("PRIMARY");
      NonTerminal WHEN_ARGS = new NonTerminal("WHEN_ARGS");
      NonTerminal THEN = new NonTerminal("THEN");
      NonTerminal BLOCK_BEGIN = new NonTerminal("BLOCK_BEGIN");
      NonTerminal BLOCK_END = new NonTerminal("BLOCK_END");
      NonTerminal BLOCK_VAR = new NonTerminal("BLOCK_VAR");
//      NonTerminal MLHS_ITEM = new NonTerminal("MLHS_ITEM");
      NonTerminal LHS = new NonTerminal("LHS");
      NonTerminal MRHS = new NonTerminal("MRHS");
//      NonTerminal MLHS = MRHS; // new NonTerminal("MLHS");
      NonTerminal CALL_ARGS = new NonTerminal("CALL_ARGS");
      NonTerminal CALL_ARGS_P = new NonTerminal("CALL_ARGS_P");
      NonTerminal AMP_ARG = new NonTerminal("AMP_ARG");
      NonTerminal STAR_ARG = new NonTerminal("STAR_ARG");
      NonTerminal ARGS = new NonTerminal("ARGS");
      NonTerminal ARGDECL = new NonTerminal("ARGDECL");
      NonTerminal ARGLIST = new NonTerminal("ARGLIST");
//      NonTerminal SINGLETON = new NonTerminal("SINGLETON");
      NonTerminal ASSOCS = new NonTerminal("ASSOCS");
      NonTerminal ASSOC = new NonTerminal("ASSOC");
//      NonTerminal VARIABLE = new NonTerminal("VARIABLE");  --merged into IDENTIFIER
      NonTerminal LITERAL = new NonTerminal("LITERAL");
      NonTerminal TERM = new NonTerminal("TERM");
      NonTerminal DO = new NonTerminal("DO");
//      NonTerminal VARNAME = new NonTerminal("VARNAME");   // note 1
      //GLOBAL is declared but no rule in this constructor uses it
      NonTerminal GLOBAL = new NonTerminal("GLOBAL");
      NonTerminal RETURN_STMT = new NonTerminal("RETURN_STMT");
      NonTerminal YIELD_STMT = new NonTerminal("YIELD_STMT");
      NonTerminal DEFINEDQ_STMT = new NonTerminal("DEFINEDQ_STMT");
      NonTerminal FUNCTION_STMT = new NonTerminal("FUNCTION_STMT");
      NonTerminal IF_STMT = new NonTerminal("IF_STMT");
      NonTerminal UNLESS_STMT = new NonTerminal("UNLESS_STMT");
      NonTerminal WHILE_STMT = new NonTerminal("WHILE_STMT");
      NonTerminal UNTIL_STMT = new NonTerminal("UNTIL_STMT");
      NonTerminal CASE_STMT = new NonTerminal("CASE_STMT");
      NonTerminal FOR_STMT = new NonTerminal("FOR_STMT");
      NonTerminal BLOCK_STMT = new NonTerminal("BLOCK_STMT");
      NonTerminal CLASS_DEF = new NonTerminal("CLASS_DEF");
      NonTerminal BASE_REF = new NonTerminal("BASE_REF");
      NonTerminal MODULE = new NonTerminal("MODULE_STMT");
      NonTerminal DEFFUNC_STMT = new NonTerminal("DEFFUNC_STMT");
      NonTerminal DEFSING_STMT = new NonTerminal("DEFSING_STMT");
      NonTerminal SINGLETON = new NonTerminal("SINGLETON");
      NonTerminal END = new NonTerminal("end");

      NonTerminal SYMBOL = new NonTerminal("SYMBOL");
      //Not in original grammar
      NonTerminal FNAME = new NonTerminal("FNAME");
      //BLOCK_BEGIN/BLOCK_END are currently referenced only from a commented-out STMT alternative
      BLOCK_BEGIN.Expression = Symbol("do") | "{";
      BLOCK_END.Expression = Symbol("end") | "}";
      NonTerminal OPERATION = new NonTerminal("OPERATION");
      //      Terminal VARNAME = IDENTIFIER;
      NonTerminal AUG_ASGN = new NonTerminal("AUG_ASGN");
      NonTerminal BINOP = new NonTerminal("BINOP");
      NonTerminal UNOP = new NonTerminal("UNOP");
      NonTerminal DELIM = new NonTerminal("DELIM");

      #endregion

      #region Rules 
      //Set grammar root
      this.Root = PROGRAM;

      //PROGRAM         : COMPSTMT
      PROGRAM.Expression = COMPSTMT; // +Grammar.Eof;
      //COMPSTMT        : STMT (TERM EXPR)* [TERM] 
      //an optional leading NewLine is accepted in addition to the original production
      COMPSTMT.Expression = NewLine.Q() + STMT.Plus(TERM) + TERM.Q();   

      /* STMT   : CALL do [`|' [BLOCK_VAR] `|'] COMPSTMT end
                | undef FNAME
                | alias FNAME FNAME
                | STMT if EXPR
                | STMT while EXPR
                | STMT unless EXPR
                | STMT until EXPR
                | `BEGIN' `{' COMPSTMT `}'
                | `"end"' `{' COMPSTMT `}'
                | LHS `=' COMMAND [do [`|' [BLOCK_VAR] `|'] COMPSTMT end]
                | EXPR    */
      STMT.Expression = FUNCTION
                      | COMMAND + BLOCK.Q()  
                      | "undef" + FNAME | "alias" + FNAME + FNAME 
                      | STMT + (Symbol("if")|"while"|"unless"|"until") + EXPR 
                      | Symbol("BEGIN") + "{" + COMPSTMT + "}"  
                     // | Symbol("end") + BLOCK_BEGIN + COMPSTMT + BLOCK_END    // don't quite get it
                   //   | LHS + "=" + COMMAND + BLOCK.Q()
                      | LHS + "=" + EXPR  //changed this 
                      | LHS + AUG_ASGN + EXPR
                      | EXPR; 
      BLOCK.Expression = "do" + WithQ(Pipe + BLOCK_VAR.Q() + Pipe) + COMPSTMT + "end";

      /* EXPR   : MLHS `=' MRHS
                | return CALL_ARGS
                | yield CALL_ARGS
                | EXPR and EXPR
                | EXPR or EXPR
                | not EXPR
                | COMMAND
                | `!' COMMAND
                | ARG   */
      //this one is completely changed, for better or worse...
      EXPR.Expression = //  MRHS + "=" + EXPR | //changed to EXPR   
                     //  LHS + "=" + EXPR  //changed this 
                    // | LHS + AUG_ASGN + EXPR
                      EXPR + BINOP + EXPR   
                     | UNOP + EXPR
                     //| "(" + EXPR + ")"
                      | EXPR + "?" + EXPR + ":" + EXPR   //added this to cover "?" operator
                     | "defined?" + ARG
                     | PRIMARY
                      ;
      // CALL   : FUNCTION | COMMAND
     // CALL.Expression = FUNCTION | COMMAND; //expression embedded directly into STMT

      /* COMMAND         : OPERATION CALL_ARGS
                | PRIMARY `.' OPERATION CALL_ARGS
                | PRIMARY `::' OPERATION CALL_ARGS
                | super CALL_ARGS   */
      COMMAND.Expression =  OPERATION + CALL_ARGS
                         | PRIMARY + DELIM + OPERATION + CALL_ARGS
                         | "super" + CALL_ARGS;
      OPERATION.Expression =  IDENTIFIER;
      DELIM.Expression = dot | "::";

      /* FUNCTION   : OPERATION [`(' [CALL_ARGS] `)']
                | PRIMARY `.' OPERATION `(' [CALL_ARGS] `)'
                | PRIMARY `::' OPERATION `(' [CALL_ARGS] `)'
                | PRIMARY `.' OPERATION
                | PRIMARY `::' OPERATION
                | super `(' [CALL_ARGS] `)'
                | super  */
      FUNCTION.Expression = OPERATION + CALL_ARGS_P
                   | PRIMARY + DELIM + OPERATION + CALL_ARGS_P.Q() 
                   | "super" + CALL_ARGS_P;
      //CALL_ARGS_P is the parenthesized (possibly empty) argument list
      CALL_ARGS_P.Expression = "(" + CALL_ARGS.Q() + ")";        
      /*  ARG   : LHS `=' ARG
                | LHS OP_ASGN ARG
                | ARG `..' ARG
                | ARG `...' ARG
                | ARG `+' ARG
                | ARG `-' ARG
                | ARG `*' ARG
                | ARG `/' ARG
                | ARG `%' ARG
                | ARG `**' ARG
                | `+' ARG
                | `-' ARG
                | ARG `|' ARG
                | ARG `^' ARG
                | ARG `&' ARG
                | ARG `<=>' ARG
                | ARG `>' ARG
                | ARG `>=' ARG
                | ARG `<' ARG
                | ARG `<=' ARG
                | ARG `==' ARG
                | ARG `===' ARG
                | ARG `!=' ARG
                | ARG `=~' ARG
                | ARG `!~' ARG
                | `!' ARG
                | `~' ARG
                | ARG `<<' ARG
                | ARG `>>' ARG
                | ARG `&&' ARG
                | ARG `||' ARG
                | defined? ARG
                | PRIMARY   */

    /*  ARG.Expression = LHS + "=" + EXPR  //changed this 
                     | LHS + AUG_ASGN + EXPR   
                     | ARG + BINOP + ARG  //moved to EXPR 
                     | UNOP + ARG
                     | "defined?" + ARG
                     | PRIMARY
                     ; */
      AUG_ASGN.Expression = Symbol("+=") | "-=" | "*=" | "/=" | "%=" | "**=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "&&=" | "||=";

      BINOP.Expression = Symbol("..") | "..." | "+" | "-" | "*" | "/" | "%" | "**" | "|" | "^" | "&"
                         | "<=>" | ">" | ">=" | "<" | "<=" | "==" | "===" | "!=" | "=~" | "!~" | "<<" | ">>" | "&&" | "||"
                         | "and" | "or";  //added these two here
      UNOP.Expression = Symbol("+") | "-" | "!" | "~";
       /*PRIMARY:    */
      /*        `(' COMPSTMT `)'
                | LITERAL
                | VARIABLE
                | PRIMARY `::' IDENTIFIER
                | `::' IDENTIFIER
                | PRIMARY `[' [ARGS] `]'  
                | `[' [ARGS [`,']] `]'
                | `{' [(ARGS|ASSOCS) [`,']] `}'
                | return [`(' [CALL_ARGS] `)']
                | yield [`(' [CALL_ARGS] `)']
                | defined? `(' ARG `)' 
                | FUNCTION
                | FUNCTION `{' [`|' [BLOCK_VAR] `|'] COMPSTMT `}'     
                | if EXPR THEN
                  COMPSTMT
                  (elsif EXPR THEN COMPSTMT)*
                  [else COMPSTMT]
                  end
                | unless EXPR THEN
                  COMPSTMT
                  [else COMPSTMT]
                  end
                | while EXPR DO COMPSTMT end
                | until EXPR DO COMPSTMT end
                | case COMPSTMT
                  (when WHEN_ARGS THEN COMPSTMT)+
                  [else COMPSTMT]
                  end
                | for BLOCK_VAR in EXPR DO
                  COMPSTMT
                  end
                | begin
                  COMPSTMT
                  [rescue [ARGS] DO COMPSTMT]+
                  [else COMPSTMT]
                  [ensure COMPSTMT]
                  end
                | class IDENTIFIER [`<' IDENTIFIER]
                  COMPSTMT
                  end
                | module IDENTIFIER
                  COMPSTMT
                  end
                | def FNAME ARGDECL
                  COMPSTMT
                  end
                | def SINGLETON (`.'|`::') FNAME ARGDECL
                  COMPSTMT
                  end */
      PRIMARY.Expression = 
       // "(" + COMPSTMT + ")" |   //-- removed this to fix ambiguity
        LITERAL 
        | LHS  //note 1.
        | "[" + WithQ(ARGS + comma.Q()) + "]"
        //hash literal takes the ASSOCS list (as in the original production above), so that
        //more than one key => value pair is accepted
        | "{" + WithQ( (ARGS|ASSOCS) + comma.Q() ) + "}"
        | RETURN_STMT | YIELD_STMT | DEFINEDQ_STMT | FUNCTION_STMT | IF_STMT | UNLESS_STMT | WHILE_STMT 
        | UNTIL_STMT | CASE_STMT | FOR_STMT | BLOCK_STMT | CLASS_DEF | MODULE | DEFFUNC_STMT | DEFSING_STMT;
         // LHS.Expression = VARIABLE | PRIMARY + "[" + ARGS.Q() + "]" | PRIMARY + "." + IDENTIFIER;                           

      RETURN_STMT.Expression = "return" + EXPR;// CALL_ARGS_P.Q(); //changed this
      YIELD_STMT.Expression = "yield" + CALL_ARGS_P.Q();
      DEFINEDQ_STMT.Expression = Symbol("defined?") + "(" + ARG + ")";
      FUNCTION_STMT.Expression = FUNCTION + WithQ("{" + WithQ("|" + BLOCK_VAR.Q() + "|") + COMPSTMT + "}");
      IF_STMT.Expression = "if" + EXPR + THEN + COMPSTMT + WithStar("elsif" + EXPR + THEN + COMPSTMT) + WithQ("else" + COMPSTMT) + END;
      //the else branch is optional, as in the original production and in IF_STMT
      UNLESS_STMT.Expression = "unless" + EXPR + THEN + COMPSTMT + WithQ("else" + COMPSTMT) + END;
      WHILE_STMT.Expression = "while" + EXPR + DO + COMPSTMT + END;
      UNTIL_STMT.Expression = "until" + EXPR + DO + COMPSTMT + END;
      CASE_STMT.Expression = "case" + COMPSTMT + WithPlus("when" + WHEN_ARGS + THEN + COMPSTMT) 
                                 + WithQ("else" + COMPSTMT) + END;
      FOR_STMT.Expression = "for" + BLOCK_VAR + "in" + EXPR + DO + COMPSTMT + END;
      BLOCK_STMT.Expression = "begin" + COMPSTMT + WithPlus("rescue" + ARGS.Q() + DO + COMPSTMT) 
                                 + WithQ("else" + COMPSTMT) + WithQ("ensure" + COMPSTMT) + END;
      CLASS_DEF.Expression = "class" + IDENTIFIER + BASE_REF.Q() + COMPSTMT + END;
      BASE_REF.Expression = "<" + IDENTIFIER; 
      MODULE.Expression = "module" + IDENTIFIER + COMPSTMT + END;
      DEFFUNC_STMT.Expression = "def" + FNAME + ARGDECL.Q() + COMPSTMT + END;
      DEFSING_STMT.Expression = "def" + SINGLETON + (dot|"::") + FNAME + ARGDECL.Q() + COMPSTMT + END;
      END.Expression = "end"; // TERM.Q() + "end";
      //  SINGLETON : VARIABLE | `(' EXPR `)' 
      SINGLETON.Expression = IDENTIFIER | "(" + EXPR + ")"; 
      // WHEN_ARGS       : ARGS [`,' `*' ARG]  | `*' ARG
      WHEN_ARGS.Expression = ARGS + WithQ(comma + "*" + ARG) | "*" + ARG;
      // THEN   : TERM | then | TERM then
      THEN.Expression = TERM | "then" | TERM + "then";
      // DO     : TERM | do | TERM do
      DO.Expression = TERM | "do" | TERM + "do";
      //  BLOCK_VAR       : LHS | MLHS
//      BLOCK_VAR.Expression = LHS | MLHS;   // -- ambiguous, changing to the following:
      BLOCK_VAR.Expression = IDENTIFIER | "(" + IDENTIFIER.Plus(comma) + ")";
      //  MLHS  : MLHS_ITEM `,' [MLHS_ITEM (`,' MLHS_ITEM)*] [`*' [LHS]]  | `*' LHS   
//      MLHS.Expression = MLHS_ITEM.Plus(",") + WithQ("*" + LHS.Q()) | "*" + LHS;  --ambiguous
      //MLHS.Expression = PRIMARY.Plus(",") + WithQ("*" + LHS.Q()) | "*" + LHS;
      //  MLHS_ITEM  : LHS | '(' MLHS ')'  
      //MLHS_ITEM.Expression = LHS | "(" + MLHS + ")";  //--ambiguous!!! using PRIMARY
      //MLHS_ITEM = PRIMARY;

      /* LHS    : VARIABLE
                | PRIMARY `[' [ARGS] `]'
                | PRIMARY `.' IDENTIFIER  */
     // LHS.Expression = IDENTIFIER | PRIMARY + "[" + ARGS.Q() + "]" | PRIMARY + dot + IDENTIFIER;
      LHS.Expression = OPERATION 
                     | PRIMARY + "[" + ARGS.Q() + "]"
                     | "(" + EXPR + ")";
      //   MRHS : ARGS [`,' `*' ARG] | `*' ARG    
      //MRHS is referenced only from commented-out alternatives in this constructor
      MRHS.Expression = ARGS + WithQ(comma + "*" + ARG) | "*" + ARG;
      /* CALL_ARGS   : ARGS
                | ARGS [`,' ASSOCS] [`,' `*' ARG] [`,' `&' ARG]
                | ASSOCS [`,' `*' ARG] [`,' `&' ARG]
                | `*' ARG [`,' `&' ARG]
                | `&' ARG
                | COMMAND    */
      CALL_ARGS.Expression = // ARGS |  //removed this - it is covered by next expression
                             ARGS + WithQ(comma + ASSOCS) + STAR_ARG.Q() + AMP_ARG.Q()  
                           | ASSOCS + STAR_ARG.Q() + AMP_ARG.Q()
                           | "*" + ARG + AMP_ARG.Q()
                           | "&" + ARG
                           | COMMAND; 
      AMP_ARG.Expression = comma + "&" + ARG;
      STAR_ARG.Expression = comma + "*" + ARG;
      //  ARGS            : ARG (`,' ARG)*
      ARGS.Expression = ARG.Plus(comma);
      // ARGDECL         : `(' ARGLIST `)'  | ARGLIST TERM 
      ARGDECL.Expression = "(" + ARGLIST + ")" | ARGLIST + TERM;
      /*   ARGLIST         : IDENTIFIER(`,'IDENTIFIER)*[`,'`*'[IDENTIFIER]][`,'`&'IDENTIFIER]
                | `*'IDENTIFIER[`,'`&'IDENTIFIER]
                | [`&'IDENTIFIER]    */
      ARGLIST.Expression = IDENTIFIER.Plus(comma) + WithQ(comma + "*" + IDENTIFIER.Q()) + WithQ(comma + "&" + IDENTIFIER)
                           | "*" + IDENTIFIER + WithQ(comma + "&" + IDENTIFIER)
                           | "&" + IDENTIFIER; 
      // ASSOCS : ASSOC (`,' ASSOC)*
      ASSOCS.Expression = ASSOC.Plus(comma);
      //ASSOC : ARG `=>' ARG
      ASSOC.Expression = ARG + "=>" + ARG;
      //  VARIABLE : VARNAME | nil | self    -- variable is merged into IDENTIFIER
      //VARIABLE.Expression = IDENTIFIER | "nil" | "self";
      // LITERAL : numeric | SYMBOL | STRING | STRING2 | HERE_DOC | REGEXP
      LITERAL.Expression = Number | SYMBOL | StringLiteralDQ | StringLiteralSQ | HereDoc | RegExLiteral;
      SYMBOL.Expression = Symbol(":") + IDENTIFIER; // (FNAME | VARNAME); //note 1.
      /*  FNAME           : IDENTIFIER | `..' | `|' | `^' | `&'
                | `<=>' | `==' | `===' | `=~'
                | `>' | `>=' | `<' | `<='
                | `+' | `-' | `*' | `/' | `%' | `**'
                | `<<' | `>>' | `~'
                | `+@' | `-@' | `[]' | `[]='  */
      FNAME.Expression = IDENTIFIER | ".." | "|" | "^" | "&" | "<=>" | "==" | "===" | "=~"
                | ">" | ">=" | "<" | "<="  | "+" | "-" | "*" | "/" | "%" | "**"
                | "<<" | ">>" | "~" | "+@" | "-@" | "[]" | "[]=";
      // TERM : `;' | `\n'
      TERM.Expression = NewLine | ";";  //multiple newlines are compressed into one by scanner
      #endregion

      #region misc: Operators, TokenFilters, etc
      //Register operators, highest precedence first. The order follows Ruby's operator precedence table:
      //  **  >  * / %  >  + -  >  << >>  >  &  >  | ^  >  comparisons  >  equality/match
      //      >  &&  >  ||  >  .. ...  >  ? :  >  and or
      //In particular shifts bind weaker than arithmetic, the logical operators bind weaker than the
      //bitwise and relational ones, and "and"/"or" share the lowest level.
      RegisterOperators(130, Associativity.Right, "**");
      RegisterOperators(120, "*", "/", "%");
      RegisterOperators(110, "+", "-");
      RegisterOperators(100, "<<", ">>");
      RegisterOperators( 90, "&");
      RegisterOperators( 80, "|", "^");
      RegisterOperators( 70, ">", ">=", "<", "<=");
      RegisterOperators( 60, "<=>" , "==" , "===" , "!=" , "=~" , "!~");
      RegisterOperators( 50, "&&");
      RegisterOperators( 40, "||");
      RegisterOperators( 30, "..", "...");
      RegisterOperators( 20, "?");
      RegisterOperators( 10, "and", "or");

      PunctuationSymbols.AddRange(new string[] { "(", ")", "," });

      TokenFilters.Add(new CodeOutlineFilter(false));
      #endregion


    }//constructor


  }//class
}//namespace

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The MIT License


Written By
Software Developer (Senior) Microsoft
United States
25 years of professional experience. .NET/c#, databases, security.
Currently Senior Security Engineer, Cloud Security, Microsoft

Comments and Discussions