Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version
Go to top

Parsing Expression Grammar Support for C# 3.0 Part 1 - PEG Lib and Parser Generator

, 7 Oct 2008
Introduction to the parsing method PEG with library and parser generator
PEG_GrammarExplorer.zip
PEG_GrammarExplorer
Documents
PEG_GrammarExplorer_fromCP-Dateien
PEG_GrammerExplorer.jpg
mssccprj.scc
PEG Explorer
mssccprj.scc
PEG Explorer.csproj.user
Properties
Settings.settings
vssver2.scc
vssver2.scc
PegBase
mssccprj.scc
Properties
vssver2.scc
vssver2.scc
PegSamples
BasicEncodingRules
input
CDURKR2URKR125195
DefiniteLengthForm
CDURKR2URKR125195
hello
TDAUTPTEUR0100011.tap3
vssver2.scc
hello
IndefiniteLengthForm
DefiniteLengthForm
TDAUTPTEUR0100011.tap3
TDAUTPTEUR0100011.tap3
TDAUTPTEUR0100011.tap3
TDAUTPTEUR0100011_withError.tap3
vssver2.scc
vssver2.scc
calc0_direct
input
vssver2.scc
vssver2.scc
calc0_tree
input
vssver2.scc
vssver2.scc
CSharp3
docu
input
vssver2.scc
vssver2.scc
C_KernighanRitchie2
input
vssver2.scc
vssver2.scc
EMail
input
vssver2.scc
vssver2.scc
Json
input
vssver2.scc
peg_template
vssver2.scc
vssver2.scc
mssccprj.scc
PEG Samples.csproj.user
PegGenerator
input
C#
TestCases
C#
.cs
vssver2.scc
vssver2.scc
vssver2.scc
vssver2.scc
Properties
vssver2.scc
python_2_5_2
input
adwords
awapi_python_samples_1.0.0
src
decoratorators_01
vssver2.scc
Problems
Sample PEG Console Parser
PEG Console Parser
input
Properties
Sample PEG Console Parser.csproj.user
vssver2.scc
PEG_GrammarExplorer_Submission.zip
PEG_GrammarExplorer_Submission
PEG_GrammerExplorer.jpg
PEG_GrammerExplorer.zip
<<Grammar Name="python_2_5_2_i"  
          encoding_class="ascii" 
          reference="http://www.python.org/doc/2.5.2/ref/grammar.txt"
          comment="'readable', but not optimized, heavily backtracking version of python grammar"
>>
{
      // Resets the semantic state used while parsing. Always returns true so it
      // can be placed as the first element of a start rule.
      bool init_()
      {
          // Indentation tracking: no pending indentation, one base level of 0.
          indentLength_ = -1;
          indentStack.Clear();
          indentStack.Push(0);

          // Raw-string mode stays off until a raw string prefix enables it.
          in_raw_mode = false;

          return true;
      }
      #region raw string handling
      // True between a raw string prefix and the end of that string literal.
      bool in_raw_mode;

      // Enters raw mode; always succeeds.
      bool set_raw_()
      {
          in_raw_mode = true;
          return true;
      }

      // Leaves raw mode; always succeeds.
      bool unset_raw_()
      {
          in_raw_mode = false;
          return true;
      }

      // Semantic predicate: succeeds only while raw mode is active.
      bool in_raw_()
      {
          return in_raw_mode;
      }
      #endregion raw string handling
      #region implicit line joining
      // True while line breaks are to be treated as plain white space (see rule S).
      bool in_impl_line_join_;

      // Semantic predicate: succeeds only while implicit line joining is active.
      bool in_implicit_line_joining_()
      {
          return in_impl_line_join_;
      }

      // Switches implicit line joining to bOn and hands the previous setting
      // back through prev so the caller can restore it later. Always succeeds.
      bool set_implicit_line_joining_(bool bOn, out bool prev)
      {
          prev = in_impl_line_join_;
          in_impl_line_join_ = bOn;
          return true;
      }

      // Switches implicit line joining to bOn without reporting the previous
      // setting. Always succeeds.
      bool set_implicit_line_joining_(bool bOn)
      {
          in_impl_line_join_ = bOn;
          return true;
      }
      #endregion implicit line joining
      // Text bound by the `S:indent_` captures in the INDENT and KEEPINDENT rules;
      // presumably the leading white space of the current line (confirm against
      // the generator's `:` capture semantics).
      string indent_;
      // Indentation width that has been measured but not yet consumed by a block;
      // -1 means "nothing pending". The width accounts for tabs.
      int indentLength_;//accounts for tabs
      // Copy of indentLength_ saved by continue_after_dedent_ and written back by
      // RESTOREINDENT_.
      int indentLengthToRestore_;
      // Widths of the currently open indentation levels; init_ seeds it with 0.
      System.Collections.Generic.Stack<int> indentStack= new System.Collections.Generic.Stack<int>();
      // Returns the column width of the text in indent_. A tab advances to the
      // next multiple of 8 columns; every other character counts as one column.
      int NormalizedIndentLength()
      {
         const int tabWidth = 8;
         int indentLength = 0;
         foreach (char c in indent_)
         {
            // The tab stop depends on the column reached so far, not on the
            // character index: after one tab the column is 8, so a second tab
            // must add 8 again (index-based arithmetic would add only 7).
            if (c == '\t') indentLength += tabWidth - indentLength % tabWidth;
            else           indentLength++;
         }
         return indentLength;
      }
      // Succeeds when an indentation width is still pending from an earlier
      // measurement and it equals the width of the innermost open block; the
      // pending width is then consumed. The pending width is always saved first
      // so that RESTOREINDENT_ can put it back.
      bool continue_after_dedent_()
      {
         indentLengthToRestore_ = indentLength_;

         bool pendingMatchesBlock =
            indentLength_ != -1 && indentLength_ == indentStack.Peek();
         if (pendingMatchesBlock)
         {
            indentLength_ = -1;   // consumed by the current block
         }
         return pendingMatchesBlock;
      }
      // Writes back the pending indentation width saved by continue_after_dedent_.
      // Always fails, so the grammar alternative it terminates never matches.
      bool RESTOREINDENT_()
      {
          indentLength_ = indentLengthToRestore_;
          return false;
      }
      // Measures the captured indentation and succeeds when it equals the width
      // of the innermost open block. On failure the measured width stays pending
      // in indentLength_.
      bool keep_handler_()
      {
          if (indentLength_ != -1)
          {
              return false; // indentation belongs to other block
          }

          indentLength_ = NormalizedIndentLength();
          if (indentStack.Peek() != indentLength_)
          {
              return false;
          }

          indentLength_ = -1; // consumed by the current block
          return true;
      }
      // Opens a new indentation level when the captured indentation is strictly
      // wider than the innermost open block; fails otherwise.
      bool indent_handler_()
      {
          if (indentLength_ != -1)
          {
              return false; // indentation belongs to other block
          }

          int measuredWidth = NormalizedIndentLength();
          bool isDeeper = measuredWidth > indentStack.Peek();
          if (isDeeper)
          {
              indentStack.Push(measuredWidth);
              indentLength_ = -1;
          }
          return isDeeper;
      }
      // Closes the innermost indentation level when the pending indentation width
      // is narrower than that level and equals one of the enclosing levels.
      bool dedent_handler_()
      {
         System.Diagnostics.Debug.Assert(indentStack.Count>1);

         bool returnsToOuterLevel =
            indentLength_ < indentStack.Peek() && indentStack.Contains(indentLength_);
         if (!returnsToOuterLevel)
         {
            return false;
         }

         indentStack.Pop();
         // Back at the base level: nothing is left pending.
         if (indentStack.Count == 1)
         {
            indentLength_ = -1;
         }
         return true;
      }
}
Line_join_sem_{
      // Scope object for rules declared `using Line_join_sem_`: switches implicit
      // line joining on when created and puts the previous setting back when
      // released. NOTE(review): creation/release timing is decided by the parser
      // generator - presumably around the match of the using rule; confirm.

      // Setting that was active before this scope was entered.
      bool prev_;
      // Enables implicit line joining and remembers the previous setting.
      Line_join_sem_(){set_implicit_line_joining_(true,out prev_);}
      // Restores the setting remembered by the constructor.
      ~Line_join_sem_(){set_implicit_line_joining_(prev_);}
}
// Start rule for one interactively entered statement. init_ comes first, as in
// the other start rules: the indentation handlers call indentStack.Peek(), and
// the stack only gets its base level 0 from init_.
[1] ^^interactive_input: init_ (compound_stmt/stmt_list?)
                        ( NEWLINE / FATAL<"syntax error: line break expected">);
// Start rule for a whole source file: resets the semantic state, skips leading
// blank lines, then reads statements up to the end marker. Anything left over
// is reported as a fatal error.
[2] ^^file_input:       init_ BLANKLINE* (NEWLINE / statement)* ENDMARKER
                        (!./FATAL<"wrong indent or non-python text here">);
// Start rule for an expression list followed by any number of line breaks.
[3] ^^eval_input:       init_ expression_list NEWLINE*  ENDMARKER;
// Start rule for an expression list followed by exactly one NEWLINE.
[4] ^^input_input:      init_ expression_list NEWLINE  ENDMARKER;

[5] ^^atom:              identifier S / literal / enclosure;
[6]   enclosure:         generator_expression / dict_display
                       / string_conversion / yield_atom
                       / parenth_form / list_display;
[7]   literal :          (STRING S)+ / NUMBER S ;
[8]   parenth_form:      '(' parenth_form_content @')' S;
[9]   parenth_form_content 
using Line_join_sem_:    S expression_list?;
[10]^^list_display:      '[' list_display_content @']' S;
[11]  list_display_content
using Line_join_sem_:   S (list_comprehension / expression_list )?;		
[12]^^list_comprehension:expression list_for;

[13]^^list_for:          'for' S2 target_list @'in' S2 old_expression_list list_iter?;
[14]^^old_expression_list:
                         old_expression ((',' S old_expression)+ (','S)?)?; 
[15]^^list_iter:         list_for / list_if;
[16]^^list_if:           'if' S2 old_expression list_iter?;
[17]^^generator_expression: 
                         '(' generator_expression_content @')' S;
[18]generator_expression_content
using Line_join_sem_:    S expression genexpr_for;
[19]^^genexpr_for:       'for' S2 target_list @'in' S2 or_test genexpr_iter?;
[20]^^genexpr_iter:      genexpr_for / genexpr_if;
[21]^^genexpr_if:        'if' S2 old_expression genexpr_iter?;
[22]^^dict_display:      '{' dict_display_content @'}' S ;
[23] dict_display_content
using Line_join_sem_:    S key_datum_list?;
[24]^^key_datum_list:    key_datum (',' S key_datum)* (',' S)? ;
[25]^^key_datum:         expression @':' S expression;
[26]^^string_conversion: '`' S expression_list @'`' S;
[27]^^yield_atom:        '(' yield_atom_content @')' S;
[28]yield_atom_content
using Line_join_sem_:    S yield_expression;
[29]^^yield_expression:  'yield' S2 expression_list?;
[30] ^primary:           atom (attributeref /  slicing  / call_args)*;
[31]^^attributeref:      '.' S @identifier S;
[32]^^slicing:           '[' slicing_content @']' S;
[33]slicing_content
using Line_join_sem_:    S slice_list;
[34]^^slice_list:        slice_item (',' S slice_item)* (','S)?;
[35]^^slice_item:        ellipsis / slice / expression ;
[36]^^slice:             lower_bound? ':' S upper_bound? stride?;
[37]^^stride:            ':' S expression?;
[38]^^lower_bound:       expression;
[39]^^upper_bound:       expression;
[40]^^ellipsis:          '.' S '.' S '.' S; // or '...' ?
[41]^^call_args:         '(' call_args_content @')' S;
[42] call_args_content
using Line_join_sem_      :S (expression genexpr_for / argument_list (',' S)? )?;
[43]^^argument_list:     keyword_arguments seq_arg? dict_arg?
                       / positional_arguments ((',' S) keyword_arguments)? seq_arg? dict_arg?
                       / seq_arg dict_arg?
                       / dict_arg;
[44]^^seq_arg:           ',' S '*' S expression;
[45]^^dict_arg:          ',' S '**' S expression;
[46]^^positional_arguments: 
                         expression (',' S !keyword_item expression)*;
[47]^^keyword_arguments: keyword_item (',' S keyword_item)*;
[48]^^keyword_item:      identifier S '=' S expression;
[49] ^power:             primary ('**' S @u_expr)?;
[50] ^u_expr:            power / ^'-' S @u_expr / ^'+'  S @u_expr / ^'~' S @u_expr;
[51] ^m_expr:            u_expr (^('*'/'//'/'/'/'%') S u_expr)*;
[52] ^a_expr:            m_expr ( ^[+-] S  @m_expr)*;
[53] ^shift_expr:        a_expr (^('<<' / '>>' ) S @a_expr)*;
[54] ^and_expr:          shift_expr ('&' S @shift_expr)*;
[55] ^xor_expr:          and_expr ( '^' S @and_expr)*;
[56] ^or_expr:           xor_expr ( '|' S @xor_expr)*;
[57] ^comparison:        or_expr ( comp_operator S @or_expr )*;
[58]^^comp_operator:     '>=' / '<=' / '<>' / '<' / '>' / '==' / '!=' / 
                         is_operator / in_operator;
[59]^^is_operator:	 'is' S2 (not_symbol S2)? ;
[60]^^in_operator:	 (not_symbol S2)? 'in';	
[61]  expression:        conditional_expression / lambda_form;
[62]^^old_expression:    or_test / old_lambda_form;
[63]^^conditional_expression: 
                         or_test ('if' S2 @or_test @(^'else') S2 expression)?;
[64] ^or_test:           and_test ('or' S2 @and_test)*;
[65] ^and_test:          not_test ( 'and' S2 @not_test)*;
[66] ^not_test:          (not_symbol S2)* comparison;
[67]^^not_symbol:        'not';
[68]^^lambda_form:       'lambda' S2 parameter_list? @':' S expression;
[69]^^old_lambda_form:   'lambda' S2 parameter_list? @':' S old_expression;
[70]  expression_list:   expression ( ',' S expression )* (',' S)?;
[71]  simple_stmt:
                         assert_stmt
                       / pass_stmt
                       / del_stmt
                       / print_stmt
                       / return_stmt
                       / yield_stmt
                       / raise_stmt
                       / break_stmt
                       / continue_stmt
                       / import_stmt
                       / global_stmt
                       / exec_stmt
                       / augmented_assignment_stmt
                       / assignment_stmt
                       / expression_stmt;
[72]^^expression_stmt:   expression_list;
[73]^^assert_stmt:       'assert' S2 @expression (',' S @expression)?;
[74]^^assignment_stmt:   (target_list '='!'=' S)+ @(expression_list / yield_expression);
[75]^^target_list:       target (',' S target)* (',' S)?;
  
[76]^^target:            '(' target_contents ')' S
                       / '[' target_contents ']' S
                       / assignable_primary;
[77]target_contents
using Line_join_sem_:    S target_list;

[78]^^assignable_primary:
                         (identifier S/ enclosure !targetlist_end)    
                         (attributeref /  slicing  / call_args !targetlist_end )*;
[79]^^targetlist_end:    S ([,=] S / 'in' S2);

[80]^^augmented_assignment_stmt: 
                         target augop S @(expression_list / yield_expression);
  
// Augmented assignment operators. '|=' replaces a duplicated '/=' entry, so
// that `a |= b` is recognized; '//=' is not in the referenced 2.5.2 grammar.
[81]^^augop: 	         '+=' / '-=' / '*=' / '/=' / '%=' / '**='
                       / '>>=' / '<<=' / '&=' / '^=' / '|=' / '//=';	// '//=' added by me

[82]^^pass_stmt:         'pass' S2;
[83]^^del_stmt:          'del' S2 @target_list;
// print statement. In the redirect form the value list after the target is
// optional as a whole, so `print >>f` alone is accepted; when present it needs
// at least one `, expression`.
[84]^^print_stmt:        'print' S2 (  '>>' S expression ((',' S expression)+ (',' S)?)?
                          / expression (',' S expression)* (',' S)?
                                   )?;
[85]^^return_stmt:       'return' S2 expression_list?;
[86]^^yield_stmt:        yield_expression;
[87]^^raise_stmt:        'raise' S2 (expression (',' S expression (',' S expression)?)?)?;
[88]^^break_stmt:        'break' S2;
[89]^^continue_stmt:     'continue' S2;
[90]^^import_stmt:       import_name / import_from;
[91]^^import_name:       'import' S2 module import_as? ( ',' S module import_as? )*;
[92]^^import_from:       'from' S2 module @'import' S '*' S
                       / 'from' S2 relative_module @'import' S2  
                                   (imports_in_parens / imports_list);
[93]^^import_as:         'as' S2 @name;
[94]^^imports_in_parens: '(' imports_in_parens_content @')' S;
// Content between the parentheses of `from m import ( ... )`. It starts with
// plain S (S2 would reject an identifier directly after '(') and enables
// implicit line joining like the other bracketed content rules, so the list
// may span several lines.
[95] imports_in_parens_content
using Line_join_sem_:    S identifier  S import_as? rest_import_list (',' S)?;
[96]^^imports_list:      identifier S import_as? rest_import_list;
[97]^^rest_import_list:  (',' S identifier S import_as? )*;
[98]^^module:            (identifier S '.' S)* identifier S;
[99]^^relative_module:   ('.' S)* module / ('.' S)+;
  
[100]^^name:             identifier S;
[101]^^global_stmt:      'global' S2 @identifier S (',' S @identifier S)*;
[102]^^exec_stmt:        'exec' S2 or_expr ('in' S2 expression (',' S expression)?)?;
[103]^^compound_stmt:    if_stmt / while_stmt / for_stmt / try_stmt / with_stmt 
                       / funcdef / classdef;
[104]^^suite:            stmt_list @(NEWLINE/ENDMARKER) / 
                         NEWLINE INDENT @statement (KEEPINDENT @statement)* DEDENT;
[105]  statement:        compound_stmt / stmt_list (NEWLINE/ENDMARKER);
[106]  stmt_list:        simple_stmt (';' S simple_stmt)* (';' S)? ;
[107]^^if_stmt:          'if' S2 @expression @':' S suite
                         (KEEPINDENT ^'elif'  S2 @expression @':' S suite / RESTOREINDENT_ )*
                         (KEEPINDENT ^'else' S @':' S suite / RESTOREINDENT_ )?;
[108]^^while_stmt:        'while' S2 @expression @':' S suite
                         (KEEPINDENT 'else' S @':' S suite / RESTOREINDENT_ )?;
[109]^^for_stmt:          'for' S2 target_list @'in' S2 expression_list
                          @':' S suite (KEEPINDENT 'else' S @':' S suite / RESTOREINDENT_ )?;
[110]^^try_stmt:          'try' S @':' S suite ( except_handler / finally_only_handler );
[111]^^finally_only_handler: 
                         KEEPINDENT 'finally' S @':' S suite / RESTOREINDENT_ ;
[112]^^except_handler:   (KEEPINDENT ^'except' S2 (expression (',' S target)?)? @':' S suite / RESTOREINDENT_ )+
                         (KEEPINDENT ^'else' S @':' S suite / RESTOREINDENT_ )? finally_only_handler?;
[113]^^with_stmt:        'with' S2 expression ('as' S2 @target)? @':' S suite;
[114]^^funcdef:          decorators? 'def' S2 @funcname S '(' parameter_list_in @')' S @':' S suite;
[115] parameter_list_in
using Line_join_sem_:    S parameter_list?;
[116]^^decorators:       decorator+;
[117]^^decorator:        '@' S dotted_name ('(' decorater_in @')' S)? NEWLINE 
                         (KEEPINDENT/FATAL<"decorator continuation incorrectly indented">);
[118] decorater_in
using Line_join_sem_:    S (argument_list (','S)?)?;
[119]^^dotted_name:      identifier S('.' S identifier S)*;

[120]^^parameter_list:   defparameter  (',' S defparameter )* (',' S seq_dict_pars?)? / seq_dict_pars;
[121]^^seq_dict_pars:    ('*' S identifier S (',' S '**' S identifier S)? / '**' S identifier S);
[122]^^defparameter:     parameter ('=' S @expression)?;
// Parenthesized parameter sublist; white space after the optional trailing
// comma is consumed, as in the other trailing-comma rules.
[123]^^sublist:          parameter (',' S parameter)* (',' S)?;
[124]^^parameter:        identifier S / '('  parameter_in  @')' S;
[125] parameter_in:      S @sublist;
[126]^^funcname:         identifier;
// Class definition. S2 after the keyword requires a word boundary (as 'def' S2
// does in funcdef), so `classFoo:` is not read as a class header.
[127]^^classdef:         'class' S2 classname S inheritance? ':' S suite;
[128]^^inheritance
using Line_join_sem_:    '('  S expression_list?  @')' S;
[129]^^classname:        identifier;

//token definitions added for PEG following the docs at python.org
[130]  S:                ( [ \t\v\f]+ /
                           COMMENT     / 
                           '\\' NL  / 
                          NL in_implicit_line_joining_
                         )* ;
[131]S2:                 ![A-Za-z_0-9] S;
[132]^^identifier:       !(KEYWORD S2) [A-Za-z_][A-Za-z0-9_]*;
// INDENT: binds the text matched by S to indent_ and requires indent_handler_
// to accept it; otherwise a fatal indentation error is raised.
[133]INDENT:             S:indent_ (indent_handler_/FATAL<"indentation error">);
// KEEPINDENT: either continue_after_dedent_ accepts an already pending
// indentation, or the text matched by S is bound to indent_ and keep_handler_
// must accept it.
[134]KEEPINDENT:         continue_after_dedent_ / S:indent_ keep_handler_;
// DEDENT: dedent_handler_ must succeed after S; otherwise a fatal indentation
// error is raised.
[135]DEDENT:             S dedent_handler_/FATAL<"indentation error">;
[136]NEWLINE:            NL BLANKLINE*;
// Line break: "\r\n" or "\n" (first alternative), or a lone "\r".
[137]NL:                 '\r'?'\n' / '\r' ;
[138]BLANKLINE:          [ \t\v\f]* COMMENT? NL/ [ \t\v\f]+ COMMENT? !. / COMMENT !. ;
[139]NAME:               !(KEYWORD S2) [A-Za-z_][A-Za-z0-9_]* S;
[140]IDENT:              [A-Za-z_][A-Za-z0-9_]*;
// Reserved words. PEG choice is ordered and commits to the first alternative
// that matches, so a keyword that is a prefix of another one must come after
// it: 'assert' is listed before 'as'. Otherwise `KEYWORD S2` matches 'as' on
// the input "assert", fails on the following 's', and `!(KEYWORD S2)` lets
// "assert" through as an identifier.
[141]KEYWORD: 
                         ('and' /      'del' /       'from' /      'not' /       'while' /    
                          'assert' /    'elif' /      'global' /    'or' /        'with' /     
                          'as' /        'else' /      'if' /        'pass' /      'yield' /    
                          'break' /     'except' /    'import' /    'print' /  
                          'class' /     'exec' /      'in' /        'raise' /  
                          'continue' /  'finally' /   'is' /        'return' / 
                          'def' /       'for' /       'lambda' /    'try') ;
[142]COMMENT:            '#' (!NL.)*;
[143]ENDMARKER:          !./WARNING<"file end expected">;
[144]NUMBER:           imagnumber / floatnumber / integer ^([lL]?) ;
[145]STRING:           stringprefix? (longstring/shortstring) unset_raw_;
[146]^^integer:          [1-9][0-9]* / '0'[xX] [0-9a-fA-F]+ / '0'[0-7]*;
[147]^^floatnumber:      pointfloat exponent? / [0-9]+ exponent;
[148]pointfloat:         [0-9]* '.' [0-9]+ / [0-9]+ '.';
[149]exponent:           [eE][+-]?[0-9]+;
[150]^^imagnumber:       (floatnumber / [0-9]+) [jJ];
[151]  stringprefix:     raw_indicating_stringprefix / unicodeonly_stringprefix;
[152]^^raw_indicating_stringprefix: 
                         ('r' /   'ur' /  'R' /   'UR' /  'Ur' /  'uR') set_raw_;
[153]^^unicodeonly_stringprefix:   
                         'u' /  'U' ;
[154]shortstring:        '"' shortstringitem1_content 
                         ('"' / FATAL<"string not terminated before input end">) / 
                         ['] shortstringitem2_content 
                         ([']/ FATAL<"string not terminated before input end">);
[155]^^shortstringitem1_content: 
                         (!'"' shortstringitem)*;
[156]^^shortstringitem2_content: 
                         (!['] shortstringitem)*;
longstring:              '"""' longstring1_content 
                         ('"""' / FATAL<"triple quoted string not terminated">) 
                       / ['][']['] longstring2_content 
                         (['][']['] / FATAL<"triple quoted string not terminated">);
[157]^^longstring1_content: (!'"""' longstringitem)* ;
[158]^^longstring2_content: (!(['][']['])longstringitem)* ;
[159]shortstringitem:    escapeseq 
                       / '\\' NL
                       / '\\' check_unrecognized_escape  . 
                       / [^\n] 
                       / FATAL<"new line in string not allowed">;
[160]longstringitem:     escapeseq / '\\' check_unrecognized_escape . /  .  ;
[161]check_unrecognized_escape: 
                         !in_raw_ WARNING<"unrecognized escape"> / &.;
// Recognized escape sequences. In raw mode only the unicode escapes apply;
// otherwise the full set is available. Octal escapes use the digits 0-7 only
// (8 is not an octal digit), with one to three digits.
[162]escapeseq:          in_raw_ '\\' ( unicode_4digits  / unicode_8digits ) 
                       / !in_raw_ '\\' 
                         ( [\\'"abfnrtv]
                         / 'N{' NAME '}' 
                         / unicode_4digits 
                         / unicode_8digits 
                         / [0-7]{1,3}
                         / hex_2digits );
[163]^^unicode_4digits:  'u'([0-9A-Fa-f]{4}/WARNING<"u must be followed by 4 hex digits">);
[164]^^unicode_8digits:  'U'([0-9A-Fa-f]{8}/WARNING<"U must be followed by 8 hex digits">);
[165]^^hex_2digits:      'x'([0-9A-Fa-f]{2} /WARNING<"x must be followed by 2 hex digits">);
set_explicit_line_joining_:;


<</Grammar>>

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Martin.Holzherr

Switzerland
No Biography provided

| Advertise | Privacy | Mobile
Web02 | 2.8.140926.1 | Last Updated 7 Oct 2008
Article Copyright 2008 by Martin.Holzherr
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid