Click here to Skip to main content
15,885,216 members
Articles / Containers / Virtual Machine

Parsing Expression Grammar Support for C# 3.0 Part 1 - PEG Lib and Parser Generator

Rate me:
Please Sign up or sign in to vote.
4.95/5 (49 votes)
7 Oct 2008CPOL40 min read 202.8K   2.1K   118  
Introduction to the parsing method PEG with library and parser generator
/*Author:Martin.Holzherr;Date:20080922;Context:"PEG Support for C#";Licence:CPOL
 * <<History>> 
 *  20080922;V1.0 created
 *  20080930;FIND_IN_SEMANTIC_BLOCK;corrected search in global semantic blocks: helper function <<PUtils.FindNode(<startnode>,int params[] ids)>> did not return closest node
 *  20081002;USING_BLOCK;added support for using in rules like'[9]   parenth_form_content  using Line_join_sem_: ...'
 * <</History>>
*/
using System;
using Peg.Base;
using System.IO;
using System.Diagnostics;
using System.Collections.Generic;
using System.Text;

namespace Peg.Samples
{
    enum EPegGeneratorNodes{FatalNode=1001,WarningNode=1002,EnumeratorNode=1003,
                                IntoVarWithContext=500,SemanticFunctionWithContext=501,
                                GenericCall=600};
    class PUtils
    {
        public static void Trim(StringBuilder s)
        {
            string sTrimChars = " \t\r\v\n";
            int i, j;
            for (i = 0; i < s.Length; ++i)
            {
                if (sTrimChars.IndexOf(s[i]) == -1) break;
            }
            for (j = s.Length; j > i; j--)
            {
                if (sTrimChars.IndexOf(s[j - 1]) == -1) break;
            }
            s.Remove(j, s.Length - j);
            s.Remove(0, i);
        }
        public static PegNode GetByPath(PegNode node, params int[] path)
        {
            for (int i = 0; i < path.Length; ++i)
            {
                if (node == null || node.id_ != path[i]) return null;
                if (i == path.Length - 1) return node; else node = node.child_;
            }
            return node;
        }
        
        public static PegNode FindNode(PegNode node, EPegGrammar id, int nodeDistance)
        {
            if (node == null || nodeDistance<=0) return null;
            if (node.id_ == (int)id) return node;
            PegNode foundNode = FindNode(node.child_, id, nodeDistance - 1);
            if (foundNode!=null) return foundNode;
            foundNode = FindNode(node.next_, id, nodeDistance - 1);
            if (foundNode != null) return foundNode;
            return null;
        }
        public static PegNode FindNode(PegNode node, EPegGrammar id)
        {
            return FindNode(node, id, 8);
        }
        public static PegNode FindNode(PegNode node, int nodeDistance, params EPegGrammar[] ids)/*FIND_IN_SEMANTIC_BLOCK*/
        {
            if (node == null || nodeDistance<=0) return null;
            foreach (EPegGrammar id in ids)
            {
                if (node.id_ == (int)id) return node;
            }
            PegNode foundNode = FindNode(node.child_, nodeDistance - 1,ids );
            if (foundNode!=null) return foundNode;
            foundNode = FindNode(node.next_, nodeDistance - 1,ids );
            if (foundNode != null) return foundNode;
            return null;
        }
        public static PegNode FindNode(PegNode node, params EPegGrammar[] ids)/*FIND_IN_SEMANTIC_BLOCK*/
        {
            return FindNode(node, 8,ids );
        }
        public static PegNode FindNodeInParents(PegNode node, EPegGrammar id)
        {
            for(; node!=null;node=node.parent_)
            {
                if( node.id_ == (int) id ) return node;
            }
            return null;
        }
        public static PegNode FindNodeInParents(PegNode node, params EPegGrammar[] ids)
        {
            foreach (EPegGrammar id in ids)
            {
                PegNode foundNode = FindNodeInParents(node, id);
                if (foundNode != null) return foundNode;
            }
            return null;
        }
        public static PegNode FindNodeNext(PegNode node, EPegGrammar id)
        {
            for (node = node.next_; node != null; node = node.next_)
            {
                if (node.id_ == (int)id) return node;
            }
            return null;
        }
        public static PegNode GetPegSpecification(PegNode root)
        {
            return root.child_.next_;
        }
        public static PegNode GetRuleFromRoot(PegNode root)
        {
            PegNode s= GetPegSpecification(root);
            return s.child_.next_.child_;
        }
       
        public static PegNode GetRuleId(PegNode rule, bool bMustExist)
        {
            Debug.Assert(		rule!=null
			&&	rule.id_== (int)EPegGrammar.peg_rule
            && rule.child_ != null && rule.child_.id_ == (int)EPegGrammar.lhs_of_rule);
            PegNode ruleId= rule.child_.child_;
	        if( bMustExist ){
                Debug.Assert(ruleId != null && ruleId.id_ == (int)EPegGrammar.rule_id); //either user defined or generated in preprocessing
                return ruleId;
	        }else{
                if (ruleId != null && ruleId.id_ == (int)EPegGrammar.rule_id)
                {
                    return ruleId;
		        }
		        return null;
	        }
        }
        public static string GetRuleNameFromRule(PegNode rule, string src)
        {
            PegNode ruleIdent = PUtils.FindNode(rule, EPegGrammar.rule_name, 8);
            string ruleName = ruleIdent.match_.GetAsString(src);
            return ruleName;
        }
        public static string GetRuleNameFromRuleRef(PegNode ruleRef,string src)
        {
            Debug.Assert(ruleRef.id_== (int)EPegGrammar.rule_ref );
            return ruleRef.match_.GetAsString(src);
        }
        public static string GetAsString(string src,PegNode n)
        {
            return n.match_.GetAsString(src);
        }
        public static void ReplaceNode( PegNode  toReplace, PegNode replacement)
        {	
            Debug.Assert(replacement.next_==null);
	        replacement.next_= toReplace.next_;
	        replacement.parent_= toReplace.parent_;
	        if( toReplace.parent_.child_==toReplace ){
		        toReplace.parent_.child_= replacement;
		        replacement.parent_= toReplace.parent_;
	        }else{
		        PegNode n;
		        for(n= toReplace.parent_.child_;
			        n.next_!=toReplace;
			        n= n.next_){
			    }
		        n.next_= replacement;
		        replacement.parent_= toReplace.parent_;
	        }
        }
        public static bool IsTemplateCall(PegNode ruleRef)
        {
	        PegNode n;
	        if( ruleRef.id_== (int)EPegGrammar.rule_ref){
                n= ruleRef.child_;
		        if( n!=null || n.id_!= (int) EPegGrammar.rule_name) return false;
		        return n.next_!=null && n.next_.id_== (int)EPegGrammar.rhs_of_rule;
	        }else return false;
        }
        public static string MakeFileName(string sFileTitle, params string[] directories)
        {
            string path="";
            foreach (string dir in directories)
            {
                path += dir;
                if (dir.Length>0 && dir[dir.Length - 1] != '\\') path += '\\';
            }
            return path + sFileTitle;
        }
        public static bool TreeOrAstPresent(PegNode n,out bool bIsTree,out bool bIsAst)
        {
	        bIsTree=false; bIsAst=false;
	        for( ;n!=null;n=n.next_){
		        switch(n.id_){
		        case (int)EPegGrammar.tree_symbol: bIsTree=true;return true;
                case (int)EPegGrammar.ast_symbol: bIsAst = true; return true;
		        default:break;
		        }
	        }
            return false;
        }
        public static PegNode GetRhs(PegNode rule)
        {	
            Debug.Assert( rule.id_== (int)EPegGrammar.peg_rule); 
	        PegNode n= rule.child_.next_;
	        Debug.Assert(n==null || n.id_== (int)EPegGrammar.rhs_of_rule);
	        return n;
        }
    }
    class NormalizeTree
    {
        #region Data Types
        delegate void NormalizeInRule(PegNode n);
        delegate bool RuleIsCandidate(PegNode rule);
        #endregion
        #region Data Members
        internal TreeContext c_;
        internal bool bOk_;
        HashSet<string> setRules_;
        #endregion Data Members
        #region Constructors
        internal NormalizeTree(TreeContext c, HashSet<string> setRules)
        {
            c_ = c;
            bOk_ = true;
            setRules_ = setRules;
            PegNode rule = PUtils.GetRuleFromRoot(c_.root_);
            PegNode specification = PUtils.GetPegSpecification(c_.root_);
            TranslateMandatoryToFatal(rule);
            InsertMissingRuleIds(rule);
            new TryFuseCharsets(this, rule);
            LinkIntoVariablesToSemanticBlocks(rule);
            new GenericParameters(this, rule);
            new SemanticFunctions(this, rule);
            //InsertMissingEnumerations(rule,specification);
        }
        #endregion
        PegNode NewNode(
                            EPegGrammar id,
                            PegBegEnd match,
                            PegNode child,
                            PegNode next)
        {
            PegNode n = new PegNode(null, (int)id);
            n.match_ = match;
            n.child_ = child;
            n.next_ = next;
            for (PegNode node = n.child_; node != null; node = node.next_) node.parent_ = n;
            return n;
        }
        PegNode NewNode(
                            EPegGrammar id,
                            PegBegEnd match)
        {
            return NewNode(id, match, null, null);
        }
        PegNode NewNode(
                            EPegGrammar id,
                            PegBegEnd match,
                            PegNode child)
        {
            return NewNode(id, match, child, null);
        }
        public class IdNode : PegGrammarParser.PGParserNode
        {
            internal IdNode(PegNode parent, int id, int ownId, PegBegEnd match, PegNode next)
                : base(parent, id)
            {
                ownId_ = ownId;
                base.match_ = match;
                next_ = next;
            }
            internal override string TreeNodeToString(string src)
            {
                return base.ToString() + ": " + ownId_.ToString();
            }
            public override string GetAsString(string s)
            {
                return ownId_.ToString();
            }
            public override PegNode Clone()
            {
                IdNode clone = new IdNode(parent_, id_, ownId_, match_, null);
                CloneSubTrees(clone);
                return clone;
            }
            public int ownId_;
        }
        public class Message : PegGrammarParser.PGParserNode
        {
            internal Message(PegNode parent, int id, string message, PegBegEnd match)
                : base(parent, id)
            {
                message_ = message;
                base.match_ = match;
            }
            internal override string TreeNodeToString(string src)
            {
                return message_;
            }
            public override PegNode Clone()
            {
                Message clone = new Message(parent_, id_, message_, match_);
                CloneSubTrees(clone);
                return clone;
            }
            public string message_;
        }
        public class TRepetition : PegGrammarParser.PGParserNode
        {
            internal TRepetition(int nLowerLimit, int nUpperLimit, EPegGrammar id, PegBegEnd begEnd)
                : base(null, (int)id)
            {
                base.match_ = begEnd;
                lower = nLowerLimit;
                upper = nUpperLimit;
            }
            internal override string TreeNodeToString(string src)
            {
                return "{" + lower.ToString() + ", " + upper.ToString() + "}";
            }
            public override PegNode Clone()
            {
                TRepetition clone = new TRepetition(lower, upper, (EPegGrammar)id_, match_);
                CloneSubTrees(clone);
                return clone;
            }
            public int lower;
            public int upper;
        }
        public class SemanticVarOrFuncWithContext : PegGrammarParser.PGParserNode
        {
            public SemanticVarOrFuncWithContext(int id, PegNode semBlock, PegNode variableOrFunc, PegNode intoVariableOrCall, bool isLocal)
                : base(intoVariableOrCall.parent_, id)
            {
                semBlock_ = semBlock;
                variableOrFunc_ = variableOrFunc;
                intoVariableOrCall_ = intoVariableOrCall;
                isLocal_ = isLocal;
            }
            internal override string TreeNodeToString(string src)
            {
                if (semBlock_.id_ == (int)EPegGrammar.named_semantic_block)
                {
                    return semBlock_.child_.GetAsString(src) + "::" + intoVariableOrCall_.GetAsString(src);
                }
                else
                {
                    Debug.Assert(semBlock_.id_ == (int)EPegGrammar.anonymous_semantic_block);
                    if (semBlock_.parent_.id_ == (int)EPegGrammar.toplevel_semantic_blocks)
                    {
                        return "::" + intoVariableOrCall_.GetAsString(src);
                    }
                    else
                    {
                        return "sem." + intoVariableOrCall_.GetAsString(src);
                    }
                }
            }
            public override PegNode Clone()
            {
                SemanticVarOrFuncWithContext clone = new SemanticVarOrFuncWithContext(id_, semBlock_, variableOrFunc_, intoVariableOrCall_, isLocal_);
                CloneSubTrees(clone);
                return clone;
            }
            public PegNode semBlock_;
            public PegNode variableOrFunc_;
            public PegNode intoVariableOrCall_;
            public bool isLocal_;
        }
        public class GenericCall : PegGrammarParser.PGParserNode
        {
            public GenericCall(PegNode genericParam, PegNode genericCall)
                : base(genericCall.parent_, (int)EPegGeneratorNodes.GenericCall)
            {
                match_ = genericCall.match_;
                genericParam_ = genericParam;
                genericCall_ = genericCall;
            }
            internal override string TreeNodeToString(string src)
            {
                return genericCall_.GetAsString(src);
            }
            public override string GetAsString(string s)
            {
                return genericCall_.GetAsString(s);
            }

            public override PegNode Clone()
            {
                GenericCall clone = new GenericCall(genericParam_, genericCall_);
                CloneSubTrees(clone);
                return clone;
            }
            public PegNode genericParam_, genericCall_;
        }
        #region MandatoryToFatal: translate @<item>  to (item/FATAL("item expected"))
        PegNode NewFatal(string sMsg, PegBegEnd match)
        {
            return new Message(null, (int)EPegGeneratorNodes.FatalNode, sMsg, match);
        }

        void RemoveComments(StringBuilder s)
        {
            string sContent = s.ToString();
            int ind = sContent.IndexOf("//");
            if (ind >= 0)
            {
                int ind1 = sContent.IndexOf('\n', ind);
                if (ind1 == -1) ind1 = sContent.Length - 1;
                s.Remove(ind, ind1 + 1 - ind);
                RemoveComments(s);
            }
        }
        void TrimInnerWhiteSpace(StringBuilder s)
        {
            RemoveComments(s);

            for (int i = s.Length; i > 0; --i)
            {
                if (Char.IsWhiteSpace(s[i - 1]) && i > 2)
                {
                    int end = i;
                    for (--i; i > 1; --i)
                    {
                        if (!Char.IsWhiteSpace(s[i - 1])) break;
                    }
                    if (end - i > 2)
                    {
                        s.Remove(i, end - i - 1);
                    }
                }
            }
        }
        string ExtractFatalMsg(PegNode n)
        {
            string s = n.match_.GetAsString(c_.src_);
            StringBuilder sb = new StringBuilder(s);
            PUtils.Trim(sb);
            TrimInnerWhiteSpace(sb);
            sb.Replace("\r\n", " ");
            sb.Replace("\n", " ");
            sb.Replace("\t", " ");

            sb.Replace("/", " or ");
            sb.Replace("&", "");

            return "<<" + sb.ToString() + ">> expected";
        }
        void RemoveSubTree(PegNode n)
        {
            Debug.Assert(n != null && n.parent_ != null);
            if (n.parent_.child_ == n)
            {
                n.parent_.child_ = n.next_;
            }
            else
            {
                Debug.Assert(n.parent_.child_ != null);
                PegNode cur;
                for (cur = n.parent_.child_; cur.next_ != n; cur = cur.next_)
                {
                    Debug.Assert(cur.next_ != null);
                }
                Debug.Assert(cur.next_ == n);
                cur.next_ = cur.next_.next_;

            }
        }
        void MandatoryToFatal(PegNode n)
        {	// example: <<@"}" >> is translated to << ("}"/Fatal<msg>) S>>
            //  corresponds to the tree transformation 
            //  (term atom_prefix(mandatory_symbol) atom) => term  atom(rhs(choice(term(atom("{"))) choice(term(atom(FATAL(" ..."))))
            Debug.Assert(n.id_ == (int)EPegGrammar.mandatory_symbol && n.parent_ != null && n.parent_.next_ != null && n.parent_.parent_ != null);
            string msg = ExtractFatalMsg(n.parent_.next_);
            PegNode term = n.parent_.parent_;
            PegNode atom = n.parent_.next_;
            PegBegEnd match = atom.match_;
            PegNode nodeFatalRoot =
                NewNode(EPegGrammar.atom, match,
                    NewNode(
                        EPegGrammar.rhs_of_rule,
                        match,
                        NewNode(EPegGrammar.choice,
                                    match,
                                    NewNode(EPegGrammar.term, match, atom),
                                    NewNode(EPegGrammar.choice, match,
                                        NewNode(EPegGrammar.term, match,
                                            NewNode(EPegGrammar.atom, match, NewFatal(msg, match))))
                                )
                        ));
            RemoveSubTree(n.parent_);
            term.child_ = nodeFatalRoot;
            nodeFatalRoot.parent_ = term;
        }
        void NormalizeMandatory(PegNode n)
        {
            if (n == null) return;
            switch (n.id_)
            {
                case (int)EPegGrammar.mandatory_symbol:
                    MandatoryToFatal(n);
                    break;
                default:
                    NormalizeMandatory(n.next_);
                    NormalizeMandatory(n.child_);
                    break;
            }
        }
        #endregion MandatoryToFatal
        string ExtractMsg(PegNode dblQuoteNode)
        {
            StringBuilder s = new StringBuilder();
            Debug.Assert(dblQuoteNode.id_ == (int)EPegGrammar.double_quote_literal);
            for (; dblQuoteNode != null; dblQuoteNode = dblQuoteNode.next_)
            {
                Debug.Assert(dblQuoteNode.match_.Length >= 2);
                s.Append(c_.src_.Substring(dblQuoteNode.match_.posBeg_ + 1, dblQuoteNode.match_.Length - 2));
            }
            return s.ToString();
        }
        string BuildEnumeratorFromDesc(string sMsg, int n)
        {
            StringBuilder s = new StringBuilder("e_");
            bool bFirstWhite = false;
            for (int i = 0; i < 30 && i < sMsg.Length; ++i)
            {
                if (char.IsLetterOrDigit(sMsg[i]))
                {
                    bFirstWhite = true;
                    s.Append(sMsg[i]);
                }
                else
                {
                    if (bFirstWhite)
                    {
                        bFirstWhite = false;
                        s.Append('_');
                    }
                }
            }
            if (sMsg.Length > 30)
            {
                s.Append("___");
            }
            s.Append(n.ToString());
            return s.ToString();
        }
        void TryInsertMissingEnumerationDesc(PegNode specification, PegNode n)
        {//creates an enumeration node for each fatal message
            Debug.Assert(specification.child_ != null
                            && specification.child_.id_ == (int)EPegGrammar.toplevel_semantic_blocks
                            && specification.child_.next_ != null
                            && specification.child_.next_.id_ == (int)EPegGrammar.peg_rule
                            && n != null
                            && n.id_ == (int)EPegGrammar.multiline_double_quote_literal);
            PegNode enumerationsNode = specification.child_.next_.next_;
            if (enumerationsNode == null)
            {
                enumerationsNode = specification.child_.next_.next_ = NewNode(EPegGrammar.enumeration_definitions, new PegBegEnd());
                enumerationsNode.parent_ = specification;
            }
            PegNode enumNode = enumerationsNode.child_;
            string sFatalMsg = n.child_.GetAsString(c_.src_);
            int i;
            for (i = 0; enumNode != null; enumNode = enumNode.next_, ++i)
            {
                Debug.Assert((enumNode != null) && enumNode.child_ != null && enumNode.child_.next_ != null && enumNode.child_.next_.id_ == (int)EPegGrammar.multiline_double_quote_literal);
                string s = enumNode.child_.next_.child_.GetAsString(c_.src_);
                if (s == sFatalMsg) return;
            }
            string sEnumerator = BuildEnumeratorFromDesc(sFatalMsg, i);
            int len = sEnumerator.Length;
            string sMsg = sEnumerator;
            PegBegEnd match, match0;
            match.posBeg_ = 0; match.posEnd_ = len;
            match0.posBeg_ = match0.posEnd_ = 0;
            enumNode = NewNode(EPegGrammar.enumeration_definition, match0,
                                NewNode(EPegGrammar.enumerator, match, null,
                                    NewNode(EPegGrammar.multiline_double_quote_literal, n.child_.match_,
                                        NewNode(EPegGrammar.double_quote_literal, n.child_.match_))));
            enumNode.parent_ = enumerationsNode;
        }
        void TryInsertMissingEnumeration(PegNode specification, PegNode n)
        {
            Debug.Assert(specification.child_ != null
                        && specification.child_.id_ == (int)EPegGrammar.toplevel_semantic_blocks
                        && specification.child_.next_ != null &&
                        specification.child_.next_.id_ == (int)EPegGrammar.peg_rule);
            PegNode enumerationsNode = specification.child_.next_.next_;
            PegBegEnd match;
            match.posBeg_ = match.posEnd_ = 0;
            if (enumerationsNode == null)
            { /* no enumerations present */
                enumerationsNode = specification.child_.next_.next_ = NewNode(EPegGrammar.enumeration_definitions, match);
                enumerationsNode.parent_ = specification;
                specification.child_.next_.next_ = enumerationsNode;
            }
            PegNode enumNode = enumerationsNode.child_, lastEnumNode;
            for (lastEnumNode = enumNode; enumNode != null; lastEnumNode = enumNode, enumNode = enumNode.next_)
            {
                Debug.Assert(enumNode != null && enumNode.child_ != null && enumNode.child_.id_ == (int)EPegGrammar.enumerator);
                if (enumNode.child_.match_.GetAsString(c_.src_) == n.match_.GetAsString(c_.src_)) return;
            }
            int nSize = n.match_.posEnd_ - n.match_.posBeg_;
            string sMsg = n.match_.GetAsString(c_.src_);
            PegBegEnd match0;
            match.posBeg_ = 0; match.posEnd_ = sMsg.Length;
            match0.posBeg_ = match0.posEnd_ = 0;
            enumNode = NewNode(EPegGrammar.enumeration_definition, match0,
                                NewNode(EPegGrammar.enumerator, n.match_, null,
                                    new Message(null, (int)EPegGeneratorNodes.EnumeratorNode, sMsg, match)));
            enumNode.parent_ = enumerationsNode;
            if (lastEnumNode == null) enumerationsNode.child_ = enumNode; else lastEnumNode.next_ = enumNode;
        }
        void InsertMissingEnumeration(PegNode n, PegNode specification)
        {
            if (n == null) return;
            switch (n.id_)
            {
                case (int)EPegGrammar.enumeration_definition: return;
                case (int)EPegGrammar.enumerator:
                    {
                        if (n.parent_ != null && n.parent_.id_ == (int)EPegGrammar.message)
                        {
                            TryInsertMissingEnumeration(specification, n);
                        }
                    }
                    break;
                case (int)EPegGrammar.multiline_double_quote_literal:
                    {
                        if (n.parent_ != null && n.parent_.id_ == (int)EPegGrammar.message)
                        {
                            TryInsertMissingEnumerationDesc(specification, n);
                        }
                    }
                    break;
                default:
                    InsertMissingEnumeration(n.next_, specification);
                    InsertMissingEnumeration(n.child_, specification);
                    break;
            }
        }
        void InsertMissingEnumerations(PegNode rule, PegNode specification)
        {
            for (; rule != null; rule = rule.next_)
            {
                InsertMissingEnumeration(rule.child_.next_, specification);
            }
        }
        void TranslateMandatoryToFatal(PegNode rule)
        {
            for (; rule != null; rule = rule.next_)
            {
                NormalizeMandatory(rule.child_.next_);
            }
        }
        #region GenericNormalizer: traverses all the nodes and applies normalizer in case of match with one of idNodes
        void NormalizeEachRule(PegNode n, RuleIsCandidate isCandidate, NormalizeInRule normalizer, params int[] idNodes)
        {
            if (n == null) return;
            if (n.id_ == (int)EPegGrammar.peg_rule && (isCandidate != null && !isCandidate(n)))
            {
                NormalizeEachRule(n.next_, isCandidate, normalizer, idNodes);
                return;
            }
            else
            {
                foreach (int id in idNodes)
                {
                    if (id == n.id_)
                    {
                        normalizer(n);
                        break;
                    }
                }
            }
            NormalizeEachRule(n.child_, isCandidate, normalizer, idNodes);
            NormalizeEachRule(n.next_, isCandidate, normalizer, idNodes);
        }
        #endregion GenericNormalizer
        #region MissingRuleIds: Assign arbitrary rule id to rules without a user defined rule id
        int SearchGap(SortedList<int, int> setRuleIds, int nNofIdsRequired)
        {
            int nLastKey = 0;
            foreach (var elem in setRuleIds)
            {
                if (elem.Key - nLastKey > nNofIdsRequired + 1) return nLastKey + 1;
                nLastKey = elem.Key;
            }
            return nLastKey + 1;
        }
        void CollectRuleIds(PegNode rule, SortedList<int, int> setRuleIds, ref int nNofIdsRequired)
        {
            int idMax = -1;
            for (; rule != null; rule = rule.next_)
            {
                PegNode ruleIdNode = PUtils.GetRuleId(rule, false);
                if (ruleIdNode != null)
                {
                    int id = Int32.Parse(ruleIdNode.GetAsString(c_.src_));
                    if (setRuleIds.IndexOfKey(id) == -1)
                    {
                        setRuleIds.Add(id, id);
                    }
                    else
                    {
                        c_.errOut_.WriteLine(c_.sErrorPrefix + "WARNING: rule <{0}> has id <{1}> which is alread used by one of previous rules",
                            PUtils.GetRuleNameFromRule(rule, c_.src_), id);
                    }
                    if (id > idMax) idMax = id;
                }
                else nNofIdsRequired++;
            }
        }
        void InsertRuleId(PegNode rule, int nRuleId)
        {
            Debug.Assert(rule != null
                        && rule.child_ != null
                        && rule.child_.child_ != null
                        && rule.child_.child_.id_ != (int)EPegGrammar.rule_id);
            PegNode next = rule.child_.child_;
            rule.child_.child_ =
                new IdNode(rule.child_, (int)EPegGrammar.rule_id, nRuleId, rule.child_.child_.match_, next);
        }
        void InsertRuleIds(PegNode rule, int nGapStartId)
        {
            for (; rule != null; rule = rule.next_)
            {
                PegNode ruleId = PUtils.GetRuleId(rule, false);
                if (ruleId == null)
                {
                    InsertRuleId(rule, nGapStartId++);
                }
            }
        }
        void InsertMissingRuleIds(PegNode rule)
        {
            SortedList<int, int> setRuleIds = new SortedList<int, int>();
            int nNofIdsRequired = 0;
            CollectRuleIds(rule, setRuleIds, ref nNofIdsRequired);
            if (nNofIdsRequired == 0) return;
            int nGapStartId = SearchGap(setRuleIds, nNofIdsRequired);
            InsertRuleIds(rule, nGapStartId);
        }
        #endregion MissingRuleIds

        class TryFuseCharsets //fuse alternative charsets to one charset
        {
            internal TryFuseCharsets(NormalizeTree normalizeTree, PegNode rule)
            {
                normalizeTree_ = normalizeTree;
                FuseAlternativeCharsets(rule);
            }
            void FuseCharsets(PegNode first, PegNode end, List<PegNode> fuseList)
            {
                Debug.Assert(fuseList.Count > 0 && fuseList[0].child_ != null);
                PegNode fused = fuseList[0];
                PegNode last;
                for (last = fused.child_; last.next_ != null; last = last.next_)
                    ;
                for (int i = 1; i < fuseList.Count; ++i)
                {
                    last.next_ = fuseList[i].child_;
                    for (last = last.next_, last.parent_ = fused; last.next_ != null; last = last.next_)
                    {
                        last.parent_ = fused;
                    }

                }
                first.next_ = end;
            }
            void FuseCharsets(PegNode n)
            {
                Debug.Assert(n.id_ == (int)EPegGrammar.rhs_of_rule);
                PegNode first = null;
                List<PegNode> fuseList = new List<PegNode>();
                for (PegNode choice = n.child_; choice != null; choice = choice.next_)
                {
                    PegNode charsetNode =
                        PUtils.GetByPath(choice, (int)EPegGrammar.choice,
                                                (int)EPegGrammar.term,
                                                (int)EPegGrammar.atom,
                                                (int)EPegGrammar.character_set);
                    bool canBeFused =
                                charsetNode != null
                            && charsetNode.child_ != null
                            && charsetNode.child_.id_ != (int)EPegGrammar.set_negation
                            && charsetNode.parent_.next_==null //no quantor (e.g. * + {l,h})
                            && choice.child_.next_ == null;//term must not have sibling term
                    if (canBeFused)
                    {
                        if (first == null) first = choice;
                        fuseList.Add(charsetNode);
                    }
                    else
                    {
                        if (first != null)
                        {
                            FuseCharsets(first, choice, fuseList);
                            first = null;
                            fuseList = new List<PegNode>();
                        }

                    }
                }
                if (first != null) FuseCharsets(first, null, fuseList);
            }
            void FuseAlternativeCharsets(PegNode rule)
            {
                normalizeTree_.NormalizeEachRule(rule, null, FuseCharsets, (int)EPegGrammar.rhs_of_rule);
            }
            NormalizeTree normalizeTree_;
        }

        #region IntoVariable: search context
        bool FindIntoVariableInTree(PegNode n, string intoVariable, ref PegNode variable)
        {
            if (n == null) return false;
            if (n.id_ == (int)EPegGrammar.variable)
            {
                string varName = n.GetAsString(c_.src_);
                Debug.Assert(varName.Length > 0);
                if (
                    varName == intoVariable
                || varName[0] == '@' && varName.Substring(1) == intoVariable)
                {
                    variable = n; return true;
                }
            }
            return
                FindIntoVariableInTree(n.child_, intoVariable, ref variable)
            || FindIntoVariableInTree(n.next_, intoVariable, ref variable);
        }
        delegate bool FindSemanticVariableOrFunction(PegNode n, string variableOrFunction, ref PegNode varOrFunc);
        PegNode FindSemanticBlock(FindSemanticVariableOrFunction findVarOrFunc, string intoVarOrFunc, PegNode n, out PegNode varOrFunc, out bool isLocal)
        {
            varOrFunc = null;
            isLocal = true;
            #region Search in Local Scope
            PegNode rule = PUtils.FindNodeInParents(n, EPegGrammar.peg_rule);
            Debug.Assert(rule != null);
            PegNode semBlock = PUtils.FindNode(rule.child_, EPegGrammar.named_semantic_block, EPegGrammar.anonymous_semantic_block);
            if (semBlock != null && findVarOrFunc(semBlock.child_, intoVarOrFunc, ref varOrFunc)) return semBlock;
            #endregion Search in Local Scope
            isLocal = false;
            #region Search in grammar scope
            PegNode pegSpec = PUtils.FindNodeInParents(rule, EPegGrammar.peg_specification);
            Debug.Assert(pegSpec != null && pegSpec.child_.id_ == (int)EPegGrammar.toplevel_semantic_blocks);
            for (
                semBlock = PUtils.FindNode(pegSpec.child_.child_, EPegGrammar.named_semantic_block, EPegGrammar.anonymous_semantic_block);
                semBlock != null && (semBlock.id_ == (int)EPegGrammar.named_semantic_block || semBlock.id_ == (int)EPegGrammar.anonymous_semantic_block);
                semBlock = semBlock.next_)
            {
                if (findVarOrFunc(semBlock.child_, intoVarOrFunc, ref varOrFunc)) return semBlock;
            }
            #endregion Search in grammar scope
            return null;
        }
        void LinkIntoVariables(PegNode n)
        {
            Debug.Assert(n.id_ == (int)EPegGrammar.into_variable);
            string intoVariable = n.GetAsString(c_.src_);
            PegNode variable;
            bool isLocal;
            PegNode semBlock = FindSemanticBlock(FindIntoVariableInTree, intoVariable, n, out variable, out isLocal);
            if (semBlock != null)
            {
                var intoVarWithContext = new NormalizeTree.SemanticVarOrFuncWithContext((int)EPegGeneratorNodes.IntoVarWithContext, semBlock, variable, n, isLocal);
                c_.semanticInfoNodes_.Add(variable);
                PUtils.ReplaceNode(n, intoVarWithContext);
                Debug.Assert(intoVarWithContext.parent_ != null);
                if (intoVarWithContext.parent_.id_ == (int)EPegGrammar.lower_limit
                    && intoVarWithContext.parent_.parent_.id_ == (int)EPegGrammar.repetition_range)
                {
                    var r = intoVarWithContext.parent_.parent_ as PegGrammarParser.TRange;
                    Debug.Assert(r != null);
                    r.lowerIntoVar = intoVarWithContext;
                }
                else if (intoVarWithContext.parent_.id_ == (int)EPegGrammar.upper_limit
                        && intoVarWithContext.parent_.parent_.id_ == (int)EPegGrammar.repetition_range)
                {
                    var r = intoVarWithContext.parent_.parent_ as PegGrammarParser.TRange;
                    Debug.Assert(r != null);
                    r.upperIntoVar = intoVarWithContext;
                }
            }
            else
            {
                c_.errOut_.WriteLine(c_.sErrorPrefix + "ERROR: into variable <" + intoVariable + "> not found in semantic block candidates");
                bOk_ = false;
            }
        }
        void LinkIntoVariablesToSemanticBlocks(PegNode rule)
        {
            NormalizeEachRule(rule, null, LinkIntoVariables, (int)EPegGrammar.into_variable);
        }
        #endregion

        class GenericParameters
        {
            internal GenericParameters(NormalizeTree normalizeTree, PegNode rule)
            {
                normalizeTree_ = normalizeTree;
                genericParam_ = null;
                LinkToGenericParams(rule);
            }
            PegNode FindGenericParam(string refName)
            {
                Debug.Assert(genericParam_ != null);
                for (PegNode param = genericParam_.child_; param != null; param = param.next_)
                {
                    Debug.Assert(param.id_ == (int)EPegGrammar.rule_param);
                    if (param.GetAsString(normalizeTree_.c_.src_) == refName) return param;
                }
                return null;
            }
            void LinkToGenericParam(PegNode n)
            {
                Debug.Assert(n.id_ == (int)EPegGrammar.rule_ref);
                string refName = n.GetAsString(normalizeTree_.c_.src_);
                PegNode genericParam = FindGenericParam(refName);
                if (genericParam != null)
                {
                    PegNode genericCall = new NormalizeTree.GenericCall(genericParam, n);
                    PUtils.ReplaceNode(n, genericCall);
                }
            }
            bool IsGenericRule(PegNode rule)
            {
                PegNode param = PUtils.FindNode(rule.child_.child_, EPegGrammar.peg_params);
                if (param != null)
                {
                    genericParam_ = param;
                    return true;
                }
                return false;
            }
            void LinkToGenericParams(PegNode rule)
            {
                normalizeTree_.NormalizeEachRule(rule, IsGenericRule, LinkToGenericParam, (int)EPegGrammar.rule_ref);
            }
            PegNode genericParam_;
            NormalizeTree normalizeTree_;
        }
        class SemanticFunctions
        {
            internal SemanticFunctions(NormalizeTree normalizeTree, PegNode rule)
            {
                normalizeTree_ = normalizeTree;
                LinkSemanticFunctionsToSemanticBlocks(rule);

            }
            bool FindSemanticFunctionInTree(PegNode n, string semanticFunction, ref PegNode memberName)
            {
                if (n == null) return false;
                if (n.id_ == (int)EPegGrammar.member_name && n.parent_.id_ == (int)EPegGrammar.sem_func_header)
                {
                    string thisMemberName = n.GetAsString(normalizeTree_.c_.src_);
                    Debug.Assert(thisMemberName.Length > 0);
                    if (
                        thisMemberName == semanticFunction
                    || thisMemberName[0] == '@' && thisMemberName.Substring(1) == semanticFunction)
                    {
                        memberName = n; return true;
                    }
                }
                return
                    FindSemanticFunctionInTree(n.child_, semanticFunction, ref memberName)
                || FindSemanticFunctionInTree(n.next_, semanticFunction, ref memberName);
            }
            void LinkSemanticFunctions(PegNode n)
            {
                Debug.Assert(n.id_ == (int)EPegGrammar.rule_ref);
                string semanticFunction = n.GetAsString(normalizeTree_.c_.src_);
                if (normalizeTree_.setRules_.Contains(semanticFunction)) return; //was a rule reference
                PegNode method;
                bool isLocal;
                PegNode semBlock = normalizeTree_.FindSemanticBlock(FindSemanticFunctionInTree, semanticFunction, n, out method, out isLocal);
                if (semBlock != null)
                {
                    var semanticFuncWithContext = new NormalizeTree.SemanticVarOrFuncWithContext((int)EPegGeneratorNodes.SemanticFunctionWithContext, semBlock, method, n, isLocal);
                    normalizeTree_.c_.semanticInfoNodes_.Add(method);
                    PUtils.ReplaceNode(n, semanticFuncWithContext);
                }
                else
                {
                    normalizeTree_.c_.errOut_.WriteLine(normalizeTree_.c_.sErrorPrefix + "ERROR: no rule found and no semantic function <" + semanticFunction + "> found in semantic block candidates");
                    normalizeTree_.bOk_ = false;
                }
            }
            void LinkSemanticFunctionsToSemanticBlocks(PegNode rule)
            {
                normalizeTree_.NormalizeEachRule(rule, null, LinkSemanticFunctions, (int)EPegGrammar.rule_ref);
            }
            NormalizeTree normalizeTree_;
        }
    }
    internal class TreeContext
    {
        public PegNode                      root_;
        public string                       src_;
        public TextWriter                   errOut_;
        public ParserPostProcessParams      generatorParams_;
        public Dictionary<string, PegNode>  dictNameRuleObj_;
        public Dictionary<string, string>   dictProperties_;
        public HashSet<PegNode>             semanticInfoNodes_;
        public Dictionary<string, PegNode>  dictSemanticInfo_;
        public HashSet<string>              referencedMembers_;
        public string                       sErrorPrefix;

        public static string caseSensitivity = "caseSensitivity";
        public static string encoding_class = "encoding_class";
        public static string Name = "name";

        public TreeContext(ParserPostProcessParams generatorParams)
        {
            generatorParams_ = generatorParams;
            root_ = generatorParams.root_;
            src_ = generatorParams.src_;
            errOut_ = generatorParams.errOut_;
            dictProperties_ = new Dictionary<string, string>();
            dictNameRuleObj_ = new Dictionary<string, PegNode>();
            semanticInfoNodes_ = new HashSet<PegNode>();
            dictSemanticInfo_ = new Dictionary<string, PegNode>();//member_name -> named/anonym-semanticblock
            referencedMembers_ = new HashSet<string>();
            sErrorPrefix = "<PEG_PARSER> FILE:'" + generatorParams_.grammarFileName_ + "' ";
            RegisterRules();
            RegisterProperties();
            RegisterSemanticMembers();
        }
        void RegisterRules()
        {
            PegNode rule = PUtils.FindNode(root_, EPegGrammar.peg_rule);
		    if( rule == null ) return;
		    for(PegNode cur= rule;cur!=null;cur=cur.next_){
			    string sRuleIdent= PUtils.GetAsString(src_,PUtils.FindNode(cur,EPegGrammar.rule_name,8));
                if( !dictNameRuleObj_.ContainsKey(sRuleIdent) ) dictNameRuleObj_.Add(sRuleIdent,cur);
		    }
        }
        public bool HasCaseSensitiveProperty()
        {
            string sCaseSensitive;
            if( dictProperties_.TryGetValue(caseSensitivity,out sCaseSensitive) ) return sCaseSensitive.ToUpper()=="YES";
            return false;
        }
        public bool IsGrammarForBinaryInput()
        {
            return dictProperties_.ContainsKey(encoding_class)
                && dictProperties_[encoding_class].Equals("binary", StringComparison.InvariantCultureIgnoreCase);
        }
        public string NameProperty()
        {
            string sName;
            return dictProperties_.TryGetValue(Name, out sName) ? sName : ""; 
        }
        void RegisterProperties()
        {
            PegNode attribute = PUtils.FindNode(root_, EPegGrammar.attribute);
            if (attribute == null) return;
            for (PegNode cur = attribute; cur != null; cur = cur.next_)
            {
                string key =   PUtils.FindNode(cur, EPegGrammar.attribute_key).GetAsString(src_).ToLower();
                string value = PUtils.FindNode(cur, EPegGrammar.attribute_value).child_.GetAsString(src_);
                string prevValue;
                if (dictProperties_.TryGetValue(key, out prevValue))
                {
                    value = prevValue + "\n" + value;
                    dictProperties_.Remove(key);
                }
                dictProperties_.Add(key, value);
            }
        }
        /// <summary>
        /// Register all members of top level non-local semantic blocks in the dictionary 'dictSemanticInfo_'
        /// </summary>
        void RegisterSemanticMembers()
        {
            PegNode topSemanticBlocks = PUtils.FindNode(root_, EPegGrammar.toplevel_semantic_blocks);
            List<PegNode> topLevelBlocksUsedAsLocalClasses = new List<PegNode> { };
            for(PegNode semBlock= topSemanticBlocks.child_;semBlock!=null;semBlock=semBlock.next_)
            {
                PegNode content=null;
                if( semBlock.id_==(int)EPegGrammar.named_semantic_block){
                    string className = semBlock.child_.GetAsString(src_);
                    if (className == "CREATE") continue; //a create block does not correspond to a local class
                    if (UsedAsLocalClass(className)){
                        topLevelBlocksUsedAsLocalClasses.Add(semBlock);
                        continue;
                    }
                    content = semBlock.child_.next_.child_;
                }
                else if (semBlock.id_ == (int)EPegGrammar.anonymous_semantic_block)
                {
                    content = semBlock.child_;
                }
                if (content == null) continue;
                RegisterSemBlockMembers(semBlock, content, dictSemanticInfo_,src_);

            }
            //register use of top level members in methods of local classes
            foreach(PegNode localSemBlock in topLevelBlocksUsedAsLocalClasses)
            {
                RegisterTopLevelMembersInUse(GetContent(localSemBlock));
            }
            for (PegNode rule = PUtils.FindNode(root_, EPegGrammar.peg_rule); rule != null; rule = rule.next_)
            {
                PegNode semBlock = PUtils.FindNode(rule.child_, EPegGrammar.named_semantic_block, EPegGrammar.anonymous_semantic_block);
                if (semBlock == null) continue;

                RegisterTopLevelMembersInUse(GetContent(semBlock));
            }
        }

        private PegNode GetContent(PegNode semBlock)
        {
            switch ((EPegGrammar)semBlock.id_)
            {
                case EPegGrammar.named_semantic_block: return semBlock.child_.next_.child_;
                case EPegGrammar.anonymous_semantic_block: return semBlock.child_;
                default: Debug.Assert(false); return null;
            }
        }

        private bool UsedAsLocalClass(string className)
        {
            for (PegNode rule = PUtils.GetRuleFromRoot(root_); rule != null; rule = rule.next_)
            {
                PegNode using_block = PUtils.FindNode(rule.child_, EPegGrammar.sem_block_name);
                if (using_block != null && using_block.GetAsString(src_) == className) return true;
            }
            return false;
        }

        private void RegisterTopLevelMembersInUse(PegNode content)
        {
            for (PegNode member = content.child_; member != null; member = member.next_)
            {
                switch ((EPegGrammar)member.id_)
                {
                    case EPegGrammar.constructor_decl:
                    case EPegGrammar.destructor_decl:
                    case EPegGrammar.sem_func_declaration:
                    case EPegGrammar.func_declaration:
                        RegisterTopLevelMembersInUseInBody(PUtils.FindNode(member.child_,EPegGrammar.method_body));
                        break;
                }
            }
        }
        private void RegisterTopLevelMembersInUseInBody(PegNode pegNode)
        {
            if (pegNode == null) return;
            if (pegNode.id_ == (int)EPegGrammar.designator)
            {
                string desigIdent = pegNode.child_.GetAsString(src_);
                if (dictSemanticInfo_.ContainsKey(desigIdent))
                {
                    if (!referencedMembers_.Contains(desigIdent)) referencedMembers_.Add(desigIdent);
                }
            }
            RegisterTopLevelMembersInUseInBody(pegNode.child_);
            RegisterTopLevelMembersInUseInBody(pegNode.next_);
        }

        private static void RegisterSemBlockMembers(PegNode semBlock, PegNode content, Dictionary<string, PegNode> dictSemanticInfo, string src)
        {
            for (PegNode member = content.child_; member != null; member = member.next_)
            {
                switch ((EPegGrammar)member.id_)
                {
                    case EPegGrammar.into_declaration:
                    case EPegGrammar.field_declaration:
                        for (PegNode variable = PUtils.FindNode(member.child_,EPegGrammar.variable); variable != null; variable = variable.next_)
                        {
                            dictSemanticInfo[variable.GetAsString(src)] = semBlock;
                        }
                        break;
                    case EPegGrammar.sem_func_declaration:
                    case EPegGrammar.func_declaration:
                    case EPegGrammar.creator_func_declaration:
                        PegNode memberName = PUtils.FindNode(member.child_, EPegGrammar.member_name);
                        dictSemanticInfo[memberName.GetAsString(src)] = semBlock;
                        break;
                    case EPegGrammar.outer_ident:
                        break;
                    default: continue;
                }
            }
        }
        public string GetModuleName()
        {
            return NameProperty();
        }
    }
    
    class PegParserGenerator : IParserPostProcessor
    {
        
        class CheckGrammar
        {
            #region Data Members
            internal HashSet<string> setRules_;
            internal TreeContext c_;
            internal bool bOk_;
            #endregion Data Members
            internal CheckGrammar(TreeContext c)
            {
                c_ = c;
                bOk_ = true;
                setRules_ = new HashSet<string>();
                PegNode n = PUtils.GetRuleFromRoot(c.root_);
                if (n == null) return;
                Debug.Assert(n.id_ == (int)EPegGrammar.peg_rule);
                CheckDoubleDefinitions(c);
                /*
                m_bIsTopModule = true;
                CheckReferenceDefined(c.m_context);
                m_bIsTopModule = true;
                CheckLeftRecursiveness(c.m_context);
                 * */
            }
            void CheckDoubleDefinitions(TreeContext c)
            {
                CheckDoubleDefinitions(c, PUtils.GetRuleFromRoot(c.root_));
            }
            void CheckDoubleDefinitions(TreeContext c, PegNode n)
            {
                for (; n != null; n = n.next_)
                {
                    Debug.Assert(n.id_ == (int)EPegGrammar.peg_rule);
                    string ruleName = PUtils.GetRuleNameFromRule(n, c_.src_);
                    if (setRules_.Contains(ruleName))
                    {
                        bOk_ = false;
                        c.errOut_.WriteLine(c.sErrorPrefix + "ERRROR: rule <" + ruleName + "> is duplicate");
                    }
                    else
                    {
                        setRules_.Add(ruleName);
                    }
                }
            }
        }
        #region Constructors
        #endregion Constructors
        #region IParserPostProcessor functions
        string IParserPostProcessor.ShortDesc { get { return "Create C# parser"; } }
        string IParserPostProcessor.ShortestDesc { get { return "Create C#"; } }
        string IParserPostProcessor.DetailDesc
        {
            get
            {
                return "Creates a C# parser for the given PEG grammar syntax tree";
            }
        }
        void IParserPostProcessor.Postprocess(ParserPostProcessParams postProcessorParams)
        {
            TreeContext context = new TreeContext(postProcessorParams);
            var checker = new CheckGrammar(context);
            if (!checker.bOk_) return;
            var normalize = new NormalizeTree(context,checker.setRules_);
            if (!normalize.bOk_) return;
            Peg.CSharp.PegCSharpGenerator cSharpGenerator = new Peg.CSharp.PegCSharpGenerator(context);
        }
        #endregion IParserPostProcessor functions
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Switzerland Switzerland
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions