Click here to Skip to main content
15,885,842 members
Articles / Programming Languages / C#

Cat - A Statically Typed Programming Language Interpreter in C#

Rate me:
Please Sign up or sign in to vote.
5.00/5 (14 votes)
4 Nov 2006MIT14 min read 70.6K   531   45  
This article contains the public domain implementation of an interpreter for a statically typed stack-based programming language in C# called Cat. The accompanying article is a high-level description of how the various modules work, a brief description of the language, and links to related work.
/// Public domain code by Christopher Diggins
/// http://www.cat-language.com

using System;
using System.Collections.Generic;
using System.Text;

namespace Cat
{
    /// <summary>
    /// The different kinds of nodes in a parse tree are distinguished 
    /// using a NodeType identifier. This allows any processor of an  
    /// abstract syntax tree (AST) to use efficient switch statements 
    /// to process nodes. 
    /// </summary>
    public enum NodeType
    {
        Definition,
        Namespace, // not used in the current version
        StackState,
        SimpleType,
        FxnType,
        Root,
        Quotation,
        Word,
        Number,
        String,
        ParanGroup,
        BraceGroup,
        LineComment,
        Reference,
        VarDecl,
        Undefined
    };
     
    /// <summary>
    /// An AstNode is a part of the abstract syntax tree. It can represent an identifier, 
    /// a definition, a symbol, a type, a part of a type, and so forth. For example
    /// in languages which support expressions an AstNode might be an expression, 
    /// a sub-expression, an operator, and values.
    /// 
    /// For example in the following expression: 5 * (2 + 3), if I was to put square 
    /// brackets around each node and label them, I would get:
    /// 
    /// [5]=number [*]=operator [ ( 2 + 3 ) ]=expression
    /// 
    /// Then the paranthesized sub-expression itself would also be broken down further
    /// into nodes:
    /// 
    /// [2]=number [+]=operator [3]=number
    /// 
    /// However, this example does not apply as-is to the Cat language. A sequence of 
    /// contiguous symbols (like arithmetic operators) are called identifiers, and 
    /// can be defined as functions. Cat also doesn't support paranethese except for 
    /// within function types. This is because Cat programs are unambiguous 
    /// </summary>
    public class AstNode
    {
        #region Constructors
        public AstNode(Parser p, AstNode parent_node, NodeType nt)
        {
            parser = p;
            parent = parent_node;
            node_type = nt;
            marker = parser.cur_index;
            line = parser.GetLine();
            col = parser.GetCol();
        }
        #endregion

        #region Public Functions
        public void Complete()
        {
            text = parser.input.Substring(marker, parser.cur_index - marker);
        }
        public void AddChild(AstNode n)
        {
            children.Add(n);
        }
        public AstNode GetLastChild()
        {
            if (children.Count == 0) return null;
            return children[children.Count - 1];
        }
        public void DeleteLastChildIfEmpty()
        {
            if (children.Count == 0) return;
            if (GetLastChild().children.Count != 0) return;
            children.RemoveAt(children.Count - 1);
        }
        public AstNode GetSibling()
        {
            int n = parent.children.IndexOf(this);
            if (n > parent.children.Count) return null;
            return parent.children[n];
        }
        public override string ToString()
        {
            return text;
        }
        #endregion

        #region Fields
        public Parser parser = null;
        public AstNode parent = null;
        public NodeType node_type = NodeType.Undefined;
        public List<AstNode> children = new List<AstNode>();
        public int marker = -1;
        public string text = "";
        public int line = -1;
        public int col = -1;

        /// <summary>
        /// Used to signal that the node parsing was cut-short.
        /// </summary>
        public bool is_incomplete = false;
        #endregion

        public bool IsComment()
        {
            return (node_type == NodeType.LineComment);
        }
    }

    /// <summary>
    /// The Parser creates an abstract syntax tree (AST) which represents the input 
    /// source code in a more easily managed format. Programs, expressions, and types 
    /// provide constructors for creating nodes from the parse tree.
    /// 
    /// Because the language is so simple, and computers are so fast, there 
    /// is no need for a lexing phase in modern compilers. All of my compilers and 
    /// interpreters merge the lexing phase and parse phase. 
    /// 
    /// Lexing, or lexical analysis is the transformation of source code into tokens.
    /// </summary>
    public class Parser 
    {
        #region Parsing Error and Validation functions
        public static string GetStringFromLine(Parser p, int nLine)
        {
            string[] lines = p.input.Split(new char[] { '\n' });
            return lines[nLine];
        }

        public static string GetPointerFromCol(Parser p, int nCol)
        {
            string s = new string(' ', nCol);
            return s + '^';
        }

        public static void Error(AstNode node, String msg)
        {
            Console.WriteLine("Parsing error at line " + (node.line + 1) + " : " + msg);
            Console.WriteLine(GetStringFromLine(node.parser, node.line));
            Console.WriteLine(GetPointerFromCol(node.parser, node.col));
            throw new Exception("Handled parsing error");
        }

        public static void AssertNodeType(AstNode node, NodeType nt)
        {
            if (node.node_type != nt)
            {
                Error(node, "unexpected node-type " + node.node_type.ToString() + ", expected " + nt.ToString());
            }
        }

        public static void AssertNonEmptyName(AstNode node)
        {
            AssertNodeType(node, NodeType.Word);
            string s = node.ToString();
            if (s.Length < 1) Error(node, "zero length name");
        }

        public static void AssertNodeChildCount(AstNode node, int cnt)
        {
            if (node.children.Count != cnt) Error(node, "expected " + cnt + " child-nodes, instead found " + node.children.Count);
        }
        #endregion

        #region Fields
        public string input;
        public int cur_index = 0;
        public AstNode root;
        public AstNode cur_node;
        private int cur_line = 0;
        private int cur_col = 0;
        #endregion

        public Parser()
        {
            root = new AstNode(this, null, NodeType.Root);
            cur_node = root;
        }
        public bool IsIncomplete()
        {
            return cur_node.is_incomplete;
        }
        public AstNode GetRoot()
        {
            return root;
        }
        public void Clear()
        {
            GetChildren().Clear();
            cur_node = root;
        }
        public List<AstNode> GetChildren()
        {
            return root.children;
        }

        // The parsing utility functions contain things like is_letter, at_number, etc. 
        #region Parsing Utility Functions
        public bool is_letter(char c)
        {
            return ((c >= 'a') && (c <= 'z')) ||
                ((c >= 'A') && (c <= 'Z')) ||
                (c == '_');
        }
        public bool is_number(char c)
        {
            return ((c >= '0') && (c <= '9'));
        }
        public bool is_alphanum(char c)
        {
            return is_letter(c) || is_number(c);
        }
        public bool is_space(char c)
        {
            return Char.IsWhiteSpace(c);
        }
        public bool is_newline(char c)
        {
            return (c == '\r') || (c == '\n') || (c == '\u000D') || (c == '\u000A') || (c == '\u0085') || (c == '\u2028');
        }
        public bool is_symbol(char c)
        {
            switch (c)
            {
                case (':'): return true;
                case ('~'): return true;
                case ('`'): return true;
                case ('!'): return true;
                case ('#'): return true;
                case ('$'): return true;
                case ('%'): return true;
                case ('^'): return true;
                case ('&'): return true;
                case ('*'): return true;
                case ('-'): return true;
                case ('/'): return true;
                case ('+'): return true;
                case ('='): return true;
                case ('|'): return true;
                case ('\''): return true;
                case ('<'): return true;
                case (','): return true;
                case ('>'): return true;
                case ('.'): return true;
                case ('?'): return true;
                case ('\\'): return true;
                default: return false;
            }
        }
        public bool at_end()
        {
            return cur_index >= input.Length;
        }
        public bool at_number()
        {
            if (at_end()) return false;
            return is_number(peek());
        }
        public bool at_letter()
        {
            if (at_end()) return false;
            return is_letter(peek());
        }
        public bool at_space()
        {
            if (at_end()) return false;
            return is_space(peek());
        }
        public bool at_newline()
        {
            if (at_end()) return true;
            return is_newline(peek());
        }
        public bool next_char_is(char c)
        {
            if (cur_index >= input.Length - 1) return false;
            return (peek_next() == c) ;
        }
        public bool at_alphanum()
        {
            if (at_end()) return false;
            return is_alphanum(peek());
        }
        public bool at_char(char c)
        {
            if (at_end()) return false;
            return peek() == c;
        }
        public bool at_symbol()
        {
            if (at_end()) return false;
            return is_symbol(peek());
        }
        public char peek()
        {
            return input[cur_index];
        }
        public char peek_next()
        {
            return input[cur_index + 1];
        }
        public char next()
        {
            ++cur_col;
            return input[++cur_index];
        }
        public void goto_next()
        {
            ++cur_col;
            if (is_newline(input[cur_index++]))
            {
                cur_line++;
                cur_col = 0;
            }
        }
        #endregion

        public void Load(string s)
        {
            input += s;
        }
        public bool Parse(string s)
        {
            Load(s);
            try
            {
                if (IsIncomplete())
                {
                    Resume();
                }
                ParseLoop();
            }
            catch (Exception e)
            {
                cur_node.col = cur_col;
                cur_node.line = cur_line;
                Error(cur_node, e.Message);
            }
            return cur_node == root;
        }
        public void SetInput(string s)
        {
            cur_index = 0;
            cur_col = 0;
            cur_line = 0;
            input = s;
        }
        /// <summary>
        /// Takes the oldest parse node from the parser, and uses it to create 
        /// an instruction, or a definition.
        /// </summary>
        public AstNode ExtractNode()
        {
            if (root.children.Count == 0) return null;
            AstNode result = root.children[0];
            if (result.is_incomplete) return null;
            root.children.RemoveAt(0);
            return result;
        }

        // The parse_loop function is a recursive version of the Parse function 
        #region Parse Loop
        private AstNode CreateNewNode(NodeType nt)
        {
            // it is VERY important that nodes only get added to the parent once 
            // they are fully parsed
            cur_node = new AstNode(this, cur_node, nt);
            return cur_node;
        }
        private void CompleteCurrentNode()
        {
            cur_node.Complete();
            // it is VERY important that nodes only get added to the parent once 
            // they are fully parsed
            cur_node.parent.AddChild(cur_node);
            cur_node.is_incomplete = false;
            cur_node = cur_node.parent;
            if (cur_node == null)
            {
                throw new Exception("parsing error");
            }
        }
        private void EatWS()
        {
            while (!at_end() && at_space()) goto_next();
        }
        private bool ParseText(string s)
        {
            int old_index = cur_index;
            int old_col = cur_col;
            int i = 0;
            while (i < s.Length)
            {
                if (at_end() || s[i] != peek())
                {
                    cur_index = old_index;
                    cur_col = old_col;
                    return false;
                }
                ++i;
                ++cur_index;
                ++cur_col;
            }
            if (!at_space())
            {
                cur_index = old_index;
                cur_col = old_col;
                return false;
            }
            return true;
        }
        private void advance_char(char c)
        {
            if (at_end() || peek() != c)
            {
                Error(cur_node, "expected " + c);
            }
            goto_next();
        }
        private void ParseBraceGroup()
        {
            CreateNewNode(NodeType.BraceGroup);
            advance_char('{');
            CompleteBraceGroup();
        }
        private void CompleteBraceGroup()
        {
            ParseLoop();
            if (at_end())
            {
                cur_node.is_incomplete = true;
            }
            else
            {
                advance_char('}');
                CompleteCurrentNode();
                
                // Complete the owner function as well. 
                CompleteCurrentNode();
            }
        }
        private void ParseQuote()
        {
            CreateNewNode(NodeType.Quotation);
            advance_char('[');
            CompleteQuote();
        }
        private void CompleteQuote()
        {
            ParseLoop();
            if (at_end())
            {
                cur_node.is_incomplete = true;
            }
            else
            {
                advance_char(']');
                CompleteCurrentNode();
            }
        }
        /// <summary>
        /// Generates a node representing a type from a string.
        /// </summary>
        public AstNode ParseType(string s)
        {
            Clear();
            SetInput(s);
            ParseType();            
            return root.children[0];
        }
        private void ParseType()
        {
            if (at_char('('))
            {
                ParseFxnType();
            }
            else if (at_letter())
            {
                ParseSimpleTypeOrVarDecl();
            }
            else
            {
                throw new Exception("Unrecognized type");
            }
        }
        private void ParseSimpleTypeOrVarDecl()
        {
            CreateNewNode(NodeType.SimpleType);
            ParseWord();
            if (at_char(':'))
            {
                // Change the node type, we know this is actually a Variable declaration
                cur_node.node_type = NodeType.VarDecl;
                advance_char(':');
                ParseWord();
                EatWS();
                if (at_char('*'))
                    ParseWord();
            }
            CompleteCurrentNode();
            EatWS();
        }
        private void ParseStackState()
        {
            CreateNewNode(NodeType.StackState);
            advance_char('(');
            EatWS();
            while (!at_char(')'))
            {
                ParseType();
                EatWS();
            }
            advance_char(')');
            CompleteCurrentNode();
            EatWS();
        }
        private void ParseFxnType()
        {
            CreateNewNode(NodeType.FxnType);
            ParseStackState();
            if (at_char('('))
            {
                ParseStackState();
                ParseWord(); // This is either "->" or "~>"
                EatWS();
                ParseStackState();
                ParseStackState();
            }
            else
            {
                ParseWord(); // This is either "->" or "~>"
                EatWS();
                ParseStackState();
            }
            CompleteCurrentNode();
            EatWS();
        }
        private void ParseNumber()
        {
            CreateNewNode(NodeType.Number);
            while (at_number())
            {
                goto_next();
            }
            CompleteCurrentNode();
        }
        private void ParseWord()
        {
            CreateNewNode(NodeType.Word);
            if (at_letter())
            {
                while (at_alphanum())
                    goto_next();
            }
            else if (at_symbol())
            {
                while (at_symbol())
                    goto_next();
            }
            CompleteCurrentNode();
        }
        private void ParseString()
        {
            CreateNewNode(NodeType.String);
            goto_next();
            while (!at_char('\"'))
                goto_next();
            goto_next();
            CompleteCurrentNode();
        }
        private void ParseTerm()
        {
            if (at_letter() || at_symbol())
            {
                ParseWord();
                EatWS();
            }
            else if (at_number())
            {
                ParseNumber();
                EatWS();
            }
            else if (at_char('"'))
            {
                ParseString();
                EatWS();
            }
            else {
                throw new Exception("Expected a term");
            }
        }
        private void Resume()
        {
            if (cur_node.node_type == NodeType.Quotation)
            {
                CompleteQuote();
            }
            else if (cur_node.node_type == NodeType.BraceGroup)
            {
                CompleteBraceGroup();
            }
            else
            {
                throw new Exception("only quotations and brace groups can be left in an incomplete state by the parser");
            }
        }
        private void ParseLoop()
        {
            while (!at_end())
            {
                if (at_char('['))
                {
                    ParseQuote();
                }
                else if (at_char(']'))
                {
                    return;
                }
                else if (at_char('('))
                {
                    ParseType();
                }
                else if (at_char(')'))
                {
                    return;
                }
                else if (at_char('{'))
                {
                    ParseBraceGroup();
                }
                else if (at_char('}'))
                {
                    return;
                }
                else if (at_char('\"'))
                {
                    ParseString();
                    EatWS();
                }
                else if (at_char('@'))
                {
                    CreateNewNode(NodeType.Reference);
                    goto_next();
                    EatWS();
                    ParseTerm();
                    CompleteCurrentNode();
                }
                else if (at_letter() || at_symbol())
                {
                    if (ParseText("define"))
                    {
                        CreateNewNode(NodeType.Definition);
                        cur_col += "define".Length;
                        EatWS();
                        ParseWord();
                        EatWS();
                        if (at_char(':'))
                        {
                            goto_next();
                            EatWS();
                            ParseType();
                            EatWS();
                        }
                        ParseBraceGroup();
                    }
                    else
                    {
                        ParseWord();
                        EatWS();
                    }
                }
                else if (at_space())
                {
                    EatWS();
                }
                else if (at_number())
                {
                    ParseNumber();
                    EatWS();
                }
                else if (at_char('/') && next_char_is('/'))
                {
                    CreateNewNode(NodeType.LineComment);
                    goto_next();
                    while (!at_end() && !at_newline())
                    {
                        goto_next();
                    }
                    CompleteCurrentNode();
                }
                else
                {
                    throw new Exception("failed to parse input string");
                }
            }
        }
        #endregion

        public int GetLine()
        {
            return cur_line;
        }

        public int GetCol()
        {
            return cur_col;
        }

        /// <summary>
        /// This function must be used instead of int.ParseInt due to bugs in the 
        /// current version of Mono.
        /// </summary>
        public static int StringToInt(string s)
        {
            int ret = 0;
            int nMul = 1;
            for (int i = s.Length - 1; i >= 0; --i)
            {
                char c = s[i];
                if ((c < '0') || (c > '9'))
                    throw new Exception("expected integer " + s);
                int nDigit = c - '0';
                ret += nDigit * nMul;
                nMul *= 10;
            }
            return ret;
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The MIT License


Written By
Software Developer Ara 3D
Canada Canada
I am the designer of the Plato programming language and I am the founder of Ara 3D. I can be reached via email at cdiggins@gmail.com

Comments and Discussions