Click here to Skip to main content
15,881,173 members
Articles / Programming Languages / C#

Predictive Parser to generate syntax tree and an Intermediate code for assignment satement

Rate me:
Please Sign up or sign in to vote.
4.61/5 (16 votes)
28 Apr 2007CPOL6 min read 110.7K   3.7K   42  
This project is a compiler program that translate assignment statement into an intermediate code .
/*************************************************
 * Khalid Ahmed                                  *
 * Parser for expression to generate parse tree  *
 * and syntax tree and three-address-code        *
 * 11/13/2006                                    *
 *************************************************/
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.IO;

namespace ParserExpression
{
    public partial class Form1 : Form
    {

        Node[] SyntaxTree = new Node[30];
        int SymbolTablePTR = -1;
        int NodeZeroPtr;
        int NodeOnePtr;
        //
        string inputStream;
        int index = 0;
        const int NUM = 259;// token code for number
        const int NONE = -1;
        const int ID = 257;// token code for number
        string Lexbuf;
        TokenUnit token = new TokenUnit();
        SymbT[] SymbolTable = new SymbT[20];
        int Pst = 0;
        int t;
        bool Synerror = false; // flag to detrimine the syntax error
        bool Lexerror = false;//flag to detrimine the Lexical error
        //
        int lookahead;
        int count = -1;
        string Splace, Eplace, EdashPlace, Tplace, TdashPlace, Fplace, F1Place;// variables used to generate three address code
        FileStream fs;
        StreamWriter sw;

        public Form1()
        {
            InitializeComponent();

        }

        // part 1 ( Lexical analyzer )
        private TokenUnit nextToken()
        {
            Lexbuf = null;
            TokenUnit token = new TokenUnit();
            while (index < inputStream.Length)
            {
                t = (int)inputStream[index];

                //  white space
                if (t == ' ' || t == '\t' || t == '\n' || t == '\r')
                {
                    index++;
                    continue;
                }
                // Numbers
                else if (isdigit(t))
                {
                    while (isdigit(t))
                    {
                        Lexbuf += inputStream[index];
                        index++;
                        if (index == inputStream.Length) break;
                        t = (int)inputStream[index];
                    }
                    token.attribute = Int32.Parse(Lexbuf);
                    token.code = NUM;
                    return token;
                }
                // Identifier 
                else if (isletter(t))
                {
                    // next ch either character or digit
                    while (isletter(t) || isdigit(t))
                    {
                        Lexbuf += inputStream[index];
                        index++;
                        if (index >= inputStream.Length) break;
                        t = (int)inputStream[index];
                    }
                    int p = LookUp(Lexbuf);
                    if (p == 0) Pst = Insert(Lexbuf, ID);
                    else Pst = p;
                    token.attribute = Pst;
                    token.code = SymbolTable[Pst].code;
                    return token;
                }
                // operator sign
                else if (t == '=' || t == '+' || t == '-' || t == '*' || t == '/' || t == '(' || t == ')')
                {
                    index++;
                    token.attribute = 0;
                    token.code = t;
                    return token;
                }
                // otherwise Lexical error
                else
                {
                    Lexerror = true;
                    error();
                    token.attribute = NONE;
                    token.code = t;
                    return token;
                }
            }
            return token;
        }
        private bool isdigit(int t)
        {
            if (t >= '0' && t <= '9')
                return true;
            return false;
        }
        private bool isletter(int t)
        {
            if ((t >= 'A' && t <= 'Z') || (t >= 'a' && t <= 'z'))
                return true;
            return false;
        }
        // these function is to look up on the symbol table
        private int LookUp(string lexe)
        {
            for (int i = 0; i < 20; i++)
            {
                if (lexe == SymbolTable[i].lexeme)
                    return i;
            }
            return 0;
        }
        // this function to insert a symbol on the symbol table
        private int Insert(string lexe, int tcode)
        {
            Pst++;
            SymbolTable[Pst].lexeme = lexe;
            SymbolTable[Pst].code = tcode;
            return Pst;
        }
        //End of part 1 ( Lexical analyzer or scanner)

        // part 2  ( Parser )
        // these parser  generate both syntax tree and three-address-code for assignment statement

        /*  Grammar for an expression
         * 
         * S--> id = E
         * E--> T E'
         * E'--> + T E' | - T E' | $
         * T --> F T'
         * T' --> * F T' | / F T' | $
         * F --> ( E ) | id
         * 
         * */
        // Production S in grammar
        private int stmt()
        {
            int sPtr, ePtr, idPtr, idPlace;
            lookahead = token.code;
            if (lookahead == ID)
            {
                idPlace = token.attribute;
                match(ID);
                match('=');
                ePtr = expr();
                idPtr = mKnode(ID, idPlace);
                sPtr = mKnode('=', idPtr, ePtr);
                // Three Address Code
                Splace = newTemp();
                emit(Splace, "=", SymbolTable[idPlace].lexeme, "", "");
                emit(SymbolTable[idPlace].lexeme, "=", Eplace, "", "");

                return sPtr;
            }
            else error();
            return -1;
        }
        //Production E in grammar
        private int expr()
        {
            int ePtr, edPtr, tPtr;
            lookahead = token.code;
            if (lookahead == ID || lookahead == NUM || lookahead == '(' || lookahead == '-')
            {
                tPtr = term();
                edPtr = edash();
                ePtr = mKnode('+', tPtr, edPtr);
                // Three Address Code
                Eplace = newTemp();
                Tplace = "temp" + tPtr;
                EdashPlace = "temp" + edPtr;
                emit(Eplace, "=", Tplace, "+", EdashPlace);
                return ePtr;
            }
            else error();
            return -1;
        }
        //Production E' in grammar
        private int edash()
        {
            int edPtr, ed1Ptr, tPtr;
            lookahead = token.code;
            string Edash1place;
            if (lookahead == '+')
            {
                match('+');
                tPtr = term();
                ed1Ptr = edash();
                edPtr = mKnode('+', ed1Ptr, tPtr);
                // Three Address Code
                Edash1place = "temp" + ed1Ptr;
                Tplace = "temp" + tPtr;
                EdashPlace = newTemp();
                emit(EdashPlace, "=", Edash1place, "+", Tplace);
            }
            else if (lookahead == '-')
            {
                match('-');
                tPtr = term();
                ed1Ptr = edash();
                edPtr = mKnode('-', ed1Ptr, tPtr);
                // Three Address Code
                Edash1place = "temp" + ed1Ptr;
                Tplace = "temp" + tPtr;
                EdashPlace = newTemp();
                emit(EdashPlace, "=", Edash1place, "-", Tplace);
            }
            else
            {
                edPtr = NodeZeroPtr;
                // Three Address Code
                Edash1place = "temp0";
            }
            return edPtr;
        }
        //Production T in grammar
        private int term()
        {
            int tPtr, tdPtr, fPtr;
            lookahead = token.code;
            if (lookahead == ID || lookahead == NUM || lookahead == '(' || lookahead == '-')
            {
                fPtr = factor();
                tdPtr = tdash();
                tPtr = mKnode('*', fPtr, tdPtr);
                // Three Address Code
                Fplace = "temp" + fPtr;
                TdashPlace = "temp" + tdPtr;
                Tplace = newTemp();
                emit(Tplace, "=", Fplace, "*", TdashPlace);

                return tPtr;
            }
            else error();
            return -1;
        }
        // Production T' in grammar
        private int tdash()
        {
            int tdPtr, td1Ptr, fPtr;
            lookahead = token.code;
            string Tdash1Place;
            if (lookahead == '*')
            {
                match('*');
                fPtr = factor();
                td1Ptr = tdash();
                tdPtr = mKnode('*', td1Ptr, fPtr);
                // Three Address Code
                Tdash1Place = "temp" + td1Ptr; ;
                Fplace = "temp" + fPtr;
                TdashPlace = newTemp();
                emit(TdashPlace, "=", Tdash1Place, "*", Fplace);

                return tdPtr;
            }
            else if (lookahead == '/')
            {
                match('/');
                fPtr = factor();
                td1Ptr = tdash();
                tdPtr = mKnode('/', td1Ptr, fPtr);
                // Three Address Code
                Tdash1Place = "temp" + td1Ptr;
                Fplace = "temp" + fPtr;
                TdashPlace = newTemp();
                emit(TdashPlace, "=", Tdash1Place, "/", Fplace);

                return tdPtr;
            }
            else
            {
                tdPtr = NodeOnePtr;
                TdashPlace = "temp1";
            }
            return tdPtr;
        }
        //Production F in grammar
        private int factor()
        {
            int fPtr = -1; int f1Ptr = -1;
            int ePtr, idPlace, numValue;
            lookahead = token.code;
            if (lookahead == ID)
            {
                idPlace = token.attribute;
                match(ID);
                fPtr = mKnode(ID, idPlace);
                // Three Address Code
                Fplace = newTemp();
                emit(Fplace, "=", SymbolTable[idPlace].lexeme, "", "");

            }
            else if (lookahead == NUM)
            {
                numValue = token.attribute;
                match(NUM);
                fPtr = mKnode(NUM, numValue);
                // Three Address Code
                Fplace = newTemp();
                emit(Fplace, "=", numValue.ToString(), "", "");
            }
            else if (lookahead == '(')
            {
                match('(');
                ePtr = expr();
                match(')');
                fPtr = ePtr;
                // Three Address Code
                Fplace = Eplace;
            }
            else if (lookahead == '-')
            {
                match('-');
                f1Ptr = factor();
                fPtr = mKnode('-', f1Ptr);
                // Three Address Code
                F1Place = "temp" + f1Ptr;
                Fplace = newTemp();
                emit(Fplace, "=", "-", F1Place, "");
            }
            else error();
            return fPtr;
        }
        private void match(int t)
        {
            if (token.code == t)
                token = nextToken();
            else error();
        }

        private void button1_Click(object sender, EventArgs e)
        {
            string filename = textBoxFName.Text;
            try
            {
                fs = new FileStream(filename, FileMode.Create, FileAccess.Write);
                sw = new StreamWriter(fs);

                inputStream = textBox1.Text;
                NodeZeroPtr = mKnode(NUM, 0);
                emit(newTemp(), "=", "0", "", "");
                NodeOnePtr = mKnode(NUM, 1);
                emit(newTemp(), "=", "1", "", "");

                token = nextToken();
                lookahead = token.code;
                int sPtr = stmt();
                sw.Close();
                fs.Close();
                printSyntaxTree();
            }
            catch (Exception exc)
            {
                MessageBox.Show(exc.Message, "Error");
            }
        }
        // make node of syntax tree
        public int mKnode(int lable, int left, int right)
        {
            SymbolTablePTR++;
            SyntaxTree[SymbolTablePTR].Lable = lable;
            SyntaxTree[SymbolTablePTR].LPtr = left;// left pointer
            SyntaxTree[SymbolTablePTR].RPtr = right;// right pointer
            return SymbolTablePTR;
        }
        // make leafe of syntax tree
        public int mKnode(int lable, int pointer)
        {
            SymbolTablePTR++;
            SyntaxTree[SymbolTablePTR].Lable = lable;
            SyntaxTree[SymbolTablePTR].LPtr = pointer;// right pointer
            SyntaxTree[SymbolTablePTR].RPtr = -1; // ignore the right pointer
            return SymbolTablePTR;
        }
        // Display Errors
        private void error()
        {
            if (Lexerror)
            {
                MessageBox.Show("Lexical Error : Invalid token \r\n rewrite correctly an expression", "Lexical Error");
                Lexerror = false;
                return;
            }
            Synerror = true;
            MessageBox.Show("Syntax Error : Invalid Expression  \r\n rewrite correctly an expression", "Syntax Error");
        }
        // Display syntax tree
        void printSyntaxTree()
        {
            if (Synerror || Lexerror) return;
            textBox2.Text = "\t" + "Lable" + "\t" + "LPTR" + "\t" + "RPTR" + "\r\n";
            for (int i = 0; i <= SymbolTablePTR; i++)
            {
                if (SyntaxTree[i].Lable == ID)
                {
                    if (SyntaxTree[i].RPtr == -1)
                        textBox2.Text += "\r\n" + i + "\t" + "ID" + "\t" + SymbolTable[SyntaxTree[i].LPtr].lexeme;
                    else
                        textBox2.Text += "\r\n" + i + "\t" + "ID" + "\t" + SymbolTable[SyntaxTree[i].LPtr].lexeme + "\t" + SyntaxTree[i].RPtr;
                }
                else if (SyntaxTree[i].Lable == NUM)
                {
                    if (SyntaxTree[i].RPtr == -1)
                        textBox2.Text += "\r\n" + i + "\t" + "NUM" + "\t" + SyntaxTree[i].LPtr;
                    else
                        textBox2.Text += "\r\n" + i + "\t" + "NUM" + "\t" + SyntaxTree[i].LPtr + "\t" + SyntaxTree[i].RPtr;
                }
                else if (SyntaxTree[i].Lable == '-')
                    textBox2.Text += "\r\n" + i + "\t" + "uminus" + "\t" + SyntaxTree[i].LPtr;
                else
                    textBox2.Text += "\r\n" + i + "\t" + (char)SyntaxTree[i].Lable + "\t" + SyntaxTree[i].LPtr + "\t" + SyntaxTree[i].RPtr;
            }
            MessageBox.Show("Go to the file at\r\n" + fs.Name + "\r\nto see the three-address-code", "Parser successful");
        }
        // Generate temporaries for three-address-code
        public string newTemp()
        {
            count++;
            return "temp" + count;
        }
        // write three-address-code to disk
        public void emit(string str1, string str2, string str3, string str4, string str5)
        {
            if (Lexerror || Synerror) return;
            sw.WriteLine(str1 + str2 + str3 + str4 + str5);
        }
        // end of part 2 ( Parser )
    }
    public struct Node
    {
        public int Lable;
        public int RPtr;
        public int LPtr;
    }
    public class TokenUnit
    {
        public int attribute;
        public int code;
    }
    public struct SymbT
    {
        public string lexeme;
        public int code;
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer
Saudi Arabia Saudi Arabia
I am a software developer in Saudi Arabia. I’m interesting in learning new technologies including .NET technology, Natural Language Processing, System Programming, Network Security, and website Development. My favorite programming language is C#.

In my free time I enjoy reading books in the English language, and spending time with watching news.

Comments and Discussions