Click here to Skip to main content
15,879,326 members
Articles / Artificial Intelligence

Building a Programming Language – Part I (Creating BrainLess)

Rate me:
Please Sign up or sign in to vote.
4.92/5 (25 votes)
11 Oct 2013 · LGPL3 · 17 min read · 61.9K   600   114  
This is the first in a series of articles in which we will explore how to write compilers.
#ifndef __BRAINLESS_LEXER__
#define __BRAINLESS_LEXER__
/*
Author : Shakti Misra
All are welcome to use these files, distribute, modify and release. But This notice should be included.
*/

/*This header contains declarations and definitions for the classes which are used for tokenizing the source file*/
#include <string>
#include <stdio.h>
#include "Helper.h"
using namespace std;

/*Complete list of token kinds produced by the lexer. Anyone introducing a new token
must add it to this list.*/
enum enmTokens
{
    eRdAndStrVal      = 0,  // lexeme: >>
    eDspCurVal        = 1,  // lexeme: .
    eExclaimationMark = 2,  // lexeme: !
    eLeftSqrParen     = 3,  // lexeme: [
    eRightSqrParen    = 4,  // lexeme: ]
    eIncrContent      = 5,  // lexeme: ++
    eDecrContent      = 6,  // lexeme: --
    eAddWithNext      = 7,  // lexeme: +*
    eSubNext          = 8,  // lexeme: -*
    eIncrHead         = 9,  // lexeme: *
    eDecrHead         = 10, // lexeme: <
    eIntNumber        = 11, // any integer number
    eTokenOver        = 12, // end of file has been reached
    eUnknown          = 13  // anything outside of the values above
};

/*Tokens can carry attributes: for example the character group "123" is a single number
token whose value attribute is 123. This class holds that value.*/
class TokenAttrib
{
private:
    int m_iVal; //Value of the attribute; currently only integer numbers are supported.
    bool m_bAttribSet; //True only after setAttribVal(); m_iVal is meaningful only while this is true.

public:
    //Starts with no attribute set. m_iVal gets an absurd but easy to remember
    //number (-666), which helps in debugging.
    TokenAttrib():m_iVal(-666), m_bAttribSet(false)
    {}

    //Stores val as the attribute value and marks the attribute as set.
    void setAttribVal(const int val)
    {
        m_iVal = val;
        m_bAttribSet = true;//The flag must follow the value, otherwise readers ignore it.
    }

    //Returns the stored value; check isAttribSet() first to know whether it is meaningful.
    int getAttribVal() const {return m_iVal;}

    //Returns true if an attribute value has been set and not reset since.
    bool isAttribSet() const {return m_bAttribSet;}

    //Marks the attribute as not set. The stored value itself is left untouched.
    void resetAttrib(){m_bAttribSet = false;}
};

/*This class gets characters one by one from the source file and then
categorizes them according to one of the enmTokens groups.*/
class Lex
{
private:
    char m_szCurChar;//Character most recently returned by getNextChar().
    char m_szPrevChar;//Value m_szCurChar held before the latest getNextChar() call.
    const std::string m_sFileName;//Stores the source file name
    const bool m_bWriteTokenTofile;//If true, a token file is opened by the constructor.
    std::string m_sTokenFile;//Token file name.
    enmTokens m_eCurToken;//Type of the current token that is scanned.
    FILE* m_pTokenfile;//Token file handle; NULL while no token file is open.
    FileRead m_fileReader;//This is the reader class. It will help us read the source file.
    TokenAttrib m_tokAttrib;//Store the attributes of the token.

private:
    //Returns the current character without reading anything from the file.
    char getCurrentChar() const {return m_szCurChar;}

    //Remembers the current character as the previous one, then reads characters
    //from the reader until one that is not ' ' is found and returns it.
    //Only the space character is skipped.
    char getNextChar()
    {
        m_szPrevChar = m_szCurChar;
        do
        {
            m_szCurChar = m_fileReader.getNextChar();
        }
        while(' ' == m_szCurChar);
        return m_szCurChar;
    }

    //Opens m_sTokenFile for writing unless a token file is already open.
    //If fopen fails, m_pTokenfile simply stays NULL.
    void openTokenFile()
    {
        if(NULL == m_pTokenfile)
            m_pTokenfile = fopen(m_sTokenFile.c_str(), "w");
    }

public:
    //sFileName: name of the source file to tokenize.
    //WriteTokenTofile: when true, "<sFileName>.Token" is opened for writing.
    //The initializers are listed in member declaration order, which is the
    //order in which they actually run.
    Lex(const std::string& sFileName, const bool WriteTokenTofile = false /*Make it default to false.*/):
        m_szCurChar(0),
        m_szPrevChar(0),
        m_sFileName(sFileName),
        m_bWriteTokenTofile(WriteTokenTofile),
        m_eCurToken(eUnknown),
        m_pTokenfile(NULL),
        m_fileReader(m_sFileName)
    {
        if(m_bWriteTokenTofile)
        {
            setTokenFileName(m_sFileName);
            openTokenFile();
        }
    }

    //Closes the token file if one was opened.
    //NOTE(review): the implicitly generated copy constructor copies m_pTokenfile,
    //so copying a Lex that has an open token file makes both objects fclose the
    //same handle. Avoid copying Lex objects.
    ~Lex()
    {
        if(NULL != m_pTokenfile)
        {
            fclose(m_pTokenfile);
            m_pTokenfile = NULL;
        }
    }

    //Sets the token file name to sFileName with ".Token" appended.
    //This does not open or reopen the file.
    void setTokenFileName(const std::string& sFileName)
    {
        m_sTokenFile = sFileName + ".Token";
    }

    //Returns the token file name; empty if it was never set.
    std::string getTokenFileName() const
    {return m_sTokenFile;}

    enmTokens getNextToken();//Get the next token

    //Get the current token. We do not advance to the next token here.
    enmTokens getCurrentToken() const {return m_eCurToken;}

    //Get the attribute value of the token, as stored in m_tokAttrib.
    int getAttribVal()
    {return m_tokAttrib.getAttribVal();}

};


#endif //__BRAINLESS_LEXER__

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU Lesser General Public License (LGPLv3)


Written By
Architect
India India
I like to explore different aspects of technology, try new things, and get delighted. My interests are programming languages and imaging, but it's not hard for me to work on other things as well. Algorithms delight me over a coffee break.

I basically code in C++, but Java is not so alien to me. I know a few scripting languages too. Basically, I feel that knowing a programming language is just a matter of getting introduced to it.

For my other articles check my blog on homepage:

http://brainlesslabs.com/

https://github.com/BrainlessLabsInc

http://www.luxrender.net/en_GB/authors_contributors - SMISRA

Comments and Discussions