/* ====================================================================
* Copyright (c) 2005 Franz Klein, franzaklein@yahoo.co.uk
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*/
// Parser.cpp: Implementation of the CParser class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "Parser.h"
#include "singleton.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Creates a parser with every nesting counter and state flag cleared,
// sets up the two placeholder texts used when recording parse data and
// fetches the parse-data list from CSingleton<Data>.
CParser::CParser()
    : m_nlevel(0), m_binsidecomment(false), m_inif(0), m_bifelse(false), m_biselse(false)
{
    // Placeholder texts: "no keyword" and "body without nested conditionals".
    empty_string = "";
    empty_statement = "{...}";

    // Remaining counters and flags start from zero / false.
    m_inwhile = m_indo = m_inelse = 0;
    m_ncaselevel = m_ncase = 0;
    m_bcase = false;

    parsedatalist = CSingleton<Data>::Instance();
}
// The destructor body is intentionally empty; no explicit cleanup is done here.
CParser::~CParser() {}
// Entry point of the parser.
//   buffer - source text; it is appended to codebuffer
//   size   - forwarded unchanged to GetLinesofCode
// The text is split into code lines, then each line is handed to
// ConditionalSwitcher. When the switcher consumed a construct it returns the
// line to resume at; otherwise the walk simply advances by one line.
void CParser::Parse(string buffer, int size)
{
    codebuffer << buffer;
    if (!GetLinesofCode(size))
        return;

    for (location cursor = codelines.begin(); cursor != codelines.end(); )
    {
        // check each line for a conditional statement
        location resume = ConditionalSwitcher(cursor);
        if (resume == cursor)
            ++cursor;
        else
            cursor = resume;
    }

    m_nlevel = 0;
    // A closing "}" record is only appended when something was recorded
    if (parsedatalist->size() > 0)
        AddParseData("}", empty_string);
}
//Retrieves line of code and places them in a vector data structure
bool CParser::GetLinesofCode(int size)
{
int length = 0;
int buffersize = size - length;
char* buffer = new char[size];
while (codebuffer.getline(buffer, buffersize))
{
string line(buffer);
length = line.length();
//Don't store any lines that are just comments
RemoveComments(line);
//or empty
RemoveWhiteSpace(line);
//or macros
RemoveMacros(line);
if (line != empty_string)
{
codelines.push_back(line);
}
//how much have we left to process
buffersize = buffersize - length;
delete[] buffer;
if (buffersize > 0)
buffer = new char[buffersize];
if (buffersize == 0)
buffer = NULL;
}
if (buffersize != 0)
{
delete[] buffer;
buffer = NULL;
}
return true;
}
// Strips both comment styles from one line of code.
//   textline - line to clean, modified in place
//
// A C-style comment may span several lines; m_binsidecomment carries that
// state from one call to the next. Continuation lines are handled here first:
// previously they were only looked at when they happened to contain "/*", so
// the body of a multi-line comment was kept as code and the flag was never
// cleared.
void CParser::RemoveComments(string& textline)
{
    if (m_binsidecomment)
    {
        const string::size_type closepos = textline.find("*/");
        if (closepos == string::npos)
        {
            // whole line still belongs to the open comment
            textline = empty_string;
            return;
        }
        // keep only what follows the two-character terminator
        m_binsidecomment = false;
        textline = textline.substr(closepos + 2);
    }

    if (textline.find("//") != string::npos)
    {
        string::size_type commentpos;
        if (IsCPPComment(textline, commentpos))
            textline = textline.substr(0, commentpos);
    }

    if (textline.find("/*") != string::npos)
        RemoveCStyleComment(textline);
}
// Cuts the line at its first carriage return; the '\r' and everything after
// it are dropped. Lines without '\r' are left untouched.
void CParser::RemoveEndline(string& textline)
{
    const string::size_type cr = textline.find('\r');
    if (cr != string::npos)
        textline.erase(cr);
}
// Trims space characters (' ' only) from both ends of the line.
void CParser::RemoveSpaces(string& textline)
{
    const string::size_type first = textline.find_first_not_of(' ');
    if (first == string::npos)
    {
        // empty, or nothing but spaces
        textline.erase();
        return;
    }
    // leading run
    textline.erase(0, first);
    // trailing run; a non-space character is known to exist at this point
    textline.erase(textline.find_last_not_of(' ') + 1);
}
// Drops the run of tab characters at the front of the line.
// Tabs elsewhere in the line are kept.
void CParser::RemoveTabs(string& textline)
{
    // npos (no non-tab character) makes erase() remove the whole string
    textline.erase(0, textline.find_first_not_of('\t'));
}
// Removes the carriage return and any mix of spaces and tabs from the front
// and back of a line of code.
//   textline - line to trim, modified in place
//
// Tabs and spaces are trimmed together. Doing it in two separate passes
// (tabs, then spaces) left a tab behind whenever indentation mixed both,
// e.g. " \tfoo", and never removed trailing tabs, so whitespace-only lines
// could survive the "is this line empty" check of the caller.
void CParser::RemoveWhiteSpace(string& textline)
{
    // cut at '\r' first so trailing blanks before it become visible
    RemoveEndline(textline);

    static const char blanks[] = " \t";
    const string::size_type first = textline.find_first_not_of(blanks);
    if (first == string::npos)
    {
        textline.erase();
        return;
    }
    const string::size_type last = textline.find_last_not_of(blanks);
    textline = textline.substr(first, last - first + 1);
}
// Dispatches one line of code to the parser that matches its keyword.
//   input - line to inspect; may be codelines.end()
// Returns the value of the selected parser, or input itself when the line
// holds none of 'if', 'while', 'do', 'switch' (or when input is the end
// iterator).
//
// The end check now happens before the line is read; it used to come after
// *input had already been dereferenced.
location CParser::ConditionalSwitcher(location input)
{
    if (input == codelines.end())
        return input;

    string searchline = *input;

    if (m_dictionary.Search(searchline, "if"))
    {
        // an 'if' is parsed differently when an 'else' belongs to it
        if (IsIfElseStatement(input))
            return IfElseParser(input);
        return IfParser(input);
    }
    if (m_dictionary.Search(searchline, "while"))
        return WhileParser(input);
    if (m_dictionary.Search(searchline, "do"))
        return DoParser(input);
    if (m_dictionary.Search(searchline, "switch"))
        return SwitchParser(input);

    return input;
}
// Determines whether the 'if' on the given line has a matching 'else'.
//   input - line containing the 'if' keyword
// Returns true when an 'else' is found. For a braced body the 'else' is
// looked for on the line FindEndCompoundStatement reports and on the line
// after it, and m_bifelse is set on success. For an unbraced body it is
// looked for two lines below the 'if' (the controlled statement is skipped).
//
// Every iterator step is checked against codelines.end() before the line is
// read, so an 'if' near the end of the input no longer walks off the
// container.
bool CParser::IsIfElseStatement(location input)
{
    // input currently contains the 'if' keyword
    assert(m_dictionary.Search(*input, "if"));

    location currentline = input;
    if (IsCompoundStatement(currentline))
    {
        // end should point to the line with '}' in it
        location end = FindEndCompoundStatement(currentline);
        if (end == codelines.end())
            return false;

        if (m_dictionary.Search(*end, "else"))
        {
            m_bifelse = true;
            return true;
        }

        ++end;
        if (end != codelines.end() && m_dictionary.Search(*end, "else"))
        {
            m_bifelse = true;
            return true;
        }
        return false;
    }

    // not a compound statement, so a single statement follows the 'if'
    ++currentline;                  // the controlled statement
    if (currentline == codelines.end())
        return false;
    ++currentline;                  // the line after it
    if (currentline == codelines.end())
        return false;

    // we should now be at the 'else' statement if there is one
    if (m_dictionary.Search(*currentline, "else"))
        return true;
    return false;
}
// Parses an 'if' with a simple or compound body.
//   input - line containing the 'if' keyword
// Records the 'if' line, then lets ConditionalParser handle what follows.
// Returns the line after the 'if' statement; when ConditionalParser made no
// progress the result is advanced by one line, unless it already is
// codelines.end() (incrementing the end iterator was possible before when the
// 'if' was the last line).
location CParser::IfParser(location input)
{
    // input contains the 'if' keyword
    assert(m_dictionary.Search(*input, "if"));

    string ifword = "if";
    AddParseData(*input, ifword);
    m_bcase = false;
    m_bifelse = false;

    ++input;
    ++m_inif;   // body is parsed "inside an if"
    location result = ConditionalParser(input);
    if (result == input && result != codelines.end())
        ++result;
    --m_inif;

    return result;
}
// Tells whether the line holds a C++ style comment.
//   textline - line to inspect
//   pos      - receives the index of the first "//" when true is returned;
//              left untouched otherwise
// A "//" lying between the first two double quotes of the line is treated as
// string content and reported as "no comment".
bool CParser::IsCPPComment(string& textline, string::size_type& pos)
{
    const string::size_type slashes = textline.find("//");
    if (slashes == string::npos)
        return false;

    const string::size_type openquote = textline.find("\"");
    const string::size_type closequote = textline.find("\"", openquote + 1);

    // comment inside string
    if (openquote < slashes && slashes < closequote)
        return false;

    pos = slashes;
    return true;
}
// Tells whether the line opens a C-style comment.
//   textline - line to inspect
//   pos      - receives the index of the first "/*" when true is returned
// On success m_binsidecomment is switched on. A "/*" lying between the first
// two double quotes of the line counts as string content and yields false.
bool CParser::IsStartCStyleComment(string& textline, string::size_type& pos)
{
    const string::size_type opener = textline.find("/*");
    if (opener == string::npos)
        return false;

    const string::size_type openquote = textline.find("\"");
    const string::size_type closequote = textline.find("\"", openquote + 1);

    // comment inside string
    if (openquote < opener && opener < closequote)
        return false;

    m_binsidecomment = true;
    pos = opener;
    return true;
}
// Tells whether the line closes a C-style comment.
//   textline - line to inspect
//   pos      - on success receives the index of the first character AFTER
//              the terminator, i.e. where code may resume
// On success m_binsidecomment is switched off. A terminator lying between the
// first two double quotes of the line counts as string content and yields
// false.
//
// The terminator is two characters long, so code resumes at idx + 2. The
// former idx + 3 swallowed one real character and pointed past the end of
// the string whenever the terminator was the last thing on the line, which
// made the callers' substr(pos) throw std::out_of_range.
bool CParser::IsEndCStyleComment(string& textline, string::size_type& pos)
{
    const string::size_type idx = textline.find("*/");
    if (idx == string::npos)
        return false;

    const string::size_type stringstart = textline.find("\"");
    const string::size_type stringend = textline.find("\"", stringstart + 1);

    // comment inside string
    if ((idx > stringstart) && (idx < stringend))
        return false;

    m_binsidecomment = false;
    pos = idx + 2;
    return true;
}
//Parses a 'do' statement with a simple or compound body.
// input - line containing the 'do' keyword
// Returns the line following the last line handled here.
// m_indo is raised on entry and lowered again on every path; m_nlevel is
// raised while the body is processed.
location CParser::DoParser(location input)
{
//input should contain a 'do' statement
assert(m_dictionary.Search(*input, "do"));
m_indo++;
string doword = "do";
location dostart = input;
location returnline;
//IsCompoundStatement may move dostart to the next line when the '{' is there
if (IsCompoundStatement(dostart))
{
location doend = FindEndCompoundStatement(dostart);
m_nlevel++;
if (IsEmptyCompoundStatement(dostart, doend))
{
//no nested conditionals: the body is recorded as the "{...}" placeholder
AddParseData(empty_statement, doword);
m_bcase = false;
m_bifelse = false;
m_indo--;
//endline should now contain the 'while' statement
assert(string::npos != doend->find("while"));
//the stored line itself is modified: text up to and including '}' is cut
RemoveCBrace(*doend);
RemoveSpaces(*doend);
//add the 'while' part of 'do'
AddParseData(*doend, doword);
m_nlevel--;
return ++doend;
}
else
{
//search for nested conditionals
//NOTE(review): when 'do' and '{' share a line, dostart still refers to the
//'do' line here, so ConditionalSwitcher would presumably dispatch to
//DoParser again - confirm that input always has 'do' on its own line
while (dostart != doend)
{
returnline = ConditionalSwitcher(dostart);
if (dostart == returnline)
dostart++;
else
dostart = returnline;
}
//dostart now equals doend; unlike the empty-body path this line is recorded
//without RemoveCBrace and with the empty keyword
AddParseData(*dostart, empty_string);
m_bcase = false;
m_nlevel--;
m_indo--;
}
}
else
{
m_nlevel++;
//it is expected that the 'do' statement is found on its own line
//first step: the single statement forming the body
dostart++;
AddParseData(*dostart, doword);
m_indo--;
m_bifelse = false;
//second step: the line after the body (the 'while' part is expected here)
//NOTE(review): neither increment is checked against codelines.end()
dostart++;
RemoveCBrace(*dostart);
AddParseData(*dostart, empty_string);
m_nlevel--;
}
return ++dostart;
}
// Parses a 'while' statement with a simple or compound body.
//   input - line containing the 'while' keyword
// Records the 'while' line and delegates the body to ConditionalParser while
// m_inwhile is raised. Returns whatever ConditionalParser returned.
location CParser::WhileParser(location input)
{
    // input contains the 'while' statement
    assert(m_dictionary.Search(*input, "while"));

    string keyword("while");
    AddParseData(*input, keyword);

    m_bifelse = false;
    m_bcase = false;

    ++m_inwhile;
    ++input;    // body starts on the following line
    location after = ConditionalParser(input);
    --m_inwhile;

    return after;
}
//Parses a 'switch' compound statement together with the 'case' and
//'default' statements inside it.
// input - line containing the 'switch' keyword
// Returns the line at which the scan stopped, i.e. the end line reported by
// FindEndCompoundStatement for the switch body.
// m_nlevel is raised for the duration of the body; m_ncase is reset to 0 at
// the end.
location CParser::SwitchParser(location input)
{
//input should contain the 'switch' keyword
location startline = input;
string switchword = "switch";
AddParseData(*input, switchword);
//the following line isn't really necessary but at least it finds
//the start of the compound statement after the switch statement
//(the return value is ignored; only the possible move of startline matters)
IsCompoundStatement(startline);
location endline = NULL;
endline = FindEndCompoundStatement(startline);
m_nlevel++;
while (startline != endline)
{
//now parse each simple or compound 'case'/'default' statement
//NOTE(review): plain substring match - any line containing the text "case"
//or "default" takes this branch
if ((string::npos != startline->find("case")) || (string::npos != startline->find("default")))
{
//first see if there are other statements on the
//same line as the 'case x :' statements
//the stored line is modified: everything up to the first ':' is cut
RemoveCase(*startline);
RemoveSpaces(*startline);
//nothing left after the label: the case body starts on the next line
if (*startline == empty_string)
startline++;
location returnline = CaseParser(startline);
//guarantee progress when CaseParser did not move
if (startline == returnline)
startline++;
else
startline = returnline;
}
else
startline++;
}
m_ncase = 0;
m_nlevel--;
return startline;
}
// Decides whether a braced (compound) statement follows a conditional.
//   start - line holding the conditional keyword, e.g. 'if'. When the '{' is
//           found on the NEXT line, start is moved to that line; in every
//           other case it is left where it was.
// Both layouts are accepted:
//     if () {
// and
//     if ()
//     {
// Returns true when a '{' was found on either line.
bool CParser::IsCompoundStatement(location& start)
{
    if (m_dictionary.Search(*start, "{"))
        return true;

    location following = start;
    ++following;
    if (following != codelines.end() && m_dictionary.Search(*following, "{"))
    {
        start = following;
        return true;
    }
    return false;
}
// Checks the lines in [start, end) for conditional keywords.
// "Empty" here means: none of those lines makes HasWord return true.
// Returns true when the range holds no conditional statement.
bool CParser::IsEmptyCompoundStatement(location start, location end)
{
    for (location line = start; line != end; ++line)
    {
        if (HasWord(line))
            return false;
    }
    return true;
}
// Reports whether the given line contains one of the conditional keywords
// 'if', 'while', 'do' or 'switch' according to m_dictionary.Search.
// The keywords are tried in that order and the search stops at the first hit.
bool CParser::HasWord(location currentline)
{
    if (m_dictionary.Search(*currentline, "if")
        || m_dictionary.Search(*currentline, "while")
        || m_dictionary.Search(*currentline, "do")
        || m_dictionary.Search(*currentline, "switch"))
    {
        return true;
    }
    return false;
}
// Removes a C-style comment from the line, including comments that extend
// over several lines (tracked through m_binsidecomment).
//   textline - line to clean, modified in place
// While a comment is open, everything up to its end is dropped (the whole
// line when the end is not on it). Otherwise the text from the comment start
// up to the reported end position - or to the end of the line when the
// comment stays open - is removed.
void CParser::RemoveCStyleComment(string& textline)
{
    if (m_binsidecomment)
    {
        // check where the comment ends
        string::size_type resume = string::npos;
        if (IsEndCStyleComment(textline, resume))
            textline = textline.substr(resume);
        else
            textline = empty_string;
        return;
    }

    string::size_type opening = string::npos;
    if (!IsStartCStyleComment(textline, opening))
        return;

    string::size_type resume = string::npos;
    if (IsEndCStyleComment(textline, resume))
        textline = textline.substr(0, opening) + textline.substr(resume);
    else
        textline = textline.substr(0, opening);
}
// Parses an 'else' with a simple or compound body.
//   input - line containing the 'else' keyword; the keyword is cut out of the
//           stored line, and when nothing remains the body is taken to start
//           on the next line
// Sets m_biselse and raises m_inelse while ConditionalParser handles the body.
// Returns whatever ConditionalParser returned.
location CParser::ElseParser(location input)
{
    // input should contain the 'else' word
    assert(m_dictionary.Search(*input, "else"));

    RemoveElse(*input);
    if (input->empty())
        ++input;

    m_biselse = true;
    ++m_inelse;
    location after = ConditionalParser(input);
    --m_inelse;

    return after;
}
// Appends one record to the parse-data list.
//   codeline - text stored as the record's label
//   word     - keyword stored with the record (may be the empty string)
// The record also captures the parser's current nesting level, counters and
// flags. ifelse / iselse are only written when the matching flag is set, and
// m_biselse is cleared afterwards so it applies to a single record.
void CParser::AddParseData(string codeline, string& word)
{
    Data record;

    record.label = codeline;
    record.word = word;

    // snapshot of the current nesting state
    record.level = m_nlevel;
    record.inif = m_inif;
    record.inelse = m_inelse;
    record.inwhile = m_inwhile;
    record.indo = m_indo;
    record.incase = m_bcase;
    record.casenum = m_ncase;

    if (m_bifelse)
        record.ifelse = true;
    if (m_biselse)
    {
        record.iselse = true;
        m_biselse = false;
    }

    parsedatalist->push_back(record);
}
// Cuts everything up to and including the first ':' from the line, which
// removes a leading 'case x:' or 'default:' label.
// Returns true when a ':' was found, false when the line is left unchanged.
bool CParser::RemoveCase(string& codeline)
{
    const string::size_type colon = codeline.find(':');
    if (colon == string::npos)
        return false;

    codeline.erase(0, colon + 1);
    return true;
}
// Drops the first '}' of the line together with all text in front of it.
// Lines without '}' are left unchanged.
void CParser::RemoveCBrace(string& codeline)
{
    const string::size_type closing = codeline.find('}');
    if (closing == string::npos)
        return;
    codeline.erase(0, closing + 1);
}
// Drops the first '{' of the line together with all text in front of it.
// Lines without '{' are left unchanged.
void CParser::RemoveOBrace(string& codeline)
{
    const string::size_type opening = codeline.find('{');
    if (opening == string::npos)
        return;
    codeline.erase(0, opening + 1);
}
// Searches for the line that ends a compound statement.
//   input - line containing the opening '{'
// Nested compound statements are tracked with a depth counter: a line with
// '{' raises it, a line with '}' lowers it, and the search stops on the line
// where the depth returns to zero.
// Returns that line, or the last line of the code when the braces never
// balance. The end of the container is now checked before each step; the
// search used to advance and dereference unconditionally.
location CParser::FindEndCompoundStatement(location input)
{
    // input must contain '{'
    assert(m_dictionary.Search(*input, "{"));

    location current_line = input;
    int depth = 1;  // the '{' on the input line
    while (depth > 0)
    {
        location next = current_line;
        ++next;
        if (next == codelines.end())
            break;  // unbalanced braces: stay on the last line
        current_line = next;

        // Check for nested compound statements
        if (m_dictionary.Search(*current_line, "{"))
            ++depth;
        if (m_dictionary.Search(*current_line, "}"))
            --depth;
    }
    return current_line;
}
// Parses an if/else statement: the 'if' part through IfParser, then the
// 'else' part through ElseParser starting at the line IfParser returned.
// m_bifelse is set before the 'if' part and cleared before the 'else' part.
// Returns the result of ElseParser.
location CParser::IfElseParser(location input)
{
    // First the 'if' part
    m_bifelse = true;
    location elseline = IfParser(input);
    m_bifelse = false;

    // then the 'else' part
    return ElseParser(elseline);
}
// General body parser shared by 'if', 'else' and 'while'.
//   input - first line after the conditional keyword; may be codelines.end()
// Braced body: every line up to the closing line is offered to
// ConditionalSwitcher, or a single "{...}" placeholder is recorded when the
// body holds no conditional; the line after the closing line is returned.
// Unbraced body: the single statement is offered to ConditionalSwitcher; if
// it is not a conditional the placeholder is recorded and input is returned,
// otherwise the switcher's result is returned.
// m_nlevel is raised by one while the body is handled.
location CParser::ConditionalParser(location input)
{
    if (input == codelines.end())
        return input;

    location cursor = input;
    location resume;

    if (!IsCompoundStatement(cursor))
    {
        // just a simple statement follows the conditional statement;
        // cursor already points at it
        ++m_nlevel;
        resume = ConditionalSwitcher(cursor);
        if (resume == cursor)
        {
            AddParseData(empty_statement, empty_string);
            m_bifelse = false;
        }
        else
        {
            cursor = resume;
        }
        --m_nlevel;
        return cursor;
    }

    // statements after the conditional are surrounded by braces
    location closing = FindEndCompoundStatement(cursor);
    ++m_nlevel;

    if (IsEmptyCompoundStatement(cursor, closing))
    {
        // no nested conditionals
        AddParseData(empty_statement, empty_string);
        --m_nlevel;
        m_bifelse = false;
    }
    else
    {
        // contains nested conditional statements
        while (cursor != closing)
        {
            resume = ConditionalSwitcher(cursor);
            if (resume == cursor)
                ++cursor;
            else
                cursor = resume;
        }
        --m_nlevel;
    }
    return ++closing;
}
//Parses the body of a 'case' or 'default' statement, braced or not.
// input - first line of the case body as passed in by the caller
// Returns the line at which scanning stopped: the closing line of a braced
// body, or for an unbraced body the line that ended the scan.
// m_ncase is incremented once per call; m_bcase is switched on while data
// for this case is recorded; m_nlevel is raised while the body is handled.
location CParser::CaseParser(location input)
{
//input should contain either the 'case' or 'default'
//keyword
location startline = input;
string caseword = "case";
//a 'case' statement comes to the end when either a 'break'
//statement, a '}'or another 'case' or 'default' statement is
//found
location returnline = NULL;
//record how many 'case' there are in 'switch' statement
m_ncase++;
if (IsCompoundStatement(startline))
{
//'case' or 'default' statement is delimited by { and }
//now find the '}'
location endline = FindEndCompoundStatement(startline);
if (IsEmptyCompoundStatement(startline, endline))
{
m_nlevel++;
//doesn't have any nested conditionals
//startline is not advanced on this path, so the '{' line is returned
m_bcase = true;
AddParseData(empty_statement, caseword);
m_bcase = false;
m_nlevel--;
}
else
{
m_nlevel++;
m_bcase = true;
//contains nested conditionals
//m_ncaselevel is only written in this function
m_ncaselevel = m_nlevel;
while (startline != endline)
{
returnline = ConditionalSwitcher(startline);
if (returnline == startline)
startline++; //no conditional - move on to next line
else
startline = returnline;
}
m_bcase = false;
m_nlevel--;
}
}
else //no braces
{
bool casefound = false;
bool defaultfound = false;
bool breakfound = false;
m_nlevel++;
//stays true while no nested conditional was parsed in this case body
bool empty = true;
do
{
m_bcase = true;
returnline = ConditionalSwitcher(startline);
if (startline == returnline)
{
startline++; //no nested conditional - move on to next line
//NOTE(review): startline is dereferenced without an end() check, and
//leaving through this break skips the 'm_bcase = false' below
if (m_dictionary.Search(*startline, "}"))
//this 'case' just has a single statement
//no need to continue
break;
}
else
{
startline = returnline;
//there is a conditional so we can't represent this case
//statement as '{...}'
empty = false;
}
m_bcase = false;
//find the end of current 'case' statement - indicated by a 'break' statement,
//or the next 'case', 'default' statement
//(plain substring matches on the current line)
(startline->find("case") == string::npos) ? casefound = false : casefound = true;
(startline->find("default") == string::npos) ? defaultfound = false : defaultfound = true;
(startline->find("break") == string::npos) ? breakfound = false : breakfound = true;
} while (!casefound && !defaultfound && !breakfound);
//no nested conditional seen: record the body as the "{...}" placeholder
if (empty)
{
m_bcase = true;
AddParseData(empty_statement, caseword);
m_bcase = false;
}
m_nlevel--;
}
//this should point to the last statement in the compound 'case' statement
//or the next 'case' statement
return startline;
}
// Cuts the first occurrence of "do" and everything in front of it from the
// line ('do' is assumed to come first). Lines without "do" stay unchanged.
void CParser::RemoveDo(string& codeline)
{
    const string::size_type at = codeline.find("do");
    if (at == string::npos)
        return;
    codeline.erase(0, at + 2);  // 2 = length of 'do'
}
// Cuts the first occurrence of "else" and everything in front of it from the
// line ('else' is assumed to come first). Lines without "else" stay unchanged.
void CParser::RemoveElse(string& codeline)
{
    const string::size_type at = codeline.find("else");
    if (at == string::npos)
        return;
    codeline.erase(0, at + 4);  // 4 = length of 'else'
}
// Truncates the line at its first '#', dropping the '#' and everything after
// it. Lines without '#' stay unchanged.
void CParser::RemoveMacros(string& textline)
{
    const string::size_type hash = textline.find('#');
    if (hash != string::npos)
        textline.erase(hash);
}
// Offers one statement of a compound statement to ConditionalSwitcher.
// Returns the switcher's result when it differs from first_statement,
// otherwise the line following first_statement.
location CParser::ParseCompoundStatement(location first_statement)
{
    assert(first_statement != NULL);

    location next = ConditionalSwitcher(first_statement);
    if (next != first_statement)
        return next;
    return ++next;
}