Click here to Skip to main content
15,892,927 members
Articles / Programming Languages / C++

HexEdit - Window Binary File Editor

Rate me:
Please Sign up or sign in to vote.
4.96/5 (137 votes)
17 Oct 2012 | MIT | 45 min read | 496.3K | 22.4K | 321
Open-source hex editor with powerful binary templates
// TParser.cpp - parse C/C++/C# code for template data declarations
//
// Copyright (c) 2011 by Andrew W. Phillips.
//
// This file is distributed under the MIT license, which basically says
// you can do what you want with it but I take no responsibility for bugs.
// See http://www.opensource.org/licenses/mit-license.php for full details.
//

#include "stdafx.h"

#include <vector>
#include <map>
#include <algorithm>
#include <shlwapi.h>    // for PathFindOnPath

#include "hexedit.h"
#include "TParser.h"
#include "DFFDMisc.h"

// This function is the sole reason we derived a new class from TExpr - it allows
// the user to use symbols (with constant values) in a constant expression.
//
// sym         = name of the symbol to look up
// parent      = must have type TYPE_NONE, otherwise the symbol is not looked up
// index, pac  = not used
// sym_size    = set to 8 when a value is found, or to the size of the type when
//               "sym" names a known type (as needed for "sizeof(int)" etc)
// sym_address = always set to zero (when parent is TYPE_NONE)
// sym_str     = not used
// Returns the value of the symbol (TYPE_INT), or a value whose type has not been
// set if the symbol is not known.
expr_eval::value_t TExpr::find_symbol(const char *sym, value_t parent, size_t index, int *pac,
	__int64 &sym_size, __int64 &sym_address, CString &sym_str)
{
	// We can ignore index, pac, sym_address and just check that parent is TYPE_NONE
	// We only return a valid sym_size if "sym" is in the type list which is
	// just used for getting size of types (eg "sizeof(int)").

	value_t retval;
	retval.int64 = 0;
	if (parent.typ != TYPE_NONE)
		return retval;

	sym_address = 0;                    // Put something here

	// Do macro (#define) substitutions first.  We remember every name visited so that
	// any cycle of definitions (not just one leading back to the original symbol,
	// but also eg A->B->C->B) terminates the loop instead of spinning forever.
	CString subst = sym;
	std::vector<CString> seen;
	seen.push_back(subst);
	bool replaced = false;
	std::map<CString, CString>::const_iterator pdef;
	while ((pdef = ptp_->pp_def_.find(subst)) != ptp_->pp_def_.end())
	{
		replaced = true;
		subst = pdef->second;
		if (std::find(seen.begin(), seen.end(), subst) != seen.end())
			break;                      // Subst back to a name already seen (avoid inf loop)
		seen.push_back(subst);
	}
	sym = subst;                        // subst stays alive (and unchanged) until we return

	std::map<CString, __int64>::const_iterator pval;

	if (replaced)
	{
		// We end up with a number after #define substitutions
		sym_size = 8;
		retval.typ = TYPE_INT;
		retval.int64 = _atoi64(sym);
	}
	else if ((pval = ptp_->custom_consts_.find(sym)) != ptp_->custom_consts_.end())
	{
		// Found the value in list of constants
		sym_size = 8;
		retval.typ = TYPE_INT;
		retval.int64 = pval->second;
	}
	else if (ptp_->check_values_win_ && (pval = ptp_->win_consts_.find(sym)) != ptp_->win_consts_.end())
	{
		// Found the value in list of Windows constants
		sym_size = 8;
		retval.typ = TYPE_INT;
		retval.int64 = pval->second;
	}
	else if ((sym_size = ptp_->get_size(sym)) != -1)
	{
		// Signal that size is valid (although retval.int64 = 0 since we don't have a value)
		// This is for things like sizeof(type) which is allowed in constant expressions.
		retval.typ = TYPE_INT;
	}
	// else retval.type == TYPE_NONE which indicates that the symbol is not known

	return retval;
}

// Notes on padding (pack_):
// Parsed structures will be padded with dummy elements to conform to the current packing value.
// [Dummy elements added have type "none" and a name of the form "Fill$<n>" where n starts from zero - obsolete]
// The current packing value is obtained from packing_default_, but may be changed with #pragma pack() directives.
// The pack_ vector stores a stack of pack values the top (pack_.back()) value being the current value.
// (The stack is necessary to implement the push/pop options of #pragma pack().)
// Padding is added so that a field is aligned according to the smaller of its requirements & the current pack_ value.
// Padding may also be added at the end (according to elt with largest packing) to make sure all array elts are OK.
// Most DATA types have packing requirements just equal to their size (except "none" and "string").
// - eg for 32 bit int it will be aligned on 4 byte boundary as long as pack_ >= 4 else it is aligned on pack_ boundary.
// STRUCT packing requirements = largest packing of all contained elements (not > pack_ value in effect when parsed).
// - eg if STRUCT's largest elt is 32 bit int then STRUCT packing is 4 OR pack_ (if pack_ < 4).
// We have to save this for every STRUCT that we parse (saved in XML "pack" attr) since we don't know what pack_ value
// was in effect when the STRUCT was parsed - this ensures a STRUCT is aligned properly when used in another STRUCT etc.

// Constructor - leaves the parser with no tree or expression evaluator attached
// (Parse() sets these up) and with every option turned off.
TParser::TParser()
{
	pexpr_ = NULL;
	ptree_ = NULL;

	packing_default_ = 4;               // starting value for the #pragma pack stack
	dec_point_ = theApp.dec_point_;

	// Options for the types files
	save_custom_ = false;
	check_common_ = false;
	check_win_ = false;
	check_custom_ = false;
	check_std_ = false;

	// Options for the constants (values) files
	save_values_custom_ = false;
	check_values_custom_ = false;
	check_values_win_ = false;

	// Miscellaneous options
	search_include_ = false;
	base_storage_unit_ = false;
}

// Names of the special types used internally by the parser.  If the standard types
// file cannot be loaded then Parse() registers the last four as 4-byte integer types.
static const char * anon_type_name      = "unnamed$";         // temp name used to store unnamed struct/union/enum
static const char * ptr_type_name       = "pointer$";         // name of integer type to store a pointer
static const char * func_ptr_type_name  = "function_pointer$";// integer type to store a function pointer
static const char * enum_type_name      = "enum$";            // integer type that stores enums (usually same as int)
static const char * bitfield_type_name  = "bit_field$";       // underlying storage unit for bit-fields (usually same as int)

// Parse source text containing C/C++ declarations.
// ptree = XML tree that the parsed declarations are destined for (saved in ptree_)
// str   = the text to be parsed
// Returns the fragment generated by parse_all(), or a default constructed fragment
// if a parse error occurred (in which case the error has already been shown to the user).
// Before parsing the input stack, packing stack and macro/constant tables are reset,
// predefined macros are added, and the constants and types files that are enabled are loaded.
// After parsing the custom constants/types are saved if those options are on.
CXmlTree::CFrag TParser::Parse(CXmlTree *ptree, LPCTSTR str)
{
	ptree_ = ptree;
	pexpr_ = new TExpr(this);

	// Reset the input stack so that it just contains the text passed in
	text_.clear();
	next_.clear();
	pp_nesting_.clear();
	filename_.clear();
	line_no_.clear();
	text_.push_back(CString(str));
	next_.push_back(text_.back().GetBuffer(0));
	pp_nesting_.push_back(0);
	filename_.push_back(CString());
	line_no_.push_back(0);

	pp_ = "";

	pack_.clear();
	pack_.push_back(packing_default_);
	//pad_count_ = 0;
	pp_def_.clear();
	win_consts_.clear();
	custom_consts_.clear();

	// Add some predefined macros
	// Note: asctime returns a string of the form "Www Mmm dd hh:mm:ss yyyy\n"
	time_t now = time(NULL);
	char * timestr = asctime(localtime(&now));
	CString tmp;
	tmp.Format("%3.3s %2.2s %4.4s", timestr+4, timestr+8, timestr+20);
	pp_def_[CString("__DATE__")] = tmp;                       // current date
	tmp.Format("%8.8s", timestr+11);
	pp_def_[CString("__TIME__")] = tmp;                       // current time
	pp_def_[CString("__TIMESTAMP__")] = CString(timestr, 24);

	pp_def_[CString("__STDC__")] = CString("1");              // indicate that we can handle standard C
	pp_def_[CString("__cplusplus")] = CString("1");           // indicate that we can handle C++
	//pp_def_[CString("_MSC_VER")] = CString("1300");
	//pp_def_[CString("_WIN32")] = CString("1");
	pp_def_[CString("defined:1")] = CString("defined($0$)");  // this avoids substitutions of defined() operator parameter

	// Load constants
	if (check_values_win_)
		LoadValuesFile("_windows_constants.txt", win_consts_, true);

	CString cust_values_filename = "_custom_constants.txt";
	if (check_values_custom_)
		LoadValuesFile(cust_values_filename, custom_consts_, false);

	// Load the type info (.XML) files that we are using
	if (check_std_)
	{
		if (!LoadTypeFile("_standard_types.xml", std_types_, true))
		{
			// Load essential types required for internal use
			CString essential =	"<binary_file_format>\n"
							"  <data name=\"void\" type=\"none\" len=\"0\"/>\n"
							"  <data name=\"" + CString(ptr_type_name)      + "\" type=\"int\" len=\"4\"/>\n"
							"  <data name=\"" + CString(func_ptr_type_name) + "\" type=\"int\" len=\"4\"/>\n"
							"  <data name=\"" + CString(enum_type_name)     + "\" type=\"int\" len=\"4\"/>\n"
							"  <data name=\"" + CString(bitfield_type_name) + "\" type=\"int\" len=\"4\"/>\n"
							"</binary_file_format>";
			std_types_.LoadString(essential);
		}
	}

	CString windows_types_filename = "_windows_types.xml";
	if (check_win_)
		LoadTypeFile(windows_types_filename, win_types_, true);

	if (check_common_)
		LoadTypeFile("_common_types.xml", common_types_, true);

	// We load custom types if wanted - but file will not be present if user has never saved it (save_custom_)
	CString cust_types_filename = "_custom_types.xml";
	if (check_custom_)
		LoadTypeFile(cust_types_filename, custom_types_, false);
	else
		custom_types_.LoadString("<binary_file_format></binary_file_format>");

	CXmlTree::CFrag retval;
	try
	{
		long dummy;
		retval = parse_all("", dummy);
	}
	catch (const CString &ss)
	{
		// Parse errors are thrown as a CString - if the error was in an #include
		// file we also say where it occurred.
		// Note: a CString must be explicitly cast when passed to a function taking
		// a variable number of arguments (such as Format).
		CString where;
		if (filename_.size() > 1)
			where.Format("\r\n\r\nat line %ld of \"%s\".", long(line_no_.back()), (LPCTSTR)filename_.back());
		AfxMessageBox(ss + where);
	}

	//CString debug_string = custom_types_.DumpXML();
	delete pexpr_;	pexpr_ = NULL;

	if (save_values_custom_ && !SaveValuesFile(cust_values_filename, custom_consts_))
	{
		CString ss;
		ss.Format("Could not save new values to\n\"%s\"", (LPCTSTR)cust_values_filename);
		AfxMessageBox(ss);
	}

	if (save_custom_ && !custom_types_.Save(theApp.xml_dir_ + cust_types_filename))
	{
		CString ss;
		ss.Format("Could not save new types to\n\"%s\"", (LPCTSTR)cust_types_filename);
		AfxMessageBox(ss);
	}

	return retval;
}

// Load a types (XML) file from the XML directory (theApp.xml_dir_).
// filename     = name of the file (without path)
// types        = the tree that the file is loaded into
// should_exist = if true a message is displayed when the file is not found
// Returns true if the file was found and loaded.  If not then false is returned
// and "types" is loaded with an empty <binary_file_format> element.
bool TParser::LoadTypeFile(LPCTSTR filename, CXmlTree &types, bool should_exist)
{
	bool retval = true;

	CString pathname = theApp.xml_dir_ + filename;

	// Check if the file is there, making sure we close the search handle
	WIN32_FIND_DATA wfd;
	HANDLE hfind = ::FindFirstFile(pathname, &wfd);
	bool found = hfind != INVALID_HANDLE_VALUE;
	if (found)
		::FindClose(hfind);

	if (!found)
	{
		if (should_exist)
		{
			CString ss;
			ss.Format("Types file not found:\n\"%s\"", (LPCTSTR)pathname);
			AfxMessageBox(ss);
		}
		retval = false;
	}
	else if (!types.LoadFile(pathname))
	{
		// Note that the error info comes from the tree that we tried to load (types)
		CString ss;
		ss.Format("Error reading types file\n\"%s\"\n\n"
				  "XML parse error at line %ld:%s\n%s", 
				  (LPCTSTR)pathname, long(types.ErrorLine()),
				  (LPCTSTR)types.ErrorLineText(), (LPCTSTR)types.ErrorMessage());
		AfxMessageBox(ss);
		retval = false;
	}

	// Load minimal required XML if file load failed
	if (!retval && !types.LoadString("<binary_file_format></binary_file_format>"))
	{
		AfxMessageBox(FILENAME_DTD " not found");
		types.LoadString("<binary_file_format></binary_file_format>");
	}

	return retval;
}

// Load named constants from a text file in the XML directory (theApp.xml_dir_).
// Each line of the file has the form "name|value"; empty lines and lines that
// start with a semi-colon (;) are ignored.
// filename     = name of the file (without path)
// values       = map that the constants are added to (existing entries are kept
//                unless the file has a constant of the same name)
// should_exist = if true a message is displayed when the file is not found
// Returns true if the file was found and read without error.
bool TParser::LoadValuesFile(LPCTSTR filename, std::map<CString, __int64> &values, bool should_exist)
{
	CString pathname = theApp.xml_dir_ + filename;

	// Check if the file is there, making sure we close the search handle
	WIN32_FIND_DATA wfd;
	HANDLE hfind = ::FindFirstFile(pathname, &wfd);
	bool retval = hfind != INVALID_HANDLE_VALUE;
	if (retval)
		::FindClose(hfind);

	if (retval)
	{
		// Read the values from the file
		try 
		{
			CStdioFile fin(pathname, CFile::modeRead|CFile::shareDenyWrite|CFile::typeText);

			CString ss, name, value;
			while (fin.ReadString(ss))
			{
				if (ss.IsEmpty() || ss[0] == ';')    // ignore empty/comment lines
					continue;

				AfxExtractSubString(name, ss, 0, '|');
				AfxExtractSubString(value, ss, 1, '|');
				if (!name.IsEmpty())
					values[name] = _atoi64(value);
			}
		}
		catch (CFileException *pfe)
		{
			AfxMessageBox(::FileErrorMessage(pfe, CFile::modeRead));
			pfe->Delete();
			retval = false;
		}
	}
	else if (should_exist)
	{
		CString ss;
		ss.Format("Constants file not found:\n\"%s\"", (LPCTSTR)pathname);
		AfxMessageBox(ss);
	}

	return retval;
}

// Save named constants to a text file in the XML directory (theApp.xml_dir_),
// replacing any existing file.  The file starts with a "; version 1" comment
// line followed by one line of the form "name|value" for each constant.
// filename = name of the file (without path)
// values   = the constants to be written
// Returns true on success; on a file error a message is displayed and false is returned.
bool TParser::SaveValuesFile(LPCTSTR filename, const std::map<CString, __int64> &values) const
{
	bool retval = true;

	CString pathname = theApp.xml_dir_ + filename;
	// Write the values to the file
	try 
	{
		CStdioFile fout(pathname, CFile::modeCreate|CFile::modeWrite|CFile::shareExclusive|CFile::typeText);
		fout.WriteString("; version 1\n");

		CString line;
		std::map<CString, __int64>::const_iterator pv;
		for (pv = values.begin(); pv != values.end(); ++pv)
		{
			// Format into a CString so there is no fixed size buffer to overflow (or name to
			// truncate).  Note the cast - a CString can't be passed as is via variable arguments.
			line.Format("%s|%I64d\n", (LPCTSTR)pv->first, pv->second);
			fout.WriteString(line);
		}
	}
	catch (CFileException *pfe)
	{
		AfxMessageBox(::FileErrorMessage(pfe, CFile::modeWrite));
		pfe->Delete();
		retval = false;
	}

	return retval;
}

// Get next line allowing for continued lines using backslash (\) or multiline comment.
// Removes comments but stores the last one seen in last_comment_.
// The text is taken from the current input (next_.back()) which is moved to the start
// of the following line (any blank lines after the line returned are also skipped).
// The current line number (line_no_.back()) is incremented for every line feed passed.
// An unterminated string or character literal is closed off at the end of the line.
CString TParser::get_line()
{
	CString retval;
	bool in_str = false;                // Are we in a string?
	bool in_chr = false;                // Are we in a character literal?
	bool in_comment = false;            // Are we inside a /* C style comment */
	bool cr_seen = false;               // Remember if we saw a CR/LF in C style comment

	const char * start = next_.back(); // start of text not yet added to retval
	const char * pb = start;           // current point in the text string

	while (*pb != '\0')
	{
		if (in_comment)
		{
			ASSERT(!in_str && !in_chr);
			if (*pb == '\r' || *pb == '\n')
			{
				cr_seen = true;
			}
			else if (*pb == '*' && *(pb+1) == '/')
			{
				in_comment = false;
				last_comment_ = CString(start, int(pb-start));
				start = pb + 2;
				++pb;
				if (cr_seen)
				{
					++pb;
					break;              // Don't allow /* CR/LF */ to join lines (even though latest VC++ does)
				}
			}
		}
		else if ((*pb == '\r' || *pb == '\n') && (pb == start || *(pb-1) != '\\'))
		{
			// End of the line
			retval += CString(start, int(pb-start));
			if (in_str)
				retval += '"';  // terminate string
			else if (in_chr)
				retval += '\'';
			start = pb;
			break;              // the end of line chars are skipped (and counted) below
		}
		else if (in_str)
		{
			ASSERT(!in_comment);
			ASSERT(pb > start);         // we must have already seen at least a quote (")
			if (*pb == '"' && *(pb-1) != '\\')
				in_str = false;
		}
		else if (in_chr)
		{
			// Note: we have to handle this because a character literal can in theory be something like '//' or '/*'
			ASSERT(!in_comment);
			ASSERT(pb > start);         // we must have already seen at least an apostrophe (')
			if (*pb == '\'' && *(pb-1) != '\\')
				in_chr = false;
		}
		else if (*pb == '/' && *(pb+1) == '*')
		{
			// C style comment
			retval += CString(start, int(pb-start)) + CString(" ");  // A comment is replaced by a single space
			in_comment = true;
			start = pb + 2;
			++pb;
		}
		else if (*pb == '/' && *(pb+1) == '/')
		{
			// C++ style comment
			retval += CString(start, int(pb-start));
			start = pb + 2;
			pb += 2;
			while (*pb != '\0' && *pb != '\r' && *pb != '\n')
				++pb;
			last_comment_ = CString(start, int(pb-start));
			start = pb;
			break;              // the end of line chars are skipped (and counted) below
		}
		else if (*pb == '"')
		{
			in_str = true;
		}
		else if (*pb == '\'')
		{
			in_chr = true;
		}

		// We only count a line feed as we move past it, so that one at the end of the line
		// (which is counted below) is not counted twice when lines end with LF not CR/LF.
		if (*pb == '\n')
			line_no_.back()++;
		++pb;
	}

	retval += CString(start, int(pb-start));

	// Skip the end of line (and any following blank lines) keeping track of the line number
	while (*pb == '\r' || *pb == '\n')
	{
		if (*pb == '\n')
			line_no_.back()++;
		++pb;
	}
	next_.back() = pb;
	return retval;
}

// Returns the next line but filters out preprocessing directives.
// Also performs substitutions based on any #defines already encountered.
//
// The directives handled are: #if, #ifdef, #ifndef, #elif, #else, #endif, #include,
// #define, #undef, #pragma pack and #error.  Other pragmas and #line are ignored.
// Lines that are conditionally excluded are skipped.  When the end of an #include
// file is reached processing continues with the file that included it.
// Returns an empty string when the end of the original text is reached.
// Throws a CString describing the problem if there is an error in a directive.
CString TParser::get_pp_line()
{
	CString str;
	int skipping = 0;                   // 0 = not skipping, 1 = skip to #else/#elif/#endif, 2 = skip to #endif
	int skip_nesting = 0;               // pp_nesting_ value when we started skipping (only used when skipping != 0)

	for (;;)
	{
		// Check if we are at the end of the file/input string
		while (*(next_.back()) == '\0')
		{
			if (pp_nesting_.back() > 0)
			{
				if (next_.size() > 1)
					AfxMessageBox("Warning: Missing #endif in #include file");
				else
					AfxMessageBox("Warning: Missing #endif");
			}
			if (next_.size() == 1)
				return "";              // That's it - end of string in input string (original text being parsed)
			text_.pop_back();
			next_.pop_back();
			pp_nesting_.pop_back();
			filename_.pop_back();
			line_no_.pop_back();
		}

		str = get_line();               // get next line (comments already removed)
		str.TrimLeft();
		if (str.IsEmpty())
			continue;

		// We don't do substitutions in #define, #ifdef etc (but do in #include)
		bool no_subst = false;
		if (str[0] == '#')
		{
			CString tmp = str.Mid(1);
			tmp.TrimLeft();
			if (strncmp(tmp, "define", 6) == 0 ||
				strncmp(tmp, "undef",  5) == 0 ||
//                strncmp(tmp, "if",     2) == 0 ||  // we did this so #if defined(MACRO) worked but it causes other problems
//                strncmp(tmp, "elif",   4) == 0 ||
				strncmp(tmp, "ifdef",  5) == 0 ||
				strncmp(tmp, "ifndef", 6) == 0 )
			{
				no_subst = true;
			}
		}
		else if (skipping)
			continue;                           // This line is conditionally excluded

		CString retval;                         // String after substitutions

		if (no_subst)
		{
			retval = str;                       // Just copy as is
		}
		else
		{
			// Do preprocessor substitutions
			const char * pb = str.GetBuffer(0);
			const char * start = pb;
			bool in_str = false;                // Are we in a string?
			bool in_chr = false;                // Are we in a character literal?

			while (*pb != '\0')
			{
				// Don't do subtitutions in string and character literals
				if (in_str)
				{
					ASSERT(pb > start);         // we must have already seen at least a quote (")
					if (*pb == '"' && *(pb-1) != '\\')
						in_str = false;
				}
				else if (in_chr)
				{
					ASSERT(pb > start);         // we must have already seen at least an apostrophe (')
					if (*pb == '\'' && *(pb-1) != '\\')
						in_chr = false;
				}
				else if (*pb == '"')
				{
					in_str = true;
				}
				else if (*pb == '\'')
				{
					in_chr = true;
				}
				else if (::isalpha(*pb) || *pb == '_')
				{
					// Save what we have to here
					retval += CString(start, int(pb-start));

					// Get identifier
					const char * pb2;
					for (pb2 = pb; ::isalnum(*pb2) || *pb2 == '_'; ++pb2)
						; // nothing here
					ASSERT(pb2 > pb);           // There must be at least one letter

					// Get the name and find any substitutions
					CString id(pb, int(pb2 - pb));
					CString subst = id;
					bool replaced = false;
					if (subst == "__FILE__")
					{
						replaced = true;
						if (filename_.size() < 2)
							subst = "\"EDIT_BOX\"";
						else
							subst = "\"" + filename_.back() + "\"";
					}
					else if (subst == "__LINE__")
					{
						replaced = true;
						subst.Format("%ld", long(line_no_.back()));  // use long as for line number in error messages
					}
					else
						while (pp_def_.find(subst) != pp_def_.end())
						{
							// This does any simple replacements avoiding the case of get
							replaced = true;
							subst = pp_def_[subst];
							if (subst == id)        // Subst back to original (avoid inf loop)
								break;
						}

					const char *saved = pb2;   // Save where we are up to in case it is not a macro
					while (::isspace(*pb2))    // Skip to '('
						++pb2;

					// Create name + colon to check for function like macros
					CString ss = CString(subst) + ":";
					std::map<CString, CString>::const_iterator pp = pp_def_.lower_bound(ss);
					if (!replaced && *pb2 == '(' &&
						pp != pp_def_.end() &&
						strncmp(ss, pp->first, ss.GetLength()) == 0)
					{
						// Appears to be a "function-like" macro
						CString tmp;
						std::vector<CString> pvalue;  // array of params to macro

						// First work out how many params to expect
						char *endp;
						tmp = pp->first;
						int num_params = strtoul(tmp.GetBuffer() + ss.GetLength(), &endp, 10);  // number of params the macro takes
						bool is_variadic = *endp == '+';                                        // is it a variadic (...) macro?

						int nesting = 0;                       // keeps track of brackets so we don't finish a parameter inside a nested expression
						bool in_str = false, in_chr = false;   // Keep track of string and character literals
						tmp.Empty();
						for (;;)
						{
							++pb2;
							if (*pb2 == '\0')
							{
								// We don't support multi-line macro invocations so exit at eoln
								pvalue.clear();
								break;
							}
							if (in_str)
							{
								if (*pb2 == '"' && *(pb2-1) != '\\')
									in_str = false;  // end of string found
							}
							else if (in_chr)
							{
								if (*pb2 == '\'' && *(pb2-1) != '\\')
									in_chr = false;
							}
							else if (*pb2 == '"')
							{
								in_str = true;
							}
							else if (*pb2 == '\'')
							{
								in_chr = true;
							}
							else if (nesting == 0 &&
									 (*pb2 == ')' || *pb2 == ',' && (!is_variadic || pvalue.size() < num_params - 1)) )
							{
								// This places parameters onto the end of pvalue unless there are variadic parameters
								// in which they are all stored together in the string for the "last" parameter
								pvalue.push_back(tmp);
								if (*pb2 == ')')
									break;
								tmp.Empty();
								continue;            // go to start of loop to avoid adding the comma as part of the subst
							}
							else if (*pb2 == '(')
								++nesting;
							else if (*pb2 == ')')
								nesting--;

							tmp += *pb2;
						}
						ASSERT(*pb2 == '\0' || *pb2 == ')');
						if (*pb2 == ')')
							++pb2;

						// Check that we got the right number of parameters
						if (pvalue.size() == num_params)
						{
							subst = pp->second;
							// Perform substitutions
							for (std::vector<CString>::const_iterator pv = pvalue.begin(); pv != pvalue.end(); ++pv)
							{
								tmp.Format("$%ld$", long(pv - pvalue.begin()));
								subst.Replace(tmp, *pv);
							}
							replaced = true;
						}
						else
							pb2 = saved;        // Wrong number of params so go back to open bracket
					}
					else
						pb2 = saved;

					retval += subst;
					start = pb2;
					pb = pb2 - 1;               // Allow for increment below
				}

				++pb;
			}

			retval += CString(start, int(pb-start));
			retval.Replace("##", "");           // Remove ## for pasting tokens together
		}

		if (retval[0] != '#')
			return retval;                  // We have it (a non-preprocessor line)

		// Handle preprocessor command
		const char * pb = retval.GetBuffer(0);
		++pb;
		while (::isspace(*pb))
			++pb;

		// Get preprocessor command
		const char * pb2;
		for (pb2 = pb; ::isalpha(*pb2) || *pb2 == '_'; ++pb2)
			; // nothing here
		CString tmp(pb, int(pb2-pb));
		pb = pb2;

		if (tmp == "ifdef" || tmp == "ifndef")
		{
			++pp_nesting_.back();
			if (!skipping)
			{
				while (::isspace(*pb))
					++pb;
				if (::isalpha(*pb) || *pb == '_')
				{
					// Get macro identifier
					for (pb2 = pb; ::isalnum(*pb2) || *pb2 == '_'; ++pb2)
						; // nothing here
					ASSERT(pb2 > pb);           // There must be at least one letter

					CString macro_name = CString(pb, int(pb2-pb));
					std::map<CString, CString>::const_iterator pp = pp_def_.find(macro_name);

					if (tmp == "ifdef"  && pp == pp_def_.end() || 
						tmp == "ifndef" && pp != pp_def_.end())
					{
						skipping = 1;   // skip to #endif/#else/#elif
						skip_nesting = pp_nesting_.back();
					}
				}
				else
					throw CString("Expected identifier after #ifdef");
			}
		}
		else if (tmp == "if")
		{
			++pp_nesting_.back();
			if (!skipping)
			{
				int ac = -1;
				ASSERT(pexpr_ != NULL);
				expr_eval::value_t val = pexpr_->evaluate(pb, 0, ac);

				if (val.typ == expr_eval::TYPE_INT)
					skipping = val.int64 != 0 ? 0 : 1;
				else if (val.typ == expr_eval::TYPE_BOOLEAN)
					skipping = val.boolean ? 0 : 1;
				else
					throw CString("Invalid expression in #if");
				skip_nesting = pp_nesting_.back();
			}
		}
		else if (tmp == "elif")
		{
			if (skipping == 0)
			{
				// We are apparently processing #if/#ifdef part (or #elif part) when we got to here -
				// so the following #else/#elif part must be skipped.
				skipping = 2;           // skip to next #endif
				skip_nesting = pp_nesting_.back();
			}
			else if (skipping == 1 && pp_nesting_.back() == skip_nesting)
			{
				ASSERT(skip_nesting <= pp_nesting_.back());
				int ac = -1;
				ASSERT(pexpr_ != NULL);
				expr_eval::value_t val = pexpr_->evaluate(pb, 0, ac);

				if (val.typ == expr_eval::TYPE_INT)
					skipping = val.int64 != 0 ? 0 : 1;
				else if (val.typ == expr_eval::TYPE_BOOLEAN)
					skipping = val.boolean ? 0 : 1;
				else
					throw CString("Invalid expression in #elif");
				skip_nesting = pp_nesting_.back();
			}
		}
		else if (tmp == "else")
		{
			if (skipping == 0)
			{
				// We are apparently processing #if/#ifdef part (or #elif part) when we got to here -
				// so the following #else/#elif part must be skipped.
				skipping = 2;           // skip to #endif (bypassing #else and #elif)
				skip_nesting = pp_nesting_.back();
			}
			else if (skipping == 1 && pp_nesting_.back() == skip_nesting)
			{
				ASSERT(skip_nesting <= pp_nesting_.back());
				skipping = 0;           // Found #else at same level so turn off skipping
			}
		}
		else if (tmp == "endif")
		{
			if (skipping > 0 && pp_nesting_.back() == skip_nesting)
			{
				ASSERT(skip_nesting <= pp_nesting_.back());
				skipping = 0;           // Found #endif at same level so turn off skipping
			}

			if (pp_nesting_.back() == 0)
				AfxMessageBox("Warning: Unexpected extra #endif");
			else
				pp_nesting_.back()--;
		}
		else if (skipping)
		{
			ASSERT(skip_nesting <= pp_nesting_.back());
			// Don't do other preprocessing if conditionally excluded
			continue;
		}
		else if (tmp == "include")
		{
			// Get name of file to include
			while (::isspace(*pb))
				++pb;
			char stop_ch;
			if (*pb == '<')
				stop_ch = '>';
			else if (*pb == '"')
				stop_ch = '"';
			else
				throw CString("Expected file name after #include");
			++pb;
			for (pb2 = pb; *pb2 != '\0' && *pb2 != stop_ch; ++pb2)
				; // nothing here

			CString inc_name(pb, int(pb2-pb));

			// If include is of the form #include <name> then only search in "standard" include directories
			// If include is of the form #include "name" then search current directory first, then include directories
			bool found = false;
			if (stop_ch != '>')
			{
				// Check for the file as is, making sure we close the search handle
				WIN32_FIND_DATA wfd;
				HANDLE hfind = ::FindFirstFile(inc_name, &wfd);
				if (hfind != INVALID_HANDLE_VALUE)
				{
					found = true;
					::FindClose(hfind);
				}
			}
			if (!found)
			{
				// Search in our list of include directories
				CString ss = search_path(theApp.GetProfileString("DataFormat", "IncludeFolders"), inc_name);
				// If not found also check %INCLUDE% environment variable
				if (ss.IsEmpty() && search_include_)
					ss = search_path(::getenv("INCLUDE"), inc_name);

				if (!ss.IsEmpty())
					inc_name = ss;
				else
					throw "Could not find include file \"" + inc_name + "\".";
			}

			try
			{
				// Open the file and read all its text
				CFile ff(inc_name, CFile::modeRead|CFile::shareDenyNone);
				size_t flen = size_t(ff.GetLength());
				std::vector<char> buf(flen + 1);    // using a vector means the memory is freed even if Read throws
				UINT got = ff.Read(&buf[0], UINT(flen));
				buf[got] = '\0';                    // terminate after what was actually read

				text_.push_back(CString(&buf[0]));
				next_.push_back(text_.back().GetBuffer(0));
				pp_nesting_.push_back(0);
				filename_.push_back(inc_name);
				line_no_.push_back(0);
			}
			catch (CFileException *pfe)
			{
				CString err = ::FileErrorMessage(pfe, CFile::modeRead);
				pfe->Delete();
				throw err;
			}
			catch (std::bad_alloc)
			{
				throw "Not enough memory to load include file \"" + inc_name + "\".";
			}
		}
		else if (tmp == "define")
		{
			// We store macros and their replacement text in the std::map pp_def_.
			// For macros without parameters this just stores the name and the text. Eg:
			// #define ULONG  unsigned long      => name="ULONG", text="unsigned long"
			// For macros with parameters we also store the number of parameters in the macro
			// name, and where they are substituted in the replacement text using $<param_num>$
			// eg #define ADD(a,b) ((a)+(b))     => name="ADD:2" text="(( $0$ )+( $1$ ))"

			// For variadic macros we add a plus sign to the param count and use for __VA_ARGS__ the last param
			// eg #define M(s, ...) f(s,__VA_ARGS__)  => name="M:2+" text="f( $0$ , $1$ )"

			// Note that each parameter in the substitution text has a space before and after
			// to assist with paste (##) operator (see below).
			while (::isspace(*pb))
				++pb;

			if (::isalpha(*pb) || *pb == '_')
			{
				// Get macro identifier
				for (pb2 = pb; ::isalnum(*pb2) || *pb2 == '_'; ++pb2)
					; // nothing here
				ASSERT(pb2 > pb);           // There must be at least one letter

				CString macro_name = CString(pb, int(pb2-pb));
				pb = pb2;

				//while (::isspace(*pb))    // Don't skip spaces here since '(' must be right after macro name for function-like macro
				//	++pb;
				std::vector<CString> param;
				bool is_variadic = false;
				if (*pb == '(')
				{
					do
					{
						++pb;
						while (::isspace(*pb))
							++pb;
						// Check for variadic macro
						if (strncmp(pb, "...", 3) == 0)
						{
							is_variadic = true;
							pb += 3;
							param.push_back(CString("__VA_ARGS__"));
							// The ... must be at the end of the parameters so skip any white
							// space and break from loop (whence closing bracket expected).
							while (::isspace(*pb))
								++pb;
							break;
						}
						// Get parameter name
						for (pb2 = pb; ::isalnum(*pb2) || *pb2 == '_'; ++pb2)
							; // nothing here
						if (pb2 == pb)
							throw "Error in macro parameters for " + macro_name;

						param.push_back(CString(pb, int(pb2-pb)));
						pb = pb2;
						while (::isspace(*pb))
							++pb;
					} while (*pb == ',');
					if (*pb != ')')
						throw "Error in macro parameters for " + macro_name;
					++pb;
				}
				if (param.size() > 0)
				{
					// Add the number of parameters to the name preceded by a colon
					CString tmp;
					tmp.Format(":%ld%s", long(param.size()), is_variadic ? "+" : "");
					macro_name += tmp;
				}

				while (::isspace(*pb))
					++pb;
				// Get macro text looking for parameters
				CString txt;
				for (pb2 = pb; *pb2 != '\0'; ++pb2)
				{
					if (isalpha(*pb2) || *pb2 == '_')
					{
						const char * pb3;
						// Get what could be the name of a parameter
						for (pb3 = pb2; ::isalnum(*pb3) || *pb3 == '_'; ++pb3)
							; // nothing here
						tmp = CString(pb2, int(pb3-pb2));

						// See if this id is a parameter and replace it with $<param_num>$ if it is
						std::vector<CString>::const_iterator pparam = std::find(param.begin(), param.end(), tmp);
						if (pparam != param.end())
							tmp.Format(" $%ld$ ", long(pparam - param.begin()));  // Put spaces on either side so we get separate tokens (unless ## used - see below)
						txt += tmp;

						pb2 = pb3 - 1;
					}
					else
						txt += *pb2;
				}
				// Allow for pasting using ##
				txt.Replace("$ ##", "$##");
				txt.Replace("## $", "##$");

				txt.TrimLeft();                     // C std says leading/trailing space is removed
				txt.TrimRight();

				// Save the macro
				pp_def_[macro_name] = txt;
			}
			else
				throw CString("Expected macro name after #define");
		}
		else if (tmp == "undef")
		{
			while (::isspace(*pb))
				++pb;

			if (::isalpha(*pb) || *pb == '_')
			{
				// Get macro identifier
				for (pb2 = pb; ::isalnum(*pb2) || *pb2 == '_'; ++pb2)
					; // nothing here
				ASSERT(pb2 > pb);           // There must be at least one letter

				CString macro_name = CString(pb, int(pb2-pb));
				pp_def_.erase(macro_name);
			}
			else
				throw CString("Expected macro name after #undef");
		}
		else if (tmp == "pragma")
		{
			while (::isspace(*pb))
				++pb;

			if (::isalpha(*pb) || *pb == '_')
			{
				// Get pragma name
				for (pb2 = pb; ::isalnum(*pb2) || *pb2 == '_'; ++pb2)
					; // nothing here
				ASSERT(pb2 > pb);           // There must be at least one letter
				tmp = CString(pb, int(pb2-pb));
				pb = pb2;

				if (tmp == "pack")
				{
					while (::isspace(*pb))
						++pb;
					if (*pb != '(')
						throw CString("Expected #pragma pack parameters");

					++pb;
					while (::isspace(*pb))
						++pb;
					if (*pb == ')')
					{
						// Restore default packing
						pack_.back() = packing_default_;
						continue;       // nothing more to do (there is no pack value to check)
					}
					for (pb2 = pb; ::isalnum(*pb2) || *pb2 == '_'; ++pb2)
						; // nothing here
					tmp = CString(pb, int(pb2-pb));
					pb = pb2;
					if (tmp == "pop")
					{
						// Always leave the bottom entry so that pack_.back() remains valid
						// even if there are more pops than pushes
						if (pack_.size() > 1)
							pack_.pop_back();
						continue;
					}
					else if (tmp == "push")
					{
						ASSERT(pack_.size() > 0);
						pack_.push_back(pack_.back());

						while (::isspace(*pb))
							++pb;
						if (*pb == ')')
							continue;
						else if (*pb != ',')
							throw CString("Error in #pragma pack parameters");
						++pb;
						while (::isspace(*pb))
							++pb;
						for (pb2 = pb; ::isdigit(*pb2); ++pb2)
							; // nothing here
						tmp = CString(pb, int(pb2-pb));
						pb = pb2;
					}
					int val = atoi(tmp.GetBuffer(0));
					if (val == 1 || val == 2 || val == 4 || val == 8 || val == 16)
					{
						pack_.back() = val;
					}
					else
						throw CString("Invalid pack value in #pragma pack");
				}
				// else - ignore unhandled pragmas
			}
			else
				throw CString("#pragma name not found");
		}
		else if (tmp == "line")
			; // Just ignore #line directives
		else if (tmp == "error")
			throw "Preprocessor #error - " + CString(pb);
		else
			throw CString("Unknown preprocessor command - #") + tmp;
	}

	ASSERT(0);
	return "";
}

// Search for an include file in a list of directories
//     - returns the full path of the file if found or an empty string if not
// inc = list of directories separated by semicolons (;) - eg "D:\\include;d:\\include\\sys"
//     - may be NULL (handled the same way as an empty list)
// name = the name of the include file to look for
//     - if NULL, or too long to fit in a buffer of _MAX_PATH characters, an
//       empty string is returned without searching
CString TParser::search_path(LPCTSTR inc, LPCTSTR name)
{
	// PathFindOnPath updates a caller supplied buffer in place, so the name has
	// to fit (with its terminator) in the fixed-size buffer used below.
	if (name == NULL || strlen(name) >= _MAX_PATH)
		return CString();

	// Split the list into its separate directory names first ...
	CString str_include(inc);
	std::vector<CString> tmp;                   // array of the dir names
	CString dir_name;
	for (int ii = 0; AfxExtractSubString(dir_name, str_include, ii, ';'); ++ii)
		tmp.push_back(dir_name);

	// ... and only take pointers to the names once tmp will no longer change, so
	// that the pointers don't depend on what happens to the strings while tmp grows.
	std::vector<LPCTSTR> dirs;                  // array of pointers into tmp
	for (size_t jj = 0; jj < tmp.size(); ++jj)
		dirs.push_back((LPCTSTR)tmp[jj]);
	dirs.push_back((LPCTSTR)0);                 // terminate

	char inc_path[_MAX_PATH];
	strcpy(inc_path, name);                     // length was checked above
	if (::PathFindOnPath(inc_path, &dirs[0]))
		return CString(inc_path);
	else
		return CString();
}

// Table of every punctuation token that is more than one character long.
// get_next() scans the table from the top and uses the first entry that matches
// the input, so whenever one token is the start of another the longer token
// must come first (eg "::*" before "::").  A NULL entry marks the end.
char *TParser::ctokens[] =
{
	// Scope and member access
	"::*", "::",
	".*", "...",

	// Comparison and shift (the 3 char assignment forms precede the shifts)
	"<=", "<<=", "<<",
	">=", ">>=", ">>",

	// Note that "##" is not in the table as it is handled during preprocessing

	// Arithmetic ("->*" precedes "->")
	"-=", "--", "->*", "->",
	"+=", "++",
	"*=",
	"/=", "/*", "//",
	"%=",

	// Bit-wise and logical
	"^=",
	"&=", "&&",
	"|=", "||",

	// Equality
	"==", "!=",

	NULL
};

// Look at the next token but leave it in place so that the following call to
// get_next() (or peek_next()) returns the same token again.
CString TParser::peek_next()
{
	// A token that has already been read ahead is simply handed back
	if (!next_token_.IsEmpty())
		return next_token_;

	// Otherwise read one now and remember it for next time
	next_token_ = get_next();
	return next_token_;
}

// Get the next token from the input, advancing past it.  Returns:
//   - an empty string when there is no more input
//   - the text of a keyword/identifier
//   - a double-quote (") for a string literal - the text between the quotes is
//     saved (without translating escape sequences) in last_string_
//   - the value of a character literal as a decimal number - the value is also
//     saved in last_int_
//   - the text of a number (with any suffix, and with a leading zero added if it
//     started with the decimal point) - is_real_ says if the value was saved in
//     last_real_ (floating point) or last_int_ (integer)
//   - otherwise the first matching entry of ctokens, or else a single character
CString TParser::get_next()
{
	CString retval;

	// First check if we have already peeked ahead for it
	if (!next_token_.IsEmpty())
	{
		retval = next_token_;
		next_token_ = "";
		return retval;
	}

	// Note: chars are cast to unsigned char before they are passed to the ctype
	// functions (isspace etc) since negative values are not valid arguments.

	// Skip any whitespace
	while (::isspace((unsigned char)*pp_))
		++pp_;

	// Check if we have reached the end of this line of text
	if (*pp_ == '\0')
	{
		str_ = get_pp_line();           // Get next line with preprocessing done
		if (str_.IsEmpty())
		{
			pp_ = "";
			return "";                  // That's all folks
		}

		// NOTE(review): whitespace at the start of the new line is not skipped here -
		// presumably get_pp_line() never returns a line that starts with whitespace; confirm.
		pp_ = str_.GetBuffer(0);         // Set current input ptr to start of the new line
	}

	if (::isalpha((unsigned char)*pp_) || *pp_ == '_')
	{
		// Get keyword/identifier
		const char * pb;
		for (pb = pp_; ::isalnum((unsigned char)*pb) || *pb == '_'; ++pb)
			; // nothing here

		retval = CString(pp_, int(pb - pp_));
		pp_ = pb;
	}
	else if (*pp_ == '"')
	{
		// Get the string
		const char * pb = pp_ + 1;
		while (*pb != '\0' && *pb != '"')
		{
			// Step over the character after a backslash so that \" does not end
			// the string - but never step over the nul at the end of the line
			// (as happens if the last character of the line is a backslash).
			if (*pb == '\\' && *(pb+1) != '\0')
				++pb;
			++pb;
		}
		last_string_ = CString(pp_+1, int(pb - (pp_+1)));
		pp_ = pb;
		if (*pp_ == '"')
			++pp_;

		// Return quote (") to flag that it was a string
		retval = '"';
	}
	else if (*pp_ == '\'')
	{
		// Get the "character"
		const char * pb = pp_ + 1;
		is_real_ = false;
		last_int_ = 0;
		while (*pb != '\0' && *pb != '\'')
		{
			if (*pb == '\\')
			{
				if (*++pb == '\0')
					break;
			}
			// Multiple characters are combined, 8 bits for each one
			last_int_ = (last_int_ << 8) + (unsigned char)*pb;
			++pb;
		}
		pp_ = pb;
		if (*pp_ == '\'')
			++pp_;

		// Return character value as a number
		retval.Format("%d", int(last_int_));
	}
	else if (::isdigit((unsigned char)*pp_) ||
			 (*pp_ == dec_point_ && ::isdigit((unsigned char)*(pp_+1))))
	{
		// Get the number (integer or float)
		if (*pp_ == dec_point_)
			retval = "0";         // Add leading zero (so first char is always a digit)

		bool is_hex = false;
		is_real_ = false;

		const char * pb = pp_;

		// Get number
		if (_strnicmp(pb, "0x", 2) == 0)
		{
			is_hex = true;
			pb += 2;
		}
		if (is_hex)
			while (::isxdigit((unsigned char)*pb))
				++pb;
		else
			while (::isdigit((unsigned char)*pb))
				++pb;

		if (!is_hex && *pb == dec_point_)
		{
			is_real_ = true;
			// Skip fractional part
			++pb;
			while (::isdigit((unsigned char)*pb))
				++pb;
		}
		if (!is_hex && ::toupper((unsigned char)*pb) == 'E')
		{
			is_real_ = true;
			// Skip exponent
			++pb;
			if (*pb == '+' || *pb == '-') // Exponent sign
				++pb;
			while (::isdigit((unsigned char)*pb))
				++pb;
		}

		if (is_real_)
		{
			// Get value
			char *end;
			last_real_ = ::strtod(pp_, &end);
			ASSERT(end == pb);

			// Skip suffix
			if (::toupper((unsigned char)*pb) == 'F' || ::toupper((unsigned char)*pb) == 'L')
				++pb;
		}
		else
		{
			// Get value
#if _MSC_VER >= 1300
			char *end;
			last_int_ = ::_strtoi64(pp_, &end, 0 /*is_hex ? 16 : (*pp_ == '0') ? 8 : 10*/);
			ASSERT(end == pb);
#else
			last_int_ = _atoi64(pp_);    // does not handle "0x"
#endif

			// Skip suffix
			while (::toupper((unsigned char)*pb) == 'U' || ::toupper((unsigned char)*pb) == 'L')
				++pb;
		}

		// Return number as string
		retval += CString(pp_, int(pb - pp_));
		pp_ = pb;
	}
	else
	{
		// Just punctuation, so check for multi-char tokens
		// (the first entry in ctokens that matches the input is the one used)
		size_t len = 0;
		char ** ptok;
		for (ptok = ctokens; *ptok != NULL; ++ptok)
		{
			len = strlen(*ptok);
			if (strncmp(pp_, *ptok, len) == 0)
				break;
		}
		if (*ptok == NULL)
		{
			// Just use next punct char as token
			retval = CString(*pp_);
			++pp_;
		}
		else
		{
			// Return the multi-char token found
			ASSERT(len > 1);
			retval = CString(pp_, int(len));
			pp_ += len;
		}
	}

	return retval;
}

// Parse a list of declarations
// outer_name is enclosing class/struct name - used for check for c'tor + saving class const names (class_name::const_name)
//            - an empty string (not NULL) means we are not inside a class/struct
// max_align is returned alignment requirements for this class - depends on packing and size of largest member
// is_union is just used to indicate that all elements are at same location (union not struct)
// Returns a fragment containing the elements (data, struct, for, jump) generated for
// the declarations, including any padding elements that were needed.
// Note that types (struct/class/union/enum definitions and typedefs) are added to
// custom_types_ and integer constants (enum values, initialised consts) to custom_consts_.
// Throws a CString (describing the problem) if an error is found.
CXmlTree::CFrag TParser::parse_all(LPCTSTR outer_name, long &max_align, bool is_union /*false*/)
{
	long curr_offset = 0;               // Current end of structure
	long max_size = 0;                  // Size of largest encountered (used for unions)
	max_align = 1;						// Biggest pack value found so far
	bool is_virtual = false;            // Used to work out if we need to add a vtable ptr
	bool is_class = false;              // True if in class/struct, false if just parsing var declarations
	int bits_used = 0;                  // Number of bits in immediately preceding bit-fields
	int bitfield_size = 0;              // Size of preceding bit-field or zero if not a bit-field
	int last_size = 0;					// Size of storage unit of preceding bit-field

	CXmlTree::CElt root;                // These are dummy params to find_elt that are not used
	int node_num;

	ASSERT(ptree_ != NULL);
	CXmlTree::CFrag retval(ptree_);
	CString ss = get_next();

	if (ss == "{" || ss == ":")
	{
		if (ss == ":")
		{
			ASSERT(!is_union);
			// get base classes
			ss = get_next();
			for (;;)
			{
				// Skip modifiers
				while (ss == "virtual" || ss == "public" || ss == "protected" || ss == "private" || ss == "internal")
				{
					if (ss == "virtual")
						is_virtual = true;
					ss = get_next();
				}

				// Get scoped class name
				while (peek_next() == "::" || peek_next() == ".")  // C# uses "." not "::"
				{
					ss += get_next();   // Add ::
					ss += get_next();   // Add next ID
				}

				CXmlTree::CElt base = find_elt(ss, root, node_num);
				if (base.IsEmpty())
					throw CString("Unknown base class type - ") + ss;
				base = CXmlTree::CElt(base.m_pelt, ptree_);    // Same node but with diff assoc. parent

				// We need to handle padding for multiple inheritance
				long curr_size = get_size(ss);
				ASSERT(curr_size > 0);
				long curr_pack = get_pack(ss);
				ASSERT(curr_pack == 1 || curr_pack == 2 || curr_pack == 4 || curr_pack == 8);

				ASSERT(pack_.size() > 0);
				long align = min(curr_pack, pack_.back());
				if (align > max_align)
					max_align = align;
				long pad = align - curr_offset%align;   // Note: pad == align if already aligned
				if (pad < align)
				{
					CString tmp;
					// Add padding
					CXmlTree::CElt pad_ee("data", ptree_);
					// Leave name attribute empty so field is not visible in view mode
					//tmp.Format("Fill$%d", int(pad_count_++));
					//pad_ee.SetAttr("name", tmp);
					pad_ee.SetAttr("type", "none");
					tmp.Format("%ld", long(pad));
					pad_ee.SetAttr("len", tmp);
					tmp.Format("Padding to align to %ld-byte boundary", long(align));
					pad_ee.SetAttr("comment", tmp);
					retval.AppendClone(pad_ee);

					curr_offset += pad;
				}
				//CXmlTree::CElt new_ee(base.m_pelt, ptree_);
				//CXmlTree::CElt ee = retval.AppendClone(new_ee);
				CXmlTree::CElt ee = retval.AppendClone(base);
				//ee.SetAttr("name", "__super");         // VC++ keyword for base class ptr
				ee.SetAttr("name", "base_class_" + ee.GetAttr("name"));   // indicate its a base class element

				ss = get_next();
				if (ss != ",")
					break;
				ss = get_next();
			}
		}
		is_class = true;

		ss = get_next();
	}

	while (!ss.IsEmpty() && ss != "}")
	{
		// Get next type declaration/definition
		bool is_typedef = false;        // If typedef we just store the type (in custom_types_)
		bool is_ignored = false;        // We don't store some things (eg static declaration in a class)
		bool is_const   = false;		// If const it might be just declaring a value
		bool is_unknown = false;        // If the data type name is not found (but could still be ptr to such)
		long curr_size = -1;            // The number of bytes for current base type
		long curr_pack = -1;            // Padding requirements for current base type

		// Check and skip modifiers
		ss = skip_modifiers(ss, is_typedef, is_ignored, is_virtual, is_const, is_class);

		if (ss == "struct" || ss == "class" || ss == "enum" || ss == "union")
		{
			bool is_enum      = ss == "enum";
			bool def_is_union = ss == "union";  // says if the type being defined is a union - not to be
												// confused with is_union (the type we are currently inside)

			if (peek_next() == "{")     // eg struct { ... } var;
				ss = anon_type_name;    // since no name given store using a temp name
			else
				ss = get_next();        // get type (class/struct) name

			// Include scope as part of name (eg "std::string")
			while (peek_next() == "::" || peek_next() == ".")  // C# uses "." not "::"
			{
				ss += get_next();   // Add scope operator
				ss += get_next();   // Add next ID
			}

			if (peek_next() == "{" || peek_next() == ":")
			{
				if (is_enum)
				{
					CString enum_name = ss;     // save the name for this "type"

					ss = get_next();
					ASSERT(ss == "{");

					// Build up a string for the "domain" attribute of the form {name,name=value,...,}
					CString enum_str = ss;
					ss = get_next();
					__int64 value = 0;          // Value of current enum constant (first defaults to zero)
					while (!ss.IsEmpty() && ss != "}")
					{
						CString const_name = ss;
						enum_str += ss;
						ss = get_next();
						if (ss == "=")
						{
							CString expr;
							while ((ss = get_next()) != "," && ss != "}" && !ss.IsEmpty())
								expr += ss;

							int ac = -1;
							ASSERT(pexpr_ != NULL);
							expr_eval::value_t val = pexpr_->evaluate(expr, 0, ac);

							if (val.typ != expr_eval::TYPE_INT)
								throw CString("Invalid enum constant - ") + expr;
							value = (long)val.int64;

							CString value_str;
							value_str.Format("%ld", long(value));
							enum_str += "=" + value_str;
						}
						custom_consts_[const_name] = value;  // Add to our list of custom constants
						if (outer_name[0] != '\0')
							custom_consts_[CString(outer_name)+"::"+const_name] = value;   // Also add value with scope of containing class

						if (ss == ",")
							ss = get_next();
						enum_str += ",";

						++value;
					}
					enum_str += "}";

					// Add enum to custom types
					CXmlTree::CElt new_ee = find_elt(enum_type_name, root, node_num); // Get default enum (mainly for "len" attrib)
					if (new_ee.IsEmpty())
						throw CString("Enum type missing from standard type list - \"_standard_types.xml\"");

					// Move node from std types to custom types and insert clone of it
					new_ee = custom_types_.GetRoot().InsertClone(CXmlTree::CElt(new_ee.m_pelt, &custom_types_));
					new_ee.SetAttr("name", enum_name);
					if (enum_name != anon_type_name)
						new_ee.SetAttr("type_name", "enum " + enum_name);
					new_ee.SetAttr("domain", enum_str);

					//CString debug_string = custom_types_.DumpXML();
					ss = enum_name;     // Set ss back to type name since it is used below to find the type of any decls
				}
				else
				{
					// Get the class/struct/union defn
					long pack = -1;             // Packing requirements for the new STRUCT
					CXmlTree::CFrag cdef = parse_all(ss, pack, def_is_union);

					if (!cdef.IsEmpty())
					{
						// Create "STRUCT" element and add it to the custom list
						CXmlTree::CElt new_ee = custom_types_.GetRoot().InsertNewChild("struct");
						new_ee.SetAttr("name", ss);
						new_ee.SetAttr("type_name", ss);
						CString pack_str;
						pack_str.Format("%ld", long(pack));
						new_ee.SetAttr("pack", pack_str);
						cdef.InsertKids(&new_ee);
					}
					else
						is_ignored = true;       // signal not to use this type (we can't use an empty struct)
				}
			}
		}
		else
		{
			// Include scope as part of name (eg "std::string")
			while (peek_next() == "::" || peek_next() == ".")  // C# uses "." not "::"
			{
				ss += get_next();   // Add scope operator
				ss += get_next();   // Add next ID
			}
		}


		// Check for multi-word types (signed char, long double etc)
		// - the words are joined with a dollar sign (eg "unsigned$char") to make the type name
		if (ss == "long" || ss == "signed" || ss == "unsigned")
		{
			CString next = peek_next();
			if (next == "char"   || next == "short"   || next == "int"     || next == "long"    || 
				next == "__int8" || next == "__int16" || next == "__int32" || next == "__int64" ||
				next == "double")
			{
				ss += "$" + next;
				(void)get_next();
				if (peek_next() == "int")    // ignored "int" part of 3 word types: signed/unsigned short/long int
					(void)get_next();
			}
		}

		if (peek_next() == "<")
		{
			// Include <template_spec> in the type name
			CString tmp = get_next();
			int nesting = 0;
			while (!tmp.IsEmpty())
			{
				ss += tmp;              // Add token to type name
				if (tmp == "<")
					++nesting;
				else if (tmp == ">" && --nesting == 0)
					break;
				tmp = get_next();
			}
		}

		ss = skip_modifiers(ss, is_typedef, is_ignored, is_virtual, is_const, is_class);

		bool is_func = false;       // Is it a function?
		if (ss == outer_name && peek_next() == "(" || ss == "~" && (ss=get_next()) == outer_name)
			is_func = true;         // c'tor or d'tor

		// At this point ss contains the base type name (eg "int", std::vector<int>, class/struct/union/enum name etc)
		CString type_name = ss;

		CXmlTree::CElt base = find_elt(ss, root, node_num);
		if (base.IsEmpty())
			is_unknown = true;								// Keep going in case it's a pointer (to unknown type)
		else
			base = CXmlTree::CElt(base.m_pelt, ptree_);     // Same node but with diff assoc. parent
		curr_size = get_size(ss);
		curr_pack = get_pack(ss);

		// Allow for anonymous struct or union (except at outer level)
		// Note: we have to check the first character of outer_name (as is done elsewhere in
		// this function) since comparing outer_name with "" would just compare two pointers.
		if (outer_name[0] != '\0' && peek_next() == ';')
			ss = "anonymous";
		else
			ss = get_next();
		while (!ss.IsEmpty() && ss != ";" && ss != "}")
		{
			// We need to distinguish size of base type from size of variables in declarators. Consider:
			//    char a, *b, c[3];
			// The base type has size (curr_size) 1, as does 'a', since it has the base type.
			// However, b has size 4 (pointer) and c has size 3 (array).
			long decl_curr_size = curr_size; // size of var in decl (!= curr_size if ptr, etc)
			long decl_curr_pack = curr_pack;

			CXmlTree::CElt actual;      // XML element representing the actual variable as we currently know it
			if (!is_unknown)
				actual = base;          // Leave empty if unknown type
			int nest_level = 0;         // Current level of brackets ()
			int ptr_level = -1;         // Level at which * found or -1 if not found
			CString var_name;           // Actual variable name
			if (is_func)
				var_name = type_name;   // c'tor or d'tor so force parse of params

			// Get each declarator (incl var name) for this type (eg f(int), a[4], (*f())() etc)
			// Stop at the end of the declarator:
			//    int a,                     // end of declarator (presumably another one next)
			//    int a;                     // end of declaration
			//    { struct type {...} }      // missing semi-colon before }
			//    const int a =              // initialiser after declarator
			//    int f() const              // const member function
			//    int a :                    // bit-field
			while (!ss.IsEmpty() && ss != "," && ss != ";" && ss != "{" && ss != "}" && ss != "=" && ss != ":")
			{
				if (ss == "*" || ss == "&" || (::isalpha(ss[0]) || ss[0] == '_') && peek_next() == "::*")
				{
					// Type to be stored is a ptr (but may be array of ptrs, or func ptr - handled later)
					actual = find_elt(ptr_type_name, root, node_num);
					if (actual.IsEmpty())
						throw CString("Pointer type missing from standard type list - \"_standard_types.xml\"");
					actual = CXmlTree::CElt(actual.m_pelt, ptree_);    // Same node but with diff assoc. parent
					long pointer_len = atol(actual.GetAttr("len"));
					if (pointer_len != 2 && pointer_len != 4 && pointer_len != 8)
						throw CString("Invalid pointer length in \"_standard_types.xml\" - ") + ptr_type_name;
					decl_curr_size = decl_curr_pack = pointer_len;

					if (ptr_level == -1)
						ptr_level = nest_level;

					if (::isalpha(ss[0]) || ss[0] == '_')
						ss = get_next();                  // Skip to "::*"

					is_unknown = false; // even if base type is unknown we know this is a pointer
				}
				else if (::isalpha(ss[0]) || ss[0] == '_')
				{
					// Here it is - the actual variable name
					var_name = ss;
					while (peek_next() == "::" || peek_next() == ".")
					{
						var_name += get_next();
						var_name += get_next();
					}
					if (var_name == "operator")
						var_name += get_next();
				}
				else if (!var_name.IsEmpty() && (ss == "(" || ss == "<"))
				{
					// Function or function ptr (or array of func ptrs)
					if (ptr_level <= nest_level)
					{
						// Function declaration/definition
						is_func = true;  // signal that we have a function
					}
					else if (!is_func)
					{
						// At this level it must be a ptr to function
						if (actual.GetName() == "data")
						{
							// Convert ptr to function ptr
							ASSERT(actual.GetAttr("type") == "int" && actual.GetAttr("name") == ptr_type_name);
							actual = find_elt(func_ptr_type_name, root, node_num);
							if (actual.IsEmpty())
								throw CString("Function pointer type missing from standard type list - \"_standard_types.xml\"");
							actual = CXmlTree::CElt(actual.m_pelt, ptree_);     // Same node but with diff assoc. parent
							long pointer_len = atol(actual.GetAttr("len"));
							if (pointer_len != 2 && pointer_len != 4 && pointer_len != 8)
								throw CString("Invalid function pointer length in \"_standard_types.xml\" - ") + func_ptr_type_name;
							decl_curr_size = decl_curr_pack = pointer_len;
						}
					}
					// Else: We already know its a function (is_func == true) so we can just ignore subsequent
					// parameter lists - this can occur for function returning function ptr - eg void (*f())();

					// Skip any template specifier
					if (ss == "<")
					{
						int nesting = 0;
						while (!ss.IsEmpty())
						{
							if (ss == "<")
								++nesting;
							else if (ss == ">" && --nesting == 0)
								break;
							ss = get_next();
						}
					}

					ASSERT(ss == "(");
					// Now skip the (function or function ptr) parameter list
					int nesting = 0;
					while (!ss.IsEmpty())
					{
						if (ss == "(")
							++nesting;
						else if (ss == ")" && --nesting == 0)
							break;
						ss = get_next();
					}
				}
				else if (!var_name.IsEmpty() && ss == "[")
				{
					// Array or ptr to array
					CString expr;
					while ((ss = get_next()) != "]" && !ss.IsEmpty())
						expr += ss;
					if (ptr_level <= nest_level)
					{
						CString val_str = "0";  // default to zero elements for empty dimension []
						if (!expr.IsEmpty())
						{
							// Get index (constant int expr) and enclose current element in array
							int ac = -1;
							ASSERT(pexpr_ != NULL);
							expr_eval::value_t val = pexpr_->evaluate(expr, 0, ac);

							if (val.typ != expr_eval::TYPE_INT)
								throw "Could not evaluate expression - " + expr;
							decl_curr_size *= long(val.int64);
							val_str.Format("%ld", long(val.int64));
						}

						// TBD: check if this gives same index order as C (right to left)
						CXmlTree::CElt new_ee("for", ptree_);
						new_ee.SetAttr("count", val_str);
						CXmlTree::CElt child = new_ee.InsertClone(actual);
						child.RemoveAttr("name");                 // The element in a for does not need a name (accessed using [])
						child.RemoveAttr("comment");
						actual = new_ee;
					}
					// Else: Do nothing since it's still a pointer (ptr to array of whatever it pointed to before)
				}
				else if (ss == "(")
				{
					++nest_level;
				}
				else if (ss == ")")
				{
					nest_level--;
				}
				else if (ss == "const")
				{
					// "const" can be placed after decl spec
					is_const = true;
				}
				else
				{
					ASSERT(0);
					break;
				}

				ss = get_next();
			}
			if (nest_level != 0)
				throw CString("Matching right bracket ')' not found");

			bitfield_size = 0;
			if (ss == ":")
			{
				if (is_func)
				{
					// Must be ctor initialiser list - skip to start of function statement
					// (we also stop at the end of the input in case the { is missing)
					while (!ss.IsEmpty() && ss != "{")
						ss = get_next();
				}
				else
				{
					// Must be bitfield
					if (actual.GetAttr("type") != "int" && actual.GetAttr("type") != "char")
						throw CString("Bitfields can only use integer types");

					// Get const expression for size of bit-field
					CString expr;
					while ((ss = get_next()) != "," && ss != ";" && ss != "{" && ss != "}" && !ss.IsEmpty())
						expr += ss;

					// Evaluate the (constant int) expression to get the number of bits
					int ac = -1;
					ASSERT(pexpr_ != NULL);
					expr_eval::value_t val = pexpr_->evaluate(expr, 0, ac);

					if (val.typ != expr_eval::TYPE_INT)
						throw CString("Bitfield size must be an integer (or constant integer expression)");
					bitfield_size = int(val.int64);  // number of bits
					CString tmp;
					tmp.Format("%ld", long(bitfield_size));

					actual = find_elt(bitfield_type_name, root, node_num);
					if (actual.IsEmpty())
						throw CString("Bit-field unit (bit-field$) missing from standard type list - \"_standard_types.xml\"");
					actual = CXmlTree::CElt(actual.m_pelt, ptree_);    // This is still the node in standard types but with diff assoc. parent

					// Note this actually modifies the node in "_standard_types" but since we set the number of bits
					// each time we use it, and we don't save back to file this doesn't matter.
					actual.SetAttr("bits", tmp);

					if (base_storage_unit_)
						actual.SetAttr("len", base.GetAttr("len"));
					int bitfield_unit_size = atol(actual.GetAttr("len"));
					if (bitfield_unit_size != 1 && bitfield_unit_size != 2 && bitfield_unit_size != 4 && bitfield_unit_size != 8)
						throw CString("Invalid bitfield storage unit length");
					if (bitfield_size > bitfield_unit_size*8)
						throw CString("Bitfield size exceeds storage unit length");
					decl_curr_size = decl_curr_pack = bitfield_unit_size;
					ASSERT(last_size == 0 || last_size == bitfield_unit_size);
				}
			}

			if (is_func)
			{
				//if (ss == "const")    // const has now been filtered out (above)
				//	ss = get_next();

				// Nothing is stored for a function but we should skip body if present
				if (ss == "{")
				{
					int nesting = 0;
					while (!ss.IsEmpty())
					{
						if (ss == "{")
							++nesting;
						else if (ss == "}" && --nesting == 0)
						{
							ss = get_next();    // skip closing "}" too
							break;
						}
						ss = get_next();
					}
				}
				break;   // There is only one decl if its a function
			}
			else if (is_unknown)
			{
				if (!is_ignored)
					throw "Unknown type \"" + type_name + "\"";
			}
			else if (!var_name.IsEmpty())
			{
				ASSERT(decl_curr_size > 0);
				ASSERT(decl_curr_pack == 1 || decl_curr_pack == 2 || decl_curr_pack == 4 || decl_curr_pack == 8);

				if (is_typedef)
				{
					// Just store the type we extracted in our custom types (XML) container
					CXmlTree::CElt ee(actual.m_pelt, &custom_types_);
					ee = custom_types_.GetRoot().InsertClone(ee);
					CString type_str = ee.GetName();
					CString name_str = ee.GetAttr("name");
					ee.SetAttr("name", var_name);
					ee.SetAttr("type_name", var_name);
					if (type_str == "data" && name_str != ptr_type_name && name_str != func_ptr_type_name && name_str != enum_type_name)
						ee.SetAttr("comment", "typedef " + type_name);
				}
				else if (!is_ignored)
				{
					// Check if we need to terminate the previous bit-field
					if (bits_used > 0 && (bitfield_size == 0 || bits_used + bitfield_size > last_size*8))
					{
						curr_offset += last_size;
						bits_used = 0;
					}

					// If we are not in the middle of a bit-field
					if (bits_used == 0 || bitfield_size == 0)
					{
						// See if we need to add any padding
						ASSERT(pack_.size() > 0);
						long align = min(decl_curr_pack, pack_.back());
						if (align > max_align)
							max_align = align;
						long pad = align - curr_offset%align;   // Note: pad == align if already aligned
						if (!is_union && pad < align)
						{
							CString tmp;
							// Add padding
							CXmlTree::CElt pad_ee("data", ptree_);
							// Leave name attribute empty so field is not visible in view mode
							//tmp.Format("Fill$%d", int(pad_count_++));
							//pad_ee.SetAttr("name", tmp);
							pad_ee.SetAttr("type", "none");
							tmp.Format("%ld", long(pad));
							pad_ee.SetAttr("len", tmp);
							tmp.Format("Padding to align to %ld-byte boundary", long(align));
							pad_ee.SetAttr("comment", tmp);
							retval.AppendClone(pad_ee);

							curr_offset += pad;
						}
					}

					if (is_union)
					{
						// Insert into "JUMP" element.  The new position is 0 relative to current position so the
						// file position is not changed but when the JUMP "returns" the file position reverts back.
						// This effectively means that all elements of the union use the same bit of the file.  The
						// bytes for the union (= size of largest element) are reserved below using data type "none".
						CXmlTree::CElt new_ee("jump", ptree_);
						new_ee.SetAttr("offset", "0");
						new_ee.SetAttr("origin", "current");
						CXmlTree::CElt child = new_ee.InsertClone(actual);
						child.SetAttr("name", var_name);        // name the "data" elt (not "jump" elt which does not have name attr)
						child.RemoveAttr("comment");
						retval.AppendClone(new_ee);

						// Keep track of largest union member seen
						if (decl_curr_size > max_size)
							max_size = decl_curr_size;
					}
					else
					{
						// Store an instance of the type and set its name
						//CXmlTree::CElt src_elt(actual.m_pelt, ptree_);
						CXmlTree::CElt ee = retval.AppendClone(actual);
						ee.SetAttr("name", var_name);          // name the elt (data, struct or for)
						ee.RemoveAttr("comment");
					}

					// For a bit-field the offset is not moved on until the storage unit is
					// finished with (see "terminate the previous bit-field" above)
					bits_used += bitfield_size;
					if (bitfield_size != 0)
						last_size = atoi(actual.GetAttr("len"));
					else
					{
						last_size = 0;                        // Store zero for consistency checks
						curr_offset += decl_curr_size;
					}
				}
			}

			if (ss == "=")
			{
				// Scan for end of initialiser
				CString expr;
				while ((ss = get_next()) != ";" && ss != "," && ss != "}" && !ss.IsEmpty())  // not sure that it can occur?
					expr += ss;
				if (is_const)
				{
					int ac = -1;
					ASSERT(pexpr_ != NULL);
					expr_eval::value_t val = pexpr_->evaluate(expr, 0, ac);

					if (val.typ == expr_eval::TYPE_INT)
					{
						custom_consts_[var_name] = val.int64;      // Add to our list of custom constants
						if (outer_name[0] != '\0')
							custom_consts_[CString(outer_name)+"::"+var_name] = val.int64;   // Also add value with scope of containing type
					}
					else
						AfxMessageBox("Definition of constant \"" + var_name + "\" was ignored.\r\n"
									  "(Only integer constants are allowed.)");
				}
			}
			if (ss == ',')
				ss = get_next();
		}

		// If unnamed type remove it again from custom_types_
		if (type_name == anon_type_name)
		{
			CXmlTree::CElt ee = find_elt(anon_type_name, root, node_num);
			if (!ee.IsEmpty())
				root.DeleteChild(ee);
		}

		if (ss == ";")
			ss = get_next();
	}

	// Terminate properly if last field was a bit-field
	if (bitfield_size > 0)
		curr_offset += last_size;

	// Add vptr (vtable pointer) at the start if class is virtual
	if (is_virtual)
	{
		// Get an element for the pointer
		ASSERT(is_class);
		CXmlTree::CElt vtable_ee = find_elt(ptr_type_name, root, node_num);
		if (vtable_ee.IsEmpty())
			throw CString("Pointer type missing from \"_standard_types.xml\" - ") + ptr_type_name;
		vtable_ee = CXmlTree::CElt(vtable_ee.m_pelt, ptree_);       // Same node but with diff assoc. parent
		long pointer_len = atol(vtable_ee.GetAttr("len"));
		if (pointer_len != 2 && pointer_len != 4 && pointer_len != 8)
			throw CString("Invalid pointer length in \"_standard_types.xml\" - ") + ptr_type_name;
		else if (pointer_len < pack_.back())
		{
			CString msg;
			msg.Format("Sorry, size of pointer for vtable (%ld) must not be less than current packing (%ld)",
					   long(pointer_len), long(pack_.back()));
			throw msg;
		}

		// Update padding info
		curr_offset += pointer_len;
		long align = min(pointer_len, pack_.back());
		if (align > max_align)
			max_align = align;

		// Insert as the first element in the fragment
		CXmlTree::CElt ee = retval.InsertClone(vtable_ee, &retval.GetFirstChild());
		ee.SetAttr("name", "vptr");
		ee.SetAttr("comment", "pointer to vtable");
	}

	// Pad end of structure if necessary
	long pad = max_align - curr_offset%max_align;   // Note: pad == max_align if already aligned
	if (is_union && max_size > 0)
	{
		// Add padding equal to size of largest element
		CString tmp;
		CXmlTree::CElt pad_ee("data", ptree_);
		// Leave name attribute empty so field is not visible in view mode
		//tmp.Format("Fill$%d", int(pad_count_++));
		//pad_ee.SetAttr("name", tmp);
		pad_ee.SetAttr("type", "none");
		tmp.Format("%ld", long(max_size));
		pad_ee.SetAttr("len", tmp);
		pad_ee.SetAttr("comment", "padding for size of largest union member - " + tmp);
		retval.AppendClone(pad_ee);
	}
	else if (is_class && pad < max_align)
	{
		// A class/struct needs to be padded at end (eg, in case it is used in an array)
		CString tmp;
		// Add padding
		CXmlTree::CElt pad_ee("data", ptree_);
		// Leave name attribute empty so field is not visible in view mode
		//tmp.Format("Fill$%d", int(pad_count_++));
		//pad_ee.SetAttr("name", tmp);
		pad_ee.SetAttr("type", "none");
		tmp.Format("%ld", long(pad));
		pad_ee.SetAttr("len", tmp);
		tmp.Format("End padding to align to %ld-byte boundary", long(max_align));
		pad_ee.SetAttr("comment", tmp);
		retval.AppendClone(pad_ee);

		curr_offset += pad;
	}
	return retval;
}

// Skip any modifiers, noting anything special.
//
// Starting from the token "ss", consumes storage-class specifiers, calling
// conventions, cv-qualifiers, access specifiers and similar C/C++/C# keywords
// by repeatedly calling get_next(), until a token that is not a recognised
// modifier is found.
//
//   ss         - the current token (first candidate modifier)
//   is_typedef - set to true if "typedef" was seen
//   is_ignored - set to true if "static", "extern" or "friend" was seen, or if
//                "const" was seen while is_class is false
//   is_virtual - set to true if "virtual" was seen
//   is_const   - set to true if "const" was seen
//   is_class   - true if the declaration being parsed is inside a class/struct
//
// The flags are only ever set (never cleared) so the caller must initialise them.
// Returns the first token that is not a modifier.
CString TParser::skip_modifiers(CString ss, 
								bool &is_typedef, bool &is_ignored, bool &is_virtual, bool &is_const,
								bool is_class)
{
	while (ss == "typedef" || ss == "static" || ss == "extern" || ss == "register" || ss == "auto" || 
			ss == "__cdecl" || ss == "__stdcall" || ss == "__fastcall" ||
			ss == "__inline" || ss == "__forceinline" || ss == "inline" || 
			ss == "const" || ss == "volatile" || ss == "mutable" || ss == "readonly" || 
			ss == "friend" || ss == "explicit" || ss == "virtual" || ss == "typename" || 
			ss == "public" || ss == "private" || ss == "protected" || ss == "internal" || 
			// Modifiers that take a parenthesised argument list (handled specially below).
			// Note: these must be in this condition or the code that skips them is never reached.
			ss == "__declspec" || ss == "__based" || 
			// C# only keywords
			ss == "abstract" || ss == "override" || ss == "sealed" || ss == "unsafe" || ss == "event" || 
			// C++ .Net stuff
			ss == "__gc" || ss == "__nogc" || ss == "__abstract" || ss == "__sealed" || ss == "__interface" || 
			ss == "__value" || ss == "__pin" || ss == "__delegate" || ss == "__property" || ss == "__event" || 
			// DOS/Windows keywords and #defines
			ss == "near" || ss == "far" || ss == "NEAR" || ss == "FAR" || ss == "pascal" || ss == "PASCAL" || 
			ss == "cdecl" || ss == "CDECL" || ss == "WINAPI" || ss == "APIENTRY" || ss == "CALLBACK")
	{
		if (ss == "__declspec" || ss == "__based")
		{
			// Skip the "parameters" for these
			if ((ss = get_next()) == "(")
			{
				// Skip everything up to the matching closing bracket (allowing
				// for nested brackets) or until we run out of tokens
				int nesting = 0;
				while (!ss.IsEmpty())
				{
					if (ss == "(")
						++nesting;
					else if (ss == ")" && --nesting == 0)
						break;
					ss = get_next();
				}
				ss = get_next();        // step past the closing bracket
			}
			// else ss already holds the token after the keyword
		}
		else if (ss == "public" || ss == "private" || ss == "protected" || ss == "internal")
		{
			ss = get_next();
			if (ss == ":")         // C/C++ but not C#
				ss = get_next();
		}
		else
		{
			if (ss == "static" || ss == "extern" || ss == "friend")
				is_ignored = true;
			else if (ss == "virtual")
				is_virtual = true;
			else if (ss == "typedef")
				is_typedef = true;
			else if (ss == "const")
			{
				is_const = true;
				if (!is_class)
					is_ignored = true;      // const declaration outside a class does not generate anything
			}

			ss = get_next();
		}
	}
	return ss;
}

// Helper for TParser::find_elt - scans the children of "root" (in order) for
// the first one whose "name" attribute equals "name".
// On success returns that child with node_num set to its (zero-based) index.
// On failure returns an empty element with node_num set to the number of
// children that were examined.
static CXmlTree::CElt tparser_find_named_child(CXmlTree::CElt &root, LPCTSTR name, int &node_num)
{
	CXmlTree::CElt child = root.GetFirstChild();
	node_num = 0;
	while (!child.IsEmpty())
	{
		if (child.GetAttr("name") == name)
			return child;
		++node_num;
		++child;
	}
	return child;                       // empty => no match among these children
}

// Find a type by searching all the (enabled) type files.
// The lists are tried in this order: standard types (if check_std_), custom
// types (always), Windows types (if check_win_), common types (if check_common_).
//   name     - value of the "name" attribute of the wanted type
//   root     - receives the root node of the list that contains the type
//              (or an empty element if the type is not in any searched list)
//   node_num - receives the index of the found node within root
// Returns the node found or an empty element if there is no such type.
CXmlTree::CElt TParser::find_elt(LPCTSTR name, CXmlTree::CElt &root, int &node_num)
{
	CXmlTree::CElt found;

	if (check_std_)
	{
		root = std_types_.GetRoot();
		found = tparser_find_named_child(root, name, node_num);
		if (!found.IsEmpty())
			return found;
	}

	// Custom types are always checked since that is where newly parsed types are placed
	root = custom_types_.GetRoot();
	found = tparser_find_named_child(root, name, node_num);
	if (!found.IsEmpty())
		return found;

	if (check_win_)
	{
		root = win_types_.GetRoot();
		found = tparser_find_named_child(root, name, node_num);
		if (!found.IsEmpty())
			return found;
	}

	if (check_common_)
	{
		root = common_types_.GetRoot();
		found = tparser_find_named_child(root, name, node_num);
		if (!found.IsEmpty())
			return found;
	}

	// Nothing matched - clear root and hand back an empty element as the "not found" signal
	root = CXmlTree::CElt();
	return root;
}

// Work out the size of the named type (nested types like STRUCT included).
// The type is located using find_elt() and the size is then obtained from
// the global get_size() for that single node.
// Returns -1 if the type is not found (or if its size is not fixed).
long TParser::get_size(LPCTSTR name)
{
	CXmlTree::CElt parent;
	int child_num;

	CXmlTree::CElt found = find_elt(name, parent, child_num);
	if (found.IsEmpty())
		return -1;                      // not found

	// Size of just the one node: range is [child_num, child_num+1)
	return ::get_size(parent, child_num, child_num + 1);
}

// Get the packing (alignment) requirement of the named type, or -1 if the
// type is not found (or is not a kind of element that is handled here).
//  - "data" elements: normally the size of the object, except that "none" and
//    "string" types pack to 1, and a "date" in "systemtime" format packs to 2.
//  - "struct" elements: the value of the "pack" attribute stored on the element
//    (1 if that is missing or not positive).
//  - "if" and "for" elements: the requirement of the element they contain.
// Note: Currently the largest data type is 8 bytes so 16 should never be returned.
long TParser::get_pack(LPCTSTR name)
{
	CXmlTree::CElt owner;
	int idx;

	CXmlTree::CElt elt = find_elt(name, owner, idx);
	if (elt.IsEmpty())
		return -1;                      // not found

	// Step down through any IF or FOR wrappers to reach the contained element
	CString kind = elt.GetName();
	while (kind == "if" || kind == "for")
	{
		owner = elt;
		elt = owner.GetFirstChild();
		idx = 0;
		kind = elt.GetName();
	}

	if (kind == "struct")
	{
		const long pack = atol(elt.GetAttr("pack"));
		return pack > 0 ? pack : 1;     // use no padding by default for structs
	}

	if (kind != "data")
		return -1;                      // some other sort of element

	const CString data_type = elt.GetAttr("type");
	if (data_type == "none" || data_type == "string")
		return 1;

	if (data_type == "date" && elt.GetAttr("format") == "systemtime")
		return 2;                       // SYSTEMTIME structure with largest element of size 2 (WORD)

	// All other data types pack according to their size
	return ::get_size(owner, idx, idx + 1);
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The MIT License


Written By
Australia Australia
Andrew has a BSc (1983) from Sydney University in Computer Science and Mathematics. Andrew began programming professionally in C in 1984 and has since used many languages but mainly C, C++, and C#.

Andrew has a particular interest in STL, .Net, and Agile Development. He has written articles on STL for technical journals such as the C/C++ User's Journal.

In 1997 Andrew began using MFC and released the source code for a Windows binary file editor called HexEdit, which was downloaded more than 1 million times. From 2001 there was a shareware version of HexEdit (later called HexEdit Pro). HexEdit has been updated to use the new MFC (based on BCG) and is once more open source.

Comments and Discussions