MiniXML, a Fast parser for the XML Configuration File

Richard Lin
Rate me:
3.84/5 (12 votes)
5 Oct 2005, 4 min read
98.3K
1.1K
An article that presents a fast XML parser for accessing the configuration file.
minixml.zip
- minixml
  - bin
    - SampleXML.xml
  - ElementClone.cpp
  - makefile
  - MiniParser.h
  - MiniXML.cpp
  - MiniXML.dsp
  - MiniXML.dsw
  - MiniXML.h
  - StdAfx.cpp
  - StdAfx.h
  - Test.cpp
minixml_src.zip
- SampleXML.xml
- ElementClone.cpp
- MiniParser.h
- MiniXML.cpp
- MiniXML.dsp
- MiniXML.dsw
- MiniXML.h
- StdAfx.cpp
- StdAfx.h
- Test.cpp
src.zip
- SampleXML.xml
- ElementClone.cpp
- MiniParser.h
- MiniXML.cpp
- MiniXML.dsp
- MiniXML.dsw
- MiniXML.h
- StdAfx.cpp
- StdAfx.h
- Test.cpp
/********************************************************************
	created:	2005/05/27
	created:	27:5:2005   10:11
	filename: 	C:\tool\MiniXML\MiniXML.cpp
	file path:	C:\tool\MiniXML
	file ext:	cpp
	author:		Richard Lin
	
	purpose:
	Define class CElement, class CElementIterator, and class CXmlConf.
	Basically, users use those three classes to access the XML data.

    CXmlConf will acquire XML data from the given file or char* buffer.
	The CElement::Parse will parse the data stored in the internal buffer
	and create a DOM tree with root of CElement * m_pRoot, a member of 
	CXmlConf.
 
    CElement is the class that users can access Attributes, Chardata,
	element name and subElements.

    CElementIterator is a helper class to access the Children elements of
	a CElement.

    This file also defines a global function called CElement* Clone (CElement* p);
    This function is mainly for demo purposes; it shows how to use the public
	functions provided by CElement and CElementIterator.
 
*********************************************************************/

#include "stdafx.h"
#include "MiniXML.h"



using namespace std;

CharType CharTypeTable::ChartypeTable[256]; 

//
//	Function name: void CharTypeTable::InitCharType()
//  Purpose:
//		Fill the 256-entry character classification table. Every slot
//		starts out as ErrorChar; letters, digits and a fixed set of
//		punctuation become NormalChar; whitespace and the XML delimiter
//		characters receive their own dedicated types.
//
void CharTypeTable::InitCharType()
{
	// Anything that is not explicitly classified below is an error character.
	for (int slot=0; slot<256; ++slot)
	{
		CharTypeTable::ChartypeTable[slot]=ErrorChar;
	}

	// Letters and digits are ordinary "word" characters.
	for (int lower='a'; lower<='z'; ++lower)
	{
		CharTypeTable::ChartypeTable[lower]=NormalChar;
	}
	for (int upper='A'; upper<='Z'; ++upper)
	{
		CharTypeTable::ChartypeTable[upper]=NormalChar;
	}
	for (int digit='0'; digit<='9'; ++digit)
	{
		CharTypeTable::ChartypeTable[digit]=NormalChar;
	}

	// Punctuation that is also accepted inside a word.
	static const char wordPunctuation[]="@$^&*()_+:{}[].~\\";
	for (const char* p=wordPunctuation; *p!='\0'; ++p)
	{
		CharTypeTable::ChartypeTable[(unsigned char)*p]=NormalChar;
	}

	// Characters with a dedicated token type.
	CharTypeTable::ChartypeTable['<']=LeftArrow;
	CharTypeTable::ChartypeTable['>']=RightArrow;
	CharTypeTable::ChartypeTable['/']=ForwardSlash;
	CharTypeTable::ChartypeTable['?']=QuestionMark;
	CharTypeTable::ChartypeTable['!']=Exclaimation;
	CharTypeTable::ChartypeTable['-']=Dash;
	CharTypeTable::ChartypeTable['=']=Equal;
	CharTypeTable::ChartypeTable['\"']=Quote;

	// Whitespace.
	CharTypeTable::ChartypeTable[' ']=WhiteSpace;
	CharTypeTable::ChartypeTable['\t']=WhiteSpace;
	CharTypeTable::ChartypeTable['\r']=WhiteSpace;
	CharTypeTable::ChartypeTable['\n']=WhiteSpace;
}

//////////////////////////////////////////////////////////////////////////////////////////////////////////
//
//
// MEMBER FUNCTIONS FOR THE CLASS CStringValue
// MEMBER FUNCTIONS FOR THE CLASS CStringValue
// MEMBER FUNCTIONS FOR THE CLASS CStringValue
// MEMBER FUNCTIONS FOR THE CLASS CStringValue
// MEMBER FUNCTIONS FOR THE CLASS CStringValue
//
//
//

//
// Function Name: CStringValue Destructor
//
// Releases the private copy of the characters, but only when this object
// made one (m_bUseMyBuffer). A borrowed range is left untouched.
//
CStringValue::~CStringValue()
{
	if (!m_bUseMyBuffer)
	{
		return;
	}
	delete [] m_Buffer;
}

//
// Function Name: CStringValue Constructor
//
// Builds a string value from the half-open character range [pBegin, pEnd).
// When bUseInternalBuffer is true the characters are copied into a buffer
// owned by this object; otherwise the object only refers to the caller's range.
//
CStringValue::CStringValue(char*pBegin, char*pEnd, bool bUseInternalBuffer)
{
	m_bUseMyBuffer=bUseInternalBuffer;

	if (!m_bUseMyBuffer)
	{
		// Borrow the caller's storage as-is.
		m_pBegin=pBegin;
		m_pEnd=pEnd;
		return;
	}

	// Take a private copy of the range.
	m_Buffer=new char[pEnd-pBegin];
	std::copy(pBegin,pEnd,m_Buffer);
	m_pBegin=m_Buffer;
	m_pEnd=m_Buffer+(pEnd-pBegin);
}


//
// Function Name: CStringValue Constructor
//
// Builds a string value from a null-terminated string. The terminator is
// not part of the stored range. When bUseInternalBuffer is true the
// characters are copied into a buffer owned by this object; otherwise the
// object only refers to the caller's string.
//
CStringValue::CStringValue(char*pBegin, bool bUseInternalBuffer)
{
	int length=strlen(pBegin);
	m_bUseMyBuffer=bUseInternalBuffer;

	if (!m_bUseMyBuffer)
	{
		// Borrow the caller's storage as-is.
		m_pBegin=pBegin;
		m_pEnd=pBegin+length;
		return;
	}

	// Take a private copy of the characters.
	m_Buffer=new char[length];
	memcpy(m_Buffer,pBegin,length);
	m_pBegin=m_Buffer;
	m_pEnd=m_Buffer+length;
}


//
// Function Name: CStringValue copy constructor
//
// An owning source is deep-copied into a fresh buffer; a borrowing source
// is copied shallowly so both objects refer to the same external range.
//
CStringValue::CStringValue (CStringValue const &obj)
{
	m_bUseMyBuffer=obj.m_bUseMyBuffer;

	if (!m_bUseMyBuffer)
	{
		m_pBegin=obj.m_pBegin;
		m_pEnd=obj.m_pEnd;
		return;
	}

	m_Buffer=new char[obj.m_pEnd-obj.m_pBegin];
	memcpy(m_Buffer,obj.m_Buffer,obj.m_pEnd-obj.m_pBegin);
	m_pBegin=m_Buffer;
	m_pEnd=m_Buffer+(obj.m_pEnd-obj.m_pBegin);
}


//
// Function Name: CStringValue operator=
//
// Assigns 'obj' to this object. An owning source is deep-copied; a
// borrowing source is copied shallowly. Self-assignment is a no-op.
//
// The replacement buffer is allocated BEFORE the current one is released:
// if the allocation throws, this object is left exactly as it was, instead
// of holding a dangling m_Buffer that the destructor would free again.
//
CStringValue& CStringValue::operator= (CStringValue const &obj)
{
	if (this==&obj)
	{
		return *this;
	}

	// Step 1: prepare the new contents without touching *this.
	char* pNewBuffer=NULL;
	if (obj.m_bUseMyBuffer)
	{
		pNewBuffer=new char[obj.m_pEnd-obj.m_pBegin];
		memcpy(pNewBuffer,obj.m_Buffer,obj.m_pEnd-obj.m_pBegin);
	}

	// Step 2: nothing below can throw, so it is now safe to drop the old buffer.
	if (m_bUseMyBuffer)
	{
		delete []m_Buffer;
	}

	// Step 3: commit.
	if (obj.m_bUseMyBuffer)
	{
		m_Buffer=pNewBuffer;
		m_pBegin=m_Buffer;
		m_pEnd=m_Buffer+(obj.m_pEnd-obj.m_pBegin);
		m_bUseMyBuffer=true;
	}
	else
	{
		m_pBegin=obj.m_pBegin;
		m_pEnd=obj.m_pEnd;
		m_bUseMyBuffer=false;
	}
	return *this;
}





//
// Function Name:
// 	friend bool operator==(CStringValue const& v1, CStringValue const& v2);
//
// Two string values are equal when their ranges have the same length and
// hold the same characters. The contents are only compared when the
// lengths already match.
//
bool operator==(CStringValue const& v1, CStringValue const& v2)
{
	const bool bSameLength=(v1.m_pEnd-v1.m_pBegin)==(v2.m_pEnd-v2.m_pBegin);
	return bSameLength && std::equal(v1.m_pBegin,v1.m_pEnd,v2.m_pBegin);
}

// Inequality is defined as the negation of operator== on CStringValue.
bool operator!=(CStringValue const& v1, CStringValue const& v2)
{
	if (v1==v2)
	{
		return false;
	}
	return true;
}

//
// Stream output for CStringValue.
//
// Writes the characters of [m_pBegin, m_pEnd) to 'os' one at a time (the
// range carries no null terminator) and returns the stream.
//
std::ostream& operator <<(std::ostream& os, CStringValue const&obj)
{
	for (char* pCursor=obj.m_pBegin; pCursor<obj.m_pEnd; pCursor++)
	{
		os<<*pCursor;
	}
	return os;
}

// Compare this value against a null-terminated C string.
// Returns true only when both the length and every character match.
bool CStringValue::Equal(const char*str)
{
	if (GetSize()==strlen(str))
	{
		return std::equal(m_pBegin,m_pEnd,str);
	}
	return false;
}

//
// END END END
// END END END
// END END END
// MEMBER FUNCTIONS FOR THE CLASS CStringValue
//
//
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////


//
// Function name: bool CToken::AcquireToken
//
// Comments:
// The basic acquire-token action for a single-character token: remember
// where the token starts and report the position immediately after it.
// pEndPos is not consulted here. Always returns true.
//
bool CToken::AcquireToken(	const char* pCurrent,
							const char* pEndPos,  // The end postion is the last legal position plus 1
							char* &postPostition) 
{
	postPostition=const_cast<char*>(pCurrent)+1;
	m_pToken=pCurrent;
	return true;
}


 



//
// Acquire a quoted-string token.
//
// pCurrent points at the first character of the quoted text. The function
// looks for the next '"' in [pCurrent, pEndPos). When one is found,
// postPostition is set to the position OF that closing quote (not past it),
// the token start is recorded and true is returned. When none is found,
// false is returned and neither output is modified.
//
bool CQuoteToken::AcquireToken(const char* pCurrent,
							  const char* pEndPos,  // The end postion is the last legal position plus 1
							  char* &postPostition)
{
	char* const pLimit=const_cast<char*>(pEndPos);
	char* const pClosingQuote=std::find(const_cast<char*>(pCurrent),pLimit,'\"');

	if (pClosingQuote!=pLimit)
	{
		postPostition=pClosingQuote;
		m_pToken=pCurrent;
		return true;
	}
	return false;
}
 
//
// Acquire a word token: the longest run of NormalChar characters starting
// at pCurrent and not extending past pEndPos.
//
// postPostition receives the position just after the run. Always returns
// true.
//
// The token start is recorded unconditionally. Previously it was only
// recorded when a non-word character stopped the scan, so a word that ran
// up to the end of the buffer left m_pToken pointing at an older token.
//
bool CWordToken::AcquireToken(const char* pCurrent,
							  const char* pEndPos,  // The end postion is the last legal position plus 1
							  char* &postPostition)
{
	char* pScan=const_cast<char*>(pCurrent);
	while (pScan<pEndPos &&
		   CharTypeTable::ChartypeTable[(unsigned char)*pScan]==NormalChar)
	{
		pScan++;
	}

	m_pToken=pCurrent;
	postPostition=pScan;
	return true;
}

//
// Acquire a whitespace token: the longest run of WhiteSpace characters
// starting at pCurrent and not extending past pEndPos.
//
// postPostition receives the position just after the run. Always returns
// true.
//
// The token start is recorded unconditionally. Previously it was only
// recorded when a non-whitespace character stopped the scan, so trailing
// whitespace at the end of the buffer left m_pToken pointing at an older
// token.
//
bool CWhiteSpace::AcquireToken(const char* pCurrent,
							  const char* pEndPos,  // The end postion is the last legal position plus 1
							  char* &postPostition)
{
	char* pScan=const_cast<char*>(pCurrent);
	while (pScan<pEndPos &&
		   CharTypeTable::ChartypeTable[(unsigned char)*pScan]==WhiteSpace)
	{
		pScan++;
	}

	m_pToken=pCurrent;
	postPostition=pScan;
	return true;
}


//
//	Function name: CToken* CScanner::GetNextToken()
//  Purpose:
//
//  Classify the character at the current scan position and acquire the
//  matching token, advancing the scan position past it.
//
// Return:
//  A pointer to one of the scanner's internal token objects, so the
//  scanner must outlive any use of the returned token.
//
//  - The EOF token when the scan position has reached the end position.
//  - The error token when the character is an ErrorChar, has no known
//    type, or opens a quoted string that is never closed. The scan
//    position is not advanced in these cases.
//  - For a quoted string the scan position is left ON the closing quote.
//
CToken* CScanner::GetNextToken()
{
	if (m_pCurrentPos>=m_EndPos)
	{
		return &m_EofToken;
	}

	char* pAfterToken=NULL;
	CToken* pResult=NULL;

	const CharType kind=CharTypeTable::ChartypeTable[(unsigned char)*m_pCurrentPos];
	switch (kind)
	{
	case NormalChar:
		m_WordToken.AcquireToken(m_pCurrentPos,m_EndPos,pAfterToken);
		pResult=&m_WordToken;
		break;

	case WhiteSpace:
		m_WhiteSpaceToken.AcquireToken(m_pCurrentPos,m_EndPos,pAfterToken);
		pResult=&m_WhiteSpaceToken;
		break;

	case LeftArrow:
		m_LeftArrowToken.AcquireToken(m_pCurrentPos,m_EndPos,pAfterToken);
		pResult=&m_LeftArrowToken;
		break;

	case RightArrow:
		m_RightArrowToken.AcquireToken(m_pCurrentPos,m_EndPos,pAfterToken);
		pResult=&m_RightArrowToken;
		break;

	case QuestionMark:
		m_QuestionMarkToken.AcquireToken(m_pCurrentPos,m_EndPos,pAfterToken);
		pResult=&m_QuestionMarkToken;
		break;

	case ForwardSlash:
		m_ForwardSlashToken.AcquireToken(m_pCurrentPos,m_EndPos,pAfterToken);
		pResult=&m_ForwardSlashToken;
		break;

	case Dash:
		m_DashToken.AcquireToken(m_pCurrentPos,m_EndPos,pAfterToken);
		pResult=&m_DashToken;
		break;

	case Exclaimation:
		m_ExclaimationToken.AcquireToken(m_pCurrentPos,m_EndPos,pAfterToken);
		pResult=&m_ExclaimationToken;
		break;

	case Equal:
		m_EqualToken.AcquireToken(m_pCurrentPos,m_EndPos,pAfterToken);
		pResult=&m_EqualToken;
		break;

	case Quote:
		// The quoted text starts right after the opening quote.
		if (!m_QuoteToken.AcquireToken(m_pCurrentPos+1,m_EndPos,pAfterToken))
		{
			return &m_ErrorToken;
		}
		pResult=&m_QuoteToken;
		break;

	case ErrorChar:
	default:
		// Unknown or illegal character: report it without consuming it.
		return &m_ErrorToken;
	}

	m_pCurrentPos=pAfterToken;
	return pResult;
}

/********************************************************************
	created:	22:5:2005   12:36
	function Name: 	CScanner::SkipWhiteSpace
	purpose:	Advance the scan position past any run of whitespace
				characters, stopping at the first non-whitespace
				character or at the end position.
*********************************************************************/
void CScanner::SkipWhiteSpace()
{
	for (; m_pCurrentPos<m_EndPos; m_pCurrentPos++)
	{
		if (CharTypeTable::ChartypeTable[(unsigned char)*m_pCurrentPos]!=WhiteSpace)
		{
			break;
		}
	}
}


//
//	Function name: bool CScanner::GetNextStringMatch (const char* Key)
//  Purpose:
//
// Search forward from the current scan position for the first occurrence
// of the null-terminated string 'Key'. On success the scan position is
// moved to the character just after the match and true is returned.
//
// Returns false, leaving the scan position unchanged, when the scan
// position is already at or past the end, or when 'Key' does not occur.
//
// This function is used to quickly skip comments and PIs, and to find
// the end of character data.
//
bool CScanner::GetNextStringMatch (const char* Key)
{
	if (m_pCurrentPos>=m_EndPos)
	{
		return false;
	}

	const char* const pKeyEnd=Key+strlen(Key);
	char* pMatch=std::search(m_pCurrentPos,m_EndPos,Key,pKeyEnd);

	if (pMatch!=m_EndPos)
	{
		m_pCurrentPos=pMatch+(pKeyEnd-Key);
		return true;
	}
	return false;
}

// Parser.cpp: implementation of the CParser class.
//
//////////////////////////////////////////////////////////////////////

// Skip a comment: move the scanner just past the next "-->".
// Returns false when no terminator is found.
bool CCommentParser::Parse( CScanner* pScanner)
{
	const bool bTerminated=pScanner->GetNextStringMatch("-->");
	return bTerminated;
}

//
// Function name:bool CPIParser::Parse( CScanner* pScanner)
//
// Skip a processing instruction: move the scanner just past the next "?>".
// Returns false when no terminator is found.
//
bool CPIParser::Parse( CScanner* pScanner)
{
	const bool bTerminated=pScanner->GetNextStringMatch("?>");
	return bTerminated;
}

//
// Function Name:bool CChardata::Parse( CScanner* pScanner)
//
// Consume the remaining part of the character data: move the scanner just
// past the next '<'. Returns false when no '<' follows.
//
bool CChardata::Parse( CScanner* pScanner)
{
	return pScanner->GetNextStringMatch("<");
}



//
//	Function Name:
//		void CAttribute::SetValue(const char* Value);
//	Descriptions:
//  Set the value of the attributes
void CAttribute::SetValue(const char* Value)
{
	CStringValue s(const_cast<char*>(Value), true);
	m_AttributeValue=s;
}

//
// Function Name:
// bool CAttribute::Parse( CScanner* pScanner)
//
// Parse one   Name="Value"   pair inside a start tag, and register a copy
// of the parsed attribute with the parent element.
//
// Called after the start-tag parser has seen whitespace. If that
// whitespace turns out to be the optional trailing space before the end of
// the tag ('>' or '/'), the delimiter is pushed back to the scanner and
// true is returned without registering anything.
//
// The original code tested for '<' instead of '>', so a tag with a space
// before its closing '>' (e.g. <a > or <a b="c" >) was rejected. The '<'
// test is kept so that input is still handed back to the caller as before.
//
// Returns false when the input is not a well-formed attribute: missing
// name, missing '=', or a missing/unterminated quoted value. Whitespace
// around '=' is not accepted.
//
bool CAttribute::Parse( CScanner* pScanner)
{
	pScanner->SkipWhiteSpace();
	CToken* Token=pScanner->GetNextToken();
	CToken::TokenCode tc=Token->GetTokenCode();

	// End of the attribute list: give the delimiter back to the caller.
	if (tc==CToken::TC_RightArrow||
		tc==CToken::TC_ForwardSlash||
		tc==CToken::TC_LeftArrow)
	{
		pScanner->BackOneStep();
		return true;
	}

	if (tc!=CToken::TC_Word)
	{
		return false;
	}

	CWordToken * wordToken =static_cast<CWordToken *>(Token);

	//
	// Get the attribute name: from the start of the word token up to the
	// current scan position.
	//
	m_AttributeName.m_pBegin=const_cast<char*>(wordToken->GetTokenString());
	m_AttributeName.m_pEnd=const_cast<char*>(pScanner->GetCurrentPos());

	CToken * eqToken=pScanner->GetNextToken();
	if (eqToken->GetTokenCode()!=CToken::TC_Equal)
	{
		return false;
	}

	CToken * QuoteToken=pScanner->GetNextToken();
	if (QuoteToken->GetTokenCode()!=CToken::TC_Quote)
	{
		return false;
	}

	//
	// Get the attribute value. After a quote token the scan position is
	// left on the closing quote, which therefore marks the end of the value.
	//
	m_AttributeValue.m_pBegin=const_cast<char*>(
		static_cast<CQuoteToken *> (QuoteToken)->GetTokenString());
	m_AttributeValue.m_pEnd=const_cast<char*>(pScanner->GetCurrentPos());

	// Hand a copy of this attribute to the parent element.
	CAttribute* p =new CAttribute(*this);
	p->GetParent()->ParserAddAttribute(p);

	// Step over the closing quote.
	pScanner->SetCurrentPos(m_AttributeValue.m_pEnd+1);

	return true;
}


//
//	Function name:
// bool CStagParser::Parse( CScanner* pScanner)
//
//  Purpose:
//  Parse an XML start tag.
//
//		STag		::=    '<' Name (S Attribute)* S? '>'
//
//  The same entry point also recognises, and skips, a comment ("<!--" up
//  to "-->") or a processing instruction ("<?" up to "?>"), and an
//  empty-element tag ending in "/>". The outcome is recorded in
//  m_bIsComment, m_bIsPI and m_bIsEmptyElementTag respectively.
//
//  Returns false for anything else.
//
bool CStagParser::Parse( CScanner* pScanner)
{
	// Every form handled here begins with '<'.
	if (pScanner->GetNextToken()->GetTokenCode()!=CToken::TC_LeftArrow)
	{
		return false;
	}

	CToken* pNameToken=pScanner->GetNextToken();
	switch (pNameToken->GetTokenCode())
	{
	case CToken::TC_Word:
		// An ordinary element name; handled below.
		break;

	case CToken::TC_Exclaimation:
		{
			// A comment must continue with two dashes.
			if (pScanner->GetNextToken()->GetTokenCode()!=CToken::TC_Dash)
			{
				return false;
			}
			if (pScanner->GetNextToken()->GetTokenCode()!=CToken::TC_Dash)
			{
				return false;
			}
			CCommentParser commentParser(this->GetParent());
			if (!commentParser.Parse(pScanner))
			{
				return false;
			}
			m_bIsComment=true;
			return true;
		}

	case CToken::TC_QuestionMark:
		{
			// Processing instruction.
			CPIParser piParser(this->GetParent());
			if (!piParser.Parse(pScanner))
			{
				return false;
			}
			m_bIsPI=true;
			return true;
		}

	default:
		return false;
	}

	//
	// The element name runs from the start of the word token to the
	// current scan position.
	//
	m_StringValue.m_pBegin=const_cast<char*>(pNameToken->GetTokenString());
	m_StringValue.m_pEnd=const_cast<char*>(pScanner->GetCurrentPos());

	//
	// Attribute list: each attribute is introduced by whitespace.
	//
	CAttribute attributeParser(this->GetParent());
	CToken::TokenCode nextCode=pScanner->GetNextToken()->GetTokenCode();
	while (nextCode==CToken::TC_WhiteSpace)
	{
		if (!attributeParser.Parse(pScanner))
		{
			return false;
		}
		nextCode=pScanner->GetNextToken()->GetTokenCode();
	}

	//
	// Tag terminator: '>' for a normal start tag, "/>" for an empty element.
	//
	if (nextCode==CToken::TC_RightArrow)
	{
		m_bIsEmptyElementTag=false;
		return true;
	}

	if (nextCode==CToken::TC_ForwardSlash &&
		pScanner->GetNextToken()->GetTokenCode()==CToken::TC_RightArrow)
	{
		m_bIsEmptyElementTag=true;
		return true;
	}

	return false;
}

//
// Function name:
// bool CEtagParser::Parse( CScanner* pScanner)
//
// Desc:
// Parse an end tag of the exact form   '<' '/' Name '>'   (no whitespace
// is accepted between the parts) and remember the name range in
// m_StringValue. Returns false as soon as a token does not match.
//
bool CEtagParser::Parse( CScanner* pScanner)
{
	if (pScanner->GetNextToken()->GetTokenCode()!=CToken::TC_LeftArrow)
	{
		return false;
	}

	if (pScanner->GetNextToken()->GetTokenCode()!=CToken::TC_ForwardSlash)
	{
		return false;
	}

	CToken* pNameToken=pScanner->GetNextToken();
	if (pNameToken->GetTokenCode()!=CToken::TC_Word)
	{
		return false;
	}

	// The name runs from the start of the word token to the scan position.
	m_StringValue.m_pBegin=const_cast<char*>(pNameToken->GetTokenString());
	m_StringValue.m_pEnd=const_cast<char*>(pScanner->GetCurrentPos());

	return pScanner->GetNextToken()->GetTokenCode()==CToken::TC_RightArrow;
}


//
// Function name: bool CElement::Parse(CScanner* pScan)
//
// Parse one complete element starting at the scanner's current position:
// start tag, then (unless the tag was an empty-element tag, a PI or a
// comment) the content and the matching end tag.
//
// On success the element takes the name from the start tag, links itself
// into its parent's child list (when it has a parent), marks itself valid
// and returns true. Returns false when any part fails to parse or when the
// start and end tag names differ; the element is not linked in that case.
//
bool CElement::Parse(CScanner* pScan)
{
	CStagParser StagParser(this);
	CEtagParser EtagParser(this);

	if (!StagParser.Parse(pScan))
	{
		return false;
	}

	// Tags that are complete on their own have no content and no end tag.
	const bool bSelfContained=StagParser.IsEmptyElementTag()||
							  StagParser.IsPITag()||
							  StagParser.IsCommentTag();

	if (!bSelfContained)
	{
		CContent contentParser(this);
		if (!contentParser.Parse(pScan))
		{
			return false;
		}

		if (!EtagParser.Parse(pScan))
		{
			return false;
		}

		// <name> ... </name> must agree.
		if (!(StagParser.GetNameObj()==EtagParser.GetNameObj()))
		{
			return false;
		}
	}

	//
	// Parse success: adopt the name and register with the parent.
	//
	m_StringValue=StagParser.GetNameObj();
	if (m_pParent)
	{
		m_pParent->AddChildElement(this);
	}
	m_bValid=true;
	return true;
}

//
// Function name: bool CContent::Parse(CScanner* pScan)
//
// Parse the content of an element: everything between its start tag and
// its end tag. Tokens are read one at a time and dispatched on their type:
//
//   whitespace  - ignored
//   word        - start of character data, which extends up to the next '<'
//                 and is handed to the parent element; only one run of
//                 character data is accepted per element
//   '<'         - a comment, a child element, or the end tag of the
//                 enclosing element, decided by the character that follows
//
// Returns true only when the end tag is reached; in that case the scanner
// is rewound to the '<' of the end tag so the caller can parse it.
// Returns false on any error and also when the input ends (EOF or error
// token) before an end tag is seen.
//
bool CContent::Parse(CScanner* pScan)
{
	//
	// Remember where each token starts, so the scanner can be rewound to
	// the '<' once we know what kind of construct it introduces.
	//
	const char* pCurrentPos=pScan->GetCurrentPos();
	CToken* TheToken=pScan->GetNextToken();
	CToken::TokenCode tc=TheToken->GetTokenCode();

	// Set once character data has been stored for this element.
	bool bChardataParsed=false;

	while (tc!=CToken::TC_Eof&&tc!=CToken::TC_Error)
	{
		switch (tc)
		{
		case CToken::TC_WhiteSpace:
			break; // Get the next token and check
			
		case CToken::TC_Word:
			{
				// A second run of character data is rejected.
				if (bChardataParsed)
				{
					return false;
				}
				//
				// Keep the start pointer of the CData
				// (this local intentionally shadows the outer pCurrentPos)
				//
				const char* pCurrentPos=TheToken->GetTokenString();
				
				// CData part: CChardata::Parse moves the scanner just past the next '<'
				CChardata cdataParser(this->GetParent());
				cdataParser.GetStringObj().m_pBegin=const_cast<char*>(pCurrentPos);
				if (!cdataParser.Parse(pScan))
				{
					return false;
				}

				//
				// Keep the end pointer of the CData
				// NOTE(review): BackOneStep presumably moves the scanner back
				// onto the '<' that ended the data - confirm in CScanner.
				//
				pScan->BackOneStep();
				cdataParser.GetStringObj().m_pEnd=const_cast<char*>(pScan->GetCurrentPos());
 				m_pParent->ParserAcquireCharData(cdataParser.GetStringObj());

 				bChardataParsed=true;
				break;
			}
		case CToken::TC_LeftArrow:
			{
				//
				// Check the following three conditions
				//
				// <!, </, <abc
				//
				// NOTE(review): the character after '<' is read without
				// checking it is still inside the buffer - confirm what
				// happens when '<' is the last character of the input.
				char* pCurrent=(char*)pScan->GetCurrentPos();
				if (CharTypeTable::ChartypeTable [(unsigned char)*pCurrent]==Exclaimation)
				{
					// This is comment: rewind to the '<' and skip to just past "-->"
					pScan->SetCurrentPos(pCurrentPos);
					CCommentParser commonParser(this->GetParent());
					if (!commonParser.Parse(pScan))
					{
						return false;
					}
				}
				else if (CharTypeTable::ChartypeTable [(unsigned char)*pCurrent]==ForwardSlash)
				{
					//
					// This is an etag, my job is done.
					// Rewind to the '<' so the caller can parse the end tag.
					pScan->SetCurrentPos(pCurrentPos);
					return true;
				}
				else if (CharTypeTable::ChartypeTable [(unsigned char)*pCurrent]==NormalChar)
				{
					//
					// This is another element, parse it
					// (on success the child links itself into its parent in
					// CElement::Parse; on failure it is deleted here)
					//
					pScan->SetCurrentPos(pCurrentPos);
					CElement*pElementParser=new  CElement(this->GetParent());
					if (!pElementParser->Parse(pScan))
					{
						delete pElementParser;
						return false;
					}
				}
				//
				// NOTE: any other character after '<' is NOT treated as an
				// error here; control simply falls through to the break and
				// scanning continues with the next token.
				//
 				break;
			}
		default:
			// Any other token type is not valid content.
			return false;
		};
		// Record the start of the next token before fetching it.
		pCurrentPos=pScan->GetCurrentPos();
	    TheToken=pScan->GetNextToken();
		tc=TheToken->GetTokenCode();
	}

	// Ran out of input (or hit an error token) without finding the end tag.
	return false;
}

//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
//	THE MEMBER FUNCTIONS DEFINITIONS FOR THE CLASS CElement
//

//
// Function Name: 
// void CElement::Clean()
//
// Comment: Clean up all the subelements and sibling elements
//
void CElement::Clean()
{

	CElement*p=m_pFirstChild;
	while (p)
	{
		CElement*tmp=p;
		p=p->m_pNextSibling;
		delete tmp;

		if (p==m_pFirstChild)
		{
			break;
		}
	}
	//
	// Cleanup the attribute list
	//
	vector <CAttribute*>::iterator Abegin=m_AttributeList.begin();
	vector <CAttribute*>::iterator Aend=m_AttributeList.end();
	while (Abegin!=Aend)
	{
		delete *Abegin;
		Abegin++;
	}
	m_AttributeList.clear();


}

//
// Function Name: void CElement::AddChildElement(CElement*p)
//
// Append 'p' to this element's children. The children are kept in a
// circular doubly linked list whose head is m_pFirstChild, so the last
// child is always the head's previous sibling.
//
void CElement::AddChildElement( CElement*p)
{
	if (!m_pFirstChild)
	{
		// First child: a one-element ring that points at itself.
		p->m_pPrevSibling=p;
		p->m_pNextSibling=p;
		m_pFirstChild=p;
		return;
	}

	// Splice 'p' in between the current last child and the head.
	CElement* const pLast=m_pFirstChild->m_pPrevSibling;
	p->m_pNextSibling=m_pFirstChild;
	p->m_pPrevSibling=pLast;
	pLast->m_pNextSibling=p;
	m_pFirstChild->m_pPrevSibling=p;
}


//
// Function Name: bool CElement::DeleteChild(CElement*pRMV)
//
// Unlink the child 'pRMV' from this element's circular child list and
// delete it. Returns true when the child was found and deleted, false
// when this element has no children or 'pRMV' is not one of them.
//
bool CElement::DeleteChild (CElement*pRMV)
{
	if (!m_pFirstChild)
	{
		return false;
	}

	// Walk the ring once looking for pRMV.
	CElement* pCursor=m_pFirstChild;
	do
	{
		if (pCursor==pRMV)
		{
			break;
		}
		pCursor=pCursor->m_pNextSibling;
	} while (pCursor!=m_pFirstChild);

	if (pCursor!=pRMV)
	{
		return false;
	}

	// Work out the new head before the node goes away.
	const bool bOnlyChild=(m_pFirstChild->m_pPrevSibling==m_pFirstChild);
	CElement* pNewFirst=(pRMV==m_pFirstChild)? m_pFirstChild->m_pNextSibling : m_pFirstChild;

	// Unlink and destroy.
	pRMV->m_pPrevSibling->m_pNextSibling=pRMV->m_pNextSibling;
	pRMV->m_pNextSibling->m_pPrevSibling=pRMV->m_pPrevSibling;
	delete pRMV;

	if (bOnlyChild)
	{
		pNewFirst=NULL;
	}
	m_pFirstChild=pNewFirst;
	return true;
}



//
// Function Name:
// std::ostream& operator<< (std::ostream& ,CElement const&);
//
// Comments:
// Output the XML stream from the current element
//
std::ostream& operator<< (std::ostream&os ,CElement &ElementParser)
{
	//
	// Begin Tag
	//
	os<<endl<<'<'<<ElementParser.GetName();
	
	//
	// Cleanup the attribute list
	//
	vector <CAttribute*>::iterator Abegin=ElementParser.m_AttributeList.begin();
	vector <CAttribute*>::iterator Aend=ElementParser.m_AttributeList.end();
	while (Abegin!=Aend)
	{
		os <<' '<<**Abegin;
		Abegin++;
	}

	os<<'>';

	//
	// Output the data part
	//
	os<<ElementParser.m_CharData;

 
 	//
	// Walk through the children chains and output each child
	//
	CElement* pElement=ElementParser.m_pFirstChild;
	bool bIsLastItem=false;
	while (!bIsLastItem &&pElement)
	{
		os<<*pElement;
		pElement=pElement->m_pNextSibling;

		if (!pElement||pElement==ElementParser.m_pFirstChild)
			bIsLastItem=true;
	}
	//
	// End tag
	//
	os<<endl<<"</"<<ElementParser.GetName()<<'>';
 	return os;
}


//
// ================================================================================
//
// Begin the attributes functions of the CElement
//
// 

 //
 //	Function name:int CElementIterator::GetAttributeCount()
 //  Purpose:
 // Get the number of the attributes of the element represented by the iterator
//
int CElement::GetAttributeCount()
{
	if (!IsValid()) return 0;
	return m_AttributeList.size();
}

bool CElement::GetAttributePairByIndex(int index, vector<char>&attName, vector<char>&attValue)
{
	if (index>=0 &&index<GetAttributeCount())
	{
		CAttribute* p=m_AttributeList[index];

		attName.resize(p->GetAttrNameObj().GetSize());
		std::copy(p->GetAttrNameObj().m_pBegin,p->GetAttrNameObj().m_pEnd,attName.begin());
		attName.push_back('\0');

		attValue.resize(p->GetAttrValueObj().GetSize());
		std::copy(p->GetAttrValueObj().m_pBegin,p->GetAttrValueObj().m_pEnd,attValue.begin());
		attValue.push_back('\0');


 		return true;
	}
	return false;
}

bool CElement::GetAttribute(const char*name, vector<char>&attValue)
{
	int size=GetAttributeCount();
	
	for (int i=0; i<size;i++)
	{
		CAttribute* p=m_AttributeList[i];
		if (const_cast<CStringValue&>(p->GetAttrNameObj()).Equal(name))
		{
 				attValue.resize(p->GetAttrValueObj().GetSize());
				std::copy(p->GetAttrValueObj().m_pBegin,
						  p->GetAttrValueObj().m_pEnd,
						  attValue.begin());
				attValue.push_back('\0');
 			return true;

		}
	}
	return false;
}

//
// Function Name:
// 	void CElement::AddAttributePair(const char*Name, const char*Value);
//
// Comments:
// Add the attributes value pair to the Element
//
void CElement::AddAttributePair(const char*Name, const char*Value)
{
	//
	// Create two string (enforece the internal buffer)
	//
	CStringValue strName(const_cast<char*>(Name),true);
	CStringValue strValue(const_cast<char*>(Value),true);
	CAttribute* p=new CAttribute(strName,strValue,this);
	ParserAddAttribute(p);
}

//
// Function Name:
// 	bool CElement::ModifyAttribute(const char*Name, const char*Value)
//
// Comments:
//
// Update the attribute value represented by 'Name'. 
// Return false if the attribute was not found.
//
bool CElement::ModifyAttribute(const char*Name, const char*Value)
{
	int size=GetAttributeCount();
	
	for (int i=0; i<size;i++)
	{
		CAttribute* p=m_AttributeList[i];

		if (const_cast<CStringValue&>(p->GetAttrNameObj()).Equal(Name))
			{
 				p->SetValue(Value);
 				return true;
			}
	}
	return false;
}

//
// Function Name:
// 	bool CElement::ModifyAttributeByIndex(int index, const char*Value)
//
// Comments:
//
// Replace the value of the attribute at position 'index' with 'Value'.
// Returns false when 'index' is out of range. Negative indices are now
// rejected as well; they used to index the vector out of bounds.
//
bool CElement::ModifyAttributeByIndex(int index, const char*Value)
{
	if (index<0 || index>=GetAttributeCount())
	{
		return false;
	}

	CAttribute* p=m_AttributeList[index];
	p->SetValue(Value);
	return true;
}

//
// Function Name:
// 	bool CElement::DeleteAttributeByName(const char*Name)
//
// Comments:
bool CElement::DeleteAttributeByName(const char*name)
{
	int size=GetAttributeCount();
	vector<CAttribute*>::iterator begin=m_AttributeList.begin();
	vector<CAttribute*>::iterator end=m_AttributeList.end();
	while (begin!=end)
	{
		CAttribute*p=(*begin);
		if (const_cast<CStringValue&>(p->GetAttrNameObj()).Equal(name))
		{
				delete p;
				m_AttributeList.erase(begin);
				return true;
		}
 		begin++;
	}
	return false;
}

//
// Function Name:
// 	bool CElement::DeleteAttributeByIndex(int index)
//
// Comments:
bool CElement::DeleteAttributeByIndex(int Index)
{
	int size=GetAttributeCount();
	vector<CAttribute*>::iterator begin=m_AttributeList.begin();
	vector<CAttribute*>::iterator end=m_AttributeList.end();
	int index=0;
	while (begin!=end)
	{
		if (index==Index)
		{
			CAttribute*p=(*begin);
			delete p;
			m_AttributeList.erase(begin);
			return true;
		}
		index++;
		begin++;
	}
	return false;
}













//
// End of Attributes functions
// ================================================================================

//
// Function name: bool CElement::GetElementName(vector<char>&ret)
//
// Copy the element name into 'ret' as a null-terminated character vector.
// Returns true when the name is non-empty.
//
// The vector is sized to exactly the name length before the terminator is
// appended, matching GetCharData. It used to be sized to length+1 and then
// had another '\0' pushed, which left a stray extra byte after the name
// and made the function return true even for an empty name.
//
bool CElement::GetElementName(vector<char>&ret)
{
	ret.resize(m_StringValue.GetSize());
	std::copy(m_StringValue.m_pBegin,m_StringValue.m_pEnd,ret.begin());
	ret.push_back('\0');
	return ret.size()>1;
}



//
//	Function name: 	bool CElement::GetCharData(vector<char>&ret);
//
//  Purpose:
//  Copy the element's character data into 'ret' as a null-terminated
//  character vector. Returns false, leaving 'ret' untouched, if the
//  character data is not available.
//
bool CElement::GetCharData(vector<char>&ret)
{
	if (!m_CharData)
	{
		return false;
	}

	ret.assign(m_CharData.m_pBegin,m_CharData.m_pEnd);
	ret.push_back('\0');
	return true;
}

//
// Function Name: void CElement::SetCharData(vector<char>&buffer)
//
// Descriptions:
// Replace the element's character data with a private copy of the
// contents of 'buffer'. An empty buffer leaves the current character data
// unchanged.
//
// The range is passed as raw pointers into the vector's storage.
// CStringValue takes char*, and passing vector iterators only compiles on
// library implementations where an iterator happens to be a plain pointer.
//
void CElement::SetCharData(vector<char>&buffer)
{
	if (buffer.empty())
	{
		return;
	}

	char* const pFirst=&buffer[0];
	CStringValue s(pFirst,pFirst+buffer.size(),true);
	m_CharData=s;
}



// Store the character data found by the parser for this element.
// The value is taken over by plain CStringValue assignment.
void CElement::ParserAcquireCharData(CStringValue& s)
{
	m_CharData=s;
}


std::ostream& operator<< (std::ostream&os ,CElement const&ElementParser)
{
	return os<<const_cast<CElement&>(ElementParser);
}


// Allocate a new element constructed with the given parent and give it
// the name 'Name'. The name is first copied into a string value that owns
// its characters. The new element is returned to the caller; this
// function does not itself add it to the parent's child list.
CElement* CElement::CreateNewElement(const char* Name, CElement*parent)
{
	CStringValue ownedName(const_cast<char*>(Name),true);

	CElement* pCreated=new CElement(parent);
	pCreated->SetElementName(ownedName);
	return pCreated;
}

//
// Function name: CElement* CElement::GetFirstChild(const char*Name)
//
// Comments:
// 'Name' is a dot-separated path such as name1.name2.name3.
//
// The first path segment is matched against the names of this element's
// children. If it is the last segment, the first matching child is
// returned. Otherwise the rest of the path is looked up recursively below
// each matching child, and the first hit is returned.
//
// Returns NULL when nothing matches.
//
// On a match of the last segment the original code returned the first
// child of this element rather than the child that matched.
//
CElement* CElement::GetFirstChild(const char*Name)  
{
	const string path(Name);
	const string::size_type dotPos=path.find('.');
	const bool bLastSegment=(dotPos==string::npos);

	// Split into "this level" and "everything below".
	const string segment=bLastSegment ? path : path.substr(0,dotPos);
	const string remainder=bLastSegment ? string() : path.substr(dotPos+1);

	CElementIterator iter(GetFirstChild());
	while (iter.IsValid())
	{
		if (const_cast<CStringValue&>((*iter)->GetName()).Equal(segment.c_str()))
		{
			if (bLastSegment)
			{
				return *iter;
			}

			// Keep looking in later siblings if this subtree has no match.
			CElement* pFound=(*iter)->GetFirstChild(remainder.c_str());
			if (pFound)
			{
				return pFound;
			}
		}
		++iter;
	}
	return NULL;
}





////////////////////////////////////////////////////////////////////////////////////////////////////
// 
//			THE MEMBER FUNCTIONS OF THE 
//
//			class CElementIterator
//
//

//
// Copy constructors and operator =
//

// Construct an iterator positioned on 'pObj'. The iterator is valid only
// when 'pObj' is non-NULL.
//
// m_pCurrentElement is always initialised (to NULL for a NULL argument).
// It used to be left uninitialised in that case, so copying such an
// iterator read an indeterminate pointer.
CElementIterator::CElementIterator (CElement* pObj):m_bValid(pObj!=NULL)
{
	m_pCurrentElement=pObj;
}

// Copy constructor: the copy refers to the same element and has the same
// validity as the source.
CElementIterator::CElementIterator(CElementIterator const &obj)
{
	m_bValid=obj.m_bValid;
	m_pCurrentElement=obj.m_pCurrentElement;
}

// Assignment: afterwards this iterator refers to the same element and has
// the same validity as 'obj'. Both members are plain values, so
// self-assignment needs no special handling.
CElementIterator& CElementIterator::operator= (CElementIterator const&obj)
{
	m_pCurrentElement=obj.m_pCurrentElement;
	m_bValid=obj.m_bValid;
	return *this;
}


//
// Function Name: CElementIterator& CElementIterator::operator++ ()
//
// Comments:
//
// Move the iterator to the next sibling of the current element. Because
// the sibling list is circular, reaching the parent's first child again
// means the walk is complete. In that case, or when there is no next
// sibling at all, the iterator becomes invalid and keeps pointing at the
// last element. Incrementing an invalid iterator does nothing.
//
CElementIterator& CElementIterator::operator++ ()
{
	if (!IsValid())
	{
		return *this;
	}

	CElement* const pNext=m_pCurrentElement->m_pNextSibling;

	// The parent is only consulted when a next sibling exists.
	const bool bFinished=(pNext==NULL) ||
						 (pNext==m_pCurrentElement->GetParent()->m_pFirstChild);
	if (bFinished)
	{
		m_bValid=false;
	}
	else
	{
		m_pCurrentElement=pNext;
	}
	return *this;
}

//
// Function Name: void CElementIterator::Delete ()
//
// Comments:
//
// Delete the element the iterator refers to. After the call the iterator
// is invalid. Calling Delete on an invalid iterator does nothing.
//
// An element with a parent is removed through the parent's DeleteChild,
// which unlinks it from the sibling list before deleting it. This branch
// used to be empty, so the element was silently kept while the iterator
// was invalidated anyway.
//
// NOTE(review): when the element is a root that is also owned by a
// CXmlConf, deleting it here leaves that owner with a stale pointer -
// confirm how callers use this on root elements.
//
void CElementIterator::Delete ()
{
	if (!IsValid())
	{
		return;
	}

	CElement* pParent=m_pCurrentElement->GetParent();
	if (!pParent)
	{
		// This is the root, just delete it
		delete m_pCurrentElement;
	}
	else
	{
		// If the element is not in the parent's child list, DeleteChild
		// returns false and the element is left alone.
		pParent->DeleteChild(m_pCurrentElement);
	}

	m_bValid=false;
	m_pCurrentElement=NULL;
}

// Destructor: release the scanner, the element tree and the text buffer.
//
// The buffer is allocated with new char[...], so it must be released with
// delete[] (it used to be released with plain delete). Deleting a NULL
// pointer is a no-op, so no explicit checks are needed.
CXmlConf::~CXmlConf()
{
	delete m_pScanner;
	delete m_pRoot;
	delete []m_pBuffer;
}

//
// Constructor: CXmlConf::CXmlConf(const char* fileName)
//
// Comments:
// Read the whole XML file 'fileName' into memory and parse it. The object
// is valid (m_bValid) only when the file could be opened, read and parsed.
//
// Fixes over the previous version:
//  - m_bValid was set to true even when parsing failed.
//  - The buffer was deleted here after Parse() had already deleted it, and
//    m_pBuffer was left dangling for the destructor to delete once more.
//    The buffer is now only stored in m_pBuffer after a successful parse.
//  - Read errors were tested with "blockSize<0" on an unsigned value, which
//    can never be true; ferror() is used instead.
//  - A failing fseek()/ftell() is now detected.
//
CXmlConf::CXmlConf(const char* fileName):
		m_bValid(false),
		m_pRoot(NULL),
		m_pBuffer(NULL),
		m_pScanner(NULL)
{
	FILE * pFile=fopen(fileName,"r");
	if (!pFile)
		return;

	// Determine the file size by seeking to the end.
	long fileSize=-1;
	if (fseek (pFile, 0, SEEK_END)==0)
	{
		fileSize=ftell (pFile);
	}
	if (fileSize<0 || fseek (pFile, 0, SEEK_SET)!=0)
	{
		fclose(pFile);
		return;
	}

	const size_t size=static_cast<size_t>(fileSize);
	char* pBuffer= new char[size];

	// Read until the buffer is full or nothing more can be read. Fewer
	// bytes than 'size' may arrive (e.g. text-mode newline translation).
	size_t bytesRead=0;
	bool bReadError=false;
	while (bytesRead<size)
	{
		const size_t blockSize=fread(pBuffer+bytesRead,1,size-bytesRead,pFile);
		if (blockSize==0)
		{
			bReadError=(ferror(pFile)!=0);
			break;
		}
		bytesRead+=blockSize;
	}
	fclose(pFile);

	if (bReadError)
	{
		delete []pBuffer;
		return;
	}

	// Parse() frees pBuffer itself when it fails, so the buffer is only
	// adopted on success.
	if (Parse(pBuffer,static_cast<int>(bytesRead)))
	{
		m_pBuffer=pBuffer;
		m_bValid=true;
	}
}

//
// Constructor: CXmlConf::CXmlConf(char* begin, char* end)
//
// Comments:
// Construct from the XML text in the range [begin, end). The text is
// copied into an internal buffer, so the caller's memory is not referenced
// after construction. The object is valid (m_bValid) only when the text
// parsed successfully.
//
// Parse() frees the buffer itself when it fails. The previous version
// deleted it a second time in that case.
//
CXmlConf::CXmlConf(char* begin, char* end):
		m_bValid(false),
		m_pRoot(NULL),
		m_pBuffer(NULL),
		m_pScanner(NULL)
{
	int buffersize=end-begin;
	char*pBuffer= new char[buffersize];
	std::copy(begin,end,pBuffer);

	if (Parse(pBuffer,buffersize))
	{
		// Adopt the buffer; the parsed data is read from it.
		m_pBuffer=pBuffer;
		m_bValid=true;
	}
	// else: pBuffer has already been released by Parse().
}

bool CXmlConf::Parse(char* pBuffer, int buffersize)
{
   m_pScanner= new CScanner(pBuffer,pBuffer+buffersize);
   m_pRoot=new CElement(NULL);
   m_pScanner->SkipWhiteSpace();

   CPIParser piParser(NULL);
   if (!piParser.Parse(m_pScanner)) 
   {
	   delete []pBuffer;
	   delete m_pRoot;
	   delete m_pScanner;
	   m_pRoot=NULL;
	   m_pScanner=NULL;
	   return false;
   }
   m_pScanner->SkipWhiteSpace();


   if (!m_pRoot->Parse(m_pScanner))
   {
	   delete []pBuffer;
	   delete m_pRoot;
	   delete m_pScanner;
	   m_pRoot=NULL;
	   m_pScanner=NULL;
	   return false;
   }
   return true;
}

//
//	Function name: CElement* CXmlConf::Clone()
//
//  Purpose:
//  Pass the root element to the global Clone function and return the
//  pointer it produces.
//
CElement* CXmlConf::Clone()
{
	CElement* pCopy=::Clone(m_pRoot);
	return pCopy;
}
 
// Look up an element by a dot-separated path such as root.child.grandchild.
//
// The first segment must equal the name of the root element. With no
// further segments the root itself is returned; otherwise the rest of the
// path is resolved below the root with CElement::GetFirstChild.
//
// Returns NULL when this object is not usable or the path does not match.
CElement* CXmlConf::GetRootElement(const char*Name) 
{
	const string path(Name);
	const string::size_type dotPos=path.find_first_of(".",0);
	const bool bHasRemainder=(dotPos!=string::npos);
	const string rootName=bHasRemainder ? path.substr(0,dotPos) : path;

	if (!(*this))
	{
		return NULL;
	}

	if (!const_cast<CStringValue&>(m_pRoot->GetName()).Equal(rootName.c_str()))
	{
		return NULL;
	}

	if (!bHasRemainder)
	{
		return m_pRoot;
	}

	const string remainder=path.substr(dotPos+1);
	return GetRootElement()->GetFirstChild(remainder.c_str());
}

// Write the configuration as XML text: a fixed XML declaration, followed -
// when the object is usable and has a root element - by the element tree
// and a trailing newline.
ostream& operator<<(ostream&os, CXmlConf const& obj)
{
	os<<"<?xml version=\"1.0\" encoding=\"UTF-8\" ?>";

	if (obj)
	{
		if (obj.GetRootElement())
		{
			os<<*obj.GetRootElement()<<endl;
		}
	}
	return os;
}
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.
License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.
A list of licenses authors might use can be found here
Written By
Richard Lin
Architect
United States
Richard Lin is a senior software engineer in Silicon Valley.

Richard Lin was born in Beijing and came to the US in the fall of 1995. He began his software career in the Bay Area of California in 1997. He has worked on many interesting projects, including manufacturing testing systems, wireless AP firmware and applications, email anti-virus systems and personal firewalls. He loves playing go (WeiQi in Chinese) and soccer in his spare time. He has a beautiful wife and a cute daughter and enjoys his life in San Jose, California.
MiniXML, a Fast parser for the XML Configuration File

License

Comments and Discussions