Introduction
If you have ever wanted a simple MFC-based collection for handling an ANSI
XML file, here is a solution.
CXXMLFile
: (Class) eXtended XML File
This class reads simple XML files.
It assumes the HTML/XML conventions for symbols and tags. However, the <? ... ?>
tag is ignored for now, which means that no code-page support is available
(this keeps the code simpler).
When CXXMLFile
reads a file, it creates a tag tree with three
types of nodes:

Legend:
CElementPart
. Abstract base class for all node types. It
includes the common member variable 'm_Text
'.
CText
. Derived from CElementPart
. It represents a piece of
text and can be surrounded by any kind of node. Symbol replacement is
applied here, for example to the entity codes found in HTML.
CComment
. A comment in form <!---->
. It is not
affected by symbol replacement.
CElement
. A tag. It has three properties:
m_Text
(the tag name), AttributeToValue
(a string map from attribute name to attribute value), and, by
inheritance, a CList
of child nodes.
Consequently, only a CElement
node should have
children.
CXXMLFile Class Reference
List of all members.
Public Methods
CXXMLFile ()
Constructor.
virtual ~CXXMLFile ()
Destructor.
void RemoveAll ()
Delete all entries and make a default root node.
void AddSymbol (CString coded, CString decoded)
Adds a symbol.
void DefaultSymbols ()
Set default symbols.
void ClearSymbols ()
Clear symbols.
void AddOpenTag (CString tag)
Adds an open tag, i.e. an HTML-style tag such as <br> that has no closing tag.
bool Write ()
Writes XML file.
CString GetFile ()
Get XML filename.
void SetFile (CString filename)
Set XML filename.
bool Read ()
The read XML function.
|
CElementPart * Root ()
Gets the root. (Create one if it's empty).
CElementPart * AddElement (CElementPart *Parent)
Adds a node type element.
CElementPart * AddComment (CElementPart *Parent, CString text)
Adds a comment node.
CElementPart * AddText (CElementPart *Parent, CString text)
Adds a text node.
void SetText (CElementPart *node, CString text)
Sets text property in a node.
void GetText (CElementPart *node, CString &text)
Gets text property in a node.
bool IsElement (CElementPart *node)
Determines whether node is an element.
bool IsComment (CElementPart *node)
Determines whether node is a comment.
bool IsText (CElementPart *node)
Determines whether node is text.
CMapStringToString * GetElementAttrMap (CElementPart *node)
Returns a pointer to the attribute map of the element.
bool BuildChildList (CElementPart *node, CList< CElementPart *, CElementPart
* > &l)
Builds a list of child nodes.
|
Public Attributes
Private Methods
void Decodify (CString &html)
Decodify using symbol table.
void Codify (CString &html)
Codify using symbol table.
|
void WritePart (CStdioFile *f, CElementPart *p, int Depth, bool bNoIdent=false)
Writes an XML node to a file (used by Write only). |
Private Attributes
CElementPart * m_Root
CMapStringToString m_Symbols |
CString m_Filename
CMapStringToString m_OpenTags |
Header
#if !defined(AFX_XXMLFILE_H__F5E3CD25_0B84_4191_A1A7_B3669180DFFA__INCLUDED_)
#define AFX_XXMLFILE_H__F5E3CD25_0B84_4191_A1A7_B3669180DFFA__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif
#include <afxtempl.h>
class CXXMLFile {
public:
CXXMLFile();
virtual ~CXXMLFile();
public:
class CElementPart;
class CElementPart{
public:
CElementPart* m_Parent;
CString m_Text;
enum TType { TElement, TText, TComment } m_Type;
public:
CElementPart(){};
virtual ~CElementPart(){};
};
class CElement : public CElementPart, public CList<CElementPart*,CElementPart*>{
public:
CElement(){ m_Type=TElement; };
virtual ~CElement();
public:
CMapStringToString AttributeToValue;
};
class CText : public CElementPart{
public:
CText(){ m_Type=TText; };
};
class CComment : public CElementPart{
public:
CComment(){ m_Type=TComment; };
};
public:
CStringList m_ErrorList;
void RemoveAll();
void AddSymbol(CString coded, CString decoded);
void DefaultSymbols();
void ClearSymbols();
void AddOpenTag(CString tag);
bool Write();
CString GetFile();
void SetFile(CString filename);
bool Read();
CElementPart* Root();
CElementPart* AddElement(CElementPart* Parent);
CElementPart* AddComment(CElementPart* Parent, CString text);
CElementPart* AddText(CElementPart* Parent, CString text);
void SetText(CElementPart* node, CString text);
void GetText(CElementPart* node, CString &text);
bool IsElement(CElementPart* node);
bool IsComment(CElementPart* node);
bool IsText(CElementPart* node);
CMapStringToString* GetElementAttrMap(CElementPart* node);
bool BuildChildList(CElementPart* node, CList<CElementPart*,CElementPart*> &l);
private:
CElementPart* m_Root;
void Decodify(CString &html);
void Codify(CString &html);
CMapStringToString m_Symbols;
void WritePart(CStdioFile *f, CElementPart * p, int Depth, bool bNoIdent=false);
CString m_Filename;
CMapStringToString m_OpenTags;
};
#endif
Source
#include "stdafx.h"
#include "XXMLFile.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
// Creates an empty document: a lone "?root?" element plus the default
// symbol table.
CXXMLFile::CXXMLFile()
	: m_Root(NULL)	// RemoveAll() inspects m_Root, so it must be NULL first
{
	RemoveAll();
	DefaultSymbols();
}
// Releases the node tree, if there is one.
CXXMLFile::~CXXMLFile()
{
	if(m_Root==NULL)
		return;
	delete m_Root;
}
// Deletes every child node, detaching each one from the list first.
CXXMLFile::CElement::~CElement()
{
	for(;;){
		if(IsEmpty())
			break;
		CElementPart* child = RemoveHead();
		delete child;
	}
}
// Returns true for the four characters treated as whitespace between
// tokens: space, tab, carriage return and line feed.
static bool IsSeparator(char ch){
	return ch==' ' || ch=='\t' || ch=='\r' || ch=='\n';
}
// Advances pos past any run of separator characters in html.
//   html    - text being scanned
//   pos     - in/out: current index; on return, the first non-separator
//             index, or the text length if only separators remained
//   FileRow - in/out: incremented once for every '\n' that is skipped.
//             Taken by reference so the caller's line counter really
//             advances; when it was passed by value the increments were
//             lost and reported line numbers lagged behind.
// Returns false when the end of the text was reached, true otherwise.
static bool HopSeparators(CString &html, int &pos,int &FileRow){
	const int length = html.GetLength();
	while( (pos<length) && IsSeparator(html.GetAt(pos)) ){
		if(html.GetAt(pos)=='\n')
			FileRow++;
		pos++;
	}
	return pos<length;
}
// Counts how often ch occurs in s. Works on the by-value copy: every
// occurrence is replaced by nothing and the number of replacements is
// the answer.
static int CountChars(CString s,char ch){
	const CString needle = CString()+ch;
	const int occurrences = s.Replace(needle,"");
	return occurrences;
}
// Finds the first index at or after pos whose character is one of the
// characters listed in chars. Returns -1 when there is none.
static int FindChars(CString &html, int pos, LPCTSTR chars){
	CString wanted = chars;
	for(int i=pos; i<html.GetLength(); i++){
		const CString current(html.GetAt(i));
		if(wanted.FindOneOf(current)!=-1)
			return i;
	}
	return -1;
}
bool CXXMLFile::Read()
{
CString filename = m_Filename;
int FileRow=1;
if(m_Root!=NULL) delete m_Root;
CElementPart** crut = &m_Root;
CElement* Element = new CElement();
Element->m_Parent=(*crut);
(*crut)=Element;
((CElement*)(*crut))->m_Text = "?root?";
m_ErrorList.RemoveAll();
CString html;
TRY{
CFile * f = new CFile(filename,CFile::modeRead|CFile::shareDenyNone);
if(f==NULL) ::AfxThrowFileException(CFileException::fileNotFound,-1,filename);
html.Empty();
char * buf = html.GetBufferSetLength(f->GetLength());
f->SeekToBegin();
f->Read(buf,f->GetLength());
f->Close();
delete f;
html.ReleaseBuffer();
html.FreeExtra();
html.Replace("\r\n","\n");
}CATCH_ALL(e){
m_ErrorList.AddTail("Error: File not found.");
return false;
}END_CATCH_ALL
int p1,p2,p3,p4;
p1=p2=p3=p4=0;
while(true){
if(p1>=html.GetLength()) return true;
p2 = html.Find('<',p1);
if(p2==-1){
m_ErrorList.AddTail("Warning at line "+CString(FileRow)+
": There's some text before EOF, ignoring.");
return true;
}
CString text = html.Mid(p1,p2-p1);
Decodify(text);
if(!text.IsEmpty()){
if( (*crut)==NULL ){
m_ErrorList.AddTail("Warning at line "+CString(FileRow)
+": No tag active but text found, must be at the start, ignoring.");
} else {
CText * t = new CText();
t->m_Text = text;
((CElement*)(*crut))->AddTail( t );
FileRow+=CountChars(text,'\n');
}
}
p1=p2+1;
if(p1>=html.GetLength()){
m_ErrorList.AddTail("Error at line "+CString(FileRow)+
": Tag started buf EOF found.");
return false;
} else
if(html.Mid(p1,3)=="!--"){
p1=p1+3;
p2 = html.Find("-->",p1);
CString text = html.Mid(p1,p2-p1);
if(p2==-1){
m_ErrorList.AddTail("Error at line "+CString(FileRow)+
": Comment tag unclosed.");
return false;
}
if( (*crut)==NULL ){
m_ErrorList.AddTail("Warning at line "+CString(FileRow)+
": No tag active but text found, ignoring.");
} else {
CComment * t = new CComment();
t->m_Text = text;
((CElement*)(*crut))->AddTail( t );
FileRow+=CountChars(text,'\n');
}
p1=p2+=3;
} else
if(html.GetAt(p1)=='?'){
p2 = html.Find("?>",p1+1);
if(p2==-1){
m_ErrorList.AddTail("Error at line "+CString(FileRow)+
": Tag <? unclosed.");
return false;
} else {
p1=p2+2;
continue;
}
} else
if(html.GetAt(p1)=='/'){
p2 = html.Find('>',p1);
if(p2==-1){
m_ErrorList.AddTail("Error at line "+CString(FileRow)+
": Tag unclosed.");
return false;
}
p1++;
CString tagname = html.Mid(p1,p2-p1);
tagname.TrimLeft();
tagname.TrimRight();
if( (*crut)==NULL ){
m_ErrorList.AddTail("Error at line "+CString(FileRow)+
": Closing tag when no tag???.");
return false;
}
if( (*crut)->m_Text!=tagname ){
m_ErrorList.AddTail("Error at line "+CString(FileRow)+
": Closing tag differs from open tag.");
return false;
}
(*crut)=(*crut)->m_Parent;
p1=p2+1;
} else {
if(!HopSeparators(html,p1,FileRow)){
m_ErrorList.AddTail("Error at line "+CString(FileRow)+
": Unspected EOF.");
return false;
}
p2=FindChars(html,p1," \t\r\n>");
if(p2==-1){
m_ErrorList.AddTail("Error at line "+CString(FileRow)+
": Unspected EOF.");
return false;
}
CString tag = html.Mid(p1,p2-p1);
CElement* Element = new CElement();
Element->m_Parent=(*crut);
((CElement*)(*crut))->AddTail( ((CElementPart*)Element) );
(*crut)=Element;
((CElement*)(*crut))->m_Text = tag;
p1=p2;
while(true){
if(!HopSeparators(html,p1,FileRow)){
m_ErrorList.AddTail("Error at line "+
CString(FileRow)+": Unspected EOF.");
return false;
}
if(html.GetAt(p1)=='>'){
p1+=1;
tag.MakeLower();
CString value;
if(m_OpenTags.Lookup(tag,value)){
(*crut)=(*crut)->m_Parent;
}
break;
};
if(html.Mid(p1,2)=="/>"){
p1+=2;
(*crut)=(*crut)->m_Parent;
break;
};
p2=html.Find("=\"",p1);
if(p2==-1){
m_ErrorList.AddTail("Error at line "+CString(FileRow)+
": Unspected value form.");
return false;
}
CString valname = html.Mid(p1,p2-p1);
valname.TrimLeft();
valname.TrimRight();
p1=p2+2;
CString value;
while(true){
if(p1>=html.GetLength()){
m_ErrorList.AddTail("Error at line "+
CString(FileRow)+": Unspected EOF.");
return false;
} else
if(html.GetAt(p1)=='\"'){
p1++;
break;
} else
if(html.Mid(p1,2)=="\\\""){
value+="\"";
p1+=2;
} else {
value+=html.GetAt(p1);
p1++;
}
}
Element->AttributeToValue[valname]=value;
}
}
}
}
// Remembers the path that Read() and Write() operate on.
void CXXMLFile::SetFile(CString filename)
{
	this->m_Filename = filename;
}
// Returns the path most recently given to SetFile().
CString CXXMLFile::GetFile()
{
	CString current = m_Filename;
	return current;
}
void CXXMLFile::WritePart(CStdioFile *f, CElementPart * p, int Depth, bool bNoIdent){
int j;
if(p->m_Type==CElementPart::TElement){
{
if(f->GetPosition()!=0)
f->WriteString("\n");
for(j=0;j<Depth;j++)
f->WriteString(" ");
}
f->WriteString("<"+p->m_Text);
POSITION pos = ((CElement*)p)->AttributeToValue.GetStartPosition();
while(pos!=NULL){
CString AtName,AtValue;
((CElement*)p)->AttributeToValue.GetNextAssoc(pos,AtName,AtValue);
f->WriteString(" "+AtName+"=\""+AtValue+"\"");
};
if(((CElement*)p)->IsEmpty()) {
CString tag;
tag = p->m_Text;
tag.MakeLower();
CString value;
if(m_OpenTags.Lookup(tag,value)){
f->WriteString(">");
} else {
f->WriteString("/>");
}
} else {
f->WriteString(">");
bool NoIdent = false;
if(((CElement*)p)->GetCount()==1)
NoIdent=true;
pos = ((CElement*)p)->GetHeadPosition();
while(pos!=NULL){
CElementPart*e = ((CElement*)p)->GetAt(pos);
WritePart(f,e,Depth+1,NoIdent);
((CElement*)p)->GetNext(pos);
}
if(!NoIdent){
{
if(f->GetPosition()!=0)
f->WriteString("\n");
for(j=0;j<Depth;j++)
f->WriteString(" ");
}
}
f->WriteString("</"+p->m_Text+">");
}
} else
if(p->m_Type==CElementPart::TComment){
{
if(f->GetPosition()!=0)
f->WriteString("\n");
for(j=0;j<Depth;j++)
f->WriteString(" ");
}
f->WriteString("<!--"+p->m_Text+"-->");
} else
if(p->m_Type==CElementPart::TText){
CString empty_string = p->m_Text;
empty_string.Replace('\n',' ');
empty_string.Replace('\r',' ');
empty_string.Replace('\t',' ');
while(0!=empty_string.Replace(" "," "));
if((!empty_string.IsEmpty())&&(empty_string!=" ")) {
if(!bNoIdent){
{
if(f->GetPosition()!=0)
f->WriteString("\n");
for(j=0;j<Depth;j++)
f->WriteString(" ");
}
}
CString text = p->m_Text;
Codify(text);
f->WriteString(text);
}
}
};
bool CXXMLFile::Write()
{
m_ErrorList.RemoveAll();
if(m_Root==NULL) {
m_ErrorList.AddTail("Error: NULL tree.");
return false;
}
if(m_Root->m_Type!=CElementPart::TElement) {
m_ErrorList.AddTail("Error: tree root is not an Element.");
return false;
}
if(((CElement*)m_Root)->m_Text!="?root?") {
m_ErrorList.AddTail("Error: tree root is not named ?root?");
return false;
}
CStdioFile * f = new CStdioFile(m_Filename,CFile::modeCreate|
CFile::modeWrite|CFile::shareDenyNone|CFile::typeText);
if(f==NULL){
m_ErrorList.AddTail("Error: cannot open '"+m_Filename+"' for writing.");
return false;
}
CElement * root = ((CElement*)m_Root);
POSITION pos = root->GetHeadPosition();
while(pos!=NULL){
CElementPart * p = root->GetAt(pos);
WritePart(f,p,0);
root->GetNext(pos);
}
f->Close();
delete f;
return true;
}
// Registers the standard entity codes (lt, gt, quot, nbsp, apos, amp) and
// marks "br" as an open tag.
//
// Each coded form is spelled as two adjacent string literals, which the
// compiler joins into one. That keeps the entity text intact if this
// listing is ever run through an HTML renderer; a renderer collapsing the
// entities is what turned these calls into identity mappings (and one of
// them into invalid C++).
void CXXMLFile::DefaultSymbols()
{
	AddSymbol("&" "lt;","<");
	AddSymbol("&" "gt;",">");
	AddSymbol("&" "quot;","\"");
	// NOTE(review): the decoded side is taken to be a plain space, which
	// makes Codify() write every space as the nbsp entity - confirm.
	AddSymbol("&" "nbsp;"," ");
	AddSymbol("&" "apos;","'");
	AddSymbol("&" "amp;","&");
	// operator[] inserts the key; the mapped value is never used.
	m_OpenTags["br"];
}
// Empties both lookup tables: the registered open tags and the symbol
// replacements.
void CXXMLFile::ClearSymbols()
{
	m_OpenTags.RemoveAll();
	m_Symbols.RemoveAll();
}
// Registers (or overwrites) the replacement pair coded <-> decoded.
void CXXMLFile::AddSymbol(CString coded, CString decoded)
{
	m_Symbols.SetAt(coded,decoded);
}
void CXXMLFile::RemoveAll()
{
ClearSymbols();
m_OpenTags.RemoveAll();
if(m_Root!=NULL) delete m_Root;
CElementPart** crut = &m_Root;
CElement* Element = new CElement();
Element->m_Parent=(*crut);
(*crut)=Element;
((CElement*)(*crut))->m_Text = "?root?";
}
// Replaces, in place, every occurrence of a symbol's decoded form with its
// coded form. The text is scanned left to right; after a replacement the
// scan resumes behind the inserted text, so inserted text is never
// re-examined.
void CXXMLFile::Codify(CString &html)
{
	int pos = 0;
	while(pos<html.GetLength()){
		POSITION p = m_Symbols.GetStartPosition();
		while(p!=NULL){
			CString coded, decoded;
			m_Symbols.GetNextAssoc(p,coded,decoded);
			// An empty pattern matches at every position; replacing it
			// would grow the text forever.
			if(decoded.IsEmpty())
				continue;
			const int span = decoded.GetLength();
			if((pos+span)<=html.GetLength() && html.Mid(pos,span)==decoded){
				html = html.Left(pos) + coded + html.Mid(pos+span);
				// Step onto the last inserted character; the pos++ below
				// then moves past it.
				pos = pos + coded.GetLength()-1;
				break;
			}
		}
		pos++;
	}
}
// Replaces, in place, every occurrence of a symbol's coded form with its
// decoded form. The text is scanned left to right; after a replacement the
// scan resumes behind the inserted text, so decoded output is never
// decoded a second time.
void CXXMLFile::Decodify(CString &html)
{
	int pos = 0;
	while(pos<html.GetLength()){
		POSITION p = m_Symbols.GetStartPosition();
		while(p!=NULL){
			CString coded, decoded;
			m_Symbols.GetNextAssoc(p,coded,decoded);
			// An empty pattern matches at every position; replacing it
			// would never let the scan reach the end of the text.
			if(coded.IsEmpty())
				continue;
			const int span = coded.GetLength();
			if((pos+span)<=html.GetLength() && html.Mid(pos,span)==coded){
				html = html.Left(pos) + decoded + html.Mid(pos+span);
				// Step onto the last inserted character; the pos++ below
				// then moves past it.
				pos = pos + decoded.GetLength()-1;
				break;
			}
		}
		pos++;
	}
}
// Registers tag (case-insensitively) as an open tag.
void CXXMLFile::AddOpenTag(CString tag)
{
	CString key = tag;
	key.MakeLower();
	// operator[] creates the entry if needed; only the key's presence is
	// ever tested.
	m_OpenTags[key];
}
// Returns the root node, creating an empty "?root?" element first when the
// tree does not have one.
CXXMLFile::CElementPart* CXXMLFile::Root()
{
	if(m_Root==NULL){
		CElement* fresh = new CElement();
		fresh->m_Parent = NULL;
		fresh->m_Text = "?root?";
		m_Root = fresh;
	}
	return m_Root;
}
// Appends a new, empty element as the last child of Parent.
//   Parent - node to extend; must be an element
// Returns the new node, or NULL when Parent is NULL or not an element.
// The tree owns the returned node.
CXXMLFile::CElementPart* CXXMLFile::AddElement(CElementPart* Parent){
	if(Parent==NULL || Parent->m_Type!=CElementPart::TElement)
		return NULL;
	CElement * owner = static_cast<CElement*>(Parent);
	CElement * created = new CElement();
	// Link the child back to its parent (this was previously left unset).
	created->m_Parent = Parent;
	owner->AddTail( static_cast<CElementPart*>(created) );
	return created;
}
// Stores text as the m_Text of node.
void CXXMLFile::SetText(CElementPart* node, CString text)
{
	CString &target = node->m_Text;
	target = text;
}
// Copies the m_Text of node into text.
void CXXMLFile::GetText(CElementPart* node, CString &text)
{
	const CString &source = node->m_Text;
	text = source;
}
// Appends a comment node holding text as the last child of Parent.
//   Parent - node to extend; must be an element
//   text   - comment content, without the comment delimiters
// Returns the new node, or NULL when Parent is NULL or not an element.
// The tree owns the returned node.
CXXMLFile::CElementPart* CXXMLFile::AddComment(CElementPart* Parent, CString text){
	if(Parent==NULL || Parent->m_Type!=CElementPart::TElement)
		return NULL;
	CElement * owner = static_cast<CElement*>(Parent);
	CComment * created = new CComment();
	created->m_Parent = Parent;
	// The text argument used to be ignored, leaving the comment empty.
	created->m_Text = text;
	owner->AddTail( static_cast<CElementPart*>(created) );
	return created;
}
// Appends a text node holding text as the last child of Parent.
//   Parent - node to extend; must be an element
//   text   - content in decoded form (WritePart applies Codify on output)
// Returns the new node, or NULL when Parent is NULL or not an element.
// The tree owns the returned node.
CXXMLFile::CElementPart* CXXMLFile::AddText(CElementPart* Parent, CString text){
	if(Parent==NULL || Parent->m_Type!=CElementPart::TElement)
		return NULL;
	CElement * owner = static_cast<CElement*>(Parent);
	CText * created = new CText();
	created->m_Parent = Parent;
	// The text argument used to be ignored, leaving the node empty.
	created->m_Text = text;
	owner->AddTail( static_cast<CElementPart*>(created) );
	return created;
}
// True when node is a tag (element) node.
bool CXXMLFile::IsElement(CElementPart* node)
{
	const bool isTag = (CElementPart::TElement==node->m_Type);
	return isTag;
}
// True when node is a comment node.
bool CXXMLFile::IsComment(CElementPart* node)
{
	const bool isRemark = (CElementPart::TComment==node->m_Type);
	return isRemark;
}
// True when node is a text node.
bool CXXMLFile::IsText(CElementPart* node)
{
	const bool isPlainText = (CElementPart::TText==node->m_Type);
	return isPlainText;
}
// Gives access to an element's attribute map (name -> value).
// Returns NULL when node is not an element.
CMapStringToString* CXXMLFile::GetElementAttrMap(CElementPart* node)
{
	if(node->m_Type==CElementPart::TElement){
		CElement * owner = static_cast<CElement*>(node);
		return &(owner->AttributeToValue);
	}
	return NULL;
}
// Fills l with the direct children of node, in document order.
//   node - node to inspect; must be an element
//   l    - output list; emptied first, then given one entry per child.
//          The pointers still belong to the tree and must not be deleted.
// Returns false, leaving l untouched, when node is NULL or not an element.
bool CXXMLFile::BuildChildList(CElementPart* node,
	CList<CElementPart*,CElementPart*> &l)
{
	if(node==NULL || node->m_Type!=CElementPart::TElement)
		return false;
	l.RemoveAll();
	// Copy the children. Previously the node itself was appended, so the
	// result never contained a child.
	CElement * owner = static_cast<CElement*>(node);
	POSITION pos = owner->GetHeadPosition();
	while(pos!=NULL){
		l.AddTail( owner->GetNext(pos) );
	}
	return true;
}
Example
The simplest example is to load an XML file, convert it into a CXXMLFile
collection, and then write it back out as an XML file.
We will do the test with books.xml (from MSDN XML SDK)
Source code (extract)
#include "stdafx.h"
#include "XMLTest.h"
....
// Round trip: parse books.xml, then write the tree to books_out.xml.
CXXMLFile xml;
xml.SetFile("books.xml");
bool bok = xml.Read();
if(bok){
	xml.SetFile("books_out.xml");
	// Drop the symbol table so the text is written without entity coding.
	xml.ClearSymbols();
	bok = xml.Write();
}
if(!bok){
	// Report whatever the failing step left in the error list. Each message
	// is passed as an argument, never as the format string, so a '%' in a
	// message cannot be misread as a conversion.
	POSITION pos = xml.m_ErrorList.GetHeadPosition();
	while(pos!=NULL){
		printf("%s\n",(LPCTSTR)xml.m_ErrorList.GetNext(pos));
	}
}