Click here to Skip to main content
15,886,199 members
Articles / Containers / Virtual Machine

TOOL

Rate me:
Please Sign up or sign in to vote.
4.98/5 (52 votes)
23 Oct 2006 · 76 min read · 230.2K   5.4K   147  
TOOL (Tiny Object Oriented Language) is an easily-embedded, object-oriented, C++-like-language interpreter. The purpose of this article is to introduce the TOOL interpreter and language from the perspective of a person who has a desire to include a scripting solution as part of his project.
#ifndef XML_PARSER_H_INCLUDED
#define XML_PARSER_H_INCLUDED
/*****************************************************************************/
/*                              HEADER FILE                                  */
/*****************************************************************************/
/*
          $Archive:   $

         $Revision:   $

      Last Checkin:
             $Date:   $
                By:
           $Author:   $

 Last Modification:
          $ModTime:   $

       Description:   used to parse a buffer that has xml formatted data

                      TOOL And XML FORMS License
                      ==========================

                      Except where otherwise noted, all of the documentation 
                      and software included in the TOOL package is 
                      copyrighted by Michael Swartzendruber.

                      Copyright (C) 2005 Michael John Swartzendruber. 
                      All rights reserved.

                      Access to this code, whether intentional or accidental,
                      does NOT IMPLY any transfer of rights.

                      This software is provided "as-is," without any express 
                      or implied warranty. In no event shall the author be held
                      liable for any damages arising from the use of this software.

                      Permission is granted to anyone to use this software for 
                      any purpose, including commercial applications, and to 
                      alter and redistribute it, provided that the following 
                      conditions are met:

                      1. All redistributions of source code files must retain 
                         all copyright notices that are currently in place, 
                         and this list of conditions without modification.

                      2. The origin of this software must not be misrepresented;
                         you must not claim that you wrote the original software.

                      3. If you use this software in another product, an acknowledgment
                         in the product documentation would be appreciated but is
                         not required.

                      4. Modified versions in source or binary form must be plainly 
                         marked as such, and must not be misrepresented as being 
                         the original software.
*/
/*****************************************************************************/


#include "XmlUtilities.h"

#include <algorithm>
#include <string>
#include <sstream>
#include <iostream>
//using namespace std;

#pragma warning( disable : 4786 )


///////////////////////////////////////////////////////////////////////////////
// CXMLParser
//
// Scans a caller-owned character buffer for one XML element at a time and
// records where the element's opening tag, name, attributes, value and
// closing tag are located.  The parser never copies or frees the buffer; it
// only stores a pointer to it plus a length (the "parse window").
//
// All positions kept by the class are zero-based offsets from the start of
// the parse window and are inclusive on both ends.  An offset of -1 means
// "not set".  The search methods return -1 when nothing is found.
//
// Typical use:  Create( buffer, length ), then call Parse() repeatedly; after
// each successful Parse() query GetName() / GetAttributes() / GetValue().
//
// The tag marker symbols (idTagLeft, idTagRight, idTagEnd, idTagLeftLength,
// idTagRightLength) are not declared in this class; presumably they come
// from XmlUtilities.h.
//
// NOTE(review): the closing tag is located by searching for the first
// occurrence of "end marker + name + right marker", so an element that
// contains a nested element with the same name is cut short at the inner
// closing tag.
//
class CXMLParser
{
public:
  static long m_glParserDebugID;            // next debug id to hand out (defined outside this class)
  long        m_lDebugID;                   // id of parser (for debugging)

private:
  std::string m_oStringToParse;             // exposed through GetString(); not otherwise used here
  char*       m_pchBufferLocation;          // start of the parse window (not owned)
  long        m_lParseLength;               // number of characters in the parse window
  long        m_lCurrentPosition;           // offset where the next Parse() starts looking
  long        m_lFirstTagStart;             // offset of the opening tag's left marker
  long        m_lFirstTagEnd;               // offset of the opening tag's right marker
  long        m_lLastTagStart;              // offset of the first character of the closing tag
  long        m_lLastTagEnd;                // offset of the last character of the closing tag
  long        m_lNameStart;                 // first character of the element name
  long        m_lNameEnd;                   // last character of the element name
  long        m_lAttribsStart;              // first character of the attribute text
  long        m_lAttribsEnd;                // last character of the attribute text
  long        m_lValueStart;                // first character between opening and closing tag
  long        m_lValueEnd;                  // last character between opening and closing tag

  // Turns an (offset, length) request into a half-open pointer range that is
  // guaranteed to lie inside the parse window.
  //
  // lLength == -1 means "to the end of the window"; a length that would run
  // past the window is clamped.  Returns false (and leaves the outputs
  // untouched) when there is no buffer, the offset is outside the window or
  // the resulting range is empty.
  //
  bool GetSearchWindow( long lOffset, long lLength,
                        const char*& rpchFirst, const char*& rpchLast )
  {
    if ( !m_pchBufferLocation || lOffset < 0 || lOffset >= m_lParseLength )
    {
      return false;
    }

    long lAvailable = m_lParseLength - lOffset;
    if ( lLength == -1 || lLength > lAvailable )
    {
      lLength = lAvailable;
    }
    if ( lLength <= 0 )
    {
      return false;
    }

    rpchFirst = m_pchBufferLocation + lOffset;
    rpchLast  = rpchFirst + lLength;
    return true;
  }

public:
  // Starts with no buffer attached and takes the next debug id.
  //
  CXMLParser() : m_oStringToParse()
  {
    Clear();
    m_lDebugID = m_glParserDebugID;
    m_glParserDebugID++;
  }

  // Attaches the parser to the inclusive range [lParseStart, lParseEnd] of
  // pchBuffer.  Returns false, leaving the parser unchanged, when the buffer
  // is null or the range is invalid.
  //
  bool Create( const char* pchBuffer, long lParseStart, long lParseEnd )
  {
    if ( !pchBuffer || lParseStart < 0 || lParseEnd < lParseStart )
    {
      return false;
    }
    return Create( pchBuffer + lParseStart, GetLength( lParseStart, lParseEnd ) );
  }

  // Attaches the parser to the first lLength characters of pchBuffer and
  // rewinds to offset 0.  Returns false, leaving the parser unchanged, when
  // the buffer is null or the length is not positive.
  //
  bool Create( const char* pchBuffer, long lLength )
  {
    if ( !pchBuffer || lLength <= 0 )
    {
      return false;
    }

    // the class never writes through this pointer; the member is non-const
    // only because the position accessors hand out char*
    //
    m_pchBufferLocation = const_cast<char*>( pchBuffer );
    m_lParseLength      = lLength;
    m_lCurrentPosition  = 0;

    // offsets left over from a previous buffer are meaningless here and
    // could point past the end of a shorter one
    //
    ResetTagPositions();
    return true;
  }

  void  Release( void )                 { }  // nothing to release; the buffer is not owned
  long  GetCurrent( void )              { return m_lCurrentPosition; }
  long  GetParseLength( void )          { return m_lParseLength; }

  // number of characters from the current position to the end of the window
  long  GetCurLength( void )            { return GetOffsetLength( m_lCurrentPosition ); }

  // number of characters from lOffset to the end of the window (0 if lOffset is out of range)
  long  GetOffsetLength( long lOffset ) { return GetLength( lOffset, m_lParseLength - 1 ); }

  char* GetBufferPosition( void )       { return m_pchBufferLocation; }

  // one past the last character of the parse window
  char* GetLastBufferPosition( void )   { return m_pchBufferLocation + m_lParseLength; }

  // Pointer to the current parse position, or NULL when the parser state is
  // not valid (see IsValid()).
  //
  char* GetCurrentPosition( void )
  {
    return IsValid() ? m_pchBufferLocation + m_lCurrentPosition : NULL;
  }

  // Returns the current position pointer and stores the number of characters
  // remaining from it in lParseLength.  When the parser is not valid the
  // result is NULL and lParseLength is 0.
  //
  char* GetParseState( long& lParseLength )
  {
    lParseLength = IsValid() ? GetCurLength() : 0;
    return GetCurrentPosition();
  }

  std::string& GetString()   { return m_oStringToParse; }

  // forgets all tag/name/attribute/value offsets; buffer and position are kept
  void    Reset( void ) { ResetTagPositions(); }

  // True when a non-empty buffer is attached and the current position lies
  // within [0, parse length] (one past the end is allowed).
  //
  bool IsValid( void )
  {
    if ( !m_pchBufferLocation || m_lParseLength <= 0 )
    {
      return false;
    }
    return m_lCurrentPosition >= 0 && m_lCurrentPosition <= m_lParseLength;
  }

  // Sets every tag, name, attribute and value offset to lStart
  // (default -1, i.e. "not set").
  //
  void ResetTagPositions( long lStart = -1 )
  {
    m_lFirstTagStart  = lStart;
    m_lFirstTagEnd    = lStart;
    m_lLastTagStart   = lStart;
    m_lLastTagEnd     = lStart;
    m_lNameStart      = lStart;
    m_lNameEnd        = lStart;
    m_lAttribsStart   = lStart;
    m_lAttribsEnd     = lStart;
    m_lValueStart     = lStart;
    m_lValueEnd       = lStart;
  }

  // Detaches the buffer, rewinds the position and forgets all offsets.
  //
  void Clear( void )
  {
    m_pchBufferLocation = 0;
    m_lParseLength      = 0;
    m_lCurrentPosition  = 0;
    Reset();
  }

  // Create( pchBuffer, lParseStart, lParseEnd ) followed by Parse().
  //
  bool Parse( const char* pchBuffer, long lParseStart, long lParseEnd )
  {
    if ( !Create( pchBuffer, lParseStart, lParseEnd ) )
    {
      return false;
    }
    return Parse();
  }

  // Create( pchBuffer, lParseLength ) followed by Parse().
  //
  bool Parse( const char* pchBuffer, long lParseLength )
  {
    if ( !Create( pchBuffer, lParseLength ) )
    {
      return false;
    }
    return Parse();
  }

  // Parses the next element at or after the current position.
  //
  // On success the tag, name, attribute and value offsets describe that
  // element and the current position is advanced: past the right marker for
  // a null tag ("<name/>"), otherwise to the next left marker found from the
  // end of the closing tag (or to the end of the closing tag when there is
  // none).  Returns false when no opening tag, no name or no matching
  // closing tag is found; the current position is not moved in that case.
  //
  bool Parse( void )
  {
    // forget the previous element so that a null tag or a failed parse
    // cannot report the name/attributes/value of the element before it
    //
    ResetTagPositions();

    // until real tags are found both tags sit at the current position
    //
    m_lFirstTagStart = m_lCurrentPosition;
    m_lFirstTagEnd   = m_lCurrentPosition;
    m_lLastTagStart  = m_lCurrentPosition;
    m_lLastTagEnd    = m_lCurrentPosition;

    // locate the opening tag
    //
    long lFirst = Find( idTagLeft, m_lCurrentPosition );
    if ( lFirst == -1 )
    {
      return false;
    }

    long lLast = Find( idTagRight, lFirst );
    if ( lLast == -1 )
    {
      return false;
    }

    m_lFirstTagStart = lFirst;
    m_lFirstTagEnd   = lLast;

    if ( !ParseName() )
    {
      return false;
    }

    // attributes are optional, so the result is deliberately ignored
    //
    ParseAttributes();

    // a null tag has neither value nor closing tag
    //
    if ( HasNullTag() )
    {
      m_lCurrentPosition = m_lFirstTagEnd + idTagRightLength;
      return true;
    }

    // build the closing tag text and look for it after the opening tag
    //
    std::string oEndTag;
    oEndTag  = idTagEnd;
    oEndTag += GetName();
    oEndTag += idTagRight;

    lFirst = Find( oEndTag, lLast );
    if ( lFirst == -1 )
    {
      return false;
    }

    m_lLastTagStart = lFirst;
    m_lLastTagEnd   = lFirst + static_cast<long>( oEndTag.size() ) - 1;

    ParseValue();

    // move to the start of the next tag if there is one, else stay on the
    // end of the closing tag
    //
    long lPosition = Find( idTagLeft, m_lLastTagEnd );
    if ( lPosition != -1 )
    {
      m_lCurrentPosition = lPosition;
    }
    else
    {
      m_lCurrentPosition = m_lLastTagEnd;
    }

    return true;
  }

  // NOTE(review): placeholder - reports success without parsing anything and
  // without touching any of its arguments.
  //
  bool Parse( std::string& roName, std::string& roValue, std::string& roAttributes, long& lCurrent )
  {
    return true;
  }

  // True when the opening tag ends with the two characters "/>".
  // Returns false when no buffer is attached or the recorded end of the
  // opening tag is not a usable offset.
  //
  bool HasNullTag( void )
  {
    // need one character before the tag end and the tag end itself
    //
    if ( !m_pchBufferLocation || m_lFirstTagEnd < 1 || m_lFirstTagEnd >= m_lParseLength )
    {
      return false;
    }

    const char* pchMarker = m_pchBufferLocation + m_lFirstTagEnd - 1;
    return pchMarker[ 0 ] == '/' && pchMarker[ 1 ] == '>';
  }

protected:

  /////////////////////////////////////////////////////////
  // these methods are protected because the state
  // of parsing might not be properly setup, and
  // if that were so then calling these methods
  // would cause errors

  // Derives the name offsets from the opening tag offsets.  The name runs
  // from just after the left marker up to the first space, the null tag
  // marker or the right marker, whichever comes first.  Returns false (and
  // clears the name offsets) when the opening tag offsets are not usable.
  //
  bool ParseName( void )
  {
    if ( m_lFirstTagStart < 0
      || m_lFirstTagEnd   < 0
      || m_lFirstTagEnd   <= m_lFirstTagStart )
    {
      m_lNameStart = -1;
      m_lNameEnd   = -1;
      return false;
    }

    m_lNameStart = m_lFirstTagStart + idTagLeftLength;
    m_lNameEnd   = m_lFirstTagEnd - 1;

    // step back over the '/' of a null tag
    //
    if ( HasNullTag() )
    {
      m_lNameEnd -= 1;
    }

    // a space inside the tag separates the name from the attributes
    //
    long lSeparator = Find( ' ', m_lNameStart, GetNameLength() );
    if ( lSeparator != -1 )
    {
      m_lNameEnd = lSeparator - 1;
    }
    return true;
  }

  // ParseName() and, on success, copy the name into roName.
  //
  bool ParseName ( std::string& roName )
  {
    if ( !ParseName() )
    {
      return false;
    }
    roName = GetName();
    return true;
  }

  // Derives the attribute offsets from the opening tag and name offsets.
  // The attribute text starts after the single separator that follows the
  // name and ends before the right marker (and before the '/' of a null
  // tag).  Returns false and leaves the attribute offsets at -1 when the tag
  // carries no attribute text.
  //
  bool ParseAttributes( void )
  {
    m_lAttribsStart = -1;
    m_lAttribsEnd   = -1;

    long lTagLength  = GetTagLength();
    long lNameLength = GetNameLength();
    if ( lTagLength <= 0 || lNameLength <= 0 )
    {
      return false;
    }

    // the two markers account for 2 characters and a null tag marker or a
    // lone separator for a third, so a span of 3 or less leaves no room for
    // attribute text
    //
    long lAttribsSpan = lTagLength - lNameLength;
    if ( lAttribsSpan <= 3 )
    {
      return false;
    }

    // skip the separator after the name
    //
    m_lAttribsStart = m_lNameEnd + 2;

    // stop before the right marker and, for a null tag, before its '/'
    //
    m_lAttribsEnd = m_lFirstTagEnd - 1;
    if ( HasNullTag() )
    {
      m_lAttribsEnd -= 1;
    }

    // nothing left, e.g. "<name />"
    //
    if ( m_lAttribsEnd < m_lAttribsStart )
    {
      m_lAttribsStart = -1;
      m_lAttribsEnd   = -1;
      return false;
    }
    return true;
  }

  // ParseAttributes() and, on success, copy the attribute text into
  // roAttributes.
  //
  bool ParseAttributes( std::string& roAttributes )
  {
    if ( !ParseAttributes() )
    {
      return false;
    }
    roAttributes = GetAttributes();
    return true;
  }

  // Derives the value offsets: everything between the end of the opening tag
  // and the start of the closing tag.  Returns false (and clears the value
  // offsets) when the tag offsets are not usable.
  //
  bool ParseValue( void )
  {
    if (  m_lFirstTagStart < 0
      ||  m_lLastTagEnd    < 0
      ||  m_lLastTagEnd    <= m_lFirstTagStart )
    {
      m_lValueStart = -1;
      m_lValueEnd   = -1;
      return false;
    }

    m_lValueStart = m_lFirstTagEnd + 1;
    m_lValueEnd   = m_lLastTagStart - 1;
    return true;
  }

  // ParseValue() and, on success, copy the value into roValue.
  //
  bool ParseValue( std::string& roValue )
  {
    if ( !ParseValue() )
    {
      return false;
    }
    roValue = GetValue();
    return true;
  }

public:

  // Pointer to the first character of the name, or NULL when there is none.
  //
  char* GetNamePosition( void )
  {
    return HasName() ? m_pchBufferLocation + m_lNameStart : NULL;
  }

  bool HasName( void )
  {
    return GetNameLength() > 0;
  }

  long GetNameLength( void )
  {
    return GetLength( m_lNameStart, m_lNameEnd );
  }

  // Copy of the element name; empty when no name is recorded.
  //
  std::string GetName( void )
  {
    long lLength = GetNameLength();
    if ( lLength <= 0 )
    {
      return std::string( "" );
    }
    return SubString( m_lNameStart, lLength );
  }

  // Pointer to the first character of the attribute text, or NULL when
  // there is none.
  //
  char* GetAttributesPosition( void )
  {
    return HasAttributes() ? m_pchBufferLocation + m_lAttribsStart : NULL;
  }

  bool HasAttributes( void )
  {
    // must look at the attribute span, not the value span
    //
    return GetAttributesLength() > 0;
  }

  long GetAttributesLength( void )
  {
    return GetLength( m_lAttribsStart, m_lAttribsEnd );
  }

  // Copy of the raw attribute text; empty when there is none.
  //
  std::string GetAttributes( void )
  {
    long lLength = GetAttributesLength();
    if ( lLength <= 0 )
    {
      return std::string( "" );
    }
    return SubString( m_lAttribsStart, lLength );
  }

  // Pointer to the first character of the value, or NULL when the value is
  // empty or not set.
  //
  char* GetValuePosition( void )
  {
    return HasValue() ? m_pchBufferLocation + m_lValueStart : NULL;
  }

  bool HasValue( void )
  {
    return GetValueLength() > 0;
  }

  long GetValueLength( void )
  {
    return GetLength( m_lValueStart, m_lValueEnd );
  }

  // Copy of the element value; empty when there is none.
  //
  std::string GetValue( void )
  {
    long lLength = GetValueLength();
    if ( lLength <= 0 )
    {
      return std::string( "" );
    }
    return SubString( m_lValueStart, lLength );
  }

  // Stores the value length in rValueLength and returns a pointer to where
  // the value starts.  Unlike GetValuePosition() an empty value still yields
  // its position; NULL is returned only when no buffer is attached or the
  // value start is not set.
  //
  char* GetValueState( long& rValueLength )
  {
    rValueLength = GetValueLength();

    if ( !m_pchBufferLocation || m_lValueStart < 0 )
    {
      return NULL;
    }
    return m_pchBufferLocation + m_lValueStart;
  }

  // True when the value contains a left tag marker, i.e. nested markup.
  //
  bool ValueHasTag( void )
  {
    return Find( idTagLeft, m_lValueStart, GetValueLength() ) != -1;
  }

  long GetTagLength( void )
  {
    return GetLength( m_lFirstTagStart, m_lFirstTagEnd );
  }

  long GetLastTagLength( void )
  {
    return GetLength( m_lLastTagStart, m_lLastTagEnd );
  }

  bool HasTag( void )
  {
    return GetTagLength() > 0;
  }

  bool HasLastTag( void )
  {
    return GetLastTagLength() > 0;
  }

  // Pointer to the opening tag, or NULL when none is recorded.
  //
  char* GetTagPosition( void )
  {
    return HasTag() ? m_pchBufferLocation + m_lFirstTagStart : NULL;
  }

  // Pointer to the closing tag, or NULL when none is recorded.
  //
  char* GetLastTagPosition( void )
  {
    // guarded by the closing tag's own offsets, not the opening tag's
    //
    return HasLastTag() ? m_pchBufferLocation + m_lLastTagStart : NULL;
  }

  // Copy of the whole opening tag including its markers; empty when no tag
  // is recorded.
  //
  std::string GetTag( void )
  {
    return SubString( m_lFirstTagStart, GetTagLength() );
  }

  // Number of characters in the inclusive range
  // [lStartPosition, lEndPosition]; 0 when either bound is negative or the
  // range is reversed.
  //
  long GetLength( long lStartPosition, long lEndPosition )
  {
    if ( lStartPosition < 0
      || lEndPosition   < 0
      || lEndPosition   < lStartPosition )
    {
      return 0;
    }
    return lEndPosition - lStartPosition + 1;
  }

  // NOTE(review): begin()/end() build a std::string::iterator from a raw
  // char*.  That only compiles with library implementations where the
  // iterator is, or converts from, a plain pointer.  The return type is part
  // of the public interface, so it is left as it was.
  //
  std::string::iterator begin( void )
  {
    std::string::iterator oBuffer = m_pchBufferLocation;
    return std::string::iterator( oBuffer );
  }

  std::string::iterator end( void )
  {
    std::string::iterator oBuffer = m_pchBufferLocation + m_lParseLength;
    return std::string::iterator( oBuffer );
  }

  // Searches lLength characters starting at lOffset for chToFind
  // (lLength == -1: to the end of the parse window).  Returns the offset of
  // the first match or -1.  The search never leaves the parse window.
  //
  long Find( char chToFind, long lOffset, long lLength = -1 )
  {
    const char* pchFirst = NULL;
    const char* pchLast  = NULL;
    if ( !GetSearchWindow( lOffset, lLength, pchFirst, pchLast ) )
    {
      return -1;
    }

    const char* pchFound = std::find( pchFirst, pchLast, chToFind );
    if ( pchFound == pchLast )
    {
      return -1;
    }
    return static_cast<long>( pchFound - m_pchBufferLocation );
  }

  // Same as Find( char, ... ) for a null-terminated string.  A NULL
  // pchToFind is reported as not found.
  //
  long Find( const char* pchToFind, long lOffset, long lLength = -1 )
  {
    if ( !pchToFind )
    {
      return -1;
    }

    const char* pchFirst = NULL;
    const char* pchLast  = NULL;
    if ( !GetSearchWindow( lOffset, lLength, pchFirst, pchLast ) )
    {
      return -1;
    }

    const char* pchNeedleEnd = pchToFind + std::char_traits<char>::length( pchToFind );
    const char* pchFound     = std::search( pchFirst, pchLast, pchToFind, pchNeedleEnd );
    if ( pchFound == pchLast )
    {
      return -1;
    }
    return static_cast<long>( pchFound - m_pchBufferLocation );
  }

  // Same as Find( char, ... ) for a std::string.
  //
  long Find( const std::string& oToFind, long lOffset, long lLength = -1 )
  {
    const char* pchFirst = NULL;
    const char* pchLast  = NULL;
    if ( !GetSearchWindow( lOffset, lLength, pchFirst, pchLast ) )
    {
      return -1;
    }

    const char* pchFound = std::search( pchFirst, pchLast, oToFind.begin(), oToFind.end() );
    if ( pchFound == pchLast )
    {
      return -1;
    }
    return static_cast<long>( pchFound - m_pchBufferLocation );
  }

  // Forwards to the string overload with a one-character string.
  //
  long RightFind( char chToFind, long lOffset, long lLength )
  {
    char  achToFind[ 2 ];

    achToFind[ 0 ] = chToFind;
    achToFind[ 1 ] = '\0';

    return RightFind( achToFind, lOffset, lLength );
  }

  // NOTE(review): placeholder - the reverse search was never implemented
  // (only a disabled, non-compiling sketch existed here).  It returns 0 for
  // every input, which callers cannot tell apart from a match at offset 0.
  //
  long RightFind( const char* pchToFind, long lOffset, long lLength )
  {
    return 0;
  }

  // Copy of lLength characters starting at lOffset.  Returns an empty string
  // when no buffer is attached, the offset is outside the parse window or
  // the length is not positive; a length that would run past the window is
  // clamped to it.
  //
  std::string SubString( long lOffset, long lLength )
  {
    std::string oResult;

    if ( !m_pchBufferLocation
      || lOffset < 0
      || lOffset >= m_lParseLength
      || lLength <= 0 )
    {
      return oResult;
    }

    long lAvailable = m_lParseLength - lOffset;
    if ( lLength > lAvailable )
    {
      lLength = lAvailable;
    }

    oResult.assign( m_pchBufferLocation + lOffset,
                    static_cast<std::string::size_type>( lLength ) );
    return oResult;
  }
};


#endif


/*****************************************************************************/
/* Check-in history 
   $WorkFile:   $
    $Archive:   $

 *$Log:   $
*/
/*****************************************************************************/

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
United States
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions