Click here to Skip to main content
15,896,606 members
Articles / Desktop Programming / MFC

The Win32 Foundation Classes (WFC) - Version 45

Rate me:
Please Sign up or sign in to vote.
4.93/5 (40 votes)
16 May 2000 470.2K   12.7K   280  
The Win32 Foundation Classes (WFC) are a library of C++ classes that extend Microsoft Foundation Classes (MFC) beyond mere GUI applications, and provide extensive support for system and NT-specific applications.
#if ! defined ( DATA_PARSER_CLASS_HEADER )

/*
** Author: Samuel R. Blackburn
** Internet: wfc@pobox.com
**
** You can use it any way you like as long as you don't try to sell it.
**
** Any attempt to sell WFC in source code form must have the permission
** of the original author. You can produce commercial executables with
** WFC but you can't sell WFC.
**
** Copyright, 2000, Samuel R. Blackburn
**
** $Workfile: CDataParser.hpp $
** $Revision: 20 $
** $Modtime: 2/20/00 7:59a $
*/

#define DATA_PARSER_CLASS_HEADER

/*
** Sorry this is so huge but I had to get performance up...
*/

class CDataParser
{
   private:

      // Copying is deliberately disabled: the copy constructor and the
      // assignment operator are private, so code outside this class
      // cannot pass a CDataParser by value or assign one to another.
      // Both bodies are intentionally empty and copy no state.

      CDataParser( const CDataParser& )
      {
      }

      CDataParser& operator=( const CDataParser& )
      {
         return( *this );
      }

   protected:

#if defined( _DEBUG )

      // Debug-only bookkeeping used by GetAt(): the last byte index that
      // was requested and how many times in a row it has been requested.
      // GetAt() asserts once the same index has been asked for more than
      // 25 consecutive times (a hint that the caller is stuck in a loop).

      DWORD m_LastIndex;
      DWORD m_NumberOfTimesWeHaveBeenAskedForTheSameIndex;

#endif // _DEBUG

      // When not FALSE, Empty() (and therefore the destructor) deletes m_Data.
      BOOL         m_AutomaticallyDelete;

      // The bytes being parsed. NULL means no data is attached; every
      // accessor checks for that and reports ERROR_INVALID_ADDRESS.
      CByteArray * m_Data;

      // Text interpretation flags. Defaults are TRUE / FALSE / FALSE.
      // NOTE(review): presumably these back IsTextASCII(), IsTextBigEndian()
      // and IsTextUCS4(), which are defined outside this part of the file.
      BOOL         m_IsASCII;
      BOOL         m_IsBigEndian;
      BOOL         m_IsUCS4;

      // Byte order code returned by GetUCS4Order(). The decoders in this
      // class understand 4321, 1234, 2143 and 3412; the default is 4321.
      DWORD        m_UCS4Order;

      // Returned by GetUnicodeToASCIITranslationFailureCharacter(). In
      // non-UNICODE builds it replaces decoded characters above 255.
      // The default is 'q'.
      BYTE         m_UnicodeToASCIIErrorCharacter;

   public:

      /*
      ** Default constructor. Puts the parser into its empty state: no
      ** data attached, nothing to delete automatically, text treated as
      ** ASCII (not big endian, not UCS4) and the UCS4 order set to 4321.
      */

      inline CDataParser()
      {
         WFCLTRACEINIT( TEXT( "CDataParser::CDataParser()" ) );

#if defined( _DEBUG )
         // Bookkeeping GetAt() uses to notice repeated requests
         m_NumberOfTimesWeHaveBeenAskedForTheSameIndex = 0;
         m_LastIndex                                   = 0;
#endif // _DEBUG

         // No buffer is attached yet, so there is nothing to own
         m_Data                = NULL;
         m_AutomaticallyDelete = FALSE;

         // Text interpretation defaults
         m_IsUCS4      = FALSE;
         m_IsBigEndian = FALSE;
         m_IsASCII     = TRUE;
         m_UCS4Order   = 4321;

         m_UnicodeToASCIIErrorCharacter = 'q';
      }

      /*
      ** Destructor. Hands all cleanup to Empty(), which deletes the
      ** attached data only when automatic deletion was requested.
      **
      ** NOTE: The MSJ article "Do More with Less Code:..." (Sept 1992)
      ** recommends virtual destructors, but this destructor is NOT
      ** declared virtual.
      */

      inline ~CDataParser()
      {
         WFCLTRACEINIT( TEXT( "CDataParser::~CDataParser()" ) );

         this->Empty();
      }

      /*
      ** Moves parse_point forward by one character of the attached data.
      ** A character is 1 byte for ASCII text, 2 bytes for UNICODE text and
      ** 4 bytes for UCS4 text.
      **
      ** parse_point         - position to advance (modified in place).
      ** character_parameter - when non-zero, this value (truncated to a
      **                       BYTE) is handed to CParsePoint::AutoIncrement()
      **                       instead of the character decoded from the data.
      **
      ** Nothing is returned; the outcome is reported through SetLastError():
      **   NO_ERROR                      - the parse point was advanced
      **   ERROR_INVALID_ADDRESS         - no data is attached
      **   ERROR_ALLOTTED_SPACE_EXCEEDED - at or past the end of the data
      **   ERROR_MAPPED_ALIGNMENT        - unknown UCS4 byte order
      ** Any exception is swallowed and the function simply returns.
      */

      inline void AdvanceByOneCharacter( CParsePoint& parse_point, DWORD character_parameter = 0 ) const
      {
         try
         {
            if ( m_Data == NULL )
            {
               ::SetLastError( ERROR_INVALID_ADDRESS );
               return;
            }

            if ( parse_point.GetIndex() >= static_cast< DWORD >( m_Data->GetSize() ) )
            {
               ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
               return;
            }

            DWORD number_of_bytes_per_character = 1;

            if ( IsTextUCS4() )
            {
               number_of_bytes_per_character = 4;
            }
            else if ( IsTextASCII() == FALSE )
            {
               number_of_bytes_per_character = 2;
            }

            // Index the parse point will have once this character is consumed

            DWORD last_index = ( parse_point.GetIndex() + number_of_bytes_per_character );

            // If consuming this character reaches or passes the end of the
            // data, the index is moved there directly. AutoIncrement() is
            // NOT called on this path and the call is reported as
            // ERROR_ALLOTTED_SPACE_EXCEEDED.

            if ( last_index >= static_cast< DWORD >( m_Data->GetSize() ) )
            {
               parse_point.SetIndex( last_index );
               ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
               return;
            }

            DWORD character = 0;

            // First byte of the character. Despite the name it is the high
            // order byte when the text is big endian.

            BYTE little_end = m_Data->GetAt( parse_point.GetIndex() );

            if ( IsTextASCII() != TRUE )
            {
               if ( IsTextUCS4() == FALSE )
               {
                  // We are UNICODE, let's decipher that Endian thing

                  // Defensive check: the last_index test above already
                  // guarantees two more bytes exist, so this cannot trigger
                  // unless the size of the data changed in between.

                  if ( ( parse_point.GetIndex() + 1 ) >= static_cast< DWORD >( m_Data->GetSize() ) )
                  {
                     ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
                     return;
                  }

                  BYTE big_end = m_Data->GetAt( parse_point.GetIndex() + 1 );

                  // MAKEWORD( low, high ): for big endian text the first
                  // byte read is the high order byte.

                  character = IsTextBigEndian() ? MAKEWORD( big_end, little_end ) : MAKEWORD( little_end, big_end );

                  /*
                  ** The above line of code is the same as saying:

                  if ( IsTextBigEndian() != TRUE )
                  {
                     // Intel Format
                     character = MAKEWORD( little_end, big_end );
                  }
                  else
                  {
                     // Sun (big endian) Format
                     character = MAKEWORD( big_end, little_end );
                  }

                  */

                  ::SetLastError( NO_ERROR );

                  // Only the low order byte of the character is passed on

                  if ( character_parameter == 0 )
                  {
                     parse_point.AutoIncrement( static_cast< BYTE >( character ) );
                  }
                  else
                  {
                     parse_point.AutoIncrement( static_cast< BYTE >( character_parameter ) );
                  }

                  // We are UNICODE so advance the byte index by 2
                  // The AutoIncrement() call above will advance the index
                  // by only one byte.

                  parse_point.SetIndex( parse_point.GetIndex() + 1 );
                  return;
               }
               else
               {
                  // The most complicated... UCS4!

                  // Defensive check: already guaranteed by the last_index
                  // test above (four more bytes exist at this point).

                  if ( ( parse_point.GetIndex() + 3 ) >= static_cast< DWORD >( m_Data->GetSize() ) )
                  {
                     ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
                     return;
                  }

                  BYTE byte_2 = m_Data->GetAt( parse_point.GetIndex() + 1 );
                  BYTE byte_3 = m_Data->GetAt( parse_point.GetIndex() + 2 );
                  BYTE byte_4 = m_Data->GetAt( parse_point.GetIndex() + 3 );

                  // Now figure out the order. Each digit of the order code
                  // tells where the corresponding byte lands in the result
                  // (little_end is the first byte read).

                  switch( GetUCS4Order() )
                  {
                     case 4321:

                        character = ( byte_4 << 24 ) + ( byte_3 << 16 ) + ( byte_2 << 8 ) + little_end;
                        break;

                     case 1234:

                        character = ( little_end << 24 ) + ( byte_2 << 16 ) + ( byte_3 << 8 ) + byte_4;
                        break;

                     case 2143:

                        character = ( byte_2 << 24 ) + ( little_end << 16 ) + ( byte_4 << 8 ) + byte_3;
                        break;

                     case 3412:

                        character = ( byte_3 << 24 ) + ( byte_4 << 16 ) + ( little_end << 8 ) + byte_2;
                        break;

                     default:

                        // Unknown byte order, the parse point is left alone

                        ::SetLastError( ERROR_MAPPED_ALIGNMENT );
                        return;
                  }

                  ::SetLastError( NO_ERROR );

                  // Only the low order byte of the character is passed on

                  if ( character_parameter == 0 )
                  {
                     parse_point.AutoIncrement( static_cast< BYTE >( character ) );
                  }
                  else
                  {
                     parse_point.AutoIncrement( static_cast< BYTE >( character_parameter ) );
                  }

                  // We are UCS4 so advance the byte index by 4
                  // The AutoIncrement() call above will advance the index
                  // by only one byte, the remaining three are added here.

                  parse_point.SetIndex( parse_point.GetIndex() + 3 );
                  return;
               }
            }

            // Plain ASCII: one byte per character

            ::SetLastError( NO_ERROR );

            if ( character_parameter == 0 )
            {
               parse_point.AutoIncrement( little_end );
            }
            else
            {
               parse_point.AutoIncrement( static_cast< BYTE >( character_parameter ) );
            }
         }
         catch( ... )
         {
            return;
         }
      }

      inline void Empty( void )
      {
         WFCLTRACEINIT( TEXT( "CDataParser::Empty()" ) );

         try
         {
            if ( m_AutomaticallyDelete != FALSE )
            {
               if ( m_Data != NULL )
               {
                  delete m_Data;
                  m_Data = NULL;
               }
            }

            m_AutomaticallyDelete          = FALSE;
            m_Data                         = NULL;
            m_IsASCII                      = TRUE;
            m_IsBigEndian                  = FALSE;
            m_IsUCS4                       = FALSE;
            m_UCS4Order                    = 4321;
            m_UnicodeToASCIIErrorCharacter = 'q';

#if defined( _DEBUG )
            m_LastIndex                                   = 0;
            m_NumberOfTimesWeHaveBeenAskedForTheSameIndex = 0;
#endif // _DEBUG
         }
         catch( ... )
         {
            // We can't do anything in here because this may be NULL!
            return;
         }
      }

/*
** Searches forward from parse_point for the first byte equal to
** byte_to_find.
**
** Returns TRUE with found_at positioned on the matching byte. The last
** error value is not touched on success.
** Returns FALSE with found_at emptied otherwise; the last error is
** ERROR_INVALID_ADDRESS (no data attached), ERROR_ALLOTTED_SPACE_EXCEEDED
** (parse_point is at or past the end) or NO_ERROR (byte not present).
** FALSE is also returned when an exception is caught.
*/

inline BOOL Find( const CParsePoint& parse_point, BYTE byte_to_find, CParsePoint& found_at ) const
{
   WFCLTRACEINIT( TEXT( "CDataParser::Find( BYTE )" ) );

   try
   {
      found_at.Copy( parse_point );

      // Every failure ends the same way, only the error code differs

      DWORD reason_for_failure = NO_ERROR;

      if ( m_Data == NULL )
      {
         reason_for_failure = ERROR_INVALID_ADDRESS;
      }
      else
      {
         DWORD total_bytes = m_Data->GetSize();

         if ( found_at.GetIndex() >= total_bytes )
         {
            reason_for_failure = ERROR_ALLOTTED_SPACE_EXCEEDED;
         }
         else
         {
            const BYTE * bytes = m_Data->GetData();

            // At least one byte is available here, so test before looping

            do
            {
               const BYTE current_byte = bytes[ found_at.GetIndex() ];

               if ( current_byte == byte_to_find )
               {
                  return( TRUE );
               }

               found_at.AutoIncrement( current_byte );
            }
            while( found_at.GetIndex() < total_bytes );
         }
      }

      found_at.Empty();

      ::SetLastError( reason_for_failure );
      return( FALSE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

/*
** Searches forward from parse_point for the characters of string_to_find,
** decoding the data one character at a time with PeekAtCharacter() so the
** current text format is honoured.
**
** Returns TRUE with found_at positioned on the first character of the
** match.
** Returns FALSE with found_at emptied when:
**   - no data is attached               (ERROR_INVALID_ADDRESS)
**   - parse_point is at/past the end    (ERROR_ALLOTTED_SPACE_EXCEEDED)
**   - string_to_find is empty           (ERROR_ALLOTTED_SPACE_EXCEEDED)
**   - a character could not be decoded  (last error is whatever
**                                        PeekAtCharacter() left behind)
**   - the string is not present         (NO_ERROR)
** FALSE is also returned when an exception is caught.
**
** NOTE(review): characters of string_to_find are widened to DWORD as-is;
** in a non-UNICODE build a TCHAR above 127 may sign extend and never
** match - confirm whether that matters to callers.
*/

inline BOOL Find( const CParsePoint& parse_point, const CString& string_to_find, CParsePoint& found_at ) const
{
   WFCLTRACEINIT( TEXT( "CDataParser::Find( CString )" ) );

   try
   {
      found_at.Copy( parse_point );

      if ( m_Data == NULL )
      {
         found_at.Empty();
         ::SetLastError( ERROR_INVALID_ADDRESS );
         return( FALSE );
      }

      DWORD number_of_data_bytes = m_Data->GetSize();

      if ( found_at.GetIndex() >= number_of_data_bytes )
      {
         found_at.Empty();
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( FALSE );
      }

      size_t string_length = string_to_find.GetLength();

      if ( string_length == 0 )
      {
         found_at.Empty();
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( FALSE );
      }

      DWORD character_to_test = 0;

      while( ( found_at.GetIndex() + string_length ) <= number_of_data_bytes )
      {
         if ( PeekAtCharacter( found_at, character_to_test, 0 ) != TRUE )
         {
            // No character can be decoded at this position (for example
            // only part of a multi-byte character is left). Nothing below
            // would move found_at, so going round again would spin
            // forever. Give up the same way the inner loop does.

            found_at.Empty();
            return( FALSE );
         }

         if ( character_to_test == static_cast< DWORD >( string_to_find.GetAt( 0 ) ) )
         {
            // The first character matches, now compare the rest without
            // moving found_at.

            size_t loop_index = 1;

            BOOL string_was_found = TRUE;

            DWORD other_character = 0;

            while( loop_index < string_length )
            {
               // Look loop_index characters beyond found_at

               if ( PeekAtCharacter( found_at, character_to_test, loop_index ) != TRUE )
               {
                  found_at.Empty();
                  return( FALSE );
               }

               other_character = string_to_find.GetAt( loop_index );

               if ( character_to_test != other_character )
               {
                  // Mismatch, force the comparison loop to finish

                  loop_index = string_length;
                  string_was_found = FALSE;
               }

               loop_index++;
            }

            if ( string_was_found == TRUE )
            {
               return( TRUE );
            }
         }

         // No match starting here, step over one character and try again

         GetNextCharacter( found_at, character_to_test );
      }

      found_at.Empty();

      ::SetLastError( NO_ERROR );
      return( FALSE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

/*
** Searches forward from parse_point for the exact byte sequence held in
** bytes_to_find (compared with memcmp()).
**
** Returns TRUE with found_at positioned on the first byte of the match.
** The last error value is not touched on success.
** Returns FALSE with found_at emptied otherwise; the last error is
** ERROR_INVALID_ADDRESS (no data attached), ERROR_ALLOTTED_SPACE_EXCEEDED
** (parse_point is at or past the end) or NO_ERROR (sequence not present).
** FALSE is also returned when an exception is caught.
*/

inline BOOL Find( const CParsePoint& parse_point, const CByteArray& bytes_to_find, CParsePoint& found_at ) const
{
   WFCLTRACEINIT( TEXT( "CDataParser::Find( CByteArray )" ) );

   try
   {
      found_at.Copy( parse_point );

      // Every failure ends the same way, only the error code differs

      DWORD reason_for_failure = NO_ERROR;

      if ( m_Data == NULL )
      {
         reason_for_failure = ERROR_INVALID_ADDRESS;
      }
      else
      {
         DWORD total_bytes = m_Data->GetSize();

         if ( found_at.GetIndex() >= total_bytes )
         {
            reason_for_failure = ERROR_ALLOTTED_SPACE_EXCEEDED;
         }
         else
         {
            size_t needle_length = bytes_to_find.GetSize();

            const BYTE * haystack = m_Data->GetData();
            const BYTE * needle   = bytes_to_find.GetData();

            // 2000-02-19
            // Thanks go to Arek Kotas (arekk@metrosoft-us.com) for finding
            // a bug here: the comparison must be <= (not <) so that a
            // match ending on the very last byte is still considered.

            while( ( found_at.GetIndex() + needle_length ) <= total_bytes )
            {
               const BYTE * candidate = &haystack[ found_at.GetIndex() ];

               if ( ::memcmp( candidate, needle, needle_length ) == 0 )
               {
                  return( TRUE );
               }

               found_at.AutoIncrement( *candidate );
            }
         }
      }

      found_at.Empty();

      ::SetLastError( reason_for_failure );
      return( FALSE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

/*
** Case-insensitive search for string_to_find, starting at parse_point.
**
** ASCII text   - the raw bytes of string_to_find are compared against the
**                data with _memicmp().
** UNICODE text - string_to_find is expanded to two bytes per character in
**                the current byte order and the search is delegated to
**                FindNoCase( CByteArray ).
** UCS4 text    - not implemented; always fails.
**
** Returns TRUE with found_at positioned on the first byte of the match.
** Returns FALSE with found_at emptied otherwise. On the ASCII path the
** last error is ERROR_INVALID_ADDRESS, ERROR_ALLOTTED_SPACE_EXCEEDED or
** NO_ERROR (not present). FALSE is also returned when an exception is
** caught.
**
** NOTE(review): on the ASCII path a UNICODE build compares the two-byte
** TCHARs of string_to_find against one-byte-per-character data, which
** looks unlikely to match - confirm the intent.
** NOTE(review): on the UNICODE path a non-UNICODE build may sign extend
** TCHARs above 127 when widening them to WCHAR - confirm.
*/

inline BOOL FindNoCase( const CParsePoint& parse_point, const CString& string_to_find, CParsePoint& found_at ) const
{
   WFCLTRACEINIT( TEXT( "CDataParser::FindNoCase( CString )" ) );

   try
   {
      if ( IsTextASCII() == TRUE )
      {
         found_at.Copy( parse_point );

         if ( m_Data == NULL )
         {
            found_at.Empty();
            ::SetLastError( ERROR_INVALID_ADDRESS );
            return( FALSE );
         }

         DWORD number_of_data_bytes = m_Data->GetSize();

         if ( found_at.GetIndex() >= number_of_data_bytes )
         {
            found_at.Empty();
            ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
            return( FALSE );
         }

         // Don't forget to take into account that we may be a UNICODE build so we
         // need to multiply the number of characters by the number of bytes per
         // character to get the memcmp() to work properly.

         size_t number_of_bytes_in_string = string_to_find.GetLength() * sizeof( TCHAR );

         const BYTE * buffer = m_Data->GetData();

         while( ( found_at.GetIndex() + number_of_bytes_in_string ) <= number_of_data_bytes )
         {
            if ( ::_memicmp( &buffer[ found_at.GetIndex() ], static_cast< LPCTSTR >( string_to_find ), number_of_bytes_in_string ) == 0 )
            {
               return( TRUE );
            }

            found_at.AutoIncrement( buffer[ found_at.GetIndex() ] );
         }

         found_at.Empty();

         ::SetLastError( NO_ERROR );
         return( FALSE );
      }

      if ( IsTextUCS4() == FALSE )
      {
         // The array of bytes we are searching is UNICODE text. Build the
         // byte pattern for the string in the byte order of the data.

         CByteArray characters_to_search_for;

         int loop_index = 0;

         while( loop_index < string_to_find.GetLength() )
         {
            const WCHAR character_to_convert = string_to_find.GetAt( loop_index );

            // For characters up to 255 the high byte is simply zero

            const BYTE little_end = LOBYTE( character_to_convert );
            const BYTE big_end    = HIBYTE( character_to_convert );

            if ( IsTextBigEndian() == TRUE )
            {
               // Big Endian (Sun) format, high order byte first
               characters_to_search_for.Add( big_end );
               characters_to_search_for.Add( little_end );
            }
            else
            {
               characters_to_search_for.Add( little_end );
               characters_to_search_for.Add( big_end );
            }

            loop_index++;
         }

         return( FindNoCase( parse_point, characters_to_search_for, found_at ) );
      }

      // Text is UCS4, searching it is not implemented. Empty found_at like
      // every other failure path so the caller never sees a stale position.

      found_at.Empty();
      return( FALSE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

/*
** Case-insensitive search for the byte sequence in bytes_to_find,
** starting at parse_point. Bytes are compared with _memicmp().
**
** Returns TRUE with found_at positioned on the first byte of the match.
** The last error value is not touched on success.
** Returns FALSE with found_at emptied otherwise; the last error is
** ERROR_INVALID_ADDRESS (no data attached), ERROR_ALLOTTED_SPACE_EXCEEDED
** (parse_point is at or past the end) or NO_ERROR (sequence not present).
** FALSE is also returned when an exception is caught.
*/

inline BOOL FindNoCase( const CParsePoint& parse_point, const CByteArray& bytes_to_find, CParsePoint& found_at ) const
{
   WFCLTRACEINIT( TEXT( "CDataParser::FindNoCase( CByteArray )" ) );

   try
   {
      found_at.Copy( parse_point );

      if ( m_Data == NULL )
      {
         found_at.Empty();
         ::SetLastError( ERROR_INVALID_ADDRESS );
         return( FALSE );
      }

      DWORD number_of_data_bytes = m_Data->GetSize();

      if ( found_at.GetIndex() >= number_of_data_bytes )
      {
         found_at.Empty();
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( FALSE );
      }

      size_t pattern_length = bytes_to_find.GetSize();

      const BYTE * buffer         = m_Data->GetData();
      const BYTE * pattern_buffer = bytes_to_find.GetData();

      // <= (not <) so a match ending on the very last byte is still tested

      while( ( found_at.GetIndex() + pattern_length ) <= number_of_data_bytes )
      {
         const BYTE * candidate = &buffer[ found_at.GetIndex() ];

         if ( ::_memicmp( candidate, pattern_buffer, pattern_length ) == 0 )
         {
            return( TRUE );
         }

         found_at.AutoIncrement( *candidate );
      }

      found_at.Empty();

      ::SetLastError( NO_ERROR );
      return( FALSE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

/*
** Copies the next length bytes, starting at parse_point, into
** bytes_to_get and advances parse_point past them.
**
** bytes_to_get is always emptied first. Returns TRUE (last error
** NO_ERROR) when all length bytes were copied. Returns FALSE without
** moving parse_point when no data is attached (ERROR_INVALID_ADDRESS) or
** when fewer than length bytes remain (ERROR_ALLOTTED_SPACE_EXCEEDED).
** FALSE is also returned when an exception is caught.
*/

inline BOOL Get( CParsePoint& parse_point, DWORD length, CByteArray& bytes_to_get ) const
{
   WFCLTRACEINIT( TEXT( "CDataParser::Get( CByteArray )" ) );

   try
   {
      bytes_to_get.RemoveAll();

      if ( m_Data == NULL )
      {
         ::SetLastError( ERROR_INVALID_ADDRESS );
         return( FALSE );
      }

      // 1999-01-30 - Josh Mayfield (mayfielj@squared.com) found an off-by-one error here

      const DWORD number_of_data_bytes = static_cast< DWORD > ( m_Data->GetSize() );
      const DWORD starting_index       = parse_point.GetIndex();

      // Compare against the bytes that remain instead of computing
      // ( index + length ): that sum wraps around for very large lengths
      // and would let an impossible request slip through.

      if ( starting_index > number_of_data_bytes ||
           length > ( number_of_data_bytes - starting_index ) )
      {
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( FALSE );
      }

      DWORD index = 0;

      while( index < length )
      {
         const BYTE this_byte = m_Data->GetAt( parse_point.GetIndex() );

         bytes_to_get.Add( this_byte );
         parse_point.AutoIncrement( this_byte );
         index++;
      }

      ::SetLastError( NO_ERROR );
      return( TRUE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

/*
** Appends the next length bytes, starting at parse_point, to
** string_to_get (one TCHAR per byte) and advances parse_point past them.
**
** string_to_get is always emptied first. Returns TRUE (last error
** NO_ERROR) when all length bytes were read. Returns FALSE without
** moving parse_point when no data is attached (ERROR_INVALID_ADDRESS) or
** when fewer than length bytes remain (ERROR_ALLOTTED_SPACE_EXCEEDED).
** FALSE is also returned when an exception is caught.
*/

inline BOOL Get( CParsePoint& parse_point, DWORD length, CString& string_to_get ) const
{
   WFCLTRACEINIT( TEXT( "CDataParser::Get( CString )" ) );

   try
   {
      string_to_get.Empty();

      if ( m_Data == NULL )
      {
         ::SetLastError( ERROR_INVALID_ADDRESS );
         return( FALSE );
      }

      // 1999-01-30 - Josh Mayfield (mayfielj@squared.com) found an off-by-one error here

      const DWORD number_of_data_bytes = static_cast< DWORD > ( m_Data->GetSize() );
      const DWORD starting_index       = parse_point.GetIndex();

      // Compare against the bytes that remain instead of computing
      // ( index + length ): that sum wraps around for very large lengths
      // and would let an impossible request slip through.

      if ( starting_index > number_of_data_bytes ||
           length > ( number_of_data_bytes - starting_index ) )
      {
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( FALSE );
      }

      DWORD loop_index = 0;

      while( loop_index < length )
      {
         const BYTE this_byte = m_Data->GetAt( parse_point.GetIndex() );

         string_to_get += static_cast< TCHAR > ( this_byte );
         parse_point.AutoIncrement( this_byte );
         loop_index++;
      }

      ::SetLastError( NO_ERROR );
      return( TRUE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

/*
** Returns the byte stored at byte_index.
**
** Zero is returned when no data is attached (last error is set to
** ERROR_INVALID_ADDRESS), when byte_index is beyond the end of the data
** (last error is left alone) or when an exception is caught.
**
** Debug builds also count consecutive requests for the same index and
** assert once that count goes above 25.
*/

inline BYTE GetAt( DWORD byte_index ) const
{
   try
   {

#if defined( _DEBUG )

      // The counters are bookkeeping only, so they are updated even
      // though this method is const.

      CDataParser * const writable_this = const_cast< CDataParser * >( this );

      if ( byte_index != m_LastIndex )
      {
         // A different index, start counting afresh

         writable_this->m_LastIndex                                   = byte_index;
         writable_this->m_NumberOfTimesWeHaveBeenAskedForTheSameIndex = 0;
      }
      else
      {
         writable_this->m_NumberOfTimesWeHaveBeenAskedForTheSameIndex++;

         if ( m_NumberOfTimesWeHaveBeenAskedForTheSameIndex > 25 )
         {
            // The same character has been requested over and over,
            // our caller is probably stuck in an endless loop

            ASSERT( FALSE );
         }
      }

#endif // _DEBUG

      if ( m_Data == NULL )
      {
         ::SetLastError( ERROR_INVALID_ADDRESS );
         return( 0 );
      }

      const DWORD number_of_data_bytes = static_cast< DWORD > ( m_Data->GetSize() );

      if ( byte_index < number_of_data_bytes )
      {
         return( m_Data->GetAt( byte_index ) );
      }

      return( 0 );
   }
   catch( ... )
   {
      return( 0 );
   }
}

/*
** Decodes and returns one character of the attached data without moving
** the caller's parse point.
**
** const_parse_point          - where to start looking.
** number_of_characters_ahead - how many whole characters to skip first;
**                              0 (the default) means the character
**                              located at const_parse_point itself.
**
** A character is 1 byte for ASCII text, 2 bytes for UNICODE text and
** 4 bytes for UCS4 text, so the character decoded starts at
** index + ( bytes per character * number_of_characters_ahead ).
**
** Returns the character and sets the last error to NO_ERROR. In
** non-UNICODE builds a UNICODE/UCS4 character above 255 is replaced by
** GetUnicodeToASCIITranslationFailureCharacter().
** Returns 0 when no data is attached (ERROR_INVALID_ADDRESS), when the
** requested character is not completely inside the data
** (ERROR_ALLOTTED_SPACE_EXCEEDED), when the UCS4 byte order is unknown
** (ERROR_MAPPED_ALIGNMENT) or when an exception is caught.
*/

inline DWORD GetCharacter( const CParsePoint& const_parse_point, const DWORD number_of_characters_ahead = 0 ) const
{
   try
   {
      if ( m_Data == NULL )
      {
         ::SetLastError( ERROR_INVALID_ADDRESS );
         return( 0 );
      }

      // Only the index is needed. It is read from a local copy so this
      // compiles whether or not CParsePoint::GetIndex() is a const method.

      CParsePoint parse_point( const_parse_point );

      const DWORD number_of_data_bytes = static_cast< DWORD >( m_Data->GetSize() );
      const DWORD starting_index       = parse_point.GetIndex();

      if ( starting_index >= number_of_data_bytes )
      {
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( 0 );
      }

      DWORD number_of_bytes_per_character = 1;

      if ( IsTextUCS4() )
      {
         number_of_bytes_per_character = 4;
      }
      else if ( IsTextASCII() == FALSE )
      {
         number_of_bytes_per_character = 2;
      }

      // The requested character must lie completely inside the data.
      // Counting the whole characters that remain (instead of adding up
      // byte offsets) keeps huge look-ahead values from wrapping around.

      const DWORD number_of_whole_characters_left = ( number_of_data_bytes - starting_index ) / number_of_bytes_per_character;

      if ( number_of_characters_ahead >= number_of_whole_characters_left )
      {
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( 0 );
      }

      // Index of the FIRST byte of the requested character. Every text
      // format is decoded starting from this byte.

      const DWORD first_byte_index = starting_index + ( number_of_bytes_per_character * number_of_characters_ahead );

      // First byte of the character. Despite the name it is the high
      // order byte when the text is big endian.

      const BYTE little_end = m_Data->GetAt( first_byte_index );

      DWORD character = 0;

      if ( IsTextASCII() != TRUE )
      {
         if ( IsTextUCS4() == FALSE )
         {
            // We are UNICODE, let's decipher that Endian thing

            const BYTE big_end = m_Data->GetAt( first_byte_index + 1 );

            // MAKEWORD( low, high ): for big endian text the first byte
            // read is the high order byte.

            character = IsTextBigEndian() ? MAKEWORD( big_end, little_end ) : MAKEWORD( little_end, big_end );
         }
         else
         {
            // The most complicated... UCS4!

            const DWORD byte_1 = little_end;
            const DWORD byte_2 = m_Data->GetAt( first_byte_index + 1 );
            const DWORD byte_3 = m_Data->GetAt( first_byte_index + 2 );
            const DWORD byte_4 = m_Data->GetAt( first_byte_index + 3 );

            // Now figure out the order. The bytes are widened to DWORD
            // before shifting so the arithmetic is unsigned throughout.

            switch( GetUCS4Order() )
            {
               case 4321:

                  character = ( byte_4 << 24 ) + ( byte_3 << 16 ) + ( byte_2 << 8 ) + byte_1;
                  break;

               case 1234:

                  character = ( byte_1 << 24 ) + ( byte_2 << 16 ) + ( byte_3 << 8 ) + byte_4;
                  break;

               case 2143:

                  character = ( byte_2 << 24 ) + ( byte_1 << 16 ) + ( byte_4 << 8 ) + byte_3;
                  break;

               case 3412:

                  character = ( byte_3 << 24 ) + ( byte_4 << 16 ) + ( byte_1 << 8 ) + byte_2;
                  break;

               default:

                  ::SetLastError( ERROR_MAPPED_ALIGNMENT );
                  return( 0 );
            }
         }

         ::SetLastError( NO_ERROR );

#if ! defined( UNICODE )
         // ASCII build, check for UNICODE translation problem

         if ( character > 255 )
         {
            character = GetUnicodeToASCIITranslationFailureCharacter();
         }
#endif // UNICODE

         return( character );
      }

      // Plain ASCII, the single byte is the character

      ::SetLastError( NO_ERROR );
      return( little_end );
   }
   catch( ... )
   {
      return( 0 );
   }
}

/*
** Decodes the character at parse_point into character and moves
** parse_point past every byte that was read.
**
** Returns TRUE and sets the last error to NO_ERROR on success. In
** non-UNICODE builds a UNICODE/UCS4 character above 255 is replaced by
** GetUnicodeToASCIITranslationFailureCharacter().
** Returns FALSE when no data is attached (ERROR_INVALID_ADDRESS), when
** the data ends before the character is complete
** (ERROR_ALLOTTED_SPACE_EXCEEDED; the bytes read so far stay consumed),
** when the UCS4 byte order is unknown (ERROR_MAPPED_ALIGNMENT, character
** is set to 0) or when an exception is caught.
*/

inline BOOL GetNextCharacter( CParsePoint& parse_point, DWORD& character ) const
{
   try
   {
      if ( m_Data == NULL )
      {
         ::SetLastError( ERROR_INVALID_ADDRESS );
         return( FALSE );
      }

      if ( parse_point.GetIndex() >= static_cast< DWORD >( m_Data->GetSize() ) )
      {
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( FALSE );
      }

      // The first byte is consumed no matter what the text format is

      const BYTE first_byte = m_Data->GetAt( parse_point.GetIndex() );
      parse_point.AutoIncrement( first_byte );

      if ( IsTextASCII() == TRUE )
      {
         // One byte per character, we are done

         character = first_byte;
         ::SetLastError( NO_ERROR );
         return( TRUE );
      }

      // UNICODE needs one more byte, UCS4 needs three more

      const BOOL text_is_two_byte = ( IsTextUCS4() == FALSE ) ? TRUE : FALSE;
      const int  number_of_extra_bytes = ( text_is_two_byte != FALSE ) ? 1 : 3;

      BYTE extra_bytes[ 3 ] = { 0, 0, 0 };

      for ( int extra_index = 0; extra_index < number_of_extra_bytes; extra_index++ )
      {
         if ( parse_point.GetIndex() >= static_cast< DWORD >( m_Data->GetSize() ) )
         {
            ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
            return( FALSE );
         }

         extra_bytes[ extra_index ] = m_Data->GetAt( parse_point.GetIndex() );
         parse_point.AutoIncrement( extra_bytes[ extra_index ] );
      }

      if ( text_is_two_byte != FALSE )
      {
         // We are UNICODE, let's decipher that Endian thing.
         // MAKEWORD( low, high ): for big endian text the first byte
         // read is the high order byte.

         const BYTE second_byte = extra_bytes[ 0 ];

         if ( IsTextBigEndian() )
         {
            character = MAKEWORD( second_byte, first_byte );
         }
         else
         {
            // Intel Format
            character = MAKEWORD( first_byte, second_byte );
         }
      }
      else
      {
         // The most complicated... UCS4!

         const BYTE second_byte = extra_bytes[ 0 ];
         const BYTE third_byte  = extra_bytes[ 1 ];
         const BYTE fourth_byte = extra_bytes[ 2 ];

         // Now figure out the order

         switch( GetUCS4Order() )
         {
            case 4321:

               character = ( fourth_byte << 24 ) + ( third_byte << 16 ) + ( second_byte << 8 ) + first_byte;
               break;

            case 1234:

               character = ( first_byte << 24 ) + ( second_byte << 16 ) + ( third_byte << 8 ) + fourth_byte;
               break;

            case 2143:

               character = ( second_byte << 24 ) + ( first_byte << 16 ) + ( fourth_byte << 8 ) + third_byte;
               break;

            case 3412:

               character = ( third_byte << 24 ) + ( fourth_byte << 16 ) + ( first_byte << 8 ) + second_byte;
               break;

            default:

               ::SetLastError( ERROR_MAPPED_ALIGNMENT );
               character = 0;
               return( FALSE );
         }
      }

      ::SetLastError( NO_ERROR );

#if ! defined( UNICODE )
      // ASCII build, check for UNICODE translation problem

      if ( character > 255 )
      {
         character = GetUnicodeToASCIITranslationFailureCharacter();
      }
#endif // UNICODE

      return( TRUE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

// Returns the byte-order code (the value held in m_UCS4Order) that is
// consulted when four bytes are assembled into a single UCS-4 character.
inline DWORD GetUCS4Order( void ) const
{
   const DWORD current_order = m_UCS4Order;

   return( current_order );
}

// Returns the substitute byte (m_UnicodeToASCIIErrorCharacter) that ASCII
// builds use in place of a character whose value is above 255.
inline BYTE GetUnicodeToASCIITranslationFailureCharacter( void ) const
{
   const BYTE substitute_character = m_UnicodeToASCIIErrorCharacter;

   return( substitute_character );
}

// Returns the number of bytes in the attached data.
// Returns zero when no data is attached (last error is set to
// ERROR_INVALID_ADDRESS), when the array reports a negative size,
// or when an exception is caught.
inline DWORD GetSize( void ) const
{
   try
   {
      if ( m_Data != NULL )
      {
         const int byte_count = m_Data->GetSize();

         if ( byte_count > 0 )
         {
            return( byte_count );
         }

         // Never hand a negative size back to the caller
         return( 0 );
      }

      ::SetLastError( ERROR_INVALID_ADDRESS );
      return( 0 );
   }
   catch( ... )
   {
      return( 0 );
   }
}

// Copies bytes from the data into bytes_to_get, starting at parse_point and
// stopping after the first byte equal to termination_byte (which is copied too).
// parse_point is advanced over every byte that is copied.
//
// Returns TRUE when the termination byte was found. Returns FALSE when
// there is no data (ERROR_INVALID_ADDRESS), when the end of the data is
// reached first (ERROR_ALLOTTED_SPACE_EXCEEDED) or when an exception is
// caught. On failure bytes_to_get holds whatever was copied so far.
inline BOOL GetUntilAndIncluding( CParsePoint& parse_point, BYTE termination_byte, CByteArray& bytes_to_get ) const
{
   WFCLTRACEINIT( TEXT( "CDataParser::GetUntilAndIncluding( CByteArray )" ) );

   try
   {
      // Empty the output and use a large grow-by value for speed
      bytes_to_get.SetSize( 0, 8192 );

      if ( m_Data == NULL )
      {
         ::SetLastError( ERROR_INVALID_ADDRESS );
         return( FALSE );
      }

      for ( ;; )
      {
         // Running off the end of the data before seeing the terminator is a failure
         if ( parse_point.GetIndex() >= static_cast< DWORD > ( m_Data->GetSize() ) )
         {
            ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
            return( FALSE );
         }

         const BYTE current_byte = m_Data->GetAt( parse_point.GetIndex() );

         bytes_to_get.Add( current_byte );
         parse_point.AutoIncrement( current_byte );

         if ( current_byte == termination_byte )
         {
            break;
         }
      }

      ::SetLastError( NO_ERROR );
      return( TRUE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

// Appends bytes from the data to string_to_get, one character per byte,
// starting at parse_point and stopping after the first byte equal to
// termination_byte. parse_point is advanced over every byte consumed.
//
// Returns TRUE when the termination byte was found. Returns FALSE when
// there is no data (ERROR_INVALID_ADDRESS), when the end of the data is
// reached first (ERROR_ALLOTTED_SPACE_EXCEEDED, string_to_get is emptied)
// or when an exception is caught.
inline BOOL GetUntilAndIncluding( CParsePoint& parse_point, BYTE termination_byte, CString& string_to_get ) const
{
   WFCLTRACEINIT( TEXT( "CDataParser::GetUntilAndIncluding( CString )" ) );

   // 1999-07-01 - Optimized this routine

   try
   {
      string_to_get.Empty();

      if ( m_Data == NULL )
      {
         ::SetLastError( ERROR_INVALID_ADDRESS );
         return( FALSE );
      }

      DWORD number_of_bytes_in_data = static_cast< DWORD > ( m_Data->GetSize() );

      if ( parse_point.GetIndex() >= number_of_bytes_in_data )
      {
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( FALSE );
      }

      const BYTE * raw_bytes = m_Data->GetData();

      // Characters are gathered in a local chunk and handed to CString 256
      // at a time, which greatly reduces the number of memory allocations
      // CString has to make. The extra slot holds the terminating zero.
      TCHAR chunk[ 257 ];
      chunk[ 256 ] = 0x00;

      int chunk_length = 0;

      for ( ;; )
      {
         if ( parse_point.GetIndex() >= static_cast< DWORD > ( m_Data->GetSize() ) )
         {
            // Ran out of data before the terminator showed up
            string_to_get.Empty();
            ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
            return( FALSE );
         }

         const BYTE current_byte = raw_bytes[ parse_point.GetIndex() ];

         chunk[ chunk_length ] = current_byte;
         ++chunk_length;

         if ( chunk_length == 256 )
         {
            // The chunk is full, flush it
            string_to_get += chunk;
            chunk_length = 0;
         }

         parse_point.AutoIncrement( current_byte );

         if ( current_byte == termination_byte )
         {
            break;
         }
      }

      // Flush whatever is left in a partially filled chunk
      if ( chunk_length > 0 )
      {
         chunk[ chunk_length ] = 0x00;
         string_to_get += chunk;
      }

      ::SetLastError( NO_ERROR );
      return( TRUE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

// 1998-11-21, New for Release 38
// The following method was added by Jeff Barczewski (jb@snowflight.com)
// He added it to fix a bug in the parsing of comment sections in XML
// Reads characters with GetNextCharacter(), starting at parse_point, until
// the text read so far ends with termination_characters. On success
// string_to_get holds everything that was read, terminator included.
//
// Returns TRUE when the terminator was found. Returns FALSE when there is
// no data (ERROR_INVALID_ADDRESS), when GetNextCharacter() stops returning
// TRUE before the terminator is seen (ERROR_ALLOTTED_SPACE_EXCEEDED) or
// when an exception is caught.
//
// In ASCII (non-UNICODE) builds any character above 255 is replaced by
// GetUnicodeToASCIITranslationFailureCharacter().
inline BOOL GetUntilAndIncluding( CParsePoint& parse_point, const CString& termination_characters, CString& string_to_get ) const
{
   WFCLTRACEINIT( TEXT( "CDataParser::GetUntilAndIncluding( CString )" ) );

   try
   {
      string_to_get.Empty();

      if ( m_Data == NULL )
      {
         ::SetLastError( ERROR_INVALID_ADDRESS );
         return( FALSE );
      }

      if ( parse_point.GetIndex() >= static_cast< DWORD > ( m_Data->GetSize() ) )
      {
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( FALSE );
      }

      DWORD character = 0;

      int termination_characters_length = termination_characters.GetLength();

#if defined( CSTRING_APPENDING_IS_FAST )

      while( GetNextCharacter( parse_point, character ) == TRUE )
      {
#if ! defined( UNICODE )
         // ASCII build, check for UNICODE translation problem

         if ( character > 255 )
         {
            character = GetUnicodeToASCIITranslationFailureCharacter();
         }
#endif // UNICODE

         string_to_get += static_cast< TCHAR >( character );

         if ( string_to_get.GetLength() >= termination_characters_length )
         {
            if ( string_to_get.Right( termination_characters_length ).Compare( termination_characters ) == 0 )
            {
               ::SetLastError( NO_ERROR );
               return( TRUE );
            }
         }
      }

#else // CSTRING_APPENDING_IS_FAST

      CByteArray byte_array;
      byte_array.SetSize( 0, 8192 );

      // byte_array is measured in bytes, the terminator in characters. In a
      // UNICODE build each character occupies sizeof( TCHAR ) bytes, so every
      // size test and the memcmp() below must use the terminator's size in bytes.
      const int termination_size_in_bytes = termination_characters_length * static_cast< int >( sizeof( TCHAR ) );

      while( GetNextCharacter( parse_point, character ) == TRUE )
      {
#if ! defined( UNICODE )
         // ASCII build, check for UNICODE translation problem

         if ( character > 255 )
         {
            character = GetUnicodeToASCIITranslationFailureCharacter();
         }

         byte_array.Add( static_cast< BYTE >( character ) );
#else // UNICODE

         byte_array.Add( LOBYTE( character ) );
         byte_array.Add( HIBYTE( character ) );

#endif // UNICODE

         // Wait until at least a full terminator's worth of bytes has been
         // collected, otherwise the tail address would point before the buffer.
         if ( byte_array.GetSize() >= termination_size_in_bytes )
         {
            const BYTE * address = byte_array.GetData();
            const BYTE * address_to_compare = &address[ byte_array.GetSize() - termination_size_in_bytes ];

            if ( ::memcmp( address_to_compare, static_cast< LPCTSTR >( termination_characters ), static_cast< size_t >( termination_size_in_bytes ) ) == 0 )
            {
               // Zero terminate (one TCHAR wide) so the bytes can be read as a string
#if defined( UNICODE )
               byte_array.Add( 0 );
#endif // UNICODE
               byte_array.Add( 0 );
               string_to_get = reinterpret_cast< LPCTSTR >( byte_array.GetData() );
               ::SetLastError( NO_ERROR );
               return( TRUE );
            }
         }
      }

#endif // CSTRING_APPENDING_IS_FAST

      ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
      return( FALSE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

// Appends bytes from the data to string_to_get, one character per byte,
// starting at parse_point, until the byte sequence termination_bytes is
// found. The terminator itself is appended as well and parse_point is
// left just past it.
//
// The byte at the starting position is always consumed before the first
// comparison is made, so a terminator that begins exactly at parse_point
// is not recognized there.
//
// Returns TRUE when the terminator was found. Returns FALSE when there is
// no data (ERROR_INVALID_ADDRESS), when fewer bytes than a terminator
// remain (ERROR_ALLOTTED_SPACE_EXCEEDED) or when an exception is caught.
// On failure string_to_get holds whatever was consumed so far.
inline BOOL GetUntilAndIncluding( CParsePoint& parse_point, const CByteArray& termination_bytes, CString& string_to_get ) const
{
   WFCLTRACEINIT( TEXT( "CDataParser::GetUntilAndIncluding( CByteArray )" ) );

   try
   {
      string_to_get.Empty();

      if ( m_Data == NULL )
      {
         ::SetLastError( ERROR_INVALID_ADDRESS );
         return( FALSE );
      }

      const DWORD number_of_bytes_in_data = static_cast< DWORD > ( m_Data->GetSize() );

      if ( parse_point.GetIndex() >= number_of_bytes_in_data )
      {
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( FALSE );
      }

      TCHAR character_to_test = 0;

      const BYTE * data_p = m_Data->GetData();
      const BYTE * termination_bytes_p = termination_bytes.GetData();

      const DWORD termination_bytes_length = static_cast< DWORD > ( termination_bytes.GetSize() );

      BOOL terminator_found = FALSE;

      do
      {
         if ( parse_point.GetIndex() + termination_bytes_length - 1 < number_of_bytes_in_data )
         {
            character_to_test = m_Data->GetAt( parse_point.GetIndex() );
            string_to_get += character_to_test;
            parse_point.AutoIncrement( static_cast< BYTE >( character_to_test ) );
         }
         else
         {
            ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
            return( FALSE );
         }

         // Only look for the terminator when all of it still fits inside the
         // data. Comparing without this test could read one byte past the end
         // of the buffer. When it no longer fits, the size test at the top of
         // the next pass reports the failure.
         if ( parse_point.GetIndex() + termination_bytes_length <= number_of_bytes_in_data )
         {
            // An empty terminator matches trivially; don't hand memcmp() a
            // pointer from an empty array.
            if ( termination_bytes_length == 0 ||
                 ::memcmp( data_p + parse_point.GetIndex(), termination_bytes_p, termination_bytes_length ) == 0 )
            {
               terminator_found = TRUE;
            }
         }
      }
      while( terminator_found == FALSE );

      for( DWORD loop_index = 0; loop_index < termination_bytes_length; loop_index++ )
      {
         // Move past the terminator, adding it to the end of the string
         character_to_test = m_Data->GetAt( parse_point.GetIndex() );
         string_to_get += static_cast< TCHAR > ( character_to_test );
         parse_point.AutoIncrement( static_cast< BYTE >( character_to_test ) );
      }

      ::SetLastError( NO_ERROR );

      return( TRUE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

// Attaches the parser to the given byte array after calling Empty().
// automatically_delete is stored in m_AutomaticallyDelete alongside the pointer.
//
// Returns TRUE when data is not NULL. Returns FALSE with the last error set
// to ERROR_INVALID_ADDRESS when it is NULL (the members are still
// overwritten), or FALSE when an exception is caught.
inline BOOL Initialize( CByteArray * data, BOOL automatically_delete = FALSE )
{
   WFCLTRACEINIT( TEXT( "CDataParser::Initialize( CByteArray )" ) );

   try
   {
      Empty();

      m_Data                = data;
      m_AutomaticallyDelete = automatically_delete;

      if ( data != NULL )
      {
         ::SetLastError( NO_ERROR );
         return( TRUE );
      }

      ::SetLastError( ERROR_INVALID_ADDRESS );
      return( FALSE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

// Builds a new CByteArray holding the characters of every string in
// strings, in order, one byte per character (each character is cast to
// BYTE), and attaches the parser to it via Initialize( bytes, TRUE ).
//
// Returns the result of that Initialize() call, or FALSE when the array
// cannot be allocated or an exception is caught. The temporary array is
// released on every failure path.
inline BOOL Initialize( const CStringArray& strings )
{
   WFCLTRACEINIT( TEXT( "CDataParser::Initialize( CStringArray )" ) );

   // Declared outside the try block so the handler at the bottom can
   // release the array if something throws while it is being filled.
   CByteArray * bytes_p = NULL;

   try
   {
      // Let's create our own CByteArray

      try
      {
         bytes_p = new CByteArray;
      }
      catch( ... )
      {
         bytes_p = NULL;
      }

      if ( bytes_p == NULL )
      {
         WFCTRACE( TEXT( "Can't allocate memory for a new CByteArray" ) );
         return( FALSE );
      }

      CString a_string;

      DWORD index             = 0;
      DWORD number_of_strings = strings.GetSize();
      DWORD string_index      = 0;
      DWORD string_length     = 0;

      while( index < number_of_strings )
      {
         a_string = strings.GetAt( index );

         // CByteArray doesn't have a method where you can append a lot of bytes
         // at once so we have to hack around it

         string_index = 0;
         string_length = a_string.GetLength();

         while( string_index < string_length )
         {
            bytes_p->Add( static_cast< BYTE > ( a_string.GetAt( string_index ) ) );
            string_index++;
         }

         index++;
      }

      const BOOL return_value = Initialize( bytes_p, TRUE );

      if ( return_value == FALSE && m_Data != bytes_p )
      {
         // The array was never attached to the parser, so nobody else will free it
         delete bytes_p;
      }

      // Either attached or deleted, the local pointer must not be used again
      bytes_p = NULL;

      return( return_value );
   }
   catch( ... )
   {
      // Filling the array failed before it was handed over, don't leak it
      delete bytes_p;
      return( FALSE );
   }
}
      // TRUE only when the ASCII flag is set and the UCS-4 flag is not TRUE
      inline BOOL IsTextASCII( void ) const
      {
         if ( m_IsASCII != FALSE && m_IsUCS4 != TRUE )
         {
            return( TRUE );
         }

         return( FALSE );
      }
      // Reports the big endian flag, normalized to exactly TRUE or FALSE
      inline BOOL IsTextBigEndian( void ) const
      {
         if ( m_IsBigEndian != FALSE )
         {
            return( TRUE );
         }

         return( FALSE );
      }
      // Reports the UCS-4 flag, normalized to exactly TRUE or FALSE
      inline BOOL IsTextUCS4( void ) const
      {
         if ( m_IsUCS4 != FALSE )
         {
            return( TRUE );
         }

         return( FALSE );
      }

// Reads the character that lies number_of_characters_ahead characters past
// const_parse_point without moving the caller's parse point (all the work
// is done on a private copy). A look-ahead of zero reads the character at
// the parse point itself.
//
// A character is one byte when IsTextASCII() is TRUE, four bytes when
// IsTextUCS4() is TRUE and two bytes otherwise. Two byte characters are
// assembled according to IsTextBigEndian(), four byte characters according
// to GetUCS4Order().
//
// Returns TRUE and fills character on success. Returns FALSE when there is
// no data (ERROR_INVALID_ADDRESS), when the requested character lies beyond
// the end of the data (ERROR_ALLOTTED_SPACE_EXCEEDED), when the UCS-4 order
// is not recognized (ERROR_MAPPED_ALIGNMENT, character is set to zero) or
// when an exception is caught.
inline BOOL PeekAtCharacter( const CParsePoint& const_parse_point, DWORD& character, const DWORD number_of_characters_ahead = 1 ) const
{
   try
   {
      if ( m_Data == NULL )
      {
         ::SetLastError( ERROR_INVALID_ADDRESS );
         return( FALSE );
      }

      CParsePoint parse_point( const_parse_point );

      const DWORD number_of_bytes_in_data = static_cast< DWORD >( m_Data->GetSize() );

      if ( parse_point.GetIndex() >= number_of_bytes_in_data )
      {
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( FALSE );
      }

      DWORD number_of_bytes_per_character = 1;

      if ( IsTextUCS4() )
      {
         number_of_bytes_per_character = 4;
      }
      else if ( IsTextASCII() == FALSE )
      {
         number_of_bytes_per_character = 2;
      }

      // The first byte of the desired character must lie inside the data.
      // The test is done with a division so that a huge look-ahead cannot
      // wrap the DWORD arithmetic and slip past the bounds check. The
      // subtraction cannot wrap because the index is known to be smaller
      // than the size at this point.

      const DWORD number_of_bytes_after_parse_point = number_of_bytes_in_data - parse_point.GetIndex() - 1;

      if ( number_of_characters_ahead > ( number_of_bytes_after_parse_point / number_of_bytes_per_character ) )
      {
         ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
         return( FALSE );
      }

      // We must calculate the number of bytes to skip until we reach
      // the first byte of the desired character

      const DWORD number_of_bytes_to_skip_ahead = number_of_bytes_per_character * number_of_characters_ahead;

      DWORD number_of_bytes_skipped = 0;

      BYTE little_end = 0;

      while( number_of_bytes_skipped < number_of_bytes_to_skip_ahead )
      {
         little_end = m_Data->GetAt( parse_point.GetIndex() );
         parse_point.AutoIncrement( little_end );
         number_of_bytes_skipped++;
      }

      // Get the first byte of the character
      little_end = m_Data->GetAt( parse_point.GetIndex() );

      if ( IsTextASCII() != TRUE )
      {
         if ( IsTextUCS4() == FALSE )
         {
            // Advance over the byte we just read, we do it here so if we are
            // parsing an ASCII document, we won't incur the performance hit
            parse_point.AutoIncrement( little_end );

            // We are UNICODE, let's decipher that Endian thing

            if ( parse_point.GetIndex() >= number_of_bytes_in_data )
            {
               ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
               return( FALSE );
            }

            BYTE big_end = m_Data->GetAt( parse_point.GetIndex() );

            // MAKEWORD takes ( low, high ). In big endian text the first
            // byte read is the high one.
            character = IsTextBigEndian() ? MAKEWORD( big_end, little_end ) : MAKEWORD( little_end, big_end );

            ::SetLastError( NO_ERROR );
            return( TRUE );
         }
         else
         {
            // The most complicated... UCS4!

            // Advance over the byte we just read, we do it here so if we are
            // parsing an ASCII document, we won't incur the performance hit
            parse_point.AutoIncrement( little_end );

            if ( parse_point.GetIndex() >= number_of_bytes_in_data )
            {
               ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
               return( FALSE );
            }

            BYTE byte_2 = m_Data->GetAt( parse_point.GetIndex() );
            parse_point.AutoIncrement( byte_2 );

            if ( parse_point.GetIndex() >= number_of_bytes_in_data )
            {
               ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
               return( FALSE );
            }

            BYTE byte_3 = m_Data->GetAt( parse_point.GetIndex() );
            parse_point.AutoIncrement( byte_3 );

            if ( parse_point.GetIndex() >= number_of_bytes_in_data )
            {
               ::SetLastError( ERROR_ALLOTTED_SPACE_EXCEEDED );
               return( FALSE );
            }

            BYTE byte_4 = m_Data->GetAt( parse_point.GetIndex() );

            // Widen to DWORD before shifting so a byte of 0x80 or more is
            // never shifted into the sign bit of a signed int

            const DWORD first  = little_end;
            const DWORD second = byte_2;
            const DWORD third  = byte_3;
            const DWORD fourth = byte_4;

            // Now figure out the order

            switch( GetUCS4Order() )
            {
               case 4321:

                  character = ( fourth << 24 ) + ( third << 16 ) + ( second << 8 ) + first;
                  break;

               case 1234:

                  character = ( first << 24 ) + ( second << 16 ) + ( third << 8 ) + fourth;
                  break;

               case 2143:

                  character = ( second << 24 ) + ( first << 16 ) + ( fourth << 8 ) + third;
                  break;

               case 3412:

                  character = ( third << 24 ) + ( fourth << 16 ) + ( first << 8 ) + second;
                  break;

               default:

                  ::SetLastError( ERROR_MAPPED_ALIGNMENT );
                  character = 0;
                  return( FALSE );
            }

            ::SetLastError( NO_ERROR );
            return( TRUE );
         }
      }

      character = little_end;
      ::SetLastError( NO_ERROR );
      return( TRUE );
   }
   catch( ... )
   {
      return( FALSE );
   }
}

      // Convenience form of PeekAtCharacter() that hands the character back
      // as the return value. Zero is returned when the peek fails.
      //
      // A negative look-ahead is rejected (ERROR_INVALID_PARAMETER) rather
      // than being converted to a huge unsigned look-ahead.
      inline DWORD PeekCharacter( const CParsePoint& parse_point, const LONG number_of_characters_ahead ) const
      {
         DWORD return_value = 0;

         if ( number_of_characters_ahead < 0 )
         {
            ::SetLastError( ERROR_INVALID_PARAMETER );
            return( return_value );
         }

         (void) PeekAtCharacter( parse_point, return_value, static_cast< DWORD >( number_of_characters_ahead ) );
         return( return_value );
      }

// Sets the ASCII flag (normalized to TRUE or FALSE) and always clears the
// UCS-4 flag. Always returns TRUE.
inline BOOL SetTextToASCII( BOOL text_is_ascii = TRUE )
{
   m_IsUCS4 = FALSE;

   if ( text_is_ascii != FALSE )
   {
      m_IsASCII = TRUE;
   }
   else
   {
      m_IsASCII = FALSE;
   }

   return( TRUE );
}

// Sets the big endian flag (normalized to TRUE or FALSE).
// Always returns TRUE.
inline BOOL SetTextToBigEndian( BOOL unicode_is_big_endian = TRUE )
{
   if ( unicode_is_big_endian != FALSE )
   {
      m_IsBigEndian = TRUE;
   }
   else
   {
      m_IsBigEndian = FALSE;
   }

   return( TRUE );
}

// Turns UCS-4 mode on or off. Turning it on also clears the ASCII flag;
// turning it off leaves the ASCII flag as it was. Always returns TRUE.
inline BOOL SetTextToUCS4( BOOL text_is_ucs4 = TRUE )
{
   if ( text_is_ucs4 != FALSE )
   {
      // UCS-4 text can't be ASCII at the same time
      m_IsUCS4  = TRUE;
      m_IsASCII = FALSE;
      return( TRUE );
   }

   m_IsUCS4 = FALSE;
   return( TRUE );
}

// Stores the byte order used to assemble UCS-4 characters. Only 4321,
// 1234, 2143 and 3412 are accepted; any other value resets the order to
// 4321 and returns FALSE.
inline BOOL SetUCS4Order( DWORD order = 4321 )
{
   const bool order_is_supported = ( order == 4321 ) ||
                                   ( order == 1234 ) ||
                                   ( order == 2143 ) ||
                                   ( order == 3412 );

   if ( order_is_supported )
   {
      m_UCS4Order = order;
      return( TRUE );
   }

   // Unknown order, fall back to the default
   m_UCS4Order = 4321;
   return( FALSE );
}

// Stores the substitute byte that ASCII builds use in place of a character
// whose value is above 255.
inline void SetUnicodeToASCIITranslationFailureCharacter( BYTE asci_character )
{
   const BYTE substitute_character = asci_character;

   m_UnicodeToASCIIErrorCharacter = substitute_character;
}

#if defined( _DEBUG ) && ! defined( WFC_NO_DUMPING )

      virtual void  Dump( CDumpContext& dump_context ) const;

#endif // _DEBUG
};

#endif // DATA_PARSER_CLASS_HEADER

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
United States United States
I'm just a simple little NT programmer. Most of the work I do is remote controlling equipment in real time. I started out using Windows 3.0. Then came 3.1 and then NT. I started using NT but unfortunately, Microsoft didn't. I started using MFC but unfortunately, Microsoft didn't (and still doesn't) put any real support for NT into MFC so I wrote a bunch of C++ classes to make my life easier. Like all class libraries, mine grew. Now I'm giving it away, I call it Win32 Foundation Classes.

Check out Sam's homepage at www.SamBlackburn.com/wfc/.

Comments and Discussions