Click here to Skip to main content
Add your own
alternative version

Include File Hierarchy Viewer

29 Jul 2003 CPOL
A tool to view the include file hierarchy of your source code.
//====================================================================
// Although great care has gone into developing this software,
// it is provided without any guarantee of reliability, accuracy
// of information, or correctness of operation.  I am not responsible
// for any damages that may occur as a result of using this software.
// Use this software entirely at your own risk.
// Copyright 2003, Chris Richardson
//
// Description: A simple text lexer.
//
//====================================================================

#include "stdafx.h"
#include "Lexer.h"
#include "Token.h"

//////////////////////////////////////////////////////////////////////
// CLexer Implementation.
//////////////////////////////////////////////////////////////////////

// Creates a lexer with no stream attached.  Until SetStream() supplies
// one, Lex() reports PARSE_STATUS_FILE_NOT_OPEN.
CLexer::CLexer()
   : c_poStream( NULL )          // No input yet.
   , c_ulLine( -1 )              // Line counter placeholder; SetStream() sets the real start value.
   , c_ulLinePos( 0 )
   , c_eMode( LEX_MODE_PARSER )  // In this mode Lex() treats '<' as an operator.
{
   // All state is set up by the initializer list above.
}
//
// ------------------------------------------------------------------
//
// Destructor.  Only clears the lexer's pointer to the stream; nothing
// is deleted or closed here.
CLexer::~CLexer()
{
   c_poStream = NULL;
}
//
// ------------------------------------------------------------------
//
// Attaches the stream that Lex() will read from and restarts the
// line/column bookkeeping.
//
// p_poStream - stream to read characters from (stored, not copied).
//
// Always returns PARSE_STATUS_OK.
ParseStatus CLexer::SetStream( CStream * p_poStream )
{
   // Lex() increments the line position before it looks at a character,
   // so start one before the first column.
   c_ulLinePos = -1;
   c_ulLine    = 1;

   c_poStream  = p_poStream;

   return PARSE_STATUS_OK;
}
//
// ------------------------------------------------------------------
//
// Reads the next token from the current stream into *p_poToken.
//
// Spaces, tabs, newlines and both comment styles are skipped.  In
// LEX_MODE_PARSER a '<' starts an operator ("<", "<=", "<<", "<<=");
// in any other mode "<...>" is read as a TOKEN_BRACKET_STRING, just as
// "\"...\"" is always read as a TOKEN_QUOTED_STRING.
//
// Token text is truncated (the remaining characters of an identifier or
// number are still consumed) so that CToken::c_szID never receives more
// than CToken::MAX_TOKEN-1 characters plus the terminator.
//
// p_poToken - token to fill in; it is Reset() before anything else.
//
// Returns PARSE_STATUS_FILE_NOT_OPEN when no stream has been set,
// otherwise PARSE_STATUS_OK.  End of input (including an unterminated
// comment or literal) is reported through a TOKEN_EOF token, not through
// the return value.
ParseStatus CLexer::Lex( CToken * p_poToken )
{
   ParseStatus a_eStatus = PARSE_STATUS_OK;
   
   p_poToken->Reset();
   if( !c_poStream )
      return PARSE_STATUS_FILE_NOT_OPEN;

   // Highest number of characters that may be stored in c_szID; one slot
   // is always kept free for the terminating null.
   const int a_iMaxLen = CToken::MAX_TOKEN-1;

   TCHAR          a_cChar = 0;
   unsigned long  a_ulPos = 0;
   while( 1 )
   {
      // Remember where this token starts before consuming its first char.
      a_ulPos = c_poStream->GetPos();
      a_cChar = c_poStream->GetC();
      c_ulLinePos++;
      
      switch( a_cChar )
      {
         case _T(' '):
         case _T('\t'):
            // Skip whitespace.
            // These chars often come one after another.
            while( a_cChar == _T(' ') || a_cChar == _T('\t') )
            {
               a_cChar = c_poStream->GetC();
            }
            c_ulLinePos--;

            // Put the last char back (it's not whitespace).
            c_poStream->UngetC( a_cChar );
            continue;
         
         case _T('\n'):
            IncLine();
            break;

         case _TEOF:
            p_poToken->c_eType = TOKEN_EOF;
            return PARSE_STATUS_OK;
         
         case _T('/'):
         {
            a_cChar = c_poStream->GetC();
            if( a_cChar == _T('/') )
            {
               // It's a single line comment.
               while( 1 )
               {
                  a_cChar = c_poStream->GetC();
                  if( a_cChar == _T('\n') || a_cChar == _T('\r') || a_cChar == _TEOF )
                  {
                     // Push it back into the stream and go back to the top of the main while loop.
                     c_poStream->UngetC( a_cChar );
                     break;
                  }
               }
               
               // We will let the main switch handle whatever character we ended on.
               continue;
            }
            else
            if( a_cChar == _T('*') )
            {
               // It's a multiple line comment.
               while( 1 )
               {
                  a_cChar = c_poStream->GetC();
                  if( a_cChar == _T('*') )
                  {
                     a_cChar = c_poStream->GetC();
                     if( a_cChar == _T('/') )
                        // We're out of the comment.
                        break;
                     else
                        // Put the extra char back so it can be processed.
                        c_poStream->UngetC( a_cChar );
                  }
                  else
                  if( a_cChar == _T('\n') )
                     IncLine();
                  else
                  if( a_cChar == _TEOF )
                  {
                     // It's an unterminated comment.
                     p_poToken->c_eType = TOKEN_EOF;
                     return PARSE_STATUS_OK;
                  }
               }
            }
            else
            {
               c_poStream->UngetC( a_cChar );

               // It's a "divide" character.
               p_poToken->c_szID[0] = _T('/');
               p_poToken->c_szID[1] = _T('\0');
               p_poToken->c_eType = TOKEN_DIVIDE;
               // Record the position like every other operator token does.
               p_poToken->c_ulPos = a_ulPos;

               return PARSE_STATUS_OK;
            }
            
            // The comment has been skipped; go around again for the next token.
            break;
         }
         
         case _T('<'):
         case _T('\"'):
         {
            if( c_eMode == LEX_MODE_PARSER && a_cChar == _T('<') )
            {
               // It's an operator.
               a_cChar = c_poStream->GetC();
               if( a_cChar == _T('<') )
               {
                  a_cChar = c_poStream->GetC();
                  if( a_cChar == _T('=') )
                     p_poToken->Set( _T("<<="), a_ulPos, TOKEN_LEFT_LEFT_EQUALS );
                  else
                  {
                     p_poToken->Set( _T("<<"), a_ulPos, TOKEN_LEFT_LEFT );

                     c_poStream->UngetC( a_cChar );
                  }
               }
               else
               {
                  if( a_cChar == _T('=') )
                     // "<=" used to be emitted as ">=" / TOKEN_RIGHT_EQUALS (copy/paste slip).
                     // NOTE(review): assumes Token.h declares TOKEN_LEFT_EQUALS next to
                     // TOKEN_RIGHT_EQUALS - confirm.
                     p_poToken->Set( _T("<="), a_ulPos, TOKEN_LEFT_EQUALS );
                  else
                  {
                     p_poToken->Set( _T('<'), a_ulPos, TOKEN_LEFT );

                     c_poStream->UngetC( a_cChar );
                  }
               }
            
               return PARSE_STATUS_OK;
            }
            else
            {
               // Looks like the start of a string literal.
               // Either of these forms are accepted:
               // <StringLiteral>
               // "StringLiteral"
               BOOL a_bIsBrackets = (a_cChar == _T('<'));
               int a_iCurPos = 0;
               BOOL a_bDone = FALSE;
               p_poToken->c_eType = a_bIsBrackets ? TOKEN_BRACKET_STRING : TOKEN_QUOTED_STRING;
               p_poToken->c_ulPos = a_ulPos;
               while( !a_bDone )
               {
                  if( a_iCurPos == a_iMaxLen )
                  {
                     // The string is too long!  Oh no!
                     a_bDone = TRUE;
                     break;
                  }
               
                  // Get the next char, and decide what to do with it.
                  a_cChar = c_poStream->GetC();
                  switch( a_cChar )
                  {
                     case _T('\\'):
                        if( c_eMode == LEX_MODE_PARSER )
                        {
                           // Escape sequence.
                           a_eStatus = EscapeSequence( p_poToken, a_iCurPos );
                        }
                        else
                           // Outside parser mode a backslash is ordinary text (e.g. a path separator).
                           p_poToken->c_szID[a_iCurPos++] = a_cChar;
                        break;
                  
                     case _T('>'):
                     case _T('\"'):
                        if( a_cChar == _T('>') && !a_bIsBrackets ||
                            a_cChar == _T('\"') && a_bIsBrackets )
                        {
                           // We haven't found the matching delimiter string, so we aren't done yet.
                           p_poToken->c_szID[a_iCurPos++] = a_cChar;
                           break;
                        }
                        else
                        {
                           // We're done.
                           a_bDone = TRUE;
                        }
                        break;
                  
                     case _T('\n'):
                     case _TEOF:
                        // Error.
                        if( a_cChar == _T('\n') )
                        {
                           IncLine();
                        }
                        else
                        {
                           p_poToken->c_eType = TOKEN_EOF;
                        }

                        a_bDone = TRUE;
                        break;

                     default:
                        p_poToken->c_szID[a_iCurPos++] = a_cChar;
                        break;
                  }
               }
            
               p_poToken->c_szID[a_iCurPos] = _T('\0');
            }
            return PARSE_STATUS_OK;
         }

         case _T('\''):
         {
            // Character constant.
            p_poToken->c_eType = TOKEN_CHAR_LIT;
            p_poToken->c_ulPos = a_ulPos;
            int a_iCurPos = 0;
            BOOL a_bDone = FALSE;
            while( !a_bDone )
            {
               if( a_iCurPos == a_iMaxLen )
               {
                  // Same overflow guard as the string literal loop: stop
                  // before the token buffer is overrun.
                  a_bDone = TRUE;
                  break;
               }

               a_cChar = c_poStream->GetC();
               switch( a_cChar )
               {
                  case _T('\''):
                     if( a_iCurPos == 0 )
                     {
                        // Empty character constant.
                        //Error( ERROR_EMPTY_CHARACTER_CONSTANT, p_poToken );
                     }
                     // We're done.
                     a_bDone = TRUE;
                     break;

                  case _T('\\'):
                  {
                     // Escape sequence.
                     a_eStatus = EscapeSequence( p_poToken, a_iCurPos );
                     break;
                  }

                  case _T('\n'):
                  case _TEOF:
                     // Error.
                     if( a_cChar == _T('\n') )
                     {
                        //Error( ERROR_NEWLINE_IN_CONSTANT, p_poToken );
                        // NOTE(review): the other newline sites call IncLine(); this one
                        // bumps the counter directly - confirm whether that is intended.
                        c_ulLine++;
                     }
                     else
                     {
                        //Error( ERROR_EOF_UNEXPECTED, p_poToken );
                        p_poToken->c_eType = TOKEN_EOF;
                     }

                     a_bDone = TRUE;
                     break;

                  default:
                     p_poToken->c_szID[a_iCurPos++] = a_cChar;
                     break;
               }
            }
            
            // Null terminate.
            p_poToken->c_szID[a_iCurPos] = _T('\0');
            p_poToken->c_ulFlags |= TOKEN_FLAG_CONST;
            // (Multi-character constants are accepted as-is; no diagnostics are issued yet.)
            return PARSE_STATUS_OK;
         }
         
         case _T('='):
         {
            a_cChar = c_poStream->GetC();
            if( a_cChar == _T('=') )
               p_poToken->Set( _T("=="), a_ulPos, TOKEN_EQUAL_EQUAL );
            else
            {
               p_poToken->Set( _T('='), a_ulPos, TOKEN_EQUAL );
               c_poStream->UngetC( a_cChar );
            }
            return PARSE_STATUS_OK;
         }

         case _T(':'):
         {
            a_cChar = c_poStream->GetC();
            if( a_cChar == _T(':') )
            {
               // Scope resolution.
               p_poToken->Set( _T("::"), a_ulPos, TOKEN_COLON_COLON );
            }
            else
            {
               // Just a regular colon.
               p_poToken->Set( _T(':'), a_ulPos, TOKEN_COLON );
               
               // Push the extra char back.
               c_poStream->UngetC( a_cChar );
            }
            return PARSE_STATUS_OK;
         }
         
         case _T('['):
         {
            p_poToken->Set( a_cChar, a_ulPos, TOKEN_LBRACKET );
            return PARSE_STATUS_OK;
         }
         
         case _T(']'):
         {
            p_poToken->Set( a_cChar, a_ulPos, TOKEN_RBRACKET );
            return PARSE_STATUS_OK;
         }
         
         case _T('{'):
         {
            p_poToken->Set( a_cChar, a_ulPos, TOKEN_LCURLY );
            return PARSE_STATUS_OK;
         }
         
         case _T('}'):
         {
            p_poToken->Set( a_cChar, a_ulPos, TOKEN_RCURLY );
            return PARSE_STATUS_OK;
         }

         case _T('('):
         {
            p_poToken->Set( a_cChar, a_ulPos, TOKEN_LPAREN );
            return PARSE_STATUS_OK;
         }

         case _T(')'):
         {
            p_poToken->Set( a_cChar, a_ulPos, TOKEN_RPAREN );
            return PARSE_STATUS_OK;
         }
         
         case _T(','):
         {
            p_poToken->Set( a_cChar, a_ulPos, TOKEN_COMMA );
            return PARSE_STATUS_OK;
         }
         
         case _T(';'):
         {
            p_poToken->Set( a_cChar, a_ulPos, TOKEN_SEMICOLON );
            return PARSE_STATUS_OK;
         }
         
         case _T('~'):
         {
            p_poToken->Set( a_cChar, a_ulPos, TOKEN_TILDE );
            return PARSE_STATUS_OK;
         }
         
         case _T('?'):
         {
            p_poToken->Set( a_cChar, a_ulPos, TOKEN_QUESTION );
            return PARSE_STATUS_OK;
         }
         
         case _T('#'):
         {
            p_poToken->Set( a_cChar, a_ulPos, TOKEN_POUND );
            return PARSE_STATUS_OK;
         }
         
         case _T('-'):
         {
            a_cChar = c_poStream->GetC();
            if( a_cChar == _T('>') )
            {
               a_cChar = c_poStream->GetC();
               if( a_cChar == _T('*') )
                  // Dereference pointer to class member.
                  p_poToken->Set( _T("->*"), a_ulPos, TOKEN_RIGHT_POINTER_STAR );
               else
               {
                  // Member selection (pointer).
                  p_poToken->Set( _T("->"), a_ulPos, TOKEN_RIGHT_POINTER );
                  
                  c_poStream->UngetC( a_cChar );
               }
            }
            else
            if( a_cChar == _T('-') )
               // Minus minus.
               p_poToken->Set( _T("--"), a_ulPos, TOKEN_MINUS_MINUS );
            else
            if( a_cChar == _T('=') )
               // Minus equals.
               p_poToken->Set( _T("-="), a_ulPos, TOKEN_MINUS_EQUALS );
            else
            {
               // Just a regular minus!
               p_poToken->Set( _T('-'), a_ulPos, TOKEN_MINUS );
               
               c_poStream->UngetC( a_cChar );
            }

            return PARSE_STATUS_OK;
         }
         
         case _T('+'):
         {
            a_cChar = c_poStream->GetC();
            if( a_cChar == _T('+') )
               p_poToken->Set( _T("++"), a_ulPos, TOKEN_PLUS_PLUS );
            else
            if( a_cChar == _T('=') )
               p_poToken->Set( _T("+="), a_ulPos, TOKEN_PLUS_EQUALS );
            else
            {
               // Regular plus.
               p_poToken->Set( _T('+'), a_ulPos, TOKEN_PLUS );

               c_poStream->UngetC( a_cChar );
            }

            return PARSE_STATUS_OK;
         }

         case _T('*'):
         {
            a_cChar = c_poStream->GetC();
            if( a_cChar == _T('=') )
               p_poToken->Set( _T("*="), a_ulPos, TOKEN_MULTIPLY_EQUALS );
            else
            {
               // Regular star.
               p_poToken->Set( _T('*'), a_ulPos, TOKEN_MULTIPLY );

               c_poStream->UngetC( a_cChar );
            }

            return PARSE_STATUS_OK;
         }

         case _T('%'):
         {
            a_cChar = c_poStream->GetC();
            if( a_cChar == _T('=') )
               p_poToken->Set( _T("%="), a_ulPos, TOKEN_MODULUS_EQUALS );
            else
            {
               // Regular modulus.
               p_poToken->Set( _T('%'), a_ulPos, TOKEN_MODULUS );

               c_poStream->UngetC( a_cChar );
            }

            return PARSE_STATUS_OK;
         }
         
         case _T('&'):
         {
            a_cChar = c_poStream->GetC();
            if( a_cChar == _T('&') )
               p_poToken->Set( _T("&&"), a_ulPos, TOKEN_AMPERSAND_AMPERSAND );
            else
            if( a_cChar == _T('=') )
               p_poToken->Set( _T("&="), a_ulPos, TOKEN_AMPERSAND_EQUALS );
            else
            {
               // Regular ampersand.
               p_poToken->Set( _T('&'), a_ulPos, TOKEN_AMPERSAND );
               
               c_poStream->UngetC( a_cChar );
            }

            return PARSE_STATUS_OK;
         }

         case _T('|'):
         {
            a_cChar = c_poStream->GetC();
            if( a_cChar == _T('|') )
               p_poToken->Set( _T("||"), a_ulPos, TOKEN_BAR_BAR );
            else
            if( a_cChar == _T('=') )
               p_poToken->Set( _T("|="), a_ulPos, TOKEN_BAR_EQUALS );
            else
            {
               // Regular bar.
               p_poToken->Set( _T('|'), a_ulPos, TOKEN_BAR );
               
               c_poStream->UngetC( a_cChar );
            }

            return PARSE_STATUS_OK;
         }

         case _T('^'):
         {
            a_cChar = c_poStream->GetC();
            if( a_cChar == _T('=') )
               p_poToken->Set( _T("^="), a_ulPos, TOKEN_HAT_EQUALS );
            else
            {
               // Regular hat.
               p_poToken->Set( _T('^'), a_ulPos, TOKEN_HAT );
               
               c_poStream->UngetC( a_cChar );
            }

            return PARSE_STATUS_OK;
         }

         case _T('!'):
         {
            a_cChar = c_poStream->GetC();
            if( a_cChar == _T('=') )
               p_poToken->Set( _T("!="), a_ulPos, TOKEN_EXCLAMATION_EQUALS );
            else
            {
               // Regular exclamation.
               p_poToken->Set( _T('!'), a_ulPos, TOKEN_EXCLAMATION );

               c_poStream->UngetC( a_cChar );
            }

            return PARSE_STATUS_OK;
         }
         
         case _T('>'):
         {
            a_cChar = c_poStream->GetC();
            if( a_cChar == _T('>') )
            {
               a_cChar = c_poStream->GetC();
               if( a_cChar == _T('=') )
                  p_poToken->Set( _T(">>="), a_ulPos, TOKEN_RIGHT_RIGHT_EQUALS );
               else
               {
                  p_poToken->Set( _T(">>"), a_ulPos, TOKEN_RIGHT_RIGHT );

                  c_poStream->UngetC( a_cChar );
               }
            }
            else
            {
               if( a_cChar == _T('=') )
                  p_poToken->Set( _T(">="), a_ulPos, TOKEN_RIGHT_EQUALS );
               else
               {
                  p_poToken->Set( _T('>'), a_ulPos, TOKEN_RIGHT );

                  c_poStream->UngetC( a_cChar );
               }
            }
            
            return PARSE_STATUS_OK;
         }

         default:
         {
            // The default case handles the rest.
            if( _istdigit( a_cChar ) || a_cChar == _T('.') )
            {
               // Read a number, of some sort.
               //
            
               // We will default it to integer.  If a floating point is found, we'll set it to DECIMAL.
               p_poToken->c_eType = TOKEN_INTEGER;
               p_poToken->c_ulFlags |= TOKEN_FLAG_CONST;

               int a_iCurPos = 0;
               if( a_cChar == _T('0') )
               {
                  a_cChar = c_poStream->GetC();
                  if( a_cChar == _T('x') || a_cChar == _T('X') )
                  {
                     // We have a hex number to deal with.
                     p_poToken->c_szID[0] = _T('0');
                     p_poToken->c_szID[1] = a_cChar;
                     a_iCurPos += 2;
                     a_cChar = c_poStream->GetC();
                     while( _istxdigit( a_cChar ) )
                     {
                        // Keep consuming digits even when the buffer is full.
                        if( a_iCurPos < a_iMaxLen )
                           p_poToken->c_szID[a_iCurPos++] = a_cChar;
                        a_cChar = c_poStream->GetC();
                     }
                     
                     // A letter is handed to IntegerSuffix() below, which expects it to
                     // have been consumed already; anything else goes back to the stream.
                     if( !_istalpha( a_cChar ) )
                        c_poStream->UngetC( a_cChar );
                     p_poToken->c_szID[a_iCurPos] = 0; // Null terminate
                     p_poToken->c_ulFlags |= TOKEN_FLAG_HEX;
                  }
                  else
                  {
                     if( _istdigit( a_cChar ) || a_cChar == _T('.') )
                     {
                        // We might have an octal or floating point number to deal with.
                        p_poToken->c_szID[0] = _T('0');
                        a_iCurPos++;
                        while( _istdigit( a_cChar ) )
                        {
                           if( a_iCurPos < a_iMaxLen )
                              p_poToken->c_szID[a_iCurPos++] = a_cChar;
                           a_cChar = c_poStream->GetC();
                        }
                        
                        if( a_cChar == _T('.') )
                        {
                           // Uh oh.  We've got a floating point number instead.
                           p_poToken->c_eType = TOKEN_DECIMAL;
                        }
                        else
                        {
                           p_poToken->c_szID[a_iCurPos] = 0; // Null terminate
                           p_poToken->c_ulFlags |= TOKEN_FLAG_OCT;
                           // As for hex: a letter is left for IntegerSuffix() to deal with.
                           if( !_istalpha( a_cChar ) )
                              c_poStream->UngetC( a_cChar );

                           // We have to scan the octal number for errors now.
                           for( int i = 0; i<a_iCurPos; ++i )
                           {
                              TCHAR a_cDigit = p_poToken->c_szID[i];
                              if( a_cDigit - _T('0') > 7 )
                              {
                                 // Error.
                                 // Illegal for base 8;
                                 TCHAR a_acBase[2] = { _T('8'), 0 };
                                 TCHAR a_acIllegal[2] = { a_cDigit, 0 };
                                 //Error( ERROR_ILLEGAL_DIGIT, p_poToken, a_acIllegal, a_acBase );
                              }
                           }
                        }
                     }
                     else
                     {
                        // We are a single integer: 0.
                        if( !_istalpha( a_cChar ) )
                           c_poStream->UngetC( a_cChar );
                        p_poToken->c_szID[0] = _T('0');
                        p_poToken->c_szID[1] = _T('\0');
                     }
                  }
                  
                  // Check to see if we are still flagged as an integer.
                  // If we're not, that means we found a '.' and we need to
                  // process a floating point number.
                  // If we are, we'll process any potential suffix.
                  if( p_poToken->c_eType == TOKEN_INTEGER )
                  {
                     // Let's check out a potential suffix.
                     if( _istalpha( a_cChar ) )
                     {
                        a_eStatus = IntegerSuffix( p_poToken, a_cChar );
                     }
                     
                     p_poToken->c_ulPos = a_ulPos;
                     return PARSE_STATUS_OK;
                  }
               }

               // We have a regular decimal integer or floating point.

               BOOL a_bIsFloating = a_cChar == _T('.')?TRUE:FALSE;
               if( a_bIsFloating )
               {
                  // Only a '.' that STARTS the token can be an operator.  When digits
                  // have already been stored (e.g. "0." or "00.") it is a decimal point.
                  if( a_iCurPos == 0 )
                  {
                     // Do a special check for the '.' operator and the ".*" operator.
                     TCHAR a_cSpecialCheck = c_poStream->GetC();
                     if( !_istdigit( a_cSpecialCheck ) )
                     {
                        if( a_cSpecialCheck == _T('*') )
                           // Apply pointer to class member (objects).
                           p_poToken->Set( _T(".*"), a_ulPos, TOKEN_DOT_STAR );
                        else
                        {
                           p_poToken->Set( a_cChar, a_ulPos, TOKEN_DOT );
                  
                           c_poStream->UngetC( a_cSpecialCheck );
                        }

                        // We weren't a number, nothing else to do.
                        return PARSE_STATUS_OK;
                     }
                     else
                        c_poStream->UngetC( a_cSpecialCheck );
                  }

                  p_poToken->c_eType = TOKEN_DECIMAL;
               }

               while( _istdigit( a_cChar ) || a_cChar == _T('.') )
               {
                  if( a_iCurPos < a_iMaxLen )
                     p_poToken->c_szID[a_iCurPos++] = a_cChar;
                  a_cChar = c_poStream->GetC();

                  if( a_cChar == _T('.') )
                  {
                     p_poToken->c_eType = TOKEN_DECIMAL;
                     if( a_bIsFloating )
                        break; // We're done.  A second '.' is not part of this number.
                     
                     a_bIsFloating = TRUE;
                  }
               }
               
               if( a_cChar == _T('e') || a_cChar == _T('E') )
               {
                  // Let's process the exponent.
                  a_bIsFloating = TRUE;
                  if( a_iCurPos < a_iMaxLen )
                     p_poToken->c_szID[a_iCurPos++] = a_cChar;
                  
                  a_cChar = c_poStream->GetC();
                  if( a_cChar == _T('-') || a_cChar == _T('+') )
                  {
                     // Process the +/-
                     
                     if( a_iCurPos < a_iMaxLen )
                        p_poToken->c_szID[a_iCurPos++] = a_cChar;
                     a_cChar = c_poStream->GetC();
                  }

                  // Keep reading until we run out of numbers.
                  while( _istdigit( a_cChar ) )
                  {
                     if( a_iCurPos < a_iMaxLen )
                        p_poToken->c_szID[a_iCurPos++] = a_cChar;
                     a_cChar = c_poStream->GetC();
                  }
               }
               
               // Let's check out a potential suffix.
               if( _istalpha( a_cChar ) )
               {
                  if( a_bIsFloating )
                  {
                     // Check floating suffix.
                     switch( a_cChar )
                     {
                        case _T('l'):
                        case _T('L'):
                        {
                           // Check for presence of extraneous chars.
                           a_cChar = c_poStream->GetC();
                           if( _istalpha( a_cChar ) )
                           {
                              // Bad suffix on number.
                              TCHAR a_acSuffix[2] = { a_cChar, 0 };
                              //Error( ERROR_SYNTAX_BAD_SUFFIX, p_poToken, a_acSuffix );
                           }
                           else
                           {
                              // Modify the type.
                              p_poToken->c_ulFlags |= TOKEN_FLAG_LONG;
                           }
                           c_poStream->UngetC( a_cChar );
                           break;
                        }
                        
                        case _T('f'):
                        case _T('F'):
                        {
                           // Check for presence of extraneous chars.
                           a_cChar = c_poStream->GetC();
                           if( _istalpha( a_cChar ) )
                           {
                              // Bad suffix on number.
                              TCHAR a_acSuffix[2] = { a_cChar, 0 };
                              //Error( ERROR_SYNTAX_BAD_SUFFIX, p_poToken, a_acSuffix );
                           }
                           else
                           {
                              // Modify the type.
                              p_poToken->c_ulFlags |= TOKEN_FLAG_FLOAT;
                           }
                           c_poStream->UngetC( a_cChar );
                           break;
                        }

                        default:
                           // Bad suffix on number.
                           TCHAR a_acSuffix[2] = { a_cChar, 0 };
                           //Error( ERROR_SYNTAX_BAD_SUFFIX, p_poToken, a_acSuffix );
                           c_poStream->UngetC( a_cChar );
                           break;
                     }
                  }
                  else
                  {
                     // Check integer suffix.
                     a_eStatus = IntegerSuffix( p_poToken, a_cChar );
                  }
               }
               else
                  c_poStream->UngetC( a_cChar );

               p_poToken->c_szID[a_iCurPos] = 0; // Null terminate
               p_poToken->c_ulPos = a_ulPos;
            }
            else
            if( _istalpha( a_cChar ) || a_cChar == _T('_') )
            {
               p_poToken->c_eType = TOKEN_IDENTIFIER;

               // Read a word.  Over-long identifiers are truncated, but every
               // character is still consumed so the tail is not lexed separately.
               int a_iCurPos = 0;
               while( _istalpha( a_cChar ) || _istdigit( a_cChar ) || a_cChar == _T('_') )
               {
                  if( a_iCurPos < a_iMaxLen )
                     p_poToken->c_szID[a_iCurPos++] = a_cChar;
                  a_cChar = c_poStream->GetC();
               }

               // put the last char back into the stream.
               c_poStream->UngetC( a_cChar );
               p_poToken->c_szID[a_iCurPos] = 0; // Null terminate
               p_poToken->c_ulPos = a_ulPos;
            }
            else
            {
               // I'm not sure what to do with this case yet.
               // The unknown character is handed back as a one-char TOKEN_NONE.
               //TCHAR a_szText[16] = {0};
               //_stprintf( a_szText, _T("Unknown %c\n"), a_cChar );
               //OutputDebugString( a_szText );
               p_poToken->c_szID[0] = a_cChar;
               p_poToken->c_szID[1] = 0;
               p_poToken->c_eType = TOKEN_NONE;
               p_poToken->c_ulPos = a_ulPos;
            }
            return PARSE_STATUS_OK;
         }
      }
   }
}
//
// ------------------------------------------------------------------
//
// Decodes one escape sequence inside a string or character literal.
//
// The caller has already consumed the backslash.  This function reads
// the rest of the sequence from the stream and appends the single
// character it stands for to p_poToken->c_szID.
//
// p_poToken  - token whose text buffer receives the decoded character.
// p_riCurPos - in/out write index into c_szID; advanced by one for every
//              character stored (nothing is stored on end of input).
//
// Supported forms: the simple escapes (\' \" \\ \? \a \b \f \n \r \t \v),
// octal (\o, \oo, \ooo) and hexadecimal (\x followed by hex digits).
// Any other character after the backslash is stored as itself.
//
// Always returns PARSE_STATUS_OK.
ParseStatus CLexer::EscapeSequence( CToken * p_poToken, int & p_riCurPos )
{
   ParseStatus a_eStatus = PARSE_STATUS_OK;

   TCHAR a_cChar = c_poStream->GetC();
   
   switch( a_cChar )
   {
      case _T('\''):
         p_poToken->c_szID[p_riCurPos++] = _T('\'');
         break;
      case _T('\"'):
         p_poToken->c_szID[p_riCurPos++] = _T('\"');
         break;
      case _T('\\'):
         p_poToken->c_szID[p_riCurPos++] = _T('\\');
         break;
      case _T('?'):
         p_poToken->c_szID[p_riCurPos++] = _T('\?');
         break;
      case _T('a'):
         p_poToken->c_szID[p_riCurPos++] = _T('\a');
         break;
      case _T('b'):
         p_poToken->c_szID[p_riCurPos++] = _T('\b');
         break;
      case _T('f'):
         p_poToken->c_szID[p_riCurPos++] = _T('\f');
         break;
      case _T('n'):
         p_poToken->c_szID[p_riCurPos++] = _T('\n');
         break;
      case _T('r'):
         p_poToken->c_szID[p_riCurPos++] = _T('\r');
         break;
      case _T('t'):
         p_poToken->c_szID[p_riCurPos++] = _T('\t');
         break;
      case _T('v'):
         p_poToken->c_szID[p_riCurPos++] = _T('\v');
         break;
      
      case _TEOF:
         // Leave the end-of-input marker for the caller's loop to see.
         //Error( ERROR_EOF_UNEXPECTED, p_poToken );
         c_poStream->UngetC( a_cChar );
         break;

      default:
      {
         if( a_cChar >= _T('0') && a_cChar <= _T('7') )
         {
            // Octal escape sequence: one to three octal digits.
            //
            // Each pass folds in the digit we are holding and only then
            // reads the next character, so a non-digit is never folded
            // into the value and no digit is pushed back.
            int           a_iDigits = 0;
            unsigned long a_ulOctal = 0;
            while( a_cChar >= _T('0') && a_cChar <= _T('7') && a_iDigits < 3 )
            {
               a_ulOctal = a_ulOctal * 8 + ( a_cChar - _T('0') );
               a_iDigits++;

               a_cChar = c_poStream->GetC();
            }

            // a_cChar is now the first character AFTER the escape.
            c_poStream->UngetC( a_cChar );

            p_poToken->c_szID[p_riCurPos++] = static_cast<TCHAR>( a_ulOctal );
         }
         else
         if( a_cChar == _T('x') )
         {
            // Hexadecimal escape sequence.
            //

            int           a_iDigits = 0;
            unsigned long a_ulHex   = 0; // Unsigned: long digit runs wrap instead of overflowing.
            a_cChar                 = c_poStream->GetC();
            while( _istxdigit( a_cChar ) )
            {
               if( a_cChar >= _T('0') && a_cChar <= _T('9') )
               {
                  a_ulHex = a_ulHex * 16 + ( a_cChar - _T('0') );
               }
               else
               if( a_cChar >= _T('a') && a_cChar <= _T('f') )
               {
                  a_ulHex = a_ulHex * 16 + ( a_cChar - _T('a') + 10 );
               }
               else
               if( a_cChar >= _T('A') && a_cChar <= _T('F') )
               {
                  a_ulHex = a_ulHex * 16 + ( a_cChar - _T('A') + 10 );
               }
               
               a_cChar = c_poStream->GetC();
               a_iDigits++;
            }

            // Whatever ended the digit run (possibly the closing quote)
            // belongs to the caller, with or without digits.
            c_poStream->UngetC( a_cChar );

            if( a_iDigits )
            {
               p_poToken->c_szID[p_riCurPos++] = static_cast<TCHAR>( a_ulHex );
            }
            else
            {
               // Warning.  "\x" with no digits: treat it like any other
               // unrecognized escape and keep the 'x' itself.
               p_poToken->c_szID[p_riCurPos++] = _T('x');
            }
         }
         else
         {
            // Warning.  Unrecognized escape sequence.
            //p_poToken->c_szID[p_riCurPos++] = _T('\\');
            p_poToken->c_szID[p_riCurPos++] = a_cChar;
         }
         break;
      }
   }


   return a_eStatus;
}
//
// ------------------------------------------------------------------
//
// Handles the (optional) suffix that follows an integer literal.
//
// p_poToken - Token being built.  TOKEN_FLAG_ULONG is OR'ed into its
//             c_ulFlags when both an 'l'/'L' and a 'u'/'U' are present,
//             in either order ("ul", "LU", "Ul", ...).
// p_rcChar  - In:  the candidate suffix character (the caller appears to
//                  have read it from the stream already -- confirm at the
//                  call site).
//             Out: unchanged when it is not 'l'/'L'/'u'/'U'; otherwise the
//                  character that was read after it.
//
// Characters that do not belong to the suffix are handed back to the
// stream with UngetC().  A lone 'l'/'L' or 'u'/'U' is consumed without
// touching the token flags.
//
// Returns PARSE_STATUS_OK in every case; bad-suffix error reporting is
// currently disabled (see the commented-out Error() calls below).
ParseStatus CLexer::IntegerSuffix( CToken * p_poToken, TCHAR & p_rcChar )
{
   ParseStatus a_eStatus = PARSE_STATUS_OK;

   // The only character that may follow the first suffix character is its
   // "partner": 'u'/'U' after 'l'/'L', and 'l'/'L' after 'u'/'U'.
   TCHAR a_cPartnerLower = 0;
   TCHAR a_cPartnerUpper = 0;

   switch( p_rcChar )
   {
      case _T('l'):
      case _T('L'):
         a_cPartnerLower = _T('u');
         a_cPartnerUpper = _T('U');
         break;

      case _T('u'):
      case _T('U'):
         a_cPartnerLower = _T('l');
         a_cPartnerUpper = _T('L');
         break;

      default:
         // Not a suffix character at all.  Give it back to the stream and
         // leave p_rcChar as it was.
         //Error( ERROR_SYNTAX_BAD_SUFFIX, p_poToken, <offending char> );
         c_poStream->UngetC( p_rcChar );
         return a_eStatus;
   }

   // The first suffix character has been accepted; look at the next one.
   p_rcChar = c_poStream->GetC();
   if( p_rcChar == a_cPartnerLower || p_rcChar == a_cPartnerUpper )
   {
      // Both halves of the suffix are present: modify the type.
      p_poToken->c_ulFlags |= TOKEN_FLAG_ULONG;
   }
   else
   {
      // Whatever follows is not part of the suffix, so put it back.  If it
      // is a letter this would be a bad suffix on the number, but that
      // diagnostic is disabled, so no classification of the character is
      // performed here.
      //Error( ERROR_SYNTAX_BAD_SUFFIX, p_poToken, <offending char> );
      c_poStream->UngetC( p_rcChar );
   }

   return a_eStatus;
}
//
// ------------------------------------------------------------------
//

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Chris Richardson
Software Developer (Senior)
United States
I like to program, I like to sail.

| Advertise | Privacy | Terms of Use | Mobile
Web02 | 2.8.150414.1 | Last Updated 30 Jul 2003
Article Copyright 2003 by Chris Richardson
Everything else Copyright © CodeProject, 1999-2015
Layout: fixed | fluid