Click here to Skip to main content
15,892,005 members
Articles / Programming Languages / C++

Wave: a Standard conformant C++ preprocessor library

Rate me:
Please Sign up or sign in to vote.
4.96/5 (58 votes)
10 Jan 2004   13 min read   399.9K   4.4K   81
Describes a free and fully Standard conformant C++ preprocessor library
/*=============================================================================
    Wave: A Standard compliant C++ preprocessor

    Copyright (c) 2001-2004 Hartmut Kaiser
    http://spirit.sourceforge.net/

    Use, modification and distribution is subject to the Boost Software
    License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)

    See Copyright.txt for full acknowledgements.
=============================================================================*/

#if !defined(IDL_TOKEN_IDS_HPP_414E9A58_F079_4789_8AFF_513815CE475B_INCLUDED)
#define IDL_TOKEN_IDS_HPP_414E9A58_F079_4789_8AFF_513815CE475B_INCLUDED

#include <string>
#include <boost/assert.hpp>

///////////////////////////////////////////////////////////////////////////////
namespace wave {
namespace idllexer {

///////////////////////////////////////////////////////////////////////////////
//  Helpers to assemble and take apart token ids.
//
//  A token id is a 32 bit value laid out according to the masks defined in
//  token_category below:
//
//      0xFF000000  TokenTypeMask     main category
//      0xFFF00000  ExtTokenTypeMask  main category plus extension bits
//      0x000FFFFF  TokenValueMask    running number of the token
//
//  Note: being macros, these helpers are not scoped by the enclosing
//  namespaces, and they refer to TokenTypeMask/ExtTokenTypeMask unqualified.
//  These two names therefore have to be visible at the point of expansion.

//  combine a running number with a category
#define TOKEN_FROM_ID(id, cat)   ((id) | (cat))
//  strip the main category bits (the extension bits are kept)
#define ID_FROM_TOKEN(tok)       ((tok) & ~TokenTypeMask)
//  strip the main category and extension bits, leaving the running number
#define BASEID_FROM_TOKEN(tok)   ((tok) & ~ExtTokenTypeMask)
//  extract the main category bits
#define CATEGORY_FROM_TOKEN(tok) ((tok) & TokenTypeMask)
//  extract the main category and extension bits
#define EXTCATEGORY_FROM_TOKEN(tok) ((tok) & ExtTokenTypeMask)
//  true, if the main category of tok equals cat
#define IS_CATEGORY(tok, cat)    (CATEGORY_FROM_TOKEN(tok) == (cat))
//  true, if the extended category of tok equals cat
#define IS_EXTCATEGORY(tok, cat) (EXTCATEGORY_FROM_TOKEN(tok) == (cat))

///////////////////////////////////////////////////////////////////////////////
//  the token_category helps to classify the different token types
//
//  The upper eight bits (TokenTypeMask) hold the main category; the next four
//  bits refine it (ExtTokenTypeMask covers both). The lower twenty bits
//  (TokenValueMask) are left for the running token number.
enum token_category {
    IdentifierTokenType         = 0x10000000,
    ParameterTokenType          = 0x11000000,
    ExtParameterTokenType       = 0x11100000,
    KeywordTokenType            = 0x20000000,
    OperatorTokenType           = 0x30000000,
    LiteralTokenType            = 0x40000000,
    IntegerLiteralTokenType     = 0x41000000,
    FloatingLiteralTokenType    = 0x42000000,
    StringLiteralTokenType      = 0x43000000,
    CharacterLiteralTokenType   = 0x44000000,
    BoolLiteralTokenType        = 0x45000000,
    PPTokenType                 = 0x50000000,
    PPConditionalTokenType      = 0x50100000,

    UnknownTokenType            = 0xA0000000,
    EOLTokenType                = 0xB0000000,
    EOFTokenType                = 0xC0000000,
    WhiteSpaceTokenType         = 0xD0000000,
    InternalTokenType           = 0xE0000000,

    //  masks and flag bits
    TokenTypeMask               = 0xFF000000,
    //  NOTE(review): presumably marks alternative token spellings - confirm
    AltTokenType                = 0x00100000,
    //  NOTE(review): presumably marks trigraph spellings - confirm
    TriGraphTokenType           = 0x00200000,
    ExtTokenTypeMask            = 0xFFF00000,
    TokenValueMask              = 0x000FFFFF,
    //  no trailing comma after the last enumerator (not valid in C++98/03)
    MainTokenMask               = TokenTypeMask|TokenValueMask
};

///////////////////////////////////////////////////////////////////////////////
//  the token_id assigns unique numbers to the different C++ lexemes
//
//  The running numbers start at T_FIRST_TOKEN and are consecutive up to T_EOI.
//  get_token_name() below indexes its name table with
//  (running number - T_FIRST_TOKEN), so new tokens must be added to both
//  places in the same order.
enum token_id {
    T_FIRST_TOKEN  = 256,

    T_AND          = TOKEN_FROM_ID(T_FIRST_TOKEN, OperatorTokenType),
    T_ANDAND       = TOKEN_FROM_ID(257, OperatorTokenType),
    T_ASSIGN       = TOKEN_FROM_ID(258, OperatorTokenType),
    T_OR           = TOKEN_FROM_ID(259, OperatorTokenType),
    T_XOR          = TOKEN_FROM_ID(260, OperatorTokenType),
    T_COMMA        = TOKEN_FROM_ID(261, OperatorTokenType),
    T_COLON        = TOKEN_FROM_ID(262, OperatorTokenType),
    T_DIVIDE       = TOKEN_FROM_ID(263, OperatorTokenType),
    T_DOT          = TOKEN_FROM_ID(264, OperatorTokenType),
    T_EQUAL        = TOKEN_FROM_ID(265, OperatorTokenType),
    T_GREATER      = TOKEN_FROM_ID(266, OperatorTokenType),
    T_GREATEREQUAL = TOKEN_FROM_ID(267, OperatorTokenType),
    T_LEFTBRACE    = TOKEN_FROM_ID(268, OperatorTokenType),
    T_LESS         = TOKEN_FROM_ID(269, OperatorTokenType),
    T_LESSEQUAL    = TOKEN_FROM_ID(270, OperatorTokenType),
    T_LEFTPAREN    = TOKEN_FROM_ID(271, OperatorTokenType),
    T_LEFTBRACKET  = TOKEN_FROM_ID(272, OperatorTokenType),
    T_MINUS        = TOKEN_FROM_ID(273, OperatorTokenType),
    T_MINUSMINUS   = TOKEN_FROM_ID(274, OperatorTokenType),
    T_PERCENT      = TOKEN_FROM_ID(275, OperatorTokenType),
    T_NOT          = TOKEN_FROM_ID(276, OperatorTokenType),
    T_NOTEQUAL     = TOKEN_FROM_ID(277, OperatorTokenType),
    T_OROR         = TOKEN_FROM_ID(278, OperatorTokenType),
    T_PLUS         = TOKEN_FROM_ID(279, OperatorTokenType),
    T_PLUSPLUS     = TOKEN_FROM_ID(280, OperatorTokenType),
    T_QUESTION_MARK = TOKEN_FROM_ID(281, OperatorTokenType),
    T_RIGHTBRACE   = TOKEN_FROM_ID(282, OperatorTokenType),
    T_RIGHTPAREN   = TOKEN_FROM_ID(283, OperatorTokenType),
    T_RIGHTBRACKET = TOKEN_FROM_ID(284, OperatorTokenType),
    T_SEMICOLON    = TOKEN_FROM_ID(285, OperatorTokenType),
    T_SHIFTLEFT    = TOKEN_FROM_ID(286, OperatorTokenType),
    T_SHIFTRIGHT   = TOKEN_FROM_ID(287, OperatorTokenType),
    T_STAR         = TOKEN_FROM_ID(288, OperatorTokenType),
    T_COMPL        = TOKEN_FROM_ID(289, OperatorTokenType),

    T_FALSE        = TOKEN_FROM_ID(290, BoolLiteralTokenType),
    T_TRUE         = TOKEN_FROM_ID(291, BoolLiteralTokenType),

    T_PP_DEFINE    = TOKEN_FROM_ID(292, PPTokenType),
    T_PP_IF        = TOKEN_FROM_ID(293, PPConditionalTokenType),
    T_PP_IFDEF     = TOKEN_FROM_ID(294, PPConditionalTokenType),
    T_PP_IFNDEF    = TOKEN_FROM_ID(295, PPConditionalTokenType),
    T_PP_ELSE      = TOKEN_FROM_ID(296, PPConditionalTokenType),
    T_PP_ELIF      = TOKEN_FROM_ID(297, PPConditionalTokenType),
    T_PP_ENDIF     = TOKEN_FROM_ID(298, PPConditionalTokenType),
    T_PP_ERROR     = TOKEN_FROM_ID(299, PPTokenType),
    T_PP_LINE      = TOKEN_FROM_ID(300, PPTokenType),
    T_PP_PRAGMA    = TOKEN_FROM_ID(301, PPTokenType),
    T_PP_UNDEF     = TOKEN_FROM_ID(302, PPTokenType),
    T_PP_WARNING   = TOKEN_FROM_ID(303, PPTokenType),

    T_IDENTIFIER   = TOKEN_FROM_ID(304, IdentifierTokenType),

    T_OCTALINT     = TOKEN_FROM_ID(305, IntegerLiteralTokenType),
    T_DECIMALINT   = TOKEN_FROM_ID(306, IntegerLiteralTokenType),
    T_HEXAINT      = TOKEN_FROM_ID(307, IntegerLiteralTokenType),
    T_INTLIT       = TOKEN_FROM_ID(308, IntegerLiteralTokenType),
    T_FLOATLIT     = TOKEN_FROM_ID(309, FloatingLiteralTokenType),
    T_FIXEDPOINTLIT = TOKEN_FROM_ID(310, FloatingLiteralTokenType),
    T_CCOMMENT     = TOKEN_FROM_ID(311, WhiteSpaceTokenType),
    T_CPPCOMMENT   = TOKEN_FROM_ID(312, WhiteSpaceTokenType),
    T_CHARLIT      = TOKEN_FROM_ID(313, CharacterLiteralTokenType),
    T_STRINGLIT    = TOKEN_FROM_ID(314, StringLiteralTokenType),
    T_CONTLINE     = TOKEN_FROM_ID(315, EOLTokenType),
    T_SPACE        = TOKEN_FROM_ID(316, WhiteSpaceTokenType),
    T_SPACE2       = TOKEN_FROM_ID(317, WhiteSpaceTokenType),
    T_NEWLINE      = TOKEN_FROM_ID(318, EOLTokenType),

    T_POUND_POUND  = TOKEN_FROM_ID(319, OperatorTokenType),
    T_POUND        = TOKEN_FROM_ID(320, OperatorTokenType),
    T_ANYTOKEN     = TOKEN_FROM_ID(321, UnknownTokenType),

    T_PP_INCLUDE   = TOKEN_FROM_ID(322, PPTokenType),
    T_PP_QHEADER   = TOKEN_FROM_ID(323, PPTokenType),
    T_PP_HHEADER   = TOKEN_FROM_ID(324, PPTokenType),

    T_EOF          = TOKEN_FROM_ID(325, EOFTokenType),      // end of file reached
    T_EOI          = TOKEN_FROM_ID(326, EOFTokenType),      // end of input reached

    // T_LAST_TOKEN_ID is T_EOI + 1 and therefore still carries the category
    // bits of T_EOI; T_LAST_TOKEN is the same number with the main category
    // bits stripped, i.e. one past the running number of the last real token.
    T_LAST_TOKEN_ID,
    T_LAST_TOKEN = ID_FROM_TOKEN(T_LAST_TOKEN_ID),

// pseudo tokens to help streamlining macro replacement, these should not be
// returned from the lexer nor should these be returned from the pp-iterator
// (they have no entry in the name table of get_token_name())
    T_NONREPLACABLE_IDENTIFIER = TOKEN_FROM_ID(T_LAST_TOKEN+1, IdentifierTokenType),
    T_PLACEHOLDER = TOKEN_FROM_ID(T_LAST_TOKEN+2, WhiteSpaceTokenType),
    T_PLACEMARKER = TOKEN_FROM_ID(T_LAST_TOKEN+3, InternalTokenType),
    T_PARAMETERBASE = TOKEN_FROM_ID(T_LAST_TOKEN+4, ParameterTokenType),
    // no trailing comma after the last enumerator (not valid in C++98/03)
    T_EXTPARAMETERBASE = TOKEN_FROM_ID(T_LAST_TOKEN+5, ExtParameterTokenType)
};

///////////////////////////////////////////////////////////////////////////////
//  return a token name
//
//      tokid   the token id to look up; category and extension bits are
//              ignored, only the running number selects the name
//
//  Returns the name of the token without the leading "T_" (the one exception
//  being "FIXEDPOINT" for T_FIXEDPOINTLIT), or "<UnknownToken>" if the
//  running number of tokid has no entry in the table. The latter is the case
//  for the pseudo tokens following T_LAST_TOKEN and for any value below
//  T_FIRST_TOKEN.
inline std::string
get_token_name(token_id tokid)
{
//  Table of token names
//
//      Please note that the sequence of token names must match the sequence of
//      token id's defined in the enum token_id above, i.e. the table needs to
//      have exactly T_LAST_TOKEN-T_FIRST_TOKEN entries.
    static char const *const tok_names[] = {
        "AND",
        "ANDAND",
        "ASSIGN",
        "OR",
        "XOR",
        "COMMA",
        "COLON",
        "DIVIDE",
        "DOT",
        "EQUAL",
        "GREATER",
        "GREATEREQUAL",
        "LEFTBRACE",
        "LESS",
        "LESSEQUAL",
        "LEFTPAREN",
        "LEFTBRACKET",
        "MINUS",
        "MINUSMINUS",
        "PERCENT",
        "NOT",
        "NOTEQUAL",
        "OROR",
        "PLUS",
        "PLUSPLUS",
        "QUESTION_MARK",
        "RIGHTBRACE",
        "RIGHTPAREN",
        "RIGHTBRACKET",
        "SEMICOLON",
        "SHIFTLEFT",
        "SHIFTRIGHT",
        "STAR",
        "COMPL",

        "FALSE",
        "TRUE",

        "PP_DEFINE",
        "PP_IF",
        "PP_IFDEF",
        "PP_IFNDEF",
        "PP_ELSE",
        "PP_ELIF",
        "PP_ENDIF",
        "PP_ERROR",
        "PP_LINE",
        "PP_PRAGMA",
        "PP_UNDEF",
        "PP_WARNING",

        "IDENTIFIER",

        "OCTALINT",
        "DECIMALINT",
        "HEXAINT",
        "INTLIT",
        "FLOATLIT",
        "FIXEDPOINT",
        "CCOMMENT",
        "CPPCOMMENT",
        "CHARLIT",
        "STRINGLIT",
        "CONTLINE",
        "SPACE",
        "SPACE2",
        "NEWLINE",

        "POUND_POUND",
        "POUND",
        "ANYTOKEN",

        "PP_INCLUDE",
        "PP_QHEADER",
        "PP_HHEADER",

        "EOF",
        "EOI"
    };

    //  The lookup is bounded by the real size of the table, so it can never
    //  read past its end, even if the table and the enum get out of sync.
    unsigned int const name_count =
        static_cast<unsigned int>(sizeof(tok_names) / sizeof(tok_names[0]));

    //  The subtraction is done on unsigned values: a running number below
    //  T_FIRST_TOKEN wraps around to a huge index, which is rejected by the
    //  range check below.
    unsigned int const id = BASEID_FROM_TOKEN(tokid)-T_FIRST_TOKEN;

    //  This check must not be an assertion only: assertions vanish from
    //  release builds, and the pseudo tokens are perfectly valid token_id
    //  values which simply have no name.
    if (id >= name_count)
        return "<UnknownToken>";
    return tok_names[id];
}

///////////////////////////////////////////////////////////////////////////////
}   // namespace idllexer
}   // namespace wave

#endif // !defined(IDL_TOKEN_IDS_HPP_414E9A58_F079_4789_8AFF_513815CE475B_INCLUDED)

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
United States
Actively involved in Boost and the development of the Spirit parser construction framework.

Comments and Discussions