Wave: a Standard conformant C++ preprocessor library

Hartmut Kaiser

Rate me:

4.96/5 (58 votes)

10 Jan 2004 - 13 min read

395.6K

4.4K

Describes a free and fully Standard conformant C++ preprocessor library

wave_preprocessor_src.zip
- wave
  - doc
    - acknowledgements.html
    - class_reference_context.html
    - class_reference_fileposition.html
    - class_reference_inputpolicy.html
    - class_reference_lexer.html
    - class_reference_tokentype.html
    - class_reference_tracepolicy.html
    - compiletime_config.html
    - index.html
    - introduction.html
    - macro_expansion_process.html
    - predefined_macros.html
    - preface.html
    - preliminary_cpp0x_support.html
    - quickstart.html
    - references.html
    - supported_pragmas.html
    - theme
      - bkd.gif
      - bkd2.gif
      - bullet.gif
      - l_arr.gif
      - l_arr_disabled.gif
      - r_arr.gif
      - r_arr_disabled.gif
      - style.css
      - u_arr.gif
      - uc.gif
      - wave.gif
    - token_ids.html
    - tracing_facility.html
    - wave_driver.html
  - test
    - boost-build.jam
    - cpp_tokens
    - Jamfile.v2
    - list_includes
    - project-root.jam
    - wave.vcproj
    - wave
      - cpp.cpp
      - cpp.hpp
      - cpp_config.hpp
      - cpp_version.hpp
      - instantiate_cpp_exprgrammar.cpp
      - instantiate_cpp_grammar.cpp
      - instantiate_cpp_literalgrammars.cpp
      - instantiate_defined_grammar.cpp
      - instantiate_predef_macros.cpp
      - instantiate_re2c_idllexer.cpp
      - instantiate_re2c_lexer.cpp
      - instantiate_slex_lexer.cpp
      - Jamfile
      - Jamfile.v2
      - runtests.sh
      - test_files
        
        scope1.cpp
        
        scope10.cpp
        
        scope11.cpp
        
        scope12.cpp
        
        scope13.cpp
        
        scope14.cpp
        
        scope15.cpp
        
        scope16.cpp
        
        scope17.cpp
        
        scope18.cpp
        
        scope19.cpp
        
        scope2.cpp
        
        scope20.cpp
        
        scope21.cpp
        
        scope22.cpp
        
        scope23.cpp
        
        scope24.cpp
        
        scope25.cpp
        
        scope26.cpp
        
        scope27.cpp
        
        scope3.cpp
        
        scope4.cpp
        
        scope5.cpp
        
        scope6.cpp
        
        scope7.cpp
        
        scope8.cpp
        
        scope9.cpp
        
        test1.cpp
        
        test10.cpp
        
        test11.cpp
        
        test12.cpp
        
        test13.cpp
        
        test14.cpp
        
        test15.cpp
        
        test16.cpp
        
        test17.cpp
        
        test18.cpp
        
        test19.cpp
        
        test2.cpp
        
        test20.cpp
        
        test21.cpp
        
        test22.cpp
        
        test23.cpp
        
        test24.cpp
        
        test25.cpp
        
        test26.cpp
        
        test27.cpp
        
        test28.cpp
        
        test29.cpp
        
        test3.cpp
        
        test30.cpp
        
        test31.cpp
        
        test32.cpp
        
        test33.cpp
        
        test34.cpp
        
        test35.cpp
        
        test36.cpp
        
        test37.cpp
        
        test38.cpp
        
        test39.cpp
        
        test4.cpp
        
        test40.cpp
        
        test41.cpp
        
        test41_0.hpp
        
        test41_1.hpp
        
        test42.cpp
        
        test43.cpp
        
        test44.cpp
        
        test45.cpp
        
        test46.cpp
        
        test47.cpp
        
        test48.cpp
        
        test49.cpp
        
        test5.cpp
        
        test50.cpp
        
        test51.cpp
        
        test52.cpp
        
        test53.cpp
        
        test54.cpp
        
        test55.cpp
        
        test56.cpp
        
        test57.cpp
        
        test58.cpp
        
        test59.cpp
        
        test6.cpp
        
        test60.cpp
        
        test61.cpp
        
        test62.cpp
        
        test63.cpp
        
        test64.cpp
        
        test65.cpp
        
        test66.cpp
        
        test67.cpp
        
        test68.cpp
        
        test69.cpp
        
        test7.cpp
        
        test70.cpp
        
        test71.cpp
        
        test72.cpp
        
        test73.cpp
        
        test74.cpp
        
        test75.cpp
        
        test76.cpp
        
        test77.cpp
        
        test78.cpp
        
        test79.cpp
        
        test8.cpp
        
        test9.cpp
      - trace_macro_expansion.hpp
  - wave
    - cpp_context.hpp
    - cpp_exceptions.hpp
    - cpp_iteration_context.hpp
    - cpplexer
      - cpp_lex_interface.hpp
      - cpp_lex_iterator.hpp
      - cpp_lex_token.hpp
      - cpp_token_ids.hpp
      - cpplexer_exceptions.hpp
      - macro_scoping_tokens.hpp
      - re2c_functor.hpp
      - re2clex
        
        aq.cpp
        
        aq.hpp
        
        cpp.re
        
        cpp.re.cpp
        
        cpp_re2c_lexer.hpp
        
        scanner.hpp
        
        test
        
        lextest.in
        
        lextest.output
        
        run_tests.sh
        
        test_lexer.c
        
        test_lexer.in
        
        test_lexer.output
      - slex
        
        cpp_slex_lexer.hpp
      - slex_functor.hpp
      - test
        
        instantiate_re2c_lexer.cpp
        
        instantiate_slex_lexer.cpp
        
        lextest.in
        
        lextest.re2c.output
        
        lextest.slex.output
        
        run_tests.sh
        
        test_re2c_lexer.cpp
        
        test_re2c_lexer.hpp
        
        test_slex_lexer.cpp
        
        test_slex_lexer.hpp
      - validate_universal_char.hpp
    - grammars
    - idllexer
      - idl_lex_interface.hpp
      - idl_lex_iterator.hpp
      - idl_lex_token.hpp
      - idl_token_ids.hpp
      - re2clex
        
        aq.cpp
        
        aq.hpp
        
        idl.re
        
        idl.re.cpp
        
        idl_re2c_lexer.hpp
        
        scanner.hpp
    - language_support.hpp
    - lex_iterator.hpp
    - lex_token.hpp
    - token_ids.hpp
    - trace_policies.hpp
    - util
    - wave.hpp
    - wave_version.hpp
wave_preprocessor_demo.zip
- wave.exe
wave_preprocessor_src1.zip
- acknowledgements.html
- class_reference_context.html
- class_reference_fileposition.html
- class_reference_inputpolicy.html
- class_reference_lexer.html
- class_reference_tokentype.html
- class_reference_tracepolicy.html
- compiletime_config.html
- index.html
- introduction.html
- macro_expansion_process.html
- predefined_macros.html
- preface.html
- preliminary_cpp0x_support.html
- quickstart.html
- references.html
- supported_pragmas.html
- bkd.gif
- bkd2.gif
- bullet.gif
- l_arr.gif
- l_arr_disabled.gif
- r_arr.gif
- r_arr_disabled.gif
- style.css
- u_arr.gif
- uc.gif
- wave.gif
- token_ids.html
- tracing_facility.html
- wave_driver.html
- boost-build.jam
- cpp_tokens.cpp
- cpp_tokens_config.hpp
- instantiate_cpp_exprgrammar.cpp
- instantiate_cpp_grammar.cpp
- instantiate_cpp_literalgrammars.cpp
- instantiate_re2c_lexer.cpp
- instantiate_slex_lexer.cpp
- Jamfile.v2
- instantiate_cpp_exprgrammar.cpp
- instantiate_cpp_grammar.cpp
- instantiate_cpp_literalgrammars.cpp
- instantiate_re2c_lexer.cpp
- instantiate_slex_lexer.cpp
- list_includes.cpp
- list_includes.hpp
- list_includes_version.hpp
- Readme.txt
- project-root.jam
- wave.vcproj
- cpp.cpp
- cpp.hpp
- cpp_config.hpp
- cpp_version.hpp
- instantiate_cpp_exprgrammar.cpp
- instantiate_cpp_grammar.cpp
- instantiate_cpp_literalgrammars.cpp
- instantiate_defined_grammar.cpp
- instantiate_predef_macros.cpp
- instantiate_re2c_lexer.cpp
- instantiate_slex_lexer.cpp
- Jamfile
- Jamfile.v2
- runtests.sh
- test1.cpp
- test10.cpp
- test11.cpp
- test12.cpp
- test13.cpp
- test14.cpp
- test15.cpp
- test16.cpp
- test17.cpp
- test18.cpp
- test19.cpp
- test2.cpp
- test20.cpp
- test21.cpp
- test22.cpp
- test23.cpp
- test24.cpp
- test25.cpp
- test26.cpp
- test27.cpp
- test28.cpp
- test29.cpp
- test3.cpp
- test30.cpp
- test31.cpp
- test32.cpp
- test33.cpp
- test4.cpp
- test5.cpp
- test6.cpp
- test7.cpp
- test8.cpp
- test9.cpp
- trace_macro_expansion.hpp
- cpp_context.hpp
- cpp_exceptions.hpp
- cpp_iteration_context.hpp
- cpp_lex_interface.hpp
- cpp_lex_iterator.hpp
- cpp_lex_token.hpp
- cpp_token_ids.hpp
- cpplexer_exceptions.hpp
- macro_scoping_tokens.hpp
- re2c_functor.hpp
- aq.cpp
- aq.hpp
- cpp.re
- cpp.re.cpp
- cpp_re2c_lexer.hpp
- scanner.hpp
- lextest.in
- lextest.output
- run_tests.sh
- test_lexer.c
- test_lexer.in
- test_lexer.output
- cpp_slex_lexer.hpp
- slex_functor.hpp
- instantiate_re2c_lexer.cpp
- instantiate_slex_lexer.cpp
- lextest.in
- lextest.re2c.output
- lextest.slex.output
- run_tests.sh
- test_re2c_lexer.cpp
- test_re2c_lexer.hpp
- test_slex_lexer.cpp
- test_slex_lexer.hpp
- validate_universal_char.hpp
- cpp_chlit_grammar.hpp
- cpp_defined_grammar.hpp
- cpp_defined_grammar_gen.hpp
- cpp_expression_grammar.hpp
- cpp_expression_grammar_gen.hpp
- cpp_floatlit_grammar.hpp
- cpp_grammar.hpp
- cpp_grammar_gen.hpp
- cpp_intlit_grammar.hpp
- cpp_literal_grammar_gen.hpp
- cpp_predef_macros_gen.hpp
- cpp_predef_macros_grammar.hpp
- language_support.hpp
- macro_trace_policies.hpp
- cpp_ifblock.hpp
- cpp_include_pathes.hpp
- cpp_iterator.hpp
- cpp_macromap.hpp
- eat_whitespace.hpp
- file_position.hpp
- flex_string.hpp
- insert_whitespace_detection.hpp
- interpret_pragma.hpp
- iteration_context.hpp
- macro_definition.hpp
- macro_helpers.hpp
- pattern_parser.hpp
- symbol_table.hpp
- time_conversion_helper.hpp
- transform_iterator.hpp
- unput_queue_iterator.hpp
- wave_version.hpp
wave_preprocessor_demo1.zip
- wave.exe

/*=============================================================================
    Wave: A Standard compliant C++ preprocessor

    SLex (Spirit Lex) based C++ lexer
    
    Copyright (c) 2001-2003 Hartmut Kaiser
    http://spirit.sourceforge.net/

    Permission to copy, use, modify, sell and distribute this software
    is granted provided this copyright notice appears in all copies.
    This software is provided "as is" without express or implied
    warranty, and with no claim as to its suitability for any purpose.

    See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/

#if !defined(CPP_SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)
#define CPP_SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED

#include <ctime>
#include <fstream>
#include <iterator>
#include <string>
#if defined(BOOST_SPIRIT_DEBUG)
#include <iostream>
#endif // defined(BOOST_SPIRIT_DEBUG)

#include <boost/spirit/core.hpp>
#include <boost/spirit/core/assert.hpp>

#include <libs/spirit/example/application/slex/lexer.hpp>   // "spirit/lexer.hpp"

#include "wave/util/file_position.hpp"
#include "wave/util/time_conversion_helper.hpp"
#include "wave/cpplexer/validate_universal_char.hpp"
#include "wave/cpplexer/cpp_token_ids.hpp"
#include "wave/cpplexer/cpp_lex_token.hpp"
#include "wave/cpplexer/cpp_lex_interface.hpp"
#include "wave/cpplexer/macro_scoping_tokens.hpp"

#include "wave/language_support.hpp"

///////////////////////////////////////////////////////////////////////////////
namespace wave {
namespace cpplexer {
namespace slex {

///////////////////////////////////////////////////////////////////////////////
// 
//  encapsulation of the boost::spirit::slex based cpp lexer
//
///////////////////////////////////////////////////////////////////////////////

//  The lexer class derives from boost::spirit::lexer, instantiated for a 
//  wave::util::position_iterator wrapped around the given character iterator. 
//  It owns the static token definition tables (init_data*) which are 
//  registered with the base class by init_dfa().
template <typename IteratorT, typename PositionT>
class lexer 
:   public boost::spirit::lexer<
        wave::util::position_iterator<IteratorT, PositionT> >
{
public:
    typedef wave::util::position_iterator<IteratorT, PositionT>     iterator_t;
    typedef boost::spirit::lexer<iterator_t>                        base_t;
    typedef typename std::iterator_traits<IteratorT>::value_type    char_t;
    typedef wave::cpplexer::lex_token<PositionT>                    token_t;

//  construction (the token definitions are registered by init_dfa() only)
    lexer();

//  register the token definitions appropriate for the given language mode
    void init_dfa(wave::language_support language);

//  return the time of the last compilation (as stored in compilation_time)
    static std::time_t get_compilation_time() 
    { 
        return compilation_time.get_time(); 
    }

private:
//  one entry of the token definition tables; the member order must match 
//  the aggregate initializers generated by the TOKEN_DATA macros
    struct lexer_data {
        token_id tokenid;                       // id of the token
        char_t const *tokenregex;               // regex matching this token
        typename base_t::callback_t tokencb;    // callback (0 if not needed)
        unsigned int lexerstate;                // lexer state the regex is valid in
    };

//  token definition tables, each terminated by an entry with a zero tokenid
    static lexer_data const init_data[];        // patterns common to C and C++
    static lexer_data const init_data_cpp[];    // patterns valid for C++ only
#if defined(WAVE_ENABLE_CPP0X_EXTENSIONS)
    static lexer_data const init_data_cpp0x[];  // patterns valid for C++0x only
#endif // defined(WAVE_ENABLE_CPP0X_EXTENSIONS)

//  helper object holding the time of the last compilation
    static wave::util::time_conversion_helper compilation_time;
};

///////////////////////////////////////////////////////////////////////////////
//  data required for initialization of the lexer (token definitions)
//  All of the following macros expand to string literals, which are combined
//  by string literal concatenation into the regular expressions used in the 
//  token definition tables below.
//
//  OR      the regex alternation operator
//  Q(c)    c prefixed by a (regex level) backslash, i.e. the first character 
//          of c is quoted
//  TRI(c)  two quoted question marks followed by c (a trigraph sequence)
#define OR      "|"
#define Q(c)    "\\" c
#define TRI(c)  Q("?") Q("?") c

// definition of some subtoken regexps to simplify the regex definitions
// (BLANK: a space or tab, CCOMMENT: a complete /* ... */ comment, 
// PPSPACE: any sequence of blanks and C comments)
#define BLANK           "[ \\t]"
#define CCOMMENT        \
    Q("/") Q("*") "[^*]*" Q("*") "+" "(" "[^/*][^*]*" Q("*") "+" ")*" Q("/")
        
#define PPSPACE         "(" BLANK OR CCOMMENT ")*"

// building blocks for the numeric literals
#define OCTALDIGIT      "[0-7]"
#define DIGIT           "[0-9]"
#define HEXDIGIT        "[0-9a-fA-F]"
#define SIGN            "[-+]?"
#define EXPONENT        "(" "[eE]" SIGN "[0-9]+" ")"

// literal suffixes and the optional 'L' prefix of character/string literals
#define INTEGER_SUFFIX  "(" "[uU][lL]?|[lL][uU]?" ")"
#define FLOAT_SUFFIX    "(" "[fF][lL]?|[lL][fF]?" ")"
#define CHAR_SPEC       "L?"

// a backslash, written either directly or as the trigraph ??/
#define BACKSLASH       "(" Q("\\") OR TRI(Q("/")) ")"
// escape sequences: simple escapes, an escaped backslash, hexadecimal 
// escapes (x followed by hex digits) and octal escapes (1 to 3 octal digits)
#define ESCAPESEQ       BACKSLASH "(" \
                            "[abfnrtv?'\"]" OR \
                            BACKSLASH OR \
                            "x" HEXDIGIT "+" OR \
                            OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \
                        ")"
// universal character names: backslash followed by u and 4 hex digits or by 
// U and 8 hex digits
#define HEXQUAD         HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT 
#define UNIVERSALCHAR   BACKSLASH "(" \
                            "u" HEXQUAD OR \
                            "U" HEXQUAD HEXQUAD \
                        ")" 

// POUNDDEF: the '#' character, its trigraph (??=) or its digraph (%:)
// NEWLINEDEF: any of the line endings \n, \r or \r\n
#define POUNDDEF        "(" "#" OR TRI("=") OR Q("%:") ")"
#define NEWLINEDEF      "(" "\\n" OR "\\r" OR "\\r\\n" ")"

///////////////////////////////////////////////////////////////////////////////
//  lexer state constants
//  NOTE(review): LEXER_STATE_PP is defined but not referenced by any of the 
//  token definitions in this file, all of which use LEXER_STATE_NORMAL.
#define LEXER_STATE_NORMAL  0
#define LEXER_STATE_PP      1

//  number of lexer states handed to the base class by the lexer constructor
#define NUM_LEXER_STATES    1

//  helper for initializing token data
//  TOKEN_DATA(id, regex) generates the aggregate initializer of a lexer_data
//  entry for the token T_<id>, matched by regex, without a callback, valid in 
//  the normal lexer state
#define TOKEN_DATA(id, regex) \
    { T_##id, regex, 0, LEXER_STATE_NORMAL }

//  same as TOKEN_DATA, but with an additional callback
#define TOKEN_DATA_EX(id, regex, callback) \
    { T_##id, regex, callback, LEXER_STATE_NORMAL }

///////////////////////////////////////////////////////////////////////////////
// common C++/C99 token definitions
//  Every entry associates a token id (T_<name>) with the regular expression
//  matching this token. The table is terminated by an entry with a tokenid 
//  of zero, which is what the registration loops in init_dfa() test for.
//  NOTE(review): the order of the entries may be significant for the 
//  resolution of ambiguous matches by the underlying lexer - confirm before 
//  reordering anything here.
template <typename IteratorT, typename PositionT>
typename lexer<IteratorT, PositionT>::lexer_data const 
lexer<IteratorT, PositionT>::init_data[] = 
{
//  operators and punctuators (including digraph and trigraph spellings)
    TOKEN_DATA(AND, "&"),
    TOKEN_DATA(ANDAND, "&&"),
    TOKEN_DATA(ASSIGN, "="),
    TOKEN_DATA(ANDASSIGN, "&="),
    TOKEN_DATA(OR, Q("|")),
    TOKEN_DATA(OR_TRIGRAPH, TRI("!")),
    TOKEN_DATA(ORASSIGN, Q("|=") OR TRI("!=")),
    TOKEN_DATA(XOR, Q("^")),
    TOKEN_DATA(XOR_TRIGRAPH, TRI("'")),
    TOKEN_DATA(XORASSIGN, Q("^=") OR TRI("'=")),
    TOKEN_DATA(COMMA, ","),
    TOKEN_DATA(COLON, ":"),
    TOKEN_DATA(DIVIDE, Q("/")),
    TOKEN_DATA(DIVIDEASSIGN, Q("/=")),
    TOKEN_DATA(DOT, Q(".")),
    TOKEN_DATA(ELLIPSIS, Q(".") Q(".") Q(".")),
    TOKEN_DATA(EQUAL, "=="),
    TOKEN_DATA(GREATER, ">"),
    TOKEN_DATA(GREATEREQUAL, ">="),
    TOKEN_DATA(LEFTBRACE, Q("{")),
    TOKEN_DATA(LEFTBRACE_ALT, "<" Q("%")),
    TOKEN_DATA(LEFTBRACE_TRIGRAPH, TRI("<")),
    TOKEN_DATA(LESS, "<"),
    TOKEN_DATA(LESSEQUAL, "<="),
    TOKEN_DATA(LEFTPAREN, Q("(")),
    TOKEN_DATA(LEFTBRACKET, Q("[")),
    TOKEN_DATA(LEFTBRACKET_ALT, "<:"),
    TOKEN_DATA(LEFTBRACKET_TRIGRAPH, TRI(Q("("))),
    TOKEN_DATA(MINUS, Q("-")),
    TOKEN_DATA(MINUSASSIGN, Q("-=")),
    TOKEN_DATA(MINUSMINUS, Q("-") Q("-")),
    TOKEN_DATA(PERCENT, Q("%")),
    TOKEN_DATA(PERCENTASSIGN, Q("%=")),
    TOKEN_DATA(NOT, "!"),
    TOKEN_DATA(NOTEQUAL, "!="),
    TOKEN_DATA(OROR, Q("|") Q("|") OR TRI("!") Q("|") OR Q("|") TRI("!") OR 
                TRI("!") TRI("!")),
    TOKEN_DATA(PLUS, Q("+")),
    TOKEN_DATA(PLUSASSIGN, Q("+=")),
    TOKEN_DATA(PLUSPLUS, Q("+") Q("+")),
    TOKEN_DATA(ARROW, Q("->")),
    TOKEN_DATA(QUESTION_MARK, Q("?")),
    TOKEN_DATA(RIGHTBRACE, Q("}")),
    TOKEN_DATA(RIGHTBRACE_ALT, Q("%>")),
    TOKEN_DATA(RIGHTBRACE_TRIGRAPH, TRI(">")),
    TOKEN_DATA(RIGHTPAREN, Q(")")),
    TOKEN_DATA(RIGHTBRACKET, Q("]")),
    TOKEN_DATA(RIGHTBRACKET_ALT, ":>"),
    TOKEN_DATA(RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))),
    TOKEN_DATA(SEMICOLON, ";"),
    TOKEN_DATA(SHIFTLEFT, "<<"),
    TOKEN_DATA(SHIFTLEFTASSIGN, "<<="),
    TOKEN_DATA(SHIFTRIGHT, ">>"),
    TOKEN_DATA(SHIFTRIGHTASSIGN, ">>="),
    TOKEN_DATA(STAR, Q("*")),
    TOKEN_DATA(COMPL, Q("~")),
    TOKEN_DATA(COMPL_TRIGRAPH, TRI("-")),
    TOKEN_DATA(STARASSIGN, Q("*=")),
//  keywords
    TOKEN_DATA(ASM, "asm"),
    TOKEN_DATA(AUTO, "auto"),
    TOKEN_DATA(BOOL, "bool"),
    TOKEN_DATA(FALSE, "false"),
    TOKEN_DATA(TRUE, "true"),
    TOKEN_DATA(BREAK, "break"),
    TOKEN_DATA(CASE, "case"),
    TOKEN_DATA(CATCH, "catch"),
    TOKEN_DATA(CHAR, "char"),
    TOKEN_DATA(CLASS, "class"),
    TOKEN_DATA(CONST, "const"),
    TOKEN_DATA(CONSTCAST, "const_cast"),
    TOKEN_DATA(CONTINUE, "continue"),
    TOKEN_DATA(DEFAULT, "default"),
//    TOKEN_DATA(DEFINED, "defined"),
    TOKEN_DATA(DELETE, "delete"),
    TOKEN_DATA(DO, "do"),
    TOKEN_DATA(DOUBLE, "double"),
    TOKEN_DATA(DYNAMICCAST, "dynamic_cast"),
    TOKEN_DATA(ELSE, "else"),
    TOKEN_DATA(ENUM, "enum"),
    TOKEN_DATA(EXPLICIT, "explicit"),
    TOKEN_DATA(EXPORT, "export"),
    TOKEN_DATA(EXTERN, "extern"),
    TOKEN_DATA(FLOAT, "float"),
    TOKEN_DATA(FOR, "for"),
    TOKEN_DATA(FRIEND, "friend"),
    TOKEN_DATA(GOTO, "goto"),
    TOKEN_DATA(IF, "if"),
    TOKEN_DATA(INLINE, "inline"),
    TOKEN_DATA(INT, "int"),
    TOKEN_DATA(LONG, "long"),
    TOKEN_DATA(MUTABLE, "mutable"),
    TOKEN_DATA(NAMESPACE, "namespace"),
    TOKEN_DATA(NEW, "new"),
    TOKEN_DATA(OPERATOR, "operator"),
    TOKEN_DATA(PRIVATE, "private"),
    TOKEN_DATA(PROTECTED, "protected"),
    TOKEN_DATA(PUBLIC, "public"),
    TOKEN_DATA(REGISTER, "register"),
    TOKEN_DATA(REINTERPRETCAST, "reinterpret_cast"),
    TOKEN_DATA(RETURN, "return"),
    TOKEN_DATA(SHORT, "short"),
    TOKEN_DATA(SIGNED, "signed"),
    TOKEN_DATA(SIZEOF, "sizeof"),
    TOKEN_DATA(STATIC, "static"),
    TOKEN_DATA(STATICCAST, "static_cast"),
    TOKEN_DATA(STRUCT, "struct"),
    TOKEN_DATA(SWITCH, "switch"),
    TOKEN_DATA(TEMPLATE, "template"),
    TOKEN_DATA(THIS, "this"),
    TOKEN_DATA(THROW, "throw"),
    TOKEN_DATA(TRY, "try"),
    TOKEN_DATA(TYPEDEF, "typedef"),
    TOKEN_DATA(TYPEID, "typeid"),
    TOKEN_DATA(TYPENAME, "typename"),
    TOKEN_DATA(UNION, "union"),
    TOKEN_DATA(UNSIGNED, "unsigned"),
    TOKEN_DATA(USING, "using"),
    TOKEN_DATA(VIRTUAL, "virtual"),
    TOKEN_DATA(VOID, "void"),
    TOKEN_DATA(VOLATILE, "volatile"),
    TOKEN_DATA(WCHART, "wchar_t"),
    TOKEN_DATA(WHILE, "while"),
//  preprocessing directives: the pound sign (in any of its spellings), 
//  optional blanks/C comments and the directive name
    TOKEN_DATA(PP_DEFINE, POUNDDEF PPSPACE "define"),
    TOKEN_DATA(PP_IF, POUNDDEF PPSPACE "if"),
    TOKEN_DATA(PP_IFDEF, POUNDDEF PPSPACE "ifdef"),
    TOKEN_DATA(PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),
    TOKEN_DATA(PP_ELSE, POUNDDEF PPSPACE "else"),
    TOKEN_DATA(PP_ELIF, POUNDDEF PPSPACE "elif"),
    TOKEN_DATA(PP_ENDIF, POUNDDEF PPSPACE "endif"),
    TOKEN_DATA(PP_ERROR, POUNDDEF PPSPACE "error"),
//  #include "file", #include <file> and #include followed by anything else
    TOKEN_DATA(PP_QHEADER, POUNDDEF PPSPACE "include" PPSPACE Q("\"") "[^\\n\\r\"]+" Q("\"")),
    TOKEN_DATA(PP_HHEADER, POUNDDEF PPSPACE "include" PPSPACE "<" "[^\\n\\r>]+" ">"),
    TOKEN_DATA(PP_INCLUDE, POUNDDEF PPSPACE "include" PPSPACE),
    TOKEN_DATA(PP_LINE, POUNDDEF PPSPACE "line"),
    TOKEN_DATA(PP_PRAGMA, POUNDDEF PPSPACE "pragma"),
    TOKEN_DATA(PP_UNDEF, POUNDDEF PPSPACE "undef"),
    TOKEN_DATA(PP_WARNING, POUNDDEF PPSPACE "warning"),
//  identifiers (may contain universal character names) and literals
    TOKEN_DATA(IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),
//  TOKEN_DATA(OCTALINT, "0" OCTALDIGIT "*" INTEGER_SUFFIX "?"),
//  TOKEN_DATA(DECIMALINT, "[1-9]" DIGIT "*" INTEGER_SUFFIX "?"),
//  TOKEN_DATA(HEXAINT, "(0x|0X)" HEXDIGIT "+" INTEGER_SUFFIX "?"),
    TOKEN_DATA(INTLIT, "(" "(0x|0X)" HEXDIGIT "+" OR "0" OCTALDIGIT "*" OR \
            "[1-9]" DIGIT "*" ")" INTEGER_SUFFIX "?"),
    TOKEN_DATA(FLOATLIT, 
        "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")" 
        EXPONENT "?" FLOAT_SUFFIX "?" OR
        DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
//  comments (the C++ comment pattern includes the terminating newline)
    TOKEN_DATA(CCOMMENT, CCOMMENT),
    TOKEN_DATA(CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") NEWLINEDEF ),
    TOKEN_DATA(CHARLIT, CHAR_SPEC "'" 
                "(" ESCAPESEQ OR "[^\\n\\r']" OR UNIVERSALCHAR ")+" "'"),
    TOKEN_DATA(STRINGLIT, CHAR_SPEC Q("\"") 
                "(" ESCAPESEQ OR "[^\\n\\r\"]" OR UNIVERSALCHAR ")*" Q("\"")),
//  whitespace, line continuation (backslash followed by \n) and newlines
    TOKEN_DATA(SPACE, BLANK "+"),
    TOKEN_DATA(SPACE2, "[\\v\\f]+"),
    TOKEN_DATA(CONTLINE, Q("\\") "\\n"), 
    TOKEN_DATA(NEWLINE, NEWLINEDEF),
    TOKEN_DATA(POUND_POUND, "##"),
    TOKEN_DATA(POUND_POUND_ALT, Q("%:") Q("%:")),
    TOKEN_DATA(POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
    TOKEN_DATA(POUND, "#"),
    TOKEN_DATA(POUND_ALT, Q("%:")),
    TOKEN_DATA(POUND_TRIGRAPH, TRI("=")),
//  any single character
    TOKEN_DATA(ANY, "."),
#if defined(WAVE_SUPPORT_MS_EXTENSIONS)
//  Microsoft specific keywords ("_?" makes the first underscore optional)
    TOKEN_DATA(MSEXINT8, "__int8"),
    TOKEN_DATA(MSEXINT16, "__int16"),
    TOKEN_DATA(MSEXINT32, "__int32"),
    TOKEN_DATA(MSEXINT64, "__int64"),
    TOKEN_DATA(MSEXBASED, "_?" "_based"),
    TOKEN_DATA(MSEXDECLSPEC, "_?" "_declspec"),
    TOKEN_DATA(MSEXCDECL, "_?" "_cdecl"),
    TOKEN_DATA(MSEXFASTCALL, "_?" "_fastcall"),
    TOKEN_DATA(MSEXSTDCALL, "_?" "_stdcall"),
    TOKEN_DATA(MSEXTRY , "__try"),
    TOKEN_DATA(MSEXEXCEPT, "__except"),
    TOKEN_DATA(MSEXFINALLY, "__finally"),
    TOKEN_DATA(MSEXLEAVE, "__leave"),
    TOKEN_DATA(MSEXINLINE, "_?" "_inline"),
    TOKEN_DATA(MSEXASM, "_?" "_asm"),
#endif // defined(WAVE_SUPPORT_MS_EXTENSIONS)
#if defined(WAVE_ENABLE_CPP0X_EXTENSIONS)
//  additional directives; the directive names are taken from the WAVE_PP_* 
//  macros, which are not defined in this file
    TOKEN_DATA(PP_REGION, POUNDDEF PPSPACE WAVE_PP_REGION),
    TOKEN_DATA(PP_ENDREGION, POUNDDEF PPSPACE WAVE_PP_ENDREGION),
    TOKEN_DATA(PP_IMPORT, POUNDDEF PPSPACE WAVE_PP_IMPORT),
#endif // defined(WAVE_ENABLE_CPP0X_EXTENSIONS)
    { token_id(0) }       // this should be the last entry
};

///////////////////////////////////////////////////////////////////////////////
// C++ only token definitions
//  These tokens (alternative operator spellings, the pointer to member 
//  operators and the scope resolution operator) are registered by init_dfa() 
//  only, if wave::need_c99() does not hold for the requested language mode. 
//  The table is terminated by an entry with a tokenid of zero.
template <typename IteratorT, typename PositionT>
typename lexer<IteratorT, PositionT>::lexer_data const 
lexer<IteratorT, PositionT>::init_data_cpp[] = 
{
    TOKEN_DATA(AND_ALT, "bitand"),
    TOKEN_DATA(ANDAND_ALT, "and"),
    TOKEN_DATA(ANDASSIGN_ALT, "and_eq"),
    TOKEN_DATA(OR_ALT, "bitor"),
    TOKEN_DATA(ORASSIGN_ALT, "or_eq"),
    TOKEN_DATA(OROR_ALT, "or"),
    TOKEN_DATA(XOR_ALT, "xor"),
    TOKEN_DATA(XORASSIGN_ALT, "xor_eq"),
    TOKEN_DATA(NOT_ALT, "not"),
    TOKEN_DATA(NOTEQUAL_ALT, "not_eq"),
    TOKEN_DATA(COMPL_ALT, "compl"),
    TOKEN_DATA(ARROWSTAR, Q("->") Q("*")),
    TOKEN_DATA(DOTSTAR, Q(".") Q("*")),
    TOKEN_DATA(COLON_COLON, "::"),
    { token_id(0) }       // this should be the last entry
};

#if defined(WAVE_ENABLE_CPP0X_EXTENSIONS)
///////////////////////////////////////////////////////////////////////////////
// C++0x only token definitions
//  These tokens are registered by init_dfa() only, if wave::need_cpp0x() 
//  holds for the requested language mode. The table is terminated by an entry 
//  with a tokenid of zero.
template <typename IteratorT, typename PositionT>
typename lexer<IteratorT, PositionT>::lexer_data const 
lexer<IteratorT, PositionT>::init_data_cpp0x[] = 
{
    TOKEN_DATA(COMMA_ALT, "__comma__"),
    TOKEN_DATA(LEFTPAREN_ALT, "__lparen__"),
    TOKEN_DATA(RIGHTPAREN_ALT, "__rparen__"),
    { token_id(0) }       // this should be the last entry
};
#endif // defined(WAVE_ENABLE_CPP0X_EXTENSIONS)

///////////////////////////////////////////////////////////////////////////////
//  undefine macros, required for regular expression definitions
//  All helper macros used for the regular expression definitions only are 
//  undefined here, so that they do not leak into the files including this 
//  header. The lexer state constants are left defined, NUM_LEXER_STATES is
//  still needed by the lexer constructor.
#undef POUNDDEF
#undef NEWLINEDEF
#undef BLANK
#undef CCOMMENT
#undef PPSPACE
#undef DIGIT
#undef OCTALDIGIT
#undef HEXDIGIT
#undef SIGN
#undef EXPONENT
#undef INTEGER_SUFFIX
#undef FLOAT_SUFFIX
#undef CHAR_SPEC
#undef BACKSLASH
#undef ESCAPESEQ
#undef HEXQUAD
#undef UNIVERSALCHAR

#undef Q
#undef TRI
#undef OR

#undef TOKEN_DATA
#undef TOKEN_DATA_EX

///////////////////////////////////////////////////////////////////////////////
// initialize cpp lexer with token data
//  The constructor only tells the base class the number of lexer states to
//  use, the token definitions are registered later by init_dfa().
template <typename IteratorT, typename PositionT>
inline lexer<IteratorT, PositionT>::lexer() 
    : base_t(NUM_LEXER_STATES)
{}

//  Register the token definitions required for the given language mode with 
//  the base lexer:
//      - the common definitions (init_data) are registered always
//      - the C++ only definitions (init_data_cpp) are registered unless 
//        wave::need_c99(language) holds
//      - the C++0x only definitions (init_data_cpp0x) are registered if 
//        wave::need_cpp0x(language) holds (WAVE_ENABLE_CPP0X_EXTENSIONS only)
//
//  Nothing is done, if the lexer has a compiled dfa already.
//
//  NOTE(review): the C99 and C++0x modes are assumed to be mutually 
//  exclusive; this function does not verify that.
template <typename IteratorT, typename PositionT>
inline void
lexer<IteratorT, PositionT>::init_dfa(wave::language_support language)
{
// has_compiled_dfa() is inherited from the template dependent base class, so 
// it is called through this-> (as is register_regex() below)
    if (this->has_compiled_dfa())
        return;
        
// every table is terminated by an entry with a tokenid of zero
    for (int i = 0; 0 != init_data[i].tokenid; ++i) {
        this->register_regex(init_data[i].tokenregex, init_data[i].tokenid, 
            init_data[i].tokencb, init_data[i].lexerstate);
    }

// if in C99 mode, some of the keywords are not valid    
    if (!wave::need_c99(language)) {
        for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
            this->register_regex(init_data_cpp[j].tokenregex, 
                init_data_cpp[j].tokenid, init_data_cpp[j].tokencb, 
                init_data_cpp[j].lexerstate);
        }
    }
    
#if defined(WAVE_ENABLE_CPP0X_EXTENSIONS)
// C++0x mode has its own new keywords
    if (wave::need_cpp0x(language)) {
        for (int k = 0; 0 != init_data_cpp0x[k].tokenid; ++k) {
            this->register_regex(init_data_cpp0x[k].tokenregex, 
                init_data_cpp0x[k].tokenid, init_data_cpp0x[k].tokencb, 
                init_data_cpp0x[k].lexerstate);
        }
    }
#endif // defined(WAVE_ENABLE_CPP0X_EXTENSIONS)
}

///////////////////////////////////////////////////////////////////////////////
// get time of last compilation of this file
// (__DATE__ and __TIME__ expand to the date and time at which the translation 
// unit including this header is compiled; the resulting string is handed to 
// the time_conversion_helper)
template <typename IteratorT, typename PositionT>
wave::util::time_conversion_helper 
    lexer<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);

///////////////////////////////////////////////////////////////////////////////
//  
//  Make sure the given lexer has its dfa tables available.
//
//  The function tries to load the tables from the file "wave_slex_lexer.dfa" 
//  (in the current directory) first. If this file can't be opened, if loading 
//  it fails or if force_reinit is true, the token definitions are registered 
//  (init_dfa), the dfa is created and the result is written back to this 
//  file. A failure to open the file for writing is ignored.
//
//      lexer           the lexer to initialize
//      language        the language mode the token definitions are 
//                      registered for
//      force_reinit    if true, the saved dfa file is not loaded; this has 
//                      no effect, if the lexer has a compiled dfa already
//
//  NOTE(review): the compilation time handed to load()/save() presumably 
//  serves to detect outdated dfa files - confirm against the slex lexer.
template <typename IteratorT, typename PositionT>
inline void 
init_lexer (lexer<IteratorT, PositionT> &lexer, 
    wave::language_support language, bool force_reinit = false)
{
    if (lexer.has_compiled_dfa())
        return;     // nothing to do

    char const *const dfa_file = "wave_slex_lexer.dfa";
    std::ifstream dfa_in(dfa_file, std::ios::in|std::ios::binary);

// try to reuse the saved tables (load() is called only, if no 
// reinitialization was requested and the file could be opened)
    if (!force_reinit && dfa_in.is_open() &&
        lexer.load (dfa_in, lexer.get_compilation_time()))
    {
        return;
    }

// std::cerr is referenced in debug mode only, because <iostream> is included 
// by this header in debug mode only
#if defined(BOOST_SPIRIT_DEBUG)
    std::cerr << "Compiling regular expressions for slex ...";
#endif // defined(BOOST_SPIRIT_DEBUG)

// close the input stream before the same file is opened for writing
    dfa_in.close();

    lexer.init_dfa(language);
    lexer.create_dfa();

// save the tables for the next run (best effort)
    std::ofstream dfa_out (dfa_file, 
        std::ios::out|std::ios::binary|std::ios::trunc);

    if (dfa_out.is_open())
        lexer.save (dfa_out, lexer.get_compilation_time());

#if defined(BOOST_SPIRIT_DEBUG)
    std::cerr << " Done." << std::endl;
#endif // defined(BOOST_SPIRIT_DEBUG)
}

///////////////////////////////////////////////////////////////////////////////
//  
//  lex_functor
//
///////////////////////////////////////////////////////////////////////////////

//  The lex_functor implements the lex_input_interface for the slex based 
//  lexer. It holds the current position inside the input and returns one 
//  token per call to get(). All lex_functor's of the same template 
//  instantiation share one (static) lexer object, the dfa tables of which are 
//  initialized by the first lex_functor constructed.
template <typename IteratorT, typename PositionT = wave::util::file_position_t>
class lex_functor 
:   public lex_input_interface<typename lexer<IteratorT, PositionT>::token_t>
{
public:

    typedef wave::util::position_iterator<IteratorT, PositionT>     iterator_t;
    typedef typename std::iterator_traits<IteratorT>::value_type    char_t;
    typedef WAVE_STRINGTYPE                                         string_t;
    typedef typename lexer<IteratorT, PositionT>::token_t           token_t;

//  first_, last_   the input sequence to tokenize
//  pos_            the position information to start with
//  language        the language mode used for the lexer initialization
    lex_functor(IteratorT const &first_, IteratorT const &last_, 
            PositionT const &pos_, wave::language_support language)
    :   first(first_, last_, pos_), at_eof(false)
    {
        // initialize lexer dfa tables
        init_lexer(lexer_, language);  
    }
    virtual ~lex_functor() {}

// get the next token from the input stream
//
// T_CONTLINE tokens are skipped. Identifiers and string/character literals 
// are validated before they are returned (the validation functions throw if 
// invalid characters are found). A default constructed token is returned 
// after T_EOF was seen or if the lexer signals the end of the input.
    token_t get()
    {
        token_t token;

        if (at_eof)
            return token;
            
        do {
        // generate and return the next token
            std::string value;

        // The position of the begin of the token is copied here, before the
        // iterator is advanced by next_token(). Holding a reference instead 
        // would refer to the position after the token, if get_position() 
        // returns a reference to the iterator's internal position.
            PositionT const pos = first.get_position();
            token_id id = token_id(lexer_.next_token(first, last, &value));

            if ((token_id)(-1) == id)
                break;              // end of input reached

            string_t token_val(value.c_str());
        
            if (T_CONTLINE != id) {
                if (T_IDENTIFIER == id) {
                // test identifier characters for validity (throws if invalid 
                // chars found)
                    impl::validate_identifier_name(token_val, 
                        pos.get_line(), pos.get_column(), pos.get_file()); 
                }
                else if (T_STRINGLIT == id || T_CHARLIT == id) {
                // test literal characters for validity (throws if invalid 
                // chars found)
                    impl::validate_literal(token_val, 
                        pos.get_line(), pos.get_column(), pos.get_file()); 
                }
                else if (T_EOF == id) {
                // T_EOF is returned as a valid token, the next call will 
                // return a default constructed token
                    at_eof = true;
                }
                return token_t(id, token_val, pos);
            }
        
        // skip the T_CONTLINE token
        } while (true);
        return token;       // end of input: return the default token
    }

// set the position information of the current input position
    void set_position(PositionT const &pos) 
    { first.set_position(pos); }
    
private:
    iterator_t first;       // current position inside the input

// NOTE(review): last is default constructed (last_ is handed to first only); 
// presumably a default constructed position_iterator denotes the end of the 
// input - confirm against wave/util/file_position.hpp
    iterator_t last;

// The lexer object is needed only once. The member is not named 'lexer', 
// because this would change the meaning of the class template name 'lexer' 
// used in the typedefs above.
    static lexer<IteratorT, PositionT> lexer_;
    
    bool at_eof;            // true after T_EOF has been returned
};

template <typename IteratorT, typename PositionT>
lexer<IteratorT, PositionT> lex_functor<IteratorT, PositionT>::lexer_;

///////////////////////////////////////////////////////////////////////////////
//  
//  The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)
//  should be defined inline, if the lex_functor shouldn't be instantiated 
//  separately from the lex_iterator.
//
//  Separate (explicit) instantiation helps to reduce compilation time.
//
///////////////////////////////////////////////////////////////////////////////

//  WAVE_SLEX_NEW_LEXER_INLINE expands to nothing, if the lexer is instantiated 
//  separately (WAVE_SEPARATE_LEXER_INSTANTIATION is defined), and to 'inline' 
//  otherwise. It is used for the definition of new_lexer below and undefined 
//  afterwards.
#if defined(WAVE_SEPARATE_LEXER_INSTANTIATION)
#define WAVE_SLEX_NEW_LEXER_INLINE
#else
#define WAVE_SLEX_NEW_LEXER_INLINE inline
#endif 

}   // namespace slex

///////////////////////////////////////////////////////////////////////////////
//
//  The 'new_lexer' function allows the opaque generation of a new lexer object.
//  It is coupled to the iterator type to allow to decouple the lexer/iterator 
//  configurations at compile time.
//
//  This function is declared inside the cpp_slex_token.hpp file, which is 
//  referenced by the source file calling the lexer and the sourcefile, which
//  instantiates the lex_functor. But it is defined here, so it will be 
//  instantiated only while compiling the sourcefile, which instantiates the 
//  lex_functor. While the cpp_slex_token.hpp file may be included everywhere,
//  this file (cpp_slex_lexer.hpp) should be included only once. This allows
//  to decouple the lexer interface from the lexer implementation and reduces 
//  compilation time.
//
///////////////////////////////////////////////////////////////////////////////

//  Create a new slex based lexer object for the given input sequence and
//  return it through its lex_input_interface. The object is allocated with 
//  new, no owner for it is established here.
template <typename IteratorT, typename PositionT>
WAVE_SLEX_NEW_LEXER_INLINE
lex_input_interface<wave::cpplexer::lex_token<PositionT> > *
new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,
    IteratorT const &last, PositionT const &pos, 
    wave::language_support language)
{
    typedef slex::lex_functor<IteratorT, PositionT> functor_t;

    functor_t *functor = new functor_t(first, last, pos, language);
    return functor;
}

#undef WAVE_SLEX_NEW_LEXER_INLINE

///////////////////////////////////////////////////////////////////////////////
}   // namespace cpplexer
}   // namespace wave
     
#endif // !defined(CPP_SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here

Written By

Hartmut Kaiser

United States

Actively involved in Boost and the development of the Spirit parser construction framework.

Wave: a Standard conformant C++ preprocessor library

License

Comments and Discussions