Click here to Skip to main content
15,885,155 members
Articles / Programming Languages / C++

Wave: a Standard conformant C++ preprocessor library

Rate me:
Please Sign up or sign in to vote.
4.96/5 (58 votes)
10 Jan 2004 | 13 min read | 395.3K   4.4K   81  
Describes a free and fully Standard conformant C++ preprocessor library
/*=============================================================================
    Wave: A Standard compliant C++ preprocessor

    Copyright (c) 2001-2003 Hartmut Kaiser
    http://spirit.sourceforge.net/

    Permission to copy, use, modify, sell and distribute this software
    is granted provided this copyright notice appears in all copies.
    This software is provided "as is" without express or implied
    warranty, and with no claim as to its suitability for any purpose.

    See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/

#include "cpp.hpp"                  // global configuration

#include <iterator>

///////////////////////////////////////////////////////////////////////////////
//  This sample requires the program_options library written by Vladimir Prus,
//  which is currently under Boost review. 
//  It is available here: http://boost-sandbox.sourceforge.net/program_options.
//
#include <boost/program_options.hpp>
#include <boost/filesystem/path.hpp>

#include "wave/cpplexer/cpplexer_exceptions.hpp"
#include "wave/cpplexer/cpp_token_ids.hpp"
#include "wave/cpplexer/cpp_lex_iterator.hpp"
#include "wave/cpplexer/cpp_lex_token.hpp"

#include "wave/cpp_exceptions.hpp"
#include "wave/cpp_context.hpp"

#include "trace_macro_expansion.hpp"

///////////////////////////////////////////////////////////////////////////////
//  include lexer specifics, import lexer names
//
#if defined(WAVE_USE_SLEX_CPP_LEXER)
// use the slex based C++ lexer

#if !defined(WAVE_SEPARATE_LEXER_INSTANTIATION)
#include "wave/cpplexer/slex/cpp_slex_lexer.hpp"
#endif // !defined(WAVE_SEPARATE_LEXER_INSTANTIATION)

#elif defined(WAVE_USE_RE2C_CPP_LEXER)
// use the re2c based C++ lexer

#if !defined(WAVE_SEPARATE_LEXER_INSTANTIATION)
#include "wave/cpplexer/re2clex/cpp_re2c_lexer.hpp"
#endif // !defined(WAVE_SEPARATE_LEXER_INSTANTIATION)

#endif

///////////////////////////////////////////////////////////////////////////////
//  include the grammar definitions, if these shouldn't be compiled separately
//  (ATTENTION: _very_ large compilation times!)
//
#if !defined(WAVE_SEPARATE_GRAMMAR_INSTANTIATION)
#include "wave/grammars/cpp_intlit_grammar.hpp"
#include "wave/grammars/cpp_chlit_grammar.hpp"
#include "wave/grammars/cpp_floatlit_grammar.hpp"
#include "wave/grammars/cpp_grammar.hpp"
#include "wave/grammars/cpp_expression_grammar.hpp"
#include "wave/grammars/cpp_predef_macros_grammar.hpp"
#include "wave/grammars/cpp_defined_grammar.hpp"
#endif // !defined(WAVE_SEPARATE_GRAMMAR_INSTANTIATION)

///////////////////////////////////////////////////////////////////////////////
//  import required names
using namespace boost::spirit;

using std::string;
using std::pair;
using std::vector;
using std::getline;
using std::ifstream;
using std::cout;
using std::cerr;
using std::endl;
using std::ostream;
using std::istreambuf_iterator;

namespace po = boost::program_options;
namespace fs = boost::filesystem;

///////////////////////////////////////////////////////////////////////////////
//  Write the library version, followed by the release date in parentheses,
//  to stdout. Always returns 0, which the caller passes on as the exit code.
int print_version()
{
    typedef wave::context<
            std::string::iterator, wave::cpplexer::lex_token<>,
            wave::iteration_context_policies::load_file_to_string,
            trace_macro_expansion> 
        context_t;

    // the version string comes wrapped in quotes: drop the first and the 
    // last character before printing it
    string const quoted (context_t::get_version_string());
    string const bare (quoted.substr(1, quoted.size()-2));

    cout << bare << " (" << CPP_VERSION_DATE << ")" << endl;
    return 0;
}

///////////////////////////////////////////////////////////////////////////////
//  Write the copyright notice to stdout, one line of the notice per output 
//  line. Always returns 0, which the caller passes on as the exit code.
int print_copyright()
{
    static char const *const notice_lines[] = {
        "Wave: A Standard conformant C++ preprocessor",
        "Copyright (c) 2001-2003 Hartmut Kaiser",
        "It is hosted by: http://spirit.sourceforge.net/",
        "Permission to copy, use, modify, sell and distribute this software",
        "is granted provided this copyright notice appears in all copies.",
        "This software is provided \"as is\" without express or implied",
        "warranty, and with no claim as to its suitability for any purpose."
    };

    // walk the table from its first entry to one past its last entry
    char const *const *const stop = 
        notice_lines + sizeof(notice_lines) / sizeof(notice_lines[0]);

    for (char const *const *line = notice_lines; line != stop; ++line)
        cout << *line << endl;

    return 0;
}

///////////////////////////////////////////////////////////////////////////////
namespace cmd_line_util {

    // Extra command line parser: an argument of the form '@something' is 
    // translated into the option "config-file" with the value "something".
    // Every other argument yields an empty (name, value) pair.
    pair<string, string> at_option_parser(string const&s)
    {
        pair<string, string> result;        // empty name and empty value

        if (!s.empty() && '@' == s[0]) {
            result.first = "config-file";
            result.second = s.substr(1);    // everything behind the '@'
        }
        return result;
    }

    // class, which keeps include file information from the command line
    class include_paths {
    public:
        include_paths() : seen_separator(false) {}

        vector<string> paths;       // stores user paths
        vector<string> syspaths;    // stores system paths
        bool seen_separator;        // command line contains a '-I-' option

        // Function which validates additional tokens from command line.
        static void 
        validate(boost::any &v, vector<string> const &tokens)
        {
            if (v.empty())
                v = boost::any(include_paths());

            include_paths *p = boost::any_cast<include_paths>(&v);

            BOOST_SPIRIT_ASSERT(p);
            // Assume only one path per '-I' occurence.
            string t = tokens[0];
            if (t == "-") {
            // found -I- option, so switch behaviour
                p->seen_separator = true;
            } 
            else if (p->seen_separator) {
            // store this path as a system path
                p->syspaths.push_back(t); 
            } 
            else {
            // store this path as an user path
                p->paths.push_back(t);
            }            
        }
    };

    // The program_options library requires an extraction operator for every 
    // option type stored in a variables_map which has a validator function 
    // assigned. This one exists only to satisfy that requirement: it reads 
    // nothing and hands the stream back untouched.
    std::istream& operator>>(std::istream& is, include_paths& /*p*/)
    {
        return is;
    }

    // Read all options from a given config file, parse and add them to the
    // given variables_map
    void read_config_file_options(string const &filename, 
        po::options_description const &desc, po::variables_map &vm,
        bool may_fail = false)
    {
    ifstream ifs(filename.c_str());

        if (!ifs.is_open()) {
            if (!may_fail) {
                cerr << filename 
                    << ": command line warning: config file not found"
                    << endl;
            }
            return;
        }
        
    vector<string> options;
    string line;

        while (std::getline(ifs, line)) {
        // skip empty lines
            string::size_type pos = line.find_first_not_of(" \t");
            if (pos == string::npos) 
                continue;

        // skip comment lines
            if ('#' != line[pos])
                options.push_back(line);
        }

        if (options.size() > 0) {
        po::options_and_arguments oa = po::parse_command_line(options, desc);

            po::store(oa, vm, desc);
        }
    }

///////////////////////////////////////////////////////////////////////////////
}

///////////////////////////////////////////////////////////////////////////////
//  Do the actual preprocessing.
//
//  Reads the file named by the first positional argument of 'opts' into 
//  memory, sets up a wave::context from the options stored in 'vm' (trace
//  destination, language mode, include paths, macro definitions, include 
//  nesting depth, output destination, forced includes) and writes the text 
//  of the generated tokens to the output.
//
//  Returns:
//       0  success
//      -1  the input, trace or output file could not be opened, or the 
//          requested include nesting depth was out of range
//       1  a wave::cpp_exception was caught (preprocessing error)
//       2  a wave::cpplexer::lexing_exception was caught (lexing error)
//       3  some other std::exception was caught
//       4  an unknown exception was caught
//
//  The first positional argument is accessed unchecked, so the caller has to 
//  make sure that 'opts' contains at least one.
int do_actual_work (po::options_and_arguments const opts, 
    po::variables_map const &vm)
{
    // current file position is saved for exception handling
    wave::util::file_position_t current_position;

    try {
        // process the given file
        string file_name(opts.arguments().front());
        ifstream instream(file_name.c_str());
        string instring;

        if (!instream.is_open()) {
            cerr << "wave: could not open input file: " << file_name << endl;
            return -1;
        }
        instream.unsetf(std::ios::skipws);
        
#if defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
        // The string can't be constructed from an iterator pair on this 
        // platform, so the file is read character by character. 
        // (This formerly relied on copy/istream_iterator/inserter, for which 
        // this file provides neither a using declaration nor <algorithm>.)
        // This is known to be very slow for large files on some systems.
        {
            char ch;
            while (instream.get(ch))
                instring += ch;
        }
#else
        instring = string(istreambuf_iterator<char>(instream.rdbuf()),
                          istreambuf_iterator<char>());
#endif // defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)

        //  The template wave::cpplexer::lex_token<> is the token type to be 
        //  used by the Wave library.
        typedef wave::context<
                    std::string::iterator, wave::cpplexer::lex_token<>,
                    wave::iteration_context_policies::load_file_to_string,
                    trace_macro_expansion> 
                context_t;

        // The C++ preprocessor iterators shouldn't be constructed directly. 
        // They are to be generated through a wave::context<> object. This 
        // wave::context object is additionally to be used to initialize and 
        // define different parameters of the actual preprocessing.
        // The preprocessing of the input stream is done on the fly behind 
        // the scenes during iteration over the context_t::iterator_t stream.
        std::ofstream traceout;
        bool enable_trace = false;
    
        if (vm.count("traceto")) {
            // try to open the file, where to put the trace output
            string trace_file (vm["traceto"].as<string>());
        
            // "-" means: no file, trace to stderr (set up below)
            if (trace_file != "-") {
                traceout.open(trace_file.c_str());
                if (!traceout.is_open()) {
                    cerr << "wave: could not open trace file: " << trace_file 
                        << endl;
                    return -1;
                }
            }
            enable_trace = true;
        }
        if (enable_trace && !traceout.is_open()) {
            // trace to std::cerr: let traceout share the stream buffer, 
            // the formatting and the state of cerr
            traceout.copyfmt(cerr);
            traceout.clear(cerr.rdstate());
            static_cast<std::basic_ios<char> &>(traceout).rdbuf(cerr.rdbuf());
        }
        
        context_t ctx (instring.begin(), instring.end(), file_name.c_str(),
            trace_macro_expansion(traceout, enable_trace));

#if defined(WAVE_SUPPORT_VARIADICS_PLACEMARKERS)
        // enable C99 mode, if appropriate (implies variadics)
        if (vm.count("c99")) {
#if defined(WAVE_ENABLE_CPP0X_EXTENSIONS)
            if (vm.count("c++0x")) {
                cerr << 
                    "wave: the C99 and C++0x modes are mutually exclusive, "
                    "working in C99 mode." << endl;
            }
#endif // defined(WAVE_ENABLE_CPP0X_EXTENSIONS)
            ctx.set_language(wave::support_c99);
        }
#if defined(WAVE_ENABLE_CPP0X_EXTENSIONS)
        // enable experimental C++0x mode (implies variadics)
        else if (vm.count("c++0x")) {
            ctx.set_language(wave::support_cpp0x);
        }
#endif // defined(WAVE_ENABLE_CPP0X_EXTENSIONS)
        else if (vm.count("variadics")) {
            // enable variadics and placemarkers, if appropriate
            ctx.set_language(wave::enable_variadics(ctx.get_language()));
        }
#endif // defined(WAVE_SUPPORT_VARIADICS_PLACEMARKERS)
        
        // add include directories to the system include search paths
        if (vm.count("sysinclude")) {
            vector<string> syspaths = vm["sysinclude"].as<vector<string> >();
        
            vector<string>::const_iterator end = syspaths.end();
            for (vector<string>::const_iterator cit = syspaths.begin(); 
                 cit != end; ++cit)
            {
                ctx.add_sysinclude_path((*cit).c_str());
            }
        }
        
        // add include directories to the include search paths
        if (vm.count("include")) {
            cmd_line_util::include_paths const &ip = 
                vm["include"].as<cmd_line_util::include_paths>();
            vector<string>::const_iterator end = ip.paths.end();

            for (vector<string>::const_iterator cit = ip.paths.begin(); 
                 cit != end; ++cit)
            {
                ctx.add_include_path((*cit).c_str());
            }

            // if on the command line was given -I- , this has to be 
            // propagated
            if (ip.seen_separator) 
                ctx.set_sysinclude_delimiter();
                 
            // add system include directories to the include path
            vector<string>::const_iterator sysend = ip.syspaths.end();
            for (vector<string>::const_iterator syscit = ip.syspaths.begin(); 
                 syscit != sysend; ++syscit)
            {
                ctx.add_sysinclude_path((*syscit).c_str());
            }
        }
    
        // add additional defined macros 
        if (vm.count("define")) {
            vector<string> macros = vm["define"].as<vector<string> >();
            vector<string>::const_iterator end = macros.end();
            for (vector<string>::const_iterator cit = macros.begin(); 
                 cit != end; ++cit)
            {
                ctx.add_macro_definition(*cit);
            }
        }

        // add additional predefined macros 
        if (vm.count("predefine")) {
            vector<string> predefmacros = vm["predefine"].as<vector<string> >();
            vector<string>::const_iterator end = predefmacros.end();
            for (vector<string>::const_iterator cit = predefmacros.begin(); 
                 cit != end; ++cit)
            {
                ctx.add_macro_definition(*cit, true);
            }
        }

        // undefine specified macros
        if (vm.count("undefine")) {
            vector<string> undefmacros = vm["undefine"].as<vector<string> >();
            vector<string>::const_iterator end = undefmacros.end();
            for (vector<string>::const_iterator cit = undefmacros.begin(); 
                 cit != end; ++cit)
            {
                ctx.remove_macro_definition((*cit).c_str(), true);
            }
        }

        // maximal include nesting depth
        if (vm.count("nesting")) {
            int max_depth = vm["nesting"].as<int>();
            if (max_depth < 1 || max_depth > 100000) {
                cerr << "wave: bogus maximal include nesting depth: " 
                    << max_depth << endl;
                return -1;
            }
            ctx.set_max_include_nesting_depth(max_depth);
        }
        
        // open the output file
        std::ofstream output;
    
        if (vm.count("output")) {
            // try to open the file, where to put the preprocessed output
            string out_file (vm["output"].as<string>());
        
            output.open(out_file.c_str());
            if (!output.is_open()) {
                cerr << "wave: could not open output file: " << out_file << endl;
                return -1;
            }
        }
        else {
            // output the preprocessed result to std::cout: let output share 
            // the stream buffer, the formatting and the state of cout
            output.copyfmt(cout);
            output.clear(cout.rdstate());
            static_cast<std::basic_ios<char> &>(output).rdbuf(cout.rdbuf());
        }

        // analyze the input file
        context_t::iterator_t first = ctx.begin();
        context_t::iterator_t last = ctx.end();
    
        // preprocess the required include files 
        if (vm.count("forceinclude")) {
            // add the filenames to force as include files in _reverse_ order
            // the second parameter 'is_last' for the force_include function 
            // should be set to true for the last (first given) file.
            vector<string> force = vm["forceinclude"].as<vector<string> >();
            vector<string>::const_reverse_iterator rend = force.rend();
            for (vector<string>::const_reverse_iterator cit = force.rbegin(); 
                 cit != rend; /**/)
            {
                string filename(*cit);
                // the iterator is advanced here, inside the call
                first.force_include(filename.c_str(), ++cit == rend);
            }
        }
        
        // loop over all generated tokens outputting the generated text 
        while (first != last) {
            // print out the string representation of this token (skip 
            // comments)
            using namespace wave::cpplexer;

            // store the last known good token position
            current_position = (*first).get_position();

            token_id id = token_id(*first);

            if (T_CPPCOMMENT == id || T_NEWLINE == id) {
                // C++ comment tokens contain the trailing newline
                output << endl;
            }
            else if (id != T_CCOMMENT) {
                // print out the current token value
                output << (*first).get_value();
            }
            ++first;        // advance to the next token
        }
    }
    catch (wave::cpp_exception &e) {
        // some preprocessing error
        cerr 
            << e.file_name() << "(" << e.line_no() << "): "
            << e.description() << endl;
        return 1;
    }
    catch (wave::cpplexer::lexing_exception &e) {
        // some lexing error
        cerr 
            << e.file_name() << "(" << e.line_no() << "): "
            << e.description() << endl;
        return 2;
    }
    catch (std::exception &e) {
        // use last recognized token to retrieve the error position
        cerr 
            << current_position.get_file() 
            << "(" << current_position.get_line() << "): "
            << "exception caught: " << e.what()
            << endl;
        return 3;
    }
    catch (...) {
        // use last recognized token to retrieve the error position
        cerr 
            << current_position.get_file() 
            << "(" << current_position.get_line() << "): "
            << "unexpected exception caught." << endl;
        return 4;
    }
    return 0;
}

///////////////////////////////////////////////////////////////////////////////
//  Main entry point.
//
//  Parses the command line, reads the options from a 'wave.cfg' located in 
//  the directory of the executable (if there is one) and from all config 
//  files given explicitly, and then either prints the help, version or 
//  copyright information or preprocesses the given input file.
//
//  Returns:
//      1   after printing the usage information (--help)
//      5   no input file was given
//      6   a std::exception was caught
//      7   an unknown exception was caught
//      otherwise the value returned by print_version(), print_copyright() 
//      or do_actual_work()
int
main (int argc, char const *argv[])
{
    try {
    // analyze the command line options and arguments
    vector<string> cfg_files;
    
    // declare the options allowed from the command line only
    po::options_description desc_cmdline ("Options allowed on the command line only");
        
        desc_cmdline.add_options()
            ("help,h", "", "print out program usage (this message)")
            ("version,v", "", "print the version number")
            ("copyright,c", "", "print out the copyright statement")
            ("config-file", po::parameter("filepath", &cfg_files),
                "specify a config file (alternatively: @filepath)")
        ;

    // declare the options allowed on command line and in config files
    po::options_description desc_generic ("Options allowed additionally in a config file");

        desc_generic.add_options()
            ("output,o", "path", "specify a file to use for output instead of stdout")
            ("include,I", "path", "specify an additional include directory").
                validator(&cmd_line_util::include_paths::validate)
            ("sysinclude,S", po::parameter<vector<string> >("syspath"), 
                "specify an additional system include directory")
            ("forceinclude,F", po::parameter<vector<string> >("file"),
                "force inclusion of the given file")
            ("define,D", po::parameter<vector<string> >("macro[=[value]]"), 
                "specify a macro to define")
            ("predefine,P", po::parameter<vector<string> >("macro[=[value]]"), 
                "specify a macro to predefine")
            ("undefine,U", po::parameter<vector<string> >("macro"), 
                "specify a macro to undefine")
            ("nesting,n", po::parameter<int>("depth"), 
                "specify a new maximal include nesting depth")
        ;
        
    po::options_description desc_ext ("Extended options (allowed everywhere)");

        desc_ext.add_options()
            ("traceto,t", "path", "output trace info to a file [path] or to stderr [-]")
#if defined(WAVE_SUPPORT_VARIADICS_PLACEMARKERS)
            ("variadics", "", "enable certain C99 extensions in C++ mode")
            ("c99", "", "enable C99 mode (implies --variadics)")
#endif // defined(WAVE_SUPPORT_VARIADICS_PLACEMARKERS)
#if defined(WAVE_ENABLE_CPP0X_EXTENSIONS)
            ("c++0x", "", "enable C++0x support (implies --variadics)")
#endif // defined(WAVE_ENABLE_CPP0X_EXTENSIONS)
        ;
    
    // combine the options for the different usage schemes
    po::options_description desc_overall_cmdline;

        desc_overall_cmdline.add(desc_cmdline);    
        desc_overall_cmdline.add(desc_generic);
        desc_overall_cmdline.add(desc_ext);

    // the command-line-only options are not accepted inside config files
    po::options_description desc_overall_cfgfile;

        desc_overall_cfgfile.add(desc_generic);
        desc_overall_cfgfile.add(desc_ext);
        
    // parse command line and store results
    po::options_and_arguments opts = po::parse_command_line(argc, argv, 
        desc_overall_cmdline, 0, cmd_line_util::at_option_parser);
    po::variables_map vm;
    
        po::store(opts, vm, desc_overall_cmdline);

    // Try to find a wave.cfg in the same directory as the executable was 
    // started from. If this exists, treat it as a wave config file
    // (a missing wave.cfg is not reported: may_fail is set to true).
    fs::path filename(argv[0], fs::native);

        filename = filename.branch_path() / "wave.cfg";
        cmd_line_util::read_config_file_options(filename.string(), 
            desc_overall_cfgfile, vm, true);

    // if there is specified at least one config file, parse it and add the 
    // options to the main variables_map
        if (vm.count("config-file")) {
            vector<string>::const_iterator end = cfg_files.end();
            for (vector<string>::const_iterator cit = cfg_files.begin(); 
                 cit != end; ++cit)
            {
            // parse a single config file and store the results
                cmd_line_util::read_config_file_options(*cit, 
                    desc_overall_cfgfile, vm);
            }
        }

    // ... act as required 
        if (vm.count("help")) {
        po::options_description desc_help (
            "Usage: wave [options] [@config-file(s)] file");

            desc_help.add(desc_cmdline);    
            desc_help.add(desc_generic);
            desc_help.add(desc_ext);
            cout << desc_help << endl;
            return 1;
        }
        
        if (vm.count("version")) {
            return print_version();
        }

        if (vm.count("copyright")) {
            return print_copyright();
        }
        
    // if there is no input file given, then exit
        if (0 == opts.arguments().size()) {
            cerr << "wave: no input file given, "
                 << "use --help to get a hint." << endl;
            return 5;
        }

    // preprocess the given input file
        return do_actual_work(opts, vm);
    }
    catch (std::exception &e) {
    // report errors on stderr like every other diagnostic in this file, so 
    // that they never get mixed into output written to stdout
        cerr << "wave: exception caught: " << e.what() << endl;
        return 6;
    }
    catch (...) {
        cerr << "wave: unexpected exception caught." << endl;
        return 7;
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
United States United States
Actively involved in Boost and the development of the Spirit parser construction framework.

Comments and Discussions