/************************************************************************
Xport: XHTML Parsing & Objective Reporting Toolkit
Copyright (C) 2007 Mitchel Haas
This file is part of Xport.
Xport is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Xport is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Xport. If not, see <http://www.gnu.org/licenses/>.
For complete documentation on this library and alternative
licensing options, visit http://www.xportpro.com
Email questions, comments or suggestions to mitchel.haas@xportpro.com
************************************************************************/
#pragma once
#include "common.h"
#include "xhtml_markup.h"
#include "xhtml_doctype_declaration.h"
#include "xhtml_entity.h"
#include <iostream>
#include <fstream>
#include <string>
#include <set>
namespace Xport
{
    // Forward declarations of the class templates referenced by this header.
    // Every template takes four type parameters except xhtml_markup, which takes two.
    template<class T1, class T2, class T3, class T4> class xhtml_parser;
    template<class T1, class T2, class T3, class T4> class xhtml_element;
    template<class T1, class T2> class xhtml_markup;
    template<class T1, class T2, class T3, class T4> class xhtml_pcdata;
    template<class T1, class T2, class T3, class T4> class xhtml_processing_instruction;
    template<class T1, class T2, class T3, class T4> class xhtml_comment;

    // Logging levels. The non-zero levels are distinct powers of two and
    // lv_all equals the bitwise OR of all four of them.
    enum logging_verbosity
    {
        lv_none      = 0,
        lv_error     = 1,
        lv_warning   = 2,
        lv_start_tag = 4,
        lv_end_tag   = 8,
        lv_all       = 15
    };

    // Selector types for the overloaded xhtml_parser::option() members.
    // One enum per value type, so the enum type alone picks the overload.

    // Integer-valued options: no enumerators are defined.
    enum parser_integer_option {};

    // Boolean-valued options.
    enum parser_boolean_option
    {
        convert_entities,
        preserve_newlines
    };

    // String-valued options: no enumerators are defined.
    enum parser_string_option {};

    // Option whose value is a logging_verbosity.
    enum parser_log_option
    {
        log_verbosity
    };

    // Option whose value is an output stream.
    enum parser_log_stream_option
    {
        log_stream
    };
}
// XHTML parser declaration. Member definitions are expected in
// xhtml_parser.inl, which this header includes after the class.
//
// Template parameters:
//   DT  - forwarded to the friend declarations; not otherwise used in this declaration
//   CT  - character type of the input stream, the log stream and all strings
//   MBT - markup type the parser works with; the inline helpers below require
//         it to provide empty_tag(), end_tag(), nesting_type() and
//         validate_nesting()
//   SBT - forwarded to the friend declarations; not otherwise used in this declaration
//
// The public interface covers construction, stream state, entity registration
// and options. All parsing routines are private and reached through the friend
// templates xhtml_doc and xhtml_element.
template<typename DT, typename CT, typename MBT, typename SBT>
class Xport::xhtml_parser
{
public:
    // construction/destruction

    // Constructs a parser from a file name.
    explicit xhtml_parser(const std::string& filename);
    // Constructs a parser from a caller-supplied input stream.
    explicit xhtml_parser(std::basic_istream<CT>& in);
    virtual ~xhtml_parser();
private:
    // No copying or assignment allowed. Both are declared private and are
    // deliberately left WITHOUT a definition: members and friends can still
    // name them, so an accidental copy now fails at link time instead of
    // producing a parser whose members were never initialised.
    xhtml_parser(const xhtml_parser& src);
    xhtml_parser& operator =(const xhtml_parser& src);
public:
    // public interface

    // Returns true when the underlying input stream reports fail().
    bool operator !() const { return input->fail(); }
    std::basic_string<CT> doctype();
    // Entity management.
    void add_entity(CT chr, short num, const std::basic_string<CT>& named_ref, xhtml_entity_appearance appearance);
    bool entity_appearance(short entity_number, xhtml_entity_appearance appearance);
    xhtml_entity_appearance entity_appearance(short entity_number);
    // get option (the enum type of the argument selects the overload)
    int option(parser_integer_option opt);
    bool option(parser_boolean_option opt);
    std::basic_string<CT> option(parser_string_option opt);
    logging_verbosity option(parser_log_option opt);
    std::basic_ostream<CT>& option(parser_log_stream_option opt);
    // set option
    // NOTE(review): the bool result presumably reports whether the option was
    // accepted -- confirm against xhtml_parser.inl.
    bool option(parser_integer_option opt, int value);
    bool option(parser_boolean_option opt, bool value);
    bool option(parser_string_option opt, const std::basic_string<CT>& value);
    bool option(parser_log_option opt, logging_verbosity value);
    bool option(parser_log_stream_option opt, std::basic_ostream<CT>& strm);
private:
    // private interface
    void initialize_entities() const;
    virtual MBT* parse(MBT& elem) const;
    void process_bom() const;
    void reset() const;
    // implementation
    bool all_whitespace(const std::basic_string<CT>& pcdat) const;
    void CheckForTrailingPcdataSpace(MBT& elem) const;
    typename std::basic_string<CT>::const_iterator convert_entity(std::basic_string<CT>& pcdat, typename std::basic_string<CT>::size_type idx, typename std::basic_string<CT>::size_type len, const xhtml_entity<CT>& ent) const;
    // The next two helpers simply forward to the markup object.
    bool empty_tag(const MBT& mkup) const { return mkup.empty_tag(); }
    std::basic_string<CT> end_tag(const MBT& elem) const { return elem.end_tag(); }
    void log_msg(const std::basic_string<CT>& msg, const logging_verbosity lb) const;
    // Forwards to the markup object; mkup is dereferenced without a null check.
    xhtml_nesting_type nesting_type(const MBT* mkup) const { return mkup->nesting_type(); }
    virtual void process_entities(std::basic_string<CT>& pcdat) const;
    virtual MBT* read_comment(const MBT& parent) const;
    std::basic_string<CT> read_doctype_declaration(const std::basic_string<CT>& partial_tag) const;
    virtual MBT* read_element_end_tag() const;
    virtual MBT* read_element_start_tag(const MBT& parent) const;
    virtual MBT* read_next_markup_object(const MBT& parent) const;
    virtual MBT* read_pcdata(const MBT& parent, const std::basic_string<CT>& initial_str) const;
    virtual MBT* read_pre_markup() const;
    virtual MBT* read_processing_instruction() const;
    virtual MBT* read_tag(const MBT& parent) const;
    bool remove_leading_whitespace() const;
    void remove_trailing_whitespace(std::basic_string<CT>& pcdat) const;
    // Asks the parent whether the child may be nested inside it; both pointers
    // are dereferenced without a null check.
    bool validate_nesting(const MBT* pParent, const MBT* pChild) const { return pParent->validate_nesting(*pChild); }
    virtual bool validate_parsed_end_tag(const MBT& cur_elem, const MBT* pParsed_tag) const;
    // data
    // NOTE(review): the three members below presumably back the
    // convert_entities, log_verbosity and preserve_newlines options -- confirm
    // against xhtml_parser.inl.
    bool conv_entities;
    logging_verbosity logging_verb;
    bool preserve_new_lines;
    // The remaining members are mutable so that the const parsing routines
    // above are able to modify them.
    mutable bool created_input;
    mutable long line_no;
    mutable xhtml_tag_enum last_tag_type;
    mutable std::basic_istream<CT>* input;      // stream queried by operator !
    mutable std::basic_ostream<CT>* log_strm;
    // Entity sets; each is ordered by the comparator named in its type.
    mutable std::set<xhtml_entity<CT>, entity_ch_comp<CT> > character_entities;
    mutable std::set<xhtml_entity<CT>, entity_num_comp<CT> > numeric_entities;
    mutable std::set<xhtml_entity<CT>, entity_name_comp<CT> > named_entities;
    mutable std::set<xhtml_entity<CT>, entity_num_comp<CT> > entities;
    // friends
    // Compilers older than MSVC 7.0 get the non-template friend form.
#if defined(_MSC_VER) && _MSC_VER < 1300
    friend class xhtml_element<DT, CT, MBT, SBT>;
    friend class xhtml_doc<DT, CT, MBT, SBT>;
#else
    template<typename T, typename U, typename V, typename W> friend class xhtml_doc;
    template<typename T, typename U, typename V, typename W> friend class xhtml_element;
#endif
};
#include "xhtml_parser.inl"