Click here to Skip to main content
15,885,914 members
Articles / Desktop Programming / Win32

Xport: XHTML Parsing and Objective Reporting Toolkit

Rate me:
Please Sign up or sign in to vote.
4.73/5 (10 votes)
4 May 2008 | GPL3 | 13 min read | 60.1K | 682 | 32
Open source C++ class template library for generating and parsing xhtml documents.
/************************************************************************
Xport: XHTML Parsing & Objective Reporting Toolkit
Copyright (C) 2007  Mitchel Haas

This file is part of Xport.

Xport is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Xport is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Xport.  If not, see <http://www.gnu.org/licenses/>.

For complete documentation on this library and alternative
licensing options, visit http://www.xportpro.com
Email questions, comments or suggestions to mitchel.haas@xportpro.com
************************************************************************/
#pragma once
#include "common.h"
#include "xhtml_markup.h"
#include "xhtml_doctype_declaration.h"
#include "xhtml_entity.h"
#include <iostream>
#include <fstream>
#include <string>
#include <set>

namespace Xport
{
  // Forward declarations of the library's class templates. Each one takes
  // two type parameters; the parser class below names them DT and CT.
  template<typename DT, typename CT> class xhtml_parser;
  template<typename DT, typename CT> class xhtml_element;
  template<typename DT, typename CT> class xhtml_markup;
  template<typename DT, typename CT> class xhtml_pcdata;
  template<typename DT, typename CT> class xhtml_processing_instruction;
  template<typename DT, typename CT> class xhtml_comment;

  // Logging verbosity levels. The four non-zero levels are distinct powers
  // of two, and lv_all is the bitwise OR of all of them (15).
  enum logging_verbosity
  {
    lv_none      = 0,
    lv_error     = 1,
    lv_warning   = 2,
    lv_start_tag = 4,
    lv_end_tag   = 8,
    lv_all       = lv_error | lv_warning | lv_start_tag | lv_end_tag
  };

  // Option selectors. Each enum is a distinct type, which lets the parser's
  // option() overloads be chosen by the kind of value an option carries.
  // The integer and string families currently have no enumerators.
  enum parser_integer_option
  {
  };

  enum parser_boolean_option
  {
    convert_entities  = 0,
    preserve_newlines = 1
  };

  enum parser_string_option
  {
  };

  enum parser_log_option
  {
    log_verbosity = 0
  };

  enum parser_log_stream_option
  {
    log_stream = 0
  };
}


// XHTML parser class template.
//
// Template parameters:
//   DT - forwarded unchanged as the first argument of xhtml_markup<DT, CT>;
//        its meaning is not visible in this header (presumably the document
//        type -- TODO confirm).
//   CT - character type; used for every std::basic_string, std::basic_istream
//        and std::basic_ostream in the interface.
//
// Only the small forwarding helpers are defined inline here. All other
// members are declared only; their definitions are presumably in
// xhtml_parser.inl, which this header includes at its end.
//
// Almost the whole private interface is const, so the parsing state it
// updates (input stream, line counter, entity tables, ...) is declared
// mutable in the data section.
template<typename DT, typename CT>
class Xport::xhtml_parser
{
public:
  // construction/destruction
  // Construct from a file name, or from an input stream supplied by the caller.
  // Both are explicit, so neither a string nor a stream converts implicitly.
  explicit xhtml_parser(const std::string& filename);
  explicit xhtml_parser(std::basic_istream<CT>& in);
  virtual ~xhtml_parser();
private:
  // Copying is blocked for outside code by making these private.
  // NOTE(review): both have (empty) bodies, so members and the friend classes
  // can still call them; the copy constructor would then leave every data
  // member uninitialized and the assignment would copy nothing.
  xhtml_parser(const xhtml_parser& src) {}  // no copying allowed
  xhtml_parser& operator =(const xhtml_parser& src) { return *this; } // no assignment allowed

public:
  // public interface
  // True when the input stream reports fail(). Dereferences `input` without
  // a null check.
  bool operator !() const { return input->fail(); }
  std::basic_string<CT> doctype();
  // Entity table maintenance. entity_appearance() is overloaded: the
  // two-argument form takes an appearance and returns bool, the one-argument
  // form returns the appearance for the given entity number.
  void add_entity(CT chr, short num, const std::basic_string<CT>& named_ref, xhtml_entity_appearance appearance);
  bool entity_appearance(short entity_number, xhtml_entity_appearance appearance);
  xhtml_entity_appearance entity_appearance(short entity_number);
  // get option
  // One overload per option enum type; the enum type selects the return type.
  int option(parser_integer_option opt);
  bool option(parser_boolean_option opt);
  std::basic_string<CT> option(parser_string_option opt);
  logging_verbosity option(parser_log_option opt);
  std::basic_ostream<CT>& option(parser_log_stream_option opt);
  // set option
  // Same overload scheme with the new value as second argument. All return
  // bool (presumably success/failure -- TODO confirm against the .inl).
  bool option(parser_integer_option opt, int value);
  bool option(parser_boolean_option opt, bool value);
  bool option(parser_string_option opt, const std::basic_string<CT>& value);
  bool option(parser_log_option opt, logging_verbosity value);
  bool option(parser_log_stream_option opt, std::basic_ostream<CT>& strm);

private:
  // private interface
  // Private, so callable only by this class and the friends declared at the
  // bottom (xhtml_doc and xhtml_element).
  void initialize_entities() const;
  virtual xhtml_markup<DT, CT>* parse(xhtml_markup<DT, CT>& elem) const;
  void process_bom() const;
  void reset() const;

  // implementation
  // The read_* members, parse(), process_entities() and
  // validate_parsed_end_tag() are virtual. The read_* members return a raw
  // xhtml_markup pointer; who owns the pointed-to object is not visible here.
  bool all_whitespace(const std::basic_string<CT>& pcdat) const;
  void CheckForTrailingPcdataSpace(xhtml_markup<DT, CT>& elem) const;
  typename std::basic_string<CT>::const_iterator convert_entity(std::basic_string<CT>& pcdat, typename std::basic_string<CT>::size_type idx, typename std::basic_string<CT>::size_type len, const xhtml_entity<CT>& ent) const;
  // Forwards to xhtml_markup::empty_tag().
  bool empty_tag(const xhtml_markup<DT, CT>& mkup) const { return mkup.empty_tag(); }
  // Forwards to xhtml_markup::end_tag().
  std::basic_string<CT> end_tag(const xhtml_markup<DT, CT>& elem) const { return elem.end_tag(); }
  void log_msg(const std::basic_string<CT>& msg, const logging_verbosity lb) const;
  // Forwards to xhtml_markup::nesting_type(); mkup is dereferenced unchecked.
  xhtml_nesting_type nesting_type(const xhtml_markup<DT, CT>* mkup) const { return mkup->nesting_type(); }
  virtual void process_entities(std::basic_string<CT>& pcdat) const;
  virtual xhtml_markup<DT, CT>* read_comment(const xhtml_markup<DT, CT>& parent) const;
  std::basic_string<CT> read_doctype_declaration(const std::basic_string<CT>& partial_tag) const;
  virtual xhtml_markup<DT, CT>* read_element_end_tag() const;
  virtual xhtml_markup<DT, CT>* read_element_start_tag(const xhtml_markup<DT, CT>& parent) const;
  virtual xhtml_markup<DT, CT>* read_next_markup_object(const xhtml_markup<DT, CT>& parent) const;
  virtual xhtml_markup<DT, CT>* read_pcdata(const xhtml_markup<DT, CT>& parent, const std::basic_string<CT>& initial_str) const;
  virtual xhtml_markup<DT, CT>* read_pre_markup() const;
  virtual xhtml_markup<DT, CT>* read_processing_instruction() const;
  virtual Xport::xhtml_markup<DT, CT>* read_tag(const xhtml_markup<DT, CT>& parent) const;
  bool remove_leading_whitespace() const;
  void remove_trailing_whitespace(std::basic_string<CT>& pcdat) const;
  // Forwards to pParent->validate_nesting(*pChild); both pointers are
  // dereferenced unchecked.
  bool validate_nesting(const xhtml_markup<DT, CT>* pParent, const xhtml_markup<DT, CT>* pChild) const { return pParent->validate_nesting(*pChild); }
  virtual bool validate_parsed_end_tag(const xhtml_markup<DT, CT>& cur_elem, const xhtml_markup<DT, CT>* pParsed_tag) const;

  // data
  // The first three members are not mutable, so const members cannot change
  // them. Presumably they back the convert_entities, log_verbosity and
  // preserve_newlines options respectively -- TODO confirm against the .inl.
  bool conv_entities;
  logging_verbosity logging_verb;
  bool preserve_new_lines;
  // Mutable state, writable from the const parsing members.
  // Presumably records whether the parser created `input` itself -- TODO confirm.
  mutable bool created_input;
  mutable long line_no;
  mutable xhtml_tag_enum last_tag_type;
  // Input stream queried by operator!(), and the stream used for logging.
  mutable std::basic_istream<CT>* input;
  mutable std::basic_ostream<CT>* log_strm;
  // Entity tables: sets of xhtml_entity<CT>, each with its own comparator
  // type. numeric_entities and entities both use entity_num_comp.
  mutable std::set<xhtml_entity<CT>, entity_ch_comp<CT> > character_entities;
  mutable std::set<xhtml_entity<CT>, entity_num_comp<CT> > numeric_entities;
  mutable std::set<xhtml_entity<CT>, entity_name_comp<CT> > named_entities;
  mutable std::set<xhtml_entity<CT>, entity_num_comp<CT> > entities;

  // friends
  // xhtml_doc and xhtml_element get access to the private interface above.
  // For MSVC versions below 1300 only the <DT, CT> specializations are
  // befriended; all other compilers befriend every specialization of the two
  // templates.
  #if defined(_MSC_VER) && _MSC_VER < 1300
    friend class xhtml_doc<DT, CT>;
    friend class xhtml_element<DT, CT>;
  #else
    template<typename T, typename U> friend class xhtml_doc;
    template<typename T, typename U> friend class xhtml_element;
  #endif
};



#include "xhtml_parser.inl"

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)



Comments and Discussions