Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version

Xport: XHTML Parsing and Objective Reporting Toolkit

Mitchel Haas, 4 May 2008 GPL3
Open source C++ class template library for generating and parsing xhtml documents.
xport_1.6.1.zip
descendant_markup_iterator.inl
stylesheet.inl
stylesheet_formatter.inl
stylesheet_rule.inl
tag.inl
tag_trait.inl
tag_traits.inl
xhtml_comment.inl
xhtml_doc.inl
xhtml_element.inl
xhtml_formatter.inl
xhtml_frameset.inl
xhtml_markup.inl
xhtml_parser.inl
xhtml_processing_instruction.inl
xhtml_strict.inl
xhtml_transitional.inl
xport_1.6.5.zip
descendant_markup_iterator.inl
stylesheet.inl
stylesheet_formatter.inl
stylesheet_rule.inl
tag.inl
tag_trait.inl
tag_traits.inl
xhtml_comment.inl
xhtml_doc.inl
xhtml_element.inl
xhtml_formatter.inl
xhtml_frameset.inl
xhtml_markup.inl
xhtml_parser.inl
xhtml_processing_instruction.inl
xhtml_strict.inl
xhtml_transitional.inl
xport_1.6.7.zip
descendant_markup_iterator.inl
stylesheet.inl
stylesheet_formatter.inl
stylesheet_rule.inl
tag.inl
tag_trait.inl
tag_traits.inl
xhtml_comment.inl
xhtml_doc.inl
xhtml_element.inl
xhtml_formatter.inl
xhtml_frameset.inl
xhtml_markup.inl
xhtml_parser.inl
xhtml_processing_instruction.inl
xhtml_strict.inl
xhtml_transitional.inl
xport_documentation.zip
Xport.chm
xport_htmlhelp.zip
Xport.chm
/************************************************************************
Xport: XHTML Parsing & Objective Reporting Toolkit
Copyright (C) 2007  Mitchel Haas

This file is part of Xport.

Xport is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Xport is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Xport.  If not, see <http://www.gnu.org/licenses/>.

For complete documentation on this library and alternative
licensing options, visit http://www.xportpro.com
Email questions, comments or suggestions to mitchel.haas@xportpro.com
************************************************************************/
#pragma once
#include "common.h"
#include "xhtml_markup.h"
#include "xhtml_doctype_declaration.h"
#include "xhtml_entity.h"
#include <iostream>
#include <fstream>
#include <string>
#include <set>

namespace Xport
{
  // Forward declarations of the class templates referenced by the parser.
  // Every template takes the same two type parameters.
  template<typename DT, typename CT> class xhtml_markup;
  template<typename DT, typename CT> class xhtml_element;
  template<typename DT, typename CT> class xhtml_pcdata;
  template<typename DT, typename CT> class xhtml_comment;
  template<typename DT, typename CT> class xhtml_processing_instruction;
  template<typename DT, typename CT> class xhtml_parser;

  // Logging levels. The individual levels are distinct powers of two and
  // lv_all (15) equals 1 | 2 | 4 | 8, so they are presumably meant to be
  // combined as bit flags -- confirm against log_msg() in the .inl file.
  enum logging_verbosity
  {
    lv_none      = 0,
    lv_error     = 1,
    lv_warning   = 2,
    lv_start_tag = 4,
    lv_end_tag   = 8,
    lv_all       = 15
  };

  // Option selector types. Each enum is a distinct type so that it can pick
  // the matching xhtml_parser::option() overload. The integer and string
  // selector enums currently define no enumerators.
  enum parser_integer_option
  {
  };

  enum parser_boolean_option
  {
    convert_entities  = 0,
    preserve_newlines = 1
  };

  enum parser_string_option
  {
  };

  enum parser_log_option
  {
    log_verbosity = 0
  };

  enum parser_log_stream_option
  {
    log_stream = 0
  };
}


// Parser class template that reads markup from a character stream.
//
// Template parameters:
//   DT - forwarded unchanged to xhtml_markup<DT, CT> and related templates
//        (presumably the document-type traits; confirm in xhtml_markup.h).
//   CT - character type; used for every std::basic_string, std::basic_istream
//        and std::basic_ostream in this interface.
//
// The parsing entry points are private; xhtml_doc and xhtml_element reach
// them through the friend declarations at the end of the class.
template<typename DT, typename CT>
class Xport::xhtml_parser
{
public:
  // construction/destruction
  // Construct from a file name or from an existing input stream.
  // NOTE(review): the created_input flag suggests the file-name overload
  // allocates its own stream and the destructor releases it -- confirm in
  // xhtml_parser.inl.
  explicit xhtml_parser(const std::string& filename);
  explicit xhtml_parser(std::basic_istream<CT>& in);
  virtual ~xhtml_parser();
private:
  // Copying and assignment are disabled. Both members are private AND
  // intentionally left undefined (the pre-C++11 non-copyable idiom), so an
  // accidental copy from a member function or a friend class fails at link
  // time instead of silently producing a parser whose stream pointers and
  // flags are uninitialized.
  xhtml_parser(const xhtml_parser&);              // no copying allowed
  xhtml_parser& operator =(const xhtml_parser&);  // no assignment allowed

public:
  // public interface
  // True when the underlying input stream reports fail().
  bool operator !() const { return input->fail(); }
  std::basic_string<CT> doctype();
  // Entity table maintenance; semantics are defined in xhtml_parser.inl.
  void add_entity(CT chr, short num, const std::basic_string<CT>& named_ref, xhtml_entity_appearance appearance);
  bool entity_appearance(short entity_number, xhtml_entity_appearance appearance);
  xhtml_entity_appearance entity_appearance(short entity_number);
  // get option
  // The selector enum type picks the overload; each returns the current value.
  int option(parser_integer_option opt);
  bool option(parser_boolean_option opt);
  std::basic_string<CT> option(parser_string_option opt);
  logging_verbosity option(parser_log_option opt);
  std::basic_ostream<CT>& option(parser_log_stream_option opt);
  // set option
  // Each setter returns bool (presumably success/failure -- confirm in .inl).
  bool option(parser_integer_option opt, int value);
  bool option(parser_boolean_option opt, bool value);
  bool option(parser_string_option opt, const std::basic_string<CT>& value);
  bool option(parser_log_option opt, logging_verbosity value);
  bool option(parser_log_stream_option opt, std::basic_ostream<CT>& strm);

private:
  // private interface
  // These are const member functions; the parsing state they update is
  // declared mutable in the data section below.
  void initialize_entities() const;
  virtual xhtml_markup<DT, CT>* parse(xhtml_markup<DT, CT>& elem) const;
  void process_bom() const;
  void reset() const;

  // implementation
  bool all_whitespace(const std::basic_string<CT>& pcdat) const;
  void CheckForTrailingPcdataSpace(xhtml_markup<DT, CT>& elem) const;
  typename std::basic_string<CT>::const_iterator convert_entity(std::basic_string<CT>& pcdat, typename std::basic_string<CT>::size_type idx, typename std::basic_string<CT>::size_type len, const xhtml_entity<CT>& ent) const;
  // Thin forwarders to the corresponding xhtml_markup member functions.
  bool empty_tag(const xhtml_markup<DT, CT>& mkup) const { return mkup.empty_tag(); }
  std::basic_string<CT> end_tag(const xhtml_markup<DT, CT>& elem) const { return elem.end_tag(); }
  void log_msg(const std::basic_string<CT>& msg, const logging_verbosity lb) const;
  // Forwards to mkup->nesting_type(); mkup must not be null.
  xhtml_nesting_type nesting_type(const xhtml_markup<DT, CT>* mkup) const { return mkup->nesting_type(); }
  virtual void process_entities(std::basic_string<CT>& pcdat) const;
  // read_* functions return a pointer to a markup object.
  // NOTE(review): ownership of the returned pointer is not visible here --
  // confirm in xhtml_parser.inl before changing callers.
  virtual xhtml_markup<DT, CT>* read_comment(const xhtml_markup<DT, CT>& parent) const;
  std::basic_string<CT> read_doctype_declaration(const std::basic_string<CT>& partial_tag) const;
  virtual xhtml_markup<DT, CT>* read_element_end_tag() const;
  virtual xhtml_markup<DT, CT>* read_element_start_tag(const xhtml_markup<DT, CT>& parent) const;
  virtual xhtml_markup<DT, CT>* read_next_markup_object(const xhtml_markup<DT, CT>& parent) const;
  virtual xhtml_markup<DT, CT>* read_pcdata(const xhtml_markup<DT, CT>& parent, const std::basic_string<CT>& initial_str) const;
  virtual xhtml_markup<DT, CT>* read_pre_markup() const;
  virtual xhtml_markup<DT, CT>* read_processing_instruction() const;
  virtual Xport::xhtml_markup<DT, CT>* read_tag(const xhtml_markup<DT, CT>& parent) const;
  bool remove_leading_whitespace() const;
  void remove_trailing_whitespace(std::basic_string<CT>& pcdat) const;
  // Forwards to pParent->validate_nesting(*pChild); both pointers are
  // dereferenced unconditionally and must not be null.
  bool validate_nesting(const xhtml_markup<DT, CT>* pParent, const xhtml_markup<DT, CT>* pChild) const { return pParent->validate_nesting(*pChild); }
  virtual bool validate_parsed_end_tag(const xhtml_markup<DT, CT>& cur_elem, const xhtml_markup<DT, CT>* pParsed_tag) const;

  // data
  // Option values (presumably backing the convert_entities, log_verbosity
  // and preserve_newlines options respectively -- confirm in .inl).
  bool conv_entities;
  logging_verbosity logging_verb;
  bool preserve_new_lines;
  // Mutable state, modifiable from the const member functions above.
  mutable bool created_input;
  mutable long line_no;
  mutable xhtml_tag_enum last_tag_type;
  mutable std::basic_istream<CT>* input;     // dereferenced by operator!()
  mutable std::basic_ostream<CT>* log_strm;
  // Entity sets, each ordered by a different comparator type.
  mutable std::set<xhtml_entity<CT>, entity_ch_comp<CT> > character_entities;
  mutable std::set<xhtml_entity<CT>, entity_num_comp<CT> > numeric_entities;
  mutable std::set<xhtml_entity<CT>, entity_name_comp<CT> > named_entities;
  mutable std::set<xhtml_entity<CT>, entity_num_comp<CT> > entities;

  // friends
  // Compilers older than MSVC 7.0 (_MSC_VER < 1300) get the
  // single-specialization friend form; all others use template friends.
  #if defined(_MSC_VER) && _MSC_VER < 1300
    friend class xhtml_doc<DT, CT>;
    friend class xhtml_element<DT, CT>;
  #else
    template<typename T, typename U> friend class xhtml_doc;
    template<typename T, typename U> friend class xhtml_element;
  #endif
};



#include "xhtml_parser.inl"

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)

Share

About the Author

Mitchel Haas
Software Developer Datasoft Solutions
United States
I'm a C++ programmer in the Midwest, now using VC7 at work and at home. I enjoy creating generic libraries and template-based programming.
 
I also enjoy web development (XHTML, CSS, JavaScript, PHP).

| Advertise | Privacy | Mobile
Web04 | 2.8.141022.2 | Last Updated 4 May 2008
Article Copyright 2008 by Mitchel Haas
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid