Click here to Skip to main content
15,878,970 members
Articles / Desktop Programming / Win32

Xport: XHTML Parsing and Objective Reporting Toolkit

Rate me:
Please Sign up or sign in to vote.
4.73/5 (10 votes)
4 May 2008 | GPL3 | 13 min read | 60K | 682 | 32
Open-source C++ class template library for generating and parsing XHTML documents.
/************************************************************************
Xport: XHTML Parsing & Objective Reporting Toolkit
Copyright (C) 2007  Mitchel Haas

This file is part of Xport.

Xport is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Xport is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Xport.  If not, see <http://www.gnu.org/licenses/>.

For complete documentation on this library and alternative
licensing options, visit http://www.xportpro.com
Email questions, comments or suggestions to mitchel.haas@xportpro.com
************************************************************************/
#pragma once

namespace Xport 
{
  template<typename T> class xhtml_entity;

  // Appearance setting stored in every xhtml_entity (see xhtml_entity::appearance()).
  enum xhtml_entity_appearance {unaltered, character_entity, numeric_entity, named_entity};

  // Ordering functors over xhtml_entity, keyed on the character, the number
  // and the name member respectively.
  template<typename T> struct entity_ch_comp;
  template<typename T> struct entity_num_comp;
  template<typename T> struct entity_name_comp;
}

// One entity record: a character of type CT together with its numeric code,
// its name and an appearance setting.
template<typename CT>
class Xport::xhtml_entity
{
public:
  // Builds a fully specified entity from character, number, name and appearance.
  xhtml_entity(CT ch, short num, const std::basic_string<CT>& nm, xhtml_entity_appearance ea)
    : app(ea), character(ch), name(nm), number(num) {}

  // conversion constructors for convenience in set.find() operation
  // Each one builds a search key from a single field.  The remaining fields
  // are given defined defaults (unaltered, CT(), empty name, 0) so that the
  // accessors never read an indeterminate value from a key object.
  explicit xhtml_entity(CT ch);  // declared only; defined outside the class body
  explicit xhtml_entity(int num)
    : app(unaltered), character(), name(), number(static_cast<short>(num)) {}
  explicit xhtml_entity(const std::basic_string<CT>& nm)
    : app(unaltered), character(), name(nm), number(0) {}

  // Appearance setter / getter.
  void appearance(xhtml_entity_appearance a) { app = a; }
  xhtml_entity_appearance appearance() const { return app; }

  // Read-only access to the stored character, name (returned by value) and number.
  CT entity_character() const { return character; }
  std::basic_string<CT> entity_name() const { return name; }
  short entity_number() const { return number; }

  // Fills the given number-ordered set with the entity table.
  static void populate_entities(std::set<xhtml_entity<CT>, entity_num_comp<CT> >&);

private:
  // Declaration order is also the initialisation order used by the constructors.
  xhtml_entity_appearance app;
  CT character;
  std::basic_string<CT> name;
  short number;

  // friends: the comparison functors read the private members directly
  friend struct entity_ch_comp<CT>;
  friend struct entity_num_comp<CT>;
  friend struct entity_name_comp<CT>;

};


// char specialisation of the single-character constructor: stores ch and
// sets the other members to unaltered, an empty name and number 0.
template<>
inline Xport::xhtml_entity<char>::xhtml_entity(const char ch)
  : app(unaltered),
    character(ch),
    name(""),
    number(0)
{
}

// wchar_t specialisation of the single-character constructor: stores ch,
// sets the other members to unaltered, an empty name and number 0, then
// shifts the stored character left by eight bits and right by eight bits.
template<>
inline Xport::xhtml_entity<wchar_t>::xhtml_entity(const wchar_t ch)
  : app(unaltered),
    character(ch),
    name(L""),
    number(0)
{
  // NOTE(review): presumably meant to drop everything above the low byte;
  // which bits actually survive depends on the width and signedness of
  // wchar_t on the target platform -- confirm the intent.
  character = static_cast<wchar_t>(character << 8);
  character = static_cast<wchar_t>(character >> 8);
}


// Comparison functor: orders two entities by their character member.
template<typename CT>
struct Xport::entity_ch_comp
{
  // Returns true when lhs's character compares less than rhs's.
  bool operator() (const xhtml_entity<CT>& lhs, const xhtml_entity<CT>& rhs) const
  {
    return rhs.character > lhs.character;
  }
};

// Comparison functor: orders two entities by their number member.
template<typename CT>
struct Xport::entity_num_comp
{
  // Returns true when lhs's number is smaller than rhs's.
  bool operator() (const xhtml_entity<CT>& lhs, const xhtml_entity<CT>& rhs) const
  {
    return rhs.number > lhs.number;
  }
};

// Comparison functor: orders two entities by their name member.
template<typename CT>
struct Xport::entity_name_comp
{
  // Returns true when lhs's name compares less than rhs's.
  bool operator() (const xhtml_entity<CT>& lhs, const xhtml_entity<CT>& rhs) const
  {
    return lhs.name.compare(rhs.name) < 0;
  }
};

// Inserts the entity table below into entity_vec.  The set is ordered by
// entity number (entity_num_comp) and every row has a distinct number, so
// the order of the insert calls does not affect the resulting contents;
// rows are listed here in ascending numeric order.
template<typename CT>
inline void Xport::xhtml_entity<CT>::populate_entities(std::set<xhtml_entity<CT>, entity_num_comp<CT> >& entity_vec) 
{
  typedef xhtml_entity<CT> entity_t;

  // markup characters
  entity_vec.insert(entity_t(typed_char<CT>('"'),    34, typed_string<CT>("quot"), named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('&'),    38, typed_string<CT>("amp"),  named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\''),   39, typed_string<CT>("apos"), named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('<'),    60, typed_string<CT>("lt"),   named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('>'),    62, typed_string<CT>("gt"),   named_entity));

  // NOTE(review): nbsp is the only row stored with a plain space character
  // and the unaltered appearance -- confirm this is intended.
  entity_vec.insert(entity_t(typed_char<CT>(' '),   160, typed_string<CT>("nbsp"), unaltered));

  // symbols 161 - 191
  entity_vec.insert(entity_t(typed_char<CT>('\xA1'), 161, typed_string<CT>("iexcl"),  named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xA2'), 162, typed_string<CT>("cent"),   named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xA3'), 163, typed_string<CT>("pound"),  named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xA4'), 164, typed_string<CT>("curren"), named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xA5'), 165, typed_string<CT>("yen"),    named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xA6'), 166, typed_string<CT>("brvbar"), named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xA7'), 167, typed_string<CT>("sect"),   named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xA8'), 168, typed_string<CT>("uml"),    named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xA9'), 169, typed_string<CT>("copy"),   named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xAA'), 170, typed_string<CT>("ordf"),   named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xAB'), 171, typed_string<CT>("laquo"),  named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xAC'), 172, typed_string<CT>("not"),    named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xAE'), 174, typed_string<CT>("reg"),    named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xAF'), 175, typed_string<CT>("macr"),   named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xB0'), 176, typed_string<CT>("deg"),    named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xB1'), 177, typed_string<CT>("plusmn"), named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xB2'), 178, typed_string<CT>("sup2"),   named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xB3'), 179, typed_string<CT>("sup3"),   named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xB4'), 180, typed_string<CT>("acute"),  named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xB5'), 181, typed_string<CT>("micro"),  named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xB6'), 182, typed_string<CT>("para"),   named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xB7'), 183, typed_string<CT>("middot"), named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xB8'), 184, typed_string<CT>("cedil"),  named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xB9'), 185, typed_string<CT>("sup1"),   named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xBA'), 186, typed_string<CT>("ordm"),   named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xBB'), 187, typed_string<CT>("raquo"),  named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xBC'), 188, typed_string<CT>("frac14"), named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xBD'), 189, typed_string<CT>("frac12"), named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xBE'), 190, typed_string<CT>("frac34"), named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xBF'), 191, typed_string<CT>("iquest"), named_entity));

  // arithmetic signs
  entity_vec.insert(entity_t(typed_char<CT>('\xD7'), 215, typed_string<CT>("times"),  named_entity));
  entity_vec.insert(entity_t(typed_char<CT>('\xF7'), 247, typed_string<CT>("divide"), named_entity));
}


By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)



Comments and Discussions