/************************************************************************
Xport: XHTML Parsing & Objective Reporting Toolkit
Copyright (C) 2007 Mitchel Haas
This file is part of Xport.
Xport is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Xport is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Xport. If not, see <http://www.gnu.org/licenses/>.
For complete documentation on this library and alternative
licensing options, visit http://www.xportpro.com
Email questions, comments or suggestions to mitchel.haas@xportpro.com
************************************************************************/
#pragma once
namespace Xport
{
    // Entity record, parameterised on the character type.
    template<typename CT> class xhtml_entity;

    // Appearance tag stored in every xhtml_entity.
    // NOTE(review): presumably selects how an entity is written on output
    // (left as-is, as the raw character, as a numeric reference or as a
    // named reference) -- confirm against the code that consumes it.
    enum xhtml_entity_appearance
    {
        unaltered,
        character_entity,
        numeric_entity,
        named_entity
    };

    // Ordering functors over xhtml_entity, one per key field
    // (character, number, name).
    template<typename CT> struct entity_ch_comp;
    template<typename CT> struct entity_num_comp;
    template<typename CT> struct entity_name_comp;
}
// A single XHTML entity: the character it stands for, its numeric code,
// its entity name and an appearance tag.
//
// Besides the fully specified constructor, three single-argument
// constructors build "key" objects that carry only one meaningful field,
// so they can be handed to set.find() with the matching comparator
// (entity_ch_comp, entity_num_comp or entity_name_comp).
// Every constructor initializes all members: fields that are not part of
// the key get neutral defaults (unaltered, CT(), empty name, 0) so that
// copying a key object never reads an indeterminate value.
template<typename CT>
class Xport::xhtml_entity
{
public:
    // Fully specified entity.
    //   ch  - the character
    //   num - the entity number
    //   nm  - the entity name
    //   ea  - the appearance tag
    xhtml_entity(CT ch, short num, const std::basic_string<CT>& nm, xhtml_entity_appearance ea)
        : app(ea), character(ch), name(nm), number(num) {}

    // conversion constructors for convenience in set.find() operation

    // Key by character. Only declared here; the definitions are the
    // explicit specializations for char and wchar_t.
    explicit xhtml_entity(CT ch);

    // Key by entity number. The int argument is narrowed to the short
    // that the class stores.
    explicit xhtml_entity(int num)
        : app(unaltered), character(CT()), name(), number(static_cast<short>(num)) {}

    // Key by entity name.
    explicit xhtml_entity(const std::basic_string<CT>& nm)
        : app(unaltered), character(CT()), name(nm), number(0) {}

    // Sets the appearance tag.
    void appearance(xhtml_entity_appearance a) { app = a; }
    // Returns the appearance tag.
    xhtml_entity_appearance appearance() const { return app; }

    // Read-only accessors; entity_name() returns a copy of the name.
    CT entity_character() const { return character; }
    std::basic_string<CT> entity_name() const { return name; }
    short entity_number() const { return number; }

    // Inserts the built-in entity table into the given set, which is
    // ordered by entity number.
    static void populate_entities(std::set<xhtml_entity<CT>, entity_num_comp<CT> >&);

private:
    xhtml_entity_appearance app;
    CT character;
    std::basic_string<CT> name;
    short number;

    // friends: the comparators read the private key fields directly
    friend struct entity_ch_comp<CT>;
    friend struct entity_num_comp<CT>;
    friend struct entity_name_comp<CT>;
};
// Character-key constructor for narrow characters: stores `ch` and gives
// the remaining fields neutral values (unaltered, empty name, number 0).
template<>
inline Xport::xhtml_entity<char>::xhtml_entity(const char ch)
    : app(unaltered),
      character(ch),
      name(),
      number(0)
{
}
// Character-key constructor for wide characters: stores `ch`, gives the
// remaining fields neutral values (unaltered, empty name, number 0), then
// shifts the stored character left by 8 bits and back right by 8 bits.
template<>
inline Xport::xhtml_entity<wchar_t>::xhtml_entity(const wchar_t ch) : app(unaltered), character(ch), name(L""), number(0)
{
// NOTE(review): the effect of this shift pair depends on the width and
// signedness of wchar_t. With a 16-bit unsigned wchar_t it keeps only the
// low 8 bits -- presumably to undo sign extension of narrow characters
// >= 0x80 that were widened. With a wider or signed wchar_t the result
// differs (and left-shifting a negative value is not portable) -- confirm
// the intended behavior on every target platform.
character <<= 8;
character >>= 8;
}
// Orders two entities by their character field only; number, name and
// appearance are ignored.
template<typename CT>
struct Xport::entity_ch_comp
{
    bool operator() (const xhtml_entity<CT>& left, const xhtml_entity<CT>& right) const
    {
        return left.character < right.character;
    }
};
// Orders two entities by their entity number only; character, name and
// appearance are ignored. This is the ordering of the set filled by
// xhtml_entity::populate_entities().
template<typename CT>
struct Xport::entity_num_comp
{
    bool operator() (const xhtml_entity<CT>& left, const xhtml_entity<CT>& right) const
    {
        return left.number < right.number;
    }
};
// Orders two entities by their entity name only (basic_string operator<);
// character, number and appearance are ignored.
template<typename CT>
struct Xport::entity_name_comp
{
    bool operator() (const xhtml_entity<CT>& left, const xhtml_entity<CT>& right) const
    {
        return left.name < right.name;
    }
};
// Inserts the built-in entity table into `entity_vec` (a set ordered by
// entity number, despite the parameter's name). Each entry pairs a
// character, its numeric code and its entity name with an appearance tag.
// Entries are listed in ascending numeric order; because every number is
// distinct, the listing order has no effect on the resulting set contents.
// Codes 173, 192-214, 216-246 and 248+ are not part of this table.
template<typename CT>
inline void Xport::xhtml_entity<CT>::populate_entities(std::set<xhtml_entity<CT>, entity_num_comp<CT> >& entity_vec)
{
    typedef xhtml_entity<CT> entry_t;

    // Markup-significant characters.
    entity_vec.insert(entry_t(typed_char<CT>('"'),  34, typed_string<CT>("quot"), named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('&'),  38, typed_string<CT>("amp"),  named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\''), 39, typed_string<CT>("apos"), named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('<'),  60, typed_string<CT>("lt"),   named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('>'),  62, typed_string<CT>("gt"),   named_entity));

    // NOTE(review): nbsp is the only entry tagged `unaltered`, and its
    // character literal is a plain space rather than '\xA0' -- confirm
    // both are intentional.
    entity_vec.insert(entry_t(typed_char<CT>(' '), 160, typed_string<CT>("nbsp"), unaltered));

    // Codes 161 - 172.
    entity_vec.insert(entry_t(typed_char<CT>('\xA1'), 161, typed_string<CT>("iexcl"),  named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xA2'), 162, typed_string<CT>("cent"),   named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xA3'), 163, typed_string<CT>("pound"),  named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xA4'), 164, typed_string<CT>("curren"), named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xA5'), 165, typed_string<CT>("yen"),    named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xA6'), 166, typed_string<CT>("brvbar"), named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xA7'), 167, typed_string<CT>("sect"),   named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xA8'), 168, typed_string<CT>("uml"),    named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xA9'), 169, typed_string<CT>("copy"),   named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xAA'), 170, typed_string<CT>("ordf"),   named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xAB'), 171, typed_string<CT>("laquo"),  named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xAC'), 172, typed_string<CT>("not"),    named_entity));

    // Codes 174 - 191.
    entity_vec.insert(entry_t(typed_char<CT>('\xAE'), 174, typed_string<CT>("reg"),    named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xAF'), 175, typed_string<CT>("macr"),   named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xB0'), 176, typed_string<CT>("deg"),    named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xB1'), 177, typed_string<CT>("plusmn"), named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xB2'), 178, typed_string<CT>("sup2"),   named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xB3'), 179, typed_string<CT>("sup3"),   named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xB4'), 180, typed_string<CT>("acute"),  named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xB5'), 181, typed_string<CT>("micro"),  named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xB6'), 182, typed_string<CT>("para"),   named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xB7'), 183, typed_string<CT>("middot"), named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xB8'), 184, typed_string<CT>("cedil"),  named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xB9'), 185, typed_string<CT>("sup1"),   named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xBA'), 186, typed_string<CT>("ordm"),   named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xBB'), 187, typed_string<CT>("raquo"),  named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xBC'), 188, typed_string<CT>("frac14"), named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xBD'), 189, typed_string<CT>("frac12"), named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xBE'), 190, typed_string<CT>("frac34"), named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xBF'), 191, typed_string<CT>("iquest"), named_entity));

    // Arithmetic signs.
    entity_vec.insert(entry_t(typed_char<CT>('\xD7'), 215, typed_string<CT>("times"),  named_entity));
    entity_vec.insert(entry_t(typed_char<CT>('\xF7'), 247, typed_string<CT>("divide"), named_entity));
}