Click here to Skip to main content
15,881,089 members
Articles / Desktop Programming / Win32

Xport: XHTML Parsing and Objective Reporting Toolkit

Rate me:
Please Sign up or sign in to vote.
4.73/5 (10 votes)
4 May 2008 | GPL3 | 13 min read | 60K | 682 | 32
Open source C++ class template library for generating and parsing xhtml documents.
/************************************************************************
Xport: XHTML Parsing & Objective Reporting Toolkit
Copyright (C) 2007  Mitchel Haas

This file is part of Xport.

Xport is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Xport is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Xport.  If not, see <http://www.gnu.org/licenses/>.

For complete documentation on this library and alternative
licensing options, visit http://www.xportpro.com
Email questions, comments or suggestions to mitchel.haas@xportpro.com
************************************************************************/
#if defined(_MSC_VER) && _MSC_VER < 1300
  #pragma warning(disable : 4786)
#endif
#pragma once
#include "ct_support.h"
#include <string>
#include <set>
#include <algorithm>
#include <sstream>
#include <functional>
#include <stdlib.h>
#undef small


namespace Xport 
{
  // Categories of markup item. Enumerators carry the default values 0..5 in
  // declaration order.
  enum xhtml_markup_type
  {
    mt_unknown,
    mt_doctype_declaration,
    mt_element,
    mt_comment,
    mt_processing_instruction,
    mt_pcdata
  };
  // Allowed nesting types. Each enumerator occupies its own bit (1, 2, 4).
  // Per the original note: in_line also implies pc_data.
  enum xhtml_nesting_type
  {
    block   = 1 << 0,
    in_line = 1 << 1,
    pc_data = 1 << 2
  };
  // Layout flags for a tag. The four newline_* values are single bits
  // (1, 2, 4, 8); the block_level_* values are the bitwise OR of the bits they
  // combine, and pre_layout is a separate bit (32).
  enum tag_layout_style
  {
    inline_layout            = 0,
    newline_before_start_tag = 1,
    newline_after_start_tag  = 2,
    block_level_start_tag    = newline_before_start_tag | newline_after_start_tag,  // 3
    newline_before_end_tag   = 4,
    newline_after_end_tag    = 8,
    block_level_end_tag      = newline_before_end_tag | newline_after_end_tag,      // 12
    block_level_layout       = block_level_start_tag | block_level_end_tag,         // 15
    pre_layout               = 32
  };

  // Tag identifiers. Values are assigned implicitly in declaration order:
  // root == 0, a == 1, ..., var == 91, invalid_tag == 92. The tag_names table
  // in this file is laid out in the same order (entry 0 is "doc root"), so the
  // order of the enumerators must not change.
  enum xhtml_tag_enum
  {
    root,
    // a
    a, abbr, acronym, address, applet, area,
    // b
    b, base, basefont, bdo, big, blockquote, body, br, button,
    // c
    caption, center, cite, code, col, colgroup,
    // d
    dd, del, dfn, dir, div, dl, dt,
    // e - f
    em, fieldset, font, form, frame, frameset,
    // h
    h1, h2, h3, h4, h5, h6, head, hr, html,
    // i
    i, iframe, img, input, ins, isindex,
    // k - l
    kbd, label, legend, li, link,
    // m - n
    map, menu, meta, noframes, noscript,
    // o
    object, ol, optgroup, option,
    // p - q
    p, param, pre, q,
    // s
    s, samp, script, select, small, span, strike, strong, style, sub, sup,
    // t
    table, tbody, td, textarea, tfoot, th, thead, title, tr, tt,
    // u - v
    u, ul, var,
    // sentinel, one past the last real tag
    invalid_tag
  };


  // Attribute identifiers and the per-tag attribute tables built from them.
  namespace attribute 
  {
    // Identifier for every attribute name known to the toolkit.
    //
    // Values are assigned implicitly in declaration order starting at 0
    // (abbr == 0); invalid_attribute is the only explicit value (-1). The
    // attribute_name string table in this file follows the same order, so
    // enumerators must not be reordered or inserted without an explicit value.
    //
    // `scrolldeley` is a historical misspelling (the matching string in
    // attribute_name is "scrolldelay"). It is kept for source compatibility and
    // `scrolldelay` is provided as a correctly spelled alias with the same
    // value; the alias does not shift any later enumerator.
    enum xhtml_attribute 
    {
      abbr, above, accept, accept_charset, accesskey, action, align, alink, alt, archive, autocomplete, axis,
      background, balance, behavior, below, bgcolor, bgproperties, border, border_color, bordercolordark, bordercolorlight, bottommargin, 
      cellpadding, cellspacing, challenge, char_attribute, charoff, charset, checked, cite, class_attribute, classid, clear, clip, code, codebase, 
      codetype, color, cols, colspan, compact, content, contenteditable, coords,
      data, datafld, dataformatas, datapagesize, datasrc, datetime, declare, defer, dir, direction, disabled, dynsrc,
      enctype, event, face, for_attribute, frame, frameborder, framespacing, galleryimg, gutter, 
      headers, height, hidden, href, hreflang, hspace, http_equiv,
      id, ismap, label, lang, language, left, left_margin, link, longdesc, loop, lowsrc, 
      marginheight, marginwidth, maxlength, mayscript, media, method, methods, multiple, name, nohref, noresize, noshade, nowrap, object,
      pagex, pagey, pluginspage, pluginurl, point_size, profile, prompt, rbspan, readonly, rel, rev, rightmargin, rows, rowspan, rules,
      scheme, scope, scroll, scrollamount,
      scrolldeley, scrolldelay = scrolldeley,  // misspelled original + correctly spelled alias (same value)
      scrolling, security, selected, shape, size, span, src, standby, start, style, summary, 
      tabindex, target, text, title, top, topmargin, truespeed, type, units, unselectable, urn,
      usemap, valign, value, valuetype, version, visibility, vlink, volume, vspace, weight, width, wrap, z_index,
      DOMActivate, DOMAttrModified, DOMCharacterDataModified, DOMFocusIn, DOMFocusOut, DOMNodeInserted, DOMNodeInsertedIntoDocument,
      DOMNodeRemoved, DOMNodeRemovedFromDocument, DOMSubtreeModified, onabort, onactivate, onafterprint, onafterupdate, onbeforeactivate,
      onbeforecopy, onbeforecut, onbeforedeactivate, onbeforeeditfocus, onbeforepaste, onbeforeprint, onbeforeunload, onbeforeupdate,
      onblur, onbounce, oncellchange, onchange, onclick, oncontextmenu, oncontrolselect, oncopy, oncut, ondataavailable, ondatasetchanged,
      ondatasetcomplete, ondblclick, ondeactivate, ondrag, ondragdrop, ondragend, ondragenter, ondragleave, ondragover, ondragstart, 
      ondrop, onerror, onerrorupdate, onfilterchange, onfinish, onfocus, onfocusin, onfocusout, onhelp, onkeydown,
      onkeypress, onkeyup, onlayoutcomplete, onload, onlosecapture, onmousedown, onmouseenter, onmouseleave, onmousemove,
      onmouseout, onmouseover, onmouseup, onmousewheel, onmove, onmoveend, onmovestart, onpaste, onpropertychange, onreadystatechange,
      onreset, onresize, onresizeend, onresizestart, onrowenter, onrowexit, onrowsdelete, onrowsinserted, onscroll, onselect, 
      onselectionchange, onselectstart, onstart, onsubmit, onunload, xmllang, xmlns, xmlspace, invalid_attribute = -1
    };

    // ---------------------------------------------------------------------
    // Per-tag attribute tables.
    //
    // Each array is named <tag>_attributes and lists xhtml_attribute values.
    // The number in the comment above each array is its element count.
    // NOTE(review): the counts are presumably what callers pass as `count` to
    // populate_attribs(); if a table is edited, confirm the call sites.
    // ---------------------------------------------------------------------

    // a_attributes: 31 entries
    const attribute::xhtml_attribute a_attributes[] =
    {
      accesskey, charset, class_attribute, coords, dir, href, hreflang, id, 
      lang, name, onblur, onclick, ondblclick, onfocus, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, rel, rev, shape, style, tabindex, target, title, 
      type, xmllang
    };

    // abbr_attributes: 17 entries
    const attribute::xhtml_attribute abbr_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // acronym_attributes: 16 entries
    // NOTE(review): unlike abbr_attributes, this table has no `dir` entry;
    // confirm whether the omission is intentional.
    const attribute::xhtml_attribute acronym_attributes[] =
    {
      class_attribute, id, lang, onclick, ondblclick, onkeydown, onkeypress, onkeyup, 
      onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, xmllang
    };

    // address_attributes: 17 entries
    const attribute::xhtml_attribute address_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // applet_attributes: 15 entries
    const attribute::xhtml_attribute applet_attributes[] =
    {
      align, alt, archive, class_attribute, code, codebase, height, hspace, id, 
      name, object, style, title, vspace, width
    };

    // area_attributes: 27 entries
    const attribute::xhtml_attribute area_attributes[] =
    {
      accesskey, alt, class_attribute, coords, dir, href, id, lang, nohref, 
      onblur, onclick, ondblclick, onfocus, onkeydown, onkeypress, onkeyup, 
      onmousedown, onmousemove, onmouseout, onmouseover, onmouseup,
      shape, style, tabindex, target, title, xmllang
    };

    // b_attributes: 17 entries
    const attribute::xhtml_attribute b_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // base_attributes: 2 entries
    const attribute::xhtml_attribute base_attributes[] = {href, target};

    // basefont_attributes: 4 entries
    const attribute::xhtml_attribute basefont_attributes[] = {color, face, id, size};

    // bdo_attributes: 17 entries
    const attribute::xhtml_attribute bdo_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // big_attributes: 17 entries
    const attribute::xhtml_attribute big_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // blockquote_attributes: 18 entries
    const attribute::xhtml_attribute blockquote_attributes[] =
    {
      cite, class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, xmllang
    };

    // body_attributes: 25 entries
    const attribute::xhtml_attribute body_attributes[] =
    {
      alink, background, bgcolor, class_attribute, dir, id, lang, link, 
      onclick, ondblclick, onkeydown, onkeypress, onkeyup, onload, onmousedown,
      onmousemove, onmouseout, onmouseover, onmouseup, onunload, style, 
      text, title, vlink, xmllang
    };

    // br_attributes: 5 entries
    const attribute::xhtml_attribute br_attributes[] =
    {
      class_attribute, clear, id, style, title
    };

    // button_attributes: 25 entries
    const attribute::xhtml_attribute button_attributes[] =
    {
      accesskey, class_attribute, dir, disabled, 
      id, lang, name, onblur, onclick, ondblclick, onfocus, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, tabindex, title, type, value, xmllang
    };

    // caption_attributes: 17 entries
    const attribute::xhtml_attribute caption_attributes[] =
    {
      class_attribute,  dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, xmllang
    };

    // center_attributes: 17 entries
    const attribute::xhtml_attribute center_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup,
      style, title, xmllang
    };

    // cite_attributes: 17 entries
    const attribute::xhtml_attribute cite_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // code_attributes: 17 entries
    const attribute::xhtml_attribute code_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // col_attributes: 20 entries
    // NOTE(review): compared with colgroup_attributes this table lacks
    // onkeypress, onkeyup and onmousedown; confirm whether that is intentional.
    const attribute::xhtml_attribute col_attributes[] =
    {
      align, char_attribute, charoff, class_attribute, dir, id, lang, onclick, 
      ondblclick, onkeydown, onmousemove, onmouseout, onmouseover, 
      onmouseup, span, style, title, valign, width, xmllang
    };

    // colgroup_attributes: 23 entries
    const attribute::xhtml_attribute colgroup_attributes[] =
    {
      align, char_attribute, charoff, class_attribute, dir, id, lang, onclick, 
      ondblclick, onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, 
      onmouseout, onmouseover, onmouseup, span, style, title, 
      valign, width, xmllang
    };

    // dd_attributes: 17 entries
    const attribute::xhtml_attribute dd_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // del_attributes: 19 entries
    const attribute::xhtml_attribute del_attributes[] =
    {
      cite, class_attribute, datetime, dir, id, lang, onclick, ondblclick, 
      onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, 
      onmouseout, onmouseover, onmouseup, style, title, xmllang
    };

    // dfn_attributes: 17 entries
    const attribute::xhtml_attribute dfn_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, style, title, xmllang
    };

    // dir_attributes: 18 entries
    const attribute::xhtml_attribute dir_attributes[] =
    {
      class_attribute, compact, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, xmllang
    };

    // div_attributes: 18 entries
    const attribute::xhtml_attribute div_attributes[] =
    {
      align, class_attribute, dir, id, lang, 
      onclick, ondblclick, onkeydown, onkeypress, onkeyup, onmousedown, onmousemove,
      onmouseout, onmouseover, onmouseup, style, title, xmllang
    };

    // dl_attributes: 18 entries
    const attribute::xhtml_attribute dl_attributes[] =
    {
      class_attribute, compact, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout,
      onmouseover, onmouseup, style, title, xmllang
    };

    // dt_attributes: 17 entries
    const attribute::xhtml_attribute dt_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout,
      onmouseover, onmouseup, style, title, xmllang
    };

    // em_attributes: 17 entries
    const attribute::xhtml_attribute em_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout,
      onmouseover, onmouseup, style, title, xmllang
    };

    // fieldset_attributes: 17 entries
    const attribute::xhtml_attribute fieldset_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // font_attributes: 10 entries
    const attribute::xhtml_attribute font_attributes[] =
    {
      class_attribute, color, dir, face, id, lang, size, style, 
      title, xmllang
    };

    // form_attributes: 26 entries
    const attribute::xhtml_attribute form_attributes[] =
    {
      accept, accept_charset, action, class_attribute, dir, enctype, id, lang, 
      method, name, onclick, ondblclick, onkeydown, onkeypress,
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      onreset, onsubmit, style, target, title, xmllang
    };

    // frame_attributes: 12 entries
    const attribute::xhtml_attribute frame_attributes[] =
    {
      class_attribute, frameborder, id, longdesc, marginheight, marginwidth, name, 
      noresize, scrolling, src, style, title
    };

    // frameset_attributes: 8 entries
    const attribute::xhtml_attribute frameset_attributes[] =
    {
      class_attribute, cols, id, onload, onunload, rows, style, title
    };

    // h_attributes: 18 entries
    const attribute::xhtml_attribute h_attributes[] =
    {
      align, class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover,
      onmouseup, style, title, xmllang
    };

    // head_attributes: 4 entries
    const attribute::xhtml_attribute head_attributes[] =
    {
      dir, lang, profile, xmllang
    };

    // hr_attributes: 21 entries
    const attribute::xhtml_attribute hr_attributes[] =
    {
      align, class_attribute, dir, id, lang, noshade, onclick, ondblclick, 
      onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, size, style, title, width, xmllang
    };

    // html_attributes: 4 entries
    const attribute::xhtml_attribute html_attributes[] =
    {
      dir, lang, xmllang, xmlns
    };

    // i_attributes: 17 entries
    const attribute::xhtml_attribute i_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup,
      style, title, xmllang
    };

    // iframe_attributes: 14 entries
    const attribute::xhtml_attribute iframe_attributes[] =
    {
      align, class_attribute, frameborder, height, id, longdesc, marginheight, 
      marginwidth, name, scrolling,  src, style, title, width
    };

    // img_attributes: 29 entries
    const attribute::xhtml_attribute img_attributes[] =
    {
      align, alt, border, class_attribute, dir, height, hspace, id, ismap, lang, 
      longdesc, name, onclick, ondblclick, onkeydown, onkeypress,
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      src, style, title, usemap, vspace, width, xmllang
    };

    // input_attributes: 37 entries
    const attribute::xhtml_attribute input_attributes[] =
    {
      accept, accesskey, align, alt, checked, class_attribute, 
      dir, disabled, id, ismap, lang, maxlength, name,
      onblur, onchange, onclick, ondblclick, onfocus, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      onselect, readonly, size, src, style, tabindex, title, type, 
      usemap, value, xmllang
    };

    // ins_attributes: 19 entries
    const attribute::xhtml_attribute ins_attributes[] =
    {
      cite, class_attribute, datetime, dir, id, lang, onclick, ondblclick, 
      onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, style, title, xmllang
    };

    // isindex_attributes: 8 entries
    const attribute::xhtml_attribute isindex_attributes[] =
    {
      class_attribute, dir, id, lang, prompt, style, title, xmllang
    };

    // kbd_attributes: 17 entries
    const attribute::xhtml_attribute kbd_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, xmllang
    };

    // label_attributes: 21 entries
    const attribute::xhtml_attribute label_attributes[] =
    {
      accesskey, class_attribute, dir, for_attribute, id, lang, onblur, onclick, 
      ondblclick, onfocus, onkeydown, onkeypress, onkeyup, onmousedown, 
      onmousemove, onmouseout, onmouseover, onmouseup, style, title, xmllang
    };

    // legend_attributes: 19 entries
    const attribute::xhtml_attribute legend_attributes[] =
    {
      accesskey, align, class_attribute, dir, id, lang, onclick, ondblclick, 
      onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, style, title, xmllang
    };

    // li_attributes: 19 entries
    const attribute::xhtml_attribute li_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, type, value, xmllang
    };

    // link_attributes: 25 entries
    const attribute::xhtml_attribute link_attributes[] =
    {
      charset, class_attribute, dir, href, hreflang, id, lang, media, onclick, 
      ondblclick, onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, 
      onmouseout, onmouseover, onmouseup, rel, rev, style, target, 
      title, type, xmllang
    };

    // map_attributes: 18 entries
    const attribute::xhtml_attribute map_attributes[] =
    {
      class_attribute, dir, id, lang, name, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, xmllang
    };

    // menu_attributes: 18 entries
    const attribute::xhtml_attribute menu_attributes[] =
    {
      class_attribute, compact, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, xmllang
    };

    // meta_attributes: 7 entries
    const attribute::xhtml_attribute meta_attributes[] =
    {
      content, dir, http_equiv, lang, name, scheme, xmllang
    };

    // noframes_attributes: 17 entries
    const attribute::xhtml_attribute noframes_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // noscript_attributes: 17 entries
    const attribute::xhtml_attribute noscript_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // object_attributes: 34 entries
    const attribute::xhtml_attribute object_attributes[] =
    {
      align, archive, border, class_attribute, classid, codebase, codetype, data, 
      declare, dir, height, hspace, id, 
      lang, name, onclick, ondblclick, onkeydown, onkeypress, onkeyup, 
      onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, standby, 
      style, tabindex, title, type, usemap, vspace, width, xmllang
    };

    // ol_attributes: 20 entries
    const attribute::xhtml_attribute ol_attributes[] =
    {
      class_attribute, compact, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, start, style, title, type, xmllang
    };

    // optgroup_attributes: 21 entries
    const attribute::xhtml_attribute optgroup_attributes[] =
    {
      class_attribute, dir, disabled, id, label, lang, onclick, ondblclick, 
      onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, onmouseout,
      onmouseover, onmouseup, selected, style, title, value, xmllang
    };

    // option_attributes: 21 entries
    const attribute::xhtml_attribute option_attributes[] =
    {
      class_attribute, dir, disabled, id, label, lang, onclick, ondblclick, 
      onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, selected, style, title, value, xmllang
    };

    // p_attributes: 18 entries
    const attribute::xhtml_attribute p_attributes[] =
    {
      align, class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, xmllang
    };

    // param_attributes: 5 entries
    const attribute::xhtml_attribute param_attributes[] =
    {
      id, name, type, value, valuetype
    };

    // pre_attributes: 19 entries
    const attribute::xhtml_attribute pre_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, width, xmllang, xmlspace
    };

    // q_attributes: 18 entries
    const attribute::xhtml_attribute q_attributes[] =
    {
      cite, class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, xmllang
    };

    // s_attributes: 17 entries
    const attribute::xhtml_attribute s_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // samp_attributes: 17 entries
    const attribute::xhtml_attribute samp_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, xmllang
    };

    // script_attributes: 6 entries
    const attribute::xhtml_attribute script_attributes[] =
    {
      charset, for_attribute, language, src, type, xmlspace
    };

    // select_attributes: 28 entries
    const attribute::xhtml_attribute select_attributes[] =
    {
      class_attribute, datafld, dataformatas, datasrc, dir, disabled, id, lang, 
      multiple, name, onblur, onchange, onclick, ondblclick, onfocus, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, size, style, tabindex, title, xmllang
    };

    // small_attributes: 17 entries
    const attribute::xhtml_attribute small_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, style, title, xmllang
    };

    // span_attributes: 17 entries
    const attribute::xhtml_attribute span_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, 
      ondblclick, onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, 
      onmouseout, onmouseover, onmouseup, style, title, xmllang
    };

    // strike_attributes: 17 entries
    const attribute::xhtml_attribute strike_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, style, title, xmllang
    };

    // strong_attributes: 17 entries
    const attribute::xhtml_attribute strong_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, style, title, xmllang
    };

    // style_attributes: 7 entries
    const attribute::xhtml_attribute style_attributes[] =
    {
      dir, lang, media, title, type, xmllang, xmlspace
    };

    // sub_attributes: 17 entries
    const attribute::xhtml_attribute sub_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, style, title, xmllang
    };

    // sup_attributes: 17 entries
    const attribute::xhtml_attribute sup_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, style, title, xmllang
    };

    // table_attributes: 26 entries
    const attribute::xhtml_attribute table_attributes[] =
    {
      align, bgcolor, border, cellpadding, cellspacing, class_attribute, 
      dir, frame, id, lang, 
      onclick, ondblclick, onkeydown, onkeypress, onkeyup, onmousedown, 
      onmousemove, onmouseout, onmouseover, onmouseup, rules, style, 
      summary, title, width, xmllang
    };

    // tbody_attributes: 21 entries
    const attribute::xhtml_attribute tbody_attributes[] =
    {
      align, char_attribute, charoff, class_attribute, dir, id, lang, onclick, 
      ondblclick, onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, 
      onmouseout, onmouseover, onmouseup, style, title, valign, 
      xmllang
    };

    // td_attributes: 31 entries
    const attribute::xhtml_attribute td_attributes[] =
    {
      abbr, align, axis, bgcolor, char_attribute, charoff, class_attribute, colspan, 
      dir, headers, height, id, lang, nowrap, onclick, ondblclick,
      onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, rowspan, scope, style, 
      title, valign, width, xmllang
    };

    // textarea_attributes: 28 entries
    const attribute::xhtml_attribute textarea_attributes[] =
    {
      accesskey, class_attribute, cols, dir, 
      disabled, id, lang, name, onblur, onchange, onclick, ondblclick, 
      onfocus, onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, 
      onmouseout, onmouseover, onmouseup, onselect, readonly, rows, 
      style, tabindex, title, xmllang
    };

    // tfoot_attributes: 21 entries
    const attribute::xhtml_attribute tfoot_attributes[] =
    {
      align, char_attribute, charoff, class_attribute, dir, id, lang, onclick, 
      ondblclick, onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, 
      onmouseout, onmouseover, onmouseup, style, title, valign, xmllang
    };

    // th_attributes: 31 entries
    const attribute::xhtml_attribute th_attributes[] =
    {
      abbr, align, axis, bgcolor, char_attribute, charoff, class_attribute, colspan, 
      dir, headers, height, id, lang, nowrap, onclick, ondblclick, 
      onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, rowspan, scope, style, title, valign, 
      width, xmllang
    };

    // thead_attributes: 21 entries
    const attribute::xhtml_attribute thead_attributes[] =
    {
      align, char_attribute, charoff, class_attribute, dir, id, lang, onclick, 
      ondblclick, onkeydown, onkeypress, onkeyup, onmousedown, onmousemove, 
      onmouseout, onmouseover, onmouseup, style, title, valign, xmllang
    };

    // title_attributes: 3 entries
    const attribute::xhtml_attribute title_attributes[] =
    {
      dir, lang, xmllang
    };

    // tr_attributes: 22 entries
    const attribute::xhtml_attribute tr_attributes[] =
    {
      align, bgcolor, char_attribute, charoff, class_attribute, dir, id, lang, 
      onclick, ondblclick, onkeydown, onkeypress, onkeyup, onmousedown, 
      onmousemove, onmouseout, onmouseover, onmouseup, style, title, 
      valign, xmllang
    };

    // tt_attributes: 17 entries
    const attribute::xhtml_attribute tt_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, onkeypress, 
      onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, 
      style, title, xmllang
    };

    // u_attributes: 17 entries
    const attribute::xhtml_attribute u_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, xmllang
    };

    // ul_attributes: 19 entries
    const attribute::xhtml_attribute ul_attributes[] =
    {
      class_attribute, compact, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, onmouseover, 
      onmouseup, style, title, type, xmllang
    };

    // var_attributes: 17 entries
    const attribute::xhtml_attribute var_attributes[] =
    {
      class_attribute, dir, id, lang, onclick, ondblclick, onkeydown, 
      onkeypress, onkeyup, onmousedown, onmousemove, onmouseout, 
      onmouseover, onmouseup, style, title, xmllang
    };

  }

  // CSS property identifiers.
  namespace css 
  {
    // Identifier for every CSS property known to the toolkit.
    //
    // Values are assigned implicitly in declaration order starting at 0
    // (background == 0); invalid_css_property is the only explicit value (-1).
    //
    // `border_type_color` is a historical misspelling: it sits between
    // border_top and border_top_style, where the bottom/left/right groups have
    // their *_color member. It is kept for source compatibility and
    // `border_top_color` is provided as a correctly spelled alias with the
    // same value; the alias does not shift any later enumerator.
    enum css_property 
    {
      background, background_attachment, background_color, background_image, background_position,
      background_repeat, border, border_bottom, border_bottom_color, border_bottom_style,
      border_bottom_width, border_collapse, border_color, border_left, border_left_color,
      border_left_style, border_left_width, border_right, border_right_color, border_right_style,
      border_right_width, border_spacing, border_style, border_top,
      border_type_color, border_top_color = border_type_color,  // misspelled original + correctly spelled alias
      border_top_style, border_top_width, border_width, bottom, caption_side, clear, clip, color, 
      content, counter_increment, counter_reset, cursor, direction, display, empty_cells, float_css, 
      font, font_family, font_size, font_size_adjust, font_stretch, font_style, font_variant, font_weight, 
      height, left, letter_spacing, line_height, list_style, list_style_image, list_style_position, 
      list_style_type, margin, margin_bottom, margin_left, margin_right, margin_top, marker_offset, 
      max_height, max_width, min_height, min_width, outline, outline_color, outline_style, outline_width, 
      overflow, padding, padding_bottom, padding_left, padding_right, padding_top, position, quotes, 
      right, table_layout, text_align, text_decoration, text_indent, text_shadow, text_transform,
      top, unicode_bidi, vertical_align, visibility, white_space, width, word_spacing, z_index,
      marks, orphans, page, page_break_after, page_break_before, page_break_inside, size, widows, invalid_css_property = -1
    };

  }


  // Builds a set from the first `count` entries of the array at `pAttr`.
  // Duplicate entries collapse to a single set member. A `count` of zero or
  // less yields an empty set and `pAttr` is not read.
  inline std::set<attribute::xhtml_attribute> populate_attribs(const attribute::xhtml_attribute* const pAttr, const int count)
  {
    std::set<attribute::xhtml_attribute> collected;

    // Guard keeps the non-positive-count case a no-op, as in a counted loop.
    if (count > 0)
      collected.insert(pAttr, pAttr + count);

    return collected;
  }

  // Builds a set from the first `count` entries of the array at `pTags`.
  // Duplicate entries collapse to a single set member. A `count` of zero or
  // less yields an empty set and `pTags` is not read.
  template<typename DT>
    std::set<DT> populate_nested_tags(const DT* const pTags, const int count)
  {
    std::set<DT> collected;

    // Guard keeps the non-positive-count case a no-op, as in a counted loop.
    if (count > 0)
      collected.insert(pTags, pTags + count);

    return collected;
  }

  // Display names for tags, one row per xhtml_tag_enum value from root (0,
  // "doc root") through var (91), in the same order as the enum. There is no
  // row for invalid_tag (92). Each row holds up to 10 characters plus the
  // terminating null ("blockquote" is the longest entry).
  const wchar_t tag_names[92][11] =
  {
    L"doc root",
    // a
    L"a", L"abbr", L"acronym", L"address", L"applet", L"area",
    // b
    L"b", L"base", L"basefont", L"bdo", L"big", L"blockquote", L"body", L"br", L"button",
    // c
    L"caption", L"center", L"cite", L"code", L"col", L"colgroup",
    // d
    L"dd", L"del", L"dfn", L"dir", L"div", L"dl", L"dt",
    // e - f
    L"em", L"fieldset", L"font", L"form", L"frame", L"frameset",
    // h
    L"h1", L"h2", L"h3", L"h4", L"h5", L"h6", L"head", L"hr", L"html",
    // i
    L"i", L"iframe", L"img", L"input", L"ins", L"isindex",
    // k - l
    L"kbd", L"label", L"legend", L"li", L"link",
    // m - n
    L"map", L"menu", L"meta", L"noframes", L"noscript",
    // o
    L"object", L"ol", L"optgroup", L"option",
    // p - q
    L"p", L"param", L"pre", L"q",
    // s
    L"s", L"samp", L"script", L"select", L"small", L"span", L"strike", L"strong", L"style", L"sub", L"sup",
    // t
    L"table", L"tbody", L"td", L"textarea", L"tfoot", L"th", L"thead", L"title", L"tr", L"tt",
    // u - v
    L"u", L"ul", L"var"
  };


  // Narrow-character spellings of every supported attribute.
  // get_attribute_name() indexes this table directly with an
  // attribute::xhtml_attribute value and get_attribute_enum() casts a table
  // index back to that enum, so the order of the entries must stay in step
  // with the enumerators, and with wattribute_name, which mirrors this table.
  // The longest entry has 27 characters, which is why each row holds 28.
  const char attribute_name[][28] =
  {
    "abbr",
    "above",
    "accept",
    "accept-charset",
    "accesskey",
    "action",
    "align",
    "alink",
    "alt",
    "archive",
    "autocomplete",
    "axis",
    "background",
    "balance",
    "behavior",
    "below",
    "bgcolor",
    "bgproperties",
    "border",
    "border-color",
    "bordercolordark",
    "bordercolorlight",
    "bottommargin",
    "cellpadding",
    "cellspacing",
    "challenge",
    "char-attr",
    "charoff",
    "charset",
    "checked",
    "cite",
    "class",
    "classid",
    "clear",
    "clip",
    "code",
    "codebase",
    "codetype",
    "color",
    "cols",
    "colspan",
    "compact",
    "content",
    "contenteditable",
    "coords",
    "data",
    "datafld",
    "dataformatas",
    "datapagesize",
    "datasrc",
    "datetime",
    "declare",
    "defer",
    "dir",
    "direction",
    "disabled",
    "dynsrc",
    "enctype",
    "event",
    "face",
    "for",
    "frame",
    "frameborder",
    "framespacing",
    "galleryimg",
    "gutter",
    "headers",
    "height",
    "hidden",
    "href",
    "hreflang",
    "hspace",
    "http-equiv",
    "id",
    "ismap",
    "label",
    "lang",
    "language",
    "left",
    "left-margin",
    "link",
    "longdesc",
    "loop",
    "lowsrc",
    "marginheight",
    "marginwidth",
    "maxlength",
    "mayscript",
    "media",
    "method",
    "methods",
    "multiple",
    "name",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "object",
    "pagex",
    "pagey",
    "pluginspage",
    "pluginurl",
    "point-size",
    "profile",
    "prompt",
    "rbspan",
    "readonly",
    "rel",
    "rev",
    "rightmargin",
    "rows",
    "rowspan",
    "rules",
    "scheme",
    "scope",
    "scroll",
    "scrollamount",
    "scrolldelay",
    "scrolling",
    "security",
    "selected",
    "shape",
    "size",
    "span",
    "src",
    "standby",
    "start",
    "style",
    "summary",
    "tabindex",
    "target",
    "text",
    "title",
    "top",
    "topmargin",
    "truespeed",
    "type",
    "units",
    "unselectable",
    "urn",
    "usemap",
    "valign",
    "value",
    "valuetype",
    "version",
    "visibility",
    "vlink",
    "volume",
    "vspace",
    "weight",
    "width",
    "wrap",
    "z-index",
    "DOMActivate",
    "DOMAttrModified",
    "DOMCharacterDataModified",
    "DOMFocusIn",
    "DOMFocusOut",
    "DOMNodeInserted",
    "DOMNodeInsertedIntoDocument",
    "DOMNodeRemoved",
    "DOMNodeRemovedFromDocument",
    "DOMSubtreeModified",
    "onabort",
    "onactivate",
    "onafterprint",
    "onafterupdate",
    "onbeforeactivate",
    "onbeforecopy",
    "onbeforecut",
    "onbeforedeactivate",
    "onbeforeeditfocus",
    "onbeforepaste",
    "onbeforeprint",
    "onbeforeunload",
    "onbeforeupdate",
    "onblur",
    "onbounce",
    "oncellchange",
    "onchange",
    "onclick",
    "oncontextmenu",
    "oncontrolselect",
    "oncopy",
    "oncut",
    "ondataavailable",
    "ondatasetchanged",
    "ondatasetcomplete",
    "ondblclick",
    "ondeactivate",
    "ondrag",
    "ondragdrop",
    "ondragend",
    "ondragenter",
    "ondragleave",
    "ondragover",
    "ondragstart",
    "ondrop",
    "onerror",
    "onerrorupdate",
    "onfilterchange",
    "onfinish",
    "onfocus",
    "onfocusin",
    "onfocusout",
    "onhelp",
    "onkeydown",
    "onkeypress",
    "onkeyup",
    "onlayoutcomplete",
    "onload",
    "onlosecapture",
    "onmousedown",
    "onmouseenter",
    "onmouseleave",
    "onmousemove",
    "onmouseout",
    "onmouseover",
    "onmouseup",
    "onmousewheel",
    "onmove",
    "onmoveend",
    "onmovestart",
    "onpaste",
    "onpropertychange",
    "onreadystatechange",
    "onreset",
    "onresize",
    "onresizeend",
    "onresizestart",
    "onrowenter",
    "onrowexit",
    "onrowsdelete",
    "onrowsinserted",
    "onscroll",
    "onselect",
    "onselectionchange",
    "onselectstart",
    "onstart",
    "onsubmit",
    "onunload",
    "xml:lang",
    "xmlns",
    "xmlspace"
  };


  // Wide-character spellings of every supported attribute.
  // get_attribute_name() indexes this table directly with an
  // attribute::xhtml_attribute value and get_attribute_enum() casts a table
  // index back to that enum, so the order of the entries must stay in step
  // with the enumerators, and with attribute_name, which this table mirrors.
  // The longest entry has 27 characters, which is why each row holds 28.
  const wchar_t wattribute_name[][28] =
  {
    L"abbr",
    L"above",
    L"accept",
    L"accept-charset",
    L"accesskey",
    L"action",
    L"align",
    L"alink",
    L"alt",
    L"archive",
    L"autocomplete",
    L"axis",
    L"background",
    L"balance",
    L"behavior",
    L"below",
    L"bgcolor",
    L"bgproperties",
    L"border",
    L"border-color",
    L"bordercolordark",
    L"bordercolorlight",
    L"bottommargin",
    L"cellpadding",
    L"cellspacing",
    L"challenge",
    L"char-attr",
    L"charoff",
    L"charset",
    L"checked",
    L"cite",
    L"class",
    L"classid",
    L"clear",
    L"clip",
    L"code",
    L"codebase",
    L"codetype",
    L"color",
    L"cols",
    L"colspan",
    L"compact",
    L"content",
    L"contenteditable",
    L"coords",
    L"data",
    L"datafld",
    L"dataformatas",
    L"datapagesize",
    L"datasrc",
    L"datetime",
    L"declare",
    L"defer",
    L"dir",
    L"direction",
    L"disabled",
    L"dynsrc",
    L"enctype",
    L"event",
    L"face",
    L"for",
    L"frame",
    L"frameborder",
    L"framespacing",
    L"galleryimg",
    L"gutter",
    L"headers",
    L"height",
    L"hidden",
    L"href",
    L"hreflang",
    L"hspace",
    L"http-equiv",
    L"id",
    L"ismap",
    L"label",
    L"lang",
    L"language",
    L"left",
    L"left-margin",
    L"link",
    L"longdesc",
    L"loop",
    L"lowsrc",
    L"marginheight",
    L"marginwidth",
    L"maxlength",
    L"mayscript",
    L"media",
    L"method",
    L"methods",
    L"multiple",
    L"name",
    L"nohref",
    L"noresize",
    L"noshade",
    L"nowrap",
    L"object",
    L"pagex",
    L"pagey",
    L"pluginspage",
    L"pluginurl",
    L"point-size",
    L"profile",
    L"prompt",
    L"rbspan",
    L"readonly",
    L"rel",
    L"rev",
    L"rightmargin",
    L"rows",
    L"rowspan",
    L"rules",
    L"scheme",
    L"scope",
    L"scroll",
    L"scrollamount",
    L"scrolldelay",
    L"scrolling",
    L"security",
    L"selected",
    L"shape",
    L"size",
    L"span",
    L"src",
    L"standby",
    L"start",
    L"style",
    L"summary",
    L"tabindex",
    L"target",
    L"text",
    L"title",
    L"top",
    L"topmargin",
    L"truespeed",
    L"type",
    L"units",
    L"unselectable",
    L"urn",
    L"usemap",
    L"valign",
    L"value",
    L"valuetype",
    L"version",
    L"visibility",
    L"vlink",
    L"volume",
    L"vspace",
    L"weight",
    L"width",
    L"wrap",
    L"z-index",
    L"DOMActivate",
    L"DOMAttrModified",
    L"DOMCharacterDataModified",
    L"DOMFocusIn",
    L"DOMFocusOut",
    L"DOMNodeInserted",
    L"DOMNodeInsertedIntoDocument",
    L"DOMNodeRemoved",
    L"DOMNodeRemovedFromDocument",
    L"DOMSubtreeModified",
    L"onabort",
    L"onactivate",
    L"onafterprint",
    L"onafterupdate",
    L"onbeforeactivate",
    L"onbeforecopy",
    L"onbeforecut",
    L"onbeforedeactivate",
    L"onbeforeeditfocus",
    L"onbeforepaste",
    L"onbeforeprint",
    L"onbeforeunload",
    L"onbeforeupdate",
    L"onblur",
    L"onbounce",
    L"oncellchange",
    L"onchange",
    L"onclick",
    L"oncontextmenu",
    L"oncontrolselect",
    L"oncopy",
    L"oncut",
    L"ondataavailable",
    L"ondatasetchanged",
    L"ondatasetcomplete",
    L"ondblclick",
    L"ondeactivate",
    L"ondrag",
    L"ondragdrop",
    L"ondragend",
    L"ondragenter",
    L"ondragleave",
    L"ondragover",
    L"ondragstart",
    L"ondrop",
    L"onerror",
    L"onerrorupdate",
    L"onfilterchange",
    L"onfinish",
    L"onfocus",
    L"onfocusin",
    L"onfocusout",
    L"onhelp",
    L"onkeydown",
    L"onkeypress",
    L"onkeyup",
    L"onlayoutcomplete",
    L"onload",
    L"onlosecapture",
    L"onmousedown",
    L"onmouseenter",
    L"onmouseleave",
    L"onmousemove",
    L"onmouseout",
    L"onmouseover",
    L"onmouseup",
    L"onmousewheel",
    L"onmove",
    L"onmoveend",
    L"onmovestart",
    L"onpaste",
    L"onpropertychange",
    L"onreadystatechange",
    L"onreset",
    L"onresize",
    L"onresizeend",
    L"onresizestart",
    L"onrowenter",
    L"onrowexit",
    L"onrowsdelete",
    L"onrowsinserted",
    L"onscroll",
    L"onselect",
    L"onselectionchange",
    L"onselectstart",
    L"onstart",
    L"onsubmit",
    L"onunload",
    L"xml:lang",
    L"xmlns",
    L"xmlspace"
  };

  // Forward declaration only; the struct is defined further down in this
  // file, after the namespace has been closed.
  template<typename DT> struct tag_info;


  // Stores the narrow-character spelling of _attribute in name.
  // _attribute is used directly as an index into attribute_name; no range
  // check is performed here.
  inline void get_attribute_name(const attribute::xhtml_attribute _attribute, std::basic_string<char>& name)
  {
    name.assign(attribute_name[_attribute]);
  }

  // Stores the wide-character spelling of _attribute in name.
  // _attribute is used directly as an index into wattribute_name; no range
  // check is performed here.
  inline void get_attribute_name(const attribute::xhtml_attribute _attribute, std::basic_string<wchar_t>& name)
  {
    name.assign(wattribute_name[_attribute]);
  }
  // Declarations of the helpers that are defined below, outside the namespace
  // body. Each one comes as a narrow/wide overload pair.

  // Looks up a lower-cased copy of attr_name in the attribute name table and
  // returns the matching enumerator, or attribute::invalid_attribute when the
  // name is not in the table.
  inline attribute::xhtml_attribute get_attribute_enum(const std::basic_string<char>& attr_name);
  inline attribute::xhtml_attribute get_attribute_enum(const std::basic_string<wchar_t>& attr_name);
  // Looks up a lower-cased copy of tag_name in the tag name table and returns
  // the matching enumerator, or invalid_tag when the name is not in the table.
  inline xhtml_tag_enum get_tag_type_from_name(const std::basic_string<char>& tag_name);
  inline xhtml_tag_enum get_tag_type_from_name(const std::basic_string<wchar_t>& tag_name);
  // Removes leading whitespace from data in place.
  inline void left_trim(std::basic_string<char>& data);
  inline void left_trim(std::basic_string<wchar_t>& data);
  // Removes trailing whitespace from data in place.
  inline void right_trim(std::basic_string<char>& data);
  inline void right_trim(std::basic_string<wchar_t>& data);
  // Converts every character of str to lower case in place.
  inline void toLower(std::basic_string<char>& str);
  inline void toLower(std::basic_string<wchar_t>& str);
}

// Maps an attribute name (narrow characters) to its enumerator.
//
// The comparison ignores case on both sides. The table contains mixed-case
// entries ("DOMActivate", ...), so comparing a lower-cased query against the
// raw table text could never find them.
//
// Only the part of attr_name before a possible embedded NUL character takes
// part in the lookup.
//
// Returns the enumerator whose position equals the index of the matching
// table entry, or attribute::invalid_attribute when nothing matches.
inline Xport::attribute::xhtml_attribute Xport::get_attribute_enum(const std::basic_string<char>& attr_name) 
{
  std::basic_string<char> wanted(attr_name.c_str());
  toLower(wanted);

  // Same default-constructed locale that toLower() uses, so table text and
  // query are lowered by the same rules.
  const std::locale loc;
  const size_t array_size = sizeof(attribute_name) / sizeof(*attribute_name); 

  for (size_t idx = 0; idx < array_size; ++idx) {
    const char* const entry = attribute_name[idx];

    // Every row is NUL terminated, so the scan cannot leave the row.
    size_t pos = 0;
    while (pos < wanted.size() && entry[pos] != '\0' && std::tolower(entry[pos], loc) == wanted[pos])
      ++pos;

    // A match requires both strings to end at the same position.
    if (pos == wanted.size() && entry[pos] == '\0') {
      return static_cast<attribute::xhtml_attribute>(idx);
    }
  }

  return attribute::invalid_attribute;
}


// Maps an attribute name (wide characters) to its enumerator.
//
// The comparison ignores case on both sides. The table contains mixed-case
// entries (L"DOMActivate", ...), so comparing a lower-cased query against the
// raw table text could never find them.
//
// Only the part of attr_name before a possible embedded NUL character takes
// part in the lookup.
//
// Returns the enumerator whose position equals the index of the matching
// table entry, or attribute::invalid_attribute when nothing matches.
inline Xport::attribute::xhtml_attribute Xport::get_attribute_enum(const std::basic_string<wchar_t>& attr_name) 
{
  std::basic_string<wchar_t> wanted(attr_name.c_str());
  toLower(wanted);

  const size_t array_size = sizeof(wattribute_name) / sizeof(*wattribute_name); 

  for (size_t idx = 0; idx < array_size; ++idx) {
    const wchar_t* const entry = wattribute_name[idx];

    // Every row is NUL terminated, so the scan cannot leave the row.
    // towlower is the conversion the wide toLower() overload applies, so
    // table text and query are lowered by the same rules.
    size_t pos = 0;
    while (pos < wanted.size() && entry[pos] != L'\0' && static_cast<wchar_t>(towlower(entry[pos])) == wanted[pos])
      ++pos;

    // A match requires both strings to end at the same position.
    if (pos == wanted.size() && entry[pos] == L'\0') {
      return static_cast<attribute::xhtml_attribute>(idx);
    }
  }

  return attribute::invalid_attribute;
}

// Narrow-character overload: hands tag_name to typed_string<wchar_t> and
// forwards the result to the wide-character overload, whose return value is
// passed through unchanged.
// NOTE(review): typed_string is declared outside this section; presumably it
// converts the narrow string to a wide one - confirm.
inline Xport::xhtml_tag_enum Xport::get_tag_type_from_name(const std::basic_string<char>& tag_name)
{
  return get_tag_type_from_name(typed_string<wchar_t>(tag_name));
}


// Maps a tag name (wide characters) to its enumerator.
//
// A lower-cased copy of tag_name, cut at a possible embedded NUL character,
// is compared against each row of tag_names. The index of the first equal
// row is returned as xhtml_tag_enum; invalid_tag is returned when no row is
// equal.
inline Xport::xhtml_tag_enum Xport::get_tag_type_from_name(const std::basic_string<wchar_t>& tag_name)
{
  std::basic_string<wchar_t> lowered(tag_name);
  toLower(lowered);
  const std::basic_string<wchar_t> key(lowered.c_str());

  const size_t tag_count = sizeof(tag_names) / sizeof(*tag_names); 
  for (size_t idx = 0; idx != tag_count; ++idx) {
    if (key == tag_names[idx]) {
      return static_cast<xhtml_tag_enum>(idx);
    }
  }

  return invalid_tag;
}

// Strips leading whitespace from data in place (narrow characters).
// Whitespace is whatever std::isspace reports for a default-constructed
// std::locale. A string made up of whitespace only ends up empty.
inline void Xport::left_trim(std::basic_string<char>& data) 
{
  const std::locale loc;
  const std::basic_string<char>::size_type length = data.size();

  // Count how many characters at the front are whitespace ...
  std::basic_string<char>::size_type skip = 0;
  while (skip < length && std::isspace(data[skip], loc))
    ++skip;

  // ... and drop exactly that many.
  data.erase(0, skip);
}

// Strips leading whitespace from data in place (wide characters).
// Whitespace is whatever std::isspace reports for a default-constructed
// std::locale. A string made up of whitespace only ends up empty.
inline void Xport::left_trim(std::basic_string<wchar_t>& data) 
{
  const std::locale loc;
  const std::basic_string<wchar_t>::size_type length = data.size();

  // Count how many characters at the front are whitespace ...
  std::basic_string<wchar_t>::size_type skip = 0;
  while (skip < length && std::isspace(data[skip], loc))
    ++skip;

  // ... and drop exactly that many.
  data.erase(0, skip);
}



// Strips trailing whitespace from data in place (narrow characters).
// Whitespace is whatever std::isspace reports for a default-constructed
// std::locale. A string made up of whitespace only ends up empty.
inline void Xport::right_trim(std::basic_string<char>& data) 
{
  const std::locale loc;

  // Walk backwards to find the length that remains after trimming.
  std::basic_string<char>::size_type keep = data.size();
  while (keep > 0 && std::isspace(data[keep - 1], loc))
    --keep;

  data.erase(keep);
}

// Strips trailing whitespace from data in place (wide characters).
// Whitespace is whatever std::isspace reports for a default-constructed
// std::locale. A string made up of whitespace only ends up empty.
inline void Xport::right_trim(std::basic_string<wchar_t>& data) 
{
  const std::locale loc;

  // Walk backwards to find the length that remains after trimming.
  std::basic_string<wchar_t>::size_type keep = data.size();
  while (keep > 0 && std::isspace(data[keep - 1], loc))
    --keep;

  data.erase(keep);
}


// Converts every character of str to lower case in place (narrow
// characters), using std::tolower with a default-constructed std::locale.
inline void Xport::toLower(std::basic_string<char>& str) 
{ 
  const std::locale loc;
  const std::basic_string<char>::size_type length = str.size();

  for (std::basic_string<char>::size_type pos = 0; pos < length; ++pos) {
    str[pos] = std::tolower(str[pos], loc);
  }
} 

// Converts every character of str to lower case in place (wide characters).
//
// The conversion is done by towlower. Unlike the narrow overload, no
// std::locale object takes part, so the one the previous version constructed
// on every call without using it has been removed.
inline void Xport::toLower(std::basic_string<wchar_t>& str) 
{ 
  for (std::basic_string<wchar_t>::iterator it = str.begin(); it != str.end(); ++it) {
    // towlower returns wint_t; convert back to the string's character type.
    *it = static_cast<wchar_t>(towlower(*it));
  }
} 


/************************************************************************/
/* tag_info                                                             */
/************************************************************************/
// Plain record describing one tag: its enumerator and name, two nesting
// type values, and three sets (attributes, allowed nested tags, nested tag
// exclusions).
// The template parameter DT is not referenced by any member of the struct.
template<typename DT>
struct Xport::tag_info
{
  // Puts the record back into an "invalid tag" state: the enumerator becomes
  // invalid_tag, the name becomes empty and all three sets are emptied.
  // nt and allowed_nt keep whatever values they had.
  void clear() 
  {
    tag_enum = invalid_tag;
    tag_name.clear();

    attr_set.clear();
    allowed_nested_tags.clear();
    nested_tag_exclusions.clear();
  }

  xhtml_tag_enum tag_enum;                           // enumerator of the tag
  std::string tag_name;                              // tag name, always narrow
  xhtml_nesting_type nt;                             // not reset by clear()
  xhtml_nesting_type allowed_nt;                     // not reset by clear()
  std::set<attribute::xhtml_attribute> attr_set;     // emptied by clear()
  std::set<xhtml_tag_enum> allowed_nested_tags;      // emptied by clear()
  std::set<xhtml_tag_enum> nested_tag_exclusions;    // emptied by clear()
};

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)



Comments and Discussions