// released into the public domain
// by Christopher Diggins 2004
// http://www.cdiggins.com
#ifndef XML_RULES_HPP_INCLUDED
#define XML_RULES_HPP_INCLUDED
#include "re_ops.hpp"
#include "rules.hpp"
#include <string>
namespace xml_grammar
{
using namespace yard;
// ============================================================
// string representations
// Literal that opens a CDATA section; consumed by the CDStart rule.
struct CDStart_string
{
    static const char* GetString()
    {
        return "<![CDATA[";
    }
};
// Literal that closes a CDATA section; consumed by the CDEnd rule.
struct CDEnd_string
{
    static const char* GetString()
    {
        return "]]>";
    }
};
// Literal that opens a comment; consumed by the Comment rule.
struct CDComment_begin_string
{
    static const char* GetString()
    {
        return "<!--";
    }
};
// Literal that closes a comment; consumed by the Comment rule.
struct CDComment_end_string
{
    static const char* GetString()
    {
        return "-->";
    }
};
// Literal that opens the XML declaration; consumed by the XMLDecl rule.
struct XMLDecl_begin_string
{
    static const char* GetString()
    {
        return "<?xml";
    }
};
// Literal that closes the XML declaration; consumed by the XMLDecl rule.
struct XMLDecl_end_string
{
    static const char* GetString()
    {
        return "?>";
    }
};
// Literal that opens a document type declaration; consumed by the
// doctypedecl rule.
struct doctypedecl_string
{
    static const char* GetString()
    {
        return "<!DOCTYPE";
    }
};
// Literal that opens a processing instruction; consumed by the PI rule.
struct PI_begin_string
{
    static const char* GetString()
    {
        return "<?";
    }
};
// Literal that closes a processing instruction; consumed by the PI rule.
struct PI_end_string
{
    static const char* GetString()
    {
        return "?>";
    }
};
// ============================================================
// forward function declarations
// Forward declarations of the wrapper functions defined at the end of this
// header. content::Accept calls them, and it is defined before the `element`
// rule it (indirectly) recurses into, so the calls go through these functions.
//
// They are declared `inline` because their definitions live in this header:
// without it, including the header from more than one translation unit
// produces multiple definitions of the same external function. A function
// declared inline here remains inline at its later definition.
//
// Each takes the stream to parse from and returns the result of the
// corresponding rule's Accept.
inline bool AcceptElement(ParserInputStream<char>& stream);
inline bool AcceptComment(ParserInputStream<char>& stream);
inline bool AcceptCDSect(ParserInputStream<char>& stream);
inline bool AcceptPI(ParserInputStream<char>& stream);
// ============================================================
// the grammar productions
// Char: a single character that is tab (0x9), LF (0xA), CR (0xD), or is
// accepted by MatchCharRange<0x20, 0x7F>. Only 7-bit values are listed here;
// no wider character ranges are expressed by this rule.
struct Char :
    re_or<
        re_or< MatchChar<0x9>, MatchChar<0xA> >,
        re_or< MatchChar<0xD>, MatchCharRange<0x20, 0x7F> >
    >
{
};
// S: white space -- re_plus applied to a choice between space (0x20),
// tab (0x9), CR (0xD) and LF (0xA).
struct S :
    re_plus<
        re_or<
            re_or< MatchChar<0x20>, MatchChar<0x9> >,
            re_or< MatchChar<0xD>, MatchChar<0xA> >
        >
    >
{
};
// NameChar: one character of a name after the first -- whatever yard's
// MatchIdentOtherChar accepts, or one of '-', ':' and '.'.
struct NameChar :
    re_or<
        MatchIdentOtherChar,
        re_or3< MatchChar<'-'>, MatchChar<':'>, MatchChar<'.'> >
    >
{
};
// Name: a first character (MatchIdentFirstChar or ':') followed by
// re_star<NameChar>.
struct Name :
    re_and<
        re_or< MatchIdentFirstChar, MatchChar<':'> >,
        re_star<NameChar>
    >
{
};
// Names: a Name followed by re_star of (one space character 0x20, Name).
// The separator is the single character 0x20, not the S rule.
struct Names :
    re_and<
        Name,
        re_star< re_and< MatchChar<0x20>, Name > >
    >
{
};
// NMToken: re_plus<NameChar>; unlike Name, there is no separate rule for the
// first character.
struct NMToken : re_plus<NameChar>
{
};
// NMTokens: an NMToken followed by re_star of (one space character 0x20,
// NMToken). The separator is the single character 0x20, not the S rule.
struct NMTokens :
    re_and<
        NMToken,
        re_star< re_and< MatchChar<0x20>, NMToken > >
    >
{
};
// CDStart: MatchString over the literal supplied by CDStart_string.
struct CDStart : MatchString<CDStart_string>
{
};
// CDEnd: MatchString over the literal supplied by CDEnd_string.
struct CDEnd : MatchString<CDEnd_string>
{
};
// CDSect: a CDATA section -- CDStart followed by re_until<CDEnd>.
struct CDSect :
    re_and<
        CDStart,
        re_until<CDEnd>
    >
{
};
// AttValue: a quoted attribute value. Either a double quote followed by
// re_until the next double quote, or a single quote followed by re_until the
// next single quote. This rule expresses no other constraint on what the
// value contains.
struct AttValue :
    re_or<
        re_and< MatchChar<'"'>, re_until< MatchChar<'"'> > >,
        re_and< MatchChar<'\''>, re_until< MatchChar<'\''> > >
    >
{
};
// Eq: an '=' sign with re_opt<S> on each side.
struct Eq :
    re_and3< re_opt<S>, MatchChar<'='>, re_opt<S> >
{
};
// Attribute: Name, then Eq, then AttValue.
struct Attribute :
    re_and3< Name, Eq, AttValue >
{
};
// Attributes: re_star of (S, Attribute) -- each attribute must be preceded
// by white space.
struct Attributes :
    re_star< re_and< S, Attribute > >
{
};
// Comment: the CDComment_begin_string literal followed by re_until the
// CDComment_end_string literal.
struct Comment :
    re_and<
        MatchString<CDComment_begin_string>,
        re_until< MatchString<CDComment_end_string> >
    >
{
};
// TagContents: what sits between '<' and '>' in a start tag -- Name,
// Attributes, then re_opt<S>.
struct TagContents :
    re_and3< Name, Attributes, re_opt<S> >
{
};
// STag: a start tag -- '<', TagContents, '>'.
struct STag :
    re_and3< MatchChar<'<'>, TagContents, MatchChar<'>'> >
{
};
// ETag: an end tag -- '<', '/', Name, then re_opt<S> and '>'.
// The rule does not compare Name against any start tag.
struct ETag :
    re_and<
        re_and3< MatchChar<'<'>, MatchChar<'/'>, Name >,
        re_and< re_opt<S>, MatchChar<'>'> >
    >
{
};
// EmptyElemTag: a self-closing tag -- '<' and Name, then Attributes,
// re_opt<S>, and the two characters '/' '>'.
struct EmptyElemTag :
    re_and<
        re_and< MatchChar<'<'>, Name >,
        re_and3<
            Attributes,
            re_opt<S>,
            re_and< MatchChar<'/'>, MatchChar<'>'> >
        >
    >
{
};
// CharData: skips text between markup.
struct CharData
{
    // Advances `stream` until it is at its end or its current element is
    // '<'. Always returns true, including when nothing was skipped.
    static bool Accept(ParserInputStream<char>& stream)
    {
        for (;;) {
            if (stream.AtEnd()) {
                break;
            }
            if (stream.GetElem() == '<') {
                break;
            }
            stream.GotoNext();
        }
        return true;
    }
};
// content: the body of a non-empty element.
struct content
{
    // Repeats, while `stream` is not at its end: skip character data, then
    // try (in this order) an element, a comment, a CDATA section and a
    // processing instruction. The loop ends as soon as none of the four is
    // accepted. Always returns true.
    static bool Accept(ParserInputStream<char>& stream)
    {
        while (!stream.AtEnd()) {
            CharData::Accept(stream);
            if (AcceptElement(stream)) {
                continue;
            }
            if (AcceptComment(stream)) {
                continue;
            }
            if (AcceptCDSect(stream)) {
                continue;
            }
            if (AcceptPI(stream)) {
                continue;
            }
            // Nothing recognised at this position: leave it to the caller.
            break;
        }
        return true;
    }
};
// NonEmptyElemTag: an element with a body -- STag, content, ETag.
struct NonEmptyElemTag :
    re_and3< STag, content, ETag >
{
};
// element: EmptyElemTag or NonEmptyElemTag, listed in that order.
struct element :
    re_or< EmptyElemTag, NonEmptyElemTag >
{
};
// XMLDecl: the XMLDecl_begin_string literal followed by re_until the
// XMLDecl_end_string literal. The declaration's contents are not examined
// by this rule.
struct XMLDecl :
    re_and<
        MatchString<XMLDecl_begin_string>,
        re_until< MatchString<XMLDecl_end_string> >
    >
{
};
// doctypedecl: the doctypedecl_string literal followed by re_until a '>'
// character. The terminator is a plain '>', so the rule has no notion of
// nesting inside the declaration.
struct doctypedecl :
    re_and<
        MatchString<doctypedecl_string>,
        re_until< MatchChar<'>'> >
    >
{
};
// PI: a processing instruction -- the PI_begin_string literal followed by
// re_until the PI_end_string literal.
struct PI :
    re_and<
        MatchString<PI_begin_string>,
        re_until< MatchString<PI_end_string> >
    >
{
};
// Misc: Comment, PI or S, listed in that order.
struct Misc :
    re_or3< Comment, PI, S >
{
};
// prolog: re_opt<XMLDecl>, then re_star<Misc>, then an optional
// (doctypedecl followed by re_star<Misc>).
struct prolog :
    re_and3<
        re_opt<XMLDecl>,
        re_star<Misc>,
        re_opt< re_and< doctypedecl, re_star<Misc> > >
    >
{
};
// document: the top-level rule -- prolog, one element, then re_star<Misc>.
struct document :
    re_and3< prolog, element, re_star<Misc> >
{
};
// ============================================================
// function definitions
// Function wrapper around the `element` rule. content::Accept calls it via
// the forward declaration near the top of this header, before `element` is
// defined.
//
// `inline` is required because this definition lives in a header: without it,
// every translation unit that includes the header emits its own external
// definition and the program fails to link.
//
// stream: the input to parse from.
// Returns whatever element::Accept(stream) returns.
inline bool AcceptElement(ParserInputStream<char>& stream) {
    return element::Accept(stream);
}
// Function wrapper around the `Comment` rule, called from content::Accept.
//
// `inline` is required because this definition lives in a header: without it,
// every translation unit that includes the header emits its own external
// definition and the program fails to link.
//
// stream: the input to parse from.
// Returns whatever Comment::Accept(stream) returns.
inline bool AcceptComment(ParserInputStream<char>& stream) {
    return Comment::Accept(stream);
}
// Function wrapper around the `CDSect` rule, called from content::Accept.
//
// `inline` is required because this definition lives in a header: without it,
// every translation unit that includes the header emits its own external
// definition and the program fails to link.
//
// stream: the input to parse from.
// Returns whatever CDSect::Accept(stream) returns.
inline bool AcceptCDSect(ParserInputStream<char>& stream) {
    return CDSect::Accept(stream);
}
// Function wrapper around the `PI` rule, called from content::Accept.
//
// `inline` is required because this definition lives in a header: without it,
// every translation unit that includes the header emits its own external
// definition and the program fails to link.
//
// stream: the input to parse from.
// Returns whatever PI::Accept(stream) returns.
inline bool AcceptPI(ParserInputStream<char>& stream) {
    return PI::Accept(stream);
}
};
#endif // #ifndef XML_RULES_HPP_INCLUDED