Click here to Skip to main content
15,886,724 members
Articles / Programming Languages / XML

XMLLib for PUGXML with XPath

Rate me:
Please Sign up or sign in to vote.
4.33/5 (11 votes)
29 Oct 2009 · CPOL · 5 min read · 126K · 1.2K · 38
A library for PugXML which implements XPath
//**************************************************************************************************************************
//* Blue Xml Extension
//* Copyright (c) 2002-2003 Josh Harler
//*
//* Blue - General Purpose C++ Library
//* Copyright (c) 2002-2003 Josh Harler
//*
//* This software is provided 'as-is', without any express or implied warranty. In no event
//* will the authors be held liable for any damages arising from the use of this software.
//*
//* Permission is granted to anyone to use this software for any purpose, including commercial
//* applications, and to alter it and redistribute it freely, subject to the following restrictions:
//*
//*     1. The origin of this software must not be misrepresented; you must not claim that you
//*     wrote the original software. If you use this software in a product, an acknowledgment in the
//*     product documentation would be appreciated but is not required.
//*
//*     2. Altered source versions must be plainly marked as such, and must not be misrepresented as
//*     being the original software.
//*
//*     3. This notice may not be removed or altered from any source distribution.
//*
//* Modified by JCrane2 to support PugXML parser:
//* http://www.codeproject.com/soap/pugxml.asp
//* http://www.codeproject.com/soap/JCraneArticle.asp
//*
//* file   PugXpathExpression.h
//**

//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_PUGXPATHEXPRESSION_H__0005D4D5_A9EB_45EB_A638_71BC589ED18A__INCLUDED_)
#define AFX_PUGXPATHEXPRESSION_H__0005D4D5_A9EB_45EB_A638_71BC589ED18A__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

#include "BString.h"
#include "Platform.h"
#include "Array.h"
#include "xml.h"

// Include the Pug XML library.
#include "pugxml.h"
#include <sstream>
#include <io.h>
#include <sys\stat.h>

using namespace std;
using namespace pug;


// Public Defines/Enums/Typedefs/Etc. ======================================================================================

// Public Classes/Structs ==================================================================================================

namespace blue
{
    namespace ext
    {
        namespace xml
        {
        /***
        * Forward Declarations
            */
            class PugXPathToken;
            class PugXPathFunction;
            class PugXPathPredicate;

            /**
            * \class PugXPathExpression
            * \brief Resolves an XPath expression against a PugXML node tree.
            * \ingroup Xml
            *
            * XPath is to Xml files what SQL is to relational databases.
            * Essentially, XPath is a way to query the Xml file and return
            * the nodes that match the condition of the XPath expression.
            *
            * You can read the official XPath specs here:
            *     http://www.w3.org/TR/xpath
            *
            * An excellent XPath tutorial can be found here:
            *     http://www.zvon.org/xxl/XPathTutorial/General/examples.html
            *
            * Instances are non-copyable (the copy constructor and assignment
            * operator are declared private).
            */
            class PugXPathExpression
            {
            public:
                // ===========================================================
                //  creation/destruction
                // ===========================================================

                /** Default constructor (takes no root node). */
                PugXPathExpression();
                /**
                * Constructor taking a root node.
                *
                * \param root - Node to use as the root for expressions
                *        (presumably equivalent to calling setRootNode();
                *        confirm against the implementation).
                */
                PugXPathExpression(xml_node_struct* root);

                /** Destructor. */
                ~PugXPathExpression();

                // ===========================================================
                //  query
                // ===========================================================

                /**
                * Returns the node being used as the root node for all
                * expressions.
                */
                xml_node_struct* getRootNode();
                /** Const overload of getRootNode(). */
                const xml_node_struct* getRootNode() const;

                /**
                * Returns a pointer to the requested function.
                *
                * \param function - Identifies the function to look up
                *        (presumably its name, see PugXPathFunction::getName();
                *        confirm against the implementation).
                */
                PugXPathFunction* getFunction(BString function);

                /** Const overload of getFunction(). */
                const PugXPathFunction* getFunction(BString function) const;


                /**
                * Returns the nodes that match the given expression.
                *
                * \param xpathExpr - The XPath expression to evaluate.
                */
                Array<xml_node_struct*> findNodes(BString xpathExpr);
                /** Const overload of findNodes(BString). */
                const Array<xml_node_struct*> findNodes(BString xpathExpr) const;

                /**
                * Overload of findNodes() that takes the expression as a
                * C string instead of a BString.
                */
                Array<xml_node_struct*> findNodes(const char* xpathExpr);
                /** Const overload of findNodes(const char*). */
                const Array<xml_node_struct*> findNodes(const char* xpathExpr) const;

                /**
                * Returns the values of the nodes that match the given
                * expression.
                *
                * \param xpathExpr - The XPath expression to evaluate.
                */
                Array<BString> findValues(BString xpathExpr) const;

                /**
                * Returns the first node that matches the given expression.
                *
                * NOTE(review): the result when nothing matches is not
                * specified in this header; check for a null pointer until
                * confirmed against the implementation.
                */
                xml_node_struct* findNode(BString xpathExpr);
                /** Const overload of findNode(). */
                const xml_node_struct* findNode(BString xpathExpr) const;

                /**
                * Returns the value of the first node that matches the given
                * expression.
                */
                BString findValue(BString xpathExpr) const;

                // ===========================================================
                //  manipulation
                // ===========================================================

                /**
                * Sets the node to use as the root node for all expressions.
                */
                void setRootNode(xml_node_struct* root);

                /**
                * Adds a custom function that can be used in XPath expressions.
                *
                * NOTE(review): ownership of the pointer is not stated in this
                * header; confirm who is responsible for deleting it.
                */
                void addFunction(PugXPathFunction* function);
                /**
                * Removes a function from use within an XPath expression.
                */
                void removeFunction(BString function);

            private:
                /**
                * Copy constructor. Private because PugXPathExpressions should not be
                * manipulated by more than one instance.
                */
                PugXPathExpression(const PugXPathExpression&);
                /** Private assignment operator. See copy constructor documentation. */
                const PugXPathExpression& operator=(const PugXPathExpression&);

                /**
                * One parsed item of an XPath expression: an axis, a node
                * part and the token lists of its predicates.
                */
                struct xp_path_item
                {
                    BString m_axis;
                    BString m_node;
                    Array< Array<PugXPathToken> > m_predicates;
                };

                /**
                * Fills `info` with the path items extracted from `xpathExpr`.
                * NOTE(review): the meaning of the outer/inner Array levels is
                * not visible in this header; confirm against the implementation.
                */
                void extractInfo(BString xpathExpr, Array< Array<xp_path_item> >& info);

                // Helpers that take the current `search` node set (or a single node)
                // plus one path item and return a node set.
                // Implementations are not part of this header.
                Array<xml_node_struct*> processPass(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processPassAxis(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processRecursiveMatch(xml_node_struct* node, xp_path_item& item);

                // Handlers named after the XPath axes (child, descendant, parent, ...).
                // NOTE(review): presumably each maps `search` to the nodes reachable
                // along that axis that match `item`; confirm against the implementation.
                Array<xml_node_struct*> processAxisChild(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processAxisDescendant(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processAxisParent(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processAxisAncestor(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processAxisFollowingSibling(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processAxisPrecedingSibling(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processAxisFollowing(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processAxisPreceding(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processAxisDescendantOrSelf(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processAxisAncestorOrSelf(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processAxisSelf(Array<xml_node_struct*> search, xp_path_item& item);
                Array<xml_node_struct*> processAxisAttribute(Array<xml_node_struct*> search, xp_path_item& item);

                // NOTE(review): presumably filters `search` by the predicates stored in `item`.
                Array<xml_node_struct*> processPredicates(Array<xml_node_struct*> search, xp_path_item& item);

                // NOTE(review): presumably tests a single node against `item`.
                bool checkForMatch(xml_node_struct* node, xp_path_item& item);

                xml_node_struct* m_root;         // root node (see getRootNode()/setRootNode())
                PugXPathPredicate* m_predicate;  // NOTE(review): ownership/lifetime not visible here
    };

    /**
    * \class PugXPathToken
    * \ingroup Xml
    * \brief Represents a token in an XPath predicate.
    *
    * A token stores its value as a BString together with a type tag
    * (see type_e).
    */
    class PugXPathToken
    {
    public:
        /** The kind of value (or syntax element) a token holds. */
        enum type_e
        {
            STRING,      //!< BString value
            NUMBER_INT,  //!< Integer numeric value
            NUMBER_DBL,  //!< Double floating point precision value
            BOOLEAN,     //!< Boolean value
            NODESET,     //!< Nodeset

            OPERATOR,    //!< An operator
            PAREN,       //!< A parenthesis
            FUNCTION,    //!< The name of a function
        };

        // ===========================================================
        //  creation/destruction
        // ===========================================================

        /** Default constructor. */
        PugXPathToken();
        /** Constructs a token from a value and an explicit type. */
        PugXPathToken(BString value, type_e type);
        /**
        * Constructs a token from a BString value.
        * NOTE(review): presumably typed STRING; confirm against the implementation.
        */
        PugXPathToken(BString value);
        /**
        * Constructs a token from a bool value.
        * NOTE(review): presumably typed BOOLEAN; confirm against the implementation.
        */
        PugXPathToken(bool value);
        /**
        * Constructs a token from an int value.
        * NOTE(review): presumably typed NUMBER_INT; confirm against the implementation.
        */
        PugXPathToken(int value);
        /**
        * Constructs a token from a double value.
        * NOTE(review): presumably typed NUMBER_DBL; confirm against the implementation.
        */
        PugXPathToken(double value);

        // ===========================================================
        //  query
        // ===========================================================

        /**
        * Returns the value of the token as a BString.
        *
        * \param context - The node the token is evaluated against
        *        (NOTE(review): presumably only relevant for NODESET
        *        tokens; confirm against the implementation).
        */
        BString getValue(xml_node_struct* context) const;
        /** Returns the type of the value of this token. */
        type_e getType() const;

        /**
        * Returns the value of this token if it is a string.
        * NOTE(review): the original comment here was a copy of the
        * getValueBool() text; presumably an XPathException is thrown
        * if the value is not a string. Confirm against the implementation.
        */
        BString getValueString() const;
        /**
        * Returns the value of this token if it is a boolean.
        * If the value is not a boolean, an XPathException will be thrown.
        */
        bool getValueBool() const;
        /**
        * Returns the value of this token as an int if it is a number.
        * If the value is not a number, an XPathException will be thrown.
        */
        int getValueNumberInt() const;
        /**
        * Returns the value of this token as a double if it is a number.
        * If the value is not a number, an XPathException will be thrown.
        */
        double getValueNumberDouble() const;
        /**
        * Returns the value of this token if it is a nodeset.
        * If the value is not a nodeset, an XPathException will be thrown.
        *
        * \param context - The node the nodeset is evaluated against.
        */
        Array<xml_node_struct*> getValueNodeSet(xml_node_struct* context) const;

        /**
        * Determines if the value type of this token is compatible with the
        * given value type.
        *
        * For example, a BOOLEAN is compatible with a STRING (true => "true").
        */
        bool isCompatibleWith(type_e type) const;

    private:
        BString m_value;  // token value, kept in string form
        type_e m_type;    // type tag for m_value
    };

    /**
    * \class PugXPathFunction
    * \ingroup Xml
    * \brief A function that can be called in an XPath predicate.
    *
    * Custom PugXPath functions are created by deriving new classes
    * from this one and overriding the pure virtual execute() function.
    * Instances are non-copyable (the copy constructor and assignment
    * operator are declared private).
    */
    class PugXPathFunction
    {
    public:
        // ===========================================================
        //  creation/destruction
        // ===========================================================

        /** Virtual destructor, so derived functions can be deleted through a base pointer. */
        virtual ~PugXPathFunction()
        {}


        // ===========================================================
        //  query
        // ===========================================================

        /** Returns the name of the function. */
        BString getName() const;
        /**
        * Returns the number of parameters the function expects
        * (see the constructor documentation for the meaning of '-1').
        */
        int getParmCount() const;

        // ===========================================================
        //  manipulation
        // ===========================================================

        /**
        * Called to execute the function.  This must be overridden by
        * all deriving classes.
        *
        * \param search - The nodes that match the path so far.
        * \param context - The node currently being processed.
        * \param parms - The parameters being passed to the function.
        *        These should be validated using validateParm().
        * \return The result of the function as a token.
        */
        virtual PugXPathToken execute(Array<xml_node_struct*> search, xml_node_struct* context, Array<PugXPathToken> parms) = 0;


    protected:
        /**
        * Constructor. Protected, so only deriving classes can call it.
        *
        * \param name - The name of the function.
        * \param parmCount - The number of parameters that must be
        *        passed to the function.  If this value is '-1' then
        *        there is no set number of parameters.
        */
        PugXPathFunction(BString name, int parmCount);

        /**
        * Called in the execute() function to validate the given parameters.
        *
        * \param parm - The parameter to check.
        * \param type - The type the parameter is expected to have.
        *
        * NOTE(review): the failure behaviour is not visible in this header;
        * presumably an exception is thrown on a mismatch. Confirm against
        * the implementation.
        */
        void validateParm(PugXPathToken& parm, PugXPathToken::type_e type);

    private:
        /**
        * Copy constructor. Private because PugXPathFunctions should not be
        * manipulated by more than one instance.
        */
        PugXPathFunction(const PugXPathFunction&);
        /** Private assignment operator. See copy constructor documentation. */
        const PugXPathFunction& operator=(const PugXPathFunction&);

        BString m_name;       // function name (see getName())
        int    m_parmCount;   // expected parameter count (see getParmCount())
    };

    /**
    * \class PugXPathException
    * \ingroup XmlExceptions
    * \brief Exception type used to report a syntax error found in a
    *        PugXPath expression.
    */
    class PugXPathException : public XmlException
    {
    public:
        /**
        * Constructor.
        *
        * \param desc - Description of the error; handed on to the
        *        XmlException base class.
        */
        PugXPathException(BString desc) : XmlException(desc)
        {
        }

        /** Returns the name of this exception type. */
        virtual BString getException() const
        {
            return "PugXPathException";
        }
    };
}
}
}   // namespaces


// Public External Variables ===============================================================================================

// Public Function Prototypes ==============================================================================================

// Public Inline Functions =================================================================================================

#endif // !defined(AFX_PUGXPATHEXPRESSION_H__0005D4D5_A9EB_45EB_A638_71BC589ED18A__INCLUDED_)

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Web Developer
United States
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions