Click here to Skip to main content
15,887,083 members
Articles / Programming Languages / XML

XMLLib for PUGXML with XPath

Rate me:
Please Sign up or sign in to vote.
4.33/5 (11 votes)
29 Oct 2009CPOL5 min read 126.1K   1.2K   38  
A library for PugXML which implements XPath
//**************************************************************************************************************************
//* Blue Xml Extension
//* Copyright (c) 2002-2003 Josh Harler
//*
//* Blue - General Purpose C++ Library
//* Copyright (c) 2002-2003 Josh Harler
//*
//* This software is provided 'as-is', without any express or implied warranty. In no event
//* will the authors be held liable for any damages arising from the use of this software.
//*
//* Permission is granted to anyone to use this software for any purpose, including commercial
//* applications, and to alter it and redistribute it freely, subject to the following restrictions:
//*
//*     1. The origin of this software must not be misrepresented; you must not claim that you
//*     wrote the original software. If you use this software in a product, an acknowledgment in the
//*     product documentation would be appreciated but is not required.
//*
//*     2. Altered source versions must be plainly marked as such, and must not be misrepresented as
//*     being the original software.
//*
//*     3. This notice may not be removed or altered from any source distribution.
//*
//* Modified by JCrane2 to support PugXML parser:
//* http://www.codeproject.com/soap/pugxml.asp
//* http://www.codeproject.com/soap/JCraneArticle.asp
//*
//* file   PugXPathPredicate.cpp
//**

//
//////////////////////////////////////////////////////////////////////

#define _CRT_SECURE_NO_WARNINGS 1


// Blue library headers
#include "StringTokenizer.h"

// matching header
#include "PugXPathPredicate.h"

// Extension headers
#include "PugXPathFunctions.h"

// Private Defines/Enums/Typedefs/Etc ======================================================================================

using namespace blue;
using namespace blue::common;
using namespace blue::util;
using namespace blue::ext;
using namespace blue::ext::xml;


namespace
{
    // Tells whether `op` is spelled exactly like one of the operator tokens the
    // predicate parser knows about: comparisons, arithmetic, the logical
    // "or"/"and", and the "," that separates function arguments.
    inline bool priv_isOperator(BString op)
    {
        static const char* const knownOperators[] =
        {
            "=",   "!=",   "<",
            "<=",  ">",    ">=",
            "+",   "-",    "*",
            "div", "mult", "mod",
            "or",  "and",  ","
        };
        const int knownCount = sizeof(knownOperators) / sizeof(knownOperators[0]);

        for (int idx = 0; idx < knownCount; ++idx)
        {
            if (op == knownOperators[idx])
            {
                return (true);
            }
        }

        return (false);
    }


    // Re-attaches '-' tokens that belong to a name rather than to a subtraction.
    //
    // A "-" token whose predecessor is anything other than a single space is
    // appended to that predecessor. When the token after the "-" is non-blank
    // (after trim()), it is appended as well, so "a", "-", "b" collapses to "a-b".
    // The first and the last token are never treated as the "-" itself.
    void priv_parseCheckMinus(Array<BString>& tokens)
    {
        for (int pos = 1; pos < tokens.getSize() - 1; )
        {
            const bool gluedMinus = (tokens[pos] == "-") && (tokens[pos - 1] != " ");
            if (!gluedMinus)
            {
                ++pos;
                continue;
            }

            // Fold the '-' into the token on its left...
            tokens[pos - 1] += tokens[pos];

            // ...and, if something non-blank follows, fold that in too.
            if (tokens[pos + 1].trim().getLength() > 0)
            {
                tokens[pos - 1] += tokens[pos + 1];
                tokens.remove(pos + 1);
            }
            tokens.remove(pos);

            // `pos` now addresses the token that slid into the freed slot, so it
            // is deliberately not advanced here.
        }
    }


    // Drops every token that is empty once trimmed, leaving the remaining
    // tokens in their original order.
    void priv_parseRemoveWhitespace(Array<BString>& tokens)
    {
        for (int pos = 0; pos < tokens.getSize(); )
        {
            const bool blank = (tokens[pos].trim().getLength() == 0);
            if (blank)
            {
                // The next token slides into `pos`; re-examine the same index.
                tokens.remove(pos);
            }
            else
            {
                ++pos;
            }
        }
    }


    // Repairs single-character tokens that the tokenizer split too eagerly.
    //
    //  * "<", ">" and "!" immediately followed by "=" are joined into the
    //    two-character comparison operators "<=", ">=" and "!=".
    //  * "*" is appended to the preceding token (treating it as part of a name,
    //    e.g. a wildcard) unless that token is an operator, a number, or a
    //    parenthesis, in which case "*" stays a stand-alone multiplication
    //    operator.
    //
    // A closing parenthesis is included in that exclusion list: a "*" that
    // follows ")" can only be a multiplication, and gluing it onto the paren
    // would yield a ")*" token that leaves the matching "(" unbalanced.
    void priv_parseConcatOperators(Array<BString>& tokens)
    {
        int iT = 0;
        while (iT < tokens.getSize())
        {
            if (tokens[iT].getLength() == 1)
            {
                switch (tokens[iT][0])
                {
                    case '<': case '>':
                    case '!':
                        if (iT < tokens.getSize() - 1 && tokens[iT + 1] == "=")
                        {
                            tokens[iT] += "=";
                            tokens.remove(iT + 1);
                        }
                        break;

                    case '*':
                        if (iT > 0)
                        {
                            BString token = tokens[iT - 1];
                            if (!priv_isOperator(token) && !token.isValidDouble() &&
                                token != "(" && token != ")")
                            {
                                tokens[iT - 1] += "*";
                                tokens.remove(iT);
                                // Re-examine the token that moved into slot iT.
                                continue;
                            }
                        }
                        break;
                }
            }
            ++iT;
        }
    }


    // Merges runs of adjacent operand tokens into a single token.
    //
    // Walking from index 1 up to (but excluding) the last token, each token is
    // classified as a separator (operator or parenthesis) or an operand. An
    // operand that does not directly follow a separator seen by this walk is
    // appended to the token on its left.
    //
    // NOTE(review): token 0 is never classified, so an operand at index 1 is
    // always merged into it, even when token 0 is "(" -- confirm whether that is
    // intended (it does let a leading "-" fuse with a following number).
    // NOTE(review): the final token is never examined either -- verify.
    void priv_parseConcatNonOperators(Array<BString>& tokens)
    {
        bool prevWasSeparator = false;

        for (int pos = 1; pos < tokens.getSize() - 1; )
        {
            const bool separator =
                priv_isOperator(tokens[pos]) || tokens[pos] == "(" || tokens[pos] == ")";

            if (separator)
            {
                prevWasSeparator = true;
                ++pos;
                continue;
            }

            if (prevWasSeparator)
            {
                // First operand after a separator starts a new run.
                prevWasSeparator = false;
                ++pos;
                continue;
            }

            // Operand following another operand: glue it onto its neighbour and
            // stay on the same index, which now holds the next token.
            tokens[pos - 1] += tokens[pos];
            tokens.remove(pos);
        }
    }

    // Collapses "name", "(", ")" into the single token "name()" whenever `name`
    // is not a function registered with `predicate`, so that it is not later
    // mistaken for a function call with an empty argument list.
    //
    // The scan covers every position where both "(" and ")" fit inside the
    // array, including a "name()" that ends the expression (the previous bound
    // stopped one position early and left a trailing pair un-collapsed).
    void priv_checkForNotFunctions(Array<BString>& tokens, PugXPathPredicate& predicate)
    {
        int iT = 1;
        while (iT + 1 < tokens.getSize())
        {
            if (tokens[iT] == "(" && tokens[iT + 1] == ")")
            {
                if (!predicate.hasFunction(tokens[iT - 1]))
                {
                    tokens[iT - 1] += "()";
                    tokens.remove(iT + 1);
                    tokens.remove(iT);
                    // Slot iT now holds whatever followed ")"; re-examine it.
                    continue;
                }
            }
            ++iT;
        }
    }


    // Returns the index of the first entry in `operators` whose getName()
    // equals `find`, or -1 when no entry matches.
    int priv_findOperatorIdx(Array<PugXPathFunction*> operators, BString find)
    {
        int match = -1;

        for (int pos = 0; match < 0 && pos < operators.getSize(); ++pos)
        {
            if (operators[pos]->getName() == find)
            {
                match = pos;
            }
        }

        return (match);
    }
}


// Functions ===============================================================================================================

namespace blue
{
    namespace ext
    {
        namespace xml
        {
            // ---------------------------------------------------------------------------------------------------------------------

            PugXPathPredicate::PugXPathPredicate()
            {
                // Built-in functions. They are stored in m_defaults (which the
                // destructor deletes) and then registered through addFunction().
                PugXPathFunction* const builtinFunctions[] =
                {
                    new PugXPathFunctionCount(),
                    new PugXPathFunctionLast(),
                    new PugXPathFunctionLocalName(),
                    new PugXPathFunctionName(),
                    new PugXPathFunctionPosition(),
                    new PugXPathFunctionConcat(),
                    new PugXPathFunctionContains(),
                    new PugXPathFunctionNormalizeSpace(),
                    new PugXPathFunctionStartsWith(),
                    new PugXPathFunctionString(),
                    new PugXPathFunctionStringLength(),
                    new PugXPathFunctionSubstring(),
                    new PugXPathFunctionSubstringAfter(),
                    new PugXPathFunctionSubstringBefore(),
                    new PugXPathFunctionTranslate(),
                    new PugXPathFunctionCeiling(),
                    new PugXPathFunctionFloor(),
                    new PugXPathFunctionRound(),
                    new PugXPathFunctionSum(),
                    new PugXPathFunctionBoolean(),
                    new PugXPathFunctionFalse(),
                    new PugXPathFunctionNot(),
                    new PugXPathFunctionTrue()
                };
                const int builtinFunctionCount = sizeof(builtinFunctions) / sizeof(builtinFunctions[0]);

                // Operators. The order matters: evaluateSection() applies the
                // operator with the highest index in m_operators first, so this
                // list runs from loosest-binding to tightest-binding.
                PugXPathFunction* const builtinOperators[] =
                {
                    new PugXPathOperatorOr(),
                    new PugXPathOperatorAnd(),
                    new PugXPathOperatorEqual(),
                    new PugXPathOperatorNotEqual(),
                    new PugXPathOperatorLessEqual(),
                    new PugXPathOperatorLess(),
                    new PugXPathOperatorGreatEqual(),
                    new PugXPathOperatorGreat(),
                    new PugXPathOperatorAdd(),
                    new PugXPathOperatorSub(),
                    new PugXPathOperatorMult(),
                    new PugXPathOperatorDiv(),
                    new PugXPathOperatorMod()
                };
                const int builtinOperatorCount = sizeof(builtinOperators) / sizeof(builtinOperators[0]);

                for (int iF = 0; iF < builtinFunctionCount; ++iF)
                {
                    m_defaults.append(builtinFunctions[iF]);
                }

                for (int iOp = 0; iOp < builtinOperatorCount; ++iOp)
                {
                    m_operators.append(builtinOperators[iOp]);
                }

                // Make every default function callable from predicates.
                int registered = 0;
                while (registered < m_defaults.getSize())
                {
                    addFunction(m_defaults[registered]);
                    ++registered;
                }
            }

            // ---------------------------------------------------------------------------------------------------------------------

            // Releases the function and operator objects allocated by the
            // constructor. Entries of m_functions are not deleted here.
            PugXPathPredicate::~PugXPathPredicate()
            {
                int idx = 0;
                while (idx < m_defaults.getSize())
                {
                    delete m_defaults[idx];
                    ++idx;
                }

                idx = 0;
                while (idx < m_operators.getSize())
                {
                    delete m_operators[idx];
                    ++idx;
                }
            }

            // ---------------------------------------------------------------------------------------------------------------------

            // Splits a predicate expression into typed PugXPathTokens.
            //
            // The text is tokenized, the raw tokens are cleaned up by the
            // priv_parse* helpers, and each surviving token is then classified as
            // OPERATOR, PAREN, FUNCTION, NUMBER_INT, NUMBER_DBL, BOOLEAN, STRING
            // or (when nothing else applies) NODESET.
            Array<PugXPathToken> PugXPathPredicate::parsePredicate(BString predicate)
            {
                StringTokenizer toker;
                toker.addContainer("\"", "\"", StringTokenizer::EXCLUSIVE);
                toker.addContainer("\'", "\'", StringTokenizer::EXCLUSIVE);
                toker.addContainer("[",  "]",  StringTokenizer::NORMAL);
                toker.addContainer("(",  ")",  StringTokenizer::NORMAL);
                flags32_t tokerFlags = StringTokenizer::KEEP_DELIMITERS | StringTokenizer::NO_WHITESPACE | StringTokenizer::TRIM_RESULTS;
                BString delimiters = " (),+=-*!<>";

                Array<BString> predTokens = toker.tokenize(predicate, delimiters, tokerFlags);

                // Re-tokenize every token; whenever one splits further, replace it
                // in place by its pieces. Only one step forward is taken afterwards,
                // so the inserted pieces are themselves revisited.
                for (int idx = 0; idx < predTokens.getSize(); ++idx)
                {
                    Array<BString> pieces = toker.tokenize(predTokens[idx], delimiters, tokerFlags);
                    if (pieces.getSize() <= 1)
                    {
                        continue;
                    }

                    predTokens.remove(idx);
                    for (int piece = 0; piece < pieces.getSize(); ++piece)
                    {
                        predTokens.insert(pieces[piece], idx + piece);
                    }
                }

                // Clean-up passes; their order is significant.
                priv_parseCheckMinus(predTokens);            // '-' inside node names vs. the operator
                priv_parseRemoveWhitespace(predTokens);      // drop blank tokens
                priv_parseConcatOperators(predTokens);       // concatenate split operators
                priv_parseConcatNonOperators(predTokens);    // concatenate non-operator, non-paren tokens
                priv_checkForNotFunctions(predTokens, *this);// "name()" where name is not a function

                // Classify each token; the first matching rule wins.
                Array<PugXPathToken> results;
                for (int idx = 0; idx < predTokens.getSize(); ++idx)
                {
                    BString token = predTokens[idx];

                    if (priv_isOperator(token))
                    {
                        results.append(PugXPathToken(token, PugXPathToken::OPERATOR));
                        continue;
                    }

                    if (token == "(" || token == ")")
                    {
                        results.append(PugXPathToken(token, PugXPathToken::PAREN));
                        continue;
                    }

                    if (hasFunction(token))
                    {
                        results.append(PugXPathToken(token, PugXPathToken::FUNCTION));
                        continue;
                    }

                    if (token.isValidDouble())
                    {
                        // Integers are a subset of valid doubles; tag them separately.
                        if (token.isValidInt())
                        {
                            results.append(PugXPathToken(token, PugXPathToken::NUMBER_INT));
                        }
                        else
                        {
                            results.append(PugXPathToken(token, PugXPathToken::NUMBER_DBL));
                        }
                        continue;
                    }

                    if (token == "true" || token == "false")
                    {
                        results.append(PugXPathToken(token, PugXPathToken::BOOLEAN));
                        continue;
                    }

                    const bool quoted =
                        (token.beginsWith("\"") && token.endsWith("\"")) ||
                        (token.beginsWith("\'") && token.endsWith("\'"));
                    if (quoted)
                    {
                        // Store the literal without its surrounding quote characters.
                        results.append(PugXPathToken(token.stripFromLeft(1).stripFromRight(1), PugXPathToken::STRING));
                        continue;
                    }

                    results.append(PugXPathToken(token, PugXPathToken::NODESET));
                }

                return (results);
            }

            // ---------------------------------------------------------------------------------------------------------------------

            // Reports whether a function with the given name is currently
            // registered in m_functions.
            bool PugXPathPredicate::hasFunction(BString function)
            {
                return (getFunction(function) != 0);
            }

            // ---------------------------------------------------------------------------------------------------------------------

            // Looks up a registered function by name. Returns the first entry of
            // m_functions whose getName() equals `function`, or 0 if none does.
            PugXPathFunction* PugXPathPredicate::getFunction(BString function)
            {
                PugXPathFunction* match = 0;

                for (int idx = 0; match == 0 && idx < m_functions.getSize(); ++idx)
                {
                    PugXPathFunction* candidate = m_functions[idx];
                    if (function == candidate->getName())
                    {
                        match = candidate;
                    }
                }

                return (match);
            }

            // ---------------------------------------------------------------------------------------------------------------------

            // Const overload: forwards to the non-const lookup and hands the
            // result back as a pointer-to-const.
            const PugXPathFunction* PugXPathPredicate::getFunction(BString function) const
            {
                PugXPathPredicate* self = const_cast<PugXPathPredicate*>(this);
                return self->getFunction(function);
            }

            // ---------------------------------------------------------------------------------------------------------------------

            // Registers `function` so predicates can call it by name. A function
            // whose name is already registered is ignored. The pointer is stored
            // as given; this method does not copy the object.
            void PugXPathPredicate::addFunction(PugXPathFunction* function)
            {
                if (hasFunction(function->getName()))
                {
                    return;
                }

                m_functions.append(function);
            }

            // ---------------------------------------------------------------------------------------------------------------------

            // Unregisters the first function named `function`, if any. The entry
            // is only removed from m_functions; the object is not deleted here.
            void PugXPathPredicate::removeFunction(BString function)
            {
                int idx = 0;
                while (idx < m_functions.getSize() && !(function == m_functions[idx]->getName()))
                {
                    ++idx;
                }

                if (idx < m_functions.getSize())
                {
                    m_functions.remove(idx);
                }
            }

            // ---------------------------------------------------------------------------------------------------------------------

            // Evaluates a parsed predicate and returns the resulting token.
            //
            // Works on a copy of `predicate`. Each pass scans left to right for the
            // first ")" and pairs it with the most recent "(" before it, i.e. an
            // innermost group. That group (together with the function name in front
            // of it, when the preceding token is a FUNCTION) is evaluated via
            // evaluateSection() and replaced by its result. When no parentheses
            // remain, the flat expression is evaluated and returned.
            //
            // Throws PugXPathException for unbalanced parentheses: a ")" without a
            // preceding "(", or a "(" that is never closed.
            //
            // NOTE(review): parentheses are recognised by token text only, so a
            // STRING token whose value is "(" or ")" would be treated as a paren --
            // consider also checking for PugXPathToken::PAREN.
            PugXPathToken PugXPathPredicate::evaluate(Array<PugXPathToken> predicate, Array<xml_node_struct*> search, xml_node_struct* context)
            {
                Array<PugXPathToken> wrkPredicate = predicate.copy();

                while (true)
                {
                    int  lastParen = -1;
                    bool reduced   = false;

                    for (int i = 0; i < wrkPredicate.getSize(); ++i)
                    {
                        BString value = wrkPredicate[i].getValueString();
                        if (value == "(")
                        {
                            lastParen = i;
                        }
                        else if (value == ")")
                        {
                            // Index 0 is a legitimate position for "("; only a
                            // missing opener (-1) is an error.
                            if (lastParen < 0)
                            {
                                throw PugXPathException("Invalid PugXPath expression given");
                            }

                            int begIdx = lastParen + 1;
                            int endIdx = i - 1;
                            int begRmv = lastParen;
                            int endRmv = i;
                            bool func  = false;

                            if (lastParen >= 1)
                            {
                                if (wrkPredicate[lastParen - 1].getType() == PugXPathToken::FUNCTION)
                                {
                                    // For a call, evaluateSection() expects the range
                                    // to span "name ( ... )" inclusive.
                                    begIdx -= 2;
                                    endIdx += 1;
                                    begRmv -= 1;
                                    func    = true;
                                }
                            }

                            PugXPathToken result = evaluateSection(wrkPredicate, search, context, begIdx, endIdx, func);

                            // Remove back-to-front so the remaining indices stay valid.
                            for (int iR = endRmv; iR >= begRmv; --iR)
                            {
                                wrkPredicate.remove(iR);
                            }

                            wrkPredicate.insert(result, begRmv);
                            reduced = true;
                            break;
                        }
                    }

                    if (reduced)
                    {
                        // One group collapsed; rescan from the start.
                        continue;
                    }

                    if (lastParen != -1)
                    {
                        // A "(" was seen but never closed. Without this check the
                        // outer loop would spin forever.
                        throw PugXPathException("Invalid PugXPath expression given");
                    }

                    int wrkSize = wrkPredicate.getSize();
                    if (wrkSize == 1)
                    {
                        return wrkPredicate[0];
                    }
                    return evaluateSection(wrkPredicate, search, context, 0, wrkSize - 1, false);
                }

                // Not reached; keeps compilers that want a trailing return satisfied.
                return PugXPathToken();
            }

            // ---------------------------------------------------------------------------------------------------------------------

            // Evaluates the tokens predicate[begIdx..endIdx] (inclusive).
            //
            // func == true:  the range must be "name ( args... )". Arguments are
            //                split on "," tokens; a multi-token argument is evaluated
            //                recursively, a single-token argument is passed as is,
            //                and an empty argument is skipped. The registered
            //                function `name` is then executed.
            // func == false: the range is an operand/operator sequence
            //                "a op b op c ...". The operator with the highest index
            //                in m_operators is applied first (leftmost on ties) and
            //                replaced by its result, until one value remains. A range
            //                holding a single non-operator token yields that token.
            //
            // Throws PugXPathException for malformed call syntax, an unknown
            // function, a wrong argument count, an even-length or empty operator
            // sequence, a non-operator token in operator position, an operator that
            // is not in m_operators, or incompatible operand types.
            PugXPathToken PugXPathPredicate::evaluateSection(Array<PugXPathToken> predicate, Array<xml_node_struct*> search, xml_node_struct* context, int begIdx, int endIdx, bool func)
            {
                if (func)
                {
                    BString functionName = predicate[begIdx].getValueString();

                    Array<PugXPathToken> parms;

                    // Both the opening and the closing parenthesis must be present.
                    if (predicate[begIdx + 1].getValueString() != "(" || predicate[endIdx].getValueString() != ")")
                    {
                        throw PugXPathException("Invalid parameters to function given to PugXPathPredicate::evaluateSection");
                    }

                    // Split the argument list on ","; the closing ")" at endIdx
                    // terminates the last argument.
                    int lastBeg = begIdx + 2;
                    for (int i = begIdx + 2; i <= endIdx; ++i)
                    {
                        if (predicate[i].getValueString() == "," || i == endIdx)
                        {
                            if (i - lastBeg > 1)
                            {
                                PugXPathToken result = evaluateSection(predicate, search, context, lastBeg, i - 1, false);
                                parms.append(result);
                            }
                            else if (i - lastBeg != 0)
                            {
                                parms.append(predicate[i - 1]);
                            }
                            lastBeg = i + 1;
                        }
                    }

                    PugXPathFunction* function = getFunction(functionName);
                    if (function == 0)
                    {
                        throw PugXPathException("Invalid PugXPath function specified: '" + functionName + "'");
                    }

                    // A parameter count of -1 disables the argument-count check.
                    int parmsAllowed = function->getParmCount();
                    if (parmsAllowed != -1 && parmsAllowed != parms.getSize())
                    {
                        throw PugXPathException("Invalid number of arguments passed to PugXPath function '" + functionName + "'");
                    }

                    return function->execute(search, context, parms);
                }
                else
                {
                    // A lone operand, e.g. the inside of "(5)", is already its own
                    // value; this mirrors the single-token shortcut in evaluate().
                    if (begIdx == endIdx && predicate[begIdx].getType() != PugXPathToken::OPERATOR)
                    {
                        return predicate[begIdx];
                    }

                    // operand (operator operand)* always has an odd token count.
                    if (((endIdx - begIdx) + 1) % 2 == 0)
                    {
                        throw PugXPathException("Invalid operation specified in PugXPath expression");
                    }

                    Array<PugXPathToken> wrkPredicate(endIdx - begIdx + 1);
                    for (int iC = begIdx; iC <= endIdx; ++iC)
                    {
                        wrkPredicate[iC - begIdx] = predicate[iC];
                    }

                    while (true)
                    {
                        // Pick the operator with the highest index in m_operators;
                        // the strict '>' keeps the leftmost one on ties.
                        int maxIdx = -1;
                        int actIdx = -1;
                        for (int i = 0; i < wrkPredicate.getSize() - 1; i += 2)
                        {
                            PugXPathToken& oper = wrkPredicate[i + 1];

                            if (oper.getType() != PugXPathToken::OPERATOR)
                            {
                                throw PugXPathException("Invalid PugXPath operator '" + oper.getValueString() + "'");
                            }

                            int opIdx = priv_findOperatorIdx(m_operators, oper.getValueString());
                            if (opIdx > maxIdx)
                            {
                                maxIdx = opIdx;
                                actIdx = i + 1;
                            }
                        }

                        // Nothing selectable: no operators left, or none of them is
                        // present in m_operators.
                        if (actIdx == -1)
                        {
                            throw PugXPathException("Invalid operation specified in PugXPath expression");
                        }

                        PugXPathToken& lparm = wrkPredicate[actIdx - 1];
                        PugXPathToken& rparm = wrkPredicate[actIdx + 1];

                        if (!lparm.isCompatibleWith(rparm.getType()))
                        {
                            throw PugXPathException("Parameters on the left and right side of an operation must be compatible");
                        }

                        Array<PugXPathToken> parms(2);
                        parms[0] = lparm;
                        parms[1] = rparm;

                        PugXPathToken result = m_operators[maxIdx]->execute(search, context, parms);

                        if (wrkPredicate.getSize() == 3)
                        {
                            return (result);
                        }

                        // Replace "lhs op rhs" with the computed result and go again.
                        wrkPredicate.remove(actIdx + 1);
                        wrkPredicate.remove(actIdx);
                        wrkPredicate.remove(actIdx - 1);

                        wrkPredicate.insert(result, actIdx - 1);
                    }
                }
    }
}
}
}   // namespaces

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Web Developer
United States United States
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions