Click here to Skip to main content
15,886,788 members
Articles / Programming Languages / XML

XInclude

Rate me:
Please Sign up or sign in to vote.
2.80/5 (5 votes)
30 Oct 2001 76.6K   1.9K   14  
XML <xinclude> implementation using XML SAX2 API from Microsoft XML Parser
// 
// XInclude.cpp : Implementation of <include> element
//				  For more information see http://xinclude.net
// 
// Copyright (C) 2001 XInclude.net
// 
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
// 
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//

#include "stdafx.h"

// String constants used to recognise the XInclude <include> element:
// the XInclude namespace URI, the element's local name, the names of the
// 'href' and 'parse' attributes, and the two accepted 'parse' values.
const wchar_t* STR_XINCLUDE_NAMESPACE  = L"http://www.w3.org/2001/XInclude";
const wchar_t* STR_XINCLUDE_TAG        = L"include";
const wchar_t* STR_XINCLUDE_HREF_ATTR  = L"href";
const wchar_t* STR_XINCLUDE_PARSE_ATTR = L"parse";
const wchar_t* STR_XINCLUDE_PARSE_XML  = L"xml";
const wchar_t* STR_XINCLUDE_PARSE_TEXT = L"text";


// Forward declarations of the class and free functions that are used
// before their definitions further down in this file.
// DumpError's last two parameters (URL and locator) are optional.
class ISAXFilter;
HRESULT	XInclude	(const string& strUrl, const CComPtr<ISAXFilter>& spFilter);
string	ToStr		(const wchar_t *pwchChars, int cchChars);
HRESULT DumpError   (const string& strError, const string& strErrorMessage,
				     const string& strUrl = "", const CComPtr<ISAXLocator>& spLocator = NULL);


// Uncomment the following line to trace every SAX callback to cout.
// LOG_FUNCTION prints just the callback name; LOG_FUNCTION_<n> additionally
// prints n (pointer, length) string arguments converted with ToStr.
// When LOG_FUNCTIONS is not defined all of the macros expand to nothing.
// #define LOG_FUNCTIONS
#ifdef LOG_FUNCTIONS
#define LOG_FUNCTION(STR)												cout << STR << endl;
#define LOG_FUNCTION_1(STR, P1, C1)										cout << STR << "('" << ToStr(P1, C1) << "')" << endl;
#define LOG_FUNCTION_2(STR, P1, C1, P2, C2)								cout << STR << "('" << ToStr(P1, C1) << "', '" << ToStr(P2, C2) << "')" <<endl;
#define LOG_FUNCTION_3(STR, P1, C1, P2, C2, P3, C3)						cout << STR << "('" << ToStr(P1, C1) << "', '" << ToStr(P2, C2) << "', '" << ToStr(P3, C3) << "')" <<endl;
#define LOG_FUNCTION_4(STR, P1, C1, P2, C2, P3, C3, P4, C4)				cout << STR << "('" << ToStr(P1, C1) << "', '" << ToStr(P2, C2) << "', '" << ToStr(P3, C3) << "', '" << ToStr(P4, C4) << "')" <<endl;
#define LOG_FUNCTION_5(STR, P1, C1, P2, C2, P3, C3, P4, C4, P5, C5)		cout << STR << "('" << ToStr(P1, C1) << "', '" << ToStr(P2, C2) << "', '" << ToStr(P3, C3) << "', '" << ToStr(P4, C4) << "', '" << ToStr(P5, C5) << "')" <<endl;
#else
#define LOG_FUNCTION(STR)
#define LOG_FUNCTION_1(STR, P1, C1)
#define LOG_FUNCTION_2(STR, P1, C1, P2, C2)
#define LOG_FUNCTION_3(STR, P1, C1, P2, C2, P3, C3)
#define LOG_FUNCTION_4(STR, P1, C1, P2, C2, P3, C3, P4, C4)
#define LOG_FUNCTION_5(STR, P1, C1, P2, C2, P3, C3, P4, C4, P5, C5)
#endif

// Globals
// The ATL module object; presumably required by the CComObject<> classes
// used below (NOTE(review): confirm against the ATL version in use).
CComModule			_Module;


//
// ToStr
//
// Converts a counted (not necessarily NUL-terminated) wide string to a
// narrow string using the current C locale.
//
//   pwchChars - first wide character; may be NULL
//   cchChars  - number of wide characters to convert
//
// Returns the converted text. A NULL pointer or a non-positive count gives
// an empty string. Characters that cannot be represented in the current
// locale are replaced by '?' instead of aborting the whole conversion.
//
string ToStr(const wchar_t *pwchChars, int cchChars)
{
	string strResult;

	if (pwchChars == NULL || cchChars <= 0)
	{
		return strResult;
	}

	strResult.reserve(cchChars);

	// Scratch buffer large enough for one multibyte character
	vector<char> vecchMb(MB_CUR_MAX);

	// Reset the conversion state before starting
	wctomb(NULL, 0);

	// Convert character by character: the input carries an explicit length,
	// so it must never be scanned for a terminating NUL
	for (int ich = 0; ich < cchChars; ++ich)
	{
		const int cb = wctomb(&vecchMb[0], pwchChars[ich]);

		if (cb > 0)
		{
			strResult.append(&vecchMb[0], cb);
		}
		else
		{
			strResult += '?';
		}
	}

	return strResult;
}


//
// IComStream
//
// Write-only adapter that exposes a std::ostream through the COM IStream
// interface. Only Write() is implemented; every other IStream method
// returns E_NOTIMPL. Carriage returns are removed from everything that is
// written. The wrapped ostream is not owned by this object.
//
class ATL_NO_VTABLE IComStream : 
	public CComObjectRootEx<CComSingleThreadModel>,
	public IStream
{
public:

BEGIN_COM_MAP(IComStream)
	COM_INTERFACE_ENTRY(ISequentialStream)
	COM_INTERFACE_ENTRY(IStream)
END_COM_MAP()

	IComStream() :
		_pstrmout(NULL),
		_bUTF8(false)
	{
	}

	~IComStream()
	{
	}

	//
	// Init
	//
	// Stores the target stream and whether incoming bytes are UTF-8
	// (true) or wide characters (false).
	//
	HRESULT Init(ostream* pstrmout, bool bUTF8)
	{
		_pstrmout = pstrmout;
		_bUTF8    = bUTF8;

		return S_OK;
	}

	//
	// CreateInstance
	//
	// Creates a stream object writing to *pstrmout and returns it with one
	// reference in *ppStream. strEnc is the encoding name of the data that
	// will be written; "UTF-8" (case-insensitive) selects byte pass-through,
	// anything else is treated as wide characters.
	//
	static HRESULT CreateInstance(IComStream** ppStream, ostream* pstrmout, const string& strEnc)
	{
		if (ppStream == NULL || pstrmout == NULL)
		{
			return E_POINTER;
		}

		*ppStream = NULL;

		CComObject<IComStream>* pStream = NULL;
		HRESULT hr = CComObject<IComStream>::CreateInstance(&pStream);
		if (FAILED(hr))
		{
			return hr;
		}
		if (pStream == NULL)
		{
			return E_FAIL;
		}

		pStream->AddRef();

		hr = pStream->Init(pstrmout, stricmp(strEnc.c_str(), "UTF-8") == 0);
		if (FAILED(hr))
		{
			pStream->Release();
			return hr;
		}

		*ppStream = pStream;

		return S_OK;
	}

	//
	// ISequentialStream methods 
	//
	STDMETHOD(Read)(void* pv, ULONG cb, ULONG* pcbRead)
			{ return E_NOTIMPL; }

	//
	// Write
	//
	// Writes cb bytes from pv to the wrapped ostream. pcbWritten is
	// optional (the ISequentialStream contract allows NULL); when given it
	// receives cb on success and 0 otherwise.
	//
	STDMETHOD(Write)(void const* pv, ULONG cb, ULONG * pcbWritten)
	{
		if (pcbWritten != NULL)
		{
			*pcbWritten = 0;
		}

		if (pv == NULL)
		{
			return STG_E_INVALIDPOINTER;
		}

		if (_pstrmout == NULL)
		{
			// Init() was never called
			return E_UNEXPECTED;
		}

		if (cb == 0)
		{
			return S_OK;
		}

		string str;
		if (_bUTF8)
		{
			// Bytes are passed through unchanged
			str.assign(static_cast<const char*>(pv), cb);
		}
		else
		{
			// Wide characters are converted to the current locale
			str = ToStr(static_cast<const wchar_t*>(pv), static_cast<int>(cb / sizeof(wchar_t)));
		}

		StripReturns(&str);
		*_pstrmout << str;

		if (pcbWritten != NULL)
		{
			*pcbWritten = cb;
		}

		return S_OK;
	}

	// IStream methods
	STDMETHOD(Seek)(LARGE_INTEGER dlibMove, DWORD dwOrigin, ULARGE_INTEGER * plibNewPosition)
			{ return E_NOTIMPL; }

	STDMETHOD(SetSize)(ULARGE_INTEGER libNewSize)
			{ return E_NOTIMPL; }

	STDMETHOD(CopyTo)(IStream * pstm, ULARGE_INTEGER cb, ULARGE_INTEGER * pcbRead, ULARGE_INTEGER * pcbWritten)
			{ return E_NOTIMPL; }
    
	STDMETHOD(Commit)(DWORD grfCommitFlags)
			{ return E_NOTIMPL; }

	STDMETHOD(Revert)(void)
			{ return E_NOTIMPL; }

	STDMETHOD(LockRegion)(ULARGE_INTEGER libOffset, ULARGE_INTEGER cb, DWORD dwLockType)
			{ return E_NOTIMPL; }

	STDMETHOD(UnlockRegion)(ULARGE_INTEGER libOffset, ULARGE_INTEGER cb, DWORD dwLockType)
			{ return E_NOTIMPL; }

	STDMETHOD(Stat)(STATSTG * pstatstg, DWORD grfStatFlag)
			{ return E_NOTIMPL; }

	STDMETHOD(Clone )(IStream ** ppstm)
			{ return E_NOTIMPL; }

private:
	//
	// StripReturns
	//
	// Removes every carriage return ('\r') from *pstr.
	//
	void StripReturns(string* pstr)
	{
		string::size_type idx = pstr->find('\r');

		while (idx != string::npos)
		{
			pstr->erase(idx, 1);

			// The character that followed now sits at idx; continue from there
			idx = pstr->find('\r', idx);
		}
	}

	ostream*	_pstrmout;	// Target stream (not owned)
	bool		_bUTF8;		// true: data arrives as UTF-8 bytes, false: as wchar_t
};


//
// ISAXFilter
//
// SAX2 handler that sits between an MSXML SAX reader and an MXXMLWriter.
// All events are forwarded to the writer, except that an <include> element
// in the XInclude namespace is replaced by the content of the document
// named in its 'href' attribute (parsed as XML by default, or copied as
// text when parse="text"). Included content is indented by the indentation
// that was in effect at the <include> element. DTD related events and
// processing instructions of included documents are dropped.
//
class ATL_NO_VTABLE ISAXFilter : 
	public CComObjectRootEx<CComSingleThreadModel>,
	public ISAXContentHandler,
	public ISAXLexicalHandler,
	public ISAXDeclHandler,
	public ISAXDTDHandler,
	public ISAXErrorHandler
{
public:

BEGIN_COM_MAP(ISAXFilter)
	COM_INTERFACE_ENTRY(ISAXContentHandler)
	COM_INTERFACE_ENTRY(ISAXLexicalHandler)
	COM_INTERFACE_ENTRY(ISAXDeclHandler)
	COM_INTERFACE_ENTRY(ISAXDTDHandler)
	COM_INTERFACE_ENTRY(ISAXErrorHandler)
END_COM_MAP()

	//
	// Nested class: ISAXFilterContext
	//
	// Per-document state: one context exists for the top level document
	// and a new one is created for every nested include.
	//
	class ISAXFilterContext
	{
	public:
		// strParentIndent: indentation added in front of every line of this
		//                  document; level: include nesting depth (0 = top)
		ISAXFilterContext(const string& strParentIndent = "", int level = 0) :
			_bTrailingNewline(true),
			_strParentIndent(strParentIndent),
			_level(level)
		{
		}

		void SetTrailingNewline(bool bNewline)
		{
			_bTrailingNewline = bNewline;
		}

		bool GetTrailingNewline() const
		{
			return _bTrailingNewline;
		}

		const string& GetParentIndent() const
		{
			return _strParentIndent;
		}

		// Sets the current indentation to as many blanks as strIndent has
		// leading whitespace characters
		void SetIndent(const string& strIndent)
		{
			string::size_type cchIndent = 0;

			// The cast keeps isspace() defined for characters >= 0x80
			while (cchIndent < strIndent.size() &&
				   isspace(static_cast<unsigned char>(strIndent[cchIndent])))
			{
				++cchIndent;
			}

			_strIndent = string(cchIndent, ' ');
		}

		const string& GetIndent() const
		{
			return _strIndent;
		}

		void SetURL(const string& strUrl)
		{
			_strUrl = strUrl;
		}

		const string& GetURL() const
		{
			return _strUrl;
		}

		int GetLevel() const
		{
			return _level;
		}

		// True for every document except the top level one
		bool IsIncluded() const
		{
			return _level > 0;
		}

		void SetLocator(ISAXLocator *pLocator)
		{
			_spLocator = pLocator;
		}

		const CComPtr<ISAXLocator>& GetLocator() const
		{
			return _spLocator;
		}

	private:
		bool					_bTrailingNewline;	// Last text seen ended in a newline
		string					_strParentIndent;	// Indentation inherited from the including documents
		string					_strIndent;			// Indentation of the current line
		string					_strUrl;			// URL of this document (for error messages)
		int						_level;				// Include nesting depth
		CComPtr<ISAXLocator>	_spLocator;			// Position information (for error messages)
	};


	//
	// Constructor
	//
	// Registers this object as the one static filter (see GetStaticFilter).
	//
	ISAXFilter() :
		_bCDATA(false),
		_bEntity(false),
		_qContext(new ISAXFilterContext)
	{
		assert(s_pFilter == NULL);
		s_pFilter = this;
	}


	//
	// Destructor
	//
	~ISAXFilter()
	{
		s_pFilter = NULL;
	}


	//
	// Init
	//
	// Creates the MXXMLWriter all events are forwarded to and connects its
	// output to *pstrmout. Returns a failure HRESULT if the writer cannot
	// be created or configured.
	//
	HRESULT Init(ostream* pstrmout)
	{
		HRESULT hr;

		CComPtr<IMXWriter> spWriter;
		__HR(spWriter.CoCreateInstance(CLSID_MXXMLWriter, NULL, CLSCTX_SERVER));
		if (spWriter == NULL)
		{
			return E_FAIL;
		}

		// put_encoding expects a real BSTR, not a wide string literal
		CComBSTR bstrEncoding(L"UTF-8");
		__HR(spWriter->put_encoding(bstrEncoding));

		_spContentHandler = spWriter;
		_spLexicalHandler = spWriter;
		_spDeclHandler    = spWriter;
		_spDTDHandler     = spWriter;

		// Every callback dereferences these, so make sure they all exist
		if (_spContentHandler == NULL || _spLexicalHandler == NULL ||
			_spDeclHandler    == NULL || _spDTDHandler     == NULL)
		{
			return E_NOINTERFACE;
		}

		// Ask the writer which encoding it is really going to produce
		CComBSTR ccombstr;
		__HR(spWriter->get_encoding(&ccombstr));
		bstr_t bstr(ccombstr);

		const char* pszEncoding = static_cast<const char*>(bstr);
		const string strEncoding = (pszEncoding != NULL) ? pszEncoding : "";

		__HR(IComStream::CreateInstance(&_spStream, pstrmout, strEncoding));

		CComVariant varOutput;
		varOutput = _spStream;
		__HR(spWriter->put_output(varOutput));

		return S_OK;
	}


	//
	// CreateInstance
	//
	// Creates and initializes a filter writing to *pstrmout. On success
	// *ppFilter receives the new object with one reference.
	//
	static HRESULT CreateInstance(ISAXFilter** ppFilter, ostream* pstrmout)
	{
		if (ppFilter == NULL)
		{
			return E_POINTER;
		}

		CComObject<ISAXFilter>* pFilter = NULL;
		HRESULT hr = CComObject<ISAXFilter>::CreateInstance(&pFilter);
		if (FAILED(hr))
		{
			return hr;
		}
		if (pFilter == NULL)
		{
			return E_FAIL;
		}

		pFilter->AddRef();

		// A filter whose Init failed has no writer and must not be handed out
		hr = pFilter->Init(pstrmout);
		if (FAILED(hr))
		{
			pFilter->Release();
			return hr;
		}

		*ppFilter = pFilter;

		return S_OK;
	}


	//
	// GetStaticFilter
	//
	// Returns the filter that currently exists, or NULL.
	//
	static ISAXFilter* GetStaticFilter()
	{
		return s_pFilter;
	}


	//
	// setURL
	//
	// Remembers the URL of the document being parsed in the current context.
	//
	HRESULT setURL(const string& strUrl) const
	{
		_qContext->SetURL(strUrl);

		return S_OK;
	}


	//
	// flush
	//
	// Flushes the writer so that pending output appears before any
	// diagnostics are printed.
	//
	void flush()
	{
		CComQIPtr<IMXWriter> spWriter = _spContentHandler;
		if (spWriter != NULL)
		{
			spWriter->flush();
		}
	}


	//
	// Interface: ISAXContentHandler
	//
	virtual HRESULT STDMETHODCALLTYPE putDocumentLocator( 
		/* [in] */ ISAXLocator *pLocator)
	{
		LOG_FUNCTION("putDocumentLocator");

		_qContext->SetLocator(pLocator);

		return _spContentHandler->putDocumentLocator(pLocator);
	}

	virtual HRESULT STDMETHODCALLTYPE startDocument( void)
	{
		LOG_FUNCTION("startDocument");

		Prelude();
		
		return _spContentHandler->startDocument();
	}

	virtual HRESULT STDMETHODCALLTYPE endDocument( void)
	{
		LOG_FUNCTION("endDocument");

		Prelude();
		
		return _spContentHandler->endDocument();
	}

	virtual HRESULT STDMETHODCALLTYPE startPrefixMapping( 
		/* [in] */ const wchar_t *pwchPrefix,
		/* [in] */ int cchPrefix,
		/* [in] */ const wchar_t *pwchUri,
		/* [in] */ int cchUri)
	{
		LOG_FUNCTION_2("startPrefixMapping", pwchPrefix, cchPrefix, pwchUri, cchUri);

		Prelude();
		
		return _spContentHandler->startPrefixMapping(pwchPrefix, cchPrefix, pwchUri, cchUri);
	}

	virtual HRESULT STDMETHODCALLTYPE endPrefixMapping( 
		/* [in] */ const wchar_t *pwchPrefix,
		/* [in] */ int cchPrefix)
	{
		LOG_FUNCTION_1("endPrefixMapping", pwchPrefix, cchPrefix);

		Prelude();
		
		return _spContentHandler->endPrefixMapping(pwchPrefix, cchPrefix);
	}

	//
	// startElement
	//
	// Forwards every element to the writer, except for the XInclude
	// <include> element, which is replaced by the referenced document.
	//
	virtual HRESULT STDMETHODCALLTYPE startElement( 
		/* [in] */ const wchar_t *pwchNamespaceUri,
		/* [in] */ int cchNamespaceUri,
		/* [in] */ const wchar_t *pwchLocalName,
		/* [in] */ int cchLocalName,
		/* [in] */ const wchar_t *pwchQName,
		/* [in] */ int cchQName,
		/* [in] */ ISAXAttributes *pAttributes)
	{
		LOG_FUNCTION_3("startElement", pwchNamespaceUri, cchNamespaceUri, pwchLocalName, cchLocalName, pwchQName, cchQName);

		Prelude();

		if (!IsXIncludeElement(pwchNamespaceUri, cchNamespaceUri, pwchLocalName, cchLocalName))
		{
			return _spContentHandler->startElement(pwchNamespaceUri, cchNamespaceUri, pwchLocalName, cchLocalName, pwchQName, cchQName, pAttributes);
		}

		HRESULT hr;

		// The 'href' attribute is mandatory
		const wchar_t* pwchHRef = NULL;
		int			   cchHRef  = 0;

		hr = pAttributes->getValueFromName(L"", 0, STR_XINCLUDE_HREF_ATTR, static_cast<int>(wcslen(STR_XINCLUDE_HREF_ATTR)),
										   &pwchHRef, &cchHRef);

		if (FAILED(hr) || pwchHRef == NULL)
		{
			DumpError("Error", L"Required attribute 'href' missing for <include> element\n");
			return E_FAIL;
		}

		string strHRef = ToStr(pwchHRef, cchHRef);

		const string::size_type idxFragment = strHRef.find('#');
		if (idxFragment != string::npos)
		{
			// Strip off the XPointer part
			strHRef.erase(idxFragment);
		}

		// The 'parse' attribute is optional and defaults to "xml"
		const wchar_t* pwchParse = NULL;
		int			   cchParse  = 0;

		hr = pAttributes->getValueFromName(L"", 0, STR_XINCLUDE_PARSE_ATTR, static_cast<int>(wcslen(STR_XINCLUDE_PARSE_ATTR)),
										   &pwchParse, &cchParse);

		bool bParseXML = true;

		if (SUCCEEDED(hr) && pwchParse != NULL)
		{
			if (IsEqual(pwchParse, cchParse, STR_XINCLUDE_PARSE_TEXT, false))
			{
				bParseXML = false;
			}
			else if (!IsEqual(pwchParse, cchParse, STR_XINCLUDE_PARSE_XML, false))
			{
				DumpError("Error", "Found invalid value for attribute 'parse': '" + ToStr(pwchParse, cchParse) + "'\n");
				return E_FAIL;
			}
		}

		// Disable XML declaration for nested XML files
		CComQIPtr<IMXWriter> spWriter = _spContentHandler;
		if (spWriter != NULL)
		{
			spWriter->put_omitXMLDeclaration(VARIANT_TRUE);
		}

		// Save current context
		auto_ptr<ISAXFilterContext> qContextCur = _qContext;

		// Create new context with indentation
		_qContext = auto_ptr<ISAXFilterContext>(
					    new ISAXFilterContext(qContextCur->GetParentIndent() + qContextCur->GetIndent(),
											  qContextCur->GetLevel() + 1));

		HRESULT hrIncluded = bParseXML ? XInclude(strHRef, this) :
										 IncludeText(strHRef);

		// Restore current context (destroying the one of the included document)
		_qContext = qContextCur;

		return hrIncluded;
	}

	//
	// endElement
	//
	// Forwards the event unless it closes an XInclude <include> element,
	// for which no start tag was written either.
	//
	virtual HRESULT STDMETHODCALLTYPE endElement( 
		/* [in] */ const wchar_t *pwchNamespaceUri,
		/* [in] */ int cchNamespaceUri,
		/* [in] */ const wchar_t *pwchLocalName,
		/* [in] */ int cchLocalName,
		/* [in] */ const wchar_t *pwchQName,
		/* [in] */ int cchQName)
	{
		LOG_FUNCTION_3("endElement", pwchNamespaceUri, cchNamespaceUri, pwchLocalName, cchLocalName, pwchQName, cchQName);

		Prelude();

		if (IsXIncludeElement(pwchNamespaceUri, cchNamespaceUri, pwchLocalName, cchLocalName))
		{
			return S_OK;
		}

		return _spContentHandler->endElement(pwchNamespaceUri, cchNamespaceUri, pwchLocalName, cchLocalName, pwchQName, cchQName);
	}

	//
	// characters
	//
	// Forwards character data, adding the parent indentation around line
	// breaks and tracking the indentation of the current line so that a
	// following <include> can inherit it. Text reported while inside an
	// entity is dropped (the entity reference itself was already written).
	//
	virtual HRESULT STDMETHODCALLTYPE characters( 
		/* [in] */ const wchar_t *pwchChars,
		/* [in] */ int cchChars)
	{
		LOG_FUNCTION_1("characters", pwchChars, cchChars);

		bool bTrailingNewline = Prelude();

		if (_bEntity)
		{
			// Ignore contents of entities
			return S_OK;
		}

		string str = ToStr(pwchChars, cchChars);

		string::size_type indexLast = string::npos;
		string::size_type index		= str.find('\n');
		
		while (index != string::npos)
		{
			indexLast = index;

			if (index + 1 == str.size())
			{
				// newline as last character
				_qContext->SetTrailingNewline(true);
				str.append(_qContext->GetParentIndent());

				indexLast = string::npos;
				break;
			}
			else
			{
				// NOTE(review): this inserts the indentation in front of the
				// newline, whereas the trailing-newline case above and
				// WriteIndented() place it after the newline. Behavior is kept
				// as it was - confirm which one is intended.
				str.insert(index, _qContext->GetParentIndent());
				index += _qContext->GetParentIndent().size();

				// find next
				index = str.find('\n', index + 1);
			}
		}

		if (indexLast != string::npos && 
			indexLast + 1 != str.size())
		{
			_qContext->SetIndent(str.substr(indexLast + 1));
		}
		
		if (bTrailingNewline && !_bCDATA)
		{
			// First text on a new line: its leading whitespace is the indentation
			_qContext->SetIndent(ToStr(pwchChars, cchChars));
		}

		if (str.size() == static_cast<string::size_type>(cchChars))
		{
			// Nothing was added: pass the original wide characters through
			return _spContentHandler->characters(pwchChars, cchChars);
		}

		return WriteChars(str);
	}

	virtual HRESULT STDMETHODCALLTYPE ignorableWhitespace( 
		/* [in] */ const wchar_t *pwchChars,
		/* [in] */ int cchChars)
	{
		LOG_FUNCTION_1("ignorableWhitespace", pwchChars, cchChars);

		Prelude();
		
		return _spContentHandler->ignorableWhitespace(pwchChars, cchChars);
	}

	virtual HRESULT STDMETHODCALLTYPE processingInstruction( 
		/* [in] */ const wchar_t *pwchTarget,
		/* [in] */ int cchTarget,
		/* [in] */ const wchar_t *pwchData,
		/* [in] */ int cchData)
	{
		LOG_FUNCTION_2("processingInstruction", pwchTarget, cchTarget, pwchData, cchData);

		Prelude();

		return _qContext->IsIncluded() ? S_OK :
			   _spContentHandler->processingInstruction(pwchTarget, cchTarget, pwchData, cchData);
	}

	virtual HRESULT STDMETHODCALLTYPE skippedEntity( 
		/* [in] */ const wchar_t *pwchName,
		/* [in] */ int cchName)
	{ 
		LOG_FUNCTION_1("skippedEntity", pwchName, cchName);

		Prelude();

		return _spContentHandler->skippedEntity(pwchName, cchName);
	}

	//
	// Interface: ISAXLexicalHandler
	//
    virtual HRESULT STDMETHODCALLTYPE startDTD( 
        /* [in] */ const wchar_t *pwchName,
        /* [in] */ int cchName,
        /* [in] */ const wchar_t *pwchPublicId,
        /* [in] */ int cchPublicId,
        /* [in] */ const wchar_t *pwchSystemId,
        /* [in] */ int cchSystemId)
	{
		LOG_FUNCTION_3("startDTD", pwchName, cchName, pwchPublicId, cchPublicId, pwchSystemId, cchSystemId);

		Prelude();

		return _qContext->IsIncluded() ? S_OK :
			   _spLexicalHandler->startDTD(pwchName, cchName, pwchPublicId, cchPublicId, pwchSystemId, cchSystemId);
	}
    
    virtual HRESULT STDMETHODCALLTYPE endDTD( void)
	{
		LOG_FUNCTION("endDTD");

		Prelude();

		return _qContext->IsIncluded() ? S_OK :
			   _spLexicalHandler->endDTD();
	}
    
	//
	// startEntity
	//
	// Writes the entity reference ("&name;") itself; the expanded content
	// that follows is suppressed in characters() until endEntity.
	//
    virtual HRESULT STDMETHODCALLTYPE startEntity( 
        /* [in] */ const wchar_t *pwchName,
        /* [in] */ int cchName)
	{
		LOG_FUNCTION_1("startEntity", pwchName, cchName);

		Prelude();

		assert(_bEntity == false);
		_bEntity = true;

		{
			HRESULT hr;

			// Output the entity
			stringstream strstrm;
			strstrm << "&" << ToStr(pwchName, cchName) << ";";

			// The '&' must reach the output unescaped
			__HR(WriteWithoutEscaping(strstrm));
		}

		return _spLexicalHandler->startEntity(pwchName, cchName);
	}
    
    virtual HRESULT STDMETHODCALLTYPE endEntity( 
        /* [in] */ const wchar_t *pwchName,
        /* [in] */ int cchName)
	{
		LOG_FUNCTION_1("endEntity", pwchName, cchName);

		Prelude();

		assert(_bEntity == true);
		_bEntity = false;

		return _spLexicalHandler->endEntity(pwchName, cchName);
	}
    
    virtual HRESULT STDMETHODCALLTYPE startCDATA( void)
	{
		LOG_FUNCTION("startCDATA");

		Prelude();

		assert(_bCDATA == false);
		_bCDATA = true;

		return _spLexicalHandler->startCDATA();
	}
    
    virtual HRESULT STDMETHODCALLTYPE endCDATA( void)
	{
		LOG_FUNCTION("endCDATA");

		Prelude();

		assert(_bCDATA == true);
		_bCDATA = false;

		return _spLexicalHandler->endCDATA();
	}
    
	//
	// comment
	//
	// Comment text is collected here and written by the next call of
	// Prelude(), i.e. when the next event of any other kind arrives.
	//
    virtual HRESULT STDMETHODCALLTYPE comment( 
        /* [in] */ const wchar_t *pwchChars,
        /* [in] */ int cchChars)
	{
		LOG_FUNCTION_1("comment", pwchChars, cchChars);

		Prelude(false);

		_strComment += ToStr(pwchChars, cchChars);

		if (cchChars > 0 && pwchChars[cchChars - 1] == '\n')
		{
			// If comment ends with a newline, add indentation
			_strComment += _qContext->GetParentIndent();
		}

		return S_OK;
	}


	//
	// Interface: ISAXDeclHandler
	//
    virtual HRESULT STDMETHODCALLTYPE elementDecl( 
        /* [in] */ const wchar_t *pwchName,
        /* [in] */ int cchName,
        /* [in] */ const wchar_t *pwchModel,
        /* [in] */ int cchModel)
	{
		LOG_FUNCTION_2("elementDecl", pwchName, cchName, pwchModel, cchModel);

		Prelude();

		return _qContext->IsIncluded() ? S_OK :
			   _spDeclHandler->elementDecl(pwchName, cchName, pwchModel, cchModel);
	}
    
    virtual HRESULT STDMETHODCALLTYPE attributeDecl( 
        /* [in] */ const wchar_t *pwchElementName,
        /* [in] */ int cchElementName,
        /* [in] */ const wchar_t *pwchAttributeName,
        /* [in] */ int cchAttributeName,
        /* [in] */ const wchar_t *pwchType,
        /* [in] */ int cchType,
        /* [in] */ const wchar_t *pwchValueDefault,
        /* [in] */ int cchValueDefault,
        /* [in] */ const wchar_t *pwchValue,
        /* [in] */ int cchValue)
	{
		LOG_FUNCTION_5("attributeDecl", pwchElementName, cchElementName, pwchAttributeName, cchAttributeName,
										pwchType, cchType, pwchValueDefault, cchValueDefault, pwchValue, cchValue);

		Prelude();

		return _qContext->IsIncluded() ? S_OK :
			   _spDeclHandler->attributeDecl(pwchElementName, cchElementName, pwchAttributeName, cchAttributeName,
											 pwchType, cchType, pwchValueDefault, cchValueDefault, pwchValue, cchValue);
	}

	//
	// internalEntityDecl
	//
	// Declarations of included documents are dropped. In the top level
	// document a single-character entity is written as a decimal character
	// reference; everything else is forwarded to the writer.
	//
    virtual HRESULT STDMETHODCALLTYPE internalEntityDecl( 
        /* [in] */ const wchar_t *pwchName,
        /* [in] */ int cchName,
        /* [in] */ const wchar_t *pwchValue,
        /* [in] */ int cchValue)
	{
		LOG_FUNCTION_2("internalEntityDecl", pwchName, cchName, pwchValue, cchValue);

		Prelude();

		if (_qContext->IsIncluded())
		{
			return S_OK;
		}

		if (cchValue == 1)
		{
			//
			// For simple definitions, write it ourselves using 
			// the decimal notation
			//

			// Create the entity declaration. The cast makes sure the character
			// code is written as a number regardless of how the compiler
			// defines wchar_t.
			stringstream strstrm;
			strstrm << "<!ENTITY " << ToStr(pwchName, cchName) << " \"&#"
					<< static_cast<unsigned long>(pwchValue[0]) << ";\">" << endl;

			return WriteWithoutEscaping(strstrm);
		}

		return _spDeclHandler->internalEntityDecl(pwchName, cchName, pwchValue, cchValue);
	}
    
    virtual HRESULT STDMETHODCALLTYPE externalEntityDecl( 
        /* [in] */ const wchar_t *pwchName,
        /* [in] */ int cchName,
        /* [in] */ const wchar_t *pwchPublicId,
        /* [in] */ int cchPublicId,
        /* [in] */ const wchar_t *pwchSystemId,
        /* [in] */ int cchSystemId)
	{
		LOG_FUNCTION_3("externalEntityDecl", pwchName, cchName, pwchPublicId, cchPublicId, pwchSystemId, cchSystemId);

		Prelude();

		return _qContext->IsIncluded() ? S_OK :
			   _spDeclHandler->externalEntityDecl(pwchName, cchName, pwchPublicId, cchPublicId, pwchSystemId, cchSystemId);
	}


	//
	// Interface: ISAXDTDHandler
	//
    virtual HRESULT STDMETHODCALLTYPE notationDecl( 
        /* [in] */ const wchar_t *pwchName,
        /* [in] */ int cchName,
        /* [in] */ const wchar_t *pwchPublicId,
        /* [in] */ int cchPublicId,
        /* [in] */ const wchar_t *pwchSystemId,
        /* [in] */ int cchSystemId)
	{
		LOG_FUNCTION_3("notationDecl", pwchName, cchName, pwchPublicId, cchPublicId, pwchSystemId, cchSystemId);

		Prelude();

		return _qContext->IsIncluded() ? S_OK :
			   _spDTDHandler->notationDecl(pwchName, cchName, pwchPublicId, cchPublicId, pwchSystemId, cchSystemId);
	}
    
    virtual HRESULT STDMETHODCALLTYPE unparsedEntityDecl( 
        /* [in] */ const wchar_t *pwchName,
        /* [in] */ int cchName,
        /* [in] */ const wchar_t *pwchPublicId,
        /* [in] */ int cchPublicId,
        /* [in] */ const wchar_t *pwchSystemId,
        /* [in] */ int cchSystemId,
        /* [in] */ const wchar_t *pwchNotationName,
        /* [in] */ int cchNotationName)
	{
		LOG_FUNCTION_4("unparsedEntityDecl", pwchName, cchName, pwchPublicId, cchPublicId,
											 pwchSystemId, cchSystemId, pwchNotationName, cchNotationName);

		Prelude();

		return _qContext->IsIncluded() ? S_OK :
			   _spDTDHandler->unparsedEntityDecl(pwchName, cchName, pwchPublicId, cchPublicId,
												 pwchSystemId, cchSystemId, pwchNotationName, cchNotationName);
	}


	//
	// Interface: ISAXErrorHandler
	//
	// All three handlers print the message (with URL and position of the
	// current document) and return S_OK.
	//
    virtual HRESULT STDMETHODCALLTYPE error( 
        /* [in] */ ISAXLocator *pLocator,
        /* [in] */ const wchar_t *pwchErrorMessage,
        /* [in] */ HRESULT hrErrorCode)
	{
		int cchErrorMessage = (pwchErrorMessage != NULL) ? static_cast<int>(wcslen(pwchErrorMessage)) : 0;
		LOG_FUNCTION_1("error", pwchErrorMessage, cchErrorMessage);

		DumpError("Error", pwchErrorMessage);

		return S_OK;
	}
    
    virtual HRESULT STDMETHODCALLTYPE fatalError( 
        /* [in] */ ISAXLocator *pLocator,
        /* [in] */ const wchar_t *pwchErrorMessage,
        /* [in] */ HRESULT hrErrorCode)
	{
		int cchErrorMessage = (pwchErrorMessage != NULL) ? static_cast<int>(wcslen(pwchErrorMessage)) : 0;
		LOG_FUNCTION_1("fatalError", pwchErrorMessage, cchErrorMessage);

		DumpError("Fatal error", pwchErrorMessage);

		return S_OK;
	}
    
    virtual HRESULT STDMETHODCALLTYPE ignorableWarning( 
        /* [in] */ ISAXLocator *pLocator,
        /* [in] */ const wchar_t *pwchErrorMessage,
        /* [in] */ HRESULT hrErrorCode)
	{
		int cchErrorMessage = (pwchErrorMessage != NULL) ? static_cast<int>(wcslen(pwchErrorMessage)) : 0;
		LOG_FUNCTION_1("ignorableWarning", pwchErrorMessage, cchErrorMessage);

		DumpError("Warning", pwchErrorMessage);

		return S_OK;
	}


private:
	//
	// IsEqual
	//
	// Compares the counted string (pwch, cch) with the NUL-terminated
	// string pwsz. SAX strings come with an explicit length, so they are
	// never scanned for a terminator here.
	//
	static bool IsEqual(const wchar_t* pwch, int cch, const wchar_t* pwsz, bool bIgnoreCase)
	{
		if (pwch == NULL || cch < 0 || static_cast<size_t>(cch) != wcslen(pwsz))
		{
			return false;
		}

		return (bIgnoreCase ? wcsnicmp(pwch, pwsz, cch) :
							  wcsncmp( pwch, pwsz, cch)) == 0;
	}


	//
	// IsXIncludeElement
	//
	// True if the given namespace URI / local name denote the XInclude
	// <include> element. The namespace is compared without regard to case,
	// the local name exactly.
	//
	static bool IsXIncludeElement(const wchar_t* pwchNamespaceUri, int cchNamespaceUri,
								  const wchar_t* pwchLocalName,    int cchLocalName)
	{
		return IsEqual(pwchNamespaceUri, cchNamespaceUri, STR_XINCLUDE_NAMESPACE, true) &&
			   IsEqual(pwchLocalName,    cchLocalName,    STR_XINCLUDE_TAG,       false);
	}


	//
	// Prelude
	//
	// Called at the start of every callback. Writes the pending comment
	// text (unless bFlushComment is false) and returns whether the previous
	// text ended in a newline, resetting that flag.
	//
	bool Prelude(bool bFlushComment = true)
	{
		if (bFlushComment && !_strComment.empty())
		{
			bstr_t bstrComment;
			bstrComment = _strComment.c_str();
			_spLexicalHandler->comment(bstrComment, bstrComment.length());

			_strComment.clear();
		}

		bool bNewline = _qContext->GetTrailingNewline();
		_qContext->SetTrailingNewline(false);

		return bNewline;
	}


	//
	// WriteChars
	//
	// Sends a narrow string to the writer as character data.
	//
	HRESULT WriteChars(const string& str)
	{
		bstr_t bstr;
		bstr = str.c_str();

		return _spContentHandler->characters(bstr, bstr.length());
	}


	//
	// WriteWithoutEscaping
	//
	// Writes the content of strstrm with output escaping switched off, so
	// that markup characters such as '&' are written literally. Escaping
	// is switched back on even if writing fails.
	//
    HRESULT WriteWithoutEscaping(const stringstream& strstrm)
	{
		HRESULT hr;

		CComQIPtr<IMXWriter> spWriter = _spContentHandler;
		if (spWriter == NULL)
		{
			return E_NOINTERFACE;
		}

		// First disable output escaping (otherwise the '&' gets escaped)
		__HR(spWriter->put_disableOutputEscaping(VARIANT_TRUE));

		const HRESULT hrWrite = WriteChars(strstrm.str());

		// Re-enable output escaping, whether or not the write succeeded
		const HRESULT hrRestore = spWriter->put_disableOutputEscaping(VARIANT_FALSE);

		__HR(hrWrite);
		__HR(hrRestore);

		return S_OK;
	}


	//
	// DumpError
	//
	// Prints an error together with URL and position of the current document.
	//
	HRESULT DumpError(const string& strError, const string& strErrorMessage)
	{
		return ::DumpError(strError, strErrorMessage,
						   _qContext->GetURL(), _qContext->GetLocator());
	}

	//
	// DumpError
	//
	// Same as above for a NUL-terminated wide message; NULL is treated as
	// an empty message.
	//
	HRESULT DumpError(const string& strError, const wchar_t *pwchErrorMessage)
	{
		if (pwchErrorMessage == NULL)
		{
			return DumpError(strError, string());
		}

		return DumpError(strError, ToStr(pwchErrorMessage, static_cast<int>(wcslen(pwchErrorMessage))));
	}


	//
	// IncludeText
	//
	// Implements parse="text": reads the resource at strUrl and writes it
	// as (escaped) character data, indented like the including element.
	// If strUrl cannot be opened as given, it is retried as a local file
	// ("file://" + full path) or, if no such file exists, with "http://"
	// in front.
	//
	HRESULT IncludeText(const string& strUrl)
	{
		HRESULT hr;

		CComPtr<IStream> spStream;
		hr = URLOpenBlockingStream(0, strUrl.c_str(), &spStream, 0, 0);

		if (FAILED(hr))
		{
			spStream.Release();

			string strProtUrl;

			if (PathFileExists(strUrl.c_str()))
			{
				// _fullpath needs real storage to write to
				char szPath[_MAX_PATH];
				if (_fullpath(szPath, strUrl.c_str(), _MAX_PATH) == NULL)
				{
					DumpError("Fatal Error", "Failed to open URL " + strUrl + "\n");
					return E_FAIL;
				}

				strProtUrl = "file://";
				strProtUrl += szPath;
			}
			else
			{
				strProtUrl = "http://" + strUrl;
			}

			hr = URLOpenBlockingStream(0, strProtUrl.c_str(), &spStream, 0, 0);

			if (FAILED(hr))
			{
				DumpError("Fatal Error", "Failed to open URL " + strProtUrl + "\n");
				return E_FAIL;
			}
		}

		// Read everything first: indenting chunk by chunk would miss the
		// indentation after a newline that happens to end a chunk
		string strText;
		char   rgch[1024];

		while (true)
		{
			ULONG cbRead = 0;
			hr = spStream->Read(rgch, sizeof(rgch), &cbRead);

			if (FAILED(hr))
			{
				DumpError("Fatal Error", "Failed to read from URL " + strUrl + "\n");
				return E_FAIL;
			}
			else if (cbRead == 0)
			{
				break;
			}

			strText.append(rgch, cbRead);
		}

		if (strText.empty())
		{
			return S_OK;
		}

		return WriteIndented(strText);
	}


	//
	// WriteIndented
	//
	// Writes str as character data, adding the parent indentation after
	// every newline except one that is the very last character.
	//
	HRESULT WriteIndented(const string& str)
	{
		HRESULT hr;

		const string& strIndent = _qContext->GetParentIndent();

		string::size_type start = 0;
		string::size_type index	= str.find('\n');
		
		while (index != string::npos && index + 1 != str.size())
		{
			// The line including its newline, then the indentation of the next one
			__HR(WriteChars(str.substr(start, index + 1 - start)));
			__HR(WriteChars(strIndent));

			start = index + 1;

			// find next
			index = str.find('\n', start);
		}

		// Whatever follows the last handled newline
		__HR(WriteChars(str.substr(start)));

		return S_OK;
	}

private:
	CComQIPtr<ISAXContentHandler> _spContentHandler;	// The MXXMLWriter, seen through
	CComQIPtr<ISAXLexicalHandler> _spLexicalHandler;	// each of its handler interfaces
	CComQIPtr<ISAXDeclHandler>	  _spDeclHandler;
	CComQIPtr<ISAXDTDHandler>	  _spDTDHandler;
	CComPtr<IComStream>			  _spStream;			// Output of the writer

	bool						  _bCDATA;				// Inside a CDATA section
	bool						  _bEntity;				// Inside an entity expansion
	string						  _strComment;			// Comment text not yet written

	auto_ptr<ISAXFilterContext>	  _qContext;			// State of the document being parsed

	static ISAXFilter*			  s_pFilter;			// The one existing filter, or NULL
};


// Static
// Definition of the pointer returned by ISAXFilter::GetStaticFilter();
// it is set by the ISAXFilter constructor and reset by its destructor.
ISAXFilter*	ISAXFilter::s_pFilter = NULL;


//
// DumpHResult
//
// Prints a description of hr to cerr. The message text is looked up in
// the system message table first, then in URLMON.DLL and MSXML3.DLL; if
// none of them knows the code, the code itself is printed in hex.
// Pending writer output is flushed first so that the message appears
// after the XML written so far.
//
void DumpHResult(HRESULT hr)
{
	if (ISAXFilter::GetStaticFilter() != NULL)
	{
		ISAXFilter::GetStaticFilter()->flush();
	}

	// NOTE(review): Win32 errors wrapped by HRESULT_FROM_WIN32 carry
	// FACILITY_WIN32, not FACILITY_WINDOWS - confirm which one is meant.
	if (HRESULT_FACILITY(hr) == FACILITY_WINDOWS)
	{
		hr = HRESULT_CODE(hr);
	}
	
	cerr << endl << "*********" << endl;

	// No insert arguments are supplied, so inserts in the message text
	// must be left alone (FORMAT_MESSAGE_IGNORE_INSERTS)
	LPSTR szMessage = NULL;
	const DWORD dwFlags = FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_IGNORE_INSERTS;
	const DWORD dwLangId = MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT);

	if (FormatMessageA(dwFlags | FORMAT_MESSAGE_FROM_SYSTEM, NULL,
					   hr, dwLangId,
					   (LPSTR)&szMessage, 0, NULL) ||
		FormatMessageA(dwFlags | FORMAT_MESSAGE_FROM_HMODULE, GetModuleHandleA("URLMON.DLL"),
					   hr, dwLangId,
					   (LPSTR)&szMessage, 0, NULL) ||
		FormatMessageA(dwFlags | FORMAT_MESSAGE_FROM_HMODULE, GetModuleHandleA("MSXML3.DLL"),
					   hr, dwLangId,
					   (LPSTR)&szMessage, 0, NULL))
	{
		cerr << "Error: " << szMessage;
		LocalFree(szMessage);
	}
	else
	{
		// 'hex' is sticky: restore the flags so that numbers printed to
		// cerr later on (line and column numbers) stay decimal
		const ios_base::fmtflags flagsSaved = cerr.flags();
		cerr << "Error: Unknown HRESULT code: " << hex << hr << endl;
		cerr.flags(flagsSaved);
	}
}


//
// DumpError
//
// Prints an error message to cerr in the form
//
//   <strError> at URL '<strUrl>', Line <n>, Position <m>: <strErrorMessage>
//
// The URL part is left out if strUrl is empty; the position part is left
// out if there is no locator or it cannot report a position. Pending
// writer output is flushed first. Always returns S_OK.
//
HRESULT DumpError(const string& strError, const string& strErrorMessage,
				  const string& strUrl, const CComPtr<ISAXLocator>& spLocator)
{
	if (ISAXFilter::GetStaticFilter() != NULL)
	{
		ISAXFilter::GetStaticFilter()->flush();
	}

	cerr << endl << "*********" << endl;
	cerr << strError;
	
	if (!strUrl.empty())
	{
		// Opening and closing quote belong together, locator or not
		cerr << " at URL '" << strUrl << "'";
	}

	if (spLocator != NULL)
	{
		int nColumn = 0;
		int nLine   = 0;

		// A locator that cannot tell the position must not suppress the message
		if (SUCCEEDED(spLocator->getColumnNumber(&nColumn)) &&
			SUCCEEDED(spLocator->getLineNumber(&nLine)))
		{
			cerr << ", Line " << nLine << ", Position " << nColumn;
		}
	}

	cerr << ": " << strErrorMessage;

	return S_OK;
}


//
// SetHandler
//
// Registers the filter's T interface with the reader under the SAX
// property named pwchHandler (used for handlers that have no dedicated
// put...Handler method on the reader).
//
template <class T>
HRESULT SetHandler(const wchar_t* pwchHandler, const CComPtr<ISAXFilter>& spFilter, const CComPtr<ISAXXMLReader>& spReader)
{
	HRESULT hr;

	// Obtain the requested interface from the filter ...
	CComQIPtr<T> spTypedHandler = spFilter;

	// ... and hand it to the reader wrapped in a VARIANT
	variant_t varHandler;
	varHandler = spTypedHandler;

	__HR(spReader->putProperty(pwchHandler, varHandler));

	return S_OK;
}


//
// XInclude
//
// Parses the document at strUrl with a new SAX reader, sending all events
// to spFilter. Used for the top level document as well as for every
// document included from it.
//
HRESULT XInclude(const string& strUrl, const CComPtr<ISAXFilter>& spFilter)
{
	HRESULT hr;

	CComPtr<ISAXXMLReader> spSaxReader;
	__HR(spSaxReader.CoCreateInstance(CLSID_SAXXMLReader, NULL, CLSCTX_SERVER));
	if (spSaxReader == NULL)
	{
		return E_FAIL;
	}

	// Handlers with a dedicated setter
	__HR(spSaxReader->putContentHandler(spFilter));
	__HR(spSaxReader->putDTDHandler(spFilter));
	__HR(spSaxReader->putErrorHandler(spFilter));

	// Handlers that are registered as properties
	__HR(SetHandler<ISAXLexicalHandler>(L"http://xml.org/sax/properties/lexical-handler",	  spFilter, spSaxReader));
	__HR(SetHandler<ISAXDeclHandler>   (L"http://xml.org/sax/properties/declaration-handler", spFilter, spSaxReader));

	// The filter reports this URL in its error messages
	__HR(spFilter->setURL(strUrl));

	hr = spSaxReader->parseURL(bstr_t(strUrl.c_str()));
	if (SUCCEEDED(hr))
	{
		return S_OK;
	}

	if (hr == INET_E_OBJECT_NOT_FOUND)
	{
		::DumpError("Error", "Failed to open document from URL '" + strUrl + "'\n");
		return E_FAIL;
	}

	// Any other failure: print whatever text is known for the code
	::DumpHResult(hr);
	return hr;
}


//
// XInclude
//
// Entry point: processes the document at strUrl and writes the result,
// with all includes resolved, to cout.
//
HRESULT XInclude(const string& strUrl)
{
	HRESULT hr;

	// One filter serves the whole run, nested documents included
	CComPtr<ISAXFilter> spRootFilter;
	__HR(ISAXFilter::CreateInstance(&spRootFilter, &cout));

	__HR(XInclude(strUrl, spRootFilter));

	return S_OK;
}


//
// PrintUsage
//
// Writes the command line help to cout.
//
void PrintUsage()
{
	static const char* const rgszUsage[] =
	{
		"Usage: XInclude <inputfile.xml>",
		"For more information, visit http://xinclude.net"
	};

	for (size_t iLine = 0; iLine < sizeof(rgszUsage) / sizeof(rgszUsage[0]); ++iLine)
	{
		cout << rgszUsage[iLine] << endl;
	}
}


//
// main
//
int main(int argc, char* argv[])
{
	bool bPrintUsage = false;

	if (argc < 2 || argc > 2)
	{
		bPrintUsage = true;
	}
	else if (argc == 2 && strlen(argv[1]) > 0)
	{
		string str = argv[1];

		if (str[0] == '-' || str.find('?') != -1)
		{
			bPrintUsage = true;
		}
	}
	
	if (bPrintUsage)
	{
		PrintUsage();
		return EXIT_FAILURE;
	}

	CoInitialize(NULL);

	if (FAILED(XInclude(argv[1])))
	{
		return EXIT_FAILURE;
	}

	CoUninitialize();

	return EXIT_SUCCESS;
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
United States United States
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions