Click here to Skip to main content
15,891,993 members
Articles / Programming Languages / Visual Basic

EfTidy: The Tidy Library Wrapper

Rate me:
Please Sign up or sign in to vote.
4.91/5 (36 votes)
6 Sep 2013GPL310 min read 186.1K   5K   88  
A free component for HTML parsing and cleaning
// tidyCom.cpp : Implementation of CtidyCom

/**

Author :Alok Gupta
Email :thatsalok@gmail.com

You are free to modify and use this code, as long as the name above is not removed.
I am not responsible for any problems that arise from using this code.
It would be cool if you notified me about using this component!
thanks

*/
#include "stdafx.h"
#include "EfTidy.h"
#include "tidyCom.h"
#include "eftidynode.h"

/////////////////////////////////////////////////////////////////////////////
// CtidyCom

// Tells the caller whether rich error information is available for the
// requested interface: S_OK for ItidyCom, S_FALSE for anything else.
STDMETHODIMP CtidyCom::InterfaceSupportsErrorInfo(REFIID riid)
{
	return (IID_ItidyCom==riid) ? S_OK : S_FALSE;
}

/*********************************************************************
Method Name:TidyFiletoMem(BSTR sourceFile, BSTR *result)
Last Modified:23 nov 2004
Function Description: this function tidies a file and returns the result in a memory buffer
Interface Support =YES

*********************************************************************/


// Parses the file named by sourceFile, runs clean-and-repair plus diagnostics
// on it and hands the saved markup back through *result as a new BSTR.
//
// sourceFile : path of the file to tidy; must not be empty.
// result     : receives the tidied markup (NULL when nothing was produced).
//
// Returns E_POINTER when result is NULL, S_FALSE (after calling Error) when
// the path is empty or the file does not exist, S_OK otherwise. The text
// collected in the tidy error buffer is stored in m_strError.
STDMETHODIMP CtidyCom::TidyFiletoMem(BSTR sourceFile, BSTR *result)
{
	if(result==NULL)
		return E_POINTER;

	// S_FALSE is a success HRESULT, so the out parameter must hold a defined
	// value on every return path below.
	*result=NULL;

	if(::SysStringLen(sourceFile)==0)
	{
		Error(_T("First Parameter is Zero Length String"));
		return S_FALSE;
	}

	// Narrow copy of the path for the char-based tidy API. The ATL conversion
	// object owns a correctly sized, NUL-terminated buffer; the previous code
	// allocated an array of char* released with scalar delete and could leave
	// the converted string unterminated.
	CW2A fileNameA(sourceFile);
	ctmbstr pszFileName=fileNameA;

	if(!tidyFileExists(this->m_objTidyDocuemnt,pszFileName))
	{
		Error(_T("File Doesn\'t Exists"));
		return S_FALSE;
	}

	TidyBuffer output={0};
	TidyBuffer errbuf={0};
	tidyBufInit(&output);
	tidyBufInit(&errbuf);

	// Each stage runs only if the previous one did not report a negative status.
	int rc = tidySetErrorBuffer(this->m_objTidyDocuemnt, &errbuf ); // Capture diagnostics

	if ( rc >= 0 )
		rc = ::tidyParseFile(this->m_objTidyDocuemnt,pszFileName); // Parse the input

	if ( rc >= 0 )
		rc = tidyCleanAndRepair(this->m_objTidyDocuemnt); // Tidy it up!

	if ( rc >= 0 )
		rc = tidyRunDiagnostics(this->m_objTidyDocuemnt); // Kvetch

	if ( rc > 1 ) // If error, force output.
		rc = ( tidyOptSetBool(this->m_objTidyDocuemnt, TidyForceOutput, yes) ? rc : -1 );

	if ( rc >= 0 )
		rc = tidySaveBuffer(this->m_objTidyDocuemnt, &output ); // Pretty Print

	*result=CComBSTR(reinterpret_cast<LPCSTR>(output.bp)).Detach();

	// NOTE(review): whether Detach() is right here depends on the declared type
	// of m_strError (not visible in this file) - confirm the previous value and
	// the detached BSTR are not leaked.
	m_strError=CComBSTR(reinterpret_cast<LPCSTR>(errbuf.bp)).Detach();

	// NOTE(review): the document was given &errbuf above; confirm nothing writes
	// to the error sink before the next tidySetErrorBuffer call replaces it.
	tidyBufFree( &output );
	tidyBufFree( &errbuf );
	this->m_bOperationPerformed=TRUE;

	return S_OK;
}

/*********************************************************************
Method Name:TidyFileToFile(BSTR sourceFile, BSTR destFile)
Last Modified:23 Nov 2004
Function Description: This function tidies a source file and writes the result to a destination file
Interface Support =YES

*********************************************************************/

// Tidies the file named by sourceFile and saves the result to destFile.
// Both arguments must be non-empty and the source file must exist; otherwise
// Error() is called and S_FALSE is returned. On the normal path the text from
// the tidy error buffer is stored in m_strError and S_OK is returned.
STDMETHODIMP CtidyCom::TidyFileToFile(BSTR sourceFile, BSTR destFile)
{
	// Argument validation: neither path may be empty.
	if(0==::SysStringLen(sourceFile))
	{
		Error(_T("First Parameter is Zero Length String"));
		return S_FALSE;
	}
	if(0==::SysStringLen(destFile))
	{
		Error(_T("Second Parameter is Zero Length String"));
		return S_FALSE;
	}

	EfTidyString inputPath(sourceFile);
	EfTidyString outputPath(destFile);

	if(!tidyFileExists(m_objTidyDocuemnt,(ctmbstr)inputPath))
	{
		Error(_T("File Doesn\'t Exists"));
		return S_FALSE;
	}

	TidyBuffer diagnostics={0};

	// Pipeline: every step is skipped once a previous step returned a
	// negative status.
	int status=tidySetErrorBuffer(m_objTidyDocuemnt,&diagnostics);	// capture diagnostics
	if(status>=0)
		status=::tidyParseFile(m_objTidyDocuemnt,(ctmbstr)inputPath);	// parse the input
	if(status>=0)
		status=tidyCleanAndRepair(m_objTidyDocuemnt);			// tidy it up
	if(status>=0)
		status=tidyRunDiagnostics(m_objTidyDocuemnt);			// report problems
	if(status>1)
	{
		// The original author treats a status above 1 as "error": force output.
		status=tidyOptSetBool(m_objTidyDocuemnt,TidyForceOutput,yes) ? status : -1;
	}
	if(status>=0)
		status=tidySaveFile(m_objTidyDocuemnt,(ctmbstr)outputPath);	// pretty print

	// Keep the diagnostics text before the buffer is released.
	m_strError=CComBSTR((char*)diagnostics.bp).Detach();
	tidyBufFree(&diagnostics);

	m_bOperationPerformed=TRUE;
	return S_OK;
}

/*********************************************************************
Method Name:TidyMemToMem(BSTR sourceStr, BSTR *result)
Last Modified:19 oct 2004
Function Description: this function tidies markup held in a string and returns the result as a string
Interface Support =YES

*********************************************************************/

// Parses the markup in sourceStr, runs clean-and-repair plus diagnostics and
// returns the saved output through *result as a new BSTR.
//
// sourceStr : markup to tidy; must not be empty.
// result    : receives the tidied markup (NULL when nothing was produced).
//
// Returns E_POINTER when result is NULL, S_FALSE (after calling Error) when
// sourceStr is empty, S_OK otherwise. The text collected in the tidy error
// buffer is stored in m_strError.
STDMETHODIMP CtidyCom::TidyMemToMem(BSTR sourceStr, BSTR *result)
{
	if(result==NULL)
		return E_POINTER;

	// S_FALSE is a success HRESULT, so the out parameter must hold a defined
	// value on every return path below.
	*result=NULL;

	if(::SysStringLen(sourceStr)==0)
	{
		Error(_T("Buffer is Empty ie First Parameter is Zero Length String"));
		return S_FALSE;
	}

	EfTidyString strSource(sourceStr);

	TidyBuffer output={0};
	TidyBuffer errbuf={0};

	//Initialise it to ZERO
	tidyBufInit(&output);
	tidyBufInit(&errbuf);

	// Each stage runs only if the previous one did not report a negative status.
	int rc = tidySetErrorBuffer(this->m_objTidyDocuemnt, &errbuf ); // Capture diagnostics

	if ( rc >= 0 )
		rc = tidyParseString(this->m_objTidyDocuemnt,(ctmbstr)strSource); // Parse the input

	if ( rc >= 0 )
		rc = tidyCleanAndRepair(this->m_objTidyDocuemnt); // Tidy it up!

	if ( rc >= 0 )
		rc = tidyRunDiagnostics(this->m_objTidyDocuemnt); // Kvetch

	if ( rc > 1 ) // If error, force output.
		rc = ( tidyOptSetBool(this->m_objTidyDocuemnt, TidyForceOutput, yes) ? rc : -1 );

	if ( rc >= 0 )
		rc = tidySaveBuffer(this->m_objTidyDocuemnt, &output ); // Pretty Print

	*result=CComBSTR((char *)output.bp).Detach();

	// NOTE(review): whether Detach() is right here depends on the declared type
	// of m_strError (not visible in this file) - confirm nothing is leaked.
	m_strError=CComBSTR((char*)errbuf.bp).Detach();

	//free
	tidyBufFree( &output );
	tidyBufFree( &errbuf );

	this->m_bOperationPerformed=TRUE;
	return S_OK;

}

/*********************************************************************
Method Name:TidyMemtoFile(BSTR buffer, BSTR destFile)
Last Modified:2 May 2005
Function Description: this function tidies markup held in a string and saves the result to a file
Interface Support =YES

*********************************************************************/

// Tidies the markup passed in buffer and saves the result to destFile.
// Both arguments must be non-empty; otherwise Error() is called and S_FALSE
// is returned. On the normal path the text from the tidy error buffer is
// stored in m_strError and S_OK is returned.
STDMETHODIMP CtidyCom::TidyMemtoFile(BSTR buffer, BSTR destFile)
{
	// Argument validation.
	if(0==::SysStringLen(buffer))
	{
		Error(_T("Buffer is Empty"));
		return S_FALSE;
	}
	if(0==::SysStringLen(destFile))
	{
		Error("Destination fileName is Not Given");
		return S_FALSE;
	}

	EfTidyString markup(buffer);
	EfTidyString outputPath(destFile);

	TidyBuffer diagnostics={0};

	// Pipeline: every step is skipped once a previous step returned a
	// negative status.
	int status=tidySetErrorBuffer(m_objTidyDocuemnt,&diagnostics);	// capture diagnostics
	if(status>=0)
		status=tidyParseString(m_objTidyDocuemnt,(ctmbstr)markup);	// parse the input
	if(status>=0)
		status=tidyCleanAndRepair(m_objTidyDocuemnt);			// tidy it up
	if(status>=0)
		status=tidyRunDiagnostics(m_objTidyDocuemnt);			// report problems
	if(status>1)
	{
		// The original author treats a status above 1 as "error": force output.
		status=tidyOptSetBool(m_objTidyDocuemnt,TidyForceOutput,yes) ? status : -1;
	}
	if(status>=0)
		status=tidySaveFile(m_objTidyDocuemnt,(ctmbstr)outputPath);	// pretty print

	m_bOperationPerformed=TRUE;

	// Keep the diagnostics text before the buffer is released.
	m_strError=CComBSTR((char*)diagnostics.bp).Detach();
	tidyBufFree(&diagnostics);

	return S_OK;
}


/*********************************************************************
Property Name:get_TotalWarnings(long *pVal)
Last Modified:19 oct 2004
Function Description: returns the warning count of the tidy document
Interface Support =YES
*********************************************************************/

// Returns the warning count of the tidy document through *pVal.
// Returns E_POINTER when pVal is NULL and S_FALSE (after calling Error, with
// *pVal set to 0) when no tidy operation has been performed yet.
STDMETHODIMP CtidyCom::get_TotalWarnings(long *pVal)
{
	if(pVal==NULL)
		return E_POINTER;

	// S_FALSE is a success HRESULT, so give the out parameter a defined value
	// before any early return.
	*pVal=0;

	if(!this->m_bOperationPerformed)
	{
		Error("First Do some Parsing");
		return S_FALSE;
	}
	*pVal=static_cast<long>(tidyWarningCount(this->m_objTidyDocuemnt));
	return S_OK;
}


/*********************************************************************
Property Name:get_TotalErrors(long *pVal)
Last Modified:19 oct 2004
Function Description: returns the error count of the tidy document
Interface Support =YES
*********************************************************************/

// Returns the error count of the tidy document through *pVal.
// Returns E_POINTER when pVal is NULL and S_FALSE (after calling Error, with
// *pVal set to 0) when no tidy operation has been performed yet.
STDMETHODIMP CtidyCom::get_TotalErrors(long *pVal)
{
	if(pVal==NULL)
		return E_POINTER;

	// S_FALSE is a success HRESULT, so give the out parameter a defined value
	// before any early return.
	*pVal=0;

	if(!this->m_bOperationPerformed)
	{
		Error("First Do some Parsing");
		return S_FALSE;
	}
	*pVal=static_cast<long>(tidyErrorCount(this->m_objTidyDocuemnt));

	return S_OK;
}

/*********************************************************************
Property Name:get_ErrorWarning(BSTR *pVal)
Last Modified:19 oct 2004
Function Description: returns the diagnostics text stored by the last tidy operation
Interface Support =YES
*********************************************************************/

// Returns a copy of m_strError (the diagnostics text stored by the Tidy*
// methods) through *pVal as a new BSTR.
// Returns E_POINTER when pVal is NULL and S_FALSE (after calling Error, with
// *pVal set to NULL) when no tidy operation has been performed yet.
STDMETHODIMP CtidyCom::get_ErrorWarning(BSTR *pVal)
{
	if(pVal==NULL)
		return E_POINTER;

	// S_FALSE is a success HRESULT, so give the out parameter a defined value
	// before any early return.
	*pVal=NULL;

	if(!this->m_bOperationPerformed)
	{
		Error("First Do some Parsing");
		return S_FALSE;
	}

	*pVal=CComBSTR(this->m_strError).Detach();

	return S_OK;
}
/*********************************************************************
Property Name:get_Option(ItidyOption* *pVal)
Last Modified:19 oct 2004
Function Description: returns the ItidyOption interface of the options object
Interface Support =YES
*********************************************************************/

// Hands out the ItidyOption interface of m_pOptions through *pVal.
// Returns E_POINTER when pVal is NULL; otherwise returns whatever
// QueryInterface reports, so a failed query is no longer masked as S_OK.
STDMETHODIMP CtidyCom::get_Option(ItidyOption* *pVal)
{
	if(pVal==NULL)
		return E_POINTER;

	*pVal=NULL;

	return m_pOptions->QueryInterface(IID_ItidyOption,
		reinterpret_cast<void**>(pVal));
}

/*********************************************************************
Method Name:EfTidyNode(EfTidyMainNode Type, IEfTidyNode **ppNewEfTidyNode)
Last Modified:19 oct 2004
Function Description: returns a node object for the root, html, head or body node of the document
Interface Support =YES
*********************************************************************/


// Creates a CEfTidyNode wrapping the root, html, head or body node of the
// tidy document and returns its IEfTidyNode interface.
//
// Type            : which node to fetch (TIDY_ROOT, TIDY_HTML, TIDY_HEAD or
//                   TIDY_BODY).
// ppNewEfTidyNode : receives the new node object; NULL on every non-S_OK path.
//
// Returns E_POINTER when ppNewEfTidyNode is NULL, S_FALSE (after calling
// Error) when no tidy operation has been performed or Type is not one of the
// four values above, and otherwise the HRESULT of object creation /
// QueryInterface.
STDMETHODIMP CtidyCom::EfTidyNode(EfTidyMainNode Type, IEfTidyNode **ppNewEfTidyNode)
{
	if(ppNewEfTidyNode==NULL)
		return E_POINTER;

	// S_FALSE is a success HRESULT, so give the out parameter a defined value
	// before any early return.
	*ppNewEfTidyNode=NULL;

	if(!this->m_bOperationPerformed)
	{
		Error("Kindly do parsing before proceeeding to this step!");
		return S_FALSE;
	}

	// Resolve the node first so that an unknown Type returns before any COM
	// object is created (the object used to be leaked on that path).
	TidyNode LocalNode;
	switch(Type)
	{
	case TIDY_ROOT:
		LocalNode=tidyGetRoot(this->m_objTidyDocuemnt);
		break;
	case TIDY_HTML:
		LocalNode=tidyGetHtml(this->m_objTidyDocuemnt);
		break;
	case TIDY_HEAD:
		LocalNode=tidyGetHead(this->m_objTidyDocuemnt);
		break;
	case TIDY_BODY:
		LocalNode=tidyGetBody(this->m_objTidyDocuemnt);
		break;
	default: Error("NodeType Not Found");
		return S_FALSE;
	}

	CComObject<CEfTidyNode>* pTidyNode=NULL;
	HRESULT hr = CComObject<CEfTidyNode>::CreateInstance(&pTidyNode);
	if(FAILED(hr))
		return hr;

	// CreateInstance returns the object with a reference count of zero. Hold
	// one reference while it is set up so that the Release() below destroys
	// it if QueryInterface fails and leaves the caller as sole owner if it
	// succeeds.
	pTidyNode->AddRef();

	pTidyNode->setTidyDocument(&this->m_objTidyDocuemnt);
	pTidyNode->m_pNode=LocalNode;

	hr = pTidyNode->QueryInterface(__uuidof(IEfTidyNode),
		reinterpret_cast<void**>(ppNewEfTidyNode));

	pTidyNode->Release();
	return hr;
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)


Written By
Software Developer (Senior)
India India
He used to have a biography here :), but now he will hire someone (for free, of course :-D) who will write his biography on his behalf :)

He is a great fan of Mr. Johan Rosengren (his idol), Lim Bio Liong, Nishant S and DavidCrow, and believes that he will EXCEL in his life by following in their steps!!!

He started with Visual C++, then moved to C#, and then became language agnostic: give him a task and tell him the language or platform, and he will start immediately if he knows the language; otherwise he quickly learns it and starts contributing productively.

Last but not the least, For good 8 years he was Visual CPP MSMVP!

Comments and Discussions