Click here to Skip to main content
15,886,422 members
Articles / Programming Languages / Visual Basic

EfTidy: The Tidy Library Wrapper

Rate me:
Please Sign up or sign in to vote.
4.91/5 (36 votes)
6 Sep 2013 | GPL3 | 10 min read | 185.1K   5K   88  
A free component for HTML parsing and cleaning
// tidyCom.cpp : Implementation of CtidyCom

/**

  Author :Alok Gupta
  Email  :thatsalok@gmail.com
  Personal Homepage :http://www.thatsalok.com

  You are free to modify and use this code, etc., as long as the above name is not removed.
  I am not responsible for any problems arising from the use of the above code.
   It would be cool if you notified me about using this component!
   Thanks


*/
#include "stdafx.h"
#include "EfTidy.h"
#include "tidyCom.h"
#include "eftidynode.h"






/////////////////////////////////////////////////////////////////////////////
// CtidyCom

// Reports whether rich error information is available for the given
// interface: S_OK for IID_ItidyCom, S_FALSE for every other IID.
STDMETHODIMP CtidyCom::InterfaceSupportsErrorInfo(REFIID riid)
{
	const bool bIsTidyComInterface = (IID_ItidyCom==riid);

	return bIsTidyComInterface ? S_OK : S_FALSE;
}

/*********************************************************************
  Method Name: TidyFiletoMem(BSTR sourceFile, BSTR *result)
  Last Modified: 23 Nov 2004
  Function Description: Parses the file named by sourceFile with this
      object's Tidy document, cleans/repairs it, runs diagnostics and
      returns the pretty-printed markup through result. The captured
      diagnostics text is stored in m_strError.
  Parameters:
      sourceFile - [in]  path of the file to process; must not be empty.
      result     - [out] receives the tidied markup as a BSTR, or NULL
                         when no output was produced.
  Returns:
      E_POINTER - result is NULL.
      S_FALSE   - sourceFile is empty or the file does not exist; the
                  reason is reported through Error().
      S_OK      - otherwise. This is returned even when one of the Tidy
                  calls reported a negative code.
  Interface Support = YES

  NOTE(review): S_FALSE is a COM success code, so clients testing with
  SUCCEEDED()/FAILED() will not see the validation failures as errors.
  Kept as-is for backward compatibility.
 *********************************************************************/
STDMETHODIMP CtidyCom::TidyFiletoMem(BSTR sourceFile, BSTR *result)
{
	if(result==NULL)
		return E_POINTER;

	// The early exits below return a success code (S_FALSE), so the out
	// parameter must hold a defined value on every path.
	*result=NULL;

	if(::SysStringLen(sourceFile)==0)
	{
		Error(_T("First Parameter is Zero Length String"));
		return S_FALSE;
	}

	// _bstr_t provides the narrow-character view of the path that the
	// Tidy calls below take.
	_bstr_t strFileName(sourceFile);

	if(!tidyFileExists((ctmbstr)strFileName))
	{
		Error(_T("File Doesn\'t Exists"));
		return S_FALSE;
	}

	TidyBuffer output={0};
	TidyBuffer errbuf={0};

	// Each stage runs only if the previous one did not return a negative code.
	int rc = tidySetErrorBuffer(this->m_tdoc, &errbuf );              // Capture diagnostics

	if ( rc >= 0 )
		rc = ::tidyParseFile(this->m_tdoc,(ctmbstr)strFileName);      // Parse the input

	if ( rc >= 0 )
		rc = tidyCleanAndRepair(this->m_tdoc);                        // Tidy it up!

	if ( rc >= 0 )
		rc = tidyRunDiagnostics(this->m_tdoc);                        // Kvetch

	if ( rc > 1 )                                                     // If error, force output.
		rc = ( tidyOptSetBool(this->m_tdoc, TidyForceOutput, yes) ? rc : -1 );

	if ( rc >= 0 )
		rc = tidySaveBuffer(this->m_tdoc, &output );                  // Pretty Print

	// output.bp is still NULL when nothing was written; *result then stays NULL.
	if(output.bp!=NULL)
		*result=_bstr_t((char *)output.bp).Detach();

	// errbuf.bp is still NULL when no diagnostics were written; store an
	// empty text instead of handing a NULL string to m_strError.
	m_strError=_bstr_t(errbuf.bp ? (char*)errbuf.bp : "");

	tidyBufFree( &output );
	tidyBufFree( &errbuf );

	// NOTE(review): m_tdoc was given the address of the local errbuf. If
	// libtidy keeps that pointer after this call returns, later diagnostics
	// would target a dead buffer - confirm, and consider a member buffer.

	this->m_bOperationPerformed=TRUE;
	return S_OK;
}

/*********************************************************************
  Method Name: TidyFileToFile(BSTR sourceFile, BSTR destFile)
  Last Modified: 23 Nov 2004
  Function Description: Parses the file named by sourceFile with this
      object's Tidy document, cleans/repairs it, runs diagnostics and
      saves the pretty-printed markup to destFile. The captured
      diagnostics text is stored in m_strError.
  Parameters:
      sourceFile - [in] path of the file to process; must not be empty.
      destFile   - [in] path of the file to write; must not be empty.
  Returns:
      S_FALSE - a parameter is empty or the source file does not exist;
                the reason is reported through Error().
      S_OK    - otherwise. This is returned even when one of the Tidy
                calls reported a negative code.
  Interface Support = YES

  NOTE(review): S_FALSE is a COM success code, so clients testing with
  SUCCEEDED()/FAILED() will not see the validation failures as errors.
  Kept as-is for backward compatibility.
 *********************************************************************/

STDMETHODIMP CtidyCom::TidyFileToFile(BSTR sourceFile, BSTR destFile)
{
	if(::SysStringLen(sourceFile)==0)
	{
		Error(_T("First Parameter is Zero Length String"));
		return S_FALSE;
	}

	if(::SysStringLen(destFile)==0)
	{
		Error(_T("Second Parameter is Zero Length String"));
		return S_FALSE;
	}

	// _bstr_t provides the narrow-character view of both paths that the
	// Tidy calls below take.
	_bstr_t strSrcFileName(sourceFile);
	_bstr_t strDestFileName(destFile);

	if(!tidyFileExists((ctmbstr)strSrcFileName))
	{
		Error(_T("File Doesn\'t Exists"));
		return S_FALSE;
	}

	TidyBuffer errbuf={0};

	// Each stage runs only if the previous one did not return a negative code.
	int rc = tidySetErrorBuffer(this->m_tdoc, &errbuf );               // Capture diagnostics

	if ( rc >= 0 )
		rc = ::tidyParseFile(this->m_tdoc,(ctmbstr)strSrcFileName);    // Parse the input

	if ( rc >= 0 )
		rc = tidyCleanAndRepair(this->m_tdoc);                         // Tidy it up!

	if ( rc >= 0 )
		rc = tidyRunDiagnostics(this->m_tdoc);                         // Kvetch

	if ( rc > 1 )                                                      // If error, force output.
		rc = ( tidyOptSetBool(this->m_tdoc, TidyForceOutput, yes) ? rc : -1 );

	if ( rc >= 0 )
		rc = tidySaveFile(this->m_tdoc,(ctmbstr)strDestFileName );     // Pretty Print

	// Copy the error buffer. errbuf.bp is still NULL when no diagnostics
	// were written; store an empty text instead of handing a NULL string
	// to m_strError.
	m_strError=_bstr_t(errbuf.bp ? (char*)errbuf.bp : "");

	tidyBufFree( &errbuf );

	// NOTE(review): m_tdoc was given the address of the local errbuf. If
	// libtidy keeps that pointer after this call returns, later diagnostics
	// would target a dead buffer - confirm, and consider a member buffer.

	this->m_bOperationPerformed=TRUE;
	return S_OK;
}

/*********************************************************************
  Method Name: TidyMemToMem(BSTR sourceStr, BSTR *result)
  Function Description: Parses the markup held in sourceStr with this
      object's Tidy document, cleans/repairs it, runs diagnostics and
      returns the pretty-printed markup through result. The captured
      diagnostics text is stored in m_strError.
  Parameters:
      sourceStr - [in]  markup to process; must not be empty.
      result    - [out] receives the tidied markup as a BSTR, or NULL
                        when no output was produced.
  Returns:
      E_POINTER - result is NULL.
      S_FALSE   - sourceStr is empty; the reason is reported through
                  Error().
      S_OK      - otherwise. This is returned even when one of the Tidy
                  calls reported a negative code.
  Interface Support = YES

  NOTE(review): S_FALSE is a COM success code, so clients testing with
  SUCCEEDED()/FAILED() will not see the validation failure as an error.
  Kept as-is for backward compatibility.
 *********************************************************************/

STDMETHODIMP CtidyCom::TidyMemToMem(BSTR sourceStr, BSTR *result)
{
	if(result==NULL)
		return E_POINTER;

	// The early exit below returns a success code (S_FALSE), so the out
	// parameter must hold a defined value on every path.
	*result=NULL;

	if(::SysStringLen(sourceStr)==0)
	{
		Error(_T("Buffer is Empty ie First Parameter is Zero Length String"));
		return S_FALSE;
	}

	// _bstr_t provides the narrow-character view of the markup that
	// tidyParseString takes.
	_bstr_t strSource(sourceStr);

	TidyBuffer output={0};
	TidyBuffer errbuf={0};

	//Initialise it to ZERO
	tidyBufInit(&output);
	tidyBufInit(&errbuf);

	// Each stage runs only if the previous one did not return a negative code.
	int rc = tidySetErrorBuffer(this->m_tdoc, &errbuf );          // Capture diagnostics

	if ( rc >= 0 )
		rc = tidyParseString(this->m_tdoc,(ctmbstr)strSource);    // Parse the input

	if ( rc >= 0 )
		rc = tidyCleanAndRepair(this->m_tdoc);                    // Tidy it up!

	if ( rc >= 0 )
		rc = tidyRunDiagnostics(this->m_tdoc);                    // Kvetch

	if ( rc > 1 )                                                 // If error, force output.
		rc = ( tidyOptSetBool(this->m_tdoc, TidyForceOutput, yes) ? rc : -1 );

	if ( rc >= 0 )
		rc = tidySaveBuffer(this->m_tdoc, &output );              // Pretty Print

	// output.bp is still NULL when nothing was written; *result then stays NULL.
	if(output.bp!=NULL)
		*result=_bstr_t((char *)output.bp).Detach();

	// errbuf.bp is still NULL when no diagnostics were written; store an
	// empty text instead of handing a NULL string to m_strError.
	m_strError=_bstr_t(errbuf.bp ? (char*)errbuf.bp : "");

	//free
	tidyBufFree( &output );
	tidyBufFree( &errbuf );

	// NOTE(review): m_tdoc was given the address of the local errbuf. If
	// libtidy keeps that pointer after this call returns, later diagnostics
	// would target a dead buffer - confirm, and consider a member buffer.

	this->m_bOperationPerformed=TRUE;
	return S_OK;

}

/*********************************************************************
  Method Name: TidyMemtoFile(BSTR buffer, BSTR destFile)
  Last Modified: 2 May 2005
  Function Description: Parses the markup held in buffer with this
      object's Tidy document, cleans/repairs it, runs diagnostics and
      saves the pretty-printed markup to destFile. The captured
      diagnostics text is stored in m_strError.
  Parameters:
      buffer   - [in] markup to process; must not be empty.
      destFile - [in] path of the file to write; must not be empty.
  Returns:
      S_FALSE - a parameter is empty; the reason is reported through
                Error().
      S_OK    - otherwise. This is returned even when one of the Tidy
                calls reported a negative code.
  Interface Support = YES

  NOTE(review): S_FALSE is a COM success code, so clients testing with
  SUCCEEDED()/FAILED() will not see the validation failures as errors.
  Kept as-is for backward compatibility.
 *********************************************************************/

STDMETHODIMP CtidyCom::TidyMemtoFile(BSTR buffer, BSTR destFile)
{
	if(::SysStringLen(buffer)==0)
	{
		Error(_T("Buffer is Empty"));
		return S_FALSE;
	}

	if(::SysStringLen(destFile)==0)
	{
		Error("Destination fileName is Not Given");
		return S_FALSE;
	}

	// _bstr_t provides the narrow-character view of the markup and of the
	// destination path that the Tidy calls below take.
	_bstr_t strBuffer(buffer);
	_bstr_t strDestFileName(destFile);

	TidyBuffer errbuf={0};

	// Each stage runs only if the previous one did not return a negative code.
	int rc = tidySetErrorBuffer(this->m_tdoc, &errbuf );              // Capture diagnostics

	if ( rc >= 0 )
		rc = tidyParseString( m_tdoc, (ctmbstr)strBuffer );           // Parse the input

	if ( rc >= 0 )
		rc = tidyCleanAndRepair(this->m_tdoc);                        // Tidy it up!

	if ( rc >= 0 )
		rc = tidyRunDiagnostics(this->m_tdoc);                        // Kvetch

	if ( rc > 1 )                                                     // If error, force output.
		rc = ( tidyOptSetBool(this->m_tdoc, TidyForceOutput, yes) ? rc : -1 );

	if ( rc >= 0 )
		rc = tidySaveFile(this->m_tdoc,(ctmbstr)strDestFileName );    // Pretty Print

	this->m_bOperationPerformed=TRUE;

	// errbuf.bp is still NULL when no diagnostics were written; store an
	// empty text instead of handing a NULL string to m_strError.
	m_strError=_bstr_t(errbuf.bp ? (char*)errbuf.bp : "");
	tidyBufFree( &errbuf );

	// NOTE(review): m_tdoc was given the address of the local errbuf. If
	// libtidy keeps that pointer after this call returns, later diagnostics
	// would target a dead buffer - confirm, and consider a member buffer.

	return S_OK;
}


/*********************************************************************
  Property Name: get_TotalWarnings(long *pVal)
  Last Modified: 19 Oct 2004
  Function Description: Returns the warning count that Tidy reports for
      this object's document.
  Parameters:
      pVal - [out] receives the count; set to 0 when no operation has
                   been performed yet.
  Returns:
      E_POINTER - pVal is NULL.
      S_FALSE   - no tidy operation has been performed yet; the reason
                  is reported through Error().
      S_OK      - otherwise.
  Interface Support = YES
 *********************************************************************/

STDMETHODIMP CtidyCom::get_TotalWarnings(long *pVal)
{
	if(pVal==NULL)
		return E_POINTER;

	// S_FALSE below is a success code, so the out value must be defined.
	*pVal=0;

	if(!this->m_bOperationPerformed)
	{
		Error("First Do some Parsing");
		return S_FALSE;
	}

	*pVal=tidyWarningCount(this->m_tdoc);
	return S_OK;
}


/*********************************************************************
  Property Name: get_TotalErrors(long *pVal)
  Last Modified: 19 Oct 2004
  Function Description: Returns the error count that Tidy reports for
      this object's document.
  Parameters:
      pVal - [out] receives the count; set to 0 when no operation has
                   been performed yet.
  Returns:
      E_POINTER - pVal is NULL.
      S_FALSE   - no tidy operation has been performed yet; the reason
                  is reported through Error().
      S_OK      - otherwise.
  Interface Support = YES
 *********************************************************************/

STDMETHODIMP CtidyCom::get_TotalErrors(long *pVal)
{
	if(pVal==NULL)
		return E_POINTER;

	// S_FALSE below is a success code, so the out value must be defined.
	*pVal=0;

	if(!this->m_bOperationPerformed)
	{
		Error("First Do some Parsing");
		return S_FALSE;
	}

	*pVal=tidyErrorCount(this->m_tdoc);
	return S_OK;
}
  
/*********************************************************************
  Property Name: get_ErrorWarning(BSTR *pVal)
  Last Modified: 19 Oct 2004
  Function Description: Returns a copy of the diagnostics text stored in
      m_strError by the last tidy operation.
  Parameters:
      pVal - [out] receives the text as a BSTR; NULL when no operation
                   has been performed yet.
  Returns:
      E_POINTER - pVal is NULL.
      S_FALSE   - no tidy operation has been performed yet; the reason
                  is reported through Error().
      S_OK      - otherwise.
  Interface Support = YES
 *********************************************************************/

STDMETHODIMP CtidyCom::get_ErrorWarning(BSTR *pVal)
{
	if(pVal==NULL)
		return E_POINTER;

	// S_FALSE below is a success code, so the out value must be defined.
	*pVal=NULL;

	if(!this->m_bOperationPerformed)
	{
		Error("First Do some Parsing");
		return S_FALSE;
	}

	// CComBSTR makes the copy; Detach() passes its ownership to the caller.
	*pVal=CComBSTR(this->m_strError.c_str()).Detach();

	return S_OK;
}
/*********************************************************************
  Property Name: get_Option(ItidyOption **pVal)
  Last Modified: 19 Oct 2004
  Function Description: Hands out the ItidyOption interface of the
      m_pOptions object.
  Parameters:
      pVal - [out] receives the interface pointer obtained through
                   QueryInterface.
  Returns:
      E_POINTER - pVal is NULL.
      Otherwise the HRESULT of QueryInterface (previously discarded in
      favour of an unconditional S_OK).
  Interface Support = YES
 *********************************************************************/

STDMETHODIMP CtidyCom::get_Option(ItidyOption* *pVal)
{
	if(pVal==NULL)
		return E_POINTER;

	// Propagate the query result so a failed query is not reported as
	// success alongside an unusable pointer.
	return m_pOptions->QueryInterface(IID_ItidyOption,
		reinterpret_cast<void**>(pVal));
}

/*********************************************************************
  Method Name: EfTidyNode(EfTidyMainNode Type, IEfTidyNode **ppNewEfTidyNode)
  Last Modified: 19 Oct 2004
  Function Description: Creates a CEfTidyNode object bound to one of the
      main nodes (root, html, head or body) of this object's Tidy
      document and returns its IEfTidyNode interface.
  Parameters:
      Type            - [in]  which main node to wrap: TIDY_ROOT,
                              TIDY_HTML, TIDY_HEAD or TIDY_BODY.
      ppNewEfTidyNode - [out] receives the new node interface; NULL on
                              every path that does not produce one.
  Returns:
      E_POINTER - ppNewEfTidyNode is NULL.
      S_FALSE   - no tidy operation has been performed yet, or Type is
                  not one of the values above; the reason is reported
                  through Error().
      Otherwise the HRESULT of object creation / QueryInterface
      (S_OK on success).
  Interface Support = YES
 *********************************************************************/


STDMETHODIMP CtidyCom::EfTidyNode(EfTidyMainNode Type, IEfTidyNode **ppNewEfTidyNode)
{
	if(ppNewEfTidyNode==NULL)
		return E_POINTER;

	// S_FALSE below is a success code, so the out value must be defined.
	*ppNewEfTidyNode=NULL;

	if(!this->m_bOperationPerformed)
	{
		Error("First Do some Parsing");
		return S_FALSE;
	}

	// Resolve the requested node before creating the wrapper, so that an
	// unknown Type returns without leaving an unreferenced object behind.
	TidyNode LocalNode;
	switch(Type)
	{
	case TIDY_ROOT:
		LocalNode=tidyGetRoot(this->m_tdoc);
		break;
	case TIDY_HTML:
		LocalNode=tidyGetHtml(this->m_tdoc);
		break;
	case TIDY_HEAD:
		LocalNode=tidyGetHead(this->m_tdoc);
		break;
	case TIDY_BODY:
		LocalNode=tidyGetBody(this->m_tdoc);
		break;
	default:
		Error("NodeType Not Found");
		return S_FALSE;
	}

	CComObject<CEfTidyNode>* pNodeObj=NULL;
	HRESULT hr = CComObject<CEfTidyNode>::CreateInstance(&pNodeObj);
	if(FAILED(hr))
		return hr;

	// Hold a reference across QueryInterface: if the query fails, the
	// Release() below destroys the object instead of leaking it; if it
	// succeeds, the caller's reference keeps the object alive.
	pNodeObj->AddRef();

	pNodeObj->m_tdoc =&this->m_tdoc;
	pNodeObj->m_pNode=LocalNode;

	hr = pNodeObj->QueryInterface(__uuidof(IEfTidyNode),
		reinterpret_cast<void**>(ppNewEfTidyNode));

	pNodeObj->Release();

	return hr;
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)


Written By
Software Developer (Senior)
India India
He used to have biography here Smile | :) , but now he will hire someone (for free offcourse Big Grin | :-D ), Who writes his biography on his behalf Smile | :)

He is a great fan of Mr. Johan Rosengren (his idol), Lim Bio Liong, Nishant S and DavidCrow, and believes that he will EXCEL in his life by following their steps!!!

He started with Visual C++, then moved to C#, and then became language agnostic: give him a task and tell him the language or platform, and he will start immediately if he knows the language; otherwise he quickly learns it and starts contributing productively.

Last but not the least, For good 8 years he was Visual CPP MSMVP!

Comments and Discussions