Click here to Skip to main content
15,881,882 members
Articles / Desktop Programming / WTL

EfTidyNet: .NET Wrapper for Tidy library

Rate me:
Please Sign up or sign in to vote.
4.87/5 (12 votes)
6 Sep 2013 · GPL3 · 8 min read · 114.9K   1.6K   36
Free component for parsing HTML, .NET version of EfTidyCom
// EfTidyNet.h
/**

Author : Alok Gupta
Email  : thatsalok@gmail.com

You are free to modify and use this code, etc., as long as the above name is not removed.
I am not responsible for any problems that arise while using the above code, as it is provided AS IS without any warranty.
Please use it at your own Risk.

It is not compulsory to notify me if you are using this component or changing the code, but it would be appreciated.
Thanks
**/
#pragma once
#include "..\\TidyMyLib\\tidy.h"
#include "..\\TidyMyLib\\buffio.h"
#pragma comment(lib,"..\\output\\tidymylib.lib")

using namespace System;
using namespace System::Collections::Generic;


namespace  EfTidyNet 
{
	namespace EfTidyOpt
	{
		
		/**
		 * Character encoding identifiers exposed to managed callers.
		 *
		 * The numeric values are spelled out explicitly and are part of the
		 * public contract. How each value is mapped onto the native Tidy
		 * library is decided in the implementation file, not in this header.
		 */
		public enum class ECharEncodingType
		{
			ASCII       = 0,
			LATIN1      = 1,
			RAW         = 2,
			UTF8        = 3,
			ISO2022     = 4,
			MAC         = 5,
			WIN1252     = 6,
			UTF16LE     = 7,
			UTF16BE     = 8,
			UTF16       = 9,
			BIG5        = 10,
			SHIFTJIS    = 11,
			CHARUNKNOWN = 12	/**< last value; name suggests "encoding not known" */
		};

		/**
		 * Output format selector.
		 *
		 * Declared as a [Flags] enum: every member occupies its own bit
		 * (1, 2, 4), so the values can be told apart when OR-ed together.
		 */
		[Flags]
		public enum class EOutputType
		{
			XmlOut   = 1 << 0,	/**< Create output as XML */
			XhtmlOut = 1 << 1,	/**< Output extensible HTML */
			HtmlOut  = 1 << 2	/**< Output plain HTML, even for XHTML input. */
		};


		[Flags]
		public  enum class EIndentScheme 
		{
			NOINDENT=0,
			INDENTBLOCKS,
			AUTOINDENT
		} ;
	
		[Flags]
		public  enum class EDoctypeModes
		{
		    DoctypeOmit,    /**< Omit DOCTYPE altogether */
		    DoctypeAuto,    /**< Keep DOCTYPE in input.  Set version to content */
		    DoctypeStrict,  /**< Convert document to HTML 4 strict content model */
		    DoctypeLoose,   /**< Convert document to HTML 4 transitional
		                            content model */
		    DoctypeUser     /**< Set DOCTYPE FPI explicitly */
		} ;

		/**
		 * Native (non-managed) selector passed to the private tag helpers of
		 * TidyNetOpt (ReturnTagsString / ParseTags / AddNewTags).
		 * Presumably each value picks one of that class's four tag lists --
		 * confirm against the implementation file.
		 */
		typedef enum tagTypes
		{
			EMPTYTAGS  = 0,
			INLINETAGS = 1,
			PRETAGS    = 2,
			BLOCKTAGS  = 3
		} ETagsType;
		/**
		 * Managed facade over the option set of a native TidyDoc.
		 *
		 * Almost every option is declared as a pair of overloads sharing one
		 * name: the parameterless overload returns the option's value and the
		 * overload taking `newVal` assigns it. The bool returned by the
		 * assigning overloads presumably reports whether the native library
		 * accepted the value -- confirm in the implementation file; the
		 * bodies are not part of this header.
		 */
		public ref class TidyNetOpt
		{
		private:
			// ---- state ---------------------------------------------------
			TidyDoc        m_tdoc;           // native Tidy document handle
			List<String^>^ m_vecEmptyTags;
			List<String^>^ m_vecInlineTags;
			List<String^>^ m_vecPreTags;
			List<String^>^ m_vecBlockTags;
			String^        m_SconfigName;

			// ---- helpers for the custom-tag lists --------------------------
			String^ ReturnTagsString(ETagsType Tags_Type);
			void    ParseTags(ETagsType Tags_Type, String^ TagString);
			void    AddNewTags(ETagsType Tags_Type, String^ TagString);

		public:
			/** Builds the option wrapper from a tracking reference to a TidyDoc. */
			TidyNetOpt(TidyDoc % tdoc);

			bool LoadConfigFile(String^ SConfigFile);
			bool ResetToDefaultValue();

			// ---- MarkUp ----------------------------------------------------
			String^ Doctype();
			bool    Doctype(String^ newVal);

			// property TidyMark: Add meta element indicating tidied doc
			bool TidyMark();
			bool TidyMark(bool newVal);

			bool HideEndTag();
			bool HideEndTag(bool newVal);

			bool EncloseText();
			bool EncloseText(bool newVal);

			bool EncloseBlockText();
			bool EncloseBlockText(bool newVal);

			bool LogicalEmphasis();
			bool LogicalEmphasis(bool newVal);

			// NOTE(review): declared as bool although the name suggests a text
			// value -- confirm against the implementation.
			bool DefaultAltText();
			bool DefaultAltText(bool newVal);

			bool Clean();
			bool Clean(bool newVal);

			bool DropFontTags();
			bool DropFontTags(bool newVal);

			bool DropEmptyParas();
			bool DropEmptyParas(bool newVal);

			bool Word2000();
			bool Word2000(bool newVal);

			bool FixBadComment();
			bool FixBadComment(bool newVal);

			bool FixBackslash();
			bool FixBackslash(bool newVal);

			bool Bare();
			bool Bare(bool newVal);

			bool DropPropAttr();
			bool DropPropAttr(bool newVal);

			// Custom tag lists, exchanged with the caller as a single string.
			String^ NewEmptyTags();
			bool    NewEmptyTags(String^ newVal);

			String^ NewInlineTags();
			bool    NewInlineTags(String^ newVal);

			String^ NewBlockLevelTags();
			bool    NewBlockLevelTags(String^ newVal);

			String^ NewPreTags();
			bool    NewPreTags(String^ newVal);

			// ---- Output type -----------------------------------------------
			EOutputType OutputType();
			bool        OutputType(EOutputType newVal);

			bool InputAsXML();
			bool InputAsXML(bool newVal);

			bool ADDXmlDecl();
			bool ADDXmlDecl(bool newVal);

			bool AddXmlSpace();
			bool AddXmlSpace(bool newVal);

			bool AssumeXmlProcins();
			bool AssumeXmlProcins(bool newVal);

			// ---- Encoding --------------------------------------------------
			ECharEncodingType CharEncoding();
			bool              CharEncoding(ECharEncodingType newVal);

			bool NumericsEntities();
			bool NumericsEntities(bool newVal);

			bool QuoteMarks();
			bool QuoteMarks(bool newVal);

			bool QuoteNBSP();
			bool QuoteNBSP(bool newVal);

			bool QuoteAmpersand();
			bool QuoteAmpersand(bool newVal);

			// Newly added encoding options: separate input / output encodings.
			ECharEncodingType InCharEncoding();
			bool              InCharEncoding(ECharEncodingType newVal);
			ECharEncodingType OutCharEncoding();
			bool              OutCharEncoding(ECharEncodingType newVal);

			// ---- Layout ----------------------------------------------------
			bool OutputTagInUpperCase();
			bool OutputTagInUpperCase(bool newVal);

			bool OutputAttrInUpperCase();
			bool OutputAttrInUpperCase(bool newVal);

			bool WrapScriptlets();
			bool WrapScriptlets(bool newVal);

			bool WrapAttVals();
			bool WrapAttVals(bool newVal);

			bool WrapSection();
			bool WrapSection(bool newVal);

			bool WrapAsp();
			bool WrapAsp(bool newVal);

			bool WrapJste();
			bool WrapJste(bool newVal);

			bool WrapPhp();
			bool WrapPhp(bool newVal);

			bool IndentAttributes();
			bool IndentAttributes(bool newVal);

			bool BreakBeforeBR();
			bool BreakBeforeBR(bool newVal);

			bool LiteralAttribs();
			bool LiteralAttribs(bool newVal);

			long IndentSpace();
			bool IndentSpace(long newVal);

			long WrapLen();
			bool WrapLen(long newVal);

			long TabSize();
			bool TabSize(long newVal);

			EIndentScheme Indent();
			bool          Indent(EIndentScheme newVal);

			// ---- Operation -------------------------------------------------
			bool MarkUp();
			bool MarkUp(bool newVal);

			bool ShowWarnings();
			bool ShowWarnings(bool newVal);

			bool Quiet();
			bool Quiet(bool newVal);

			bool KeepTime();
			bool KeepTime(bool newVal);

			bool GnuEmacs();
			bool GnuEmacs(bool newVal);

			// ---- Newly added -----------------------------------------------
			bool FixUrl();
			bool FixUrl(bool newVal);

			bool BodyOnly();
			bool BodyOnly(bool newVal);

			bool HideComments();
			bool HideComments(bool newVal);

			// Unlike the other options, the assigning overload returns void.
			String^ ErrorFile();
			void    ErrorFile(String^ newVal);

			EDoctypeModes DoctypeMode();
			bool          DoctypeMode(EDoctypeModes newVal);
		};
	}

	/**
	 * Managed entry point of the wrapper. Each instance owns one native
	 * TidyDoc obtained from tidyCreate() and exposes its option set through
	 * the public Option field.
	 *
	 * Lifetime: the native document is released with tidyRelease() either
	 * deterministically (delete / Dispose / `using`) through ~TidyNet, or by
	 * the garbage collector through !TidyNet. Previously the handle was never
	 * released, so every TidyNet instance leaked its native document.
	 *
	 * NOTE(review): TidyNetOpt keeps its own TidyDoc field; if it stores the
	 * handle passed to its constructor, Option must not be used after this
	 * object has been disposed -- confirm in the implementation file.
	 */
	public ref class TidyNet
	{
		bool	m_bOperationPerformed;
		TidyDoc	m_tdoc;		// native handle; owned by this object, freed in !TidyNet
		String^	m_SError;

	public:
		/** Creates a fresh native Tidy document and its option wrapper. */
		TidyNet()
		{
			m_bOperationPerformed = false;
			m_tdoc = tidyCreate();
			Option = gcnew EfTidyOpt::TidyNetOpt(m_tdoc);
		}

		/**
		 * Copy constructor. The copy gets its OWN native document (two
		 * objects must never release the same handle) and receives the
		 * source document's option values via tidyOptCopyConfig().
		 * The original empty body left m_tdoc null and Option == nullptr,
		 * so any call on the copy operated on a null document.
		 * Operation state (errors, "operation performed") is not copied.
		 */
		TidyNet(TidyNet % obj)
		{
			m_bOperationPerformed = false;
			m_tdoc = tidyCreate();
			// The source may already have been disposed; then keep defaults.
			if (m_tdoc != nullptr && obj.m_tdoc != nullptr)
			{
				tidyOptCopyConfig(m_tdoc, obj.m_tdoc);
			}
			Option = gcnew EfTidyOpt::TidyNetOpt(m_tdoc);
		}

		/** Deterministic cleanup (IDisposable::Dispose); delegates to the finalizer. */
		~TidyNet()
		{
			this->!TidyNet();
		}

		// Tidy operations; implemented outside this header. Each returns bool,
		// and the String^ % / long % parameters are written by the callee.
		bool TidyFiletoMem (const String^ SFileName			, String^ %		SResult);
		bool TidyFileToFile( String^ SsourceFileName   ,  String^ SDestFile);
		bool TidyMemToMem  (String^ SsourceData		, String^ %		SResult);
		bool TidyMemtoFile (String^ SBuffer			, String^ SDestFile);
		bool TotalWarnings (long %pVal);
		bool TotalErrors   (long %pVal);
		String^ ErrorWarning  ();

		/** Option wrapper bound to this instance's native document. */
		EfTidyOpt::TidyNetOpt^	Option;

	protected:
		/**
		 * Finalizer: releases the native document exactly once. Safe to run
		 * after ~TidyNet because the handle is nulled after release.
		 */
		!TidyNet()
		{
			if (m_tdoc != nullptr)
			{
				tidyRelease(m_tdoc);
				m_tdoc = nullptr;
			}
		}
	};


}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)


Written By
Software Developer (Senior)
India India
He used to have a biography here :), but now he will hire someone (for free, of course :-D) who will write his biography on his behalf :)

He is a great fan of Mr. Johan Rosengren (his idol), Lim Bio Liong, Nishant S and DavidCrow, and believes that he will EXCEL in his life by following in their steps!!!

He started with Visual C++, then moved to C#, and then became language agnostic: give him a task and tell him the language or platform, and he will start immediately if he knows the language; otherwise he quickly learns it and starts contributing productively.

Last but not least, for a good 8 years he was a Visual C++ MS MVP!

Comments and Discussions