Click here to Skip to main content
15,868,016 members
Articles / Operating Systems / Windows

A managed wrapper for the HTML Tidy library

Rate me:
Please Sign up or sign in to vote.
4.83/5 (17 votes)
12 Jan 2007 · CPOL · 2 min read · 140.3K   2.6K   28  
A managed C++ wrapper for a small part of the HTML Tidy C library
#ifndef __TIDY_INT_H__
#define __TIDY_INT_H__

/* tidy-int.h -- internal library declarations

  (c) 1998-2003 (W3C) MIT, ERCIM, Keio University
  See tidy.h for the copyright notice.

  CVS Info :

    $Author: hoehrmann $ 
    $Date: 2004/03/06 17:07:02 $ 
    $Revision: 1.8 $ 

*/

#include "tidy.h"
#include "config.h"
#include "tags.h"
#include "attrs.h"
#include "lexer.h"
#include "pprint.h"
#include "access.h"

/* Fallback two-argument maximum / minimum.
** Each is defined only when no earlier header has already supplied it.
** Both are plain macros: an argument may be evaluated twice, so do not
** pass expressions with side effects (e.g. MAX(i++, j)).
*/
#if !defined(MAX)
#define MAX(x,y) (((x) > (y)) ? (x) : (y))
#endif
#if !defined(MIN)
#define MIN(x,y) (((x) < (y)) ? (x) : (y))
#endif

/* Internal state of one Tidy "document".
**
** The tidyDocToImpl()/tidyImplToDoc() macros in this header convert
** between the public TidyDoc handle and a TidyDocImpl* with a plain
** cast, so a TidyDoc handed to callers is simply the address of one of
** these structures.
** NOTE(review): the TidyDocImpl typedef for this struct tag is not
** declared in this header -- presumably it comes from one of the
** includes above; confirm.
**
** Field order is significant: `root` must stay first (see below).
*/
struct _TidyDocImpl
{
    /* The Document Tree (and backing store buffer) */
    Node                root;       /* This MUST remain the first declared 
                                       variable in this structure */
    Lexer*              lexer;

    /* Config + Markup Declarations */
    TidyConfigImpl      config;
    TidyTagImpl         tags;
    TidyAttribImpl      attribs;

    /* This member exists only when SUPPORT_ACCESSIBILITY_CHECKS evaluates
       to non-zero (tested with #if, not #ifdef), so the structure layout
       depends on that setting. */
#if SUPPORT_ACCESSIBILITY_CHECKS
    /* Accessibility Checks state */
    TidyAccessImpl      access;
#endif

    /* The Pretty Print buffer */
    TidyPrintImpl       pprint;

    /* I/O */
    /* NOTE(review): docIn/docOut look like the current input and output
       streams and errout the stream for diagnostics; mssgFilt and
       pOptCallback are callback types declared elsewhere -- confirm
       their exact roles against tidy.h. */
    StreamIn*           docIn;
    StreamOut*          docOut;
    StreamOut*          errout;
    TidyReportFilter    mssgFilt;
    TidyOptCallback     pOptCallback;

    /* Parse + Repair Results */
    /* Unsigned counters plus one signed status value.  How and when they
       are updated is not visible in this header. */
    uint                optionErrors;
    uint                errors;
    uint                warnings;
    uint                accessErrors;
    uint                infoMessages;
    uint                docErrors;
    int                 parseStatus;

    uint                badAccess;   /* for accessibility errors */
    uint                badLayout;   /* for bad style errors */
    uint                badChars;    /* for bad char encodings */
    uint                badForm;     /* for badly placed form tags */

    /* Miscellaneous */
    /* NOTE(review): appData is presumably an opaque value stored on behalf
       of the application, and inputHadBOM presumably records whether the
       input began with a byte order mark -- confirm. */
    ulong               appData;
    uint                nClassId;
    Bool                inputHadBOM;

    /* Present only when TIDY_STORE_ORIGINAL_TEXT is defined (#ifdef). */
#ifdef TIDY_STORE_ORIGINAL_TEXT
    Bool                storeText;
#endif

    /* Present only when PRESERVE_FILE_TIMES evaluates to non-zero (#if).
       struct utimbuf must already be declared by an earlier include; no
       such include is visible in this header. */
#if PRESERVE_FILE_TIMES
    struct utimbuf      filetimes;
#endif
    /* NOTE(review): presumably the doctype text supplied by the input
       document -- confirm. */
    tmbstr              givenDoctype;
};


/* Twizzle internal/external types.
**
** Each pair converts between a public handle type (TidyDoc, TidyNode,
** TidyAttr, TidyOption) and the matching internal pointer type.
** Unless NEVER is defined, the conversions are plain C casts done by the
** macros below, so they perform no checking of any kind.  The function
** prototypes in the NEVER branch describe the same interface with real
** type signatures.
*/
#ifdef NEVER
TidyDocImpl* tidyDocToImpl( TidyDoc tdoc );
TidyDoc      tidyImplToDoc( TidyDocImpl* impl );

Node*        tidyNodeToImpl( TidyNode tnod );
TidyNode     tidyImplToNode( Node* node );

AttVal*      tidyAttrToImpl( TidyAttr tattr );
TidyAttr     tidyImplToAttr( AttVal* attval );

const TidyOptionImpl* tidyOptionToImpl( TidyOption topt );
TidyOption   tidyImplToOption( const TidyOptionImpl* option );
#else

/* document handle <-> implementation */
#define tidyDocToImpl(h)      ((TidyDocImpl*)(h))
#define tidyImplToDoc(p)      ((TidyDoc)(p))

/* node handle <-> implementation */
#define tidyNodeToImpl(h)     ((Node*)(h))
#define tidyImplToNode(p)     ((TidyNode)(p))

/* attribute handle <-> implementation */
#define tidyAttrToImpl(h)     ((AttVal*)(h))
#define tidyImplToAttr(p)     ((TidyAttr)(p))

/* option handle <-> implementation (const on the internal side) */
#define tidyOptionToImpl(h)   ((const TidyOptionImpl*)(h))
#define tidyImplToOption(p)   ((TidyOption)(p))

#endif

/* Internal, TidyDocImpl-based entry points.
** NOTE(review): the meaning of the int results returned below is not
** defined in this header -- check tidy.h and the implementation before
** relying on specific values.
*/

/* Create/Destroy a Tidy "document" object */
/* NOTE(review): presumably every document obtained from tidyDocCreate()
   is to be handed back to tidyDocRelease() -- confirm ownership rules. */
TidyDocImpl* tidyDocCreate(void);
void         tidyDocRelease( TidyDocImpl* impl );

/* Report a status value for the document. */
int          tidyDocStatus( TidyDocImpl* impl );

/* Parse Markup */
/* One entry point per input kind: a named file, standard input, a string,
   a TidyBuffer, a caller-supplied TidyInputSource, or a StreamIn. */
int          tidyDocParseFile( TidyDocImpl* impl, ctmbstr htmlfil );
int          tidyDocParseStdin( TidyDocImpl* impl );
int          tidyDocParseString( TidyDocImpl* impl, ctmbstr content );
int          tidyDocParseBuffer( TidyDocImpl* impl, TidyBuffer* inbuf );
int          tidyDocParseSource( TidyDocImpl* impl, TidyInputSource* docIn );
int          tidyDocParseStream( TidyDocImpl* impl, StreamIn* in );


/* Execute post-parse diagnostics and cleanup.
** Note, the order is important.  You will get different
** results from the diagnostics depending on if they are run
** pre-or-post repair.
*/
int          tidyDocRunDiagnostics( TidyDocImpl* doc );
int          tidyDocCleanAndRepair( TidyDocImpl* doc );


/* Save cleaned up file to file/buffer/sink */
/* One entry point per output kind: a named file, standard output, a
   caller-provided character buffer, a TidyBuffer, a TidyOutputSink, or a
   StreamOut.
   NOTE(review): for tidyDocSaveString, buflen is presumably in/out
   (capacity on entry, length on return) -- confirm. */
int          tidyDocSaveFile( TidyDocImpl* impl, ctmbstr htmlfil );
int          tidyDocSaveStdout( TidyDocImpl* impl );
int          tidyDocSaveString( TidyDocImpl* impl, tmbstr buffer, uint* buflen );
int          tidyDocSaveBuffer( TidyDocImpl* impl, TidyBuffer* outbuf );
int          tidyDocSaveSink( TidyDocImpl* impl, TidyOutputSink* docOut );
int          tidyDocSaveStream( TidyDocImpl* impl, StreamOut* out );

#endif /* __TIDY_INT_H__ */

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Chief Technology Officer Zeta Software GmbH
Germany Germany
Uwe has been programming since 1989, with experience in Assembler, C++, MFC and lots of web and database stuff, and now uses ASP.NET and C# extensively, too. He has also taught programming to students at the local university.

➡️ Give me a tip 🙂

In his free time, he enjoys climbing, running and mountain biking. In 2012 he became the father of a cute boy and in 2014 of an awesome girl.

Some cool, free software from us:

Windows 10 Ereignisanzeige  
German Developer Community  
Free Test Management Software - Intuitive, competitive, Test Plans.  
Homepage erstellen - Intuitive, very easy to use.  
Offline-Homepage-Baukasten

Comments and Discussions