Click here to Skip to main content
12,456,128 members (52,628 online)
Click here to Skip to main content

Stats

91K views
2.7K downloads
149 bookmarked
Posted

AngleSharp

, 3 Jul 2013 BSD
Bringing the DOM to C# with a HTML5/CSS3 parser written in C#.
AngleSharp
Css
Tokens
DOM
Collections
Css
Properties
Rules
Selector
Values
Enums
Exception
Html
Basic
Formatting
Forms
Frames
Head
Lists
Media
Objects
Semantic
Tables
Interfaces
Mathml
Objects
Svg
Various
Xml
Foundation
Attributes
Helpers
Mutations
Html
Tokens
Properties
Stream
Xml
Tokens
packages
ModernUI.WPF.1.0.4
lib
net40
FirstFloor.ModernUI.dll
Microsoft.Windows.Shell.dll
net45
FirstFloor.ModernUI.dll
ModernUI.WPF.1.0.4.nupkg
ModernUI.WPF.1.0.4.nuspec
OxyPlot.Core.2013.1.49.1
lib
portable-net4+sl4+wp71+win8
OxyPlot.dll
OxyPlot.Core.2013.1.49.1.nupkg
OxyPlot.Core.2013.1.49.1.nuspec
OxyPlot.Wpf.2013.1.49.1
lib
NET40
OxyPlot.Wpf.dll
OxyPlot.Xps.dll
NET45
OxyPlot.Wpf.dll
OxyPlot.Xps.dll
OxyPlot.Wpf.2013.1.49.1.nupkg
OxyPlot.Wpf.2013.1.49.1.nuspec
Samples
Logo.ico
Pages
Properties
ViewModels
Documents
using System;
using System.Diagnostics;
using AngleSharp.DOM;
using AngleSharp.DOM.Html;
using System.IO;
using AngleSharp.Html;
using AngleSharp.DOM.Collections;
using AngleSharp.DOM.Css;
using AngleSharp.Css;

namespace AngleSharp
{
    /// <summary>
    /// A handy helper to construct various kinds of documents
    /// from a given source code, URL or stream.
    /// </summary>
    public sealed class DocumentBuilder
    {
        #region Members

        IParser parser;

        #endregion

        #region ctor

        /// <summary>
        /// Creates a new builder with the specified source.
        /// </summary>
        /// <param name="source">The code manager.</param>
        /// <param name="document">The document to fill.</param>
        DocumentBuilder(SourceManager source, HTMLDocument document)
        {
            parser = new HtmlParser(document, source);
            parser.ErrorOccurred += ParseErrorOccurred;
        }

        /// <summary>
        /// Creates a new builder with the specified source.
        /// </summary>
        /// <param name="source">The code manager.</param>
        /// <param name="sheet">The document to fill.</param>
        DocumentBuilder(SourceManager source, CSSStyleSheet sheet)
        {
            parser = new CssParser(sheet, source);
            parser.ErrorOccurred += ParseErrorOccurred;
        }

        #endregion

        #region Properties

        /// <summary>
        /// Gets the result of an HTML parsing.
        /// </summary>
        public HTMLDocument HtmlResult
        {
            get { return ((HtmlParser)parser).Result; }
        }

        /// <summary>
        /// Gets the result of a CSS parsing.
        /// </summary>
        public CSSStyleSheet CssResult
        {
            get { return ((CssParser)parser).Result; }
        }

        #endregion

        #region HTML Construction

        /// <summary>
        /// Builds a new HTMLDocument with the given source code string.
        /// </summary>
        /// <param name="sourceCode">The string to use as source code.</param>
        /// <returns>The constructed HTML document.</returns>
        public static HTMLDocument Html(String sourceCode)
        {
            var source = new SourceManager(sourceCode);
            var db = new DocumentBuilder(source, new HTMLDocument());
            return db.HtmlResult;
        }

        /// <summary>
        /// Builds a new HTMLDocument with the given URL.
        /// </summary>
        /// <param name="url">The URL which points to the address containing the source code.</param>
        /// <returns>The constructed HTML document.</returns>
        public static HTMLDocument Html(Uri url)
        {
            var stream = Builder.Stream(url);
            var source = new SourceManager(stream);
            var db = new DocumentBuilder(source, new HTMLDocument());
            return db.HtmlResult;
        }

        /// <summary>
        /// Builds a new HTMLDocument with the given network stream.
        /// </summary>
        /// <param name="networkStream">The stream of chars to use as source code.</param>
        /// <returns>The constructed HTML document.</returns>
        public static HTMLDocument Html(Stream networkStream)
        {
            var source = new SourceManager(networkStream);
            var db = new DocumentBuilder(source, new HTMLDocument());
            return db.HtmlResult;
        }

        /// <summary>
        /// Builds a list of nodes according with 8.4 Parsing HTML fragments.
        /// </summary>
        /// <param name="sourceCode">The string to use as source code.</param>
        /// <param name="context">The context node to use.</param>
        /// <returns>A list of parsed nodes.</returns>
        public static NodeList HtmlFragment(String sourceCode, Node context = null)
        {
            var source = new SourceManager(sourceCode);
            var doc = new HTMLDocument();
            var db = new DocumentBuilder(source, doc);

            if (context != null)
            {
                if (context.OwnerDocument != null && context.OwnerDocument.QuirksMode != QuirksMode.Off)
                    doc.QuirksMode = context.OwnerDocument.QuirksMode;

                //    Note: For performance reasons, an implementation that does not report errors and that uses
                //          the actual state machine described in this specification directly could use the
                //          PLAINTEXT state instead of the RAWTEXT and script data states where those are mentioned
                //          in the list above. Except for rules regarding parse errors, they are equivalent, since
                //          there is no appropriate end tag token in the fragment case, yet they involve far
                //          fewer state transitions.

                ((HtmlParser)db.parser).SwitchToFragment(context);
                return db.HtmlResult.DocumentElement.ChildNodes;
            }

            return db.HtmlResult.ChildNodes;
        }

        #endregion

        #region CSS Construction

        /// <summary>
        /// Builds a new CSSStyleSheet with the given source code string.
        /// </summary>
        /// <param name="sourceCode">The string to use as source code.</param>
        /// <returns>The constructed CSS stylesheet.</returns>
        public static CSSStyleSheet Css(String sourceCode)
        {
            var source = new SourceManager(sourceCode);
            var db = new DocumentBuilder(source, new CSSStyleSheet());
            return db.CssResult;
        }

        /// <summary>
        /// Builds a new CSSStyleSheet with the given URL.
        /// </summary>
        /// <param name="url">The URL which points to the address containing the source code.</param>
        /// <returns>The constructed CSS stylesheet.</returns>
        public static CSSStyleSheet Css(Uri url)
        {
            var stream = Builder.Stream(url);
            var source = new SourceManager(stream);
            var db = new DocumentBuilder(source, new CSSStyleSheet());
            return db.CssResult;
        }

        /// <summary>
        /// Builds a new CSSStyleSheet with the given network stream.
        /// </summary>
        /// <param name="networkStream">The stream of chars to use as source code.</param>
        /// <returns>The constructed CSS stylesheet.</returns>
        public static CSSStyleSheet Css(Stream networkStream)
        {
            var source = new SourceManager(networkStream);
            var db = new DocumentBuilder(source, new CSSStyleSheet());
            return db.CssResult;
        }

        #endregion

        #region Event Handlers

        /// <summary>
        /// Called once a helper class finds a parse error.
        /// </summary>
        /// <param name="sender">The helper that encountered the error.</param>
        /// <param name="e">The arguments passed from the helper instance.</param>
        void ParseErrorOccurred(object sender, ParseErrorEventArgs e)
        {
            Debug.WriteLine(e);
        }

        #endregion
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The BSD License

Share

About the Author

Florian Rappl
Architect
Germany Germany
Florian lives in Munich, Germany. He started his programming career with Perl. After programming C/C++ for some years he discovered his favorite programming language C#. He did work at Siemens as a programmer until he decided to study Physics.

During his studies he worked as an IT consultant for various companies. After graduating with a PhD in theoretical particle Physics he is working as a senior technical consultant in the field of home automation and IoT.

Florian has been giving lectures in C#, HTML5 with CSS3 and JavaScript, software design, and other topics. He is regularly giving talks at user groups, conferences, and companies. He is actively contributing to open-source projects. Florian is the maintainer of AngleSharp, a completely managed browser engine.

You may also be interested in...

Pro
Pro
| Advertise | Privacy | Terms of Use | Mobile
Web02 | 2.8.160826.1 | Last Updated 4 Jul 2013
Article Copyright 2013 by Florian Rappl
Everything else Copyright © CodeProject, 1999-2016
Layout: fixed | fluid