Click here to Skip to main content
Click here to Skip to main content
Articles » Languages » C# » General » Downloads
 
Add your own
alternative version

A non-well-formed HTML Parser and CSS Resolver

, 20 Jul 2007
A non-well-formed HTML parser and CSS resolver built in pure .NET C#
dols_html.zip
DOLS
Backup
DFuzzy.rar
DOLRss.rar
DXHtmlObjectModel.rar
bin
Debug
doc
Check.doc
DOL
DBase
DHtml
DCssResolver
DHtmlParser
Node
DOLBase.csproj.user
DOLHtml.csproj.user
DOLS.suo
HTMLTreeDemo
bin
Debug
HTMLTreeDemo.vshost.exe
Google News.files
cleardot.gif
envelope.gif
news(1).jpg
news(10).jpg
news(11).jpg
news(12).jpg
news(13).jpg
news(14).jpg
news(15).jpg
news(16).jpg
news(17).jpg
news(18).jpg
news(19).jpg
news(2).jpg
news(20).jpg
news(21).jpg
news(22).jpg
news(23).jpg
news(24).jpg
news(25).jpg
news(3).jpg
news(4).jpg
news(5).jpg
news(6).jpg
news(7).jpg
news(8).jpg
news(9).jpg
news.gif
news.jpg
Thumbs.db
obj
Debug
TempPE
Properties
Settings.settings
VTune
HTMLTreeDemo.vpj
obj
Debug
TempPE
Properties
Settings.settings
VTune
DOLBase.vpj
DOLHtml.vpj
DOLHtml.vws
dols_html_20070322.zip
DFuzzy.rar
DOLRss.rar
DXHtmlObjectModel.rar
Demo
bin
Debug
Google News.files
cleardot.gif
envelope.gif
news(1).jpg
news(10).jpg
news(11).jpg
news(12).jpg
news(13).jpg
news(14).jpg
news(15).jpg
news(16).jpg
news(17).jpg
news(18).jpg
news(19).jpg
news(2).jpg
news(20).jpg
news(21).jpg
news(22).jpg
news(23).jpg
news(24).jpg
news(25).jpg
news(3).jpg
news(4).jpg
news(5).jpg
news(6).jpg
news(7).jpg
news(8).jpg
news(9).jpg
news.gif
news.jpg
obj
Properties
Check.doc
DOLBase.csproj.user
DOLS.suo
DOLS.vsmdi
Settings.settings
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;

namespace HTMLTreeDemo
{
    public partial class HTMLTreeDemo : Form
    {
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Creates the demo form: subscribes <see cref="SytleTagTran"/> to the parser's
        /// style-created event, wraps the parser in a document, and then calls
        /// InitializeComponent (defined in another part of this partial class).
        /// </summary>
        public HTMLTreeDemo()
        {
            // Method-group conversion builds the same StyleEventHandler delegate.
            this.m_htmlParser.StyleCreatedEvent += this.SytleTagTran;
            this.m_htmlDoc = new DOL.DHtml.DHtmlParser.DHtmlDocument(this.m_htmlParser);

            this.InitializeComponent();
        }

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Menu handler that closes this form.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void exitToolStripMenuItem_Click(object sender, EventArgs e)
        {
            this.Close();
        }

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Style-created callback (subscribed in the constructor): passes the style
        /// node's <c>Style</c> value to the CSS resolver together with the shared
        /// selector list.
        /// </summary>
        /// <param name="style">The style node reported by the parser.</param>
        private void SytleTagTran(DOL.DHtml.DHtmlParser.Node.DHtmlStyle style)
        {
            this.m_cssResolver.Resolve(style.Style, this.m_selectorList);
        }

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Menu handler that lets the user pick an HTML file, loads it into the
        /// document, saves a round-tripped copy next to it ("&lt;path&gt;.temp.htm"),
        /// shows that copy in the embedded browser, and rebuilds both tree views.
        /// The load and save durations are shown in the window title.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void openToolStripMenuItem_Click(object sender, EventArgs e)
        {
            // The dialog is IDisposable; release it as soon as the choice is known.
            using(OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = "(HTML *.htm)|*.htm|(HTML *.html)|*.html";
                if(ofd.ShowDialog() != DialogResult.OK)
                    return;

                m_path = ofd.FileName;
            }

            string tempPath = m_path + ".temp.htm";

            // Reset per-document state before parsing the new file. SytleTagTran hands this
            // list to the resolver for every style node, so without the Clear the selectors
            // of a previously opened file would still be listed for the new one.
            // NOTE(review): assumes Load raises StyleCreatedEvent while parsing - confirm.
            m_selectorList.Clear();

            // The previously highlighted element belonged to the old document.
            // NOTE(review): assumes Load replaces the document's nodes - confirm.
            m_selectElement = null;

            // Stopwatch is used instead of DateTime.Now differences, whose resolution is
            // too coarse for millisecond timings.
            System.Diagnostics.Stopwatch loadWatch = System.Diagnostics.Stopwatch.StartNew();
            m_htmlDoc.Load(m_path);
            loadWatch.Stop();

            System.Diagnostics.Stopwatch saveWatch = System.Diagnostics.Stopwatch.StartNew();
            m_htmlDoc.Save(tempPath);
            saveWatch.Stop();

            this.Text = "HTMLTreeDemo Load " + loadWatch.Elapsed.TotalMilliseconds + " ms Save " + saveWatch.Elapsed.TotalMilliseconds + " ms";
            m_propertyGrid.SelectedObject = null;

            try
            {
                // DocumentCompleted switches AllowNavigation off again, so re-enable it first.
                m_webBrowser.AllowNavigation = true;
                m_webBrowser.Navigate(tempPath);
            }
            catch(Exception ex)
            {
                // The preview stays best-effort, but the failure is no longer silent.
                System.Diagnostics.Debug.WriteLine("Preview navigation failed: " + ex);
            }

            // Dump the parsed document to the debug output.
            StringBuilder builder = new StringBuilder();
            m_htmlDoc.Dump(builder, "");
            System.Diagnostics.Debug.Write("\n" + builder.ToString());

            CreateHTMLTree();
            CreateCSSTree();
        }

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Rebuilds the CSS tree view from <c>m_selectorList</c>: one child per selector
        /// under a single "CSS Tree" root.
        /// </summary>
        private void CreateCSSTree()
        {
            // Record each selector's current position as its Priority, then sort.
            // NOTE(review): presumably the selector's comparison uses Priority - confirm
            // against DCssSelector.
            for(int selectorIndex = 0, selectorCount = m_selectorList.Count; selectorIndex < selectorCount; ++selectorIndex)
                m_selectorList[selectorIndex].Priority = selectorIndex;

            m_selectorList.Sort();

            // Build the whole tree off-screen first, so a failure while building
            // leaves the currently displayed tree untouched.
            TreeNode root = new TreeNode("CSS Tree");
            foreach(DOL.DHtml.DCssResolver.DCssSelector selector in m_selectorList)
                CreateCSSTreeNode(root, selector);

            // BeginUpdate/EndUpdate is the TreeView mechanism for batching repaints
            // (SuspendLayout only defers layout); the finally guarantees the view is
            // never left frozen.
            m_cssTreeView.BeginUpdate();
            try
            {
                m_cssTreeView.Nodes.Clear();
                m_cssTreeView.Nodes.Add(root);
            }
            finally
            {
                m_cssTreeView.EndUpdate();
            }
        }

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Appends a "Selector: ..." node for <paramref name="selector"/> under
        /// <paramref name="parent"/>, with one child node per CSS property. Each tree
        /// node's Tag holds the object it represents.
        /// </summary>
        /// <param name="parent">Tree node that receives the new selector node.</param>
        /// <param name="selector">Selector whose properties become child nodes.</param>
        private void CreateCSSTreeNode(TreeNode parent, DOL.DHtml.DCssResolver.DCssSelector selector)
        {
            // Nodes.Add(string) creates the node, attaches it and returns it.
            TreeNode selectorNode = parent.Nodes.Add("Selector: " + selector.Selector);
            selectorNode.Tag = selector;

            foreach(DOL.DHtml.DCssResolver.DCssProperty cssProperty in selector.Properties)
            {
                TreeNode child = selectorNode.Nodes.Add(cssProperty.CSS);
                child.Tag = cssProperty;
            }
        }

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Rebuilds the HTML tree view from the document's top-level nodes, placed
        /// under a single "HTML Tree" root.
        /// </summary>
        private void CreateHTMLTree()
        {
            // Build the whole tree off-screen first, so a failure while building
            // leaves the currently displayed tree untouched.
            TreeNode root = new TreeNode("HTML Tree");
            foreach(DOL.DHtml.DHtmlParser.Node.DHtmlNode child in m_htmlDoc.Nodes)
                CreateHTMLTreeNode(root, child);

            // BeginUpdate/EndUpdate is the TreeView mechanism for batching repaints
            // (SuspendLayout only defers layout); the finally guarantees the view is
            // never left frozen.
            m_htmlTreeView.BeginUpdate();
            try
            {
                m_htmlTreeView.Nodes.Clear();
                m_htmlTreeView.Nodes.Add(root);
            }
            finally
            {
                m_htmlTreeView.EndUpdate();
            }
        }

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Appends a tree node for <paramref name="node"/> under <paramref name="parent"/>
        /// and labels it according to the node's concrete type. Element nodes recurse
        /// into their children. A node of any other type keeps an empty label.
        /// </summary>
        /// <param name="parent">Tree node that receives the new node.</param>
        /// <param name="node">Document node to represent; stored in the tree node's Tag.</param>
        private void CreateHTMLTreeNode(TreeNode parent, DOL.DHtml.DHtmlParser.Node.DHtmlNode node)
        {
            TreeNode item = parent.Nodes.Add("");
            item.Tag = node;

            // The order of the type checks is significant if these node classes
            // derive from one another, so it must stay as it is.
            if(node is DOL.DHtml.DHtmlParser.Node.DHtmlText)
            {
                DOL.DHtml.DHtmlParser.Node.DHtmlText textNode = (DOL.DHtml.DHtmlParser.Node.DHtmlText)node;
                if(textNode.IsWhiteSpace)
                {
                    item.Text = "White Space";
                }
                else
                {
                    item.Text = textNode.Text;
                }
            }
            else if(node is DOL.DHtml.DHtmlParser.Node.DHtmlElement)
            {
                DOL.DHtml.DHtmlParser.Node.DHtmlElement elementNode = (DOL.DHtml.DHtmlParser.Node.DHtmlElement)node;
                item.Text = elementNode.Tag;
                foreach(DOL.DHtml.DHtmlParser.Node.DHtmlNode childNode in elementNode.Nodes)
                {
                    CreateHTMLTreeNode(item, childNode);
                }
            }
            else if(node is DOL.DHtml.DHtmlParser.Node.DHtmlStyle)
            {
                item.Text = ((DOL.DHtml.DHtmlParser.Node.DHtmlStyle)node).Tag;
            }
            else if(node is DOL.DHtml.DHtmlParser.Node.DHtmlProcessingInstruction)
            {
                item.Text = ((DOL.DHtml.DHtmlParser.Node.DHtmlProcessingInstruction)node).Value;
            }
            else if(node is DOL.DHtml.DHtmlParser.Node.DHtmlScript)
            {
                item.Text = ((DOL.DHtml.DHtmlParser.Node.DHtmlScript)node).Tag;
            }
            else if(node is DOL.DHtml.DHtmlParser.Node.DHtmlComment)
            {
                item.Text = ((DOL.DHtml.DHtmlParser.Node.DHtmlComment)node).Comment;
            }
        }

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Runs before a node of the HTML tree is selected. Shows the node in the
        /// property grid and its HTML in the text box, moves the red highlight border
        /// from the previously selected element to the new one, lists the selectors
        /// that match the new element, and refreshes the browser preview.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Carries the tree node about to be selected.</param>
        private void m_htmlTreeView_BeforeSelect(object sender, TreeViewCancelEventArgs e)
        {
            m_propertyGrid.SelectedObject = e.Node.Tag;

            // Take the highlight off the previously selected element first, so the HTML
            // shown below never contains the injected border style. The highlight is the
            // attribute appended last; the Count check avoids an out-of-range RemoveAt if
            // the attribute list has been emptied in the meantime.
            if(m_selectElement != null && m_selectElement.Attributes.Count > 0)
                m_selectElement.Attributes.RemoveAt(m_selectElement.Attributes.Count - 1);

            DOL.DHtml.DHtmlParser.Node.DHtmlNode node = e.Node.Tag as DOL.DHtml.DHtmlParser.Node.DHtmlNode;
            if(node != null)
                m_textBox.Text = node.HTML;
            else m_textBox.Text = "";

            m_selectElement = e.Node.Tag as DOL.DHtml.DHtmlParser.Node.DHtmlElement;
            if(m_selectElement != null)
            {
                // Highlight the element in the preview with a red border.
                m_selectElement.Attributes.Add(new DOL.DHtml.DHtmlParser.DHtmlAttribute("style", "border:5px solid #FF0000"));

                StringBuilder builder = new StringBuilder("Selector Match:\r\n");
                foreach(DOL.DHtml.DCssResolver.DCssSelector selector in m_selectorList)
                {
                    // Selectors with a pseudo part are skipped.
                    if(!selector.HasPseudo && selector.IsMatching(m_selectElement))
                    {
                        builder.Append(selector.CSS);
                        builder.Append("\r\n");
                    }
                }

                m_selectorTextBox.Text = builder.ToString();
            }
            else
            {
                // Not an element: do not leave the previous element's matches on screen.
                m_selectorTextBox.Text = "";
            }

            m_htmlDoc.Save(m_path + ".temp.htm");
            try
            {
                // DocumentCompleted switches AllowNavigation off again, so re-enable it first.
                m_webBrowser.AllowNavigation = true;
                m_webBrowser.Navigate(m_path + ".temp.htm");
            }
            catch(Exception ex)
            {
                // The preview stays best-effort, but the failure is no longer silent.
                System.Diagnostics.Debug.WriteLine("Preview navigation failed: " + ex);
            }
        }

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Runs before a node of the CSS tree is selected. Shows the node's tag object
        /// in the property grid, puts the CSS text of the property or selector (or an
        /// empty string for anything else) in the text box, and clears the
        /// selector-match box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Carries the tree node about to be selected.</param>
        private void m_cssTreeView_BeforeSelect(object sender, TreeViewCancelEventArgs e)
        {
            object tag = e.Node.Tag;
            m_propertyGrid.SelectedObject = tag;

            string cssText = "";

            // A property takes precedence over a selector, as in the tag checks' order.
            DOL.DHtml.DCssResolver.DCssProperty cssProperty = tag as DOL.DHtml.DCssResolver.DCssProperty;
            if(cssProperty != null)
            {
                cssText = cssProperty.CSS;
            }
            else
            {
                DOL.DHtml.DCssResolver.DCssSelector cssSelector = tag as DOL.DHtml.DCssResolver.DCssSelector;
                if(cssSelector != null)
                {
                    cssText = cssSelector.CSS;
                }
            }

            m_textBox.Text = cssText;
            m_selectorTextBox.Text = "";
        }

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Runs after a value was edited in the property grid: saves the document to the
        /// temporary preview file, reloads the browser preview, and rebuilds both trees.
        /// </summary>
        /// <param name="s">Event source (unused).</param>
        /// <param name="e">Describes the changed property (unused).</param>
        private void m_propertyGrid_PropertyValueChanged(object s, PropertyValueChangedEventArgs e)
        {
            m_htmlDoc.Save(m_path + ".temp.htm");
            try
            {
                // DocumentCompleted switches AllowNavigation off again, so re-enable it first.
                m_webBrowser.AllowNavigation = true;
                m_webBrowser.Navigate(m_path + ".temp.htm");
            }
            catch(Exception ex)
            {
                // The preview stays best-effort, but the failure is no longer silent.
                System.Diagnostics.Debug.WriteLine("Preview navigation failed: " + ex);
            }

            CreateHTMLTree();
            CreateCSSTree();
        }

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Turns browser navigation off once a document has finished loading. The
        /// handlers that refresh the preview turn it back on before calling Navigate.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void m_webBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            this.m_webBrowser.AllowNavigation = false;
        }

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Menu handler that opens the article's web page. The URL is handed to the
        /// operating system shell, so it opens in the user's default browser instead of
        /// requiring Internet Explorer to be installed.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void httpdesignstudiolookinatToolStripMenuItem_Click(object sender, EventArgs e)
        {
            System.Diagnostics.ProcessStartInfo startInfo =
                new System.Diagnostics.ProcessStartInfo("http://www.codeproject.com/useritems/DOL_HTML_Parser.asp");

            // Shell execution is what resolves a URL to the registered browser.
            startInfo.UseShellExecute = true;

            try
            {
                // Start may return null when no new process was created; otherwise
                // release the process handle right away - it is not needed here.
                System.Diagnostics.Process browser = System.Diagnostics.Process.Start(startInfo);
                if(browser != null)
                    browser.Dispose();
            }
            catch(Win32Exception ex)
            {
                // Raised when the shell cannot open the URL; tell the user instead of crashing.
                MessageBox.Show(this, ex.Message, "HTMLTreeDemo", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
        }

        /////////////////////////////////////////////////////////////////////////////////
        // Path of the HTML file chosen in the open dialog; the preview copy is
        // written to m_path + ".temp.htm".
        private string m_path;
        // Element that currently carries the red highlight "style" attribute, or null.
        private DOL.DHtml.DHtmlParser.Node.DHtmlElement m_selectElement = null;

        // Parser whose StyleCreatedEvent is wired to SytleTagTran in the constructor.
        private DOL.DHtml.DHtmlParser.DHtmlGeneralParser m_htmlParser = new DOL.DHtml.DHtmlParser.DHtmlGeneralParser();
        // Resolver called from SytleTagTran with each style node's Style value.
        private DOL.DHtml.DCssResolver.DCssResolver m_cssResolver = new DOL.DHtml.DCssResolver.DCssResolver();

        // Selectors shown in the CSS tree and matched against the selected element.
        private List<DOL.DHtml.DCssResolver.DCssSelector> m_selectorList= new List<DOL.DHtml.DCssResolver.DCssSelector>();
        // Document created in the constructor around m_htmlParser; loaded, saved and dumped by the handlers.
        private DOL.DHtml.DHtmlParser.DHtmlDocument m_htmlDoc = null;


        
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here

Share

About the Author

James S.F. Hsieh
Web Developer
United States United States
James S.F. Hsieh (Nomad Libra) works as an engineer for "Corel Intervideo", a company situated in Taiwan.
He received his master's degree from the Graduate Institute of Network Learning Technology, National Central University, Taiwan, in 2006.
His research interests are semantic Web services, intelligent software agent, machine learning, algorithm, software
engineering and multimedia programming.

| Advertise | Privacy | Terms of Use | Mobile
Web01 | 2.8.150327.1 | Last Updated 20 Jul 2007
Article Copyright 2007 by James S.F. Hsieh
Everything else Copyright © CodeProject, 1999-2015
Layout: fixed | fluid