Click here to Skip to main content
15,887,425 members
Articles / Programming Languages / C#

Check Help Links Tool

Rate me:
Please Sign up or sign in to vote.
4.97/5 (13 votes)
6 Oct 2005 · CPOL · 6 min read · 85.9K · 17.3K · 34
A tool to check links across merged help (CHM) files.
using System;
using System.IO;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Text.RegularExpressions;
using System.Diagnostics;

using mshtml;

using HtmlHelp;
using HtmlHelp.ChmDecoding;

using Common;

namespace CheckHelpLinks
{
	/// <summary>
	/// Summary description for DlgBrowser.
	/// </summary>
	public class DlgBrowser : System.Windows.Forms.Form
	{
		/// <summary>
		/// Required designer variable.
		/// </summary>
		private System.ComponentModel.Container components = null;

		// Progress indicator; its Maximum is set to the number of files in _HtmlFiles.
		private System.Windows.Forms.ProgressBar _ProgressBar;
		// Shows the URL passed to Navigate; pressing Enter navigates to the typed text.
		private System.Windows.Forms.TextBox _TextBoxUrl;
		// Embedded browser control used by Navigate and by the slow (DOM based) parser.
		private AxSHDocVw.AxWebBrowser _WebBrowser;

		// Optional single file to process once WM_GetLinks is handled.
		private HtmlFileItem _HtmlFile = null;
		// Optional collection of files; every value in it is processed in turn.
		private HtmlFileItemCollection _HtmlFiles = null;
		// Optional URL to navigate to after the files have been processed.
		private string _Url = null;
		// When true, DialogResult is set to OK as soon as processing has finished.
		private bool _AutoClose = true;
		// When true, links are collected in addition to anchors.
		private bool _GetLinks = true;

		// Set before a slow-parse navigation starts; cleared by the DocumentComplete handler.
		private bool _Loading = false;

		/// <summary>Single file to process when the dialog starts working; may be null.</summary>
		public HtmlFileItem HtmlFile
		{
			get { return _HtmlFile; }
			set { _HtmlFile = value; }
		}

		/// <summary>Collection of files to process when the dialog starts working; may be null.</summary>
		public HtmlFileItemCollection HtmlFiles
		{
			get { return _HtmlFiles; }
			set { _HtmlFiles = value; }
		}

		/// <summary>URL to navigate to once the files have been processed; may be null.</summary>
		public string Url
		{
			get { return _Url; }
			set { _Url = value; }
		}

		/// <summary>When true (the default) the dialog result is set to OK after processing.</summary>
		public bool AutoClose
		{
			get { return _AutoClose; }
			set { _AutoClose = value; }
		}

		/// <summary>When true (the default) links are collected as well as anchors.</summary>
		public bool GetLinks
		{
			get { return _GetLinks; }
			set { _GetLinks = value; }
		}

		/// <summary>
		/// Matches a whole document that contains at least one HTML comment, capturing the
		/// text before the comment ("before"), the comment itself ("comment") and the text
		/// after it ("after").
		/// </summary>
		public static readonly Regex _Comment = new Regex( "^(?<before>.*)(?<comment><!--.*?-->)(?<after>.*)$", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled );

		/// <summary>Matches an A tag with a quoted name attribute; the value is in group "anchor".</summary>
		public static readonly Regex _Anchor  = new Regex( "<\\s*A\\s[^>]*name\\s*=\\s*(?<quote>['\"])(?<anchor>.*?)\\k<quote>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled );

		/// <summary>
		/// Matches an A tag with an unquoted name attribute; the value is in group "anchor".
		/// The first value character must not be a quote, whitespace or '>' - otherwise
		/// <c>name= "x"</c> would backtrack and capture the quoted text, quotes included,
		/// as a second, bogus anchor.
		/// </summary>
		public static readonly Regex _Anchor2 = new Regex( "<\\s*A\\s[^>]*name\\s*=\\s*(?<anchor>[^'\"\\s>].*?)[\\s>]", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled );

		/// <summary>Matches an A or AREA tag with a quoted href attribute; the value is in group "url".</summary>
		public static readonly Regex _HRef    = new Regex( "<\\s*(?:A|AREA)\\s[^>]*href\\s*=\\s*(?<quote>['\"])(?<url>.*?)\\k<quote>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled );

		/// <summary>
		/// Matches an A or AREA tag with an unquoted href attribute; the value is in group "url".
		/// The first value character must not be a quote, whitespace or '>' (see _Anchor2).
		/// </summary>
		public static readonly Regex _HRef2   = new Regex( "<\\s*(?:A|AREA)\\s[^>]*href\\s*=\\s*(?<url>[^'\"\\s>].*?)[\\s>]", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled );

		/// <summary>
		/// Creates the dialog and builds its child controls.
		/// </summary>
		public DlgBrowser()
		{
			this.InitializeComponent();
		}

		/// <summary>
		/// Disposes the designer component container, if one exists, and then the base form.
		/// </summary>
		/// <param name="disposing">When true, the component container is disposed as well.</param>
		protected override void Dispose( bool disposing )
		{
			if ( disposing && components != null )
			{
				components.Dispose();
			}

			base.Dispose( disposing );
		}

		#region Windows Form Designer generated code
		/// <summary>
		/// Required method for Designer support - do not modify
		/// the contents of this method with the code editor.
		/// </summary>
		/// <remarks>
		/// Creates the progress bar, the URL text box and the web browser control, and
		/// hooks up the text box KeyPress, browser DocumentComplete and form Load handlers.
		/// </remarks>
		private void InitializeComponent()
		{
			System.Resources.ResourceManager resources = new System.Resources.ResourceManager(typeof(DlgBrowser));
			this._ProgressBar = new System.Windows.Forms.ProgressBar();
			this._TextBoxUrl = new System.Windows.Forms.TextBox();
			this._WebBrowser = new AxSHDocVw.AxWebBrowser();
			((System.ComponentModel.ISupportInitialize)(this._WebBrowser)).BeginInit();
			this.SuspendLayout();
			// 
			// _ProgressBar
			// 
			this._ProgressBar.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 
				| System.Windows.Forms.AnchorStyles.Right)));
			this._ProgressBar.Location = new System.Drawing.Point(8, 8);
			this._ProgressBar.Name = "_ProgressBar";
			this._ProgressBar.Size = new System.Drawing.Size(616, 23);
			this._ProgressBar.TabIndex = 0;
			// 
			// _TextBoxUrl
			// 
			this._TextBoxUrl.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 
				| System.Windows.Forms.AnchorStyles.Right)));
			this._TextBoxUrl.Location = new System.Drawing.Point(8, 40);
			this._TextBoxUrl.Name = "_TextBoxUrl";
			this._TextBoxUrl.Size = new System.Drawing.Size(616, 20);
			this._TextBoxUrl.TabIndex = 1;
			this._TextBoxUrl.Text = "";
			this._TextBoxUrl.KeyPress += new System.Windows.Forms.KeyPressEventHandler(this._TextBoxUrl_KeyPress);
			// 
			// _WebBrowser
			// 
			this._WebBrowser.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) 
				| System.Windows.Forms.AnchorStyles.Left) 
				| System.Windows.Forms.AnchorStyles.Right)));
			this._WebBrowser.Enabled = true;
			this._WebBrowser.Location = new System.Drawing.Point(8, 72);
			this._WebBrowser.OcxState = ((System.Windows.Forms.AxHost.State)(resources.GetObject("_WebBrowser.OcxState")));
			this._WebBrowser.Size = new System.Drawing.Size(616, 368);
			this._WebBrowser.TabIndex = 2;
			this._WebBrowser.DocumentComplete += new AxSHDocVw.DWebBrowserEvents2_DocumentCompleteEventHandler(this.WebBrowser_DocumentComplete);
			// 
			// DlgBrowser
			// 
			this.AutoScaleBaseSize = new System.Drawing.Size(5, 13);
			this.ClientSize = new System.Drawing.Size(632, 450);
			this.Controls.Add(this._WebBrowser);
			this.Controls.Add(this._TextBoxUrl);
			this.Controls.Add(this._ProgressBar);
			this.Icon = ((System.Drawing.Icon)(resources.GetObject("$this.Icon")));
			this.Name = "DlgBrowser";
			this.ShowInTaskbar = false;
			this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
			this.Text = "Getting Links";
			this.Load += new System.EventHandler(this.DlgBrowser_Load);
			((System.ComponentModel.ISupportInitialize)(this._WebBrowser)).EndInit();
			this.ResumeLayout(false);

		}
		#endregion

//-----------------------------------------------------------------------------

		/// <summary>
		/// Load handler: posts <c>Global.WM_GetLinks</c> to this form's own window so that
		/// the actual work is started from the message handler in WndProc.
		/// </summary>
		private void DlgBrowser_Load(object sender, System.EventArgs e)
		{
			IntPtr ownWindow = this.Handle;
			User32.PostMessage( ownWindow, Global.WM_GetLinks, 0, 0 );
		}

		/// <summary>
		/// Handles <c>Global.WM_GetLinks</c> by running the link extraction; the message is
		/// consumed here. Every other message goes to the base implementation.
		/// </summary>
		protected override void WndProc( ref Message m )
		{
			if ( m.Msg != Global.WM_GetLinks )
			{
				base.WndProc( ref m );
				return;
			}

			GetLinksCore();
		}

//-----------------------------------------------------------------------------

		/// <summary>
		/// Navigates the browser to the text currently in the URL box when Enter is pressed.
		/// </summary>
		private void _TextBoxUrl_KeyPress(object sender, System.Windows.Forms.KeyPressEventArgs e)
		{
			bool enterPressed = ( e.KeyChar == '\r' );
			if ( ! enterPressed ) return;

			Navigate( _TextBoxUrl.Text );
		}

//-----------------------------------------------------------------------------

		/// <summary>
		/// Runs the whole job: processes the single file (if set), then every file of the
		/// collection (if set) while advancing the progress bar, then navigates to the
		/// configured URL (if set) and finally closes the dialog with OK when AutoClose is on.
		/// </summary>
		private void GetLinksCore()
		{
			if ( _HtmlFile != null )
			{
				GetLinksCore( _HtmlFile );
			}

			if ( _HtmlFiles != null )
			{
				_ProgressBar.Maximum = _HtmlFiles.Count;
				_ProgressBar.Value = 0;

				int done = 0;
				foreach ( HtmlFileItem htmlFile in _HtmlFiles.Values )
				{
					GetLinksCore( htmlFile );

					// Advance only after the file is finished so the bar shows completed
					// work and reaches Maximum once the last file has been processed.
					_ProgressBar.Value = ++done;
				}
			}

			if ( _Url != null ) Navigate( _Url );

			if ( _AutoClose ) DialogResult = DialogResult.OK;
		}

		/// <summary>
		/// Extracts anchors (and optionally links) for one file, using the regex based
		/// parser when <c>Global.FastParse</c> is set and the browser based parser otherwise.
		/// </summary>
		/// <param name="htmlFile">File whose anchors and links are to be collected.</param>
		private void GetLinksCore( HtmlFileItem htmlFile )
		{
			if ( ! Global.FastParse )
			{
				GetLinksSlow( htmlFile );
				return;
			}

			GetLinksFast( htmlFile );
		}

		/// <summary>
		/// Regex based extraction for one file: resets its anchor and link collections,
		/// strips HTML comments from the markup, then collects anchors and, when
		/// link collection is enabled, links.
		/// </summary>
		/// <param name="htmlFile">File to parse; its <c>Html</c> text is the input.</param>
		private void GetLinksFast( HtmlFileItem htmlFile )
		{
			htmlFile.AnchorsHeader.AnchorItems.Clear();
			htmlFile.Links.Clear();

			string html = htmlFile.Html;
			if ( html.Length == 0 ) return;

			// _Comment is anchored to the whole text, so it matches at most once per pass;
			// each pass removes one comment and the loop ends when none is left.
			Match commentMatch = _Comment.Match( html );
			while ( commentMatch.Success )
			{
				html = commentMatch.Groups[ "before" ].Value + commentMatch.Groups[ "after" ].Value;
				commentMatch = _Comment.Match( html );
			}

			GetAnchorsFast( html, htmlFile );

			if ( _GetLinks )
			{
				GetLinksFast( html, htmlFile );
			}
		}

		/// <summary>
		/// Registers every anchor name found in the markup: first all quoted name
		/// attributes (_Anchor), then all unquoted ones (_Anchor2).
		/// </summary>
		/// <param name="html">Markup to scan.</param>
		/// <param name="htmlFile">File the anchors are added to.</param>
		private void GetAnchorsFast( string html, HtmlFileItem htmlFile )
		{
			Regex[] patterns = new Regex[] { _Anchor, _Anchor2 };

			foreach ( Regex pattern in patterns )
			{
				foreach ( Match hit in pattern.Matches( html ) )
				{
					AddAnchor( hit.Groups[ "anchor" ].Value, htmlFile );
				}
			}
		}

		/// <summary>
		/// Registers every link target found in the markup: first all quoted href
		/// attributes (_HRef), then all unquoted ones (_HRef2).
		/// </summary>
		/// <param name="html">Markup to scan.</param>
		/// <param name="htmlFile">File the links are added to.</param>
		private void GetLinksFast( string html, HtmlFileItem htmlFile )
		{
			Regex[] patterns = new Regex[] { _HRef, _HRef2 };

			foreach ( Regex pattern in patterns )
			{
				foreach ( Match hit in pattern.Matches( html ) )
				{
					AddLink( hit.Groups[ "url" ].Value, htmlFile );
				}
			}
		}

		/// <summary>
		/// Browser based extraction for one file: navigates the embedded browser to the
		/// topic URL, pumps messages until the DocumentComplete handler clears _Loading,
		/// resets the file's anchor and link collections and then reads anchors (and,
		/// when enabled, links) from the loaded document.
		/// </summary>
		/// <param name="htmlFile">File whose topic URL is loaded and inspected.</param>
		private void GetLinksSlow( HtmlFileItem htmlFile )
		{
			_Loading = true;

			Navigate( htmlFile.TopicEntry.URL );

			// Keep the UI message loop running while waiting for DocumentComplete.
			while ( _Loading ) { Application.DoEvents(); System.Threading.Thread.Sleep( 10 ); }

			htmlFile.AnchorsHeader.AnchorItems.Clear();
			htmlFile.Links.Clear();

			// The browser may hold no document or a non-HTML one; a hard cast would throw
			// in that case, so treat it as a topic without anchors and links instead.
			IHTMLDocument2 HTMLDocument = _WebBrowser.Document as IHTMLDocument2;
			if ( HTMLDocument == null )
			{
				Debug.WriteLine( "No HTML document available for '" + htmlFile.TopicEntry.URL + "'." );
				return;
			}

			GetAnchorsSlow( HTMLDocument, htmlFile );

			if ( _GetLinks ) GetLinksSlow( HTMLDocument, htmlFile );
		}

		/// <summary>
		/// Registers the name of every element in the document's anchors collection.
		/// Elements that are not anchor elements are reported through Debug output
		/// and a failing assertion.
		/// </summary>
		/// <param name="HTMLDocument">Loaded document to inspect.</param>
		/// <param name="htmlFile">File the anchors are added to.</param>
		private void GetAnchorsSlow( IHTMLDocument2 HTMLDocument, HtmlFileItem htmlFile )
		{
			IHTMLElementCollection anchors = HTMLDocument.anchors;

			foreach ( object oAnchor in anchors )
			{
				HTMLAnchorElementClass el = oAnchor as HTMLAnchorElementClass;
				if ( el != null )
				{
					AddAnchor( el.name, htmlFile );
					continue;
				}

				Debug.WriteLine( "Unknown anchor type: '" + oAnchor.GetType() + "' in '" + htmlFile.TopicEntry.URL + "'." );
				Debug.Assert( false );
			}
		}

		/// <summary>
		/// Adds a link item for the href of every anchor or area element in the document's
		/// links collection. Elements of any other type are reported through Debug output
		/// and a failing assertion.
		/// </summary>
		/// <param name="HTMLDocument">Loaded document to inspect.</param>
		/// <param name="htmlFile">File the links are added to.</param>
		private void GetLinksSlow( IHTMLDocument2 HTMLDocument, HtmlFileItem htmlFile )
		{
			IHTMLElementCollection links = HTMLDocument.links;

			foreach ( object oLink in links )
			{
				string href;

				HTMLAnchorElementClass anchorEl = oLink as HTMLAnchorElementClass;
				if ( anchorEl != null )
				{
					href = anchorEl.href;
				}
				else
				{
					HTMLAreaElementClass areaEl = oLink as HTMLAreaElementClass;
					if ( areaEl == null )
					{
						Debug.WriteLine( "Unknown link type: '" + oLink.GetType() + "' in '" + htmlFile.TopicEntry.URL + "'." );
						Debug.Assert( false );
						continue;
					}

					href = areaEl.href;
				}

				LinkItem link = new HtmlLinkItem( htmlFile, href );
				htmlFile.Links.Add( link );
			}
		}

		/// <summary>
		/// Browser DocumentComplete handler: clears the loading flag that the slow parser waits on.
		/// </summary>
		private void WebBrowser_DocumentComplete(object sender, AxSHDocVw.DWebBrowserEvents2_DocumentCompleteEvent e)
		{
			this._Loading = false;
		}

//-----------------------------------------------------------------------------

		/// <summary>
		/// Registers an anchor name for a file. A name that is already registered marks
		/// both the existing item and the file's anchor header as broken; a new name is
		/// stored with its ms-its URL.
		/// </summary>
		/// <param name="anchor">Anchor name; null trips a debug assertion and is ignored.</param>
		/// <param name="htmlFile">File the anchor belongs to.</param>
		private void AddAnchor( string anchor, HtmlFileItem htmlFile )
		{
			if ( anchor == null )
			{
				Debug.Assert( false );
				return;
			}

			AnchorItem duplicate = htmlFile.AnchorsHeader.AnchorItems[ anchor ] as AnchorItem;
			if ( duplicate == null )
			{
				string chmRoot = "ms-its:" + htmlFile.Parent.ChmFile.ChmFilePath + "::/";
				string url = chmRoot + htmlFile.LinkFile + "#" + anchor;

				AnchorItem item = new AnchorItem( htmlFile.AnchorsHeader, url );
				htmlFile.AnchorsHeader.AnchorItems.Add( anchor, item );
				return;
			}

			// Same name defined twice in one file.
			duplicate.State = State.Broken;
			htmlFile.AnchorsHeader.State = State.Broken;
		}

		/// <summary>
		/// Normalises a raw href found in a file and adds it to the file's links.
		/// A pure fragment ("#x") is prefixed with the file's own path, a relative path is
		/// prefixed with the file's directory, and anything without a scheme (or with a
		/// one-letter "scheme", i.e. a drive letter) is turned into an ms-its URL
		/// inside the file's CHM.
		/// </summary>
		/// <param name="url">Raw href value as found in the markup.</param>
		/// <param name="htmlFile">File that contains the link.</param>
		private void AddLink( string url, HtmlFileItem htmlFile )
		{
			if ( Global.IgnoreCss && Global.IsCss( url ) ) return;

			if ( url.Length > 0 )
			{
				if ( url[ 0 ] == '#' )
				{
					url = htmlFile.LinkFile + url;
				}
				else if ( url[ 0 ] != '/' && SchemeColonIndex( url ) < 0 )
				{
					// Relative path: resolve against the directory of the containing file.
					string dir = Path.GetDirectoryName( htmlFile.LinkFile );
					if ( dir != null && dir.Length > 0 ) url = dir + "/" + url;
				}
			}

			int i = SchemeColonIndex( url );
			if ( i < 0 || i == 1 )
			{
				string s = "ms-its:" + htmlFile.Parent.ChmFile.ChmFilePath + "::";
				if ( url.Length > 0 && url[ 0 ] != '/' ) s += "/";
				s += url;
				url = s;
			}

			LinkItem link = new HtmlLinkItem( htmlFile, url );

			htmlFile.Links.Add( link );
		}

		/// <summary>
		/// Returns the index of the ':' that ends a URL scheme, or -1 when the URL has none.
		/// A ':' that appears after the first '/', '?' or '#' belongs to the path, query or
		/// fragment (e.g. "page.htm#M:Foo.Bar") and does not make the URL absolute.
		/// </summary>
		private static int SchemeColonIndex( string url )
		{
			int colon = url.IndexOf( ':' );
			if ( colon < 0 ) return -1;

			int delimiter = url.IndexOfAny( new char[] { '/', '?', '#' } );
			if ( delimiter >= 0 && delimiter < colon ) return -1;

			return colon;
		}

//-----------------------------------------------------------------------------

		/// <summary>
		/// Shows the URL in the URL box and tells the embedded browser to load it, passing
		/// 0 for the flags and empty strings for the remaining by-ref arguments.
		/// </summary>
		/// <param name="url">Address to load.</param>
		private void Navigate( string url )
		{
			_TextBoxUrl.Text = url;

			object noFlags = 0;
			object blank = String.Empty;

			_WebBrowser.Navigate( url, ref noFlags, ref blank, ref blank, ref blank );
		}

//-----------------------------------------------------------------------------
	}

}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
United Kingdom
I discovered C# and .NET 1.0 Beta 1 in late 2000 and loved them immediately.
I have been writing software professionally in C# ever since.

In real life, I have spent 3 years travelling abroad,
I have held a UK Private Pilots Licence for 20 years,
and I am a PADI Divemaster.

I now live near idyllic Bournemouth in England.

I can work 'virtually' anywhere!

Comments and Discussions