Click here to Skip to main content
15,897,273 members
Articles / Programming Languages / C#

Harvesting Web Content into MHTML Archive

Rate me:
Please Sign up or sign in to vote.
4.69/5 (14 votes)
18 Feb 2006 · 8 min read · 62.1K   729   49
Capture and archive web resources by saving them in RFC-2557 (MHTML) compliant format. This library includes a framework for augmenting the capture process and allowing programmatic control, from downloading through saving. A replacement for CDOSYS/CDONTS.
using System;
using Rilling.Web.Mhtml.Mime.Artifacts;
using Rilling.Web.Mhtml.Mime.Handlers;
using System.Collections;
using System.Text;
using System.IO;
using System.Net;
using System.Web;
using Rilling.Web.Mhtml.Resolvers;
using System.Xml;
using NUnit.Framework;

namespace Rilling.Web.Mhtml.TestHarness
{
	/// <summary>
	/// NUnit fixture for <see cref="StylesheetLinkResolver"/>.
	/// </summary>
	/// <remarks>
	/// The property tests run against a freshly constructed resolver and need no
	/// external resources.  The <c>Locate*</c> tests have their bodies commented
	/// out; the disabled code issues HTTP requests to
	/// <c>http://localhost/Rilling.Web.Mhtml.TestSite</c>.  Those tests are marked
	/// with <c>[Ignore]</c> so the runner reports them as skipped instead of
	/// counting an empty method as a passing test.
	/// </remarks>
	[TestFixture()]
	public class StylesheetLinkResolverTest
	{
		/// <summary>
		/// Verifies that a newly constructed resolver reports <c>true</c> for
		/// SearchCSSFiles, SearchHTMLFiles and IncludeNestedCssFiles.
		/// </summary>
		[Test()]
		public void InstantiateDefaultObject_Test()
		{
			StylesheetLinkResolver lr = new StylesheetLinkResolver();

			Assertion.Assert("The SearchCSSFiles property was not initialized properly.", 
				lr.SearchCSSFiles);
			Assertion.Assert("The SearchHTMLFiles property was not initialized properly.", 
				lr.SearchHTMLFiles);
			Assertion.Assert("The IncludeNestedCssFiles property was not initialized properly.", 
				lr.IncludeNestedCssFiles);
		}

		/// <summary>
		/// Verifies that SearchCSSFiles returns the value last assigned to it,
		/// for both <c>true</c> and <c>false</c>.
		/// </summary>
		[Test()]
		public void SetSearchCSSFiles_Test()
		{
			StylesheetLinkResolver lr = new StylesheetLinkResolver();
			bool testValue = true;

			lr.SearchCSSFiles = testValue;
			Assertion.AssertEquals(testValue, lr.SearchCSSFiles);

			// Flip the value so the check does not depend on the initial state.
			lr.SearchCSSFiles = !testValue;
			Assertion.AssertEquals(!testValue, lr.SearchCSSFiles);
		}

		/// <summary>
		/// Verifies that SearchHTMLFiles returns the value last assigned to it,
		/// for both <c>true</c> and <c>false</c>.
		/// </summary>
		[Test()]
		public void SetSearchHTMLFiles_Test()
		{
			StylesheetLinkResolver lr = new StylesheetLinkResolver();
			bool testValue = true;

			lr.SearchHTMLFiles = testValue;
			Assertion.AssertEquals(testValue, lr.SearchHTMLFiles);

			// Flip the value so the check does not depend on the initial state.
			lr.SearchHTMLFiles = !testValue;
			Assertion.AssertEquals(!testValue, lr.SearchHTMLFiles);
		}

		/// <summary>
		/// Disabled.  The commented-out body expects no links to be located when
		/// both SearchCSSFiles and SearchHTMLFiles are switched off.
		/// </summary>
		[Test()]
		[Ignore("Test body is commented out; the disabled code requests http://localhost/Rilling.Web.Mhtml.TestSite.")]
		public void LocateReferencesInNoFiles_Test()
		{
//			HtmlMimeHandler mh = new HtmlMimeHandler("image/jpg");
//			StylesheetLinkResolver lr = new StylesheetLinkResolver();
//								   lr.SearchCSSFiles = false;
//								   lr.SearchHTMLFiles = false;
//			HttpWebRequest  req  = (HttpWebRequest)WebRequest.Create("http://localhost/Rilling.Web.Mhtml.TestSite/CSSSample.css");
//			HttpWebResponse resp = (HttpWebResponse)req.GetResponse();
//
//			LinkReplacementInfo[] linkInfo = 
//				lr.LocateReferences((TextArtifact)mh.CreateArtifact(resp), new Uri("http://localhost/Rilling.Web.Mhtml.TestSite"));
//
//			Assertion.Assert("Wrong number of links resolved.", linkInfo.Length==0);
		}

		/// <summary>
		/// Disabled.  The commented-out body searches CSSSample.css with
		/// SearchHTMLFiles switched off and expects four stylesheet links.
		/// </summary>
		[Test()]
		[Ignore("Test body is commented out; the disabled code requests http://localhost/Rilling.Web.Mhtml.TestSite.")]
		public void LocateReferencesInCSSFiles_Test()
		{
//			StylesheetMimeHandler mh = new StylesheetMimeHandler("image/jpg");
//			StylesheetLinkResolver lr = new StylesheetLinkResolver();
//								   lr.SearchHTMLFiles = false;
//			HttpWebRequest  req  = (HttpWebRequest)WebRequest.Create("http://localhost/Rilling.Web.Mhtml.TestSite/CSSSample.css");
//			HttpWebResponse resp = (HttpWebResponse)req.GetResponse();
//
//			LinkReplacementInfo[] linkInfo = 
//				lr.LocateReferences((TextArtifact)mh.CreateArtifact(resp), new Uri("http://localhost/Rilling.Web.Mhtml.TestSite/"));
//
//			// Identify all the files that were identified.
//			Hashtable linkIdx = new Hashtable();
//			foreach(LinkReplacementInfo li in linkInfo)
//			{
//				string fileName = Path.GetFileName(li.TargetUrl.AbsolutePath);
//				linkIdx.Add(fileName, fileName);
//			}
//
//			// Define an ordered list of expected links.
//			string[] expectedLinks = 
//				new string[] {"wnScreenBase.css", "wnScreenFinance.css", 
//							  "wnColor.css", "wnTemp.css"};
//
//			Assertion.AssertEquals("Wrong number of links resolved.", linkInfo.Length, expectedLinks.Length);
//
//			// Verify that each expected file was actually returned.
//			foreach(string f in expectedLinks)
//			{
//				Assertion.Assert("The expected file '" + f + "' was not identified", linkIdx.ContainsKey(f));
//			}
		}

		/// <summary>
		/// Verifies that IncludeNestedCssFiles returns the value last assigned to
		/// it, for both <c>true</c> and <c>false</c>.
		/// </summary>
		[Test()]
		public void SetIncludeNestedCssFiles_Test()
		{
			StylesheetLinkResolver lr = new StylesheetLinkResolver();
			bool testValue = true;

			lr.IncludeNestedCssFiles = testValue;
			Assertion.AssertEquals(testValue, lr.IncludeNestedCssFiles);

			// Flip the value so the check does not depend on the initial state.
			lr.IncludeNestedCssFiles = !testValue;
			Assertion.AssertEquals(!testValue, lr.IncludeNestedCssFiles);
		}

		/// <summary>
		/// Disabled.  The commented-out body searches ImageSamples.htm with
		/// IncludeNestedCssFiles switched off and expects two stylesheet links.
		/// </summary>
		[Test()]
		[Ignore("Test body is commented out; the disabled code requests http://localhost/Rilling.Web.Mhtml.TestSite.")]
		public void LocateWithoutNestingCss_Test()
		{
//			HtmlMimeHandler mh = new HtmlMimeHandler("image/jpg");
//			StylesheetLinkResolver lr = new StylesheetLinkResolver();
//								   lr.IncludeNestedCssFiles = false;
//			HttpWebRequest  req  = (HttpWebRequest)WebRequest.Create("http://localhost/Rilling.Web.Mhtml.TestSite/ImageSamples.htm");
//			HttpWebResponse resp = (HttpWebResponse)req.GetResponse();
//
//			LinkReplacementInfo[] linkInfo = 
//				lr.LocateReferences((TextArtifact)mh.CreateArtifact(resp), new Uri("http://localhost/Rilling.Web.Mhtml.TestSite/"));
//
//			// Identify all the files that were identified.
//			Hashtable linkIdx = new Hashtable();
//			foreach(LinkReplacementInfo li in linkInfo)
//			{
//				string fileName = Path.GetFileName(li.TargetUrl.AbsolutePath);
//				linkIdx.Add(fileName, fileName);
//			}
//
//			// Define an ordered list of expected links.
//			string[] expectedLinks = 
//				new string[] {"css007.css", "css008.css"};
//
//			Assertion.AssertEquals("Wrong number of links resolved.", linkInfo.Length, expectedLinks.Length);
//
//			// Verify that each expected file was actually returned.
//			foreach(string f in expectedLinks)
//			{
//				Assertion.Assert("The expected file '" + f + "' was not identified", linkIdx.ContainsKey(f));
//			}
		}

		/// <summary>
		/// Disabled.  The commented-out body searches ImageSamples.htm with
		/// SearchCSSFiles switched off and expects eight stylesheet links.
		/// </summary>
		[Test()]
		[Ignore("Test body is commented out; the disabled code requests http://localhost/Rilling.Web.Mhtml.TestSite.")]
		public void LocateReferencesInHTMLFiles_Test()
		{
//			HtmlMimeHandler mh = new HtmlMimeHandler("image/jpg");
//			StylesheetLinkResolver lr = new StylesheetLinkResolver();
//								   lr.SearchCSSFiles = false;
//			HttpWebRequest  req  = (HttpWebRequest)WebRequest.Create("http://localhost/Rilling.Web.Mhtml.TestSite/ImageSamples.htm");
//			HttpWebResponse resp = (HttpWebResponse)req.GetResponse();
//
//			LinkReplacementInfo[] linkInfo = 
//				lr.LocateReferences((TextArtifact)mh.CreateArtifact(resp), new Uri("http://localhost/Rilling.Web.Mhtml.TestSite/"));
//
//			// Identify all the files that were identified.
//			Hashtable linkIdx = new Hashtable();
//			foreach(LinkReplacementInfo li in linkInfo)
//			{
//				string fileName = Path.GetFileName(li.TargetUrl.AbsolutePath);
//				linkIdx.Add(fileName, fileName);
//			}
//
//			// Define an ordered list of expected links.
//			string[] expectedLinks = 
//				new string[] {"css001.css", "css002.css", "css003.css", 
//							  "css004.css", "css005.css", "css006.css",
//							  "css007.css", "css008.css"};
//
//			Assertion.AssertEquals("Wrong number of links resolved.", linkInfo.Length, expectedLinks.Length);
//
//			// Verify that each expected file was actually returned.
//			foreach(string f in expectedLinks)
//			{
//				Assertion.Assert("The expected file '" + f + "' was not identified", linkIdx.ContainsKey(f));
//			}
		}
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Software Developer (Senior)
United States
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions