Click here to Skip to main content
Click here to Skip to main content

Using MbUnit StaticTestFactory to Validate Sitemap.xml Links

, 12 May 2010
Rate this:
Please Sign up or sign in to vote.
I’ve been investigating a replacement for our current link checker (SEO Optimization Toolkit) to be run automatically as part of our build so that we can get a quick heads up if one of our dynamic pages breaks.  The problem is that as most of our sites are built with Ektron there’s a lot of potential for a combination of content and code to break individual pages that share a template with working pages.

I’ve been investigating a replacement for our current link checker (SEO Optimization Toolkit) to be run automatically as part of our build so that we can get a quick heads up if one of our dynamic pages breaks.  The problem is that as most of our sites are built with Ektron there’s a lot of potential for a combination of content and code to break individual pages that share a template with working pages. 

As these pages are data-driven, hard-coding tests for them is very time-consuming and very fragile.  Fortunately, most of our Ektron sites have an automatically generated sitemap.xml, which gives us a neat list of urls to test. This is where MbUnit’s StaticTestFactory comes in useful, as it allows us to dynamically create a distinct test for each url.

Enjoy.

   1: using System;
   2: using System.Collections.Generic;
   3: using System.Globalization;
   4: using System.Linq;
   5: using System.Net;
   6: using System.Xml.Linq;
   7: using MbUnit.Framework;
   8:  
   9: namespace MartinOnDotNet.VerificationTests
  10: {
  11:     /// <span class="code-SummaryComment"><summary>
</span>  12:     /// Includes methods to verify the validity of a sitemap.xml
  13:     /// <span class="code-SummaryComment"></summary>
</span>  14:     public sealed class ValidateSiteMap
  15:     {
  16:  
  17:         /// <span class="code-SummaryComment"><summary>
</span>  18:         /// Generates a static test for each url referenced within the sitemap
  19:         /// <span class="code-SummaryComment"></summary>
</span>  20:         [StaticTestFactory, Parallelizable(TestScope.Descendants)]
  21:         public static IEnumerable<Test> GenerateSiteMapLinkTests()
  22:         {
                  // Uri for Xml Sitemap to test : http://localhost/sitemap.xml
  23:             Uri sitemapUri = new Uri(Properties.Settings.Default.SiteMapXmlUri); 
                  //timeout for each request in ms : 300ms
  24:             int requestTimeout = Properties.Settings.Default.SiteMapRequestTimeout; 
  25:  
  26:             IEnumerable<string> locations = GetSitemapLocations(sitemapUri);
  27:             //is sitemap populated
  28:             yield return CreateSitemapHasNodesTest(sitemapUri, locations);
  29:             
  30:             //are all reference urls valid
  31:             foreach (string location in locations)
  32:             {
  33:                 yield return CreateLocationTest(requestTimeout, location,
                          HttpStatusCode.OK);
  34:             }
  35:             
  36:             // check that robots.txt is present
  37:             Uri robotstxtUri = new Uri(sitemapUri, "/robots.txt");
  38:             yield return CreateLocationTest(requestTimeout, robotstxtUri.ToString(),
                      HttpStatusCode.OK);
  39:             //finally, let's check that a deliberately incorrect url
  40:             Uri nonExistantUri = new Uri(sitemapUri, "/nonexistantfileonserver/");
  41:             yield return CreateLocationTest(requestTimeout,
                      nonExistantUri.ToString(), HttpStatusCode.NotFound);
  42:             
  43:         }
  44:  
  45:         /// <span class="code-SummaryComment"><summary>
</span>  47:         /// <span class="code-SummaryComment"></summary>
</span>  48:         /// <span class="code-SummaryComment"><param name="sitemapUri">The sitemap URI.</param>
</span>  49:         /// <span class="code-SummaryComment"><param name="locations">The locations.</param>
</span>  50:         /// <span class="code-SummaryComment"><returns>A test that checks the sitemap has nodes</returns>
</span>  51:         private static TestCase CreateSitemapHasNodesTest(Uri sitemapUri,
                  IEnumerable<string> locations)
  52:         {
  53:             return new TestCase(string.Format(CultureInfo.InvariantCulture, 
                      "{0} - Sitemap Has Entries", sitemapUri), () =>
  54:             {
  55:                 Assert.IsTrue(locations.Any());
  56:             });
  57:         }
  58:  
  59:         /// <span class="code-SummaryComment"><summary>
</span>  60:         /// Creates the location test.
  61:         /// <span class="code-SummaryComment"></summary>
</span>  62:         /// <span class="code-SummaryComment"><param name="requestTimeout">The request timeout.</param>
</span>  63:         /// <span class="code-SummaryComment"><param name="location">The location.</param>
</span>  64:         /// <span class="code-SummaryComment"><returns>A unique test for a sitemap location</returns>
</span>  65:         private static TestCase CreateLocationTest(int requestTimeout,
                  string location, HttpStatusCode expectedResult)
  66:         {
  67:             return new TestCase(location, () =>
  68:             {
  69:                 HttpWebRequest wrq =
                          HttpWebRequest.Create(location) as HttpWebRequest;
                      // appear to be google to escape any custom error handling
  70:                 wrq.UserAgent = "Googlebot/2.1 (+http://www.google.com/bot.html)"; 
  71:                 wrq.Timeout = requestTimeout;
  72:                 HttpWebResponse wrp = null;
  73:                 try
  74:                 {
  75:                     wrp = GetResponse(wrq);
  76:                     Assert.AreEqual<System.Net.HttpStatusCode>(expectedResult,
                              wrp.StatusCode);
  77:                 }
  78:                 finally
  79:                 {
  80:                     if (wrp != null) wrp.Close();
  81:                 }
  82:             });
  83:         }
  84:  
  85:         #region Helper Methods
  86:  
  87:         /// <span class="code-SummaryComment"><summary>
</span>  88:         /// Gets the sitemap locations.
  89:         /// <span class="code-SummaryComment"></summary>
</span>  90:         /// <span class="code-SummaryComment"><param name="sitemapUri">The sitemap URI.</param>
</span>  91:         /// <span class="code-SummaryComment"><returns>A list of locations referenced within the sitemap</returns>
</span>  92:         private static IEnumerable<string> GetSitemapLocations(Uri sitemapUri)
  93:         {
  94:             XNamespace xn = XNamespace.Get(
                      @"http://www.sitemaps.org/schemas/sitemap/0.9");
  95:             XDocument xdoc = XDocument.Load(sitemapUri.ToString(),
                      LoadOptions.PreserveWhitespace);
  96:             return from loc in xdoc.Descendants(xn + "loc")
  97:                             select loc.Value;
  98:         }
  99:    
 100:         /// <span class="code-SummaryComment"><summary>
</span> 101:         /// Gets the response object and handles any protocol exceptions
 102:         /// <span class="code-SummaryComment"></summary>
</span> 103:         /// <span class="code-SummaryComment"><param name="request">The request.</param>
</span> 104:         /// <span class="code-SummaryComment"><returns>The response object if available</returns>
</span> 105:         private static HttpWebResponse GetResponse(HttpWebRequest request)
 106:         {
 107:             try
 108:             {
 109:                 return request.GetResponse() as HttpWebResponse;
 110:             }
 111:             catch (WebException wex)
 112:             {
 113:                 if (wex.Status == WebExceptionStatus.ProtocolError)
 114:                 {
 115:                     return wex.Response as HttpWebResponse;
 116:                 }
 117:                 else
 118:                 {
 119:                     throw;
 120:                 }
 121:             }
 122:         }
 123:  
 124:         #endregion
 125:  
 126:     }
 127: }

Attachment: Visual Studio Project

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

About the Author

Martin Jarvis
Software Developer (Senior) Freestyle Interactive Ltd
United Kingdom
I'm a lead developer for Freestyle Interactive Ltd where we create many wonderful websites built on Microsoft's ASP.NET and Ektron CMS.
 
I've been developing .Net applications (both Windows and Web) since 2002.
Follow on   Twitter

Comments and Discussions

 
-- There are no messages in this forum --
| Advertise | Privacy | Mobile
Web01 | 2.8.140718.1 | Last Updated 12 May 2010
Article Copyright 2010 by Martin Jarvis
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid