Click here to Skip to main content
15,894,540 members
Articles / Programming Languages / XML

RSS Consumer

Rate me:
Please Sign up or sign in to vote.
4.14/5 (11 votes)
12 May 2005CPOL2 min read 75K   1.1K   35  
Building an application to read RSS feeds.
<!--TOOLBAR_EXEMPT-->
<HTML >
<HEAD>
	<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=windows-1252" />
    	<TITLE>Building a Desktop News Aggregator (Extreme XML)</TITLE>
<META NAME="Description" CONTENT="Dare Obasanjo builds a C# application that retrieves and displays news feeds from various Web sites. The application utilizes XPath, XSLT, XML Schema, the DOM, and XML Serialization in the .NET Framework."/>
<META NAME="Robots" CONTENT=""/>
<META NAME="Keywords" CONTENT=""/>
<META NAME="MS.LOCALE" CONTENT="en-us"/>

<LINK REL="stylesheet" TYPE="text/css" HREF="/library/shared/common/css/ie4.css" />
<LINK REL="stylesheet" TYPE="text/css" HREF="/library/shared/comments/css/ie5.css" />

	<style>
	BODY
	{
		font-family:verdana,arial,helvetica;
		margin:0;
	}
	</style>
	
<SCRIPT LANGUAGE="javascript" SRC="/library/toolbar/toolbar.js"></SCRIPT>
<SCRIPT LANGUAGE="javascript" SRC="/library/svy/Broker.js"></SCRIPT>

<LINK REL="stylesheet" TYPE="text/css" HREF="/library/shared/eyebrow/css/default.css" />
<LINK REL="stylesheet" TYPE="text/css" HREF="/library/shared/webparts/ie.css" />


   <SCRIPT LANGUAGE="JavaScript"><!--
   // Constructor for a hard-coded description of one browser
   // (MSIE 6.0 on Windows NT 5.1). Every property is a fixed literal.
   // NOTE(review): presumably emitted server-side from the request's
   // User-Agent header -- confirm against the page generator.
   function BrowserData()
   {
      // [property name, value] pairs, assigned in this exact order.
      var settings = [
         ["userAgent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)"],
         ["bot", false],
         ["browser", "MSIE"],
         ["majorVer", 6],
         ["minorVer", "0"],
         ["betaVer", "0"],
         ["platform", "NT"],
         ["platVer", "5.1; SV1; .NET CLR 1.1.4322"],
         ["getsNavBar", true],
         ["doesActiveX", true],
         ["doesPersistence", true],
         ["fullVer", 6]
      ];

      for (var i = 0; i < settings.length; i++) {
         this[settings[i][0]] = settings[i][1];
      }
   }

   // Page-wide instance read by other scripts.
   var oBD = new BrowserData();

   //--></SCRIPT><BASE TARGET="_top" />

<script>
	// Runs only when this page is the top-level window (self == top), i.e. it
	// is not loaded inside a frame; it then navigates to the article URL below.
	// NOTE(review): presumably this reloads the article inside the MSDN
	// library frameset -- confirm.
	if( self == top )
{
	location = "/library/en-us/dnexxml/html/xml02172003.asp";
}

</script>

<xml id='xmlPageContext'><eyebrow findmenu="false">
	<item label="MSDN Home" url="/default.asp"/>
	<item label="MSDN Library" url="/library/default.asp"/>
	<item label=".NET Development" url="/library/en-us/dnanchor/html/netdevanchor.asp" id="msdnlib450" xmlsrc="/library/en-us/toc/msdnlib/msdnlib450_.xml"/><item label="XML and the .NET Framework" id="msdnlib552" xmlsrc="/library/en-us/toc/msdnlib/msdnlib552_.xml"/><item label="Columns" id="msdnlib555"/><item label="Extreme XML" id="msdnlib556"/><item label="Building a Desktop News Aggregator" url="/library/en-us/dnexxml/html/xml02172003.asp" id="dnexxml15"/></eyebrow></xml>
<script>
// Content identifier for this article; the same value appears as the
// ContentID query parameter of the ratings iframe in the page body.
var sContentID = "_966232"; 
 </script>


        <!--VENUS_START-->
        <meta name="MSHTOCTitle" content="Building a Desktop News Aggregator" />
        <meta name="MSHRLTitle" content="Building a Desktop News Aggregator" />
        <meta name="MSHKeywordA" content="xml02172003"/>
        <meta name="MSHKeywordK" content="XML serialization"/>
        <meta name="MSHKeywordK" content="XML, news aggregator"/>
        <meta name="MSHKeywordK" content="XPath"/>
        <meta name="MSHKeywordA" content="xml02172003"/>
        <meta name="MSHAttr" content="DevLang:CSharp"/>
        <meta name="MSHAttr" content="DevLangVers:CSharp"/>
        <meta name="MSHAttr" content="DevLang:XML"/>
        <meta name="MSHAttr" content="DevLangVers:kbXML"/>
        <meta name="MSHAttr" content="DevLang:XSL"/>
        <meta name="MSHAttr" content="DevLangVers:kbXSL"/>
        <meta name="MSHAttr" content="DocSet:kbmsdn"/>
        <meta name="MSHAttr" content="HostCPU:kbx86"/>
        <meta name="MSHAttr" content="HostOS:Windows"/>
        <meta name="MSHAttr" content="HostOSVers:kbWinOS"/>
        <meta name="MSHAttr" content="Locale:kbEnglish"/>
        <meta name="MSHAttr" content="Media:kbText"/>
        <meta name="MSHAttr" content="TargetCPU:kbx86"/>
        <meta name="MSHAttr" content="TargetOSVers:kbWinOS"/>
        <meta name="MSHAttr" content="Technology:kbNetFramewk"/>
        <meta name="MSHAttr" content="TechnologyVers:kbNetFramewk"/>
        <meta name="MSHAttr" content="Technology:XML"/>
        <meta name="MSHAttr" content="TechnologyVers:kbXML"/>
        <meta name="MSHAttr" content="TechnologyVers:kbXSLT"/>
        <meta name="MSHAttr" content="TopicType:kbArticle"/>
        <meta name="MSHAttr" content="TargetOS:Windows"/>

        <!---VENUS_END--->


<link rel=stylesheet type=text/css href='/library/toolbar/3.0/css.aspx?c=/nonlibraryshell.config' />
<script language=javascript>
// Declare doImage and TType as globals while keeping any value they already
// hold, so footerjs can compare them against null even when no other script
// has defined them.
var doImage=doImage;var TType=TType;
// Sets the CSS class of the idx-th TD cell inside the element whose id is tbl.
//   tbl - id of the containing element (looked up via document.getElementById,
//         or document.all on browsers without it)
//   idx - zero-based index into that element's TD cells
//   cls - class name to assign to the selected cell
// Does nothing when the element, its cell list, or the requested cell is missing.
function mhHover(tbl, idx, cls) {
  // Prefer the DOM lookup; fall back to the legacy document.all collection.
  var table = document.getElementById ? document.getElementById(tbl) : document.all(tbl);
  if (table == null) return;

  var cells = table.getElementsByTagName ? table.getElementsByTagName("TD") : table.all.tags("TD");
  if (cells == null || cells.length <= idx) return;

  // A negative or non-numeric idx passes the length check above but selects
  // no cell; bail out instead of throwing on the className assignment.
  var cell = cells[idx];
  if (cell == null) return;

  cell.className = cls;
}
// Writes a hidden, zero-sized tracking image into doc via doc.write.
//   doc - object exposing write(), e.g. the page's document
// Nothing is written unless doImage is null/undefined. The TYPE query
// parameter of the image URL is TType, or "PV" when TType is null/undefined.
function footerjs(doc){if(doImage==null){var tt=TType==null?"PV":TType;doc.write('<layer visibility="hide"><div style="display:none"><img src="http://c.microsoft.com/trans_pixel.asp?source=msdn&TYPE=' + tt + '&p=library_en-us_dnexxml_html&URI=%2flibrary%2ftoolbar%2f3.0%2fasp.aspx%3fmode%3dhead%26c%3d%2fnonlibraryshell.config%26h%3dmsdn%252Emicrosoft%252Ecom%26u%3d%252Flibrary%252Fen%252Dus%252Fdnexxml%252Fhtml%252Fxml02172003%252Easp%26r%3dhttp%253A%252F%252Fmsdn%252Emicrosoft%252Ecom%252Flibrary%252Fshared%252Fdeeptree%252Fasp%252Frightframe%252Easp%253Fdtcfg%253D%252Flibrary%252Fdeeptreeconfig%252Exml%2526url%253D%252Flibrary%252Fen%252Dus%252Fdnexxml%252Fhtml%252Fxml02172003%252Easp%253Fframe%253Dtrue%2526hidetoc%253Dfalse&GUID=1F4FC18C-F71E-47FB-8FC9-612F8EE59C61&r=http%3a%2f%2fmsdn.microsoft.com%2flibrary%2fshared%2fdeeptree%2fasp%2frightframe.asp%3fdtcfg%3d%2flibrary%2fdeeptreeconfig.xml%26url%3d%2flibrary%2fen-us%2fdnexxml%2fhtml%2fxml02172003.asp%3fframe%3dtrue%26hidetoc%3dfalse" width=0 height=0 hspace=0 vspace=0 border=0 /></div></layer>');}}
</script>
 </HEAD> <BODY TOPMARGIN="0"  LEFTMARGIN="0" MARGINHEIGHT="0" MARGINWIDTH="0" BGCOLOR="#FFFFFF" TEXT="#000000">

       <TABLE BORDER="0" CELLSPACING="0" CELLPADDING="4" HEIGHT="24" WIDTH="100%" BGCOLOR="#FFFFFF">
       <TR>
        <TD CLASS="eyebrow" VALIGN="middle" ALIGN="left" WIDTH="100%">&nbsp;&nbsp;

            <a class="small" target="_top" href="/default.asp">MSDN Home</a>&nbsp;>&nbsp;
            <a class="small" target="_top" href="/library/default.asp">MSDN Library</a>&nbsp;>&nbsp;
            <a class="small" target="_top" href="/library/default.asp?url=/library/en-us/dnanchor/html/netdevanchor.asp">.NET Development</a>&nbsp;>&nbsp;<a href=""></a>
        </TD>
       </TR>
       </TABLE>
 <TABLE STYLE="table-layout:fixed" class='clsContainer' CELLPADDING='15' CELLSPACING='0' float='left' WIDTH='100%' BORDER='0'> <TR> <TD VALIGN='top'>
<TABLE cellpadding="0" cellspacing="0" width="145" id="topTable" class="clsPartContainer" style="FLOAT: right; MARGIN-LEFT: 8px; MARGIN-RIGHT: 6px" border="0">
	<TBODY>
		<TR>
			<TD valign="top">
				<!-- Page Options web part Start -->
				<TABLE cellpadding="0" cellspacing="0" class="clsPart" width="145" border="0" id="BF309568-1CD4-4c9c-A46E-BB1CA97E0C97">
					<TR>
						<TD class="clsPartHead" valign="center" align="left" height="19" width="15">
							<IMG class="clsPartHead" src="/library/shared/webparts/images/gripblue.gif" height="19" width="15" align="absmiddle">
						</TD>
						<TD class="clsPartHead" valign="center" align="center" width="115">
							<B class="clsPartHead">Page Options</B>
						</TD>
						<TD class="clsPartRight" valign="center" align="right" height="19" width="25">
							<IMG class="clsMinimize" src="/library/shared/webparts/images/downlevel.gif" height="19" width="25" align="absmiddle">
						</TD>
					</TR>
					<TR>
						<TD colspan="3">
							<TABLE bgcolor="#ffffff" width="100%" cellpadding="0" cellspacing="0" border="0" id="Table1">
								<TR>
									<TD bgcolor="#6699cc" colspan="1" width="1" valign="top"><DIV style="PADDING-RIGHT:0px;PADDING-LEFT:0px;PADDING-BOTTOM:0px;MARGIN:1px;PADDING-TOP:0px"></DIV>
									</TD>
									<TD width="145" bgcolor="#f1f1f1" valign="top">
									<IFRAME id='frmRatingsOptions' src='/msdn-online/shared/components/ratings/ratings.aspx?opt=1&ContentID=_966232&HideDiscuss=1' height='130' frameborder='0' scrolling='no' width='100%' vspace='0' hspace='0'></IFRAME>
									</TD>
									<TD bgcolor="#6699cc" colspan="1" width="1" valign="top"><DIV style="PADDING-RIGHT:0px;PADDING-LEFT:0px;PADDING-BOTTOM:0px;MARGIN:1px;PADDING-TOP:0px"></DIV>
									</TD>
								</TR>
								<TR>
									<TD bgcolor="#6699cc" colspan="3" height="1" valign="top"><DIV style="PADDING-RIGHT:0px;PADDING-LEFT:0px;PADDING-BOTTOM:0px;MARGIN:1px;PADDING-TOP:0px"></DIV>
									</TD>
								</TR>
							</TABLE>
						</TD>
					</TR>
				</TABLE>
				<!-- Page Options web part end -->
				<DIV class="storeUserData" id="oLayout"></DIV>
			</TD>
		</TR>
	</TBODY>
</TABLE>

<!--TOOLBAR_START-->
<!--TOOLBAR_EXEMPT-->
<!--TOOLBAR_END-->
<!-- Begin Content -->

<!--NONSCROLLING BANNER START-->
<div id="nsbanner">

<div id="TitleRow">
<H1 class="dtH1"><A NAME="xml02172003"></A>Building a Desktop News Aggregator</H1>
</div></div>
<!--NONSCROLLING BANNER END-->
<DIV id="nstext" valign="bottom">&nbsp;
<P>Dare Obasanjo<BR>
Microsoft Corporation</P>

<P>Revised March 14, 2003</P>

<P><B>Summary:</B> Dare Obasanjo builds a C# application that retrieves and displays news feeds from various Web sites. The application utilizes XPath, XSLT, XML Schema, the DOM, and XML Serialization in the .NET Framework. (12 printed pages)</P>

<P><A HREF="http://download.microsoft.com/download/9/4/5/945f1108-45a3-42cb-97d9-4ce04eb4f051/xml02172003_sample.exe">Download the xml02172003_sample.exe</A>.</P>

<BLOCKQUOTE class="dtBlock">
<B>Note&nbsp;&nbsp;&nbsp;</B>The sample application associated with this article was updated on March 14, 2003. Significant upgrades to various features have been made and it is recommended that you upgrade earlier versions to this latest release. </BLOCKQUOTE>

<H2 class="dtH1">Introduction</H2>

<P>Like most people who spend time online, I have a number of Web sites I read on a daily basis. I recently noticed that I was checking an average of five to ten Web sites every other hour when I wanted to see if there were any new articles or updates to the content on a site. This prompted me to investigate the likelihood of creating a desktop application that would do all the legwork for me and alert me when new content appeared on my favorite Web sites. My investigations led to my discovery of RSS and the creation of my desktop news aggregator, RSS Bandit. </P>

<H2 class="dtH1">What Is RSS? </H2>

<P>RSS is an XML format used for syndicating news and similar content from online news sources. RSS is used by news sites like <A HREF="http://news.com.com">C|Net</A> and <A HREF="http://www.wired.com">Wired</A>, online technical journals like <A HREF="http://www.xml.com">XML.com</A>, and Web logs like <A HREF="http://www.gotdotnet.com/team/dbox/spoutletex.aspx">Don Box's Spoutletex</A> and <A HREF="http://joelonsoftware.com/">Joel on Software</A>. </P>

<P>An RSS feed is a regularly updated XML document that contains metadata about a news source and the content in it. Minimally an RSS feed consists of a <code class="ce">channel</code> that represents the news source, which has a <code class="ce">title</code>, <code class="ce">link</code>, and <code class="ce">description</code> that describe the news source. Additionally, an RSS feed typically contains one or more <code class="ce">item</code> elements that represent individual news items, each of which should have a <code class="ce">title</code>, <code class="ce">link</code>, or <code class="ce">description</code>. </P>

<BLOCKQUOTE class="dtBlock">
<B>Note&nbsp;&nbsp;&nbsp;</B>The aforementioned elements appear in most RSS feeds but are not the only ones that can appear. Many RSS feeds also contain additional elements such as <code class="ce">date</code> or <code class="ce">language</code>. However, these elements and many others appear less commonly in practice. </BLOCKQUOTE>

<P>Below is a sample RSS 0.91 feed for the MSDN XML Developer Center:</P>

<PRE class="code">&lt;rss version="0.91"&gt;
  &lt;channel&gt;
    &lt;title&gt;MSDN XML Developer Center&lt;/title&gt;
    &lt;link&gt;http://msdn.microsoft.com/xml/&lt;/link&gt;
    &lt;description&gt; Extensible Markup Language (XML) is the universal format
    for data on the Web. XML allows developers to easily describe and 
    deliver rich, structured data from any application in a standard, 
    consistent way. XML does not replace HTML; rather, it is a 
    complementary format. &lt;/description&gt;
    &lt;item&gt;
      &lt;title&gt; XML Files: XPath Selections and Custom Functions, and More 
      &lt;/title&gt;
      &lt;link&gt; 
      http://msdn.microsoft.com/msdnmag/issues/03/02/xmlfiles/TOC.asp
      &lt;/link&gt;
      &lt;description&gt; Get your questions about XPath selections, custom 
      functions, and more answered in this month's column.  (February 5, 
      Article)&lt;/description&gt;
    &lt;/item&gt;
    &lt;item&gt;
      &lt;title&gt; Extreme XML: XML Serialization in the .NET Framework &lt;/title&gt;
      &lt;link&gt; http://msdn.microsoft.com/library/en-us/dnexxml/html/xml01202003.asp
      &lt;/link&gt;
      &lt;description&gt; Dare Obasanjo discusses XML serialization and how you 
      can use it within the .NET Framework to improve interoperability and 
      meet W3C standards.  (February 3, Article) &lt;/description&gt;
    &lt;/item&gt;
  &lt;/channel&gt;
&lt;/rss&gt;
</PRE>

<P>For more information about RSS, read Mark Pilgrim's informative article entitled <A HREF="http://www.xml.com/pub/a/2002/12/18/dive-into-xml.html">What is RSS?</A> on XML.com. </P>

<H2 class="dtH1">Functional Requirements for the News Aggregator</H2>

<P>RSS news aggregators are desktop or Web applications that are used to retrieve and display RSS feeds from various news sources. Examples of RSS news aggregators include <A HREF="http://www.newzcrawler.com/">NewzCrawler</A>, <A HREF="http://www.newsgator.com/">NewsGator</A>, and <A HREF="http://www.disobey.com/amphetadesk/">AmphetaDesk</A>. I tried a few RSS news aggregators but didn't find one with the right mix of features and functionality for my tastes. Therefore, I decided to write one myself that met my needs. </P>

<P>I had the following functional requirements for my news aggregator:</P>

<OL type="1">
	<LI>The news aggregator must be able to process the three most popular versions of RSS (versions 0.91, 1.0 and 2.0).</li>

	<LI>The news aggregator must use a three-paned user interface similar to Microsoft Outlook&reg; Express for displaying RSS feeds. </li>

	<LI>The news aggregator must use an embedded Web browser to allow viewing rich content and navigation to Web pages linked to from RSS items. </li>

	<LI>The news aggregator must allow importation and exportation of a list of subscribed feeds using <A HREF="http://www.opml.org/">OPML</A>, the standard format used by other aggregators. </li>

	<LI>The news aggregator must provide the option to control how often each individual feed is checked. </li>

	<LI>The news aggregator should provide keyboard shortcuts for common tasks like navigating through new items. </li>

	<LI>The news aggregator must be able to track what messages have already been read between invocations of the application. </li>

	<LI>The news aggregator should be able to show you the raw XML from a particular RSS feed. </li>

	<LI>The news aggregator should cache RSS feeds on disk between invocations of the program. </li>

	<LI>The news aggregator should provide the ability to mark read items as unread. </li>

	<LI>The news aggregator should support ISA clients and/or Web proxies. </li>

	<LI>The news aggregator must use <A HREF="http://fishbowl.pastiche.org/archives/001132.html">HTTP conditional GET requests</A> to reduce bandwidth costs on news sources. </li>

	<LI>The news aggregator must be able to run on a system that meets the following minimum requirements:
<UL type="disc">
	<LI>Microsoft Windows&reg; 2000, Windows XP, or above </li>

	<LI>Microsoft .NET Framework 1.0 </li>

	<LI>LAN/Dialup Internet Connection</li>
</UL>
</li>
</OL>

<P>In implementing my RSS news aggregator, called RSS Bandit, I satisfied all of the aforementioned functional requirements except for number 9. Initial tests showed that there wasn't a significant performance difference between loading feeds from disk and refreshing from the Web on a broadband connection, although the former added some complexity to the code. Secondly, given that the expected usage pattern for RSS Bandit is as an <I>always-on</I> application, this feature did not seem absolutely necessary. </P>

<H2 class="dtH1">A Look at the RSS Bandit User Interface </H2>

<P>The user interface for RSS Bandit is inspired by mail and newsreaders such as Microsoft Outlook and Microsoft Outlook Express. Figure 1 is a screenshot of RSS Bandit showing the embedded Web browser in action.</P>
<a href="xml02172003-fig01big.gif" target="_Top"><img src="xml02172003-fig01.gif" border=0 alt="View larger version of Figure 1: reading news with RSS Bandit"></a>
<P class="label"><B>Figure 1. Reading News with RSS Bandit</B></P>

<P>Figure 2 is a screenshot that shows the popup message that indicates that new items have been retrieved. </P>
<a href="xml02172003-fig02big.gif" target="_Top"><img src="xml02172003-fig02.gif" border=0 alt="View larger version of Figure 2: receiving new messages with RSS Bandit"></a>
<P class="label"><B>Figure 2. Receiving New Messages with RSS Bandit</B></P>

<H2 class="dtH1">Overview of the RSS Bandit Architecture</H2>

<P>The RSS Bandit application is primarily driven by two classes. The <B>RssHandler</B> class manages downloading of RSS feeds, while the <B>RssBanditView</B> class provides a graphical front end for viewing RSS feeds and interacting with the <B>RssHandler</B> class. </P>

<P>The <B>RssHandler</B> class downloads RSS feeds at specified intervals and stores them. The class is not tightly coupled to the user interface and can be reused by other applications that need to process RSS feeds. Clients that utilize the <B>RssHandler</B> class register a callback (delegate) upon instantiating the class. The <B>RssHandler</B> object then invokes the registered callback when new or updated feeds are downloaded. The information about which feeds to download and other configuration data is obtained from a feed subscription list written in XML. Since the amount of time between each download of a particular RSS feed is user configurable, the <B>RssHandler</B> class has a timer that goes off every five minutes and checks each feed to see whether enough time has elapsed between download attempts for that particular feed. This means that a feed cannot have more than one download attempt made against it in a five-minute span. </P>

<P>The <B>RssBanditView</B> is a <A HREF="/library/en-us/vbcon/html/vbconintroductiontowfcforms.asp">Windows Form</A> that contains a tree view for displaying the list of subscribed feeds, a list view for displaying information about items from the currently selected feed, and an embedded Web browser for displaying content. On startup, the <B>RssBanditView</B> registers a delegate with the <B>RssHandler</B> that handles downloading and processing RSS feeds. Whenever new or updated feeds are downloaded, the <B>RssBanditView</B> is updated through the delegate in a thread-safe manner using techniques described in the <A HREF="/library/en-us/dnforms/html/winforms06112002.asp">Safe, Simple Multithreading in Windows Forms, Part 1</A> article by Chris Sells. </P>

<P>The user interface also enables the user to manage various aspects of the behavior of the <B>RssHandler</B> class. The user can add and remove feeds from the subscription list, configure how often feeds should be downloaded, and set proxy server information. </P>

<H2 class="dtH1">XML Technologies and RSS Bandit</H2>

<P>The RSS Bandit application makes significant use of the XML technologies in the .NET Framework. RSS Bandit uses XML Serialization to convert the feed subscription list to objects and vice versa, XSLT to convert OPML files to the feed subscription list format, XSD validation to ensure the feed subscription list is valid, and XPath to process RSS feeds.</P>

<H3 class="dtH1">W3C XML Schema in RSS Bandit</H3>

<P>The first step in working on RSS Bandit was deciding what information was necessary for the application to function on startup. After some brainstorming, I came up with two broad classes of information&mdash;feed subscriptions and configuration data. The application would need to be able to determine what feeds I wanted to read, how often it should fetch new items, and what news items were already read. Secondly, the application would need to know information about the proxy server through which to direct Web requests. </P>

<P>After deciding what information the application needed on startup, I needed to decide between storing this information in a configuration file and storing it in the Windows registry. I decided to go with an XML configuration file over storing the data in the Windows registry for several reasons. Not only is an XML configuration file more portable than registry settings, but it also allows me to process my settings using the wide range of technologies for processing XML information.</P>

<P>Let's first take a look at the schema for the configuration file for RSS Bandit:</P>

<PRE class="code">&lt;xs:schema targetNamespace='http://www.25hoursaday.com/2003/RSSBandit/feeds/' 
xmlns:xs='http://www.w3.org/2001/XMLSchema' elementFormDefault='qualified' 
xmlns:f='http://www.25hoursaday.com/2003/RSSBandit/feeds/'&gt;
  &lt;xs:element name='feeds'&gt;
    &lt;xs:complexType&gt;
      &lt;xs:sequence&gt;

        &lt;xs:element name='feed' minOccurs='0' maxOccurs='unbounded'&gt;
          &lt;xs:complexType&gt;
            &lt;xs:sequence&gt;
              &lt;xs:element name='title' type='xs:string' /&gt;
              &lt;xs:element name='link' type='xs:anyURI' /&gt;
              &lt;xs:element name='refresh-rate' type='xs:int' minOccurs='0'&gt;
                &lt;xs:annotation&gt;
                  &lt;xs:documentation&gt;
       This describes how often the feed must be refreshed in 
       milliseconds. 
      &lt;/xs:documentation&gt;
                &lt;/xs:annotation&gt;
              &lt;/xs:element&gt;
              &lt;xs:element name='last-retrieved' type='xs:dateTime' 
              minOccurs='0' /&gt;
              &lt;xs:element name='etag' type='xs:string' minOccurs='0' /&gt;
              &lt;xs:element name='stories-recently-viewed' minOccurs='0'&gt;
                &lt;xs:complexType&gt;
                  &lt;xs:sequence&gt;
                    &lt;xs:element name='story' type='xs:string' 
                    minOccurs='0' maxOccurs='unbounded' /&gt;
                  &lt;/xs:sequence&gt;
                &lt;/xs:complexType&gt;
              &lt;/xs:element&gt;
            &lt;/xs:sequence&gt;
            &lt;xs:attribute name='category' type='xs:string' use='optional' /&gt;
          &lt;/xs:complexType&gt;
        &lt;/xs:element&gt;

   &lt;xs:element name='categories' minOccurs='0'&gt;
    &lt;xs:complexType&gt;
     &lt;xs:sequence&gt;
      &lt;xs:element name='category' type='xs:string' maxOccurs='unbounded' /&gt;
     &lt;/xs:sequence&gt;
    &lt;/xs:complexType&gt;
   &lt;/xs:element&gt;

      &lt;/xs:sequence&gt;
      &lt;xs:attribute name='refresh-rate' type='xs:int' use='optional' /&gt;
      &lt;xs:attribute name='proxy-server' type='xs:string' use='optional' /&gt;
      &lt;xs:attribute name='proxy-port' type='xs:positiveInteger' 
      use='optional' /&gt;
    &lt;/xs:complexType&gt;      

    &lt;xs:key name='categories-key'&gt;
     &lt;xs:selector xpath='f:categories/f:category'/&gt;
     &lt;xs:field xpath='.'/&gt;
    &lt;/xs:key&gt;
    
    &lt;xs:keyref name='categories-keyref' refer='f:categories-key' &gt;
     &lt;xs:selector xpath='f:feed'/&gt;
     &lt;xs:field xpath='@category'/&gt;
    &lt;/xs:keyref&gt;
  
  &lt;/xs:element&gt;
&lt;/xs:schema&gt;</PRE>

<P>On startup, the first thing the application tries to do is find a file named <B>feeds.xml</B> in the current directory. If the file is found, then it is loaded and validated against the above schema to ensure it is a valid feed subscription list. Similar validation occurs if an attempt is made to import a feed subscription list during the execution of the RSS Bandit application. </P>

<P>Most of the information in the schema is straightforward although the <code class="ce">refresh-rate</code>, <code class="ce">etag</code>, and <code class="ce">story</code> elements could do with some clarification. The <code class="ce">refresh-rate</code> element describes how often an attempt should be made to download a feed in milliseconds. The <code class="ce">etag</code> element contains information from the <A HREF="http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19">ETag</A> header that was sent back from the Web server the last time the feed was downloaded. This information is used when performing <A HREF="http://fishbowl.pastiche.org/archives/001132.html">HTTP conditional GET requests</A>. The <code class="ce">story</code> element contains the link to the news item, which doubles as a unique identifier used for distinguishing read versus unread stories. </P>

<P>The key constraint specifies that each <code class="ce">category</code> element within <code class="ce">categories</code> must be unique and can be referenced as a key by another element or attribute. The <code class="ce">keyref</code> constraint specifies that the <code class="ce">category</code> attribute of a <code class="ce">feed</code> must have the same value as one of the <code class="ce">category</code> elements under <code class="ce">categories</code>.</P>

<H3 class="dtH1">XML Serialization in RSS Bandit</H3>

<P>The information in the feed subscription list has to be accessed and modified quite often during the execution of RSS Bandit, which tends to favor storing the information in native data structures instead of in an XML document. For this reason, upon successful validation the feed subscription list is converted to objects using the XML Serialization technology described in last month's column, <A HREF="/library/en-us/dnexxml/html/xml01202003.asp">XML Serialization in the .NET Framework</A>. </P>

<P>Below is the class that maps to the anonymous complex type that acts as the type definition of the <code class="ce">feed</code> element:</P>

<PRE class="code">   /// &lt;remarks/&gt;
      [System.Xml.Serialization.XmlTypeAttribute
   (Namespace="http://www.25hoursaday.com/2003/RSSBandit/feeds/")]
   public class feedsFeed {
    
      /// &lt;remarks/&gt;
      public string title;
    
      /// &lt;remarks/&gt;
      [System.Xml.Serialization.XmlElementAttribute(DataType="anyURI")]
      public string link;
    
      /// &lt;remarks/&gt;
      [System.Xml.Serialization.XmlElementAttribute("refresh-rate")]
      public int refreshrate;
    
      /// &lt;remarks/&gt;
      [System.Xml.Serialization.XmlIgnoreAttribute()]
      public bool refreshrateSpecified;
    
      /// &lt;remarks/&gt;
      [System.Xml.Serialization.XmlElementAttribute("last-retrieved")]
      public System.DateTime lastretrieved;
    
      /// &lt;remarks/&gt;
      [System.Xml.Serialization.XmlIgnoreAttribute()]
      public bool lastretrievedSpecified;
    
      /// &lt;remarks/&gt;
      public string etag;
    
      /// &lt;remarks/&gt;
      [System.Xml.Serialization.XmlIgnoreAttribute()]
      public bool containsNewMessages; 

      /// &lt;remarks/&gt;
      [System.Xml.Serialization.XmlArrayAttribute(ElementName = 
      "stories-recently-viewed", IsNullable = false)]
      [System.Xml.Serialization.XmlArrayItemAttribute("story", Type = 
      typeof(System.String), IsNullable = false)]
      public ArrayList storiesrecentlyviewed;

      /// &lt;remarks/&gt;
      [System.Xml.Serialization.XmlAttributeAttribute("category")]
      public string category;
   }
</PRE>

<P>The two attributes annotating the <B>storiesrecentlyviewed</B> field are the most interesting annotations in the class. The annotations basically state that the element named <code class="ce">stories-recently-viewed</code> maps to an <B>ArrayList</B> while its child <code class="ce">story</code> elements map to strings stored in the <B>ArrayList</B>.  </P>

<H3 class="dtH1">XPath and the DOM in RSS Bandit</H3>

<P>As mentioned earlier, an RSS feed contains one or more <code class="ce">item</code> elements that optionally have <code class="ce">title</code>, <code class="ce">link</code>, or <code class="ce">description</code> elements as children. However, how to locate these elements differs depending on which version of RSS an application is processing. In RSS 0.91, the <code class="ce">item</code> element is a child of the <code class="ce">channel</code> element and neither element has a namespace name. In RSS 1.0, the <code class="ce">item</code> element is part of the "http://purl.org/rss/1.0/" namespace and it is a child of the <code class="ce">RDF</code> element. The <code class="ce">RDF</code> element itself belongs to the "http://www.w3.org/1999/02/22-rdf-syntax-ns#" namespace. In RSS 2.0, the <code class="ce">item</code> element is a child of the <code class="ce">channel</code> element, and the two elements either both have no namespace name or, if one exists, share the same one. </P>

<P>I decided to abstract away from the aforementioned differences in the various flavors of RSS and create a class that represented the typical information in an RSS item. During processing of RSS feeds, the <B>RssHandler</B> class retrieves the <code class="ce">item</code> elements from an RSS feed and converts them to <B>RssItem</B> objects. Below is a code fragment showing how to locate all the <code class="ce">item</code> elements from an RSS feed stored in an <B>XmlDocument</B> regardless of what version of RSS is being processed. </P>

<PRE class="code">string rssNamespaceUri = String.Empty; 

         if(feed.DocumentElement.LocalName.Equals("RDF") &amp;&amp;
            feed.DocumentElement.NamespaceURI.Equals 
            ("http://www.w3.org/1999/02/22-rdf-syntax-ns#")){
            
            rssNamespaceUri = "http://purl.org/rss/1.0/";

         }else if(feed.DocumentElement.LocalName.Equals("rss")){

               rssNamespaceUri = feed.DocumentElement.NamespaceURI;             
         }else{
            throw new ApplicationException("This XML document does not " +
            "look like an RSS feed");
         }

            
         //convert RSS items in feed to RssItem objects and add to list
         XmlNamespaceManager nsMgr = new 
         XmlNamespaceManager(feed.NameTable); 
         nsMgr.AddNamespace("rss", rssNamespaceUri); 

         foreach(XmlNode node in feed.SelectNodes("//rss:item", nsMgr)){      
            RssItem item = MakeRssItem((XmlElement)node);
            items.Add(item);                
         }//foreach
</PRE>

<P>The above code fragment loops through each <code class="ce">item</code> element in the XML document independent of whether the RSS version being processed is 0.91, 1.0, or 2.0. Similar code is used to process the child elements of each <code class="ce">item</code> element when converting it to an <B>RssItem</B> object. The application code takes advantage of the fact that although RSS feeds from different versions are structured differently, the <code class="ce">item</code> elements in them are similar (that is, there are shared <I>islands of structure</I>). </P>

<H3 class="dtH1">XSLT in RSS Bandit</H3>

<P>Popular news aggregators including <A HREF="http://bitworking.org/Aggie.html">Aggie</A>, <A HREF="http://www.disobey.com/amphetadesk/">AmphetaDesk</A>, and <A HREF="http://www.rassoc.com/newsgator/">NewsGator</A> support importing and exporting RSS feed subscriptions using an XML format known as <A HREF="http://www.opml.org/spec">OPML</A>. Because interoperability is always a good thing, I decided to support converting my feed subscription list format to OPML and vice versa. </P>

<P>Converting my feed subscription list to OPML turned out to be fairly straightforward because I just needed to write out some of the information from the objects generated from the feed subscription list as XML. The code is shown below:</P>

<PRE class="code">   StringBuilder sb = new StringBuilder("&lt;opml&gt;\n&lt;body&gt;\n"); 
            
            if(_feedsTable != null){

               foreach(feedsFeed f in _feedsTable.Values){
                  sb.AppendFormat("&lt;outline title='{0}' xmlUrl='{1}' /&gt;\n", f.title, f.link);
               }
            }            
   sb.Append("&lt;/body&gt;\n&lt;/opml&gt;");
</PRE>

<P>However, converting the OPML files to my feed subscription list format required a bit more work. I decided the best route would be to use technology explicitly designed for converting between XML formats, which is XSLT. Below is the stylesheet that converts OPML files to my feed subscription list format: </P>

<PRE class="code">&lt;xsl:stylesheet version="1.0" 
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"&gt;
  &lt;xsl:output method="xml" indent="yes" /&gt;
  &lt;xsl:template match="/"&gt;
    &lt;feeds xmlns="http://www.25hoursaday.com/2003/RSSBandit/feeds/"&gt;
      &lt;xsl:for-each select="/opml/body/outline"&gt;
        &lt;feed&gt;
          &lt;title&gt;
            &lt;xsl:value-of select="@title" /&gt;
          &lt;/title&gt;
          &lt;link&gt;
            &lt;xsl:choose&gt;
              &lt;xsl:when test="@xmlUrl"&gt;
                &lt;xsl:value-of select="@xmlUrl" /&gt;
              &lt;/xsl:when&gt;
              &lt;xsl:when test="@xmlurl"&gt;
                &lt;xsl:value-of select="@xmlurl" /&gt;
              &lt;/xsl:when&gt;
              &lt;xsl:otherwise&gt;ERROR: No RSS Feed URL in OPML 
              File&lt;/xsl:otherwise&gt;
            &lt;/xsl:choose&gt;
          &lt;/link&gt;
        &lt;/feed&gt;
      &lt;/xsl:for-each&gt;
    &lt;/feeds&gt;
  &lt;/xsl:template&gt;
&lt;/xsl:stylesheet&gt;
</PRE>

<P>Once the OPML file is converted to the RSS Bandit feed subscription list format, it is merged with the internal representation of the feed subscription list processed at startup.  </P>

<H2 class="dtH1">Future Plans for RSS Bandit</H2>

<P>I currently use RSS Bandit on a daily basis and find it quite useful. Before publishing this article, I made the installer available on GotDotNet and it has been <A HREF="http://www.gotdotnet.com/Community/UserSamples/Details.aspx?SampleGuid=d2682e0f-90e3-4a50-a37a-76bcb4544126">downloaded 1000 times</A>. Given the positive feedback about RSS Bandit, I have created a <A HREF="http://www.gotdotnet.com/Community/Workspaces/Workspace.aspx?id=cb8d3173-9f65-46fe-bf17-122e3703bb00">GotDotNet Workspace</A> for the project and will work with others to continue developing it. There are a number of features I'd like to add, such as support for caching RSS feeds on disk, providing feedback when an RSS feed is invalid, implementing <A HREF="http://diveintomark.org/archives/2002/08/15/ultraliberal_rss_locator.html">RSS autodiscovery</A>, support for embedding RSS Bandit in Internet Explorer, and automatic updating of the application using either the <A HREF="http://msdn.microsoft.com/msdnmag/issues/03/02/BITS/">Background Intelligent Transfer Service API</A> or the <A HREF="http://windowsforms.net/articles/appupdater.aspx">.NET Application Updater Component</A>. Developers that would like to work on further development of RSS Bandit can join the GotDotNet workspace. </P>

<H2 class="dtH1">Locating RSS Feeds</H2>

<P>The RSS Bandit installer places a number of feed subscription lists in a subdirectory of the RSS Bandit application on installation. These lists contain feeds for technology news sites, XML-centric news sources, and developer Web logs. Good places to start when searching for more RSS feeds are <A HREF="http://www.newsisfree.com/">News Is Free</A> and <A HREF="http://www.syndic8.com">Syndic8</A>, where you can check whether your favorite Web sites offer syndication.</P>
<HR NOSHADE SIZE=1>
<P><B>Dare Obasanjo</B> is a member of Microsoft's WebData team, which among other things develops the components within the System.Xml and System.Data namespaces of the .NET Framework, Microsoft XML Core Services (MSXML), and Microsoft Data Access Components (MDAC). </P>

<P>Feel free to post any questions or comments about this article on the <A HREF="http://www.gotdotnet.com/community/messageboard/MessageBoard.aspx?id=207">Extreme XML message board</A> on GotDotNet.</P>
 <!--closes the topic content div-->
<!--FOOTER_END-->


<!-- End Content -->

 </TD> </TR> </TABLE><TABLE cellpadding='0' cellspacing='0' border='0' style='width:100%'><TR><TD style='width:600' NOWRAP></TD></TR><TR><TD><IFRAME id='frmRatings' src='/msdn-online/shared/components/ratings/ratings.aspx?ContentID=_966232&HideDiscuss=1' frameborder='0' scrolling='no' width='100%' height='250' vspace='0' hspace='0' ></IFRAME></TD></TR></TABLE><br clear="all" style="line-height: 1px; overflow: hidden" /><table id="msviFooter" width="100%" cellpadding="0" cellspacing="0"><tr valign="bottom"><td id="msviFooter2" style="filter:progid:DXImageTransform.Microsoft.Gradient(startColorStr='#FFFFFF', endColorStr='#669AFF', gradientType='1')"><div id="msviLocalFooter"><nobr><a href="http://go.microsoft.com/?linkid=317027">Manage Your Profile</a> |</nobr><wbr /><nobr><a href="/isapi/gomscom.asp?target=/legal/" target="_parent">Legal</a> |</nobr><wbr /><nobr><a href="http://register.microsoft.com/contactus30/contactus.asp?domain=msdn" target="_parent">Contact Us</a> |</nobr><wbr /><nobr><a href="/flash/" target="_parent">MSDN Flash Newsletter</a></nobr></div><div id="msviGlobalFooter"><span dir="ltr">&#169; 2005 Microsoft Corporation. 
All rights reserved.&nbsp;</span><nobr><a href="http://www.microsoft.com/info/cpyright.mspx">Terms of Use</a> |</nobr><wbr /><nobr><a href="/library/toolbar/3.0/trademarks/en-us.mspx">Trademarks</a> |</nobr><wbr /><nobr><a href="http://www.microsoft.com/info/privacy.mspx">Privacy Statement</a></nobr></div></td><td bgcolor="#669AFF" width="105"><img src="/library/toolbar/3.0/text.aspx?t=TQ%3d%3d&amp;f=FFFFFF&amp;b=669AFF&amp;font=Microsoft+Logo+95%2c+13pt&amp;w=105&amp;h=29&amp;a=0&amp;l=0&amp;v=0&amp;c=eAdQAhJgwJihw2DKZGDGx4FnKIM%3d" width="105" height="29" alt="Microsoft" title="" border="0" /></td></tr></table><script language="javascript">var msviFooter2;if (document.getElementById){msviFooter2 = document.getElementById("msviFooter2");msviFooter2.style.filter = "";}</script><table cellpadding="0" cellspacing="0" width="100%" height="58" style="height: expression(msviFooter2.offsetHeight+1); width: expression(msviFooter2.offsetWidth)"><tr valign="top"><td><div style="width: 200px"></div></td><td width="100%" style="filter:progid:DXImageTransform.Microsoft.Gradient(startColorStr='#FFFFFF', endColorStr='#669AFF', gradientType='1')"></td></tr></table>
<script language="javascript">footerjs(document);</script>
 </BODY> </HTML>

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Team Leader
India India
Technical Architect

16 years of overall work experience.

Proven Expertise in Agile process implementation.

Managed complex projects through their lifecycle.

Methodologies used include Agile, SDLC, Waterfall models.

Comments and Discussions