- rssfeeder_src.zip
- RSSFeeder
- RSSBlogAPI
- RSSCommon
- RSSFeeder.sln
- RSSFeeder
- RSSFeederResources
- RSSFeederSetup
- Messages.ico
- RSSFeederSetup.vdproj
- Test
- RSSStarter
- RSSTests
- Thirdparty
- AxInterop.DHTMLEDLib.dll
- AxInterop.SHDocVw.dll
- Eyefinder.dll
- GotDotNet.Exslt.dll
- HttpDownloader.dll
- Interop.DHTMLEDLib.dll
- Interop.SHDocVw.dll
- Interop.WindowsInstaller.dll
- Microsoft.ApplicationBlocks.Updater.ActivationProcessors.dll
- Microsoft.ApplicationBlocks.Updater.dll
- Microsoft.ApplicationBlocks.Updater.Downloaders.dll
- Microsoft.Practices.EnterpriseLibrary.Caching.dll
- Microsoft.Practices.EnterpriseLibrary.Common.dll
- Microsoft.Practices.EnterpriseLibrary.Configuration.dll
- Microsoft.Practices.EnterpriseLibrary.ExceptionHandling.dll
- Microsoft.Practices.EnterpriseLibrary.ExceptionHandling.Logging.dll
- Microsoft.Practices.EnterpriseLibrary.Logging.dll
- Microsoft.Practices.EnterpriseLibrary.Security.Cache.CachingStore.dll
- Microsoft.Practices.EnterpriseLibrary.Security.Cryptography.dll
- Microsoft.Practices.EnterpriseLibrary.Security.dll
- NotifyIconBalloon.dll
- SandBar.dll
- SandDock.dll
- SgmlReaderDll.dll
|
// Copyright © 2005 by Omar Al Zabir. All rights are reserved.
//
// If you like this code then feel free to go ahead and use it.
// The only thing I ask is that you don't remove or alter my copyright notice.
//
// Your use of this software is entirely at your own risk. I make no claims or
// warrantees about the reliability or fitness of this code for any particular purpose.
// If you make changes or additions to this code please mark your code as being yours.
//
// website http://www.oazabir.com, email OmarAlZabir@gmail.com, msn oazabir@hotmail.com
using System;
using System.Globalization;
using System.Text;
using System.IO;
using System.Collections;
using System.Xml;
namespace RSSCommon
{
/// <summary>
/// Forward-only parser that walks an <see cref="XmlReader"/> and converts every
/// Atom, RSS or RDF feed root element it encounters into an <c>RssChannel</c>
/// populated with <c>RssFeed</c> posts.
///
/// An instance reuses one internal text buffer between calls, so a single
/// instance should not be used from several threads at the same time.
/// </summary>
public class FeedProcessor
{
    /// <summary>RFC 822 style pattern tried after the standard patterns have failed.</summary>
    private const string Rfc822Format = "ddd, dd MMM yyyy HH:mm:ss zzz";

    /// <summary>
    /// Standard format specifiers tried by <see cref="FormatDate"/>:
    /// RFC 1123 ("r"), sortable ("s"), universal sortable ("u") and universal full ("U").
    /// </summary>
    private static readonly string[] StandardDateFormats = new string[] { "r", "s", "u", "U" };

    /// <summary>Scratch buffer reused by <see cref="ReadString"/> to avoid frequent allocations.</summary>
    private StringBuilder buffer = new StringBuilder(100);

    /// <summary>
    /// Reads the XML stream to its end and finds out what type of feed each
    /// root-level feed element is. Based on the type, builds a channel for it.
    /// </summary>
    /// <param name="reader">Reader positioned before the feed content.</param>
    /// <returns>A list holding one channel per feed element found; empty when none is found.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public IList Parse( XmlReader reader )
    {
        if( reader == null )
        {
            throw new ArgumentNullException( "reader" );
        }

        IList channels = new ArrayList();
        while( reader.Read() )
        {
            if( reader.NodeType != XmlNodeType.Element )
            {
                continue;
            }

            // Element names are matched case-insensitively; the invariant culture keeps
            // the comparison independent of the machine's regional settings.
            string name = reader.Name.ToLower( CultureInfo.InvariantCulture );
            switch( name )
            {
                case "atom:feed": // We have Atom Feed
                case "feed":      // We have Atom Feed
                    channels.Add( this.ProcessAtomFeed( reader ) );
                    break;
                case "rdf:rdf":   // We have rdf feed
                case "rdf":       // We have rdf feed
                case "rss:rss":   // We have rss feed
                case "rss":       // We have rss feed
                    channels.Add( this.ProcessRssFeed( reader ) );
                    break;
            }
        }
        return channels;
    }

    /// <summary>
    /// Processes an Atom feed node: reads title, link, description/tagline and
    /// all entries until the matching end tag (or the end of the stream).
    /// </summary>
    /// <param name="reader">Reader positioned on the feed start element.</param>
    /// <returns>The populated channel.</returns>
    private RssChannel ProcessAtomFeed( XmlReader reader )
    {
        RssChannel channel = new RssChannel();
        channel.Type = RssTypeEnum.Atom;
        channel.Feeds = new ArrayList();

        // Remember the exact start tag (it may be "feed" or a prefixed form such as
        // "atom:feed") so the matching end tag is recognised, as ProcessRssFeed does.
        string tagName = reader.Name;

        // A self-closing feed element has no children and no end tag to wait for.
        if( reader.IsEmptyElement )
        {
            return channel;
        }

        // Atom feeds can carry several <link> elements; the one that points at the
        // web site is rel="alternate" (a missing rel means alternate as well).
        bool haveAlternateLink = false;

        while( reader.Read() )
        {
            if( reader.NodeType == XmlNodeType.Element )
            {
                switch( reader.Name )
                {
                    case "title": // title for channel
                        channel.Title = ReadString( reader );
                        break;
                    case "link": // link to website
                    {
                        // GetAttribute does not move the reader off the element.
                        string href = reader.GetAttribute( "href" );
                        if( href != null && href.Length > 0 )
                        {
                            string rel = reader.GetAttribute( "rel" );
                            bool isAlternate = ( rel == null || rel.Length == 0 || rel == "alternate" );
                            // Non-alternate links are only used as long as no alternate link was seen.
                            if( isAlternate || !haveAlternateLink )
                            {
                                channel.Link = href;
                            }
                            if( isAlternate )
                            {
                                haveAlternateLink = true;
                            }
                        }
                        break;
                    }
                    case "tagline":     // description of the channel
                    case "description": // Same
                        channel.Description = ReadString( reader );
                        break;
                    case "entry": // Aha! an entry
                        channel.Feeds.Add( this.ProcessAtomEntry( reader ) );
                        break;
                }
            }
            else if( reader.NodeType == XmlNodeType.EndElement )
            {
                if( reader.Name == tagName )
                {
                    break;
                }
            }
        }
        return channel;
    }

    /// <summary>
    /// Processes an RSS or RDF feed node: reads title, link, description and all
    /// items until the matching end tag (or the end of the stream).
    /// </summary>
    /// <param name="reader">Reader positioned on the rss/rdf start element.</param>
    /// <returns>The populated channel.</returns>
    private RssChannel ProcessRssFeed( XmlReader reader )
    {
        RssChannel channel = new RssChannel();
        channel.Type = RssTypeEnum.RSS;
        channel.Feeds = new ArrayList();

        // The end tag to stop at carries the same name as the start tag we are on.
        string tagName = reader.Name;

        // A self-closing root element has no children and no end tag to wait for.
        if( reader.IsEmptyElement )
        {
            return channel;
        }

        while( reader.Read() )
        {
            if( reader.NodeType == XmlNodeType.Element )
            {
                switch( reader.Name )
                {
                    case "title":
                        channel.Title = ReadString( reader );
                        break;
                    case "link":
                        channel.Link = ReadString( reader );
                        break;
                    case "description":
                        channel.Description = ReadString( reader );
                        break;
                    case "item":
                        channel.Feeds.Add( this.ProcessRssItem( reader ) );
                        break;
                }
            }

            // Deliberately not an "else": the helpers above leave the reader on a
            // different node, which may be the closing tag of the feed.
            if( reader.NodeType == XmlNodeType.EndElement )
            {
                if( reader.Name == tagName )
                {
                    break;
                }
            }
        }
        return channel;
    }

    /// <summary>
    /// Process Atom feed entry
    /// </summary>
    /// <param name="reader">Reader positioned on the entry start element.</param>
    /// <returns>An RSS Feed object which contains a post</returns>
    private RssFeed ProcessAtomEntry( XmlReader reader )
    {
        return ProcessFeedNode( reader, "entry", "title", "id", "link", "issued" );
    }

    /// <summary>
    /// Process RSS feed item
    /// </summary>
    /// <param name="reader">Reader positioned on the item start element.</param>
    /// <returns>An RSS Feed object which contains a post</returns>
    private RssFeed ProcessRssItem( XmlReader reader )
    {
        return ProcessFeedNode( reader, "item", "title", "guid", "link", "pubDate" );
    }

    /// <summary>
    /// Process a entry node which contains one post. This method is generic for Atom and RSS Feeds.
    ///
    /// All the Atom/RSS feed specified node names are specified via parameters.
    /// Besides filling the well-known fields, the whole entry is re-serialized
    /// (tab indented, without namespace processing) into the XML property of the result.
    /// </summary>
    /// <param name="reader">Reader positioned to the entry node</param>
    /// <param name="itemNodeName">Name of the node which identifies the post. For RSS it's "item", for Atom it's "entry"</param>
    /// <param name="titleNodeName">Name of title node</param>
    /// <param name="guidNodeName">Name of GUID node.</param>
    /// <param name="linkNodeName">Name of link node</param>
    /// <param name="pubDateNodeName">Name of publish date node</param>
    /// <returns>Returns a populated feed object</returns>
    private RssFeed ProcessFeedNode( XmlReader reader, string itemNodeName,
        string titleNodeName, string guidNodeName, string linkNodeName,
        string pubDateNodeName )
    {
        RssFeed feed = new RssFeed();

        // Build a buffer which stores the entire XML content of the entry
        StringBuilder xmlBuffer = new StringBuilder( 1024 );
        XmlTextWriter writer = new XmlTextWriter( new StringWriter( xmlBuffer ) );
        try
        {
            writer.Namespaces = false;
            writer.Indentation = 1;
            writer.IndentChar = '\t';
            writer.Formatting = Formatting.Indented;
            writer.WriteStartElement( itemNodeName );

            // A self-closing item has no children; reading on would consume its siblings.
            bool hasContent = !reader.IsEmptyElement;

            // True when the previous iteration already moved the reader onto a node that
            // has not been examined yet. In that case Read() must not be called again,
            // otherwise that node (for example a second <category> that directly follows
            // the first one) would be skipped.
            bool alreadyAdvanced = false;

            while( hasContent && ( alreadyAdvanced || reader.Read() ) )
            {
                alreadyAdvanced = false;

                if( reader.NodeType == XmlNodeType.Element )
                {
                    string elementName = reader.Name;
                    writer.WriteStartElement( elementName );
                    writer.WriteAttributes( reader, true );

                    // ReadString does not move the reader for an empty element.
                    bool isEmpty = reader.IsEmptyElement;

                    if( elementName == titleNodeName )
                    {
                        feed.Title = ReadString( reader );
                        writer.WriteString( feed.Title );
                        alreadyAdvanced = !isEmpty;
                    }
                    else if( elementName == guidNodeName )
                    {
                        feed.Guid = ReadString( reader );
                        writer.WriteString( feed.Guid );
                        alreadyAdvanced = !isEmpty;
                    }
                    else if( elementName == linkNodeName )
                    {
                        // Atom feed contains the link as "href" attribute
                        string link = reader.GetAttribute( "href", "" );
                        if( null == link )
                        {
                            // but Rss feed has the link as value
                            link = ReadString( reader );
                            writer.WriteString( link );
                            alreadyAdvanced = !isEmpty;
                        }
                        // The link doubles as identifier when no GUID has been seen so far
                        if( feed.Guid == null )
                        {
                            feed.Guid = link;
                        }
                    }
                    else if( elementName == pubDateNodeName )
                    {
                        string date = ReadString( reader );
                        feed.PublishDate = this.FormatDate( date );
                        writer.WriteString( date );
                        alreadyAdvanced = !isEmpty;
                    }
                    else if( isEmpty )
                    {
                        // Nothing to copy. The empty raw write keeps the serialized form
                        // the same as for non-empty elements (explicit start and end tag).
                        writer.WriteRaw( string.Empty );
                    }
                    else
                    {
                        // Unknown element: copy its content verbatim. ReadInnerXml leaves
                        // the reader on the node that follows the element's end tag.
                        writer.WriteRaw( reader.ReadInnerXml() );
                        alreadyAdvanced = true;
                    }

                    // Close the element started
                    writer.WriteEndElement();

                    // For empty elements, ReadEndElement fails
                    if( reader.NodeType == XmlNodeType.EndElement )
                    {
                        if( reader.Name == itemNodeName )
                        {
                            break;
                        }
                        reader.ReadEndElement();
                        alreadyAdvanced = true;
                    }
                }

                if( reader.NodeType == XmlNodeType.EndElement && reader.Name == itemNodeName )
                {
                    break;
                }
            }

            writer.WriteEndElement();
        }
        finally
        {
            // Flushes the writer; also runs when parsing throws so the writer is never left open.
            writer.Close();
        }

        feed.XML = xmlBuffer.ToString();
        return feed;
    }

    /// <summary>
    /// Converts the date text of a feed into a <see cref="DateTime"/>, trying
    /// several formats in turn. This is best effort: when nothing matches, the
    /// current local time is returned instead of throwing.
    /// </summary>
    /// <param name="date">Raw date text as found in the feed.</param>
    /// <returns>The parsed date, or <see cref="DateTime.Now"/> when the text cannot be parsed.</returns>
    private DateTime FormatDate( string date )
    {
        if( date == null || date.Length == 0 )
        {
            return DateTime.Now;
        }

        // A trailing "+hhmm" / "+hh:mm" offset is cut off before parsing. Cut exactly at
        // the '+' and trim afterwards, so both "10:00:00 +0600" and "10:00:00+06:00"
        // keep their last digit of the seconds.
        int indexOfPlus = date.LastIndexOf( '+' );
        if( indexOfPlus > 0 )
        {
            date = date.Substring( 0, indexOfPlus ).TrimEnd();
        }

        try
        {
            // Parse the dates using the standard universal date format
            return DateTime.Parse( date, CultureInfo.InvariantCulture,
                DateTimeStyles.AdjustToUniversal );
        }
        catch
        {
            // Not a generally recognised format, fall through to the next attempt
        }

        try
        {
            // Standard formats failed, try the "r", "s", "u" and "U" formats
            return DateTime.ParseExact( date, StandardDateFormats,
                DateTimeFormatInfo.InvariantInfo, DateTimeStyles.AdjustToUniversal );
        }
        catch
        {
            // Fall through to the next attempt
        }

        try
        {
            // All the standards formats have failed, try the dreaded RFC822 format
            return DateTime.ParseExact( date, Rfc822Format,
                DateTimeFormatInfo.InvariantInfo, DateTimeStyles.AdjustToUniversal );
        }
        catch
        {
            // Fall through to the last attempt
        }

        try
        {
            // Last try, may be the date ends with some -0600 or something, remove that.
            // Feed dates are machine generated, so parse with the invariant culture.
            string strippedDate = date.Substring( 0, date.Length - 5 ) + "GMT";
            return DateTime.Parse( strippedDate, CultureInfo.InvariantCulture );
        }
        catch
        {
            // All failed! The RSS Feed source should be sued
            return DateTime.Now;
        }
    }

    /// <summary>
    /// Alternative to reader.ReadString. The ReadString method of XmlReader not
    /// only reads the string inside the node, but also jumps over the node end tag.
    /// We need to read only the characters inside the tag pair, and stop when the
    /// end tag is reached.
    /// </summary>
    /// <param name="reader">Reader positioned on the start element whose text is wanted.</param>
    /// <returns>The concatenated text and CDATA content; an empty string for an empty element.</returns>
    private string ReadString( XmlReader reader )
    {
        // Reuse existing buffer in order to prevent frequent StringBuilder allocation
        buffer.Length = 0;

        // Empty elements have no content
        if( reader.IsEmptyElement )
        {
            return string.Empty;
        }

        // Skip the begin tag and all white spaces before the first character of content is found
        while( !reader.EOF
            && ( reader.NodeType == XmlNodeType.Element
                || reader.NodeType == XmlNodeType.Whitespace ) )
        {
            reader.Read();
        }

        // Read and store in buffer while we are getting text and CDATA sections. Any other
        // node type, in particular the end element, stops the loop.
        while( reader.NodeType == XmlNodeType.CDATA
            || reader.NodeType == XmlNodeType.Text )
        {
            buffer.Append( reader.Value );
            reader.Read();
        }

        // The reader now rests on the node that ended the text (normally the end element).
        // Return the content of the buffer we have prepared for this node
        return buffer.ToString();
    }
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.