RDF to RSS Converter

Govardhana Reddy

1.44/5 (3 votes)

Oct 25, 2007

CPOL

15849

This article will help you to find a way to convert RDF to RSS Feed format.

Introduction

Today we know that Feeds play a major role in sharing information. RSS makes it possible for people to keep up with their favorite websites in an automated manner that's easier than checking them manually.

Alongside RSS, several other feed formats have appeared, such as RDF (RSS 1.0) and Atom. The code provided here converts a feed from RDF to the RSS format.

Using the Code

The method below accepts the RDF feed as an XmlDocument argument and returns the converted feed as a string of XML in RSS 2.0 format.

//
/// <summary>
/// Converts an RDF (RSS 1.0 style) feed into RSS 2.0 markup.
/// </summary>
/// <param name="rdfDoc">
/// The parsed RDF feed. It must contain a <c>channel</c> element; <c>item</c>
/// elements are located by local name, so any namespace prefix is accepted.
/// </param>
/// <returns>The RSS 2.0 markup (indented) as a string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="rdfDoc"/> is null.</exception>
/// <exception cref="ArgumentException">The document has no <c>channel</c> element.</exception>
/// <remarks>
/// Only title, link, description, date and creator are carried over. Missing text
/// fields are written as "Not Specified"; a missing or unparsable date is omitted
/// because RSS 2.0 treats <c>pubDate</c> as optional. As a side effect the result
/// is also saved to <c>c:\gova.xml</c>.
/// </remarks>
public string RDFToRssConverter(XmlDocument rdfDoc)
{
    if (rdfDoc == null)
    {
        throw new ArgumentNullException("rdfDoc");
    }

    // local-name() lets the lookup ignore whichever namespace/prefix the feed uses.
    XmlNode channel = rdfDoc.SelectSingleNode("//*[local-name()='channel']");
    if (channel == null)
    {
        throw new ArgumentException("The document does not contain a <channel> element.", "rdfDoc");
    }

    const string rssVersion = "2.0";
    const string rssLanguage = "en-US";
    const string rssGenerator = "RDFFeedConverter";
    const string notSpecified = "Not Specified";

    MemoryStream memoryStream = new MemoryStream();

    // Disposing the writer flushes it and closes the stream; MemoryStream.ToArray
    // remains valid on a closed stream.
    using (XmlTextWriter xmlWriter = new XmlTextWriter(memoryStream, null))
    {
        xmlWriter.Formatting = Formatting.Indented;

        xmlWriter.WriteStartElement("rss");
        xmlWriter.WriteAttributeString("version", rssVersion);
        xmlWriter.WriteStartElement("channel");
        xmlWriter.WriteElementString("title",
            ValueOrDefault(ReadChildText(channel, "title", false), notSpecified));
        xmlWriter.WriteElementString("link",
            ValueOrDefault(ReadChildText(channel, "link", false), notSpecified));
        xmlWriter.WriteElementString("description",
            ValueOrDefault(ReadChildText(channel, "description", false), notSpecified));
        xmlWriter.WriteElementString("language", rssLanguage);
        xmlWriter.WriteElementString("generator", rssGenerator);

        foreach (XmlNode item in rdfDoc.SelectNodes("//*[local-name()='item']"))
        {
            WriteRssItem(xmlWriter, item);
        }

        xmlWriter.WriteEndElement(); // </channel>
        xmlWriter.WriteEndElement(); // </rss>
    }

    string outStr = Encoding.UTF8.GetString(memoryStream.ToArray());

    // Re-parsing verifies the output is well-formed. The save to a fixed path is a
    // debugging convenience; remove it if the caller does not need the file.
    XmlDocument retDoc = new XmlDocument();
    retDoc.LoadXml(outStr);
    retDoc.Save("c:\\gova.xml");

    return outStr;
}

/// <summary>Writes one RSS 2.0 <c>item</c> element built from an RDF item node.</summary>
private static void WriteRssItem(XmlWriter writer, XmlNode item)
{
    const string notSpecified = "Not Specified";

    string title = ValueOrDefault(ReadChildText(item, "title", false), notSpecified);
    string link = ValueOrDefault(ReadChildText(item, "link", false), notSpecified);
    // NOTE: the leading space in this fallback is kept for backward compatibility
    // with the output produced by earlier versions of this method.
    string description = ValueOrDefault(ReadChildText(item, "description", false), " Not Specified");
    // Suffix matches cover dc:date, pubDate, publicationDate and dc:creator.
    string author = ValueOrDefault(ReadChildText(item, "creator", true), notSpecified);
    string dateText = ReadChildText(item, "date", true);

    writer.WriteStartElement("item");
    writer.WriteElementString("title", title);
    writer.WriteElementString("link", link);

    // Feed dates are machine-readable, so parse and format with the invariant
    // culture; "R" yields the RFC 1123 form (ddd, dd MMM yyyy HH:mm:ss GMT).
    DateTime published;
    if (dateText.Length != 0 &&
        DateTime.TryParse(dateText,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.None,
            out published))
    {
        writer.WriteElementString("pubDate",
            published.ToUniversalTime().ToString("R", System.Globalization.CultureInfo.InvariantCulture));
    }

    writer.WriteElementString("author", author);
    writer.WriteElementString("description", description);
    writer.WriteEndElement();
}

/// <summary>
/// Returns the trimmed text of the first child element of <paramref name="parent"/>
/// whose local name equals (or, when <paramref name="matchSuffix"/> is true, ends
/// with) <paramref name="localName"/>, ignoring case; empty string when none exists.
/// </summary>
private static string ReadChildText(XmlNode parent, string localName, bool matchSuffix)
{
    foreach (XmlNode child in parent.ChildNodes)
    {
        if (child.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        bool isMatch = matchSuffix
            ? child.LocalName.EndsWith(localName, StringComparison.OrdinalIgnoreCase)
            : String.Equals(child.LocalName, localName, StringComparison.OrdinalIgnoreCase);

        if (isMatch)
        {
            // InnerText already unwraps CDATA sections and decodes entities.
            return child.InnerText.Trim();
        }
    }

    return String.Empty;
}

/// <summary>Returns <paramref name="value"/>, or <paramref name="fallback"/> when it is null or empty.</summary>
private static string ValueOrDefault(string value, string fallback)
{
    return String.IsNullOrEmpty(value) ? fallback : value;
}

Points of Interest

I have extracted only the title, description, time, author, and link; you can extend the code to handle other fields at any time. The code also saves the converted content to a file (c:\gova.xml); if you do not need that, you can remove the statement from the code.