<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

<!--
   Stylesheet to produce a useable XML document from a CodeProject forum
   relies on the ability of the Mozilla HTML parser to build a DOM from
   imperfect HTML that can be processed by an XSL processor (Mozilla's TransforMiix) 
   
   Lots of ugly XPaths needed to extract data from the mess of CP's HTML - better here than
   scattered throughout JS code, etc.
-->

<!--
   Root template: builds the top-level <Forum> element.
   Emits the attributes forumID, title and subtitle, then one <Post> child for every
   post header row (a tr whose id has text in front of '_h0').
-->
<xsl:template match="/">
   <!-- The forum ID is read from the query string of the "Refresh" link.  A trailing '&'
        is appended before cutting, because substring-before() returns the empty string
        when the delimiter is absent; without it the ID would be lost whenever forumid
        is the last parameter of the link. -->
   <xsl:variable name="forumID" select="substring-before(concat(substring-after(//a[text()='Refresh']/@href, 'forumid='), '&amp;'), '&amp;')"/>

   <!-- Candidate locations of the page title, one per known page layout. -->
   <!-- lounge -->
   <xsl:variable name="title1" select="/html/body/table/tbody/tr[2]/td[3]/table/tbody/tr/td/font/b"/>
   <!-- article -->
   <xsl:variable name="title2" select="/html/body/table/tbody/tr/td/table/tbody/tr[2]/td/table/tbody/tr/td/b/font"/>
   <!-- forum -->
   <xsl:variable name="title3" select="/html/body/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/font/b"/>
   <!-- survey / news -->
   <xsl:variable name="title4" select="/html/body/table/tbody/tr/td[3]/table/tbody/tr/td/table/tbody/tr/td"/>

   <!-- The first candidate that selects any node supplies the title. -->
   <xsl:variable name="title">
      <xsl:choose>
         <xsl:when test="$title1"><xsl:value-of select="$title1"/></xsl:when>
         <xsl:when test="$title2"><xsl:value-of select="$title2"/></xsl:when>
         <xsl:when test="$title3"><xsl:value-of select="$title3"/></xsl:when>
         <xsl:when test="$title4"><xsl:value-of select="$title4"/></xsl:when>
      </xsl:choose>
   </xsl:variable>

   <!-- The subtitle path depends on which title candidate matched. -->
   <xsl:variable name="subtitle">
      <xsl:choose>
         <xsl:when test="$title1"><xsl:value-of select="/html/body/table/tbody/tr[2]/td[3]/table/tbody/tr/td/p"/></xsl:when>
         <xsl:when test="$title2"><xsl:value-of select="/html/body/table/tbody/tr/td/table/tbody/tr[2]/td/table/tbody/tr/td/font[2]"/></xsl:when>
         <xsl:when test="$title3"><xsl:value-of select="$title3/../../text()"/></xsl:when>
         <xsl:when test="$title4"><xsl:value-of select="/html/body/table/tbody/tr/td[3]/p/font/b|/html/body/table/tbody/tr[2]/td[3]/h3"/></xsl:when>
      </xsl:choose>
   </xsl:variable>

   <Forum forumID="{$forumID}" title="{$title}" subtitle="{$subtitle}">
      <xsl:apply-templates select="//tr[substring-before(@id, '_h0')]"/>
   </Forum>
</xsl:template>

<!--
   Matches each post header row (a tr whose id is "<postID>_h0") and builds a <Post>
   element for it: PostHeader supplies the header attributes and the <User> child,
   PostBody supplies the <Content> child.
-->
<xsl:template match="//tr[substring-before(@id, '_h0')]">
   <!-- the post ID is whatever precedes '_h0' in the row's id -->
   <xsl:variable name="headerPostID" select="substring-before(@id, '_h0')"/>
   <xsl:element name="Post">
      <xsl:attribute name="id">
         <xsl:value-of select="$headerPostID"/>
      </xsl:attribute>
      <!-- PostHeader must run first: it adds attributes to <Post>, which is only
           possible before any child element has been written -->
      <xsl:call-template name="PostHeader">
         <xsl:with-param name="postID" select="$headerPostID"/>
      </xsl:call-template>
      <xsl:call-template name="PostBody">
         <xsl:with-param name="postID" select="$headerPostID"/>
      </xsl:call-template>
   </xsl:element>
</xsl:template>

<!--
   Extracts information from the post header row (the context node).
   Adds the attributes indent, parentID, subject, type, timeStamp, score and votes to the
   element currently being built by the caller, followed by a <User> child element.

   Parameter:
      postID - ID of the post; used to locate the matching "<postID>_h1" row that
               holds the score / vote text.
-->
<xsl:template name="PostHeader">
   <xsl:param name="postID"/>
   
   <!-- In "Topic View" (default for user blogs), the first message in a thread is indented.
        The indent is only read when the row has more than three cells. -->
   <xsl:variable name="indent">
      <xsl:if test="count(td) &gt; 3">
         <xsl:value-of select=".//img[@src='/script/images/ind.gif']/@width"/>
      </xsl:if>
   </xsl:variable>
   <xsl:attribute name="indent">
      <xsl:value-of select="$indent"/>
   </xsl:attribute>
   
   <!-- The parent post is the nearest preceding one with a lesser indent (or none at all).
      Of course, the parent might not be visible on this page, or this might be a top-level
      post.
      $indent is a result tree fragment, which always converts to boolean true, so it has
      to be tested through string() for an empty indent to count as "not indented". -->
   <xsl:attribute name="parentID">
      <xsl:if test="string($indent)">
         <xsl:value-of select="substring-before((preceding-sibling::tr[substring-before(@id, '_h0') and (not(.//img[@src='/script/images/ind.gif']/@width) or .//img[@src='/script/images/ind.gif']/@width &lt; $indent)]/@id)[last()], '_h0')" />
      </xsl:if>
   </xsl:attribute>
      
   <!-- subject: text of the first cell -->
   <xsl:attribute name="subject">
      <xsl:value-of select="td[1]"/>
   </xsl:attribute>
   
   <!-- Message type: derived from the icon shown in the header; 1 when no known icon matches -->
   <xsl:variable name="MessageType" select="./td/table/tbody/tr/td/img/@src"/>
   <xsl:attribute name="type">
      <xsl:choose>
         <xsl:when test="$MessageType='/script/images/news_info.gif'">2</xsl:when>
         <xsl:when test="$MessageType='/script/images/news_question.gif'">4</xsl:when>
         <xsl:when test="$MessageType='/script/images/news_answer.gif'">8</xsl:when>
         <xsl:when test="$MessageType='/script/images/news_game.gif'">16</xsl:when>
         <xsl:when test="$MessageType='/script/images/news_sticky.gif'">32</xsl:when>
         <xsl:otherwise>1</xsl:otherwise>
      </xsl:choose>
   </xsl:attribute>
      
   <!-- timestamp: text of the last cell (may be absolute (EST) or relative to "current" time) -->
   <xsl:attribute name="timeStamp">
      <xsl:value-of select="td[last()]"/>
   </xsl:attribute>

   <!-- score, number of votes: parsed from the row that follows the message content row,
        out of text shaped like "Score: <score> (<votes> vote..." -->
   <xsl:variable name="controlRow" select="//tr[@id=concat($postID,'_h1')]//tr[td/font/@class='messagecontent']/following-sibling::tr[1]"/>
   <xsl:attribute name="score">
      <xsl:value-of select="substring-before(substring-after($controlRow, 'Score: '), ' (')"/>
   </xsl:attribute>
   <xsl:attribute name="votes">
      <xsl:value-of select="substring-before(substring-after(substring-after($controlRow, 'Score: '), ' ('), ' vote')"/>
   </xsl:attribute>

   <!-- 
      User information (<User> tag) 
      Currently the user type is not really classified; the name of the image that
      represents the user is stored instead.
   -->
   <xsl:variable name="userLink" select="td[2]/a | td[2]/img"/>
   <xsl:variable name="userID" select="substring-after($userLink/@href, 'id=')"/>
   <xsl:variable name="userType" select="substring-after($userLink/img/@src|$userLink/@src, '/script/images/')"/>
   <xsl:variable name="userName" select="td[last()-1]"/>
   <User userID="{$userID}" type="{$userType}">
      <xsl:choose>
         <!-- string() is required here as well; see the note on parentID above -->
         <xsl:when test="string($indent)">
            <xsl:copy-of select="$userName/font/text()|$userName/font/*"/>
         </xsl:when>
         <xsl:otherwise>
            <!-- non-indented rows: the name is taken from inside the <b> element -->
            <xsl:copy-of select="$userName/font/b/text()|$userName/font/b/*"/>
         </xsl:otherwise>
      </xsl:choose>
   </User>
   
</xsl:template>

<!--
   Extracts the post content (<Content> tag).
   Parameter:
      postID - ID of the post; the message is looked up in the row whose id is
               "<postID>_h1".
-->
<xsl:template name="PostBody">
   <xsl:param name="postID"/>

   <!-- the cell that directly holds the <font class="messagecontent"> wrapper -->
   <xsl:variable name="bodyRowID" select="concat($postID, '_h1')"/>
   <xsl:variable name="messageCell" select="//tr[@id = $bodyRowID]//td[font/@class = 'messagecontent']"/>

   <Content>
      <!-- copy the cell's child elements and text through the copyContent templates -->
      <xsl:apply-templates select="$messageCell/* | $messageCell/text()" mode="copyContent"/>
   </Content>
</xsl:template>

<!-- 
   Due to how Mozilla handles invalid markup, we need to do a bit of extra work here:
      Unclosed tags within a message will result in Mozilla throwing in duplicate <font>
      tags all through the message.  We know there should only be one, wrapping the entire
      message, and so this code ensures that they are removed entirely.
      
      Of course, nothing stops a user from throwing their own <font class="messagecontent">
      tags into a message... for now, i just assume they won't.
-->

<xsl:template match="font[@class='messagecontent']" mode="copyContent">
   <!-- the wrapper itself is not copied; only its children are processed -->
   <xsl:apply-templates select="node()" mode="copyContent"/>
</xsl:template>

<!-- identity copy for mode copyContent: copies the matched node, then recurses into its
     attributes and children -->
<xsl:template match="@*|node()" mode="copyContent">
   <xsl:copy>
      <xsl:apply-templates select="@*|node()" mode="copyContent"/>
   </xsl:copy>
</xsl:template>

</xsl:stylesheet>
