Multiple Language Syntax Highlighting, Part 1: JScript

Jonathan de Halleux
Rate me:
4.96/5 (58 votes)
12 Mar 2003 · 12 min read
284K
6.1K
170
Makes automatic highlighting of source code in web pages a reality (for C, C++, JScript, VBScript, XML)
highlight_src.zip
- articlehelp.htm
- examples.htm
- highlight.css
- highlight.htm
- highlight.js
- highlight.png
- highlight.xml
- highlight.xsd
- highlight.xsl
- highlight.xsx
- parsecontext.png
- pipe.png
- schema.png
- tn_schema.jpg
highlight-demo.zip
- highlight.htm
- highlight.png
- parsecontext.png
- pipe.png
- schema.png
- tn_schema.jpg
highlight.zip
- articlehelp.htm
- highlight.js
- highlight.xml
- highlight.xsd
- highlight.xsl
- highlight.xsx
<!----------------------------- Ignore ----------------------------->
<link rel="stylesheet" type="text/css" href="http://www.codeproject.com/styles/global.css">
<p><b>Please choose 'View Source' in your browser to view the HTML, or File | Save to save 
			this file to your hard drive for editing.</b></p>
<hr size="1" noshade>
<style type="text/css"> <!--
	/* Colours of the spans produced for highlighted XML (one class per XSL template). */
	.xml-tag { color:#AA4400 }
	.xml-bracket { color:#0000FF }
	.xml-cdata { color:#AA0088 }
	/* was "color=#FF0000", which is not a valid declaration and was ignored */
	.xml-attribute-name { color:#FF0000 }
	.xml-attribute-value { color:#0000FF }
	--></style>
<xml id="languageSyntax">
		<highlight needs-build="yes" save-build="no">
			<!-- families of keywords -->
			<keywordlists>
				<!-- C keywords. Referenced through family="kwc-keywords" by the keyword
				     rules of the "c" and "cpp" languages. -->
				<keywordlist id="kwc-keywords">
					<kw>auto</kw>
					<kw>const</kw>
					<kw>double</kw>
					<kw>float</kw>
					<kw>int</kw>
					<kw>short</kw>
					<kw>struct</kw>
					<kw>unsigned</kw>
					<kw>break</kw>
					<kw>continue</kw>
					<kw>else</kw>
					<kw>for</kw>
					<kw>long</kw>
					<kw>signed</kw>
					<kw>switch</kw>
					<kw>void</kw>
					<kw>case</kw>
					<kw>default</kw>
					<kw>enum</kw>
					<kw>goto</kw>
					<kw>register</kw>
					<kw>sizeof</kw>
					<kw>typedef</kw>
					<kw>volatile</kw>
					<kw>char</kw>
					<kw>do</kw>
					<kw>extern</kw>
					<kw>if</kw>
					<kw>return</kw>
					<kw>static</kw>
					<kw>union</kw>
					<kw>while</kw>
				</keywordlist>
				<!-- C++ keywords that are not already C keywords. Referenced through
				     family="kwcpp-keywords" by the "cpp" language only. -->
				<keywordlist id="kwcpp-keywords">
					<kw>asm</kw>
					<kw>dynamic_cast</kw>
					<kw>namespace</kw>
					<kw>reinterpret_cast</kw>
					<kw>try</kw>
					<kw>bool</kw>
					<kw>explicit</kw>
					<kw>new</kw>
					<kw>static_cast</kw>
					<kw>typeid</kw>
					<kw>catch</kw>
					<kw>false</kw>
					<kw>operator</kw>
					<kw>template</kw>
					<kw>typename</kw>
					<kw>class</kw>
					<kw>friend</kw>
					<kw>private</kw>
					<kw>this</kw>
					<kw>using</kw>
					<kw>const_cast</kw>
					<kw>inline</kw>
					<kw>public</kw>
					<kw>throw</kw>
					<kw>virtual</kw>
					<kw>delete</kw>
					<kw>mutable</kw>
					<kw>protected</kw>
					<kw>true</kw>
					<kw>wchar_t</kw>
				</keywordlist>
				<!-- C/C++ preprocessor directives. The "pre" attribute is a regular
				     expression fragment that buildKeywordRegExp puts in front of the
				     keyword alternation, so each entry is matched together with its
				     leading "#". Referenced by the "c" and "cpp" languages. -->
				<keywordlist id="kwcpp-preprocessors" pre="#">
					<kw>define</kw>
					<kw>error</kw>
					<kw>import</kw>
					<kw>undef</kw>
					<kw>elif</kw>
					<kw>if</kw>
					<kw>include</kw>
					<kw>else</kw>
					<kw>ifdef</kw>
					<kw>line</kw>
					<kw>endif</kw>
					<kw>ifndef</kw>
					<kw>pragma</kw>
				</keywordlist>
				<!-- STL container names. The "pre" fragment makes the std:: qualifier
				     optional, so both "vector" and "std::vector" are matched.
				     Referenced by the "cpp" language only. -->
				<keywordlist id="kwcpp-stl" pre="(std::){0,1}">
					<kw>vector</kw>
					<kw>set</kw>
					<kw>map</kw>
					<kw>list</kw>
					<kw>stack</kw>
					<kw>deque</kw>
					<kw>multimap</kw>
					<kw>multiset</kw>
				</keywordlist>
				<!-- VBScript literal keywords (constant values). Referenced by the
				     "vbscript" language. -->
				<keywordlist id="kwvbscript-keywords">
					<kw>Empty</kw>
					<kw>False</kw>
					<kw>Nothing</kw>
					<kw>Null</kw>
					<kw>True</kw>
				</keywordlist>
				<!-- VBScript statement keywords, one word per entry. Multi-word
				     statements (Property Get, On Error Resume Next, End Sub, ...) are
				     covered by listing each of their words once. -->
				<keywordlist id="kwvbscript-statements">
					<kw>Call</kw>
					<kw>Class</kw>
					<kw>Const</kw>
					<kw>Dim</kw>
					<kw>Do</kw>
					<kw>Loop</kw>
					<kw>Until</kw>
					<kw>Erase</kw>
					<kw>Execute</kw>
					<kw>ExecuteGlobal</kw>
					<kw>Exit</kw>
					<kw>For</kw>
					<kw>Each</kw>
					<kw>To</kw>
					<kw>Step</kw>
					<kw>Next</kw>
					<kw>Function</kw>
					<kw>If</kw>
					<kw>Then</kw>
					<kw>Else</kw>
					<kw>ElseIf</kw>
					<kw>End</kw>
					<kw>On</kw>
					<kw>Error</kw>
					<kw>Resume</kw>
					<kw>Option</kw>
					<kw>Explicit</kw>
					<kw>Private</kw>
					<kw>Property</kw>
					<kw>Get</kw>
					<kw>Let</kw>
					<kw>Set</kw>
					<kw>Public</kw>
					<kw>Randomize</kw>
					<kw>ReDim</kw>
					<kw>Rem</kw>
					<kw>Select</kw>
					<kw>Case</kw>
					<kw>Stop</kw>
					<kw>Sub</kw>
					<kw>While</kw>
					<kw>Wend</kw>
					<kw>With</kw>
				</keywordlist>
				<!-- VBScript object method names. Referenced by the "vbscript"
				     language. -->
				<keywordlist id="kwvbscript-methods">
					<kw>Clear</kw>
					<kw>Execute</kw>
					<kw>Raise</kw>
					<kw>Replace</kw>
					<kw>Test</kw>
					<kw>Write</kw>
					<kw>WriteLine</kw>
				</keywordlist>
				<!-- VBScript built-in function names. Referenced by the "vbscript"
				     language. Only real function names belong here: words that merely
				     appear in documentation headings are not VBScript functions and
				     would be highlighted wrongly. -->
				<keywordlist id="kwvbscript-functions">
					<kw>Abs</kw>
					<kw>Array</kw>
					<kw>Asc</kw>
					<kw>Atn</kw>
					<kw>CBool</kw>
					<kw>CByte</kw>
					<kw>CCur</kw>
					<kw>CDate</kw>
					<kw>CDbl</kw>
					<kw>Chr</kw>
					<kw>CInt</kw>
					<kw>CLng</kw>
					<kw>Cos</kw>
					<kw>CreateObject</kw>
					<kw>CSng</kw>
					<kw>CStr</kw>
					<kw>Date</kw>
					<kw>DateAdd</kw>
					<kw>DateDiff</kw>
					<kw>DatePart</kw>
					<kw>DateSerial</kw>
					<kw>DateValue</kw>
					<kw>Day</kw>
					<kw>Escape</kw>
					<kw>Eval</kw>
					<kw>Exp</kw>
					<kw>Filter</kw>
					<kw>FormatCurrency</kw>
					<kw>FormatDateTime</kw>
					<kw>FormatNumber</kw>
					<kw>FormatPercent</kw>
					<kw>GetLocale</kw>
					<kw>GetObject</kw>
					<kw>GetRef</kw>
					<kw>Hex</kw>
					<kw>Hour</kw>
					<kw>InputBox</kw>
					<kw>InStr</kw>
					<kw>InStrRev</kw>
					<kw>Int</kw>
					<kw>Fix</kw>
					<kw>IsArray</kw>
					<kw>IsDate</kw>
					<kw>IsEmpty</kw>
					<kw>IsNull</kw>
					<kw>IsNumeric</kw>
					<kw>IsObject</kw>
					<kw>Join</kw>
					<kw>LBound</kw>
					<kw>LCase</kw>
					<kw>Left</kw>
					<kw>Len</kw>
					<kw>LoadPicture</kw>
					<kw>Log</kw>
					<kw>LTrim</kw>
					<kw>RTrim</kw>
					<kw>Trim</kw>
					<kw>Mid</kw>
					<kw>Minute</kw>
					<kw>Month</kw>
					<kw>MonthName</kw>
					<kw>MsgBox</kw>
					<kw>Now</kw>
					<kw>Oct</kw>
					<kw>Replace</kw>
					<kw>RGB</kw>
					<kw>Right</kw>
					<kw>Rnd</kw>
					<kw>Round</kw>
					<kw>ScriptEngine</kw>
					<kw>ScriptEngineBuildVersion</kw>
					<kw>ScriptEngineMajorVersion</kw>
					<kw>ScriptEngineMinorVersion</kw>
					<kw>Second</kw>
					<kw>SetLocale</kw>
					<kw>Sgn</kw>
					<kw>Sin</kw>
					<kw>Space</kw>
					<kw>Split</kw>
					<kw>Sqr</kw>
					<kw>StrComp</kw>
					<kw>String</kw>
					<kw>StrReverse</kw>
					<kw>Tan</kw>
					<kw>Time</kw>
					<kw>Timer</kw>
					<kw>TimeSerial</kw>
					<kw>TimeValue</kw>
					<kw>TypeName</kw>
					<kw>UBound</kw>
					<kw>UCase</kw>
					<kw>Unescape</kw>
					<kw>VarType</kw>
					<kw>Weekday</kw>
					<kw>WeekdayName</kw>
					<kw>Year</kw>
				</keywordlist>
				<!-- JScript statement keywords, one word per entry (multi-word
				     statements such as try...catch...finally or switch...case are
				     covered word by word). Referenced by the "jscript" language. -->
				<keywordlist id="kwjscript-statements">
					<kw>break</kw>
					<kw>case</kw>
					<kw>catch</kw>
					<kw>continue</kw>
					<kw>debugger</kw>
					<kw>default</kw>
					<kw>do</kw>
					<kw>while</kw>
					<kw>for</kw>
					<kw>in</kw>
					<kw>function</kw>
					<kw>if</kw>
					<kw>else</kw>
					<kw>finally</kw>
					<kw>return</kw>
					<kw>switch</kw>
					<kw>this</kw>
					<kw>throw</kw>
					<kw>try</kw>
					<kw>var</kw>
					<kw>with</kw>
				</keywordlist>
				<!-- JScript intrinsic object names. Referenced by the "jscript"
				     language. -->
				<keywordlist id="kwjscript-objects">
					<kw>ActiveXObject</kw>
					<kw>Array</kw>
					<kw>arguments</kw>
					<kw>Boolean</kw>
					<kw>Date</kw>
					<kw>Debug</kw>
					<kw>Enumerator</kw>
					<kw>Error</kw>
					<kw>Function</kw>
					<kw>Global</kw>
					<kw>Math</kw>
					<kw>Number</kw>
					<kw>Object</kw>
					<kw>RegExp</kw>
					<kw>String</kw>
					<kw>VBArray</kw>
				</keywordlist>
				<!-- The five predefined XML entity references, kept in CDATA so they
				     are stored as literal text. NOTE(review): no keyword rule of the
				     languages defined in this document references this family. -->
				<keywordlist id="kwxml-esc">
					<kw><![CDATA[&lt;]]></kw>
					<kw><![CDATA[&gt;]]></kw>
					<kw><![CDATA[&amp;]]></kw>
					<kw><![CDATA[&quot;]]></kw>
					<kw><![CDATA[&apos;]]></kw>
				</keywordlist>
			</keywordlists>
			<!-- Languages syntax specification -->
			<languages>
				<!-- C language specification -->
				<language id="c">
					<!-- Code contexts: default (most common) is code. -->
					<contexts default="code">
						<!-- basic source code context -->
						<context id="code" attribute="code">
							<!-- "/*" opens a block comment. The delimiter is "hidden": it is
							     not copied to the result, the XSL template writes it back. -->
							<detect2chars attribute="hidden" context="blockcomment" char="/" char1="*" />
							<!-- "..." string literal on one logical line: any run of escape
							     sequences (backslash + any character, or backslash + line
							     break) and of characters other than quote, backslash and line
							     break. Also matches the empty string "". -->
							<regexp attribute="literal" context="code" expression="&quot;(\\(.|\r?\n)|[^&quot;\\\r\n])*&quot;" />
							<!-- keyword c -->
							<keyword attribute="keyword" context="code" family="kwc-keywords" />
							<!-- preprocessor keywords -->
							<keyword attribute="keyword" context="code" family="kwcpp-preprocessors" />
						</context>
						<!-- block comment /* ... */ -->
						<context id="blockcomment" attribute="cpp-blockcomment">
							<!-- "*/" closes the block comment and returns to code -->
							<detect2chars attribute="hidden" context="code" char="*" char1="/" />
						</context>
					</contexts>
				</language>
				<!-- C++ language specification -->
				<language id="cpp">
					<!-- Code contexts: default (most common) is code. -->
					<contexts default="code">
						<!-- basic source code context -->
						<context id="code" attribute="code">
							<!-- "/*" opens a block comment. The delimiter is "hidden": it is
							     not copied to the result, the XSL template writes it back. -->
							<detect2chars attribute="hidden" context="blockcomment" char="/" char1="*" />
							<!-- "//" opens a line comment -->
							<detect2chars attribute="hidden" context="linecomment" char="/" char1="/" />
							<!-- "..." string literal on one logical line: any run of escape
							     sequences (backslash + any character, or backslash + line
							     break) and of characters other than quote, backslash and line
							     break. Also matches the empty string "". -->
							<regexp attribute="literal" context="code" expression="&quot;(\\(.|\r?\n)|[^&quot;\\\r\n])*&quot;" />
							<!-- keyword c -->
							<keyword attribute="keyword" context="code" family="kwc-keywords" />
							<!-- keyword c++ -->
							<keyword attribute="keyword" context="code" family="kwcpp-keywords" />
							<!-- keyword stl -->
							<keyword attribute="keyword" context="code" family="kwcpp-stl" />
							<!-- preprocessor keywords -->
							<keyword attribute="keyword" context="code" family="kwcpp-preprocessors" />
						</context>
						<!-- block comment /* ... */ -->
						<context id="blockcomment" attribute="cpp-blockcomment">
							<!-- "*/" closes the block comment and returns to code -->
							<detect2chars attribute="hidden" context="code" char="*" char1="/" />
						</context>
						<!-- line comment // ... -->
						<context id="linecomment" attribute="cpp-linecomment">
							<!-- Only the end of the line closes a line comment. A "/*" that
							     appears after "//" is ordinary comment text in C++, so there
							     is deliberately no rule switching to the block comment
							     context from here. -->
							<linecontinue attribute="hidden" context="code" />
						</context>
					</contexts>
				</language>
				<!-- VBScript language specification: a single "code" context, every
				     rule returns to it. -->
				<language id="vbscript">
					<contexts default="code">
						<!-- Code context -->
						<context id="code" attribute="code">
							<!-- Single-line comment: from an apostrophe to the end of the line -->
							<regexp attribute="vb-comment" context="code" expression="'.*$" />
							<!-- String literal; a doubled quote inside the string is accepted -->
							<regexp attribute="literal" context="code" expression="&quot;(.|&quot;&quot;)*?&quot;" />
							<!-- Keywords -->
							<keyword attribute="keyword" context="code" family="kwvbscript-keywords" />
							<!-- Statements -->
							<keyword attribute="keyword" context="code" family="kwvbscript-statements" />
							<!-- Methods -->
							<keyword attribute="keyword" context="code" family="kwvbscript-methods" />
							<!-- Functions -->
							<keyword attribute="keyword" context="code" family="kwvbscript-functions" />
						</context>
					</contexts>
				</language>
				<!-- JScript language specification -->
				<language id="jscript">
					<!-- Code contexts: default (most common) is code. -->
					<contexts default="code">
						<!-- basic source code context -->
						<context id="code" attribute="code">
							<!-- "/*" opens a block comment. The delimiter is "hidden": it is
							     not copied to the result, the XSL template writes it back. -->
							<detect2chars attribute="hidden" context="blockcomment" char="/" char1="*" />
							<!-- "//" opens a line comment -->
							<detect2chars attribute="hidden" context="linecomment" char="/" char1="/" />
							<!-- "..." string literal on one logical line: any run of escape
							     sequences (backslash + any character, or backslash + line
							     break) and of characters other than quote, backslash and line
							     break. Also matches the empty string "". -->
							<regexp attribute="literal" context="code" expression="&quot;(\\(.|\r?\n)|[^&quot;\\\r\n])*&quot;" />
							<!-- '...' string literal, same structure with apostrophes -->
							<regexp attribute="literal" context="code" expression="'(\\(.|\r?\n)|[^'\\\r\n])*'" />
							<!-- statements -->
							<keyword attribute="keyword" context="code" family="kwjscript-statements" />
							<!-- objects -->
							<keyword attribute="keyword" context="code" family="kwjscript-objects" />
						</context>
						<!-- block comment /* ... */ -->
						<context id="blockcomment" attribute="cpp-blockcomment">
							<!-- "*/" closes the block comment and returns to code -->
							<detect2chars attribute="hidden" context="code" char="*" char1="/" />
						</context>
						<!-- line comment // ... -->
						<context id="linecomment" attribute="cpp-linecomment">
							<!-- Only the end of the line closes a line comment. A "/*" that
							     appears after "//" is ordinary comment text, so there is
							     deliberately no rule switching to the block comment context
							     from here (the former rule also named a context id,
							     "cpp-blockcomment", that this language does not define). -->
							<linecontinue attribute="hidden" context="code" />
						</context>
					</contexts>
				</language>
				<!-- XML language specification -->
				<language id="xml">
					<contexts default="value">
						<!-- Value context: character data between tags -->
						<context id="value" attribute="xml-value">
							<!-- Comment, possibly spanning several lines. Listed first so that
							     it wins over the generic "<" rule at the same position. -->
							<regexp attribute="vb-comment" context="value" expression="&lt;!--[\s\S]*?--&gt;" />
							<!-- CDATA section opener (hidden, the XSL template writes it back) -->
							<regexp attribute="hidden" context="cdata" expression="&lt;!\[CDATA\[" />
							<!-- start of a closing tag -->
							<regexp attribute="xml-bracket" context="closing-element" expression="&lt;/" />
							<!-- start of an opening tag or of a processing instruction -->
							<regexp attribute="xml-bracket" context="opening-element" expression="&lt;\?{0,1}" />
						</context>
						<!-- inside a CDATA section: everything is literal until the terminator -->
						<context id="cdata" attribute="xml-cdata">
							<regexp attribute="hidden" context="value" expression="\]\]&gt;" />
						</context>
						<!-- opening element context -->
						<context id="opening-element" attribute="xml-code">
							<!-- element name, with an optional namespace prefix -->
							<regexp attribute="xml-tag" context="element-attributes" expression="\b((\w|-)+\:){0,1}(\w|-)+\b" />
						</context>
						<!-- closing element -->
						<context id="closing-element" attribute="xml-code">
							<!-- element name, with an optional namespace prefix -->
							<regexp attribute="xml-tag" context="closing-element" expression="\b((\w|-)+\:){0,1}(\w|-)+\b" />
							<!-- end of the tag -->
							<regexp attribute="xml-bracket" context="value" expression="(\?|/){0,1}&gt;" />
						</context>
						<!-- attributes group -->
						<context id="element-attributes" attribute="xml-code">
							<!-- end of the element -->
							<regexp attribute="xml-bracket" context="value" expression="(\?|/){0,1}&gt;" />
							<!-- attribute name -->
							<regexp attribute="xml-attribute-name" context="attribute-name" expression="\b(\w|-)+\b" />
						</context>
						<!-- just after an attribute name: "=" and the value, which may be
						     double-quoted, single-quoted or a bare word; white space
						     around "=" is accepted -->
						<context id="attribute-name" attribute="xml-attribute-name">
							<regexp attribute="xml-attribute-value" context="element-attributes" expression="\s*=\s*(&quot;.*?&quot;|'.*?'|\b(\w|-)+\b)" />
						</context>
					</contexts>
				</language>
			</languages>
		</highlight>
	</xml>
<xml id="codeXSL">
		<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
			<xsl:output encoding='ISO-8859-1' indent='no' omit-xml-declaration='yes' />
			<!-- Include basic templates -->
			<!-- <xsl:include href="pglcore.xsl"/> -->
			<!-- Each template below renders one kind of child element of <parsedcode>.
			     The element name is the "attribute" value of the context or rule that
			     produced the text. -->
			<!-- plain code: copied through without a span -->
			<xsl:template match="code">
				<xsl:value-of select="text()" disable-output-escaping="yes" />
			</xsl:template>
			<!-- line comment: the "//" delimiter is hidden by the parser and written back here -->
			<xsl:template match="cpp-linecomment">
				<span class="cpp-comment">//<xsl:value-of select="text()" disable-output-escaping="yes" /></span>
			</xsl:template>
			<!-- comment whose delimiters are part of the matched text -->
			<xsl:template match="vb-comment">
				<span class="cpp-comment">
					<xsl:value-of select="text()" disable-output-escaping="yes" />
				</span>
			</xsl:template>
			<!-- block comment: both hidden delimiters are written back here -->
			<xsl:template match="cpp-blockcomment">
				<span class="cpp-comment">/*<xsl:value-of select="text()" disable-output-escaping="yes" />*/</span>
			</xsl:template>
			<!-- string literal -->
			<xsl:template match="literal">
				<span class="cpp-literal">
					<xsl:value-of select="text()" disable-output-escaping="yes" />
				</span>
			</xsl:template>
			<!-- keyword of any family -->
			<xsl:template match="keyword">
				<span class="cpp-keyword">
					<xsl:value-of select="text()" disable-output-escaping="yes" />
				</span>
			</xsl:template>
			<!-- NOTE(review): no rule of the languages defined in this document emits
			     "pgl" or "preprocessor" elements; presumably kept for other syntax files. -->
			<xsl:template match="pgl">
				<span class="pgl">
					<xsl:value-of select="text()" disable-output-escaping="yes" />
				</span>
			</xsl:template>
			<xsl:template match="preprocessor">
				<span class="cpp-preprocessor">
					<xsl:value-of select="text()" disable-output-escaping="yes" />
				</span>
			</xsl:template>
			<!-- Templates for the elements produced by the "xml" language. -->
			<!-- character data between tags: copied through without a span -->
			<xsl:template match="xml-value">
				<xsl:value-of select="text()" disable-output-escaping="yes" />
			</xsl:template>
			<!-- element name -->
			<xsl:template match="xml-tag">
				<span class="xml-tag">
					<xsl:value-of select="text()" disable-output-escaping="yes" />
				</span>
			</xsl:template>
			<!-- angle brackets: the only template without disable-output-escaping,
			     so the bracket characters are escaped in the output -->
			<xsl:template match="xml-bracket">
				<span class="xml-bracket">
					<xsl:value-of select="text()" />
				</span>
			</xsl:template>
			<!-- CDATA section: the hidden opener and terminator are written back
			     (escaped) around the content -->
			<xsl:template match="xml-cdata">
				<span class="xml-bracket">
					<xsl:text>&lt;![CDATA[</xsl:text>
				</span>
				<span class="xml-cdata">
					<xsl:value-of select="text()" disable-output-escaping="yes" />
				</span>
				<span class="xml-bracket">
					<xsl:text>]]&gt;</xsl:text>
				</span>
			</xsl:template>
			<!-- attribute name -->
			<xsl:template match="xml-attribute-name">
				<span class="xml-attribute-name">
					<xsl:value-of select="text()" disable-output-escaping="yes" />
				</span>
			</xsl:template>
			<!-- "=" sign together with the attribute value -->
			<xsl:template match="xml-attribute-value">
				<span class="xml-attribute-value">
					<xsl:value-of select="text()" disable-output-escaping="yes" />
				</span>
			</xsl:template>
			<!-- Wrapper of one highlighted snippet: an inline span when in-box is 0,
			     a pre box otherwise. The lang attribute is copied to the wrapper and
			     the children are rendered by the templates of their own names. -->
			<xsl:template match="parsedcode">
				<xsl:choose>
					<xsl:when test="@in-box = 0">
						<span class="cpp-inline" lang="{@lang}">
							<xsl:apply-templates />
						</span>
					</xsl:when>
					<xsl:otherwise>
						<pre class="cpp-pre" lang="{@lang}">
							<xsl:apply-templates />
						</pre>
					</xsl:otherwise>
				</xsl:choose>
			</xsl:template>
			<!-- entry point: hand the document over to the element templates -->
			<xsl:template match="/">
				<xsl:apply-templates />
			</xsl:template>
		</xsl:stylesheet>
	</xml>	
<xml id="codeExamples">
		<examples>
			<example id="c"><![CDATA[<h4>C</h4>
Boxed code
<pre lang="c">/*this is c*/
char* sText;
sText="hello\" test\
       strings";
</pre>
and inlined code <code lang="c">char* str="string"; /*comment*/</code>.]]>
</example>
			<example id="cpp">
<![CDATA[<h4>C++</h4>
Boxed code
<pre lang="cpp">// this is c++
char* sText;
sText="hello\" test\
       strings";
</pre>
and inlined code <code lang="cpp">char* str="string"; //comment</code>.]]>
</example>
			<example id="jscript">
<![CDATA[<h4>Javascript</h4>
Boxed code
<pre lang="jscript">//this is jscript
var sText;
sText="hello\" test" + "another \
       string";
</pre>
and inlined code <code lang="jscript">var str="string"; // comment</code>.]]>
</example>
			<example id="vbscript">
<![CDATA[<h4>VBScript</h4>
Boxed code
<pre lang="vbscript">' this is vbscript
Dim sText
sText="hello "" string" & test  & " test" 'playing with strings
</pre>
and inlined code <code lang="vbscript">Dim str="string" 'comment</code>.]]>
</example>
			<!-- XML sample. As in the other examples, the whole HTML fragment,
			     heading included, is carried inside one CDATA section. -->
			<example id="xml">
<![CDATA[<h4>XML</h4>
<pre lang="xml">
<?xml someparam="string" ?>
<elem attr1="text" attr2="string">
	a value.
</elem>
<empty-elem />
<ns:elem-with-namespace attr="test" />
<!-- Some comment -->
</pre>]]>
</example>
		</examples>
	</xml>
<script language="javascript">
/// <summary>HTML Syntax highlighting methods in JavaScript.</summary>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>

/// <summary>Reports a caught exception to the user in an alert box</summary>
/// <param name="exception">the caught value: a string that was thrown directly,
/// or an error object exposing description and/or message</param>
/// <code>
///	try
///	{
///		if (!doSomething())
///			throw "Could not do anything";
///	}
///	catch (exception)
///	{
///		handleException(exception);
///	}
/// </code>
/// <remarks>For error objects, description is preferred; message is used only
/// when description is null or undefined.</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function handleException( exception )
{
	var sDetail;

	if (typeof(exception) == "string")
		sDetail = exception;
	else if (exception.description == null )
		sDetail = exception.message;
	else
		sDetail = exception.description;

	alert("Error: " + sDetail);
}

/// <summary>Loads an xml file synchronously</summary>
/// <param name="sFileName">XML file name</param>
/// <returns>the loaded DOMDocument object ( i.e. an ActiveXObject("Msxml2.DOMDocument") ),
/// or null if the file could not be loaded</returns>
/// <remarks>A load failure is not propagated: it is reported through handleException
/// and null is returned. When the parser supplies a reason, it is appended to the
/// reported message.</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function loadXML( sFileName)
{
	var xmlDoc = new ActiveXObject("Msxml2.DOMDocument");
	var sReason;

	// synchronous load: load() must only return once the document is parsed.
	// A real boolean is used instead of relying on coercion of the string "false".
	xmlDoc.async = false;

	try
	{
		// try loading xml file, throw exception if failed
		if (!xmlDoc.load( sFileName ))
		{
			sReason = "";
			if (xmlDoc.parseError != null && xmlDoc.parseError.errorCode != 0)
				sReason = " (" + xmlDoc.parseError.reason + ")";
			throw "Could not load xml file " + sFileName + sReason;
		}
	}
	catch (exception)
	{
		xmlDoc=null;
		handleException(exception);
	}

	return xmlDoc;
};

/// <summary>Appends a new child element holding a single CDATA section</summary>
/// <param name="node">node that receives the new child</param>
/// <param name="nodeName">name of the new child element</param>
/// <param name="cdata">content of the CDATA section</param>
/// <exception>If the child element could not be created</exception>
/// <exception>If the CDATA section could not be created (the empty child
/// element has already been appended at that point)</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function addChildCDATAElem( node, nodeName, cdata )
{
	var childElem, cdataSection;

	// the element is attached to its parent before its content is created
	childElem = node.ownerDocument.createElement( nodeName);
	if (childElem == null)
		throw "Could not append node to " + node.nodeName;
	node.appendChild( childElem );

	cdataSection = node.ownerDocument.createCDATASection( cdata );
	if (cdataSection == null)
		throw "Could not append CDATA node to " + childElem.nodeName;
	childElem.appendChild( cdataSection );
}

/// <summary>Appends a new child element whose content is plain text</summary>
/// <param name="node">node that receives the new child</param>
/// <param name="nodeName">name of the new child element</param>
/// <param name="text">text assigned to the new element</param>
/// <exception>If the child element could not be created</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function addChildElem( node, nodeName, text )
{
	var childElem = node.ownerDocument.createElement( nodeName);

	if (childElem == null)
		throw "Could not append node to " + node.nodeName;

	// the text is set first, the finished element is attached afterwards
	childElem.text = text;
	node.appendChild( childElem );
}

/// <summary>Escapes a literal string so that it can be embedded in a regular expression</summary>
/// <param name="char0">literal text (one or more characters) to escape</param>
/// <returns>char0 with a backslash in front of every character that has a
/// special meaning in a regular expression</returns>
/// <remarks>Besides - + * ? ( ) [ ] \ $ ^ ! the characters . | { } are escaped too:
/// left unescaped, "." would match any character and "|" would split the
/// alternation the result is inserted into.</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function stringToRegExp( char0 )
{
	var regExp = /(\-|\+|\*|\?|\(|\)|\[|\]|\\|\$|\^|\!|\.|\||\{|\})/g;

	// in a replacement string the backslash is literal and $1 is the matched character
	return char0.replace(regExp, "\\$1");
}

/// <summary>Builds the regular expression of every keyword family</summary>
/// <param name="languageNode"><see cref="XMLDOMNode"/> any node of the syntax document;
/// it is only used to reach the document root</param>
/// <remarks>For each keywordlist found under the root, the kw values are escaped and
/// joined into one alternation, which is stored in the "regexp" attribute of the
/// keywordlist node. The optional "pre" and "post" attributes are regular expression
/// fragments placed before and after the alternation.</remarks>
/// <remarks>Without pre/post the alternation is enclosed in \b word boundaries. With a
/// fragment, the boundary is accepted on either side of it: a boundary in front of a
/// fragment that starts with a non-word character (such as "#") can never be
/// satisfied at the start of a line, so only the outer position would miss
/// "#include" there.</remarks>
/// <remarks>A keywordlist without any kw gets no "regexp" attribute, so that a rule
/// using it fails with an explicit message instead of an invalid pattern.</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function buildKeywordRegExp( languageNode )
{
	var keywordListList,keywordListNode;
	var sKeywords, sOpen, sClose, preNode, postNode;
	var kwList, kwNode,rootNode;

	rootNode = languageNode.selectSingleNode("/*");

	// iterating keyword families
	keywordListList = rootNode.selectNodes("keywordlists/keywordlist");
	keywordListList.reset();
	for ( keywordListNode = keywordListList.nextNode(); keywordListNode != null; keywordListNode= keywordListList.nextNode() )
	{
		// escaped keywords separated by |
		sKeywords = "";
		kwList = keywordListNode.selectNodes("kw");
		kwList.reset();
		for (kwNode = kwList.nextNode() ; kwNode != null; kwNode = kwList.nextNode() )
		{
			if (sKeywords.length > 0)
				sKeywords = sKeywords + "|";
			sKeywords = sKeywords + stringToRegExp( kwNode.nodeTypedValue );
		}

		// nothing to match: leave the family without regular expression
		if (sKeywords.length == 0)
			continue;

		// opening part: word boundary, combined with pre if there is one
		preNode = keywordListNode.attributes.getNamedItem("pre");
		if (preNode == null)
			sOpen = "\\b";
		else
			sOpen = "(\\b" + preNode.nodeTypedValue + "|" + preNode.nodeTypedValue + "\\b)";

		// closing part: word boundary, combined with post if there is one
		postNode = keywordListNode.attributes.getNamedItem("post");
		if (postNode == null)
			sClose = "\\b";
		else
			sClose = "(" + postNode.nodeTypedValue + "\\b|\\b" + postNode.nodeTypedValue + ")";

		// add to keywordListNode
		keywordListNode.setAttribute( "regexp", sOpen + "(" + sKeywords + ")" + sClose );
	}

}

/// <summary>Assembles the regular expression that detects any rule of a context</summary>
/// <param name="languageNode"><see cref="XMLDOMNode"/> language node, used to reach the document root</param>
/// <param name="contextNode"><see cref="XMLDOMNode"/> context node whose child rules are combined</param>
/// <returns>"(alt1|alt2|...)" with one alternative per rule, in document order,
/// or an empty string when the context contains no rule</returns>
/// <remarks>detect2chars and detectchar contribute their escaped characters, linecontinue
/// a line feed, regexp its expression as is, keyword the precomputed "regexp" attribute
/// of its keyword family. Comments and any other node contribute nothing.</remarks>
/// <exception>If a regexp rule has no expression attribute</exception>
/// <exception>If a keyword rule has no family attribute, names an unknown family,
/// or names a family without precomputed regular expression</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function buildRuleRegExp( languageNode, contextNode )
{
	var root = languageNode.selectSingleNode("/*");
	var rules = contextNode.childNodes;
	var alternatives = [];
	var rule, firstChar, secondChar, exprAttr, familyAttr, familyNode, familyRegExpAttr, xp;

	while ((rule = rules.nextNode()) != null)
	{
		switch (rule.nodeName)
		{
			case "detect2chars":
				firstChar = rule.attributes.getNamedItem("char").nodeTypedValue;
				secondChar = rule.attributes.getNamedItem("char1").nodeTypedValue;
				alternatives.push( stringToRegExp( firstChar + secondChar ) );
				break;

			case "detectchar":
				firstChar = rule.attributes.getNamedItem("char").nodeTypedValue;
				alternatives.push( stringToRegExp( firstChar ) );
				break;

			case "linecontinue":
				alternatives.push( "\n" );
				break;

			case "regexp":
				exprAttr = rule.attributes.getNamedItem("expression");
				if ( exprAttr == null )
					throw "Regular expression rule missing expression attribute";
				alternatives.push( exprAttr.nodeTypedValue );
				break;

			case "keyword":
				// locate the keyword family named by the rule
				familyAttr = rule.attributes.getNamedItem("family");
				if (familyAttr == null)
					throw "Keyword rule missing family";
				xp = "keywordlists/keywordlist[@id=\"" + familyAttr.nodeTypedValue + "\"]";
				familyNode = root.selectSingleNode(xp);
				if (familyNode == null)
					throw "Could not find keywordlist (xp: "+ xp + ")";

				// its regular expression must have been built beforehand
				familyRegExpAttr = familyNode.attributes.getNamedItem("regexp");
				if (familyRegExpAttr == null)
					throw "Could not find keywordlist regular expression";
				alternatives.push( familyRegExpAttr.nodeTypedValue );
				break;

			default:
				// "#comment" and every other node: no contribution
				break;
		}
	}

	if (alternatives.length == 0)
		return "";

	return "(" + alternatives.join("|") + ")";
};

/// <summary>Precomputes the regular expressions of one language</summary>
/// <param name="languageNode"><see cref="XMLDOMNode"/> language node to prepare</param>
/// <remarks>First (re)builds the regular expression of every keyword family of the
/// document, then stores in the "regexp" attribute of each context of the language
/// the expression returned by buildRuleRegExp for that context.</remarks>
/// <exception>Propagates the exceptions of buildRuleRegExp (for instance a keyword
/// rule naming an unknown family)</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function buildRules( languageNode )
{
	var contextList, contextNode, sRegExp;

	// keyword regular expressions must exist before the context rules use them
	buildKeywordRegExp( languageNode );

	contextList = languageNode.selectNodes("contexts/context");
	// create regular expressions for context
	for (contextNode = contextList.nextNode(); contextNode != null; contextNode = contextList.nextNode())
	{
		sRegExp = buildRuleRegExp( languageNode, contextNode );
		// add attribute
		contextNode.setAttribute( "regexp", sRegExp );
	}
}

/// <summary>Loads the syntax xml file and makes sure its regular expressions are built</summary>
/// <param name="sXMLSyntax">xml Syntax file name</param>
/// <returns><see cref="DOMDocument"/> language description, ready for parsing</returns>
/// <remarks>The rules of every language are built when the "needs-build" attribute of
/// the highlight root is missing or "yes"; the attribute is then set to "no". The
/// document is saved back to sXMLSyntax when "save-build" is "yes".</remarks>
/// <exception>If the file could not be loaded, or has no highlight root element</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function loadAndBuildSyntax( sXMLSyntax )
{
	var xmlDoc, highlightNode, languageNode, languageNodeList;
	var needBuildNode, saveBuildNode;

	// get highlight file; loadXML reports the failure itself and returns null
	xmlDoc = loadXML( sXMLSyntax );
	if (xmlDoc == null)
		throw "Could not load syntax file " + sXMLSyntax;

	highlightNode = xmlDoc.documentElement.selectSingleNode("/highlight");
	if (highlightNode == null)
		throw "Syntax file " + sXMLSyntax + " has no highlight root element";

	// check if build needed...
	needBuildNode = highlightNode.attributes.getNamedItem("needs-build");
	if (needBuildNode == null  || needBuildNode.nodeTypedValue=="yes")
	{
		// iterate languages and prebuild
		languageNodeList = xmlDoc.documentElement.selectNodes("/highlight/languages/language");
		languageNodeList.reset();
		for(languageNode = languageNodeList.nextNode(); languageNode != null; languageNode = languageNodeList.nextNode())
		{
			// build regular expressions
			buildRules( languageNode );
		}

		// updating...
		highlightNode.setAttribute("needs-build","no");
	}

	// save file if asked
	saveBuildNode = highlightNode.attributes.getNamedItem("save-build");
	if (saveBuildNode != null && saveBuildNode.nodeTypedValue == "yes")
		xmlDoc.save( sXMLSyntax );

	return xmlDoc;
}

/// <summary>Identifies the rule of a context that a matched text belongs to</summary>
/// <param name="languageNode"><see cref="XMLDOMNode"/> language node, used to reach the document root</param>
/// <param name="contextNode"><see cref="XMLDOMNode"/> context node whose rules are examined</param>
/// <param name="sMatch"><see cref="String"/> text matched by the regular expression of the context</param>
/// <returns>the first rule node, in document order, that accepts sMatch; null if none does</returns>
/// <remarks>detect2chars, detectchar and linecontinue rules require sMatch to be equal to
/// their characters; regexp and keyword rules accept sMatch as soon as their expression
/// matches somewhere inside it.</remarks>
/// <exception>If a regexp rule has no expression attribute</exception>
/// <exception>If a keyword rule has no family attribute, or its family has no regular expression</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function findRule( languageNode, contextNode, sMatch )
{
	var rules = contextNode.childNodes;
	var root = languageNode.selectSingleNode("/*");
	var rule, bAccepted, exprAttr, familyAttr, familyRegExpAttr, xp;

	for (rule = rules.nextNode(); rule != null; rule = rules.nextNode())
	{
		bAccepted = false;

		switch (rule.nodeName)
		{
			case "detect2chars":
				var firstChar = rule.attributes.getNamedItem("char").nodeTypedValue;
				var secondChar = rule.attributes.getNamedItem("char1").nodeTypedValue;
				bAccepted = (sMatch == firstChar + secondChar);
				break;

			case "detectchar":
				bAccepted = (rule.attributes.getNamedItem("char").nodeTypedValue == sMatch);
				break;

			case "linecontinue":
				bAccepted = ("\n" == sMatch);
				break;

			case "regexp":
				exprAttr = rule.attributes.getNamedItem("expression");
				if ( exprAttr == null )
					throw "Regular expression rule missing expression attribute";
				bAccepted = new RegExp( exprAttr.nodeTypedValue, "m" ).test( sMatch );
				break;

			case "keyword":
				familyAttr = rule.attributes.getNamedItem("family");
				if ( familyAttr == null)
					throw "Could not find family attribute for keyword";
				xp = "keywordlists/keywordlist[@id=\"" + familyAttr.nodeTypedValue + "\"]/@regexp";
				familyRegExpAttr = root.selectSingleNode( xp );
				if ( familyRegExpAttr == null)
					throw "Could not find regular expression for keyword family "+ rule.attributes.getNamedItem("attribute").nodeTypedValue + "(xp: "+xp+")";

				// the family expression is wrapped in a group before being tried
				bAccepted = new RegExp( "(" + familyRegExpAttr.nodeTypedValue + ")", "m" ).test( sMatch );
				break;

			default:
				// "#comment" and every other node are never candidates
				break;
		}

		if (bAccepted)
			return rule;
	}

	return null;
}

/// <summary>Applies the context rules successively to sString</summary>
/// <param name="languageNode"><seealso cref="XMLDOMNode"/> language node</param>
/// <param name="contextNode"><seealso cref="XMLDOMNode"/> context node in which parsing starts</param>
/// <param name="sString">String to parse and convert</param>
/// <param name="parsedCodeNode"><seealso cref="XMLDOMNode"/> parent node under which the parsed code is appended</param>
/// <returns>null when the remaining text matched no rule of the current context; 
/// no value when the string was entirely consumed by rule matches.</returns>
/// <remarks>This method uses the pre-computed "regexp" attribute of the contexts and findRule
/// to identify the matching rule. Each piece of text is appended to parsedCodeNode through 
/// addChildCDATAElem, named after the "attribute" attribute of the context or of the rule.
/// Throws a string when no rule or no target context can be found.</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function applyRules( languageNode, contextNode, sString, parsedCodeNode)
{
	var regExp, arr, sRegExp;
	// attributeNode used to be assigned without declaration (implicit global)
	var ruleNode, attributeNode, nextContextAttr, sNextContext;
	var bContextChanged;

	// building regExp 
	sRegExp=contextNode.attributes.getNamedItem("regexp").nodeTypedValue;
	regExp = new RegExp( sRegExp, "m" );

	while (sString.length > 0)
	{
		// look for the first rule match of the current context
		arr = regExp.exec( sString );
		if (arr == null)
		{
			// no rule matches anymore: the rest belongs to the current context
			addChildCDATAElem( parsedCodeNode,
							contextNode.attributes.getNamedItem("attribute").nodeTypedValue, 
							sString );
			
			// finished parsing
			regExp=null;
			return null;
		}

		// text located before the match belongs to the current context
		addChildCDATAElem(parsedCodeNode, 
						contextNode.attributes.getNamedItem("attribute").nodeTypedValue,
						sString.substring(0, arr.index ) );
		
		// find rule...
		ruleNode = findRule( languageNode, contextNode, arr[0] );
		if (ruleNode == null)
			throw "Didn't matching rule, regular expression false ? ( context: " + contextNode.attributes.getNamedItem("id").nodeTypedValue;
		
		// the matched text is stored only if the rule names a non-hidden node
		attributeNode=ruleNode.attributes.getNamedItem("attribute");
		if (attributeNode != null && attributeNode.nodeTypedValue!="hidden" )
		{
			addChildCDATAElem(parsedCodeNode,
							attributeNode.nodeTypedValue,
							arr[0]);
		}
		
		// a rule without "context" used to fail with a bare null dereference
		nextContextAttr = ruleNode.attributes.getNamedItem("context");
		if (nextContextAttr == null)
			throw "Rule " + ruleNode.nodeName + " has no context attribute ( context: "
				+ contextNode.attributes.getNamedItem("id").nodeTypedValue + " )";
		sNextContext = nextContextAttr.nodeTypedValue;

		// update context if necessary
		bContextChanged = false;
		if ( contextNode.attributes.getNamedItem("id").nodeTypedValue != sNextContext )
		{
			contextNode = languageNode.selectSingleNode( "contexts/context[@id=\"" 
							+ sNextContext
							+ "\"]" );
			if (contextNode == null)
				throw "Didn't matching context, error in xml specification ?";
				
			// build new regular expression
			sRegExp=contextNode.attributes.getNamedItem("regexp").nodeTypedValue;
			regExp = new RegExp( sRegExp, "m" );
			bContextChanged = true;
		}

		// An empty match at position 0 that keeps the same context would leave
		// both the string and the expression unchanged: the loop could never end.
		if ( !bContextChanged && arr.index + arr[0].length == 0 )
			throw "Rule matched an empty string without changing context, parsing cannot advance ( context: "
				+ contextNode.attributes.getNamedItem("id").nodeTypedValue + " )";

		// continue after the matched text
		sString = sString.substring(arr.index+arr[0].length, sString.length);			
	}
	regExp = null;
}


/// <summary>Create and populate an xml document with the corresponding language</summary>
/// <param name="xmlDoc"><seealso cref="DOMDocument"/> highlight syntax document</param>
/// <param name="sLang">language string description. For C++, use cpp.</param> 
/// <param name="sRootTag">Value stored in the "lang" attribute of the parsedcode root node.</param> 
/// <param name="bInBox">true if in box; stored in the "in-box" attribute of the root node</param>
/// <param name="sCode">Code to parse</param>
/// <returns><seealso cref="DOMDocument"/> document containing the parsed code, 
/// or null if an error occurred (the error is passed to handleException).</returns>
/// <remarks>This method builds an XML tree containing context node. Use an xsl file to render it.</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function buildHighlightTree( xmlDoc, sLang, sRootTag, bInBox, sCode )
{
	// contextsNode and contextNode used to be assigned without declaration
	// (implicit globals); they are only needed inside this function.
	var languageNode, contextsNode, contextNode, xp;
	var resultMainNode;
	var sDefault;

	try
	{			
		/////////////////////////////////////////////////////////////////////////		
		// getting language
		xp="/highlight/languages/language[@id=\"" + sLang + "\"]";
		languageNode=xmlDoc.documentElement.selectSingleNode( xp );
		if (languageNode == null)
			throw "Could not find " + sLang + "language (xpath: " + xp + ")";
	
		/////////////////////////////////////////////////////////////////////////		
		// getting context
		contextsNode=languageNode.selectSingleNode( "contexts" );
		if (contextsNode == null)
			throw "Could not find contexts node for " + sLang + "language";

		/////////////////////////////////////////////////////////////////////////		
		// getting default context: the one parsing starts in
		sDefault=contextsNode.attributes.getNamedItem("default").nodeTypedValue;
		xp="context[@id=\"" +  sDefault + "\"]";
		contextNode=contextsNode.selectSingleNode( xp );
		if (contextNode == null)
			throw "Could not find default context for " + sLang + "language (xpath: " + xp + ")";
	
		// create result xml
		// NOTE(review): xmlResult is intentionally left undeclared (global), as in
		// the original code. Helpers that are not visible from here may read it;
		// confirm before turning it into a local variable.
		xmlResult = new ActiveXObject("Msxml2.DOMDocument");

		///////////////////////////////////////////////////////////////////////////	
		// creating main node
		resultMainNode=xmlResult.createElement( "parsedcode" );
		if (resultMainNode == null)
			throw "Could not create main node parsedcode";
		xmlResult.appendChild(resultMainNode);
					
		///////////////////////////////////////////////////////////////////////////	
		// adding language attribute
		resultMainNode.setAttribute("lang", sRootTag );
		resultMainNode.setAttribute("in-box", bInBox );

		///////////////////////////////////////////////////////////////////////////	
		// parse and populate xmlResult
		applyRules( languageNode, contextNode, sCode, resultMainNode);

		return xmlResult;
	}
	catch(exception)
	{
		// any failure is reported and turned into a null result
		handleException (exception);
		xmlResult=null;
		return null;
	}
}

/// <summary>Apply syntax matching to sCode with the corresponding language sLang</summary>
/// <param name="sLang">language string description. For C++, use cpp.</param> 
/// <param name="sRootTag">Root tag (under parsed code) for the generated xml tree.</param> 
/// <param name="bInBox">true if in box; forwarded to buildHighlightTree</param>
/// <param name="sCode">Code to parse</param>
/// <returns>the highlighted code, or an empty string if highlighting failed.</returns>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function highlightCode( sLang, sRootTag, bInBox, sCode)
{
	var xmlResult = null;
	// Default result for every failure path. The previous version returned from
	// the finally block, which overrode the "" returned by the catch block and
	// yielded undefined instead.
	var sResult = "";

	try
	{		
		// re-build highlight tree	
		xmlResult = buildHighlightTree( xmlGlobDoc, sLang, sRootTag, bInBox, sCode );

		// buildHighlightTree reports its own errors and returns null in that
		// case: nothing to render, and no second exception to raise.
		if (xmlResult != null)
		{
			// render xml
			sResult=xmlResult.transformNode( xslGlobDoc );
		}
	}
	catch(exception)
	{
		handleException (exception);
		sResult = "";
	}
	finally
	{
		// release the document; no return here so that it cannot mask the result
		xmlResult=null;
	}

	return sResult;
};

/// <summary>Find the lang in the tag</summary>
/// <param name="sMatch">a string starting with the opening tag (typically the full tag match)</param>
/// <returns>the value of the attribute named by sGlobLangTag, without quotes, 
/// or null if the opening tag does not contain it</returns>
function findLang( sMatch )
{
	var sRegExp, regExp, arr, sOpenTag, nTagEnd;

	// Only the opening tag is searched: sMatch also contains the code itself,
	// where something like lang=x must not be mistaken for the tag attribute.
	// sGlobCT is used as a regular expression, as in processAndHighlightText.
	sOpenTag = sMatch;
	if (typeof sGlobCT == "string" && sGlobCT.length > 0)
	{
		nTagEnd = sMatch.search( new RegExp( sGlobCT ) );
		if (nTagEnd >= 0)
			sOpenTag = sMatch.substring(0, nTagEnd);
	}

	// build regular expression: lang = "value" or lang = value,
	// whitespace allowed on both sides of the equal sign
	sRegExp= sGlobLangTag + "\\s*=\\s*(\"[a-z]+\"|[a-z]+)";
	regExp = new RegExp( sRegExp, "im"); 

	arr = regExp.exec( sOpenTag );
	if (arr==null || arr.length < 2 )
		return null;

	// strip the surrounding quotes if any
	if (arr[1].charAt(0)=="\"")
		return arr[1].substring(1, arr[1].length-1);
	return arr[1];
}

/// <summary>Replacement callback for String::Replace: highlights one tagged piece of code</summary>
/// <param name="sMatch">Full match ($0)</param>
/// <param name="sValue">text inside tags ($1)</param>
/// <returns>the result of highlightCode for sValue, or sMatch unchanged when the tag 
/// specifies no language or a language missing from the syntax document</returns>
function replaceByCode( sMatch, sValue )
{
	var sLanguage = findLang( sMatch );

	// no language specified: leave the text untouched
	if (sLanguage == null)
		return sMatch;

	// language unknown to the syntax document: leave the text untouched
	var sQuery = "/highlight/languages/language[@id=\"" + sLanguage + "\"]";
	if (xmlGlobDoc.documentElement.selectSingleNode( sQuery ) == null)
		return sMatch;

	// the language id is also used as root tag of the generated tree
	return highlightCode( sLanguage, sLanguage, bGlobCodeInBox, sValue);
}
			
/// <summary>Processes HTML and highlight code in <pre>...</pre> and in <code>...</code></summary>
/// <param name="sValue">HTML code</param>
/// <param name="sOT">character opening tag: usually &lt;</param>
/// <param name="sTag">tag containing code</param>
/// <param name="sCT">character closing tag: usually &gt;</param>
/// <param name="bInBox">boolean: true if should be in box, false otherwise</param>
/// <returns>HTML with colored code</returns>
/// Available languages: C++ -> cpp, JSCript -> jscript, VBScript -> vbscript
/// <remarks>sOT, sTag, sCT and bInBox are stored in the global settings 
/// (sGlobOT, sGlobTag, sGlobCT, bGlobCodeInBox) before processing.
/// sOT, sTag and sCT are inserted as is in a regular expression.</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function processAndHighlightText( sValue, sOT, sTag, sCT, bInBox )
{
	var sRegExp, regExp;
	// <pre lang="cpp">using</pre>
	
	// setting global variables
	sGlobOT = sOT;
	sGlobCT = sCT;
	sGlobTag = sTag;
	bGlobCodeInBox=bInBox;
		
	// building regular expression:
	//   opening tag (with its attributes), content ($1), closing tag
	// The content is matched with [\s\S]*? rather than (.|\n)*? : the dot does
	// not match carriage returns in standard ECMAScript engines, which made
	// blocks with CR/LF line ends unmatchable, and the alternation is costly
	// on long inputs.
	sRegExp = sGlobOT 
		+"\\s*"
		+ sGlobTag
		+".*?"
		+sGlobCT
		+"([\\s\\S]*?)"
		+sGlobOT
		+"\\s*/\\s*"
		+sGlobTag
		+"\\s*"
		+sGlobCT;

	regExp=new RegExp(sRegExp, "gim");

	// render pre: each tagged block is replaced by its highlighted version
	return sValue.replace( regExp,  function( $0, $1 ){ return replaceByCode( $0, $1 );} );
};

//////////////////////////////////////////////////////////////////////////////////////
// Initialization
/// <summary>Sets the default tag settings, preprocesses the language syntax document 
/// and gets the xsl document.</summary>
/// <returns>always true</returns>
/// <remarks>The documents are read from the languageSyntax and codeXSL objects of the page.</remarks>
function initHighlighting()
{	
	// default rendering options
	bGlobCodeInBox=true;
	sGlobTemplate= "cpp";

	// default markup searched for: <pre lang="...">
	sGlobLangTag = "lang";
	sGlobTag = "pre";
	sGlobCT = ">";
	sGlobOT = "<";

	// syntax document (preprocessed by loadAndBuildSyntax) and rendering stylesheet
	xmlGlobDoc = loadAndBuildSyntax( languageSyntax.XMLDocument );
	xslGlobDoc = codeXSL.XMLDocument;
	
	return true;
}

// Global variables...
// Tag settings and rendering options. sGlobTemplate is assigned by
// initHighlighting and is now declared here with the other settings
// instead of being created as an implicit global.
var sGlobOT, sGlobTag, sGlobCT, sGlobLangTag, sGlobDefaultLang, sGlobTemplate, bGlobCodeInBox;
// Language syntax document and xsl document
var xmlGlobDoc, xslGlobDoc;

// Initialize and preparse...
initHighlighting();
	</script>
<script language="javascript">
function highlightDemo()
{
	// Colorizes the content of the demo text area and displays the result in
	// the parsedCode element. Two passes: boxed "pre" blocks first, then
	// inline "code" spans on the output of the first pass.
	var sSource = document.formDemo.sourceCode.value;
	var sBoxed = processAndHighlightText( sSource, "<", "pre", ">", true);
	var sColored = processAndHighlightText( sBoxed, "<", "code", ">", false);

	// update value
	parsedCode.innerHTML = sColored;
}
function addDemo( demoID )
{
	// Copies the example identified by demoID from the codeExamples document
	// into the demo text area, then colorizes it.
	// Throws a string if no example has this id.
	var xp="examples/example[@id=\"" + demoID +"\"]";
	var demoNode = codeExamples.XMLDocument.selectSingleNode( xp );

	// an unknown id used to fail with a bare null dereference
	if (demoNode == null)
		throw "Could not find example " + demoID + " (xpath: " + xp + ")";

	document.formDemo.sourceCode.value= demoNode.nodeTypedValue;
	
	highlightDemo();
}
	</script>

<!-- Download Links -->
<ul class="download">
		<li>
			<a href='highlight/highlight_src.zip'>Download source and demo files - 18 Kb</a></li>
	</ul>

<!-- Article image -->
<p><img src="highlight/highlight.png" alt="Sample Image - highlight.png" height="236" width="487"></p>

<div style="background-color: #FFFFBB; color:#AA0000">
		New languages supported: JScript, VBScript, C, XML !
	</div>
        <h2>Introduction</h2>
        <p>Have you ever wondered how the CP team highlights the source code in their 
		edited articles? I suppose it's not by hand and they must have some clever code 
		to do it.</p>
        <p>However, if you look around in the forums on the web, you will see that 
		there are few if any who have this feature. Sad thing, because colored source 
		code is much easier to read. In fact, it would be great to have source code in 
		forums automatically colored with your favorite coloring scheme.</p>
<p>The last-but-not-least reason for writing this article was to learn regular 
		expressions, javascript and DOM in one project.</p>
        <p>The source code is entirely written in JScript so it can be included 
		server-side or client-side in your web pages.</p>
        <p>The techniques used are:</p>
        <ul>
		<li>
		regular expressions
		<li>
		XML DOM
		<li>
		XSL transformation
		<LI>
			CSS style
		</LI>
	</ul>
        <P></P>
        <p>When reading this article, I will assume that you have a little knowledge of 
		regular expressions, DOM and XSLT although I'm also a newbie in those 3 topics.</p>
<h2>Live Demo</h2>
<p>Before starting to explain the "colorizing" process, you can play with the demo 
		below (you need to enable JavaScript). Copy and paste any HTML text or use the 
		"Add ..." buttons to load built-in examples. The source code must be formatted as follows:</p>
<p>For boxed code, use <code>pre</code> tag:
	</p> 
		<pre>&lt;pre lang="..."&gt;
source code
&lt;/pre&gt;</pre>
		where <code>...</code> describes the language: "c" -&gt; C, "cpp" -&gt; C++, 
		"jscript" -&gt; Javascript, "vbscript" -&gt; VBScript, "xml" -&gt; XML.<br>
		For inline code, use the <code>code</code> tag. 
<P></P>
<table style="BORDER-RIGHT: 1px solid; BORDER-TOP: 1px solid; BORDER-LEFT: 1px solid; BORDER-BOTTOM: 1px solid">
		<form id="formDemo" name="formDemo">
			<TBODY>
				<tr>
					<td>Enter a mixture of source code and HTML here.</td>
				</tr>
				<tr>
					<td><textarea cols="80" rows="10" name="sourceCode" id="sourceCode"></textarea></td>
				</tr>
				<tr>
					<td><input type="button" name="colBut" id="colBut" value="Colorize" onclick="highlightDemo()">
						<input type="button" name="addC" id="addC" value="Add C" onclick='addDemo("c")'>
						<input type="button" name="addCpp" id="addCpp" value="Add C++" onclick='addDemo("cpp")'>
						<input type="button" name="addJs" id="addJs" value="Add JScript" onclick='addDemo("jscript")'>
						<input type="button" name="addVb" id="addVb" value="Add VBScript" onclick='addDemo("vbscript")'>
						<input type="button" name="addXML" id="addXML" value="Add XML" onclick='addDemo("xml")'>
					</td>
				</tr>
				<tr>
					<td><div name="parsedCode" id="parsedCode" class="demo-div" style="FONT-SIZE:smaller"></div>
					</td>
				</tr>
		</TBODY>
		</form>
	</table>		
        <h2>Transformation overview</h2>
        <p align="center">
		<table ID="Table1">
			<tr>
				<td><IMG src="highlight/pipe.png"></td>
			</tr>
			<tr>
				<td>Parsing pipe</td>
			</tr>
		</table>
	</p>
        <p>All the boxes will be discussed in detail in the next chapter. I will give 
		here a short overview of the process.</p>
        <p>First, a language syntax specification file is loaded (Language 
		specification box). This specification is a plain xml file given by the users. 
		In order to speed up things, preprocessing is made on this document 
		(Preprocessing box).</p>
        <p>Let us suppose for simplicity that we have the source code to colorize (Code 
		box). Note that I will show how to apply the coloring to a whole html page 
		later on. The parser, using the preprocessed syntax document, builds an XML 
		document representing the parsed code (Parsing box). The technique used by the 
		parser is to split up the code in a succession of nodes of different types: 
		keyword, comment, literal, etc...</p>
        <p>Finally, an XSLT transformation is applied to the parsed code document to 
		render it to HTML and a CSS style is given to match the desired appearance.</p>
        <h2>Parsing procedure</h2>
        <p>The philosophy used to build the parser is inspired from the Kate 
		documentation (see [1]).</p>
        <p>The code is considered as a succession of <b>contexts</b>. For example, in 
		C++,</p>
        <ul>
		<li>
		keyword: if, else, while, etc...
		<li>
		preprocessor instruction: #ifdef, ...
		<li>
		literals: "..."
		<li>
		line comment: // ...
		<li>
		block comment: /* ... */
		<li>
			and the rest.
		</li>
	</ul>
        <P></P>
        <p>For each <b>context</b>, we define <b>rules</b> that have 3 properties:</p>
        <ol>
		<li>
		a regular expression for matching a string
		<li>
			the context of the text matched by the rule: <b>attribute</b>
		<li>
			the context of the text following the rule: <b>context</b>
		</li>
	</ol>
        <P></P>
        <p>The rules have priority among them. For example, we will first look for a /* 
		... */ comment, then a // ... line comment, then literal, etc...</p>
        <p>When a rule is matched using a regular expression, the string matched by the 
		rule is assigned with the <b>attribute</b> context, the current context is 
		updated as <b>context</b> and the parsing continues. The diagram shows the 
		possible paths between contexts. As one can see, some rules do not lead to a new 
		context.</p>
        <p align="center">
		<table ID="Table2">
			<tr>
				<td><img src="highlight/parsecontext.png" width="636" height="346"></td>
			</tr>
			<tr>
				<td>Context dynamics</td>
			</tr>
		</table>
	</p>
        <P>Let me explain the diagram above a bit. Consider that we are in the <code>code</code>
		context. We are going to look for the first match of the code rules: <code>/**/, 
			//, "...", keyword</code>. Moreover, we have to take into account their 
		priorities: a keyword is not really a keyword in a block of comment, so it has 
		a lower priority. This task is easily and naturally done through regular 
		expressions.</P>
        <P>Once we find a match, we look for the rule that triggered that match (always 
		following the priority of the rules). Therefore, a pathological case like the 
		following is parsed correctly:
	</P>
        <pre lang="c++">// a keyword while in a comment</pre>
        while is not considered as a keyword since it is in a comment.
        <P></P>
        <h3>Rules available</h3>
        <p>There are 5 rules currently available:</p>
<OL>
		<li>
			<b>detect2chars</b>: detects a pattern made of 2 characters.
		<li>
			<b>detectchar:</b>
		detects a pattern made of 1 character.
		<li>
			<b>linecontinue:</b>
		detects end of line
		<li>
			<b>keyword:</b> detects a keyword out of a keyword family
		<li>
			<b>regexp:</b> matches a regular expression.
		</li>
	</OL>
<P>regexp is by far the most powerful rule of all as all other rules are represented 
		internally by regular expressions.</P>
        <P></P>
        <h2>Language Specification</h2>
        <p>From the rules and context above, we derive an XML structure as described in 
		the XSD schema below (<em>I don't really understand xsd but .Net generates this 
			nice diagram...</em>)</p>
        <table align="center" ID="Table3">
		<tr>
			<td align="middle"><A href="highlight/schema.png" target="_blank"><IMG src="highlight/tn_schema.jpg"></A></td>
		</tr>
		<tr>
			<td align="middle">Language specification schema. Click on the image to view it 
				full size.</td>
		</tr>
	</table>
        <p>I will briefly discuss the language specification file here. For more 
		details, look at the xsd schema or at <code>highlight.xml</code> specification 
		file (for C++). Basically, you must define families of keywords, choose context 
		and write the rule to pass from one to another.</p>
        <h3>Nodes</h3>
        <table ID="Table4">
		<thead>
                <tr>
				<td>Name</td>
				<td>Type</td>
				<td>Parent Node</td>
				<td>Description</td>
			</tr>
            </thead>
		<tbody>
			<tr>
				<td>highlight</td>
				<td>root</td>
				<td>none</td>
				<td>
					<P>The root node</P>
				</td>
			</tr>
			<tr>
				<td>needs-build</td>
				<td>A (optional)</td>
				<td>highlight</td>
				<td>"yes" if file needs preprocessing</td>
			</tr>
			<tr>
				<td>save-build</td>
				<td>A (optional***)</td>
				<td>highlight</td>
				<td>"yes" if file has to be saved after preprocessing</td>
			</tr>
			<tr>
				<td>keywordlists</td>
				<td>E</td>
				<td>highlight</td>
				<td>Node containing families of keywords as children</td>
			</tr>
			<tr>
				<td>keywordlist</td>
				<td>E</td>
				<td>keywordlists</td>
				<td>A family of keywords</td>
			</tr>
			<tr>
				<td>id</td>
				<td>A</td>
				<td>keywordlist</td>
				<td>String identifier</td>
			</tr>
			<tr>
				<td>pre</td>
				<td>A (optional)</td>
				<td>keywordlist</td>
				<td>Regular expression to prepend to the keywords</td>
			</tr>
			<tr>
				<td>post</td>
				<td>A (optional)</td>
				<td>keywordlist</td>
				<td>Regular expression to append at the end of the keywords</td>
			</tr>
			<tr>
				<td>regexp</td>
				<td>A (optional*)</td>
				<td>keywordlist</td>
				<td>Regular expression matching the keyword family. Built by the preprocessor</td>
			</tr>
			<tr>
				<td>kw</td>
				<td>E</td>
				<td>keywordlist</td>
				<td>Text or CDATA node containing the keywords</td>
			</tr>
			<tr>
				<td>languages</td>
				<td>E</td>
				<td>highlight</td>
				<td>Node containing languages as children</td>
			</tr>
			<tr>
				<td>language</td>
				<td>E</td>
				<td>languages</td>
				<td>A language specification</td>
			</tr>
			<tr>
				<td>contexts</td>
				<td>E</td>
				<td>language</td>
				<td>A collection of context node</td>
			</tr>
			<tr>
				<td>default</td>
				<td>A</td>
				<td>contexts</td>
				<td>String identifying the default context</td>
			</tr>
			<tr>
				<td>context</td>
				<td>E</td>
				<td>contexts</td>
				<td>A context node containing rules as children</td>
			</tr>
			<tr>
				<td>id</td>
				<td>A</td>
				<td>context</td>
				<td>String identifier</td>
			</tr>
			<tr>
				<td>attribute</td>
				<td>A</td>
				<td>context</td>
				<td>The name of the node in which the context will be stored.</td>
			</tr>
			<tr>
				<td>detect2chars**</td>
				<td>E</td>
				<td>context</td>
				<td>Rule to detect a pair of characters. (ex: <code>/*</code>)</td>
			</tr>
			<tr>
				<td>char</td>
				<td>A</td>
				<td>detect2chars</td>
				<td>First character of the pattern</td>
			</tr>
			<tr>
				<td>char1</td>
				<td>A</td>
				<td>detect2chars</td>
				<td>Second character of the pattern</td>
			</tr>
			<tr>
				<td>detectchar**</td>
				<td>E</td>
				<td>context</td>
				<td>Rule to detect one character. (ex: <code>"</code>)</td>
			</tr>
			<tr>
				<td>char</td>
				<td>A</td>
				<td>detectchar</td>
				<td>character to match</td>
			</tr>
			<tr>
				<td>keyword**</td>
				<td>E</td>
				<td>context</td>
				<td>Rule to match a family of keywords</td>
			</tr>
			<tr>
				<td>family</td>
				<td>A</td>
				<td>keyword</td>
				<td>Family identifier, must match <code>/highlight/keywordlists/keywordlist[@id]</code></td>
			</tr>
			<tr>
				<td>regexp</td>
				<td>E</td>
				<td>context</td>
				<td>A regular expression to match</td>
			</tr>
			<tr>
				<td>expression</td>
				<td>A</td>
				<td>regexp</td>
				<td>the regular expression.</td>
			</tr>
		</tbody>
	</table>Comments: 
<UL>
		<li>
		*: this attribute is optional provided that preprocessing takes place. 
		The usual approach is to always preprocess, or to preprocess once with the 
		"save-build" parameter set to "yes" so that the preprocessed file is saved. Note 
		that if you modify the language syntax, you will have to re-preprocess.
		<li>
			**: all those elements have two other attributes:
			<table>
				<tr>
					<td>attribute (optional)</td>
					<td>A</td>
					<td>a rule</td>
					<td>The name of the node in which the string match will be stored. If not set or 
						equal to "hidden", no node is created.</td>
				</tr>
				<tr>
					<td>context</td>
					<td>A</td>
					<td>a rule</td>
					<td>The next context.</td>
				</tr>
			</table>
		<li>
			***: Client-side javascript is not allowed to write files. Hence, this option 
			applies only to server-side execution.</li></UL>
<H2>Preprocessing</H2>
        In the preprocessing phase, we are going to build the regular expressions that 
        will be used later on to match the <b>rules</b>. This section makes an extensive use of 
<b>regular expressions</b>. As mentioned before, this is not a tutorial on regular expressions since I'm also a newbie in
that topic.
A tool that I have found to be <b>really</b> useful is <b>Expresso</b> (see [3]), a regular expression testing tool.
        <h3>keyword families</h3>
        Building the keyword families regular expressions is straightforward. You just 
        need to concatenate the keywords together using <b>|</b>:
        <pre lang="xml">&lt;keywordlist ...&gt;
    &lt;kw&gt;if&lt;/kw&gt;
    &lt;kw&gt;else&lt;/kw&gt;
&lt;/keywordlist&gt;</pre>
        will be matched by
        <pre lang="xml">\b(if|else)\b</pre>
        <p>The generated regular expression is added as an attribute to the <b>keywordlist</b>
		node:
	</p>
        <pre lang="xml">&lt;keywordlist <b>regexp="\b(if|else)\b"</b>&gt; 
    &lt;kw&gt;if&lt;/kw&gt; 
    &lt;kw&gt;else&lt;/kw&gt; 
&lt;/keywordlist&gt;</pre>
<p>When using function libraries, it is common for all the function names to share a prefix, as 
		in OpenGL:</p>
<pre lang="c++"><b>gl</b>Vertex2f, <b>gl</b>PushMatrix(), etc...
</pre>
You can skip the hassle of rewriting <code>gl</code> in all the <code>kw</code> items by using the attribute <code>pre</code>, which takes a regular expression as a parameter:
        <pre lang="xml">&lt;keywordlist <b>pre="gl"</b> ...&gt;
    &lt;kw&gt;Vertex2f&lt;/kw&gt;
    &lt;kw&gt;PushMatrix&lt;/kw&gt;
&lt;/keywordlist&gt;</pre>
        will be matched by
        <pre lang="xml">\bgl(Vertex2f|PushMatrix)\b</pre>
You can also add a regular expression after the keyword using <code>post</code>. Still working on our OpenGL example, some functions have trailing characters that indicate the type of their parameters:
<ul>
		<li>
			<code>glCoord2f</code>: takes 2 floats,
		<li>
			<code>glRaster3f</code>: takes 3 floats,
		<li>
			<code>glVertex4v</code>: takes an array of floats of size 4</li>
	</ul>
Using <code>post</code> and a regular expression, we can match them easily:
        <pre lang="xml">&lt;keywordlist pre="gl" <b>post="[2-4]{1}(f|v){1}"</b> ...&gt;
    &lt;kw&gt;Vertex&lt;/kw&gt;
    &lt;kw&gt;Raster&lt;/kw&gt;
&lt;/keywordlist&gt;</pre>
        will be matched by
        <pre>\bgl(Vertex|Raster)[2-4]{1}(f|v){1}\b</pre>
        <h3>String literals</h3>
        <P>This is a little exercise on regular expression: How to match a literal 
		string in C++? Remember that it must support <code>\"</code>, end of line with <code>
			\</code>.</P>
        <p>My answer (remember I'm a newbie) is
	</p>
        <pre lang="xml">"(.|\\"|\\\r\n)*?((\\\\)+"|[^\\]{1}")</pre>
        I tested this expression on the following string:
        <pre lang="C++">"a simple string" 
---
"a less \" simple string" 
---
"a even less simple string \\" 
---
"a double line\
string"
---
"a double line string does not work without 
backslash"
---
"Mixing" string "can\"" become "tricky"
---
"Mixing  \" nasty" string is \" even worst" 
</pre>
        <H3>&nbsp;</H3>
<H3>Contexts</H3>
        <p>The context regular expression is also built by concatenating the regular 
		expression of the rules. The value is added as an attribute to the <b>context</b>
		node:</p>
        <pre>&lt;context <b>regexp="(...|...)"</b>&gt;</pre>
        <h3>Controlling if preprocessing is necessary</h3>
        It is possible to skip the preprocessing phase or to save the "preprocessed" 
        language specification file. This is done by specifying the following 
        parameters in the root node <b>highlight</b>
        <table ID="Table6">
		<thead>
			<TR>
				<td>Attribute</td>
				<td>Description</td>
				<td>Default</td>
			</TR>
		</thead>
		<tbody>
			<tr>
				<td>needs-build</td>
				<td>"yes" if needs preprocessing</td>
				<td>yes</td>
			</tr>
			<tr>
				<td>save-build</td>
				<td>"yes" if saving preprocessed language specification to disk</td>
				<td>no</td>
			</tr>
		</tbody>
	</table>
        <h3>Javascript call</h3>
        <p>The preprocessing phase is done through the javascript method <code>loadAndBuildSyntax</code>:</p>        
        <pre lang="jscript">// language specification file
var sXMLSyntax = "highlight.xml";
// loading is done by loadXML
// preprocessing is done in loadAnd... It returns a DOMDocument
var xmlDoc = loadAndBuildSyntax( loadXML( sXMLSyntax ) );</pre>
        <h2>Parsing</h2>
        <p></p>
        We are going to use the language syntax above to build an XML tree out of the 
        source code. This tree will be made out of successive <b>context</b> nodes.
        <P></P>
        <p>We can start parsing the string (pseudo-code below):</p>
        <pre lang="jscript">source = source code;
context = code; // current context
regExp = context.regexp; // regular expression of the current context
while( source.length &gt; 0)
{
</pre>
        Here we follow the procedure:
        <ol>
		<li>
			find the first match of the <b>context</b> rules
		<li>
		store the source before the match
		<li>
		find the rule that was matched
		<li>
			process the rule parameters</li></ol>
        <pre>    match = regExp.execute( source );
    // check if the rules matched something
    if( !match)
    {
        // no match, creating node with the remaining source and finishing.
        addChildNode( context, // name of the node
            source );          // content of the node
        break;
    }    
    else
    {
</pre>
        The source before the match has to be stored in a new node:
        <pre lang="jscript">        addChildNode( context, source before match);
</pre>
        We now have to find the rule that has matched. This is done by the method <code>
		findRule</code>
        that returns the rule node. The rule is then processed using <b>attribute</b> and
        <b>context</b> parameters.
        <pre lang="jscript">    
        // getting new node
        ruleNode = findRule( match );
        // testing if matching string has to be stored
        // if yes, adding
        if (ruleNode.attribute != "hidden")                
            addChildNode( attribute, match);
        
        // getting new context            
        context=ruleNode.context;
        // getting new regular expression            
        regExp=context.regexp;            
    }
}
</pre>
        <p>At the end of this method, we have built an XML tree containing the contexts. 
		For example, consider the classic "Hello world" program below:</p>
        <pre lang="c++">int main(int argc, char* argv[])
{
    // my first program
    cout&lt;&lt;"Hello world";
    return -1;
};
</pre>
        This sample is translated in the following xml structure:
        <pre lang="xml">&lt;parsedcode lang="cpp" in-box="-1"&gt;
  &lt;reservedkeyword&gt;int&lt;/reservedkeyword&gt;
  &lt;code&gt; main(&lt;/code&gt;
  &lt;reservedkeyword&gt;int&lt;/reservedkeyword&gt;
  &lt;code&gt; argc, &lt;/code&gt;
  &lt;reservedkeyword&gt;char&lt;/reservedkeyword&gt;
  &lt;code&gt; * argv[])
{
&lt;/code&gt;
...
</pre>
Here is the specification of the resulting XML file:
<table>
		<thead>
<tr>
				<td>Node Name</td>
				<td>Type</td>
				<td>Parent Node</td>
				<td>Description</td>
			</tr>
            </thead>
		<tbody>
			<tr>
				<td>parsedcode</td>
				<td>root</td>
				<td></td>
				<td>Root node of document</td>
			</tr>
			<tr>
				<td>lang</td>
				<td>A</td>
				<td>parsedcode</td>
				<td>type of language: c, cpp, jscript, etc.</td>
			</tr>
			<tr>
				<td>in-box</td>
				<td>A</td>
				<td>parsedcode</td>
				<td>-1 if it should be enclosed in a pre tag, otherwise in a code tag</td>
			</tr>
			<tr>
				<td>code</td>
				<td>E</td>
				<td>parsedcode</td>
				<td>non special source code</td>
			</tr>
			<tr>
				<td>and others...</td>
				<td>E</td>
				<td>parsedcode</td>
				<td></td>
			</tr>
		</tbody>
	</table>
        <h3>Javascript call</h3>
        <p>The algorithm above is implemented in the <code>applyRules</code> method:</p>
        <pre lang="jscript">applyRules( languageNode, contextNode, sCode, parsedCodeNode);
</pre>
        where
        <ul>
		<li>
			<code>languageNode</code> is the current language node (<code>XMLDOMNode</code>),
		<li>
			<code>contextNode</code> is the start context node (<code>XMLDOMNode</code>),
		<li>
			<code>sCode</code> is the source code (<code>String</code>),
		<li>
			<code>parsedCodeNode</code> is the parent node of the parsed code (<code>XMLDOMNode</code>)</li></ul>
        <h2>XSLT transformation</h2>
        <p>
		Once you have the XML representation of your code, you can basically do 
		whatever you want with it using XSLT transformations.
	</p>
<h3>Header</h3>
<P>Every XSL file starts with some declarations and other standard options:</P>
        <pre lang="xml">
<SPAN class=xml-bracket>&lt;?</SPAN><SPAN class=xml-tag>xml</SPAN> <SPAN class=xml-attribute-name>version</SPAN><SPAN class=xml-attribute-name></SPAN><SPAN class=xml-attribute-value>="1.0" encoding="ISO-8859-1"</SPAN><SPAN class=xml-bracket>?&gt;</SPAN>
<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:stylesheet</SPAN> <SPAN class=xml-attribute-name>xmlns</SPAN><SPAN class=xml-attribute-name>:xsl</SPAN><SPAN class=xml-attribute-value>="http://www.w3.org/1999/XSL/Transform" version="1.0"</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:output</SPAN> <SPAN class=xml-attribute-name>encoding</SPAN><SPAN class=xml-attribute-value>="ISO-8859-1"</SPAN> <SPAN class=xml-attribute-name>indent</SPAN><SPAN class=xml-attribute-value>="no"</SPAN> <SPAN class=xml-attribute-name>omit-xml-declaration</SPAN><SPAN class=xml-attribute-value>="yes"</SPAN><SPAN class=xml-bracket>/&gt;</SPAN></pre>
<p>Since source code indenting has to be conserved, we disable automatic indenting and, 
		also the xml declaration is omitted:</p>
<pre lang="xml">
<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:output</SPAN> <SPAN class=xml-attribute-name>encoding</SPAN><SPAN class=xml-attribute-value>="ISO-8859-1"</SPAN> <b><SPAN class=xml-attribute-name>indent</SPAN><SPAN class=xml-attribute-value>="no"</SPAN> <SPAN class=xml-attribute-name>omit-xml-declaration</SPAN><SPAN class=xml-attribute-value>="yes"</SPAN></b><SPAN class=xml-bracket>/&gt;</SPAN></pre>
<h3>Basic templates</h3>	
<pre lang="xml">
<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:template</SPAN> <SPAN class=xml-attribute-name>match</SPAN><SPAN class=xml-attribute-value>="cpp-linecomment"</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>span</SPAN> <SPAN class=xml-attribute-name>class</SPAN><SPAN class=xml-attribute-value>="cpp-comment"</SPAN><SPAN class=xml-bracket>&gt;</SPAN>//<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:value-of</SPAN> <SPAN class=xml-attribute-name>select</SPAN><SPAN class=xml-attribute-value>="text()"</SPAN>   <SPAN class=xml-attribute-name>disable-output-escaping</SPAN><SPAN class=xml-attribute-value>="yes"</SPAN> <SPAN class=xml-bracket>/&gt;</SPAN><SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>span</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
<SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>xsl</SPAN>:<SPAN class=xml-tag>template</SPAN><SPAN class=xml-bracket>&gt;</SPAN></pre>
<P>This template applies to the node <code>cpp-linecomment</code>, which corresponds to 
		a single-line comment in C++.<br />
		We apply the <b>CSS</b> style to this node by encapsulating it in <code>span</code>
		tags and by specifying the <b>CSS</b> class.<br />
		Moreover, we do not want character escaping for that, so we use</P>
<pre lang="xml">
<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:value-of</SPAN> <SPAN class=xml-attribute-name>select</SPAN><SPAN class=xml-attribute-value>="text()"</SPAN>   <b><SPAN class=xml-attribute-name>disable-output-escaping</SPAN><SPAN class=xml-attribute-value>="yes"</SPAN></b> <SPAN class=xml-bracket>/&gt;</SPAN><SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>span</SPAN><SPAN class=xml-bracket>&gt;</SPAN></pre>
<h3>The <b>parsedcode</b> template</h3>
<p>It gets a little complicated here. As everybody knows, XSL quickly becomes really 
		complicated once you want to do more advanced stylesheets. Below is the 
		template for <code>parsedcode</code>; it does a simple thing but looks ugly:<br />
		It checks whether the <code>in-box</code> parameter is true; if so, it creates <code>pre</code>
		tags, otherwise it creates <code>code</code> tags.</p>
<pre lang="xml">
<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:template</SPAN> <SPAN class=xml-attribute-name>match</SPAN><SPAN class=xml-attribute-name></SPAN><SPAN class=xml-attribute-value>="parsedcode"</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
	<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:choose</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
		<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:when</SPAN> <SPAN class=xml-attribute-name>test</SPAN><SPAN class=xml-attribute-name></SPAN><SPAN class=xml-attribute-value>="@in-box[.=0]"</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
			<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:element</SPAN> <SPAN class=xml-attribute-name>name</SPAN><SPAN class=xml-attribute-name></SPAN><SPAN class=xml-attribute-value>="span"</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
				<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:attribute</SPAN> <SPAN class=xml-attribute-name>name</SPAN><SPAN class=xml-attribute-name></SPAN><SPAN class=xml-attribute-value>="class"</SPAN><SPAN class=xml-bracket>&gt;</SPAN>cpp-inline<SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>xsl</SPAN>:<SPAN class=xml-tag>attribute</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
				<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:attribute</SPAN> <SPAN class=xml-attribute-name>name</SPAN><SPAN class=xml-attribute-name></SPAN><SPAN class=xml-attribute-value>="lang"</SPAN><SPAN class=xml-bracket>&gt;</SPAN><SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:value-of</SPAN> <SPAN class=xml-attribute-name>select</SPAN><SPAN class=xml-attribute-name></SPAN><SPAN class=xml-attribute-value>="@lang"</SPAN><SPAN class=xml-bracket>/&gt;</SPAN><SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>xsl</SPAN>:<SPAN class=xml-tag>attribute</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
				<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:apply-templates</SPAN><SPAN class=xml-bracket>/&gt;</SPAN>
			<SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>xsl</SPAN>:<SPAN class=xml-tag>element</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
		<SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>xsl</SPAN>:<SPAN class=xml-tag>when</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
		<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:otherwise</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
			<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:element</SPAN> <SPAN class=xml-attribute-name>name</SPAN><SPAN class=xml-attribute-name></SPAN><SPAN class=xml-attribute-value>="pre"</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
				<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:attribute</SPAN> <SPAN class=xml-attribute-name>name</SPAN><SPAN class=xml-attribute-name></SPAN><SPAN class=xml-attribute-value>="class"</SPAN><SPAN class=xml-bracket>&gt;</SPAN>cpp-pre<SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>xsl</SPAN>:<SPAN class=xml-tag>attribute</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
				<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:attribute</SPAN> <SPAN class=xml-attribute-name>name</SPAN><SPAN class=xml-attribute-name></SPAN><SPAN class=xml-attribute-value>="lang"</SPAN><SPAN class=xml-bracket>&gt;</SPAN><SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:value-of</SPAN> <SPAN class=xml-attribute-name>select</SPAN><SPAN class=xml-attribute-name></SPAN><SPAN class=xml-attribute-value>="@lang"</SPAN><SPAN class=xml-bracket>/&gt;</SPAN><SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>xsl</SPAN>:<SPAN class=xml-tag>attribute</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
				<SPAN class=xml-bracket>&lt;</SPAN><SPAN class=xml-tag>xsl:apply-templates</SPAN><SPAN class=xml-bracket>/&gt;</SPAN>
			<SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>xsl</SPAN>:<SPAN class=xml-tag>element</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
		<SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>xsl</SPAN>:<SPAN class=xml-tag>otherwise</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
	<SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>xsl</SPAN>:<SPAN class=xml-tag>choose</SPAN><SPAN class=xml-bracket>&gt;</SPAN>
<SPAN class=xml-bracket>&lt;/</SPAN><SPAN class=xml-tag>xsl</SPAN>:<SPAN class=xml-tag>template</SPAN><SPAN class=xml-bracket>&gt;</SPAN></pre>
 			<h3>Javascript call</h3>
			<p>This is where you have to customize the methods a bit. The rendering is 
		done in the method <code>highlightCode</code>:
		<pre lang="jscript">highlightCode( sLang, sRootTag, bInBox, sCode)</pre>
		where</p>
			<ul>
		<li>
			<code>sLang</code>
		is a string identifying the language ( "cpp" for C++),
		<li>
			<code>sRootTag</code> is the name of the node that encapsulates the code. For example, <code>
				pre</code> for boxed code, <code>code</code>
		for inline code,
		<li>
			<code>bInBox</code> is a boolean set to true if <code>in-box</code> has to be set 
			to true.
		</li>
		<li>
			<code>sCode</code>
		is the source code
		<li>
			it returns the modified code
		</li>
	</ul>
			<p>The file names are <b>hardcoded</b> inside the <code>highlightCode</code>
		method: <code>highlight.xml</code> for the language specification, <code>highlight.xsl</code>
		for the stylesheet. In the article, the XML syntax is embedded in an <code>xml</code>
		tag and is simply accessed using its <code>id</code>.</p>
			<h2>Applying code transformation to an entire HTML page.</h2>
			<p>So now you are wondering how to apply this transformation to an entire 
		HTML page? Well, surprisingly, this can be done in... <b>2</b> lines! In fact, 
		there exists the method <code lang="jscript">String::replace(regExp, replace)</code>
		that replaces the substrings matching the regular expression <code>regExp</code>
		with <code>replace</code>. The best part of the story is that <code>replace</code>
		can be a <b>function</b>... So we just (almost) need to pass <code>highlightCode</code>
		and we are done.</p>
			<p>For example, we want to match the code enclosed in <code>pre</code> tags:</p>
			<pre lang="jscript">// this is javascript
var regExp=/&lt;<b>pre</b>&gt;(.|\n)*?&lt;\/<b>pre</b>&gt;/gim;
// render xml
var sValue =  sValue.replace( regExp,  
        function( $0 ) 
        {
            return highlightCode("cpp", "cpp",$0.substring( 5, $0.length-6 ));
        } 
    );
</pre>
<p>In practice, some checks are made on the language name and all these computations 
		are hidden in the <code>replaceCode</code> method.</p>
			<h2>Using the methods in your web site</h2>
			<h3>ASP pages</h3>
			To use the highlighting scheme in your ASP web site:
			<ol>
		<li>
			Put the javascript code between script tags in an asp page:
			<pre lang="html">&lt;script language="javascript" runat="server"&gt;
...
&lt;/script&gt;
</pre>
		<li>
		include this page where you need it
		<li>
			modify the method <code>processAndHighlightCode</code>
		to suit your needs
		<li>
		modify the method handleException to redirect the exception to the Response
		<li>
		apply this method to the HTML code you want to modify
		<li>
			update your <code>css</code> style with the corresponding classes.
		</li>
	</ol>
		<h3>Demonstration application</h3>
		<p>The demonstration application is a hack of the <b>CodeProject Article Helper</b>. 
		Type in code in <code>pre</code> or <code>code</code> to see the results.</p>
		<h2>Update History</h2>
		<table>
		<tr>
			<th>
				Date</th><th>Description</th></tr>
		<tr valign="top">
			<td>02-20-2003</td>
			<td>
				<ul>
					<li>
						Added demonstration in the article!</li>
					<li>
					Added new languages: JScript, VBScript, C, XML<li>
					Now handling &lt;pre lang="..."&gt; bracketing: you can specify the language 
					of the code.
					<li>
						<code>loadAndBuildSyntax</code> takes a DomDocument as parameter. You can call 
						it like this: <code>loadAndBuildSyntax( loadXML( sFileName ))</code></li>
					<li>
						<code>highlightCode</code> takes one more argument: bInBox.</li>
				</ul>
			</td>
		</tr>
		<tr>
			<td>02-17-2003</td>
			<td>Minor changes in stylesheet</td>
		</tr>
		<tr valign="top">
			<td>02-14-2003</td>
			<td>
				<ul>
					<li>
						Added <code>pre</code>, <code>post</code> to the keyword rule</li>
					<li>
						The text disappearing in &lt;code&gt; brackets is fixed. The bug was in 
						processAndHighlightArcticle (bad function argument).</li>
				</ul>
			</td>
		</tr>
		<tr>
			<td>02-13-2003</td>
			<td>Initial release.</td>
		</tr>
	</table>
		<h2>References</h2>
		<table id="Table8">
		<tr>
			<td>[1]</td>
			<td><a href="http://docs.kde.org/en/3.1/kdebase/kate/katehighlight-system.html">The 
					Kate Syntax Highlight System</a> documentation files.
			</td>
		</tr>
		<tr>
			<td>[2]</td>
			<td><a href="http://www.codeproject.com/jscript/cparticlewriterhelper.asp">The Code 
					Project Article Helper</a>, <A href="/script/profile/whos_who.asp?id=36966">Jason 
						Henderson</A>
			</td>
		</tr>
		<tr>
			<td>[3]</td>
			<td><a href="http://www.codeproject.com/useritems/expresso.asp">Expresso - A Tool for 
					Building and Testing Regular Expressions</a>, <A href="/script/profile/whos_who.asp?id=115387">
					Hollenhorst</A>
			</td>
		</tr>
	</table></SPAN></SPAN></SPAN></SPAN>
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.
License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.
A list of licenses authors might use can be found here
Written By
Jonathan de Halleux
Engineer
United States
Jonathan de Halleux is Civil Engineer in Applied Mathematics. He finished his PhD in 2004 in the rainy country of Belgium. After 2 years in the Common Language Runtime (i.e. .net), he is now working at Microsoft Research on Pex (http://research.microsoft.com/pex).
Multiple Language Syntax Highlighting, Part 1: JScript

License

Comments and Discussions