Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version

Multiple Language Syntax Highlighting, Part 1: JScript

, 12 Mar 2003
Makes automatic syntax highlighting of source code in web pages a reality (for C, C++, JScript, VBScript, XML)
highlight-demo.zip
highlight.png
parsecontext.png
pipe.png
schema.png
tn_schema.jpg
highlight.zip
highlight.xsl
highlight.xsx
highlight_src.zip
pipe.png
highlight.xsl
parsecontext.png
tn_schema.jpg
highlight.png
highlight.xsx
schema.png
/// <summary>HTML Syntax highlighting methods in JavaScript.</summary>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>

/// <summary>Reports a caught exception to the user through an alert box.</summary>
/// <param name="exception">the caught value: either a string, or an object exposing
/// <c>description</c> and/or <c>message</c></param>
/// <remarks>A string is displayed as is. For any other value, <c>description</c> is displayed
/// unless it is null or undefined, in which case <c>message</c> is displayed instead.</remarks>
/// <code>
///	try
///	{
///		// returns false if failed
///		if (!doSomething())
///			throw "Could not do anything";
///	}
///	catch (exception)
///	{
///		handleException(exception);
///	}
/// </code>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function handleException( exception )
{
	var sDetail;

	// pick the text to display first, then show it from a single place
	if (typeof(exception) == "string")
		sDetail = exception;
	else if (exception.description == null)
		sDetail = exception.message;
	else
		sDetail = exception.description;

	alert("Error: " + sDetail);
}

/// <summary>Loads an xml file synchronously into a new DOM document.</summary>
/// <param name="sFileName">XML file name</param>
/// <returns>a DOMDocument object ( i.e. a ActiveXObject("Msxml2.DOMDocument") ),
/// or null if the file could not be loaded</returns>
/// <remarks>A load failure is not propagated to the caller: it is reported through
/// handleException and null is returned, so callers must check the result.</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function loadXML( sFileName )
{
	var xmlDoc = new ActiveXObject("Msxml2.DOMDocument");
	var sReason;

	// a real boolean: the string "false" is a non-empty (truthy) value in script
	xmlDoc.async = false;

	try
	{
		// try loading xml file, throw exception if failed
		if (!xmlDoc.load( sFileName ))
		{
			// include the parser's explanation when the document exposes one
			sReason = (xmlDoc.parseError != null) ? xmlDoc.parseError.reason : null;
			if (sReason)
				throw "Could not load xml file " + sFileName + " (" + sReason + ")";
			throw "Could not load xml file " + sFileName;
		}
	}
	catch (exception)
	{
		xmlDoc = null;
		handleException(exception);
	}

	return xmlDoc;
}

/// <summary>Appends a new child element holding a single CDATA section.</summary>
/// <param name="node">node that receives the new child element</param>
/// <param name="nodeName">name of the new child element</param>
/// <param name="cdata">content of the CDATA section</param>
/// <exception>If the child element could not be created</exception>
/// <exception>If the CDATA section could not be created (the child element has
/// already been appended to node at that point)</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function addChildCDATAElem( node, nodeName, cdata )
{
	var ownerDoc = node.ownerDocument;

	// element first: it is attached to its parent before the CDATA is created
	var elem = ownerDoc.createElement( nodeName );
	if (elem == null)
		throw "Could not append node to " + node.nodeName;
	node.appendChild( elem );

	var section = ownerDoc.createCDATASection( cdata );
	if (section == null)
		throw "Could not append CDATA node to " + elem.nodeName;
	elem.appendChild( section );
}

/// <summary>Appends a new child element whose text is set to the given value.</summary>
/// <param name="node">node that receives the new child element</param>
/// <param name="nodeName">name of the new child element</param>
/// <param name="text">value assigned to the text property of the new element</param>
/// <exception>If the child element could not be created</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function addChildElem( node, nodeName, text )
{
	var elem = node.ownerDocument.createElement( nodeName );

	if (elem != null)
	{
		// fill the element before attaching it to its parent
		elem.text = text;
		node.appendChild( elem );
		return;
	}

	throw "Could not append node to " + node.nodeName;
}

/// <summary>Escapes a literal string so that it can be embedded in a regular expression.</summary>
/// <param name="char0">literal text to escape (one or more characters)</param>
/// <returns>char0 with a backslash inserted in front of every character that has a
/// special meaning in a regular expression</returns>
/// <remarks>Escaped characters: - + * ? ( ) [ ] \ $ ^ ! . | { }.
/// The last four were previously left untouched: an unescaped "." matches any character
/// and an unescaped "|" introduces an empty alternative that matches the empty string.</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function stringToRegExp( char0 )
{
	var reSpecial = /([\-\+\*\?\(\)\[\]\\\$\^\!\.\|\{\}])/g;

	return char0.replace(reSpecial, "\\$1");
}

/// <summary>Builds keywords family regular expressions</summary>
/// <param name="languageNode">language node; only used to reach the document root</param>
/// <remarks>For every keywordlists/keywordlist node found under the document root, this method
/// creates a regular expression of the form \b + pre + (kw1|kw2|...) + post + \b, where pre and
/// post are the optional attributes of the keywordlist node, and stores it in the "regexp"
/// attribute of that node. Keywords are escaped with stringToRegExp; pre and post are inserted
/// as they are.</remarks>
/// <remarks>A keywordlist without any kw element gets a group that can never match. The previous
/// code removed the opening parenthesis in that case and stored an unbalanced expression.</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function buildKeywordRegExp( languageNode )
{
	var rootNode, keywordListList, keywordListNode;
	var kwList, kwNode, keywords;
	var preNode, postNode, sRegExp;

	rootNode = languageNode.selectSingleNode("/*");

	// iterating keyword lists
	keywordListList = rootNode.selectNodes("keywordlists/keywordlist");
	keywordListList.reset();
	for ( keywordListNode = keywordListList.nextNode(); keywordListNode != null; keywordListNode = keywordListList.nextNode() )
	{
		// collect the escaped keywords of this family
		keywords = [];
		kwList = keywordListNode.selectNodes("kw");
		kwList.reset();
		for ( kwNode = kwList.nextNode(); kwNode != null; kwNode = kwList.nextNode() )
			keywords.push( stringToRegExp( kwNode.nodeTypedValue ) );

		sRegExp = "\\b";

		// adding pre...
		preNode = keywordListNode.attributes.getNamedItem("pre");
		if (preNode != null)
			sRegExp = sRegExp + preNode.nodeTypedValue;

		if (keywords.length > 0)
			sRegExp = sRegExp + "(" + keywords.join("|") + ")";
		else
			// [^\s\S] matches no character at all: an empty family never matches
			sRegExp = sRegExp + "([^\\s\\S])";

		// adding post...
		postNode = keywordListNode.attributes.getNamedItem("post");
		if (postNode != null)
			sRegExp = sRegExp + postNode.nodeTypedValue;

		sRegExp = sRegExp + "\\b";

		// add to keywordListNode
		keywordListNode.setAttribute( "regexp", sRegExp );
	}
}

/// <summary>Builds regular expression out of contextNode</summary>
/// <param name="languageNode">language node; only used to reach the document root</param>
/// <param name="contextNode">context node whose child nodes are the rules</param>
/// <returns>a parenthesised alternation with one alternative per rule, in document order,
/// or an empty string if the context contains no rule</returns>
/// <remarks>Recognised rule nodes are detect2chars, detectchar, linecontinue, regexp and keyword;
/// any other child node (comments included) is ignored. Literal characters are escaped with
/// stringToRegExp. The result is returned, not stored: the caller stores it.</remarks>
/// <exception>If a detect2chars or detectchar rule misses its char / char1 attribute</exception>
/// <exception>If a regexp rule misses its expression attribute</exception>
/// <exception>If a keyword rule misses its family attribute, names an unknown family, or
/// names a family whose regular expression has not been built yet</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function buildRuleRegExp( languageNode, contextNode )
{
	var rootNode = languageNode.selectSingleNode("/*");
	var ruleList = contextNode.childNodes;
	var alternatives = [];
	var ruleNode, sRuleName, charNode, char1Node, regExpExprNode;
	var keywordListNameNode, keywordListNode, keywordListRegExpNode, xp;

	// start from the first rule whatever the current position of the list cursor
	ruleList.reset();
	for (ruleNode = ruleList.nextNode(); ruleNode != null; ruleNode = ruleList.nextNode() )
	{
		sRuleName = ruleNode.nodeName;

		if (sRuleName == "detect2chars")
		{
			charNode = ruleNode.attributes.getNamedItem("char");
			char1Node = ruleNode.attributes.getNamedItem("char1");
			if (charNode == null || char1Node == null)
				throw "detect2chars rule missing char or char1 attribute";

			alternatives.push( stringToRegExp( charNode.nodeTypedValue + char1Node.nodeTypedValue ) );
		}
		else if (sRuleName == "detectchar")
		{
			charNode = ruleNode.attributes.getNamedItem("char");
			if (charNode == null)
				throw "detectchar rule missing char attribute";

			alternatives.push( stringToRegExp( charNode.nodeTypedValue ) );
		}
		else if (sRuleName == "linecontinue")
		{
			// a real line feed character, not the two characters \ and n
			alternatives.push( "\n" );
		}
		else if (sRuleName == "regexp")
		{
			regExpExprNode = ruleNode.attributes.getNamedItem("expression");
			if (regExpExprNode == null)
				throw "Regular expression rule missing expression attribute";

			alternatives.push( regExpExprNode.nodeTypedValue );
		}
		else if (sRuleName == "keyword")
		{
			// finding keywordlist
			keywordListNameNode = ruleNode.attributes.getNamedItem("family");
			if (keywordListNameNode == null)
				throw "Keyword rule missing family";
			xp = "keywordlists/keywordlist[@id=\""
					+ keywordListNameNode.nodeTypedValue
					+ "\"]";
			keywordListNode = rootNode.selectSingleNode(xp);
			if (keywordListNode == null)
				throw "Could not find keywordlist (xp: " + xp + ")";

			keywordListRegExpNode = keywordListNode.attributes.getNamedItem("regexp");
			if (keywordListRegExpNode == null)
				throw "Could not find keywordlist regular expression";

			alternatives.push( keywordListRegExpNode.nodeTypedValue );
		}
		// any other node (e.g. #comment) is not a rule and is skipped
	}

	if (alternatives.length == 0)
		return "";

	return "(" + alternatives.join("|") + ")";
}

/// <summary>Prepares the "regexp" attributes needed to parse one language</summary>
/// <param name="languageNode">language node</param>
/// <remarks>Keyword family expressions are built first (buildKeywordRegExp), because the
/// context expressions embed them. Then, for every contexts/context node of the language, the
/// expression returned by buildRuleRegExp is stored in the "regexp" attribute of that node.</remarks>
/// <exception>Anything thrown by buildKeywordRegExp or buildRuleRegExp</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function buildRules( languageNode )
{
	var contextList, contextNode;

	// first building keyword regexp
	buildKeywordRegExp( languageNode );

	// create regular expressions for context
	contextList = languageNode.selectNodes("contexts/context");
	contextList.reset();
	for (contextNode = contextList.nextNode(); contextNode != null; contextNode = contextList.nextNode())
	{
		contextNode.setAttribute( "regexp", buildRuleRegExp( languageNode, contextNode ) );
	}
}

/// <summary>Prepares syntax xml file</summary>
/// <param name="sXMLSyntax">xml Syntax file name</param>
/// <returns>the loaded DOM document describing the languages</returns>
/// <remarks>The regular expressions are (re)built with buildRules for every
/// /highlight/languages/language node when the "needs-build" attribute of the highlight element
/// is missing or equal to "yes"; the attribute is then set to "no". When the "save-build"
/// attribute equals "yes" the document is saved back to sXMLSyntax.</remarks>
/// <exception>If the file could not be loaded (loadXML returned null)</exception>
/// <exception>If the document has no /highlight element</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function loadAndBuildSyntax( sXMLSyntax )
{
	var xmlDoc, highlightNode, needBuildNode, saveBuildNode;
	var languageNodeList, languageNode;

	// get highlight file; loadXML reports its own failure and returns null
	xmlDoc = loadXML( sXMLSyntax );
	if (xmlDoc == null)
		throw "Could not load syntax file " + sXMLSyntax;

	highlightNode = xmlDoc.documentElement.selectSingleNode("/highlight");
	if (highlightNode == null)
		throw "Syntax file " + sXMLSyntax + " has no highlight root element";

	// check if build needed...
	needBuildNode = highlightNode.attributes.getNamedItem("needs-build");
	if (needBuildNode == null || needBuildNode.nodeTypedValue == "yes")
	{
		// iterate languages and prebuild
		languageNodeList = xmlDoc.documentElement.selectNodes("/highlight/languages/language");
		languageNodeList.reset();
		for (languageNode = languageNodeList.nextNode(); languageNode != null; languageNode = languageNodeList.nextNode())
		{
			// build regular expressions
			buildRules( languageNode );
		}

		// updating...
		highlightNode.setAttribute("needs-build", "no");
	}

	// save file if asked
	saveBuildNode = highlightNode.attributes.getNamedItem("save-build");
	if (saveBuildNode != null && saveBuildNode.nodeTypedValue == "yes")
		xmlDoc.save( sXMLSyntax );

	return xmlDoc;
}

/// <summary>Finds the rule that triggered the match</summary>
/// <param name="languageNode">language node; only used to reach the document root</param>
/// <param name="contextNode">context node whose child nodes are the rules</param>
/// <param name="sMatch">text that matched the context regular expression</param>
/// <returns>the rule node that produced sMatch, or null if no rule of the context matches</returns>
/// <remarks>Rules are tried in document order. detect2chars, detectchar and linecontinue rules
/// match when sMatch is exactly their literal text. A regexp or keyword rule is returned at once
/// when its expression matches the whole of sMatch. A regexp or keyword rule whose expression
/// only matches a part of sMatch is remembered and returned if no rule matches exactly; this
/// keeps the previous behaviour as a fallback, while preventing e.g. a keyword found inside a
/// matched string literal from hiding the rule that really matched.</remarks>
/// <exception>If a regexp rule misses its expression attribute</exception>
/// <exception>If a keyword rule misses its family attribute or names a family that has no
/// regular expression</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function findRule( languageNode, contextNode, sMatch )
{
	var rootNode = languageNode.selectSingleNode("/*");
	var ruleList = contextNode.childNodes;
	var ruleNode, sRuleName, sExpression, arr;
	var char0, char1, regExpExprNode, familyNode, regExpNode, xp;
	var partialRuleNode = null;

	// start from the first rule whatever the current position of the list cursor
	ruleList.reset();
	for (ruleNode = ruleList.nextNode(); ruleNode != null; ruleNode = ruleList.nextNode() )
	{
		sRuleName = ruleNode.nodeName;
		sExpression = null;

		if (sRuleName == "detect2chars")
		{
			char0 = ruleNode.attributes.getNamedItem("char").nodeTypedValue;
			char1 = ruleNode.attributes.getNamedItem("char1").nodeTypedValue;
			if (sMatch == char0 + char1)
				return ruleNode;
		}
		else if (sRuleName == "detectchar")
		{
			char0 = ruleNode.attributes.getNamedItem("char").nodeTypedValue;
			if (char0 == sMatch)
				return ruleNode;
		}
		else if (sRuleName == "linecontinue")
		{
			if ("\n" == sMatch)
				return ruleNode;
		}
		else if (sRuleName == "regexp")
		{
			regExpExprNode = ruleNode.attributes.getNamedItem("expression");
			if (regExpExprNode == null)
				throw "Regular expression rule missing expression attribute";

			sExpression = regExpExprNode.nodeTypedValue;
		}
		else if (sRuleName == "keyword")
		{
			familyNode = ruleNode.attributes.getNamedItem("family");
			if (familyNode == null)
				throw "Could not find family attribute for keyword";
			xp = "keywordlists/keywordlist[@id=\""
					+ familyNode.nodeTypedValue
					+ "\"]/@regexp";
			regExpNode = rootNode.selectSingleNode( xp );
			if (regExpNode == null)
				// report the family that was looked up (the rule may have no "attribute" at all)
				throw "Could not find regular expression for keyword family " + familyNode.nodeTypedValue + " (xp: " + xp + ")";

			sExpression = "(" + regExpNode.nodeTypedValue + ")";
		}
		// any other node (e.g. #comment) is not a rule and is skipped

		if (sExpression != null)
		{
			arr = new RegExp( sExpression, "m" ).exec( sMatch );
			if (arr != null)
			{
				// the expression accounts for the whole match: this is the rule
				if (arr.index == 0 && arr[0].length == sMatch.length)
					return ruleNode;

				// only part of the match: keep the first such rule as a fallback
				if (partialRuleNode == null)
					partialRuleNode = ruleNode;
			}
		}
	}

	return partialRuleNode;
}

/// <summary>Applies the context rules successively to sString</summary>
/// <param name="languageNode">language node, used to look up contexts by id</param>
/// <param name="contextNode">context node in which parsing starts</param>
/// <param name="sString">String to parse and convert</param>
/// <param name="parsedCodeNode">mother node for dumping parsed code</param>
/// <returns>null when the remaining text contains no further match; nothing when the text has
/// been consumed exactly by the last match</returns>
/// <remarks>This method uses the pre-computed "regexp" attribute of the contexts. For every
/// match it appends, through addChildCDATAElem, the text preceding the match (named after the
/// "attribute" of the current context, even when that text is empty), then the match itself
/// (named after the "attribute" of the rule found by findRule, unless that attribute is missing
/// or "hidden"), and finally switches to the context named by the "context" attribute of the
/// rule when it differs from the current one.</remarks>
/// <exception>If findRule finds no rule for a match</exception>
/// <exception>If a rule names a context that does not exist in the language</exception>
/// <exception>If a rule matches an empty string at the start of the remaining text without
/// changing context (parsing could never progress)</exception>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function applyRules( languageNode, contextNode, sString, parsedCodeNode)
{
	var sRegExp, regExp, arr;
	var ruleNode, attributeNode, sNextContext, xpContext;

	// building regExp
	sRegExp = contextNode.attributes.getNamedItem("regexp").nodeTypedValue;
	regExp = new RegExp( sRegExp, "m" );

	while (sString.length > 0)
	{
		// apply
		arr = regExp.exec( sString );
		if (arr == null)
		{
			// no rule applies any more: the rest belongs to the current context
			addChildCDATAElem( parsedCodeNode,
							contextNode.attributes.getNamedItem("attribute").nodeTypedValue,
							sString );

			// finished parsing
			return null;
		}

		// adding text located before the match
		addChildCDATAElem( parsedCodeNode,
						contextNode.attributes.getNamedItem("attribute").nodeTypedValue,
						sString.substring( 0, arr.index ) );

		// find rule...
		ruleNode = findRule( languageNode, contextNode, arr[0] );
		if (ruleNode == null)
			throw "Didn't matching rule, regular expression false ? ( context: " + contextNode.attributes.getNamedItem("id").nodeTypedValue;

		// check if rule needs to be added to result...
		attributeNode = ruleNode.attributes.getNamedItem("attribute");
		if (attributeNode != null && attributeNode.nodeTypedValue != "hidden")
			addChildCDATAElem( parsedCodeNode, attributeNode.nodeTypedValue, arr[0] );

		// update context if necessary
		sNextContext = ruleNode.attributes.getNamedItem("context").nodeTypedValue;
		if (contextNode.attributes.getNamedItem("id").nodeTypedValue != sNextContext)
		{
			xpContext = "contexts/context[@id=\""
							+ sNextContext
							+ "\"]";
			contextNode = languageNode.selectSingleNode( xpContext );
			if (contextNode == null)
				throw "Didn't matching context, error in xml specification ?";

			// build new regular expression
			sRegExp = contextNode.attributes.getNamedItem("regexp").nodeTypedValue;
			regExp = new RegExp( sRegExp, "m" );
		}
		else if (arr.index + arr[0].length == 0)
		{
			// same context, nothing consumed: the next iteration would be identical
			throw "Rule matched an empty string without changing context ( context: " + sNextContext + " )";
		}

		// drop what has been handled
		sString = sString.substring( arr.index + arr[0].length, sString.length );
	}
}


/// <summary>Create and populate an xml document with the corresponding language</summary>
/// <param name="xmlDoc">highlight syntax document</param>
/// <param name="sLang">language string description. For C++, use cpp.</param>
/// <param name="sRootTag">Root tag (under parsedcode) for the generated xml tree.</param>
/// <param name="sCode">Code to parse</param>
/// <returns>a new DOM document of the form parsedcode / sRootTag / parsed nodes, or null if
/// anything failed</returns>
/// <remarks>Parsing starts in the context named by the "default" attribute of the contexts node
/// of the language and is performed by applyRules. Use an xsl file to render the result.</remarks>
/// <remarks>No exception leaves this method: failures (unknown language, missing contexts node,
/// missing default context, node creation or parsing errors) are reported through
/// handleException and null is returned.</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function buildHighlightTree( xmlDoc, sLang, sRootTag, sCode )
{
	var languageNode, contextsNode, contextNode, defaultNode, xp;
	var xmlResult, resultMainNode, parsedCodeNode;

	try
	{
		// getting language
		xp = "/highlight/languages/language[@id=\"" + sLang + "\"]";
		languageNode = xmlDoc.documentElement.selectSingleNode( xp );
		if (languageNode == null)
			throw "Could not find " + sLang + " language (xpath: " + xp + ")";

		// getting contexts
		contextsNode = languageNode.selectSingleNode( "contexts" );
		if (contextsNode == null)
			throw "Could not find contexts node for " + sLang + " language";

		// getting default context
		defaultNode = contextsNode.attributes.getNamedItem("default");
		if (defaultNode == null)
			throw "Contexts node for " + sLang + " language has no default attribute";
		xp = "context[@id=\"" + defaultNode.nodeTypedValue + "\"]";
		contextNode = contextsNode.selectSingleNode( xp );
		if (contextNode == null)
			throw "Could not find default context for " + sLang + " language (xpath: " + xp + ")";

		// create result xml
		xmlResult = new ActiveXObject("Msxml2.DOMDocument");

		// creating main node
		resultMainNode = xmlResult.createElement( "parsedcode" );
		if (resultMainNode == null)
			throw "Could not create main node parsedcode";
		xmlResult.appendChild( resultMainNode );

		// creating language node
		parsedCodeNode = xmlResult.createElement( sRootTag );
		if (parsedCodeNode == null)
			throw "Could not create node " + sRootTag;
		resultMainNode.appendChild( parsedCodeNode );

		// parse and populate xmlResult
		applyRules( languageNode, contextNode, sCode, parsedCodeNode );

		return xmlResult;
	}
	catch (exception)
	{
		handleException( exception );
		return null;
	}
}

/// <summary>Apply syntax matching to sCode with the corresponding language sLang</summary>
/// <param name="sLang">language string description. For C++, use cpp.</param>
/// <param name="sRootTag">Root tag (under parsedcode) for the generated xml tree.</param>
/// <param name="sCode">Code to parse</param>
/// <returns>the highlighted code, or null if it could not be produced</returns>
/// <remarks>The syntax is read from "highlight.xml" (loadAndBuildSyntax), the code is parsed with
/// buildHighlightTree and the resulting tree is rendered with the "highlight.xsl" style sheet.
/// Both files are loaded again on every call.</remarks>
/// <remarks>No exception leaves this method. A failure already reported by buildHighlightTree or
/// loadXML (null result) is not reported a second time; any other failure is reported through
/// handleException.</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function highlightCode( sLang, sRootTag, sCode)
{
	var sXMLLang = "highlight.xml";
	var sXSLStyle = "highlight.xsl";
	var xmlDoc, xmlResult, xslDoc;

	try
	{
		xmlDoc = loadAndBuildSyntax( sXMLLang );

		// build highlight tree; null means the error has already been reported
		xmlResult = buildHighlightTree( xmlDoc, sLang, sRootTag, sCode );
		if (xmlResult == null)
			return null;

		// load xsl; null means the error has already been reported
		xslDoc = loadXML( sXSLStyle );
		if (xslDoc == null)
			return null;

		// render xml
		return xmlResult.transformNode( xslDoc );
	}
	catch (exception)
	{
		handleException( exception );
		return null;
	}
}

/// <summary>Processes HTML and highlight code in <pre>...</pre> and in <code>...</code></summary>
/// <param name="sValue">HTML code</param>
/// <returns>HTML with colored code</returns>
/// <remarks>Every <pre>...</pre> section is replaced by the result of
/// highlightCode("cpp", "cpp", content), then every <code>...</code> section of the resulting
/// text by the result of highlightCode("cpp", "icpp", content). Only tags written without
/// attributes are recognised; the tag names are matched case-insensitively.</remarks>
/// <remarks>The content may span several lines whatever the line ending used (LF or CRLF).
/// When highlightCode fails (returns null) the original section is left untouched instead of
/// being replaced by the text "null".</remarks>
/// <remarks>Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003</remarks>
function processAndHighlightArticle( sValue )
{
	// [\s\S] matches any character, carriage returns and line feeds included
	var rePre = /<pre>[\s\S]*?<\/pre>/gi;
	var reCode = /<code>[\s\S]*?<\/code>/gi;
	var sBlocksDone;

	// code blocks: strip the 5 characters of <pre> and the 6 characters of </pre>
	sBlocksDone = sValue.replace( rePre,
		function( $0 )
		{
			var sResult = highlightCode("cpp", "cpp", $0.substring( 5, $0.length - 6 ));
			return (sResult == null) ? $0 : sResult;
		}
	);

	// inline code: strip the 6 characters of <code> and the 7 characters of </code>
	return sBlocksDone.replace( reCode,
		function( $0 )
		{
			var sResult = highlightCode("cpp", "icpp", $0.substring( 6, $0.length - 7 ));
			return (sResult == null) ? $0 : sResult;
		}
	);
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here

Share

About the Author

Jonathan de Halleux
Engineer
United States United States
Jonathan de Halleux is a Civil Engineer in Applied Mathematics. He finished his PhD in 2004 in the rainy country of Belgium. After 2 years in the Common Language Runtime (i.e. .NET), he is now working at Microsoft Research on Pex (http://research.microsoft.com/pex).

| Advertise | Privacy | Terms of Use | Mobile
Web04 | 2.8.141216.1 | Last Updated 13 Mar 2003
Article Copyright 2003 by Jonathan de Halleux
Everything else Copyright © CodeProject, 1999-2014
Layout: fixed | fluid