Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version

Writing Your Own RTF Converter

, 1 Aug 2013 CPOL
An article on how to write a custom RTF parser and converter.
rtfconverter_sourcecode.zip
RtfConverter_SourceCode
bin
Debug
Release
Itenso.Rtf.Converter.Html.dll
Itenso.Rtf.Converter.Xml.dll
Itenso.Rtf.Interpreter.dll
Itenso.Rtf.Parser.dll
Itenso.Sys.dll
log4net.dll
nunit-console-runner.dll
nunit.core.dll
nunit.framework.dll
nunit.util.dll
Rtf2Html.exe
Rtf2Raw.exe
Rtf2Xml.exe
RtfInterpreterTests.exe
RtfParserTests.exe
RtfWindows.exe
RtfWinForms.exe
Converter
Html
Properties
Xml
Properties
docu
Word2007RTFSpec9.pdf
ext
log4net.dll
nunit-console-runner.dll
nunit.core.dll
nunit.framework.dll
nunit.util.dll
Interpreter
Converter
Image
Text
Interpreter
Model
Properties
Support
InterpreterTests
Properties
RtfInterpreterTest
RtfInterpreterTest_0.rtf
RtfInterpreterTest_1.rtf
RtfInterpreterTest_10.rtf
RtfInterpreterTest_11.rtf
RtfInterpreterTest_12.rtf
RtfInterpreterTest_13.rtf
RtfInterpreterTest_14.rtf
RtfInterpreterTest_15.rtf
RtfInterpreterTest_16.rtf
RtfInterpreterTest_17.rtf
RtfInterpreterTest_18.rtf
RtfInterpreterTest_19.rtf
RtfInterpreterTest_2.rtf
RtfInterpreterTest_20.rtf
RtfInterpreterTest_21.rtf
RtfInterpreterTest_22.rtf
RtfInterpreterTest_23.rtf
RtfInterpreterTest_3.rtf
RtfInterpreterTest_4.rtf
RtfInterpreterTest_4.rtf.jpg
RtfInterpreterTest_4.rtf.jpg.hex
RtfInterpreterTest_5.rtf
RtfInterpreterTest_5.rtf.png
RtfInterpreterTest_5.rtf.png.hex
RtfInterpreterTest_6.rtf
RtfInterpreterTest_6.rtf.wmf
RtfInterpreterTest_6.rtf.wmf.hex
RtfInterpreterTest_7.rtf
RtfInterpreterTest_7.rtf.emf
RtfInterpreterTest_7.rtf.emf.hex
RtfInterpreterTest_8.rtf
RtfInterpreterTest_8.rtf.wmf
RtfInterpreterTest_8.rtf.wmf.hex
RtfInterpreterTest_9.rtf
RtfInterpreterTest_fail_0.rtf
RtfInterpreterTest_fail_1.rtf
RtfInterpreterTest_fail_2.rtf
RtfInterpreterTest_fail_3.rtf
RtfInterpreterTest_fail_4.rtf
keys
ItensoRtfConverter.snk
Parser
Model
Parser
Properties
Support
ParserTests
Properties
RtfParserTest
minimal.rtf
RtfParserTest_0.rtf
RtfParserTest_1.rtf
RtfParserTest_2.rtf
RtfParserTest_3.rtf
RtfParserTest_4.rtf
RtfParserTest_5.rtf
RtfParserTest_6.rtf
RtfParserTest_7.rtf
RtfParserTest_8.rtf
RtfParserTest_fail_0.rtf
RtfParserTest_fail_1.rtf
RtfParserTest_fail_2.rtf
RtfParserTest_fail_3.rtf
RtfParserTest_fail_4.rtf
RtfParserTest_fail_5.rtf
RtfParserTest_fail_6.rtf
Rtf2Html
Properties
Rtf2Raw
Properties
Rtf2Xml
Properties
RtfWindows
Properties
RtfWinForms
DefaultText.rtf
Properties
Sys
Application
Collection
Logging
Properties
Test
RtfConverter_v1.7.0.zip
Rtf2Xml
Properties
bin
Release
Itenso.Rtf.Converter.Html.dll
Itenso.Rtf.Converter.Xml.dll
Itenso.Rtf.Interpreter.dll
Itenso.Rtf.Parser.dll
Itenso.Sys.dll
log4net.dll
nunit-console-runner.dll
nunit.core.dll
nunit.framework.dll
nunit.util.dll
Rtf2Html.exe
Rtf2Raw.exe
Rtf2Xml.exe
RtfInterpreterTests.exe
RtfParserTests.exe
RtfWindows.exe
RtfWinForms.exe
Converter
Html
Properties
Xml
Properties
docu
Word2007RTFSpec9.pdf
ext
log4net.dll
nunit-console-runner.dll
nunit.core.dll
nunit.framework.dll
nunit.util.dll
Interpreter
Converter
Image
Text
Interpreter
Model
Properties
Support
InterpreterTests
Properties
RtfInterpreterTest
RtfInterpreterTest_0.rtf
RtfInterpreterTest_1.rtf
RtfInterpreterTest_10.rtf
RtfInterpreterTest_11.rtf
RtfInterpreterTest_12.rtf
RtfInterpreterTest_13.rtf
RtfInterpreterTest_14.rtf
RtfInterpreterTest_15.rtf
RtfInterpreterTest_16.rtf
RtfInterpreterTest_17.rtf
RtfInterpreterTest_18.rtf
RtfInterpreterTest_19.rtf
RtfInterpreterTest_2.rtf
RtfInterpreterTest_20.rtf
RtfInterpreterTest_21.rtf
RtfInterpreterTest_22.rtf
RtfInterpreterTest_23.rtf
RtfInterpreterTest_3.rtf
RtfInterpreterTest_4.rtf
RtfInterpreterTest_4.rtf.jpg
RtfInterpreterTest_4.rtf.jpg.hex
RtfInterpreterTest_5.rtf
RtfInterpreterTest_5.rtf.png
RtfInterpreterTest_5.rtf.png.hex
RtfInterpreterTest_6.rtf
RtfInterpreterTest_6.rtf.wmf
RtfInterpreterTest_6.rtf.wmf.hex
RtfInterpreterTest_7.rtf
RtfInterpreterTest_7.rtf.emf
RtfInterpreterTest_7.rtf.emf.hex
RtfInterpreterTest_8.rtf
RtfInterpreterTest_8.rtf.wmf
RtfInterpreterTest_8.rtf.wmf.hex
RtfInterpreterTest_9.rtf
RtfInterpreterTest_fail_0.rtf
RtfInterpreterTest_fail_1.rtf
RtfInterpreterTest_fail_2.rtf
RtfInterpreterTest_fail_3.rtf
RtfInterpreterTest_fail_4.rtf
keys
ItensoRtfConverter.snk
Parser
Model
Parser
Properties
Support
ParserTests
Properties
RtfParserTest
minimal.rtf
RtfParserTest_0.rtf
RtfParserTest_1.rtf
RtfParserTest_2.rtf
RtfParserTest_3.rtf
RtfParserTest_4.rtf
RtfParserTest_5.rtf
RtfParserTest_6.rtf
RtfParserTest_7.rtf
RtfParserTest_8.rtf
RtfParserTest_fail_0.rtf
RtfParserTest_fail_1.rtf
RtfParserTest_fail_2.rtf
RtfParserTest_fail_3.rtf
RtfParserTest_fail_4.rtf
RtfParserTest_fail_5.rtf
RtfParserTest_fail_6.rtf
Rtf2Html
Properties
Rtf2Raw
Properties
RtfWindows
Properties
RtfWinForms
DefaultText.rtf
Properties
Sys
Application
Collection
Logging
Properties
Test
Rtf2Html2010.suo
Rtf2Raw2010.suo
Rtf2Xml2010.suo
RtfInterpreter2010.suo
RtfParser2010.suo
RtfWindows2010.suo
RtfWinForms2010.suo
// -- FILE ------------------------------------------------------------------
// name       : Program.cs
// project    : RTF Framelet
// created    : Jani Giannoudis - 2008.05.30
// language   : c#
// environment: .NET 2.0
// copyright  : (c) 2004-2010 by Itenso GmbH, Switzerland
// --------------------------------------------------------------------------
using System;
using System.IO;
using System.Diagnostics;
using Itenso.Sys.Application;
using Itenso.Rtf;
using Itenso.Rtf.Support;
using Itenso.Rtf.Parser;
using Itenso.Rtf.Interpreter;
using Itenso.Rtf.Converter.Image;
using Itenso.Rtf.Converter.Html;

namespace Itenso.Solutions.Community.Rtf2Html
{

	// ------------------------------------------------------------------------
	// Exit codes of the converter process. Program.ExitCode casts these values
	// to and from Environment.ExitCode; every failing step records its own code.
	enum ProgramExitCode
	{
		Successfully = 0, // no step recorded a failure
		InvalidSettings = -1, // set by ValidateProgramSettings when the settings are not valid
		ParseRtf = -2, // set by ParseRtf when parsing the source file throws
		DestinationDirectory = -3, // set by EnsureDestinationDirectory when creating the directory throws
		InterpretRtf = -4, // set by InterpretRtf when interpreting the rtf structure throws
		ConvertHtml = -5, // set by ConvertHmtl when the html conversion throws
		SaveHtml = -6, // set by SaveHmtl when writing the html file throws
	} // enum ProgramExitCode

	// ------------------------------------------------------------------------
	class Program
	{

		// ----------------------------------------------------------------------
		/// <summary>
		/// Creates the program and the <c>ProgramSettings</c> instance used by all steps.
		/// </summary>
		public Program()
		{
			this.settings = new ProgramSettings();
		} // Program

		// ----------------------------------------------------------------------
		/// <summary>
		/// The process exit code viewed as a <c>ProgramExitCode</c>; reads and writes
		/// go straight through to <c>Environment.ExitCode</c>.
		/// </summary>
		private static ProgramExitCode ExitCode
		{
			get
			{
				int processExitCode = Environment.ExitCode;
				return (ProgramExitCode)processExitCode;
			}
			set
			{
				int processExitCode = (int)value;
				Environment.ExitCode = processExitCode;
			}
		} // ExitCode

		// ----------------------------------------------------------------------
		/// <summary>
		/// Runs the conversion pipeline: validate settings, parse, ensure the destination
		/// directory, interpret, convert to html, save, open and display. The pipeline
		/// stops after the first step that leaves a non-success exit code behind.
		/// </summary>
		public void Execute()
		{
			// banner: "<caption>, <copyright>"
			Console.WriteLine( ApplicationInfo.ShortCaption + ", " + ApplicationInfo.Copyright );

			// step 1: program settings (help mode and invalid settings both end the run)
			if ( !ValidateProgramSettings() )
			{
				return;
			}

			// step 2: parse the rtf source into its group structure
			IRtfGroup structureRoot = ParseRtf();
			if ( ExitCode != ProgramExitCode.Successfully )
			{
				return;
			}

			// step 3: destination directory
			EnsureDestinationDirectory();
			if ( ExitCode != ProgramExitCode.Successfully )
			{
				return;
			}

			// step 4: image handling, shared by the interpreter and the html converter
			RtfVisualImageAdapter visualImageAdapter = new RtfVisualImageAdapter(
				settings.ImageFileNamePattern,
				settings.ImageFormat );

			// step 5: interpret the rtf structure
			IRtfDocument document = InterpretRtf( structureRoot, visualImageAdapter );
			if ( ExitCode != ProgramExitCode.Successfully )
			{
				return;
			}

			// step 6: convert to html
			string htmlText = ConvertHmtl( document, visualImageAdapter );
			if ( ExitCode != ProgramExitCode.Successfully )
			{
				return;
			}

			// step 7: save html
			string htmlFileName = SaveHmtl( htmlText );
			if ( ExitCode != ProgramExitCode.Successfully )
			{
				return;
			}

			// step 8: open html file
			OpenHtmlFile( htmlFileName );
			if ( ExitCode != ProgramExitCode.Successfully )
			{
				return;
			}

			// step 9: display html text
			DisplayHtmlText( htmlText );
			if ( ExitCode != ProgramExitCode.Successfully )
			{
				return;
			}

			Console.WriteLine( "successfully converted RTF to HTML in " + settings.DestinationDirectory );
		} // Execute

		// ----------------------------------------------------------------------
		/// <summary>
		/// Checks whether the conversion may start.
		/// </summary>
		/// <returns>
		/// <c>true</c> when the settings are valid and help was not requested. Otherwise the
		/// help text is shown and <c>false</c> is returned; only invalid settings (not help
		/// mode) set the exit code to <c>ProgramExitCode.InvalidSettings</c>.
		/// </returns>
		private bool ValidateProgramSettings()
		{
			bool helpRequested = settings.IsHelpMode;

			// validity is only evaluated when help was not requested
			if ( !helpRequested && settings.IsValid )
			{
				return true;
			}

			ShowHelp();
			if ( !helpRequested )
			{
				ExitCode = ProgramExitCode.InvalidSettings;
			}
			return false;
		} // ValidateProgramSettings

		// ----------------------------------------------------------------------
		/// <summary>
		/// Parses the source file named by the settings into its rtf group structure,
		/// optionally writing a parser log file.
		/// </summary>
		/// <returns>
		/// The root group of the parsed structure, or <c>null</c> when parsing failed. On
		/// failure the error message is written to the console and the exit code is set
		/// to <c>ProgramExitCode.ParseRtf</c>.
		/// </returns>
		private IRtfGroup ParseRtf()
		{
			RtfParserListenerFileLogger parserLogger = null;
			try
			{
				// logger
				if ( settings.LogParser )
				{
					string logFileName = settings.BuildDestinationFileName(
						settings.LogDirectory,
						RtfParserListenerFileLogger.DefaultLogFileExtension );
					parserLogger = new RtfParserListenerFileLogger( logFileName );
				}

				// rtf parser
				// open readonly - in case of dominant locks...
				using ( FileStream stream = File.Open( settings.SourceFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite ) )
				{
					// parse the rtf structure
					RtfParserListenerStructureBuilder structureBuilder = new RtfParserListenerStructureBuilder();
					RtfParser parser = new RtfParser( structureBuilder );
					parser.IgnoreContentAfterRootGroup = true; // support WordPad documents
					if ( parserLogger != null )
					{
						parser.AddParserListener( parserLogger );
					}
					parser.Parse( new RtfSource( stream ) );
					return structureBuilder.StructureRoot;
				}
			}
			catch ( Exception e )
			{
				Console.WriteLine( "error while parsing rtf: " + e.Message );
				ExitCode = ProgramExitCode.ParseRtf;
				return null;
			}
			finally
			{
				// release the logger on every path; it used to be disposed only on failure
				// NOTE(review): assumes Dispose() is safe to call after a completed parse - confirm
				if ( parserLogger != null )
				{
					parserLogger.Dispose();
				}
			}
		} // ParseRtf

		// ----------------------------------------------------------------------
		/// <summary>
		/// Creates the destination directory when the settings ask for destination output
		/// and the directory does not exist yet. A failure is written to the console and
		/// recorded as <c>ProgramExitCode.DestinationDirectory</c>.
		/// </summary>
		private void EnsureDestinationDirectory()
		{
			if ( settings.HasDestinationOutput )
			{
				try
				{
					bool alreadyThere = Directory.Exists( settings.DestinationDirectory );
					if ( !alreadyThere )
					{
						Directory.CreateDirectory( settings.DestinationDirectory );
					}
				}
				catch ( Exception e )
				{
					Console.WriteLine( "error while creating destination directory: " + e.Message );
					ExitCode = ProgramExitCode.DestinationDirectory;
				}
			}
		} // EnsureDestinationDirectory

		// ----------------------------------------------------------------------
		/// <summary>
		/// Interprets a parsed rtf structure into a document, optionally writing an
		/// interpreter log file and converting embedded images.
		/// </summary>
		/// <param name="rtfStructure">The rtf structure to interpret (as returned by ParseRtf).</param>
		/// <param name="imageAdapter">The image adapter handed to the image convert settings.</param>
		/// <returns>
		/// The interpreted document, or <c>null</c> when interpretation failed. On failure
		/// the error message is written to the console and the exit code is set to
		/// <c>ProgramExitCode.InterpretRtf</c>.
		/// </returns>
		private IRtfDocument InterpretRtf( IRtfGroup rtfStructure, IRtfVisualImageAdapter imageAdapter )
		{
			RtfInterpreterListenerFileLogger interpreterLogger = null;
			try
			{
				// logger
				if ( settings.LogInterpreter )
				{
					string logFileName = settings.BuildDestinationFileName(
						settings.LogDirectory,
						RtfInterpreterListenerFileLogger.DefaultLogFileExtension );
					interpreterLogger = new RtfInterpreterListenerFileLogger( logFileName );
				}

				// image converter - stays null when images are not saved
				RtfImageConverter imageConverter = null;
				if ( settings.SaveImage )
				{
					RtfImageConvertSettings imageConvertSettings = new RtfImageConvertSettings( imageAdapter );
					imageConvertSettings.ImagesPath = settings.DestinationDirectory;
					imageConvertSettings.BackgroundColor = settings.ImageBackgroundColor;
					if ( settings.ExtendedImageScale )
					{
						imageConvertSettings.ScaleExtension = 0.5f;
					}
					imageConverter = new RtfImageConverter( imageConvertSettings );
				}

				// rtf interpreter
				// interpret the rtf structure using the logger and the image converter as listeners
				return RtfInterpreterTool.BuildDoc( rtfStructure, interpreterLogger, imageConverter );
			}
			catch ( Exception e )
			{
				Console.WriteLine( "error while interpreting rtf: " + e.Message );
				ExitCode = ProgramExitCode.InterpretRtf;
				return null;
			}
			finally
			{
				// release the logger on every path; it used to be disposed only on failure
				// NOTE(review): assumes Dispose() is safe to call after a completed interpretation - confirm
				if ( interpreterLogger != null )
				{
					interpreterLogger.Dispose();
				}
			}
		} // InterpretRtf

		// ----------------------------------------------------------------------
		/// <summary>
		/// Converts an interpreted rtf document to html text, using convert settings
		/// assembled from the program settings.
		/// </summary>
		/// <param name="rtfDocument">The document to convert.</param>
		/// <param name="imageAdapter">The image adapter handed to the html convert settings.</param>
		/// <returns>
		/// The html text, or <c>null</c> when the conversion failed. On failure the error
		/// message is written to the console and the exit code is set to
		/// <c>ProgramExitCode.ConvertHtml</c>.
		/// </returns>
		private string ConvertHmtl( IRtfDocument rtfDocument, IRtfVisualImageAdapter imageAdapter )
		{
			try
			{
				RtfHtmlConvertSettings convertSettings = new RtfHtmlConvertSettings( imageAdapter );

				// character set: only replaced when one is configured
				if ( settings.CharacterSet != null )
				{
					convertSettings.CharacterSet = settings.CharacterSet;
				}

				// values that are always taken over
				convertSettings.Title = settings.SourceFileNameWithoutExtension;
				convertSettings.ImagesPath = settings.ImagesPath;
				convertSettings.IsShowHiddenText = settings.ShowHiddenText;
				convertSettings.UseNonBreakingSpaces = settings.UseNonBreakingSpaces;

				// convert scope: None leaves the convert settings untouched
				if ( settings.ConvertScope != RtfHtmlConvertScope.None )
				{
					convertSettings.ConvertScope = settings.ConvertScope;
				}

				// style sheets: comma separated list of links
				if ( !string.IsNullOrEmpty( settings.StyleSheets ) )
				{
					convertSettings.StyleSheetLinks.AddRange( settings.StyleSheets.Split( ',' ) );
				}

				// visual hyperlinks
				convertSettings.ConvertVisualHyperlinks = settings.ConvertVisualHyperlinks;
				if ( !string.IsNullOrEmpty( settings.VisualHyperlinkPattern ) )
				{
					convertSettings.VisualHyperlinkPattern = settings.VisualHyperlinkPattern;
				}

				convertSettings.SpecialCharsRepresentation = settings.SpecialCharsRepresentation;

				RtfHtmlConverter converter = new RtfHtmlConverter( rtfDocument, convertSettings );
				return converter.Convert();
			}
			catch ( Exception e )
			{
				Console.WriteLine( "error while converting to html: " + e.Message );
				ExitCode = ProgramExitCode.ConvertHtml;
			}

			return null;
		} // ConvertHmtl

		// ----------------------------------------------------------------------
		/// <summary>
		/// Writes the html text to the destination file when saving is enabled in the settings.
		/// </summary>
		/// <param name="text">The html text to write.</param>
		/// <returns>
		/// The name of the written file; <c>null</c> when saving is disabled or writing failed.
		/// A write failure is reported on the console and recorded as
		/// <c>ProgramExitCode.SaveHtml</c>.
		/// </returns>
		private string SaveHmtl( string text )
		{
			if ( settings.SaveHtml )
			{
				// the file name is built outside the try block, so a failure here is not reported as a save error
				string htmlFileName = settings.BuildDestinationFileName( null, RtfHtmlConverter.DefaultHtmlFileExtension );
				try
				{
					// 'false' = overwrite an existing file instead of appending to it
					using ( StreamWriter htmlWriter = new StreamWriter( htmlFileName, false, settings.Encoding ) )
					{
						htmlWriter.Write( text );
					}
					return htmlFileName;
				}
				catch ( Exception e )
				{
					Console.WriteLine( "error while saving html: " + e.Message );
					ExitCode = ProgramExitCode.SaveHtml;
				}
			}

			return null;
		} // SaveHmtl

		// ----------------------------------------------------------------------
		/// <summary>
		/// Opens the saved html file through <c>Process.Start</c> when both saving and
		/// opening are enabled in the settings.
		/// </summary>
		/// <param name="fileName">The html file to open; nothing happens when it is null or empty.</param>
		/// <remarks>
		/// A failure to open the file is reported on the console but does not change the
		/// exit code: at this point the html has already been saved.
		/// </remarks>
		private void OpenHtmlFile( string fileName )
		{
			if ( !settings.SaveHtml || !settings.OpenHtml || string.IsNullOrEmpty( fileName ) )
			{
				return;
			}

			try
			{
				// Process.Start may return null; 'using' accepts a null resource.
				// Disposing only releases the local handle, the started process keeps running.
				using ( Process.Start( fileName ) )
				{
				}
			}
			catch ( Exception e )
			{
				Console.WriteLine( "error while opening html: " + e.Message );
			}
		} // OpenHtmlFile

		// ----------------------------------------------------------------------
		/// <summary>
		/// Writes the html text to the console when displaying is enabled in the settings.
		/// </summary>
		/// <param name="htmlText">The html text to write.</param>
		private void DisplayHtmlText( string htmlText )
		{
			if ( settings.DisplayHtml )
			{
				Console.WriteLine( htmlText );
			}
		} // DisplayHtmlText

		// ----------------------------------------------------------------------
		/// <summary>
		/// Writes the command line usage, the description of every switch and some
		/// sample invocations to the console.
		/// </summary>
		private static void ShowHelp()
		{
			Console.WriteLine();
			Console.WriteLine( "Convert RTF to HTML" );
			Console.WriteLine();
			Console.WriteLine( "Rtf2Html source-file [destination] [/CSS:names] [/ID:path] [/IT:format] [/BC:color] [/XS] [/CE:encoding] [/CS:charset]" );
			// the special character mapping switch is /SC (the usage line used to show "[CS:mappings]")
			Console.WriteLine( "                     [/SC:mappings] [/DS:scope] [/SH] [/SI] [/LD:path] [/LP] [/LI] [/D] [/O] [/HT] [/NBS] [/CH] [/HP:pattern] [/?]" );
			Console.WriteLine();
			Console.WriteLine( "   source-file             source rtf file" );
			Console.WriteLine( "   destination             destination directory (default=source-file directory)" );
			Console.WriteLine( "   /CSS:name1,name2        style sheet names (default=none)" );
			Console.WriteLine( "   /ID:path                images directory (default=destination directory)" );
			Console.WriteLine( "   /IT:format              images type format:" );
			Console.WriteLine( "                             jpg, gif or png (default=jpg)" );
			Console.WriteLine( "   /BC:color               image background color name (default=none)" );
			// replaces a duplicated /ID line that occupied the /XS slot
			// NOTE(review): wording and default inferred from settings.ExtendedImageScale - confirm against ProgramSettings
			Console.WriteLine( "   /XS                     extended image scale (default=off)" );
			Console.WriteLine( "   /CE:encoding            character encoding:" );
			Console.WriteLine( "                             ASCII, UTF7, UTF8, Unicode, BigEndianUnicode, UTF32, OperatingSystem (default=UTF8)" );
			Console.WriteLine( "   /CS:charset             document character set used for the HTML header meta-tag 'content' (default=UTF-8)" );
			Console.WriteLine( "   /SC:mapping1,mapping2   special character mapping (default=none)" );
			Console.WriteLine( "                             mapping: special-character=replacement" );
			Console.WriteLine( "                             special characters: Tabulator, NonBreakingSpace, EmDash, EnDash, EmSpace, EnSpace, QmSpace" );
			Console.WriteLine( "                                Bullet, LeftSingleQuote, RightSingleQuote, LeftDoubleQuote, RightDoubleQuote, OptionalHyphen, NonBreakingHyphen" );
			Console.WriteLine( "   /DS:scope               document scope, comma separated list of document sections:" );
			Console.WriteLine( "                             doc, html, head, body, content, all (default=all)" );
			Console.WriteLine( "   /SH                     don't save HTML to the destination (default=on)" );
			Console.WriteLine( "   /SI                     don't save images to the destination (default=on)" );
			Console.WriteLine( "   /LD:path                log file directory (default=destination directory)" );
			Console.WriteLine( "   /LP                     write rtf parser log file (default=off)" );
			Console.WriteLine( "   /LI                     write rtf interpreter log file (default=off)" );
			Console.WriteLine( "   /D                      display HTML text on screen (default=off)" );
			Console.WriteLine( "   /O                      open HTML in associated application (default=off)" );
			Console.WriteLine( "   /HT                     show hidden text (default=off)" );
			Console.WriteLine( "   /NBS                    use non-breaking spaces (default=off)" );
			Console.WriteLine( "   /CH                     convert visual hyperlinks (default=off)" );
			Console.WriteLine( "   /HP:pattern             regular expression pattern to recognize visual hyperlinks, default:" );
			Console.WriteLine( "                             " + RtfHtmlConvertSettings.DefaultVisualHyperlinkPattern );
			Console.WriteLine( "   /?                      this help" );
			Console.WriteLine();
			Console.WriteLine( "Samples:" );
			Console.WriteLine( "  Rtf2Html MyText.rtf" );
			Console.WriteLine( "  Rtf2Html MyText.rtf /DS:body,content" );
			Console.WriteLine( "  Rtf2Html MyText.rtf c:\\temp" );
			Console.WriteLine( "  Rtf2Html MyText.rtf c:\\temp /CSS:MyCompany.css" );
			Console.WriteLine( "  Rtf2Html MyText.rtf c:\\temp /CSS:MyCompany.css,ThisProject.css" );
			Console.WriteLine( "  Rtf2Html MyText.rtf c:\\temp /CSS:MyCompany.css,ThisProject.css /ID:images /IT:png /BC:white" );
			Console.WriteLine( "  Rtf2Html MyText.rtf c:\\temp /CSS:MyCompany.css,ThisProject.css /ID:images /IT:png /BC:white /LD:log /LP /LI" );
			Console.WriteLine( "  Rtf2Html MyText.rtf c:\\temp /SC:Tabulator=>,Bullet=:" );
			Console.WriteLine();
		} // ShowHelp

		// ----------------------------------------------------------------------
		/// <summary>
		/// Program entry point: creates a <c>Program</c> and runs it. No arguments are
		/// passed in here; the instance works from its own <c>ProgramSettings</c>.
		/// </summary>
		static void Main()
		{
			Program program = new Program();
			program.Execute();
		} // Main

		// ----------------------------------------------------------------------
		// members
		private readonly ProgramSettings settings;

	} // class Program

} // namespace Itenso.Solutions.Community.Rtf2Html
// -- EOF -------------------------------------------------------------------

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Jani Giannoudis
Software Developer (Senior)
Switzerland
Jani is a co-founder of Meerazo.com, a free service that lets a cooperating group of people share resources such as locations, things, persons and their services.

| Advertise | Privacy | Terms of Use | Mobile
Web04 | 2.8.1411023.1 | Last Updated 1 Aug 2013
Article Copyright 2008 by Jani Giannoudis
Everything else Copyright © CodeProject, 1999-2014
Layout: fixed | fluid