Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version
Go to top

Writing Your Own RTF Converter

, 1 Aug 2013
An article on how to write a custom RTF parser and converter.
rtfconverter_sourcecode.zip
RtfConverter_SourceCode
bin
Debug
Release
Itenso.Rtf.Converter.Html.dll
Itenso.Rtf.Converter.Xml.dll
Itenso.Rtf.Interpreter.dll
Itenso.Rtf.Parser.dll
Itenso.Sys.dll
log4net.dll
nunit-console-runner.dll
nunit.core.dll
nunit.framework.dll
nunit.util.dll
Rtf2Html.exe
Rtf2Raw.exe
Rtf2Xml.exe
RtfInterpreterTests.exe
RtfParserTests.exe
RtfWindows.exe
RtfWinForms.exe
Converter
Html
Properties
Xml
Properties
docu
Word2007RTFSpec9.pdf
ext
log4net.dll
nunit-console-runner.dll
nunit.core.dll
nunit.framework.dll
nunit.util.dll
Interpreter
Converter
Image
Text
Interpreter
Model
Properties
Support
InterpreterTests
Properties
RtfInterpreterTest
RtfInterpreterTest_0.rtf
RtfInterpreterTest_1.rtf
RtfInterpreterTest_10.rtf
RtfInterpreterTest_11.rtf
RtfInterpreterTest_12.rtf
RtfInterpreterTest_13.rtf
RtfInterpreterTest_14.rtf
RtfInterpreterTest_15.rtf
RtfInterpreterTest_16.rtf
RtfInterpreterTest_17.rtf
RtfInterpreterTest_18.rtf
RtfInterpreterTest_19.rtf
RtfInterpreterTest_2.rtf
RtfInterpreterTest_20.rtf
RtfInterpreterTest_21.rtf
RtfInterpreterTest_22.rtf
RtfInterpreterTest_23.rtf
RtfInterpreterTest_3.rtf
RtfInterpreterTest_4.rtf
RtfInterpreterTest_4.rtf.jpg
RtfInterpreterTest_4.rtf.jpg.hex
RtfInterpreterTest_5.rtf
RtfInterpreterTest_5.rtf.png
RtfInterpreterTest_5.rtf.png.hex
RtfInterpreterTest_6.rtf
RtfInterpreterTest_6.rtf.wmf
RtfInterpreterTest_6.rtf.wmf.hex
RtfInterpreterTest_7.rtf
RtfInterpreterTest_7.rtf.emf
RtfInterpreterTest_7.rtf.emf.hex
RtfInterpreterTest_8.rtf
RtfInterpreterTest_8.rtf.wmf
RtfInterpreterTest_8.rtf.wmf.hex
RtfInterpreterTest_9.rtf
RtfInterpreterTest_fail_0.rtf
RtfInterpreterTest_fail_1.rtf
RtfInterpreterTest_fail_2.rtf
RtfInterpreterTest_fail_3.rtf
RtfInterpreterTest_fail_4.rtf
keys
ItensoRtfConverter.snk
Parser
Model
Parser
Properties
Support
ParserTests
Properties
RtfParserTest
minimal.rtf
RtfParserTest_0.rtf
RtfParserTest_1.rtf
RtfParserTest_2.rtf
RtfParserTest_3.rtf
RtfParserTest_4.rtf
RtfParserTest_5.rtf
RtfParserTest_6.rtf
RtfParserTest_7.rtf
RtfParserTest_8.rtf
RtfParserTest_fail_0.rtf
RtfParserTest_fail_1.rtf
RtfParserTest_fail_2.rtf
RtfParserTest_fail_3.rtf
RtfParserTest_fail_4.rtf
RtfParserTest_fail_5.rtf
RtfParserTest_fail_6.rtf
Rtf2Html
Properties
Rtf2Raw
Properties
Rtf2Xml
Properties
RtfWindows
Properties
RtfWinForms
DefaultText.rtf
Properties
Sys
Application
Collection
Logging
Properties
Test
RtfConverter_v1.7.0.zip
Rtf2Xml
Properties
bin
Release
Itenso.Rtf.Converter.Html.dll
Itenso.Rtf.Converter.Xml.dll
Itenso.Rtf.Interpreter.dll
Itenso.Rtf.Parser.dll
Itenso.Sys.dll
log4net.dll
nunit-console-runner.dll
nunit.core.dll
nunit.framework.dll
nunit.util.dll
Rtf2Html.exe
Rtf2Raw.exe
Rtf2Xml.exe
RtfInterpreterTests.exe
RtfParserTests.exe
RtfWindows.exe
RtfWinForms.exe
Converter
Html
Properties
Xml
Properties
docu
Word2007RTFSpec9.pdf
ext
log4net.dll
nunit-console-runner.dll
nunit.core.dll
nunit.framework.dll
nunit.util.dll
Interpreter
Converter
Image
Text
Interpreter
Model
Properties
Support
InterpreterTests
Properties
RtfInterpreterTest
RtfInterpreterTest_0.rtf
RtfInterpreterTest_1.rtf
RtfInterpreterTest_10.rtf
RtfInterpreterTest_11.rtf
RtfInterpreterTest_12.rtf
RtfInterpreterTest_13.rtf
RtfInterpreterTest_14.rtf
RtfInterpreterTest_15.rtf
RtfInterpreterTest_16.rtf
RtfInterpreterTest_17.rtf
RtfInterpreterTest_18.rtf
RtfInterpreterTest_19.rtf
RtfInterpreterTest_2.rtf
RtfInterpreterTest_20.rtf
RtfInterpreterTest_21.rtf
RtfInterpreterTest_22.rtf
RtfInterpreterTest_23.rtf
RtfInterpreterTest_3.rtf
RtfInterpreterTest_4.rtf
RtfInterpreterTest_4.rtf.jpg
RtfInterpreterTest_4.rtf.jpg.hex
RtfInterpreterTest_5.rtf
RtfInterpreterTest_5.rtf.png
RtfInterpreterTest_5.rtf.png.hex
RtfInterpreterTest_6.rtf
RtfInterpreterTest_6.rtf.wmf
RtfInterpreterTest_6.rtf.wmf.hex
RtfInterpreterTest_7.rtf
RtfInterpreterTest_7.rtf.emf
RtfInterpreterTest_7.rtf.emf.hex
RtfInterpreterTest_8.rtf
RtfInterpreterTest_8.rtf.wmf
RtfInterpreterTest_8.rtf.wmf.hex
RtfInterpreterTest_9.rtf
RtfInterpreterTest_fail_0.rtf
RtfInterpreterTest_fail_1.rtf
RtfInterpreterTest_fail_2.rtf
RtfInterpreterTest_fail_3.rtf
RtfInterpreterTest_fail_4.rtf
keys
ItensoRtfConverter.snk
Parser
Model
Parser
Properties
Support
ParserTests
Properties
RtfParserTest
minimal.rtf
RtfParserTest_0.rtf
RtfParserTest_1.rtf
RtfParserTest_2.rtf
RtfParserTest_3.rtf
RtfParserTest_4.rtf
RtfParserTest_5.rtf
RtfParserTest_6.rtf
RtfParserTest_7.rtf
RtfParserTest_8.rtf
RtfParserTest_fail_0.rtf
RtfParserTest_fail_1.rtf
RtfParserTest_fail_2.rtf
RtfParserTest_fail_3.rtf
RtfParserTest_fail_4.rtf
RtfParserTest_fail_5.rtf
RtfParserTest_fail_6.rtf
Rtf2Html
Properties
Rtf2Raw
Properties
RtfWindows
Properties
RtfWinForms
DefaultText.rtf
Properties
Sys
Application
Collection
Logging
Properties
Test
Rtf2Html2010.suo
Rtf2Raw2010.suo
Rtf2Xml2010.suo
RtfInterpreter2010.suo
RtfParser2010.suo
RtfWindows2010.suo
RtfWinForms2010.suo
// -- FILE ------------------------------------------------------------------
// name       : RtfSpec.cs
// project    : RTF Framelet
// created    : Leon Poyyayil - 2008.05.20
// language   : c#
// environment: .NET 2.0
// copyright  : (c) 2004-2010 by Itenso GmbH, Switzerland
// --------------------------------------------------------------------------
using System.Text;

namespace Itenso.Rtf
{

	// ------------------------------------------------------------------------
	/// <summary>
	/// Central collection of the RTF control-word names (without the leading
	/// backslash) and numeric constants used by this library, plus a lookup
	/// from font charset numbers to code pages.
	/// </summary>
	public static class RtfSpec
	{

		// --- rtf general ----
		public const string TagRtf = "rtf";
		public const int RtfVersion1 = 1;

		public const string TagGenerator = "generator";
		public const string TagViewKind = "viewkind";

		// --- encoding ----
		public const string TagEncodingAnsi = "ansi";
		public const string TagEncodingMac = "mac";
		public const string TagEncodingPc = "pc";
		public const string TagEncodingPca = "pca";
		public const string TagEncodingAnsiCodePage = "ansicpg";
		public const int AnsiCodePage = 1252;
		public const int SymbolFakeCodePage = 42; // a windows legacy hack ...
		// NOTE(review): on .NET Core / .NET 5+ code page 1252 is presumably only
		// available after registering CodePagesEncodingProvider; on the .NET
		// Framework targeted by this file it is built in - confirm target runtime.
		public static readonly Encoding AnsiEncoding = Encoding.GetEncoding( AnsiCodePage );

		public const string TagUnicodeSkipCount = "uc";
		public const string TagUnicodeCode = "u";
		public const string TagUnicodeAlternativeChoices = "upr";
		public const string TagUnicodeAlternativeUnicode = "ud";

		// --- font ----
		public const string TagFontTable = "fonttbl";
		public const string TagDefaultFont = "deff";
		public const string TagFont = "f";
		public const string TagFontKindNil = "fnil";
		public const string TagFontKindRoman = "froman";
		public const string TagFontKindSwiss = "fswiss";
		public const string TagFontKindModern = "fmodern";
		public const string TagFontKindScript = "fscript";
		public const string TagFontKindDecor = "fdecor";
		public const string TagFontKindTech = "ftech";
		public const string TagFontKindBidi = "fbidi";
		public const string TagFontCharset = "fcharset";
		public const string TagFontPitch = "fprq";
		public const string TagFontSize = "fs";
		public const string TagFontDown = "dn";
		public const string TagFontUp = "up";
		public const string TagFontSubscript = "sub";
		public const string TagFontSuperscript = "super";
		public const string TagFontNoSuperSub = "nosupersub";

		public const string TagThemeFontLoMajor = "flomajor"; // these are 'theme' fonts
		public const string TagThemeFontHiMajor = "fhimajor"; // used in new font tables
		public const string TagThemeFontDbMajor = "fdbmajor";
		public const string TagThemeFontBiMajor = "fbimajor";
		public const string TagThemeFontLoMinor = "flominor";
		public const string TagThemeFontHiMinor = "fhiminor";
		public const string TagThemeFontDbMinor = "fdbminor";
		public const string TagThemeFontBiMinor = "fbiminor";

		public const int DefaultFontSize = 24;

		public const string TagCodePage = "cpg";

		// --- color ----
		public const string TagColorTable = "colortbl";
		public const string TagColorRed = "red";
		public const string TagColorGreen = "green";
		public const string TagColorBlue = "blue";
		public const string TagColorForeground = "cf";
		public const string TagColorBackground = "cb";
		public const string TagColorBackgroundWord = "chcbpat";
		public const string TagColorHighlight = "highlight";

		// --- header/footer ----
		public const string TagHeader = "header";
		public const string TagHeaderFirst = "headerf";
		public const string TagHeaderLeft = "headerl";
		public const string TagHeaderRight = "headerr";
		public const string TagFooter = "footer";
		public const string TagFooterFirst = "footerf";
		public const string TagFooterLeft = "footerl";
		public const string TagFooterRight = "footerr";
		public const string TagFootnote = "footnote";

		// --- character ----
		public const string TagDelimiter = ";";
		public const string TagExtensionDestination = "*";
		public const string TagTilde = "~";
		public const string TagHyphen = "-";
		public const string TagUnderscore = "_";

		// --- special character ----
		public const string TagPage = "page";
		public const string TagSection = "sect";
		public const string TagParagraph = "par";
		public const string TagLine = "line";
		public const string TagTabulator = "tab";
		public const string TagEmDash = "emdash";
		public const string TagEnDash = "endash";
		public const string TagEmSpace = "emspace";
		public const string TagEnSpace = "enspace";
		public const string TagQmSpace = "qmspace";
		public const string TagBullet = "bullet";
		// misspelled original name, kept as an alias so existing callers still compile
		public const string TagBulltet = TagBullet;
		public const string TagLeftSingleQuote = "lquote";
		public const string TagRightSingleQuote = "rquote";
		public const string TagLeftDoubleQuote = "ldblquote";
		public const string TagRightDoubleQuote = "rdblquote";

		// --- format ----
		public const string TagPlain = "plain";
		public const string TagParagraphDefaults = "pard";
		public const string TagSectionDefaults = "sectd";

		public const string TagBold = "b";
		public const string TagItalic = "i";
		public const string TagUnderLine = "ul";
		public const string TagUnderLineNone = "ulnone";
		public const string TagStrikeThrough = "strike";
		public const string TagHidden = "v";
		public const string TagAlignLeft = "ql";
		public const string TagAlignCenter = "qc";
		public const string TagAlignRight = "qr";
		public const string TagAlignJustify = "qj";

		public const string TagStyleSheet = "stylesheet";

		// --- info ----
		public const string TagInfo = "info";
		public const string TagInfoVersion = "version";
		public const string TagInfoRevision = "vern";
		public const string TagInfoNumberOfPages = "nofpages";
		public const string TagInfoNumberOfWords = "nofwords";
		public const string TagInfoNumberOfChars = "nofchars";
		public const string TagInfoId = "id";
		public const string TagInfoTitle = "title";
		public const string TagInfoSubject = "subject";
		public const string TagInfoAuthor = "author";
		public const string TagInfoManager = "manager";
		public const string TagInfoCompany = "company";
		public const string TagInfoOperator = "operator";
		public const string TagInfoCategory = "category";
		public const string TagInfoKeywords = "keywords";
		public const string TagInfoComment = "comment";
		public const string TagInfoDocumentComment = "doccomm";
		public const string TagInfoHyperLinkBase = "hlinkbase";
		public const string TagInfoCreationTime = "creatim";
		public const string TagInfoRevisionTime = "revtim";
		public const string TagInfoPrintTime = "printim";
		public const string TagInfoBackupTime = "buptim";
		public const string TagInfoYear = "yr";
		public const string TagInfoMonth = "mo";
		public const string TagInfoDay = "dy";
		public const string TagInfoHour = "hr";
		public const string TagInfoMinute = "min";
		public const string TagInfoSecond = "sec";
		public const string TagInfoEditingTimeMinutes = "edmins";

		// --- user properties ----
		public const string TagUserProperties = "userprops";
		public const string TagUserPropertyType = "proptype";
		public const string TagUserPropertyName = "propname";
		public const string TagUserPropertyValue = "staticval";
		public const string TagUserPropertyLink = "linkval";

		// this table is from the RTF specification 1.9.1, page 40
		public const int PropertyTypeInteger = 3;
		public const int PropertyTypeRealNumber = 5;
		public const int PropertyTypeDate = 64;
		public const int PropertyTypeBoolean = 11;
		public const int PropertyTypeText = 30;

		// --- picture ----
		public const string TagPicture = "pict";
		public const string TagPictureWrapper = "shppict";
		public const string TagPictureWrapperAlternative = "nonshppict";
		public const string TagPictureFormatEmf = "emfblip";
		public const string TagPictureFormatPng = "pngblip";
		public const string TagPictureFormatJpg = "jpegblip";
		public const string TagPictureFormatPict = "macpict";
		public const string TagPictureFormatOs2Metafile = "pmmetafile";
		public const string TagPictureFormatWmf = "wmetafile";
		public const string TagPictureFormatWinDib = "dibitmap";
		public const string TagPictureFormatWinBmp = "wbitmap";
		public const string TagPictureWidth = "picw";
		public const string TagPictureHeight = "pich";
		public const string TagPictureWidthGoal = "picwgoal";
		public const string TagPictureHeightGoal = "pichgoal";
		public const string TagPictureWidthScale = "picscalex";
		public const string TagPictureHeightScale = "picscaley";

		// --- bullets/numbering ----
		public const string TagParagraphNumberText = "pntext";
		public const string TagListNumberText = "listtext";

		// ----------------------------------------------------------------------
		/// <summary>
		/// Translates a font charset number into the matching code page number.
		/// </summary>
		/// <param name="charSet">
		/// The charset number to translate (presumably the argument of the
		/// <c>fcharset</c> control word - verify against callers).
		/// </param>
		/// <returns>
		/// The code page assigned to <paramref name="charSet"/>, or <c>0</c> when
		/// the charset is the default charset, one of the old charsets without a
		/// code page, or not listed at all.
		/// </returns>
		public static int GetCodePage( int charSet )
		{
			switch ( charSet )
			{
				case 0:
					return AnsiCodePage; // ANSI
				case 1:
					return 0; // Default
				case 2:
					return SymbolFakeCodePage; // Symbol
				case 77:
					return 10000; // Mac Roman
				case 78:
					return 10001; // Mac Shift Jis
				case 79:
					return 10003; // Mac Hangul
				case 80:
					return 10008; // Mac GB2312
				case 81:
					return 10002; // Mac Big5
				case 82:
					return 0; // Mac Johab (old)
				case 83:
					return 10005; // Mac Hebrew
				case 84:
					return 10004; // Mac Arabic
				case 85:
					return 10006; // Mac Greek
				case 86:
					return 10081; // Mac Turkish
				case 87:
					return 10021; // Mac Thai
				case 88:
					return 10029; // Mac East Europe
				case 89:
					return 10007; // Mac Russian
				case 128:
					return 932; // Shift JIS
				case 129:
					return 949; // Hangul
				case 130:
					return 1361; // Johab
				case 134:
					return 936; // GB2312
				case 136:
					return 950; // Big5
				case 161:
					return 1253; // Greek
				case 162:
					return 1254; // Turkish
				case 163:
					return 1258; // Vietnamese
				case 177:
					return 1255; // Hebrew
				case 178:
					return 1256; // Arabic
				case 179:
					return 0; // Arabic Traditional (old)
				case 180:
					return 0; // Arabic user (old)
				case 181:
					return 0; // Hebrew user (old)
				case 186:
					return 1257; // Baltic
				case 204:
					return 1251; // Russian
				case 222:
					return 874; // Thai
				case 238:
					return 1250; // Eastern European
				case 254:
					return 437; // PC 437
				case 255:
					return 850; // OEM
				default:
					return 0; // charset without a known code page
			}
		} // GetCodePage

	} // class RtfSpec

} // namespace Itenso.Rtf
// -- EOF -------------------------------------------------------------------

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Jani Giannoudis
Software Developer (Senior)
Switzerland Switzerland
Jani is a co-founder of Meerazo.com, a free service that lets a cooperating group of people share resources such as locations, things, and people and their services.

| Advertise | Privacy | Mobile
Web03 | 2.8.140916.1 | Last Updated 1 Aug 2013
Article Copyright 2008 by Jani Giannoudis
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid