Click here to Skip to main content
15,895,606 members
Articles / Programming Languages / C#

NRTFTree - A class library for RTF processing in C#

Rate me:
Please Sign up or sign in to vote.
4.50/5 (42 votes)
7 Sep 2007 | LGPL3 | 3 min read | 479.7K   22.4K   161
Class library to manage RTF files.
/*****************************************************************************
* Clase:		RtfLex
* Autor:		Sgoliver
* Fecha:		13/03/2005
* Descripción:	Analizador léxico básico para el formato RTF.
* ***************************************************************************/
using System;
using System.Globalization;
using System.IO;
using System.Text;

namespace Net.Sgoliver.NRtfTree
{
	/// <summary>
	/// Lexical analyser (tokenizer) for documents in RTF format. It reads the document and returns,
	/// one call at a time, every RTF element (token) found in it.
	/// </summary>
	public class RtfLex
	{
		#region Private fields

		/// <summary>
		/// Reader over the RTF document being analysed.
		/// </summary>
		private readonly TextReader rtf;

		/// <summary>
		/// Single character pushed back by the lexer, or <see cref="NoPending"/> when there is none.
		/// It is needed because <see cref="TextReader"/> only offers one character of lookahead and
		/// deciding whether a '-' starts a numeric parameter requires two.
		/// </summary>
		private int pending = NoPending;

		#endregion

		#region Constants

		/// <summary>
		/// End-of-input marker, as returned by <see cref="TextReader.Read()"/> and <see cref="TextReader.Peek()"/>.
		/// </summary>
		private const int EOF	= -1;

		/// <summary>
		/// Value of <see cref="pending"/> when no character has been pushed back.
		/// </summary>
		private const int NoPending	= -2;

		#endregion

		#region Constructors

		/// <summary>
		/// Creates a lexer that reads the RTF document from a stream reader.
		/// </summary>
		/// <param name="rtffile">Reader over the file to analyse.</param>
		/// <exception cref="ArgumentNullException"><paramref name="rtffile"/> is null.</exception>
		public RtfLex(StreamReader rtffile)
		{
			if (rtffile == null)
			{
				throw new ArgumentNullException("rtffile");
			}

			rtf = rtffile;
		}

		/// <summary>
		/// Creates a lexer that reads the RTF document from a string reader.
		/// </summary>
		/// <param name="rtftext">Reader over the string that contains the RTF document.</param>
		/// <exception cref="ArgumentNullException"><paramref name="rtftext"/> is null.</exception>
		public RtfLex(StringReader rtftext)
		{
			if (rtftext == null)
			{
				throw new ArgumentNullException("rtftext");
			}

			rtf = rtftext;
		}

		/// <summary>
		/// Creates a lexer that reads the RTF document from any text reader.
		/// </summary>
		/// <param name="reader">Reader over the RTF document.</param>
		/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
		public RtfLex(TextReader reader)
		{
			if (reader == null)
			{
				throw new ArgumentNullException("reader");
			}

			rtf = reader;
		}

		#endregion

		#region Public methods

		/// <summary>
		/// Reads the next token of the RTF document.
		/// </summary>
		/// <returns>
		/// The next token: a group start or end, a keyword or control symbol, a run of text, or an
		/// EOF token once the input is exhausted.
		/// </returns>
		/// <exception cref="OverflowException">
		/// A keyword parameter does not fit in a 32-bit signed integer.
		/// </exception>
		public RtfToken nextToken()
		{
			RtfToken token = new RtfToken();

			int c = ReadChar();

			// Carriage returns, line feeds, tabs and null characters in front of a token are skipped.
			while (c == '\r' || c == '\n' || c == '\t' || c == '\0')
			{
				c = ReadChar();
			}

			switch (c)
			{
				case EOF:
					token.type = TOKEN_TYPE.EOF;
					break;
				case '{':
					token.type = TOKEN_TYPE.GROUP_START;
					break;
				case '}':
					token.type = TOKEN_TYPE.GROUP_END;
					break;
				case '\\':
					parseKeyword(token);
					break;
				default:
					token.type = TOKEN_TYPE.TEXT;
					parseText(c, token);
					break;
			}

			return token;
		}

		#endregion

		#region Private methods

		/// <summary>
		/// Reads the next character, honouring a pushed-back character if there is one.
		/// </summary>
		/// <returns>The character read, or <see cref="EOF"/> at the end of the input.</returns>
		private int ReadChar()
		{
			if (pending != NoPending)
			{
				int pushedBack = pending;
				pending = NoPending;
				return pushedBack;
			}

			return rtf.Read();
		}

		/// <summary>
		/// Returns the next character without consuming it, honouring a pushed-back character.
		/// </summary>
		/// <returns>The next character, or <see cref="EOF"/> at the end of the input.</returns>
		private int PeekChar()
		{
			return pending != NoPending ? pending : rtf.Peek();
		}

		/// <summary>
		/// Reads a control word or control symbol. The leading backslash has already been consumed.
		/// </summary>
		/// <param name="token">Token that receives the type, key and optional parameter.</param>
		/// <exception cref="OverflowException">
		/// The numeric parameter does not fit in a 32-bit signed integer.
		/// </exception>
		private void parseKeyword(RtfToken token)
		{
			int c = PeekChar();

			// A backslash that is the very last character of the input has nothing to introduce;
			// report end of input instead of building a token from the EOF marker.
			if (c == EOF)
			{
				token.type = TOKEN_TYPE.EOF;
				return;
			}

			// Anything that does not start with a letter is a control symbol.
			if (!isLetter(c))
			{
				parseControlSymbol(token);
				return;
			}

			// The control word is the longest run of letters.
			StringBuilder keyword = new StringBuilder();
			while (isLetter(c))
			{
				ReadChar();
				keyword.Append((char)c);
				c = PeekChar();
			}

			token.type = TOKEN_TYPE.KEYWORD;
			token.key = keyword.ToString();

			if (c == '-')
			{
				// A hyphen only introduces a parameter when digits follow it.
				ReadChar();

				if (isDigit(PeekChar()))
				{
					token.hasParam = true;
					token.param = readParameter(true);
					c = PeekChar();
				}
				else
				{
					// Not a parameter: give the hyphen back so that it is delivered as part of
					// the next token instead of failing on an empty number.
					pending = '-';
				}
			}
			else if (isDigit(c))
			{
				token.hasParam = true;
				token.param = readParameter(false);
				c = PeekChar();
			}

			// A single space after a control word is its delimiter and is not part of the text.
			if (c == ' ')
			{
				ReadChar();
			}
		}

		/// <summary>
		/// Reads a control symbol (backslash followed by one non-letter character). For the
		/// 8-bit character symbol <c>\'</c> the hexadecimal code that follows is read as parameter.
		/// </summary>
		/// <param name="token">Token that receives the type, key and optional parameter.</param>
		private void parseControlSymbol(RtfToken token)
		{
			int symbol = ReadChar();

			token.type = TOKEN_TYPE.CONTROL;
			token.key = ((char)symbol).ToString();

			if (symbol != '\'')
			{
				return;
			}

			// Up to two hexadecimal digits are taken. Characters that are not hexadecimal digits
			// (or the end of the input) are never consumed, so malformed input cannot make the
			// conversion fail or swallow the start of the next token.
			int code = 0;
			int digitsRead = 0;
			int next = PeekChar();

			while (digitsRead < 2 && hexValue(next) >= 0)
			{
				ReadChar();
				code = code * 16 + hexValue(next);
				digitsRead++;
				next = PeekChar();
			}

			// When no hexadecimal digit follows, the token's parameter fields are left untouched.
			if (digitsRead > 0)
			{
				token.hasParam = true;
				token.param = code;
			}
		}

		/// <summary>
		/// Reads the run of decimal digits at the current position and converts it to an integer.
		/// The caller guarantees that the next character is a digit.
		/// </summary>
		/// <param name="negative">True when a leading '-' has already been consumed.</param>
		/// <returns>The parameter value, negated when <paramref name="negative"/> is true.</returns>
		/// <exception cref="OverflowException">
		/// The value does not fit in a 32-bit signed integer.
		/// </exception>
		private int readParameter(bool negative)
		{
			StringBuilder number = new StringBuilder();

			// The sign is parsed together with the digits so that Int32.MinValue is accepted.
			if (negative)
			{
				number.Append('-');
			}

			int c = PeekChar();
			while (isDigit(c))
			{
				ReadChar();
				number.Append((char)c);
				c = PeekChar();
			}

			return Convert.ToInt32(number.ToString(), CultureInfo.InvariantCulture);
		}

		/// <summary>
		/// Reads a run of plain text, up to (not including) the next backslash, brace or end of input.
		/// </summary>
		/// <param name="car">First character of the text, already consumed by the caller.</param>
		/// <param name="token">Token whose key receives the text read.</param>
		private void parseText(int car, RtfToken token)
		{
			StringBuilder text = new StringBuilder();
			text.Append((char)car);

			int c = PeekChar();
			while (c != '\\' && c != '}' && c != '{' && c != EOF)
			{
				ReadChar();

				// Raw carriage returns and line feeds only format the RTF file itself; real
				// line breaks are expressed with control words, so they are not text content.
				if (c != '\r' && c != '\n')
				{
					text.Append((char)c);
				}

				c = PeekChar();
			}

			token.key = text.ToString();
		}

		/// <summary>
		/// Tells whether a character code is an ASCII lowercase letter.
		/// </summary>
		/// <param name="c">Character code.</param>
		/// <returns>True for 'a' to 'z'.</returns>
		private static bool isLower(int c)
		{
			return c >= 'a' && c <= 'z';
		}

		/// <summary>
		/// Tells whether a character code is an ASCII uppercase letter.
		/// </summary>
		/// <param name="c">Character code.</param>
		/// <returns>True for 'A' to 'Z'.</returns>
		private static bool isUpper(int c)
		{
			return c >= 'A' && c <= 'Z';
		}

		/// <summary>
		/// Tells whether a character code is an ASCII letter.
		/// </summary>
		/// <param name="c">Character code.</param>
		/// <returns>True for 'a' to 'z' and 'A' to 'Z'.</returns>
		private static bool isLetter(int c)
		{
			return isLower(c) || isUpper(c);
		}

		/// <summary>
		/// Tells whether a character code is an ASCII decimal digit.
		/// </summary>
		/// <param name="c">Character code.</param>
		/// <returns>True for '0' to '9'.</returns>
		private static bool isDigit(int c)
		{
			return c >= '0' && c <= '9';
		}

		/// <summary>
		/// Returns the numeric value of a hexadecimal digit.
		/// </summary>
		/// <param name="c">Character code.</param>
		/// <returns>0 to 15 for a hexadecimal digit (either case); -1 for anything else.</returns>
		private static int hexValue(int c)
		{
			if (isDigit(c))
			{
				return c - '0';
			}

			if (c >= 'a' && c <= 'f')
			{
				return c - 'a' + 10;
			}

			if (c >= 'A' && c <= 'F')
			{
				return c - 'A' + 10;
			}

			return -1;
		}

		/// <summary>
		/// Translates an ANSI code into the corresponding character. Only the accented vowels
		/// and the letter N with tilde (upper and lower case) are translated.
		/// </summary>
		/// <param name="cod">Character code.</param>
		/// <returns>
		/// The character for a supported code, or an empty string for any other code.
		/// </returns>
		private string translateAnsiCode(int cod)
		{
			switch (cod)
			{
				case 193:	// A with acute
				case 201:	// E with acute
				case 205:	// I with acute
				case 209:	// N with tilde
				case 211:	// O with acute
				case 218:	// U with acute
				case 225:	// a with acute
				case 233:	// e with acute
				case 237:	// i with acute
				case 241:	// n with tilde
				case 243:	// o with acute
				case 250:	// u with acute
					// For these codes the ANSI (Latin-1 range) value equals the Unicode code
					// point, so the character is obtained directly from the code.
					return ((char)cod).ToString();
				default:
					return "";
			}
		}

		#endregion
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU Lesser General Public License (LGPLv3)


Written By
Web Developer
Spain Spain
Currently, I work for a great consulting company as a software developer.

My homepage is:
http://www.sgoliver.net

Comments and Discussions