Click here to Skip to main content
15,891,910 members
Articles / Programming Languages / C# 4.0

Entity mapping language implementation using bsn-goldparser with CodeDom

Rate me:
Please Sign up or sign in to vote.
5.00/5 (8 votes)
6 Apr 2012CPOL17 min read 28.4K   410   19  
Use the GOLD Parser to define a language for mapping between two business entities, create a parser using the bsn-goldparser engine, and generate an assembly using CodeDom.
// bsn GoldParser .NET Engine
// --------------------------
// 
// Copyright 2009, 2010 by Ars�ne von Wyss - avw@gmx.ch
// 
// Development has been supported by Sirius Technologies AG, Basel
// 
// Source:
// 
// https://bsn-goldparser.googlecode.com/hg/
// 
// License:
// 
// The library is distributed under the GNU Lesser General Public License:
// http://www.gnu.org/licenses/lgpl.html
// 
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
// 
// You should have received a copy of the GNU Lesser General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
// 
using System;
using System.IO;

using bsn.GoldParser.Grammar;

namespace bsn.GoldParser.Parser {
	///<summary>
	/// The tokenizer reads an input character stream and outputs the tokens read.
	///</summary>
	/// <remarks>
	/// A pull-model is used for the tokenizer.
	/// </remarks>
	public abstract class Tokenizer<T>: ITokenizer<T> where T: class, IToken {
		private enum ParseMode {
			SingleSymbol,
			MergeLexicalErrors,
			BlockComment
		}

		private readonly TextBuffer buffer; // Buffer to keep current characters.
		private readonly CompiledGrammar grammar;
		private bool mergeLexicalErrors;

		/// <summary>
		/// Initializes new instance of Parser class.
		/// </summary>
		/// <param name="textReader"><see cref="TextReader"/> instance to read data from.</param>
		/// <param name="grammar">The grammar used for the DFA states</param>
		protected Tokenizer(TextReader textReader, CompiledGrammar grammar) {
			this.grammar = grammar;
			if (textReader == null) {
				throw new ArgumentNullException("textReader");
			}
			if (grammar == null) {
				throw new ArgumentNullException("grammar");
			}
			buffer = new TextBuffer(textReader);
		}

		/// <summary>
		/// Gets the index of the input.
		/// </summary>
		/// <value>The index of the input.</value>
		public long InputIndex {
			get {
				return buffer.Position;
			}
		}

		/// <summary>
		/// Gets current char position in the current source line. It is 1-based.
		/// </summary>
		public int LineColumn {
			get {
				return buffer.Column;
			}
		}
		/// <summary>
		/// Gets current line number. It is 1-based.
		/// </summary>
		public int LineNumber {
			get {
				return buffer.Line;
			}
		}

		/// <summary>
		/// Gets or sets a value indicating whether lexical errors are merged (so that they contain more than one character).
		/// </summary>
		/// <value><c>true</c> if lexical errors are to be merged; otherwise, <c>false</c>.</value>
		public bool MergeLexicalErrors {
			get {
				return mergeLexicalErrors;
			}
			set {
				mergeLexicalErrors = value;
			}
		}

		/// <summary>
		/// Gets source of parsed data.
		/// </summary>
		public TextReader TextReader {
			get {
				return buffer.TextReader;
			}
		}

		protected abstract T CreateToken(Symbol tokenSymbol, LineInfo tokenPosition, string text);

		private ParseMessage NextSymbol(ParseMode mode, out Symbol tokenSymbol, ref int length) {
			DfaState state = grammar.DfaInitialState;
			char ch;
			tokenSymbol = null;
			int offset = length;
			while (buffer.TryLookahead(ref offset, out ch)) {
				state = state.GetTransition(ch);
				// This block-if statement checks whether an edge was found from the current state.
				// If so, the state and current position advance. Otherwise it is time to exit the main loop
				// and report the token found (if there was it fact one). If the LastAcceptState is -1,
				// then we never found a match and the Error Token is created. Otherwise, a new token
				// is created using the Symbol in the Accept State and all the characters that
				// comprise it.
				if (state == null) {
					// end has been reached
					if (tokenSymbol == null) {
						//Tokenizer cannot recognize symbol
						offset = ++length;
						if (mode == ParseMode.MergeLexicalErrors) {
							while (NextSymbol(ParseMode.SingleSymbol, out tokenSymbol, ref offset) == ParseMessage.LexicalError) {
								length = offset;
							}
						}
						tokenSymbol = grammar.ErrorSymbol;
					}
					break;
				}
				if ((mode == ParseMode.BlockComment) && (!grammar.BlockCommentStates.Contains(state))) {
					state = grammar.DfaInitialState;
				} else {
					// This code checks whether the target state accepts a token. If so, it sets the
					// appropiate variables so when the algorithm in done, it can return the proper
					// token and number of characters.
					if (state.AcceptSymbol != null) {
						tokenSymbol = state.AcceptSymbol;
						length = offset;
					}
				}
			}
			if (tokenSymbol == null) {
				if (offset == length) {
					tokenSymbol = grammar.EndSymbol;
				} else {
					tokenSymbol = grammar.ErrorSymbol;
					length++;
				}
			}
			switch (tokenSymbol.Kind) {
			case SymbolKind.CommentLine:
				while (buffer.TryLookahead(length, out ch) && (ch != '\r') && (ch != '\n')) {
					length++;
				}
				return ParseMessage.CommentLineRead;
			case SymbolKind.CommentStart:
				for (;;) {
					Symbol blockTokenSymbol;
					NextSymbol(ParseMode.BlockComment, out blockTokenSymbol, ref length);
					switch (blockTokenSymbol.Kind) {
					case SymbolKind.End:
						return ParseMessage.CommentError;
					case SymbolKind.CommentEnd:
						return ParseMessage.CommentBlockRead;
					}
				}
			case SymbolKind.Error:
				return ParseMessage.LexicalError;
			}
			return ParseMessage.TokenRead;
		}

		/// <summary>
		/// Gets the grammar.
		/// </summary>
		/// <value>The grammar.</value>
		public CompiledGrammar Grammar {
			get {
				return grammar;
			}
		}

		/// <summary>
		/// Reads next token from the input stream.
		/// </summary>
		/// <returns>Token symbol which was read.</returns>
		public virtual ParseMessage NextToken(out T token) {
			Symbol tokenSymbol;
			int offset = 0;
			ParseMessage result = NextSymbol(MergeLexicalErrors ? ParseMode.MergeLexicalErrors : ParseMode.SingleSymbol, out tokenSymbol, ref offset);
			LineInfo position;
			string text = buffer.Read(offset, out position);
			token = CreateToken(tokenSymbol, position, text);
			return result;
		}
	}

	/// <summary>
	/// A concrete tokenizer creating the normal <see cref="TextToken"/> as tokens.
	/// </summary>
	public class Tokenizer: Tokenizer<Token> {
		/// <summary>
		/// Initializes a new instance of the <see cref="Tokenizer"/> class.
		/// </summary>
		/// <param name="textReader"><see cref="TextReader"/> instance to read data from.</param>
		/// <param name="grammar">The grammar used for the DFA states</param>
		public Tokenizer(TextReader textReader, CompiledGrammar grammar): base(textReader, grammar) {}

		protected override Token CreateToken(Symbol tokenSymbol, LineInfo tokenPosition, string text) {
			return new TextToken(tokenSymbol, tokenPosition, text);
		}
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Founder Software Force
Hong Kong Hong Kong
I am always interested in finding innovative ways for building better applications and founded a technology company since 2003. Welcome to exchange any idea with you and if I am not too busy before deadline of projects, I will reply your emails. Also, if you willing to pay for consulting works and customized software development, you can leave me message.

Comments and Discussions