Click here to Skip to main content
15,884,353 members
Articles / Programming Languages / C#

Wrapper Class for Parsing Fixed-Width, Multiple Section Files

Rate me:
Please Sign up or sign in to vote.
4.07/5 (4 votes)
21 Apr 2006CPOL8 min read 51.5K   1.1K   33  
An article describing a wrapper class for importing very large, multiple-section reports, typically produced by a legacy system, into SQL Server or another modern RDBMS.
using System;
using System.IO;
using System.Text;
using System.Collections;
using System.Data;

namespace ReportParser
{
	public class FileParser
	{
		// Schema object used by the parser; a new instance is created in each constructor.
    	private FileSchema m_FileSchema = null;
		// Path to the data file; backing field for the FileName property.
		private string m_FileName = "";
		// Path to the schema file; backing field for the SchemaFile property.
		private string m_SchemaFile = "";
		// Backing field for the read-only CurrentLineNumber property.
		private int m_CurrentLineNumber = 0;

		/// <summary>
		/// Creates a parser for the given data file with a default-constructed schema.
		/// </summary>
		/// <param name="fileName">Path to the data file.</param>
		public FileParser(string fileName)
		{
			this.m_FileSchema = new FileSchema();
			this.m_FileName = fileName;
		}

		/// <summary>
		/// Creates a parser for the given data file whose schema is built from a schema file.
		/// </summary>
		/// <param name="fileName">Path to the data file.</param>
		/// <param name="schemaFile">Path passed to the FileSchema constructor.</param>
		public FileParser(string fileName, string schemaFile)
		{
			m_FileName = fileName;
			// Remember the schema path so that the SchemaFile property reports it;
			// previously the field kept its initial "" value.
			m_SchemaFile = schemaFile;
			m_FileSchema = new FileSchema(schemaFile);
		}

		/// <summary>
		/// Gets or sets the path of the data file to parse.
		/// </summary>
		public string FileName
		{
			get
			{
				return this.m_FileName;
			}
			set
			{
				this.m_FileName = value;
			}
		}

		/// <summary>
		/// Gets or sets the stored schema file path.
		/// The setter only stores the value; it does not touch the schema object.
		/// </summary>
		public string SchemaFile
		{
			get
			{
				return this.m_SchemaFile;
			}
			set
			{
				this.m_SchemaFile = value;
			}
		}

		/// <summary>
		/// Gets the current line number recorded while parsing (read-only).
		/// </summary>
		/// <remarks>
		/// NOTE(review): no code visible in this file assigns the backing field after
		/// its initializer, so this looks like it always returns 0 - confirm.
		/// </remarks>
		public int CurrentLineNumber
		{
			get
			{
				return this.m_CurrentLineNumber;
			}
		}

		/// <summary>
		/// Trims leading and trailing white space from every entry of the array, in place.
		/// </summary>
		/// <param name="fields">
		/// Array whose entries are trimmed. A null array, or null entries inside the
		/// array, are left untouched instead of raising a NullReferenceException.
		/// </param>
		private void TrimFields(ref string[] fields)
		{
			if (fields == null)
				return;

			for (int x = 0; x < fields.Length; x++)
			{
				// A null entry has nothing to trim; skip it rather than fail.
				if (fields[x] != null)
					fields[x] = fields[x].Trim();
			}
		}

		/// <summary>
		/// Parses the data file section by section and returns the rows in a DataSet
		/// that holds one DataTable per schema section.
		/// </summary>
		/// <remarks>
		/// The first line of the file is read and discarded. Sections are then read in
		/// schema order, wrapping back to the first section after the last one, until
		/// ReadSection returns false.
		/// </remarks>
		/// <returns>The populated DataSet.</returns>
		/// <exception cref="ApplicationException">
		/// Thrown for any error raised while reading sections or filling rows. The message
		/// includes the current field name and line text; the original exception is
		/// available through InnerException.
		/// </exception>
		public DataSet ParseToDataSet()
		{
			DataSet oDS = DataSetFromSchema();

			// The using block closes the reader on every exit path, including when
			// parsing throws; previously the file handle leaked on failure.
			using (StreamReader reader = new StreamReader(m_FileName))
			{
				reader.ReadLine();			// Skip the first line of the file; the data starts on the second line.

				int iSection = 0;			// Index of the schema section currently being read.
				string[] Lines = null;		// Lines returned by ReadSection for the current section.
				int CurrentLine;			// Zero-based line index within the current section.
				Section oSection = null;	// Schema entry for the current section.
				DataRow prevRow = null;		// Most recently added row; source for fields copied from the previous row.
				string CurrentFieldName = "";	// Diagnostics: name of the field being processed.
				string CurrentLineValue = "";	// Diagnostics: text of the line being processed.

				try
				{
					while (ReadSection(reader, m_FileSchema.Sections[iSection].Length, ref Lines))
					{
						oSection = m_FileSchema.Sections[iSection];
						CurrentLine = 0;
						foreach (string Line in Lines)
						{
							CurrentLineValue = Line;

							foreach (TextField tf in oSection.TextFields)
							{
								CurrentFieldName = tf.Name;
								if (tf.LineNumber == CurrentLine)
								{
									if (tf.StartIndex > 0)
									{
										// StartIndex > 0: take the fixed-width slice from the current line.
										// The text goes through a temporary string so it can be inspected
										// before it reaches the typed field.
										// NOTE(review): a field starting at index 0 would take the
										// previous-row branch below - confirm that is intended.
										string strValue = Line.Substring(tf.StartIndex, tf.Length).Trim();
										tf.Value = (object) strValue;
									}
									else
									{
										// Otherwise copy the value of the same-named column from the previously added row.
										tf.Value = prevRow[tf.Name];
									}
								}
							}

							if (oSection.Length > 0)
								CurrentLine++;						// Multi-line section: advance to the next line of the same row.
							else
								prevRow = AddRow(oDS, oSection);	// Single-line, multi-row section: each line is its own row.
						}

						// In a multi-line section all lines together form one row; add it now.
						if (oSection.Length > 0)
							prevRow = AddRow(oDS, oSection);

						// Move to the next section, wrapping to the first after the last.
						iSection++;
						if (iSection > (m_FileSchema.Sections.Count - 1))
							iSection = 0;
					}

					return oDS;
				}
				catch (Exception ex)
				{
					string strMessage = "FileParser.ParseToDataSet: The following error occured: " + ex.Message
						+ "\nStack Trace: " + ex.StackTrace
						+ "\nCurrent Field: " + CurrentFieldName
						+ "\nCurrent Line: " + CurrentLineValue;
					// Keep the original exception as InnerException so its type and stack are not lost.
					throw new ApplicationException(strMessage, ex);
				}
			}
		}

		/// <summary>
		/// Creates a DataSet containing one table, built by MakeTable, for each
		/// section of the file schema.
		/// </summary>
		/// <returns>The new DataSet.</returns>
		private DataSet DataSetFromSchema()
		{
			DataSet result = new DataSet();

			foreach (Section current in m_FileSchema.Sections)
				result.Tables.Add(MakeTable(current));

			return result;
		}

		/// <summary>
		/// Builds an empty DataTable for a schema section: the table is named after the
		/// section and gets one column per text field, typed from the field's TypeCode.
		/// </summary>
		/// <param name="Section">Schema section that supplies the table name and the text fields.</param>
		/// <returns>The new DataTable with its columns defined and no rows.</returns>
		private DataTable MakeTable(Section Section)
		{
			DataTable dt = new DataTable();
			dt.TableName = Section.Name;

			foreach (TextField field in Section.TextFields)
			{
				DataColumn column = new DataColumn(field.Name);

				// Map the TypeCode to its runtime type with typeof, which is checked at
				// compile time, instead of looking the type up by name at run time.
				switch (field.DataType)
				{
					case TypeCode.Boolean:
						column.DataType = typeof(bool);
						break;
					case TypeCode.Byte:
						column.DataType = typeof(byte);
						break;
					case TypeCode.SByte:
						column.DataType = typeof(sbyte);
						break;
					case TypeCode.Char:
						column.DataType = typeof(char);
						break;
					case TypeCode.DateTime:
						column.DataType = typeof(DateTime);
						break;
					case TypeCode.Decimal:
						column.DataType = typeof(decimal);
						break;
					case TypeCode.Double:
						column.DataType = typeof(double);
						break;
					case TypeCode.Single:
						column.DataType = typeof(float);
						break;
					case TypeCode.Int16:
						column.DataType = typeof(short);
						break;
					case TypeCode.Int32:
						column.DataType = typeof(int);
						break;
					case TypeCode.Int64:
						column.DataType = typeof(long);
						break;
					case TypeCode.UInt16:
						column.DataType = typeof(ushort);
						break;
					case TypeCode.UInt32:
						column.DataType = typeof(uint);
						break;
					case TypeCode.UInt64:
						column.DataType = typeof(ulong);
						break;
					case TypeCode.Object:
						column.DataType = typeof(object);
						break;
					case TypeCode.String:
						column.DataType = typeof(string);
						break;
					default:
						// Any other code (e.g. Empty, DBNull): leave the column at the
						// type DataColumn assigns by default.
						break;
				}

				dt.Columns.Add(column);
			}

			return dt;
		}

		/// <summary>
		/// Appends one row to the section's table, filled from the current values of the
		/// section's text fields, and resets each field value to an empty string.
		/// </summary>
		/// <param name="oDS">DataSet holding a table named after the section.</param>
		/// <param name="oSection">Section whose text fields supply the column values.</param>
		/// <returns>The row that was added.</returns>
		private DataRow AddRow(DataSet oDS, Section oSection)
		{
			DataRow row = oDS.Tables[oSection.Name].NewRow();

			foreach (TextField tf in oSection.TextFields)
			{
				row[tf.Name] = tf.Value;
				// Clear the field once its value has been copied into the row.
				tf.Value = "";
			}

			row.Table.Rows.Add(row);
			return row;
		}

		/// <summary>
		/// Reads the lines of the next section from the reader.
		/// </summary>
		/// <param name="reader">Reader positioned at the first line of the section.</param>
		/// <param name="Length">
		/// When greater than 0, the number of lines to read. Otherwise the section is read
		/// line by line until a line starting with '0' or the end of the file; blank lines
		/// are dropped and blocks starting with '1' (continuation headers) are skipped.
		/// </param>
		/// <param name="Lines">
		/// On entry, the lines of the previously read section; their count is used as the
		/// number of lines in a continuation header. On exit, the lines read for this
		/// section (an empty array when none were read).
		/// </param>
		/// <returns>
		/// When <paramref name="Length"/> is 0, true if at least one line was read;
		/// otherwise true if exactly <paramref name="Length"/> lines were read.
		/// </returns>
		private bool ReadSection(StreamReader reader, int Length, ref string[] Lines)
		{
			// Collect lines in a list. The former join-on-'|' then Split('|') approach
			// corrupted data lines containing '|' and threw when no line had been read
			// (for example at the end of the file).
			ArrayList collected = new ArrayList();

			if (Length > 0)
			{
				while ((reader.Peek() != -1) && (collected.Count < Length))
					collected.Add(reader.ReadLine());
			}
			else
			{
				// Lines may be null when no section has been read yet.
				int headerLength = (Lines == null) ? 0 : Lines.Length;

				while (reader.Peek() != -1)
				{
					char marker = (char)reader.Peek();
					if (marker == '0')
						break;

					if (marker == '1')
					{
						// Continuation header: skip as many lines as the previous section
						// had. Always consume at least one line so the loop makes progress.
						int toSkip = Math.Max(headerLength, 1);
						for (int skipped = 0; (skipped < toSkip) && (reader.Peek() != -1); skipped++)
							reader.ReadLine();
					}
					else
					{
						// Braces keep the append inside this branch; without them the last
						// data line was appended a second time after every skipped header.
						string sLine = reader.ReadLine();
						if (sLine.Trim().Length > 0)
							collected.Add(sLine);
					}
				}
			}

			Lines = (string[])collected.ToArray(typeof(string));

			if (Length == 0)	// Multi-row section: succeed when at least one line was read.
				return (Lines.Length > 0);
			else				// Otherwise succeed only when the requested number of lines was read.
				return (Lines.Length == Length);
		}
		/// <summary>
		/// Gets or sets the FilePath value stored on the file schema.
		/// </summary>
		public string FilePath
		{
			get
			{
				return this.m_FileSchema.FilePath;
			}
			set
			{
				this.m_FileSchema.FilePath = value;
			}
		}

		/// <summary>
		/// Gets or sets the delimiter character stored on the file schema.
		/// </summary>
		public char Delimeter
		{
			get
			{
				return this.m_FileSchema.Delimeter;
			}
			set
			{
				this.m_FileSchema.Delimeter = value;
			}
		}

		/// <summary>
		/// Gets or sets the quote character stored on the file schema.
		/// </summary>
		public char QuoteCharacter
		{
			get
			{
				return this.m_FileSchema.QuoteCharacter;
			}
			set
			{
				this.m_FileSchema.QuoteCharacter = value;
			}
		}

		/// <summary>
		/// Gets or sets the section collection stored on the file schema.
		/// </summary>
		public SectionCollection Sections
		{
			get
			{
				return this.m_FileSchema.Sections;
			}
			set
			{
				this.m_FileSchema.Sections = value;
			}
		}
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Web Developer
United States
Tampa, FL developer with about 11 years of experience.

Comments and Discussions