|
using System;
using System.IO;
using System.Text;
using System.Collections;
using System.Data;
using System.Text.RegularExpressions ;
namespace ReportParser
{
public class FileParser
{
// Schema object describing the file; assigned in both constructors and
// used by the parsing methods and the pass-through properties below.
private FileSchema m_FileSchema = null;
// Path of the data file opened by ParseToDataSet (backs the FileName property).
private string m_FileName = "";
// Backs the SchemaFile property.
private string m_SchemaFile = "";
// Backs the read-only CurrentLineNumber property.
private int m_CurrentLineNumber = 0;
// Debugging aid: ParseToDataSet writes a debug message when a line read
// from the file is equal to this value.
private string LineToFind= "";
/// <summary>
/// Creates a parser for the given data file, with a schema created by the
/// parameterless <c>FileSchema</c> constructor.
/// </summary>
/// <param name="fileName">Path to the data file to parse.</param>
public FileParser(string fileName)
{
    m_FileName = fileName;
    m_FileSchema = new FileSchema();
}

/// <summary>
/// Creates a parser for the given data file, with a schema constructed
/// from the given schema file.
/// </summary>
/// <param name="fileName">Path to the data file to parse.</param>
/// <param name="schemaFile">Path handed to the <c>FileSchema</c> constructor.</param>
public FileParser(string fileName, string schemaFile)
{
    m_FileName = fileName;
    // Remember the schema path so the SchemaFile property reports the file
    // that was actually used instead of an empty string.
    m_SchemaFile = schemaFile;
    m_FileSchema = new FileSchema(schemaFile);
}
/// <summary>
/// Gets or sets the path of the data file that is opened when parsing.
/// </summary>
public string FileName
{
    get
    {
        return this.m_FileName;
    }
    set
    {
        this.m_FileName = value;
    }
}
/// <summary>
/// Gets or sets the stored schema file path.
/// The setter only stores the value; it does not rebuild the schema object.
/// </summary>
public string SchemaFile
{
    get
    {
        return this.m_SchemaFile;
    }
    set
    {
        this.m_SchemaFile = value;
    }
}
/// <summary>
/// Gets the current line number while parsing.
/// NOTE(review): nothing in this class appears to assign the backing field,
/// so this likely always reads 0 - confirm before relying on it.
/// </summary>
public int CurrentLineNumber
{
    get
    {
        return this.m_CurrentLineNumber;
    }
}
/// <summary>
/// Trims leading and trailing white space from every element of the array, in place.
/// A null array is ignored and null elements are left as null.
/// </summary>
/// <param name="fields">The field values to trim; elements are replaced in place.</param>
private void TrimFields(ref string[] fields)
{
    if (fields == null)
    {
        return;
    }

    for (int x = 0; x < fields.Length; x++)
    {
        // Skip null entries instead of failing with a NullReferenceException.
        if (fields[x] != null)
        {
            fields[x] = fields[x].Trim();
        }
    }
}
/// <summary>
/// Reads the data file section by section and loads the field values into a
/// DataSet whose tables are built from the file schema.
/// </summary>
/// <remarks>
/// Sections are processed in schema order, wrapping back to the first section
/// after the last one, until ReadSection returns false. A section with
/// Length &gt; 0 produces one row for all of its lines; a section with
/// Length == 0 produces one row per line. The row added for the first section
/// is passed to AddRow as the parent row for the rows that follow it.
/// </remarks>
/// <returns>The populated DataSet.</returns>
/// <exception cref="ApplicationException">
/// Any exception raised while reading sections or adding rows; the message
/// carries the current field and line, and the original exception is
/// available as InnerException.
/// </exception>
public DataSet ParseToDataSet()
{
    DataSet oDS = DataSetFromSchema();
    string CurrentFieldName = "";   // Current field name, for diagnostics.
    string CurrentLineValue = "";   // Current line text, for diagnostics.

    // 'using' guarantees the reader is closed on failure as well as on success.
    using (StreamReader reader = new StreamReader(m_FileName))
    {
        int iSection = 0;           // Index of the section currently being processed.
        string[] Lines = null;      // Lines read for the current section.
        DataRow prevRow = null;     // Most recently added row (source of carried-over values).
        DataRow parentRow = null;   // Row of the first section, parent of the following rows.
        Section oSection = m_FileSchema.Sections[0];

        try
        {
            while (ReadSection(reader, oSection.Length, oSection, ref Lines))
            {
                int CurrentLine = 0; // Line index inside the current section.
                foreach (string Line in Lines)
                {
                    CurrentLineValue = Line;
                    if (LineToFind == CurrentLineValue)
                    {
                        System.Diagnostics.Debug.WriteLine("We are Here!!!");
                    }

                    foreach (TextField tf in oSection.TextFields)
                    {
                        CurrentFieldName = tf.Name;
                        if (tf.LineNumber == CurrentLine)
                        {
                            // NOTE(review): a StartIndex of 0 is treated like the
                            // carry-over marker, so a field starting in the first
                            // column is never read from the line - confirm intent.
                            if (tf.StartIndex > 0)
                            {
                                tf.Value = Line.Substring(tf.StartIndex, tf.Length).Trim();
                            }
                            else if (iSection > 0)
                            {
                                // Not the first section: carry the value over
                                // from the previously added row.
                                tf.Value = prevRow[tf.Name];
                            }
                        }
                    }

                    if (oSection.Length > 0)
                    {
                        // Fixed-length section: keep collecting fields, row is added below.
                        CurrentLine++;
                    }
                    else
                    {
                        // Variable-length section: every line is its own row.
                        prevRow = AddRow(oDS, oSection, parentRow);
                    }
                }

                if (oSection.Length > 0)
                {
                    // All lines of a fixed-length section form a single row.
                    prevRow = AddRow(oDS, oSection, parentRow);
                }

                if (iSection == 0)
                {
                    parentRow = prevRow;
                }

                // Advance to the next section, wrapping around after the last one.
                iSection++;
                if (iSection > (m_FileSchema.Sections.Count - 1))
                {
                    iSection = 0;
                }
                oSection = m_FileSchema.Sections[iSection];
            }

            return oDS;
        }
        catch (Exception ex)
        {
            string strMessage = "FileParser.ParseToDataSet: The following error occured: " + ex.Message
                + "\nStack Trace: " + ex.StackTrace
                + "\nCurrent Field: " + CurrentFieldName
                + "\nText of Line Causing Error:\n\"" + CurrentLineValue + "\"";
            // Keep the original exception so callers can inspect its type and details.
            throw new ApplicationException(strMessage, ex);
        }
    }
}
/// <summary>
/// Creates a DataSet containing one table per schema section and, after each
/// table is added, asks AppendDataRelation to link it to the first table.
/// </summary>
/// <returns>The new DataSet with its tables and relations.</returns>
private DataSet DataSetFromSchema()
{
    DataSet result = new DataSet();
    foreach (Section current in m_FileSchema.Sections)
    {
        // The number of tables already present is the index the new table will get.
        int tableIndex = result.Tables.Count;
        bool isFirstSection = (tableIndex == 0);

        result.Tables.Add(MakeTable(current, isFirstSection));
        AppendDataRelation(result, tableIndex);
    }
    return result;
}
/// <summary>
/// Adds a nested relation from the first column of the first table to the
/// column of the same name in table <paramref name="i"/>, when the DataSet
/// holds more than one table and table <paramref name="i"/> has such a column.
/// </summary>
/// <param name="oDS">DataSet whose relations are extended.</param>
/// <param name="i">Index of the candidate child table.</param>
private void AppendDataRelation(DataSet oDS, int i)
{
    if (oDS.Tables.Count <= 1)
    {
        return;
    }

    DataTable headerTable = oDS.Tables[0];
    DataColumn keyColumn = headerTable.Columns[0];
    DataTable candidate = oDS.Tables[i];

    if (candidate.Columns.IndexOf(keyColumn.ColumnName) == -1)
    {
        return; // No matching column, so nothing to relate.
    }

    DataColumn foreignKey = candidate.Columns[keyColumn.ColumnName];
    string linkName = headerTable.TableName + "_" + candidate.TableName;
    DataRelation link = new DataRelation(linkName, keyColumn, foreignKey);
    link.Nested = true;
    oDS.Relations.Add(link);
}
/// <summary>
/// Builds a DataTable named after the section, with one column per text field.
/// </summary>
/// <param name="Section">Section supplying the table name and the fields.</param>
/// <param name="isHeader">
/// When true, fields whose StartIndex is -1 become auto-increment columns
/// with seed -1 and step -1.
/// </param>
/// <returns>The new, empty DataTable.</returns>
private DataTable MakeTable(Section Section, bool isHeader)
{
    DataTable table = new DataTable();
    table.TableName = Section.Name;

    foreach (TextField field in Section.TextFields)
    {
        DataColumn col = new DataColumn(field.Name);

        // Type codes without a mapping keep the column's default data type.
        Type mapped = TypeFromTypeCode(field.DataType);
        if (mapped != null)
        {
            col.DataType = mapped;
        }

        bool isGeneratedKey = isHeader && (field.StartIndex == -1);
        if (isGeneratedKey)
        {
            col.AutoIncrement = true;
            col.AutoIncrementStep = -1;
            col.AutoIncrementSeed = -1;
        }

        table.Columns.Add(col);
    }

    return table;
}

/// <summary>
/// Maps a TypeCode to the matching System type for the codes handled by
/// MakeTable; returns null for every other code.
/// </summary>
private static Type TypeFromTypeCode(TypeCode code)
{
    switch (code)
    {
        case TypeCode.Boolean:  return typeof(bool);
        case TypeCode.Byte:     return typeof(byte);
        case TypeCode.Char:     return typeof(char);
        case TypeCode.DateTime: return typeof(DateTime);
        case TypeCode.Decimal:  return typeof(decimal);
        case TypeCode.Double:   return typeof(double);
        case TypeCode.Int16:    return typeof(short);
        case TypeCode.Int32:    return typeof(int);
        case TypeCode.Int64:    return typeof(long);
        case TypeCode.Object:   return typeof(object);
        case TypeCode.Single:   return typeof(float);
        case TypeCode.String:   return typeof(string);
        default:                return null;
    }
}
/// <summary>
/// Creates a row in the section's table from the current field values,
/// links it to <paramref name="parentRow"/> when applicable, adds it to the
/// table and resets every field value to an empty string.
/// </summary>
/// <param name="oDS">DataSet holding the section's table and the relations.</param>
/// <param name="oSection">Section whose fields supply the values.</param>
/// <param name="parentRow">
/// Parent row to attach the new row to, or null. It is ignored when the
/// table has an auto-increment column.
/// </param>
/// <returns>The row that was added.</returns>
private DataRow AddRow(DataSet oDS, Section oSection, DataRow parentRow)
{
    DataTable oDT = oDS.Tables[oSection.Name];
    DataRow oDR = oDT.NewRow();

    foreach (TextField field in oSection.TextFields)
    {
        if (oDT.Columns[field.Name].AutoIncrement)
        {
            // The table generates this value itself; a row in such a table
            // is not attached to a parent row.
            parentRow = null;
        }
        else
        {
            oDR[field.Name] = field.Value;
        }

        // Clear the field so a stale value cannot leak into the next row.
        field.Value = "";
    }

    if (parentRow != null)
    {
        foreach (DataRelation oDRN in oDS.Relations)
        {
            // Select the relation by its child table. Searching the relation
            // name for the section name misses a child whose name is a prefix
            // of the parent's name and can pick the wrong relation when one
            // section name is contained in another.
            if (oDRN.ChildTable == oDT)
            {
                oDR.SetParentRow(parentRow, oDRN);
                break;
            }
        }
    }

    oDT.Rows.Add(oDR);
    return oDR;
}
/// <summary>
/// Reads the lines of one section from the reader into <paramref name="Lines"/>.
/// </summary>
/// <remarks>
/// With Length &gt; 0, up to Length lines are read. With Length == 0, lines
/// are read until the next line starts with '0' or the input ends; a line
/// starting with '1' inside such a section is consumed as a repeated parent
/// section and is not part of the result.
/// Lines are collected in a list rather than joined and split on '|', so a
/// data line that contains '|' stays a single line. When no line at all was
/// read, <paramref name="Lines"/> is set to an empty array instead of failing.
/// </remarks>
/// <param name="reader">Reader positioned at the start of the section.</param>
/// <param name="Length">Number of lines in the section, or 0 for a variable-length section.</param>
/// <param name="oSection">Section being read; supplies SectionFormat, EndString and ParentSection.</param>
/// <param name="Lines">
/// On entry, the lines of the previously read section (its length sizes a
/// repeated parent section); on return, the lines of this section.
/// </param>
/// <returns>
/// For Length == 0: true when at least one line was read, or when none was
/// read but more input remains. For Length &gt; 0: true when exactly Length
/// lines were read and, for a header, the last line fully matches EndString
/// (otherwise the reader skips to the next line starting with '1' and retries).
/// False when the input is exhausted.
/// </returns>
private bool ReadSection(StreamReader reader, int Length, Section oSection, ref string[] Lines)
{
    ArrayList collected = new ArrayList();

    if (Length > 0) // Fixed-length header or footer section.
    {
        while ((reader.Peek() != -1) && (collected.Count < Length))
        {
            collected.Add(reader.ReadLine());
        }
    }
    else // Variable-length details section.
    {
        while (reader.Peek() != -1)
        {
            char next = (char)reader.Peek();
            if (next == '0') // TODO: Should be reading from the configuration.
            {
                break;
            }

            if (next == '1')
            {
                // Advance the reader over a continuation header.
                ReadSection(reader, Lines.Length, oSection.ParentSection, ref Lines);
            }
            else
            {
                collected.Add(reader.ReadLine());
            }
        }
    }

    if (collected.Count == 0)
    {
        // Nothing was read: either the input is exhausted (stop parsing) or a
        // details section is empty and the next section follows immediately.
        Lines = new string[0];
        return reader.Peek() != -1;
    }

    Lines = (string[])collected.ToArray(typeof(string));

    if (Length == 0)
    {
        return (Lines.Length > 0);
    }

    if (oSection.SectionFormat == SectionFormat.Header)
    {
        string sLine = Lines[Lines.Length - 1];
        Regex regex = new Regex(oSection.EndString);
        Match m = regex.Match(sLine);

        // The header is accepted only when its last line matches EndString in full.
        if (m.Success && m.Length == sLine.Length)
        {
            return (Lines.Length == Length);
        }

        // Not a valid header: skip ahead to the next line starting with '1' and retry.
        while ((reader.Peek() != -1) && ((char)reader.Peek() != '1'))
        {
            reader.ReadLine();
        }

        if (reader.Peek() != -1)
        {
            return ReadSection(reader, Length, oSection, ref Lines);
        }
        return false;
    }

    // Footer: discard everything up to the next line starting with '1'.
    while ((reader.Peek() != -1) && ((char)reader.Peek() != '1'))
    {
        reader.ReadLine();
    }
    return (Lines.Length == Length);
}
/// <summary>
/// Gets or sets the FilePath value of the underlying file schema.
/// </summary>
public string FilePath
{
    get
    {
        return this.m_FileSchema.FilePath;
    }
    set
    {
        this.m_FileSchema.FilePath = value;
    }
}
/// <summary>
/// Gets or sets the delimiter character stored in the underlying file schema.
/// </summary>
public char Delimeter
{
    get
    {
        return this.m_FileSchema.Delimeter;
    }
    set
    {
        this.m_FileSchema.Delimeter = value;
    }
}
/// <summary>
/// Gets or sets the quote character stored in the underlying file schema.
/// </summary>
public char QuoteCharacter
{
    get
    {
        return this.m_FileSchema.QuoteCharacter;
    }
    set
    {
        this.m_FileSchema.QuoteCharacter = value;
    }
}
/// <summary>
/// Gets or sets the section collection of the underlying file schema.
/// </summary>
public SectionCollection Sections
{
    get
    {
        return this.m_FileSchema.Sections;
    }
    set
    {
        this.m_FileSchema.Sections = value;
    }
}
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.