Click here to Skip to main content
15,886,799 members
Articles / Programming Languages / XML

Validating data with Flat File Checker

Rate me:
Please Sign up or sign in to vote.
0.00/5 (No votes)
30 Oct 2009GPL32 min read 43.7K   881   17  
An article on data validation with a flat file schema generated in Flat File Checker.
Option Strict On
Imports System.Xml
Imports System.Xml.XPath
Imports System.Threading

''' <summary>
''' Represents the relational link between two files.
''' </summary>
''' <remarks>When the link is built it contains index that links rows in data file to rows in lookup</remarks>
Public Class FileLink
    ' Inherits Table
    Implements Xml.IHasXmlNode, IDataRule

    Private _case_sensitive As Boolean
    ''' <summary>
    ''' Primary Key field of the link
    ''' </summary>
    ''' <remarks></remarks>
    Private _pk As DataColumn
    ''' <summary>
    ''' Foreign Key field of the link
    ''' </summary>
    ''' <remarks></remarks>
    Private _fk As DataColumn
    ''' <summary>
    ''' This file will be used to load columns of external that are need for validation
    ''' </summary>
    ''' <remarks></remarks>
    Private _execution_lookup As FlatFile
    Private _files As FlatFileSchema
    Private _extChecks As ChecksCollection
    Protected Delegate Function CompareKeysFunc(ByVal primaryKey As String, ByVal foreignKey As String) As Boolean
    Private CompareKeys As CompareKeysFunc
    Private _action As RowAction
    Private _enforce As Boolean
    Private _index As List(Of List(Of Integer))
    Private _where As ChecksCollection
    '------------------------------------------
    ''' <summary>
    ''' Name of Primary Key Column in Lookup File of the Relational Link
    ''' </summary>
    ''' <value></value>
    ''' <returns></returns>
    ''' <remarks></remarks>
    Public Property PrimaryKey() As DataColumn
        Get
            Return _pk
        End Get
        Set(ByVal value As DataColumn)
            _pk = value
        End Set
    End Property
    ''' <summary>
    ''' Name of Foreign Key Column in Data File of the Relational Link
    ''' </summary>
    ''' <value></value>
    ''' <returns></returns>
    ''' <remarks></remarks>
    Public Property ForeignKey() As DataColumn
        Get
            Return _fk
        End Get
        Set(ByVal value As DataColumn)
            _fk = value
        End Set
    End Property

    ''' <summary>
    ''' Data File of the Link
    ''' </summary>
    ''' <value></value>
    ''' <returns></returns>
    ''' <remarks></remarks>
    Public ReadOnly Property File() As DataTable
        Get
            Return _fk.Datasource
        End Get
    End Property
    ''' <summary>
    ''' Name of the data file that is validated
    ''' </summary>
    ''' <value></value>
    ''' <returns></returns>
    ''' <remarks></remarks>
    Public ReadOnly Property FileName() As String
        Get
            Return _fk.Table.AliasName
        End Get
    End Property
    ''' <summary>
    ''' Name of the lookup file
    ''' </summary>
    ''' <value>Name of the lookup file</value>
    ''' <returns>Name of the file</returns>
    ''' <remarks></remarks>
    Public ReadOnly Property LookupName() As String
        Get
            If Not Lookup Is Nothing Then
                Return Lookup.AliasName
            Else : Return Nothing
            End If
        End Get
    End Property
    ''' <summary>
    ''' Flag shows whether orphan records should be considered as errors
    ''' </summary>
    ''' <value></value>
    ''' <returns>True if record not linked to the file should be added to data errors</returns>
    ''' <remarks></remarks>
    Public Property Enforce() As Boolean
        Get
            Return _enforce
        End Get
        Set(ByVal value As Boolean)
            _enforce = value
        End Set
    End Property
    ''' <summary>
    ''' Loads relevant fields from the linked(data) file
    ''' </summary>
    ''' <returns>Success of the load</returns>
    ''' <remarks></remarks>
    Public Function Load(ByVal wait As AutoResetEvent) As Boolean

        ' _execution_lookup = Lookup.createcopy()
        If TypeOf Lookup Is DelimitedFile Then ' Delimited File
            With CType(Lookup, DelimitedFile)
                _execution_lookup = FlatFile.Create(.Path, .FileType, .AliasName, .Delimiter, .Qualifier, Me._files)
            End With
        Else ' Fixed Position File
            With CType(Lookup, FlatFile)
                _execution_lookup = FlatFile.Create(.Path, .FileType, .AliasName)
            End With
        End If

        If Not InitiateColumns() Then
            logger.LogError("Could not initiate columns for file validation", "FileLink.Load")
            Return False
        End If


        If File Is Nothing Then
            logger.LogError("File " & FileName & " was not defined. Please correct your schema", "FileLink.Load")
            Return False
        End If

        If Not Lookup.Load(wait) Then
            logger.LogError("Failed to load file: " & Lookup.AliasName, "FileLink.Load")
            Return False
        End If
        ' read header

        If Not PopulateIndex() Then
            Return False
        End If
        Return True
    End Function
    Protected Function CompareKeysCase(ByVal primaryKey As String, ByVal foreignKey As String) As Boolean
        Return primaryKey = foreignKey
    End Function
    Protected Function CompareKeyNoCase(ByVal primaryKey As String, ByVal foreignKey As String) As Boolean
        Return UCase(primaryKey) = UCase(foreignKey)
    End Function
    ''' <summary>
    ''' Populates index that linkes rows from data files to lookup
    ''' </summary>
    ''' <returns></returns>
    ''' <remarks>Success status</remarks>
    Public Function PopulateIndex() As Boolean
        Dim _environment As String = "FileLink.PopulateIndex"
        Dim col_data As DataColumn
        Dim col_lookup As DataColumn
        Dim index_row As List(Of Integer)
        Dim data_row As String
        Dim lookup_row As String
        Dim i As Integer = 0 ' Row counter in data file
        Dim j As Integer = 0 ' Row counter in lookup file
        If _index Is Nothing Then _index = New List(Of List(Of Integer))

        Dim action As RowAction = Nothing

        If Not Me._action Is Nothing AndAlso Me._action.Column Is Nothing Then Me._action.Column = Me.ForeignKey
        '
        col_data = _fk 'get foreign key column
        col_lookup = _pk ' get primary key column

        If col_data Is Nothing Then
            logger.LogError("Foreign Key column name is wrong: " & _fk.FullName & "", _environment)
            Return False
            'Throw New ColumnNameException("Foreign Key column name is wrong", _fk, _ffile.AliasName)
        End If
        If col_data.Index < 0 Then
            logger.LogError("Foreign Key column Index has not been assigned: " & _fk.FullName & "", _environment)
            Return False
        End If
        If col_lookup Is Nothing Then
            logger.LogError("Primary Key column name is wrong: " & _pk.FullName & "", _environment)
            'Throw New ColumnNameException("Primary Key column name is wrong", _pk, _flookup.AliasName)
            Return False
        End If
        Dim val_matched As Boolean = False
        i = 0 ' Row in the data source -1
        For i = 0 To col_data.Table.Rows - 1 ' go through records in data file to match with records in lookup
            data_row = col_data(i)
            val_matched = False
            index_row = New List(Of Integer)
            index_row.Add(i) 'Data Row Number in first column
            If Len(data_row) > 0 Then
                j = 0 ' row in the lookup source
                For Each lookup_row In col_lookup
                    If Len(lookup_row) > 0 Then
                        If CompareKeys(data_row, lookup_row) Then
                            index_row.Add(j) ' Lookup Row number in second column
                            val_matched = True
                            Exit For
                        End If
                    End If
                    j += 1
                Next lookup_row
                _index.Add(index_row)
            Else ' Empty Value

            End If
            If Not val_matched Then ' No value was found in the lookup file
                If _enforce Then
                    If _where Is Nothing OrElse Me._where.Evaluate(i) Then
                        File.AddError(col_data, i + 1, action, data_row, Me)
                        action = Me._action
                    End If
                End If
            End If
            File.AddAction(action, i)
            action = Nothing
        Next i
        Return True
    End Function
    ''' <summary>
    ''' Gets row index in the lookup file for the row in the data file
    ''' </summary>
    ''' <param name="row">Row in the data file</param>
    ''' <returns>Row in the lookup file</returns>
    ''' <remarks></remarks>
    Public Function LookupRow(ByVal row As Integer) As Integer

        If _index Is Nothing OrElse _index.Count = 0 Then Return -1
        If row >= _index.Count Then Return -1
        If _index(row)(0) = row Then
            If _index(row).Count > 1 Then
                'Matching value exists
                Return _index(row)(1)
            Else : Return -1 ' No match for DataRow in the look up file
            End If
        End If
        ' Return row of the lookup data

        Return -1
    End Function
    ''' <summary>
    ''' Copy of file of the PK field that is used to get columns from external file
    ''' </summary>
    ''' <value>Copy of Lookup file</value>
    ''' <returns></returns>
    ''' <remarks></remarks>
    Public ReadOnly Property ExecutionLookup() As FlatFile
        Get
            Return _execution_lookup
        End Get
    End Property
    Public ReadOnly Property Lookup() As DataTable
        Get
            If Not _pk Is Nothing Then
                Return _pk.Datasource
            Else : Return Nothing
            End If
        End Get
    End Property
    Public Function InitiateColumns() As Boolean
        AddColumn(_pk)
        If Not _where Is Nothing Then
            _where.Initiate()
            _where.InitiateMyColumns()
        End If

        Return True
    End Function
    Friend ReadOnly Property Document() As XmlDocument
        Get
            If Not Me._files Is Nothing Then
                Return Me._files.Document
            Else : Return Nothing
            End If
        End Get
    End Property

    Public Sub AddColumn(ByVal column As DataColumn)
        Dim existing_column As DataColumn
        If _extChecks Is Nothing Then
            _extChecks = New AndChecksCollection(column)
            ExecutionLookup.AddColumn(column)
            Exit Sub
        End If
        For Each existing_column In Lookup.ColumnsToLoad
            If existing_column.Name = column.Name Then
                column = existing_column
                Exit Sub
            End If
        Next
        ExecutionLookup.Columns.Add(column)
        Exit Sub
    End Sub
    Public Sub New(ByVal foreignKey As FileColumn, ByVal primaryKey As FileColumn, ByVal files As FlatFileSchema)
        ' Assign attributes for the link
        Me.New(foreignKey, primaryKey, files, False, True)
    End Sub
    Public Sub New(ByVal foreignKey As FileColumn, ByVal primaryKey As FileColumn, ByVal files As FlatFileSchema, ByVal enforce As Boolean)
        ' Assign attributes for the link
        Me.New(foreignKey, primaryKey, files, enforce, True)
    End Sub
    Public Sub New(ByVal foreignKey As DataColumn, ByVal primaryKey As DataColumn, ByVal files As FlatFileSchema, ByVal enforce As Boolean, ByVal caseSensitive As Boolean)
        ' Assign attributes for the link
        _pk = primaryKey
        _fk = foreignKey
        _enforce = enforce
        _files = files
        Me.CaseSensitive = caseSensitive
    End Sub
    Public Function Copy() As FileLink

        Return New FileLink(_fk, _pk, _files, _enforce, _case_sensitive)
    End Function
    Public Sub New(ByVal definition As IXPathNavigable, ByVal files As FlatFileSchema)
        ' Get Attributes from the node
        _files = files
        Dim navigator As XPathNavigator = definition.CreateNavigator
        _enforce = False
        Dim _lookup As String
        Dim _datafile As String
        Dim _pk_str, _fk_str As String

        _datafile = navigator.GetAttribute("DataFile", "")
        _lookup = navigator.GetAttribute("Lookup", "")
        _pk_str = navigator.GetAttribute("PrimaryKey", "")
        _fk_str = navigator.GetAttribute("ForeignKey", "")

        ' ------------------------------------------
        ' Here we need to find those columns in existing files!!!
        Try
            _fk = files.File(_datafile).Column(_fk_str)
            _pk = files.File(_lookup).Column(_pk_str)
        Catch ex As NullReferenceException
            Throw New XmlException("Wrong definition of the relational link. Check file and column names.", ex)
        End Try
        ' ------------------------------------------
        _case_sensitive = Not navigator.GetAttribute("CaseSensitive", "") = "False"

        SetCompareKeys(_case_sensitive)

        Dim enforceNavigator As XPathNavigator
        If navigator.MoveToFirstChild Then ' has child nodes
            Do
                If navigator.Name = "Enforce" Then
                    _enforce = True
                    enforceNavigator = navigator.CreateNavigator
                    If enforceNavigator.MoveToFirstChild Then
                        Do
                            If enforceNavigator.Name = "Action" Then
                                Me.Action = RowAction.Create(enforceNavigator, Me)
                            ElseIf enforceNavigator.Name = "Where" Then
                                _where = ChecksCollection.CreateRuleContainer(enforceNavigator, Me, Me.Column, True)
                                _where.InWhere = True
                            End If
                        Loop While enforceNavigator.MoveToNext
                    End If
                    Exit Do  ' Can have only one Enforce node
                End If
            Loop While navigator.MoveToNext
        End If

    End Sub
    Public Property InWhere() As Boolean Implements IDataRule.InWhere
        Get
            Return False
        End Get
        Set(ByVal value As Boolean)

        End Set
    End Property
    Public Property Parent() As IDataRule Implements IDataRule.Parent
        Get
            Return Nothing
        End Get
        Set(ByVal value As IDataRule)

        End Set
    End Property
    Public Sub SetCompareKeys(ByVal caseSensitive As Boolean)
        If caseSensitive Then
            CompareKeys = AddressOf Me.CompareKeysCase
        Else
            CompareKeys = AddressOf Me.CompareKeyNoCase
        End If
    End Sub
    ''' <summary>
    ''' Returns XML node with definition of the link
    ''' </summary>
    ''' <returns></returns>
    ''' <remarks></remarks>
    Public Function GetNode() As XmlNode Implements IHasXmlNode.GetNode

        Dim xmlLinkNode As XmlNode = Document.CreateElement("Link")
        Dim xmlDataFile As XmlAttribute = Document.CreateAttribute("DataFile")
        Dim xmlPK As XmlAttribute = Document.CreateAttribute("PrimaryKey")
        Dim xmlFK As XmlAttribute = Document.CreateAttribute("ForeignKey")
        Dim xmlLookup As XmlAttribute = Document.CreateAttribute("Lookup")
        If Not Me._case_sensitive Then
            Dim attrCaseSensitive As XmlAttribute = Document.CreateAttribute("CaseSensitive")
            attrCaseSensitive.Value = "False"
            xmlLinkNode.Attributes.Append(attrCaseSensitive)
        End If
        xmlPK.Value = _pk.Name
        xmlFK.Value = _fk.Name
        xmlLookup.Value = Me.LookupName
        xmlDataFile.Value = Me.FileName
        xmlLinkNode.Attributes.Append(xmlPK)
        xmlLinkNode.Attributes.Append(xmlFK)
        xmlLinkNode.Attributes.Append(xmlLookup)
        xmlLinkNode.Attributes.Append(xmlDataFile)

        If Me.Enforce Then
            Dim xmlEnforceNode As XmlNode = Document.CreateElement("Enforce")
            If Not _action Is Nothing Then
                xmlEnforceNode.AppendChild(_action.GetNode(Document))
            End If
            xmlLinkNode.AppendChild(xmlEnforceNode)
        End If

        Return xmlLinkNode

    End Function
    Public Sub Flush()

        If Not Lookup Is Nothing Then Lookup.Flush()
    End Sub


    Public Property CaseSensitive() As Boolean
        Get
            Return _case_sensitive
        End Get
        Set(ByVal value As Boolean)
            _case_sensitive = value
            SetCompareKeys(value)
        End Set
    End Property

    Public Function Evaluate(ByVal row As Integer) As Boolean Implements IDataRule.Evaluate

        Dim indexLine As List(Of Integer) = _index(row)
        If Not indexLine Is Nothing Then
            If indexLine.Count = 2 Then
                If indexLine(0) > 0 Then
                    Return True
                Else ' Row number in index should be greater then Zero for valid link
                    Return False
                End If
            Else ' Row in index should have 2 values
                Return False
            End If
        Else ' index line is null reference
            Return False
        End If

    End Function

    Public Function RunChecks(ByVal wait As System.Threading.AutoResetEvent) As Boolean Implements IDataRule.RunChecks
        Dim errored As Integer
        Dim action As RowAction
        If _index.Count = 0 Then
            If Not Me.PopulateIndex() Then
                Return False
            End If
        End If
        Dim i As Integer
        For i = 0 To _index.Count - 1
            action = Nothing
            If Not Evaluate(i) Then
                errored += 1
                Column.Table.AddError(Column, i, action, Column(i), Me)
                ' Apply action for the errored row
                action = _action
            End If
            Column.Datasource.AddAction(action, i)
        Next

        Return (errored = 0)

    End Function

    Public Property Column() As DataColumn Implements IDataRule.Column
        Get
            Return Me.ForeignKey
        End Get
        Set(ByVal value As DataColumn)
            Me.ForeignKey = value
        End Set
    End Property

    Public ReadOnly Property ErrorMessage() As String Implements IDataRule.ErrorMessage
        Get
            Return "Reference between " & Me.FileName & " and " & Me.LookupName & " is broken"
        End Get
    End Property

    Public Property Action() As RowAction Implements IDataRule.Action
        Get
            Return _action
        End Get
        Set(ByVal value As RowAction)
            _action = value
        End Set
    End Property

    Public ReadOnly Property Type() As DataRuleType Implements IDataRule.Type
        Get
            Return DataRuleType.Reference
        End Get
    End Property
End Class

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)


Written By
Database Developer
United Kingdom United Kingdom
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions