Click here to Skip to main content
15,030,781 members
Please Sign up or sign in to vote.
1.00/5 (1 vote)
See more:
I have a program to parse a CSV file from local filesystem to a specified SQL Server table.
Now when i execute the program i get error
System.IndexOutOfRangeException: 'Cannot find column 1'
exception on the line where i the progrsm attempts to populate the datatable.

On closer inspection the error shows that its emanating from row number 3 as below :
https://drive.google.com/file/d/1FOUavsf_D_1H5M2MGf6_v8kFDCNs_kYG/view?usp=sharing 


Program :

C#
<pre>         static void Main(string[] args)
        {            
            var absPath = @"C:\Users\user\Documents\Projects\MastercardSurveillance\fbc_mc_all_cards.csv";
           
            ProcessFile();    

            void ProcessFile()
            {
                string realPath = @"C:\Users\user\Documents\CSV";
                string appLog = "CSVERRORS";

                var logPath = realPath + Convert.ToString(appLog) + DateTime.Today.ToString("dd -MM-yy") + ".txt";
                if (!File.Exists(logPath))
                {
                    File.Create(logPath).Dispose();
                }
                var dt = GetDATATable();
                if (dt == null)
                {
                    return;
                }
                if (dt.Rows.Count == 0)
                {                
                    using (StreamWriter sw = File.AppendText(logPath))
                    {
                        sw.WriteLine("No rows imported after reading file " + absPath);
                        sw.Flush();
                        sw.Close();
                    }
                    return;
                }
                ClearData();
                InsertDATA();
            }
            DataTable GetDATATable()
            {
                var FilePath = absPath;
                string TableName = "Cards";
                string realPath = @"C:\Users\user\Documents\CSV";
                string appLog = "CSVERRORS";

                var logPath = realPath + Convert.ToString(appLog) + DateTime.Today.ToString("dd -MM-yy") + ".txt";
                if (!File.Exists(logPath))
                {
                    File.Create(logPath).Dispose();
                }
                    var dt = new DataTable(TableName);
                using (var csvReader = new TextFieldParser(FilePath))
                {
                    csvReader.SetDelimiters(new string[] { "," });
                    csvReader.HasFieldsEnclosedInQuotes = true;
                    var readFields = csvReader.ReadFields();
                    if (readFields == null)
                    {
                        using (StreamWriter sw = File.AppendText(logPath))
                        {
                            sw.WriteLine("Could not read header fields for file " + FilePath);
                            sw.Flush();
                            sw.Close();
                        }
                        
                        return null;
                    }
                    foreach (var dataColumn in readFields.Select(column => new DataColumn(column, typeof(string)) { AllowDBNull = true, DefaultValue = string.Empty }))
                    {
                        dt.Columns.Add(dataColumn);
                    }
                    while (!csvReader.EndOfData)
                    {
                        var data = csvReader.ReadFields();
                        if (data == null)
                        {
                            using (StreamWriter sw = File.AppendText(logPath))
                            {
                                sw.WriteLine(string.Format("Could not read fields on line {0} for file {1}", csvReader.LineNumber, FilePath));
                                sw.Flush();
                                sw.Close();
                            }
                            continue;
                        }
                        var dr = dt.NewRow();
                        for (var i = 0; i < data.Length; i++)
                        {
                            if (!string.IsNullOrEmpty(data[i]))
                            {
                                dr[i] = data[i];
                            }
                        }
                        dt.Rows.Add(dr);
                    }
                }
                return dt;
            }
             void ClearData()
             {               
                string SqlSvrConn = @"Server=XXXXXX-5QFK4BL\MSDEVOPS;Database=McardSurveillance;Trusted_Connection=True;MultipleActiveResultSets=true;";
                using (var sqlConnection = new SqlConnection(SqlSvrConn))
                {
                    sqlConnection.Open();
                    // Truncate the live table
                    using (var sqlCommand = new SqlCommand(_truncateLiveTableCommandText, sqlConnection))
                    {
                        sqlCommand.ExecuteNonQuery();
                    }
                }
                  
             }
            void InsertDATA()
            {
                string SqlSvrConn = @"Server=XXXXXX-5QFK4BL\MSDEVOPS;Database=McardSurveillance;Trusted_Connection=True;MultipleActiveResultSets=true;";
                DataTable table = GetDATATable();
                using (var sqlBulkCopy = new SqlBulkCopy(SqlSvrConn))
                {
                    sqlBulkCopy.DestinationTableName = "dbo.Cards";
                    for (var count = 0; count < table.Columns.Count; count++)
                    {
                        sqlBulkCopy.ColumnMappings.Add(count, count);
                    }
                    sqlBulkCopy.WriteToServer(table);
                }
            }

        } 


How can i identify and possibly exclude the extra data columns being returned from the CSV file?

What I have tried:

It appears there is a mismatch between number of columns in datatable and number of columns being read from the CSV file.
Im not sure however how i can account for this with my logic. For now i did not want to switch to using a CSV parse package but rather i need insight on how i can remove the extra column or rather ensure that the splitting takes account of all possible dubious characters.
For clarity i have a copy of the CSV file here :

https://drive.google.com/file/d/1bPFf8aGAkM1UAOhfCq6hUV_rexd7GfpP/view?usp=sharing
Posted
Updated 10-Aug-20 0:19am
v2

1 solution

Look at your data: it's not CSV.
It contains a couple of comma characters, yes - 30 of them, in several hundred rows of data - but that's not CSV. I have no idea what it is, but either you are trying to process the wrong file, or you need to find out what the format is supposed to be and work out how to process that.

No CSV parser will give you good results from that data.
   
Comments
Tshumore 10-Aug-20 6:23am
   
I have been getting an error whenever i open it in Excel that says : The file format and extension of '' dont match. The file could be corrupted or unsafe.
OriginalGriff 10-Aug-20 6:33am
   
Because it's not a CSV file.
It's also not an XLSX file either.

I have no idea what that is: you need to go back to where you got it and find out!

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)




CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900