Click here to Skip to main content
15,886,693 members
Articles / Programming Languages / C#

LINQ to CSV library

Rate me:
Please Sign up or sign in to vote.
4.97/5 (217 votes)
10 Jan 2015Apache23 min read 997.5K   482  
Easy to use library to use CSV and tab delimited files with LINQ queries.
using System;
using System.Collections.Generic;
using System.IO;

namespace LINQtoCSV
{

    /// <summary>
    /// Entry point of the library.
    /// Reads comma separated or tab delimited data into an IEnumerable&lt;T&gt;
    /// that can be used with LINQ queries, and writes an IEnumerable&lt;T&gt;
    /// back out as delimited text.
    /// </summary>
    public class CsvContext
    {
        /// ///////////////////////////////////////////////////////////////////////
        /// Read
        /// 
        /// <summary>
        /// Reads the comma separated values from a file.
        /// Returns the data in an IEnumerable&lt;T&gt; that can be used for LINQ queries.
        /// 
        /// The file will be closed after the last line has been processed.
        /// Because the library implements deferred reading (using Yield Return), this may not happen
        /// for a while.
        /// </summary>
        /// <typeparam name="T">
        /// The records in the returned IEnumerable&lt;T&gt; will be of this type.
        /// </typeparam>
        /// <param name="fileName">
        /// The data will be read from this file.
        /// </param>
        /// <param name="fileDescription">
        /// Describes the format of the file: separator character, whether the
        /// first line holds column names, text encoding, and so on.
        /// </param>
        /// <returns>
        /// Values read from the file.
        /// </returns>
        public IEnumerable<T> Read<T>(string fileName, CsvFileDescription fileDescription) where T : class, new()
        {
            // Note that ReadData will not be called right away, but when the returned 
            // IEnumerable<T> actually gets accessed.

            IEnumerable<T> ie = ReadData<T>(fileName, null, fileDescription);
            return ie;
        }

        /// <summary>
        /// Reads values from a stream, using a default CsvFileDescription.
        /// </summary>
        public IEnumerable<T> Read<T>(StreamReader stream) where T : class, new()
        {
            return Read<T>(stream, new CsvFileDescription());
        }

        /// <summary>
        /// Reads values from a file, using a default CsvFileDescription.
        /// </summary>
        public IEnumerable<T> Read<T>(string fileName) where T : class, new()
        {
            return Read<T>(fileName, new CsvFileDescription());
        }

        /// <summary>
        /// Reads the comma separated values from a stream.
        /// The stream must be seekable, because every enumeration of the returned
        /// IEnumerable&lt;T&gt; rewinds the stream to its start.
        /// </summary>
        public IEnumerable<T> Read<T>(StreamReader stream, CsvFileDescription fileDescription) where T : class, new()
        {
            return ReadData<T>(null, stream, fileDescription);
        }

        /// ///////////////////////////////////////////////////////////////////////
        /// ReadData
        /// <summary>
        /// Iterator that does the actual reading. Because it uses Yield Return,
        /// its body only runs once the returned IEnumerable&lt;T&gt; is enumerated.
        /// </summary>
        /// <typeparam name="T">Type of the records produced.</typeparam>
        /// <param name="fileName">
        /// Name of the file associated with the stream.
        /// null if there is no such file.
        /// When given, the file is opened (and closed) by this method;
        /// otherwise the passed-in stream is rewound and used.
        /// </param>
        /// <param name="stream">
        /// All data is read from this stream, unless fileName is given.
        /// 
        /// This is a StreamReader rather than a TextReader,
        /// because we need to be able to seek back to the start of the
        /// stream, and you can't do that with a TextReader (or a StringReader).
        /// </param>
        /// <param name="fileDescription">Describes the file format.</param>
        /// <returns>One T per non-empty data line.</returns>
        private IEnumerable<T> ReadData<T>(
                    string fileName, 
                    StreamReader stream, 
                    CsvFileDescription fileDescription) where T : class, new()
        {
            // If T implements IDataRow, then we're reading raw data rows 
            bool readingRawDataRows = typeof(IDataRow).IsAssignableFrom(typeof(T));

            // The constructor for FieldMapper_Reading will throw an exception if there is something
            // wrong with type T. So invoke that constructor before you open the file, because if there
            // is an exception, the file will not be closed.
            //
            // If T implements IDataRow, there is no need for a FieldMapper, because in that case we're
            // returning raw data rows.
            FieldMapper_Reading<T> fm = null;

            if (!readingRawDataRows)
            {
                fm = new FieldMapper_Reading<T>(fileDescription, fileName, false);
            }

            // -------
            // Each time the IEnumerable<T> that is returned from this method is 
            // accessed in a foreach, ReadData is called again (not the original Read overload!)
            //
            // So, open the file here, or rewind the stream.

            bool readingFile = !string.IsNullOrEmpty(fileName);

            if (readingFile)
            {
                stream = new StreamReader(
                                    fileName, 
                                    fileDescription.TextEncoding,
                                    fileDescription.DetectEncodingFromByteOrderMarks);
            }
            else
            {
                // Rewind the stream

                if ((stream == null) || (!stream.BaseStream.CanSeek))
                {
                    throw new BadStreamException();
                }

                stream.BaseStream.Seek(0, SeekOrigin.Begin);
            }

            // ----------
            // Declared outside the try so the finally block can see them; assigned
            // inside the try so that if any of these constructors throws, the
            // finally still closes the stream that was just opened above.
            // (Previously these ran outside the try, leaking the stream on failure.)
            CsvStream cs = null;
            IDataRow row = null;
            AggregatedException ae = null;

            try
            {
                cs = new CsvStream(stream, null, fileDescription.SeparatorChar);

                // If we're reading raw data rows, instantiate a T so we return objects
                // of the type specified by the caller.
                // Otherwise, instantiate a DataRow, which also implements IDataRow.
                if (readingRawDataRows)
                {
                    row = new T() as IDataRow;
                }
                else
                {
                    row = new DataRow();
                }

                ae = new AggregatedException(typeof(T).ToString(), fileName, fileDescription.MaximumNbrExceptions);

                bool firstRow = true;
                while (cs.ReadRow(ref row))
                {
                    // Skip empty lines.
                    // Important. If there is a newline at the end of the last data line, the code
                    // thinks there is an empty line after that last data line.
                    if ((row.Count == 1) && 
                        ((row[0].Value == null) ||
                         (string.IsNullOrEmpty(row[0].Value.Trim())) ))
                    {
                        continue;
                    }

                    if (firstRow && fileDescription.FirstLineHasColumnNames)
                    {
                        // Header line: map the column names to fields/properties of T.
                        if (!readingRawDataRows) { fm.ReadNames(row); }
                    }
                    else
                    {
                        T obj = default(T);
                        try
                        {
                            if (readingRawDataRows)
                            {
                                // row was created as a T above, so just hand it back.
                                obj = row as T;
                            }
                            else
                            {
                                obj = fm.ReadObject(row, ae);
                            }
                        }
                        catch (AggregatedException)
                        {
                            // Seeing that the AggregatedException was thrown, maximum number of exceptions
                            // must have been reached, so rethrow.
                            // Catch here, so you don't add an AggregatedException to an AggregatedException.
                            // Use "throw;" (not "throw ex;") to preserve the original stack trace.
                            throw;
                        }
                        catch (Exception e)
                        {
                            // Store the exception in the AggregatedException ae.
                            // That way, if a file has many errors leading to exceptions,
                            // you get them all in one go, packaged in a single aggregated exception.
                            ae.AddException(e);
                        }

                        yield return obj;
                    }
                    firstRow = false;
                }
            }
            finally
            {
                // Runs when enumeration completes, when the enumerator is disposed
                // early (e.g. a break out of a foreach), or when an exception escapes.
                if (readingFile)
                {
                    stream.Close();
                }

                // If any exceptions were raised while reading the data from the file,
                // they will have been stored in the AggregatedException ae.
                // In that case, time to throw ae.
                // ae can only be null if one of the constructors above threw.
                if (ae != null)
                {
                    ae.ThrowIfExceptionsStored();
                }
            }
        }

        /// ///////////////////////////////////////////////////////////////////////
        /// Write
        /// 
        /// <summary>
        /// Writes the values to a file in delimited format.
        /// The file is created (or overwritten) and closed when done.
        /// </summary>
        public void Write<T>(
            IEnumerable<T> values, 
            string fileName, 
            CsvFileDescription fileDescription) 
        {
            using (StreamWriter sw = new StreamWriter(
                                                fileName,
                                                false,
                                                fileDescription.TextEncoding))
            {
                WriteData<T>(values, fileName, sw, fileDescription);
            }
        }

        /// <summary>
        /// Writes the values to a stream, using a default CsvFileDescription.
        /// The stream is left open; the caller owns it.
        /// </summary>
        public void Write<T>(
            IEnumerable<T> values,
            TextWriter stream) 
        {
            Write<T>(values, stream, new CsvFileDescription());
        }

        /// <summary>
        /// Writes the values to a file, using a default CsvFileDescription.
        /// </summary>
        public void Write<T>(
            IEnumerable<T> values, 
            string fileName) 
        {
            Write<T>(values, fileName, new CsvFileDescription());
        }

        /// <summary>
        /// Writes the values to a stream in delimited format.
        /// The stream is left open; the caller owns it.
        /// </summary>
        public void Write<T>(
            IEnumerable<T> values,
            TextWriter stream,
            CsvFileDescription fileDescription) 
        {
            WriteData<T>(values, null, stream, fileDescription);
        }

        /// ///////////////////////////////////////////////////////////////////////
        /// WriteData
        /// 
        /// <summary>
        /// Does the actual writing: first an optional line of column names,
        /// then one delimited line per object in values.
        /// </summary>
        /// <param name="fileName">
        /// Name of the file associated with the stream, or null.
        /// Used solely for exception messages.
        /// </param>
        private void WriteData<T>(
            IEnumerable<T> values,
            string fileName,
            TextWriter stream, 
            CsvFileDescription fileDescription) 
        {
            FieldMapper<T> fm = new FieldMapper<T>(fileDescription, fileName, true);
            CsvStream cs = new CsvStream(null, stream, fileDescription.SeparatorChar);

            List<string> row = new List<string>();

            // If first line has to carry the field names, write the field names now.
            if (fileDescription.FirstLineHasColumnNames)
            {
                fm.WriteNames(ref row);
                cs.WriteRow(row, fileDescription.QuoteAllFields);
            }

            // -----

            foreach (T obj in values)
            {
                // Convert obj to row
                fm.WriteObject(obj, ref row);
                cs.WriteRow(row, fileDescription.QuoteAllFields);
            }
        }

        /// ///////////////////////////////////////////////////////////////////////
        /// CsvContext
        /// 
        /// <summary>
        /// Creates a CsvContext. The context itself holds no state; all
        /// configuration travels in the CsvFileDescription passed to Read/Write.
        /// </summary>
        public CsvContext()
        {
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Apache License, Version 2.0


Written By
Architect
Australia Australia
Twitter: @MattPerdeck
LinkedIn: au.linkedin.com/in/mattperdeck
Current project: JSNLog JavaScript Logging Package

Matt has over 9 years .NET and SQL Server development experience. Before getting into .Net, he worked on a number of systems, ranging from the largest ATM network in The Netherlands to embedded software in advanced Wide Area Networks and the largest ticketing web site in Australia. He has lived and worked in Australia, The Netherlands, Slovakia and Thailand.

He is the author of the book ASP.NET Performance Secrets (www.amazon.com/ASP-NET-Site-Performance-Secrets-Perdeck/dp/1849690685) in which he shows in clear and practical terms how to quickly find the biggest bottlenecks holding back the performance of your web site, and how to then remove those bottlenecks. The book deals with all environments affecting a web site - the web server, the database server and the browser.

Matt currently lives in Sydney, Australia. He recently worked at Readify and the global professional services company PwC. He now works at SP Health, a global provider of weight loss web sites such as CSIRO's TotalWellBeingDiet.com and BiggestLoserClub.com.

Comments and Discussions