Click here to Skip to main content
15,897,226 members
Articles / Programming Languages / C#

LINQ to CSV library

Rate me:
Please Sign up or sign in to vote.
4.97/5 (217 votes)
10 Jan 2015Apache23 min read 1M   482  
Easy-to-use library for reading and writing CSV and tab-delimited files with LINQ queries.
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace LINQtoCSV
{
    /// <summary>
    /// Reads and writes rows of separator-delimited text (CSV, tab delimited, ...),
    /// one row at a time. Reading goes through the <see cref="TextReader"/> passed to
    /// the constructor, writing goes through the <see cref="TextWriter"/>.
    /// 
    /// Based on code found at
    /// http://knab.ws/blog/index.php?/archives/3-CSV-file-parser-and-writer-in-C-Part-1.html
    /// and
    /// http://knab.ws/blog/index.php?/archives/10-CSV-file-parser-and-writer-in-C-Part-2.html
    /// </summary>
    internal class CsvStream
    {
        private readonly TextReader m_instream;
        private readonly TextWriter m_outStream;
        private readonly char m_SeparatorChar;

        // Characters that force a field to be quoted when written:
        // the quote itself, LF, CR and the separator character.
        private readonly char[] m_SpecialChars;

        // Current line number in the file (starts at 1).
        // Only used when reading a file, not when writing a file.
        private int m_lineNbr;

        /// <summary>
        /// Creates a stream over the given reader and writer.
        /// The arguments are not validated here: <see cref="ReadRow"/> only uses
        /// <paramref name="inStream"/> and <see cref="WriteRow"/> only uses
        /// <paramref name="outStream"/>.
        /// </summary>
        /// <param name="inStream">Source of the characters parsed by <see cref="ReadRow"/>.</param>
        /// <param name="outStream">Destination of the text produced by <see cref="WriteRow"/>.</param>
        /// <param name="SeparatorChar">Character that separates the fields within a row.</param>
        public CsvStream(TextReader inStream, TextWriter outStream, char SeparatorChar)
        {
            m_instream = inStream;
            m_outStream = outStream;
            m_SeparatorChar = SeparatorChar;
            m_SpecialChars = ("\"\x0A\x0D" + m_SeparatorChar.ToString()).ToCharArray();
            m_lineNbr = 1;
        }

        /// <summary>
        /// Writes one row to the output stream: the items separated by the separator
        /// character, followed by a line terminator.
        /// </summary>
        /// <param name="row">
        /// The field values, in output order. A null item is written as nothing at all
        /// (not even quotes), so it can be told apart from an empty string.
        /// </param>
        /// <param name="quoteAllFields">
        /// True to surround every non-null item with quotes. When false, an item is only
        /// quoted if it contains a special character (quote, LF, CR or the separator),
        /// or if it is empty or consists solely of white space.
        /// </param>
        public void WriteRow(List<string> row, bool quoteAllFields)
        {
            bool firstItem = true;
            foreach (string item in row)
            {
                if (!firstItem) { m_outStream.Write(m_SeparatorChar); }
                firstItem = false;

                // If the item is null, don't write anything.
                if (item == null) { continue; }

                bool needsQuoting =
                    quoteAllFields ||
                    (item.IndexOfAny(m_SpecialChars) > -1) ||
                    (item.Trim().Length == 0);

                if (needsQuoting)
                {
                    // Quotes inside a quoted item are escaped by doubling them.
                    m_outStream.Write("\"" + item.Replace("\"", "\"\"") + "\"");
                }
                else
                {
                    m_outStream.Write(item);
                }
            }

            m_outStream.WriteLine("");
        }

        /// <summary>
        /// Reads the next row from the input stream.
        /// </summary>
        /// <param name="row">
        /// Cleared on entry. On return it contains the values in the current row, in the
        /// order in which they appear in the file, each paired with the number of the
        /// line on which the item starts.
        /// </param>
        /// <returns>
        /// True if a row was returned in parameter "row".
        /// False if no row returned. In that case, you're at the end of the file.
        /// </returns>
        public bool ReadRow(ref IDataRow row)
        {
            row.Clear();

            while (true)
            {
                // Number of the line where the item starts. Note that an item
                // can span multiple lines.
                int startingLineNbr = m_lineNbr;

                string item = null;

                bool moreAvailable = GetNextItem(ref item);
                if (!moreAvailable)
                {
                    return (row.Count > 0);
                }
                row.Add(new DataRowItem(item, startingLineNbr));
            }
        }

        // Set by GetNextChar once the reader has no more characters. Never reset.
        private bool EOS = false;

        // Set when the item just returned was the last one on its line. The next call
        // to GetNextItem clears it and returns false to mark the end of the row.
        private bool EOL = false;

        // True when the most recently consumed character was a CR, so that the LF of a
        // CR LF pair is not counted as a second line break.
        private bool previousWasCr = false;

        /// <summary>
        /// Parses the next item (field) of the current row.
        /// </summary>
        /// <param name="itemString">
        /// Receives the item text. Stays null when the field had no content at all
        /// (for example two adjacent separators); a quoted empty field yields "".
        /// </param>
        /// <returns>
        /// True if an item was read. False if the previous item was the last one on its
        /// line, or if the end of the stream was reached before any item content was
        /// found. Note that, as a consequence, a separator that is immediately followed
        /// by the end of the stream does not produce a trailing empty item.
        /// </returns>
        private bool GetNextItem(ref string itemString)
        {
            itemString = null;
            if (EOL)
            {
                // previous item was last in line, start new line
                EOL = false;
                return false;
            }

            bool itemFound = false;   // any content (or an opening quote) seen for this item
            bool quoted = false;      // item started with a quote
            bool predata = true;      // still before the first character of the item
            bool postdata = false;    // closing quote of a quoted item has been seen
            StringBuilder item = new StringBuilder();

            while (true)
            {
                char c = GetNextChar(true);
                if (EOS)
                {
                    if (itemFound) { itemString = item.ToString(); }
                    return itemFound;
                }

                // ---------
                // Keep track of line number. 
                // Note that line breaks can happen within a quoted field, not just at the
                // end of a record.

                // Don't count 0D0A as two line breaks.
                if ((!previousWasCr) && (c == '\x0A'))
                {
                    m_lineNbr++;
                }

                if (c == '\x0D')
                {
                    m_lineNbr++;
                    previousWasCr = true;
                }
                else
                {
                    previousWasCr = false;
                }

                // ---------

                if ((postdata || !quoted) && c == m_SeparatorChar)
                {
                    // end of item, return
                    if (itemFound) { itemString = item.ToString(); }
                    return true;
                }

                if ((predata || postdata || !quoted) && (c == '\x0A' || c == '\x0D'))
                {
                    // we are at the end of the line, eat newline characters and exit
                    EOL = true;
                    if (c == '\x0D' && GetNextChar(false) == '\x0A')
                    {
                        // new line sequence is 0D0A
                        GetNextChar(true);

                        // The 0A was consumed without passing through the line counting
                        // above, so the last consumed character is no longer a CR.
                        // Without this reset, a bare 0A directly after a 0D0A pair
                        // (mixed line endings) would be mistaken for the second half of
                        // a 0D0A pair and the line number would fall behind.
                        previousWasCr = false;
                    }

                    if (itemFound) { itemString = item.ToString(); }
                    return true;
                }

                if (predata && c == ' ')
                {
                    // whitespace preceding data, discard
                    continue;
                }

                if (predata && c == '"')
                {
                    // quoted data is starting
                    quoted = true;
                    predata = false;
                    itemFound = true;
                    continue;
                }

                if (predata)
                {
                    // data is starting without quotes
                    predata = false;
                    item.Append(c);
                    itemFound = true;
                    continue;
                }

                if (c == '"' && quoted)
                {
                    if (GetNextChar(false) == '"')
                    {
                        // double quotes within quoted string means add a quote       
                        item.Append(GetNextChar(true));
                    }
                    else
                    {
                        // end-quote reached
                        postdata = true;
                    }

                    continue;
                }

                // all cases covered, character must be data
                item.Append(c);
            }
        }

        // Read buffer: "length" characters are valid, "pos" is the index of the next
        // character to hand out.
        private readonly char[] buffer = new char[4096];
        private int pos = 0;
        private int length = 0;

        /// <summary>
        /// Returns the next character of the input, refilling the buffer from the
        /// reader when it is exhausted.
        /// </summary>
        /// <param name="eat">
        /// True to consume the character; false to only peek at it, so that the next
        /// call returns the same character again.
        /// </param>
        /// <returns>
        /// The next character, or '\0' when the reader has no more characters. In that
        /// case EOS is set to true; this also happens when only peeking.
        /// </returns>
        private char GetNextChar(bool eat)
        {
            if (pos >= length)
            {
                length = m_instream.ReadBlock(buffer, 0, buffer.Length);
                if (length == 0)
                {
                    EOS = true;
                    return '\0';
                }
                pos = 0;
            }

            if (eat)
            {
                return buffer[pos++];
            }

            return buffer[pos];
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Apache License, Version 2.0


Written By
Architect
Australia Australia
Twitter: @MattPerdeck
LinkedIn: au.linkedin.com/in/mattperdeck
Current project: JSNLog JavaScript Logging Package

Matt has over 9 years of .NET and SQL Server development experience. Before getting into .NET, he worked on a number of systems, ranging from the largest ATM network in The Netherlands to embedded software in advanced Wide Area Networks and the largest ticketing web site in Australia. He has lived and worked in Australia, The Netherlands, Slovakia and Thailand.

He is the author of the book ASP.NET Performance Secrets (www.amazon.com/ASP-NET-Site-Performance-Secrets-Perdeck/dp/1849690685) in which he shows in clear and practical terms how to quickly find the biggest bottlenecks holding back the performance of your web site, and how to then remove those bottlenecks. The book deals with all environments affecting a web site - the web server, the database server and the browser.

Matt currently lives in Sydney, Australia. He recently worked at Readify and the global professional services company PwC. He now works at SP Health, a global provider of weight-loss web sites such as CSIRO's TotalWellBeingDiet.com and BiggestLoserClub.com.

Comments and Discussions