Click here to Skip to main content
Click here to Skip to main content
Articles » Database » Database » Utilities » Downloads
 
Add your own
alternative version

A Fast CSV Reader

, 10 Nov 2011
A reader that provides fast, non-cached, forward-only access to CSV data.
csvreader11_demo.zip
CsvReader11_demo
CsvReader11_demo
CsvReader11_demo
CsvReaderBenchmarks
App.ico
data.rar
CsvReaderDemo
App.ico
LumenWorks.Framework.IO
Csv
Events
Exceptions
Resources
LumenWorks.Framework.Tests.Unit
IO
Csv
CsvReaderBenchmarks
App.ico
data.rar
CsvReaderDemo
App.ico
LumenWorks.Framework.IO
Csv
Events
Exceptions
Resources
LumenWorks.Framework.Tests.Unit
IO
Csv
CsvReaderBenchmarks
App.ico
data.rar
CsvReaderDemo
App.ico
LumenWorks.Framework.IO
Csv
Events
Exceptions
Resources
LumenWorks.Framework.Tests.Unit
IO
Csv
csvreader11_src.zip
CsvReader11_src
CsvReader11_src
CsvReader11_src
LumenWorks.Framework.IO
Csv
Events
Exceptions
Resources
LumenWorks.Framework.Tests.Unit
IO
Csv
LumenWorks.Framework.IO
Csv
Events
Exceptions
Resources
LumenWorks.Framework.Tests.Unit
IO
Csv
LumenWorks.Framework.IO
Csv
Events
Exceptions
Resources
LumenWorks.Framework.Tests.Unit
IO
Csv
csvreader20_demo.zip
CsvReader20_demo
CsvReader20_demo
CsvReader20_demo
CsvReaderBenchmarks
bin
Release
CsvReaderBenchmarks.exe
CsvReaderBenchmarks.vshost.exe
LumenWorks.Framework.IO.dll
data.rar
Properties
CsvReaderDemo
bin
Release
CsvReaderDemo.exe
LumenWorks.Framework.IO.dll
LumenWorks.Framework.Tests.Unit.dll
Properties
Settings.settings
LumenWorks.Framework.IO
bin
Release
LumenWorks.Framework.IO.dll
Csv
Events
Exceptions
Resources
Properties
LumenWorks.Framework.Tests.Unit
bin
Release
LumenWorks.Framework.IO.dll
LumenWorks.Framework.Tests.Unit.dll
IO
Csv
Properties
CsvReaderBenchmarks
bin
Release
CsvReaderBenchmarks.exe
LumenWorks.Framework.IO.dll
data.rar
Properties
CsvReaderDemo
bin
Release
CsvReaderDemo.exe
LumenWorks.Framework.IO.dll
LumenWorks.Framework.Tests.Unit.dll
Properties
Settings.settings
LumenWorks.Framework.IO
bin
Release
LumenWorks.Framework.IO.dll
Csv
Events
Exceptions
Resources
Properties
LumenWorks.Framework.Tests.Unit
bin
Release
LumenWorks.Framework.IO.dll
LumenWorks.Framework.Tests.Unit.dll
IO
Csv
Properties
CsvReaderBenchmarks
bin
Release
CsvReaderBenchmarks.exe
LumenWorks.Framework.IO.dll
data.rar
Properties
CsvReaderDemo
bin
Release
CsvReaderDemo.exe
LumenWorks.Framework.IO.dll
LumenWorks.Framework.Tests.Unit.dll
Properties
Settings.settings
LumenWorks.Framework.IO
bin
Release
LumenWorks.Framework.IO.dll
Csv
Events
Exceptions
Resources
Properties
LumenWorks.Framework.Tests.Unit
bin
Release
LumenWorks.Framework.IO.dll
LumenWorks.Framework.Tests.Unit.dll
IO
Csv
Properties
csvreader20_src.zip
CsvReader20_src
CsvReader20_src
CsvReader20_src
LumenWorks.Framework.IO
bin
Release
LumenWorks.Framework.IO.dll
Csv
Events
Exceptions
Resources
Properties
LumenWorks.Framework.Tests.Unit
bin
Release
LumenWorks.Framework.IO.dll
LumenWorks.Framework.Tests.Unit.dll
IO
Csv
Properties
LumenWorks.Framework.IO
bin
Release
LumenWorks.Framework.IO.dll
Csv
Events
Exceptions
Resources
Properties
LumenWorks.Framework.Tests.Unit
bin
Release
LumenWorks.Framework.IO.dll
LumenWorks.Framework.Tests.Unit.dll
IO
Csv
Properties
LumenWorks.Framework.IO
bin
Release
LumenWorks.Framework.IO.dll
Csv
Events
Exceptions
Resources
Properties
LumenWorks.Framework.Tests.Unit
bin
Release
LumenWorks.Framework.IO.dll
LumenWorks.Framework.Tests.Unit.dll
IO
Csv
Properties
CsvReader_bin.zip
csvreader_demo.zip
CsvReaderDemo
CsvReaderDemo
bin
Release
CsvReaderDemo.exe
CsvReaderDemo.vshost.exe
LumenWorks.Framework.IO.dll
data.rar
Properties
CsvReaderTests
bin
Release
CsvReaderTests.dll
LumenWorks.Framework.IO.dll
Properties
LumenWorks.Framework.IO
bin
Release
LumenWorks.Framework.IO.dll
Csv
Exceptions
Resources
Properties
CsvReader_Profiler.zip
CsvReader_src.zip
bin
Release
CsvReaderDemo.exe
CsvReaderDemo.vshost.exe
CsvReaderDemo.vshost.exe.manifest
LumenWorks.Framework.IO.dll
LumenWorks.Framework.Tests.Unit.dll
Properties
Settings.settings
CsvReaderDemoWeb
bin
CsvReaderDemoWeb.dll
LumenWorks.Framework.IO.dll
LumenWorks.Framework.Tests.Unit.dll
Properties
LumenWorks.Framework.IO
bin
Release
LumenWorks.Framework.IO.dll
Csv
Events
Exceptions
Resources
LumenWorks.Framework.snk
Properties
LumenWorks.Framework.Tests.Unit
bin
Release
LumenWorks.Framework.IO.dll
LumenWorks.Framework.Tests.Unit.dll
IO
Csv
Properties
CsvReaderBenchmarks
bin
Release
CsvReaderBenchmarks.exe
CsvReaderBenchmarks.vshost.exe
CsvReaderBenchmarks.vshost.exe.manifest
LumenWorks.Framework.IO.dll
data.rar
Properties
//	LumenWorks.Framework.Tests.Unit.IO.CSV.CsvReaderMalformedTest
//	Copyright (c) 2005 Sébastien Lorion
//
//	MIT license (http://en.wikipedia.org/wiki/MIT_License)
//
//	Permission is hereby granted, free of charge, to any person obtaining a copy
//	of this software and associated documentation files (the "Software"), to deal
//	in the Software without restriction, including without limitation the rights 
//	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
//	of the Software, and to permit persons to whom the Software is furnished to do so, 
//	subject to the following conditions:
//
//	The above copyright notice and this permission notice shall be included in all 
//	copies or substantial portions of the Software.
//
//	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
//	INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
//	PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE 
//	FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
//	ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


// A special thanks goes to "shriop" at CodeProject for providing many of the standard and Unicode parsing tests.


using System;
using System.Globalization;
using System.IO;
using System.Text;

using NUnit.Framework;

using LumenWorks.Framework.IO.Csv;

namespace LumenWorks.Framework.Tests.Unit.IO.Csv
{
	[TestFixture()]
	public class CsvReaderMalformedTest
	{
		#region Utilities

		/// <summary>
		/// Runs the detailed missing-field check once for every combination of access mode
		/// (sequential first, then non-sequential) and missing-field action
		/// (ParseError, ReplaceByEmpty, ReplaceByNull).
		/// </summary>
		private void CheckMissingFieldUnquoted(long recordCount, int fieldCount, long badRecordIndex, int badFieldIndex, int bufferSize)
		{
			bool[] accessModes = new bool[] { true, false };

			MissingFieldAction[] actions = new MissingFieldAction[]
			{
				MissingFieldAction.ParseError,
				MissingFieldAction.ReplaceByEmpty,
				MissingFieldAction.ReplaceByNull
			};

			foreach (bool sequentialAccess in accessModes)
			{
				foreach (MissingFieldAction action in actions)
				{
					CheckMissingFieldUnquoted(recordCount, fieldCount, badRecordIndex, badFieldIndex, bufferSize, sequentialAccess, action);
				}
			}
		}

		/// <summary>
		/// Generates unquoted CSV data in which one record is written with too few fields,
		/// then reads it back and verifies every field of every record.
		/// </summary>
		/// <param name="recordCount">Number of records to generate.</param>
		/// <param name="fieldCount">Number of fields in a well-formed record.</param>
		/// <param name="badRecordIndex">Index of the record that is written short.</param>
		/// <param name="badFieldIndex">Number of fields written for the short record, i.e. the index of its first missing field.</param>
		/// <param name="bufferSize">Buffer size passed to the reader; also used to compute the expected error position.</param>
		/// <param name="sequentialAccess">When <c>false</c>, the missing field is read directly before the fields are read in order.</param>
		/// <param name="action">Missing-field action assigned to the reader.</param>
		private void CheckMissingFieldUnquoted(long recordCount, int fieldCount, long badRecordIndex, int badFieldIndex, int bufferSize, bool sequentialAccess, MissingFieldAction action)
		{
			// Build the data following the template "00,01,02\n10,11,12\n....",
			// working out along the way where the error is expected to be reported.

			long estimatedLength = recordCount * (fieldCount * 2 + fieldCount - 1) + recordCount;
			Assert.IsTrue(estimatedLength <= int.MaxValue);

			StringBuilder data = new StringBuilder((int) estimatedLength);
			int expectedErrorPosition = 0;

			for (long recordIndex = 0; recordIndex < recordCount; recordIndex++)
			{
				bool isBadRecord = (recordIndex == badRecordIndex);
				int fieldsToWrite = isBadRecord ? badFieldIndex : fieldCount;

				for (int column = 0; column < fieldsToWrite; column++)
				{
					data.Append(recordIndex).Append(column).Append(CsvReader.DefaultDelimiter);
				}

				// drop the trailing delimiter, then terminate the record
				data.Length -= 1;
				data.Append('\n');

				if (isBadRecord)
				{
					// position just past the short record, relative to the start of the buffer
					int positionInBuffer = data.Length % bufferSize;
					bool isLastRecord = (recordIndex == recordCount - 1);

					// when eof is true, buffer is cleared and position is reset to 0, so exception will have CurrentPosition = 0
					expectedErrorPosition = isLastRecord ? 0 : positionInBuffer;
				}
			}

			// Read the generated data back and check each field.

			string csvText = data.ToString();

			using (CsvReader csv = new CsvReader(new StringReader(csvText), false, bufferSize))
			{
				csv.MissingFieldAction = action;
				Assert.AreEqual(fieldCount, csv.FieldCount);

				while (csv.ReadNextRecord())
				{
					Assert.AreEqual(fieldCount, csv.FieldCount);

					// non-sequential mode: go straight to the missing field first
					if (!sequentialAccess)
					{
						CheckMissingFieldValueUnquoted(csv, badFieldIndex, badRecordIndex, badFieldIndex, expectedErrorPosition, sequentialAccess, action);
					}

					for (int fieldIndex = 0; fieldIndex < csv.FieldCount; fieldIndex++)
					{
						CheckMissingFieldValueUnquoted(csv, fieldIndex, badRecordIndex, badFieldIndex, expectedErrorPosition, sequentialAccess, action);
					}
				}
			}
		}

		/// <summary>
		/// Reads a single field of the current record and verifies the outcome:
		/// either the generated value (record index followed by field index), or,
		/// for a missing field, the behavior dictated by <paramref name="action"/>.
		/// </summary>
		/// <param name="csv">Reader positioned on the record to check.</param>
		/// <param name="fieldIndex">Index of the field to read.</param>
		/// <param name="badRecordIndex">Index of the record that has missing fields.</param>
		/// <param name="badFieldIndex">Index of the first missing field in the bad record.</param>
		/// <param name="expectedErrorPosition">Position the exception is expected to report.</param>
		/// <param name="sequentialAccess">Only used in failure messages.</param>
		/// <param name="action">Missing-field action the reader was configured with.</param>
		private void CheckMissingFieldValueUnquoted(CsvReader csv, int fieldIndex, long badRecordIndex, int badFieldIndex, int expectedErrorPosition, bool sequentialAccess, MissingFieldAction action)
		{
			const string Message = "RecordIndex={0}; FieldIndex={1}; Position={2}; Sequential={3}; Action={4}";

			// make sure s contains garbage as to not have false successes
			string s = "asdfasdfasdf";

			try
			{
				s = csv[fieldIndex];
			}
			catch (MissingFieldCsvException ex)
			{
				// An exception is only acceptable when missing fields are to be reported as parse errors.
				// Without this check, a reader that wrongly throws under ReplaceByEmpty/ReplaceByNull would pass unnoticed.
				Assert.AreEqual(MissingFieldAction.ParseError, action, "Unexpected MissingFieldCsvException. - " + Message, ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition, sequentialAccess, action);

				Assert.AreEqual(badRecordIndex, ex.CurrentRecordIndex, Message, ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition, sequentialAccess, action);
				Assert.IsTrue(fieldIndex >= badFieldIndex, Message, ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition, sequentialAccess, action);
				Assert.AreEqual(expectedErrorPosition, ex.CurrentPosition, Message, ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition, sequentialAccess, action);

				return;
			}

			// No exception: the field is either present (expect the generated value) or missing (expect the configured replacement).
			if (csv.CurrentRecordIndex != badRecordIndex || fieldIndex < badFieldIndex)
			{
				Assert.AreEqual(csv.CurrentRecordIndex.ToString() + fieldIndex.ToString(), s, Message, csv.CurrentRecordIndex, fieldIndex, -1, sequentialAccess, action);
			}
			else
			{
				switch (action)
				{
					case MissingFieldAction.ReplaceByEmpty:
						Assert.AreEqual(string.Empty, s, Message, csv.CurrentRecordIndex, fieldIndex, -1, sequentialAccess, action);
						break;

					case MissingFieldAction.ReplaceByNull:
						Assert.IsNull(s, Message, csv.CurrentRecordIndex, fieldIndex, -1, sequentialAccess, action);
						break;

					case MissingFieldAction.ParseError:
						// reaching this point means the missing field was read without an exception
						Assert.Fail("Failed to throw ParseError. - " + Message, csv.CurrentRecordIndex, fieldIndex, -1, sequentialAccess, action);
						break;

					default:
						Assert.Fail("'{0}' is not handled by this test.", action);
						break;
				}
			}
		}

		#endregion

		[Test()]
		public void MissingFieldUnquotedTest1()
		{
			// 4 records of 4 fields; record 2 is written with only 2 fields. Default buffer size.
			// The check builds its own data and reader on every call, so one invocation is sufficient
			// (the original repeated the identical call twice).
			CheckMissingFieldUnquoted(4, 4, 2, 2, CsvReader.DefaultBufferSize);
		}

		[Test()]
		public void MissingFieldUnquotedTest2()
		{
			const long RecordCount = 4;
			const int FieldCount = 4;
			const long BadRecordIndex = 2;
			const int BadFieldIndex = 3;

			// With a 16-character buffer, the line break ending the short record
			// is the first character of the next buffer read.
			const int BufferSize = 16;

			CheckMissingFieldUnquoted(RecordCount, FieldCount, BadRecordIndex, BadFieldIndex, BufferSize);
		}

		[Test()]
		public void MissingFieldUnquotedTest3()
		{
			// Missing field when the end of the buffer has been reached:
			// the short record (index 2) is the last of the 3 records.
			const long RecordCount = 3;
			const int FieldCount = 4;
			const long BadRecordIndex = 2;
			const int BadFieldIndex = 3;
			const int BufferSize = 16;

			CheckMissingFieldUnquoted(RecordCount, FieldCount, BadRecordIndex, BadFieldIndex, BufferSize);
		}

		[Test()]
		[ExpectedException(typeof(MissingFieldCsvException))]
		public void MissingFieldQuotedTest1()
		{
			// The third record stops after a quoted second field: 2 fields where 4 are expected.
			const string Data = "a,b,c,d\n1,1,1,1\n2,\"2\"\n3,3,3,3";

			try
			{
				using (CsvReader csv = new CsvReader(new StringReader(Data), false))
				{
					while (csv.ReadNextRecord())
					{
						for (int i = 0; i < csv.FieldCount; i++)
						{
							// the value itself is not checked; each field is read so the reader can raise the exception
							string s = csv[i];
						}
					}
				}
			}
			catch (MissingFieldCsvException ex)
			{
				// Fail with the actual location instead of silently swallowing an exception raised at the wrong place.
				Assert.IsTrue(
					ex.CurrentRecordIndex == 2 && ex.CurrentFieldIndex == 2 && ex.CurrentPosition == 22,
					"Unexpected error location. RecordIndex={0}; FieldIndex={1}; Position={2}",
					ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition);

				// rethrow for [ExpectedException]; "throw;" keeps the original stack trace, "throw ex;" would reset it
				throw;
			}
		}

		[Test()]
		[ExpectedException(typeof(MissingFieldCsvException))]
		public void MissingFieldQuotedTest2()
		{
			// The third record ends with a delimiter right after a quoted field; read with an 11-character buffer.
			const string Data = "a,b,c,d\n1,1,1,1\n2,\"2\",\n3,3,3,3";

			try
			{
				using (CsvReader csv = new CsvReader(new StringReader(Data), false, 11))
				{
					while (csv.ReadNextRecord())
					{
						for (int i = 0; i < csv.FieldCount; i++)
						{
							// the value itself is not checked; each field is read so the reader can raise the exception
							string s = csv[i];
						}
					}
				}
			}
			catch (MissingFieldCsvException ex)
			{
				// Fail with the actual location instead of silently swallowing an exception raised at the wrong place.
				Assert.IsTrue(
					ex.CurrentRecordIndex == 2 && ex.CurrentFieldIndex == 2 && ex.CurrentPosition == 1,
					"Unexpected error location. RecordIndex={0}; FieldIndex={1}; Position={2}",
					ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition);

				// rethrow for [ExpectedException]; "throw;" keeps the original stack trace, "throw ex;" would reset it
				throw;
			}
		}

		[Test()]
		[ExpectedException(typeof(MissingFieldCsvException))]
		public void MissingFieldQuotedTest3()
		{
			// The third record stops after a quoted second field, and the following record starts with a quoted field.
			const string Data = "a,b,c,d\n1,1,1,1\n2,\"2\"\n\"3\",3,3,3";

			try
			{
				using (CsvReader csv = new CsvReader(new StringReader(Data), false))
				{
					while (csv.ReadNextRecord())
					{
						for (int i = 0; i < csv.FieldCount; i++)
						{
							// the value itself is not checked; each field is read so the reader can raise the exception
							string s = csv[i];
						}
					}
				}
			}
			catch (MissingFieldCsvException ex)
			{
				// Fail with the actual location instead of silently swallowing an exception raised at the wrong place.
				Assert.IsTrue(
					ex.CurrentRecordIndex == 2 && ex.CurrentFieldIndex == 2 && ex.CurrentPosition == 22,
					"Unexpected error location. RecordIndex={0}; FieldIndex={1}; Position={2}",
					ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition);

				// rethrow for [ExpectedException]; "throw;" keeps the original stack trace, "throw ex;" would reset it
				throw;
			}
		}

		[Test()]
		[ExpectedException(typeof(MissingFieldCsvException))]
		public void MissingFieldQuotedTest4()
		{
			// The third record ends with a delimiter right after a quoted field, and the following record
			// starts with a quoted field; read with an 11-character buffer.
			const string Data = "a,b,c,d\n1,1,1,1\n2,\"2\",\n\"3\",3,3,3";

			try
			{
				using (CsvReader csv = new CsvReader(new StringReader(Data), false, 11))
				{
					while (csv.ReadNextRecord())
					{
						for (int i = 0; i < csv.FieldCount; i++)
						{
							// the value itself is not checked; each field is read so the reader can raise the exception
							string s = csv[i];
						}
					}
				}
			}
			catch (MissingFieldCsvException ex)
			{
				// Fail with the actual location instead of silently swallowing an exception raised at the wrong place.
				Assert.IsTrue(
					ex.CurrentRecordIndex == 2 && ex.CurrentFieldIndex == 2 && ex.CurrentPosition == 1,
					"Unexpected error location. RecordIndex={0}; FieldIndex={1}; Position={2}",
					ex.CurrentRecordIndex, ex.CurrentFieldIndex, ex.CurrentPosition);

				// rethrow for [ExpectedException]; "throw;" keeps the original stack trace, "throw ex;" would reset it
				throw;
			}
		}
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The MIT License

About the Author

Sebastien Lorion
Architect
Canada Canada
Sébastien Lorion is a software architect as his day job.
 
He is also a musician, actually singing outside the shower :)
 
He needs constant mental and emotional stimulation, so all of this might change someday ...

| Advertise | Privacy | Mobile
Web01 | 2.8.140721.1 | Last Updated 10 Nov 2011
Article Copyright 2005 by Sebastien Lorion
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid