|
/*
* Copyright (c) 2013, Yuriy Nelipovich
*
* If you find this code useful, or if you have any questions, suggestions,
* bug reports, or donations, please email me: dev.yuriy.n@gmail.com
*/
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
namespace LogJoin
{
/// <summary>
/// Joins multiple sources by the key (using Left Outer Join) and writes
/// the result as delimited text to a stream.
/// </summary>
/// <remarks>
/// The first source drives the join: every distinct key of the first source
/// produces exactly one output record, and each following source contributes
/// its columns to that record (or empty strings when it has no record with
/// that key). When a source contains several records with the same key, the
/// last one is used.
/// </remarks>
public class Join
{
    /// <summary>
    /// Placed between the source name and the column name in the header.
    /// </summary>
    private const string ColumnNameDelimiter = ".";

    /// <summary>
    /// Minimum number of seconds that must pass between two progress
    /// messages printed to the console while records are being written.
    /// </summary>
    private const double ProgressIntervalSeconds = 3;

    private readonly string _separator;
    private readonly IList<Source> _sources;

    /// <summary>
    /// Constructs new instance
    /// </summary>
    /// <param name="sources">Sources to join, in output column order</param>
    /// <param name="separator">
    /// Separates values of each record in output file. It is passed unchanged
    /// to <see cref="string.Join(string, IEnumerable{string})"/>.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="sources"/> is null
    /// </exception>
    public Join(IList<Source> sources, string separator)
    {
        // Fail fast here: otherwise the problem only surfaces later inside a
        // LINQ call in Export, reported against an unrelated parameter name.
        if (sources == null)
            throw new ArgumentNullException("sources");

        this._sources = sources;
        this._separator = separator;
    }

    /// <summary>
    /// Writes the header line followed by one line per joined record, and
    /// prints the total number of records to the console.
    /// </summary>
    /// <param name="output">
    /// Destination. The stream is wrapped in a <see cref="StreamWriter"/> that
    /// is disposed before this method returns, which also closes the stream.
    /// </param>
    public void Export(Stream output)
    {
        using (var writer = new StreamWriter(output))
        {
            this.WriteHeader(writer);

            var allRecords = JoinSources(this._sources);

            // JoinSources returns null when there are no sources at all; in
            // that case only the header is written and no total is printed.
            if (allRecords != null)
            {
                var count = this.WriteRecords(allRecords, writer);
                Console.WriteLine("Total: {0} records.", count);
            }
        }
    }

    /// <summary>
    /// Writes the header line: "Key" followed by every non-key column of every
    /// source, each named as source name + delimiter + column name.
    /// </summary>
    /// <param name="writer">Receives the header line</param>
    private void WriteHeader(TextWriter writer)
    {
        var columns =
            new[] {"Key"}
                .Concat(
                    this._sources
                        .SelectMany(s => s.OtherColumnsNames.Select(c => s.Name + ColumnNameDelimiter + c))
                );
        var header = string.Join(this._separator, columns);
        writer.WriteLine(header);
    }

    /// <summary>
    /// Joins multiple sources by their keys
    /// </summary>
    /// <remarks>
    /// GroupBy and GroupJoin use deferred execution, so the record sequences
    /// obtained from the sources are not enumerated until the returned
    /// sequence is iterated.
    /// </remarks>
    /// <param name="sources">the sources</param>
    /// <returns>
    /// Sequence of joined records, or null when <paramref name="sources"/>
    /// is empty
    /// </returns>
    private static IEnumerable<Record> JoinSources(IEnumerable<Source> sources)
    {
        IEnumerable<Record> allRecords = null;
        foreach (var source in sources)
        {
            if (allRecords == null)
            {
                // First source: collapse duplicate keys, keeping the last
                // record seen for each key.
                allRecords = source.GetAllRecords().GroupBy(_ => _.Key, (key, group) => @group.Last());
            }
            else
            {
                // Copy the loop variable before capturing it: the lambda below
                // runs only when the result is enumerated (after this loop has
                // finished), and compilers older than C# 5 share a single
                // foreach variable across all iterations.
                Source newSource = source;
                allRecords = allRecords
                    .GroupJoin(newSource.GetAllRecords(), _ => _.Key, _ => _.Key,
                        (r, group) => new Record(r.Key)
                        {
                            // Append this source's fields to the fields
                            // gathered so far; with no match, pad with one
                            // empty string per column so that the columns
                            // stay aligned with the header.
                            OtherFields =
                                r.OtherFields.Concat(
                                    @group.Any()
                                        ? @group.Last().OtherFields
                                        : newSource.OtherColumnsNames.Select(_ => string.Empty)
                                    ).ToArray()
                        });
            }
        }
        return allRecords;
    }

    /// <summary>
    /// Writes one line per record (key first, then the other fields) and
    /// reports progress to the console while doing so.
    /// </summary>
    /// <param name="allRecords">Records to write</param>
    /// <param name="writer">Receives the lines</param>
    /// <returns>Number of records written</returns>
    private long WriteRecords(IEnumerable<Record> allRecords, TextWriter writer)
    {
        var progressTimer = Stopwatch.StartNew();
        long count = 0;
        foreach (var record in allRecords)
        {
            var line = string.Join(this._separator, new[] {record.Key}.Concat(record.OtherFields));
            writer.WriteLine(line);
            count++;

            if (progressTimer.Elapsed.TotalSeconds > ProgressIntervalSeconds)
            {
                Console.WriteLine("{0} records written so far...", count);
                // Reuse the same stopwatch instead of allocating a new one
                // for every progress message.
                progressTimer.Restart();
            }
        }
        return count;
    }
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.