Click here to Skip to main content
15,884,537 members
Articles / General Programming / Regular Expressions

Collect and Compare Log Statistics using LogJoin

Rate me:
Please Sign up or sign in to vote.
4.83/5 (3 votes)
22 Oct 2013 | BSD | 4 min read | 7.8K | 88 | 5
The LogJoin tool helps collect unstructured data from text files and join it into a simple table representation for easy analysis.
/*
 * Copyright (c) 2013, Yuriy Nelipovich
 * 
 * If you find this code useful or in case of any questions, suggestions
 * bug reports, donation, please email me: dev.yuriy.n@gmail.com
 */

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;

namespace LogJoin
{
    /// <summary>
    /// Joins multiple sources by the key (using Left Outer Join) and outputs
    /// the result to a text file.
    /// </summary>
    /// <remarks>
    /// The first source drives the join: every distinct key it yields produces
    /// exactly one output record. When a source yields several records with the
    /// same key, the last one is used. When a later source has no record for a
    /// key, its columns are filled with empty strings.
    /// </remarks>
    public class Join
    {
        private const string ColumnNameDelimiter = ".";

        /// <summary>
        /// Minimum number of seconds between two progress messages printed
        /// while records are being written.
        /// </summary>
        private const double ProgressIntervalSeconds = 3;

        private readonly string _separator;
        private readonly IList<Source> _sources;

        /// <summary>
        /// Constructs new instance
        /// </summary>
        /// <param name="sources">Sources to join</param>
        /// <param name="separator">Separates values of each record in output file</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="sources"/> is <c>null</c>.
        /// </exception>
        public Join(IList<Source> sources, string separator)
        {
            // Fail at construction time instead of deep inside Export.
            if (sources == null)
            {
                throw new ArgumentNullException("sources");
            }

            this._sources = sources;
            // A null separator is intentionally allowed: string.Join treats it
            // as an empty string.
            this._separator = separator;
        }

        /// <summary>
        /// Writes the header line followed by one line per joined record to
        /// <paramref name="output"/>, and reports the total to the console
        /// when at least one source was supplied.
        /// </summary>
        /// <param name="output">
        /// Destination. The stream is wrapped in a <see cref="StreamWriter"/>
        /// that is disposed before this method returns, which also closes the
        /// stream.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="output"/> is <c>null</c>.
        /// </exception>
        public void Export(Stream output)
        {
            if (output == null)
            {
                throw new ArgumentNullException("output");
            }

            using (var writer = new StreamWriter(output))
            {
                this.WriteHeader(writer);

                var allRecords = JoinSources(this._sources);

                // JoinSources returns null when there are no sources at all;
                // in that case only the header is written.
                if (allRecords != null)
                {
                    var count = this.WriteRecords(allRecords, writer);
                    Console.WriteLine("Total: {0} records.", count);
                }
            }
        }

        /// <summary>
        /// Writes the header line: "Key" followed by every column of every
        /// source, each prefixed with the source name and the column name
        /// delimiter.
        /// </summary>
        /// <param name="writer">Receives the header line</param>
        private void WriteHeader(TextWriter writer)
        {
            var sourceColumns = this._sources.SelectMany(
                s => s.OtherColumnsNames.Select(c => s.Name + ColumnNameDelimiter + c));
            var columns = new[] {"Key"}.Concat(sourceColumns);

            writer.WriteLine(string.Join(this._separator, columns));
        }

        /// <summary>
        /// Joins multiple sources by their keys
        /// </summary>
        /// <param name="sources">the sources</param>
        /// <returns>
        /// Sequence of joined records, or <c>null</c> when
        /// <paramref name="sources"/> is empty. The sequence is built from
        /// deferred LINQ operators, so the join itself runs when the result
        /// is enumerated.
        /// </returns>
        private static IEnumerable<Record> JoinSources(IEnumerable<Source> sources)
        {
            IEnumerable<Record> allRecords = null;
            foreach (var source in sources)
            {
                if (allRecords == null)
                {
                    // First source: keep one record per key, the last one seen.
                    allRecords = source.GetAllRecords().GroupBy(_ => _.Key, (key, group) => @group.Last());
                }
                else
                {
                    // Copy the loop variable so the deferred lambda below
                    // captures this iteration's source, not a later one.
                    Source newSource = source;
                    allRecords = allRecords
                        .GroupJoin(newSource.GetAllRecords(), _ => _.Key, _ => _.Key,
                                   (r, group) => new Record(r.Key)
                                       {
                                           // Append this source's fields to the ones
                                           // collected so far; pad with one empty string
                                           // per column when the key has no match.
                                           OtherFields =
                                               r.OtherFields.Concat(
                                                   @group.Any()
                                                       ? @group.Last().OtherFields
                                                       : newSource.OtherColumnsNames.Select(_ => string.Empty)
                                                     ).ToArray()
                                       });
                }
            }
            return allRecords;
        }

        /// <summary>
        /// Writes each record as one line (key first, then the other fields,
        /// joined with the separator) and prints a progress message to the
        /// console whenever more than <see cref="ProgressIntervalSeconds"/>
        /// seconds have passed since the previous one.
        /// </summary>
        /// <param name="allRecords">Records to write</param>
        /// <param name="writer">Receives one line per record</param>
        /// <returns>Number of records written</returns>
        private long WriteRecords(IEnumerable<Record> allRecords, TextWriter writer)
        {
            var sinceLastReport = Stopwatch.StartNew();
            long count = 0;
            foreach (var record in allRecords)
            {
                var line = string.Join(this._separator, new[] {record.Key}.Concat(record.OtherFields));
                writer.WriteLine(line);
                count++;
                if (sinceLastReport.Elapsed.TotalSeconds > ProgressIntervalSeconds)
                {
                    Console.WriteLine("{0} records written so far...", count);
                    // Reuse the same stopwatch instead of allocating a new one.
                    sinceLastReport.Restart();
                }
            }
            return count;
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The BSD License


Written By
Software Developer CactusSoft
Belarus Belarus
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions