Click here to Skip to main content
12,698,179 members (21,358 online)
Click here to Skip to main content


5 bookmarked

Collect and Compare Log Statistics using LogJoin

, 22 Oct 2013 BSD
The LogJoin tool helps to collect any unstructured data from text files and join it to a simple table representation for easy analysis.
/*
 * Copyright (c) 2013, Yuriy Nelipovich
 * If you find this code useful, or in case of any questions, suggestions,
 * bug reports, or donations, please email me.
 */

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.RegularExpressions;

namespace LogJoin
{
    /// <summary>
    /// Logical source of records (like a relational table) having its own name,
    /// column names, and primary key column(s).
    /// </summary>
    public class Source
    {
        private readonly Input _input;
        private readonly string[] _keyGroupNames;
        private readonly Regex _recordRegex;

        /// <summary>
        /// Constructs a new Source instance.
        /// </summary>
        /// <param name="input">Text input that provides text portions for each record</param>
        /// <param name="name">Name of the source (like a name of relational table)</param>
        /// <param name="recordRegex">Regex that 'extracts' fields (column values) from each text portion</param>
        /// <param name="isMultilineRegex">True if the regex must be created with <see cref="RegexOptions.Multiline"/></param>
        /// <param name="keyGroupNames">Names of fields that together represent unique key (like a primary key in relational table).
        /// Each name must be defined in <paramref name="recordRegex"/>.
        /// May be null, in which case the zero-based number of the text portion is used as the key.</param>
        /// <param name="otherColumnsNames">Names of other fields (columns in relational table).
        /// Each name must be defined in <paramref name="recordRegex"/></param>
        public Source(Input input, string name, string recordRegex, bool isMultilineRegex, string[] keyGroupNames,
                      string[] otherColumnsNames)
        {
            this._input = input;
            this._recordRegex = new Regex(recordRegex, isMultilineRegex ? RegexOptions.Multiline : RegexOptions.None);
            this._keyGroupNames = keyGroupNames;
            this.OtherColumnsNames = otherColumnsNames;
            this.Name = name;
        }

        /// <summary>
        /// Name of the source, as passed to the constructor.
        /// </summary>
        public string Name { get; private set; }

        /// <summary>
        /// Names of the non-key regex groups whose values fill each record's other fields.
        /// </summary>
        public string[] OtherColumnsNames { get; private set; }

        /// <summary>
        /// Gets all records extracted from the input. Text portions are read lazily,
        /// one per iteration, until the input returns null.
        /// </summary>
        /// <returns>Sequence of records; portions that do not match the regex are skipped</returns>
        public IEnumerable<Record> GetAllRecords()
        {
            var recordNumber = 0;
            string portion;
            while ((portion = this._input.ReadTextPortion()) != null)
            {
                var record = this.GetRecord(portion, recordNumber);

                // Advance for every portion read (matching or not) so that the
                // fallback key built from the portion number is unique per portion.
                // Previously the counter stayed at 0 and all such records shared key "0".
                recordNumber++;

                if (record != null)
                {
                    yield return record;
                }
            }
        }

        /// <summary>
        /// Tries to extract a Record from given <paramref name="text"/> portion
        /// </summary>
        /// <param name="text">Text portion that may contain all field values</param>
        /// <param name="recordNumber">Number of the text portion; used as the key when no key group names are configured</param>
        /// <returns>A Record or null if text portion does not match</returns>
        /// <exception cref="ApplicationException">The text matched but every key part is null or empty</exception>
        private Record GetRecord(string text, int recordNumber)
        {
            var match = this._recordRegex.Match(text);
            if (!match.Success)
            {
                return null;
            }

            // Key parts come from the configured regex groups, or fall back to the portion number.
            string[] keyParts = this._keyGroupNames != null
                                    ? this._keyGroupNames.Select(gn => match.Groups[gn].Value).ToArray()
                                    : new[] {recordNumber.ToString(CultureInfo.InvariantCulture)};

            if (keyParts.All(string.IsNullOrEmpty))
            {
                throw new ApplicationException(string.Format("Key not found in '{0}' text: {1}", this.Name, text));
            }

            // Multi-part keys are joined with '~' into a single key string.
            return new Record(string.Join("~", keyParts))
                {
                    OtherFields = this.OtherColumnsNames.Select(cn => match.Groups[cn].Value).ToArray()
                };
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.


This article, along with any associated source code and files, is licensed under The BSD License


About the Author

Yuriy Nelipovich
Software Developer CactusSoft
Belarus Belarus
No Biography provided

You may also be interested in...

| Advertise | Privacy | Terms of Use | Mobile
Web02 | 2.8.170118.1 | Last Updated 22 Oct 2013
Article Copyright 2013 by Yuriy Nelipovich
Everything else Copyright © CodeProject, 1999-2017
Layout: fixed | fluid