Click here to Skip to main content
15,886,362 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
See more:
I have a list of predefined regex pattern strings (around `7 thousand` regex patterns, used to group similar types of messages).

Now I have two lists: one of `regex patterns` and another of `real messages`, which contain some variable names.

I need to group all the similar messages and show those grouped messages. Currently I have to traverse 7000 regex patterns to group similar items in 1000 messages, so it takes `m*n iterations` to find the correct groups.

To reduce the processing time, I remove matched items from the list of messages, e.g. `1000 - (matched items on the previous iteration)`.

What I have tried:

It takes far too long to process these two lists. To reduce the time, I have grouped the messages by category type and process the groups in parallel tasks.

    // Selects the knowledge-base entries whose ErrorType string equals the given name.
    Func<string, List<KBError>> kbErrorsOfType =
        typeName => distinctKBErrors.Where(kb => kb.ErrorType == typeName).ToList();

    List<KBError> warningKBErrors = kbErrorsOfType("Warning");
    List<KBError> fatalKBErrors = kbErrorsOfType("Fatal");
    List<KBError> severeKBErrors = kbErrorsOfType("Severe");
    List<KBError> cbeccErrorKBErrors = kbErrorsOfType("Error");

    // Drop every error whose type is NOT included in processingErrorType,
    // so only the errors selected for processing remain in the list.
    errors.RemoveAll(error => !processingErrorType.HasFlag(error.ErrorType));

    // Selects the remaining errors of one ErrorType (evaluated after the RemoveAll above).
    Func<ErrorType, List<Error>> errorsOfType =
        type => errors.FindAll(err => err.ErrorType == type);

    List<Error> warningErrors = errorsOfType(ErrorType.Warning);
    List<Error> fatalErrors = errorsOfType(ErrorType.Fatal);
    List<Error> severeErrors = errorsOfType(ErrorType.Severe);
    List<Error> cbeccErrors = errorsOfType(ErrorType.Error);

After that, these messages are processed in parallel tasks by partitioning them into equal subsets of items.

    // Guards every mutation of the shared `errors` list. It is declared outside the
    // delegate because FindDistinctErrorMessages is invoked concurrently from several
    // tasks, and a lock object created inside the delegate would be private to each
    // invocation and therefore would not protect `errors` (List<T> is not thread-safe).
    object errorsGate = new object();

    // Matches every knowledge-base pattern in filteredKBErros against the messages in
    // filteredErros.
    //
    // Returns one Error per pattern that matched at least one message:
    //   - a single match reuses the matched Error instance;
    //   - several matches produce a new Error whose Errors list holds all of them.
    // In both cases ErrorCount, ErrorCode, ErrorMessagePattern and the resolution message
    // (via AddErrorResolutionMessage) are filled in from the knowledge-base entry.
    //
    // Side effect: every matched message is removed from the captured `errors` list.
    //
    // NOTE(review): the previous version appended the grouped errors to `errors` and
    // returned a snapshot of the whole `errors` list, leaving its local result collection
    // unused. Returning only this invocation's grouped errors looks like the intent -
    // confirm against the code that consumes the task results.
    Func<List<KBError>, List<Error>, List<Error>> FindDistinctErrorMessages = (filteredKBErros, filteredErros) =>
    {
        // Grouped results produced by this invocation; only touched while holding errorsGate.
        List<Error> groupedErrors = new List<Error>();

        System.Diagnostics.Stopwatch sw = System.Diagnostics.Stopwatch.StartNew();

        Parallel.For(0, filteredKBErros.Count,
            // Each worker thread accumulates its matches in its own dictionary.
            () => new Dictionary<KBError, List<Error>>(),
            (x, loopState, kpErrorResult) =>
            {
                KBError kbError = filteredKBErros[x];

                // Build the Regex once per pattern instead of calling the static
                // Regex.IsMatch for every message: the static methods only cache a
                // small number of parsed patterns (Regex.CacheSize), so with thousands
                // of distinct patterns each call would parse the pattern again.
                Regex pattern = new Regex(kbError.ErrorMessage,
                    System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace);

                kpErrorResult.Add(kbError, filteredErros
                    .Where(error => pattern.IsMatch(error.ErrorMessage))
                    .ToList());
                return kpErrorResult;
            },
            (kpErrorResult) =>
            {
                // Merge this worker's matches while holding the shared gate.
                lock (errorsGate)
                {
                    foreach (KeyValuePair<KBError, List<Error>> errorResult in kpErrorResult)
                    {
                        List<Error> matches = errorResult.Value;
                        if (matches.Count == 0)
                        {
                            continue;
                        }

                        Error error;
                        if (matches.Count == 1)
                        {
                            error = matches[0];
                        }
                        else
                        {
                            // Several messages share the pattern: represent them by a
                            // new parent Error that carries the first message's text and type.
                            error = new Error();
                            error.ErrorMessage = matches[0].ErrorMessage;
                            error.Errors = matches;
                            error.ErrorType = matches[0].ErrorType;
                        }

                        error.ErrorCount = matches.Count;
                        error.ErrorCode = errorResult.Key.ErrorCode;
                        AddErrorResolutionMessage(error, errorResult.Key);
                        error.ErrorMessagePattern = errorResult.Key.ErrorMessage;
                        groupedErrors.Add(error);

                        // Matched messages leave the shared list.
                        matches.ForEach(err => errors.Remove(err));
                    }
                }
            }
            );
        sw.Stop();
        System.Diagnostics.Debug.WriteLine(string.Format("Completed in {0} seconds", sw.Elapsed.TotalSeconds));

        return groupedErrors;

    };
    
    
    // Run FilterKBList once per error type, pairing each knowledge-base list with the
    // errors of the same type.
    List<KBError> filteredWarningKBList = FilterKBList(warningKBErrors, warningErrors);
    List<KBError> filteredSevereKBList = FilterKBList(severeKBErrors, severeErrors);
    List<KBError> filteredFatalKBList = FilterKBList(fatalKBErrors, fatalErrors);
    List<KBError> filteredcbeccErrorsKBList = FilterKBList(cbeccErrorKBErrors, cbeccErrors);

    List<Task<List<Error>>> tasks = new List<Task<List<Error>>>();

    // Queues one matching task per subset of typedErrors, provided there is at least one
    // error and the type is selected by processingErrorType (or everything is selected).
    Action<ErrorType, List<KBError>, List<Error>> queueMatchingTasks = (errorType, kbList, typedErrors) =>
    {
        if (typedErrors.Count == 0)
        {
            return;
        }

        if (!processingErrorType.HasFlag(errorType) && processingErrorType != ErrorType.All)
        {
            return;
        }

        // Value handed to Split: 1 for fewer than ten errors, otherwise a tenth of the count.
        int equalCounts = typedErrors.Count < 10 ? 1 : typedErrors.Count / 10;
        foreach (IEnumerable<Error> subSet in typedErrors.Split(equalCounts))
        {
            tasks.Add(Task.Run<List<Error>>(() => FindDistinctErrorMessages(kbList, subSet.ToList()), CancellationToken.None));
        }
    };

    // Same scheduling order as before: warning, severe, fatal, error.
    queueMatchingTasks(ErrorType.Warning, filteredWarningKBList, warningErrors);
    queueMatchingTasks(ErrorType.Severe, filteredSevereKBList, severeErrors);
    queueMatchingTasks(ErrorType.Fatal, filteredFatalKBList, fatalErrors);
    queueMatchingTasks(ErrorType.Error, filteredcbeccErrorsKBList, cbeccErrors);


After starting these tasks, it takes a lot of time for them to complete. The wait statement for the created tasks somehow puts the application in a hung state.

try
{
    // Block until every matching task has finished.
    Task.WaitAll(tasks.ToArray());

    // Concatenate the task results in task order and drop duplicate entries.
    List<Error> result = tasks
        .SelectMany(task => task.Result)
        .Distinct()
        .ToList();

    // Collapse all results that share an ErrorMessagePattern into the first one of each group.
    foreach (var patternGroup in result.GroupBy(res => res.ErrorMessagePattern).ToList())
    {
        Error representative = patternGroup.First();

        // Sum the counts before any member's count is reset below.
        representative.ErrorCount = patternGroup.Sum(member => member.ErrorCount);

        if (patternGroup.Count() > 1)
        {
            foreach (Error member in patternGroup.ToList())
            {
                if (member != representative)
                {
                    if (representative.Errors == null)
                    {
                        representative.Errors = new List<Error>();
                    }

                    member.ErrorCount = 1;

                    // Move the member's child errors under the representative.
                    if (member.Errors != null && member.Errors.Count > 0)
                    {
                        representative.Errors.AddRange(member.Errors);
                        member.Errors = null;
                    }
                }
            }
        }

        distinctErrors.Add(representative);
    }
}
finally
{
    // No cleanup is performed here.
}

// Every error still present in `errors` is reported individually with a count of one.
// NOTE(review): presumably these are the messages no pattern matched - confirm.
foreach (Error remaining in errors)
{
    remaining.ErrorCount = 1;
    AddErrorResolutionMessage(remaining, null);
    distinctErrors.Add(remaining);

    // Log the message text of errors whose resolution is reported as "Not Found".
    if (remaining.PossibleResolution == "Not Found")
    {
        logMessage.AppendLine(remaining.ErrorMessage);
    }
}




> Is there any better way or algorithm to reduce the time of processing
> these lists and reduce the time complexity of the process, rather than
> processing m*n elements?
Posted
Updated 4-Sep-17 2:44am

1 solution

C#
// Builds one list of knowledge-base entries per ErrorType string; each ToList() materializes its filter immediately.
List<KBError> warningKBErrors = distinctKBErrors.Where(kbErr => kbErr.ErrorType == "Warning").ToList();
List<KBError> fatalKBErrors = distinctKBErrors.Where(kbErr => kbErr.ErrorType == "Fatal").ToList();
List<KBError> severeKBErrors = distinctKBErrors.Where(kbErr => kbErr.ErrorType == "Severe").ToList();
List<KBError> cbeccErrorKBErrors = distinctKBErrors.Where(kbErr => kbErr.ErrorType == "Error").ToList();

You are taking a performance hit with all the .ToList()s. Linq uses iterators for performance and you're removing that benefit. Have you tried removing them?

RegEx can also give you a performance hit if not correctly optimised. Check and optimise. This may help: Compilation and Reuse in Regular Expressions | Microsoft Docs[^]

Lastly, you could optimise your Parallel code. Have a read of this thread: c# - How to improve throughput on Parallel.ForEach - Stack Overflow[^]
 
Share this answer
 

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900