Click here to Skip to main content
15,891,033 members
Articles / Programming Languages / C#

Combined Regular Expression Search

Rate me:
Please Sign up or sign in to vote.
4.98/5 (22 votes)
1 Apr 2014CPOL9 min read 57K   1.2K   37  
This article details the implementation of an efficient grouped regular expression searcher.
using MultiRegexSearcher;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace MultiRegexSearcher
{
    public static class NFAToDFA
    {
        public static FSM ConvertNFAToDFA(FSM NFA)
        {
            FSM DFA = new FSM();

            Queue<PowerSetElement> statesToCheck = new Queue<PowerSetElement>();
            var startingState = new PowerSetElement(new List<State> { NFA.StartingState });
            startingState.OutputTransitions = NFA.StartingState.OutputTransitions;
            statesToCheck.Enqueue(EpsilonClosureOf(startingState));
            DFA.States.Add(statesToCheck.Peek());
            DFA.StartingState = statesToCheck.Peek();

            PowerSetElement active;
            while (statesToCheck.Count != 0)
            {
                active = statesToCheck.Dequeue();
                //Get all possible transitions from any of the combined states
                HashSet<string> alphabetReach = new HashSet<string>();
                active.CombinedStates.ForEach(
                    state 
                        => 
                    EpsilonClosureOf(state).CombinedStates.ForEach(
                        reachedState 
                            =>
                        reachedState.OutputTransitions.ConvertAll(
                            trans 
                                => 
                            alphabetReach.Add(trans.Label))));
                foreach (string transLabel in alphabetReach)
                {
                    //See where I go with a specific label from this state. For that state find the epsilon-closure
                    //and add it to the que. 
                    if (transLabel == string.Empty)
                        continue;
                    HashSet<State> newCombinedStates = new HashSet<State>();
                    active.CombinedStates
                        .ForEach(state => state.OutputTransitions
                            .Where(trans => trans.Label == transLabel).ToList()
                                .ForEach(trans => EpsilonClosureOf(trans.TargetState).CombinedStates
                                    .ForEach(stateInClosureList => newCombinedStates.Add(stateInClosureList))));

                    PowerSetElement newCandidateDFAState = new PowerSetElement(newCombinedStates.ToList());
                    PowerSetElement newDFAState = (PowerSetElement)DFA.States.FirstOrDefault(powerset => powerset.Equals(newCandidateDFAState));
                    if (newDFAState == null)
                        newDFAState = newCandidateDFAState;
                    active.OutputTransitions.Add(new Transition(newDFAState, transLabel));
                    if (!DFA.States.Contains(newDFAState))
                    {
                        DFA.States.Add(newDFAState);
                        statesToCheck.Enqueue(newDFAState);
                    }
                }
            }

            DFA.MatchingStates.AddRange(DFA.States.Where(state => state.Type == State.StateType.Terminal));

            return DFA;
        }

        private static PowerSetElement EpsilonClosureOf(State theState)
        {
            var statesInImmeadiateEpsilonReach = theState.OutputTransitions
                            .Where(trans => trans.Label == string.Empty)
                            .Select(trans => trans.TargetState).ToList();
            HashSet<State> finalList = new HashSet<State>();
            statesInImmeadiateEpsilonReach
                .ForEach(stateInImmediateEpsilonReach 
                            => 
                        EpsilonClosureOf(stateInImmediateEpsilonReach).CombinedStates
                            .ForEach(state => finalList.Add(state)));
            finalList.Add(theState);
            return new PowerSetElement(finalList.ToList());
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer
Canada Canada
I'm a recent Msc Computer Science graduate with an interest in all things Machine Learning. I am in love with C# and having an affair with Python. My main interests in machine learning is NLP hence the Python affair. I enjoy classical music and for some reason am constantly fighting my body's call for exercise.

Comments and Discussions