using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.Windows.Forms;
using System.Data;
using System.Diagnostics;
namespace RegularExpression
{
/// <summary>
/// the regular expression recognizer class
/// </summary>
public class RegEx
{
/// <summary>
/// Used to validate the pattern string and make sure it is in correct form
/// </summary>
static private RegExValidator m_reValidator = new RegExValidator();
/// <summary>
/// Position of the last error in the pattern string.
///
/// </summary>
private int m_nLastErrorIndex = -1;
/// <summary>
/// Length of the error substring in the pattern string
/// </summary>
private int m_nLastErrorLength = -1;
/// <summary>
/// ErrorCode indicating what if the last compilation succeded.
/// </summary>
private ErrorCode m_LastErrorCode = ErrorCode.ERR_SUCCESS;
/// <summary>
/// Specifies match must occure at the begining of the input string (^)
/// </summary>
private bool m_bMatchAtStart = false;
/// <summary>
/// Specifies match must occure at the end of the input string ($)
/// </summary>
private bool m_bMatchAtEnd = false;
/// <summary>
/// Behave greedy, default is true.
/// If true, Find will stop at the first accepting state, otherwisie it will try process more charectr
/// </summary>
private bool m_bGreedy = true;
/// <summary>
/// Start state of the DFA M'.
/// </summary>
private State m_stateStartDfaM = null;
/// <summary>
/// public default constructor
/// </summary>
public RegEx()
{
}
/// <summary>
/// to keep set of distinct input symbol. needed for NFA to DFA conversion
/// </summary>
private Set m_setInputSymbol = new Set();
/// <summary>
/// Converts a regular expression from infix form to postfix form
/// </summary>
/// <param name="sInfixPattern">regular expression in infix form</param>
/// <returns>regular expression in postfix form</returns>
private string ConvertToPostfix(string sInfixPattern)
{
Stack stackOperator = new Stack();
Queue queuePostfix = new Queue();
bool bEscape = false;
for (int i = 0; i < sInfixPattern.Length; i++)
{
char ch = sInfixPattern[i];
if (bEscape == false && ch == MetaSymbol.ESCAPE)
{
queuePostfix.Enqueue(ch);
bEscape = true;
continue;
}
if (bEscape == true)
{
queuePostfix.Enqueue(ch);
bEscape = false;
continue;
}
switch (ch)
{
case MetaSymbol.OPEN_PREN:
stackOperator.Push(ch);
break;
case MetaSymbol.CLOSE_PREN:
while ((char)stackOperator.Peek() != MetaSymbol.OPEN_PREN)
{
queuePostfix.Enqueue(stackOperator.Pop());
}
stackOperator.Pop(); // pop the '('
break;
default:
while (stackOperator.Count > 0)
{
char chPeeked = (char)stackOperator.Peek();
int nPriorityPeek = GetOperatorPriority(chPeeked);
int nPriorityCurr = GetOperatorPriority(ch);
if (nPriorityPeek >= nPriorityCurr)
{
queuePostfix.Enqueue(stackOperator.Pop());
}
else
{
break;
}
}
stackOperator.Push(ch);
break;
}
} // end of for..loop
while (stackOperator.Count > 0)
{
queuePostfix.Enqueue((char)stackOperator.Pop());
}
StringBuilder sb = new StringBuilder(1024);
while (queuePostfix.Count > 0)
{
sb.Append((char)queuePostfix.Dequeue());
}
return sb.ToString();
}
/// <summary>
/// helper function. needed for postfix conversion
/// </summary>
/// <param name="chOpt">literal symbol</param>
/// <returns>priority</returns>
private int GetOperatorPriority(char chOpt)
{
switch (chOpt)
{
case MetaSymbol.OPEN_PREN:
return 0;
case MetaSymbol.ALTERNATE:
return 1;
case MetaSymbol.CONCANATE:
return 2;
case MetaSymbol.ZERO_OR_ONE:
case MetaSymbol.ZERO_OR_MORE:
case MetaSymbol.ONE_OR_MORE:
return 3;
case MetaSymbol.COMPLEMENT:
return 4;
default:
return 5;
}
}
/// <summary>
/// Compiles a pattern string produces a Minimum DFA model.
/// </summary>
/// <param name="sPattern">Actual pattern string in the correct format</param>
/// <param name="sbStats">This will receive the statistics. Can be null.</param>
/// <returns>ErrorCode indicating how the compilatio went.</returns>
public ErrorCode CompileWithStats(string sPattern, StringBuilder sbStats)
{
Console.WriteLine("Orig RE: " + sPattern);
if (sbStats == null)
{
return Compile(sPattern); // no statistics required
}
m_setInputSymbol.Clear();
State.ResetCounter();
string sUnit = "Tick(s)";
long tcStart = DateTime.Now.Ticks;
ValidationInfo vi = m_reValidator.Validate(sPattern);
UpdateValidationInfo(vi);
if (vi.ErrorCode != ErrorCode.ERR_SUCCESS)
{
return vi.ErrorCode;
}
string sRegExConcat = vi.FormattedString;
long tcConcat = DateTime.Now.Ticks - tcStart;
tcStart = DateTime.Now.Ticks;
string sRegExPostfix = ConvertToPostfix(sRegExConcat);
long tcPostfix = DateTime.Now.Ticks - tcStart;
Set setMasterNfa = new Set();
tcStart = DateTime.Now.Ticks;
State stateStartNfa = CreateNfa(sRegExPostfix, setMasterNfa);
long tcNfa = DateTime.Now.Ticks - tcStart;
int nNfaStateCount = setMasterNfa.Count;
string sFsaNfa = GetFsaTable(stateStartNfa, setMasterNfa, m_setInputSymbol);
State.ResetCounter();
Set setMasterDfa = new Set();
tcStart = DateTime.Now.Ticks;
State stateStartDfa = ConvertToDfa(stateStartNfa, setMasterDfa);
m_stateStartDfaM = stateStartDfa;
long tcDfa = DateTime.Now.Ticks - tcStart;
int nDfaStateCount = setMasterDfa.Count;
string sFsaDfa = GetFsaTable(stateStartDfa, setMasterDfa, m_setInputSymbol);
Set setMasterDfaM = new Set();
setMasterDfaM.Union(setMasterDfa); // working on the references of DFA states
tcStart = DateTime.Now.Ticks;
State stateStartDfaM = ReduceDfa(stateStartDfa, setMasterDfaM, m_setInputSymbol);
m_stateStartDfaM = stateStartDfaM;
long tcDfaM = DateTime.Now.Ticks - tcStart;
int nDfaMStateCount = setMasterDfaM.Count;
string sFsaDfaM = GetFsaTable(stateStartDfaM, setMasterDfaM, m_setInputSymbol);
sbStats.Append("Original String:\t" + sPattern);
sbStats.AppendLine();
sbStats.Append("After Validation:\t" + sRegExConcat + " (TimeSpan: " + tcConcat.ToString() + " " + sUnit + ")");
sbStats.AppendLine();
sbStats.Append("After Postfix:\t\t" + sRegExPostfix + " (TimeSpan: " + tcPostfix.ToString() + " " + sUnit + ")");
sbStats.AppendLine();
sbStats.Append("Input Symbol Count:\t" + m_setInputSymbol.Count.ToString());
sbStats.AppendLine();
sbStats.AppendLine();
sbStats.Append("NFA Table(TimeSpan: " + tcNfa.ToString() + " " + sUnit + "):");
sbStats.AppendLine();
sbStats.Append(sFsaNfa);
sbStats.AppendLine();
sbStats.AppendLine();
sbStats.Append("DFA Table(TimeSpan: " + tcDfa.ToString() + " " + " " + sUnit + "):");
sbStats.AppendLine();
sbStats.Append(sFsaDfa);
sbStats.AppendLine();
sbStats.AppendLine();
sbStats.Append("DFA M' Table(TimeSpan: " + tcDfaM.ToString() + " " + sUnit + "):");
sbStats.AppendLine();
sbStats.Append(sFsaDfaM);
sbStats.AppendLine();
sbStats.AppendLine();
long nTotal = (tcConcat + tcPostfix + tcNfa + tcDfa + tcDfaM);
sbStats.AppendLine();
sbStats.Append("Total TimeSpan: " + nTotal.ToString() + " " + sUnit);
return ErrorCode.ERR_SUCCESS;
}
/// <summary>
/// Compiles a pattern string produces a Minimum DFA model.
/// </summary>
/// <param name="sPattern">Actual pattern string in the correct format</param>
/// <returns>ErrorCode indicating how the compilatio went.</returns>
public ErrorCode Compile(string sPattern)
{
ValidationInfo vi = m_reValidator.Validate(sPattern);
UpdateValidationInfo(vi);
if (vi.ErrorCode != ErrorCode.ERR_SUCCESS)
{
return vi.ErrorCode;
}
m_setInputSymbol.Clear();
State.ResetCounter();
string sRegExConcat = sPattern;
string sRegExPostfix = ConvertToPostfix(sRegExConcat);
Set setMasterNfa = new Set();
State stateStartNfa = CreateNfa(sRegExPostfix, setMasterNfa);
State.ResetCounter();
Set setMasterDfa = new Set();
State stateStartDfa = ConvertToDfa(stateStartNfa, setMasterDfa);
m_stateStartDfaM = stateStartDfa;
Set setMasterDfaM = new Set();
setMasterDfaM.Union(setMasterDfa); // working on the references of DFA states
m_stateStartDfaM = ReduceDfa(stateStartDfa, setMasterDfaM, m_setInputSymbol);
return ErrorCode.ERR_SUCCESS;
}
/// <summary>
/// Finds all state reachable from the specic state on Epsilon transition
/// </summary>
/// <param name="stateStart">State from which search begins</param>
/// <returns>A set of all state reachable from teh startState on Epsilon transtion</returns>
private Set Eclosure(State stateStart)
{
Set setProcessed = new Set();
Set setUnprocessed = new Set();
setUnprocessed.AddElement(stateStart);
while (setUnprocessed.Count > 0)
{
State state = (State)setUnprocessed[0];
State[] arrTrans = state.GetTransitions(MetaSymbol.EPSILON);
setProcessed.AddElement(state);
setUnprocessed.RemoveElement(state);
if (arrTrans != null)
{
foreach (State stateEpsilon in arrTrans)
{
if (!setProcessed.ElementExist(stateEpsilon))
{
setUnprocessed.AddElement(stateEpsilon);
}
}
}
}
return setProcessed;
}
/// <summary>
/// Finds all state reachable from the set of states on Epsilon transition
/// </summary>
/// <param name="setState">Set of states to search from</param>
/// <returns></returns>
private Set Eclosure(Set setState)
{
Set setAllEclosure = new Set();
State state = null;
foreach (object obj in setState)
{
state = (State)obj;
Set setEclosure = Eclosure(state);
setAllEclosure.Union(setEclosure);
}
return setAllEclosure;
}
/// <summary>
/// Gets Move of a set states.
/// </summary>
/// <param name="setState">Set of state for which to get Move </param>
/// <param name="chInputSymbol">Innput symbol</param>
/// <returns>Set of Move</returns>
private Set Move(Set setState, string sInputSymbol)
{
Set set = new Set();
State state = null;
foreach (object obj in setState)
{
state = (State)obj;
Set setMove = Move(state, sInputSymbol);
set.Union(setMove);
}
return set;
}
/// <summary>
/// Gets Move of a state.
/// </summary>
/// <param name="state">state for which to get Move</param>
/// <param name="chInputSymbol">Input symbol</param>
/// <returns>Set of Move</returns>
private Set Move(State state, string sInputSymbol)
{
Set set = new Set();
State[] arrTrans = state.GetTransitions(sInputSymbol);
if (arrTrans != null)
{
set.AddElementRange(arrTrans);
}
return set;
}
/// <summary>
/// Converts a regular expression in posfix form to NFA using "Thompson�s Construction"
/// </summary>
/// <param name="sRegExPosfix">Regulare expression in postfix form (pattern)</param>
/// <returns>Start state of the NFA</returns>
private State CreateNfa(string sRegExPosfix, Set setMasterNfa)
{
Stack stackNfa = new Stack();
NfaExpression expr = null;
NfaExpression exprA = null;
NfaExpression exprB = null;
NfaExpression exprNew = null;
bool bEscape = false;
foreach (char ch in sRegExPosfix)
{
if (bEscape == false && ch == MetaSymbol.ESCAPE)
{
bEscape = true;
continue;
}
if (bEscape == true)
{
exprNew = new NfaExpression();
exprNew.StartState().AddTransition(ch.ToString(), exprNew.FinalState());
stackNfa.Push(exprNew);
m_setInputSymbol.AddElement(ch); // this set is later used to convert from NFA to DFA
setMasterNfa.AddElement(exprNew.StartState());
setMasterNfa.AddElement(exprNew.FinalState());
bEscape = false;
continue;
}
switch (ch)
{
case MetaSymbol.ZERO_OR_MORE: // A* Kleene closure
exprA = (NfaExpression)stackNfa.Pop();
exprNew = new NfaExpression();
exprA.FinalState().AddTransition(MetaSymbol.EPSILON, exprA.StartState());
exprA.FinalState().AddTransition(MetaSymbol.EPSILON, exprNew.FinalState());
exprNew.StartState().AddTransition(MetaSymbol.EPSILON, exprA.StartState());
exprNew.StartState().AddTransition(MetaSymbol.EPSILON, exprNew.FinalState());
stackNfa.Push(exprNew);
setMasterNfa.AddElement(exprNew.StartState());
setMasterNfa.AddElement(exprNew.FinalState());
break;
case MetaSymbol.ALTERNATE: // A|B
exprB = (NfaExpression)stackNfa.Pop();
exprA = (NfaExpression)stackNfa.Pop();
exprNew = new NfaExpression();
exprA.FinalState().AddTransition(MetaSymbol.EPSILON, exprNew.FinalState());
exprB.FinalState().AddTransition(MetaSymbol.EPSILON, exprNew.FinalState());
exprNew.StartState().AddTransition(MetaSymbol.EPSILON, exprA.StartState());
exprNew.StartState().AddTransition(MetaSymbol.EPSILON, exprB.StartState());
stackNfa.Push(exprNew);
setMasterNfa.AddElement(exprNew.StartState());
setMasterNfa.AddElement(exprNew.FinalState());
break;
case MetaSymbol.CONCANATE: // AB
exprB = (NfaExpression)stackNfa.Pop();
exprA = (NfaExpression)stackNfa.Pop();
exprA.FinalState().AddTransition(MetaSymbol.EPSILON, exprB.StartState());
exprNew = new NfaExpression(exprA.StartState(), exprB.FinalState());
stackNfa.Push(exprNew);
setMasterNfa.AddElement(exprNew.StartState());
setMasterNfa.AddElement(exprNew.FinalState());
break;
case MetaSymbol.ONE_OR_MORE: // A+ => AA* => A.A*
exprA = (NfaExpression)stackNfa.Pop();
exprNew = new NfaExpression();
exprNew.StartState().AddTransition(MetaSymbol.EPSILON, exprA.StartState());
exprNew.FinalState().AddTransition(MetaSymbol.EPSILON, exprA.StartState());
exprA.FinalState().AddTransition(MetaSymbol.EPSILON, exprNew.FinalState());
stackNfa.Push(exprNew);
setMasterNfa.AddElement(exprNew.StartState());
setMasterNfa.AddElement(exprNew.FinalState());
break;
case MetaSymbol.ZERO_OR_ONE: // A? => A|empty
exprA = (NfaExpression)stackNfa.Pop();
exprNew = new NfaExpression();
exprNew.StartState().AddTransition(MetaSymbol.EPSILON, exprA.StartState());
exprNew.StartState().AddTransition(MetaSymbol.EPSILON, exprNew.FinalState());
exprA.FinalState().AddTransition(MetaSymbol.EPSILON, exprNew.FinalState());
stackNfa.Push(exprNew);
setMasterNfa.AddElement(exprNew.StartState());
setMasterNfa.AddElement(exprNew.FinalState());
break;
case MetaSymbol.ANY_ONE_CHAR:
exprNew = new NfaExpression();
exprNew.StartState().AddTransition(MetaSymbol.ANY_ONE_CHAR_TRANS, exprNew.FinalState());
stackNfa.Push(exprNew);
m_setInputSymbol.AddElement(MetaSymbol.ANY_ONE_CHAR_TRANS); // this set is later used to convert from NFA to DFA
setMasterNfa.AddElement(exprNew.StartState());
setMasterNfa.AddElement(exprNew.FinalState());
break;
case MetaSymbol.COMPLEMENT: // ^
exprA = (NfaExpression)stackNfa.Pop();
NfaExpression exprDummy = new NfaExpression();
exprDummy.StartState().AddTransition(MetaSymbol.DUMMY, exprDummy.FinalState());
exprA.FinalState().AddTransition(MetaSymbol.EPSILON, exprDummy.StartState());
NfaExpression exprAny = new NfaExpression();
exprAny.StartState().AddTransition(MetaSymbol.ANY_ONE_CHAR_TRANS, exprAny.FinalState());
exprNew = new NfaExpression();
exprNew.StartState().AddTransition(MetaSymbol.EPSILON, exprA.StartState());
exprNew.StartState().AddTransition(MetaSymbol.EPSILON, exprAny.StartState());
exprAny.FinalState().AddTransition(MetaSymbol.EPSILON, exprNew.FinalState());
exprDummy.FinalState().AddTransition(MetaSymbol.EPSILON, exprNew.FinalState());
stackNfa.Push(exprNew);
m_setInputSymbol.AddElement(MetaSymbol.ANY_ONE_CHAR_TRANS); // this set is later used to convert from NFA to DFA
m_setInputSymbol.AddElement(MetaSymbol.DUMMY);
setMasterNfa.AddElement(exprNew.StartState());
setMasterNfa.AddElement(exprNew.FinalState());
setMasterNfa.AddElement(exprAny.StartState());
setMasterNfa.AddElement(exprAny.FinalState());
setMasterNfa.AddElement(exprDummy.StartState());
setMasterNfa.AddElement(exprDummy.FinalState());
break;
default:
exprNew = new NfaExpression();
exprNew.StartState().AddTransition(ch.ToString(), exprNew.FinalState());
stackNfa.Push(exprNew);
m_setInputSymbol.AddElement(ch); // this set is later used to convert from NFA to DFA
setMasterNfa.AddElement(exprNew.StartState());
setMasterNfa.AddElement(exprNew.FinalState());
break;
} // end of switch statement
} // end of for..each loop
Debug.Assert(stackNfa.Count == 1);
expr = (NfaExpression)stackNfa.Pop(); // pop the very last one. YES, THERE SHOULD ONLY BE ONE LEFT AT THIS POINT
expr.FinalState().AcceptingState = true; // the very last state is the accepting state of the NFA
return expr.StartState(); // retun the start state of NFA
} // end of CreateNfa method
/// <summary>
/// Converts NFA to DFA using "Subset Construction"
/// </summary>
/// <param name="stateStartNfa">Starting state of NFA</param>
/// <param name="setMasterDfa">Contains set of all DFA states when this function returns</param>
/// <returns>Starting state of DFA</returns>
private State ConvertToDfa(State stateStartNfa, Set setMasterDfa)
{
NfaToDfaHelper helper = new NfaToDfaHelper();
Set setMove = null;
Set setEclosure = null;
// first, we get Eclosure of the start state of NFA ( just following the algoritham)
setEclosure = Eclosure(stateStartNfa);
State stateStartDfa = new State(); // create a new DFA state to represent the above Eclosure
// NOTE:
// we keep track of the NFA Eclosure and the DFA state that represent the Eclosure.
// we maintain a relationship between the NFA Eclosure and DFA state that represents the NFA Eclosure.
// all these are done in the NfaToDfaHelper class.
if (IsAcceptingGroup(setEclosure) == true)
{
stateStartDfa.AcceptingState = true;
}
helper.AddDfaState(stateStartDfa, setEclosure);
string sInputSymbol = String.Empty; // dummy
// please see "subset construction" algoritham
// for clear understanding
State stateT = null;
Set setT = null;
State stateU = null;
while ((stateT = helper.GetNextUnmarkedDfaState()) != null)
{
helper.Mark(stateT); // flag it to indicate that we have processed this state.
// the DFA state stateT represents a set of NFA Eclosure.
// so, we retrieve the Eclosure.
setT = helper.GetEclosureByDfaState(stateT);
foreach (object obj in m_setInputSymbol)
{
sInputSymbol = obj.ToString();
setMove = Move(setT, sInputSymbol);
if (setMove.IsEmpty() == false)
{
setEclosure = Eclosure(setMove);
stateU = helper.FindDfaStateByEclosure(setEclosure);
if (stateU == null) // so set setEclosure must be a new one and we should crate a new DFA state
{
stateU = new State();
if (IsAcceptingGroup(setEclosure) == true)
{
stateU.AcceptingState = true;
}
helper.AddDfaState(stateU, setEclosure); // add new state (as unmarked by default)
}
stateT.AddTransition(sInputSymbol, stateU);
}
} // end of foreach..loop
} // end of while..loop
setMasterDfa.Union(helper.GetAllDfaState());
return stateStartDfa;
} // end of ConverToDfa method
/// <summary>
/// Converts DFA to Minimum DFA or DFA M'.
/// </summary>
/// <param name="stateStartDfa">Starting state of DFA</param>
/// <param name="setMasterDfa">Set of all DFA state (including the starting one)</param>
/// <param name="setInputSymbol">Set of all input symbol</param>
/// <returns>Starting state of DFA M'</returns>
private State ReduceDfa(State stateStartDfa, Set setMasterDfa, Set setInputSymbol)
{
State stateStartReducedDfa = null; // start state of the Reduced DFA
ArrayList arrGroup = null; // master array of all possible partitions/groups
// STEP 1: partition the DFS states.
// we do this by calling another method
arrGroup = PartitionDfaGroups(setMasterDfa, setInputSymbol);
// NOTE: arrGroup now contains all possible groups for all the DFA state.
// STEP 2: now we go through all the groups and select a group representative for each group.
// eventually the representive becomes one of the state in DFA M'. All other members of the groups get eliminited (deleted).
foreach (object objGroup in arrGroup)
{
Set setGroup = (Set)objGroup;
bool bAcceptingGroup = IsAcceptingGroup(setGroup); // see if the group contains any accepting state
bool bStartingGroup = setGroup.ElementExist(stateStartDfa); // check if the group contains the starting DFA state
// choose group representative
State stateRepresentative = (State)setGroup[0]; // just choose the first one as group representative
// should the representative be start state of DFA M'
if (bStartingGroup == true)
{
stateStartReducedDfa = stateRepresentative;
}
// should the representative be an accepting state of DFA M'
if (bAcceptingGroup == true)
{
stateRepresentative.AcceptingState = true;
}
if (setGroup.GetCardinality() == 1)
{
continue; // no need for further processing
}
// STEP 3: remove the representative from its group
// and replace all the references of the remaining member of the group with the representative
setGroup.RemoveElement(stateRepresentative);
State stateToBeReplaced = null; // state to be replaced with the group representative
int nReplecementCount = 0;
foreach (object objStateToReplaced in setGroup)
{
stateToBeReplaced = (State)objStateToReplaced;
setMasterDfa.RemoveElement(stateToBeReplaced); // remove this member from the master set as well
foreach (object objState in setMasterDfa)
{
State state = (State)objState;
nReplecementCount += state.ReplaceTransitionState(stateToBeReplaced, stateRepresentative);
}
// here, in C++, you would actully delete the stateA object by calling:
// delete stateToBeRemoved;
}
} // end of outer foreach..loop
// STEP 4: now remove all "dead states"
int nIndex = 0;
while (nIndex < setMasterDfa.Count)
{
State state = (State)setMasterDfa[nIndex];
if (state.IsDeadState())
{
setMasterDfa.RemoveAt(nIndex);
// here, in C++, you would actully delete the stateDead object by calling:
// delete stateDead;
continue;
}
nIndex++;
}
return stateStartReducedDfa;
}
/// <summary>
/// Helper function. Check to see if a set contains any accpeting state.
/// </summary>
/// <param name="setGroup">Set of state</param>
/// <returns>true if set contains any accpting states, otherwise false</returns>
private bool IsAcceptingGroup(Set setGroup)
{
State state = null;
foreach (object objState in setGroup)
{
state = (State)objState;
if (state.AcceptingState == true)
{
return true;
}
}
return false;
}
/// <summary>
/// Partions set of all DFA states into smaller groups (according to the partion rules).
/// Please see notes for detail of partioning DFA.
/// </summary>
/// <param name="setMasterDfa">Set of all DFA states</param>
/// <param name="setInputSymbol">Set of all input symbol</param>
/// <returns>Array of DFA groups</returns>
private ArrayList PartitionDfaGroups(Set setMasterDfa, Set setInputSymbol)
{
ArrayList arrGroup = new ArrayList(); // array of all set (group) of DFA states.
Map map = new Map(); // to keep track of which memeber transition into which group
Set setEmpty = new Set();
// first we need to create two partition of the setMasterDfa:
// one with all the accepting states and the other one with all the non-accpeting states
Set setAccepting = new Set(); // group of all accepting state
Set setNonAccepting = new Set(); // group of all non-accepting state
foreach (object objState in setMasterDfa)
{
State state = (State)objState;
if (state.AcceptingState == true)
{
setAccepting.AddElement(state);
}
else
{
setNonAccepting.AddElement(state);
}
}
if (setNonAccepting.GetCardinality() > 0)
{
arrGroup.Add(setNonAccepting); // add this newly created partition to the master list
}
// for accepting state, there should always be at least one state, if NOT then there must be something wrong somewhere
arrGroup.Add(setAccepting); // add this newly created partition to the master list
// now we iterate through these two partitions and see if they can be further partioned.
// we continuew the iteration until no further paritioning is possible.
IEnumerator iterInput = setInputSymbol.GetEnumerator();
iterInput.Reset();
while (iterInput.MoveNext())
{
string sInputSymbol = iterInput.Current.ToString();
int nPartionIndex = 0;
while (nPartionIndex < arrGroup.Count)
{
Set setToBePartitioned = (Set)arrGroup[nPartionIndex];
nPartionIndex++;
if (setToBePartitioned.IsEmpty() || setToBePartitioned.GetCardinality() == 1)
{
continue; // because we can't partition a set with zero or one memeber in it
}
foreach (object objState in setToBePartitioned)
{
State state = (State)objState;
State[] arrState = state.GetTransitions(sInputSymbol.ToString());
if (arrState != null)
{
Debug.Assert(arrState.Length == 1);
State stateTransionTo = arrState[0]; // since the state is DFA state, this array should contain only ONE state
Set setFound = FindGroup(arrGroup, stateTransionTo);
map.Add(setFound, state);
}
else // no transition exists, so transition to empty set
{
//setEmpty = new Set();
map.Add(setEmpty, state); // keep a map of which states transtion into which group
}
} // end of foreach (object objState in setToBePartitioned)
if (map.Count > 1) // means some states transition into different groups
{
arrGroup.Remove(setToBePartitioned);
foreach (DictionaryEntry de in map)
{
Set setValue = (Set)de.Value;
arrGroup.Add(setValue);
}
nPartionIndex = 0; // we want to start from the begining again
iterInput.Reset(); // we want to start from the begining again
}
map.Clear();
} // end of while..loop
} // end of foreach (object objString in setInputSymbol)
return arrGroup;
} // end of PartitionDfaSet method
/// <summary>
/// Helper function. Finds a set in a array of set for a particular state.
/// </summary>
/// <param name="arrGroup">Array of set of states</param>
/// <param name="state">State to search for</param>
/// <returns>Set the state belongs to</returns>
private Set FindGroup(ArrayList arrGroup, State state)
{
foreach (object objSet in arrGroup)
{
Set set = (Set)objSet;
if (set.ElementExist(state) == true)
{
return set;
}
}
return null;
}
/// <summary>
/// Retuns a string format of NFA.Set type.
/// Only used for debugging.
/// </summary>
/// <param name="set">Set of state.</param>
/// <returns>Formatted string</returns>
private string SetToString(Set set)
{
string s = "";
foreach (object objState in set)
{
State state = (State)objState;
s += state.Id.ToString() + ", ";
}
s = s.TrimEnd(new char[] { ' ', ',' });
if (s.Length == 0)
{
s = "Empty";
}
s = "{" + s + "}";
return s;
}
/// <summary>
/// Converts an Automata into formatted string that represents a Finate Automat State table.
/// Very useful for understanding automata and drawing the model.
/// NOTE: Not needed for actual regular expression operations.
/// </summary>
/// <param name="startState">Starting state of the automata</param>
/// <param name="setAllState">Set of all states</param>
/// <param name="setInputSymbol">Set of all input symbols</param>
/// <returns>Formatted string</returns>
private string GetFsaTable(State startState, Set setAllState, Set setInputSymbol)
{
FsaDS fsaDS = new FsaDS();
Set setAllInput = new Set();
setAllInput.Union(setInputSymbol);
setAllInput.RemoveElement(MetaSymbol.EPSILON);
setAllInput.AddElement(MetaSymbol.EPSILON);
string sInputSymbol = String.Empty;
foreach (object objString in setAllInput)
{
sInputSymbol = objString.ToString();
System.Data.DataColumn dc = fsaDS.Fsa.Columns.Add(sInputSymbol);
dc.DataType = typeof(string);
dc.DefaultValue = "--";
}
int nTransitionCount = 0;
State state = null;
FsaDS.FsaRow rFsa = null;
foreach (object objState in setAllState)
{
state = (State)objState;
rFsa = fsaDS.Fsa.NewFsaRow();
if (state.AcceptingState == true)
{
rFsa.State = "{" + "s" + state.Id.ToString() + "}";
}
else
{
rFsa.State = "s" + state.Id.ToString();
}
if (state.Equals(startState) == true)
{
rFsa.State = ">" + rFsa.State;
}
State[] arrTrans = null;
foreach (object objString in setAllInput)
{
sInputSymbol = objString.ToString();
arrTrans = state.GetTransitions(sInputSymbol.ToString());
if (arrTrans != null)
{
nTransitionCount += arrTrans.Length;
StringBuilder sbData = new StringBuilder(1024);
int i = 0;
for (i = 0; i < arrTrans.Length - 1; i++)
{
sbData.Append("s" + arrTrans[i].Id.ToString() + ", ");
}
sbData.Append("s" + arrTrans[i].Id.ToString());
System.Data.DataColumn dc = fsaDS.Fsa.Columns[sInputSymbol];
rFsa[dc] = sbData.ToString();
}
}
fsaDS.Fsa.AddFsaRow(rFsa);
}
fsaDS.AcceptChanges();
StringBuilder sb = new StringBuilder(2048);
// first create the column header using column name for header text
for (int i = 0; i < fsaDS.Fsa.Columns.Count - 1; i++)
{
sb.Append(fsaDS.Fsa.Columns[i].ColumnName);
sb.Append("\t|\t");
}
sb.Append(fsaDS.Fsa.Columns[fsaDS.Fsa.Columns.Count - 1].ColumnName);
sb.AppendLine();
// now create the rows
foreach (FsaDS.FsaRow fr in fsaDS.Fsa)
{
for (int i = 0; i < fsaDS.Fsa.Columns.Count - 1; i++)
{
sb.Append(fr[i].ToString());
sb.Append("\t|\t");
}
sb.Append(fr[fsaDS.Fsa.Columns.Count - 1].ToString());
sb.AppendLine();
}
string sFormat = "(State Count: {0}; Transition Count: {1})";
string sStat = String.Format(sFormat, setAllState.Count, nTransitionCount);
sb.Append(sStat);
sb.AppendLine();
return sb.ToString();
}
/// <summary>
/// Search and finds a match for the compiled pattern.
/// One must call Compile method before calling this method.
/// </summary>
/// <param name="sSearchIn">String to search in.</param>
/// <param name="nSearchStartAt">Index at which to begin the search.</param>
/// <param name="nSearchEndAt">Index at which to end the search.</param>
/// <param name="nFoundBeginAt">If match found, recives the index where the match started, otherwise -1</param>
/// <param name="nFoundEndAt">If match found, recives the index where the match ended, otherwise -1</param>
/// <returns>true if match found, otherwise false.</returns>
public bool FindMatch(string sSearchIn,
int nSearchStartAt,
int nSearchEndAt,
ref int nFoundBeginAt,
ref int nFoundEndAt)
{
if (m_stateStartDfaM == null)
{
return false;
}
if (nSearchStartAt < 0)
{
return false;
}
State stateStart = m_stateStartDfaM;
nFoundBeginAt = -1;
nFoundEndAt = -1;
bool bAccepted = false;
State toState = null;
State stateCurr = stateStart;
int nIndex = nSearchStartAt;
int nSearchUpTo = nSearchEndAt;
while (nIndex <= nSearchUpTo)
{
if (m_bGreedy && IsWildCard(stateCurr) == true)
{
if (nFoundBeginAt == -1)
{
nFoundBeginAt = nIndex;
}
ProcessWildCard(stateCurr, sSearchIn, ref nIndex, nSearchUpTo);
}
char chInputSymbol = sSearchIn[nIndex];
toState = stateCurr.GetSingleTransition(chInputSymbol.ToString());
if (toState == null)
{
toState = stateCurr.GetSingleTransition(MetaSymbol.ANY_ONE_CHAR_TRANS);
}
if (toState != null)
{
if (nFoundBeginAt == -1)
{
nFoundBeginAt = nIndex;
}
if (toState.AcceptingState)
{
if (m_bMatchAtEnd && nIndex != nSearchUpTo) // then we ignore the accepting state
{
//toState = stateStart ;
}
else
{
bAccepted = true;
nFoundEndAt = nIndex;
if (m_bGreedy == false)
{
break;
}
}
}
stateCurr = toState;
nIndex++;
}
else
{
if (!m_bMatchAtStart && !bAccepted ) // we reset everything
{
nIndex = (nFoundBeginAt != -1 ? nFoundBeginAt + 1 : nIndex + 1);
nFoundBeginAt = -1;
nFoundEndAt = -1;
//nIndex++;
stateCurr = stateStart; // start from begining
}
else
{
break;
}
}
} // end of while..loop
if (!bAccepted)
{
if (stateStart.AcceptingState == false)
{
return false;
}
else // matched an empty string
{
nFoundBeginAt = nSearchStartAt;
nFoundEndAt = nFoundBeginAt - 1;
return true;
}
}
return true;
}
/// <summary>
/// Determins if a state contains a wildcard transition.
/// i.e., A_*B
/// </summary>
/// <param name="state">State to check</param>
/// <returns>true if the state contains wildcard transition, otherwise false</returns>
private bool IsWildCard(State state)
{
return (state == state.GetSingleTransition(MetaSymbol.ANY_ONE_CHAR_TRANS));
}
/// <summary>
/// Process state that has wildcard transition.
/// </summary>
/// <param name="state">State with wildcard transition</param>
/// <param name="sSearchIn">String to search in.</param>
/// <param name="nCurrIndex">Current index of the search</param>
/// <param name="nSearchUpTo">Index where to stop the search.</param>
private void ProcessWildCard(State state, string sSearchIn, ref int nCurrIndex, int nSearchUpTo)
{
State toState = null;
int nIndex = nCurrIndex;
while (nIndex <= nSearchUpTo)
{
char ch = sSearchIn[nIndex];
toState = state.GetSingleTransition(ch.ToString());
if (toState != null)
{
nCurrIndex = nIndex;
}
nIndex++;
}
}
/// <summary>
/// Get the ready state of the parser.
/// </summary>
/// <returns>true if a Compile method had been called successfully, otherwise false.</returns>
public bool IsReady()
{
return (m_stateStartDfaM != null);
}
/// <summary>
/// Position where error occured during the last compilation
/// </summary>
/// <returns>-1 if there was no compilation</returns>
public int GetLastErrorPosition()
{
return m_nLastErrorIndex;
}
/// <summary>
/// Indicate if the last compilation was successfull or error
/// </summary>
/// <returns></returns>
public ErrorCode GetLastErrorCode()
{
return m_LastErrorCode;
}
/// <summary>
/// Get last error length.
/// </summary>
/// <returns>Length</returns>
public int GetLastErrorLength()
{
return m_nLastErrorLength;
}
/// <summary>
/// Gets/Sets to indicate whether Find should stop at the first accepting state,
/// or should continue see if further match is possible (greedy).
/// </summary>
public bool UseGreedy
{
get
{
return m_bGreedy;
}
set
{
m_bGreedy = value;
}
}
/// <summary>
/// Helper fucntion. Updates the local variables once the validation returns.
/// </summary>
/// <param name="vi"></param>
private void UpdateValidationInfo(ValidationInfo vi)
{
if (vi.ErrorCode == ErrorCode.ERR_SUCCESS)
{
m_bMatchAtEnd = vi.MatchAtEnd;
m_bMatchAtStart = vi.MatchAtStart;
}
m_LastErrorCode = vi.ErrorCode;
m_nLastErrorIndex = vi.ErrorStartAt;
m_nLastErrorLength = vi.ErrorLength;
}
}
}