|
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using EeekSoft.Text;
namespace AhoCorasickSearch
{
/// <summary>
/// Search using string.IndexOf (for comparsion)
/// </summary>
class IndexOfSearch : IStringSearchAlgorithm
{
#region IStringSearchAlgorithm Members
string[] _keywords;
public string[] Keywords
{
get { return _keywords; }
set { _keywords=value; }
}
public StringSearchResult[] FindAll(string text)
{
throw new NotImplementedException();
}
public StringSearchResult FindFirst(string text)
{
foreach(string kwd in _keywords)
{
int i=text.IndexOf(kwd);
if (i!=-1) return new StringSearchResult(i,kwd);
}
return StringSearchResult.Empty;
}
public bool ContainsAny(string text)
{
foreach(string kwd in _keywords)
{
int i=text.IndexOf(kwd);
if (i!=-1) return true;
}
return false;
}
#endregion
}
/// <summary>
/// Search using regular expressions (for comparsion)
/// </summary>
class RegexSearch : IStringSearchAlgorithm
{
#region IStringSearchAlgorithm Members
string[] _keywords;
Regex _reg;
public string[] Keywords
{
get { return _keywords; }
set
{
_keywords=value;
_reg=new Regex("("+string.Join("|",value)+")",RegexOptions.None);
}
}
public StringSearchResult[] FindAll(string text)
{
throw new NotImplementedException();
}
public StringSearchResult FindFirst(string text)
{
throw new NotImplementedException();
}
public bool ContainsAny(string text)
{
return _reg.Match(text).Success;
}
#endregion
}
/// <summary>
/// Testing application
/// </summary>
class TestApp
{
/// <summary>
/// Minimal and maximal word length
/// </summary>
const int MaxWordLength = 10;
const int MinWordLength = 3;
/// <summary>
/// Allowed letters in word
/// </summary>
const string AllowedLetters="abcdefghijklmnopqrstuvwxyz";
#region Utility methods
static Random rnd=new Random(12345);
/// <summary>
/// Generate random word
/// </summary>
public static string GetRandomWord()
{
StringBuilder sb=new StringBuilder();
for(int i=0; i<rnd.Next(MaxWordLength-MinWordLength)+MinWordLength; i++)
sb.Append(AllowedLetters[rnd.Next(AllowedLetters.Length)]);
return sb.ToString();
}
/// <summary>
/// Generate list of random keywords
/// </summary>
public static string[] GetRandomKeywords(int count)
{
string[] ret=new string[count];
for(int i=0; i<count; i++)
ret[i]=GetRandomWord();
return ret;
}
/// <summary>
/// Generate random text
/// </summary>
public static string GetRandomText(int count)
{
StringBuilder sb=new StringBuilder();
while(sb.Length<count) { sb.Append(GetRandomWord()); sb.Append(" "); }
return sb.ToString();
}
#endregion
/// <summary>
/// The main entry point for the application.
/// </summary>
[STAThread]
static void Main(string[] args)
{
// set language to english
// (because in czech "ch" is one letter and "abch".IndexOf("bc") returns -1)
System.Threading.Thread.CurrentThread.CurrentCulture=System.Globalization.CultureInfo.CreateSpecificCulture("en");
// algorithms
IStringSearchAlgorithm[] algorithms=new IStringSearchAlgorithm[3];
algorithms[0]=new StringSearch();
algorithms[1]=new IndexOfSearch();
algorithms[2]=new RegexSearch();
double[] results=new double[3];
for(int j=0; j<100; j++)
{
// generate random keywords and random text
string[] keywords=GetRandomKeywords(80);
string text=GetRandomText(2000);
// insert keyword into text
text=text.Insert(rnd.Next(text.Length),keywords[rnd.Next(keywords.Length)]);
// for each algorithm...
double first=0;
for(int algIndex=0; algIndex<algorithms.Length; algIndex++)
{
IStringSearchAlgorithm alg=algorithms[algIndex];
alg.Keywords=keywords;
// search for keywords and measure performance
HiPerfTimer tmr=new HiPerfTimer();
tmr.Start();
for(int i=0; i<10; i++)
{
if (!alg.ContainsAny(text))
{ Console.WriteLine(" Invalid result!"); return; }
}
tmr.Stop();
// write results
if (first==0) first=tmr.Duration;
results[algIndex]+=tmr.Duration;
Console.WriteLine("{0}: {1:0.00000}ms ({2:0.00}%)",alg.GetType().Name.PadRight(15),tmr.Duration,
100.0*tmr.Duration/first);
}
Console.WriteLine();
}
// Write summary results for all test
Console.WriteLine("\n===SUMMARY===");
double finalFirst=results[0];
for(int j=0; j<algorithms.Length; j++)
{
IStringSearchAlgorithm alg=algorithms[j];
Console.WriteLine("{0}: {1:0.00000}ms ({2:0.00}%)",alg.GetType().Name.PadRight(15),results[j],
100.0*results[j]/finalFirst);
}
Console.WriteLine();
}
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
I live in Prague, the capital city of Czech republic (most of the time
). I've been very interested in functional programming recently and I have a passion for the new Microsoft F# language. I'm writing a book about
Functional Programming in the Real World that shows the ideas using examples in C# 3.0 and F#.
I've been Microsoft MVP (for C#) since 2004 and I'm one of the most active members of the F# community. I'm a computer science student at
Charles University of Prague. My hobbies include photography, fractals and of course many things related to computers (except fixing them). My favorite book writers are Terry Pratchett and Philip K Dick and I like paintings by M. C. Escher.
PS: My favorite codeproject icon is .