|
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Lucene.Net.Analysis;
namespace AnalyzerViewer
{
/// <summary>
/// Base class for views that turn the tokens of a <see cref="TokenStream"/> into text.
/// </summary>
public abstract class AnalyzerView
{
    /// <summary>Gets the name of this view.</summary>
    public abstract string Name { get; }

    /// <summary>
    /// Reads <paramref name="tokenStream"/> until its <c>Next()</c> call returns null and
    /// concatenates the text that <see cref="GetTokenView"/> produces for each token.
    /// </summary>
    /// <param name="tokenStream">Stream the tokens are pulled from.</param>
    /// <param name="numberOfTokens">Receives the number of tokens that were read.</param>
    /// <returns>The per-token texts joined in the order the tokens were read.</returns>
    public virtual string GetView(TokenStream tokenStream, out int numberOfTokens)
    {
        var rendered = new StringBuilder();
        Token current = tokenStream.Next();
        numberOfTokens = 0;

        // A null token marks the end of the stream.
        for (; current != null; current = tokenStream.Next())
        {
            numberOfTokens += 1;
            rendered.Append(GetTokenView(current));
        }

        return rendered.ToString();
    }

    /// <summary>Produces the text shown for a single token.</summary>
    /// <param name="token">Token to render.</param>
    /// <returns>The text appended to the view for <paramref name="token"/>.</returns>
    protected abstract string GetTokenView(Token token);
}
/// <summary>
/// View that shows every token's term text wrapped in square brackets.
/// </summary>
public class TermAnalyzerView : AnalyzerView
{
    private const string ViewName = "Terms";

    /// <summary>Gets the name of this view, <c>"Terms"</c>.</summary>
    public override string Name
    {
        get { return ViewName; }
    }

    /// <summary>Renders a token as <c>[term]</c> followed by a single space.</summary>
    /// <param name="token">Token whose term text is rendered.</param>
    /// <returns>The bracketed term text with a trailing space.</returns>
    protected override string GetTokenView(Token token)
    {
        return string.Format("[{0}] ", token.TermText());
    }
}
/// <summary>
/// View that prints one line per token: the term text followed by its start and end offsets.
/// </summary>
public class TermWithOffsetsView : AnalyzerView
{
    private const string ViewName = "Terms With Offsets";

    /// <summary>Gets the name of this view, <c>"Terms With Offsets"</c>.</summary>
    public override string Name
    {
        get { return ViewName; }
    }

    /// <summary>
    /// Renders a token as its term text plus the start and end offsets, each left-padded
    /// to a width of five characters, terminated by a CR/LF pair.
    /// </summary>
    /// <param name="token">Token to render.</param>
    /// <returns>A single line of text describing <paramref name="token"/>.</returns>
    protected override string GetTokenView(Token token)
    {
        string term = token.TermText();
        string start = token.StartOffset().ToString().PadLeft(5);
        string end = token.EndOffset().ToString().PadLeft(5);

        return string.Concat(term, " Start: ", start, " End: ", end, "\r\n");
    }
}
/// <summary>
/// View that lists every distinct term text once, ordered by term, together with the
/// number of times it occurred in the token stream.
/// </summary>
public class TermFrequencies : AnalyzerView
{
    /// <summary>Gets the name of this view, <c>"Term Frequencies"</c>.</summary>
    public override string Name
    {
        get { return "Term Frequencies"; }
    }

    /// <summary>
    /// Reads <paramref name="tokenStream"/> until its <c>Next()</c> call returns null, counts
    /// how often each term text occurs, and renders the counts as <c>term [count] </c> entries
    /// ordered by term.
    /// </summary>
    /// <param name="tokenStream">Stream the tokens are pulled from.</param>
    /// <param name="numberOfTokens">Receives the total number of tokens read (not the number of distinct terms).</param>
    /// <returns>The concatenated <c>term [count] </c> entries.</returns>
    public override string GetView(TokenStream tokenStream, out int numberOfTokens)
    {
        // The counts live in a per-call local rather than an instance field: if the stream
        // throws part-way through, no partial counts are left behind to leak into the next
        // call, and concurrent or re-entrant calls cannot corrupt each other.
        var termCounts = new Dictionary<string, int>();
        StringBuilder sb = new StringBuilder();
        numberOfTokens = 0;

        for (Token token = tokenStream.Next(); token != null; token = tokenStream.Next())
        {
            numberOfTokens++;

            string term = token.TermText();
            int count;
            // TryGetValue leaves count at 0 when the term has not been seen yet,
            // so a single lookup covers both the "new" and "existing" cases.
            termCounts.TryGetValue(term, out count);
            termCounts[term] = count + 1;
        }

        foreach (var item in termCounts.OrderBy(x => x.Key))
        {
            sb.Append(item.Key).Append(" [").Append(item.Value).Append("] ");
        }

        return sb.ToString();
    }

    /// <summary>
    /// Not used by this view: <see cref="GetView"/> is overridden here and never calls it.
    /// </summary>
    /// <param name="token">Ignored.</param>
    /// <returns>Never returns.</returns>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    protected override string GetTokenView(Token token)
    {
        throw new NotImplementedException();
    }
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
I'm a proud father and a software developer. I'm fascinated by a few particular .NET projects such as Lucene.Net, NHibernate, Quartz.NET, and others. I love learning and studying code to learn how other people solve software problems.