|
using System;
using System.Collections;
using System.Text;
using System.Text.RegularExpressions;
using System.ComponentModel;
using System.Diagnostics;
using System.Drawing;
using System.Drawing.Design;
using System.Globalization;
using SpellChecker.Forms;
using SpellChecker.Dictionary;
using SpellChecker.Dictionary.Affix;
using SpellChecker.Dictionary.Phonetic;
namespace SpellChecker
{
/// <summary>
/// The Spelling class encapsulates the functions necessary to check
/// the spelling of inputted text.
/// </summary>
[ToolboxBitmap(typeof(SpellChecker.AutoSpelling), "Spelling.bmp")]
public partial class AutoSpelling : Component
{
#region Global Regex
// Regex instances live at class scope and are compiled so repeated use stays cheap.

// Matches a digit at the very start of the input.
// NOTE(review): the pattern is anchored with ^, so only words that BEGIN with a digit match — confirm
// this is what "ignore words with digits" is meant to cover.
private Regex _digitRegex = new Regex(@"^\d", RegexOptions.Compiled);

// Matches HTML-like tags so words inside them can be skipped.
// The options are combined with bitwise OR; combining flag values with & yields RegexOptions.None,
// which silently disabled both case-insensitivity and compilation.
// NOTE(review): the first alternative of the last group (\[^\]*\|) looks like a mangled
// double-quoted-attribute pattern — confirm against the upstream expression before changing it.
private Regex _htmlRegex = new Regex(@"</[c-g\d]+>|</[i-o\d]+>|</[a\d]+>|</[q-z\d]+>|<[cg]+[^>]*>|<[i-o]+[^>]*>|<[q-z]+[^>]*>|<[a]+[^>]*>|<(\[^\]*\|'[^']*'|[^'\>])*>", RegexOptions.IgnoreCase | RegexOptions.Compiled);

// HTML tag matches for the current text; refreshed whenever the word list is recalculated.
private MatchCollection _htmlTags;

// Matches any single non-digit character.
private Regex _letterRegex = new Regex(@"\D", RegexOptions.Compiled);

// Matches any character outside A-Z; a word with no match consists solely of A-Z.
private Regex _upperRegex = new Regex(@"[^A-Z]", RegexOptions.Compiled);

// Splits text into words: letters, digits, underscore, apostrophe and the À-ÿ range.
private Regex _wordEx = new Regex(@"\b[A-Za-z0-9_'À-ÿ]+\b", RegexOptions.Compiled);

// Word matches for the current text.
private MatchCollection _words;
#endregion
#region public properties
// --- dictionary and working text ---
// Dictionary used for lookups; created on demand when still null.
private WordDictionary _dictionary;
// Text currently being checked.
private StringBuilder _text = new StringBuilder();
// Raw (unclamped) index of the word currently being checked; starts at zero.
private int _wordIndex;

// --- option flags ---
private bool _ignoreAllCapsWords = true;
private bool _ignoreHtml = true;
// Off by default.
private bool _ignoreWordsWithDigits;
private SuggestionEnum _suggestionMode = SuggestionEnum.PhoneticNearMiss;

// --- replacement state ---
// Maps a misspelled word to the replacement that should be applied automatically.
private Hashtable _replaceList = new Hashtable();
// Replacement pending for the current word.
private string _replacementWord = "";
/// <summary>
/// Selects which strategy is used to produce replacement suggestions.
/// </summary>
public enum SuggestionEnum
{
    /// <summary>
    /// Use both the phonetic and the near miss strategies together.
    /// </summary>
    PhoneticNearMiss = 0,

    /// <summary>
    /// Suggest words based on how the word sounds.
    /// </summary>
    /// <remarks>
    /// This technique was developed by the open source project ASpell.net
    /// </remarks>
    Phonetic = 1,

    /// <summary>
    /// Suggest words obtained by replacing, removing or adding
    /// characters in the misspelled word.
    /// </summary>
    /// <remarks>
    /// This technique was developed by the open source spell checker ISpell
    /// </remarks>
    NearMiss = 2
}
/// <summary>
/// Gets the word at the current <see cref="WordIndex"/>, or an empty
/// string when no words have been calculated.
/// </summary>
[Browsable(false)]
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
public string CurrentWord
{
    get
    {
        bool hasWords = _words != null && _words.Count != 0;
        return hasWords ? _words[this.WordIndex].Value : string.Empty;
    }
}
/// <summary>
/// Gets or sets the WordDictionary used for spell checking.
/// </summary>
/// <remarks>
/// Outside design mode the getter creates a dictionary on first use.
/// Assigning null is ignored and leaves the current dictionary in place.
/// </remarks>
[Browsable(true)]
[CategoryAttribute("Dictionary")]
[Description("The WordDictionary object to use when spell checking")]
public WordDictionary Dictionary
{
    get
    {
        if (_dictionary == null && !base.DesignMode)
        {
            _dictionary = new WordDictionary();
        }

        return _dictionary;
    }
    set
    {
        if (value == null)
        {
            return;
        }

        _dictionary = value;
    }
}
/// <summary>
/// Gets or sets whether words written entirely in capital letters are skipped.
/// </summary>
[DefaultValue(true)]
[CategoryAttribute("Options")]
[Description("Ignore words with all capital letters when spell checking")]
public bool IgnoreAllCapsWords
{
    get
    {
        return _ignoreAllCapsWords;
    }
    set
    {
        _ignoreAllCapsWords = value;
    }
}
/// <summary>
/// Gets or sets whether words that start inside an html tag are skipped.
/// </summary>
[DefaultValue(true)]
[CategoryAttribute("Options")]
[Description("Ignore html tags when spell checking")]
public bool IgnoreHtml
{
    get
    {
        return _ignoreHtml;
    }
    set
    {
        _ignoreHtml = value;
    }
}
/// <summary>
/// Gets or sets whether words with digits are skipped when spell checking.
/// </summary>
[DefaultValue(false)]
[CategoryAttribute("Options")]
[Description("Ignore words with digits when spell checking")]
public bool IgnoreWordsWithDigits
{
    get
    {
        return _ignoreWordsWithDigits;
    }
    set
    {
        _ignoreWordsWithDigits = value;
    }
}
/// <summary>
/// Gets the table that maps a misspelled word to the value that
/// automatically replaces it.
/// </summary>
/// <remarks>
/// <see cref="ReplaceAllWord"/> adds the <see cref="CurrentWord"/> and its
/// replacement to this table.
/// </remarks>
[Browsable(false)]
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
public Hashtable ReplaceList
{
    get
    {
        return _replaceList;
    }
}
/// <summary>
/// Gets or sets the strategy used when generating suggestions.
/// </summary>
[DefaultValue(SuggestionEnum.PhoneticNearMiss)]
[CategoryAttribute("Options")]
[Description("The suggestion strategy to use when generating suggestions")]
public SuggestionEnum SuggestionMode
{
    get
    {
        return _suggestionMode;
    }
    set
    {
        _suggestionMode = value;
    }
}
/// <summary>
/// Gets a snapshot of the text being spell checked, including any
/// replacements applied so far.
/// </summary>
[Browsable(false)]
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
public string Text
{
    get
    {
        return _text.ToString();
    }
}
/// <summary>
/// Gets the character offset of the current word within the text,
/// or 0 when no words have been calculated.
/// </summary>
[Browsable(false)]
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
public int TextIndex
{
    get
    {
        bool hasWords = _words != null && _words.Count != 0;
        return hasWords ? _words[this.WordIndex].Index : 0;
    }
}
/// <summary>
/// Gets the number of words found in the text, or 0 when the words
/// have not been calculated yet.
/// </summary>
[Browsable(false)]
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
public int WordCount
{
    get
    {
        return _words != null ? _words.Count : 0;
    }
}
/// <summary>
/// Gets or sets the index of the word currently being spell checked.
/// </summary>
/// <remarks>
/// The setter stores the value unchanged. The getter returns 0 when no
/// words have been calculated and otherwise clamps the stored value to
/// the range 0 .. WordCount - 1 (never below 0).
/// </remarks>
[Browsable(false)]
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
public int WordIndex
{
    get
    {
        if (_words == null)
        {
            return 0;
        }

        // upper bound first, then lower bound, so an empty word list still yields 0
        int lastIndex = this.WordCount - 1;
        int capped = _wordIndex < lastIndex ? _wordIndex : lastIndex;
        return capped > 0 ? capped : 0;
    }
    set
    {
        _wordIndex = value;
    }
}
#endregion
#region Constructors
/// <summary>
/// Creates the component and runs the designer-generated initialization.
/// </summary>
public AutoSpelling()
{
    this.InitializeComponent();
}
/// <summary>
/// Creates the component, registers it with the given container and
/// then runs the designer-generated initialization.
/// </summary>
/// <param name="container">The container this component is added to</param>
public AutoSpelling(IContainer container)
{
    // registration happens before initialization, as in the designer pattern
    container.Add(this);

    this.InitializeComponent();
}
#endregion
#region private methods
/// <summary>
/// Rebuilds the word list and the html tag list from the current text.
/// </summary>
private void CalculateWords()
{
    string snapshot = _text.ToString();

    // word boundaries
    _words = _wordEx.Matches(snapshot);

    // html tag positions, kept in step with the word list
    _htmlTags = _htmlRegex.Matches(snapshot);
}
/// <summary>
/// Decides whether the given string needs to be spell checked.
/// </summary>
/// <param name="characters" type="string">
///     <para>
///         The characters to examine
///     </para>
/// </param>
/// <returns>
///     Returns false when an enabled ignore option applies, when the string
///     contains no non-digit character, or (with html ignoring on) when the
///     current word starts inside an html tag; otherwise true
/// </returns>
private bool CheckString(string characters)
{
    // no character outside A-Z means the word is all caps
    bool skippedAsAllCaps = _ignoreAllCapsWords && !_upperRegex.IsMatch(characters);
    if (skippedAsAllCaps
        || (_ignoreWordsWithDigits && _digitRegex.IsMatch(characters))
        || !_letterRegex.IsMatch(characters))
    {
        return false;
    }

    if (!_ignoreHtml)
    {
        return true;
    }

    // position of the current word, not of the argument
    int wordStart = _words[this.WordIndex].Index;
    foreach (Match tag in _htmlTags)
    {
        bool outsideTag = wordStart < tag.Index || wordStart >= tag.Index + tag.Length;
        if (!outsideTag)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Makes sure a dictionary exists and has been initialized.
/// </summary>
private void Initialize()
{
    WordDictionary active = _dictionary;
    if (active == null)
    {
        active = new WordDictionary();
        _dictionary = active;
    }

    if (active.Initialized)
    {
        return;
    }

    active.Initialize();
}
/// <summary>
/// Records where html tags occur in the current text.
/// </summary>
private void MarkHtml()
{
    // collect every tag match so words inside tags can be skipped
    string snapshot = _text.ToString();
    _htmlTags = _htmlRegex.Matches(snapshot);
}
/// <summary>
/// Moves the checker back to the first word.
/// </summary>
private void Reset()
{
    // only the word index is reset; text and options are left untouched
    this._wordIndex = 0;
}
/// <summary>
/// Deletes the CurrentWord from the text and recalculates the word list.
/// </summary>
/// <remarks>
/// One adjacent space is removed together with the word so that no double
/// space, leading space, or space before punctuation is left behind.
/// Note, calling ReplaceWord with the replacement word set to
/// an empty string has the same behavior as DeleteWord.
/// </remarks>
private void DeleteWord()
{
    if (_words == null || _words.Count == 0)
    {
        TraceWriter.TraceWarning("No Words to Delete");
        return;
    }

    Match target = _words[this.WordIndex];
    int index = target.Index;
    int length = target.Length;

    int end = index + length;
    bool hasFollowingChar = end < _text.Length;
    bool spaceBefore = index > 0 && _text[index - 1] == ' ';

    if (hasFollowingChar && _text[end] == ' ' && (index == 0 || spaceBefore))
    {
        // first word followed by a space, or word surrounded by spaces:
        // take the trailing space with it
        length++;
    }
    else if (spaceBefore && (!hasFollowingChar || char.IsPunctuation(_text[end])))
    {
        // last word, or word directly followed by punctuation:
        // take the leading space with it
        index--;
        length++;
    }

    _text.Remove(index, length);
    this.CalculateWords();
}
/// <summary>
/// Finds the index of the word that covers a character position.
/// </summary>
/// <remarks>
/// Each word's range extends up to the character before the next word
/// starts, so the text between two words belongs to the earlier one.
/// </remarks>
/// <param name="textIndex">Character offset within the text</param>
/// <returns>
/// The index of the covering word; 0 when there are no words, only one
/// word, or textIndex is below 1; the last word's index when no word's
/// range contains the position
/// </returns>
private int GetWordIndexFromTextIndex(int textIndex)
{
    bool nothingToSearch = _words == null || _words.Count == 0 || textIndex < 1;
    if (nothingToSearch)
    {
        TraceWriter.TraceWarning("No words to get text index from.");
        return 0;
    }

    int lastWord = _words.Count - 1;
    if (lastWord == 0)
    {
        return 0;
    }

    // binary search over the word ranges
    int lower = 0;
    int upper = lastWord;
    while (lower <= upper)
    {
        int probe = (lower + upper) / 2;
        Match candidate = _words[probe];
        int rangeStart = candidate.Index;

        // the range ends just before the next word, or at the word's own
        // last character when it is the final word
        int rangeEnd = probe < lastWord
            ? _words[probe + 1].Index - 1
            : candidate.Index + candidate.Length - 1;

        if (textIndex < rangeStart)
        {
            upper = probe - 1;
        }
        else if (textIndex > rangeEnd)
        {
            lower = probe + 1;
        }
        else
        {
            return probe;
        }
    }

    // not covered by any range: fall back to the last word
    return lastWord;
}
/// <summary>
/// Remembers the pending replacement for the CurrentWord in the replace
/// list (when it is not listed yet and the replacement is not empty) and
/// then replaces the CurrentWord in the text.
/// </summary>
private void ReplaceAllWord()
{
    string misspelling = this.CurrentWord;
    if (misspelling.Length == 0)
    {
        TraceWriter.TraceWarning("No current word");
        return;
    }

    // an existing entry is never overwritten
    if (!_replaceList.ContainsKey(misspelling))
    {
        if (_replacementWord.Length > 0)
        {
            _replaceList.Add(misspelling, _replacementWord);
        }
    }

    this.ReplaceWord();
}
/// <summary>
/// Stores the given replacement as the pending replacement word and then
/// runs the parameterless ReplaceAllWord.
/// </summary>
/// <param name="replacementWord" type="string">
///     <para>
///         The word to replace the CurrentWord with
///     </para>
/// </param>
private void ReplaceAllWord(string replacementWord)
{
    this._replacementWord = replacementWord;

    ReplaceAllWord();
}
/// <summary>
/// Replaces the CurrentWord in the text with the pending replacement word
/// and recalculates the word list.
/// </summary>
/// <remarks>
/// An empty replacement deletes the word instead. When the word being
/// replaced starts with an upper case letter, the first letter of the
/// replacement is upper-cased too (this also updates the pending
/// replacement word itself).
/// </remarks>
private void ReplaceWord()
{
    if (_words == null || _words.Count == 0 || this.CurrentWord.Length == 0)
    {
        TraceWriter.TraceWarning("No text or current word");
        return;
    }

    if (_replacementWord.Length == 0)
    {
        this.DeleteWord();
        return;
    }

    // look the match up once; it stays valid while the text is edited
    Match target = _words[this.WordIndex];
    int index = target.Index;

    _text.Remove(index, target.Length);

    // if first letter upper case, match case for replacement word
    if (char.IsUpper(target.Value, 0))
    {
        _replacementWord = _replacementWord.Substring(0, 1).ToUpper(CultureInfo.CurrentUICulture)
            + _replacementWord.Substring(1);
    }

    _text.Insert(index, _replacementWord);
    this.CalculateWords();
}
/// <summary>
/// Stores the given replacement as the pending replacement word and then
/// replaces the CurrentWord in the text with it.
/// </summary>
/// <param name="replacementWord" type="string">
///     <para>
///         The word to replace the CurrentWord with
///     </para>
/// </param>
private void ReplaceWord(string replacementWord)
{
    this._replacementWord = replacementWord;

    ReplaceWord();
}
#endregion
#region public methods
/// <summary>
/// Spell checks the given text word by word and applies corrections
/// automatically; the corrected text is available from the Text property.
/// </summary>
/// <param name="text">The text to check</param>
/// <returns>
/// True when at least one checked word was not found in the dictionary and
/// had no entry in the replace list, whether or not a correction could
/// then be applied; otherwise false
/// </returns>
public bool AutoSpelingCheck(string text)
{
    _text = new StringBuilder(text);
    this.CalculateWords();
    this.Reset();

    bool foundMisspelling = false;
    if (_words == null || WordCount <= 0)
    {
        return foundMisspelling;
    }

    // WordCount is read again on every pass because a replacement rebuilds the word list
    for (int position = 0; position < WordCount; position++)
    {
        _wordIndex = position; // the helpers below work on the current word
        string candidate = this.CurrentWord;

        // skip words that are exempt from checking or already spelled correctly
        if (!CheckString(candidate) || this.TestWord(candidate))
        {
            continue;
        }

        if (_replaceList.ContainsKey(candidate))
        {
            // a replacement was already chosen for this exact word
            _replacementWord = _replaceList[candidate].ToString();
            this.ReplaceWord();
        }
        else
        {
            foundMisspelling = true;
            AutoSpelingCheck();
        }
    }

    return foundMisspelling;
}
/// <summary>
/// Tries to correct the CurrentWord by running the near miss strategies
/// in a fixed order and applying the first correction found.
/// </summary>
/// <remarks>
/// Nothing is attempted unless the suggestion mode is PhoneticNearMiss or
/// NearMiss. When no strategy finds a word, the text is left unchanged.
/// </remarks>
private void AutoSpelingCheck()
{
    // a correction can't be generated without a current word
    if (this.CurrentWord.Length == 0)
    {
        TraceWriter.TraceWarning("No current word");
        return;
    }

    this.Initialize();

    bool nearMissEnabled = _suggestionMode == SuggestionEnum.PhoneticNearMiss
        || _suggestionMode == SuggestionEnum.NearMiss;
    if (!nearMissEnabled)
    {
        return;
    }

    // short-circuit evaluation keeps the order and stops at the first strategy that succeeds
    String correction;
    bool corrected = this.ReplaceChars(out correction)
        || this.BadChar(out correction)
        || this.ForgotChar(out correction)
        || this.SwapChar(out correction)
        || this.ExtraChar(out correction)
        || this.TwoWords(out correction);

    if (corrected)
    {
        ReplaceAllWord(correction);
    }
}
/// <summary>
/// Looks the word up in the dictionary, first as given and then in
/// lower case.
/// </summary>
/// <param name="word" type="string">
///     <para>
///         The word to look up
///     </para>
/// </param>
/// <returns>
///     Returns true if either form of the word is found in the dictionary
/// </returns>
public bool TestWord(string word)
{
    this.Initialize();
    TraceWriter.TraceVerbose("Testing Word: {0}", word);

    // the lower-case lookup only runs when the exact lookup fails
    return this.Dictionary.Contains(word)
        || this.Dictionary.Contains(word.ToLower());
}
#endregion
#region ISpell Near Miss Suggetion methods
/// <summary>
/// swap out each char one by one and try all the tryme
/// chars in its place to see if that makes a good word
/// </summary>
/// <remarks>
/// The search stops at the first substitution that produces a dictionary
/// word, scanning positions left to right and try characters in order.
/// </remarks>
/// <param name="result">The corrected word, or an empty string when none was found</param>
/// <returns>True when a substitution produced a dictionary word</returns>
private bool BadChar(out String result)
{
    result = "";

    string word = this.CurrentWord;
    // the try characters do not change during the scan, so fetch them once
    char[] tryme = this.Dictionary.TryCharacters.ToCharArray();
    StringBuilder candidate = new StringBuilder(word);

    for (int i = 0; i < word.Length; i++)
    {
        for (int x = 0; x < tryme.Length; x++)
        {
            candidate[i] = tryme[x];
            string attempt = candidate.ToString();
            if (this.TestWord(attempt))
            {
                // return immediately so a later position cannot overwrite this hit
                result = attempt;
                return true;
            }
        }

        // put the original character back before moving to the next position
        candidate[i] = word[i];
    }

    return false;
}
/// <summary>
/// Drops one character of the word at a time and reports the first
/// shortened form that is a dictionary word.
/// </summary>
/// <param name="result">The shortened word, or an empty string when none was found</param>
/// <returns>True when removing a single character produced a dictionary word</returns>
private bool ExtraChar(out String result)
{
    result = "";

    string word = this.CurrentWord;
    if (word.Length <= 1)
    {
        // nothing would be left after removing a character
        return false;
    }

    for (int drop = 0; drop < word.Length; drop++)
    {
        string shortened = word.Remove(drop, 1);
        if (this.TestWord(shortened))
        {
            result = shortened;
            return true;
        }
    }

    return false;
}
/// <summary>
/// Inserts each tryme character at every position of the word (including
/// the end) and reports the first result that is a dictionary word.
/// </summary>
/// <param name="result">The lengthened word, or an empty string when none was found</param>
/// <returns>True when inserting a single character produced a dictionary word</returns>
private bool ForgotChar(out String result)
{
    result = "";

    char[] tryme = this.Dictionary.TryCharacters.ToCharArray();
    string word = this.CurrentWord;

    for (int slot = 0; slot <= word.Length; slot++)
    {
        foreach (char extra in tryme)
        {
            string widened = word.Insert(slot, extra.ToString());
            if (this.TestWord(widened))
            {
                result = widened;
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// suggestions for a typical fault of spelling, that
/// differs with more, than 1 letter from the right form.
/// </summary>
/// <remarks>
/// Each entry of the dictionary's replace characters is read as
/// "key replacement", split at the first space. Every occurrence of the
/// key in the CurrentWord is swapped for the replacement, one occurrence
/// at a time, until a dictionary word is produced. Entries that are null,
/// contain no space, or have an empty key are skipped.
/// </remarks>
/// <param name="result">The corrected word, or an empty string when none was found</param>
/// <returns>True when a replacement produced a dictionary word</returns>
private bool ReplaceChars(out String result)
{
    result = "";

    string word = this.CurrentWord;
    ArrayList replacementChars = this.Dictionary.ReplaceCharacters;

    for (int i = 0; i < replacementChars.Count; i++)
    {
        string rule = (string)replacementChars[i];
        if (rule == null)
        {
            continue;
        }

        // a rule without a separator, or with an empty key, cannot be applied;
        // skipping it avoids an out-of-range Substring/IndexOf
        int split = rule.IndexOf(' ');
        if (split <= 0)
        {
            continue;
        }

        string key = rule.Substring(0, split);
        string replacement = rule.Substring(split + 1);

        // ordinal search: the key is an exact character sequence, not linguistic text
        int pos = word.IndexOf(key, StringComparison.Ordinal);
        while (pos > -1)
        {
            string tempWord = word.Substring(0, pos)
                + replacement
                + word.Substring(pos + key.Length);

            if (this.TestWord(tempWord))
            {
                result = tempWord;
                return true;
            }

            pos = word.IndexOf(key, pos + 1, StringComparison.Ordinal);
        }
    }

    return false;
}
/// <summary>
/// Exchanges each pair of neighbouring characters in turn and reports
/// the first result that is a dictionary word.
/// </summary>
/// <param name="result">The word with two characters swapped, or an empty string when none was found</param>
/// <returns>True when swapping one adjacent pair produced a dictionary word</returns>
private bool SwapChar(out String result)
{
    result = "";

    string word = this.CurrentWord;
    int pairCount = word.Length - 1;

    int left = 0;
    while (left < pairCount)
    {
        // start from the original spelling for every pair
        char[] letters = word.ToCharArray();
        char held = letters[left];
        letters[left] = letters[left + 1];
        letters[left + 1] = held;

        string swapped = new string(letters);
        if (this.TestWord(swapped))
        {
            result = swapped;
            return true;
        }

        left++;
    }

    return false;
}
/// <summary>
/// Splits the word in two at successive positions and reports the first
/// split where both halves are dictionary words, joined by a space.
/// </summary>
/// <remarks>
/// Split positions run from 1 to Length - 2, so the second half always
/// has at least two characters.
/// </remarks>
/// <param name="result">The two words separated by a space, or an empty string when none was found</param>
/// <returns>True when a split produced two dictionary words</returns>
private bool TwoWords(out String result)
{
    result = "";

    string word = this.CurrentWord;
    int cut = 1;
    while (cut < word.Length - 1)
    {
        string head = word.Substring(0, cut);
        string tail = word.Substring(cut);

        // the second lookup only runs when the first half is a word
        if (this.TestWord(head) && this.TestWord(tail))
        {
            result = head + " " + tail;
            return true;
        }

        cut++;
    }

    return false;
}
#endregion
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.