using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
namespace Siphon
{
public static class Inflector
{
#region Inflection rules
private static Boolean s_isDisposed;
private static Boolean s_isInitialized;
private static List<InflectorRule> s_plurals;
private static List<InflectorRule> s_singulars;
private static List<String> s_uncountables;
private sealed class InflectorRule
{
private readonly Regex m_regex;
private readonly String m_replacement;
public InflectorRule(String regexPattern, String replacementText)
{
m_regex = new Regex(regexPattern, RegexOptions.IgnoreCase);
m_replacement = replacementText;
}
public String Apply(String word)
{
if (!m_regex.IsMatch(word))
return null;
String replace = m_regex.Replace(word, m_replacement);
return replace;
}
}
#endregion
#region Construction and initialization
public static void InitializeRules()
{
if (!s_isDisposed && (!s_isInitialized || s_plurals == null || s_singulars == null || s_uncountables == null))
{
s_plurals = new List<InflectorRule>();
s_singulars = new List<InflectorRule>();
s_uncountables = new List<String>();
AddPluralRule("$", "s");
AddPluralRule("s$", "s");
AddPluralRule("(ax|test)is$", "$1es");
AddPluralRule("(octop|vir)us$", "$1i");
AddPluralRule("(alias|status)$", "$1es");
AddPluralRule("(bu)s$", "$1ses");
AddPluralRule("(buffal|tomat)o$", "$1oes");
AddPluralRule("([ti])um$", "$1a");
AddPluralRule("sis$", "ses");
AddPluralRule("(?:([^f])fe|([lr])f)$", "$1$2ves");
AddPluralRule("(hive)$", "$1s");
AddPluralRule("([^aeiouy]|qu)y$", "$1ies");
AddPluralRule("(x|ch|ss|sh)$", "$1es");
AddPluralRule("(matr|vert|ind)ix|ex$", "$1ices");
AddPluralRule("([m|l])ouse$", "$1ice");
AddPluralRule("^(ox)$", "$1en");
AddPluralRule("(quiz)$", "$1zes");
AddSingularRule("s$", "");
AddSingularRule("ss$", "ss");
AddSingularRule("(n)ews$", "$1ews");
AddSingularRule("([ti])a$", "$1um");
AddSingularRule("((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "$1$2sis");
AddSingularRule("(^analy)ses$", "$1sis");
AddSingularRule("([^f])ves$", "$1fe");
AddSingularRule("(hive)s$", "$1");
AddSingularRule("(tive)s$", "$1");
AddSingularRule("([lr])ves$", "$1f");
AddSingularRule("([^aeiouy]|qu)ies$", "$1y");
AddSingularRule("(s)eries$", "$1eries");
AddSingularRule("(m)ovies$", "$1ovie");
AddSingularRule("(x|ch|ss|sh)es$", "$1");
AddSingularRule("([m|l])ice$", "$1ouse");
AddSingularRule("(bus)es$", "$1");
AddSingularRule("(o)es$", "$1");
AddSingularRule("(shoe)s$", "$1");
AddSingularRule("(cris|ax|test)es$", "$1is");
AddSingularRule("(octop|vir)i$", "$1us");
AddSingularRule("(alias|status)$", "$1");
AddSingularRule("(alias|status)es$", "$1");
AddSingularRule("^(ox)en", "$1");
AddSingularRule("(vert|ind)ices$", "$1ex");
AddSingularRule("(matr)ices$", "$1ix");
AddSingularRule("(quiz)zes$", "$1");
AddIrregularRule("person", "people");
AddIrregularRule("man", "men");
AddIrregularRule("child", "children");
AddIrregularRule("sex", "sexes");
AddIrregularRule("index", "indexes");
AddIrregularRule("tax", "taxes");
AddIrregularRule("move", "moves");
AddIrregularRule("tooth", "teeth");
AddUnknownCountRule("equipment");
AddUnknownCountRule("information");
AddUnknownCountRule("rice");
AddUnknownCountRule("money");
AddUnknownCountRule("species");
AddUnknownCountRule("series");
AddUnknownCountRule("fish");
AddUnknownCountRule("sheep");
s_isInitialized = true;
}
}
private static void AddPluralRule(String rule, String replacement)
{
s_plurals.Add(new InflectorRule(rule, replacement));
}
private static void AddSingularRule(String rule, String replacement)
{
s_singulars.Add(new InflectorRule(rule, replacement));
}
private static void AddIrregularRule(String singular, String plural)
{
AddPluralRule("(" + singular[0] + ")" + singular.Substring(1) + "$",
"$1" + plural.Substring(1));
AddSingularRule("(" + plural[0] + ")" + plural.Substring(1) + "$",
"$1" + singular.Substring(1));
}
private static void AddUnknownCountRule(String word)
{
s_uncountables.Add(word.ToLower());
}
public static void Initialize()
{
s_isDisposed = false;
s_isInitialized = false;
InitializeRules();
}
public static void Dispose()
{
s_isInitialized = false;
s_isDisposed = true;
s_plurals = null;
s_singulars = null;
s_uncountables = null;
}
#endregion
#region Converting between singular and plural forms
public static String MakePlural(String word)
{
InitializeRules();
return ApplyRules(s_plurals, word);
}
public static String MakeSingular(String word)
{
try
{
InitializeRules();
return ApplyRules(s_singulars, word);
}
catch (ArgumentNullException ex)
{
String message = String.Format("Failed to singularize the word \"{0}\"", word ?? "(null)");
throw new SiphonException(message, ex);
}
}
private static String ApplyRules(IList<InflectorRule> rules, String word)
{
// Set the preconditions for this function.
if (rules == null)
throw new ArgumentNullException("rules");
if (word == null)
throw new ArgumentNullException("word");
// Apply the inflection rules to the word.
String result = word;
if (s_uncountables == null || !s_uncountables.Contains(word.ToLower()))
{
for (Int32 i = rules.Count - 1; i >= 0; i--)
{
String temp = rules[i].Apply(word);
if (temp != null)
{
result = temp;
break;
}
}
}
return result;
}
#endregion
#region Changing case (upper, lower, title, camel, etc.)
public static String ToTitleCase(String text)
{
return ToTitleCase(text, true);
}
public static String ToTitleCase(String text, Boolean ignoreShortWords)
{
List<String> ignoreWords = null;
if (ignoreShortWords)
{
ignoreWords = new List<String>();
ignoreWords.Add("a");
ignoreWords.Add("is");
ignoreWords.Add("was");
ignoreWords.Add("the");
}
String[] tokens = text.Split(' ');
StringBuilder sb = new StringBuilder(text.Length);
foreach (String token in tokens)
{
if (ignoreShortWords
&& token != tokens[0]
&& ignoreWords.Contains(token.ToLower()))
{
sb.Append(token.ToLower());
}
else
{
sb.Append(MakeInitialUppercase(token));
}
sb.Append(" ");
}
return sb.ToString().Trim();
}
public static String ToHumanCase(String lowercaseAndUnderscoredWord)
{
return MakeInitialUppercase(Regex.Replace(lowercaseAndUnderscoredWord, @"_", " "));
}
public static String ToPascalCase(String lowercaseAndUnderscoredWord)
{
return ToPascalCase(lowercaseAndUnderscoredWord, true);
}
public static String ToPascalCase(String lowercaseAndUnderscoredWord, Boolean removeUnderscores)
{
String pattern = "(?:^|_)(.)";
if (!removeUnderscores)
pattern = "(?:^)(.)";
return Regex.Replace(lowercaseAndUnderscoredWord
, pattern
, delegate(Match match) { return match.Groups[1].Value.ToUpper(); });
}
public static String ToCamelCase(String lowercaseAndUnderscoredWord)
{
return MakeInitialLowercase(ToPascalCase(lowercaseAndUnderscoredWord));
}
public static String MakeInitialUppercase(String text)
{
if (text == null)
throw new ArgumentNullException("text");
return text.Substring(0, 1).ToUpper()
+ text.Substring(1).ToLower();
}
public static String MakeInitialLowercase(String text)
{
if (text == null)
throw new ArgumentNullException("text");
return text.Substring(0, 1).ToLower() + text.Substring(1);
}
#endregion
#region Soundex
/*
* The algorithm for this function is:
* Copyright (c) 2006 Creativyst, Inc.
*
* For more information go to:
* http://www.creativyst.com/Doc/Articles/SoundEx1/SoundEx1.htm
*
* censusMode
* 0 = Disabled
* 1 = Properly calculated SoundEx codes found in all census years
* 2 = Improperly calculated SoundEx codes found in SOME of the censuses performed in 1880, 1900, and 1910
*/
public static String Soundex(String word, Int32 resultLength, Int32 censusMode)
{
String temp;
Int32 soundexLength = 10;
// Perform sanity checks
if (censusMode > 0)
resultLength = 4;
if (resultLength > 0)
soundexLength = resultLength;
if (soundexLength > 10)
soundexLength = 10;
if (soundexLength < 4)
soundexLength = 4;
// Remove leading and trailing whitespace
if (word == null)
return String.Empty;
word = word.Trim();
if (String.IsNullOrEmpty(word))
return String.Empty;
// Company names may include digits (e.g. 3M) - it is important to
// preserve these digits in the resulting SoundEx code
word = word.Replace("0", "Zero");
word = word.Replace("1", "One");
word = word.Replace("2", "Two");
word = word.Replace("3", "Three");
word = word.Replace("4", "Four");
word = word.Replace("5", "Five");
word = word.Replace("6", "Six");
word = word.Replace("7", "Seven");
word = word.Replace("8", "Eight");
word = word.Replace("9", "Nine");
// Clean and tidy
word = word.ToUpper();
String sound = word;
// Replace non-chars with spaces; if there is nothing but
// whitespace left in the resulting string then abandon ship
sound = sound.Replace(@"[^A-Z]", " ");
sound = sound.Trim();
if (String.IsNullOrEmpty(sound))
return String.Empty;
if (censusMode == 0)
{
sound = sound.Replace(@"DG", "G"); // Change DG to G
sound = sound.Replace(@"GH", "H"); // Change GH to H
sound = sound.Replace(@"GN", "N"); // Change GN to N
sound = sound.Replace(@"KN", "N"); // Change KN to N
sound = sound.Replace(@"PH", "F"); // Change PH to F
sound = sound.Replace(@"MP([STZ])", "M$1"); // Change MPST|MPZ to MST|MZ
sound = sound.Replace(@"^PS", "S"); // Change leading PS to S
sound = sound.Replace(@"^PF", "F"); // Change leading PF to F
sound = sound.Replace(@"MB", "M"); // Change MB to M
sound = sound.Replace(@"TCH", "CH"); // Change TCH to CH
}
// The above improvements may change the first letter
Char firstLetter = sound[0];
// In properly done census, SoundEx for H and W will be squezeed out
// before performing the test for adjacent digits (this differs from
// how 'real' vowels are handled)
if (firstLetter == 'H' || firstLetter == 'W')
{
temp = sound.Substring(1);
sound = "-";
sound += temp;
}
if (censusMode == 1)
sound = sound.Replace(@"[HW]", ".");
// Classic SoundEx
sound = sound.Replace(@"[AEIOUYHW]", "0");
sound = sound.Replace(@"[BPFV]", "1");
sound = sound.Replace(@"[CSGJKQXZ]", "2");
sound = sound.Replace(@"[DT]", "3");
sound = sound.Replace(@"[L]", "4");
sound = sound.Replace(@"[MN]", "5");
sound = sound.Replace(@"[R]", "6");
// Properly done census: squeeze out H and W before doing adjacent
// digit removal.
if (censusMode == 1)
sound = sound.Replace(@"\.", String.Empty);
// Remove extra equal adjacent digits
Int32 soundLength = sound.Length;
Char lastChar = '\0';
temp = String.Empty;
for (Int32 i = 0; i < soundLength; i++)
{
Char currentChar = sound[i];
if (currentChar == lastChar)
temp += " ";
else
{
temp += currentChar;
lastChar = currentChar;
}
}
sound = temp;
// Drop the first letter code
sound = sound.Substring(1);
// Remove spaces
sound = sound.Replace(@"\s", String.Empty);
// Remove zeros
sound = sound.Replace(@"0", String.Empty);
// Right-pad with zeros
sound += "0000000000";
// Add the first letter of the word
sound = firstLetter + sound;
// And, finally, size to taste
sound = sound.Substring(0, soundexLength);
return sound;
}
#endregion
#region Helpers
public static String SeparateWordsWithUnderscores(String text)
{
return
Regex.Replace(
Regex.Replace(
Regex.Replace(text, @"([A-Z]+)([A-Z][a-z])", "$1_$2")
, @"([a-z\d])([A-Z])"
, "$1_$2")
, @"[-\s]"
, "_");
}
#endregion
}
}