Two more goes and then I'm done. (Wish I could get a job doing this kind of stuff! Wish I could get a job!)
This is similar to my previous attempt, but Reflector showed how `ToLower()` and `ToUpper()` are implemented, and it turns out we can save some time by caching `CurrentCulture.TextInfo` once instead of fetching it for every character. That saves about 14%:
/// <summary>
/// Returns a copy of <paramref name="s"/> in which every uppercase letter is
/// replaced by its lowercase form and every lowercase letter by its uppercase form.
/// </summary>
/// <remarks>
/// Characters below U+00FF take a fast path that never consults the current
/// culture: letters in A-Z, a-z, U+00C0-U+00DE (except U+00D7) and
/// U+00E0-U+00FE (except U+00F7) are toggled by flipping bit 0x20; every other
/// character in that range is copied through unchanged. All remaining
/// characters are classified with <see cref="char.GetUnicodeCategory(char)"/>
/// and converted with the current culture's <see cref="TextInfo"/>, which is
/// fetched once up front rather than once per character.
/// </remarks>
/// <param name="s">The text to toggle. Must not be null.</param>
/// <returns>A new string of the same length with letter case inverted.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
public static string ToggleCase_SimmoTech(string s)
{
    if (s == null)
    {
        throw new System.ArgumentNullException("s");
    }

    char[] chs = s.ToCharArray();
    var currentCultureTextInfo = CultureInfo.CurrentCulture.TextInfo;
    for (var i = 0; i < chs.Length; ++i)
    {
        char ch = chs[i];

        // U+00FF is deliberately excluded from the fast path: it is a lowercase
        // letter whose uppercase form (U+0178) lies outside Latin-1, so flipping
        // bit 0x20 cannot produce it. It falls through to the TextInfo path below.
        if (ch < 0xff)
        {
            if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') ||
                (ch >= 0xc0 && ch <= 0xde && ch != 0xd7) || (ch >= 0xe0 && ch <= 0xfe && ch != 0xf7))
            {
                // In these ranges upper and lower case differ only in bit 0x20.
                chs[i] = (char) (ch ^ 0x20);
            }
            continue;
        }

        switch (char.GetUnicodeCategory(ch))
        {
            case UnicodeCategory.UppercaseLetter:
                chs[i] = currentCultureTextInfo.ToLower(ch);
                break;
            case UnicodeCategory.LowercaseLetter:
                chs[i] = currentCultureTextInfo.ToUpper(ch);
                break;
        }
    }
    return new string(chs);
}
And finally, my fastest (and last) effort:
To avoid testing each character against the toggleable ranges, we can use a lookup table like this:
// Lookup table indexed by character code 0x00-0xFF. An entry is true when the
// character's case can be toggled by flipping bit 0x20: A-Z, a-z,
// 0xC0-0xDE except 0xD7, and 0xE0-0xFE except 0xF7.
static readonly bool[] ToggleableLatinChars = BuildToggleableLatinChars();

// Fills the 256-entry table from the toggleable ranges instead of spelling
// out every element by hand.
private static bool[] BuildToggleableLatinChars()
{
    var table = new bool[256];

    // ASCII letters: mark each uppercase letter and its lowercase twin (code | 0x20).
    for (int upper = 'A'; upper <= 'Z'; ++upper)
    {
        table[upper] = true;
        table[upper | 0x20] = true;
    }

    // Latin-1 letters: 0xC0-0xDE pair with 0xE0-0xFE. 0xD7 is skipped, which
    // also leaves its partner 0xF7 unmarked.
    for (int upper = 0xc0; upper <= 0xde; ++upper)
    {
        if (upper == 0xd7)
        {
            continue;
        }
        table[upper] = true;
        table[upper | 0x20] = true;
    }

    return table;
}
/// <summary>
/// Returns a copy of <paramref name="s"/> in which every uppercase letter is
/// replaced by its lowercase form and every lowercase letter by its uppercase form.
/// </summary>
/// <remarks>
/// Characters below U+00FF take a fast path that never consults the current
/// culture: the <c>ToggleableLatinChars</c> lookup table decides whether the
/// character is toggled by flipping bit 0x20 or copied through unchanged.
/// All remaining characters are classified with
/// <see cref="char.GetUnicodeCategory(char)"/> and converted with the current
/// culture's <see cref="TextInfo"/>, which is fetched once up front rather
/// than once per character.
/// </remarks>
/// <param name="s">The text to toggle. Must not be null.</param>
/// <returns>A new string of the same length with letter case inverted.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
public static string ToggleCase_SimmoTech2(string s)
{
    if (s == null)
    {
        throw new System.ArgumentNullException("s");
    }

    char[] chs = s.ToCharArray();
    var currentCultureTextInfo = CultureInfo.CurrentCulture.TextInfo;
    for (var i = 0; i < chs.Length; ++i)
    {
        char ch = chs[i];

        // U+00FF is deliberately excluded from the fast path: it is a lowercase
        // letter whose uppercase form (U+0178) lies outside Latin-1, so flipping
        // bit 0x20 cannot produce it. It falls through to the TextInfo path below.
        if (ch < 0xff)
        {
            // ch is below 0xFF here, so the byte cast is lossless and in range.
            if (ToggleableLatinChars[(byte) ch])
            {
                chs[i] = (char) (ch ^ 0x20);
            }
            continue;
        }

        switch (char.GetUnicodeCategory(ch))
        {
            case UnicodeCategory.UppercaseLetter:
                chs[i] = currentCultureTextInfo.ToLower(ch);
                break;
            case UnicodeCategory.LowercaseLetter:
                chs[i] = currentCultureTextInfo.ToUpper(ch);
                break;
        }
    }
    return new string(chs);
}
This saves 2% or 3% more, but with the extra data required I wouldn't bother unless it was an absolutely speed-critical algorithm.
Cheers,
Simon
Simon Hewitt is a freelance IT consultant and is MD of Hunton Information Systems Ltd.
He is currently looking for contract work in London.
He is happily married to Karen (originally from Florida, US), has a lovely daughter Bailey, and they live in Kings Langley, Hertfordshire, UK.