|
using System.Text;
using System;
namespace Ranslant.JSON.Linq
{
internal sealed class Utilities
{
/// <summary>
/// Builds the indentation prefix for a given nesting depth: one tab character per level.
/// </summary>
/// <param name="indentlevel">Nesting depth; must be zero or greater.</param>
/// <returns>A string made of <paramref name="indentlevel"/> tab characters (empty when the level is 0).</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="indentlevel"/> is negative.</exception>
internal static string MakeIndent(int indentlevel)
{
    if (indentlevel < 0)
    {
        // ArgumentOutOfRangeException derives from Exception, so callers that
        // caught the former bare Exception keep working unchanged.
        throw new ArgumentOutOfRangeException("indentlevel", indentlevel, "indentLevel must be >= 0");
    }

    // The string(char, int) constructor repeats the character directly; no builder loop needed.
    return new string('\t', indentlevel);
}
/// <summary>
/// Assembles one line of JSON text: the indent, then the content, then (optionally)
/// the values separator, followed by a line terminator.
/// </summary>
/// <param name="indent">Text placed at the start of the line.</param>
/// <param name="addSeparator">When true, <c>JToken.ValuesSeparator</c> is appended right after the content.</param>
/// <param name="content">The text of the line itself.</param>
/// <returns>The assembled line, ending with the line terminator added by <c>StringBuilder.AppendLine()</c>.</returns>
internal static string MakeJSONTextLine(string indent, bool addSeparator, string content)
{
    StringBuilder line = new StringBuilder().Append(indent).Append(content);

    if (addSeparator)
    {
        line.Append(JToken.ValuesSeparator);
    }

    return line.AppendLine().ToString();
}
// UTF8: http://en.wikipedia.org/wiki/UTF-8#Byte_order_mark
// UTF16: http://en.wikipedia.org/wiki/UTF-16#Byte_order_encoding_schemes
// UTF32: http://en.wikipedia.org/wiki/UTF-32
/// <summary>
/// Tries to detect the encoding scheme based on the Byte Order Mark. When no Byte Order Mark
/// is present, ASCII is assumed if the first two bytes are both strictly between 0x00 and 0x7F.
/// </summary>
/// <param name="data">the data to be checked</param>
/// <returns>the matching encoding, if found. An exception is thrown otherwise</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="JsonException">
/// No Byte Order Mark matched and the data does not look like ASCII
/// (this includes data shorter than two bytes).
/// </exception>
internal static Encoding TryDetectEncoding(byte[] data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    // The preambles are obtained from the framework encodings rather than hard-coded
    // byte values, to be sure the right content is compared.

    if (HasPreamble(data, new UTF8Encoding(true))) // EF BB BF
    {
        return new UTF8Encoding(true);
    }

    // UTF32 - little endian (FF FE 00 00). This must be tested BEFORE UTF16 little endian,
    // because its Byte Order Mark begins with the UTF16 one (FF FE). A JSON text cannot
    // start with a NUL character, so FF FE 00 00 is not a plausible UTF16 document.
    if (HasPreamble(data, new UTF32Encoding(false, true)))
    {
        return new UTF32Encoding(false, false);
    }

    // UTF32 - big endian (00 00 FE FF)
    if (HasPreamble(data, new UTF32Encoding(true, true)))
    {
        return new UTF32Encoding(true, false);
    }

    // UTF16 - big endian (FE FF)
    if (HasPreamble(data, new UnicodeEncoding(true, true)))
    {
        return new UnicodeEncoding(true, false);
    }

    // UTF16 - little endian (FF FE)
    if (HasPreamble(data, new UnicodeEncoding(false, true)))
    {
        return new UnicodeEncoding(false, false);
    }

    // we hope we have ASCII
    // the first char has to be between 0x00 and 0x7f, and so does the next one.
    // Indeed, if it would be 0x00, then we could have UTF 8/16/32. For instance,
    // we could have: 0x7B 0x00 as the first two chars, and this is definitely NOT Ascii.
    // The length test keeps empty or one-byte input from failing on the index access.
    if (data.Length >= 2
        && data[0] > 0x00 && data[0] < 0x7F
        && data[1] > 0x00 && data[1] < 0x7F)
    {
        return new ASCIIEncoding();
    }

    throw new JsonException("encoding of the data could not be detected. Please provide the encoder");
}

// Returns true when data is at least as long as the preamble (Byte Order Mark) emitted by
// bomEncoding and begins with exactly those bytes.
private static bool HasPreamble(byte[] data, Encoding bomEncoding)
{
    byte[] preamble = bomEncoding.GetPreamble();
    return data.Length >= preamble.Length && data.StartsWith(preamble);
}
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.