Click here to Skip to main content
15,886,137 members
Articles / Programming Languages / C# 4.0

The List Trifecta, Part 2

Rate me:
Please Sign up or sign in to vote.
5.00/5 (4 votes)
7 Sep 2013LGPL310 min read 28.6K   317   12  
The BDictionary is like a Dictionary mashed up with List<T>. BList and BMultiMap also say hello.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using Loyc.Collections;
using System.ComponentModel;
using uchar = System.Int32;
using NUnit.Framework;

namespace Loyc
{
	/// <summary>UString is a wrapper around string that provides a range of 21-bit 
	/// UCS-4 characters. "U" stands for "Unicode", as in UCS-4, as opposed to a 
	/// normal string that is UTF-16.</summary>
	/// <remarks>
	/// It has been suggested that Java and .NET's reliance on 16-bit "unicode" 
	/// characters was a mistake, because it turned out that 16 bits was not enough 
	/// to represent all the world's characters. And I agree.
	/// <para/>
	/// Instead it has been suggested that we should use <a href="http://www.utf8everywhere.org/">
	/// UTF-8 everywhere</a>. To scan UTF-8 data instead of UTF-16 while still 
	/// supporting non-English characters (or "ĉĥáràĉtérŝ", as I like to say),
	/// it is useful to have a bidirectional iterator that scans characters one
	/// codepoint at a time. UString provides that functionality for .NET, and
	/// the nice thing about UString is that it's portable to UTF-8 environments.
	/// That is, by using UString, your code is portable to a UTF-8 environment
	/// that uses an equivalent implementation of UString for UTF-8. Eventually 
	/// I want Loyc to target native environments, where UTF-8 is common, and 
	/// UString can provide a common data type for both UTF-8 and UTF-16 
	/// environments.
	/// <para/>
	/// UString is a bidirectional range of "uchar", which is an alias for int
	/// (uchar means "Unicode" or "UCS-4", rather than "unsigned").
	/// <para/>
	/// The difference between StringSlice and UString is that StringSlice is a
	/// random-access range of char, while UString is a bidirectional range of
	/// uchar (int). Since UString implements <see cref="IListSource{char}"/>,
	/// it requires StringSlice in order to support the Slice method.
	/// <para/>
	/// UString has a <see cref="DecodeAt(int)"/> method that tries to decode
	/// a UTF character to UCS at a particular index.
	/// <para/>
	/// Since UString and StringSlice are just slightly different views of the 
	/// same data, you can implicitly cast between them.
	/// <para/>
	/// Unfortunately, it's not possible for UString to compare equal to its 
	/// equivalent string, for two reasons: (1) System.String.Equals cannot be
	/// changed, and (2) UString.GetHashCode cannot return the same value as
	/// String.GetHashCode without actually generating a String object, which
	/// would be inefficient (String.GetHashCode cannot be emulated because it
	/// changes between versions of the .NET framework and even between 32- and 
	/// 64-bit builds.)
	/// <para/>
	/// TODO: add Right, Normalize, EndsWith, FindLast, ReplaceAll, etc.
	/// </remarks>
	public struct UString : IBRange<uchar>, IListSource<char>, ICloneable<UString>, IEquatable<UString>
	{
		public static readonly UString Null = default(UString);

		readonly string _str;
		int _start, _count;
		
		/// <summary>Initializes a UString slice.</summary>
		/// <exception cref="ArgumentException">The start index was below zero.</exception>
		/// <remarks>The (start, count) range is allowed to be invalid, as long
		/// as 'start' is zero or above. 
		/// <ul>
		/// <li>If 'count' is below zero, or if 'start' is above the original Length, 
		/// the Count of the new slice is set to zero.</li>
		/// <li>if (start + count) is above the original Length, the Count of the new
		/// slice is reduced to <c>list.Length - start</c>.</li>
		/// </ul>
		/// </remarks>
		public UString(string str, int start, int count = int.MaxValue)
		{
			if (start < 0)
				throw new ArgumentException("UString: the start index was below zero.");
			if (count < 0)
				throw new ArgumentException("UString: the count was below zero.");
			_str = str ?? "";
			_start = start;
			if (count > _str.Length - start) {
				count = _str.Length - start;
				if (count < 0) {
					_start -= count;
					count = 0;
				}
			}
			_count = count;
		}
		public UString(string str)
		{
			_str = str;
			_start = 0;
			_count = str.Length;
		}
		private UString(int start, int count, string str)
		{
			// Constructs without bounds checking
			_str = str;
			_start = start;
			_count = count;
			Debug.Assert(start >= 0 && count >= 0 && start + count <= _str.Length);
		}
		public string InternalString
		{
			get { return _str; }
		}
		public int InternalStart
		{
			get { return _start; }
		}
		public int InternalStop
		{
			get { return _start + _count; }
		}

		public int Length
		{
			get { return _count; }
		}
		[EditorBrowsable(EditorBrowsableState.Never)]
		public int Count
		{
			get { return _count; }
		}
		public bool IsEmpty
		{
			get { return _count == 0; }
		}
		public uchar Front
		{
			get { return DecodeAt(0); }
		}
		public uchar Back
		{
			get {
				int c = DecodeAt(_count - 1);
				if (c >= 0)
					return c;
				if (_count > 1) {
					int c1 = DecodeAt(_count - 2);
					if (c1 >= 0)
						return c1;
				}
				return c;
			}
		}

		public uchar PopFront(out bool fail)
		{
			if (_count != 0) {
				fail = false;
				var c = Front;
				int inc = c >= 0x10000 ? 2 : 1;
				_count -= inc;
				_start += inc;
				return c;
			}
			fail = true;
			return default(uchar);
		}
		public uchar PopBack(out bool fail)
		{
			if (_count != 0) {
				fail = false;
				var c = Back;
				_count -= (c >= 0x10000 ? 2 : 1);
				return c;
			}
			fail = true;
			return default(uchar);
		}

		IFRange<uchar>  ICloneable<IFRange<uchar>>.Clone() { return Clone(); }
		IBRange<uchar>  ICloneable<IBRange<uchar>>.Clone() { return Clone(); }
		public UString Clone() { return this; }

		IEnumerator<uchar> IEnumerable<uchar>.GetEnumerator() { return GetEnumerator(); }
		IEnumerator<char> IEnumerable<char>.GetEnumerator() { return ((StringSlice)this).GetEnumerator(); }
		System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); }
		public RangeEnumerator<UString,uchar> GetEnumerator()
		{
			return new RangeEnumerator<UString,uchar>(this);
		}

		/// <summary>Returns the UCS code point that starts at the specified index.</summary>
		/// <param name="index">Code unit index at which to decode.</param>
		/// <returns>The code point starting at this index, or a negative number.</returns>
		/// <exception cref="IndexOutOfRangeException">Oops.</exception>
		/// <remarks>
		/// If decoding fails, either because the index points to the "middle" of a
		/// multi-code-unit sequence or because the string contains an invalid
		/// UTF sequence, this method returns a negative value (the bitwise 'not' of 
		/// the invalid char).
		/// </remarks>
		public uchar DecodeAt(int index)
		{
			int c = this[0];
			if (c < 0xD800 || c >= 0xE000)
				return c;
			if (c < 0xDC00) {
				int c1 = this[1, '\0'];
				if (c1 >= 0xDC00 && c1 < 0xE000)
					return 0x10000 + (c << 10) + c1;
			}
			return ~(int)c;
		}

		/// <summary>Returns the code unit (16-bit value) at the specified index.</summary>
		/// <exception cref="IndexOutOfRangeException">Oops.</exception>
		public char this[int index]
		{
			get { 
				if ((uint)index < (uint)_count)
					return _str[_start + index];
				throw new IndexOutOfRangeException();
			}
		}
		/// <summary>Returns the code unit (16-bit value) at the specified index,
		/// or a default value if the specified index was out of range.</summary>
		public char this[int index, char defaultValue]
		{
			get { 
				if ((uint)index < (uint)_count)
					return _str[_start + index];
				return defaultValue;
			}
		}
		public char TryGet(int index, ref bool fail)
		{
			if ((uint)index < (uint)_count) {
				fail = false;
				return _str[_start + index];
			}
			fail = true;
			return default(char);
		}
		IRange<char> IListSource<char>.Slice(int start, int count) { return ((StringSlice)this).Slice(start, count); }
		public UString Slice(int start, int count = int.MaxValue)
		{
			return Substring(start, count);
		}

		#region GetHashCode, Equals, ToString

		public override uchar GetHashCode()
		{
			int hc1 = 352654597, hc2 = hc1;
			for (int i = _start, e = _start + _count; i < e; i++) {
				hc1 = ((hc1 << 5) + hc1 + (hc1 >> 27)) ^ _str[i];
				if (++i == e) break;
				hc2 = ((hc2 << 5) + hc2 + (hc2 >> 27)) ^ _str[i];
			}
			return hc1 + hc2 * 1566083941;
		}
		public override bool Equals(object obj)
		{
			return (obj is UString) && Equals((UString)obj);
		}
		public bool Equals(UString other)
		{
			if (other._count != _count) return false;
			for (int i = _start, j = other._start, e = i + _count; i < e; i++, j++) {
				if (_str[i] != other._str[j])
					return false;
			}
			return true;
		}
		public override string ToString()
		{
			return _str.Substring(_start, _count);
		}

		#endregion

		public static bool operator ==(UString x, UString y) { return x.Equals(y); }
		public static bool operator !=(UString x, UString y) { return !x.Equals(y); }
		public static explicit operator string(UString s) { return s._str.Substring(s._start, s._count); }
		public static implicit operator UString(string s) { return new UString(s); }
		public static implicit operator StringSlice(UString s) { return new StringSlice(s._str, s._start, s._count); }

		/// <summary>Synonym for Slice()</summary>
		public UString Substring(int start, int count)
		{
			if (start < 0)
				throw new ArgumentException("The start index was below zero.");
			if (count < 0)
				count = 0;
			Debug.Assert(_start <= _str.Length);
			if (start > _count)
				start = _count;
			if (count > _count - start)
				count = _count - start;
			return new UString(_start + start, count, _str);
		}
		public UString Substring(int start)
		{
			if (start < 0)
				throw new ArgumentException("The start index was below zero.");
			if (start > _count)
				start = _count;
			return new UString(_start + start, _count - start, _str);
		}

		//
		// TODO: write lots of string-like methods.
		// This can be implemented more efficiently using macros when EC# is ready.
		//

		/// <summary>Finds the specified UCS-4 character.</summary>
		/// <returns>returns a range from the first occurrence of 'what' to the 
		/// original end of this UString. If the character is not found, an empty 
		/// string (slicing the end of this range) is returned.</returns>
		public UString Find(uchar what, bool ignoreCase = false)
		{
			var sub = this;
			if (what <= 0xFFFF) {
				if (ignoreCase) {
					what = char.ToUpperInvariant((char)what);
					for (;;) {
						bool fail;
						var was = sub;
						uchar f = sub.PopFront(out fail);
						if (fail || what == f || f <= 0xFFFF && what == char.ToUpperInvariant((char)f))
							return was;
					}
				} else {
					int i = _str.IndexOf((char)what, _start, _count);
					if (i == -1) 
						return new UString(InternalStop, 0, _str);
					return new UString(i, InternalStop - i, _str);
				}
			} else {
				for (;;) {
					bool fail;
					var was = sub;
					uchar f = sub.PopFront(out fail);
					if (fail || what == f)
						return was;
				}
			}
		}
		/// <summary>Finds the specified string within this string.</summary>
		/// <returns>Returns a range from the first occurrence of 'what' to the 
		/// original end of this UString. If 'what' is not found, an empty string
		/// (slicing the end of this range) is returned.</returns>
		public UString Find(UString what, bool ignoreCase = false)
		{
			if (what.Length <= 1) {
				if (what.Length == 0)
					return this;
				return Find(what[0], ignoreCase);
			}
			if (ignoreCase)
				what = what.ToUpper();
			char first = what[0];
			int i = _start, last = _start + _count - what.Length;
			if (ignoreCase)
				for (; i <= last; i++) {
					if (char.ToUpperInvariant(_str[i]) == first && EqualsAtCaseInsensitive(i, what))
						return new UString(i, _start + _count - i, _str);
				}
			else
				for (; i <= last; i++) {
					if (_str[i] == first && EqualsAt(i, what))
						return new UString(i, _start + _count - i, _str);
				}
			return new UString(_start + _count, 0, _str);
		}
		private bool EqualsAtCaseInsensitive(int i, UString what)
		{
			for (int w = 0; w < what.Length; w++) {
				if (char.ToUpperInvariant(_str[i++]) != what[w])
					return false;
			}
			return true;
		}
		private bool EqualsAt(int i, UString what)
		{
			for (int w = 0; w < what.Length; w++) {
				if (_str[i++] != what[w])
					return false;
			}
			return true;
		}

		private bool IsSmallSlice { get { return (_count << 1) < (_str.Length - 4); } }

		/// <summary>Converts the string to uppercase using the 'invariant' culture.</summary>
		public UString ToUpper()
		{
			var sb = new StringBuilder(Length);
			bool change = false;
			for (int i = _start; i < _start + _count; i++) {
				char c = _str[i], uc = char.ToUpperInvariant(c);
				if (c != uc) change = true;
				sb.Append(uc);
			}
			return change || IsSmallSlice ? sb.ToString() : this;
		}

		/// <summary>Determines whether this string starts with the specified other 
		/// string.</summary>
		/// <returns>true if this string starts with the contents of 'what'</returns>
		public bool StartsWith(UString what, bool ignoreCase = false)
		{
			if (what.Length > Length)
				return false;
			if (ignoreCase)
				for (int i = 0; i < what.Length; i++) {
					if (char.ToUpperInvariant(this[i]) != char.ToUpperInvariant(what[i]))
						return false;
				}
			else
				for (int i = 0; i < what.Length; i++) {
					if (this[i] != what[i])
						return false;
				}
			return true;
		}

		/// <summary>Returns a new string in which all occurrences (or a specified 
		/// number of occurrences) of a specified string in the current instance 
		/// are replaced with another specified string.</summary>
		/// <param name="what"></param>
		/// <param name="replacement"></param>
		/// <param name="ignoreCase"></param>
		/// <param name="maxReplacements"></param>
		/// <returns>Returns a new string with replacements made, or the same 
		/// string if no replacements occurred.</returns>
		public UString Replace(UString what, UString replacement, bool ignoreCase = false, int maxReplacements = int.MaxValue)
		{
			if (maxReplacements <= 0)
				return this;
			UString sub = Find(what, ignoreCase);
			if (sub.IsEmpty)
				return this;
			StringBuilder sb = new StringBuilder(_str, _start, sub._start - _start, 
								capacity: _count + replacement._count - what._count);
			sb.Append(replacement._str, replacement._start, replacement._count);

			UString self;
			for (int rep = 1; ; rep++) {
				self = sub.Substring(what.Length);
				if (rep >= maxReplacements)
					break;
				sub = self.Find(what, ignoreCase);
				sb.Append(self._str, self._start, sub._start - self._start);
				if (sub.IsEmpty)
					return sb.ToString();
				sb.Append(replacement._str, replacement._start, replacement._count);
			}
			sb.Append(self._str, self._start, self._count);
			return sb.ToString();
		}
		public UString ReplaceOne(UString what, UString replacement, bool ignoreCase = false)
		{
			return Replace(what, replacement, ignoreCase, 1);
		}
	}

	[TestFixture]
	public class UStringTests : Assert
	{
		[Test]
		public void StartsWith()
		{
			IsTrue(new UString("hello").StartsWith("hell"));
			IsFalse(new UString("hello", 1).StartsWith("hell"));
			IsTrue(new UString("hello", 1).StartsWith(""));
			IsTrue(new UString("hello", 1).StartsWith("e"));
			IsTrue(new UString("hello", 2).StartsWith("llo"));
		}

		[Test]
		public void ChangeCase()
		{
			AreEqual(new UString("Hello There!!", 3, 9).ToUpper(), (UString)"LO THERE!");
			string s = "!HELLO THERE!";
			AreEqual(new UString(s, 1, 11).ToUpper(), (UString)"HELLO THERE");
			AreEqual(new UString(s, 1, 11).ToUpper().InternalString, s);
			s = "!HELLO THERE, Sailor Joe Fitzgeraldsteinenberg!";
			AreEqual(new UString(s, 1, 4).ToUpper().InternalString, "HELL");
			AreEqual(new UString("ĈĥáráĉtérŜ!").ToUpper(), (UString)"ĈĤÁRÁĈTÉRŜ!");
		}

		[Test]
		public void Find()
		{
			UString eek = "eekeekeekeek".USlice(3);
			AreEqual(eek.Find("eekee".USlice(0, 3)), (UString)"eekeekeek");
			AreEqual(eek.Find("eekee".USlice(2, 3)), (UString)"keekeek");
			AreEqual(eek.Find("KEE", false), (UString)"");
			AreEqual(eek.Find("KEE", true), (UString)"keekeek");
			UString nowaldo = "1234567890waldo!".USlice(1, 13);
			AreEqual(nowaldo.Find("waldo"), (UString)"");
			AreEqual(nowaldo.Find("waldo").InternalStart, nowaldo.InternalStop);
			UString waldo = "1234567890waldo!".USlice(1, 14);
			AreEqual(waldo.Find("waldo"), (UString)"waldo");
			AreEqual(waldo.Find("waldo").InternalStart, 10);
		}

		[Test]
		public void Replace()
		{
			UString no = "__no__".USlice(2, 2), yes = "_Yes___".USlice(1, 3);
			UString No = "__No__".USlice(2, 2), nos = "noNoNoNoNoN_".USlice(1, 9);
			AreEqual(nos.ReplaceOne(no, yes, false), nos);
			AreEqual(nos.ReplaceOne(No, yes, false), (UString)"oYesNoNoNo");
			AreEqual(nos.Replace(No, yes, false, 2), (UString)"oYesYesNoNo");
			AreEqual(nos.ReplaceOne(no, yes, true), (UString)"oYesNoNoNo");
			AreEqual(nos.Replace(no, yes, true, 3), (UString)"oYesYesYesNo");
			AreEqual(nos.Replace("NoN", "oN", false), (UString)"ooNooNo");
			AreEqual(nos.Replace(no, yes, false), (UString)"oNoNoNoNo");
			AreEqual(nos.Replace(no, yes, true), (UString)"oYesYesYesYes");
		}
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU Lesser General Public License (LGPLv3)


Written By
Software Developer None
Canada Canada
Since I started programming when I was 11, I wrote the SNES emulator "SNEqr", the FastNav mapping component, the Enhanced C# programming language (in progress), the parser generator LLLPG, and LES, a syntax to help you start building programming languages, DSLs or build systems.

My overall focus is on the Language of your choice (Loyc) initiative, which is about investigating ways to improve interoperability between programming languages and putting more power in the hands of developers. I'm also seeking employment.

Comments and Discussions