Click here to Skip to main content
15,896,296 members
Articles / Desktop Programming / WPF

Using AvalonEdit (WPF Text Editor)

Rate me:
Please Sign up or sign in to vote.
4.97/5 (271 votes)
1 Apr 2013 | LGPL3 | 13 min read | 1.9M | 72.3K | 535
AvalonEdit is an extensible Open-Source text editor with support for syntax highlighting and folding.
// Copyright (c) AlphaSierraPapa for the SharpDevelop Team (for details please see \doc\copyright.txt)
// This code is distributed under the GNU LGPL (for details please see \doc\license.txt)

using System;
using System.Globalization;
using System.Windows.Documents;

namespace ICSharpCode.AvalonEdit.Document
{
	/// <summary>
	/// Specifies the mode for getting the next caret position.
	/// Passed to <c>TextUtilities.GetNextCaretPosition</c> to select which offsets count as caret stops.
	/// </summary>
	public enum CaretPositioningMode
	{
		/// <summary>
		/// Normal positioning (stop at every caret position).
		/// The directly adjacent offset is returned without inspecting the text.
		/// </summary>
		Normal,
		/// <summary>
		/// Stop only on word borders, i.e. at offsets where the character class
		/// of the character before differs from that of the character after.
		/// </summary>
		WordBorder,
		/// <summary>
		/// Stop only at the beginning of words. This is used for Ctrl+Left/Ctrl+Right.
		/// A border followed by whitespace or a line terminator is not a word start,
		/// and neither is the end of the text.
		/// </summary>
		WordStart,
		/// <summary>
		/// Stop only at the beginning of words, and anywhere in the middle of symbols
		/// (between two adjacent characters that are both classified as <c>CharacterClass.Other</c>).
		/// </summary>
		WordStartOrSymbol,
		/// <summary>
		/// Stop only on word borders, and anywhere in the middle of symbols
		/// (between two adjacent characters that are both classified as <c>CharacterClass.Other</c>).
		/// </summary>
		WordBorderOrSymbol
	}
	
	/// <summary>
	/// Static helper methods for working with text.
	/// </summary>
	public static partial class TextUtilities
	{
		#region GetControlCharacterName
		// Names of the Unicode C0 control characters (code points 0 to 31),
		// indexed directly by code point.
		static readonly string[] c0Table = {
			"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
			"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
			"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
			"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US"
		};
		
		// Names of DEL (code point 127) followed by the C1 control characters
		// (code points 128 to 159), indexed by (code point - 127).
		static readonly string[] delAndC1Table = {
			"DEL",
			"PAD", "HOP", "BPH", "NBH", "IND", "NEL", "SSA", "ESA",
			"HTS", "HTJ", "VTS", "PLD", "PLU", "RI", "SS2", "SS3",
			"DCS", "PU1", "PU2", "STS", "CCH", "MW", "SPA", "EPA",
			"SOS", "SGCI", "SCI", "CSI", "ST", "OSC", "PM", "APC"
		};
		
		/// <summary>
		/// Gets the name of the control character.
		/// For characters outside the C0 block, DEL and the C1 block, the code point is
		/// returned as a 4-digit lowercase hexadecimal value instead.
		/// </summary>
		/// <param name="controlCharacter">The character to look up.</param>
		/// <returns>The abbreviated control character name, or the hexadecimal code point.</returns>
		public static string GetControlCharacterName(char controlCharacter)
		{
			int codePoint = controlCharacter;
			if (codePoint < c0Table.Length)
				return c0Table[codePoint];
			
			bool isDelOrC1 = codePoint >= 127 && codePoint <= 159;
			return isDelOrC1
				? delAndC1Table[codePoint - 127]
				: codePoint.ToString("x4", CultureInfo.InvariantCulture);
		}
		#endregion
		
		#region GetWhitespace
		/// <summary>
		/// Finds the run of spaces and tabs that begins at <paramref name="offset"/>.
		/// Newlines are not considered whitespace by this method and end the run.
		/// </summary>
		/// <param name="textSource">The text source.</param>
		/// <param name="offset">The offset where the whitespace starts.</param>
		/// <returns>The segment containing the whitespace; its length is 0 if there is none.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		public static ISegment GetWhitespaceAfter(ITextSource textSource, int offset)
		{
			if (textSource == null)
				throw new ArgumentNullException("textSource");
			
			int end = offset;
			while (end < textSource.TextLength) {
				char current = textSource.GetCharAt(end);
				bool isBlank = current == ' ' || current == '\t';
				if (!isBlank)
					break;
				end++;
			}
			// 'end' is the first offset that is not a space/tab (or the end of the text)
			return new SimpleSegment(offset, end - offset);
		}
		
		/// <summary>
		/// Finds the run of spaces and tabs that ends at <paramref name="offset"/>.
		/// Newlines are not considered whitespace by this method and end the run.
		/// </summary>
		/// <param name="textSource">The text source.</param>
		/// <param name="offset">The offset where the whitespace ends.</param>
		/// <returns>The segment containing the whitespace; its length is 0 if there is none.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		public static ISegment GetWhitespaceBefore(ITextSource textSource, int offset)
		{
			if (textSource == null)
				throw new ArgumentNullException("textSource");
			
			// Walk backwards from the character directly in front of 'offset'.
			int scan = offset - 1;
			while (scan >= 0) {
				char current = textSource.GetCharAt(scan);
				bool isBlank = current == ' ' || current == '\t';
				if (!isBlank)
					break;
				scan--;
			}
			// 'scan' rests on the first non-blank character (or -1); the run starts just after it.
			int start = scan + 1;
			return new SimpleSegment(start, offset - start);
		}
		
		/// <summary>
		/// Gets the leading whitespace segment on the document line.
		/// </summary>
		/// <param name="document">The document that <paramref name="documentLine"/> belongs to.</param>
		/// <param name="documentLine">The line whose leading whitespace is requested.</param>
		/// <returns>The segment of spaces and tabs that starts at the line's offset;
		/// its length is 0 if the line does not start with whitespace.</returns>
		/// <exception cref="ArgumentNullException">
		/// <paramref name="documentLine"/> or <paramref name="document"/> is null.
		/// </exception>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
		                                                 Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
		public static ISegment GetLeadingWhitespace(TextDocument document, DocumentLine documentLine)
		{
			if (documentLine == null)
				throw new ArgumentNullException("documentLine");
			// Validate here rather than relying on GetWhitespaceAfter, so that the exception
			// names this method's parameter ("document") instead of the callee's ("textSource").
			if (document == null)
				throw new ArgumentNullException("document");
			return GetWhitespaceAfter(document, documentLine.Offset);
		}
		
		/// <summary>
		/// Gets the trailing whitespace segment on the document line.
		/// </summary>
		/// <param name="document">The document that <paramref name="documentLine"/> belongs to.</param>
		/// <param name="documentLine">The line whose trailing whitespace is requested.</param>
		/// <returns>The segment of spaces and tabs that ends at the line's end offset.
		/// If the whitespace run reaches back to the start of the line, an empty segment
		/// located at the line's end offset is returned instead.</returns>
		/// <exception cref="ArgumentNullException">
		/// <paramref name="documentLine"/> or <paramref name="document"/> is null.
		/// </exception>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
		                                                 Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
		public static ISegment GetTrailingWhitespace(TextDocument document, DocumentLine documentLine)
		{
			if (documentLine == null)
				throw new ArgumentNullException("documentLine");
			// Validate here rather than relying on GetWhitespaceBefore, so that the exception
			// names this method's parameter ("document") instead of the callee's ("textSource").
			if (document == null)
				throw new ArgumentNullException("document");
			ISegment segment = GetWhitespaceBefore(document, documentLine.EndOffset);
			// If the whole line consists of whitespace, we consider all of it as leading whitespace,
			// so return an empty segment as trailing whitespace.
			if (segment.Offset == documentLine.Offset)
				return new SimpleSegment(documentLine.EndOffset, 0);
			else
				return segment;
		}
		#endregion
		
		#region GetSingleIndentationSegment
		/// <summary>
		/// Gets a single indentation segment starting at <paramref name="offset"/> - either exactly one tab,
		/// or up to <paramref name="indentationSize"/> consecutive spaces.
		/// </summary>
		/// <param name="textSource">The text source.</param>
		/// <param name="offset">The offset where the indentation segment starts.</param>
		/// <param name="indentationSize">The size of an indentation unit. See <see cref="TextEditorOptions.IndentationSize"/>.</param>
		/// <returns>The indentation segment.
		/// If there is no indentation character at the specified <paramref name="offset"/>,
		/// an empty segment is returned.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
		public static ISegment GetSingleIndentationSegment(ITextSource textSource, int offset, int indentationSize)
		{
			if (textSource == null)
				throw new ArgumentNullException("textSource");
			
			int end = offset;
			for (; end < textSource.TextLength; end++) {
				char current = textSource.GetCharAt(end);
				if (current == '\t') {
					// A tab counts as a whole indentation unit only when it is the first character;
					// a tab following spaces just terminates the run of spaces.
					if (end == offset)
						end++;
					break;
				}
				// Stop at any non-space character, or once indentationSize spaces were collected.
				if (current != ' ' || end - offset >= indentationSize)
					break;
			}
			return new SimpleSegment(offset, end - offset);
		}
		#endregion
		
		#region GetCharacterClass
		/// <summary>
		/// Classifies a character as line terminator, whitespace, identifier part, or other.
		/// </summary>
		/// <param name="c">The character to classify.</param>
		/// <returns>
		/// <see cref="CharacterClass.LineTerminator"/> for '\r' and '\n';
		/// <see cref="CharacterClass.Whitespace"/> for any other whitespace character;
		/// <see cref="CharacterClass.IdentifierPart"/> for letters, digits and '_';
		/// <see cref="CharacterClass.Other"/> for everything else.
		/// </returns>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "c")]
		public static CharacterClass GetCharacterClass(char c)
		{
			// Line terminators must be tested first: char.IsWhiteSpace would also match them.
			if (c == '\r' || c == '\n')
				return CharacterClass.LineTerminator;
			if (char.IsWhiteSpace(c))
				return CharacterClass.Whitespace;
			
			bool isIdentifierChar = c == '_' || char.IsLetterOrDigit(c);
			return isIdentifierChar ? CharacterClass.IdentifierPart : CharacterClass.Other;
		}
		#endregion
		
		#region GetNextCaretPosition
		/// <summary>
		/// Gets the next caret position, searching from <paramref name="offset"/> in the given direction.
		/// </summary>
		/// <param name="textSource">The text source.</param>
		/// <param name="offset">The start offset inside the text source.</param>
		/// <param name="direction">The search direction (forwards or backwards).</param>
		/// <param name="mode">The mode for caret positioning.</param>
		/// <returns>The offset of the next caret position, or -1 if there is no further caret position
		/// in the text source.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
		/// <exception cref="ArgumentException">
		/// <paramref name="mode"/> or <paramref name="direction"/> is not one of the defined enum values.
		/// </exception>
		/// <remarks>
		/// This method is NOT equivalent to the actual caret movement when using VisualLine.GetNextCaretPosition.
		/// In real caret movement, there are additional caret stops at line starts and ends.
		/// At the borders of the text source, this method treats line terminators like any other whitespace.
		/// </remarks>
		public static int GetNextCaretPosition(ITextSource textSource, int offset, LogicalDirection direction, CaretPositioningMode mode)
		{
			if (textSource == null)
				throw new ArgumentNullException("textSource");
			switch (mode) {
				case CaretPositioningMode.Normal:
				case CaretPositioningMode.WordBorder:
				case CaretPositioningMode.WordStart:
				case CaretPositioningMode.WordBorderOrSymbol:
				case CaretPositioningMode.WordStartOrSymbol:
					break;
				default:
					throw new ArgumentException("Unsupported CaretPositioningMode: " + mode, "mode");
			}
			bool backward = direction == LogicalDirection.Backward;
			if (!backward && direction != LogicalDirection.Forward)
				throw new ArgumentException("Invalid LogicalDirection: " + direction, "direction");
			
			bool normalMode = mode == CaretPositioningMode.Normal;
			int textLength = textSource.TextLength;
			if (textLength <= 0) {
				// An empty text source has a single normal caret position at 0 and no word borders;
				// position 0 is only reachable when starting outside of it and moving towards it.
				if (normalMode) {
					if (offset > 0 && backward) return 0;
					if (offset < 0 && !backward) return 0;
				}
				return -1;
			}
			
			bool wordStartsOnly = mode == CaretPositioningMode.WordStart
				|| mode == CaretPositioningMode.WordStartOrSymbol;
			bool stopInsideSymbols = mode == CaretPositioningMode.WordBorderOrSymbol
				|| mode == CaretPositioningMode.WordStartOrSymbol;
			int step = backward ? -1 : 1;
			
			// Visit candidate positions one by one. Leaving the range [0, textLength] ends the search,
			// which also takes care of start offsets that lie outside the valid range.
			for (int candidate = offset + step; candidate >= 0 && candidate <= textLength; candidate += step) {
				// Every position is a caret stop in normal mode.
				if (normalMode)
					return candidate;
				
				bool isStop;
				if (candidate == 0) {
					// Start of the text source (which usually isn't the whole document, but a single
					// VisualLineElement): a border exists only if the first character is not whitespace.
					isStop = !char.IsWhiteSpace(textSource.GetCharAt(0));
				} else if (candidate == textLength) {
					// End of the text source: never a word start, and a border only if the
					// last character is not whitespace.
					isStop = !wordStartsOnly && !char.IsWhiteSpace(textSource.GetCharAt(textLength - 1));
				} else {
					CharacterClass classBefore = GetCharacterClass(textSource.GetCharAt(candidate - 1));
					CharacterClass classAfter = GetCharacterClass(textSource.GetCharAt(candidate));
					if (classBefore == classAfter) {
						// Same class on both sides: only the "OrSymbol" modes stop here, and only
						// between two characters of class Other.
						isStop = stopInsideSymbols && classBefore == CharacterClass.Other;
					} else {
						// The class changes, so this is a border. When looking for word starts,
						// reject it if whitespace or a line terminator follows (that is a word end).
						bool blankFollows = classAfter == CharacterClass.Whitespace
							|| classAfter == CharacterClass.LineTerminator;
						isStop = !(wordStartsOnly && blankFollows);
					}
				}
				if (isStop)
					return candidate;
			}
			return -1;
		}
		#endregion
	}
	
	/// <summary>
	/// Classifies a character as whitespace, line terminator, part of an identifier, or other.
	/// Values of this type are returned by <c>TextUtilities.GetCharacterClass</c>.
	/// </summary>
	public enum CharacterClass
	{
		/// <summary>
		/// The character is not whitespace, line terminator or part of an identifier.
		/// </summary>
		Other,
		/// <summary>
		/// The character is whitespace (but not line terminator):
		/// <c>char.IsWhiteSpace</c> is true for it and it is neither '\r' nor '\n'.
		/// </summary>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		Whitespace,
		/// <summary>
		/// The character can be part of an identifier (letter, digit or underscore).
		/// </summary>
		IdentifierPart,
		/// <summary>
		/// The character is a line terminator (\r or \n).
		/// </summary>
		LineTerminator
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU Lesser General Public License (LGPLv3)


Written By
Germany
I am the lead developer on the SharpDevelop open source project.

Comments and Discussions