|
using System;
using System.Diagnostics;
using System.Collections;
using System.Text;
using System.Text.RegularExpressions;
using System.Diagnostics;
namespace HtmlFragments
{
/// <summary>
/// A collection of fragments organized in a list.
/// </summary>
/// <remarks>
/// The collection can be filled manually through its <see cref="IList"/> members or
/// by parsing a piece of markup with the <see cref="Fragments(string)"/> constructor.
/// All list operations are forwarded to an internal <see cref="ArrayList"/>.
/// </remarks>
public class Fragments: Fragment, IList
{
    /// <summary>
    /// Tokenizer used by the parsing constructor. The pattern never changes, so it is
    /// built once instead of on every construction.
    /// </summary>
    private static readonly Regex fragmentRegex = CreateFragmentRegex();

    /// <summary>
    /// Backing store of the collection.
    /// </summary>
    private ArrayList nodes = new ArrayList();

    /// <summary>
    /// Construct an empty collection.
    /// </summary>
    public Fragments()
    {
    }

    /// <summary>
    /// Constructs a collection and parses the given text into fragments.
    /// </summary>
    /// <remarks>
    /// The input is consumed token by token (open tag, short tag, close tag, doctype,
    /// comment, text, white space). Tokens have to follow each other without gaps and
    /// have to cover the input up to its end. Runs of white space inside text are
    /// collapsed into a single blank; white space between other tokens is dropped.
    /// Tags that are never closed keep the type <c>FragmentTagType.Open</c>.
    /// </remarks>
    /// <param name="fragment">The text to be parsed.</param>
    /// <exception cref="HtmlFragments.FragmentParsingException">Will be thrown, if there is an error in the input.</exception>
    /// <exception cref="System.ArgumentNullException">Will be thrown by the regular expression engine, if <paramref name="fragment"/> is null.</exception>
    public Fragments( string fragment )
    {
        // Tags that have been opened but not yet closed; the last entry is the innermost one.
        ArrayList openTags = new ArrayList();
        FragmentTag currentTag = null;
        Fragments currentNodes = this;
        int mustMatchAt = 0;

        for ( Match match = fragmentRegex.Match( fragment, mustMatchAt ); match.Success; match = match.NextMatch() )
        {
            // A gap between two matches is input that no token pattern accepts.
            if ( match.Index != mustMatchAt )
            {
                throw new FragmentParsingException( "unmatched text found.", mustMatchAt, fragment );
            }
            mustMatchAt = match.Index + match.Length;

            if ( match.Groups["Text"].Success )
            {
                currentNodes.Add( new FragmentText( CollapseWhiteSpace( match ) ) );
            }
            else if ( match.Groups["Doctype"].Success )
            {
                string root = match.Groups["root"].Value;
                string type = match.Groups["type"].Value;
                string identifier = match.Groups["identifier"].Value;
                string uri = match.Groups["uri"].Value;
                currentNodes.Add( new FragmentDoctype( root, type, identifier, uri ) );
            }
            else if ( match.Groups["Comment"].Success )
            {
                currentNodes.Add( new FragmentComment( match.Groups["Comment"].Value ) );
            }
            else if ( match.Groups["OpenCloseTag"].Success )
            {
                FragmentTag tag = new FragmentTag( match.Groups["Tag"].Value, FragmentTagType.OpenCloseShort );
                AppendAttributes( tag, match );
                currentNodes.Add( tag );
            }
            else if ( match.Groups["OpenTag"].Success )
            {
                FragmentTag tag = new FragmentTag( match.Groups["Tag"].Value, FragmentTagType.Open );
                AppendAttributes( tag, match );
                currentNodes.Add( tag );

                // Everything that follows becomes a child of this tag until it is closed.
                openTags.Add( tag );
                currentTag = tag;
                currentNodes = currentTag.Nodes;
            }
            else if ( match.Groups["CloseTag"].Success )
            {
                string closingName = match.Groups["Tag"].Value;

                if ( currentTag == null )
                {
                    // Closing tag without an opening one: everything collected so far
                    // at this level is moved below a tag of type Close.
                    FragmentTag tag = new FragmentTag( closingName, FragmentTagType.Close );
                    foreach ( Fragment node in currentNodes )
                    {
                        tag.Nodes.Add( node );
                    }
                    currentNodes.Clear();
                    currentNodes.Add( tag );
                }
                else if ( currentTag.Name == closingName )
                {
                    currentTag.Type = FragmentTagType.OpenClose;

                    // currentTag is always the last entry, so pop it by position
                    // instead of searching the list for an equal element.
                    openTags.RemoveAt( openTags.Count - 1 );
                    if ( openTags.Count > 0 )
                    {
                        currentTag = (FragmentTag)openTags[ openTags.Count - 1 ];
                        currentNodes = currentTag.Nodes;
                    }
                    else
                    {
                        currentTag = null;
                        currentNodes = this;
                    }
                }
                else
                {
                    throw new FragmentParsingException( "unmatched closing tag '" + closingName + "'. Should match '" + currentTag.Name + "'.", match.Index, fragment );
                }
            }
            // A match of nothing but white space is intentionally ignored.
        }

        // The loop only detects gaps between matches; input left over after the
        // last match (or input without any match at all) is an error as well.
        if ( mustMatchAt != fragment.Length )
        {
            throw new FragmentParsingException( "unmatched text found.", mustMatchAt, fragment );
        }
    }

    /// <summary>
    /// Builds the regular expression that recognizes a single token of the input.
    /// </summary>
    /// <returns>The tokenizer; unnamed groups do not capture (<see cref="RegexOptions.ExplicitCapture"/>).</returns>
    private static Regex CreateFragmentRegex()
    {
        // {0} is replaced by the name of the group receiving the value without quotes.
        // Both quoting styles capture into that same group, and the alternation is
        // enclosed in parentheses so it only alternates between the two quoted forms
        // and never splits the surrounding pattern it is embedded in.
        string patternQuoted = "(\"(?'{0}'[^\"]*)\"|'(?'{0}'[^']*)')";
        string patternAttributes = @"(\s+(?'attribute'(?'name'[\w-]+)=" + String.Format( patternQuoted, "value" ) + "))*";
        string patternTag = @"(?'Tag'\w+)";
        string patternOpenTag = @"(?'OpenTag'<" + patternTag + patternAttributes + @"\s*>)";
        string patternOpenCloseTag = @"(?'OpenCloseTag'<" + patternTag + patternAttributes + @"\s*/>)";
        string patternCloseTag = @"(?'CloseTag'</" + patternTag + @"\s*>)";
        string patternWhiteSpace = @"(?'WhiteSpace'\s+)";
        string patternText = @"(?'Text'[^<>\s](" + patternWhiteSpace + @"[^<>\s]|[^<>\s])*)";
        string patternDoctype = @"(?'Doctype'<!DOCTYPE\s+(?'root'\w+)\s+(?'type'\w+)(\s+" + String.Format( patternQuoted, "identifier" ) + @")?(\s+" + String.Format( patternQuoted, "uri" ) + @")?\s*>)";
        // Lazy quantifier: a comment ends at the first "-->", not at the last one in the input.
        string patternComment = @"<!--(?'Comment'[\s\S]*?)-->";
        string pattern = patternOpenTag + "|" + patternOpenCloseTag + "|" + patternCloseTag + "|" + patternDoctype + "|" + patternComment + "|" + patternText + "|" + patternWhiteSpace;
        return new Regex( pattern, RegexOptions.ExplicitCapture );
    }

    /// <summary>
    /// Returns the text of a text token with every run of white space replaced by one blank.
    /// </summary>
    /// <param name="match">A match whose "Text" group succeeded.</param>
    /// <returns>The normalized text.</returns>
    private static string CollapseWhiteSpace( Match match )
    {
        // Number of characters the text has grown (negative: shrunk) through earlier replacements.
        int offset = 0;
        StringBuilder text = new StringBuilder( match.Groups["Text"].Value );
        foreach ( Capture white in match.Groups["WhiteSpace"].Captures )
        {
            // Capture positions refer to the whole input; the text token starts at match.Index.
            text.Replace( white.Value, " ", white.Index - match.Index + offset, white.Length );
            offset += 1 - white.Length;
        }
        return text.ToString();
    }

    /// <summary>
    /// Copies the attributes captured for a tag token into the tag.
    /// </summary>
    /// <param name="tag">Tag that receives the attributes.</param>
    /// <param name="match">Match of an open or short tag; its "name" and "value" captures are paired by position.</param>
    private void AppendAttributes( FragmentTag tag, Match match )
    {
        Group nameGroup = match.Groups["name"];
        Group valueGroup = match.Groups["value"];
        if ( nameGroup.Success && valueGroup.Success )
        {
            CaptureCollection names = nameGroup.Captures;
            CaptureCollection values = valueGroup.Captures;
            for ( int i = 0; i < names.Count; i++ )
            {
                tag.Attributes[ names[i].Value ] = values[i].Value;
            }
        }
    }

    #region Append
    /// <summary>
    /// Appends the text of all child fragment to the string.
    /// </summary>
    /// <param name="builder">To this object all text is appended.</param>
    public override void Append( StringBuilder builder )
    {
        foreach ( Fragment node in nodes )
            node.Append( builder );
    }
    #endregion

    #region Implementation of IList
    /// <summary>
    /// Removes the fragment at an index.
    /// </summary>
    /// <param name="index">Index of the element to remove.</param>
    public void RemoveAt( int index )
    {
        nodes.RemoveAt( index );
    }

    /// <summary>
    /// Inserts the fragment at the given index.
    /// </summary>
    /// <param name="index">Where to insert.</param>
    /// <param name="fragment">Fragment to insert.</param>
    public void Insert( int index, Fragment fragment )
    {
        nodes.Insert( index, fragment );
    }

    void IList.Insert( int index, object value )
    {
        Insert( index, (Fragment)value );
    }

    /// <summary>
    /// Removes the given element.
    /// </summary>
    /// <param name="fragment">Fragment to remove.</param>
    public void Remove( Fragment fragment )
    {
        nodes.Remove( fragment );
    }

    void IList.Remove( object value )
    {
        // Something that is not a fragment cannot be in the list, so there is
        // nothing to remove; do not fail on the cast.
        if ( value == null || value is Fragment )
            Remove( (Fragment)value );
    }

    bool IList.Contains( object value )
    {
        // Something that is not a fragment cannot be in the list.
        if ( value != null && !( value is Fragment ) )
            return false;
        return Contains( (Fragment)value );
    }

    /// <summary>
    /// Checks, whether a fragment is contained in the collection.
    /// </summary>
    /// <param name="fragment">Fragment to look for.</param>
    /// <returns>true if the fragment is in the collection, else false.</returns>
    public bool Contains( Fragment fragment )
    {
        return nodes.Contains( fragment );
    }

    /// <summary>
    /// Clear the collection.
    /// </summary>
    public void Clear()
    {
        nodes.Clear();
    }

    int IList.IndexOf( object value )
    {
        // Something that is not a fragment cannot be in the list.
        if ( value != null && !( value is Fragment ) )
            return -1;
        return IndexOf( (Fragment)value );
    }

    /// <summary>
    /// Returns the index of a fragment.
    /// </summary>
    /// <param name="fragment">Fragment to look for.</param>
    /// <returns>Index of the fragment, or -1 if it is not in the collection.</returns>
    public int IndexOf( Fragment fragment )
    {
        return nodes.IndexOf( fragment );
    }

    int IList.Add( object value )
    {
        return Add( (Fragment)value );
    }

    /// <summary>
    /// Add a fragment to the end of the collection.
    /// </summary>
    /// <param name="fragment">Fragment to add.</param>
    /// <returns>Index of the appended fragment.</returns>
    public int Add( Fragment fragment )
    {
        return nodes.Add( fragment );
    }

    /// <summary>
    /// Is the collection read-only.
    /// </summary>
    public bool IsReadOnly
    {
        get
        {
            return nodes.IsReadOnly;
        }
    }

    object IList.this[int index]
    {
        get
        {
            return nodes[index];
        }
        set
        {
            if ( value is Fragment )
                nodes[index] = value;
            else
                throw new ArgumentException( "Does not inherit " + typeof( Fragment ).FullName + ".", "value" );
        }
    }

    /// <summary>
    /// Indexed access to the collection.
    /// </summary>
    public Fragment this[int index]
    {
        get
        {
            return (Fragment)nodes[index];
        }
        set
        {
            nodes[index] = value;
        }
    }

    /// <summary>
    /// Is the collection of fixed size.
    /// </summary>
    public bool IsFixedSize
    {
        get
        {
            return nodes.IsFixedSize;
        }
    }
    #endregion

    #region Implementation of ICollection
    /// <summary>
    /// Copies the collection into an array.
    /// </summary>
    /// <param name="array">The one-dimensional Array that is the destination of the elements copied from the current collection.</param>
    /// <param name="index">The index in array at which copying begins.</param>
    public void CopyTo( System.Array array, int index )
    {
        nodes.CopyTo( array, index );
    }

    /// <summary>
    /// Gets a value indicating whether access to the collection is synchronized (thread-safe).
    /// </summary>
    public bool IsSynchronized
    {
        get
        {
            return nodes.IsSynchronized;
        }
    }

    /// <summary>
    /// Gets the number of elements in the collection.
    /// </summary>
    public int Count
    {
        get
        {
            return nodes.Count;
        }
    }

    /// <summary>
    /// Gets an object that can be used to synchronize access to the collection.
    /// </summary>
    public object SyncRoot
    {
        get
        {
            return nodes.SyncRoot;
        }
    }
    #endregion

    #region Implementation of IEnumerable
    /// <summary>
    /// Returns an enumerator that can iterate through the collection.
    /// </summary>
    /// <returns>An IEnumerator for the entire collection.</returns>
    public System.Collections.IEnumerator GetEnumerator()
    {
        return nodes.GetEnumerator();
    }
    #endregion
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.