Click here to Skip to main content
12,455,073 members (56,428 online)
Click here to Skip to main content

Stats

46.7K views
760 downloads
69 bookmarked
Posted

GoalBook - A Hybrid Smart Client

Mark Brownsword, 25 Sep 2009 CPOL
A WPF hybrid smart client that synchronises your goals with the Toodledo online To-do service.
GoalBook
Dependencies
CAG WPF 2
Microsoft.Practices.Composite.dll
Microsoft.Practices.Composite.Presentation.dll
Microsoft.Practices.Composite.UnityExtensions.dll
Microsoft.Practices.ObjectBuilder2.dll
Microsoft.Practices.ServiceLocation.dll
Microsoft.Practices.Unity.dll
vssver2.scc
CSLA
Csla.dll
Csla.XmlSerializers.dll
vssver2.scc
Infragistics
Infragistics3.Wpf.DataPresenter.v9.1.Express.dll
Infragistics3.Wpf.Editors.v9.1.Express.dll
Infragistics3.Wpf.v9.1.Express.dll
vssver2.scc
WPFToolkit
vssver2.scc
WPFToolkit.dll
GoalBook.Controls
CheckedList
vssver2.scc
Properties
vssver2.scc
vssver2.scc
GoalBook.Goals
GoalBook.Goals.csproj.user
Properties
vssver2.scc
Views
vssver2.scc
vssver2.scc
GoalBook.Infrastructure
Comparers
vssver2.scc
Constants
vssver2.scc
Controls
Converters
vssver2.scc
Enums
vssver2.scc
Events
vssver2.scc
GoalBook.Infrastructure.csproj.user
Helpers
vssver2.scc
Interfaces
vssver2.scc
ObjectModel
vssver2.scc
Printing
vssver2.scc
Properties
vssver2.scc
vssver2.scc
GoalBook.Notes
GoalBook.Notes.csproj.user
Properties
vssver2.scc
Resources
arrow_redo.png
arrow_undo.png
cut.png
page_copy.png
paste_plain.png
text_bold.png
text_indent.png
text_indent_remove.png
text_italic.png
text_list_bullets.png
text_list_numbers.png
vssver2.scc
world_link.png
Views
vssver2.scc
vssver2.scc
GoalBook.Public
Encryption
vssver2.scc
HtmlConverter
vssver2.scc
HtmlParser
vssver2.scc
Misc
vssver2.scc
Properties
vssver2.scc
vssver2.scc
GoalBook.Shell
App.ico
Commands
vssver2.scc
GoalBook.Shell.csproj.user
Journal.ico
Journal48.ico
Misc
vssver2.scc
Modules
vssver2.scc
Properties
licenses.licx
vssver2.scc
Resources
arrow_refresh.png
arrow_undo.png
checked.gif
cross.png
disk.png
email.png
erase.png
error.png
exclamation.png
flag_red.png
group.png
help.png
information.png
magnifier.png
page_red.png
pencil.png
printer.png
user.png
vssver2.scc
Wave.jpg
world.png
world_link.png
Services
vssver2.scc
Splash.png
Views
vssver2.scc
vssver2.scc
Windows
vssver2.scc
GoalBook.snk
GoalBook.Synchronisation
Events
vssver2.scc
Properties
vssver2.scc
ToodleDo
vssver2.scc
vssver2.scc
GoalBook.Tasks
Controls
vssver2.scc
Properties
vssver2.scc
Views
vssver2.scc
vssver2.scc
vssver2.scc
//---------------------------------------------------------------------------
// 
// File: HtmlLexicalAnalyzer.cs
//
// Copyright (C) Microsoft Corporation.  All rights reserved.
//
// Description: Lexical analyzer for Html-to-Xaml converter
//
//---------------------------------------------------------------------------

using System;
using System.IO;
using System.Diagnostics;
using System.Collections;
using System.Text;

namespace GoalBook.Public.HtmlConverter
{
    /// <summary>
    /// lexical analyzer class
    /// recognizes tokens as groups of characters separated by arbitrary amounts of whitespace
    /// also classifies tokens according to type
    /// </summary>
    internal class HtmlLexicalAnalyzer
    {
        // ---------------------------------------------------------------------
        //
        // Constructors
        //
        // ---------------------------------------------------------------------

        #region Constructors

        /// <summary>
        /// Creates a lexical analyzer over the supplied text and primes it, so that
        /// the first input character is already available through NextCharacter
        /// and the second one through the look-ahead when the constructor returns.
        /// </summary>
        /// <param name="inputTextString">
        /// html/xml text to be split into tokens
        /// </param>
        internal HtmlLexicalAnalyzer(string inputTextString)
        {
            // Token state: start out with an empty text token;
            // leading whitespace of the document is not significant
            _nextToken = new StringBuilder(100);
            _nextTokenType = HtmlTokenType.Text;
            _ignoreNextWhitespace = true;

            // Character state: neutral placeholders until real characters are read
            _previousCharacter = ' ';
            _nextCharacter = ' ';
            _nextCharacterCode = 0;

            // Load the look-ahead with the first character of the input (-1 if the input is empty)
            _inputStringReader = new StringReader(inputTextString);
            _lookAheadCharacterCode = _inputStringReader.Read();
            _lookAheadCharacter = (char)_lookAheadCharacterCode;

            // Shift the look-ahead into NextCharacter
            this.GetNextCharacter();
        }

        #endregion Constructors

        // ---------------------------------------------------------------------
        //
        // Internal methods
        //
        // ---------------------------------------------------------------------

        #region Internal Methods

        /// <summary>
        /// Reads the next token found in element content and records it in
        /// NextToken / NextTokenType. The result is one of:
        /// EOF (input exhausted), OpeningTagStart ("&lt;"), ClosingTagStart ("&lt;/"),
        /// Comment, or Text. CDATA-like sections and unknown directives are
        /// reported as an empty Text token; processing directives inside text are skipped.
        /// Runs of whitespace/control characters inside text collapse into a single space.
        /// </summary>
        internal void GetNextContentToken()
        {
            Debug.Assert(_nextTokenType != HtmlTokenType.EOF);
            _nextToken.Length = 0;

            if (this.IsAtEndOfStream)
            {
                _nextTokenType = HtmlTokenType.EOF;
                return;
            }

            if (this.IsAtTagStart)
            {
                // step over '<'
                this.GetNextCharacter();

                bool isClosingTag = this.NextCharacter == '/';
                if (isClosingTag)
                {
                    _nextToken.Append("</");
                    _nextTokenType = HtmlTokenType.ClosingTagStart;

                    // step over '/'
                    this.GetNextCharacter();
                }
                else
                {
                    _nextToken.Append("<");
                    _nextTokenType = HtmlTokenType.OpeningTagStart;
                }

                // Whitespace is significant after a closing tag and insignificant after an opening tag
                _ignoreNextWhitespace = !isClosingTag;
                return;
            }

            if (this.IsAtDirectiveStart)
            {
                // step over '<' so that '!' is current and the look-ahead tells the directive kind
                this.GetNextCharacter();

                switch (_lookAheadCharacter)
                {
                    case '[':
                        // cdata or another bracketed directive
                        this.ReadDynamicContent();
                        break;

                    case '-':
                        this.ReadComment();
                        break;

                    default:
                        // neither a comment nor cdata, something like DOCTYPE;
                        // skipped up to the next tag ender
                        this.ReadUnknownDirective();
                        break;
                }
                return;
            }

            // Plain text: accumulate until a tag, a directive or the end of input shows up
            _nextTokenType = HtmlTokenType.Text;
            while (!(this.IsAtTagStart || this.IsAtEndOfStream || this.IsAtDirectiveStart))
            {
                if (this.NextCharacter == '<' && !this.IsNextCharacterEntity && _lookAheadCharacter == '?')
                {
                    // processing directives do not contribute to the text
                    this.SkipProcessingDirective();
                    continue;
                }

                // anything at or below the space character counts as whitespace here
                bool isWhitespace = this.NextCharacter <= ' ';
                if (!isWhitespace)
                {
                    _nextToken.Append(this.NextCharacter);
                }
                else if (!_ignoreNextWhitespace)
                {
                    // first whitespace of a run: represent the whole run by one space
                    _nextToken.Append(' ');
                }

                // after a whitespace keep ignoring the following ones; after a visible character stop ignoring
                _ignoreNextWhitespace = isWhitespace;
                this.GetNextCharacter();
            }
        }

        /// <summary>
        /// Unconditionally returns a token which is one of: TagEnd, EmptyTagEnd, Name, Atom or EOF.
        /// Leading whitespace (and any comments/directives handled by SkipWhiteSpace) is skipped first.
        /// Does not guarantee token reader advancing.
        /// </summary>
        internal void GetNextTagToken()
        {
            _nextToken.Length = 0;

            if (!this.IsAtEndOfStream)
            {
                this.SkipWhiteSpace();
            }

            // The end of stream must be tested after skipping whitespace as well:
            // input such as "<a " runs out while skipping, and advancing any further
            // would make GetNextCharacter throw InvalidOperationException.
            if (this.IsAtEndOfStream)
            {
                _nextTokenType = HtmlTokenType.EOF;
                return;
            }

            if (this.NextCharacter == '>' && !this.IsNextCharacterEntity)
            {
                // &gt; should not end a tag, so make sure it's not an entity
                _nextTokenType = HtmlTokenType.TagEnd;
                _nextToken.Append('>');
                this.GetNextCharacter();
                // Note: _ignoreNextWhitespace must be set appropriately on tag start processing
            }
            else if (this.NextCharacter == '/' && _lookAheadCharacter == '>')
            {
                // end of an empty (self-closing) tag
                _nextTokenType = HtmlTokenType.EmptyTagEnd;
                _nextToken.Append("/>");
                this.GetNextCharacter();
                this.GetNextCharacter();
                _ignoreNextWhitespace = false; // Whitespace after no-scope tags is significant
            }
            else if (IsGoodForNameStart(this.NextCharacter))
            {
                _nextTokenType = HtmlTokenType.Name;

                // starts a name
                // we allow character entities here
                // we do not throw exceptions here if end of stream is encountered
                // just stop and return whatever is in the token
                // if the parser is not expecting end of file after this it will call
                // the get next token function again and receive EOF
                while (IsGoodForName(this.NextCharacter) && !this.IsAtEndOfStream)
                {
                    _nextToken.Append(this.NextCharacter);
                    this.GetNextCharacter();
                }
            }
            else
            {
                // Unexpected type of token for a tag. Report one character as Atom, expecting that HtmlParser will ignore it.
                _nextTokenType = HtmlTokenType.Atom;
                _nextToken.Append(this.NextCharacter);
                this.GetNextCharacter();
            }
        }

        /// <summary>
        /// Always produces an EqualSign token with the text "=".
        /// Whitespace is skipped first; if an actual '=' follows it is consumed,
        /// otherwise the input position stays where it is and the token is
        /// reported as if the equal sign had been present.
        /// Does not guarantee token reader advancing.
        /// </summary>
        internal void GetNextEqualSignToken()
        {
            Debug.Assert(_nextTokenType != HtmlTokenType.EOF);

            // The token is the same whether or not the input really contains '='
            _nextTokenType = HtmlTokenType.EqualSign;
            _nextToken.Length = 0;
            _nextToken.Append('=');

            this.SkipWhiteSpace();

            // '=' is not in the list of entities, so no need to check for entities here
            bool hasRealEqualSign = this.NextCharacter == '=';
            if (hasRealEqualSign)
            {
                this.GetNextCharacter();
            }
        }

        /// <summary>
        /// Always produces an Atom token holding an attribute value.
        /// A value that starts with a literal single or double quote extends to the
        /// matching literal quote (or to the end of input); the quotes themselves are
        /// not part of the token. An unquoted value extends to the next whitespace,
        /// '>' or end of input. The token may be empty.
        /// Does not guarantee token reader advancing.
        /// </summary>
        internal void GetNextAtomToken()
        {
            Debug.Assert(_nextTokenType != HtmlTokenType.EOF);
            _nextToken.Length = 0;

            this.SkipWhiteSpace();

            _nextTokenType = HtmlTokenType.Atom;

            // a quote written as a character entity does not delimit a value
            bool isQuoted = (this.NextCharacter == '\'' || this.NextCharacter == '"') && !this.IsNextCharacterEntity;

            if (!isQuoted)
            {
                // Unquoted value: read up to whitespace, tag end or end of input
                while (!(this.IsAtEndOfStream || Char.IsWhiteSpace(this.NextCharacter) || this.NextCharacter == '>'))
                {
                    _nextToken.Append(this.NextCharacter);
                    this.GetNextCharacter();
                }
                return;
            }

            char quote = this.NextCharacter;

            // step over the opening quote
            this.GetNextCharacter();

            // Consume everything up to the closing quote; an entity-encoded quote is ordinary content
            while (!this.IsAtEndOfStream && (this.NextCharacter != quote || this.IsNextCharacterEntity))
            {
                _nextToken.Append(this.NextCharacter);
                this.GetNextCharacter();
            }

            // step over the closing quote, if the value was terminated at all
            if (this.NextCharacter == quote)
            {
                this.GetNextCharacter();
            }

            // NOTE: recovery from a missing closing quote differs from IE's.
            // IE keeps reading until it finds a closing quote or end of file;
            // at end of file it treats the current value as text, and if it finds a
            // closing quote anywhere it eats everything between the quotes.
            // TODO: Suggestion: stop at end of file or at an angle bracket of any kind
            // and assume a quote was there, so that the attribute value may be
            // meaningless but is never treated as text.
        }

        #endregion Internal Methods

        // ---------------------------------------------------------------------
        //
        // Internal Properties
        //
        // ---------------------------------------------------------------------

        #region Internal Properties

        /// <summary>
        /// Type of the token produced by the most recent GetNext...Token call.
        /// </summary>
        internal HtmlTokenType NextTokenType
        {
            get { return _nextTokenType; }
        }

        /// <summary>
        /// Text of the token produced by the most recent GetNext...Token call.
        /// A new string is built from the internal buffer on every access.
        /// </summary>
        internal string NextToken
        {
            get { return _nextToken.ToString(); }
        }

        #endregion Internal Properties

        // ---------------------------------------------------------------------
        //
        // Private methods
        //
        // ---------------------------------------------------------------------

        #region Private Methods

        /// <summary>
        /// Advances the reading position by one logical character
        /// and reads the next available character from the stream.
        /// This character becomes available as the NextCharacter property.
        /// </summary>
        /// <remarks>
        /// Character entities are decoded here: a decimal numeric entity (&amp;#DDDD;)
        /// or a named entity known to HtmlSchema is collapsed into the single character
        /// it denotes and IsNextCharacterEntity is set. Text that starts like an entity
        /// but is malformed or unknown is dropped, and the character following it
        /// becomes the next character. A lone '&amp;' is passed through unchanged.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// Thrown when called while already at the end of the stream.
        /// </exception>
        private void GetNextCharacter()
        {
            if (_nextCharacterCode == -1)
            {
                throw new InvalidOperationException("GetNextCharacter method called at the end of a stream");
            }

            _previousCharacter = _nextCharacter;

            _nextCharacter = _lookAheadCharacter;
            _nextCharacterCode = _lookAheadCharacterCode;
            // next character not an entity as of now
            _isNextCharacterEntity = false;

            this.ReadLookAheadCharacter();

            if (_nextCharacter != '&')
            {
                return;
            }

            if (_lookAheadCharacter == '#')
            {
                // numeric entity - parse digits - &#DDDDD;
                int entityCode = 0;
                this.ReadLookAheadCharacter();

                // largest numeric entity is 7 characters
                // Only ASCII digits are accepted: Char.IsDigit would also admit other
                // Unicode decimal digits, for which subtracting '0' yields garbage.
                for (int i = 0; i < 7 && _lookAheadCharacter >= '0' && _lookAheadCharacter <= '9'; i++)
                {
                    entityCode = 10 * entityCode + (_lookAheadCharacterCode - (int)'0');
                    this.ReadLookAheadCharacter();
                }

                if (_lookAheadCharacter == ';')
                {
                    // correct format - advance
                    this.ReadLookAheadCharacter();

                    if (entityCode > (int)Char.MaxValue)
                    {
                        // A value that does not fit into a single char is replaced with '?'
                        // (a plain cast would silently wrap around to an unrelated character)
                        _nextCharacter = '?';
                        _nextCharacterCode = (int)'?';
                    }
                    else
                    {
                        _nextCharacterCode = entityCode;
                        _nextCharacter = (char)entityCode;
                    }

                    // as far as we are concerned, this is an entity
                    _isNextCharacterEntity = true;
                }
                else
                {
                    // not an entity; the '&', the '#' and any digits read so far are dropped
                    this.SkipMalformedEntity();
                }
            }
            else if (Char.IsLetter(_lookAheadCharacter))
            {
                // entity is written as a string
                StringBuilder entityName = new StringBuilder(10);

                // maximum length of string entities is 10 characters
                for (int i = 0; i < 10 && (Char.IsLetter(_lookAheadCharacter) || Char.IsDigit(_lookAheadCharacter)); i++)
                {
                    entityName.Append(_lookAheadCharacter);
                    this.ReadLookAheadCharacter();
                }

                if (_lookAheadCharacter == ';')
                {
                    // advance
                    this.ReadLookAheadCharacter();

                    string entity = entityName.ToString();
                    if (HtmlSchema.IsEntity(entity))
                    {
                        _nextCharacter = HtmlSchema.EntityCharacterValue(entity);
                        _nextCharacterCode = (int)_nextCharacter;
                        _isNextCharacterEntity = true;
                    }
                    else
                    {
                        // just skip the whole thing - invalid entity
                        this.SkipMalformedEntity();
                    }
                }
                else
                {
                    // skip whatever we read after the ampersand
                    this.SkipMalformedEntity();
                }
            }
        }

        /// <summary>
        /// Recovery step for text that looked like an entity but is not one:
        /// the current look-ahead character becomes the next character and the look-ahead is refilled.
        /// Character and character code are always updated together; otherwise an
        /// end of stream reached here would go unnoticed for one more call and a
        /// bogus U+FFFF character would be handed out.
        /// </summary>
        private void SkipMalformedEntity()
        {
            _nextCharacter = _lookAheadCharacter;
            _nextCharacterCode = _lookAheadCharacterCode;
            this.ReadLookAheadCharacter();
            _isNextCharacterEntity = false;
        }

        /// <summary>
        /// Refills the look-ahead with the next raw character of the input.
        /// Once the reader has reported the end of input (-1) the look-ahead is left untouched.
        /// No entity decoding happens at this level.
        /// </summary>
        private void ReadLookAheadCharacter()
        {
            bool inputExhausted = _lookAheadCharacterCode == -1;
            if (inputExhausted)
            {
                return;
            }

            int code = _inputStringReader.Read();
            _lookAheadCharacterCode = code;
            _lookAheadCharacter = (char)code;
        }

        /// <summary>
        /// Skips whitespace in the input string, together with any comment,
        /// processing instruction or bracketed (CDATA-like) section that is
        /// encountered on the way.
        /// Leaves the first character that was not skipped available in the NextCharacter property;
        /// this may be the end-of-file character, no checking is performed.
        /// </summary>
        private void SkipWhiteSpace()
        {
            // TODO: handle character entities while processing comments, cdata, and directives
            // TODO: SUGGESTION: we could check if lookahead and previous characters are entities also
            while (true)
            {
                if (_nextCharacter == '<' && (_lookAheadCharacter == '?' || _lookAheadCharacter == '!'))
                {
                    // step over '<' so that the look-ahead shows what kind of section this is
                    this.GetNextCharacter();

                    if (_lookAheadCharacter == '[')
                    {
                        // Skip CDATA block and DTDs(?): run until standing on the second ']' of "]]>"
                        while (!this.IsAtEndOfStream && !(_previousCharacter == ']' && _nextCharacter == ']' && _lookAheadCharacter == '>'))
                        {
                            this.GetNextCharacter();
                        }

                        // The loop above stops one character before the closing '>'.
                        // Step onto it first; without this the "]>" of the terminator
                        // would stay in the stream and be tokenized as part of the tag.
                        if (!this.IsAtEndOfStream)
                        {
                            this.GetNextCharacter();
                        }
                        if (_nextCharacter == '>')
                        {
                            this.GetNextCharacter();
                        }
                    }
                    else
                    {
                        // Skip processing instruction, comments: everything up to and including the first '>'
                        while (!this.IsAtEndOfStream && _nextCharacter != '>')
                        {
                            this.GetNextCharacter();
                        }
                        if (_nextCharacter == '>')
                        {
                            this.GetNextCharacter();
                        }
                    }
                }

                if (!Char.IsWhiteSpace(this.NextCharacter))
                {
                    break;
                }

                this.GetNextCharacter();
            }
        }

        /// <summary>
        /// Tells whether a character may begin a name:
        /// an underscore or any character classified as a letter.
        /// </summary>
        /// <param name="character">
        /// character value to be checked
        /// </param>
        /// <returns>
        /// true if the character can be the first character in a name,
        /// false otherwise
        /// </returns>
        private bool IsGoodForNameStart(char character)
        {
            if (character == '_')
            {
                return true;
            }

            return Char.IsLetter(character);
        }

        /// <summary>
        /// Tells whether a character may appear inside a name after its first character.
        /// Accepted are name-start characters, '.', '-', ':', digits, and whatever the
        /// IsCombiningCharacter and IsExtender predicates accept.
        /// </summary>
        /// <param name="character">
        /// character to be checked for validity in a name
        /// </param>
        /// <returns>
        /// true if the character can be a valid part of a name
        /// </returns>
        private bool IsGoodForName(char character)
        {
            // we are not concerned with escaped characters in names
            // we assume that character entities are allowed as part of a name
            switch (character)
            {
                case '.':
                case '-':
                case ':':
                    return true;
            }

            if (this.IsGoodForNameStart(character) || Char.IsDigit(character))
            {
                return true;
            }

            return IsCombiningCharacter(character) || IsExtender(character);
        }

        /// <summary>
        /// Tells whether a character is a combining character permitted in a name.
        /// Placeholder: currently no character is recognized as combining.
        /// TODO: replace with comparisons against the list of combining characters
        /// in the XML documentation
        /// </summary>
        /// <param name="character">
        /// character to be checked
        /// </param>
        /// <returns>
        /// true if the character is a combining character, false otherwise
        /// (always false at present)
        /// </returns>
        private bool IsCombiningCharacter(char character)
        {
            // TODO: put actual code with checks against all combining characters here
            const bool isCombining = false;
            return isCombining;
        }

        /// <summary>
        /// Tells whether a character is an extender permitted in a name.
        /// Placeholder: currently no character is recognized as an extender.
        /// TODO: replace with comparisons against the list of extenders
        /// in the XML documentation
        /// </summary>
        /// <param name="character">
        /// character to be checked
        /// </param>
        /// <returns>
        /// true if the character is an extender, false otherwise
        /// (always false at present)
        /// </returns>
        private bool IsExtender(char character)
        {
            // TODO: put actual code with checks against all extenders here
            const bool isExtender = false;
            return isExtender;
        }

        /// <summary>
        /// Skips dynamic content that starts with "&lt;![" and ends with "]&gt;"
        /// (CDATA sections included) and reports it as an empty Text token.
        /// Expects to be called with '!' as the current character.
        /// </summary>
        private void ReadDynamicContent()
        {
            // verify that we are at dynamic content, which may include CDATA
            Debug.Assert(_previousCharacter == '<' && _nextCharacter == '!' && _lookAheadCharacter == '[');

            // The content is discarded: the caller gets an empty text token
            _nextToken.Length = 0;
            _nextTokenType = HtmlTokenType.Text;

            // step over '!' and over '[' to reach the first character of the content
            this.GetNextCharacter();
            this.GetNextCharacter();

            // NOTE: 10/12/2004: the terminator is the sequence "]>" and not "]]>", because
            // some directives start with "<![" , carry some data and simply end with "]>".
            // Consequently CDATA (or anything else in its own [] inside the directive)
            // cannot contain a "]>" sequence.
            while (!this.IsAtEndOfStream)
            {
                bool atTerminator = _nextCharacter == ']' && _lookAheadCharacter == '>';
                if (atTerminator)
                {
                    // step onto the closing '>' and then past it
                    this.GetNextCharacter();
                    this.GetNextCharacter();
                    break;
                }

                this.GetNextCharacter();
            }
        }

        /// <summary>
        /// Reads a comment and reports it as a Comment token whose text is the comment content.
        /// A comment starts with "&lt;!-" and ends with "--&gt;".
        /// NOTE: 10/06/2004: the nonstandard ending "!&gt;" is accepted as well,
        /// because in practice many html pages do not use the full comment conventions.
        /// A comment that is not terminated before the end of input is returned with
        /// whatever content was read; no exception is raised.
        /// Expects to be called with '!' as the current character.
        /// </summary>
        private void ReadComment()
        {
            // verify that we are at a comment
            Debug.Assert(_previousCharacter == '<' && _nextCharacter == '!' && _lookAheadCharacter == '-');

            // Initialize a token
            _nextTokenType = HtmlTokenType.Comment;
            _nextToken.Length = 0;

            // Step over '!', the first '-' and the second '-' to reach the first character
            // of the comment content. The input may end inside this prefix (e.g. "<!-"),
            // so every step is guarded: advancing at the end of stream would throw.
            for (int i = 0; i < 3 && !this.IsAtEndOfStream; i++)
            {
                this.GetNextCharacter();
            }

            while (!this.IsAtEndOfStream)
            {
                // Read text until a possible end of comment
                // Note that in many actual html pages comments end with "!>" (while xml standard is "-->")
                while (!this.IsAtEndOfStream && !(_nextCharacter == '-' && _lookAheadCharacter == '-' || _nextCharacter == '!' && _lookAheadCharacter == '>'))
                {
                    _nextToken.Append(this.NextCharacter);
                    this.GetNextCharacter();
                }

                if (this.IsAtEndOfStream)
                {
                    // Unterminated comment: keep what has been read and stop
                    break;
                }

                // Look one character further to decide whether this really is the end
                this.GetNextCharacter();
                if (_previousCharacter == '-' && _nextCharacter == '-' && _lookAheadCharacter == '>')
                {
                    // Standard comment end. Eat it and exit the loop
                    this.GetNextCharacter(); // get '>'
                    break;
                }
                else if (_previousCharacter == '!' && _nextCharacter == '>')
                {
                    // Nonstandard but possible comment end - '!>'. Exit the loop
                    break;
                }
                else
                {
                    // Not an end. Save the character that was stepped over and continue reading
                    _nextToken.Append(_previousCharacter);
                }
            }

            // Step past the closing '>' of the comment end combination
            if (_nextCharacter == '>')
            {
                this.GetNextCharacter();
            }
        }

        /// <summary>
        /// Skips a directive that starts with "&lt;!" but is neither a comment nor a
        /// bracketed (CDATA-like) section, e.g. DOCTYPE, and reports it as an empty Text token.
        /// Everything up to and including the next literal "&gt;" is ignored.
        /// Expects to be called with '!' as the current character.
        /// </summary>
        private void ReadUnknownDirective()
        {
            // verify that we are at an unknown directive
            Debug.Assert(_previousCharacter == '<' && _nextCharacter == '!' && !(_lookAheadCharacter == '-' || _lookAheadCharacter == '['));

            // The directive is discarded: the caller gets an empty text token
            _nextToken.Length = 0;
            _nextTokenType = HtmlTokenType.Text;

            // step over '!'
            this.GetNextCharacter();

            // Consume characters one by one; the literal '>' (not an entity) is the last one consumed
            while (!this.IsAtEndOfStream)
            {
                bool isTagEnd = _nextCharacter == '>' && !IsNextCharacterEntity;
                this.GetNextCharacter();
                if (isTagEnd)
                {
                    break;
                }
            }
        }

        /// <summary>
        /// Skips a processing directive that starts with "&lt;?" and ends with "?&gt;".
        /// NOTE: 10/14/2004: IE also ends processing directives with "/&gt;",
        /// so that ending is recognized as well.
        /// Expects to be called with '&lt;' as the current character and '?' as the look-ahead.
        /// </summary>
        private void SkipProcessingDirective()
        {
            // verify that we are at a processing directive
            Debug.Assert(_nextCharacter == '<' && _lookAheadCharacter == '?');

            // step over '<' and over '?' to reach the first character of the directive
            this.GetNextCharacter();
            this.GetNextCharacter();

            // No entity checks are needed here: '?' is not an entity, and the look-ahead
            // character is raw input on which no entity processing is performed
            while (!this.IsAtEndOfStream)
            {
                bool atTerminator = (_nextCharacter == '?' || _nextCharacter == '/') && _lookAheadCharacter == '>';
                if (atTerminator)
                {
                    // step onto the closing '>' and then past it
                    this.GetNextCharacter();
                    this.GetNextCharacter();
                    break;
                }

                this.GetNextCharacter();
            }
        }

        #endregion Private Methods

        // ---------------------------------------------------------------------
        //
        // Private Properties
        //
        // ---------------------------------------------------------------------

        #region Private Properties

        /// <summary>
        /// Current character of the input, with entities already decoded.
        /// </summary>
        private char NextCharacter
        {
            get { return _nextCharacter; }
        }

        /// <summary>
        /// True once the current character code is the end-of-input marker (-1).
        /// </summary>
        private bool IsAtEndOfStream
        {
            get
            {
                const int endOfInput = -1;
                return _nextCharacterCode == endOfInput;
            }
        }

        /// <summary>
        /// True when the current character is a literal '&lt;' (not an entity)
        /// followed by '/' or by a character that can start a name.
        /// </summary>
        private bool IsAtTagStart
        {
            get
            {
                if (_nextCharacter != '<' || _isNextCharacterEntity)
                {
                    return false;
                }

                return _lookAheadCharacter == '/' || IsGoodForNameStart(_lookAheadCharacter);
            }
        }

        /// <summary>
        /// True when positioned at the end of a regular tag ("&gt;") or of an
        /// empty tag ("/&gt;"), provided the current character is not an entity.
        /// </summary>
        private bool IsAtTagEnd
        {
            get
            {
                if (_isNextCharacterEntity)
                {
                    return false;
                }

                bool atRegularTagEnd = _nextCharacter == '>';
                bool atEmptyTagEnd = _nextCharacter == '/' && _lookAheadCharacter == '>';
                return atRegularTagEnd || atEmptyTagEnd;
            }
        }

        /// <summary>
        /// True when the current character is a literal '&lt;' (not an entity) followed by '!'.
        /// </summary>
        private bool IsAtDirectiveStart
        {
            get
            {
                bool atBang = _nextCharacter == '<' && _lookAheadCharacter == '!';
                return atBang && !this.IsNextCharacterEntity;
            }
        }

        /// <summary>
        /// True when the current character was produced by decoding a character entity
        /// rather than read literally from the input.
        /// </summary>
        private bool IsNextCharacterEntity
        {
            get { return _isNextCharacterEntity; }
        }

        #endregion Private Properties

        // ---------------------------------------------------------------------
        //
        // Private Fields
        //
        // ---------------------------------------------------------------------

        #region Private Fields

        // string reader which will move over input text
        private StringReader _inputStringReader;
        // current character: its code (-1 once the end of input is reached)
        // and the character it represents; not yet part of any token
        private int _nextCharacterCode;
        private char _nextCharacter;
        // raw character following the current one, as code (-1 at end of input) and as char;
        // no entity decoding is applied to the look-ahead
        private int _lookAheadCharacterCode;
        private char _lookAheadCharacter;
        // character that was current before the last advance
        private char _previousCharacter;
        // when true, whitespace met while reading text content is dropped instead of emitted as a space
        private bool _ignoreNextWhitespace;
        // true when the current character was decoded from a character entity
        private bool _isNextCharacterEntity;

        // text and type of the most recently read token,
        // exposed through the NextToken and NextTokenType properties
        StringBuilder _nextToken;
        HtmlTokenType _nextTokenType;

        #endregion Private Fields
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Mark Brownsword
Software Developer (Senior)
Australia
I've been working as a software developer since 2000 and hold a Bachelor of Business degree from The Open Polytechnic of New Zealand. Computers are for people and I aim to build applications for people that they would want to use.

You may also be interested in...

Pro
Pro
| Advertise | Privacy | Terms of Use | Mobile
Web01 | 2.8.160826.1 | Last Updated 25 Sep 2009
Article Copyright 2009 by Mark Brownsword
Everything else Copyright © CodeProject, 1999-2016
Layout: fixed | fluid