/*********************************************************************
* Author: Andrew Deren
* Date: July, 2004
* http://www.adersoftware.com
*
* StringTokenizer class. You can use this class in any way you want
* as long as this header remains in this file.
*
* The article is at http://www.codeproject.com/csharp/stringtokenizer.asp
*
* Modified by T. Almdal to provide additional tokens for parsing
* ATL like registry scripts. In addition, Enumerator was defined to
* allow standard enumerators to be used.
*
**********************************************************************/
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace Almdal.RegistryScript {
/// <summary>
/// This delegate is used by the StringTokenizer to ask the class
/// that invoked the StringTokenizer to do variable substitution.
/// </summary>
/// <param name="variable">variable name (the text found between the enclosing % signs)</param>
/// <returns>substituted value</returns>
public delegate string VariableSubstitution(string variable);
/// <summary>
/// StringTokenizer splits a string (or the full contents of a reader) into tokens.
/// Substitution variables are delineated by enclosing % signs.
/// For example, %GUID% would be recognized as a variable.
/// </summary>
internal class StringTokenizer : IEnumerable<Token> {
/// <summary>
/// Enumerator that pulls tokens from a StringTokenizer one at a time.
/// The EOF token itself is yielded as the final element; the MoveNext call
/// that follows it returns false.
/// </summary>
public class TokenEnumeratorImpl : IEnumerator<Token> {
    private Token _current = null;
    private StringTokenizer _st = null;

    /// <summary>
    /// Creates an enumerator over the given tokenizer.
    /// </summary>
    /// <param name="tokenizer">tokenizer to read tokens from</param>
    /// <exception cref="ArgumentNullException">tokenizer is null</exception>
    public TokenEnumeratorImpl(StringTokenizer tokenizer) {
        if (tokenizer == null) {
            throw new ArgumentNullException("tokenizer");
        }
        _st = tokenizer;
    }

    #region IEnumerator<Token> Members
    /// <summary>
    /// Get the current token (null before the first MoveNext and after Dispose)
    /// </summary>
    public Token Current {
        get { return _current; }
    }
    #endregion

    #region IDisposable Members
    /// <summary>
    /// IDisposable implementation required by IEnumerator; drops the references
    /// to the current token and to the tokenizer.
    /// </summary>
    public void Dispose() {
        _current = null;
        _st = null;
    }
    #endregion

    #region IEnumerator Members
    object System.Collections.IEnumerator.Current {
        get { return _current; }
    }

    /// <summary>
    /// Advances to the next token.
    /// </summary>
    /// <returns>false once the EOF token has already been handed out, true otherwise</returns>
    /// <exception cref="ObjectDisposedException">the enumerator has been disposed</exception>
    public bool MoveNext() {
        if (_st == null) {
            // The constructor rejects null, so a null tokenizer means Dispose() ran.
            throw new ObjectDisposedException("TokenEnumeratorImpl");
        }
        if (_current != null && _current.Kind == TokenKind.EOF) {
            return false;
        }
        _current = _st.Next();
        return true;
    }

    /// <summary>
    /// Not supported: the tokenizer cannot be rewound.
    /// </summary>
    /// <exception cref="NotSupportedException">always</exception>
    public void Reset() {
        // NotSupportedException is the documented way to decline IEnumerator.Reset;
        // it still derives from Exception, so existing catch blocks keep working.
        throw new NotSupportedException("The method or operation is not implemented.");
    }
    #endregion
}
// Sentinel character returned by Peek when the requested position is past the end of data.
const char EOF = (char)0;
// Callback used to resolve the text between % signs; the private constructor
// points it at DefaultVariableReplacement.
private VariableSubstitution _Substitution = null;
// Current line number; Reset starts it at 1 and it is incremented on each line break.
int line;
// Current column; Reset starts it at 1, Consume increments it, line breaks set it back to 1.
int column;
int pos; // position within data
// The complete text being tokenized.
string data;
// When true, Next skips spaces, tabs and line breaks instead of returning tokens for them.
bool ignoreWhiteSpace;
// When true, ReadString drops the delimiting quotes and yields TokenKind.String;
// otherwise the quotes are kept and the token kind is TokenKind.QuotedString.
bool _normalizeString;
// Characters tested by IsSymbol; exposed through the SymbolChars property.
char[] symbolChars;
// Snapshot of line / column / pos taken by StartRead, used by CreateToken(TokenKind)
// to cut the token text out of data and to report where the token began.
int saveLine;
int saveCol;
int savePos;
/// <summary>
/// Shared initialization for the public constructors: installs the default
/// (pass-through) variable substitution routine.
/// </summary>
private StringTokenizer() {
    _Substitution = DefaultVariableReplacement;
}

/// <summary>
/// Creates a tokenizer over everything that can be read from <paramref name="reader"/>.
/// The reader is read to its end immediately.
/// </summary>
/// <param name="reader">source of the text to tokenize</param>
/// <exception cref="ArgumentNullException">reader is null</exception>
public StringTokenizer(TextReader reader) : this() {
    if (reader == null) {
        throw new ArgumentNullException("reader");
    }

    this.data = reader.ReadToEnd();
    Reset();
}

/// <summary>
/// Creates a tokenizer over the given string.
/// </summary>
/// <param name="data">text to tokenize</param>
/// <exception cref="ArgumentNullException">data is null</exception>
public StringTokenizer(string data) : this() {
    if (data == null) {
        throw new ArgumentNullException("data");
    }

    this.data = data;
    Reset();
}
/// <summary>
/// Gets or sets the delegate that is called to substitute a %variable%.
/// </summary>
public VariableSubstitution SubstitionRoutine {
    get {
        return _Substitution;
    }
    set {
        _Substitution = value;
    }
}

/// <summary>
/// Gets or sets the set of characters that IsSymbol treats as symbols.
/// </summary>
public char[] SymbolChars {
    get {
        return symbolChars;
    }
    set {
        symbolChars = value;
    }
}

/// <summary>
/// When true, spaces, tabs and line breaks between tokens are skipped
/// rather than returned as tokens. White space inside a quoted string
/// is always kept as part of the string.
/// </summary>
public bool IgnoreWhiteSpace {
    get {
        return ignoreWhiteSpace;
    }
    set {
        ignoreWhiteSpace = value;
    }
}

/// <summary>
/// When true, string tokens are returned without their enclosing quotes
/// (TokenKind.String); when false the quotes are kept (TokenKind.QuotedString).
/// </summary>
public bool NormalizeString {
    get {
        return _normalizeString;
    }
    set {
        _normalizeString = value;
    }
}
/// <summary>
/// Puts the tokenizer into its initial state: read position at the start of
/// the data (line 1, column 1), white space reported, strings normalized and
/// the default symbol character set installed.
/// </summary>
private void Reset() {
    // read position
    pos = 0;
    line = 1;
    column = 1;

    // option defaults
    ignoreWhiteSpace = false;
    _normalizeString = true;
    symbolChars = new char[] {
        '=', '+', '-', '/', ',', '.', '*', '~', '!', '@', '#', '$', '%', '^',
        '&', '(', ')', '{', '}', '[', ']', ':', ';', '<', '>', '?', '|', '\\'
    };
}
/// <summary>
/// Looks at a character ahead of the read position without consuming it.
/// </summary>
/// <param name="count">offset from the current position (0 = the next character to be consumed)</param>
/// <returns>the character at that offset, or EOF when the offset is at or past the end of the data</returns>
protected char Peek(int count) {
    int index = pos + count;
    return index < data.Length ? data[index] : EOF;
}
/// <summary>
/// Takes the character at the read position and advances past it,
/// moving the column counter along by one.
/// </summary>
/// <returns>the character that was consumed</returns>
protected char Consume() {
    char taken = data[pos];   // throws if the read position is already past the end

    pos += 1;
    column += 1;

    return taken;
}
/// <summary>
/// Builds a token with an explicit value, stamped with the current line and column.
/// </summary>
/// <param name="kind">kind of token to create</param>
/// <param name="value">text of the token</param>
protected Token CreateToken(TokenKind kind, string value) {
    Token token = new Token(kind, value, line, column);
    return token;
}

/// <summary>
/// Builds a token whose text is everything consumed since the last StartRead,
/// stamped with the line and column saved by that StartRead.
/// </summary>
/// <param name="kind">kind of token to create</param>
protected Token CreateToken(TokenKind kind) {
    int length = pos - savePos;
    return new Token(kind, data.Substring(savePos, length), saveLine, saveCol);
}
/// <summary>
/// Scans the input from the current read position and returns the next token.
/// Spaces, tabs and line breaks are skipped when IgnoreWhiteSpace is set;
/// otherwise they are returned as WhiteSpace / EOL tokens. At the end of the
/// data an EOF token is returned (repeatedly, if called again).
/// </summary>
/// <returns>the next token; never null</returns>
protected Token Next() { // TNA - removed the dastardly goto
    while (true) {
        char ch = Peek(0);
        switch (ch) {
            case EOF:
                return CreateToken(TokenKind.EOF, string.Empty);

            case ' ':
            case '\t':
                if (this.ignoreWhiteSpace) {
                    Consume();
                    break; // skipped - go round the loop again
                }
                return ReadWhitespace();

            case '\r':
            case '\n':
                // Always mark the start: the EOL token is cut from the saved
                // position, so skipping StartRead here would build it from
                // whatever an earlier token left behind.
                StartRead();
                Consume();
                if (ch == '\r' && Peek(0) == '\n')
                    Consume(); // on DOS/Windows we have \r\n for new line
                line++;
                column = 1;
                if (!this.ignoreWhiteSpace) {
                    return CreateToken(TokenKind.EOL);
                }
                break;

            case '\'':
            case '"':
                return ReadString(ch);

            case '=':
                return ReadSingleChar(TokenKind.Assignment);
            case '{':
                return ReadSingleChar(TokenKind.StartKey);
            case '}':
                return ReadSingleChar(TokenKind.EndKey);

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                return ReadNumber();

            default:
                // ReadWord stops on control and white-space characters without
                // consuming them. Any such character that reaches this point
                // (e.g. a no-break space) must be consumed here, otherwise the
                // tokenizer would hand out empty Word tokens forever.
                if (Char.IsControl(ch) || Char.IsWhiteSpace(ch)) {
                    return ReadSingleChar(TokenKind.Unknown);
                }
                return ReadWord();
        }
    }
}

/// <summary>
/// Consumes exactly one character and returns it as a token of the given kind.
/// </summary>
private Token ReadSingleChar(TokenKind kind) {
    StartRead();
    Consume();
    return CreateToken(kind);
}
/// <summary>
/// Remembers the current read position (offset, line and column) so that
/// CreateToken(TokenKind) can later cut out the token text and report where it began.
/// </summary>
private void StartRead() {
    savePos = pos;
    saveCol = column;
    saveLine = line;
}
#region Read Token Methods
/// <summary>
/// Reads a run of spaces and tabs (line breaks are not included) and
/// returns it as a single WhiteSpace token.
/// </summary>
/// <returns>the WhiteSpace token covering the run</returns>
protected Token ReadWhitespace() {
    StartRead();
    Consume(); // the character the caller already peeked at

    for (char next = Peek(0); next == ' ' || next == '\t'; next = Peek(0)) {
        Consume();
    }

    return CreateToken(TokenKind.WhiteSpace);
}
/// <summary>
/// Reads a number: DIGIT+ ("." DIGIT*)?  Only one decimal point is accepted;
/// a second one ends the number.
/// </summary>
/// <returns>the Number token</returns>
protected Token ReadNumber() {
    StartRead();

    bool seenDot = (Peek(0) == '.'); // in case the very first character is a period
    Consume();                       // first character of the number

    for (;;) {
        char next = Peek(0);
        bool firstDot = (next == '.') && !seenDot;

        if (!Char.IsDigit(next) && !firstDot) {
            break;
        }
        if (firstDot) {
            seenDot = true;
        }
        Consume();
    }

    return CreateToken(TokenKind.Number);
}
/// <summary>
/// Reads a word: every character up to (not including) the next control or
/// white-space character. Any %variable% found inside the word is replaced
/// through ReadVariable, so the token text can differ from the source text.
/// </summary>
/// <returns>
/// the Word token, positioned at the line and column where the word starts
/// </returns>
protected Token ReadWord() {
    // Remember where the word begins. Without this the token would be stamped
    // with the position *after* the word, unlike the tokens built from StartRead.
    StartRead();

    StringBuilder word = new StringBuilder(256);
    for (char ch = Peek(0); // Get first character of the word
         !(Char.IsControl(ch) || Char.IsWhiteSpace(ch));
         ch = Peek(0)) {
        if (ch == '%') {
            word.Append(ReadVariable());
        }
        else {
            word.Append(Consume());
        }
    }

    // The text may contain substituted values, so it cannot be cut out of the
    // raw data with CreateToken(TokenKind); build the token from the saved start.
    return new Token(TokenKind.Word, word.ToString(), saveLine, saveCol);
}
/// <summary>
/// Handles a '%' at the current read position.
///   %name%  - consumed entirely; returns the substitution routine's result for "name"
///   %%      - consumed entirely; returns a literal "%"
///   %...    - (no closing % before white space, a control character or the
///             end of the data) only the '%' is consumed and returned literally,
///             so the caller processes the following characters itself.
/// </summary>
/// <returns>the text to insert in place of what was consumed</returns>
private string ReadVariable() {
    // Look ahead for the closing '%' without consuming anything, so that an
    // unterminated '%' neither loses the '%' nor swallows characters
    // (closing quotes, escapes) that belong to the caller.
    int length = 0;
    char ch = Peek(1);
    while (ch != '%' && !(Char.IsControl(ch) || Char.IsWhiteSpace(ch))) {
        length++;
        ch = Peek(1 + length);
    }

    if (ch != '%') { // no closing % - not a variable, keep the percent sign as text
        Consume();
        return "%";
    }

    Consume(); // move past initial %
    StringBuilder variable = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
        variable.Append(Consume());
    }
    Consume(); // move past closing %

    if (length == 0) { // "%%" is a literal percent sign
        return "%";
    }

    // The SubstitionRoutine setter accepts null; fall back to the default
    // routine instead of failing with a NullReferenceException.
    VariableSubstitution substitute = _Substitution;
    if (substitute == null) {
        return DefaultVariableReplacement(variable.ToString());
    }
    return substitute(variable.ToString());
}
/// <summary>
/// Reads a quoted string starting at the opening quote under the read position
/// and ending at the next unescaped occurrence of that same quote character
/// (or at the end of the data if the string is never closed).
/// Inside the string:
///   - line breaks are kept and counted,
///   - %variable% is replaced through ReadVariable,
///   - \' \" and \% yield the second character without the backslash,
///   - any other backslash sequence is kept unchanged.
/// When NormalizeString is true the enclosing quotes are dropped and the token
/// kind is String; otherwise they are kept and the kind is QuotedString.
/// </summary>
/// <param name="EOS">
/// quote character that opened the string; not used by the implementation,
/// which takes the delimiter from the character at the read position
/// </param>
/// <returns>the string token, positioned at the opening quote</returns>
protected Token ReadString(char EOS) {
    // Remember where the string begins so the token reports its start rather
    // than the position after the closing quote (possibly several lines later).
    StartRead();

    StringBuilder ztring = new StringBuilder(256);
    char endChar = Peek(0);
    if (_normalizeString) { // quotes are being removed
        Consume();
    }
    else {
        ztring.Append(Consume());
    }

    for (char ch = Peek(0); ch != EOF; ch = Peek(0)) {
        if (ch == '\r') { // handle CR in strings
            ztring.Append(Consume());
            if (Peek(0) == '\n') // for DOS & windows
                ztring.Append(Consume());
            line++;
            column = 1;
        }
        else if (ch == '\n') { // new line in quoted string
            ztring.Append(Consume());
            line++;
            column = 1;
        }
        else if (ch == '%') {
            ztring.Append(ReadVariable());
        }
        else if (ch == '\\') { // is it escape char
            switch (Peek(1)) {
                case '\'': // Single quote
                case '"':  // Double quote
                case '%':  // Percent sign
                    Consume();                // drop the backslash
                    ztring.Append(Consume()); // keep the escaped character as text
                    break;
                case EOF:
                    // Backslash is the last character of the input: keep it, but
                    // there is nothing after it to consume.
                    ztring.Append(Consume());
                    break;
                case '\r':
                case '\n':
                    // Keep the backslash and let the loop handle the line break
                    // so that the line counter stays correct.
                    ztring.Append(Consume());
                    break;
                default:
                    // Not a recognised escape: keep both characters unchanged.
                    ztring.Append(Consume());
                    ztring.Append(Consume());
                    break;
            }
        }
        else if (ch == endChar) {
            if (!_normalizeString) {
                ztring.Append(Consume());
            }
            else {
                Consume();
            }
            break;
        }
        else {
            ztring.Append(Consume());
        }
    }

    TokenKind kind = _normalizeString ? TokenKind.String : TokenKind.QuotedString;
    return new Token(kind, ztring.ToString(), saveLine, saveCol);
}
/// <summary>
/// Tells whether <paramref name="c"/> is one of the configured symbol characters.
/// </summary>
/// <param name="c">character to test</param>
/// <returns>true when c occurs in SymbolChars, false otherwise</returns>
protected bool IsSymbol(char c) {
    foreach (char symbol in symbolChars) {
        if (symbol == c) {
            return true;
        }
    }
    return false;
}
#endregion
#region Default Variable Substituion Implementation
/// <summary>
/// Default variable substitution routine: performs no substitution and
/// hands the variable name back unchanged.
/// </summary>
/// <param name="variable">name of the variable</param>
/// <returns>the same string that was passed in</returns>
protected string DefaultVariableReplacement(string variable) {
    return variable;
}
#endregion
#region IEnumerable<Token> Members
/// <summary>
/// Returns an enumerator that reads tokens from this tokenizer.
/// Every enumerator advances this tokenizer's own read position.
/// </summary>
public IEnumerator<Token> GetEnumerator() {
    TokenEnumeratorImpl enumerator = new TokenEnumeratorImpl(this);
    return enumerator;
}
#endregion
#region IEnumerable Members
/// <summary>
/// Non-generic counterpart of GetEnumerator.
/// </summary>
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
    return GetEnumerator();
}
#endregion
}
}
// By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
// If a file you wish to view isn't highlighted, and is a text file (not binary), please
// let us know and we'll add colourisation support for it.
// This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.