|
' Generated by TinyPG v1.2 available at www.codeproject.com
Imports System
Imports System.Collections.Generic
Imports System.Text.RegularExpressions
Namespace TinyPG
#Region "Scanner"
''' <summary>
''' Regular-expression driven tokenizer. Each registered pattern is paired
''' (by list position) with the <see cref="TokenType"/> it produces.
''' </summary>
Partial Public Class Scanner
    ''' <summary>The text being scanned.</summary>
    Public Input As String
    ''' <summary>Position in <see cref="Input"/> where the next token starts.</summary>
    Public StartPos As Integer = 0
    ''' <summary>End position handed to newly created tokens before a match is found.</summary>
    Public EndPos As Integer = 0
    ' The three fields below are reset to 0 by Init; nothing in this class updates them.
    Public CurrentLine As Integer
    Public CurrentColumn As Integer
    Public CurrentPosition As Integer
    ''' <summary>Tokens that were skipped while looking for the next token.</summary>
    Public Skipped As List(Of Token)

    Private LookAheadToken As Token
    Private Patterns As List(Of Regex)
    Private Tokens As List(Of TokenType)
    Private SkipList As List(Of TokenType) ' tokens to be skipped

    ''' <summary>
    ''' Builds the pattern table and the list of token types to skip.
    ''' </summary>
    Public Sub New()
        Patterns = New List(Of Regex)()
        Tokens = New List(Of TokenType)()
        LookAheadToken = Nothing

        ' Input is a public field, so LookAhead can be reached without Init
        ' having run; allocate the list here so skipping never hits Nothing.
        Skipped = New List(Of Token)()

        SkipList = New List(Of TokenType)()
        SkipList.Add(TokenType.WHITESPACE)
        SkipList.Add(TokenType.COMMENTLINE)
        SkipList.Add(TokenType.COMMENTBLOCK)

        ' Registration order matters: on equal match length the earlier pattern wins.
        RegisterPattern("\(", TokenType.BRACKETOPEN)
        RegisterPattern("\)", TokenType.BRACKETCLOSE)
        RegisterPattern("\{[^\}]*\}([^};][^}]*\}+)*;", TokenType.CODEBLOCK)
        RegisterPattern(",", TokenType.COMMA)
        RegisterPattern("\[", TokenType.SQUAREOPEN)
        RegisterPattern("\]", TokenType.SQUARECLOSE)
        RegisterPattern("=", TokenType.ASSIGN)
        RegisterPattern("\|", TokenType.PIPE)
        RegisterPattern(";", TokenType.SEMICOLON)
        RegisterPattern("(\*|\+|\?)", TokenType.UNARYOPER)
        RegisterPattern("[a-zA-Z_][a-zA-Z0-9_]*", TokenType.IDENTIFIER)
        RegisterPattern("[0-9]+", TokenType.CINTEGER)
        RegisterPattern("[0-9]*\.[0-9]+", TokenType.CDOUBLE)
        RegisterPattern("(0x[0-9a-fA-F]{6})", TokenType.HEX)
        RegisterPattern("->", TokenType.ARROW)
        RegisterPattern("<%\s*@", TokenType.DIRECTIVEOPEN)
        RegisterPattern("%>", TokenType.DIRECTIVECLOSE)
        RegisterPattern("^$", TokenType.EOF)
        RegisterPattern("@?\""(\""\""|[^\""])*\""", TokenType.CSTRING)
        RegisterPattern("\s+", TokenType.WHITESPACE)
        RegisterPattern("//[^\n]*\n?", TokenType.COMMENTLINE)
        RegisterPattern("/\*[^*]*\*+(?:[^/*][^*]*\*+)*/", TokenType.COMMENTBLOCK)
    End Sub

    ''' <summary>
    ''' Compiles <paramref name="pattern"/> and appends it, together with the
    ''' token type it yields, to the parallel Patterns/Tokens lists.
    ''' </summary>
    Private Sub RegisterPattern(ByVal pattern As String, ByVal type As TokenType)
        Patterns.Add(New Regex(pattern, RegexOptions.Compiled))
        Tokens.Add(type)
    End Sub

    ''' <summary>
    ''' Assigns the text to scan and resets all positions, the skipped-token
    ''' list and the cached lookahead.
    ''' </summary>
    ''' <param name="input">The text to tokenize.</param>
    Public Sub Init(ByVal input As String)
        Me.Input = input
        StartPos = 0
        EndPos = 0
        CurrentLine = 0
        CurrentColumn = 0
        CurrentPosition = 0
        Skipped = New List(Of Token)()
        LookAheadToken = Nothing
    End Sub

    ''' <summary>
    ''' Creates a token of the given type that spans the scanner's current
    ''' StartPos..EndPos and whose text is the name of the type.
    ''' </summary>
    ''' <param name="type">The type to give the new token.</param>
    ''' <returns>The newly created token.</returns>
    Public Function GetToken(ByVal type As TokenType) As Token
        Dim t As New Token(Me.StartPos, Me.EndPos)
        t.Type = type
        t.Text = type.ToString()
        Return t
    End Function

    ''' <summary>
    ''' executes a lookahead of the next token
    ''' and will advance the scan on the input string
    ''' </summary>
    ''' <returns>The token that was consumed.</returns>
    Public Function Scan() As Token
        ' temporarily retrieve the lookahead
        Dim tok As Token = LookAhead()

        ' reset lookahead token, so scanning will continue
        LookAheadToken = Nothing

        ' set the tokenizer to the new scan position
        StartPos = tok.EndPos
        EndPos = tok.EndPos
        Return tok
    End Function

    ''' <summary>
    ''' returns token with longest best match
    ''' </summary>
    ''' <remarks>
    ''' Tokens whose type is in the skip list are appended to
    ''' <see cref="Skipped"/> and scanning continues behind them. The result
    ''' is cached until <see cref="Scan"/> or <see cref="Init"/> clears it.
    ''' When no pattern matches, the returned token keeps the type assigned
    ''' by the Token constructor and its text is the single character at the
    ''' current position.
    ''' </remarks>
    ''' <returns>The next non-skipped token.</returns>
    Public Function LookAhead() As Token
        ' this prevents double scanning and matching
        ' increased performance
        If LookAheadToken IsNot Nothing Then
            Return LookAheadToken
        End If

        Dim startPosition As Integer = StartPos
        Dim endPosition As Integer = EndPos
        Dim tok As Token = Nothing
        Dim skipToken As Boolean

        Do
            Dim bestLength As Integer = -1
            Dim bestIndex As Integer = -1

            ' Patterns are matched against the remainder of the input so that
            ' anchors such as "^$" (EOF) are relative to the current position.
            Dim remaining As String = Input.Substring(startPosition)

            tok = New Token(startPosition, endPosition)

            For i As Integer = 0 To Patterns.Count - 1
                Dim m As Match = Patterns(i).Match(remaining)
                ' Only a match at the very start counts; strictly-greater keeps
                ' the earliest registered pattern on ties.
                If m.Success AndAlso m.Index = 0 AndAlso m.Length > bestLength Then
                    bestLength = m.Length
                    bestIndex = i
                End If
            Next

            If bestIndex >= 0 Then
                tok.EndPos = startPosition + bestLength
                tok.Text = Input.Substring(tok.StartPos, bestLength)
                tok.Type = Tokens(bestIndex)
            ElseIf tok.EndPos < Input.Length Then
                ' Nothing matched: expose the offending character as the text.
                tok.Text = Input.Substring(tok.StartPos, 1)
            End If

            skipToken = SkipList.Contains(tok.Type)
            If skipToken Then
                ' Advance both ends so a following unmatched token is not
                ' created with an end position in front of its start.
                startPosition = tok.EndPos
                endPosition = tok.EndPos
                Skipped.Add(tok)
            End If
        Loop While skipToken

        LookAheadToken = tok
        Return tok
    End Function
End Class
#End Region
#Region "Token"
''' <summary>
''' Identifies the kind of a token: special markers, non-terminal
''' grammar symbols, and the terminals produced by the scanner.
''' </summary>
Public Enum TokenType
    ' Special markers
    _NONE_ = 0
    _UNDETERMINED_ = 1

    ' Non-terminal tokens
    Start = 2
    Directive = 3
    NameValue = 4
    ExtProduction = 5
    Attribute = 6
    Params = 7
    Param = 8
    Production = 9
    Rule = 10
    Subrule = 11
    ConcatRule = 12
    Symbol = 13

    ' Terminal tokens
    BRACKETOPEN = 14
    BRACKETCLOSE = 15
    CODEBLOCK = 16
    COMMA = 17
    SQUAREOPEN = 18
    SQUARECLOSE = 19
    ASSIGN = 20
    PIPE = 21
    SEMICOLON = 22
    UNARYOPER = 23
    IDENTIFIER = 24
    CINTEGER = 25
    CDOUBLE = 26
    HEX = 27
    ARROW = 28
    DIRECTIVEOPEN = 29
    DIRECTIVECLOSE = 30
    EOF = 31
    CSTRING = 32
    WHITESPACE = 33
    COMMENTLINE = 34
    COMMENTBLOCK = 35
End Enum
''' <summary>
''' A typed span of text, described by a start position, an end position,
''' the covered text and an optional attached value.
''' </summary>
Public Class Token
    Private m_startPos As Integer
    Private m_endPos As Integer
    Private m_text As String
    Private m_value As Object

    ''' <summary>The kind of this token.</summary>
    Public Type As TokenType

    ''' <summary>Creates an undetermined token spanning 0..0.</summary>
    Public Sub New()
        Me.New(0, 0)
    End Sub

    ''' <summary>
    ''' Creates an undetermined token with the given range, empty text and
    ''' no value.
    ''' </summary>
    ''' <param name="start">Initial start position.</param>
    ''' <param name="endPos">Initial end position.</param>
    Public Sub New(ByVal start As Integer, ByVal endPos As Integer)
        m_startPos = start
        m_endPos = endPos
        ' must initialize with empty string, may cause null reference exceptions otherwise
        m_text = ""
        m_value = Nothing
        Type = TokenType._UNDETERMINED_
    End Sub

    ''' <summary>Position at which the token starts.</summary>
    Public Property StartPos() As Integer
        Get
            Return m_startPos
        End Get
        Set(ByVal value As Integer)
            m_startPos = value
        End Set
    End Property

    ''' <summary>Position at which the token ends.</summary>
    Public Property EndPos() As Integer
        Get
            Return m_endPos
        End Get
        Set(ByVal value As Integer)
            m_endPos = value
        End Set
    End Property

    ''' <summary>Difference between the end and start positions.</summary>
    Public ReadOnly Property Length() As Integer
        Get
            Return m_endPos - m_startPos
        End Get
    End Property

    ''' <summary>The text associated with the token.</summary>
    Public Property Text() As String
        Get
            Return m_text
        End Get
        Set(ByVal value As String)
            m_text = value
        End Set
    End Property

    ''' <summary>An arbitrary object attached to the token.</summary>
    Public Property Value() As Object
        Get
            Return m_value
        End Get
        Set(ByVal value As Object)
            m_value = value
        End Set
    End Property

    ''' <summary>
    ''' Widens this token's range so that it also covers the range of
    ''' <paramref name="token"/>; the range never shrinks.
    ''' </summary>
    ''' <param name="token">The token whose range must be included.</param>
    Public Sub UpdateRange(ByVal token As Token)
        m_startPos = Math.Min(m_startPos, token.StartPos)
        m_endPos = Math.Max(m_endPos, token.EndPos)
    End Sub

    ''' <summary>
    ''' Returns the type name, followed by the quoted text when there is any.
    ''' </summary>
    Public Overloads Overrides Function ToString() As String
        Dim typeName As String = Type.ToString()

        ' VB's string "=" treats Nothing and "" alike, so this one test
        ' covers both unset and empty text.
        If m_text = Nothing Then
            Return typeName
        End If

        Return typeName & " '" & m_text & "'"
    End Function
End Class
#End Region
End Namespace
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
Herre Kuijpers is currently employed at Rubicon. During his career he has developed skills in a wide range of technologies, methodologies and programming languages, such as C#, ASP.NET, .NET Core, VC++, JavaScript, SQL, Agile, Scrum, DevOps and ALM. He currently works as a software architect on various projects.
Herre Kuijpers is a very experienced software architect with deep knowledge of software design and development on the Microsoft .NET platform. He has a broad knowledge of Microsoft products and knows how these, in combination with custom software, can be optimally implemented in the often complex environment of the customer.