|
'----------------------------------------------------------------------
' Gold Parser engine.
' See more details on http://www.devincook.com/goldparser/
'
' Original code is written in VB by Devin Cook (GOLDParser@DevinCook.com)
'
' This translation is done by Vladimir Morozov (vmoroz@hotmail.com)
'
' The translation is based on the other engine translations:
' Delphi engine by Alexandre Rai (riccio@gmx.at)
' C# engine by Marcus Klimstra (klimstra@home.nl)
'----------------------------------------------------------------------
Imports System
Imports System.IO
Imports System.Text
Imports System.Collections
Namespace GoldParser
''' <summary>
''' Contains grammar tables required for parsing.
''' </summary>
Class Grammar
#Region "Public constants"
''' <summary>
''' Identifies Gold parser grammar file.
''' </summary>
''' <remarks>
''' Load compares this text with the first string read from the grammar file
''' and rejects the file when they differ.
''' </remarks>
Public Const FileHeader As String = "GOLD Parser Tables/v1.0"
#End Region
#Region "Private field declaration"
' Grammar header information (filled in by ReadHeader)
Private m_name As String ' Name of the grammar
Private m_version As String ' Version of the grammar
Private m_author As String ' Author of the grammar
Private m_about As String ' Grammar description
Private m_startSymbolIndex As Integer ' Index of the start symbol in m_symbolTable
Private m_caseSensitive As Boolean ' Grammar is case sensitive or not
' Tables read from the binary grammar file (allocated by ReadTableCounts)
Private m_symbolTable() As Symbol ' Symbol table
Private m_charSetTable() As String ' Charset table; each charset is stored as a string of its characters
Private m_ruleTable() As Rule ' Rule table
Private m_dfaStateTable() As DfaState ' DFA state table
Private m_lalrStateTable() As LalrState ' LALR state table
' Collection wrappers exposed as public properties; each is created on first access
Private m_symbols As SymbolCollection ' Wrapper over m_symbolTable
Private m_charSets As CharSetCollection ' Wrapper over m_charSetTable
Private m_rules As RuleCollection ' Wrapper over m_ruleTable
Private m_dfaStates As DfaStateCollection ' Wrapper over m_dfaStateTable
Private m_lalrStates As LalrStateCollection ' Wrapper over m_lalrStateTable
' Initial states (filled in by ReadInitialStates)
Private m_dfaInitialState As Integer ' DFA initial state index
Private m_lalrInitialState As Integer ' LALR initial state index into m_lalrStateTable
' Internal state of grammar parser
Private m_reader As BinaryReader ' Source of the grammar
Private m_entryCount As Integer ' Number of entries left in the record being read
#End Region
#Region "Constructors"
''' <summary>
''' Creates a new instance of <c>Grammar</c> class and immediately loads
''' the grammar tables from the supplied reader.
''' </summary>
''' <param name="reader">Binary reader positioned at the start of the grammar data.</param>
''' <exception cref="System.ArgumentNullException"><paramref name="reader"/> is Nothing.</exception>
Public Sub New(ByVal reader As BinaryReader)
    If reader Is Nothing Then Throw New ArgumentNullException("reader")

    Me.m_reader = reader
    Me.Load()
End Sub
#End Region
#Region "Public members"
''' <summary>
''' Gets the grammar name read from the grammar header.
''' </summary>
Public ReadOnly Property Name() As String
    Get
        Return Me.m_name
    End Get
End Property
''' <summary>
''' Gets the grammar version read from the grammar header.
''' </summary>
Public ReadOnly Property Version() As String
    Get
        Return Me.m_version
    End Get
End Property
''' <summary>
''' Gets the grammar author read from the grammar header.
''' </summary>
Public ReadOnly Property Author() As String
    Get
        Return Me.m_author
    End Get
End Property
''' <summary>
''' Gets the grammar description read from the grammar header.
''' </summary>
Public ReadOnly Property About() As String
    Get
        Return Me.m_about
    End Get
End Property
''' <summary>
''' Gets the start symbol for the grammar.
''' </summary>
''' <remarks>
''' Looks the symbol up in the symbol table by the start symbol index
''' taken from the grammar header.
''' </remarks>
Public ReadOnly Property StartSymbol() As Symbol
    Get
        Dim startIndex As Integer = Me.m_startSymbolIndex
        Return Me.m_symbolTable(startIndex)
    End Get
End Property
''' <summary>
''' Gets the case sensitivity flag read from the grammar header.
''' </summary>
Public ReadOnly Property CaseSensitive() As Boolean
    Get
        Return Me.m_caseSensitive
    End Get
End Property
''' <summary>
''' Gets the initial DFA state as the integer value stored in the grammar file.
''' </summary>
Public ReadOnly Property DfaInitialState() As Integer
    Get
        Return Me.m_dfaInitialState
    End Get
End Property
''' <summary>
''' Gets the initial LALR state.
''' </summary>
''' <remarks>
''' Looks the state up in the LALR state table by the initial state index
''' stored in the grammar file.
''' </remarks>
Public ReadOnly Property InitialLalrState() As LalrState
    Get
        Dim initialIndex As Integer = Me.m_lalrInitialState
        Return Me.m_lalrStateTable(initialIndex)
    End Get
End Property
''' <summary>
''' Gets the symbol table as a collection.
''' </summary>
''' <remarks>
''' The collection wrapper is created on the first call and reused afterwards.
''' </remarks>
Public ReadOnly Property SymbolTable() As SymbolCollection
    Get
        If Me.m_symbols Is Nothing Then Me.m_symbols = New SymbolCollection(Me.m_symbolTable)
        Return Me.m_symbols
    End Get
End Property
''' <summary>
''' Gets the char set table as a collection.
''' </summary>
''' <remarks>
''' The collection wrapper is created on the first call and reused afterwards.
''' </remarks>
Public ReadOnly Property CharSetTable() As CharSetCollection
    Get
        If Me.m_charSets Is Nothing Then Me.m_charSets = New CharSetCollection(Me.m_charSetTable)
        Return Me.m_charSets
    End Get
End Property
''' <summary>
''' Gets the rule table as a collection.
''' </summary>
''' <remarks>
''' The collection wrapper is created on the first call and reused afterwards.
''' </remarks>
Public ReadOnly Property RuleTable() As RuleCollection
    Get
        If Me.m_rules Is Nothing Then Me.m_rules = New RuleCollection(Me.m_ruleTable)
        Return Me.m_rules
    End Get
End Property
''' <summary>
''' Gets the DFA state table as a collection.
''' </summary>
''' <remarks>
''' The collection wrapper is created on the first call and reused afterwards.
''' </remarks>
Public ReadOnly Property DfaStateTable() As DfaStateCollection
    Get
        If Me.m_dfaStates Is Nothing Then Me.m_dfaStates = New DfaStateCollection(Me.m_dfaStateTable)
        Return Me.m_dfaStates
    End Get
End Property
''' <summary>
''' Gets the LALR state table as a collection.
''' </summary>
''' <remarks>
''' The collection wrapper is created on the first call and reused afterwards.
''' </remarks>
Public ReadOnly Property LalrStateTable() As LalrStateCollection
    Get
        If Me.m_lalrStates Is Nothing Then Me.m_lalrStates = New LalrStateCollection(Me.m_lalrStateTable)
        Return Me.m_lalrStates
    End Get
End Property
#End Region
#Region "Private members"
''' <summary>
''' Loads grammar from the binary reader.
''' </summary>
''' <remarks>
''' Checks the file header string first, then reads records one by one until
''' the reader reports that no more characters are available, handing each
''' record to the routine that reads its type.
''' </remarks>
''' <exception cref="System.Exception">
''' The header string differs from <c>FileHeader</c>, or a record has a type
''' that has no reader in this method.
''' </exception>
Private Sub Load()
    If FileHeader <> ReadString() Then
        Throw New Exception(Res.GetString(Res.Grammar_WrongFileHeader))
    End If
    While m_reader.PeekChar() <> -1
        ' The local is not named "recordType": VB is case-insensitive, so that
        ' name would hide the RecordType enum and the Case labels below would
        ' read enum members through an instance variable (warning BC42025).
        Dim record As RecordType = ReadNextRecord()
        Select Case record
            Case RecordType.Parameters
                ReadHeader()
            Case RecordType.TableCounts
                ReadTableCounts()
            Case RecordType.Initial
                ReadInitialStates()
            Case RecordType.Symbols
                ReadSymbols()
            Case RecordType.CharSets
                ReadCharSets()
            Case RecordType.Rules
                ReadRules()
            Case RecordType.DfaStates
                ReadDfaStates()
            Case RecordType.LalrStates
                ReadLalrStates()
            Case Else
                ' Any other value, RecordType.Comment included, is rejected.
                Throw New Exception(Res.GetString(Res.Grammar_InvalidRecordType))
        End Select
    End While
End Sub
''' <summary>
''' Reads the start of the next record in the binary grammar file.
''' </summary>
''' <remarks>
''' A record starts with the byte 'M', followed by a two byte entry count and
''' a byte entry that holds the record type. The entry count is stored so the
''' entry readers can track how many entries remain.
''' </remarks>
''' <returns>Read record type.</returns>
''' <exception cref="System.Exception">The record does not start with 'M'.</exception>
Private Function ReadNextRecord() As RecordType
    Dim header As Char = ChrW(ReadByte())
    If header <> "M"c Then
        Throw New Exception(Res.GetString(Res.Grammar_InvalidRecordHeader))
    End If

    ' Number of entries in this record; the record type entry counts as one of them
    Me.m_entryCount = ReadInt16()
    Dim typeCode As Byte = ReadByteEntry()
    Return CType(typeCode, RecordType)
End Function
''' <summary>
''' Reads grammar header information.
''' </summary>
''' <remarks>
''' The entries are consumed in file order: name, version, author,
''' description, case sensitivity flag and start symbol index.
''' </remarks>
Private Sub ReadHeader()
    ' Four descriptive strings
    Me.m_name = ReadStringEntry()
    Me.m_version = ReadStringEntry()
    Me.m_author = ReadStringEntry()
    Me.m_about = ReadStringEntry()

    ' Parsing options
    Me.m_caseSensitive = ReadBoolEntry()
    Me.m_startSymbolIndex = ReadInt16Entry()
End Sub
''' <summary>
''' Reads table record counts and allocates the tables with those sizes.
''' </summary>
''' <remarks>
''' Counts are read in file order: symbols, char sets, rules, DFA states,
''' LALR states. Each table is allocated right after its count is read.
''' </remarks>
Private Sub ReadTableCounts()
    Dim symbolCount As Integer = ReadInt16Entry()
    Me.m_symbolTable = New Symbol(symbolCount - 1) {}

    Dim charSetCount As Integer = ReadInt16Entry()
    Me.m_charSetTable = New String(charSetCount - 1) {}

    Dim ruleCount As Integer = ReadInt16Entry()
    Me.m_ruleTable = New Rule(ruleCount - 1) {}

    Dim dfaStateCount As Integer = ReadInt16Entry()
    Me.m_dfaStateTable = New DfaState(dfaStateCount - 1) {}

    Dim lalrStateCount As Integer = ReadInt16Entry()
    Me.m_lalrStateTable = New LalrState(lalrStateCount - 1) {}
End Sub
''' <summary>
''' Reads the initial DFA state followed by the initial LALR state.
''' </summary>
Private Sub ReadInitialStates()
    Me.m_dfaInitialState = ReadInt16Entry()
    Me.m_lalrInitialState = ReadInt16Entry()
End Sub
''' <summary>
''' Reads one symbol record and stores the symbol in the symbol table.
''' </summary>
''' <remarks>
''' The record holds the symbol index, its name and its type, in that order.
''' The symbol is stored at the position given by its index.
''' </remarks>
Private Sub ReadSymbols()
    Dim symbolIndex As Integer = ReadInt16Entry()
    Dim symbolName As String = ReadStringEntry()
    Dim kind As SymbolType = CType(ReadInt16Entry(), SymbolType)

    Me.m_symbolTable(symbolIndex) = New Symbol(symbolIndex, symbolName, kind)
End Sub
''' <summary>
''' Reads one char set record and stores it in the char set table.
''' </summary>
''' <remarks>
''' The record holds the char set index followed by a string containing the
''' characters of the set.
''' </remarks>
Private Sub ReadCharSets()
    ' The index precedes the characters in the file, so it is read first
    Dim charSetIndex As Integer = ReadInt16Entry()
    Dim characters As String = ReadStringEntry()
    Me.m_charSetTable(charSetIndex) = characters
End Sub
''' <summary>
''' Reads one rule record and stores the rule in the rule table.
''' </summary>
''' <remarks>
''' The record holds the rule index, the index of the rule's symbol, an empty
''' entry, and then one symbol index per remaining entry.
''' </remarks>
Private Sub ReadRules()
    Dim ruleIndex As Integer = ReadInt16Entry()
    Dim head As Symbol = Me.m_symbolTable(ReadInt16Entry())
    ReadEmptyEntry()

    ' Every entry still left in the record is one symbol index
    Dim bodyLength As Integer = Me.m_entryCount
    Dim body As Symbol() = New Symbol(bodyLength - 1) {}
    Dim position As Integer = 0
    Do While position < bodyLength
        body(position) = Me.m_symbolTable(ReadInt16Entry())
        position += 1
    Loop

    Me.m_ruleTable(ruleIndex) = New Rule(ruleIndex, head, body)
End Sub
''' <summary>
''' Reads one DFA state record and stores the state in the DFA state table.
''' </summary>
''' <remarks>
''' The record holds the state index, an accept flag, the accept symbol index
''' and an empty entry, followed by the edges. Each edge takes three entries:
''' char set index, target index and an empty entry.
''' </remarks>
Private Sub ReadDfaStates()
    Dim index As Integer = ReadInt16Entry()
    Dim acceptSymbol As Symbol = Nothing
    If ReadBoolEntry() Then
        acceptSymbol = m_symbolTable(ReadInt16Entry())
    Else
        ' The symbol index entry is present even when the flag is false;
        ' consume it so the following entries line up.
        ReadInt16Entry()
    End If
    ReadEmptyEntry()
    ' Read DFA edges.
    ' Integer division (\) is required here: the / operator yields a Double
    ' that is rounded when converted to an array bound, which can overstate
    ' the number of edges, and it does not compile under Option Strict On.
    Dim edges As DfaEdge() = New DfaEdge((m_entryCount \ 3) - 1) {}
    For i As Integer = 0 To edges.Length - 1
        edges(i).CharSetIndex = ReadInt16Entry()
        edges(i).TargetIndex = ReadInt16Entry()
        ReadEmptyEntry()
    Next
    ' Create DFA state and store it in DFA state table
    Dim transitionVector As Hashtable = CreateDfaTransitionVector(edges)
    m_dfaStateTable(index) = New DfaState(index, acceptSymbol, transitionVector)
End Sub
''' <summary>
''' Reads one LALR state record and stores the state in the LALR state table.
''' </summary>
''' <remarks>
''' The record holds the state index and an empty entry, followed by the
''' actions. Each action takes four entries: symbol index, action type,
''' target index and an empty entry. A second array, with one slot per symbol
''' in the symbol table, gives direct access to an action by symbol index.
''' </remarks>
Private Sub ReadLalrStates()
    Dim index As Integer = ReadInt16Entry()
    ReadEmptyEntry()
    ' Integer division (\) is required here: the / operator yields a Double
    ' that is rounded when converted to an array bound, which can overstate
    ' the number of actions, and it does not compile under Option Strict On.
    Dim stateTable As LalrStateAction() = New LalrStateAction((m_entryCount \ 4) - 1) {}
    For i As Integer = 0 To stateTable.Length - 1
        Dim symbol As Symbol = m_symbolTable(ReadInt16Entry())
        Dim action As LalrAction = CType(ReadInt16Entry(), LalrAction)
        Dim targetIndex As Integer = ReadInt16Entry()
        ReadEmptyEntry()
        stateTable(i) = New LalrStateAction(i, symbol, action, targetIndex)
    Next
    ' Create the transition vector. A new array already holds default values,
    ' so slots for symbols without an action need no explicit reset.
    Dim transitionVector As LalrStateAction() = New LalrStateAction(m_symbolTable.Length - 1) {}
    For i As Integer = 0 To stateTable.Length - 1
        transitionVector(stateTable(i).Symbol.Index) = stateTable(i)
    Next
    m_lalrStateTable(index) = New LalrState(index, stateTable, transitionVector)
End Sub
''' <summary>
''' Creates the DFA state transition vector.
''' </summary>
''' <remarks>
''' For every character of every edge's char set, the table maps the
''' character to the edge's target index. Edges are processed from last to
''' first, so when a character belongs to several edges the edge with the
''' lowest position in the array determines the stored target.
''' </remarks>
''' <param name="edges">Array of automata edges.</param>
''' <returns>Hashtable with the transition information.</returns>
Private Function CreateDfaTransitionVector(ByVal edges() As DfaEdge) As Hashtable
    Dim transitions As New Hashtable
    For edgeIndex As Integer = edges.Length - 1 To 0 Step -1
        Dim edge As DfaEdge = edges(edgeIndex)
        Dim characters As String = Me.m_charSetTable(edge.CharSetIndex)
        For charIndex As Integer = 0 To characters.Length - 1
            transitions(characters.Chars(charIndex)) = edge.TargetIndex
        Next
    Next
    Return transitions
End Function
''' <summary>
''' Consumes an empty entry from the grammar file.
''' </summary>
''' <exception cref="System.Exception">The next entry is not an empty entry.</exception>
Private Sub ReadEmptyEntry()
    Dim actual As EntryType = ReadEntryType()
    If actual = EntryType.Empty Then
        ' An empty entry has no payload; only the remaining count changes
        Me.m_entryCount -= 1
        Return
    End If
    Throw New Exception(Res.GetString(Res.Grammar_EmptyEntryExpected))
End Sub
''' <summary>
''' Reads a string entry from the grammar file.
''' </summary>
''' <returns>String entry content.</returns>
''' <exception cref="System.Exception">The next entry is not a string entry.</exception>
Private Function ReadStringEntry() As String
    Dim actual As EntryType = ReadEntryType()
    If actual = EntryType.String Then
        Me.m_entryCount -= 1
        Return ReadString()
    End If
    Throw New Exception(Res.GetString(Res.Grammar_StringEntryExpected))
End Function
''' <summary>
''' Reads an Int16 entry from the grammar file.
''' </summary>
''' <returns>Int16 entry content.</returns>
''' <exception cref="System.Exception">The next entry is not an integer entry.</exception>
Private Function ReadInt16Entry() As Integer
    Dim actual As EntryType = ReadEntryType()
    If actual = EntryType.Integer Then
        Me.m_entryCount -= 1
        Return ReadInt16()
    End If
    Throw New Exception(Res.GetString(Res.Grammar_IntegerEntryExpected))
End Function
''' <summary>
''' Reads a byte entry from the grammar file.
''' </summary>
''' <returns>Byte entry content.</returns>
''' <exception cref="System.Exception">The next entry is not a byte entry.</exception>
Private Function ReadByteEntry() As Byte
    Dim actual As EntryType = ReadEntryType()
    If actual = EntryType.Byte Then
        Me.m_entryCount -= 1
        Return ReadByte()
    End If
    Throw New Exception(Res.GetString(Res.Grammar_ByteEntryExpected))
End Function
''' <summary>
''' Reads a boolean entry from the grammar file.
''' </summary>
''' <returns>Boolean entry content.</returns>
''' <exception cref="System.Exception">The next entry is not a boolean entry.</exception>
Private Function ReadBoolEntry() As Boolean
    Dim actual As EntryType = ReadEntryType()
    If actual = EntryType.Boolean Then
        Me.m_entryCount -= 1
        Return ReadBool()
    End If
    Throw New Exception(Res.GetString(Res.Grammar_BooleanEntryExpected))
End Function
''' <summary>
''' Reads the type byte of the next entry.
''' </summary>
''' <remarks>
''' Does not change the remaining entry count; the entry readers decrement
''' it after they have checked the type.
''' </remarks>
''' <returns>Entry type.</returns>
''' <exception cref="System.Exception">The remaining entry count is zero.</exception>
Private Function ReadEntryType() As EntryType
    If Me.m_entryCount <> 0 Then
        Dim typeCode As Byte = ReadByte()
        Return CType(typeCode, EntryType)
    End If
    Throw New Exception(Res.GetString(Res.Grammar_NoEntry))
End Function
''' <summary>
''' Reads a string from the grammar file.
''' </summary>
''' <remarks>
''' Characters are stored as two byte values and the string ends with a zero
''' value, which is consumed but not included in the result.
''' </remarks>
''' <returns>String value.</returns>
Private Function ReadString() As String
    Dim buffer As New StringBuilder
    Do
        Dim current As Char = ChrW(ReadInt16())
        If current = ChrW(0) Then Exit Do
        buffer.Append(current)
    Loop
    Return buffer.ToString()
End Function
''' <summary>
''' Reads a two byte integer (Int16) from the grammar file.
''' </summary>
''' <returns>Int16 value widened to Integer.</returns>
Private Function ReadInt16() As Integer
    Dim raw As Short = Me.m_reader.ReadInt16()
    Return raw
End Function
''' <summary>
''' Reads a single byte from the grammar file.
''' </summary>
''' <returns>Byte value.</returns>
Private Function ReadByte() As Byte
    Dim raw As Byte = Me.m_reader.ReadByte()
    Return raw
End Function
''' <summary>
''' Reads a boolean from the grammar file.
''' </summary>
''' <returns>True when the byte read equals 1; otherwise False.</returns>
Private Function ReadBool() As Boolean
    Dim raw As Byte = ReadByte()
    Return raw = 1
End Function
#End Region
#Region "Private type definitions"
''' <summary>
''' Record type byte in the binary grammar file.
''' </summary>
''' <remarks>
''' Each value is the character code of the letter shown beside it.
''' ReadNextRecord converts the record's byte entry to this type.
''' </remarks>
Private Enum RecordType
' Members are listed alphabetically by name, not by value
CharSets = 67 'C
Comment = 33 '!
DfaStates = 68 'D
Initial = 73 'I
LalrStates = 76 'L
Parameters = 80 'P
Rules = 82 'R
Symbols = 83 'S
TableCounts = 84 'T
End Enum
''' <summary>
''' Entry type byte in the binary grammar file.
''' </summary>
''' <remarks>
''' Each value is the character code of the letter shown beside it.
''' ReadEntryType converts the byte that precedes an entry to this type.
''' Member names that are VB keywords are escaped with square brackets.
''' </remarks>
Private Enum EntryType
' Members
[Boolean] = 66 'B
[Byte] = 98 'b
Empty = 69 'E
[Integer] = 73 'I
[String] = 83 'S
End Enum
''' <summary>
''' Edge between DFA states.
''' </summary>
''' <remarks>
''' Filled in by ReadDfaStates and consumed by CreateDfaTransitionVector.
''' </remarks>
Private Structure DfaEdge
' Index into the char set table of the characters that follow this edge
Public CharSetIndex As Integer
' Value stored in the transition table for those characters
' (presumably the index of the destination DFA state; confirm against DfaState)
Public TargetIndex As Integer
End Structure
#End Region
End Class
End Namespace
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
Adrian Moore is the Development Manager for the SCADA Vision system developed by ABB Inc in Calgary, Alberta.
He has been interested in compilers, parsers, real-time database systems and peer-to-peer solutions since the early 1990s. In his spare time, he is currently working on a SQL parser for querying .NET DataSets (http://www.queryadataset.com).
Adrian is a Microsoft MVP for Windows Networking.