Click here to Skip to main content
15,897,371 members
Articles / Programming Languages / Visual Basic

Implementation of LZW Compression and Decompression in VB.NET

Rate me:
Please Sign up or sign in to vote.
4.57/5 (10 votes)
24 Aug 20062 min read 95.9K   2.3K   46  
Implementation of Mark Nelson's LZW algorithms in VB.NET.
Option Strict On
Option Explicit On 

Public Class clsLZW
    ' see http://marknelson.us/1989/10/01/lzw-data-compression/
    ' this is a VB.NET conversion port of mark's C program.
    ' Please refer to that program prior to modifying this one.

    Private BITS As Integer = 14
    Private HASHING_SHIFT As Integer = 4
    Private MAX_VALUE As Integer = (1 << BITS) - 1
    Private MAX_CODE As Integer = MAX_VALUE - 1
    'Private Const TABLE_SIZE As Integer = 5021 ' 12 bits
    'Private Const TABLE_SIZE As Integer = 9029 ' 13 bits
    Private Const TABLE_SIZE As Integer = 18041 ' 14 bits
    Private EOF As Integer = -1

    Public brInput As IO.BinaryReader = Nothing
    Public bwOutput As IO.BinaryWriter = Nothing

    Private iaCode_Value(TABLE_SIZE) As Integer
    Private iaPrefix_Code(TABLE_SIZE) As Integer
    Private baAppend_Character(TABLE_SIZE) As Byte

    '** This is the compression routine.  The code should be a fairly close
    '** match to the algorithm accompanying the article.
    Public Sub compress()
        Dim iNextCode As Integer = 0
        Dim iCharacter As Integer = 0
        Dim iStringCode As Integer = 0
        Dim iIndex As Integer = 0

        iNextCode = 256                   ' Next code is the next available string code

        For i As Integer = 0 To TABLE_SIZE - 1 ' Clear out the string table before starting
            iaCode_Value(i) = -1
        Next i

        ' Get the first iCharacter. Assuming it to be 0 - 255
        ' Hence only valid for ASCII text files */
        iStringCode = ReadByte()

        '** This is the main loop where it all happens.  This loop runs util all of
        '** the Input has been exhausted.  Note that it stops adding codes to the
        '** table after all of the possible codes have been defined.
        iCharacter = ReadByte()
        While iCharacter <> -1
            iIndex = find_match(iStringCode, iCharacter)    ' See if the string is in */
            If (iaCode_Value(iIndex) <> -1) Then            ' the table.  If it is,   */
                iStringCode = iaCode_Value(iIndex)          ' get the code value.  If */
            Else                                            ' the string is not in the table, try to add it.   */
                If (iNextCode <= MAX_CODE) Then
                    iaCode_Value(iIndex) = iNextCode
                    iNextCode += 1
                    iaPrefix_Code(iIndex) = iStringCode
                    baAppend_Character(iIndex) = CByte(iCharacter)
                End If
                output_code(iStringCode)    ' When a string is found  */
                iStringCode = iCharacter    ' that is not in the table */
            End If                          ' after adding the new one */
            iCharacter = ReadByte()
        End While

        ' End of the main loop.

        output_code(iStringCode)   ' Output the last code               
        output_code(MAX_VALUE)     ' Output the end of buffer code      */
        output_code(0)             ' This code flushes the Output buffer*/
    End Sub

    ' This is the hashing routine.  It tries to find a match for the prefix+char
    ' string in the string table.  If it finds it, the iIndex is returned.  If
    ' the string is not found, the first available iIndex in the string table is
    ' returned instead.
    Private Function find_match(ByVal iHashPrefix As Integer, ByVal iHashCharacter As Integer) As Integer
        Dim iIndex As Integer = 0
        Dim iOffset As Integer = 0

        iIndex = CInt((iHashCharacter << HASHING_SHIFT) Xor iHashPrefix)

        If (iIndex = 0) Then
            iOffset = 1
        Else
            iOffset = TABLE_SIZE - iIndex
        End If

        While (True)
            If iaCode_Value(iIndex) = -1 Then
                Return iIndex
            End If
            If (iaPrefix_Code(iIndex) = iHashPrefix) And (baAppend_Character(iIndex) = iHashCharacter) Then
                Return iIndex
            End If
            iIndex -= iOffset
            If (iIndex < 0) Then
                iIndex += TABLE_SIZE
            End If
        End While
    End Function

    ' The following routine is used to output variable length
    ' codes.  It is written strictly for clarity, and is not
    ' particularly efficient.

    Private Sub output_code(ByVal code As Integer)
        Static output_bit_count As Integer = 0
        Static output_bit_buffer As Long = 0

        output_bit_buffer = output_bit_buffer Or (code << (32 - BITS - output_bit_count))
        output_bit_count += BITS

        While output_bit_count >= 8
            WriteByte(CByte((output_bit_buffer >> 24) And 255))
            output_bit_buffer <<= 8
            output_bit_count -= 8
        End While
    End Sub



    ' This is the expansion routine.  It takes an LZW format file, and expands
    ' it to an bwOutput file.  The code here should be a fairly close match to
    ' the algorithm in the accompanying article.

    Public Sub expand()
        Dim baDecode_Stack(TABLE_SIZE) As Byte
        Dim iNextCode As Integer
        Dim iNewCode As Integer
        Dim iOldCode As Integer
        Dim bCharacter As Byte
        Dim iCurrCode As Integer
        Dim i As Integer

        'This is the next available code to define.
        iNextCode = 256

        ' Read in the first code, initialize the 
        ' character variable, and send the first 
        ' code to the output file.
        iOldCode = input_code()
        bCharacter = CType(iOldCode, Byte)
        WriteByte(CByte(iOldCode))

        ' This is the main expansion loop.  It reads in characters from the LZW file
        ' until it sees the special code used to inidicate the end of the data.
        iNewCode = input_code()
        While (iNewCode <> MAX_VALUE)
            If iNewCode >= iNextCode Then
                ' This code checks for the special STRING+CHARACTER+STRING+CHARACTER+STRING
                ' case which generates an undefined code.  It handles it by decoding
                ' the last code, and adding a single character to the end of the decode string.            
                baDecode_Stack(0) = bCharacter
                i = 1
                iCurrCode = iOldCode
            Else
                ' Otherwise we do a straight decode of the new code.
                i = 0
                iCurrCode = iNewCode
            End If
            While iCurrCode > 255
                ' This routine simply decodes a string from the string table, storing
                ' it in a buffer.  The buffer can then be output in reverse order by
                ' the expansion program.
                baDecode_Stack(i) = baAppend_Character(iCurrCode)
                i = i + 1
                If i >= MAX_CODE Then
                    Throw New ApplicationException("Fatal error during iCurrCode expansion.")
                End If
                iCurrCode = iaPrefix_Code(iCurrCode)
            End While
            baDecode_Stack(i) = CType(iCurrCode, Byte)
            bCharacter = baDecode_Stack(i)

            'Now we output the decoded string in reverse order.
            While i >= 0
                WriteByte(baDecode_Stack(i))
                i = i - 1
            End While

            ' Finally, if possible, add a new code to the string table.
            If (iNextCode <= MAX_CODE) Then
                iaPrefix_Code(iNextCode) = iOldCode
                baAppend_Character(iNextCode) = bCharacter
                iNextCode += 1
            End If
            iOldCode = iNewCode
            iNewCode = input_code()
        End While
    End Sub

    ' The following routine is used to input variable length
    ' codes.  It is written strictly for clarity, and is not
    ' particularly efficient.
    Private Function input_code() As Integer
        Dim return_value As Long
        Static input_bit_count As Integer = 0
        Static input_bit_buffer As Long = 0
        Static Mask32 As Long = CLng(2 ^ 32) - 1

        While input_bit_count <= 24
            input_bit_buffer = (input_bit_buffer Or _
                ReadByte() << (24 - input_bit_count)) And Mask32
            input_bit_count += 8
        End While
        return_value = (input_bit_buffer >> 32 - BITS) And Mask32
        input_bit_buffer = (input_bit_buffer << BITS) And Mask32
        input_bit_count -= BITS
        Return CInt(return_value)
    End Function

    Private Sub WriteByte(ByVal b As Byte)
        bwOutput.Write(b)
    End Sub

    Private Function ReadByte() As Integer
        Dim ba(1) As Byte
        Dim iResult As Integer
        iResult = brInput.Read(ba, 0, 1)
        If iResult = 0 Then
            Return -1
        End If
        Return ba(0)
    End Function

End Class

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
United States United States
Alan Budelier is a happy Catholic working for IT in Major Fraternal Life Insurnance Company in Rock Island, IL, who, like Christ, would someday like to compile and run without any need for exception handling.

Comments and Discussions