Click here to Skip to main content
15,896,118 members
Articles / Programming Languages / XML

Compressing Persisted DataSets

Rate me:
Please Sign up or sign in to vote.
4.30/5 (32 votes)
19 Jun 2005 | CPOL | 5 min read | 99.1K | 1.8K | 43
Using .NET 2.0 DeflateStream and GZipStream to compress persisted DataSets.
Imports System.Data
Imports System.IO
Imports System.IO.Compression
Imports System.Runtime.Serialization.Formatters.Binary
Imports System.Runtime.Serialization

' Benchmark harness comparing ways of persisting a DataSet:
' raw XML, gzipped XML, deflated XML, binary serialization and
' deflate-compressed binary serialization. For every format the elapsed
' write/read time is printed, and for every written file its size on disk.
Module Module1

    ''' <summary>
    ''' Loads "..\input.xml" into a DataSet, then writes it out and reads it
    ''' back in each supported format, printing one timing line per step.
    ''' </summary>
    ''' <remarks>
    ''' Every stream is wrapped in a Using block so that it is flushed and
    ''' closed even if a read or write throws. The compression stream is the
    ''' inner Using and is therefore disposed before the file stream, which is
    ''' required for the compressed trailer to reach the file.
    ''' </remarks>
    Sub Main()
        Dim ds As New DataSet
        Dim formatter As New BinaryFormatter
        ' Stopwatch is a monotonic, high-resolution timer; the wall clock
        ' (Now.Ticks) is too coarse for operations this short.
        Dim timer As System.Diagnostics.Stopwatch

        ' ---- raw XML ----
        timer = System.Diagnostics.Stopwatch.StartNew()
        ds.ReadXml("..\input.xml")
        timer.Stop()
        Report(timer.Elapsed, "read  raw XML", "..\input.xml")

        timer = System.Diagnostics.Stopwatch.StartNew()
        ds.WriteXml("test.xml")
        timer.Stop()
        Report(timer.Elapsed, "write raw XML", "test.xml")
        Console.WriteLine()

        ' ---- gzipped XML ----
        timer = System.Diagnostics.Stopwatch.StartNew()
        Using outfile As New FileStream("test.xmz", FileMode.Create, FileAccess.Write)
            Using zipOut As New GZipStream(outfile, CompressionMode.Compress, False)
                ds.WriteXml(zipOut)
            End Using
        End Using
        timer.Stop()
        Report(timer.Elapsed, "write gzipped XML", "test.xmz")

        ds.Reset()

        timer = System.Diagnostics.Stopwatch.StartNew()
        Using infile As New FileStream("test.xmz", FileMode.Open, FileAccess.Read)
            Using zipIn As New GZipStream(infile, CompressionMode.Decompress, False)
                ds.ReadXml(zipIn)
            End Using
        End Using
        timer.Stop()
        Report(timer.Elapsed, "read  gzipped XML")
        Console.WriteLine()

        ' ---- deflated XML ----
        timer = System.Diagnostics.Stopwatch.StartNew()
        Using outfile As New FileStream("test.xmd", FileMode.Create, FileAccess.Write)
            Using deflateOut As New DeflateStream(outfile, CompressionMode.Compress, False)
                ds.WriteXml(deflateOut)
            End Using
        End Using
        timer.Stop()
        Report(timer.Elapsed, "write deflated XML", "test.xmd")

        ds.Reset()

        timer = System.Diagnostics.Stopwatch.StartNew()
        Using infile As New FileStream("test.xmd", FileMode.Open, FileAccess.Read)
            Using deflateIn As New DeflateStream(infile, CompressionMode.Decompress, False)
                ds.ReadXml(deflateIn)
            End Using
        End Using
        timer.Stop()
        Report(timer.Elapsed, "read  deflated XML")
        Console.WriteLine()

        ' ---- binary serialization ----
        timer = System.Diagnostics.Stopwatch.StartNew()
        Using outfile As New FileStream("test.bin", FileMode.Create, FileAccess.Write)
            ds.RemotingFormat = SerializationFormat.Binary
            formatter.Serialize(outfile, ds)
        End Using
        timer.Stop()
        Report(timer.Elapsed, "write binary", "test.bin")

        ' NOTE(review): BinaryFormatter.Deserialize is unsafe on untrusted input.
        ' It is used here only on files this program has just written itself.
        timer = System.Diagnostics.Stopwatch.StartNew()
        Using infile As New FileStream("test.bin", FileMode.Open, FileAccess.Read)
            ds = DirectCast(formatter.Deserialize(infile), DataSet)
        End Using
        timer.Stop()
        Report(timer.Elapsed, "read  binary")
        Console.WriteLine()

        ' ---- deflate-compressed binary serialization ----
        timer = System.Diagnostics.Stopwatch.StartNew()
        Using outfile As New FileStream("test.bnz", FileMode.Create, FileAccess.Write)
            ds.RemotingFormat = SerializationFormat.Binary
            Using deflateOut As New DeflateStream(outfile, CompressionMode.Compress, False)
                formatter.Serialize(deflateOut, ds)
            End Using
        End Using
        timer.Stop()
        Report(timer.Elapsed, "write compressed binary", "test.bnz")

        timer = System.Diagnostics.Stopwatch.StartNew()
        Using infile As New FileStream("test.bnz", FileMode.Open, FileAccess.Read)
            Using deflateIn As New DeflateStream(infile, CompressionMode.Decompress, False)
                ds = DirectCast(formatter.Deserialize(deflateIn), DataSet)
            End Using
        End Using
        timer.Stop()
        Report(timer.Elapsed, "read  compressed binary")
    End Sub

    ''' <summary>Prints "Took {elapsed} to {action}" to the console.</summary>
    ''' <param name="elapsed">Measured duration of the step.</param>
    ''' <param name="action">Description of the step, e.g. "read  binary".</param>
    Private Sub Report(ByVal elapsed As TimeSpan, ByVal action As String)
        Console.WriteLine("Took " & elapsed.ToString & " to " & action)
    End Sub

    ''' <summary>
    ''' Prints "Took {elapsed} to {action}, size={bytes}" where bytes is the
    ''' current length of the file at <paramref name="fileName"/>.
    ''' </summary>
    ''' <param name="elapsed">Measured duration of the step.</param>
    ''' <param name="action">Description of the step, e.g. "write binary".</param>
    ''' <param name="fileName">Path of the file whose size is reported.</param>
    Private Sub Report(ByVal elapsed As TimeSpan, ByVal action As String, ByVal fileName As String)
        Dim info As New FileInfo(fileName)
        Console.WriteLine("Took " & elapsed.ToString & " to " & action & ", size=" & info.Length)
    End Sub

End Module

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Web Developer
Canada
Adrian Moore is the Development Manager for the SCADA Vision system developed by ABB Inc in Calgary, Alberta.

He has been interested in compilers, parsers, real-time database systems and peer-to-peer solutions since the early 90's. In his spare time, he is currently working on a SQL parser for querying .NET DataSets (http://www.queryadataset.com).

Adrian is a Microsoft MVP for Windows Networking.

Comments and Discussions