65.9K
CodeProject is changing. Read more.
Home

String Compression - SevenZip vs GZip

starIconstarIconstarIconstarIcon
emptyStarIcon
starIcon

4.80/5 (6 votes)

Sep 18, 2016

CPOL

1 min read

viewsIcon

22865

downloadIcon

10

An alternative solution: how to compress and decompress strings faster and more effectively

Introduction

GZipStream is a class that provides methods to compress and decompress streams. It is included in .NET Framework 2.0 and later versions.

SevenZip is an assembly for compressing and decompressing files. Its LZMA encoder generally produces smaller output, which makes it more powerful.

Using the Code

using Encoder = SevenZip.Sdk.Compression.Lzma.Encoder;

/// <summary>
/// Compresses <paramref name="text"/> with the SevenZip LZMA encoder and writes
/// the elapsed time and the resulting length to the console.
/// </summary>
/// <remarks>
/// Output layout: the coder properties written by the encoder, followed by the
/// 8-byte uncompressed byte count (in <see cref="BitConverter"/> byte order),
/// followed by the LZMA payload.
/// </remarks>
/// <param name="text">Text to compress.</param>
/// <returns>
/// The compressed bytes converted to a string with <c>Encoding.Default</c>.
/// </returns>
public static string Compress(string text) {
    var watcher = new Watcher();
    watcher.Start();

    string result;

    // The payload is encoded as UTF-8 because Decompress decodes it with a
    // StreamReader, whose default encoding is UTF-8. Using Encoding.Default here
    // would break the round trip for non-ASCII text on systems where the default
    // encoding is an ANSI code page.
    using (var t = new MemoryStream(Encoding.UTF8.GetBytes(text)))
    using (var m = new MemoryStream()) {
        var encoder = new Encoder();

        encoder.WriteCoderProperties(m);
        m.Write(BitConverter.GetBytes(t.Length), 0, 8);
        encoder.Code(t, m, t.Length, -1, null);

        // NOTE(review): Encoding.Default is a text encoding, not a binary-safe
        // container. Where it is UTF-8 (e.g. .NET Core), invalid byte sequences
        // are replaced and the result can no longer be decompressed. Base64 would
        // be safe, but Decompress has to be switched in the same change, so the
        // wire format is deliberately kept as-is here.
        result = Encoding.Default.GetString(m.ToArray());
    }

    watcher.Stop();
    Console.WriteLine($"{watcher.TotalMilliseconds()}ms in compress {text.Length} chars with SevenZip! = {result.Length} new length");

    return result;
}

Now GZipStream:

/// <summary>
/// Compresses <paramref name="text"/> with <see cref="GZipStream"/> and writes
/// the elapsed time and the resulting length to the console.
/// </summary>
/// <remarks>
/// Named <c>CompressGZipStream</c> because that is the name the callers in this
/// program use, and because a second <c>Compress(string)</c> would collide with
/// the SevenZip version.
/// Output layout before Base64 encoding: a 4-byte prefix holding the
/// uncompressed byte count, followed by the GZip data.
/// </remarks>
/// <param name="text">Text to compress.</param>
/// <returns>The length-prefixed GZip data as a Base64 string.</returns>
public static string CompressGZipStream(string text) {
    var watcher = new Watcher();
    watcher.Start();

    var buffer = Encoding.Default.GetBytes(text);

    byte[] compressed;
    using (var t = new MemoryStream()) {
        // The GZipStream has to be disposed before the bytes are taken:
        // disposing flushes the remaining compressed data into t.
        // leaveOpen: true keeps t usable afterwards.
        using (var m = new GZipStream(t, CompressionMode.Compress, true)) {
            m.Write(buffer, 0, buffer.Length);
        }

        compressed = t.ToArray();
    }

    var gzBuffer = new byte[compressed.Length + 4];
    Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gzBuffer, 0, 4);
    Buffer.BlockCopy(compressed, 0, gzBuffer, 4, compressed.Length);

    // The timer is stopped before the Base64 conversion, so the reported time
    // covers compression only.
    watcher.Stop();
    var result = Convert.ToBase64String(gzBuffer);
    Console.WriteLine($"{watcher.TotalMilliseconds()}ms in compress {text.Length} chars with GZipStream! = {result.Length} new length");

    return result;
}

De-Compress

using Decoder = SevenZip.Sdk.Compression.Lzma.Decoder;

/// <summary>
/// Decompresses a string produced by the SevenZip <c>Compress</c> method and
/// writes the elapsed time and the resulting length to the console.
/// </summary>
/// <remarks>
/// Expected input layout (after converting the string back to bytes with
/// <c>Encoding.Default</c>): 5 bytes of decoder properties, 8 bytes holding the
/// uncompressed byte count, then the LZMA payload.
/// </remarks>
/// <param name="text">Compressed data in string form.</param>
/// <returns>The decompressed text, decoded as UTF-8.</returns>
/// <exception cref="InvalidDataException">
/// The input is shorter than the 13-byte header.
/// </exception>
public static string Decompress(string text) {
    var watcher = new Watcher();
    watcher.Start();

    string result;

    // NOTE(review): this mirrors the Encoding.Default.GetString call in Compress.
    // That pairing is only lossless where the default encoding maps every byte
    // value to a character and back - confirm for the target runtime.
    using (var t = new MemoryStream(Encoding.Default.GetBytes(text)))
    using (var m = new MemoryStream())
    {
        var properties = new byte[5];
        var lengthBytes = new byte[8];

        // Fail early on truncated input instead of feeding a zero-filled
        // header to the decoder.
        if (t.Read(properties, 0, properties.Length) != properties.Length ||
            t.Read(lengthBytes, 0, lengthBytes.Length) != lengthBytes.Length)
        {
            throw new InvalidDataException("Compressed data is shorter than the 13-byte LZMA header.");
        }

        var num = BitConverter.ToInt64(lengthBytes, 0);

        var decoder = new Decoder();
        decoder.SetDecoderProperties(properties);

        // Only the bytes after the header are compressed input.
        decoder.Code(t, m, t.Length - t.Position, num, null);

        m.Position = 0;
        using (var reader = new StreamReader(m, Encoding.UTF8))
        {
            result = reader.ReadToEnd();
        }
    }

    watcher.Stop();
    Console.WriteLine($"{watcher.TotalMilliseconds()} ms in decompress {text.Length} chars with SevenZip! = {result.Length} new length");

    return result;
}

Results!

We are going to perform tests with different runs :)

Try On

  • OS: Windows 10 Version 10.0.14393
  • RAM: 3GB Ddr2
  • CPU: Pentium Dual Core 2.70Ghz (2 cores)

Try On

  • OS: Windows Server 2012 Version 6.3.9600
  • RAM: 3.75GB
  • CPU: Intel Xeon 2.30Ghz (2 cores)

Finally, I tested with a real document file of 150,000 characters. The document is available for download in the resources of the article.

Code

/// <summary>
/// Runs the benchmark: one synchronous run on 1000 random characters, a
/// parallel run over five larger random strings, and - if the file
/// "150,000.txt" exists in the current directory - one synchronous run on that
/// file's text. Waits for a line of console input before exiting.
/// </summary>
static void Main()
{
    //======================= 1 runs sync!

    Console.WriteLine($"{Environment.NewLine}1 runs sync!");
    var text = Generate(1000);

    // Results are not needed here; each method prints its own timing line.
    Decompress(Compress(text));
    CompressGZipStream(text);

    //====================================

    //=================== 2 runs Parallel!

    Console.WriteLine($"{Environment.NewLine}2 runs Parallel!");

    // Five inputs of 30,000 to 90,000 characters.
    var list = new List<string>();
    for (var i = 2; i <= 6; i++)
    {
        list.Add(Generate(15000 * i));
    }

    Parallel.ForEach(
        list,
        new ParallelOptions { MaxDegreeOfParallelism = 2 },
        x => CompressParallel(x));

    //===================================

    //========= 1 run sync with real-text!

    // Path.Combine instead of a hard-coded "\\" separator.
    string path = Path.Combine(Environment.CurrentDirectory, "150,000.txt");
    if (File.Exists(path))
    {
        Console.WriteLine($"{Environment.NewLine}1 run sync with real-text!");
        text = File.ReadAllText(path);

        Decompress(Compress(text));
        CompressGZipStream(text);
    }

    //====================================

    // Always reached, even when the sample file is missing, so the console
    // stays open and the earlier results remain readable.
    Console.ReadLine();
}

Parallel Method

/// <summary>
/// Work item for the parallel run: passes <paramref name="text"/> to
/// <c>Compress</c>, then to <c>CompressGZipStream</c>, and finally writes an
/// empty line to the console.
/// </summary>
/// <param name="text">Text to compress with both methods.</param>
static void CompressParallel(string text)
{
    // The returned string is not used; the call is made for its console output.
    Compress(text);

    CompressGZipStream(text);

    // Blank line after this item's output.
    Console.Out.WriteLine();
}

Watcher

/// <summary>
/// Minimal elapsed-time helper used by the benchmark methods.
/// </summary>
/// <remarks>
/// Backed by <see cref="System.Diagnostics.Stopwatch"/> rather than
/// <c>DateTime.Now</c>: the system clock has coarse resolution and can jump
/// (time sync, DST), which distorts short measurements.
/// </remarks>
public class Watcher
{
    readonly System.Diagnostics.Stopwatch _stopwatch = new System.Diagnostics.Stopwatch();

    // True once Stop() has been called for the current measurement.
    bool _stopped;

    /// <summary>Begins a new measurement, discarding any previous one.</summary>
    public void Start()
    {
        _stopped = false;
        _stopwatch.Restart();
    }

    /// <summary>Ends the current measurement.</summary>
    public void Stop()
    {
        _stopwatch.Stop();
        _stopped = true;
    }

    /// <summary>
    /// Returns the duration of the last completed measurement in milliseconds.
    /// </summary>
    /// <returns>
    /// The elapsed milliseconds between <see cref="Start"/> and <see cref="Stop"/>,
    /// or 0 if <see cref="Stop"/> has not been called yet.
    /// </returns>
    public double TotalMilliseconds()
    {
        return _stopped ? _stopwatch.Elapsed.TotalMilliseconds : 0;
    }
}

Generate Random Text

// One generator shared by every Generate call. Creating a new Random per call
// can yield identical sequences when calls happen in quick succession on
// runtimes that seed from the system clock.
static readonly Random _rnd = new Random();

/// <summary>
/// Builds a random string of upper-case letters and digits.
/// </summary>
/// <param name="length">Number of characters to generate; must not be negative.</param>
/// <returns>A string of exactly <paramref name="length"/> characters.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> is negative.
/// </exception>
static string Generate(int length) {
    if (length < 0)
        throw new ArgumentOutOfRangeException(nameof(length));

    const string chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    var buffer = new char[length];

    // Random is not thread-safe; the lock keeps the shared instance valid if
    // Generate is ever called from several threads.
    lock (_rnd) {
        for (var i = 0; i < buffer.Length; i++)
            buffer[i] = chars[_rnd.Next(chars.Length)];
    }

    return new string(buffer);
}

Conclusion

GZipStream is very fast at compressing, almost 60% faster, but its compression ratio is less convincing.

SevenZip compresses much more effectively, but its speed is not the best in the world.

If you need to compress or decompress thousands of items asynchronously, GZipStream is the better choice, since its algorithm does not require many resources regardless of the hardware.

SevenZip is excellent for sending large data to a server, such as the bytes of an image, file, or document, or even for use in an instant messenger.