Click here to Skip to main content
15,891,473 members
Articles / Programming Languages / C#

Project Tool

Rate me:
Please Sign up or sign in to vote.
4.69/5 (10 votes)
23 Sep 2007CPOL3 min read 54.6K   1.7K   73  
Backup your C# solution and projects for archive or source code sharing. Temporary or unused files are excluded.
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;

namespace QiHe.CodeLib.Compress
{
    public class Compressor
    {
        const int MaximumLength = 258;
        const int MaximumDistance = 32768;

        public static IEnumerable<CompressedItem> AnalyzeItems(Stream input)
        {
            DoubleBufferedStreamReader reader = new DoubleBufferedStreamReader(input, MaximumLength, MaximumDistance);
            Dictionary<int, List<long>> chain = new Dictionary<int, List<long>>();
            foreach (CompressedItem item in AnalyzeItems(reader, chain))
            {
                yield return item;
            }
        }

        public static IEnumerable<CompressedItem> AnalyzeItems(Stream input, CompressionOption option)
        {
            DoubleBufferedStreamReader reader = new DoubleBufferedStreamReader(input, MaximumLength, MaximumDistance);
            Dictionary<int, List<long>> chain = new Dictionary<int, List<long>>();
            switch (option)
            {
                case CompressionOption.Faster:
                    foreach (CompressedItem item in AnalyzeItemsFastMode(reader, chain))
                    {
                        yield return item;
                    }
                    break;
                case CompressionOption.Normal:
                    foreach (CompressedItem item in AnalyzeItems(reader, chain))
                    {
                        yield return item;
                    }
                    break;
                case CompressionOption.Smaller:
                    foreach (CompressedItem item in AnalyzeItemsLazyMode(reader, chain))
                    {
                        yield return item;
                    }
                    break;
            }
        }

        //similar to LZSS algorithm with hashing
        static IEnumerable<CompressedItem> AnalyzeItems(DoubleBufferedStreamReader input, Dictionary<int, List<long>> chain)
        {
            while (true)
            {
                int key = Examine3Bytes(input);
                if (key == -1) break;

                List<long> positions = GetPositions(chain, key);
                if (positions.Count > 0)
                {
                    SlideWindow(positions, input.Position);
                }

                bool foundmatch = false;
                if (positions.Count > 0)
                {
                    LengthDistancePair match = FindLongestMatch(input, positions);
                    if (match.Length > 0)
                    {
                        yield return match;
                        positions.Add(input.Position);
                        for (int i = 0; i < match.Length - 1; i++)
                        {
                            input.Position++;
                            key = Examine3Bytes(input);
                            GetPositions(chain, key).Add(input.Position);
                        }
                        input.Position++;
                        foundmatch = true;
                    }
                }
                if (!foundmatch)
                {
                    positions.Add(input.Position);
                    byte value = input.ReadByte();
                    yield return new LiteralByte(value);
                }
            }

            byte[] data = input.ReadToEnd();
            if (data.Length > 0)
            {
                foreach (byte value in data)
                {
                    yield return new LiteralByte(value);
                }
            }
        }


        static IEnumerable<CompressedItem> AnalyzeItemsFastMode(DoubleBufferedStreamReader input, Dictionary<int, List<long>> chain)
        {
            while (true)
            {
                int key = Examine3Bytes(input);
                if (key == -1) break;

                List<long> positions = GetPositions(chain, key);
                if (positions.Count > 0)
                {
                    SlideWindow(positions, input.Position);
                }

                bool foundmatch = false;
                if (positions.Count > 0)
                {
                    LengthDistancePair match = FindLongestMatch(input, positions);
                    if (match.Length > 0)
                    {
                        yield return match;
                        positions.Add(input.Position);
                        input.Position += match.Length;
                        foundmatch = true;
                    }
                }
                if (!foundmatch)
                {
                    positions.Add(input.Position);
                    byte value = input.ReadByte();
                    yield return new LiteralByte(value);
                }
            }

            byte[] data = input.ReadToEnd();
            if (data.Length > 0)
            {
                foreach (byte value in data)
                {
                    yield return new LiteralByte(value);
                }
            }
        }

        static IEnumerable<CompressedItem> AnalyzeItemsLazyMode(DoubleBufferedStreamReader input, Dictionary<int, List<long>> chain)
        {
            LengthDistancePair zeroMatch = new LengthDistancePair(0, 0);
            LengthDistancePair lastMatch = zeroMatch;
            byte lastByte = 0;
            bool isTrying = false;
            while (true)
            {
                LengthDistancePair match = FindLongestMatch(input, chain);
                if (match == null) break;
                ChainPosition(input, chain);

                if (match.Length > lastMatch.Length || lastMatch.Length == 0)
                {
                    if (isTrying)
                    {
                        yield return new LiteralByte(lastByte);
                    }

                    lastByte = input.ReadByte();
                    lastMatch = match;
                    isTrying = true;
                }
                else
                {
                    yield return lastMatch;

                    for (int i = 2; i < lastMatch.Length; i++)
                    {
                        input.Position++;
                        ChainPosition(input, chain);
                    }
                    input.Position++;
                    lastMatch = zeroMatch;
                    isTrying = false;
                }
            }
            if (isTrying)
            {
                if (lastMatch.Length > 0)
                {
                    yield return lastMatch;
                    input.Position += lastMatch.Length - 1;
                }
                else
                {
                    yield return new LiteralByte(lastByte);
                }
            }

            byte[] data = input.ReadToEnd();
            if (data.Length > 0)
            {
                if (data.Length > 3)
                {
                    throw new Exception(data.Length + " bytes uncompressed.");
                }
                foreach (byte value in data)
                {
                    yield return new LiteralByte(value);
                }
            }
        }

        private static void ChainPosition(DoubleBufferedStreamReader input, Dictionary<int, List<long>> chain)
        {
            int key = Examine3Bytes(input);
            GetPositions(chain, key).Add(input.Position);
        }

        private static LengthDistancePair FindLongestMatch(DoubleBufferedStreamReader input, Dictionary<int, List<long>> chain)
        {
            int key = Examine3Bytes(input);
            if (key == -1) return null;

            List<long> positions = GetPositions(chain, key);
            if (positions.Count > 0)
            {
                SlideWindow(positions, input.Position);
            }

            return FindLongestMatch(input, positions);
        }

        private static List<long> GetPositions(Dictionary<int, List<long>> chain, int key)
        {
            if (!chain.ContainsKey(key))
            {
                chain[key] = new List<long>();
            }
            return chain[key];
        }

        private static LengthDistancePair FindLongestMatch(DoubleBufferedStreamReader input, List<long> positions)
        {
            input.Mode = DoubleBufferedStreamReader.WorkMode.Seeking;
            long pos0 = input.Position;
            int maxlen = 0;
            int maxdis = 0;
            byte[] nextBytes = input.PeekBytes(MaximumLength);
            for (int i = positions.Count - 1; i >= 0; i--)
            {
                int len = Match(input, positions[i], nextBytes);
                int dis = (int)(pos0 - positions[i]);
                if (len > maxlen
                    //trade off between length and distance
                    //&& len > DeflateFormat.DeflateCoding.Distance[dis].ExtraBits - 2
                    )
                {
                    maxlen = len;
                    maxdis = dis;
                }
            }
            input.Position = pos0;
            input.Mode = DoubleBufferedStreamReader.WorkMode.Reading;
            return new LengthDistancePair(maxlen, maxdis);
        }

        private static void SlideWindow(List<long> positions, long currentPos)
        {
            List<long> tobeRemoved = new List<long>();
            foreach (long pos in positions)
            {
                if (currentPos - pos > MaximumDistance)
                {
                    tobeRemoved.Add(pos);
                }
                else
                {
                    break;
                }
            }
            foreach (long pos in tobeRemoved)
            {
                positions.Remove(pos);
            }
        }

        private static int Match(DoubleBufferedStreamReader input, long pos, byte[] nextBytes)
        {
            int length = 3;
            input.Position = pos + length;
            while (length < nextBytes.Length)
            {
                if (input.ReadByte() == nextBytes[length])
                {
                    length++;
                }
                else
                {
                    break;
                }
            }
            return length;
        }

        private static int Examine3Bytes(DoubleBufferedStreamReader input)
        {
            byte[] bytes = input.PeekBytes(3);
            if (bytes.Length == 3)
            {
                //int value = bytes[0] + bytes[1] * 256 + bytes[2] * 65536;
                int value = (((bytes[0] << 8) + bytes[1]) << 8) + bytes[2];
                return value;
            }
            else
            {
                return -1;
            }
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Architect YunCheDa Hangzhou
China China
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions