using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
namespace QiHe.CodeLib.Compress
{
public class Compressor
{
const int MaximumLength = 258;
const int MaximumDistance = 32768;
public static IEnumerable<CompressedItem> AnalyzeItems(Stream input)
{
DoubleBufferedStreamReader reader = new DoubleBufferedStreamReader(input, MaximumLength, MaximumDistance);
Dictionary<int, List<long>> chain = new Dictionary<int, List<long>>();
foreach (CompressedItem item in AnalyzeItems(reader, chain))
{
yield return item;
}
}
public static IEnumerable<CompressedItem> AnalyzeItems(Stream input, CompressionOption option)
{
DoubleBufferedStreamReader reader = new DoubleBufferedStreamReader(input, MaximumLength, MaximumDistance);
Dictionary<int, List<long>> chain = new Dictionary<int, List<long>>();
switch (option)
{
case CompressionOption.Faster:
foreach (CompressedItem item in AnalyzeItemsFastMode(reader, chain))
{
yield return item;
}
break;
case CompressionOption.Normal:
foreach (CompressedItem item in AnalyzeItems(reader, chain))
{
yield return item;
}
break;
case CompressionOption.Smaller:
foreach (CompressedItem item in AnalyzeItemsLazyMode(reader, chain))
{
yield return item;
}
break;
}
}
//similar to LZSS algorithm with hashing
static IEnumerable<CompressedItem> AnalyzeItems(DoubleBufferedStreamReader input, Dictionary<int, List<long>> chain)
{
while (true)
{
int key = Examine3Bytes(input);
if (key == -1) break;
List<long> positions = GetPositions(chain, key);
if (positions.Count > 0)
{
SlideWindow(positions, input.Position);
}
bool foundmatch = false;
if (positions.Count > 0)
{
LengthDistancePair match = FindLongestMatch(input, positions);
if (match.Length > 0)
{
yield return match;
positions.Add(input.Position);
for (int i = 0; i < match.Length - 1; i++)
{
input.Position++;
key = Examine3Bytes(input);
GetPositions(chain, key).Add(input.Position);
}
input.Position++;
foundmatch = true;
}
}
if (!foundmatch)
{
positions.Add(input.Position);
byte value = input.ReadByte();
yield return new LiteralByte(value);
}
}
byte[] data = input.ReadToEnd();
if (data.Length > 0)
{
foreach (byte value in data)
{
yield return new LiteralByte(value);
}
}
}
static IEnumerable<CompressedItem> AnalyzeItemsFastMode(DoubleBufferedStreamReader input, Dictionary<int, List<long>> chain)
{
while (true)
{
int key = Examine3Bytes(input);
if (key == -1) break;
List<long> positions = GetPositions(chain, key);
if (positions.Count > 0)
{
SlideWindow(positions, input.Position);
}
bool foundmatch = false;
if (positions.Count > 0)
{
LengthDistancePair match = FindLongestMatch(input, positions);
if (match.Length > 0)
{
yield return match;
positions.Add(input.Position);
input.Position += match.Length;
foundmatch = true;
}
}
if (!foundmatch)
{
positions.Add(input.Position);
byte value = input.ReadByte();
yield return new LiteralByte(value);
}
}
byte[] data = input.ReadToEnd();
if (data.Length > 0)
{
foreach (byte value in data)
{
yield return new LiteralByte(value);
}
}
}
static IEnumerable<CompressedItem> AnalyzeItemsLazyMode(DoubleBufferedStreamReader input, Dictionary<int, List<long>> chain)
{
LengthDistancePair zeroMatch = new LengthDistancePair(0, 0);
LengthDistancePair lastMatch = zeroMatch;
byte lastByte = 0;
bool isTrying = false;
while (true)
{
LengthDistancePair match = FindLongestMatch(input, chain);
if (match == null) break;
ChainPosition(input, chain);
if (match.Length > lastMatch.Length || lastMatch.Length == 0)
{
if (isTrying)
{
yield return new LiteralByte(lastByte);
}
lastByte = input.ReadByte();
lastMatch = match;
isTrying = true;
}
else
{
yield return lastMatch;
for (int i = 2; i < lastMatch.Length; i++)
{
input.Position++;
ChainPosition(input, chain);
}
input.Position++;
lastMatch = zeroMatch;
isTrying = false;
}
}
if (isTrying)
{
if (lastMatch.Length > 0)
{
yield return lastMatch;
input.Position += lastMatch.Length - 1;
}
else
{
yield return new LiteralByte(lastByte);
}
}
byte[] data = input.ReadToEnd();
if (data.Length > 0)
{
if (data.Length > 3)
{
throw new Exception(data.Length + " bytes uncompressed.");
}
foreach (byte value in data)
{
yield return new LiteralByte(value);
}
}
}
private static void ChainPosition(DoubleBufferedStreamReader input, Dictionary<int, List<long>> chain)
{
int key = Examine3Bytes(input);
GetPositions(chain, key).Add(input.Position);
}
private static LengthDistancePair FindLongestMatch(DoubleBufferedStreamReader input, Dictionary<int, List<long>> chain)
{
int key = Examine3Bytes(input);
if (key == -1) return null;
List<long> positions = GetPositions(chain, key);
if (positions.Count > 0)
{
SlideWindow(positions, input.Position);
}
return FindLongestMatch(input, positions);
}
private static List<long> GetPositions(Dictionary<int, List<long>> chain, int key)
{
if (!chain.ContainsKey(key))
{
chain[key] = new List<long>();
}
return chain[key];
}
private static LengthDistancePair FindLongestMatch(DoubleBufferedStreamReader input, List<long> positions)
{
input.Mode = DoubleBufferedStreamReader.WorkMode.Seeking;
long pos0 = input.Position;
int maxlen = 0;
int maxdis = 0;
byte[] nextBytes = input.PeekBytes(MaximumLength);
for (int i = positions.Count - 1; i >= 0; i--)
{
int len = Match(input, positions[i], nextBytes);
int dis = (int)(pos0 - positions[i]);
if (len > maxlen
//trade off between length and distance
//&& len > DeflateFormat.DeflateCoding.Distance[dis].ExtraBits - 2
)
{
maxlen = len;
maxdis = dis;
}
}
input.Position = pos0;
input.Mode = DoubleBufferedStreamReader.WorkMode.Reading;
return new LengthDistancePair(maxlen, maxdis);
}
private static void SlideWindow(List<long> positions, long currentPos)
{
List<long> tobeRemoved = new List<long>();
foreach (long pos in positions)
{
if (currentPos - pos > MaximumDistance)
{
tobeRemoved.Add(pos);
}
else
{
break;
}
}
foreach (long pos in tobeRemoved)
{
positions.Remove(pos);
}
}
private static int Match(DoubleBufferedStreamReader input, long pos, byte[] nextBytes)
{
int length = 3;
input.Position = pos + length;
while (length < nextBytes.Length)
{
if (input.ReadByte() == nextBytes[length])
{
length++;
}
else
{
break;
}
}
return length;
}
private static int Examine3Bytes(DoubleBufferedStreamReader input)
{
byte[] bytes = input.PeekBytes(3);
if (bytes.Length == 3)
{
//int value = bytes[0] + bytes[1] * 256 + bytes[2] * 65536;
int value = (((bytes[0] << 8) + bytes[1]) << 8) + bytes[2];
return value;
}
else
{
return -1;
}
}
}
}