// ==========================================================================
// Class Implementation : COXBinDiffCalculator
// ==========================================================================
// Source file : bdifcalc.cpp
// This software along with its related components, documentation and files ("The Libraries")
// is � 1994-2007 The Code Project (1612916 Ontario Limited) and use of The Libraries is
// governed by a software license agreement ("Agreement"). Copies of the Agreement are
// available at The Code Project (www.codeproject.com), as part of the package you downloaded
// to obtain this file, or directly from our office. For a copy of the license governing
// this software, you may contact us at legalaffairs@codeproject.com, or by calling 416-849-8900.
// //////////////////////////////////////////////////////////////////////////
#include "stdafx.h" // standard MFC include
#include "bdifcalc.h" // class specification
#include "progress.h" // progress bar
#include "oxdflhdr.h" // file header
#include <math.h>
#ifdef _DEBUG
#undef THIS_FILE
static char BASED_CODE THIS_FILE[] = __FILE__;
#endif
IMPLEMENT_DYNAMIC(COXBinDiffCalculator, CObject)
#define new DEBUG_NEW
/////////////////////////////////////////////////////////////////////////////
// Definition of static members
const double COXBinDiffCalculator::m_cMinMeanChunkLen = 20.0;
const double COXBinDiffCalculator::m_cMaxMeanChunkLen = 80.0;
const double COXBinDiffCalculator::m_cBigChunkLen = 500.0;
const TCHAR* COXBinDiffCalculator::m_cFileHeader = TEXT("BinDiff0001");
// If m_cDropEOL is TRUE, cr and lf are not allowed to function as delimiters
const BOOL COXBinDiffCalculator::m_cDropEOL = FALSE;
const DWORD COXBinDiffCalculator::m_cBufSiz = 128;
const WORD COXBinDiffCalculator::m_cMinMatchLen = 6;
// ... Must be >= m_cMinMatchLen
const WORD COXBinDiffCalculator::m_cMinEqualRunLen = (COXBinDiffCalculator::m_cMinMatchLen+4);
// Tag values. A tag value is encoded in the 4 lowest bits of a tag byte.
// The 4 high bits of the tag byte are used for encoding a value 0-15.
const BYTE COXBinDiffCalculator::m_cTagSmallDiff = 0;
const BYTE COXBinDiffCalculator::m_cTagMediumDiff = 1;
const BYTE COXBinDiffCalculator::m_cTagLargeDiff = 2;
const BYTE COXBinDiffCalculator::m_cTagSmallNearCopy = 3;
const BYTE COXBinDiffCalculator::m_cTagMediumNearCopy = 4;
const BYTE COXBinDiffCalculator::m_cTagLargeNearCopy = 5;
const BYTE COXBinDiffCalculator::m_cTagSmallDistantCopy = 6;
const BYTE COXBinDiffCalculator::m_cTagMediumDistantCopy = 7;
const BYTE COXBinDiffCalculator::m_cTagLargeDistantCopy = 8;
const BYTE COXBinDiffCalculator::m_cTagSmallFarCopy = 9;
const BYTE COXBinDiffCalculator::m_cTagMediumFarCopy = 0x0A;
const BYTE COXBinDiffCalculator::m_cTagLargeFarCopy = 0x0B;
// ... Tags 0x0C,0x0D,0x0E unused.
const BYTE COXBinDiffCalculator::m_cTagEOF = 0x0F;
// Maximum values encodable by different tags.
// 4-bit value (0-15) is used to encode a value 1 - m_cSmallSize;
// 12-bit value (0-4095) is used to encode a value m_cSmallSize+1 - m_cMediumSize;
// 20-bit value (0-1048575) is used to encode a value m_cMediumSize+1 - m_cLargeSize;
const DWORD COXBinDiffCalculator::m_cSmallSize = 16L;
const DWORD COXBinDiffCalculator::m_cMediumSize = (4096L + COXBinDiffCalculator::m_cSmallSize);
const DWORD COXBinDiffCalculator::m_cLargeSize = (1048576L + COXBinDiffCalculator::m_cMediumSize);
// Maximum file positions encodable in 2 or 3 bytes
const DWORD COXBinDiffCalculator::m_cNearDistance = 0xFFFFL;
const DWORD COXBinDiffCalculator::m_cDistantDistance = 0xFFFFFF;
const DWORD COXBinDiffCalculator::m_cMaxStrLen = 255;
// Data members -------------------------------------------------------------
// protected:
// COXDiffProgress* m_pProgressBar;
// --- Pointer to the progress bar (may be derived from COXDiffProgress)
// private:
// Member functions ---------------------------------------------------------
// public:
COXBinDiffCalculator::COXBinDiffCalculator()
:
#if ! BDEXTR
m_LstFreeTreeNode(NULL),
m_LstFreeMatchBlock(NULL),
#endif /* ! BDEXTR */
m_pProgressBar(NULL)
{
m_pProgressBar = new COXDiffProgress;
}
#if ! BDEXTR
void COXBinDiffCalculator::SubtractFiles(LPCTSTR orgFilNam, LPCTSTR derivedFilNam, LPCTSTR diffFilNam,
COXDiffFileHeader* pHeader)
{
CStdioFile OrgFil;
CStdioFile DerivedFil;
CStdioFile DiffFil;
if (!DiffFil.Open(diffFilNam, CFile:: modeCreate | CFile::modeReadWrite | CFile::typeBinary))
{
CString errMsg;
errMsg = TEXT("Cannot open file ");
errMsg += diffFilNam;
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Abort(errMsg);
}
if (!OrgFil.Open(orgFilNam, CFile::modeRead | CFile::typeBinary | CFile::shareDenyWrite))
{
CString errMsg;
errMsg = TEXT("Cannot open file ");
errMsg += orgFilNam;
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Abort(errMsg);
}
if (!DerivedFil .Open(derivedFilNam, CFile::modeRead | CFile::typeBinary | CFile::shareDenyWrite))
{
CString errMsg;
errMsg = TEXT("Cannot open file ");
errMsg += derivedFilNam;
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Abort(errMsg);
}
SubtractFiles(&OrgFil, &DerivedFil, &DiffFil, pHeader);
OrgFil.Close();
DerivedFil.Close();
DiffFil.Close();
}
void COXBinDiffCalculator::SubtractFiles(CFile* pOrgFil, CFile* pDerivedFil, CFile* pDiffFil,
COXDiffFileHeader* pHeader)
{
COXTreeNode* pOrgTreeRoot = NULL;
int delim;
COXMatchBlock* pMatchLst = NULL;
LONG size;
LONG orgSum;
LONG orgSize;
LONG derivedSum;
LONG derivedSize;
// if no special header was specified, suplly a default header
BOOL bDefHeader(FALSE);
if (pHeader == NULL)
{
pHeader = new COXDiffFileHeader(m_cFileHeader);
bDefHeader = TRUE;
}
DWORD nBeginHeaderPos = pDiffFil->GetPosition();
pHeader->WriteHeader(pDiffFil);
DWORD nEndHeaderPos = pDiffFil->GetPosition();
orgSize = pOrgFil->GetLength();
size = derivedSize = pDerivedFil->GetLength();
// Write dummy check-data; gets rewritten at end of this procedure
WriteLongNBytes(0L,pDiffFil,4);
WriteLongNBytes(0L,pDiffFil,4);
WriteLongNBytes(0L,pDiffFil,4);
WriteLongNBytes(0L,pDiffFil,4);
orgSum = 0;
derivedSum = 0;
// Dummy block
{
if (size == 0)
{
int byte;
BYTE helpByte;
// ... EOF on diff fil
pDiffFil->Write(&m_cTagEOF, 1);
// ... Adjust checksum
do
{
if (pOrgFil->Read(&helpByte, 1) == 1)
byte = helpByte;
else
byte = EOF;
if (byte != EOF)
{
orgSum += byte;
}
} while (byte != EOF);
}
else
{
// Find suitable delimiter
delim = FindDelimiter(pOrgFil,m_cMinMeanChunkLen,m_cMaxMeanChunkLen);
if (delim < 0)
delim = 0;
// Build indexed position tree
pOrgTreeRoot = BuildTree(pOrgFil,delim,orgSum);
// Match files
pMatchLst = MatchFiles(pOrgTreeRoot,pOrgFil,pDerivedFil,delim,derivedSum);
// Write diff file
DumpDiff(pMatchLst,pDerivedFil,pDiffFil);
}
}
if (pMatchLst != NULL)
DeleteMatchBlocks(pMatchLst);
if (pOrgTreeRoot != NULL)
DeleteTreeNode(pOrgTreeRoot);
// MSDOS has no resource fork: encode extra EOF
pDiffFil->Write(&m_cTagEOF, 1);
// Adjust the check-data
pDiffFil->Seek(nEndHeaderPos - nBeginHeaderPos, CFile::begin);
WriteLongNBytes(orgSize,pDiffFil,4);
WriteLongNBytes(orgSum,pDiffFil,4);
WriteLongNBytes(derivedSize,pDiffFil,4);
WriteLongNBytes(derivedSum,pDiffFil,4);
if (bDefHeader)
delete pHeader;
}
#endif /* ! BDEXTR */
void COXBinDiffCalculator::AddFiles(LPCTSTR orgFilNam, LPCTSTR derivedFilNam, LPCTSTR diffFilNam,
COXDiffFileHeader* pHeader)
{
CStdioFile OrgFil;
CStdioFile DerivedFil;
CFile DiffFil;
CFileException* pFileEx = new CFileException;
if (!DiffFil.Open(diffFilNam, CFile::modeRead | CFile::typeBinary | CFile::shareDenyWrite, pFileEx))
{
CString errMsg;
errMsg = TEXT("Cannot open file ");
errMsg += diffFilNam;
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Abort(errMsg);
THROW(pFileEx);
}
if (!OrgFil.Open(orgFilNam, CFile::modeRead | CFile::typeBinary | CFile::shareDenyWrite, pFileEx))
{
CString errMsg;
errMsg = TEXT("Cannot open file ");
errMsg += orgFilNam;
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Abort(errMsg);
THROW(pFileEx);
}
if (!DerivedFil.Open(derivedFilNam, CFile:: modeCreate | CFile::modeReadWrite | CFile::typeBinary, pFileEx))
{
CString errMsg;
errMsg =TEXT(" Cannot open file ");
errMsg += derivedFilNam;
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Abort(errMsg);
THROW(pFileEx);
}
#ifdef WIN32
pFileEx->Delete();
#else
delete pFileEx;
#endif
AddFiles(&OrgFil, &DerivedFil, &DiffFil, pHeader);
DerivedFil.Close();
OrgFil.Close();
DiffFil.Close();
}
void COXBinDiffCalculator::AddFiles(CFile* pOrgFil, CFile* pDerivedFil, CFile* pDiffFil,
COXDiffFileHeader* pHeader)
{
int c;
BYTE helpByte;
LONG blockLen;
LONG blockPos;
int tag;
LONG derivedSize;
LONG derivedSum;
LONG orgSize;
LONG orgSum;
LONG checkDerivedSize;
LONG checkDerivedSum;
LONG checkOrgSize;
LONG checkOrgSum;
LONG prevDiffPos;
LONG diffPos;
LONG curPos;
BOOL bDefHeader(FALSE);
TRY
{
// if no special header was specified, suplly a default header
if (pHeader == NULL)
{
pHeader = new COXDiffFileHeader(m_cFileHeader);
bDefHeader = TRUE;
}
pHeader->ReadHeader(pDiffFil);
}
CATCH_ALL(e)
{
ASSERT(m_pProgressBar != NULL);
if (bDefHeader)
delete pHeader;
m_pProgressBar->Abort(TEXT("Incorrect difference header"));
THROW_LAST();
}
END_CATCH_ALL
// Read sizes and checksums of original and updated file
checkOrgSize = ReadLongNBytes(pDiffFil,4);
checkOrgSum = ReadLongNBytes(pDiffFil,4);
orgSize = 0;
orgSum = 0;
checkDerivedSize = ReadLongNBytes(pDiffFil,4);
checkDerivedSum = ReadLongNBytes(pDiffFil,4);
derivedSize = 0;
derivedSum = 0;
{
orgSize += pOrgFil->GetLength();
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Init(0,pOrgFil->GetLength(),TEXT("Checking:"));
curPos = 0;
c = EOF;
while (pOrgFil->Read(&helpByte, 1) == 1)
{
c = helpByte;
if ((curPos & 0xFFF) == 0)
if (!m_pProgressBar->Adjust(curPos))
m_pProgressBar->Abort(TEXT("Aborting"));
orgSum += c;
curPos++;
}
pOrgFil->Seek(0L, CFile::begin);
m_pProgressBar->Close();
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Init(0,pDiffFil->GetLength(),TEXT("Apply diff:"));
tag = 0;
prevDiffPos = 0;
diffPos = 0;
while (tag != m_cTagEOF && pDiffFil->Read(&helpByte, 1) == 1)
{
c = helpByte;
diffPos++;
// Adjust bar every 256 bytes
if (diffPos - prevDiffPos > 0xFF)
{
if (!m_pProgressBar->Adjust(diffPos))
m_pProgressBar->Abort(TEXT("Aborting"));
prevDiffPos = diffPos;
}
tag = c & 0x0F;
switch (tag)
{
case m_cTagSmallDiff:
blockLen = (c >> 4) + 1;
CopyFileChars(blockLen,pDiffFil,pDerivedFil,derivedSum);
diffPos += blockLen;
derivedSize += blockLen;
break;
case m_cTagMediumDiff:
blockLen = (c >> 4);
if (pDiffFil->Read(&helpByte, 1) == 1)
c = helpByte;
else
c = EOF;
diffPos++;
if (c != EOF)
{
blockLen = ((blockLen << 8) | c) + m_cSmallSize + 1;
CopyFileChars(blockLen,pDiffFil,pDerivedFil,derivedSum);
diffPos += blockLen;
derivedSize += blockLen;
}
break;
case m_cTagLargeDiff:
blockLen = (c >> 4);
if (pDiffFil->Read(&helpByte, 1) == 1)
c = helpByte;
else
c = EOF;
diffPos++;
if (c != EOF)
{
blockLen = (blockLen << 8) | c;
if (pDiffFil->Read(&helpByte, 1) == 1)
c = helpByte;
else
c = EOF;
diffPos++;
if (c != EOF)
{
blockLen = ((blockLen << 8) | c) + m_cMediumSize + 1;
}
CopyFileChars(blockLen,pDiffFil,pDerivedFil,derivedSum);
diffPos += blockLen;
derivedSize += blockLen;
}
break;
case m_cTagSmallNearCopy:
blockLen = (c >> 4) + 1;
blockPos = ReadLongNBytes(pDiffFil,2);
diffPos += 2;
pOrgFil->Seek(blockPos, CFile::begin);
CopyFileChars(blockLen,pOrgFil,pDerivedFil,derivedSum);
derivedSize += blockLen;
break;
case m_cTagMediumNearCopy:
blockLen = (c >> 4);
if (pDiffFil->Read(&helpByte, 1) == 1)
c = helpByte;
else
c = EOF;
diffPos++;
if (c != EOF)
{
blockLen = ((blockLen << 8) | c) + m_cSmallSize + 1;
blockPos = ReadLongNBytes(pDiffFil,2);
diffPos += 2;
pOrgFil->Seek(blockPos, CFile::begin);
CopyFileChars(blockLen,pOrgFil,pDerivedFil,derivedSum);
derivedSize += blockLen;
}
break;
case m_cTagLargeNearCopy:
blockLen = (c >> 4);
if (pDiffFil->Read(&helpByte, 1) == 1)
c = helpByte;
else
c = EOF;
diffPos++;
if (c != EOF)
{
blockLen = (blockLen << 8) | c;
if (pDiffFil->Read(&helpByte, 1) == 1)
c = helpByte;
else
c = EOF;
diffPos++;
if (c != EOF)
{
blockLen = ((blockLen << 8) | c) + m_cMediumSize + 1;
blockPos = ReadLongNBytes(pDiffFil,2);
diffPos += 2;
pOrgFil->Seek(blockPos, CFile::begin);
CopyFileChars(blockLen,pOrgFil,pDerivedFil,derivedSum);
derivedSize += blockLen;
}
}
break;
case m_cTagSmallDistantCopy:
blockLen = (c >> 4) + 1;
blockPos = ReadLongNBytes(pDiffFil,3);
diffPos += 3;
pOrgFil->Seek(blockPos, CFile::begin);
CopyFileChars(blockLen,pOrgFil,pDerivedFil,derivedSum);
derivedSize += blockLen;
break;
case m_cTagMediumDistantCopy:
blockLen = (c >> 4);
if (pDiffFil->Read(&helpByte, 1) == 1)
c = helpByte;
else
c = EOF;
diffPos++;
if (c != EOF)
{
blockLen = ((blockLen << 8) | c) + m_cSmallSize + 1;
blockPos = ReadLongNBytes(pDiffFil,3);
diffPos += 3;
pOrgFil->Seek(blockPos, CFile::begin);
CopyFileChars(blockLen,pOrgFil,pDerivedFil,derivedSum);
derivedSize += blockLen;
}
break;
case m_cTagLargeDistantCopy:
blockLen = (c >> 4);
if (pDiffFil->Read(&helpByte, 1) == 1)
c = helpByte;
else
c = EOF;
diffPos++;
if (c != EOF)
{
blockLen = (blockLen << 8) | c;
if (pDiffFil->Read(&helpByte, 1) == 1)
c = helpByte;
else
c = EOF;
diffPos++;
if (c != EOF)
{
blockLen = ((blockLen << 8) | c) + m_cMediumSize + 1;
blockPos = ReadLongNBytes(pDiffFil,3);
diffPos += 3;
pOrgFil->Seek(blockPos, CFile::begin);
CopyFileChars(blockLen,pOrgFil,pDerivedFil,derivedSum);
derivedSize += blockLen;
}
}
break;
case m_cTagSmallFarCopy:
blockLen = (c >> 4) + 1;
blockPos = ReadLongNBytes(pDiffFil,4);
diffPos += 4;
pOrgFil->Seek(blockPos, CFile::begin);
CopyFileChars(blockLen,pOrgFil,pDerivedFil,derivedSum);
derivedSize += blockLen;
break;
case m_cTagMediumFarCopy:
blockLen = (c >> 4);
if (pDiffFil->Read(&helpByte, 1) == 1)
c = helpByte;
else
c = EOF;
diffPos++;
if (c != EOF)
{
blockLen = ((blockLen << 8) | c) + m_cSmallSize + 1;
blockPos = ReadLongNBytes(pDiffFil,4);
diffPos += 4;
pOrgFil->Seek(blockPos, CFile::begin);
CopyFileChars(blockLen,pOrgFil,pDerivedFil,derivedSum);
derivedSize += blockLen;
}
break;
case m_cTagLargeFarCopy:
blockLen = (c >> 4);
if (pDiffFil->Read(&helpByte, 1) == 1)
c = helpByte;
else
c = EOF;
diffPos++;
if (c != EOF)
{
blockLen = (blockLen << 8) | c;
if (pDiffFil->Read(&helpByte, 1) == 1)
c = helpByte;
else
c = EOF;
diffPos++;
if (c != EOF)
{
blockLen = ((blockLen << 8) | c) + m_cMediumSize + 1;
blockPos = ReadLongNBytes(pDiffFil,4);
diffPos += 4;
pOrgFil->Seek(blockPos, CFile::begin);
CopyFileChars(blockLen,pOrgFil,pDerivedFil,derivedSum);
derivedSize += blockLen;
}
}
break;
case m_cTagEOF:
break;
default:
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Abort(TEXT("Unknown tag in diff file - possibly created by a more recent BinDiff"));
break;
}
}
m_pProgressBar->Close();
if (bDefHeader)
delete pHeader;
} // End of dummy block
if (orgSize != checkOrgSize || orgSum != checkOrgSum)
{
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Abort(TEXT("Original file is not the file the diff was created with."));
}
if (derivedSize != checkDerivedSize || derivedSum != checkDerivedSum)
{
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Abort(TEXT("CRC failure : New derived file is corrupt.\nPatch file could be invalid"));
}
}
void COXBinDiffCalculator::ReplaceProgressBar(COXDiffProgress* pProgressBar)
{
// ... Should already have a progress bar
ASSERT(m_pProgressBar != NULL);
// ... Not allowed to replace by an empty one
ASSERT(pProgressBar != NULL);
delete m_pProgressBar;
m_pProgressBar = pProgressBar;
}
#ifdef _DEBUG
void COXBinDiffCalculator::Dump(CDumpContext& dc) const
{
CObject::Dump(dc);
}
void COXBinDiffCalculator::AssertValid() const
{
CObject::AssertValid();
}
#endif
COXBinDiffCalculator::~COXBinDiffCalculator()
{
// ... Should always have a progress bar
ASSERT(m_pProgressBar != NULL);
delete m_pProgressBar;
if (m_LstFreeTreeNode != NULL)
DeleteTreeNode(m_LstFreeTreeNode);
DeleteMatchBlocks(m_LstFreeMatchBlock);
}
// protected:
void COXBinDiffCalculator::DeleteTreeNode(COXTreeNode* pTreeNode)
// --- In : pTreeNode : The tree node to delete
// --- Out :
// --- Returns :
// --- Effect : recursive function. First deletes its children then deletes itself
{
ASSERT(pTreeNode != NULL);
if (pTreeNode->pGE != NULL)
DeleteTreeNode(pTreeNode->pGE);
if (pTreeNode->pLT != NULL)
DeleteTreeNode(pTreeNode->pLT);
delete pTreeNode;
pTreeNode = NULL;
}
void COXBinDiffCalculator::CopyFileChars(LONG count, CFile* pInFile, CFile* pOutFile, LONG& sum)
// --- In : count : The number of bytes to copy
// pInFile : The input file
// pOutFile : The output file
// sum : The previous checksum
// --- Out : sum :The checksum after to bytes are copied
// --- Returns :
// --- Effect : Copies the specified number of bytes from the in file
// to the out file and adjusts the checksum
{
const UINT nBufferLength = 2048;
BYTE pBuffer[nBufferLength + 1];
UINT nLengthToRead;
UINT nLengthRead;
BYTE* pByte;
BYTE* pLastByte;
while (0 < count)
{
// Copy the bytes
nLengthToRead = nBufferLength < (UINT)count ? nBufferLength : (UINT)count;
nLengthRead = pInFile->Read(pBuffer, nLengthToRead);
if (nLengthRead == 0)
AfxThrowFileException(CFileException::endOfFile);
pOutFile->Write(pBuffer, nLengthRead);
count -= nLengthRead;
// Add all the bytes together (for checksum)
pByte = pBuffer;
pLastByte = &pBuffer[nLengthRead - 1];
while(pByte <= pLastByte)
sum += *(pByte++);
}
}
LONG COXBinDiffCalculator::ReadLongNBytes(CFile* pFile,int n)
// --- In : pFile : The input file
// n : The number of bytes to read
// --- Out :
// --- Returns : The number represented by those bytes
// --- Effect : It is expected that the MOST SIGNIFICANT bytes come FIRST
{
LONG x;
BYTE byte;
x = 0;
while (n > 0)
{
if (pFile->Read(&byte, 1) == 1)
x = (x << 8) | byte;
else
{
n = 0;
TRACE(_T("COXBinDiffCalculator::ReadLongNBytes : EOF Encountered while reading long\n"));
AfxThrowFileException(CFileException::endOfFile);
}
n--;
}
return x;
}
#if ! BDEXTR
void COXBinDiffCalculator::WriteLongNBytes(LONG x, CFile* pFile,int n)
// --- In : x : The long number to write
// pFile : The output file
// n : The number of bytes to write
// --- Out :
// --- Returns :
// --- Effect : It is expected that the MOST SIGNIFICANT bytes come FIRST
// Writes the specified number (x) as a number of bytes (n)
// to the output file
{
BYTE byte = 0;
while (n > 4)
{
pFile->Write(&byte, 1);
n--;
}
if (n == 4)
{
byte = BYTE((x >> 24) & 0xFF);
pFile->Write(&byte, 1);
n--;
}
if (n == 3)
{
byte = BYTE((x >> 16) & 0xFF);
pFile->Write(&byte, 1);
n--;
}
if (n == 2)
{
byte = BYTE((x >> 8) & 0xFF);
pFile->Write(&byte, 1);
n--;
}
if (n == 1)
{
byte = BYTE(x & 0xFF);
pFile->Write(&byte, 1);
}
}
void COXBinDiffCalculator::ScanFile(CFile* pFile, COXByteAttribs* byteTable)
// --- In : pFile : input file
// byteTable : table to store statistical info in
// --- Out : byteTable : table with statistical info
// --- Returns :
// --- Effect : Makes a statistical scan of the specified file
// Scans file and for each byte 0-255, calculate
// mean distance and standard deviation of the distances
// between occurences of these bytes:
//
// E.g. look at byte b in the file:
//
// ------b---------b-----b-----------b------b-----b----
// < d1 >< d2 >< d3 >< d4 >< d5 >< d6 >
// < 7 >< 10 >< 6 >< 12 >< 7 >< 6 >
// The mean and std.dev. are calculated on distances d1,d2,... for byte b.
{
int byte;
BYTE helpByte;
DWORD curPos;
DWORD dist;
COXByteAttribs* pByteAttribs;
double dDist;
// Initialize tables
pByteAttribs = byteTable;
for (byte = 0; byte < 256; byte++)
{
pByteAttribs->lastPos = -1L;
pByteAttribs->sum = 0L;
pByteAttribs->sumSquares = 0.0;
pByteAttribs->mean = 0.0;
pByteAttribs->stdDev = 0.0;
pByteAttribs->cnt = 0L;
pByteAttribs++;
}
// Scan through file
pFile->Seek(0L,CFile::begin);
curPos = 0L;
do
{
// Adjust progress bar every 4 K bytes
if ((curPos & 0xFFF) == 0)
if (!m_pProgressBar->Adjust(curPos))
m_pProgressBar->Abort(TEXT("Aborting"));
if (pFile->Read(&helpByte, 1) == 1)
byte = helpByte;
else
byte = EOF;
if (byte != EOF)
{
pByteAttribs = &byteTable[byte];
// ... Calculate distance from last occurrence of this byte
dDist = dist = curPos - pByteAttribs->lastPos;
// ... Remember this byte's position
pByteAttribs->lastPos = curPos;
pByteAttribs->sum += dist;
pByteAttribs->sumSquares += dDist * dDist;
// ...cnt contains the number of occurrences
pByteAttribs->cnt++;
}
curPos++;
} while (byte != EOF);
// Calculate mean and standard deviation for all bytes.
pByteAttribs = byteTable;
for (byte = 0; byte < 256; byte++)
{
// Make byte 'occur' just after EOF
dDist = dist = curPos - pByteAttribs->lastPos;
pByteAttribs->sum += dist;
pByteAttribs->sumSquares += dDist*dDist;
pByteAttribs->cnt++;
// Calculate mean. Bytes that did not occur get mean equal to FILE size
pByteAttribs->mean =
(double) pByteAttribs->sum / (double) pByteAttribs->cnt;
// Calculate standard deviation. We could also use the variance
// but I like the std. dev. more.
pByteAttribs->stdDev =
sqrt(pByteAttribs->sumSquares / (double) pByteAttribs->cnt
- pByteAttribs->mean*pByteAttribs->mean);
pByteAttribs++;
}
}
int COXBinDiffCalculator::FindDelimiter(CFile* pFile, double minMeanChunkLen, double maxMeanChunkLen)
// --- In : pFile : input file
// byteTable
// --- Out :
// --- Returns :
// --- Effect : Analyze open file and determine a suitable delimiter
// for chopping the file into chunks.
// This routine changes the current file position.
{
int byte;
int bestByte;
COXByteAttribs byteTable[256];
LONG filSiz;
filSiz = pFile->GetLength();
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Init(0L,filSiz,TEXT("Pass 1 of 3:"));
ScanFile(pFile,byteTable);
// Determine best byte
bestByte = -1;
while (bestByte == -1
&& maxMeanChunkLen < m_cBigChunkLen
&& maxMeanChunkLen < filSiz)
{
COXByteAttribs* pByteAttribs;
COXByteAttribs* pBestByteAttribs;
pByteAttribs = byteTable;
pBestByteAttribs = NULL;
for (byte = 0; byte < 256; byte++)
{
#if m_cDropEOL
if (byte != '\015' && byte != '\012')
#endif
{
// Check if chunk length is between minMeanLen and maxMeanLen.
if (pByteAttribs->mean >= minMeanChunkLen &&
pByteAttribs->mean <= maxMeanChunkLen)
{
if (bestByte == -1)
{
bestByte = byte;
pBestByteAttribs = pByteAttribs;
}
else
{
// Compare stddev: if it is lower,
// the byte is better
if (pBestByteAttribs->stdDev > pByteAttribs->stdDev)
{
bestByte = byte;
pBestByteAttribs = pByteAttribs;
}
}
}
}
pByteAttribs++;
}
// Increase allowable chunk length for the case no acceptable delimiter was
// found: we will loop then
maxMeanChunkLen += 50;
}
m_pProgressBar->Close();
return(bestByte);
}
COXBinDiffCalculator::COXTreeNode* COXBinDiffCalculator::NewTreeNode()
// --- In :
// --- Out :
// --- Returns : The new tree node
// --- Effect : Create a new tree node or reuse one of the free list
{
COXTreeNode* pTreeNode;
if (m_LstFreeTreeNode == NULL)
{
pTreeNode = new COXTreeNode();
}
else
{
pTreeNode = m_LstFreeTreeNode;
m_LstFreeTreeNode = m_LstFreeTreeNode->pGE;
}
pTreeNode->filPos = 0L;
pTreeNode->pGE = NULL;
pTreeNode->pLT = NULL;
return(pTreeNode);
}
void COXBinDiffCalculator::FreeTreeNode(COXTreeNode* pTreeNode)
// --- In : pTreeNode : The node to free
// --- Out :
// --- Returns :
// --- Effect : Releases a tree node by putting on the free list
{
pTreeNode->pGE = m_LstFreeTreeNode;
m_LstFreeTreeNode = pTreeNode;
}
int COXBinDiffCalculator::CmpNode(COXTreeNode* pNode1, CFile* pFil1, COXTreeNode* pNode2,
CFile* pFil2, int delim, LONG* pEqualLen)
// --- In : pNode1 :
// pFil1 :
// pNode2 :
// pFil2 :
// delim : The chopping character
// pEqualLen : Number of equal characters encountered.
// --- Out :
// --- Returns : -1, 0, 1 if <, = , >.
// --- Effect : Compare two tree nodes
// Every tree node has m_cMinMatchLen characters of
// the file buffered within the node: first compare these
// If all these characters are equal, read characters from
// the associated files and compare these.
// If the nodes are different, set equalLen to the
// number of equal characters encountered.
{
LONG pos1;
BYTE buf1[m_cBufSiz];
BYTE* p1;
LONG pos2;
BYTE buf2[m_cBufSiz];
BYTE* p2;
int result;
LONG l;
pos1 = pNode1->filPos;
pos2 = pNode2->filPos;
// ... -2 means: not yet defined
result = -2;
*pEqualLen = 0;
// First compare the m_cMinMatchLen buffered bytes from both nodes
p1 = pNode1->bytes;
p2 = pNode2->bytes;
l = 0;
while (l < m_cMinMatchLen && *p1 == *p2 && *p1 != delim /* && *p2 != delim */)
{
p1++;
p2++;
l++;
(*pEqualLen)++;
}
if (l == m_cMinMatchLen)
{
// If no difference was found, we will have to compare both files from
// m_cMinMatchLen bytes after pos1 and pos2.
pos1 += m_cMinMatchLen;
pos2 += m_cMinMatchLen;
}
else if (*p1 == delim && *p2 != delim)
{
result = -1; // node 1 < node 2 because node 1 is shorter
}
else if (*p2 == delim && *p1 != delim)
{
result = 1; // node 2 < node 1 because node 2 is shorter
}
else if (*p1 == *p2 && *p1 == delim)
{
result = 0; // node 1 == node 2: both end in a delimiter at the same time
}
else if (*p1 < *p2)
{
result = -1; // node 1 < node 2 because a different character found
}
else if (*p2 < *p1)
{
result = 1; // node 2 < node 1 because a different character found
}
// If result is -2, no difference was found in the buffered bytes. Start
// reading bytes from the files in chuncks of m_cBufSiz bytes.
if (result == -2)
{
do
{
// Read a buffer from file 1. Pad file with delimiter after eof.
pFil1->Seek(pos1,CFile::begin);
l = pFil1->Read(buf1,(UINT)m_cBufSiz);
if (l < m_cBufSiz)
buf1[l] = BYTE(delim);
// Read a buffer from file 2. Pad file with delimiter after eof.
pFil2->Seek(pos2,CFile::begin);
l = pFil2->Read(buf2,(UINT)m_cBufSiz);
if (l < m_cBufSiz)
buf2[l] = BYTE(delim);
// Compare buffers
p1 = buf1;
p2 = buf2;
l = 0;
while (l < m_cBufSiz && *p1 == *p2 && *p1 != delim /* && *p2 != delim */)
{
p1++;
p2++;
l++;
(*pEqualLen)++;
}
// If no difference was found: set file positions to read new buffers
if (l == m_cBufSiz)
{
pos1 += m_cBufSiz;
pos2 += m_cBufSiz;
}
else if (*p1 == delim && *p2 != delim)
{
result = -1; /* node 1 < node 2 */
}
else if (*p2 == delim && *p1 != delim)
{
result = 1; /* node 2 < node 1 */
}
else if (*p1 == *p2 && *p1 == delim)
{
result = 0; /* node 1 == node 2 */
}
else if (*p1 < *p2)
{
result = -1;
}
else if (*p2 < *p1)
{
result = 1;
}
} while (result == -2);
}
return(result);
}
void COXBinDiffCalculator::FindBestMatch(COXTreeNode* pOrgTreeRoot,
COXTreeNode** ppOrgTreeNode, CFile* pOrgFil, COXTreeNode* pDerivedTreeNode,
CFile* pDerivedFil, int delim, LONG* pMatchLen)
// --- In : pOrgTreeRoot :
// ppOrgTreeNode :
// pOrgFil :
// pDerivedTreeNode :
// pDerivedFil :
// delim :
// pMatchLen :
// --- Out :
// --- Returns :
// --- Effect : From a tree with root node pOrgTreeRoot, find the node
// pOrgTreeNode that best matches pDerivedTreeNode.
// Also find length of match.
{
int direction;
COXTreeNode* pNode;
LONG equalLen;
// Find best location from tree
*ppOrgTreeNode = NULL;
pNode = pOrgTreeRoot;
*pMatchLen = 0L;
// Descend tree and remember node with longest match
while (pNode != NULL)
{
direction = CmpNode(pDerivedTreeNode,pDerivedFil,pNode,pOrgFil,delim,&equalLen);
// Remember match if length is greater than previous best
if (equalLen > *pMatchLen)
{
*pMatchLen = equalLen;
*ppOrgTreeNode = pNode;
}
if (direction == -1)
{
pNode = pNode->pLT;
}
else if (direction == 1)
{
pNode = pNode->pGE;
}
else /* Node is equal: stop search */
{
pNode = NULL;
}
}
}
void COXBinDiffCalculator::AddTreeNode(COXTreeNode** ppTreeRoot, CFile* pFile,
int delim, COXTreeNode* pNewNode)
// --- In : ppTreeRoot : Root of the tree
// pFile :
// delim :
// pNewNode : The new node to add
// --- Out :
// --- Returns :
// --- Effect : Add new node to index tree and linked list.
{
COXTreeNode* pNode;
COXTreeNode* pPrvNode;
int cmp=0;
LONG equalLen;
// Find location in tree
pNode = *ppTreeRoot;
pPrvNode = NULL;
while (pNode != NULL)
{
pPrvNode = pNode;
cmp = CmpNode(pNewNode,pFile,pNode,pFile,delim,&equalLen);
if (cmp == -1)
{
pNode = pNode->pLT;
}
else if (cmp == 1)
{
pNode = pNode->pGE;
}
else // There is an equal node: stop looking
{
pNode = NULL;
}
}
if (pPrvNode == NULL)
{
*ppTreeRoot = pNewNode;
}
else
{
if (cmp == -1)
{
pPrvNode->pLT = pNewNode;
}
else if (cmp == 1)
{
pPrvNode->pGE = pNewNode;
}
else
{
FreeTreeNode(pNewNode);
}
}
}
COXBinDiffCalculator::COXTreeNode* COXBinDiffCalculator::BuildTree(CFile* pFile, int delim, LONG& OrgSum)
// --- In : pFile :
// delim :
// OrgSum
// --- Out : OrgSum
// --- Returns :
// --- Effect : Build index tree: divide file in chunks by using the delimiter.
// We also create chunks of strings that contain m_cMinMatchLen
// or more equal bytes.
{
COXTreeNode* pTreeRoot;
COXTreeNode* pNewNode;
COXTreeNode* pEqualRunNode;
int byte;
BYTE helpByte;
int prevByte;
LONG equalByteCnt;
LONG curPos;
LONG prevPos;
LONG len;
BYTE* pByte;
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Init(0L,pFile->GetLength(),TEXT("Pass 2 of 3:"));
pTreeRoot = NULL;
curPos = 0;
prevPos = 0;
do
{
if (curPos - prevPos > 0x3FF)
{
if (!m_pProgressBar->Adjust(curPos))
m_pProgressBar->Abort(TEXT("Aborting"));
prevPos = curPos;
}
// Restore file position (because it is destroyed by CmpNode)
pFile->Seek(curPos,CFile::begin);
// Prepare new node
pNewNode = NewTreeNode();
len = 0;
pNewNode->filPos = curPos;
pByte = pNewNode->bytes;
byte = -1;
equalByteCnt = 0;
do
{
prevByte = byte;
if (pFile->Read(&helpByte, 1) == 1)
{
byte = helpByte;
if (byte == prevByte)
{
equalByteCnt++;
}
else
{
// No need to check for m_cMinEqualRunLen or more equal bytes here;
// if they were here, they will be added to the tree anyhow. This
// loop only executes at most m_cMinMatchLen times.
equalByteCnt = 1;
}
OrgSum += byte;
*(pByte++) = BYTE(byte);
}
else
{
byte = EOF;
*(pByte++) = BYTE(delim);
}
len++;
} while (len < m_cMinMatchLen && byte != delim && byte != EOF);
while (byte != delim && byte != EOF)
{
prevByte = byte;
if (pFile->Read(&helpByte, 1) == 1)
{
byte = helpByte;
if (byte == prevByte)
{
equalByteCnt++;
}
else
{
// Check for LONG runs of equal bytes, and add these to
// the tree also.
if (equalByteCnt >= m_cMinEqualRunLen)
{
pEqualRunNode = NewTreeNode();
// Buffered characters consists of m_cMinMatchLen equal bytes
memset(pEqualRunNode->bytes,prevByte,m_cMinMatchLen);
// Calculate position of start of run
pEqualRunNode->filPos = curPos + len - equalByteCnt;
AddTreeNode(&pTreeRoot,pFile,delim,pEqualRunNode);
pFile->Seek(curPos+len+1,CFile::begin); // Restore file position
}
equalByteCnt = 1;
}
OrgSum += byte;
}
else
byte = EOF;
len++;
}
AddTreeNode(&pTreeRoot,pFile,delim,pNewNode);
curPos += len;
} while (byte != EOF);
m_pProgressBar->Close();
return(pTreeRoot);
}
void COXBinDiffCalculator::ExtendMatch(LONG& OrgFilPos, CFile* pOrgFil,
LONG& DerivedFilPos, CFile* pDerivedFil, LONG& MatchLen)
// --- In : OrgFilPos :
// pOrgFil :
// DerivedFilPos :
// pDerivedFil :
// MatchLen :
// --- Out : OrgFilPos :
// DerivedFilPos :
// MatchLen :
// --- Returns :
// --- Effect : Extend match in two directions, ignoring delimiters:
// if there is a match of length n at position p and p',
// it can be changed into a match of length n+m+q
// at position p - m and p' - m
//
// < m >< n >< q >
// p
// ...aaazzzzzzzzxxxxxxxxxxxyyyyyyyccccccccccccccccc (pOrgFil)
// ...bbbzzzzzzzzxxxxxxxxxxxyyyyyyyddddddddddddddddd (pDerivedFil)
// p'
{
BYTE buf1[m_cBufSiz];
BYTE buf2[m_cBufSiz];
int l;
LONG step;
BOOL isDone;
BYTE* p1;
BYTE* p2;
LONG endPos1, endPos2;
// First, try to read forward and extend match toward the end
isDone = FALSE;
endPos1 = OrgFilPos + MatchLen;
endPos2 = DerivedFilPos + MatchLen;
while (! isDone)
{
step = m_cBufSiz;
pOrgFil->Seek(endPos1,CFile::begin);
// ... Eventually shrink step if pOrgFil is too short
step = (UINT)pOrgFil->Read(buf1,(UINT)step);
pDerivedFil->Seek(endPos2,CFile::begin);
// ... Eventually shrink step if derivedFil is too short
step = (UINT)pDerivedFil->Read(buf2,(UINT)step);
// ...step < m_cBufSiz if one of the files at eof: stop comparing
isDone = (step < m_cBufSiz);
// ... Prepare endPos1 and endPos2 for reading next buffer
endPos1 += step;
endPos2 += step;
p1 = buf1;
p2 = buf2;
while (*p1 == *p2 && step > 0)
{
p1++;
p2++;
step--;
MatchLen++;
}
// ... step > 0 if *p1 != *p2 found: stop comparing
isDone = isDone || step > 0;
}
// Second, try to read backwards and extend the match towards the file start.
// Stop extending if orgFilPos or derivedFilPos equal 0.
isDone = FALSE;
while (OrgFilPos > 0 && DerivedFilPos > 0 && ! isDone)
{
// Try to step m_cBufSiz bytes back. If orgFilPos or derivedFilPos is
// less than that, reduce the step size accordingly.
step = m_cBufSiz;
if (OrgFilPos < step)
step = OrgFilPos;
if (DerivedFilPos < step)
step = DerivedFilPos;
// ...Jump back 'step' bytes and read two buffers of 'step' bytes.
(OrgFilPos) -= step;
pOrgFil->Seek(OrgFilPos,CFile::begin);
pOrgFil->Read(buf1,(UINT)step);
DerivedFilPos -= step;
pDerivedFil->Seek(DerivedFilPos,CFile::begin);
pDerivedFil->Read(buf2,(UINT)step);
// ... Put pointers at the end of the buffers
p1 = buf1 + step - 1;
p2 = buf2 + step - 1;
// ...Run backwards until a difference found or at start of buffer
l = (int)step;
while (*p1 == *p2 && l > 0)
{
p1--;
p2--;
l--;
// Adjust matchLen for each matched byte.
MatchLen++;
}
isDone = (l > 0);
// Adjust orgFilPos and derivedFilPos: add length of unequal part of buffer
OrgFilPos += l;
DerivedFilPos += l;
}
}
void COXBinDiffCalculator::DeleteMatchBlocks(COXMatchBlock* pBlock)
{
COXMatchBlock* pMatchBlock = pBlock;
COXMatchBlock* pMatchBlockNext;
while(pMatchBlock != NULL)
{
pMatchBlockNext = pMatchBlock->pNxt;
delete pMatchBlock;
pMatchBlock = pMatchBlockNext;
}
}
COXBinDiffCalculator::COXMatchBlock* COXBinDiffCalculator::NewMatchBlock(void)
// --- In :
// --- Out :
// --- Returns : The new match block
// --- Effect : Create a new match block or reuse one of the free list
{
COXMatchBlock* pMatchBlock;
if (m_LstFreeMatchBlock == NULL)
{
pMatchBlock = new COXMatchBlock();
}
else
{
pMatchBlock = m_LstFreeMatchBlock;
m_LstFreeMatchBlock = m_LstFreeMatchBlock->pNxt;
}
pMatchBlock->orgFilPos = 0L;
pMatchBlock->len = 0L;
pMatchBlock->derivedFilPos = 0L;
pMatchBlock->pNxt = NULL;
return(pMatchBlock);
}
void COXBinDiffCalculator::FreeMatchBlock(COXBinDiffCalculator::COXMatchBlock* pMatchBlock)
// --- In : pMatchBlock : Block to release
// --- Out :
// --- Returns : The new match block
// --- Effect : Release a match block: put on the free list
{
pMatchBlock->pNxt = m_LstFreeMatchBlock;
m_LstFreeMatchBlock = pMatchBlock;
}
void COXBinDiffCalculator::AddMatch(COXBinDiffCalculator::COXTreeNode* pOrgTreeNode,
CFile* pOrgFil, COXBinDiffCalculator::COXTreeNode* pDerivedTreeNode,
CFile* pDerivedFil, LONG matchLen, COXBinDiffCalculator::COXMatchBlock** ppMatchLst)
// --- In : pOrgTreeNode :
// pOrgFil :
// pDerivedTreeNode :
// pDerivedFil :
// matchLen :
// ppMatchLst :
// --- Out :
// --- Returns :
// --- Effect : Add new match between pOrgTreeNode and pDerivedTreeNode,
// with length matchlen. If the match is LONG enough
// (after extending): add it to the list of matches.
// The list of matches is kept in order of increasing
// starting position in derivedFil.
// If a new match is added that is completely part of
// an existing match, it is dropped.
// If a new match is added that encloses one or more existing
// matches, the enclosed matches are dropped, and the new one
// is added.
// The resulting list will only contain matches with different
// derivedFilPos values: if there would be two equal derivedFilPos
// values, one of the two blocks would enclose the other and
// would have been dropped.
{
LONG orgFilPos, derivedFilPos;
LONG distance;
COXMatchBlock* pMatchBlock;
COXMatchBlock* pPrvMatchBlock;
COXMatchBlock* pNxtMatchBlock;
BOOL dropNewMatch;
orgFilPos = pOrgTreeNode->filPos;
derivedFilPos = pDerivedTreeNode->filPos;
// Pass 1: check if there are matchblocks that enclose the new match
// (relative to derivedFil), in a way that they are an expansion of this
// new match block. This saves us expanding this block, because after
// expansion, it would be the same as the overlapping block.
// This is done by checking the distance between the positions for
// orgFil and derivedFil: if the larger block and the smaller block have
// the same distance, and the smaller block is part of the larger
// one, the smaller will expand to be equal to the larger one.
distance = orgFilPos - derivedFilPos;
pMatchBlock = *ppMatchLst;
dropNewMatch = FALSE;
while (pMatchBlock != NULL
&& pMatchBlock->derivedFilPos <= derivedFilPos
&& ! dropNewMatch)
{
dropNewMatch =
(
// ...pMatchBlock->derivedFilPos <= derivedFilPos
// && Already tested in while condition
derivedFilPos <= pMatchBlock->derivedFilPos + pMatchBlock->len
&&
pMatchBlock->distance == distance
);
pMatchBlock = pMatchBlock->pNxt;
}
if (! dropNewMatch)
{
ExtendMatch(orgFilPos,pOrgFil,derivedFilPos,pDerivedFil,matchLen);
if (matchLen >= m_cMinMatchLen)
{
// Pass 2: check if there are matchblocks that enclose the new match
// (relative to derivedFil).
pMatchBlock = *ppMatchLst;
while (pMatchBlock != NULL
&& pMatchBlock->derivedFilPos <= derivedFilPos
&& ! dropNewMatch)
{
dropNewMatch =
(
// pMatchBlock->derivedFilPos <= derivedFilPos
// && Already tested in while condition */
derivedFilPos+matchLen <=
pMatchBlock->derivedFilPos + pMatchBlock->len
);
pMatchBlock = pMatchBlock->pNxt;
}
if (! dropNewMatch)
{
// Pass 3: drop all matchblocks from list that are enclosed by the new one
pMatchBlock = *ppMatchLst;
pPrvMatchBlock = NULL;
while (pMatchBlock != NULL)
{
pNxtMatchBlock = pMatchBlock->pNxt;
//... Check if pMatchBlock completely enclosed by new match
if
(
derivedFilPos <= pMatchBlock->derivedFilPos
&&
pMatchBlock->derivedFilPos + pMatchBlock->len <=
derivedFilPos+matchLen
)
{
// ...If completely enclosed: remove from list
if (pPrvMatchBlock == NULL)
{
*ppMatchLst = pNxtMatchBlock;
}
else
{
pPrvMatchBlock->pNxt = pNxtMatchBlock;
}
FreeMatchBlock(pMatchBlock);
}
else
{
pPrvMatchBlock = pMatchBlock;
}
pMatchBlock = pNxtMatchBlock;
}
// Pass 4: Find location to add match to list; keep list sorted on
// derivedFilPos.
pNxtMatchBlock = *ppMatchLst;
pPrvMatchBlock = NULL;
while (pNxtMatchBlock != NULL
&& pNxtMatchBlock->derivedFilPos < derivedFilPos)
{
pPrvMatchBlock = pNxtMatchBlock;
pNxtMatchBlock = pNxtMatchBlock->pNxt;
}
// Add new match
pMatchBlock = NewMatchBlock();
pMatchBlock->orgFilPos = orgFilPos;
pMatchBlock->derivedFilPos = derivedFilPos;
pMatchBlock->distance = distance;
pMatchBlock->len = matchLen;
pMatchBlock->pNxt = pNxtMatchBlock;
if (pPrvMatchBlock == NULL)
{
*ppMatchLst = pMatchBlock;
}
else
{
pPrvMatchBlock->pNxt = pMatchBlock;
}
}
}
}
}
void COXBinDiffCalculator::ShrinkMatchList(COXBinDiffCalculator::COXMatchBlock** ppMatchLst)
// --- In : ppMatchLst
// --- Out :
// --- Returns :
// --- Effect : Clean up the list of matches: shrink overlapping matches
// and drop those that become too short
{
COXMatchBlock* pMatchBlock;
COXMatchBlock* pPrvMatchBlock;
COXMatchBlock* pNxtMatchBlock;
LONG distance;
pPrvMatchBlock = NULL;
pMatchBlock = *ppMatchLst;
while (pMatchBlock != NULL)
{
pNxtMatchBlock = pMatchBlock->pNxt;
if (pNxtMatchBlock != NULL)
{
// ... distance is maximal length of pMatchBlock without overlap
distance = pNxtMatchBlock->derivedFilPos - pMatchBlock->derivedFilPos;
// ... Shrink block if too long
if (distance < pMatchBlock->len)
{
pMatchBlock->len = distance;
}
}
// Drop blocks that become too short.
if (pMatchBlock->len < m_cMinMatchLen)
{
if (pPrvMatchBlock == NULL)
{
*ppMatchLst = pNxtMatchBlock;
}
else
{
pPrvMatchBlock->pNxt = pNxtMatchBlock;
}
FreeMatchBlock(pMatchBlock);
}
else
{
pPrvMatchBlock = pMatchBlock;
}
pMatchBlock = pNxtMatchBlock;
}
}
COXBinDiffCalculator::COXMatchBlock* COXBinDiffCalculator::MatchFiles(
COXBinDiffCalculator::COXTreeNode* pOrgTreeRoot,CFile* pOrgFil,
CFile* pDerivedFil, int delim, LONG& DerivedSum)
// --- In : pOrgTreeRoot :
// pOrgFil :
// pDerivedFil :
// delim :
// DerivedSum
// --- Out : DerivedSum
// --- Returns : The matched block
// --- Effect : Compare chunks from file 2 with tree from file 1
{
COXTreeNode* pOrgTreeNode;
LONG matchLen;
COXMatchBlock* pMatchLst;
int byte;
BYTE helpByte;
int prevByte;
LONG curPos;
LONG prevPos;
COXTreeNode treeNode;
COXTreeNode equalRunNode;
LONG len;
LONG equalByteCnt;
BYTE* pByte;
ASSERT(m_pProgressBar != NULL);
m_pProgressBar->Init(0L,pDerivedFil->GetLength(),TEXT("Pass 3 of 3:"));
pMatchLst = NULL;
curPos = 0;
prevPos = 0;
do
{
if (curPos - prevPos > 0x3FF)
{
if (!m_pProgressBar->Adjust(curPos))
m_pProgressBar->Abort(TEXT("Aborting"));
prevPos = curPos;
}
// Restore FILE position (destroyed by FindBestMatch below)
pDerivedFil->Seek(curPos,CFile::begin);
// Buffer some characters from the file.
len = 0;
treeNode.filPos = curPos;
pByte = treeNode.bytes;
byte = -1;
equalByteCnt = 0;
do
{
prevByte = byte;
if (pDerivedFil->Read(&helpByte, 1) == 1)
{
byte = helpByte;
if (byte == prevByte)
{
equalByteCnt++;
}
else
{
// No need to check for m_cMinMatchLen or more equal bytes here;
// if they were here, they will be checked against the tree
// anyhow. This loop only executes at most m_cMinMatchLen times.
equalByteCnt = 1;
}
DerivedSum += byte;
*(pByte++) = BYTE(byte);
}
else
{
byte = EOF;
*(pByte++) = BYTE(delim);
}
len++;
} while (len < m_cMinMatchLen && byte != delim && byte != EOF);
while (byte != delim && byte != EOF)
{
prevByte = byte;
if (pDerivedFil->Read(&helpByte, 1) == 1)
{
byte = helpByte;
if (byte == prevByte)
{
equalByteCnt++;
}
else
{
// Check for LONG runs of equal bytes, and check these against
// the tree also.
if (equalByteCnt >= m_cMinEqualRunLen)
{
// ... Buffered characters consists of m_cMinMatchLen equal bytes
memset(equalRunNode.bytes,prevByte,m_cMinMatchLen);
// ...Calculate position of start of run
equalRunNode.filPos = curPos + len - equalByteCnt;
// Search best match in original tree
FindBestMatch(pOrgTreeRoot,&pOrgTreeNode,pOrgFil,
&equalRunNode,pDerivedFil,
delim,&matchLen);
if (pOrgTreeNode != NULL)
{
// Add match to list of matches
AddMatch(pOrgTreeNode,pOrgFil,
&equalRunNode,pDerivedFil,
matchLen,&pMatchLst);
}
pDerivedFil->Seek(curPos+len+1, CFile::begin); /* Restore file position */
}
// Reset equal byte count to 1.
equalByteCnt = 1;
}
DerivedSum += byte;
}
else
byte = EOF;
len++;
}
curPos += len;
// Search best match in original tree
FindBestMatch(pOrgTreeRoot,&pOrgTreeNode,pOrgFil,
&treeNode,pDerivedFil,
delim,&matchLen);
if (pOrgTreeNode != NULL)
{
// Add match to list of matches
AddMatch(pOrgTreeNode,pOrgFil,
&treeNode,pDerivedFil,
matchLen,&pMatchLst);
}
} while (byte != EOF);
// Remove overlapping matches
ShrinkMatchList(&pMatchLst);
m_pProgressBar->Close();
return(pMatchLst);
}
void COXBinDiffCalculator::DumpDiff(COXBinDiffCalculator::COXMatchBlock* pMatchLst, CFile* pDerivedFil, CFile* pDiffFil)
// --- In : pMatchLst :
// pDerivedFil :
// pDiffFil :
// --- Out :
// --- Returns :
// --- Effect : Write diff file
{
COXMatchBlock* pMatchBlock;
LONG len, pos;
LONG blockPos;
LONG writeLen;
LONG blockLen;
LONG codeLen;
LONG filSiz;
LONG nextPos;
BYTE byte;
filSiz = pDerivedFil->GetLength();
// Descend match block list. Resulting FILE is a series of matches and
// non-matches between original FILE and derived FILE.
pMatchBlock = pMatchLst;
len = filSiz;
pos = 0;
// Repeat until all bytes of derivedFil have been checked
while (len > 0)
{
// Get next matching position from match block. If there is none,
// set nextPos beyond eof on derived file.
// If there are unmatched bytes between the last position written
// and the next position, write them to the diff file.
// Possibly there are no unmatched bytes (nextPos == pos), in case
// of consecutive match blocks.
nextPos = pMatchBlock != NULL ? pMatchBlock->derivedFilPos : filSiz;
if (nextPos > pos)
{
// There are unmatched bytes: write one or more block of unmatched bytes
// from derivedFil
pDerivedFil->Seek(pos,CFile::begin);
writeLen = nextPos - pos;
while (writeLen > 0)
{
// If remaining block is small, use tag for small
// blocks (1 tag/length byte)
if (writeLen <= m_cSmallSize)
{
blockLen = writeLen;
codeLen = blockLen - 1; /* Encode length 1-m_cSmallSize by subtr. 1 */
// ... codeLen: 4 bit value
// ... bit 3-0 | Tag
byte = BYTE((codeLen << 4) | m_cTagSmallDiff);
pDiffFil->Write(&byte, 1);
}
// If remaining block is medium size use tag for
// medium blocks (2 tag/length bytes)
else if (writeLen <= m_cMediumSize)
{
blockLen = writeLen;
// ... Encode length m_cSmallSize+1 - m_cMediumSize by subtr. m_cSmallSize+1
codeLen = blockLen - m_cSmallSize - 1;
// ... codeLen: 12 bit value
// ... bit 11-8 | Tag
byte = BYTE(((codeLen >> 4) & 0xF0) | m_cTagMediumDiff);
pDiffFil->Write(&byte, 1);
// ... bit 7-0
byte = BYTE((codeLen ) & 0xFF);
pDiffFil->Write(&byte, 1);
}
else
{
// If remaining block is large: write a large block,
// and then re-check the remaining length
if (writeLen > m_cLargeSize)
{
blockLen = m_cLargeSize;
}
else
{
blockLen = writeLen;
}
// ... Encode length m_cMediumSize+1 - m_cLargeSize by subtracting m_cMediumSize+1
codeLen = blockLen - m_cMediumSize - 1;
// ... codeLen: 20 bit value
// ... bit 19-16 | Tag
byte = BYTE(((codeLen >> 12) & 0xF0) | m_cTagLargeDiff);
pDiffFil->Write(&byte, 1);
// ... bit 15-8
byte = BYTE((codeLen >> 8) & 0xFF);
pDiffFil->Write(&byte, 1);
// ... bit 7-0
byte = BYTE((codeLen ) & 0xFF);
pDiffFil->Write(&byte, 1);
}
writeLen -= blockLen;
len -= blockLen;
while (blockLen > 0)
{
pDerivedFil->Read(&byte, 1);
pDiffFil->Write(&byte, 1);
blockLen--;
}
}
pos = nextPos;
}
// Write block of matching bytes: encode them as a count and a file position
// in the original file.
if (pMatchBlock != NULL)
{
blockPos = pMatchBlock->orgFilPos;
writeLen = pMatchBlock->len;
while (writeLen > 0)
{
if (writeLen <= m_cSmallSize)
{
blockLen = writeLen;
codeLen = blockLen - 1;
if (blockPos <= m_cNearDistance)
{
// ... codeLen: 4 bit value
// ... bit 3-0 | Tag
byte = BYTE((codeLen << 4) | m_cTagSmallNearCopy);
pDiffFil->Write(&byte, 1);
// ... blockPos: 16 bit value
WriteLongNBytes(blockPos,pDiffFil,2);
}
else if (blockPos <= m_cDistantDistance)
{
// ... codeLen: 4 bit value
// ... bit 3-0 | Tag
byte = BYTE((codeLen << 4) | m_cTagSmallDistantCopy);
pDiffFil->Write(&byte, 1);
// ... blockPos: 24 bit value
WriteLongNBytes(blockPos,pDiffFil,3);
}
else
{
// ... codeLen: 4 bit value
// ... bit 3-0 | Tag
byte = BYTE((codeLen << 4) | m_cTagSmallFarCopy);
pDiffFil->Write(&byte, 1);
// ... blockPos: 32 bit value
WriteLongNBytes(blockPos,pDiffFil,4);
}
}
else if (writeLen <= m_cMediumSize)
{
blockLen = writeLen;
codeLen = blockLen - m_cSmallSize - 1;
if (blockPos <= m_cNearDistance)
{
// ... codeLen: 12 bit value
// ... bit 11-8 | Tag
byte = BYTE(((codeLen >> 4) & 0xF0) | m_cTagMediumNearCopy);
pDiffFil->Write(&byte, 1);
// ... bit 7-0
byte = BYTE((codeLen) & 0xFF);
pDiffFil->Write(&byte, 1);
// ... blockPos: 16 bit value
WriteLongNBytes(blockPos,pDiffFil,2);
}
else if (blockPos <= m_cDistantDistance)
{
// ... codeLen: 12 bit value
// ... bit 11-8 | Tag
byte = BYTE(((codeLen >> 4) & 0xF0) |m_cTagMediumDistantCopy);
pDiffFil->Write(&byte, 1);
// ... bit 7-0
byte = BYTE((codeLen) & 0xFF);
pDiffFil->Write(&byte, 1);
// ... blockPos: 24 bit value
WriteLongNBytes(blockPos,pDiffFil,3);
}
else
{
// ... codeLen: 12 bit value
// ... bit 11-8 | Tag
byte = BYTE(((codeLen >> 4) & 0xF0) | m_cTagMediumFarCopy);
pDiffFil->Write(&byte, 1);
// ... bit 7-0
byte = BYTE((codeLen) & 0xFF);
pDiffFil->Write(&byte, 1);
// ... blockPos: 32 bit value
WriteLongNBytes(blockPos,pDiffFil,4);
}
}
else
{
if (writeLen > m_cLargeSize)
{
blockLen = m_cLargeSize;
}
else
{
blockLen = writeLen;
}
codeLen = blockLen - m_cMediumSize - 1;
if (blockPos <= m_cNearDistance)
{
/* codeLen: 20 bit value */
/* bit 19-16 | Tag */
byte = BYTE(((codeLen >> 12) & 0xF0) | m_cTagLargeNearCopy);
pDiffFil->Write(&byte, 1);
/* bit 15-8 */
byte = BYTE((codeLen >> 8) & 0xFF);
pDiffFil->Write(&byte, 1);
/* bit 7-0 */
byte = BYTE((codeLen) & 0xFF);
pDiffFil->Write(&byte, 1);
/* blockPos: 16 bit value */
WriteLongNBytes(blockPos,pDiffFil,2);
}
else if (blockPos <= m_cDistantDistance)
{
/* codeLen: 20 bit value */
/* bit 19-16 | Tag */
byte = BYTE(((codeLen >> 12) & 0xF0) | m_cTagLargeDistantCopy);
pDiffFil->Write(&byte, 1);
/* bit 15-8 */
byte = BYTE((codeLen >> 8) & 0xFF);
pDiffFil->Write(&byte, 1);
/* bit 7-0 */
byte = BYTE((codeLen) & 0xFF);
pDiffFil->Write(&byte, 1);
/* blockPos: 24 bit value */
WriteLongNBytes(blockPos,pDiffFil,3);
}
else
{
/* codeLen: 20 bit value */
/* bit 19-16 | Tag */
byte = BYTE(((codeLen >> 12) & 0xF0) | m_cTagLargeFarCopy);
pDiffFil->Write(&byte, 1);
/* bit 15-8 */
byte = BYTE((codeLen >> 8) & 0xFF);
pDiffFil->Write(&byte, 1);
/* bit 7-0 */
byte = BYTE((codeLen) & 0xFF);
pDiffFil->Write(&byte, 1);
/* blockPos: 32 bit value */
WriteLongNBytes(blockPos,pDiffFil,4);
}
}
writeLen -= blockLen;
// ... A bug that was present in the previous version (if writeLen > m_cLargeSize)
// is corrected by the following line
blockPos += blockLen;
len -= blockLen;
pos += blockLen;
}
pMatchBlock = pMatchBlock->pNxt;
}
}
pDiffFil->Write(&m_cTagEOF, 1);
}
#endif // ! BDEXTR
// private:
// ==========================================================================