/*
The primary core of the diff engine has been taken from Michael Potter's
A Generic, Reusable Diff Algorithm in C# - II
http://www.codeproject.com/csharp/DiffEngine.asp
Article content copyright Michael Potter, 2004
Specifically, the BinaryFile.cs, CharData.cs, Engine.cs, Structures.cs
and TextFile.cs files in the OpenCollective.Diff project are copyrighted by
Michael Potter used under the permission granted in a post by Michael
on the above noted article dicussion in which he states:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Re: Great Stuff! - Copywrite? Michael Potter 16:51 26 May '04
Thanks,
Copywrite - let me check my files. Ahh... yes... here it is... If you
use this code and make a million dollars you are required to come to
St. Louis and buy me a beer.
Seriously, use it any way you want. Let me know if you find any
improvements (I am sure there are many). If you are exposing the source
in another project it would be nice to drop in a comment that points
back here. Not necessary, but nice.
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Otherwise the following notice applies:
OpenCollective - http://www.netbrick.net/ -- Version 0.73
Copyright (c) 2004
by Tyler Jensen ( tylerj@netbrick.net ) of NetBrick Inc. ( http://www.netbrick.net )
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.
*/
using System;
using System.Collections;
namespace OpenCollective.Diff
{
public enum DiffEngineLevel
{
FastImperfect,
Medium,
SlowPerfect
}
public class DiffEngine
{
private IDiffList _source;
private IDiffList _dest;
private ArrayList _matchList;
private DiffEngineLevel _level;
private DiffStateList _stateList;
public DiffEngine()
{
_source = null;
_dest = null;
_matchList = null;
_stateList = null;
_level = DiffEngineLevel.FastImperfect;
}
private int GetSourceMatchLength(int destIndex, int sourceIndex, int maxLength)
{
int matchCount;
for (matchCount = 0; matchCount < maxLength; matchCount++)
{
if ( _dest.GetByIndex(destIndex + matchCount).CompareTo(_source.GetByIndex(sourceIndex + matchCount)) != 0 )
{
break;
}
}
return matchCount;
}
private void GetLongestSourceMatch(DiffState curItem, int destIndex,int destEnd, int sourceStart,int sourceEnd)
{
int maxDestLength = (destEnd - destIndex) + 1;
int curLength = 0;
int curBestLength = 0;
int curBestIndex = -1;
int maxLength = 0;
for (int sourceIndex = sourceStart; sourceIndex <= sourceEnd; sourceIndex++)
{
maxLength = Math.Min(maxDestLength,(sourceEnd - sourceIndex) + 1);
if (maxLength <= curBestLength)
{
//No chance to find a longer one any more
break;
}
curLength = GetSourceMatchLength(destIndex,sourceIndex,maxLength);
if (curLength > curBestLength)
{
//This is the best match so far
curBestIndex = sourceIndex;
curBestLength = curLength;
}
//jump over the match
sourceIndex += curBestLength;
}
//DiffState cur = _stateList.GetByIndex(destIndex);
if (curBestIndex == -1)
{
curItem.SetNoMatch();
}
else
{
curItem.SetMatch(curBestIndex, curBestLength);
}
}
private void ProcessRange(int destStart, int destEnd, int sourceStart, int sourceEnd)
{
int curBestIndex = -1;
int curBestLength = -1;
int maxPossibleDestLength = 0;
DiffState curItem = null;
DiffState bestItem = null;
for (int destIndex = destStart; destIndex <= destEnd; destIndex++)
{
maxPossibleDestLength = (destEnd - destIndex) + 1;
if (maxPossibleDestLength <= curBestLength)
{
//we won't find a longer one even if we looked
break;
}
curItem = _stateList.GetByIndex(destIndex);
if (!curItem.HasValidLength(sourceStart, sourceEnd, maxPossibleDestLength))
{
//recalc new best length since it isn't valid or has never been done.
GetLongestSourceMatch(curItem, destIndex, destEnd, sourceStart, sourceEnd);
}
if (curItem.Status == DiffStatus.Matched)
{
switch (_level)
{
case DiffEngineLevel.FastImperfect:
if (curItem.Length > curBestLength)
{
//this is longest match so far
curBestIndex = destIndex;
curBestLength = curItem.Length;
bestItem = curItem;
}
//Jump over the match
destIndex += curItem.Length - 1;
break;
case DiffEngineLevel.Medium:
if (curItem.Length > curBestLength)
{
//this is longest match so far
curBestIndex = destIndex;
curBestLength = curItem.Length;
bestItem = curItem;
//Jump over the match
destIndex += curItem.Length - 1;
}
break;
default:
if (curItem.Length > curBestLength)
{
//this is longest match so far
curBestIndex = destIndex;
curBestLength = curItem.Length;
bestItem = curItem;
}
break;
}
}
}
if (curBestIndex < 0)
{
//we are done - there are no matches in this span
}
else
{
int sourceIndex = bestItem.StartIndex;
_matchList.Add(DiffResultSpan.CreateNoChange(curBestIndex,sourceIndex,curBestLength));
if (destStart < curBestIndex)
{
//Still have more lower destination data
if (sourceStart < sourceIndex)
{
//Still have more lower source data
// Recursive call to process lower indexes
ProcessRange(destStart, curBestIndex -1,sourceStart, sourceIndex -1);
}
}
int upperDestStart = curBestIndex + curBestLength;
int upperSourceStart = sourceIndex + curBestLength;
if (destEnd > upperDestStart)
{
//we still have more upper dest data
if (sourceEnd > upperSourceStart)
{
//set still have more upper source data
// Recursive call to process upper indexes
ProcessRange(upperDestStart,destEnd,upperSourceStart,sourceEnd);
}
}
}
}
public double ProcessDiff(IDiffList source, IDiffList destination,DiffEngineLevel level)
{
_level = level;
return ProcessDiff(source,destination);
}
public double ProcessDiff(IDiffList source, IDiffList destination)
{
DateTime dt = DateTime.Now;
_source = source;
_dest = destination;
_matchList = new ArrayList();
int dcount = _dest.Count();
int scount = _source.Count();
if ((dcount > 0)&&(scount > 0))
{
_stateList = new DiffStateList(dcount);
ProcessRange(0,dcount - 1,0, scount - 1);
}
TimeSpan ts = DateTime.Now - dt;
return ts.TotalSeconds;
}
private bool AddChanges(
ArrayList report,
int curDest,
int nextDest,
int curSource,
int nextSource)
{
bool retval = false;
int diffDest = nextDest - curDest;
int diffSource = nextSource - curSource;
int minDiff = 0;
if (diffDest > 0)
{
if (diffSource > 0)
{
minDiff = Math.Min(diffDest,diffSource);
report.Add(DiffResultSpan.CreateReplace(curDest,curSource,minDiff));
if (diffDest > diffSource)
{
curDest+=minDiff;
report.Add(DiffResultSpan.CreateAddDestination(curDest,diffDest - diffSource));
}
else
{
if (diffSource > diffDest)
{
curSource+= minDiff;
report.Add(DiffResultSpan.CreateDeleteSource(curSource,diffSource - diffDest));
}
}
}
else
{
report.Add(DiffResultSpan.CreateAddDestination(curDest,diffDest));
}
retval = true;
}
else
{
if (diffSource > 0)
{
report.Add(DiffResultSpan.CreateDeleteSource(curSource,diffSource));
retval = true;
}
}
return retval;
}
public ArrayList DiffReport()
{
ArrayList retval = new ArrayList();
int dcount = _dest.Count();
int scount = _source.Count();
//Deal with the special case of empty files
if (dcount == 0)
{
if (scount > 0)
{
retval.Add(DiffResultSpan.CreateDeleteSource(0,scount));
}
return retval;
}
else
{
if (scount == 0)
{
retval.Add(DiffResultSpan.CreateAddDestination(0,dcount));
return retval;
}
}
_matchList.Sort();
int curDest = 0;
int curSource = 0;
DiffResultSpan last = null;
//Process each match record
foreach (DiffResultSpan drs in _matchList)
{
if ((!AddChanges(retval,curDest,drs.DestIndex,curSource,drs.SourceIndex))&&
(last != null))
{
last.AddLength(drs.Length);
}
else
{
retval.Add(drs);
}
curDest = drs.DestIndex + drs.Length;
curSource = drs.SourceIndex + drs.Length;
last = drs;
}
//Process any tail end data
AddChanges(retval,curDest,dcount,curSource,scount);
return retval;
}
}
}