Click here to Skip to main content
15,891,184 members
Articles / Web Development / ASP.NET

OpenCollective -- The Requirements Management Wiki

Rate me:
Please Sign up or sign in to vote.
4.41/5 (16 votes)
9 Nov 2004 · 4 min read 262.2K   1.8K   111  
An article on building a project oriented wiki for software development requirements management
/*
The primary core of the diff engine has been taken from Michael Potter's
A Generic, Reusable Diff Algorithm in C# - II
http://www.codeproject.com/csharp/DiffEngine.asp
Article content copyright Michael Potter, 2004

Specifically, the BinaryFile.cs, CharData.cs, Engine.cs, Structures.cs 
and TextFile.cs files in the OpenCollective.Diff project are copyrighted by
Michael Potter and are used under the permission granted in a post by Michael
in the discussion of the article noted above, in which he states:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Re: Great Stuff! - Copywrite?   Michael Potter  16:51 26 May '04  
Thanks,
   Copywrite - let me check my files. Ahh... yes... here it is... If you 
use this code and make a million dollars you are required to come to 
St. Louis and buy me a beer.

Seriously, use it any way you want. Let me know if you find any 
improvements (I am sure there are many). If you are exposing the source 
in another project it would be nice to drop in a comment that points 
back here. Not necessary, but nice. 
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

Otherwise the following notice applies:

OpenCollective -  http://www.netbrick.net/ -- Version 0.73
Copyright (c) 2004
by Tyler Jensen ( tylerj@netbrick.net ) of NetBrick Inc. ( http://www.netbrick.net )

Permission is hereby granted, free of charge, to any person obtaining a copy 
of this software and associated documentation files (the "Software"), to deal 
in the Software without restriction, including without limitation the rights 
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
of the Software, and to permit persons to whom the Software is furnished to do 
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all 
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
OR OTHER DEALINGS IN THE SOFTWARE.
*/
using System;
using System.Collections;

namespace OpenCollective.Diff
{
	/// <summary>
	/// Selects how aggressively <c>DiffEngine.ProcessRange</c> skips destination
	/// indexes that lie inside a match it has already seen.
	/// </summary>
	public enum DiffEngineLevel
	{
		/// <summary>
		/// After every matched destination item the scan jumps past that match,
		/// whether or not it became the best match so far.
		/// </summary>
		FastImperfect = 0,

		/// <summary>
		/// The scan jumps past a match only when that match became the new best.
		/// </summary>
		Medium = 1,

		/// <summary>
		/// The scan never jumps; every destination index in the range is examined.
		/// </summary>
		SlowPerfect = 2
	}

	/// <summary>
	/// Finds the spans that a source and a destination <see cref="IDiffList"/>
	/// have in common and converts them into a report of unchanged, replaced,
	/// added and deleted spans.
	/// </summary>
	/// <remarks>
	/// Usage: call one of the <c>ProcessDiff</c> overloads, then <c>DiffReport</c>.
	/// The engine keeps per-run state in instance fields, so one instance should
	/// not be shared between concurrent comparisons.
	/// </remarks>
	public class DiffEngine
	{
		private IDiffList _source;
		private IDiffList _dest;

		// DiffResultSpan "no change" entries collected by ProcessRange.
		private ArrayList _matchList;

		private DiffEngineLevel _level;

		// One DiffState per destination index, caching its best known source match.
		private DiffStateList _stateList;

		/// <summary>
		/// Creates an engine with no lists attached and the
		/// <see cref="DiffEngineLevel.FastImperfect"/> level selected.
		/// </summary>
		public DiffEngine() 
		{
			_source = null;
			_dest = null;
			_matchList = null;
			_stateList = null;
			_level = DiffEngineLevel.FastImperfect;
		}

		/// <summary>
		/// Counts how many consecutive items compare equal (CompareTo == 0) when
		/// walking the destination from <paramref name="destIndex"/> and the source
		/// from <paramref name="sourceIndex"/> in lock step.
		/// </summary>
		/// <param name="destIndex">First destination index to compare.</param>
		/// <param name="sourceIndex">First source index to compare.</param>
		/// <param name="maxLength">Upper bound on the number of items compared.</param>
		/// <returns>The length of the common run, between 0 and maxLength.</returns>
		private int GetSourceMatchLength(int destIndex, int sourceIndex, int maxLength)
		{
			int matchCount;
			for (matchCount = 0; matchCount < maxLength; matchCount++)
			{
				if ( _dest.GetByIndex(destIndex + matchCount).CompareTo(_source.GetByIndex(sourceIndex + matchCount)) != 0 )
				{
					break;
				}
			}
			return matchCount;
		}

		/// <summary>
		/// Searches the source range for the longest run matching the destination
		/// starting at <paramref name="destIndex"/> and records the outcome on
		/// <paramref name="curItem"/> (SetMatch or SetNoMatch).
		/// </summary>
		/// <param name="curItem">State object for the destination index being examined.</param>
		/// <param name="destIndex">Destination index where the run must start.</param>
		/// <param name="destEnd">Last destination index (inclusive) the run may cover.</param>
		/// <param name="sourceStart">First source index (inclusive) to probe.</param>
		/// <param name="sourceEnd">Last source index (inclusive) to probe.</param>
		private void GetLongestSourceMatch(DiffState curItem, int destIndex,int destEnd, int sourceStart,int sourceEnd)
		{
			int maxDestLength = (destEnd - destIndex) + 1;
			int curBestLength = 0;
			int curBestIndex = -1;
			for (int sourceIndex = sourceStart; sourceIndex <= sourceEnd; sourceIndex++)
			{
				int maxLength = Math.Min(maxDestLength,(sourceEnd - sourceIndex) + 1);
				if (maxLength <= curBestLength)
				{
					//No chance to find a longer one any more
					break;
				}
				int curLength = GetSourceMatchLength(destIndex,sourceIndex,maxLength);
				if (curLength > curBestLength)
				{
					//This is the best match so far
					curBestIndex = sourceIndex;
					curBestLength = curLength;
				}
				// Stride past the best match length (the loop adds one more).
				// NOTE(review): this skips source positions, so a longer match that
				// starts inside the skipped region is never probed (e.g. dest "abc"
				// against source "aabc" settles on length 1 at index 0). Kept as-is
				// to preserve the established results and speed of the algorithm.
				sourceIndex += curBestLength; 
			}

			if (curBestIndex == -1)
			{
				curItem.SetNoMatch();
			}
			else
			{
				curItem.SetMatch(curBestIndex, curBestLength);
			}
		}

		/// <summary>
		/// Finds the best match between the given destination and source ranges,
		/// records it in the match list, then recurses into the ranges below and
		/// above that match.
		/// </summary>
		/// <param name="destStart">First destination index (inclusive).</param>
		/// <param name="destEnd">Last destination index (inclusive).</param>
		/// <param name="sourceStart">First source index (inclusive).</param>
		/// <param name="sourceEnd">Last source index (inclusive).</param>
		private void ProcessRange(int destStart, int destEnd, int sourceStart, int sourceEnd)
		{
			int curBestIndex = -1;
			int curBestLength = -1;
			DiffState bestItem = null;
			for (int destIndex = destStart; destIndex <= destEnd; destIndex++)
			{
				int maxPossibleDestLength = (destEnd - destIndex) + 1;
				if (maxPossibleDestLength <= curBestLength)
				{
					//we won't find a longer one even if we looked
					break;
				}
				DiffState curItem = _stateList.GetByIndex(destIndex);
				
				if (!curItem.HasValidLength(sourceStart, sourceEnd, maxPossibleDestLength))
				{
					//recalc new best length since it isn't valid or has never been done.
					GetLongestSourceMatch(curItem, destIndex, destEnd, sourceStart, sourceEnd);
				}
				if (curItem.Status == DiffStatus.Matched)
				{
					bool isNewBest = curItem.Length > curBestLength;
					if (isNewBest)
					{
						//this is longest match so far
						curBestIndex = destIndex;
						curBestLength = curItem.Length;
						bestItem = curItem;
					}

					// FastImperfect always jumps over the match, Medium only when the
					// match became the new best, every other level never jumps.
					bool jumpOverMatch =
						(_level == DiffEngineLevel.FastImperfect) ||
						(_level == DiffEngineLevel.Medium && isNewBest);
					if (jumpOverMatch)
					{
						destIndex += curItem.Length - 1; 
					}
				}
			}

			if (curBestIndex < 0)
			{
				//we are done - there are no matches in this span
				return;
			}

			int sourceIndex = bestItem.StartIndex;
			_matchList.Add(DiffResultSpan.CreateNoChange(curBestIndex,sourceIndex,curBestLength));

			if ((destStart < curBestIndex) && (sourceStart < sourceIndex))
			{
				// Data remains below the match on both sides: process lower indexes.
				ProcessRange(destStart, curBestIndex -1,sourceStart, sourceIndex -1);
			}

			int upperDestStart = curBestIndex + curBestLength;
			int upperSourceStart = sourceIndex + curBestLength;
			// ">=" (not ">"): upperXxxStart is the first index after the match, so
			// a range holding exactly one remaining item must still be processed.
			if ((destEnd >= upperDestStart) && (sourceEnd >= upperSourceStart))
			{
				// Data remains above the match on both sides: process upper indexes.
				ProcessRange(upperDestStart,destEnd,upperSourceStart,sourceEnd);
			}
		}

		/// <summary>
		/// Sets the engine level and then compares the two lists.
		/// The level stays selected for later calls on this instance.
		/// </summary>
		/// <param name="source">The original list.</param>
		/// <param name="destination">The changed list.</param>
		/// <param name="level">Speed/quality trade-off to use.</param>
		/// <returns>Elapsed processing time in seconds.</returns>
		/// <exception cref="ArgumentNullException">source or destination is null.</exception>
		public double ProcessDiff(IDiffList source, IDiffList destination,DiffEngineLevel level)
		{
			_level = level;
			return ProcessDiff(source,destination);
		}

		/// <summary>
		/// Compares the two lists using the currently selected level and stores
		/// the matches for a subsequent call to <c>DiffReport</c>.
		/// </summary>
		/// <param name="source">The original list.</param>
		/// <param name="destination">The changed list.</param>
		/// <returns>Elapsed processing time in seconds.</returns>
		/// <exception cref="ArgumentNullException">source or destination is null.</exception>
		public double ProcessDiff(IDiffList source, IDiffList destination)
		{
			if (source == null)
			{
				throw new ArgumentNullException("source");
			}
			if (destination == null)
			{
				throw new ArgumentNullException("destination");
			}

			// UTC is not affected by daylight-saving or time-zone changes that
			// could otherwise distort (or make negative) the measured duration.
			DateTime started = DateTime.UtcNow;
			_source = source;
			_dest = destination;
			_matchList = new ArrayList();
			
			int dcount = _dest.Count();
			int scount = _source.Count();
			
			if ((dcount > 0)&&(scount > 0))
			{
				_stateList = new DiffStateList(dcount);
				ProcessRange(0,dcount - 1,0, scount - 1);
			}

			TimeSpan elapsed = DateTime.UtcNow - started;
			return elapsed.TotalSeconds;
		}

		/// <summary>
		/// Appends the spans describing the unmatched gap between the current
		/// position and the next match (or the end of the lists) to the report.
		/// </summary>
		/// <param name="report">Report being built.</param>
		/// <param name="curDest">First unreported destination index.</param>
		/// <param name="nextDest">Destination index where the gap ends (exclusive).</param>
		/// <param name="curSource">First unreported source index.</param>
		/// <param name="nextSource">Source index where the gap ends (exclusive).</param>
		/// <returns>true when at least one span was added; otherwise false.</returns>
		private bool AddChanges(
			ArrayList report, 
			int curDest,
			int nextDest,
			int curSource,
			int nextSource)
		{
			int diffDest = nextDest - curDest;
			int diffSource = nextSource - curSource;

			if (diffDest > 0)
			{
				if (diffSource > 0)
				{
					// Both sides have a gap: the overlapping part is a replace,
					// whatever is left over is a pure add or a pure delete.
					int minDiff = Math.Min(diffDest,diffSource);
					report.Add(DiffResultSpan.CreateReplace(curDest,curSource,minDiff));
					if (diffDest > diffSource)
					{
						report.Add(DiffResultSpan.CreateAddDestination(curDest + minDiff,diffDest - diffSource)); 
					}
					else if (diffSource > diffDest)
					{
						report.Add(DiffResultSpan.CreateDeleteSource(curSource + minDiff,diffSource - diffDest));
					}
				}
				else
				{
					report.Add(DiffResultSpan.CreateAddDestination(curDest,diffDest)); 
				}
				return true;
			}

			if (diffSource > 0)
			{
				report.Add(DiffResultSpan.CreateDeleteSource(curSource,diffSource));  
				return true;
			}

			return false;
		}

		/// <summary>
		/// Builds the ordered change report for the lists given to the last
		/// <c>ProcessDiff</c> call.
		/// </summary>
		/// <returns>
		/// An <see cref="ArrayList"/> of DiffResultSpan objects. Unchanged spans in
		/// the report are fresh copies, so the stored matches are not modified and
		/// the method can be called repeatedly with the same result.
		/// </returns>
		/// <exception cref="InvalidOperationException">
		/// ProcessDiff has not been called yet.
		/// </exception>
		public ArrayList DiffReport()
		{
			if ((_source == null) || (_dest == null) || (_matchList == null))
			{
				throw new InvalidOperationException("ProcessDiff must be called before DiffReport.");
			}

			ArrayList retval = new ArrayList();
			int dcount = _dest.Count();
			int scount = _source.Count();
			
			//Deal with the special case of empty files
			if (dcount == 0)
			{
				if (scount > 0)
				{
					retval.Add(DiffResultSpan.CreateDeleteSource(0,scount));
				}
				return retval;
			}
			if (scount == 0)
			{
				retval.Add(DiffResultSpan.CreateAddDestination(0,dcount));
				return retval;
			}

			_matchList.Sort();
			int curDest = 0;
			int curSource = 0;
			// The unchanged span most recently placed in the report; adjacent
			// matches are merged into it.
			DiffResultSpan lastReported = null;

			//Process each match record
			foreach (DiffResultSpan drs in _matchList)
			{
				bool gapReported = AddChanges(retval,curDest,drs.DestIndex,curSource,drs.SourceIndex);
				if ((!gapReported) && (lastReported != null))
				{
					// No gap since the previous match: extend the span that is
					// actually in the report so no length is lost when three or
					// more matches are adjacent.
					lastReported.AddLength(drs.Length);
				}
				else
				{
					// Report a copy so merging never alters the stored match list.
					lastReported = DiffResultSpan.CreateNoChange(drs.DestIndex,drs.SourceIndex,drs.Length);
					retval.Add(lastReported);
				}
				curDest = drs.DestIndex + drs.Length;
				curSource = drs.SourceIndex + drs.Length;
			}
			
			//Process any tail end data
			AddChanges(retval,curDest,dcount,curSource,scount);

			return retval;
		}
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
United States United States
Since 2001 I've been writing .NET applications in C# and architecting n-tier applications in the enterprise. Before that I worked as a tech writer for nine years. Don't bother doing the math. I'm old. Ever since I laid eyes on my first Commodore PET, I've been a technologist. I've worked in the software world for fifteen years. I started as a technical writer and learned to code from the best engineers as I worked with them in creating technical documentation. It was then that I learned that writing code was more fun and frankly easier than writing about code. I've been doing both ever since. You can visit my blog at http://www.tsjensen.com/blog.

Comments and Discussions