Click here to Skip to main content
15,891,184 members
Articles / Containers / Virtual Machine

Statistical parsing of English sentences

Rate me:
Please Sign up or sign in to vote.
4.95/5 (88 votes)
13 Dec 200617 min read 993.5K   23.6K   239  
Shows how to generate parse trees for English language sentences, using a C# port of OpenNLP, a statistical natural language parsing library.
//Copyright (C) 2005 Richard J. Northedge
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

//This file is based on the Heap.java source file found in the
//original java implementation of OpenNLP.  That source file contains the following header:

//Copyright (C) 2005 Thomas Morton
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

using System;
using System.Collections;

namespace OpenNLP.Tools.Util
{
	/// <summary>
	/// This class implements the heap interface using an ArrayList as the underlying
	/// data structure.  This heap allows values which are equals to be inserted, however
	/// the order in which they are extracted is arbitrary.
	/// </summary>
	public class ListHeap : IHeap, IEnumerable
	{
		private ArrayList mList;
		private IComparer mComparer;
		private int mSize;
		private object mMax = null;
		
		/// <summary>
		/// True if the heap is empty.
		/// </summary>
		public virtual bool IsEmpty
		{
			get
			{
				return (mList.Count == 0);
			}
			
		}
	
		/// <summary>
		/// Creates a new heap with the specified size using the sorted based on the
		/// specified comparator.
		/// </summary>
		/// <param name="size">
		/// The size of the heap.
		/// </param>
		/// <param name="comparer">
		/// The comparer to be used to sort heap elements.
		/// </param>
		public ListHeap(int size, IComparer comparer)
		{
			mSize = size;
			mComparer = comparer;
			mList = new ArrayList(size);
		}
		
		/// <summary>
		/// Createa a new heap of the specified size.
		/// </summary>
		/// <param name="size">
		/// The size of the new heap.
		/// </param>
		public ListHeap(int size) : this(size, null)
		{
		}
		
		private int ParentIndex(int index)
		{
			return (index - 1) / 2;
		}
		
		private int LeftIndex(int index)
		{
			return (index + 1) * 2 - 1;
		}
		
		private int RightIndex(int index)
		{
			return (index + 1) * 2;
		}
		
		/// <summary>
		/// The size of the heap.
		/// </summary>
		public virtual int Size
		{
			get
			{
				return mList.Count;
			}
			set
			{
				if (value > mList.Count)
				{
					return ;
				}
				else
				{
					ArrayList newList = new ArrayList(value);
					for (int currentItem = 0; currentItem < value; currentItem++)
					{
						newList.Add(this.Extract());
					}
					mList = newList;
				}
			}
		}
		
		private void Swap(int firstIndex, int secondIndex)
		{
			object firstObject = mList[firstIndex];
			object secondObject = mList[secondIndex];
			
			mList[secondIndex] = firstObject;
			mList[firstIndex] = secondObject;
		}
		
		private bool LessThan(object firstObject, object secondObject)
		{
			if (mComparer != null)
			{
				return (mComparer.Compare(firstObject, secondObject) < 0);
			}
			else
			{
				return (((IComparable) firstObject).CompareTo(secondObject) < 0);
			}
		}
		
		private bool GreaterThan(object firstObject, object secondObject)
		{
			if (mComparer != null)
			{
				return (mComparer.Compare(firstObject, secondObject) > 0);
			}
			else
			{
				return (((IComparable) firstObject).CompareTo(secondObject) > 0);
			}
		}
		
		private void Heapify(int index)
		{
			while (true)
			{
				int left = LeftIndex(index);
				int right = RightIndex(index);
				int smallest;
				
				if (left < mList.Count && LessThan(mList[left], mList[index]))
				{
					smallest = left;
				}
				else
				{
					smallest = index;
				}
				
				if (right < mList.Count && LessThan(mList[right], mList[smallest]))
				{
					smallest = right;
				}
				
				if (smallest != index)
				{
					Swap(smallest, index);
					index = smallest;
				}
				else
				{
					break;
				}
			}
		}
		
		public virtual object Extract()
		{
			if (mList.Count == 0)
			{
				throw new NotSupportedException("Heap Underflow");
			}
			object mMax = mList[0];
			int last = mList.Count - 1;
			if (last != 0)
			{
				mList[0] = mList[last];
				mList.RemoveAt(last);
				Heapify(0);
			}
			else
			{
				mList.RemoveAt(last);
			}
			
			return mMax;
		}
		
		/// <summary>
		/// Resets the heap size to its original value.
		/// </summary>
		public virtual void ResetSize()
		{
			this.Size = mSize;
		}
				
		/// <summary>
		/// Gets the object on top of the heap.
		/// </summary>
		public virtual object Top
		{
			get
			{
				if (mList.Count == 0)
				{
					throw new NotSupportedException("Heap Underflow");
				}
				return (mList[0]);
			}
		}
		
		public virtual void Add(object item)
		{
			/* keep track of min to prevent unnecessary insertion */
			if (mMax == null)
			{
				mMax = item;
			}
			else if (GreaterThan(item, mMax))
			{
				if (mList.Count < mSize)
				{
					mMax = item;
				}
				else
				{
					return;
				}
			}
			mList.Add(item);
			
			int index = mList.Count - 1;
			
			//percolate new node to correct position in heap.
			while (index > 0 && GreaterThan(mList[ParentIndex(index)], item))
			{
				mList[index] = mList[ParentIndex(index)];
				index = ParentIndex(index);
			}
			
			mList[index] = item;
		}
		
		public virtual void Clear()
		{
			mList.Clear();
		}
		
		public virtual IEnumerator GetEnumerator()
		{
			return (mList.GetEnumerator());
		}
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
United Kingdom United Kingdom
Richard Northedge is a senior developer with a UK Microsoft Gold Partner company. He has a postgraduate degree in English Literature, has been programming professionally since 1998 and has been an MCSD since 2000.

Comments and Discussions