Click here to Skip to main content
Click here to Skip to main content
 
Add your own
alternative version
Go to top

UDDI Explorer: Tool for searching web services

, 20 Dec 2005
Tool for searching web service(s) and viewing their WSDL information.
uddiexplorer_demo.zip
UDDIExplorer_demo
WSDLParser.dll
microsoft.uddi.sdk.dll
ServiceRanking.dll
UDDI Explorer.exe
WSDLCache
Weather data from the National Digital Forecast Database NOAA's NWS.wsdl
uddiexplorer_src.zip
UDDIExplorer_src
ServiceRanking
ServiceRanking.csproj.user
UDDI Explorer
microsoft.uddi.sdk.dll
UDDI Explorer.csproj.user
UDDI Explorer.resharperoptions
WSDLParser
App.ico
WSDLParser.csproj.user
/*
 * Suffix stripping
 * Martin Porter Stemming algorithm Finite state transducer, 
 * Copyright (c) by Martin Porter
*/

using System;
using System.Runtime.InteropServices;

namespace ServiceRanking
{
	/// <summary>
	/// Contract for a stemmer: maps a term to its stem.
	/// </summary>
	public interface StemmerInterface 
	{
		/// <summary>
		/// Returns the stem of the term <paramref name="s"/>.
		/// </summary>
		string stemTerm( string s );
	}

	/// <summary>
	/// Implementation of Martin Porter's suffix-stripping (stemming) algorithm.
	/// Thanks Martin.
	/// 
	/// The word being stemmed lives in the character buffer _b; the steps
	/// shorten or rewrite its tail in place. Vowels and suffixes are matched
	/// against lower-case letters only, and input is not case-normalised here,
	/// so callers should pass lower-case terms.
	/// 
	/// The working buffer and offsets are instance fields that every call
	/// mutates, so a single instance should not be shared between concurrent
	/// callers.
	/// </summary>
	[ClassInterface( ClassInterfaceType.None )]
	public class PorterStemmer : StemmerInterface
	{
		// Unit of size whereby _b is increased.
		private const int INC = 200;

		// Buffer holding the word being stemmed.
		private char[] _b;
		private int i,     // offset into _b (number of input characters added so far)
			i_end, // offset to end of stemmed word 
			j, k;  // k: index of the last character of the current word; j: general offset set by ends()

		/// <summary>
		/// Creates a stemmer with an empty buffer of INC characters.
		/// </summary>
		public PorterStemmer() 
		{
			_b = new char[INC];
			i = 0;
			i_end = 0;
		}

		/// <summary>
		/// Stems <paramref name="s"/> and returns the result as a new string.
		/// </summary>
		/// <param name="s">The term to stem; must not be null.</param>
		/// <returns>The stemmed term. Terms shorter than three characters are returned unchanged.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
		public string stemTerm( string s )
		{
			// Fail at the entry point instead of with a NullReferenceException inside setTerm.
			if (s == null)
				throw new ArgumentNullException("s");

			setTerm( s );
			stem();
			return getTerm();
		}

		/// <summary>
		/// Replaces the buffer with the characters of <paramref name="s"/> and
		/// sets the input offset to its length.
		/// </summary>
		private void setTerm( string s )
		{
			_b = s.ToCharArray();
			i = _b.Length;
		}

		/// <summary>
		/// Returns the first i_end characters of the buffer, i.e. the result of
		/// the most recent call to stem().
		/// </summary>
		public string getTerm()
		{
			return new String(_b, 0, i_end);
		}

		/// <summary>
		/// Replaces _b with a buffer of <paramref name="newSize"/> characters,
		/// keeping the first i characters.
		/// </summary>
		private void grow(int newSize)
		{
			char[] new_b = new char[newSize];
			Array.Copy(_b, 0, new_b, 0, i);
			_b = new_b;
		}

		/// <summary>
		/// Appends one character to the word being built, growing the buffer
		/// by INC when it is full.
		/// </summary>
		public void add(char ch) 
		{
			if (i == _b.Length)
				grow(i + INC);
			_b[i++] = ch;
		}

		/// <summary>
		/// Appends the first <paramref name="wLen"/> characters of
		/// <paramref name="w"/> to the word being built, growing the buffer
		/// when they would reach its end.
		/// </summary>
		public void add(char[] w, int wLen) 
		{
			if (i + wLen >= _b.Length)
				grow(i + wLen + INC);
			for (int c = 0; c < wLen; c++)
				_b[i++] = w[c];
		}

		/// <summary>
		/// Same as getTerm(): the result of the most recent call to stem().
		/// </summary>
		public override string ToString() 
		{
			return getTerm();
		}

		/// <summary>
		/// Returns the length of the word resulting from the stemming process.
		/// </summary>
		public int getResultLength() 
		{
			return i_end;
		}

		/// <summary>
		/// Returns the internal buffer itself (not a copy). Only the first
		/// getResultLength() characters belong to the result.
		/// </summary>
		public char[] getResultBuffer() 
		{
			return _b;
		}

		/// <summary>
		/// cons(i) is true when _b[i] is a consonant.
		/// </summary>
		private bool cons(int i) 
		{
			switch (_b[i]) 
			{
				case 'a': case 'e': case 'i': case 'o': case 'u':
					return false;
				case 'y':
					// 'y' counts as a consonant at the start of the word or
					// when the preceding letter is a vowel.
					return (i == 0) || !cons(i - 1);
				default:
					return true;
			}
		}

		/// <summary>
		/// m() counts the vowel-to-consonant transitions (VC sequences) in
		/// _b[0..j]: an optional leading consonant run is skipped, then every
		/// vowel run that is followed by a consonant adds one.
		/// </summary>
		private int m() 
		{
			int n = 0;
			int i = 0;
			// Skip the leading consonants.
			while (true) 
			{
				if (i > j) return n;
				if (!cons(i)) break;
				i++;
			}
			i++;
			while (true) 
			{
				// Skip the vowel run.
				while (true) 
				{
					if (i > j) return n;
					if (cons(i)) break;
					i++;
				}
				i++;
				n++;
				// Skip the consonant run.
				while (true) 
				{
					if (i > j) return n;
					if (!cons(i)) break;
					i++;
				}
				i++;
			}
		}

		/// <summary>
		/// vowelinstem() is true when _b[0..j] contains a vowel.
		/// </summary>
		private bool vowelinstem() 
		{
			for (int pos = 0; pos <= j; pos++)
				if (!cons(pos))
					return true;
			return false;
		}

		/// <summary>
		/// doublec(j) is true when _b[j-1] and _b[j] are the same consonant.
		/// </summary>
		private bool doublec(int j) 
		{
			if (j < 1)
				return false;
			if (_b[j] != _b[j-1])
				return false;
			return cons(j);
		}

		/// <summary>
		/// cvc(i) is true when _b[i-2], _b[i-1], _b[i] have the form
		/// consonant - vowel - consonant and the last consonant is not
		/// w, x or y.
		/// </summary>
		private bool cvc(int i) 
		{
			if (i < 2 || !cons(i) || cons(i-1) || !cons(i-2))
				return false;
			char ch = _b[i];
			if (ch == 'w' || ch == 'x' || ch == 'y')
				return false;
			return true;
		}

		/// <summary>
		/// ends(s) is true when _b[0..k] ends with the string s. On success j is
		/// set to the index of the last character before the suffix; on failure
		/// j is left untouched.
		/// </summary>
		private bool ends(String s) 
		{
			int l = s.Length;
			int o = k - l + 1;
			if (o < 0)
				return false;
			for (int pos = 0; pos < l; pos++)
				if (_b[o + pos] != s[pos])
					return false;
			j = k - l;
			return true;
		}

		/// <summary>
		/// setto(s) sets _b[j+1..] to the characters of the string s and
		/// readjusts k so that the word ends after them.
		/// </summary>
		private void setto(String s) 
		{
			int l = s.Length;
			int o = j + 1;
			for (int pos = 0; pos < l; pos++)
				_b[o + pos] = s[pos];
			k = j + l;
		}

		/// <summary>
		/// r(s) applies setto(s) only when m() > 0 for the stem _b[0..j].
		/// </summary>
		private void r(String s) 
		{
			if (m() > 0)
				setto(s);
		}

		/// <summary>
		/// step1() gets rid of plurals and -ed or -ing, e.g.
		/// caresses -> caress, ponies -> poni, agreed -> agree, hopping -> hop.
		/// </summary>
		private void step1() 
		{
			if (_b[k] == 's') 
			{
				if (ends("sses"))
					k -= 2;
				else if (ends("ies"))
					setto("i");
				else if (_b[k-1] != 's')
					k--;
			}
			if (ends("eed")) 
			{
				if (m() > 0)
					k--;
			} 
			else if ((ends("ed") || ends("ing")) && vowelinstem()) 
			{
				// Drop the suffix, then repair the stem where needed.
				k = j;
				if (ends("at"))
					setto("ate");
				else if (ends("bl"))
					setto("ble");
				else if (ends("iz"))
					setto("ize");
				else if (doublec(k)) 
				{
					// Undouble the final consonant unless it is l, s or z.
					k--;
					char ch = _b[k];
					if (ch == 'l' || ch == 's' || ch == 'z')
						k++;
				}
				else if (m() == 1 && cvc(k))
					setto("e");
			}
		}

		/// <summary>
		/// step2() turns a terminal y into i when there is another vowel in the stem.
		/// </summary>
		private void step2() 
		{
			if (ends("y") && vowelinstem())
				_b[k] = 'i';
		}

		/// <summary>
		/// step3() maps double suffixes to single ones, e.g. -ization
		/// ( = -ize plus -ation ) maps to -ize. The string before the suffix
		/// must give m() > 0. The switch on the penultimate letter only
		/// narrows down which suffixes need to be tried.
		/// </summary>
		private void step3() 
		{
			// For Bug 1: _b[k-1] below needs k >= 1.
			if (k == 0)
				return;
			
			switch (_b[k-1]) 
			{
				case 'a':
					if (ends("ational")) { r("ate"); break; }
					if (ends("tional")) { r("tion"); break; }
					break;
				case 'c':
					if (ends("enci")) { r("ence"); break; }
					if (ends("anci")) { r("ance"); break; }
					break;
				case 'e':
					if (ends("izer")) { r("ize"); break; }
					break;
				case 'l':
					if (ends("bli")) { r("ble"); break; }
					if (ends("alli")) { r("al"); break; }
					if (ends("entli")) { r("ent"); break; }
					if (ends("eli")) { r("e"); break; }
					if (ends("ousli")) { r("ous"); break; }
					break;
				case 'o':
					if (ends("ization")) { r("ize"); break; }
					if (ends("ation")) { r("ate"); break; }
					if (ends("ator")) { r("ate"); break; }
					break;
				case 's':
					if (ends("alism")) { r("al"); break; }
					if (ends("iveness")) { r("ive"); break; }
					if (ends("fulness")) { r("ful"); break; }
					if (ends("ousness")) { r("ous"); break; }
					break;
				case 't':
					if (ends("aliti")) { r("al"); break; }
					if (ends("iviti")) { r("ive"); break; }
					if (ends("biliti")) { r("ble"); break; }
					break;
				case 'g':
					if (ends("logi")) { r("log"); break; }
					break;
				default :
					break;
			}
		}

		/// <summary>
		/// step4() deals with -ic-, -full, -ness etc. using a similar strategy
		/// to step3, this time switching on the last letter.
		/// </summary>
		private void step4() 
		{
			switch (_b[k]) 
			{
				case 'e':
					if (ends("icate")) { r("ic"); break; }
					if (ends("ative")) { r(""); break; }
					if (ends("alize")) { r("al"); break; }
					break;
				case 'i':
					if (ends("iciti")) { r("ic"); break; }
					break;
				case 'l':
					if (ends("ical")) { r("ic"); break; }
					if (ends("ful")) { r(""); break; }
					break;
				case 's':
					if (ends("ness")) { r(""); break; }
					break;
			}
		}

		/// <summary>
		/// step5() takes off -ant, -ence etc. when the remaining stem has
		/// m() > 1. A case that finds no matching suffix returns without
		/// changing the word.
		/// </summary>
		private void step5() 
		{
			// For Bug 1: _b[k-1] below needs k >= 1.
			if (k == 0)
				return;

			switch ( _b[k-1] ) 
			{
				case 'a':
					if (ends("al")) break;
					return;
				case 'c':
					if (ends("ance")) break;
					if (ends("ence")) break;
					return;
				case 'e':
					if (ends("er")) break;
					return;
				case 'i':
					if (ends("ic")) break;
					return;
				case 'l':
					if (ends("able")) break;
					if (ends("ible")) break;
					return;
				case 'n':
					if (ends("ant")) break;
					if (ends("ement")) break;
					if (ends("ment")) break;
					// element etc. not stripped before the m 
					if (ends("ent")) break;
					return;
				case 'o':
					// -ion is only removed after s or t; j >= 0 fixes Bug 2 
					if (ends("ion") && j >= 0 && (_b[j] == 's' || _b[j] == 't')) break;
					// takes care of -ous 
					if (ends("ou")) break;
					return;
				case 's':
					if (ends("ism")) break;
					return;
				case 't':
					if (ends("ate")) break;
					if (ends("iti")) break;
					return;
				case 'u':
					if (ends("ous")) break;
					return;
				case 'v':
					if (ends("ive")) break;
					return;
				case 'z':
					if (ends("ize")) break;
					return;
				default:
					return;
			}
			if (m() > 1)
				k = j;
		}

		/// <summary>
		/// step6() removes a final -e when m() > 1, or when m() == 1 and the
		/// letters before the e are not consonant-vowel-consonant; it then
		/// changes a final -ll to -l when m() > 1.
		/// </summary>
		private void step6() 
		{
			j = k;
			
			if (_b[k] == 'e') 
			{
				int a = m();
				if (a > 1 || (a == 1 && !cvc(k-1)))
					k--;
			}
			if (_b[k] == 'l' && doublec(k) && m() > 1)
				k--;
		}

		/// <summary>
		/// Stems the word currently in the buffer (its first i characters).
		/// Words of one or two characters are left unchanged. Afterwards the
		/// result can be read through getTerm(), ToString(), getResultLength()
		/// and getResultBuffer(), and the input offset is reset to 0 so that a
		/// new word can be added.
		/// </summary>
		public void stem() 
		{
			k = i - 1;
			if (k > 1) 
			{
				step1();
				step2();
				step3();
				step4();
				step5();
				step6();
			}
			i_end = k + 1;
			i = 0;
		}


	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here

Share

About the Author

Thanh Dao
Software Developer
Vietnam
I'm still alive... but have temporarily moved to working on mobile & web stuff (J2ME/BREW/PHP/Flash... something not M$). Things have been very busy, and probably will continue to be, so I don't have a chance to maintain and respond. I hope to have time to write again, because many ideas for WPF & Silverlight are waiting. Wish me luck :)
 
FYI:
- MESHSimPack project (C# library for measuring similarity among concepts of the MESH ontology):
http://sourceforge.net/projects/meshsimpack.

| Advertise | Privacy | Mobile
Web04 | 2.8.140921.1 | Last Updated 21 Dec 2005
Article Copyright 2005 by Thanh Dao
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid