Click here to Skip to main content
15,886,199 members
Articles / Programming Languages / C++

Patricia and Huffman, Sitting in a Trie

Rate me:
Please Sign up or sign in to vote.
4.54/5 (24 votes)
27 Dec 200410 min read 137.2K   2.2K   39  
Article demonstrates a text-based Patricia trie and adds new text-compression features.
/*

"Text-Based Patricia Trie"

Copyright (C) 2004 by Michael Walter Jaworski

Licensed under the Academic Free License version 1.2

*/

#ifndef PATRICIA_TRIE_H
#define PATRICIA_TRIE_H

#define NUMBER_OF_LETTERS_IN_ALPHABET		26				//number of letters in the alphabet
#define MAX_KEY_BITS										32				//if you change the key type you need to change this to 
																									//the number of bits in that type or if you use a machine 
																									//that is not 32 bit
#define EMPTY_NODE											0					//like null
#define CONV_FACTOR_ASCII_TO_INT				65				//how to convert upper to number
#define LOWER_TO_UPPER_FACTOR						32				//how to convert lower to upper
#define START_BIT_COUNT									0					//starts the key splitting level (check the zero bit for 1 or 0 first)
#define ASSOCIATED_CLASS								PayloadC	//the class associated with the text stored in the pat class
#define END_OF_STRING										'\0'

//
//
//!!comment this out if you do not want printing capabilities!!
//*************************************************************************************
#define DEBUG														1					
//*************************************************************************************
//!!comment this out if you do not want printing capabilities!!

//you could have an instruction of the letter that is represented by 1, so the max number of bits
//the plus one is for the end of string, just in case we really do have all e's		
#define MAX_STRING_FOR_KEY							MAX_KEY_BITS+1 

//for print functions only
#ifdef DEBUG
	#include <iostream.h>
#endif


//the key could be something else, like a long if you wanted more bits (more space though)
typedef unsigned int keyType;
typedef char bitPositionType;

///////////////////////////////////////////////////////////////////////////////////////////
//PURPOSE::: the data that is associated with a key, you can store anything in this 
//:::::::::: class that you want to link to your search key
//:::::::::: 
//:::::::::: 
//:::::::::: 
//NOTES::::: use polymorphism and make your own class or just edit this one....
//:::::::::: 
//:::::::::: 
///////////////////////////////////////////////////////////////////////////////////////////
class PayloadC
{
	private:

	public:

		PayloadC ()		{}
		~PayloadC ()	{}
};


///////////////////////////////////////////////////////////////////////////////////////////
//PURPOSE::: every node in the tree is a collection of either left and right links
//:::::::::: or the payload and key. the key has to be stored to check that the text is
//:::::::::: correct, if you want a mud deal where you could type the letter t for 
//:::::::::: tell, then you do not need to store the key and could take the full check out
//:::::::::: of the code
//:::::::::: 
//:::::::::: 
//NOTES::::: a node will either store links to nodes or the payload
//:::::::::: 
//:::::::::: 
///////////////////////////////////////////////////////////////////////////////////////////
class PatriciaNodeC
{
	public:

		//if payload is given, then create a leaf
		PatriciaNodeC (ASSOCIATED_CLASS* _payload, keyType _key)
		{ leaf.payload = _payload; leaf.key = _key; is_leaf = true; }

		//if payload is not given, then assume we are internal
		PatriciaNodeC (void)
		{ internal.left = EMPTY_NODE; internal.right = EMPTY_NODE; is_leaf = false; }

		//delete the payload, 
		~PatriciaNodeC ()
		{ }

		//we are either internal or a leaf (we can overlap the data to save space)
		union
		{
			struct
			{
				PatriciaNodeC* left;
				PatriciaNodeC* right;			
			} 
			internal;
			
			struct
			{
				ASSOCIATED_CLASS* payload;
				keyType key;
			}
			leaf;
		};

		//remember if we are a leaf or not
		bool is_leaf;
};


///////////////////////////////////////////////////////////////////////////////////////////
//PURPOSE::: to store strings of text and associate that text with a container class. 
//:::::::::: 
//:::::::::: 
//:::::::::: 
//:::::::::: 
//NOTES::::: odd numbers are on the right
//:::::::::: even on the left
//:::::::::: and so on... (2, 4, 8) multiples of 2 break it down
///////////////////////////////////////////////////////////////////////////////////////////
class PatriciaTrieC
{
	private:

		//the root node of the PAT trie
		PatriciaNodeC* head;

		//the huffman code used to convert text to keys (or numbers)
		static char huffman_code [NUMBER_OF_LETTERS_IN_ALPHABET];

	protected:

		//***********************************************************************************
		PatriciaNodeC* insert (PatriciaNodeC*& _n, ASSOCIATED_CLASS*& _payload, keyType _key, bitPositionType _bp);

		ASSOCIATED_CLASS* search (PatriciaNodeC* _n, keyType _key, bitPositionType _bp);

		ASSOCIATED_CLASS* remove (PatriciaNodeC*& _n, keyType _key, bitPositionType _bp);
		//***********************************************************************************

		//***********************************************************************************
		void clear (PatriciaNodeC*& kill_me);
		
		#ifdef DEBUG
			
			void print (PatriciaNodeC* _n, keyType _bp);

		#endif
		//***********************************************************************************

		//***********************************************************************************
		bool IsBitOn (keyType _key, bitPositionType _bp)
		{ return ((_key >> _bp) & 1); }
		//***********************************************************************************

	public:

		//***********************************************************************************
		keyType BuildKey (char* _txt);
		//***********************************************************************************

		//***********************************************************************************
		PatriciaTrieC (void)	{ head = EMPTY_NODE; }

		~PatriciaTrieC (void) { clear (head); }
		//***********************************************************************************

		//***********************************************************************************
		bool Insert (char* _key, ASSOCIATED_CLASS* _payload = EMPTY_NODE)
		{ return (insert (head, _payload, BuildKey (_key), START_BIT_COUNT) != EMPTY_NODE); }
		
		bool Insert (keyType _key, ASSOCIATED_CLASS* _payload = EMPTY_NODE)
		{ return (insert (head, _payload, _key, START_BIT_COUNT) != EMPTY_NODE); }
		//***********************************************************************************

		//***********************************************************************************
		ASSOCIATED_CLASS* Search (char* _key)
		{ return search (head, BuildKey (_key), START_BIT_COUNT); }
		
		ASSOCIATED_CLASS* Search (keyType _key)
		{ return search (head, _key, START_BIT_COUNT); }
		//***********************************************************************************

		//***********************************************************************************
		ASSOCIATED_CLASS* Remove (char* _key)
		{	return remove (head, BuildKey (_key), START_BIT_COUNT); }

		ASSOCIATED_CLASS* Remove (keyType _key)
		{	return remove (head, _key, START_BIT_COUNT); }
		//***********************************************************************************

		//***********************************************************************************
		#ifdef DEBUG
		
			void Print (void)
			{ print (head, START_BIT_COUNT); cout << endl; }
		
		#endif

		void Clear (void)
		{ clear (head); head = EMPTY_NODE; }
		//***********************************************************************************
};

#endif PATRICIA_TRIE_H

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Software Developer
United Kingdom United Kingdom
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions