Click here to Skip to main content
Click here to Skip to main content
Articles » Database » Database » General » Downloads
 
Add your own
alternative version

Implementation of a B-Tree Database Class

, 29 Jun 2006
An article and source code regarding the implmentation of B-Trees in C++.
/***************************************************************************
 btree.cpp  -  Defines the entry point for the console application.

 begin     : April 2004
 copyright : (C) 2004 by Phil Cairns
 email     : philcairns@hotmail.com

 This code may be used in compiled form in any way you desire (including
 commercial use). The code may be redistributed unmodified by any means
 providing it is not sold for profit without the authors written consent,
 and providing that this notice and the authors name and all copyright
 notices remains intact.

 This software is provided "as is" without express or implied warranty. Use
 it at your own risk!
 ***************************************************************************/

#include "stdafx.h"
#include "BTreeDB.h"
#include "time.h"
#if defined(WIN32)
	#include <windows.h>
#endif

// If this is set, we're going to use the bigger keys, records
// and database. Otherwise, we're using the little demo bits.
#define GOBIG 1

// If this flag is set, we're going to (re)build the database
// based on the parameters provided.
#define DOBUILD 1

// If we have a database, then this flag causes the database to
// be searched for all records that match the given search
// criteria. In this case, it's records whose first few chars
// match those given in the object passed to findAll.
#define DOFINDALL 1

// If we have a database, then this flag causes the database to
// be searched until we find a single record that matches the
// search criteria. It then moves sequentially through the 
// database, extracting records whose keys match the search
// criteria. Note that due to the shape of the tree, we are
// unlikely to find the "smallest" matching record.
#define DOFINDSOME 1

// If we have a database, then this flag causes a bunch of
// records to be deleted.
#define DODELETE 1

// If we have a database, then this flag causes the database
// to be traversed from start to end.
#define DOTRAVERSE 0

using namespace std;
using namespace Database;

#if GOBIG
const char* sourceChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; //abcdefghijklmnopqrstuvwxyz1234567890"; //-=~!@#$%^&*()_+{}|[];:,./<>?";
const size_t keyLen = 12;
const size_t dataLen = 28;
const size_t minDegree = 42;
const size_t iterLimit = 50000;
#else
const char* sourceChars = "ABCDEQXZ"; //GHIJKLMNOPQRSTUVWXYZ"; //abcdefghijklmnopqrstuvwxyz1234567890"; //-=~!@#$%^&*()_+{}|[];:,./<>?";
const size_t keyLen = 1;
const size_t dataLen = 5;
const size_t minDegree = 2;
const size_t iterLimit = 20;
#endif
const char* szFileName = "btree.db";

// This is a structure that can be used to store keys.
// In this case, the key is just a buffer full of
// characters. I've made these unsigned to make it
// really clear that although characters are being
// put into the buffer, this is not a string.
struct SampleKey
{
	unsigned char key[keyLen];
};

// This is a structure that can be used to store data.
// In this case, the data is just a buffer full of
// characters. Again, this is *not* a string.
struct SampleData
{
	unsigned char data[dataLen];
};

// Here is the record structure, consisting of a key
// and some data. This is what's stored in the database.
// Note that the database doesn't really differentiate
// between what's key and what's data, except that you
// tell it how big the key is when you create the
// database object.
struct SampleRecord
{
	SampleKey key;
	SampleData data;
};

// Here is a key that we are going to use to test search
// speed.
SampleKey gSearchKey;

// This makes a randome string of data, using either the
// long string or the short one, depending on whether or
// not we've gone big.
string makeGuff()
{
	string ret;

	for (int i = 0; i < sizeof(SampleRecord); i++)
	{
		ret += sourceChars[rand() % strlen(sourceChars)];
	}
	return ret;
}

// This function is the callback function used for the
// traversal. In all cases, a traversal callback takes
// two object ptrs and a depth.
// The first ptr is a pointer to the reference object,
// sorta like the DWORD_PTR data values you find around
// the place.
// The second ptr is a ptr to the current database
// object that we've traversed to.
// The depth is the depth we are in the tree. I've included
// this mainly out of debugging interest since I want to
// print out the tree (rotated 90 degrees). At least, that's
// what I use it for here.
bool traverseCallback(const DbObjPtr& obj, const DbObjPtr& ref, int depth)
{
	char data[1024];
	memset(data, 0, 1024);
	memcpy(data, ref->getData(), ref->getSize());
	printf("%s: ", data);
	memset(data, 0, 1024);
	memcpy(data, obj->getData(), obj->getSize());
	printf("%*s%s\n", depth * 2, "", data);
	return true;
}

// Creates a database file from scratch, and
// inserts a bunch of random data.
int buildDb()
{
	int searchIndex = rand() % iterLimit;

	// Get rid of the old database file.
	_unlink(szFileName);

	// Create the object that we're going to add to the
	// database many, many times.
	DbObjPtr pRef = new DbObj;

	// Create the database of randomly generated records
	printf("creating file---------------------------\n");
	BTreeDB db(szFileName, sizeof(SampleRecord), sizeof(SampleKey), minDegree);
	if (!db.open())
	{
		printf("failed to open the database (1)!\n");
		return -1;
	}

	for (int i = 0; i < iterLimit; i++)
	{
		string guff = makeGuff();
#if !GOBIG
		printf("inserting %s\n", guff.c_str());
#endif
		DbObjPtr pObj = new DbObj(guff);
		db.put(pObj);
		if (i == searchIndex)
		{
			memcpy(&gSearchKey.key[0], guff.c_str(), sizeof(gSearchKey));
		}
	}
#if DOTRAVERSE
	db.traverse(pRef, traverseCallback);
#endif
	printf("file created----------------------------\n");
	return 0;
}

int _tmain(int /*argc*/, _TCHAR* /*argv*/[])
{
#if defined(WIN32)
	SYSTEMTIME st;
	memset(&st, 0, sizeof(SYSTEMTIME));
#endif

	srand((unsigned int)time(0));

#if DOBUILD
#if defined(WIN32)
	GetSystemTime(&st);
	printf("%.2d:%.2d:%.2d.%.3d\n", st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
#endif
	if (0 != buildDb())
	{
		return -1;
	}
#endif

	// This chunk of code creates a DbObj that can be
	// searched for, then searches for it.
#if DOFINDALL
	printf("starting search-------------------------\n");
#if defined(WIN32)
	GetSystemTime(&st);
	printf("%.2d:%.2d:%.2d.%.3d\n", st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
#endif
	{
		// Create the database object and open it.
		BTreeDB db(szFileName);
		if (!db.open())
		{
			printf("failed to open the database (2)!\n");
			return -1;
		}

		// Create the results vector ...
		DBOBJVECTOR results;

		// ... create the thing that we're searching for ...
		DbObjPtr pObj;
#if DOBUILD
		pObj = new DbObj(&gSearchKey.key[0], sizeof(SampleKey));
#else
		pObj = new DbObj("MN");
#endif

		// ... and then do the search.
		db.findAll(pObj, results);

		// Iterate through the results vector, displaying
		// the retrieved values.
		DBOBJVECTOR::iterator dovit = results.begin();
		char data[dataLen + 1];
		while (dovit != results.end())
		{
			memset(data, 0, dataLen + 1);
			memcpy(data, (*dovit)->getData(), dataLen);
			printf("%s\n", data);
			++dovit;
		}
		printf("%d matching results\n", results.size());
#if defined(WIN32)
		GetSystemTime(&st);
		printf("%.2d:%.2d:%.2d.%.3d\n", st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
#endif
		printf("search complete-------------------------\n");
	}
#endif

	// Here, we look for the first instance of a value
	// within the btree, and then find more until we
	// run out of matching objects. Note that we aren't
	// guaranteed to find the "lowest" value since we
	// stop looking here when we find the first matching
	// value, which may be further up the tree than the
	// leftmost (least) value.
#if DOFINDSOME
	printf("starting find some----------------------\n");
#if defined(WIN32)
	GetSystemTime(&st);
	printf("%.2d:%.2d:%.2d.%.3d\n", st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
#endif
	{
		// Create the database object and then open it.
		BTreeDB db(szFileName);
		if (!db.open())
		{
			printf("failed to open the database (3)!\n");
			return -1;
		}

		// Create the object that we're going to search for.
		size_t resultsCtr = 0;
		DbObjPtr pObj = new DbObj("MN");

		// Do the search. If we got something, locn.first
		// will be a ptr to a valid node.
		NodeKeyLocn locn = db.search(pObj);
		if ((TreeNode*)locn.first != 0)
		{
			// Get the record from the database at the
			// given location, and print it out.
			DbObjPtr pRec;
			char data[dataLen + 1];
			if (db.get(locn, pRec))
			{
				memset(data, 0, dataLen + 1);
				memcpy(data, pRec->getData(), dataLen);
				printf("%s\n", data);
			}

			// While we can still get a record from the
			// database, keep getting them.
			while (db.seq(locn, pRec))
			{
				// Find out if the current record matches
				// the value that we're searching for.
				// If it doesn't, we bug out of this loop.
				if (0 != memcmp(pRec->getData(),
					pObj->getData(),
					min(pRec->getSize(), pObj->getSize())))
				{
					break;
				}
				memset(data, 0, dataLen + 1);
				memcpy(data, pRec->getData(), dataLen);
				printf("%s\n", data);
				++resultsCtr;
			}
		}
		printf("%d matching results\n", resultsCtr);
#if defined(WIN32)
		GetSystemTime(&st);
		printf("%.2d:%.2d:%.2d.%.3d\n", st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
#endif
	}
	printf("find some completed---------------------\n");
#endif

	// The deletion code deletes all keys that start with
	// each of the characters in the deletions string.
#if DODELETE
	printf("starting delete-------------------------\n");
#if defined(WIN32)
	GetSystemTime(&st);
	printf("%.2d:%.2d:%.2d.%.3d\n", st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
#endif
	{
		// Create the database object and open it.
		DbObjPtr pRef = new DbObj;
		BTreeDB db(szFileName);
		if (!db.open())
		{
			printf("failed to open the database (4)!\n");
			return -1;
		}

		// What I've got here is a string of characters (duh).
		// Each character is going to be used as the first
		// character of the objects to delete. Note that only
		// Q and X are not in the string. Therefore, only objects
		// starting with Q and X should be left at the end.
		DBOBJVECTOR results;
		DbObjPtr pObj;
		char data[2] = { 0, 0 };

		// Find out how many objects there are starting with Q or X
		size_t leftovers = 0;
		data[0] = 'Q';
		for (int ctr = 0; ctr < 2; ctr++)
		{
			pObj = new DbObj(data);
			db.findAll(pObj, results);
			printf("There are %d items starting with %s\n", results.size(), data);
			leftovers += results.size();
			data[0] = 'X';
		}

		// Do the deletions
#if GOBIG
		//const char* deletions = "PJNAZBYCWDKEVFMUGTHSIRLO";
		//const char* deletions = "ABCDEFGHIJKLMNOPRSTUVWYZ";
		const char* deletions = "ZYWVUTSRPONMLKJIHGFEDCBA";
#else
		const char* deletions = "ABCDEZ";
#endif
		DBOBJVECTOR::iterator dovit;
		const char* thisDel = deletions;
		size_t totalDeletions = 0;
		while (*thisDel)
		{
			data[0] = *thisDel;
			pObj = new DbObj(data);

			// Find all the objects that start with the
			// given letter.
			db.findAll(pObj, results);
			for (dovit = results.begin(); dovit != results.end(); dovit++)
			{
				// Delete the object at this point in the vector.
				db.del(*dovit);
#if !GOBIG
				printf("%.*s ---\n", dataLen, (*dovit)->getData());
				db.traverse(pRef, traverseCallback);
				printf("----------------------------\n");
#endif
			}
#if !GOBIG
			printf("%d objects deleted\n", results.size());
#endif
			totalDeletions += results.size();
			++thisDel;
		}
		db.flush();
		printf("Total objects deleted: %d\n", totalDeletions);

		// Check to see if there was anything not deleted that should have been deleted.
		if (iterLimit - leftovers != totalDeletions)
		{
			printf("There are left overs that should have been deleted ... indicates a BUG :(\n");
			for (thisDel = deletions; *thisDel; ++thisDel)
			{
				data[0] = *thisDel;
				pObj = new DbObj(data);
				db.findAll(pObj, results);
				for (dovit = results.begin(); dovit != results.end(); dovit++)
				{
					printf("%.*s\n", dataLen, (*dovit)->getData());
				}
			}
		}
#if defined(WIN32)
		GetSystemTime(&st);
		printf("%.2d:%.2d:%.2d.%.3d\n", st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
#endif
	}
	printf("ending delete---------------------------\n");
#endif

	// Traversal is pretty simple. You need to provide
	// a callback function that will be called for each
	// item in the database.
#if DOTRAVERSE
	printf("starting traverse-----------------------\n");
#if defined(WIN32)
	GetSystemTime(&st);
	printf("%.2d:%.2d:%.2d.%.3d\n", st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
#endif
	{
		DbObjPtr pRef = new DbObj;
		BTreeDB db(szFileName);
		if (!db.open())
		{
			printf("failed to open the database (5)!\n");
			return -1;
		}

		db.traverse(pRef, traverseCallback);
#if defined(WIN32)
		GetSystemTime(&st);
		printf("%.2d:%.2d:%.2d.%.3d\n", st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
#endif
	}
	printf("traverse completed----------------------\n");
#endif

	return 0;
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here

Share

About the Author

_oti

Australia Australia
No Biography provided

| Advertise | Privacy | Mobile
Web04 | 2.8.140827.1 | Last Updated 29 Jun 2006
Article Copyright 2004 by _oti
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid