Click here to Skip to main content
Click here to Skip to main content
Articles » Database » NoSQL » General » Downloads
 
Add your own
alternative version

RaptorDB - the Key Value Store

, 22 Jan 2012 CPOL
Smallest, fastest embedded nosql persisted dictionary using b+tree or MurMur hash indexing. (Now with Hybrid WAH bitmap indexes)
RaptorDB_v1.0.zip
RaptorDB
testing
RaptorDB_v1.1.zip
Indexes
Storage
Views
RaptorDB_v1.2.zip
RaptorDB_v1.3.zip
RaptorDB_v1.4.1.zip
Properties
RaptorDB_v1.4.zip
RaptorDB_v1.5.1.zip
RaptorDB_v1.5.2.zip
RaptorDB_v1.5.zip
RaptorDB_v1.6.zip
RaptorDB_v1.7.5.zip
UnitTests
Properties
UnitTests.nunit
RaptorDB_v1.7.zip
_svn
all-wcprops
dir-prop-base
entries
prop-base
props
text-base
AssemblyInfo.cs.svn-base
tmp
prop-base
props
text-base
UnitTests.nunit
RaptorDB_v1.8.zip
DataTypes
UnitTests release.nunit
UnitTests.nunit
UnitTests.nunit
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;

namespace RaptorDB
{
    internal class IndexFile
    {
        FileStream _file = null;
        private byte[] _FileHeader = new byte[] {
            (byte)'M', (byte)'G', (byte)'I',
            0,               // 3 = [keysize]   max 255
            0,0,             // 4 = [node size] max 65536
            0,0,0,0,         // 6 = [root page num]
            0,               // 10 = Index file type : 0=BTREE 1=HASH
            0,0,0,0          // 11 = bucket count 
            };

        private byte[] _BlockHeader = new byte[] { 
            (byte)'P',(byte)'A',(byte)'G',(byte)'E',
            0,               // 4 = [Flag] = 0=free 1=leaf 2=root 4=revisionpage --8=bucket 16=revisionbucket
            0,0,             // 5 = [item count] 
            0,0,0,0,         // 7 = [parent page number] / [bucket number]
            0,0,0,0          // 11 = [right page number] / [next page number]
        };

        internal byte _maxKeySize;
        internal short _PageNodeCount = 50;
        private int _LastPageNumber = 0;
        private int _PageLength;
        private int _CurrentRootPage = 0;
        private int _rowSize;

        internal int _DuplicatesPerPage = 1;
        internal int _BucketCount = 11;
        internal INDEXTYPE _indexType = INDEXTYPE.BTREE;

        public IndexFile(string filename, byte maxKeySize, short pageNodeCount, int bucketcount, INDEXTYPE type)
        {
            _maxKeySize = maxKeySize;
            _PageNodeCount = pageNodeCount;
            _rowSize = (_maxKeySize + 1 + 4 + 4);
            _BucketCount = bucketcount;
            _indexType = type;

            string path = Path.GetDirectoryName(filename);
            Directory.CreateDirectory(path);
            if (File.Exists(filename))
            {
                // if file exists open and read header
                _file = File.Open(filename, FileMode.Open, FileAccess.ReadWrite);
                ReadFileHeader();
                // compute last page number from file length 
                _PageLength = (_BlockHeader.Length + _rowSize * (_PageNodeCount));
                _LastPageNumber = (int)((_file.Length - _FileHeader.Length - bucketcount * 4) / _PageLength);
            }
            else
            {
                // else create new file
                _file = File.Open(filename, FileMode.Create, FileAccess.ReadWrite);

                _PageLength = (_BlockHeader.Length + _rowSize * (_PageNodeCount));

                CreateFileHeader();
                _LastPageNumber = (int)((_file.Length - _FileHeader.Length - bucketcount * 4) / _PageLength);
            }
            _DuplicatesPerPage = (_PageLength - _BlockHeader.Length - 4) / 4;
            // FIX : if index chk exists rollback changes to index
        }

        #region [  C o m m o n  ]
        public long CountNodes(Node root)
        {
            long c = 0;
            Node n = root;
            if (n == null)
                return 0;
            while (n.isLeafPage == false)
                n = LoadNodeFromPageNumber(n.ChildPointers[0].RecordNum);
            // n = first node
            int right = n.RightPageNumber;
            c += n.Count;
            while (right != -1)
            {
                right = NodeHeaderCount(right, ref c);
                // TODO : count duplicates also?
            }
            return c;
        }

        private int NodeHeaderCount(int nextpage, ref long c)
        {
            SeekPage(nextpage);
            byte[] b = new byte[_BlockHeader.Length];
            _file.Read(b, 0, _BlockHeader.Length);

            if (b[0] == _BlockHeader[0] && b[1] == _BlockHeader[1] && b[2] == _BlockHeader[2] && b[3] == _BlockHeader[3])
            {
                short count = Helper.ToInt16(b, 5);
                int rightpage = Helper.ToInt32(b, 11);
                c += count;
                return rightpage;
            }
            return 0;
        }

        public long CountBuckets()
        {
            long c = 0;
            foreach (int l in GetBucketList())
            {
                int next = -1;
                if (l != -1)
                {
                    next = BucketHeaderCount(l, ref c);
                    while (next != -1)
                        next = BucketHeaderCount(next, ref c);
                }
            }

            return c;
        }

        private int BucketHeaderCount(int nextbucket, ref long c)
        {
            SeekPage(nextbucket);
            byte[] b = new byte[_BlockHeader.Length];
            _file.Read(b, 0, _BlockHeader.Length);

            if (b[0] == _BlockHeader[0] && b[1] == _BlockHeader[1] && b[2] == _BlockHeader[2] && b[3] == _BlockHeader[3])
            {
                short count = Helper.ToInt16(b, 5);
                int nextpage = Helper.ToInt32(b, 11);
                c += count;
                return nextpage;
            }
            return 0;
        }

        private void CreateFileHeader()
        {
            // max key size
            byte[] b = Helper.GetBytes(_maxKeySize, false);
            Buffer.BlockCopy(b, 0, _FileHeader, 3, 1);
            // page node count
            b = Helper.GetBytes(_PageNodeCount, false);
            Buffer.BlockCopy(b, 0, _FileHeader, 4, 2);
            // bucket count
            b = Helper.GetBytes(_BucketCount, false);
            Buffer.BlockCopy(b, 0, _FileHeader, 11, 4);
            if (_indexType == INDEXTYPE.HASH)
                _FileHeader[10] = 1;

            _file.Seek(0L, SeekOrigin.Begin);
            _file.Write(_FileHeader, 0, _FileHeader.Length);
            //file.Flush();

            // write bucket lookup
            b = new byte[_BucketCount * 4];
            for (int i = 0; i < b.Length; i++)
                b[i] = 255;
            _file.Write(b, 0, b.Length);
            _file.Flush();

            if (_indexType == INDEXTYPE.BTREE)
            {
                Node n = new Node(3, -1, new List<KeyPointer>(), null, 0, -1);
                SaveNode(n);
            }
        }

        private void WriteHeader(long rootpage)
        {
            byte[] b = Helper.GetBytes(rootpage, false);
            Buffer.BlockCopy(b, 0, _FileHeader, 6, b.Length);
            _file.Seek(0L, SeekOrigin.Begin);
            _file.Write(_FileHeader, 0, _FileHeader.Length);
            _file.Flush();
        }

        private bool ReadFileHeader()
        {
            _file.Seek(0L, SeekOrigin.Begin);
            byte[] b = new byte[_FileHeader.Length];
            _file.Read(b, 0, _FileHeader.Length);

            if (b[0] == _FileHeader[0] && b[1] == _FileHeader[1] && b[2] == _FileHeader[2])
            {
                byte maxks = b[3];
                short nodes = Helper.ToInt16(b, 4);
                int root = Helper.ToInt32(b, 6);
                _maxKeySize = maxks;
                _PageNodeCount = nodes;
                _CurrentRootPage = root;
                _FileHeader = b;
                if (b[10] == (int)_indexType)
                    return true;
                else
                    throw new Exception("Index type does not match file index type");
            }

            return false;
        }

        public int GetNewPageNumber()
        {
            return _LastPageNumber++;
        }

        private void SeekPage(int pnum)
        {
            long offset = _FileHeader.Length + _BucketCount * 4;
            offset += pnum * _PageLength;
            if (offset > _file.Length)
                throw new Exception("file seek out of bounds =" + offset + " file len = " + _file.Length);
            _file.Seek(offset, SeekOrigin.Begin);
        }

        public void Shutdown()
        {
            _file.Flush();
            _file.Close();
        }
        #endregion

        #region [  N O D E S  ]
        public Node GetRoot()
        {
            return LoadNodeFromPageNumber(_CurrentRootPage);
        }

        public bool Commit(IDictionary<int, Node> nodes)
        {
            foreach (Node n in nodes.Values)
            {
                if (n.isDirty)
                {
                    SaveNode(n);
                    if (n.isRootPage)
                    {
                        WriteHeader(n.DiskPageNumber);
                        _CurrentRootPage = n.DiskPageNumber;
                    }
                }
            }

            return true;
        }

        private void SaveNode(Node node)
        {
            int pnum = node.DiskPageNumber;
            if (pnum > _LastPageNumber)
                throw new Exception("should not be here: page out of bounds");

            SeekPage(pnum);
            byte[] page = new byte[_PageLength];
            Buffer.BlockCopy(_BlockHeader, 0, page, 0, _BlockHeader.Length);
            // node type
            if (node.isLeafPage)
                page[4] = 1;
            if (node.isRootPage)
                page[4] += 2;
            if (node.isDuplicatePage)
                page[4] += 4;

            // node count
            byte[] b = Helper.GetBytes(node.Count, false);
            Buffer.BlockCopy(b, 0, page, 5, b.Length);
            // node parent
            b = Helper.GetBytes(node.ParentPageNumber, false);
            Buffer.BlockCopy(b, 0, page, 7, b.Length);
            // right page number
            b = Helper.GetBytes(node.RightPageNumber, false);
            Buffer.BlockCopy(b, 0, page, 11, b.Length);

            int index = _BlockHeader.Length;
            if (node.isDuplicatePage == true)
            {
                b = Helper.GetBytes(node.Duplicates.Count, false);
                Buffer.BlockCopy(b, 0, page, index, b.Length);
                index += b.Length;
                foreach (int p in node.Duplicates)
                {
                    b = Helper.GetBytes(p, false);
                    Buffer.BlockCopy(b, 0, page, index, b.Length);
                    index += b.Length;
                }
            }
            else
            {
                // node children
                for (int i = 0; i < node.Count; i++)
                {
                    KeyPointer kp = node.ChildPointers[i];
                    int idx = index + _rowSize * i;
                    byte size = (byte)kp.Key.val.Length;
                    if (size > _maxKeySize)
                        size = _maxKeySize;
                    // key size = 1 byte
                    page[idx] = size;
                    // key bytes
                    Buffer.BlockCopy(node.ChildPointers[i].Key.val, 0, page, idx + 1, page[idx]);
                    // offset = 4 bytes
                    b = Helper.GetBytes(kp.RecordNum, false);
                    Buffer.BlockCopy(b, 0, page, idx + 1 + _maxKeySize, b.Length);
                    // duplicatepage = 4 bytes
                    b = Helper.GetBytes(kp.DuplicatesPage, false);
                    Buffer.BlockCopy(b, 0, page, idx + 1 + _maxKeySize + 4, b.Length);
                }
            }
            _file.Write(page, 0, page.Length);
            _file.Flush();
        }

        public Node LoadNodeFromPageNumber(int number)
        {
            SeekPage(number);
            byte[] b = new byte[_PageLength];
            _file.Read(b, 0, _PageLength);

            if (b[0] == _BlockHeader[0] && b[1] == _BlockHeader[1] && b[2] == _BlockHeader[2] && b[3] == _BlockHeader[3])
            {
                // create node here
                int parentpage = Helper.ToInt32(b, 7);
                List<KeyPointer> list = new List<KeyPointer>();
                List<int> dups = new List<int>();
                short count = Helper.ToInt16(b, 5);
                if (count > _PageNodeCount)
                    throw new Exception("Count > node size");
                int rightpage = Helper.ToInt32(b, 11);
                int index = _BlockHeader.Length;
                if ((b[4] & 4) == 4)
                {
                    int dupc = Helper.ToInt32(b, index);
                    index += 4;
                    for (int i = 0; i < dupc; i++)
                    {
                        int l = Helper.ToInt32(b, index);
                        dups.Add(l);
                        index += 4;
                    }
                }
                else
                {
                    for (int i = 0; i < count; i++)
                    {
                        int idx = index + _rowSize * i;
                        byte ks = b[idx];
                        byte[] key = new byte[ks];
                        Buffer.BlockCopy(b, idx + 1, key, 0, ks);
                        int offset = Helper.ToInt32(b, idx + 1 + _maxKeySize);
                        int duppage = Helper.ToInt32(b, idx + 1 + _maxKeySize + 4);
                        KeyPointer kp = new KeyPointer(new bytearr(key), offset, duppage);
                        list.Add(kp);
                    }
                }
                Node n = new Node(b[4], parentpage, list, dups, number, rightpage);
                return n;
            }
            else
                throw new Exception("Page read error");
        }
        #endregion

        #region [  B U C K E T S  ]
        public List<int> GetBucketList()
        {
            _file.Seek(_FileHeader.Length, SeekOrigin.Begin);
            byte[] b = new byte[_BucketCount * 4];
            _file.Read(b, 0, b.Length);
            List<int> list = new List<int>();
            for (int i = 0; i < _BucketCount; i++)
            {
                int l = Helper.ToInt32(b, i * 4);
                list.Add(l);
            }

            return list;
        }

        public void SaveBucketList(List<int> list)
        {
            byte[] b = new byte[list.Count * 4];
            int idx = 0;
            foreach (int l in list)
            {
                byte[] bb = Helper.GetBytes(l, false);
                Buffer.BlockCopy(bb, 0, b, idx, bb.Length);
                idx += bb.Length;
            }
            _file.Seek(_FileHeader.Length, SeekOrigin.Begin);
            _file.Write(b, 0, b.Length);
            _file.Flush();
        }

        public void SaveBucket(Bucket bucket)
        {
            int pnum = bucket.DiskPageNumber;
            if (pnum > _LastPageNumber)
                throw new Exception("should not be here: page out of bounds");

            SeekPage(pnum);
            byte[] page = new byte[_PageLength];
            Buffer.BlockCopy(_BlockHeader, 0, page, 0, _BlockHeader.Length);
            // bucket type
            if (bucket.isBucket)
                page[4] = 8;
            if (bucket.isOverflow)
                page[4] += 16;


            // bucket count
            byte[] b = Helper.GetBytes(bucket.Count, false);
            Buffer.BlockCopy(b, 0, page, 5, b.Length);
            // bucket number
            b = Helper.GetBytes(bucket.BucketNumber, false);
            Buffer.BlockCopy(b, 0, page, 7, b.Length);
            // next page number
            b = Helper.GetBytes(bucket.NextPageNumber, false);
            Buffer.BlockCopy(b, 0, page, 11, b.Length);

            int index = _BlockHeader.Length;
            if (bucket.isOverflow == true)
            {
                b = Helper.GetBytes(bucket.Duplicates.Count, false);
                Buffer.BlockCopy(b, 0, page, index, b.Length);
                index += b.Length;
                foreach (long p in bucket.Duplicates)
                {
                    b = Helper.GetBytes(p, false);
                    Buffer.BlockCopy(b, 0, page, index, b.Length);
                    index += b.Length;
                }
            }
            else
            {
                // bucket children
                for (int i = 0; i < bucket.Count; i++)
                {
                    KeyPointer kp = bucket.Pointers[i];
                    int idx = index + _rowSize * i;
                    byte size = (byte)kp.Key.val.Length;
                    if (size > _maxKeySize)
                        size = _maxKeySize;
                    // key size = 1 byte
                    page[idx] = size;
                    // key bytes
                    Buffer.BlockCopy(bucket.Pointers[i].Key.val, 0, page, idx + 1, page[idx]);
                    // offset = 4 bytes
                    b = Helper.GetBytes(kp.RecordNum, false);
                    Buffer.BlockCopy(b, 0, page, idx + 1 + _maxKeySize, b.Length);
                    // duplicatepage = 4 bytes
                    b = Helper.GetBytes(kp.DuplicatesPage, false);
                    Buffer.BlockCopy(b, 0, page, idx + 1 + _maxKeySize + 4, b.Length);
                }
            }
            bucket.isOverflow = false;
            _file.Write(page, 0, page.Length);
            _file.Flush();
        }

        public Bucket LoadBucketFromPage(int page)
        {
            SeekPage(page);
            byte[] b = new byte[_PageLength];
            _file.Read(b, 0, _PageLength);

            if (b[0] == _BlockHeader[0] && b[1] == _BlockHeader[1] && b[2] == _BlockHeader[2] && b[3] == _BlockHeader[3])
            {
                // create node here
                int bucketnum = Helper.ToInt32(b, 7);
                List<KeyPointer> list = new List<KeyPointer>();
                List<int> dups = new List<int>();
                short count = Helper.ToInt16(b, 5);
                if (count > _PageNodeCount)
                    throw new Exception("Count > node size");
                int nextpage = Helper.ToInt32(b, 11);
                int index = _BlockHeader.Length;
                if ((b[4] & 4) == 4)
                {
                    int dupc = Helper.ToInt32(b, index);
                    index += 4;
                    for (int i = 0; i < dupc; i++)
                    {
                        int l = Helper.ToInt32(b, index);
                        dups.Add(l);
                        index += 4;
                    }
                }
                else
                {
                    for (int i = 0; i < count; i++)
                    {
                        int idx = index + _rowSize * i;
                        byte ks = b[idx];
                        byte[] key = new byte[ks];
                        Buffer.BlockCopy(b, idx + 1, key, 0, ks);
                        int offset = Helper.ToInt32(b, idx + 1 + _maxKeySize);
                        int duppage = Helper.ToInt32(b, idx + 1 + _maxKeySize + 4);

                        KeyPointer kp = new KeyPointer(new bytearr(key), offset, duppage);
                        list.Add(kp);
                    }
                }
                Bucket n = new Bucket(b[4], bucketnum, list, dups, page, nextpage);
                return n;
            }
            else
                throw new Exception("Page read error");
        }

        public bool Commit(IDictionary<int, Bucket> buckets)
        {
            foreach (Bucket n in buckets.Values)
            {
                if (n.isDirty)
                    SaveBucket(n);
            }
            return true;
        }
        #endregion
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Mehdi Gholam
Architect
United Kingdom United Kingdom
Mehdi first started programming when he was 8 on BBC+128k machine in 6512 processor language, after various hardware and software changes he eventually came across .net and c# which he has been using since v1.0.
He is formally educated as a system analyst Industrial engineer, but his programming passion continues.
 
* Mehdi is the 5th person to get 6 out of 7 Platinums on CodeProject (13th Jan'12)

| Advertise | Privacy | Terms of Use | Mobile
Web01 | 2.8.141216.1 | Last Updated 23 Jan 2012
Article Copyright 2011 by Mehdi Gholam
Everything else Copyright © CodeProject, 1999-2014
Layout: fixed | fluid