RaptorDB - the Key Value Store

Mehdi Gholam

Rate me:

4.89/5 (118 votes)

22 Jan 2012CPOL22 min read

933.6K

9.9K

265

Smallest, fastest embedded nosql persisted dictionary using b+tree or MurMur hash indexing. (Now with Hybrid WAH bitmap indexes)

RaptorDB_v1.8.zip
- history.txt
- RaptorDB
  - AssemblyInfo.cs
  - DataTypes
    - DataTypes.cs
  - Global.cs
  - IIndex.cs
  - Indexes
  - mylogger.cs
  - Properties
    - Resources.Designer.cs
    - Resources.resx
  - RaptorDB.cs
  - RaptorDB.csproj
  - RaptorDBString.cs
  - SafeDictionary.cs
  - Storage
    - LogFile.cs
    - StorageFile.cs
- RaptorDBTest.sln
- testing
- UnitTests release.nunit
- UnitTests.nunit
- UnitTests
  - Properties
    - AssemblyInfo.cs
  - Tests.cs
  - UnitTests.csproj
  - UnitTests.nunit
RaptorDB_v1.0.zip
RaptorDB_v1.7.zip
- history.txt
- AssemblyInfo.cs
- Global.cs
- IIndex.cs
- BitmapCache.cs
- BitmapIndex.cs
- BTree.cs
- IndexFile.cs
- MurMurHash2.cs
- Node.cs
- WAHBitarray2.cs
- Resources.Designer.cs
- Resources.resx
- RaptorDB.cs
- RaptorDB.csproj
- SafeDictionary.cs
- LogFile.cs
- StorageFile.cs
- RaptorDBTest.sln
- AssemblyInfo.cs
- bplusTest.cs
- tests.csproj
- _svn
  - all-wcprops
  - dir-prop-base
  - entries
  - prop-base
    - props
      - text-base
        
        AssemblyInfo.cs.svn-base
      - tmp
        
        prop-base
        
        props
        
        text-base
    - AssemblyInfo.cs
  - raptordb.VisualState.xml
  - TestResult.xml
  - Tests.cs
  - UnitTests.csproj
  - UnitTests.nunit
  - UnitTests.VisualState.xml
RaptorDB_v1.7.5.zip
- history.txt
- AssemblyInfo.cs
- Global.cs
- IIndex.cs
- BitmapCache.cs
- BitmapIndex.cs
- BTree.cs
- IndexFile.cs
- MurMurHash2.cs
- Node.cs
- WAHBitarray2.cs
- Resources.Designer.cs
- Resources.resx
- RaptorDB.cs
- RaptorDB.csproj
- SafeDictionary.cs
- LogFile.cs
- StorageFile.cs
- RaptorDBTest.sln
- app.config
- AssemblyInfo.cs
- bplusTest.cs
- tests.csproj
- AssemblyInfo.cs
- raptordb.VisualState.xml
- TestResult.xml
- Tests.cs
- UnitTests.csproj
- UnitTests.nunit
- UnitTests.VisualState.xml
RaptorDB_v1.5.zip
RaptorDB_v1.5.1.zip
RaptorDB_v1.5.2.zip
RaptorDB_v1.6.zip
RaptorDB_v1.4.1.zip
RaptorDB_v1.1.zip
- history.txt
- AssemblyInfo.cs
- Global.cs
- IIndex.cs
- BTree.cs
- IndexFile.cs
- MurMurHash2.cs
- Node.cs
- RaptorDB.cs
- RaptorDB.csproj
- SafeDictionary.cs
- LogFile.cs
- StorageFile.cs
- Views
- RaptorDBTest.sln
- app.config
- AssemblyInfo.cs
- bplusTest.cs
- tests.csproj
RaptorDB_v1.2.zip
RaptorDB_v1.3.zip
RaptorDB_v1.4.zip

using System;
using System.Collections.Generic;
using System.Text;

namespace RaptorDB
{
    public class Hash : IIndex
    {
        private int _BucketCount = 10007;
        private int _BucketItems = 200;
        private int _MaxDuplicates = 10;
        private List<long> _bucketPointers = new List<long>();
        MurmurHash2Unsafe _hash = new MurmurHash2Unsafe();
        private SortedList<long, Bucket> _CachedBuckets = new SortedList<long, Bucket>(Global.MaxItemsBeforeIndexing);
        private IndexFile _indexfile;
        private bool _InMemory = false;

        public Hash(string indexfilename, byte maxkeysize, short nodeSize, bool allowDuplicates, int bucketcount)
        {
            _BucketCount = bucketcount;
            _BucketItems = nodeSize;
            // fill buckets with blanks
            for (int i = 0; i < _BucketCount; i++)
                _bucketPointers.Add(-1);

            _indexfile = new IndexFile(indexfilename, maxkeysize, nodeSize, bucketcount, INDEXTYPE.HASH);

            _BucketItems = _indexfile._PageNodeCount;

            // load data from index file
            _bucketPointers = _indexfile.GetBucketList();

            _MaxDuplicates = _indexfile._DuplicatesPerPage;
        }

        #region [   I I N D E X   ]

        public bool InMemrory
        {
            get
            {
                return _InMemory;
            }
            set
            {
                _InMemory = value;
            }
        }

        public bool Get(byte[] key, out long offset)
        {
            offset = -1;
            Bucket b = FindBucket(key);
            return SearchBucket(b, key, ref offset);
        }

        public void Set(byte[] key, long offset)
        {
            Bucket b = FindBucket(key);
            b = SetBucket(key, offset, b);
        }

        public void Commit()
        {
            if (_InMemory == false)
                SaveIndex();
        }

        public void Shutdown()
        {
            Commit();
        }

        public List<long> Enumerate(byte[] fromkey, int start, int count)
        {
            throw new NotImplementedException();
        }

        public long Count()
        {
            return _indexfile.CountBuckets();
        }

        public List<long> GetDuplicates(byte[] key)
        {
            Bucket b = FindBucket(key);
                        bool found = false;
            int pos = FindPointerOrLower(b, new bytearr(key), out found);

            if (found)
            {
                long dp = b.Pointers[pos].DuplicatesPage;
                if (dp != -1)
                {
                    b = LoadBucket(dp);
                    return b.Duplicates;
                }
            }
            return new List<long>();
        }

        public void SaveIndex()
        {
            if (_indexfile.Commit(_CachedBuckets))
            {
                _CachedBuckets = new SortedList<long, Bucket>(Global.MaxItemsBeforeIndexing);
            }
            _indexfile.SaveBucketList(_bucketPointers);
        }
        #endregion

        #region [   P R I V A T E   M E T H O D S   ]

        private Bucket SetBucket(byte[] key, long offset, Bucket b)
        {
            bool found = false;
            int pos = FindPointerOrLower(b, new bytearr(key), out found);

            if (found)
            {
                KeyPointer p = b.Pointers[pos];
                long v = p.Offset;

                // duplicate found     
                if (v != offset)
                {
                    p.Offset = offset;
                    AddToDuplicates(p.DuplicatesPage, v);
                    DirtyBucket(b);
                }
            }
            else
            {
                if (b.Pointers.Count < _BucketItems)
                {
                    KeyPointer k = new KeyPointer(new bytearr(key), offset);
                    pos++;
                    if (pos < b.Pointers.Count)
                        b.Pointers.Insert(pos, k);
                    else
                        b.Pointers.Add(k);
                    DirtyBucket(b);
                }
                else
                {
                    long p = b.NextPageNumber;
                    if (p != -1)
                    {
                        b = LoadBucket(p);
                        SetBucket(key, offset, b);
                    }
                    else
                    {
                        Bucket newb = new Bucket(_indexfile.GetNewPageNumber());
                        b.NextPageNumber = newb.DiskPageNumber;
                        DirtyBucket(b);
                        SetBucket(key, offset, newb);
                    }
                }
            }
            return b;
        }

        private bool SearchBucket(Bucket b, byte[] key, ref long offset)
        {
            bool found = false;
            int pos = FindPointerOrLower(b, new bytearr(key), out found);
            if (found)
            {
                KeyPointer k = b.Pointers[pos];
                offset = k.Offset;
                return true;
            }
            else
            {
                if (b.NextPageNumber != -1)
                {
                    b = LoadBucket(b.NextPageNumber);
                    return SearchBucket(b, key, ref offset);
                }
            }
            return false;
        }

        private void DirtyBucket(Bucket b)
        {
            if (b.isDirty)
                return;

            b.isDirty = true;
            if (_CachedBuckets.ContainsKey(b.DiskPageNumber) == false)
                _CachedBuckets.Add(b.DiskPageNumber, b);
        }

        private void AddToDuplicates(long duplicatepage, long offset)
        {
            // load duplicate page
            Bucket b = LoadBucket(duplicatepage);
            if (b.Duplicates.Count < _MaxDuplicates)
            {
                b.Duplicates.Add(offset);
                DirtyBucket(b);
            }
            else
            {
                if (b.NextPageNumber != -1)
                    AddToDuplicates(b.NextPageNumber, offset);
                else
                {
                    // new duplicate page
                    Bucket newb = new Bucket(_indexfile.GetNewPageNumber());
                    b.NextPageNumber = newb.DiskPageNumber;
                    DirtyBucket(b);
                    b.Duplicates.Add(offset);
                    DirtyBucket(newb);
                }
            }
        }

        private int FindPointerOrLower(Bucket b, bytearr key, out bool found)
        {
            found = false;
            if (b.Pointers.Count == 0)
                return 0;
            // binary search
            int lastlower = -1;
            int first = 0;
            int last = b.Pointers.Count - 1;
            int mid = 0;
            while (first <= last)
            {
                mid = (first + last) >> 1;
                KeyPointer k = b.Pointers[mid];
                int compare = Helper.Compare(k.Key, key);
                if (compare < 0)
                {
                    lastlower = mid;
                    first = mid + 1;
                }
                if (compare == 0)
                {
                    found = true;
                    return mid;
                }
                if (compare > 0)
                {
                    last = mid - 1;
                }
            }

            return lastlower;
        }

        private Bucket FindBucket(byte[] key)
        {
            Bucket b;
            uint h = _hash.Hash(key);

            int bucketNumber = (int)(h % _BucketCount);

            long pointer = _bucketPointers[bucketNumber];
            if (pointer == -1)
            {
                // new bucket
                b = CreateBucket(bucketNumber);
                _bucketPointers[bucketNumber] = b.DiskPageNumber;
                _CachedBuckets.Add(b.DiskPageNumber, b);
            }
            else
                b = LoadBucket(pointer);

            return b;
        }

        private Bucket LoadBucket(long pagenumber)
        {
            Bucket b;
            // try cache first
            if (_CachedBuckets.TryGetValue(pagenumber, out b))
                return b;
            // else load from disk //and put in cache
            b = _indexfile.LoadBucketFromPage(pagenumber);

            return b;
        }

        private Bucket CreateBucket(int bucketNumber)
        {
            Bucket b = new Bucket(_indexfile.GetNewPageNumber());
            b.BucketNumber = bucketNumber;
            // get next free indexfile pointer offset

            return b;
        }
        #endregion
    }

    public class MurmurHash2Unsafe
    {
        public UInt32 Hash(Byte[] data)
        {
            return Hash(data, 0xc58f1a7b);
        }
        const UInt32 m = 0x5bd1e995;
        const Int32 r = 24;

        public unsafe UInt32 Hash(Byte[] data, UInt32 seed)
        {
            Int32 length = data.Length;
            if (length == 0)
                return 0;
            UInt32 h = seed ^ (UInt32)length;
            Int32 remainingBytes = length & 3; // mod 4
            Int32 numberOfLoops = length >> 2; // div 4
            fixed (byte* firstByte = &(data[0]))
            {
                UInt32* realData = (UInt32*)firstByte;
                while (numberOfLoops != 0)
                {
                    UInt32 k = *realData;
                    k *= m;
                    k ^= k >> r;
                    k *= m;

                    h *= m;
                    h ^= k;
                    numberOfLoops--;
                    realData++;
                }
                switch (remainingBytes)
                {
                    case 3:
                        h ^= (UInt16)(*realData);
                        h ^= ((UInt32)(*(((Byte*)(realData)) + 2))) << 16;
                        h *= m;
                        break;
                    case 2:
                        h ^= (UInt16)(*realData);
                        h *= m;
                        break;
                    case 1:
                        h ^= *((Byte*)realData);
                        h *= m;
                        break;
                    default:
                        break;
                }
            }

            // Do a few final mixes of the hash to ensure the last few
            // bytes are well-incorporated.

            h ^= h >> 13;
            h *= m;
            h ^= h >> 15;

            return h;
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Written By

Mehdi Gholam

Architect -

United Kingdom

Mehdi first started programming when he was 8 on BBC+128k machine in 6512 processor language, after various hardware and software changes he eventually came across .net and c# which he has been using since v1.0.
He is formally educated as a system analyst Industrial engineer, but his programming passion continues.

* Mehdi is the 5th person to get 6 out of 7 Platinum's on Code-Project (13th Jan'12)
* Mehdi is the 3rd person to get 7 out of 7 Platinum's on Code-Project (26th Aug'16)

RaptorDB - the Key Value Store

License

Comments and Discussions