Click here to Skip to main content
15,860,943 members
Articles / Web Development / HTML

RaptorDB - The Document Store

Rate me:
Please Sign up or sign in to vote.
4.96/5 (278 votes)
24 Jul 2019CPOL86 min read 2.3M   16.3K   653  
NoSQL, JSON-based document store database with compiled .NET map functions, automatic hybrid bitmap indexing and LINQ query filters (now with standalone Server mode, Backup and Active Restore, Transactions, Server-side queries, MonoDroid support, HQ-Branch Replication, working in Linux, .net…)
using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.IO;
using System.Threading;
using System.Text.RegularExpressions;
using RaptorDB.Common;

namespace RaptorDB
{
    internal class Hoot
    {
        // Extension of the bitmap data file; appended to _Path + _FileName wherever that file is opened.
        private string _bmpext = ".mgbmp";

        /// <summary>
        /// File based constructor: prepares the storage folder, loads the persisted word list
        /// and opens the bitmap file positioned at its end, ready for appending.
        /// </summary>
        /// <param name="IndexPath">Folder that holds the index files; it is created when missing.</param>
        /// <param name="FileName">Base file name; ".words" and the bitmap extension are appended to it.</param>
        public Hoot(string IndexPath, string FileName)
        {
            _Path = IndexPath;
            _FileName = FileName;

            // Every file name in this class is built as _Path + _FileName + extension, so the folder
            // must end with a separator. Either separator counts as "already terminated"; otherwise the
            // platform separator is appended (a hard-coded backslash is not a separator outside Windows).
            bool terminated =
                _Path.EndsWith(Path.DirectorySeparatorChar.ToString(), StringComparison.Ordinal) ||
                _Path.EndsWith(Path.AltDirectorySeparatorChar.ToString(), StringComparison.Ordinal);
            if (terminated == false) _Path += Path.DirectorySeparatorChar;

            Directory.CreateDirectory(IndexPath);
            //_log.Debug("\r\n\r\n");
            _log.Debug("Starting hOOt....");
            _log.Debug("Storage Folder = " + _Path);

            // read words
            LoadWords();
            // open bitmap index
            _bitmapFile = new FileStream(_Path + _FileName + _bmpext, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
            // new bitmap records are appended, so remember where the file currently ends
            _lastBitmapOffset = _bitmapFile.Seek(0L, SeekOrigin.End);
        }

        // Logger obtained for this type; used for debug tracing throughout the class.
        private ILog _log = LogManager.GetLogger(typeof(Hoot));
        // Base name of the index files; overwritten by the constructor argument.
        private string _FileName = "words";
        // Storage folder; the constructor stores it with a trailing path separator.
        private string _Path = "";
        // Word -> bitmap cache. AddtoIndex stores the keys lower-cased.
        private SafeDictionary<string, Cache> _index = new SafeDictionary<string, Cache>();
        // Taken by FreeMemory and Save.
        private object _lock = new object();
        // Stream over the bitmap file, opened by the constructor; SaveBitmap writes records to it.
        private FileStream _bitmapFile;
        // Offset SaveBitmap reports for the next record; advanced by each record's length.
        private long _lastBitmapOffset = 0;
        // When true InternalSave, LoadBitmap and Shutdown return without touching disk.
        // NOTE(review): nothing in the visible code ever sets this to true.
        private bool _inMemory = false;

        /// <summary>
        /// Asks every cached word bitmap to release memory.
        /// </summary>
        /// <param name="freecache">
        /// When true each bitmap is first appended to the bitmap file and the cache entry is pointed at
        /// the new record before <c>Cache.FreeMemory(true)</c> is called; when false only
        /// <c>Cache.FreeMemory(false)</c> is called.
        /// </param>
        public void FreeMemory(bool freecache)
        {
            lock (_lock)
            {
                _log.Debug("freeing memory");

                // free bitmap memory
                foreach (var v in _index)
                {
                    if (freecache == false)
                    {
                        v.Value.FreeMemory(false);
                        continue;
                    }

                    // InternalSave treats a null result from GetCompressedBits as "nothing to write";
                    // do the same here instead of letting SaveBitmap fail on bits.Length.
                    uint[] ar = v.Value.GetCompressedBits();
                    if (ar != null)
                    {
                        long off = SaveBitmap(ar);
                        v.Value.isDirty = false;
                        v.Value.FileOffset = off;
                    }
                    // NOTE(review): when nothing was saved the entry keeps its previous FileOffset and
                    // dirty flag - confirm Cache.FreeMemory(true) is safe in that state.
                    v.Value.FreeMemory(true);
                }
            }
        }

        /// <summary>
        /// Persists the index while holding the instance lock.
        /// </summary>
        public void Save()
        {
            lock (_lock)
                InternalSave();
        }

        /// <summary>
        /// Adds the words of <paramref name="text"/> to the index for the given record.
        /// </summary>
        /// <param name="recordnumber">Record number whose bit is set for every word found.</param>
        /// <param name="text">Space separated words; forwarded unchanged to AddtoIndex.</param>
        public void Index(int recordnumber, string text)
        {
            this.AddtoIndex(recordnumber, text);
        }

        /// <summary>
        /// Runs a word filter against the index.
        /// </summary>
        /// <param name="filter">Query text, handed to ExecutionPlan as is.</param>
        /// <returns>The bitmap produced by ExecutionPlan.</returns>
        public WAHBitArray Query(string filter)
        {
            WAHBitArray result = ExecutionPlan(filter);
            return result;
        }

        /// <summary>
        /// Runs a word filter and exposes the result as a sequence of bit indexes.
        /// </summary>
        /// <param name="filter">Query text, handed to Query as is.</param>
        /// <returns>The indexes reported by the result bitmap's GetBitIndexes().</returns>
        public IEnumerable<int> FindRows(string filter)
        {
            WAHBitArray matches = Query(filter);
            return matches.GetBitIndexes();
        }

        //public void OptimizeIndex()
        //{
        //    lock (_lock)
        //    {
        //        InternalSave();
        //        _log.Debug("optimizing index..");
        //        DateTime dt = FastDateTime.Now;
        //        _lastBitmapOffset = 0;
        //        _bitmapFile.Flush();
        //        _bitmapFile.Close();
        //        // compact bitmap index file to new file
        //        _bitmapFile = new FileStream(_Path + _FileName + _bmpext + "$", FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
        //        MemoryStream ms = new MemoryStream();
        //        BinaryWriter bw = new BinaryWriter(ms, Encoding.UTF8);
        //        // save words and bitmaps
        //        using (FileStream words = new FileStream(_Path + _FileName + ".words", FileMode.Create))
        //        {
        //            foreach (KeyValuePair<string, Cache> kv in _index)
        //            {
        //                bw.Write(kv.Key);
        //                uint[] ar = LoadBitmap(kv.Value.FileOffset);
        //                long offset = SaveBitmap(ar);
        //                kv.Value.FileOffset = offset;
        //                bw.Write(kv.Value.FileOffset);
        //            }
        //            // save words
        //            byte[] b = ms.ToArray();
        //            words.Write(b, 0, b.Length);
        //            words.Flush();
        //            words.Close();
        //        }
        //        // rename files
        //        _bitmapFile.Flush();
        //        _bitmapFile.Close();
        //        File.Delete(_Path + _FileName + _bmpext);
        //        File.Move(_Path + _FileName + _bmpext + "$", _Path + _FileName + _bmpext);
        //        // reload everything
        //        _bitmapFile = new FileStream(_Path + _FileName + _bmpext, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
        //        _lastBitmapOffset = _bitmapFile.Seek(0L, SeekOrigin.End);
        //        _log.Debug("optimizing index done = " + DateTime.Now.Subtract(dt).TotalSeconds + " sec");
        //    }
        //}

        #region [  P R I V A T E   M E T H O D S  ]

        /// <summary>
        /// Evaluates a space separated word filter against the index, combining the word bitmaps
        /// from left to right.
        /// </summary>
        /// <remarks>
        /// A word prefixed with '+' is combined with AND, one prefixed with '-' with ANDNOT. Unprefixed
        /// words are combined with AND unless a '+' or '-' occurs after the first character of the
        /// filter, in which case they are combined with OR. A word containing '*' or '?' is a wildcard
        /// pattern matched against whole index keys ('*' = any run of characters, '?' = one character).
        /// Words and patterns that match nothing in the index are skipped.
        /// </remarks>
        /// <param name="filter">Query text; null yields an empty result.</param>
        /// <returns>The combined bitmap, or a new empty WAHBitArray when nothing matched.</returns>
        private WAHBitArray ExecutionPlan(string filter)
        {
            _log.Debug("query : " + filter);
            // a null filter cannot match anything; answer like a filter without known words
            if (filter == null)
                return new WAHBitArray();

            DateTime dt = FastDateTime.Now;
            // query indexes
            string[] words = filter.Split(' ');
            // NOTE(review): "> 0" ignores an operator at position 0 and also fires on '-' inside a
            // word (e.g. "well-known"); kept as is because changing it alters query semantics.
            bool defaulttoand = true;
            if (filter.IndexOfAny(new char[] { '+', '-' }, 0) > 0)
                defaulttoand = false;

            WAHBitArray bits = null;

            foreach (string s in words)
            {
                if (s == "") continue;

                Cache c;
                string word = s;
                Cache.OPERATION op = defaulttoand ? Cache.OPERATION.AND : Cache.OPERATION.OR;

                // Strip only the leading operator so words such as "c++" keep their own characters.
                if (s.StartsWith("+", StringComparison.Ordinal))
                {
                    op = Cache.OPERATION.AND;
                    word = s.Substring(1);
                }
                else if (s.StartsWith("-", StringComparison.Ordinal))
                {
                    op = Cache.OPERATION.ANDNOT;
                    word = s.Substring(1);
                }

                if (word.Contains("*") || word.Contains("?"))
                {
                    // do wildcard search: escape the word so regex metacharacters are literal, then
                    // turn the (escaped) wildcards into their regex form and anchor to the whole key
                    string pattern = "^" + Regex.Escape(word).Replace("\\*", ".*").Replace("\\?", ".") + "$";
                    Regex reg = new Regex(pattern, RegexOptions.IgnoreCase);

                    // With an existing result, a '-' pattern removes each matching word in turn, the
                    // same way a plain '-' word is applied below.
                    // NOTE(review): as with plain words, a '-' term that comes first is not negated.
                    bool exclude = op == Cache.OPERATION.ANDNOT && bits != null;

                    WAHBitArray wildbits = null;
                    foreach (var key in _index)
                    {
                        if (reg.IsMatch(key.Key) == false)
                            continue;

                        c = key.Value;
                        if (c.isLoaded == false)
                            LoadCache(c);

                        if (exclude)
                            bits = DoBitOperation(bits, c, Cache.OPERATION.ANDNOT);
                        else
                            wildbits = DoBitOperation(wildbits, c, Cache.OPERATION.OR);
                    }

                    // wildbits stays null when no key matched (or everything was applied above)
                    if (wildbits != null)
                    {
                        if (bits == null)
                            bits = wildbits;
                        else if (op == Cache.OPERATION.AND)
                            bits = bits.And(wildbits);
                        else
                            bits = bits.Or(wildbits);
                    }
                }
                else if (_index.TryGetValue(word.ToLowerInvariant(), out c))
                {
                    // bits logic
                    if (c.isLoaded == false)
                        LoadCache(c);
                    bits = DoBitOperation(bits, c, op);
                }
            }
            if (bits == null)
                return new WAHBitArray();

            //// remove deleted docs
            //if (bits.Length > _deleted.Length)
            //    _deleted.Length = bits.Length;
            //else if (bits.Length < _deleted.Length)
            //    bits.Length = _deleted.Length;

            //WAHBitArray nd = _deleted.Not();

            WAHBitArray ret = bits;//.And(nd);
            _log.Debug("query time (ms) = " + FastDateTime.Now.Subtract(dt).TotalMilliseconds);
            return ret;
        }

        /// <summary>
        /// Folds one cached word bitmap into a running result.
        /// </summary>
        /// <param name="bits">Result so far; null when no word has been applied yet.</param>
        /// <param name="c">Cache entry of the word being applied.</param>
        /// <param name="op">Operation passed to <c>Cache.Op</c>; unused when <paramref name="bits"/> is null.</param>
        /// <returns><c>c.GetBitmap()</c> for the first word, otherwise <c>c.Op(bits, op)</c>.</returns>
        private static WAHBitArray DoBitOperation(WAHBitArray bits, Cache c, Cache.OPERATION op)
        {
            return bits == null ? c.GetBitmap() : c.Op(bits, op);
        }

        /// <summary>
        /// Fills a cache entry with compressed bits: the record read from its file offset, or a
        /// single zero word when the entry has no offset (-1).
        /// </summary>
        /// <param name="c">Cache entry to populate.</param>
        private void LoadCache(Cache c)
        {
            uint[] compressed = (c.FileOffset == -1)
                ? new uint[] { 0 }
                : LoadBitmap(c.FileOffset);
            c.SetCompressedBits(compressed);
        }

        /// <summary>
        /// Appends the bitmap of every dirty word to the bitmap file and rewrites the ".words" file
        /// with one (word, bitmap offset) pair per index entry. Does nothing in in-memory mode.
        /// </summary>
        private void InternalSave()
        {
            if (_inMemory == true)
                return;

            _log.Debug("saving index...");
            DateTime dt = FastDateTime.Now;

            // Build the complete word list in memory first. The ".words" file is only replaced once
            // every bitmap has been saved, so a failure part-way does not leave a truncated word list.
            byte[] b;
            using (MemoryStream ms = new MemoryStream())
            using (BinaryWriter bw = new BinaryWriter(ms, Encoding.UTF8))
            {
                foreach (KeyValuePair<string, Cache> kv in _index)
                {
                    bw.Write(kv.Key);
                    if (kv.Value.isDirty)
                    {
                        // write bit index; a null result means there is nothing to write and the
                        // previously stored offset stays valid
                        uint[] ar = kv.Value.GetCompressedBits();
                        if (ar != null)
                        {
                            // save bitmap data to disk
                            long off = SaveBitmap(ar);
                            // set the saved info in cache
                            kv.Value.FileOffset = off;
                            kv.Value.LastBitSaveLength = ar.Length;
                        }
                    }
                    // set the word bitmap offset (new or unchanged)
                    bw.Write(kv.Value.FileOffset);

                    kv.Value.isDirty = false;
                }
                bw.Flush();
                b = ms.ToArray();
            }

            // save words
            using (FileStream words = new FileStream(_Path + _FileName + ".words", FileMode.Create))
            {
                words.Write(b, 0, b.Length);
                words.Flush();
            }
            _log.Debug("save time (ms) = " + FastDateTime.Now.Subtract(dt).TotalMilliseconds);
        }

        /// <summary>
        /// Reads the ".words" file, if present, and registers every (word, bitmap offset) pair in the
        /// index as an unloaded, clean cache entry.
        /// </summary>
        /// <remarks>
        /// Reading stops at the end of the data or at an empty word. A truncated or malformed tail is
        /// logged and ignored; the entries read before it are kept.
        /// </remarks>
        private void LoadWords()
        {
            string wordsFile = _Path + _FileName + ".words";
            if (File.Exists(wordsFile) == false)
                return;

            // load words
            byte[] b = File.ReadAllBytes(wordsFile);
            if (b.Length == 0)
                return;

            using (MemoryStream ms = new MemoryStream(b))
            using (BinaryReader br = new BinaryReader(ms, Encoding.UTF8))
            {
                try
                {
                    // detect the end of the data by position instead of waiting for a read to throw
                    while (ms.Position < ms.Length)
                    {
                        string s = br.ReadString();
                        if (s == "")
                            break;

                        long off = br.ReadInt64();
                        Cache c = new Cache();
                        c.isLoaded = false;
                        c.isDirty = false;
                        c.FileOffset = off;
                        _index.Add(s, c);
                    }
                }
                catch (IOException ex)
                {
                    // includes EndOfStreamException: the last entry was cut short
                    _log.Debug("words file is truncated or unreadable : " + ex.Message);
                }
                catch (FormatException ex)
                {
                    // malformed string length prefix
                    _log.Debug("words file is malformed : " + ex.Message);
                }
            }
            _log.Debug("Word Count = " + _index.Count);
        }

        //-----------------------------------------------------------------
        // BITMAP RECORD LAYOUT (as written below)
        //    offset 0  'B','M'            marker bytes
        //    offset 2  word count         4 bytes
        //    offset 6  0                  single zero byte
        //    offset 7  data               4 bytes per word
        //-----------------------------------------------------------------
        /// <summary>
        /// Writes one bitmap record to the bitmap file and advances the running offset.
        /// </summary>
        /// <param name="bits">Words to store in the record.</param>
        /// <returns>The value of the running offset before this record was written.</returns>
        private long SaveBitmap(uint[] bits)
        {
            const int headerLength = 7;

            long recordStart = _lastBitmapOffset;
            int wordCount = bits.Length;
            byte[] record = new byte[wordCount * 4 + headerLength];

            // header: marker, word count, zero byte
            record[0] = (byte)'B';
            record[1] = (byte)'M';
            Buffer.BlockCopy(Helper.GetBytes(wordCount, false), 0, record, 2, 4);
            record[6] = 0;

            // payload: every word converted by the same helper, packed back to back
            int pos = headerLength;
            foreach (uint word in bits)
            {
                byte[] encoded = Helper.GetBytes((int)word, false);
                Buffer.BlockCopy(encoded, 0, record, pos, 4);
                pos += 4;
            }

            _bitmapFile.Write(record, 0, record.Length);
            _lastBitmapOffset += record.Length;
            _bitmapFile.Flush();
            return recordStart;
        }

        /// <summary>
        /// Reads the bitmap record stored at <paramref name="offset"/> in the bitmap file.
        /// </summary>
        /// <param name="offset">Record position in the bitmap file; -1 means "no record".</param>
        /// <returns>
        /// Null in in-memory mode or when <paramref name="offset"/> is -1. Otherwise the words of the
        /// record; empty when the header is missing or invalid, and only the complete words when the
        /// record is cut short.
        /// </returns>
        private uint[] LoadBitmap(long offset)
        {
            if (_inMemory == true)
                return null;

            if (offset == -1)
                return null;

            List<uint> ar = new List<uint>();

            using (FileStream bmp = new FileStream(_Path + _FileName + _bmpext, FileMode.Open,
                                                   FileAccess.ReadWrite, FileShare.ReadWrite))
            {
                bmp.Seek(offset, SeekOrigin.Begin);

                byte[] b = new byte[7];
                // header: 'B','M', 4 byte word count, zero byte
                if (ReadExact(bmp, b, 7) && b[0] == (byte)'B' && b[1] == (byte)'M' && b[6] == 0)
                {
                    int c = Helper.ToInt32(b, 2);
                    for (int i = 0; i < c; i++)
                    {
                        // Stream.Read may deliver fewer bytes than requested; never decode a word
                        // from a partially filled buffer.
                        if (ReadExact(bmp, b, 4) == false)
                        {
                            _log.Debug("bitmap record at " + offset + " is truncated, words read = " + i);
                            break;
                        }
                        ar.Add((uint)Helper.ToInt32(b, 0));
                    }
                }
            }
            return ar.ToArray();
        }

        /// <summary>
        /// Reads exactly <paramref name="count"/> bytes into the start of <paramref name="buffer"/>.
        /// </summary>
        /// <returns>False when the stream ends before <paramref name="count"/> bytes were read.</returns>
        private static bool ReadExact(Stream source, byte[] buffer, int count)
        {
            int total = 0;
            while (total < count)
            {
                int read = source.Read(buffer, total, count - total);
                if (read <= 0)
                    return false;
                total += read;
            }
            return true;
        }

        /// <summary>
        /// Sets the bit for <paramref name="recnum"/> on every space separated word of
        /// <paramref name="text"/>, creating the index entry for a word the first time it is seen.
        /// </summary>
        /// <param name="recnum">Record number to flag.</param>
        /// <param name="text">Text to index; null is ignored and empty tokens are skipped.</param>
        private void AddtoIndex(int recnum, string text)
        {
            if (text == null)
                return;

            foreach (string key in text.Split(' '))
            {
                if (key == "")
                    continue;

                // Queries look words up with ToLowerInvariant(); store keys the same way so that
                // lookups do not depend on the current culture.
                string word = key.ToLowerInvariant();

                Cache cache;
                if (_index.TryGetValue(word, out cache))
                {
                    cache.SetBit(recnum, true);
                }
                else
                {
                    cache = new Cache();
                    cache.isLoaded = true;
                    cache.SetBit(recnum, true);
                    _index.Add(word, cache);
                }
            }
        }

        #endregion

        /// <summary>
        /// Saves the index and closes the bitmap file. Does nothing in in-memory mode.
        /// </summary>
        public void Shutdown()
        {
            if (_inMemory == true)
                return;

            try
            {
                Save();
                _bitmapFile.Flush();
            }
            finally
            {
                // release the file handle even when saving fails
                _bitmapFile.Close();
            }
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Architect -
United Kingdom United Kingdom
Mehdi first started programming when he was 8, on a BBC+128k machine in 6512 processor language. After various hardware and software changes he eventually came across .NET and C#, which he has been using since v1.0.
He is formally educated as a system analyst and industrial engineer, but his programming passion continues.

* Mehdi is the 5th person to get 6 out of 7 Platinum's on Code-Project (13th Jan'12)
* Mehdi is the 3rd person to get 7 out of 7 Platinum's on Code-Project (26th Aug'16)

Comments and Discussions