Click here to Skip to main content
Click here to Skip to main content
Articles » Database » Database » General » Downloads
 
Add your own
alternative version

hOOt - full text search engine

, 22 Jun 2013
Smallest full text search engine (lucene replacement) built from scratch using inverted WAH bitmap index, highly compact storage, operating in database and document modes
hoot_v1.0-noexe.zip
hoot_v1.0.zip
Hoot
Properties
SampleApp
newifilter
Properties
Settings.settings
hoot_v1.1-noexe.zip
Hoot_v1.1.zip
_svn
all-wcprops
entries
text-base
AssemblyInfo.cs.svn-base
_svn
all-wcprops
entries
text-base
ComHelper.cs.svn-base
FilterLoader.cs.svn-base
FilterReader.cs.svn-base
IFilter.cs.svn-base
Settings.settings
_svn
all-wcprops
entries
text-base
AssemblyInfo.cs.svn-base
Resources.Designer.cs.svn-base
Resources.resx.svn-base
Settings.Designer.cs.svn-base
Settings.settings.svn-base
hoot_v1.2-noexe.zip
Hoot_v1.2.zip
Settings.settings
hoot_v1.3-noexe.zip
Hoot_v1.3.zip
Settings.settings
hoot_v1.4-noexe.zip
Hoot_v1.4.zip
Settings.settings
hoot_v1.5-noexe.zip
Hoot_v1.5.zip
Settings.settings
Hoot_v2.0.zip
fastJSON
MGIndex
Settings.settings
Hoot_v2.1.zip
Settings.settings
Hoot_v2.2.1.zip
Settings.settings
Hoot_v2.2.zip
Settings.settings
sampleapp.exe_v2.2-noexe.zip
sampleapp.exe_v2.2.1-noexe.zip
SampleApp.EXE_v2.2.1.zip
SampleApp.EXE_v2.2.zip
sampleapp_exe-noexe.zip
sampleapp_exe.zip
sampleapp_exe_v1.1-noexe.zip
SampleApp_EXE_v1.1.zip
sampleapp_exe_v1.2-noexe.zip
SampleApp_EXE_v1.2.zip
sampleapp_exe_v1.3-noexe.zip
SampleApp_EXE_v1.3.zip
sampleapp_exe_v1.4-noexe.zip
SampleApp_EXE_v1.4.zip
sampleapp_exe_v1.5-noexe.zip
SampleApp_EXE_v1.5.zip
sampleapp_exe_v2.0-noexe.zip
SampleApp_EXE_v2.0.zip
sampleapp_exe_v2.1-noexe.zip
SampleApp_EXE_v2.1.zip
using System;
using System.Diagnostics;
using System.Collections;
using System.Runtime.InteropServices;
using System.IO;
using System.Text;
using System.Collections.Generic;
using RaptorDB.Common;

namespace RaptorDB
{
    /// <summary>
    /// One raw record as produced by <c>StorageFile&lt;T&gt;.Enumerate()</c>:
    /// the key bytes, the data bytes and the deleted flag taken from the row header.
    /// </summary>
    internal class StorageData
    {
        /// <summary>Raw key bytes that follow the row header in the data file.</summary>
        public byte[] Key;
        /// <summary>Raw data block that follows the key in the data file.</summary>
        public byte[] Data;
        /// <summary>True when bit 0 of the row header's flags byte is set.</summary>
        public bool isDeleted;
    }

    internal class StorageFile<T>
    {
        // Write handle on the data file; WriteData appends rows through it.
        FileStream _datawrite;
        // Write handle on the record-pointer (.mgrec) file; WriteData appends one 8-byte row offset per record.
        // Only opened when Initialize runs with SkipChecking == false.
        FileStream _recfilewrite;
        // Separate read handle on the record-pointer file (same condition as _recfilewrite).
        FileStream _recfileread = null;
        // Separate read handle on the data file.
        FileStream _dataread = null;

        // Path of the data file as passed to the constructor.
        private string _filename = "";
        // Path of the record-pointer file derived from _filename.
        private string _recfilename = "";
        // Number of records known so far; also the record number the next WriteData call returns.
        private int _lastRecordNum = 0;
        // Offset in the data file where the next row will start; 6 is the length of _fileheader.
        private long _lastWriteOffset = 6;
        // Guards the read and write paths that take it (WriteData, ReadData, GetKey, CopyTo, Enumerate).
        private object _lock = new object();
        // Set by WriteData; ComputeOffset flushes the write streams before reading when it is set.
        private bool _dirty = false;
        // Key <-> byte[] converter for T, obtained from RDBDataType<T>.ByteHandler() in Initialize.
        IGetBytes<T> _T = null;

        // 6-byte header written once at the start of a new data file: the magic "MGDB" plus two unused bytes.
        public static byte[] _fileheader = { (byte)'M', (byte)'G', (byte)'D', (byte)'B',
                                              0, // 4 -- not used
                                              0  // 5 -- not used
                                           };

        // 18-byte template copied in front of every row; the key bytes and then the data bytes follow it.
        // The byte positions below are mirrored by the HDR_POS enum.
        public static byte[] _rowheader = { (byte)'M', (byte)'G', (byte)'R' ,
                                           0,               // 3     [keylen] one byte
                                           0,0,0,0,0,0,0,0, // 4-11  [datetime] 8 bytes = insert time
                                           0,0,0,0,         // 12-15 [data length] 4 bytes
                                           0,               // 16 -- [flags] = 1 : isDeleted:1
                                                            //                 2 : isCompressed:1
                                                            //       (only bit 1 is set or tested by this class)
                                           0                // 17 -- [crc] = header crc check
                                                            //       (CheckHeader only accepts the value 0 here)
                                       };
        /// <summary>
        /// Byte positions of the fields inside a row header (see the layout comments on _rowheader).
        /// </summary>
        private enum HDR_POS
        {
            KeyLen = 3,      // one byte: length of the key that follows the header
            DateTime = 4,    // start of the insert-time slot (bytes 4-11)
            DataLength = 12, // 4-byte length of the data block
            Flags = 16,      // bit 0 = deleted
            CRC = 17         // must be 0 for CheckHeader to accept the row
        }

        /// <summary>
        /// When true, CreateRowHeader leaves the insert-time bytes of new row headers at zero.
        /// </summary>
        public bool SkipDateTime = false;

        /// <summary>
        /// Opens (creating if necessary) the data file together with its record-pointer file.
        /// </summary>
        /// <param name="filename">Path of the data file.</param>
        public StorageFile(string filename)
            : this(filename, false)
        {
        }

        /// <summary>
        /// Opens (creating if necessary) the data file.
        /// </summary>
        /// <param name="filename">Path of the data file.</param>
        /// <param name="SkipChecking">
        /// When true, the record-pointer file is not opened and no file header is written.
        /// </param>
        public StorageFile(string filename, bool SkipChecking)
        {
            Initialize(filename, SkipChecking);
        }

        /// <summary>
        /// Opens the data file (and, unless <paramref name="SkipChecking"/> is set, the
        /// record-pointer file next to it) and positions the write cursors at the end.
        /// </summary>
        /// <param name="filename">Path of the data file; created when it does not exist.</param>
        /// <param name="SkipChecking">
        /// When true only the data file is opened: no record-pointer file, no file header
        /// written and no seek to the end of the data file.
        /// </param>
        private void Initialize(string filename, bool SkipChecking)
        {
            _T = RDBDataType<T>.ByteHandler();
            _filename = filename;

            // OpenOrCreate replaces the File.Exists + CreateNew/Open pair, which could fail
            // if the file appeared between the existence check and the open.
            _datawrite = new FileStream(filename, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
            _dataread = new FileStream(_filename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);

            if (SkipChecking)
                return;

            // load rec pointers
            // Path.ChangeExtension only looks at the file-name part, so a path without an
            // extension no longer throws and a dot inside a directory name is left alone.
            _recfilename = Path.ChangeExtension(filename, ".mgrec");
            _recfilewrite = new FileStream(_recfilename, FileMode.OpenOrCreate, FileAccess.Write, FileShare.ReadWrite);
            _recfileread = new FileStream(_recfilename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);

            if (_datawrite.Length == 0)
            {
                // new file: start it with the file header
                _datawrite.Write(_fileheader, 0, _fileheader.Length);
                _datawrite.Flush();
                _lastWriteOffset = _fileheader.Length;
            }
            else
            {
                // existing file: new rows are appended after the current content
                _lastWriteOffset = _datawrite.Seek(0L, SeekOrigin.End);
            }

            // each record owns one 8-byte pointer in the record-pointer file
            _lastRecordNum = (int)(_recfilewrite.Length / 8);
            _recfilewrite.Seek(0L, SeekOrigin.End);
        }

        /// <summary>
        /// Number of records, derived from the size of the record-pointer file
        /// (one 8-byte pointer per record).
        /// </summary>
        public int Count()
        {
            long pointerBytes = _recfilewrite.Length;
            return (int)(pointerBytes / 8);
        }

        /// <summary>
        /// Appends one row (header, key bytes, optional data block) to the data file and
        /// its starting offset to the record-pointer file.
        /// </summary>
        /// <param name="key">Key of the row; converted to bytes through the type handler.</param>
        /// <param name="data">Data block, or null to write a row without data.</param>
        /// <param name="deleted">When true the deleted flag is set in the row header.</param>
        /// <returns>The record number assigned to the new row.</returns>
        public int WriteData(T key, byte[] data, bool deleted)
        {
            lock (_lock)
            {
                _dirty = true;

                byte[] keyBytes = _T.GetBytes(key);
                int keyLength = keyBytes.Length;
                int dataLength = 0;
                if (data != null)
                    dataLength = data.Length;

                // the row starts where the previous one ended
                long rowStart = _lastWriteOffset;

                byte[] header = CreateRowHeader(keyLength, dataLength);
                if (deleted)
                    header[(int)HDR_POS.Flags] = (byte)1;

                // header, then key, then (optionally) the data block
                _datawrite.Write(header, 0, header.Length);
                _datawrite.Write(keyBytes, 0, keyLength);
                if (data != null)
                    _datawrite.Write(data, 0, dataLength);

                // advance the write pointer past everything just written
                _lastWriteOffset = rowStart + header.Length + keyLength + dataLength;

                // the record number is the index of the pointer about to be appended
                int recno = _lastRecordNum;
                _lastRecordNum = recno + 1;
                _recfilewrite.Write(Helper.GetBytes(rowStart, false), 0, 8);

                if (Global.FlushStorageFileImmetiatley)
                {
                    _datawrite.Flush();
                    _recfilewrite.Flush();
                }
                return recno;
            }
        }

        /// <summary>
        /// Reads the data block of a record, ignoring its deleted flag.
        /// </summary>
        /// <param name="recnum">Record number as returned by WriteData.</param>
        /// <returns>The data block, or null when the record number is not yet known or the row has no data.</returns>
        public byte[] ReadData(int recnum)
        {
            bool ignoredDeletedFlag;
            return ReadData(recnum, out ignoredDeletedFlag);
        }

        /// <summary>
        /// Flushes and closes all four file handles (readers first, then writers) and
        /// clears the references.
        /// </summary>
        public void Shutdown()
        {
            FileStream[] openStreams = { _dataread, _recfileread, _recfilewrite, _datawrite };
            foreach (FileStream stream in openStreams)
                FlushClose(stream);

            _dataread = null;
            _recfileread = null;
            _recfilewrite = null;
            _datawrite = null;
        }

        /// <summary>
        /// Opens a data file with checking skipped, i.e. without its record-pointer file.
        /// </summary>
        /// <param name="filename">Path of the data file.</param>
        public static StorageFile<int> ReadForward(string filename)
        {
            return new StorageFile<int>(filename, true);
        }

        #region [ private / internal  ]

        /// <summary>
        /// Builds the header for one row from the _rowheader template.
        /// </summary>
        /// <param name="keylen">Length of the key in bytes; must fit the one-byte key length slot.</param>
        /// <param name="datalen">Length of the data block in bytes.</param>
        /// <returns>A new header array with key length, insert time and data length filled in.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="keylen"/> is negative or larger than 255. Casting such a value to a
        /// byte would store a length that disagrees with the key bytes actually written.
        /// </exception>
        private byte[] CreateRowHeader(int keylen, int datalen)
        {
            if (keylen < 0 || keylen > byte.MaxValue)
                throw new ArgumentOutOfRangeException("keylen", "key length must be between 0 and 255 bytes");

            byte[] rh = new byte[_rowheader.Length];
            Buffer.BlockCopy(_rowheader, 0, rh, 0, rh.Length);
            rh[(int)HDR_POS.KeyLen] = (byte)keylen;
            if (SkipDateTime == false)
            {
                // the insert-time slot is 8 bytes wide (header bytes 4-11), so copy all 8 tick bytes
                Buffer.BlockCopy(Helper.GetBytes(FastDateTime.Now.Ticks, false), 0, rh, (int)HDR_POS.DateTime, 8);
            }
            Buffer.BlockCopy(Helper.GetBytes(datalen, false), 0, rh, (int)HDR_POS.DataLength, 4);

            return rh;
        }

        /// <summary>
        /// Reads the data block of a record and reports its deleted flag.
        /// </summary>
        /// <param name="recnum">Record number as returned by WriteData.</param>
        /// <param name="isdeleted">Set from the row header; false when nothing was read.</param>
        /// <returns>The data block, or null when <paramref name="recnum"/> is not below the record count or the row has no data.</returns>
        internal byte[] ReadData(int recnum, out bool isdeleted)
        {
            isdeleted = false;
            if (recnum < _lastRecordNum)
            {
                lock (_lock)
                {
                    byte[] unusedKey;
                    long rowOffset = ComputeOffset(recnum);
                    return internalReadData(rowOffset, out unusedKey, out isdeleted);
                }
            }
            return null;
        }

        /// <summary>
        /// Reads the data block of a record whose key is a Guid (internal use for guid keys only).
        /// </summary>
        /// <param name="recnum">Record number as returned by WriteData.</param>
        /// <param name="docid">Receives the Guid built from the stored key bytes; Guid.Empty when nothing was read.</param>
        /// <param name="isdeleted">Set from the row header; false when nothing was read.</param>
        /// <returns>The data block, or null when <paramref name="recnum"/> is not below the record count or the row has no data.</returns>
        internal byte[] ReadData(int recnum, out Guid docid, out bool isdeleted)
        {
            docid = Guid.Empty;
            isdeleted = false;
            if (recnum < _lastRecordNum)
            {
                lock (_lock)
                {
                    byte[] keyBytes;
                    long rowOffset = ComputeOffset(recnum);
                    byte[] payload = internalReadData(rowOffset, out keyBytes, out isdeleted);
                    docid = new Guid(keyBytes);
                    return payload;
                }
            }
            return null;
        }
        /// <summary>
        /// Looks up the data-file offset of a record in the record-pointer file.
        /// Callers in this class invoke it while holding _lock.
        /// </summary>
        /// <param name="recnum">Record number; its pointer lives at byte recnum * 8.</param>
        /// <returns>
        /// The stored offset, or the offset of the first row (right after the file header)
        /// when the stored value is 0.
        /// </returns>
        private long ComputeOffset(int recnum)
        {
            if (_dirty)
            {
                // make buffered writes visible to the separate read handles
                _datawrite.Flush();
                _recfilewrite.Flush();
                // nothing is pending any more; WriteData sets the flag again on the next write
                _dirty = false;
            }

            byte[] b = new byte[8];
            _recfileread.Seek(recnum * 8L, SeekOrigin.Begin);

            // Stream.Read may deliver fewer bytes than requested, so keep reading until
            // the pointer is complete or the stream ends.
            int total = 0;
            while (total < b.Length)
            {
                int n = _recfileread.Read(b, total, b.Length - total);
                if (n == 0)
                    break;
                total += n;
            }

            long off = Helper.ToInt64(b, 0);
            if (off == 0)// kludge: a zero pointer is treated as the first row
                off = _fileheader.Length;
            return off;
        }

        /// <summary>
        /// Reads the row that starts at <paramref name="offset"/> in the data file.
        /// </summary>
        /// <param name="offset">Offset of the row header in the data file.</param>
        /// <param name="key">Receives the key bytes, or null when the stored key length is 0.</param>
        /// <param name="isdeleted">Receives the deleted flag from the row header.</param>
        /// <returns>The data block, or null when the stored data length is 0.</returns>
        /// <exception cref="Exception">The bytes at <paramref name="offset"/> are not a valid, complete row header.</exception>
        /// <exception cref="EndOfStreamException">The file ends before the key or data block is complete.</exception>
        private byte[] internalReadData(long offset, out byte[] key, out bool isdeleted)
        {
            key = null;
            isdeleted = false;

            // seek offset in file and read the header
            byte[] hdr = new byte[_rowheader.Length];
            _dataread.Seek(offset, System.IO.SeekOrigin.Begin);
            int headerBytes = ReadFully(_dataread, hdr, hdr.Length);

            // check header (an incomplete header is reported the same way as a bad one)
            if (headerBytes != hdr.Length || CheckHeader(hdr) == false)
                throw new Exception("data header error at offset : " + offset + " data file size = " + _dataread.Length);

            isdeleted = isDeleted(hdr);

            int kl = hdr[(int)HDR_POS.KeyLen];
            if (kl > 0)
            {
                key = new byte[kl];
                if (ReadFully(_dataread, key, kl) != kl)
                    throw new EndOfStreamException("truncated key for row at offset : " + offset);
            }

            byte[] data = null;
            int dl = Helper.ToInt32(hdr, (int)HDR_POS.DataLength);
            if (dl > 0)
            {
                data = new byte[dl];
                // read data block
                if (ReadFully(_dataread, data, dl) != dl)
                    throw new EndOfStreamException("truncated data block for row at offset : " + offset);
            }
            return data;
        }

        /// <summary>
        /// Reads from <paramref name="source"/> until <paramref name="count"/> bytes are in
        /// <paramref name="buffer"/> or the stream ends; returns the number of bytes read.
        /// </summary>
        private static int ReadFully(Stream source, byte[] buffer, int count)
        {
            int total = 0;
            while (total < count)
            {
                int n = source.Read(buffer, total, count - total);
                if (n == 0)
                    break;
                total += n;
            }
            return total;
        }

        /// <summary>
        /// Returns true when the header starts with the magic "MGR" and its CRC byte is 0.
        /// </summary>
        private bool CheckHeader(byte[] hdr)
        {
            bool magicMatches = hdr[0] == (byte)'M' && hdr[1] == (byte)'G' && hdr[2] == (byte)'R';
            return magicMatches && hdr[(int)HDR_POS.CRC] == (byte)0;
        }

        /// <summary>
        /// Flushes a stream through to disk and closes it; does nothing for null.
        /// </summary>
        private void FlushClose(FileStream st)
        {
            if (st == null)
                return;

            st.Flush(true);
            st.Close();
        }

        /// <summary>
        /// Reads the key of a record and reports its deleted flag.
        /// </summary>
        /// <param name="recnum">Record number as returned by WriteData.</param>
        /// <param name="deleted">Receives the deleted flag; false when no valid row was found.</param>
        /// <returns>
        /// The key, or default(T) when <paramref name="recnum"/> is not below the record count
        /// or the row header is not valid.
        /// </returns>
        internal T GetKey(int recnum, out bool deleted)
        {
            lock (_lock)
            {
                deleted = false;

                // a record number beyond the last one has no pointer; keep returning default(T)
                if (recnum >= _lastRecordNum)
                    return default(T);

                // ComputeOffset flushes pending writes first, so a row written just before
                // this call is visible through the read handles, and it applies the same
                // pointer lookup that ReadData uses.
                long off = ComputeOffset(recnum);

                // seek offset in file and read header
                byte[] hdr = new byte[_rowheader.Length];
                _dataread.Seek(off, System.IO.SeekOrigin.Begin);
                _dataread.Read(hdr, 0, hdr.Length);

                if (CheckHeader(hdr) == false)
                    return default(T);

                deleted = isDeleted(hdr);
                int kl = hdr[(int)HDR_POS.KeyLen];
                byte[] kbyte = new byte[kl];

                _dataread.Read(kbyte, 0, kl);
                return _T.GetObject(kbyte, 0, kl);
            }
        }

        /// <summary>
        /// Returns true when bit 0 of the header's flags byte is set.
        /// </summary>
        private bool isDeleted(byte[] hdr)
        {
            int flags = hdr[(int)HDR_POS.Flags];
            return (flags & 1) != 0;
        }

        /// <summary>
        /// Copies the raw bytes of the data file, from the row of record
        /// <paramref name="start"/> to the end, into the write stream of another storage file.
        /// </summary>
        /// <param name="storageFile">Destination; bytes are written at its current write position.</param>
        /// <param name="start">Record number of the first row to copy.</param>
        /// <returns>The record count of this file.</returns>
        internal int CopyTo(StorageFile<int> storageFile, int start)
        {
            lock (_lock)
            {
                _dataread.Seek(ComputeOffset(start), SeekOrigin.Begin);

                // stream everything from here to the end of the file in 8 KB chunks
                Stream target = storageFile._datawrite;
                byte[] chunk = new byte[4096 * 2];
                while (true)
                {
                    int count = _dataread.Read(chunk, 0, chunk.Length);
                    if (count == 0)
                        break;
                    target.Write(chunk, 0, count);
                }

                return _lastRecordNum;
            }
        }

        /// <summary>
        /// Copies everything from the current position of <paramref name="input"/> to
        /// <paramref name="output"/> in 8 KB chunks until the input is exhausted.
        /// </summary>
        private static void Pump(Stream input, Stream output)
        {
            byte[] chunk = new byte[8192];
            while (true)
            {
                int count = input.Read(chunk, 0, chunk.Length);
                if (count == 0)
                    break;
                output.Write(chunk, 0, count);
            }
        }

        /// <summary>
        /// Walks the data file row by row, starting right after the 6-byte file header,
        /// and yields key, data and deleted flag of each row. The lock is held for as
        /// long as the enumeration is in progress.
        /// </summary>
        /// <exception cref="Exception">A row header fails CheckHeader.</exception>
        internal IEnumerable<StorageData> Enumerate()
        {
            lock (_lock)
            {
                long fileLength = _dataread.Length;
                long position = 6;
                while (position < fileLength)
                {
                    // read the row header at the current position
                    _dataread.Seek(position, SeekOrigin.Begin);
                    byte[] header = new byte[_rowheader.Length];
                    _dataread.Read(header, 0, _rowheader.Length);
                    position += header.Length;

                    if (CheckHeader(header) == false)
                        throw new Exception("Data read failed");

                    bool removed = isDeleted(header);

                    // key bytes follow the header
                    byte keyLength = header[(int)HDR_POS.KeyLen];
                    byte[] keyBytes = new byte[keyLength];
                    position += keyLength;
                    _dataread.Read(keyBytes, 0, keyLength);

                    // data block follows the key
                    int dataLength = Helper.ToInt32(header, (int)HDR_POS.DataLength);
                    byte[] payload = new byte[dataLength];
                    _dataread.Read(payload, 0, dataLength);
                    position += dataLength;

                    yield return new StorageData
                    {
                        isDeleted = removed,
                        Key = keyBytes,
                        Data = payload
                    };
                }
            }
        }
        #endregion
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

About the Author

Mehdi Gholam
Architect
United Kingdom United Kingdom
Mehdi first started programming when he was 8, on a BBC+128k machine in 6512 processor language. After various hardware and software changes he eventually came across .NET and C#, which he has been using since v1.0.
He is formally educated as a systems analyst / industrial engineer, but his passion for programming continues.
 
* Mehdi is the 5th person to get 6 out of 7 Platinums on CodeProject (13th Jan'12)

| Advertise | Privacy | Mobile
Web02 | 2.8.140721.1 | Last Updated 22 Jun 2013
Article Copyright 2011 by Mehdi Gholam
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid