Click here to Skip to main content
Click here to Skip to main content
Articles » Database » Database » General » Downloads
 
Add your own
alternative version
Go to top

hOOt - full text search engine

Mehdi Gholam, 22 Jun 2013
The smallest full-text search engine (a Lucene replacement), built from scratch using an inverted WAH bitmap index with highly compact storage, operating in database and document modes.
hoot_v1.0-noexe.zip
hoot_v1.0.zip
fastJSON.dll
Hoot
Properties
SampleApp
newifilter
Properties
Settings.settings
hoot_v1.1-noexe.zip
Hoot_v1.1.zip
fastJSON.dll
_svn
all-wcprops
entries
text-base
AssemblyInfo.cs.svn-base
_svn
all-wcprops
entries
text-base
ComHelper.cs.svn-base
FilterLoader.cs.svn-base
FilterReader.cs.svn-base
IFilter.cs.svn-base
Settings.settings
_svn
all-wcprops
entries
text-base
AssemblyInfo.cs.svn-base
Resources.Designer.cs.svn-base
Resources.resx.svn-base
Settings.Designer.cs.svn-base
Settings.settings.svn-base
hoot_v1.2-noexe.zip
Hoot_v1.2.zip
fastJSON.dll
Settings.settings
hoot_v1.3-noexe.zip
Hoot_v1.3.zip
fastJSON.dll
Settings.settings
hoot_v1.4-noexe.zip
Hoot_v1.4.zip
fastJSON.dll
Settings.settings
hoot_v1.5-noexe.zip
Hoot_v1.5.zip
fastJSON.dll
Settings.settings
Hoot_v2.0.zip
fastJSON
MGIndex
Settings.settings
Hoot_v2.1.zip
Settings.settings
Hoot_v2.2.1.zip
.gitignore
Settings.settings
Hoot_v2.2.zip
.gitignore
Settings.settings
sampleapp.exe_v2.2-noexe.zip
sampleapp.exe_v2.2.1-noexe.zip
SampleApp.EXE_v2.2.1.zip
Hoot.dll
SampleApp.exe
SampleApp.EXE_v2.2.zip
Hoot.dll
SampleApp.exe
sampleapp_exe-noexe.zip
sampleapp_exe.zip
fastJSON.dll
Hoot.dll
SampleApp.exe
sampleapp_exe_v1.1-noexe.zip
SampleApp_EXE_v1.1.zip
fastJSON.dll
Hoot.dll
SampleApp.exe
sampleapp_exe_v1.2-noexe.zip
SampleApp_EXE_v1.2.zip
fastJSON.dll
Hoot.dll
SampleApp.exe
sampleapp_exe_v1.3-noexe.zip
SampleApp_EXE_v1.3.zip
fastJSON.dll
Hoot.dll
SampleApp.exe
sampleapp_exe_v1.4-noexe.zip
SampleApp_EXE_v1.4.zip
fastJSON.dll
Hoot.dll
SampleApp.exe
sampleapp_exe_v1.5-noexe.zip
SampleApp_EXE_v1.5.zip
fastJSON.dll
Hoot.dll
SampleApp.exe
sampleapp_exe_v2.0-noexe.zip
SampleApp_EXE_v2.0.zip
Hoot.dll
SampleApp.exe
sampleapp_exe_v2.1-noexe.zip
SampleApp_EXE_v2.1.zip
Hoot.dll
SampleApp.exe
using System;
using System.Diagnostics;
using System.Collections;
using System.Runtime.InteropServices;
using System.IO;
using System.Text;
using System.Collections.Generic;

namespace hOOt
{
    /// <summary>
    /// Append-only record store backed by two files: a data file that holds a
    /// small file header followed by [row header][key][data] records, and a
    /// companion ".rec" file that holds one 8-byte data-file offset per record
    /// (record number N lives at byte N * 8 of the ".rec" file).
    /// </summary>
    internal class StorageFile
    {
        System.IO.Stream _writefile;
        System.IO.Stream _recordfile;
        private int _maxKeyLen;
        string _filename = "";
        string _recfilename = "";
        int _lastRecordNum = 0;
        long _lastWriteOffset = 0;

        /// <summary>Template for the data-file header; byte 5 is overwritten with maxkeylen when a new file is created.</summary>
        public static byte[] _fileheader = { (byte)'M', (byte)'G', (byte)'D', (byte)'B',
                                              0, // -- [flags] = [shutdownOK:1],
                                              0  // -- [maxkeylen] 
                                           };

        /// <summary>Template for the 18-byte row header (positions below are 1-based; see HDR_POS for the 0-based offsets).</summary>
        public static byte[] _rowheader = { (byte)'M', (byte)'G', (byte)'R' ,
                                           0,               // 4     [keylen]
                                           0,0,0,0,0,0,0,0, // 5-12  [datetime] 8 bytes = insert time
                                           0,0,0,0,         // 13-16 [data length] 4 bytes
                                           0,               // 17 -- [flags] = isCommited:1 
                                                            //                 isRollback:1
                                                            //                 isCompressed:1
                                                            //                 isDeleted:1
                                                            //                 isVersioned:1 
                                           0                // 18 -- [crc] = header crc check
                                       };

        /// <summary>Zero-based byte offsets of the fields inside a row header.</summary>
        private enum HDR_POS
        {
            KeyLen = 3,
            DateTime = 4,
            DataLength = 12,
            Flags = 16,
            CRC = 17
        }

        /// <summary>When true, new row headers store 0 in the datetime field instead of the current tick count.</summary>
        public bool SkipDateTime = false;

        /// <summary>
        /// Opens (or creates) the data file and its companion ".rec" file and
        /// positions both at their end, ready for appending.
        /// </summary>
        /// <param name="filename">Path of the data file; the record file uses the same path with a ".rec" extension.</param>
        /// <param name="maxkeylen">Maximum key length; its low byte is stored in the header of a newly created file.</param>
        public StorageFile(string filename, int maxkeylen)
        {
            _filename = filename;
            // OpenOrCreate replaces the File.Exists + CreateNew/Open pair, which
            // could fail if the file appeared or vanished between check and open.
            _writefile = new FileStream(filename, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
            try
            {
                // load rec pointers
                // Path.ChangeExtension copes with names that have no extension and
                // with dots that belong to a directory component of the path.
                _recfilename = Path.ChangeExtension(filename, ".rec");
                _recordfile = new FileStream(_recfilename, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
            }
            catch
            {
                // do not leak the data-file handle when the record file cannot be opened
                _writefile.Close();
                throw;
            }

            _maxKeyLen = maxkeylen;
            if (_writefile.Length == 0)
            {
                // new file
                byte b = (byte)maxkeylen;
                _fileheader[5] = b;
                _writefile.Write(_fileheader, 0, _fileheader.Length);
                _writefile.Flush();
            }
            else
            {
                // TODO : check file header exists
                // TODO : check file flags ok
            }
            bw = new BinaryWriter(ms, Encoding.UTF8);

            // every record owns exactly 8 bytes in the record file
            _lastRecordNum = (int)(_recordfile.Length / 8);
            _recordfile.Seek(0L, SeekOrigin.End);
            _lastWriteOffset = _writefile.Seek(0L, SeekOrigin.End);
        }

        /// <summary>
        /// Lazily walks the data file from the first record to the end,
        /// yielding each record's key and data.
        /// </summary>
        /// <returns>Key/data pairs in file order.</returns>
        /// <exception cref="Exception">A record with an invalid header is encountered.</exception>
        public IEnumerable<KeyValuePair<byte[], byte[]>> Traverse()
        {
            // records start right after the file header
            long offset = _fileheader.Length;

            while (offset < _writefile.Length)
            {
                long pointer = offset;
                byte[] key;
                offset = NextOffset(offset, out key);
                KeyValuePair<byte[], byte[]> kv = new KeyValuePair<byte[], byte[]>(key, internalReadData(pointer));

                yield return kv;
            }
        }

        /// <summary>
        /// Reads the row header and key at <paramref name="curroffset"/> and
        /// computes where the following record starts.
        /// </summary>
        /// <param name="curroffset">Offset of a row header in the data file.</param>
        /// <param name="key">Receives the key bytes of the record.</param>
        /// <returns>Offset of the next record, or the file length when the header fails validation.</returns>
        private long NextOffset(long curroffset, out byte[] key)
        {
            using (Stream _read = new FileStream(_filename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            {
                long next = _read.Length;
                // seek offset in file
                byte[] hdr = new byte[_rowheader.Length];
                _read.Seek(curroffset, System.IO.SeekOrigin.Begin);
                // read header (looping, because Stream.Read may return fewer bytes than asked)
                ReadFully(_read, hdr, hdr.Length);
                int keylen = hdr[(int)HDR_POS.KeyLen];
                key = new byte[keylen];
                ReadFully(_read, key, keylen);
                // check header
                if (CheckHeader(hdr))
                {
                    next = curroffset + hdr.Length + Helper.ToInt32(hdr, (int)HDR_POS.DataLength) + keylen;
                }

                return next;
            }
        }

        /// <summary>
        /// Appends a record (row header, key, data) to the data file and
        /// stores its starting offset in the record file.
        /// </summary>
        /// <param name="key">Key bytes; at most 255 because the header stores the length in one byte.</param>
        /// <param name="data">Payload bytes.</param>
        /// <returns>The record number assigned to the new record.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="key"/> or <paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="key"/> is longer than 255 bytes.</exception>
        public int WriteData(byte[] key, byte[] data)
        {
            if (key == null)
                throw new ArgumentNullException("key");
            if (data == null)
                throw new ArgumentNullException("data");
            // The header keeps the key length in a single byte; a longer key would be
            // written in full while the header recorded a truncated length, leaving
            // every later record unreachable by Traverse and this one unreadable.
            if (key.Length > byte.MaxValue)
                throw new ArgumentException("key length exceeds 255 bytes", "key");

            byte[] k = key;
            int kl = k.Length;

            // the write stream is already positioned at the end of the file
            long offset = _lastWriteOffset;
            byte[] hdr = CreateRowHeader(kl, data.Length);
            // write header info
            _writefile.Write(hdr, 0, hdr.Length);
            // write key
            _writefile.Write(k, 0, kl);
            // write data block
            _writefile.Write(data, 0, data.Length);
            _writefile.Flush();
            // update pointer
            _lastWriteOffset += hdr.Length;
            _lastWriteOffset += kl;
            _lastWriteOffset += data.Length;
            // return starting offset -> recno
            int recno = _lastRecordNum++;
            // NOTE(review): the meaning of the second GetBytes argument is not visible here;
            // it must produce the 8-byte layout that Helper.ToInt64 reads back in ReadData.
            _recordfile.Write(Helper.GetBytes(offset, false), 0, 8);
            _recordfile.Flush();

            return recno;
        }

        // scratch stream/writer reused for every row header
        MemoryStream ms = new MemoryStream();
        BinaryWriter bw;

        /// <summary>
        /// Builds a row header: "MGR" signature, key length byte, 8-byte
        /// timestamp (0 when SkipDateTime is set), 4-byte data length, flags
        /// byte and crc byte (both written as 0).
        /// </summary>
        /// <param name="keylen">Key length; only the low byte is stored.</param>
        /// <param name="datalen">Length of the data block.</param>
        /// <returns>A new array holding the header bytes.</returns>
        private byte[] CreateRowHeader(int keylen, int datalen)
        {
            // rewind the scratch stream; every header has the same size so it is overwritten in full
            ms.Seek(0L, SeekOrigin.Begin);
            bw.Write(_rowheader, 0, 3);
            bw.Write((byte)keylen);
            if (SkipDateTime == false)
                bw.Write(FastDateTime.Now.Ticks);
            else
                bw.Write(0L);
            bw.Write(datalen);
            bw.Write((byte)0);
            bw.Write((byte)0);
            bw.Flush();
            return ms.ToArray();
        }

        /// <summary>
        /// Reads the data block of the record with the given record number.
        /// </summary>
        /// <param name="recnum">Record number as returned by WriteData.</param>
        /// <returns>The record's data bytes.</returns>
        /// <exception cref="Exception">The offset found for the record does not point at a valid row header.</exception>
        /// <exception cref="EndOfStreamException">The data block is shorter than its header declares.</exception>
        public byte[] ReadData(int recnum)
        {
            // widen before multiplying: recnum * 8 in 32-bit arithmetic overflows for large record numbers
            long off = (long)recnum * 8;
            using (Stream _read = new FileStream(_recfilename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            {
                byte[] b = new byte[8];

                _read.Seek(off, SeekOrigin.Begin);
                ReadFully(_read, b, 8);
                off = Helper.ToInt64(b, 0);
            }

            return internalReadData(off);
        }

        /// <summary>
        /// Reads the data block of the record whose row header starts at
        /// <paramref name="offset"/> in the data file.
        /// </summary>
        /// <param name="offset">Offset of the row header.</param>
        /// <returns>The record's data bytes.</returns>
        /// <exception cref="Exception">The bytes at <paramref name="offset"/> are not a valid row header.</exception>
        /// <exception cref="EndOfStreamException">The file ends before the declared data length.</exception>
        private byte[] internalReadData(long offset)
        {
            using (Stream _read = new FileStream(_filename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            {
                // seek offset in file
                byte[] hdr = new byte[_rowheader.Length];
                _read.Seek(offset, System.IO.SeekOrigin.Begin);
                // read header
                ReadFully(_read, hdr, hdr.Length);
                // check header
                if (CheckHeader(hdr) == false)
                    throw new Exception("data header error");

                // skip key bytes
                _read.Seek(hdr[(int)HDR_POS.KeyLen], System.IO.SeekOrigin.Current);
                int dl = Helper.ToInt32(hdr, (int)HDR_POS.DataLength);
                byte[] data = new byte[dl];
                // read data block; a short result means the record is truncated,
                // so fail instead of handing back zero-padded bytes
                if (ReadFully(_read, data, dl) != dl)
                    throw new EndOfStreamException("data block truncated");
                return data;
            }
        }

        /// <summary>
        /// Fills <paramref name="buffer"/> from its start with up to
        /// <paramref name="count"/> bytes, calling Read repeatedly because a
        /// single Stream.Read call may return fewer bytes than requested.
        /// </summary>
        /// <returns>Bytes actually read; less than <paramref name="count"/> only at end of stream.</returns>
        private static int ReadFully(Stream stream, byte[] buffer, int count)
        {
            int total = 0;
            while (total < count)
            {
                int n = stream.Read(buffer, total, count - total);
                if (n <= 0)
                    break;
                total += n;
            }
            return total;
        }

        /// <summary>
        /// Validates a row header: it must start with "MGR" and carry 0 in the crc byte.
        /// </summary>
        private bool CheckHeader(byte[] hdr)
        {
            return hdr[0] == (byte)'M'
                && hdr[1] == (byte)'G'
                && hdr[2] == (byte)'R'
                && hdr[(int)HDR_POS.CRC] == (byte)0;
        }

        /// <summary>
        /// Flushes and closes both the data file and the record file.
        /// </summary>
        public void Shutdown()
        {
            try
            {
                this._writefile.Flush();
                this._writefile.Close();
            }
            finally
            {
                // the record file was previously left open, leaking its handle
                this._recordfile.Flush();
                this._recordfile.Close();
            }
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Mehdi Gholam
Architect
United Kingdom United Kingdom
Mehdi first started programming when he was 8, on a BBC+128k machine in 6512 processor language. After various hardware and software changes, he eventually came across .NET and C#, which he has been using since v1.0.
He is formally educated as a systems analyst and industrial engineer, but his programming passion continues.
 
* Mehdi is the 5th person to get 6 out of 7 Platinums on CodeProject (13th Jan'12)

| Advertise | Privacy | Mobile
Web03 | 2.8.140926.1 | Last Updated 22 Jun 2013
Article Copyright 2011 by Mehdi Gholam
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid