Click here to Skip to main content
15,887,676 members
Articles / Programming Languages / C#

hOOt - full text search engine

Rate me:
Please Sign up or sign in to vote.
4.92/5 (156 votes)
24 Feb 2019 | CPOL | 17 min read | 1.1M | 22.5K | 388
Smallest full-text search engine (Lucene replacement), built from scratch using an inverted MGRB bitmap index, with highly compact storage, operating in database and document modes
using System;
using System.Collections.Generic;
using System.IO;

namespace RaptorDB
{
    #region [  TypeIndexes  ]
    /// <summary>
    /// Adapts an <c>MGIndex&lt;T&gt;</c> to the untyped <c>IIndex</c> contract.
    /// The underlying index file is named <c>filename + ".mgidx"</c>.
    /// </summary>
    /// <typeparam name="T">Key type stored in the index.</typeparam>
    internal class TypeIndexes<T> : MGIndex<T>, IIndex where T : IComparable<T>
    {
        public TypeIndexes(string path, string filename, byte keysize)
            : base(path, filename + ".mgidx", keysize, /*Global.PageItemCount,*/ true)
        {
        }

        /// <summary>
        /// Stores <paramref name="recnum"/> under <paramref name="key"/>.
        /// Null keys are not indexed; the key is cast directly to <typeparamref name="T"/>.
        /// </summary>
        public void Set(object key, int recnum)
        {
            // FEATURE : index null values ??
            if (key != null)
            {
                base.Set((T)key, recnum);
            }
        }

        /// <summary>
        /// Runs the typed base query after turning <paramref name="from"/> into a
        /// <typeparamref name="T"/>: a plain cast when it already is one, otherwise
        /// a conversion through <see cref="Converter"/>.
        /// </summary>
        public WAHBitArray Query(RDBExpression ex, object from, int maxsize)
        {
            bool alreadyTyped = typeof(T).Equals(from.GetType());
            T searchKey = alreadyTyped ? (T)from : Converter(from);

            return base.Query(ex, searchKey, maxsize);
        }

        /// <summary>
        /// Converts a loosely typed value to <typeparamref name="T"/>.
        /// Guid targets are built from the value's string form; everything else
        /// goes through <c>Convert.ChangeType</c>.
        /// </summary>
        private T Converter(object from)
        {
            if (typeof(T) != typeof(Guid))
            {
                return (T)Convert.ChangeType(from, typeof(T));
            }

            // box first: a Guid cannot be cast to the open generic T directly
            object boxedGuid = new Guid(from.ToString());
            return (T)boxedGuid;
        }

        /// <summary>Frees the base index's memory, then saves the index.</summary>
        void IIndex.FreeMemory()
        {
            base.FreeMemory();
            base.SaveIndex();
        }

        /// <summary>Shuts the base index down.</summary>
        void IIndex.Shutdown()
        {
            //base.SaveIndex();
            base.Shutdown();
        }

        /// <summary>Returns the keys reported by the base index.</summary>
        object[] IIndex.GetKeys()
        {
            return base.GetKeys();
        }

        // NOTE: a range overload Query(object fromkey, object tokey, int maxsize) used to
        // live here but was disabled (commented out) in the original source.
    }
    #endregion

    #region [  BoolIndex  ]
    /// <summary>
    /// Index for boolean values: one <c>WAHBitArray</c> with a bit per record number,
    /// persisted to the file <c>path + filename + extension</c>.
    /// Access to the bit array is serialised on a private lock.
    /// </summary>
    internal class BoolIndex : IIndex
    {
        /// <summary>
        /// Builds the file location from the arguments and, if that file already
        /// exists, loads the bit array from it.
        /// </summary>
        /// <param name="path">Directory of the index file; a trailing separator is appended when missing.</param>
        /// <param name="filename">Index file name without extension.</param>
        /// <param name="extension">Text appended verbatim to <paramref name="filename"/>.</param>
        public BoolIndex(string path, string filename, string extension)
        {
            _filename = filename + extension;
            _path = path;
            if (_path.EndsWith(Path.DirectorySeparatorChar.ToString()) == false)
                _path += Path.DirectorySeparatorChar.ToString();

            if (File.Exists(_path + _filename))
                ReadFile();
        }

        private WAHBitArray _bits = new WAHBitArray();
        private readonly string _filename;
        private readonly string _path;
        private readonly object _lock = new object();

        /// <summary>Returns a copy of the current bit array.</summary>
        public WAHBitArray GetBits()
        {
            // taken under the same lock as the writers so the copy is not made mid-update
            lock (_lock)
                return _bits.Copy();
        }

        /// <summary>
        /// Sets the bit for <paramref name="recnum"/> to the boolean in <paramref name="key"/>.
        /// Null keys are ignored.
        /// </summary>
        public void Set(object key, int recnum)
        {
            lock (_lock)
                if (key != null)
                    _bits.Set(recnum, (bool)key);
        }

        /// <summary>
        /// Returns the records whose value equals <paramref name="from"/> (a boxed bool).
        /// <paramref name="ex"/> is not used.
        /// </summary>
        /// <param name="maxsize">Passed to <c>Not</c> when querying for <c>false</c>.</param>
        public WAHBitArray Query(RDBExpression ex, object from, int maxsize)
        {
            lock (_lock)
            {
                bool b = (bool)from;
                if (b)
                    // hand out a copy, as GetBits() does: returning _bits itself would let the
                    // caller read or modify the live index after this lock has been released
                    return _bits.Copy();
                else
                    return _bits.Not(maxsize);
            }
        }

        /// <summary>Saves the index to disk, then releases the bit array's memory.</summary>
        public void FreeMemory()
        {
            lock (_lock)
            {
                SaveIndex();
                _bits.FreeMemory();
            }
        }

        /// <summary>Writes the index file.</summary>
        public void Shutdown()
        {
            WriteFile();
        }

        /// <summary>Writes the index file.</summary>
        public void SaveIndex()
        {
            WriteFile();
        }

        /// <summary>Replaces the bit array with the OR of itself and <paramref name="left"/>.</summary>
        public void InPlaceOR(WAHBitArray left)
        {
            lock (_lock)
                _bits = _bits.Or(left);
        }

        /// <summary>
        /// Serialises the bit array: one type byte followed by the compressed uint words.
        /// </summary>
        private void WriteFile()
        {
            lock (_lock)
            {
                WAHBitArray.TYPE t;
                uint[] ints = _bits.GetCompressed(out t);
                using (MemoryStream ms = new MemoryStream(1 + ints.Length * 4))
                using (BinaryWriter bw = new BinaryWriter(ms))
                {
                    bw.Write((byte)t);// write new format with the data type byte
                    foreach (uint i in ints)
                    {
                        bw.Write(i);
                    }
                    bw.Flush();
                    File.WriteAllBytes(_path + _filename, ms.ToArray());
                }
            }
        }

        /// <summary>
        /// Loads the bit array from the index file. A length that is not a multiple of 4
        /// means the file starts with the type byte (new format); otherwise the words are
        /// read as <c>WAHBitArray.TYPE.WAH</c>.
        /// </summary>
        private void ReadFile()
        {
            byte[] b = File.ReadAllBytes(_path + _filename);
            WAHBitArray.TYPE t = WAHBitArray.TYPE.WAH;
            using (MemoryStream ms = new MemoryStream(b))
            using (BinaryReader br = new BinaryReader(ms))
            {
                if (b.Length % 4 > 0) // new format with the data type byte
                {
                    byte tb = br.ReadByte();
                    t = (WAHBitArray.TYPE)Enum.ToObject(typeof(WAHBitArray.TYPE), tb);
                }

                uint[] ints = new uint[b.Length / 4];
                for (int i = 0; i < ints.Length; i++)
                {
                    // read as unsigned, mirroring WriteFile; casting ReadInt32() to uint
                    // would throw for words with the top bit set in a checked build
                    ints[i] = br.ReadUInt32();
                }
                _bits = new WAHBitArray(t, ints);
            }
        }

        /// <summary>
        /// Range form of the query; only <paramref name="fromkey"/> is used and
        /// <paramref name="tokey"/> is ignored.
        /// </summary>
        public WAHBitArray Query(object fromkey, object tokey, int maxsize)
        {
            return Query(RDBExpression.Greater, fromkey, maxsize);
        }

        /// <summary>Sets the length of the bit array to <paramref name="size"/>.</summary>
        internal void FixSize(int size)
        {
            lock (_lock)
                _bits.Length = size;
        }

        /// <summary>Returns the two possible keys, <c>true</c> and <c>false</c>.</summary>
        public object[] GetKeys()
        {
            return new object[] { true, false };
        }
    }
    #endregion

    #region [  FullTextIndex  ]
    /// <summary>
    /// Full-text index built on <c>Hoot</c>. When constructed as sortable it also keeps a
    /// companion <c>TypeIndexes&lt;string&gt;</c> so that keys can be enumerated.
    /// </summary>
    internal class FullTextIndex : Hoot, IIndex
    {
        /// <param name="IndexPath">Passed to the Hoot base and to the companion index.</param>
        /// <param name="FileName">Passed to the Hoot base and to the companion index.</param>
        /// <param name="docmode">Passed to the Hoot base constructor.</param>
        /// <param name="sortable">When true, a companion string index is created alongside.</param>
        public FullTextIndex(string IndexPath, string FileName, bool docmode, bool sortable)
            : base(IndexPath, FileName, docmode)
        {
            if (sortable)
            {
                _idx = new TypeIndexes<string>(IndexPath, FileName, Global.DefaultStringKeySize);
                _sortable = true;
            }
        }

        private readonly bool _sortable = false;
        private readonly IIndex _idx; // only assigned when _sortable is true

        /// <summary>
        /// Indexes the text in <paramref name="key"/> for <paramref name="recnum"/> and,
        /// when sortable, records the key in the companion index as well.
        /// </summary>
        public void Set(object key, int recnum)
        {
            base.Index(recnum, (string)key);
            if (_sortable)
                _idx.Set(key, recnum);
        }

        /// <summary>
        /// Runs a full-text query using the string form of <paramref name="from"/>.
        /// <paramref name="ex"/> is not used.
        /// </summary>
        public WAHBitArray Query(RDBExpression ex, object from, int maxsize)
        {
            return base.Query("" + from, maxsize);
        }

        /// <summary>Saves the Hoot index and, when sortable, the companion index.</summary>
        public void SaveIndex()
        {
            base.Save();
            if (_sortable)
                _idx.SaveIndex();
        }

        /// <summary>
        /// Range form of the query; only <paramref name="fromkey"/> is used and
        /// <paramref name="tokey"/> is ignored.
        /// </summary>
        public WAHBitArray Query(object fromkey, object tokey, int maxsize)
        {
            return base.Query("" + fromkey, maxsize);
        }

        /// <summary>
        /// Returns the companion index's keys when sortable; otherwise an empty array.
        /// </summary>
        public object[] GetKeys()
        {
            if (_sortable)
                return _idx.GetKeys(); // support get keys 
            else
                return new object[] { };
        }

        /// <summary>
        /// Releases memory held by the Hoot index and by the companion index, then saves.
        /// </summary>
        void IIndex.FreeMemory()
        {
            base.FreeMemory();

            // the companion index was previously skipped here, so a sortable full-text
            // index never released that index's memory; SaveIndex and Shutdown already
            // forward to it
            if (_sortable)
                _idx.FreeMemory();

            this.SaveIndex();
        }

        /// <summary>Saves everything, then shuts down the Hoot index and the companion index.</summary>
        void IIndex.Shutdown()
        {
            this.SaveIndex();
            base.Shutdown();
            if (_sortable)
                _idx.Shutdown();
        }

    }
    #endregion

    #region [  EnumIndex  ]
    /// <summary>
    /// Index for enum-typed values. Keys are stored as the string form of the value
    /// (<c>key.ToString()</c>) in an <c>MGIndex&lt;string&gt;</c> whose file is
    /// <c>filename + ".mgidx"</c>.
    /// </summary>
    /// <typeparam name="T">The value type being indexed (expected to be an enum).</typeparam>
    internal class EnumIndex<T> : MGIndex<string>, IIndex //where T : IComparable<T>
    {
        public EnumIndex(string path, string filename)
            : base(path, filename + ".mgidx", 30, /*Global.PageItemCount,*/ true)
        {

        }

        /// <summary>
        /// Stores <paramref name="recnum"/> under the string form of <paramref name="key"/>.
        /// Null keys are not indexed.
        /// </summary>
        public void Set(object key, int recnum)
        {
            if (key == null) return; // FEATURE : index null values ??

            base.Set(key.ToString(), recnum);
        }

        /// <summary>
        /// Queries the index with <paramref name="from"/> normalised to a
        /// <typeparamref name="T"/> and then to its string form.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="from"/> is null.</exception>
        public WAHBitArray Query(RDBExpression ex, object from, int maxsize)
        {
            T f = Converter(from);

            return base.Query(ex, f.ToString(), maxsize);
        }

        /// <summary>
        /// Converts a loosely typed query value to <typeparamref name="T"/>.
        /// </summary>
        /// <remarks>
        /// <c>Convert.ChangeType</c> cannot convert to an enum type (it throws
        /// <c>InvalidCastException</c> even for the enum's underlying integer type), so
        /// enum targets are handled with <c>Enum.Parse</c> for strings and
        /// <c>Enum.ToObject</c> for integral values.
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="from"/> is null.</exception>
        private T Converter(object from)
        {
            if (from == null)
                throw new ArgumentNullException("from");

            Type target = typeof(T);
            if (target.Equals(from.GetType()))
                return (T)from;

            if (target.IsEnum)
            {
                string name = from as string;
                if (name != null)
                    return (T)Enum.Parse(target, name);

                return (T)Enum.ToObject(target, from);
            }

            if (target == typeof(Guid))
            {
                // box first: a Guid cannot be cast to the open generic T directly
                object o = new Guid(from.ToString());
                return (T)o;
            }

            return (T)Convert.ChangeType(from, target);
        }

        /// <summary>Frees the base index's memory, then saves the index.</summary>
        void IIndex.FreeMemory()
        {
            base.FreeMemory();
            base.SaveIndex();
        }

        /// <summary>Saves the index, then shuts the base index down.</summary>
        void IIndex.Shutdown()
        {
            base.SaveIndex();
            base.Shutdown();
        }

        /// <summary>
        /// Range query between <paramref name="fromkey"/> and <paramref name="tokey"/>,
        /// both normalised the same way as in the single-key query.
        /// </summary>
        /// <exception cref="ArgumentNullException">Either key is null.</exception>
        public WAHBitArray Query(object fromkey, object tokey, int maxsize)
        {
            T f = Converter(fromkey);
            T t = Converter(tokey);

            return base.Query(f.ToString(), t.ToString(), maxsize);
        }

        /// <summary>Returns the keys reported by the base index.</summary>
        object[] IIndex.GetKeys()
        {
            return base.GetKeys();
        }
    }
    #endregion

    #region [  NoIndex  ]
    /// <summary>
    /// Placeholder <c>IIndex</c> that stores nothing: writes are dropped, every query
    /// returns <c>WAHBitArray.Fill(maxsize)</c>, and there are no keys to report.
    /// </summary>
    internal class NoIndex : IIndex
    {
        /// <summary>Does nothing; the key and record number are discarded.</summary>
        public void Set(object key, int recnum)
        {
            // nothing is indexed
        }

        /// <summary>
        /// Returns everything, regardless of <paramref name="ex"/> and <paramref name="from"/>.
        /// </summary>
        public WAHBitArray Query(RDBExpression ex, object from, int maxsize)
        {
            // no filtering is possible without an index
            WAHBitArray everything = WAHBitArray.Fill(maxsize);
            return everything;
        }

        /// <summary>Does nothing; there is no state to release.</summary>
        public void FreeMemory()
        {
        }

        /// <summary>Does nothing.</summary>
        public void Shutdown()
        {
        }

        /// <summary>Does nothing; there is nothing to persist.</summary>
        public void SaveIndex()
        {
        }

        /// <summary>Returns a new empty array.</summary>
        public object[] GetKeys()
        {
            return new object[0];
        }
    }
    #endregion
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Architect -
United Kingdom
Mehdi first started programming when he was 8, on a BBC+128k machine in 6512 processor language. After various hardware and software changes he eventually came across .NET and C#, which he has been using since v1.0.
He is formally educated as a system analyst Industrial engineer, but his programming passion continues.

* Mehdi is the 5th person to get 6 out of 7 Platinums on Code-Project (13th Jan '12)
* Mehdi is the 3rd person to get 7 out of 7 Platinums on Code-Project (26th Aug '16)

Comments and Discussions