Click here to Skip to main content
15,883,816 members
Articles / Programming Languages / C#

hOOt - full text search engine

Rate me:
Please Sign up or sign in to vote.
4.92/5 (156 votes)
24 Feb 2019CPOL17 min read 1.1M   22.5K   388  
The smallest full-text search engine (a Lucene replacement), built from scratch using an inverted MGRB bitmap index and highly compact storage, operating in database and document modes.
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.IO;
using hOOt;
using System.Threading;
using System.Diagnostics;


namespace SampleApp
{
    /// <summary>
    /// Main window of the hOOt sample application. It lets the user pick an index
    /// storage folder and a folder to index, runs the indexing on a
    /// <see cref="BackgroundWorker"/>, and searches the resulting index.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>File name passed to the <see cref="Hoot"/> constructor for the index.</summary>
        private const string IndexFileName = "index";

        /// <summary>Number of processed files after which the index is saved during a run.</summary>
        private const int SaveInterval = 1000;

        public Form1()
        {
            InitializeComponent();
        }

        // The index instance; null until it is loaded (button1) or indexing is started (btnStart).
        Hoot h;
        // Time at which the current/last indexing run was started.
        DateTime _indextime;

        /// <summary>
        /// Tells the user that no index is loaded when <c>h</c> is still null.
        /// </summary>
        /// <returns>true when an index instance exists; otherwise false.</returns>
        private bool IsIndexLoaded()
        {
            if (h != null)
            {
                return true;
            }

            MessageBox.Show("hOOt not loaded");
            return false;
        }

        /// <summary>
        /// Creates a <see cref="Hoot"/> instance over the folder typed into txtIndexFolder.
        /// </summary>
        private Hoot OpenIndex()
        {
            return new Hoot(Path.GetFullPath(txtIndexFolder.Text), IndexFileName);
        }

        /// <summary>
        /// Shows a folder picker starting at the current directory and, when the user
        /// confirms, writes the chosen path into <paramref name="target"/>.
        /// </summary>
        /// <param name="target">Control whose Text receives the selected folder.</param>
        private static void BrowseForFolder(Control target)
        {
            // The dialog wraps native resources, so dispose it deterministically.
            using (FolderBrowserDialog dialog = new FolderBrowserDialog())
            {
                dialog.SelectedPath = Directory.GetCurrentDirectory();
                if (dialog.ShowDialog() == DialogResult.OK)
                {
                    target.Text = dialog.SelectedPath;
                }
            }
        }

        /// <summary>Shows the value returned by <c>h.WordCount()</c> in a message box.</summary>
        private void button2_Click(object sender, EventArgs e)
        {
            if (!IsIndexLoaded())
            {
                return;
            }

            MessageBox.Show("" + h.WordCount());
        }

        /// <summary>
        /// Calls <c>FreeMemory(false)</c> on the index and then forces a garbage
        /// collection of generations 0 through 2.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            if (!IsIndexLoaded())
            {
                return;
            }

            h.FreeMemory(false);
            GC.Collect(2);
        }

        /// <summary>Saves the index on demand.</summary>
        private void button4_Click(object sender, EventArgs e)
        {
            if (!IsIndexLoaded())
            {
                return;
            }

            h.Save();
        }

        /// <summary>
        /// Runs the query in txtSearch against the index, lists the returned document
        /// file names in listBox1 and reports the hit count and elapsed time.
        /// </summary>
        private void btnSearch_Click(object sender, EventArgs e)
        {
            if (!IsIndexLoaded())
            {
                return;
            }

            Stopwatch timer = Stopwatch.StartNew();
            listBox1.BeginUpdate();
            try
            {
                listBox1.Items.Clear();
                foreach (var d in h.FindDocumentFileNames(txtSearch.Text))
                {
                    listBox1.Items.Add(d);
                }
            }
            finally
            {
                // Always resume painting, even if the search throws.
                listBox1.EndUpdate();
            }

            lblStatus.Text = "Search = " + listBox1.Items.Count + " items, " + timer.Elapsed.TotalSeconds + " s";
        }

        /// <summary>Lets the user browse for the folder to index (txtWhere).</summary>
        private void button7_Click(object sender, EventArgs e)
        {
            BrowseForFolder(txtWhere);
        }

        /// <summary>
        /// Validates the two folder inputs, collects every file below the folder to
        /// index and starts the background indexing run.
        /// </summary>
        private void btnStart_Click(object sender, EventArgs e)
        {
            // Trim so that whitespace-only input is rejected here instead of failing in Path.GetFullPath.
            if (txtIndexFolder.Text.Trim().Length == 0 || txtWhere.Text.Trim().Length == 0)
            {
                MessageBox.Show("Please supply the index storage folder and the where to start indexing from.");
                return;
            }

            // RunWorkerAsync throws if a run is already in progress.
            if (backgroundWorker1.IsBusy)
            {
                return;
            }

            // Enumerate the files BEFORE touching the buttons: if this fails the UI
            // must not be left with Start disabled and no worker running.
            string[] files;
            try
            {
                files = Directory.GetFiles(txtWhere.Text, "*", SearchOption.AllDirectories);
            }
            catch (IOException ex)
            {
                MessageBox.Show("Cannot read the folder to index: " + ex.Message);
                return;
            }
            catch (UnauthorizedAccessException ex)
            {
                MessageBox.Show("Cannot read the folder to index: " + ex.Message);
                return;
            }
            catch (ArgumentException ex)
            {
                MessageBox.Show("Cannot read the folder to index: " + ex.Message);
                return;
            }

            if (h == null)
            {
                h = OpenIndex();
            }

            btnStart.Enabled = false;
            btnStop.Enabled = true;
            _indextime = DateTime.Now;
            backgroundWorker1.RunWorkerAsync(files);
        }

        /// <summary>Lets the user browse for the index storage folder (txtIndexFolder).</summary>
        private void button6_Click(object sender, EventArgs e)
        {
            BrowseForFolder(txtIndexFolder);
        }

        /// <summary>
        /// Background indexing loop. Reads each file's text through an IFilter-based
        /// reader, adds it to the index, saves every <see cref="SaveInterval"/> files and
        /// finally saves and optimizes the index. Stops early when cancellation is
        /// requested; whatever was indexed up to that point is still saved.
        /// </summary>
        /// <param name="sender">The <see cref="BackgroundWorker"/> running this handler.</param>
        /// <param name="e">Carries the <c>string[]</c> of file paths in <c>Argument</c>.</param>
        private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
        {
            BackgroundWorker worker = (BackgroundWorker)sender;
            string[] files = (e.Argument as string[]) ?? new string[0];
            int sinceLastSave = 0;

            foreach (string fn in files)
            {
                if (worker.CancellationPending)
                {
                    e.Cancel = true;
                    break;
                }

                // The file name travels as UserState so the UI can show what is being indexed.
                worker.ReportProgress(1, fn);
                try
                {
                    string text;
                    // Dispose the reader so the underlying filter and file handle are released per file.
                    using (TextReader reader = new EPocalipse.IFilter.FilterReader(fn))
                    {
                        text = reader.ReadToEnd();
                    }

                    h.Index(new Document(fn, text), true);
                }
                catch (Exception ex)
                {
                    // Best effort: a file that cannot be read or indexed is skipped,
                    // but the failure is traced instead of being silently discarded.
                    Trace.WriteLine("Skipping '" + fn + "': " + ex.Message);
                }

                // Failed files count too, matching the original cadence of periodic saves.
                sinceLastSave++;
                if (sinceLastSave >= SaveInterval)
                {
                    sinceLastSave = 0;
                    h.Save();
                }
            }

            h.Save();
            h.OptimizeIndex();
        }

        /// <summary>Shows the file currently being indexed (passed as the progress user state).</summary>
        private void backgroundWorker1_ProgressChanged(object sender, ProgressChangedEventArgs e)
        {
            lblIndexer.Text = "" + e.UserState;
        }

        /// <summary>
        /// Restores the Start/Stop buttons and reports how the indexing run ended:
        /// failed (unhandled exception in the worker), cancelled, or done.
        /// </summary>
        private void backgroundWorker1_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
        {
            btnStart.Enabled = true;
            btnStop.Enabled = false;

            if (e.Error != null)
            {
                lblIndexer.Text = "Indexing failed";
                MessageBox.Show("Indexing failed : " + e.Error.Message);
                return;
            }

            // Measure once so the label and the message box show the same value.
            double seconds = DateTime.Now.Subtract(_indextime).TotalSeconds;
            string outcome = e.Cancelled ? "Indexing cancelled : " : "Indexing done : ";
            lblIndexer.Text = "" + seconds + " sec";
            MessageBox.Show(outcome + seconds + " sec");
        }

        /// <summary>Requests cancellation of a running indexing job.</summary>
        private void btnStop_Click(object sender, EventArgs e)
        {
            if (backgroundWorker1.IsBusy)
            {
                backgroundWorker1.CancelAsync();
            }
        }

        /// <summary>Triggers a search when Enter is pressed in the search box.</summary>
        private void txtSearch_KeyDown(object sender, KeyEventArgs e)
        {
            if (e.KeyCode == Keys.Enter)
            {
                btnSearch_Click(sender, EventArgs.Empty);
            }
        }

        /// <summary>Opens the double-clicked search result via <see cref="Process.Start(string)"/>.</summary>
        private void listBox1_DoubleClick(object sender, EventArgs e)
        {
            // A double-click on the empty area of the list leaves SelectedItem null.
            string path = "" + listBox1.SelectedItem;
            if (path.Length == 0)
            {
                return;
            }

            try
            {
                // Disposing the Process object only releases the handle held by this
                // application; it does not close the program that was launched.
                using (Process.Start(path))
                {
                }
            }
            catch (Win32Exception ex)
            {
                MessageBox.Show("Could not open '" + path + "': " + ex.Message);
            }
            catch (FileNotFoundException ex)
            {
                MessageBox.Show("Could not open '" + path + "': " + ex.Message);
            }
        }

        /// <summary>
        /// Loads the index from the folder in txtIndexFolder and disables the load button.
        /// </summary>
        private void button1_Click_1(object sender, EventArgs e)
        {
            // Trim so that whitespace-only input is rejected here instead of failing in Path.GetFullPath.
            if (txtIndexFolder.Text.Trim().Length == 0)
            {
                MessageBox.Show("Please supply the index storage folder.");
                return;
            }

            h = OpenIndex();
            button1.Enabled = false;
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Architect -
United Kingdom
Mehdi first started programming at the age of 8 on a BBC+128k machine in 6512 processor language. After various hardware and software changes, he eventually came across .NET and C#, which he has been using since v1.0.
He is formally educated as a system analyst and industrial engineer, but his passion for programming continues.

* Mehdi is the 5th person to get 6 out of 7 Platinums on Code-Project (13th Jan '12)
* Mehdi is the 3rd person to get 7 out of 7 Platinums on Code-Project (26th Aug '16)

Comments and Discussions