|
// simplest search engine - parse several text file and build an inverted index
// Adrian Pirvu 2009
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;
namespace SimplestSearchEngine
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
// inverted index - a dictionary of file lists
// no file index, no ranking - we want it as simple is possible
private Dictionary<string, List<string>> InvertedIndex = new Dictionary<string, List<string>>();
// index files
private void buttonIndex_Click(object sender, EventArgs e)
{
if (openFileDialog.ShowDialog() != DialogResult.OK)
return;
// parse each file and build the inverted index.
foreach (string fileName in openFileDialog.FileNames)
{
string text = File.ReadAllText(fileName).Replace("\r", " ").Replace("\n", " ");
string[] terms = text.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
// parse each term (word) in text file and put it in dictionary
foreach (string term in terms)
{
if (!InvertedIndex.ContainsKey(term))
InvertedIndex.Add(term, new List<string>());
if (!InvertedIndex[term].Contains(fileName))
InvertedIndex[term].Add(fileName);
}
}
// update label
labelIndex.Text = openFileDialog.FileNames.Length.ToString() + " files indexed";
}
// perform seach over the inverted index built earlier
private void buttonSearch_Click(object sender, EventArgs e)
{
// split query in terms
string query = textBoxQuery.Text;
string[] terms = query.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
// see documents for each term and merge them
List<string> commonDocuments = null;
foreach (string term in terms)
{
// if one term not in index, end search
if (!InvertedIndex.ContainsKey(term))
{
if (commonDocuments != null)
commonDocuments.Clear();
break;
}
// find documents containing all terms
if (commonDocuments == null)
commonDocuments = InvertedIndex[term];
else
commonDocuments = GetCommonDocuments(commonDocuments, InvertedIndex[term]);
}
// display results
if (commonDocuments != null)
{
listBoxResults.Items.Clear();
foreach (string fileName in commonDocuments)
listBoxResults.Items.Add(fileName);
}
}
// merge two arrays of file indexes
private List<string> GetCommonDocuments(List<string> documentList, List<string> newDocumentsList)
{
List<string> common = new List<string>();
foreach (string file in documentList)
if (newDocumentsList.Contains(file))
common.Add(file);
return common;
}
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.