Click here to Skip to main content
13,347,462 members (50,721 online)
Click here to Skip to main content

Tagged as


102 bookmarked
Posted 7 Mar 2009

Anatomy of a relevant search engine (part 1)

Adrian Pirvu, 4 Jun 2009
Information retrieval, semantic search relevance, and ranking: the anatomy of a search engine, with the source code of the simplest possible search engine.

// simplest search engine - parses several text files and builds an inverted index
// Adrian Pirvu 2009

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;

namespace SimplestSearchEngine
	public partial class Form1 : Form

		// Form constructor.
		// NOTE(review): no constructor body is visible in this listing (brace-only and
		// single-statement lines appear to have been stripped during extraction).
		// Presumably it calls the designer-generated InitializeComponent() - TODO confirm
		// against the original project.
		public Form1()

		// inverted index - a dictionary of file lists
		// no file index, no ranking - we want it as simple as possible
		private Dictionary<string, List<string>> InvertedIndex = new Dictionary<string, List<string>>();

		// index files
		//
		// Click handler for the Index button. Shows the open-file dialog and, for every
		// selected file, splits its text into terms and records the file name under each
		// term in InvertedIndex.
		//
		// - Terms are separated only by spaces, '\r' and '\n'; punctuation stays attached and
		//   the lookup is case-sensitive (InvertedIndex uses the default string comparer).
		// - Each file name is stored at most once per term.
		// - The label reports the number of files selected in this dialog run, not the total
		//   number of files indexed so far.
		//
		// NOTE(review): the braces, the early "return;" after the dialog check and the
		// statement adding the file to the term's list were missing from this listing; they
		// are restored here as the evident intent - confirm against the original project.
		private void buttonIndex_Click(object sender, EventArgs e)
		{
			// nothing to do when the user cancels the dialog
			if (openFileDialog.ShowDialog() != DialogResult.OK)
			{
				return;
			}

			// splitting on all three characters at once is equivalent to replacing
			// '\r' and '\n' with spaces first and then splitting on ' '
			char[] separators = { ' ', '\r', '\n' };

			// parse each file and build the inverted index
			foreach (string fileName in openFileDialog.FileNames)
			{
				string[] terms = File.ReadAllText(fileName).Split(separators, StringSplitOptions.RemoveEmptyEntries);

				// put each term (word) of the file in the dictionary
				foreach (string term in terms)
				{
					// single lookup instead of ContainsKey followed by the indexer
					List<string> files;
					if (!InvertedIndex.TryGetValue(term, out files))
					{
						files = new List<string>();
						InvertedIndex.Add(term, files);
					}

					// a term may occur many times in one file; list the file only once
					if (!files.Contains(fileName))
					{
						files.Add(fileName);
					}
				}
			}

			// update label
			labelIndex.Text = openFileDialog.FileNames.Length.ToString() + " files indexed";
		}

		// perform search over the inverted index built earlier
		//
		// Click handler for the Search button. Splits the text of textBoxQuery on spaces and
		// narrows a list of documents term by term: the first term supplies its document list
		// from InvertedIndex, later terms are combined through GetCommonDocuments.
		// Terms are looked up exactly as typed.
		//
		// NOTE(review): this listing has lost its brace-only lines and several one-line
		// statements, so some conditions and loops below have no visible body. The missing
		// parts are marked individually; restore them from the original project.
		private void buttonSearch_Click(object sender, EventArgs e)

			// split query in terms
			string query = textBoxQuery.Text;
			string[] terms = query.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);

			// see documents for each term and merge them
			// null means "no term processed yet"
			List<string> commonDocuments = null;
			foreach (string term in terms)
				// if one term not in index, end search
				if (!InvertedIndex.ContainsKey(term))
					// NOTE(review): the body of the next condition and whatever ends the search
					// are not visible here - presumably the partial result is discarded and the
					// loop is left; TODO confirm.
					if (commonDocuments != null)

				// find documents containing all terms
				if (commonDocuments == null)
					// this stores a reference to the index's own list, not a copy: modifying
					// commonDocuments in place would also modify the index entry
					commonDocuments = InvertedIndex[term];
					// NOTE(review): an "else" presumably preceded the next line (first term
					// seeds the list, later terms intersect with it) - TODO confirm.
					commonDocuments = GetCommonDocuments(commonDocuments, InvertedIndex[term]);

			// display results
			// NOTE(review): the loop body that shows each file name is not visible in this
			// listing; the target control cannot be determined from here.
			if (commonDocuments != null)
				foreach (string fileName in commonDocuments)

		// intersect two lists of file names
		//
		// documentList     - documents matched so far; the result keeps this list's order
		// newDocumentsList - documents to intersect with
		// returns          - a new list holding every entry of documentList that also occurs
		//                    in newDocumentsList; neither argument is modified
		//
		// static: the method uses no instance state.
		//
		// NOTE(review): the braces and the statement adding a match to the result were
		// missing from this listing; common.Add(file) is restored here as the evident
		// intent - confirm against the original project.
		private static List<string> GetCommonDocuments(List<string> documentList, List<string> newDocumentsList)
		{
			// hash the second list once so each membership test is a constant-time lookup
			// instead of a linear List.Contains scan per document
			HashSet<string> lookup = new HashSet<string>(newDocumentsList);

			List<string> common = new List<string>();
			foreach (string file in documentList)
			{
				if (lookup.Contains(file))
				{
					common.Add(file);
				}
			}

			return common;
		}


By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.


This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


About the Author

Adrian Pirvu
Romania
No Biography provided

You may also be interested in...

Permalink | Advertise | Privacy | Terms of Use | Mobile
Web04 | 2.8.180111.1 | Last Updated 4 Jun 2009
Article Copyright 2009 by Adrian Pirvu
Everything else Copyright © CodeProject, 1999-2018
Layout: fixed | fluid