Click here to Skip to main content
Click here to Skip to main content
Articles » Database » Database » General » Downloads
 
Add your own
alternative version

hOOt - full text search engine

, 22 Jun 2013
Smallest full text search engine (lucene replacement) built from scratch using inverted WAH bitmap index, highly compact storage, operating in database and document modes
hoot_v1.0-noexe.zip
hoot_v1.0.zip
Hoot
Properties
SampleApp
newifilter
Properties
Settings.settings
hoot_v1.1-noexe.zip
hoot_v1.1.zip
_svn
all-wcprops
entries
text-base
AssemblyInfo.cs.svn-base
_svn
all-wcprops
entries
text-base
ComHelper.cs.svn-base
FilterLoader.cs.svn-base
FilterReader.cs.svn-base
IFilter.cs.svn-base
Settings.settings
_svn
all-wcprops
entries
text-base
AssemblyInfo.cs.svn-base
Resources.Designer.cs.svn-base
Resources.resx.svn-base
Settings.Designer.cs.svn-base
Settings.settings.svn-base
hoot_v1.2-noexe.zip
hoot_v1.2.zip
Settings.settings
hoot_v1.3-noexe.zip
hoot_v1.3.zip
Settings.settings
hoot_v1.4-noexe.zip
hoot_v1.4.zip
Settings.settings
hoot_v1.5-noexe.zip
hoot_v1.5.zip
Settings.settings
hoot_v2.0.zip
fastJSON
MGIndex
Settings.settings
hoot_v2.1.zip
Settings.settings
hoot_v2.2.1.zip
Settings.settings
hoot_v2.2.zip
Settings.settings
sampleapp.exe_v2.2-noexe.zip
sampleapp.exe_v2.2.1-noexe.zip
sampleapp.exe_v2.2.1.zip
sampleapp.exe_v2.2.zip
sampleapp_exe-noexe.zip
sampleapp_exe.zip
sampleapp_exe_v1.1-noexe.zip
sampleapp_exe_v1.1.zip
sampleapp_exe_v1.2-noexe.zip
sampleapp_exe_v1.2.zip
sampleapp_exe_v1.3-noexe.zip
sampleapp_exe_v1.3.zip
sampleapp_exe_v1.4-noexe.zip
sampleapp_exe_v1.4.zip
sampleapp_exe_v1.5-noexe.zip
sampleapp_exe_v1.5.zip
sampleapp_exe_v2.0-noexe.zip
sampleapp_exe_v2.0.zip
sampleapp_exe_v2.1-noexe.zip
sampleapp_exe_v2.1.zip
using System;
using System.Collections.Generic;
using System.Text;
using Microsoft.Win32;
using System.IO;
using System.Runtime.InteropServices.ComTypes;
using System.Runtime.InteropServices;

namespace EPocalipse.IFilter
{
  /// <summary>
  /// FilterLoader finds the dll and ClassID of the COM object responsible
  /// for filtering a specific file extension.
  /// It then loads that dll, creates the appropriate COM object and returns
  /// a pointer to an IFilter instance.
  /// </summary>
  /// <remarks>
  /// The (dll, class id) pair resolved for an extension is cached for the
  /// lifetime of the process. Failed lookups are cached as well (with null
  /// members), so the registry is searched only once per extension.
  /// </remarks>
  static class FilterLoader
  {
    #region CacheEntry
    /// <summary>
    /// Result of a registry lookup for one extension. Both members are null
    /// when no filter could be resolved for that extension.
    /// </summary>
    private class CacheEntry
    {
      public string DllName;
      public string ClassName;

      public CacheEntry(string dllName, string className)
      {
        DllName=dllName;
        ClassName=className;
      }
    }
    #endregion

    //Keyed by file extension. The comparer makes lookups case-insensitive without
    //depending on the current culture. All access goes through lock(_cache).
    static readonly Dictionary<string, CacheEntry> _cache=
      new Dictionary<string, CacheEntry>(StringComparer.OrdinalIgnoreCase);

    #region Registry Read String helper
    /// <summary>
    /// Reads the default value of a key under HKEY_LOCAL_MACHINE.
    /// </summary>
    /// <param name="key">Sub key path, relative to HKEY_LOCAL_MACHINE</param>
    /// <returns>The value as a string, or null if the key or value is missing</returns>
    static string ReadStrFromHKLM(string key)
    {
      return ReadStrFromHKLM(key,null);
    }

    /// <summary>
    /// Reads a named value of a key under HKEY_LOCAL_MACHINE.
    /// </summary>
    /// <param name="key">Sub key path, relative to HKEY_LOCAL_MACHINE</param>
    /// <param name="value">Name of the value to read; null reads the default value</param>
    /// <returns>
    /// The value as a string, or null if the key does not exist, the value
    /// does not exist, or the value is not stored as a string
    /// </returns>
    static string ReadStrFromHKLM(string key, string value)
    {
      RegistryKey rk=Registry.LocalMachine.OpenSubKey(key);
      if (rk==null)
        return null;

      using (rk)
      {
        //"as" instead of a cast: a non-string value is treated as missing
        //rather than raising an InvalidCastException.
        return rk.GetValue(value) as string;
      }
    }
    #endregion

    /// <summary>
    /// finds an IFilter implementation for a file type
    /// </summary>
    /// <param name="ext">The extension of the file</param>
    /// <returns>
    /// an IFilter instance used to retrieve text from that file type, or null
    /// if no filter could be resolved or created for the extension
    /// </returns>
    private static IFilter LoadIFilter(string ext)
    {
      string dllName, filterPersistClass;

      //Find the dll and ClassID
      if (GetFilterDllAndClass(ext, out dllName, out filterPersistClass))
      {
        //load the dll and return an IFilter instance.
        return LoadFilterFromDll(dllName, filterPersistClass);
      }
      return null;
    }

    /// <summary>
    /// Loads and initializes the filter registered for the extension of
    /// <paramref name="fileName"/>.
    /// </summary>
    /// <param name="fileName">The file to load into the filter</param>
    /// <returns>An initialized IFilter, or null if none could be loaded or initialized</returns>
    internal static IFilter LoadAndInitIFilter(string fileName)
    {
      return LoadAndInitIFilter(fileName,Path.GetExtension(fileName));
    }

    /// <summary>
    /// Loads the filter registered for <paramref name="extension"/>, loads
    /// <paramref name="fileName"/> into it and initializes it.
    /// </summary>
    /// <param name="fileName">The file to load into the filter</param>
    /// <param name="extension">The extension used to look up the filter</param>
    /// <returns>An initialized IFilter, or null if none could be loaded or initialized</returns>
    internal static IFilter LoadAndInitIFilter(string fileName, string extension)
    {
      IFilter filter=LoadIFilter(extension);
      if (filter==null)
        return null;

      bool initialized=false;
      try
      {
        IPersistFile persistFile=(filter as IPersistFile);
        if (persistFile!=null)
        {
          persistFile.Load(fileName, 0);
          IFILTER_FLAGS flags;
          IFILTER_INIT iflags =
            IFILTER_INIT.CANON_HYPHENS |
            IFILTER_INIT.CANON_PARAGRAPHS |
            IFILTER_INIT.CANON_SPACES |
            IFILTER_INIT.APPLY_INDEX_ATTRIBUTES |
            IFILTER_INIT.HARD_LINE_BREAKS |
            IFILTER_INIT.FILTER_OWNED_VALUE_OK;

          if (filter.Init(iflags, 0, IntPtr.Zero, out flags)==IFilterReturnCode.S_OK)
          {
            initialized=true;
            return filter;
          }
        }
        return null;
      }
      finally
      {
        //If we failed to retrieve an IPersistFile interface or to initialize
        //the filter - including when Load or Init throws - we release it.
        if (!initialized)
          Marshal.ReleaseComObject(filter);
      }
    }

    /// <summary>
    /// Creates an IFilter instance from the given dll using its class factory.
    /// </summary>
    /// <param name="dllName">The dll implementing the filter</param>
    /// <param name="filterPersistClass">The class id of the filter</param>
    /// <returns>
    /// The IFilter instance, or null if no class factory was obtained or the
    /// created object is not an IFilter
    /// </returns>
    private static IFilter LoadFilterFromDll(string dllName, string filterPersistClass)
    {
      //Get a classFactory for our classID
      IClassFactory classFactory=ComHelper.GetClassFactory(dllName, filterPersistClass);
      if (classFactory==null)
        return null;

      //And create an IFilter instance using that class factory
      Guid IFilterGUID=new Guid("89BCB740-6119-101A-BCB7-00DD010655AF");
      Object obj;
      classFactory.CreateInstance(null, ref IFilterGUID, out obj);
      return (obj as IFilter);
    }

    /// <summary>
    /// Resolves the dll and class id of the filter for an extension, using the
    /// cache first and the registry otherwise. The outcome is cached either way.
    /// </summary>
    /// <param name="ext">The extension of the file</param>
    /// <param name="dllName">Receives the dll name, or null</param>
    /// <param name="filterPersistClass">Receives the filter class id, or null</param>
    /// <returns>true if both a dll name and a class id were found</returns>
    private static bool GetFilterDllAndClass(string ext, out string dllName, out string filterPersistClass)
    {
      dllName=null;
      filterPersistClass=null;

      //No extension (e.g. Path.GetExtension returned null or ""): there is
      //nothing to look up, and building registry paths from it is meaningless.
      if (String.IsNullOrEmpty(ext))
        return false;

      if (!GetFilterDllAndClassFromCache(ext, out dllName, out filterPersistClass))
      {
        string persistentHandlerClass;

        persistentHandlerClass=GetPersistentHandlerClass(ext,true);
        if (persistentHandlerClass!=null)
        {
          GetFilterDllAndClassFromPersistentHandler(persistentHandlerClass,
            out dllName, out filterPersistClass);
        }
        AddExtensionToCache(ext, dllName, filterPersistClass);
      }
      return (!String.IsNullOrEmpty(dllName) && !String.IsNullOrEmpty(filterPersistClass));
    }

    /// <summary>
    /// Stores the lookup result for an extension in the cache.
    /// </summary>
    private static void AddExtensionToCache(string ext, string dllName, string filterPersistClass)
    {
      lock (_cache)
      {
        //Indexer instead of Add: two threads may resolve the same extension
        //concurrently, and Add would throw for the second one.
        _cache[ext]=new CacheEntry(dllName, filterPersistClass);
      }
    }

    /// <summary>
    /// Reads the filter class id and its dll from the registry entries of a
    /// persistent handler.
    /// </summary>
    /// <param name="persistentHandlerClass">The class id of the persistent handler</param>
    /// <param name="dllName">Receives the dll name, or null</param>
    /// <param name="filterPersistClass">Receives the filter class id, or null</param>
    /// <returns>true if both values were found and are non-empty</returns>
    private static bool GetFilterDllAndClassFromPersistentHandler(string persistentHandlerClass, out string dllName, out string filterPersistClass)
    {
      dllName=null;
      filterPersistClass=null;

      //Read the CLASS ID of the IFilter persistent handler
      filterPersistClass=ReadStrFromHKLM(@"Software\Classes\CLSID\" + persistentHandlerClass +
        @"\PersistentAddinsRegistered\{89BCB740-6119-101A-BCB7-00DD010655AF}");
      if (String.IsNullOrEmpty(filterPersistClass))
          return false;

      //Read the dll name
      dllName=ReadStrFromHKLM(@"Software\Classes\CLSID\" + filterPersistClass +
        @"\InprocServer32");
      return (!String.IsNullOrEmpty(dllName));
    }

    /// <summary>
    /// Finds the persistent handler class id for an extension, trying the
    /// extension itself, then its document type, then (optionally) its content type.
    /// </summary>
    /// <param name="ext">The extension of the file</param>
    /// <param name="searchContentType">Whether to fall back to the content type lookup</param>
    /// <returns>The persistent handler class id, or null/empty if none was found</returns>
    private static string GetPersistentHandlerClass(string ext, bool searchContentType)
    {
      //Try getting the info from the file extension
      string persistentHandlerClass=GetPersistentHandlerClassFromExtension(ext);
      if (String.IsNullOrEmpty(persistentHandlerClass))
        //try getting the info from the document type
        persistentHandlerClass=GetPersistentHandlerClassFromDocumentType(ext);
      if (searchContentType && String.IsNullOrEmpty(persistentHandlerClass))
        //Try getting the info from the Content Type
        persistentHandlerClass=GetPersistentHandlerClassFromContentType(ext);
      return persistentHandlerClass;
    }

    /// <summary>
    /// Looks up the extension registered for this extension's content type and
    /// retries the persistent handler search with that extension.
    /// </summary>
    private static string GetPersistentHandlerClassFromContentType(string ext)
    {
      string contentType=ReadStrFromHKLM(@"Software\Classes\"+ext,"Content Type");
      if (String.IsNullOrEmpty(contentType))
        return null;

      string contentTypeExtension=ReadStrFromHKLM(@"Software\Classes\MIME\Database\Content Type\"+contentType,
          "Extension");
      if (String.IsNullOrEmpty(contentTypeExtension))
        return null; //The content type is not associated with any extension

      if (ext.Equals(contentTypeExtension, StringComparison.OrdinalIgnoreCase))
        return null; //No need to look further. This extension does not have any persistent handler

      //We know the extension that is associated with that content type. Simply try again with the new extension
      return GetPersistentHandlerClass(contentTypeExtension, false); //Don't search content type this time.
    }

    /// <summary>
    /// Resolves the persistent handler through the document type registered
    /// for the extension and that document type's class id.
    /// </summary>
    private static string GetPersistentHandlerClassFromDocumentType(string ext)
    {
      //Get the DocumentType of this file extension
      string docType=ReadStrFromHKLM(@"Software\Classes\"+ext);
      if (String.IsNullOrEmpty(docType))
        return null;

      //Get the Class ID for this document type
      string docClass=ReadStrFromHKLM(@"Software\Classes\" + docType + @"\CLSID");
      if (String.IsNullOrEmpty(docClass))
        return null;

      //Now get the PersistentHandler for that Class ID
      return ReadStrFromHKLM(@"Software\Classes\CLSID\" + docClass + @"\PersistentHandler");
    }

    /// <summary>
    /// Reads the persistent handler registered directly under the extension.
    /// </summary>
    private static string GetPersistentHandlerClassFromExtension(string ext)
    {
      return ReadStrFromHKLM(@"Software\Classes\"+ext+@"\PersistentHandler");
    }

    /// <summary>
    /// Looks up a previously cached result for an extension.
    /// </summary>
    /// <param name="ext">The extension of the file</param>
    /// <param name="dllName">Receives the cached dll name (may be null)</param>
    /// <param name="filterPersistClass">Receives the cached class id (may be null)</param>
    /// <returns>true if the extension was in the cache, even with null values</returns>
    private static bool GetFilterDllAndClassFromCache(string ext, out string dllName, out string filterPersistClass)
    {
      lock (_cache)
      {
        CacheEntry cacheEntry;
        if (_cache.TryGetValue(ext, out cacheEntry))
        {
          dllName=cacheEntry.DllName;
          filterPersistClass=cacheEntry.ClassName;
          return true;
        }
      }
      dllName=null;
      filterPersistClass=null;
      return false;
    }
  }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Mehdi Gholam
Architect
United Kingdom
Mehdi first started programming when he was 8, on a BBC+128k machine in 6512 processor language. After various hardware and software changes, he eventually came across .NET and C#, which he has been using since v1.0.
He is formally educated as a system analyst Industrial engineer, but his programming passion continues.
 
* Mehdi is the 5th person to get 6 out of 7 Platinums on CodeProject (13th Jan'12)

| Advertise | Privacy | Mobile
Web01 | 2.8.140827.1 | Last Updated 22 Jun 2013
Article Copyright 2011 by Mehdi Gholam
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid