Click here to Skip to main content
15,891,248 members
Articles / Web Development / ASP.NET

Image and Location Search(aroo)

Rate me:
Please Sign up or sign in to vote.
4.96/5 (28 votes)
11 Jun 2008CPOL12 min read 203.3K   2.5K   76  
Index your website (including images and geographic data), search and display results in Google Earth.
using System;
using System.IO;
using System.Xml.Serialization;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Collections.Generic;
using System.Collections.Specialized;
using cd.net;
namespace Searcharoo.Common
{
    /// <summary>
    /// Catalog of Words (and Files)
    /// </summary>
    /// <remarks>
    /// XmlInclude
    /// http://pluralsight.com/blogs/craig/archive/2004/07/08/1580.aspx
    /// 
    /// added v6 : fallback loading of catalog from the WebAppCatalogResource assembly
    /// to get around issues where the Trust level (eg. in shared hosting) is restricted
    /// to NOT ALLOW File.IO or WebClient requests. This requires the catalog to be built
    /// using the Indexer.EXE then compiled into a DLL then uploaded to /bin/
    /// </remarks>
    [Serializable]
    [System.Xml.Serialization.XmlInclude(typeof(Searcharoo.Common.Word))]
    [System.Xml.Serialization.XmlInclude(typeof(Searcharoo.Common.Location))]
    [System.Xml.Serialization.XmlInclude(typeof(Searcharoo.Common.CatalogWordFile))]
    public class Catalog
    {
        /// <summary>
        /// Internal datastore of Words referencing Files
        /// </summary>
        /// <remarks>
        /// Hashtable
        /// key    = STRING representation of the word, 
        /// value  = Word OBJECT (with File collection, etc)
        /// </remarks>
        private System.Collections.Hashtable _Index;	//TODO: implement collection with faster searching

        /// <summary>
        /// Words in the Catalog
        /// </summary>
        /// <remarks>
        /// Obsolete: this property is marked XmlIgnore, so it does not take part in
        /// Xml serialization; the WordFiles and Files properties carry that data.
        /// NOTE: the XmlInclude attributes on the Catalog class are what allow
        /// arrays of these 'non-standard' objects to be serialized.
        /// get: returns a new array holding every Word currently in the index.
        /// set: replaces the whole index with the supplied words, keyed by each
        /// Word's Text. A null array clears the index; null entries (and entries
        /// whose Text is null) are skipped.
        /// </remarks>
        [XmlElement("o")]
        [XmlIgnore()]
        [Obsolete("Use WordFiles and Files properties")]
        public Word[] Words
        {
            get
            {
                Word[] wordArray = new Word[_Index.Count];
                _Index.Values.CopyTo(wordArray, 0);
                return wordArray;
            }
            set
            {
                Hashtable rebuilt = new Hashtable();
                if (value != null)
                {
                    foreach (Word w in value)
                    {
                        if (w == null || w.Text == null) continue;
                        // indexer (not Add) so a repeated Text replaces instead of throwing
                        rebuilt[w.Text] = w;
                    }
                }
                _Index = rebuilt;
                // the index was replaced, so previously prepared Files/WordFiles data is stale
                _SerializePreparationDone = false;
            }
        }
        /// <summary>
        /// Word entries in their Xml-serializable form (serialized as 'w' elements)
        /// </summary>
        /// <remarks>
        /// get: runs PrepareForSerialization() and then hands back the prepared array.
        /// set: stores the supplied array and then runs PostDeserialization().
        /// </remarks>
        [XmlElement("w")]
        public CatalogWordFile[] WordFiles
        {
            get 
            {
                this.PrepareForSerialization();
                CatalogWordFile[] prepared = this._WordfileArray;
                return prepared;
            }
            set 
            {
                this._WordfileArray = value;
                this.PostDeserialization();
            }
        }
        /// <summary>
        /// Files referenced by the Catalog (serialized as 'f' elements)
        /// </summary>
        /// <remarks>
        /// get: runs PrepareForSerialization() and returns a copy of the file list as an array.
        /// set: copies the supplied array into a new list and then runs PostDeserialization().
        /// </remarks>
        [XmlElement("f")]
        public File[] Files
        {
            get
            {
                this.PrepareForSerialization();
                File[] snapshot = this._FileList.ToArray();
                return snapshot;
            }
            set
            {
                this._FileList = new List<File>(value);
                this.PostDeserialization();
            }
        }

        #region Private fields and methods to manage XmlSerialization
        /// <summary>
        /// List of File objects that were referenced in the Catalog
        /// </summary>
        /// <remarks>
        /// Rebuilt by PrepareForSerialization() and replaced by the Files setter.
        /// The 'ids' stored in a CatalogWordFile are positions in this list.
        /// </remarks>
        private System.Collections.Generic.List<File> _FileList;
        /// <summary>
        /// Array of CatalogWordFile objects, with 'ids' for each File 
        /// rather than a reference to a File object
        /// </summary>
        private CatalogWordFile[] _WordfileArray;
        /// <summary>
        /// Set to true at the end of PrepareForSerialization(); set back to false
        /// by Add(), so the next Files/WordFiles 'get' rebuilds the data above.
        /// </summary>
        private bool _SerializePreparationDone = false;

        /// <summary>
        /// Property helper for Files &amp; WordFiles, ensures the data retrieved
        /// from those two properties is 'in sync'
        /// </summary>
        /// <remarks>
        /// Rebuilds _FileList (each distinct File once, in first-seen order) and
        /// _WordfileArray (one CatalogWordFile per Word in the index, whose FileIds
        /// are positions in _FileList). Returns immediately when the result of a
        /// previous call is still flagged as valid.
        /// </remarks>
        private void PrepareForSerialization()
        {
            if (_SerializePreparationDone) return;

            _FileList = new List<File>();
            _WordfileArray = new CatalogWordFile[_Index.Count];

            // File -> its position in _FileList. Replaces the List.Contains / List.IndexOf
            // scans, which made this loop quadratic in the number of files.
            // NOTE(review): assumes File's Equals and GetHashCode agree with each other - confirm
            Dictionary<File, int> fileIds = new Dictionary<File, int>();

            int slot = 0;
            foreach (Word word in _Index.Values)
            {
                CatalogWordFile wf = new CatalogWordFile();
                wf.Text = word.Text;
                foreach (File f in word.Files)
                {
                    int id;
                    if (!fileIds.TryGetValue(f, out id))
                    {
                        // first time this File is seen: it takes the next free position
                        id = _FileList.Count;
                        _FileList.Add(f);
                        fileIds.Add(f, id);
                    }
                    wf.FileIds.Add(id);
                }
                _WordfileArray[slot++] = wf;
            }
            _SerializePreparationDone = true;
        }
        /// <summary>
        /// Called by the Files and WordFiles setters. Once BOTH have been
        /// supplied, re-creates the internal index from them.
        /// </summary>
        /// <remarks>
        /// Every file id of every CatalogWordFile is resolved against _FileList and
        /// passed to Add() together with the entry's Text and a position of -1.
        /// </remarks>
        private void PostDeserialization()
        {
            // nothing to do until both halves of the serialized data are present
            if (_WordfileArray == null || _FileList == null)
            {
                return;
            }

            foreach (CatalogWordFile entry in _WordfileArray)
            {
                foreach (int fileId in entry.FileIds)
                {
                    File referenced = _FileList[fileId];
                    Add(entry.Text, referenced, -1);
                }
            }
        }
        #endregion
        

        /// <summary>
        /// The words (index keys) of the Catalog as a string array.
        /// Mainly a debugging aid - Save() can write it out so you can
        /// see what the Spider found.
        /// </summary>
        /// <remarks>
        /// Because there is no 'set' accessor, this property is not XmlSerialized
        /// </remarks>
        [XmlAttribute("dict")]
        public string[] Dictionary
        {
            get
            {
                ICollection keys = _Index.Keys;
                string[] words = new string[keys.Count];
                keys.CopyTo(words, 0);
                return words;
            }
        }
        /// <summary>
        /// How many distinct Words the Catalog currently holds
        /// </summary>
        /// <remarks>
        /// Because there is no 'set' accessor, this property is not XmlSerialized
        /// </remarks>
        public int Length
        {
            get
            {
                int wordCount = _Index.Count;
                return wordCount;
            }
        }

        /// <summary>
        /// Constructor - starts the Catalog with an empty Hashtable as its index.
        /// </summary>
        public Catalog()
        {
            this._Index = new Hashtable();
        }
        /// <summary>
        /// Add a new Word/File pair to the Catalog
        /// </summary>
        /// <param name="word">text of the word; used as the key in the index</param>
        /// <param name="infile">the File the word was found in</param>
        /// <param name="position">
        /// passed through unchanged to the Word object
        /// (PostDeserialization passes -1 when rebuilding the index)
        /// </param>
        /// <returns>always true</returns>
        public bool Add(string word, File infile, int position)
        {
            // ### Make sure the Word object is in the index ONCE only.
            // Hashtable's indexer returns null for a missing key, so a single lookup
            // both tests for the word and fetches it (no ContainsKey + indexer pair).
            Word theword = (Word)_Index[word];
            if (theword == null)
            {
                // first occurrence: create a new Word object
                _Index.Add(word, new Word(word, infile, position));
            }
            else
            {
                // known word: add this file reference to it
                theword.Add(infile, position);
            }
            _SerializePreparationDone = false;  // adding to the catalog invalidates 'serialization preparation'
            return true;
        }
        /// <summary>
        /// Returns all the Files which contain the searchWord
        /// </summary>
        /// <remarks>
        /// searchWord is looked up exactly as given: no trimming or lower-casing
        /// is applied here, so the caller must normalise it the same way the
        /// words were normalised when the catalog was built.
        /// </remarks>
        /// <param name="searchWord">the word to look up in the index</param>
        /// <returns>
        /// The Hashtable returned by the matching Word's InFiles() method,
        /// or null when the word is not in the Catalog.
        /// </returns>
        public Hashtable Search(string searchWord)
        {
            // single lookup: the Hashtable indexer yields null when the key is absent
            Word thematch = (Word)_Index[searchWord];
            if (thematch == null)
            {
                return null;
            }
            return thematch.InFiles(); // the Word does all the work
        }

        /// <summary>
        /// Debug string: reports only how many words the Catalog holds
        /// (the words themselves are not listed).
        /// </summary>
        public override string ToString()
        {
            int wordCount = _Index.Count;
            return "CATALOG :: " + wordCount.ToString() + " words.\n";
        }

        /// <summary>
        /// Save the catalog to disk.
        /// </summary>
        /// <remarks>
        /// When Preferences.InMediumTrust is set, ONLY an Xml copy is written: same
        /// directory and name as Preferences.CatalogFileName, with an .xml extension.
        /// 
        /// Otherwise the object graph is BINARY serialized to Preferences.CatalogFileName
        /// and, when Preferences.DebugSerializeXml is set, the list of Words that were
        /// found is also written as Xml for 'reference' (it is never re-loaded). In
        /// production, you would probably switch that debugging output off.
        /// 
        /// You may also wish to use a difficult-to-guess filename for the serialized data, 
        /// or else change the .DAT file extension to something that will be not be served by
        /// IIS (so that other people can't download your catalog).
        /// </remarks>
        public void Save()
        {
            // XML
            if (Preferences.InMediumTrust)
            {
                // TODO: Maybe use to save as ZIP - save space on disk? http://www.123aspx.com/redir.aspx?res=31602
                string xmlFileName = Path.GetDirectoryName(Preferences.CatalogFileName) + Path.DirectorySeparatorChar + Path.GetFileNameWithoutExtension(Preferences.CatalogFileName) + ".xml";
                Kelvin<Catalog>.ToXmlFile(this, xmlFileName);
                return;
            }

            // BINARY http://www.dotnetspider.com/technology/kbpages/454.aspx
            // 'using' guarantees the file handle is released even if Serialize throws
            using (System.IO.Stream stream = new System.IO.FileStream(Preferences.CatalogFileName, System.IO.FileMode.Create))
            {
                System.Runtime.Serialization.IFormatter formatter = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
                formatter.Serialize(stream, this);
            }
            
            #region Debugging Serialization - these are only really useful for looking at; they're not re-loaded
            if (Preferences.DebugSerializeXml)
            {
                // note: no directory is prepended here, so the path is relative
                Kelvin<string[]>.ToXmlFile(this.Dictionary, Path.GetFileNameWithoutExtension(Preferences.CatalogFileName) + "_debugwords.xml");
            }
            #endregion
        }

        /// <summary>
        /// Load a previously saved Catalog.
        /// </summary>
        /// <remarks>
        /// Medium trust (Preferences.InMediumTrust):
        ///   1. (added v5) if the .xml file next to Preferences.CatalogFileName exists,
        ///      deserialize it with Kelvin;
        ///   2. (added v6) if that file is missing or cannot be loaded, fall back to the
        ///      first manifest resource of the 'WebAppCatalogResource' assembly.
        /// Otherwise (hopefully Full trust): binary-deserialize Preferences.CatalogFileName.
        /// Binary serialization requires the custom Binder because of the embedded
        /// 'full name' of the serializing assembly - the Xml methods do not have
        /// this requirement.
        /// </remarks>
        /// <returns>
        /// the catalog deserialized from disk (or resource); NULL when not in medium
        /// trust and the catalog file does not exist or does not contain a Catalog
        /// </returns>
        /// <exception cref="System.Exception">
        /// in medium trust, when the resource fallback fails (the cause is the InnerException)
        /// </exception>
        public static Catalog Load()
        {
            if (Preferences.InMediumTrust)
            {
                try
                {   // added v5
                    string xmlFileName = Path.GetDirectoryName(Preferences.CatalogFileName) + Path.DirectorySeparatorChar + Path.GetFileNameWithoutExtension(Preferences.CatalogFileName) + ".xml";
                    if (System.IO.File.Exists(xmlFileName))
                    {
                        return Kelvin<Catalog>.FromXmlFile(xmlFileName);
                    }
                }
                catch (Exception)
                {
                    // deliberately ignored: any failure to read the .XML (eg. File.IO not
                    // permitted at this Trust level) just means we try the compiled resource
                }

                // added v6 : if cannot load from .XML, try to load from compiled resource
                try
                {   // http://www.devhood.com/tutorials/tutorial_details.aspx?tutorial_id=75
                    System.Reflection.Assembly a = System.Reflection.Assembly.Load("WebAppCatalogResource");
                    string[] resNames = a.GetManifestResourceNames();

                    return Kelvin<Catalog>.FromResource(a, resNames[0]);
                }
                catch (Exception e1)
                {
                    throw new Exception("Searcharoo Catalog.Load() ", e1);
                }
            }

            // hopefully in Full trust
            if (!System.IO.File.Exists(Preferences.CatalogFileName))
            {
                return null;
            }

            object deserializedCatalogObject;
            using (System.IO.Stream stream = new System.IO.FileStream(Preferences.CatalogFileName, System.IO.FileMode.Open))
            {
                // NOTE(review): BinaryFormatter must only ever be pointed at files this
                // application wrote itself; it is not safe on untrusted data
                System.Runtime.Serialization.IFormatter formatter = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
                // Without the custom Binder, Deserialize fails with
                // SerializationException "Cannot find the assembly <random name>";
                // it is REQUIRED to find the classes in our current 'Temporary ASP.NET Files' assembly
                formatter.Binder = new CatalogBinder();
                deserializedCatalogObject = formatter.Deserialize(stream);
            }
            return deserializedCatalogObject as Catalog;
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Web Developer
Australia Australia
-- ooo ---
www.conceptdevelopment.net
conceptdev.blogspot.com
www.searcharoo.net
www.recipenow.net
www.racereplay.net
www.silverlightearth.com

Comments and Discussions