Click here to Skip to main content
15,894,095 members
Articles / Web Development / ASP.NET

ASP.NET C# Search Engine (Highlighting, JSON, jQuery & Silverlight)

Rate me:
Please Sign up or sign in to vote.
4.60/5 (38 votes)
8 Mar 2009 | CPOL | 10 min read | 379.2K | 13.2K | 184
More professional ASP.NET C# search with proper document summary, query highlighting and RIA display options
using System;
using System.IO;
using System.Xml;
using ionic.utils.zip;

namespace Searcharoo.Common
{
    /// <summary>
    /// Downloads a Microsoft PowerPoint 2007 (.pptx) presentation and extracts
    /// the text of its slides so the document can be indexed.
    /// </summary>
    /// <remarks>
    /// The downloaded file is opened as a zip archive: <c>ppt/presentation.xml</c>
    /// is read to count the slides, then the <c>a:t</c> elements of each
    /// <c>ppt/slides/slide{n}.xml</c> entry are concatenated.
    /// <see cref="DocxDocument" />
    /// </remarks>
    public class PptxDocument : DownloadDocument
    {
        /// <summary>Zip entry listing the slides (one <c>p:sldId</c> element per slide).</summary>
        private const string PresentationEntry = "ppt/presentation.xml";

        /// <summary>Format string for the zip entry of slide number {0} (1-based).</summary>
        private const string SlideEntryPattern = @"ppt/slides/slide{0}.xml";

        // Text gathered from the slides; stays null until GetResponse has read at least one slide.
        private string _WordsOnly;

        /// <summary>
        /// Creates a document for the given location and marks it as a "pptx" file.
        /// </summary>
        /// <param name="location">Address the presentation is downloaded from.</param>
        public PptxDocument(Uri location)
            : base(location)
        {
            Extension = "pptx";
        }

        /// <summary>
        /// Intentionally does nothing: all text extraction happens in <see cref="GetResponse"/>.
        /// </summary>
        public override void Parse()
        {
            // no parsing (for now). perhaps in future we can regex look for urls (www.xxx.com) and try to link to them...
        }

        /// <summary>
        /// Slide text extracted by <see cref="GetResponse"/>; null if no slide has been read.
        /// </summary>
        public override string WordsOnly
        {
            get { return _WordsOnly; }
        }

        /// <summary>
        /// Saves the response to a temporary file, extracts the slide text from it
        /// and deletes the temporary file again.
        /// </summary>
        /// <param name="webresponse">Response whose body is the .pptx file.</param>
        /// <returns>
        /// true when some text is available in <c>All</c> (the description is then
        /// populated as well); false when nothing could be extracted.
        /// </returns>
        /// <remarks>
        /// Extraction is best-effort: a failure while reading the archive is written
        /// to the console and the text of the slides read so far is kept.
        /// Slides are assumed to be stored as slide1.xml .. slideN.xml, where N is the
        /// number of <c>p:sldId</c> elements; the first missing entry stops extraction.
        ///
        /// .NET System.IO.Compression and zip files
        /// http://blogs.msdn.com/dotnetinterop/archive/2006/04/05/.NET-System.IO.Compression-and-zip-files.aspx
        /// </remarks>
        public override bool GetResponse(System.Net.HttpWebResponse webresponse)
        {
            string filename = System.IO.Path.Combine(
                          Preferences.DownloadedTempFilePath
                        , (System.IO.Path.GetFileName(this.Uri.LocalPath)) );
            this.Title = System.IO.Path.GetFileNameWithoutExtension(filename);

            SaveDownloadedFile(webresponse, filename);

            // Seeded with any previously gathered text so repeated calls keep appending
            // (a null seed yields an empty builder).
            System.Text.StringBuilder words = new System.Text.StringBuilder(_WordsOnly);
            bool anySlideRead = false;
            try
            {   // Will be accessing this data in the pptx file
                //  ppt/presentation.xml      p:presentation/sldIdLst
                //  ppt/slides/slide{0}.xml   a:t
                using (ZipFile zip = ZipFile.Read(filename))
                {
                    // open the presentation 'root' file to see how many slides there are
                    XmlDocument presentation = LoadXmlEntry(zip, PresentationEntry);
                    int slideCount = presentation.GetElementsByTagName("p:sldId").Count;

                    for (int slideId = 1; slideId <= slideCount; slideId++)
                    {   // now open each slide file to extract text
                        XmlDocument slide = LoadXmlEntry(zip, String.Format(SlideEntryPattern, slideId));
                        string slideWords = ExtractSlideText(slide);

                        // Only appended once the slide has been read completely, so a
                        // failure never leaves half a slide in the result.
                        words.Append(' ')
                             .Append(slideWords)
                             .Append(Environment.NewLine)
                             .Append(Environment.NewLine);
                        anySlideRead = true;
                    }
                }
            }
            catch (Exception ex)
            {
                // Best-effort: keep whatever was extracted before the failure.
                Console.WriteLine(ex.Message);
            }
            finally
            {
                DeleteTempFile(filename);    // clean up
            }

            if (anySlideRead)
            {
                _WordsOnly = words.ToString();
                this.All = _WordsOnly;
            }

            // IsNullOrEmpty rather than "!= string.Empty": when nothing was extracted
            // All may still be null, which must count as "no content".
            if (string.IsNullOrEmpty(this.All))
            {
                return false;
            }

            this.Description = base.GetDescriptionFromWordsOnly(WordsOnly);
            return true;
        }

        /// <summary>
        /// Extracts one entry of the archive into memory and parses it as XML.
        /// </summary>
        /// <param name="zip">Open archive to read from.</param>
        /// <param name="entryName">Path of the entry inside the archive.</param>
        /// <returns>The parsed document.</returns>
        private static XmlDocument LoadXmlEntry(ZipFile zip, string entryName)
        {
            using (MemoryStream stream = new MemoryStream())
            {
                zip.Extract(entryName, stream);
                stream.Seek(0, SeekOrigin.Begin);

                XmlDocument document = new XmlDocument();
                // The file comes from a crawled URL: never resolve external DTDs/entities.
                document.XmlResolver = null;
                document.Load(stream);
                return document;
            }
        }

        /// <summary>
        /// Concatenates the text of every <c>a:t</c> element, each preceded by a space.
        /// </summary>
        /// <param name="slide">Parsed slide document.</param>
        /// <returns>The slide text; empty when the slide has no <c>a:t</c> elements.</returns>
        private static string ExtractSlideText(XmlDocument slide)
        {
            System.Text.StringBuilder text = new System.Text.StringBuilder();
            foreach (XmlElement textRun in slide.GetElementsByTagName("a:t"))
            {
                text.Append(' ').Append(textRun.InnerText);
            }
            return text.ToString();
        }

        /// <summary>
        /// Deletes the temporary download; a failure is logged and otherwise ignored.
        /// </summary>
        /// <param name="filename">Full path of the temporary file.</param>
        private static void DeleteTempFile(string filename)
        {
            try
            {
                System.IO.File.Delete(filename);
            }
            catch (Exception ex)
            {
                // A leftover temp file must not fail the indexing of this document.
                Console.WriteLine(ex.Message);
            }
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Web Developer
Australia
-- ooo ---
www.conceptdevelopment.net
conceptdev.blogspot.com
www.searcharoo.net
www.recipenow.net
www.racereplay.net
www.silverlightearth.com

Comments and Discussions