- searcharoo_5.zip
- EPocalipse.IFilter
- Mono.GetOptions
- bin
- Release
- Mono.GetOptions.dll
- GetOptTest
- Mono.GetOptions.csproj
- Mono.GetOptions.Useful
- Mono.GetOptions
- Mono
- Properties
- Searcharoo.Indexer
- Searcharoo.sln
- Searcharoo
- AllDiagram.cd
- bin
- Release
- EPocalipse.IFilter.dll
- Searcharoo.dll
- Common
- Core_CatalogWordFile.cd
- DocumentDiagram.cd
- Engine
- Indexer
- IndexerDiagram.cd
- Properties
- Searcharoo.csproj
- SearchEngineDiagram.cd
- SpiderDiagram.cd
- WebApplication
|
using System;
using System.IO;
using System.Xml;
using ionic.utils.zip;
namespace Searcharoo.Common
{
/// <summary>
/// Load a Microsoft Excel 2007 Xml file format
/// </summary>
/// <remarks>
/// <see cref="DocxDocument" />
///
/// Xlsx...
/// http://www.gemboxsoftware.com/Excel2007/DemoApp.htm
/// </remarks>
public class XlsxDocument : DownloadDocument
{
private string _WordsOnly;
public XlsxDocument(Uri location)
: base(location)
{ }
public override void Parse()
{
// no parsing (for now). perhaps in future we can regex look for urls (www.xxx.com) and try to link to them...
}
public override string WordsOnly
{
get { return _WordsOnly; }
}
/// <remarks>
/// .NET System.IO.Compression and zip files
/// http://blogs.msdn.com/dotnetinterop/archive/2006/04/05/.NET-System.IO.Compression-and-zip-files.aspx
/// </remarks>
public override bool GetResponse(System.Net.HttpWebResponse webresponse)
{
string filename = System.IO.Path.Combine(
Preferences.DownloadedTempFilePath
, (System.IO.Path.GetFileName(this.Uri.LocalPath)));
this.Title = System.IO.Path.GetFileNameWithoutExtension(filename);
SaveDownloadedFile(webresponse, filename);
try
{ // Will be accessing this data in the xlsx file
// xl/workbook.xml sheet
// xl/worksheets/sheet{0}.xml v
try
{
using (ZipFile zip = ZipFile.Read(filename))
{
int slideCount = 0;
using (MemoryStream streamroot = new MemoryStream())
{ // open the presentation 'root' file to see how many slides there are
zip.Extract("xl/workbook.xml", streamroot);
streamroot.Seek(0, SeekOrigin.Begin);
XmlDocument xmldocroot = new XmlDocument();
xmldocroot.Load(streamroot);
XmlNodeList objXML = xmldocroot.GetElementsByTagName("sheet");
slideCount = objXML.Count;
}
XmlDocument xmlSheet;
string entryToExtractPattern = @"xl/worksheets/sheet{0}.xml";
for (int slideId = 1; slideId <= slideCount; slideId++)
{ // now open each slide file to extract text
using (MemoryStream stream = new MemoryStream())
{
string entryToExtract = String.Format(entryToExtractPattern, slideId);
zip.Extract(entryToExtract, stream);
stream.Seek(0, SeekOrigin.Begin);
xmlSheet = new XmlDocument();
xmlSheet.Load(stream);
}
string slideWords = "";
foreach (XmlElement x in xmlSheet.GetElementsByTagName("v"))
{
slideWords = slideWords + " " + x.InnerText;
}
_WordsOnly = _WordsOnly + " " + slideWords + Environment.NewLine + Environment.NewLine;
this.All = _WordsOnly;
}
}
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
System.IO.File.Delete(filename); // clean up
}
catch (Exception ex2)
{
// ProgressEvent(this, new ProgressEventArgs(2, "IFilter failed on " + this.Uri + " " + e.Message + ""));
}
if (this.All != string.Empty)
{
this.Description = base.GetDescriptionFromWordsOnly(WordsOnly);
return true;
}
else
{
return false;
}
}
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.