using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using CBR.Core.Models;
using System.IO;
using SevenZip;
namespace CBR.Core.Helpers
{
internal class ePUBParser
{
/// <summary>
/// Builds an <see cref="ePUB"/> model from an ePUB whose content has already been
/// extracted to a folder on disk.
/// </summary>
/// <param name="filePath">Path handed to the ePUB constructor.</param>
/// <param name="extractFolder">Folder holding the extracted content; stored in ContentFolder.</param>
/// <returns>The document, or null when an exception reaches this method.</returns>
/// <remarks>
/// The individual parsing steps log and swallow their own exceptions, so a non-null
/// result may still be only partially populated.
/// </remarks>
public ePUB ParseExtracted(string filePath, string extractFolder)
{
    try
    {
        ePUB docPUB = new ePUB(filePath);
        docPUB.ContentFolder = extractFolder;

        // Order matters: the container step creates OpenPackage, which the
        // package and table-of-content steps read afterwards.
        ParseContainer(docPUB, extractFolder);
        ParsePackage(docPUB, extractFolder);
        ParseTOC(docPUB, extractFolder);

        return docPUB;
    }
    catch (Exception err)
    {
        // Tag now matches this method's name, like every other log tag in the class.
        ExceptionHelper.Manage("ePUBParser:ParseExtracted", err);
        return null;
    }
}
/// <summary>
/// Builds an <see cref="ePUB"/> model by reading the container, package (OPF) and
/// table-of-content entries directly from the archive, each through an in-memory stream.
/// </summary>
/// <param name="filePath">Path of the ePUB archive.</param>
/// <returns>
/// The document, or null when a required entry is missing or any other exception
/// reaches this method.
/// </returns>
public ePUB ParseFile(string filePath)
{
    try
    {
        ePUB docPUB = new ePUB(filePath);

        // NOTE(review): the extractor obtained from ZipHelper is never disposed or
        // handed back here - confirm that ZipHelper owns its lifetime.
        SevenZipExtractor archive = ZipHelper.Instance.GetExtractor(filePath);

        // container.xml tells where the OPF package file lives
        using (MemoryStream stream = new MemoryStream())
        {
            ExtractEntry(archive, ePUB.ContainerFile, stream);
            ParseContainer(docPUB, stream);
        }

        // OPF package file
        using (MemoryStream stream = new MemoryStream())
        {
            ExtractEntry(archive, docPUB.OpenPackage.RelativeFilePath, stream);
            ParsePackage(docPUB, stream);
        }

        // table of content; the name is resolved once instead of once per archive entry
        string tocFile = docPUB.GetTOCFile();
        using (MemoryStream stream = new MemoryStream())
        {
            ExtractEntry(archive, tocFile, stream);
            ParseTOC(docPUB, stream);
        }

        return docPUB;
    }
    catch (Exception err)
    {
        // Tag now matches this method's name so failures can be told apart
        // from those of ParseExtracted.
        ExceptionHelper.Manage("ePUBParser:ParseFile", err);
        return null;
    }
}

/// <summary>
/// Extracts the first non-directory archive entry named exactly <paramref name="entryName"/>
/// into <paramref name="target"/>.
/// </summary>
/// <exception cref="FileNotFoundException">No such entry exists in the archive.</exception>
private static void ExtractEntry(SevenZipExtractor archive, string entryName, Stream target)
{
    foreach (ArchiveFileInfo entry in archive.ArchiveFileData)
    {
        if (!entry.IsDirectory && entry.FileName == entryName)
        {
            archive.ExtractFile(entry.FileName, target);
            return;
        }
    }

    // Names the missing entry instead of the anonymous "sequence contains no matching element".
    throw new FileNotFoundException("Entry not found in the ePUB archive.", entryName);
}
/// <summary>
/// Reads the container XML from <paramref name="content"/> and creates
/// docPUB.OpenPackage from the "full-path" attribute of its rootfile element.
/// </summary>
/// <param name="docPUB">Document receiving the new Package.</param>
/// <param name="content">Stream holding the container XML.</param>
/// <remarks>Any exception is logged through ExceptionHelper and swallowed.</remarks>
private void ParseContainer(ePUB docPUB, Stream content)
{
    try
    {
        XmlDocument containerXml = GetDocumentWithNoValidation(content);

        // bind the document's default namespace to the ROOT prefix used in the XPath below
        var namespaces = new XmlNamespaceManager(containerXml.NameTable);
        ResolveNamespaces(namespaces, containerXml.DocumentElement);

        XmlNode rootFile = containerXml.SelectSingleNode("//ROOT:rootfiles/ROOT:rootfile", namespaces);
        string packagePath = TryGetAttributeString(rootFile, "full-path");

        docPUB.OpenPackage = new Package(packagePath);
    }
    catch (Exception err)
    {
        ExceptionHelper.Manage("ePUBParser:ParseContainer", err);
    }
}
/// <summary>
/// Opens the container file located under <paramref name="extractFolder"/> and
/// delegates to the stream-based overload.
/// </summary>
/// <param name="docPUB">Document being populated.</param>
/// <param name="extractFolder">Folder holding the extracted ePUB content.</param>
/// <remarks>Any exception is logged through ExceptionHelper and swallowed.</remarks>
private void ParseContainer(ePUB docPUB, string extractFolder)
{
    try
    {
        string containerPath = Path.Combine(extractFolder, ePUB.ContainerFile);

        using (FileStream containerStream = File.Open(containerPath, FileMode.Open, FileAccess.Read))
        {
            ParseContainer(docPUB, containerStream);
        }
    }
    catch (Exception err)
    {
        ExceptionHelper.Manage("ePUBParser:ParseContainer", err);
    }
}
/// <summary>
/// Parses the OPF package document and fills the title, identifier, cover id,
/// manifest items and spine of docPUB.OpenPackage.
/// </summary>
/// <param name="docPUB">Document whose OpenPackage is populated; OpenPackage must already exist.</param>
/// <param name="content">Stream holding the OPF XML.</param>
/// <remarks>
/// Any exception is logged through ExceptionHelper and swallowed; values assigned
/// before the failure are kept. A missing title or identifier element aborts the
/// parse before the manifest and spine are read.
/// </remarks>
private void ParsePackage(ePUB docPUB, Stream content)
{
    try
    {
        XmlDocument doc = GetDocumentWithNoValidation(content);
        XmlNode root = doc.DocumentElement; // <package>

        // Namespaces are collected twice: from <package>, then from <metadata>,
        // so prefixes declared on either element are registered.
        XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
        ResolveNamespaces(nsmgr, root);
        ResolveNamespaces(nsmgr, root.SelectSingleNode("//ROOT:metadata", nsmgr));

        docPUB.OpenPackage.Title = root.SelectSingleNode("//ROOT:metadata/dc:title", nsmgr).InnerText;
        docPUB.OpenPackage.Id = root.SelectSingleNode("//ROOT:metadata/dc:identifier", nsmgr).InnerText;

        // cover id declared through <meta name="cover" content="..."/>, empty when absent
        XmlNode coverMeta = root.SelectSingleNode("//ROOT:metadata/ROOT:meta[@name='cover']", nsmgr);
        docPUB.OpenPackage.CoverId = TryGetAttributeString(coverMeta, "content");

        // manifest items
        XmlNodeList itemNodes = root.SelectNodes("//ROOT:manifest/ROOT:item", nsmgr);
        List<ManifestItem> manifest = new List<ManifestItem>();
        docPUB.OpenPackage.ManifestItems = manifest;
        foreach (XmlNode node in itemNodes)
        {
            manifest.Add(new ManifestItem()
            {
                id = TryGetAttributeString(node, "id"),
                href = TryGetAttributeString(node, "href").Replace("/", "\\"),
                mediatype = TryGetAttributeString(node, "media-type")
            });
        }

        // When no cover is declared, or the declared id is not in the manifest,
        // fall back to a JPEG image from the manifest.
        string declaredCover = docPUB.OpenPackage.CoverId;
        bool coverInManifest = !string.IsNullOrEmpty(declaredCover)
            && manifest.Any(p => p.id == declaredCover);
        if (!coverInManifest)
        {
            // materialized once: the list is counted and then iterated
            List<ManifestItem> jpegs = manifest.Where(p => p.mediatype == "image/jpeg").ToList();
            foreach (ManifestItem item in jpegs)
            {
                // a single JPEG is taken as is; among several, the first one
                // whose id or href mentions "cover" wins
                if (jpegs.Count == 1 || item.id.Contains("cover") || item.href.Contains("cover"))
                {
                    docPUB.OpenPackage.CoverId = item.id;
                    break;
                }
            }
        }

        // <spine>: toc id plus the ordered list of item references
        XmlNode spineNode = root.SelectSingleNode("//ROOT:spine", nsmgr);
        Spine spine = new Spine();
        spine.tocId = TryGetAttributeString(spineNode, "toc");
        spine.itemrefs = new List<string>();
        foreach (XmlNode node in root.SelectNodes("//ROOT:spine/ROOT:itemref", nsmgr))
        {
            spine.itemrefs.Add(TryGetAttributeString(node, "idref"));
        }
        docPUB.OpenPackage.Spine = spine;
    }
    catch (Exception err)
    {
        ExceptionHelper.Manage("ePUBParser:ParsePackage", err);
    }
}
/// <summary>
/// Opens the OPF package file, located by combining <paramref name="extractFolder"/>
/// with docPUB.OpenPackage.RelativeFilePath, and delegates to the stream-based overload.
/// </summary>
/// <param name="docPUB">Document being populated.</param>
/// <param name="extractFolder">Folder holding the extracted ePUB content.</param>
/// <remarks>Any exception is logged through ExceptionHelper and swallowed.</remarks>
private void ParsePackage(ePUB docPUB, string extractFolder)
{
    try
    {
        string packagePath = Path.Combine(extractFolder, docPUB.OpenPackage.RelativeFilePath);

        using (FileStream packageStream = File.Open(packagePath, FileMode.Open, FileAccess.Read))
        {
            ParsePackage(docPUB, packageStream);
        }
    }
    catch (Exception err)
    {
        ExceptionHelper.Manage("ePUBParser:ParsePackage", err);
    }
}
/// <summary>
/// Parses the table-of-content document: creates docPUB.OpenPackage.Toc, reads its
/// title and builds the tree of navigation points found under navMap.
/// </summary>
/// <param name="docPUB">Document whose OpenPackage receives the table of content.</param>
/// <param name="content">Stream holding the table-of-content XML.</param>
/// <remarks>
/// Any exception is logged through ExceptionHelper and swallowed; NavMap is only
/// assigned when every top-level entry was read.
/// </remarks>
private void ParseTOC(ePUB docPUB, Stream content)
{
    try
    {
        XmlDocument ncxDocument = GetDocumentWithNoValidation(content);
        XmlNode ncx = ncxDocument.DocumentElement; // <ncx>

        var namespaces = new XmlNamespaceManager(ncxDocument.NameTable);
        ResolveNamespaces(namespaces, ncx);

        docPUB.OpenPackage.Toc = new TableOfContent();
        docPUB.OpenPackage.Toc.Title = ncx.SelectSingleNode("//ROOT:docTitle/ROOT:text", namespaces).InnerText;

        var topLevelEntries = new List<TableOfContentItem>();
        XmlNodeList navPoints = ncx.SelectNodes("child::ROOT:navMap/ROOT:navPoint", namespaces);
        foreach (XmlNode navPoint in navPoints)
        {
            string entryId = TryGetAttributeString(navPoint, "id");
            string entryOrder = TryGetAttributeString(navPoint, "playOrder");
            string entryLabel = navPoint.SelectSingleNode("child::ROOT:navLabel/ROOT:text", namespaces).InnerText;

            // the target is stored relative to the package folder
            string packageFolder = docPUB.OpenPackage.RelativeFolder;
            XmlNode contentNode = navPoint.SelectSingleNode("child::ROOT:content", namespaces);
            string entryTarget = Path.Combine(packageFolder, TryGetAttributeString(contentNode, "src"));

            TableOfContentItem entry = new TableOfContentItem
            {
                Id = entryId,
                PlayOrder = entryOrder,
                Label = entryLabel,
                Content = entryTarget
            };
            topLevelEntries.Add(entry);

            // nested navigation points become the children of this entry
            entry.Children = ParseNavRecursif(docPUB, navPoint, namespaces);
        }

        docPUB.OpenPackage.Toc.NavMap = topLevelEntries;
    }
    catch (Exception err)
    {
        ExceptionHelper.Manage("ePUBParser:ParseTOC", err);
    }
}
/// <summary>
/// Opens the table-of-content file reported by docPUB.GetTOCFile() and delegates to
/// the stream-based overload.
/// </summary>
/// <param name="docPUB">Document being populated.</param>
/// <param name="extractFolder">
/// Folder holding the extracted ePUB content; a relative table-of-content path is
/// resolved against it. May be null, in which case the path is used as returned.
/// </param>
/// <remarks>Any exception is logged through ExceptionHelper and swallowed.</remarks>
private void ParseTOC(ePUB docPUB, string extractFolder)
{
    try
    {
        string tocFile = docPUB.GetTOCFile();

        // Path.Combine returns its second argument unchanged when that argument is
        // rooted, so an absolute path keeps working, while a relative one is looked
        // up in the extraction folder instead of the process working directory.
        string tocPath = extractFolder == null ? tocFile : Path.Combine(extractFolder, tocFile);

        using (FileStream fs = File.Open(tocPath, FileMode.Open, FileAccess.Read))
        {
            ParseTOC(docPUB, fs);
        }
    }
    catch (Exception err)
    {
        ExceptionHelper.Manage("ePUBParser:ParseTOC", err);
    }
}
/// <summary>
/// Recursively converts the navPoint children of <paramref name="node"/> into
/// table-of-content items.
/// </summary>
/// <param name="docPUB">Document supplying the package folder used to build each item's Content path.</param>
/// <param name="node">Parent node whose direct navPoint children are read.</param>
/// <param name="nsmgr">Namespace manager with the ROOT prefix registered.</param>
/// <returns>
/// The items in document order (empty when there are none), or null when an
/// exception occurred; the exception is logged through ExceptionHelper.
/// </returns>
private List<TableOfContentItem> ParseNavRecursif(ePUB docPUB, XmlNode node, XmlNamespaceManager nsmgr)
{
    try
    {
        XmlNodeList childPoints = node.SelectNodes("child::ROOT:navPoint", nsmgr);
        var entries = new List<TableOfContentItem>();

        foreach (XmlNode childPoint in childPoints)
        {
            string entryId = TryGetAttributeString(childPoint, "id");
            string entryOrder = TryGetAttributeString(childPoint, "playOrder");
            string entryLabel = childPoint.SelectSingleNode("child::ROOT:navLabel/ROOT:text", nsmgr).InnerText;

            // the target is stored relative to the package folder
            string packageFolder = docPUB.OpenPackage.RelativeFolder;
            XmlNode contentNode = childPoint.SelectSingleNode("child::ROOT:content", nsmgr);
            string entryTarget = Path.Combine(packageFolder, TryGetAttributeString(contentNode, "src"));

            TableOfContentItem entry = new TableOfContentItem
            {
                Id = entryId,
                PlayOrder = entryOrder,
                Label = entryLabel,
                Content = entryTarget
            };
            entries.Add(entry);

            // descend into the nested navigation points of this entry
            entry.Children = ParseNavRecursif(docPUB, childPoint, nsmgr);
        }

        return entries;
    }
    catch (Exception err)
    {
        ExceptionHelper.Manage("ePUBParser:ParseNavRecursif", err);
        return null;
    }
}
/// <summary>
/// Rewinds <paramref name="content"/> and loads it into an XmlDocument through a
/// reader configured to ignore DTDs.
/// </summary>
/// <param name="content">Stream holding the XML; its position is reset to 0 first.</param>
/// <returns>
/// The loaded document, or null when an exception occurred; the exception is
/// logged through ExceptionHelper.
/// </returns>
private XmlDocument GetDocumentWithNoValidation(Stream content)
{
    try
    {
        // callers hand over streams that were just written to, so start from the beginning
        content.Position = 0;

        var document = new XmlDocument();
        var readerSettings = new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore };

        using (XmlReader xmlReader = XmlReader.Create(content, readerSettings))
        {
            document.Load(xmlReader);
        }

        return document;
    }
    catch (Exception err)
    {
        ExceptionHelper.Manage("ePUBParser:GetDocumentWithNoValidation", err);
        return null;
    }
}
/// <summary>
/// Opens <paramref name="filePath"/> for reading and loads it through the
/// stream-based overload.
/// </summary>
/// <param name="filePath">Path of the XML file.</param>
/// <returns>
/// The loaded document, or null when the file could not be opened or loaded; an
/// exception raised here is logged through ExceptionHelper.
/// </returns>
private XmlDocument GetDocumentWithNoValidation(string filePath)
{
    try
    {
        XmlDocument loaded;
        using (FileStream source = File.Open(filePath, FileMode.Open, FileAccess.Read))
        {
            loaded = GetDocumentWithNoValidation(source);
        }
        return loaded;
    }
    catch (Exception err)
    {
        ExceptionHelper.Manage("ePUBParser:GetDocumentWithNoValidation", err);
        return null;
    }
}
/// <summary>
/// Registers the namespaces declared on <paramref name="xmlElement"/> under the
/// prefixes used by this parser: the default namespace as "ROOT", plus "opf" and "dc".
/// </summary>
/// <param name="xmlManager">Namespace manager receiving the prefixes.</param>
/// <param name="xmlElement">Element whose xmlns attributes are inspected.</param>
/// <remarks>Any exception is logged through ExceptionHelper and swallowed.</remarks>
private void ResolveNamespaces(XmlNamespaceManager xmlManager, XmlNode xmlElement)
{
    try
    {
        // { declaring attribute, prefix registered in the manager }
        string[][] bindings =
        {
            new[] { "xmlns", "ROOT" },
            new[] { "xmlns:opf", "opf" },
            new[] { "xmlns:dc", "dc" }
        };

        foreach (string[] binding in bindings)
        {
            ResolveNamespace(xmlManager, xmlElement, binding[0], binding[1]);
        }
    }
    catch (Exception err)
    {
        ExceptionHelper.Manage("ePUBParser:ResolveNamespaces", err);
    }
}
/// <summary>
/// When <paramref name="xmlElement"/> carries the attribute <paramref name="xmlNamespace"/>,
/// registers that attribute's value in <paramref name="xmlManager"/> under the prefix
/// <paramref name="xmlTag"/>; otherwise does nothing.
/// </summary>
/// <param name="xmlManager">Namespace manager receiving the prefix.</param>
/// <param name="xmlElement">Element inspected for the declaration.</param>
/// <param name="xmlNamespace">Name of the declaring attribute, e.g. "xmlns" or "xmlns:dc".</param>
/// <param name="xmlTag">Prefix to register.</param>
/// <remarks>Any exception is logged through ExceptionHelper and swallowed.</remarks>
private void ResolveNamespace(XmlNamespaceManager xmlManager, XmlNode xmlElement, string xmlNamespace, string xmlTag)
{
    try
    {
        XmlAttribute declaration = xmlElement.Attributes[xmlNamespace];
        if (declaration == null)
        {
            // the element does not declare this namespace
            return;
        }

        xmlManager.AddNamespace(xmlTag, declaration.Value);
    }
    catch (Exception err)
    {
        ExceptionHelper.Manage("ePUBParser:ResolveNamespace", err);
    }
}
/// <summary>
/// Returns the attribute named <paramref name="attributeName"/> of <paramref name="node"/>.
/// </summary>
/// <param name="node">Node to inspect; may be null.</param>
/// <param name="attributeName">Qualified name of the attribute.</param>
/// <returns>
/// The attribute, or null when the node is null, exposes no attribute collection
/// or does not carry the attribute.
/// </returns>
private static XmlAttribute TryGetAttribute(XmlNode node, string attributeName)
{
    // An absent node or attribute is an expected outcome of a "try" lookup, so it
    // is tested for explicitly instead of being routed through a caught
    // NullReferenceException.
    if (node == null || node.Attributes == null)
    {
        return null;
    }

    return node.Attributes[attributeName];
}
/// <summary>
/// Returns the value of the attribute named <paramref name="attributeName"/> on
/// <paramref name="node"/>.
/// </summary>
/// <param name="node">Node to inspect; may be null (e.g. an XPath lookup that found nothing).</param>
/// <param name="attributeName">Qualified name of the attribute.</param>
/// <returns>
/// The attribute value, or an empty string when the node is null, exposes no
/// attribute collection or does not carry the attribute.
/// </returns>
private static string TryGetAttributeString(XmlNode node, string attributeName)
{
    // Optional elements and attributes are routinely absent, so absence is tested
    // for explicitly instead of being routed through a caught NullReferenceException.
    if (node == null || node.Attributes == null)
    {
        return string.Empty;
    }

    XmlAttribute attribute = node.Attributes[attributeName];
    return attribute == null ? string.Empty : attribute.Value;
}
}
}