Click here to Skip to main content
15,885,882 members
Articles / Desktop Programming / WPF

C.B.R.

Rate me:
Please Sign up or sign in to vote.
4.96/5 (52 votes)
22 Oct 2012 | GPL3 | 29 min read | 124.2K | 1.8K | 132
Comic and electronic publication reader with library management, extended file conversion, and devices support.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using CBR.Core.Models;
using System.IO;
using SevenZip;

namespace CBR.Core.Helpers
{
    internal class ePUBParser
    {
        /// <summary>
        /// Builds an <see cref="ePUB"/> model from an archive whose content has already
        /// been extracted to <paramref name="extractFolder"/>.
        /// </summary>
        /// <param name="filePath">Path passed to the <see cref="ePUB"/> constructor.</param>
        /// <param name="extractFolder">Folder holding the extracted content; also stored in ContentFolder.</param>
        /// <returns>
        /// The document, or null when an exception reaches this method. Each parsing step
        /// handles its own exceptions, so the returned document may be only partially filled.
        /// </returns>
        public ePUB ParseExtracted(string filePath, string extractFolder)
        {
            try
            {
                ePUB docPUB = new ePUB(filePath);
                docPUB.ContentFolder = extractFolder;

                // read META-INF container.xml
                ParseContainer(docPUB, extractFolder);

                // read content.OPF file
                ParsePackage(docPUB, extractFolder);

                // parse the table of content
                ParseTOC(docPUB, extractFolder);

                return docPUB;
            }
            catch (Exception err)
            {
                // Report under this method's own name so failures are not attributed
                // to a non-existent "ParseFolder" method.
                ExceptionHelper.Manage("ePUBParser:ParseExtracted", err);
                return null;
            }
        }

        /// <summary>
        /// Builds an <see cref="ePUB"/> model directly from the archive at
        /// <paramref name="filePath"/>, extracting the container, package and
        /// table-of-content entries into memory streams instead of onto disk.
        /// </summary>
        /// <param name="filePath">Path of the archive to read.</param>
        /// <returns>
        /// The document, or null when an exception reaches this method (for example
        /// when one of the three expected entries is not present in the archive).
        /// </returns>
        public ePUB ParseFile(string filePath)
        {
            try
            {
                ePUB docPUB = new ePUB(filePath);

                // NOTE(review): the extractor is never explicitly released or disposed in
                // this method - confirm whether ZipHelper owns its lifetime.
                SevenZipExtractor temp = ZipHelper.Instance.GetExtractor(filePath);

                // find container.xml and parse it
                ArchiveFileInfo fil = temp.ArchiveFileData.First(p => !p.IsDirectory && p.FileName == ePUB.ContainerFile);
                using (MemoryStream stream = new MemoryStream())
                {
                    temp.ExtractFile(fil.FileName, stream);
                    ParseContainer(docPUB, stream);
                }

                // find OPF package file and parse it
                // (the path is read once instead of once per archive entry)
                string packagePath = docPUB.OpenPackage.RelativeFilePath;
                fil = temp.ArchiveFileData.First(p => !p.IsDirectory && p.FileName == packagePath);
                using (MemoryStream stream = new MemoryStream())
                {
                    temp.ExtractFile(fil.FileName, stream);
                    ParsePackage(docPUB, stream);
                }

                // find the toc file and parse it
                // (GetTOCFile is called once instead of once per archive entry)
                string tocPath = docPUB.GetTOCFile();
                fil = temp.ArchiveFileData.First(p => !p.IsDirectory && p.FileName == tocPath);
                using (MemoryStream stream = new MemoryStream())
                {
                    temp.ExtractFile(fil.FileName, stream);
                    ParseTOC(docPUB, stream);
                }

                return docPUB;
            }
            catch (Exception err)
            {
                // Report under this method's own name rather than "ParseFolder".
                ExceptionHelper.Manage("ePUBParser:ParseFile", err);
                return null;
            }
        }

        /// <summary>
        /// Reads the container document from <paramref name="content"/> and creates the
        /// document's <see cref="Package"/> from the "full-path" attribute of the first
        /// rootfile element found.
        /// </summary>
        /// <param name="docPUB">Document whose OpenPackage is assigned.</param>
        /// <param name="content">Stream holding the container XML.</param>
        private void ParseContainer(ePUB docPUB, Stream content)
        {
            try
            {
                XmlDocument containerXml = GetDocumentWithNoValidation(content);

                // Register the document's default namespace under the "ROOT" prefix.
                XmlNamespaceManager namespaces = new XmlNamespaceManager(containerXml.NameTable);
                ResolveNamespaces(namespaces, containerXml.DocumentElement);

                XmlNode rootfile = containerXml.SelectSingleNode("//ROOT:rootfiles/ROOT:rootfile", namespaces);
                string packagePath = TryGetAttributeString(rootfile, "full-path");

                docPUB.OpenPackage = new Package(packagePath);
            }
            catch (Exception failure)
            {
                ExceptionHelper.Manage("ePUBParser:ParseContainer", failure);
            }
        }

        /// <summary>
        /// Opens the container file located under <paramref name="extractFolder"/> and
        /// hands it to the stream-based overload.
        /// </summary>
        /// <param name="docPUB">Document to fill.</param>
        /// <param name="extractFolder">Folder the archive content was extracted to.</param>
        private void ParseContainer(ePUB docPUB, string extractFolder)
        {
            try
            {
                string containerPath = Path.Combine(extractFolder, ePUB.ContainerFile);

                using (FileStream containerStream = File.Open(containerPath, FileMode.Open, FileAccess.Read))
                {
                    ParseContainer(docPUB, containerStream);
                }
            }
            catch (Exception failure)
            {
                ExceptionHelper.Manage("ePUBParser:ParseContainer", failure);
            }
        }

        /// <summary>
        /// Reads the OPF package document from <paramref name="content"/> and fills the
        /// document's OpenPackage with title, identifier, cover id, manifest items and spine.
        /// </summary>
        /// <param name="docPUB">Document whose OpenPackage is populated.</param>
        /// <param name="content">Stream holding the package XML.</param>
        /// <remarks>
        /// Exceptions are reported through ExceptionHelper.Manage and not rethrown, so the
        /// package may be left partially filled.
        /// </remarks>
        private void ParsePackage(ePUB docPUB, Stream content)
        {
            try
            {
                //reading the OPF
                XmlDocument doc = GetDocumentWithNoValidation(content);

                XmlNode root = doc.DocumentElement; //<package>

                // resolve <package>
                XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
                ResolveNamespaces(nsmgr, root);

                // resolve <metadata>
                ResolveNamespaces(nsmgr, root.SelectSingleNode("//ROOT:metadata", nsmgr));

                // A missing title or identifier must not abort the whole parse: the
                // manifest and spine below are still usable without them.
                XmlNode titleNode = root.SelectSingleNode("//ROOT:metadata/dc:title", nsmgr);
                if (titleNode != null)
                {
                    docPUB.OpenPackage.Title = titleNode.InnerText;
                }

                XmlNode identifierNode = root.SelectSingleNode("//ROOT:metadata/dc:identifier", nsmgr);
                if (identifierNode != null)
                {
                    docPUB.OpenPackage.Id = identifierNode.InnerText;
                }

                //try get the cover id
                XmlNode xmlNod = root.SelectSingleNode("//ROOT:metadata/ROOT:meta[@name='cover']", nsmgr);
                docPUB.OpenPackage.CoverId = TryGetAttributeString(xmlNod, "content");

                //read manifest items
                XmlNodeList listNode = root.SelectNodes("//ROOT:manifest/ROOT:item", nsmgr);

                List<ManifestItem> manifest = new List<ManifestItem>();
                docPUB.OpenPackage.ManifestItems = manifest;
                foreach (XmlNode node in listNode)
                {
                    manifest.Add(new ManifestItem()
                    {
                        id = TryGetAttributeString(node, "id"),
                        // hrefs are stored with backslash separators
                        href = TryGetAttributeString(node, "href").Replace("/", "\\"),
                        mediatype = TryGetAttributeString(node, "media-type")
                    });
                }

                //if no cover (or the declared cover id is not in the manifest) search for an image
                string declaredCoverId = docPUB.OpenPackage.CoverId;
                bool coverInManifest = !string.IsNullOrEmpty(declaredCoverId)
                    && manifest.Any(p => p.id == declaredCoverId);

                if (!coverInManifest)
                {
                    // Materialize once so the query is not re-run for the count and the loop.
                    List<ManifestItem> jpegs = manifest.Where(p => p.mediatype == "image/jpeg").ToList();

                    if (jpegs.Count > 1)
                    {
                        // Several candidates: take the first whose id or href mentions "cover".
                        foreach (ManifestItem item in jpegs)
                        {
                            if (item.id.Contains("cover") || item.href.Contains("cover"))
                            {
                                docPUB.OpenPackage.CoverId = item.id;
                                break;
                            }
                        }
                    }
                    else if (jpegs.Count == 1)
                    {
                        // A single JPEG is assumed to be the cover.
                        docPUB.OpenPackage.CoverId = jpegs[0].id;
                    }
                }

                //read <spine>
                xmlNod = root.SelectSingleNode("//ROOT:spine", nsmgr);
                Spine spine = new Spine();
                spine.tocId = TryGetAttributeString(xmlNod, "toc");
                spine.itemrefs = new List<string>();

                listNode = root.SelectNodes("//ROOT:spine/ROOT:itemref", nsmgr);
                foreach (XmlNode node in listNode)
                {
                    spine.itemrefs.Add(TryGetAttributeString(node, "idref"));
                }

                docPUB.OpenPackage.Spine = spine;
            }
            catch (Exception err)
            {
                ExceptionHelper.Manage("ePUBParser:ParsePackage", err);
            }
        }

        /// <summary>
        /// Opens the package file located under <paramref name="extractFolder"/> (at the
        /// package's RelativeFilePath) and hands it to the stream-based overload.
        /// </summary>
        /// <param name="docPUB">Document whose OpenPackage supplies the relative path and is filled.</param>
        /// <param name="extractFolder">Folder the archive content was extracted to.</param>
        private void ParsePackage(ePUB docPUB, string extractFolder)
        {
            try
            {
                string packagePath = Path.Combine(extractFolder, docPUB.OpenPackage.RelativeFilePath);

                using (FileStream packageStream = File.Open(packagePath, FileMode.Open, FileAccess.Read))
                {
                    ParsePackage(docPUB, packageStream);
                }
            }
            catch (Exception failure)
            {
                ExceptionHelper.Manage("ePUBParser:ParsePackage", failure);
            }
        }

        /// <summary>
        /// Reads the table-of-content document from <paramref name="content"/> and
        /// stores its title and navigation tree in the document's OpenPackage.Toc.
        /// </summary>
        /// <param name="docPUB">Document whose OpenPackage receives the table of content.</param>
        /// <param name="content">Stream holding the TOC XML.</param>
        private void ParseTOC(ePUB docPUB, Stream content)
        {
            try
            {
                XmlDocument ncxDocument = GetDocumentWithNoValidation(content);
                XmlNode ncxRoot = ncxDocument.DocumentElement; //<ncx>

                // Register the <ncx> default namespace under the "ROOT" prefix.
                XmlNamespaceManager namespaces = new XmlNamespaceManager(ncxDocument.NameTable);
                ResolveNamespaces(namespaces, ncxRoot);

                docPUB.OpenPackage.Toc = new TableOfContent();
                docPUB.OpenPackage.Toc.Title = ncxRoot.SelectSingleNode("//ROOT:docTitle/ROOT:text", namespaces).InnerText;

                // Top-level navigation points; nested ones are handled by ParseNavRecursif.
                XmlNodeList topLevelPoints = ncxRoot.SelectNodes("child::ROOT:navMap/ROOT:navPoint", namespaces);
                List<TableOfContentItem> entries = new List<TableOfContentItem>();

                for (int index = 0; index < topLevelPoints.Count; index++)
                {
                    XmlNode navPoint = topLevelPoints[index];

                    TableOfContentItem entry = new TableOfContentItem();
                    entry.Id = TryGetAttributeString(navPoint, "id");
                    entry.PlayOrder = TryGetAttributeString(navPoint, "playOrder");
                    entry.Label = navPoint.SelectSingleNode("child::ROOT:navLabel/ROOT:text", namespaces).InnerText;

                    // The content source is resolved against the package folder.
                    string packageFolder = docPUB.OpenPackage.RelativeFolder;
                    XmlNode contentNode = navPoint.SelectSingleNode("child::ROOT:content", namespaces);
                    entry.Content = Path.Combine(packageFolder, TryGetAttributeString(contentNode, "src"));

                    entries.Add(entry);

                    entry.Children = ParseNavRecursif(docPUB, navPoint, namespaces);
                }

                docPUB.OpenPackage.Toc.NavMap = entries;
            }
            catch (Exception failure)
            {
                ExceptionHelper.Manage("ePUBParser:ParseTOC", failure);
            }
        }

        /// <summary>
        /// Opens the table-of-content file of an extracted archive and hands it to the
        /// stream-based overload.
        /// </summary>
        /// <param name="docPUB">Document that supplies the TOC file path and is filled.</param>
        /// <param name="extractFolder">Folder the archive content was extracted to.</param>
        private void ParseTOC(ePUB docPUB, string extractFolder)
        {
            try
            {
                string tocPath = docPUB.GetTOCFile();

                // Resolve the TOC path against the extraction folder, as the container and
                // package overloads do, instead of against the process working directory.
                // Path.Combine returns its second argument unchanged when that argument is
                // already rooted, so an absolute path from GetTOCFile keeps working.
                if (!string.IsNullOrEmpty(extractFolder) && !string.IsNullOrEmpty(tocPath))
                {
                    tocPath = Path.Combine(extractFolder, tocPath);
                }

                using (FileStream fs = File.Open(tocPath, FileMode.Open, FileAccess.Read))
                {
                    ParseTOC(docPUB, fs);
                }
            }
            catch (Exception err)
            {
                ExceptionHelper.Manage("ePUBParser:ParseTOC", err);
            }
        }

        /// <summary>
        /// Converts the navPoint children of <paramref name="node"/> into table-of-content
        /// items, descending recursively into each navPoint for its own children.
        /// </summary>
        /// <param name="docPUB">Document whose package folder prefixes each content path.</param>
        /// <param name="node">Node whose direct navPoint children are read.</param>
        /// <param name="nsmgr">Namespace manager with the "ROOT" prefix registered.</param>
        /// <returns>The items found (possibly empty), or null when an exception occurs.</returns>
        private List<TableOfContentItem> ParseNavRecursif(ePUB docPUB, XmlNode node, XmlNamespaceManager nsmgr)
        {
            try
            {
                XmlNodeList childPoints = node.SelectNodes("child::ROOT:navPoint", nsmgr);
                List<TableOfContentItem> entries = new List<TableOfContentItem>();

                for (int index = 0; index < childPoints.Count; index++)
                {
                    XmlNode navPoint = childPoints[index];

                    TableOfContentItem entry = new TableOfContentItem();
                    entry.Id = TryGetAttributeString(navPoint, "id");
                    entry.PlayOrder = TryGetAttributeString(navPoint, "playOrder");
                    entry.Label = navPoint.SelectSingleNode("child::ROOT:navLabel/ROOT:text", nsmgr).InnerText;

                    // The content source is resolved against the package folder.
                    string packageFolder = docPUB.OpenPackage.RelativeFolder;
                    XmlNode contentNode = navPoint.SelectSingleNode("child::ROOT:content", nsmgr);
                    entry.Content = Path.Combine(packageFolder, TryGetAttributeString(contentNode, "src"));

                    entries.Add(entry);

                    entry.Children = ParseNavRecursif(docPUB, navPoint, nsmgr);
                }

                return entries;
            }
            catch (Exception failure)
            {
                ExceptionHelper.Manage("ePUBParser:ParseNavRecursif", failure);
                return null;
            }
        }

        /// <summary>
        /// Loads an <see cref="XmlDocument"/> from <paramref name="content"/> with DTD
        /// processing set to Ignore.
        /// </summary>
        /// <param name="content">Stream holding the XML; read from its start when it is seekable.</param>
        /// <returns>The loaded document, or null when loading fails.</returns>
        private XmlDocument GetDocumentWithNoValidation(Stream content)
        {
            try
            {
                // Callers hand over streams that were just written to, so rewind first.
                // Non-seekable streams cannot be rewound and are read from their current position.
                if (content.CanSeek)
                {
                    content.Position = 0;
                }

                XmlDocument doc = new XmlDocument();

                XmlReaderSettings settings = new XmlReaderSettings();
                settings.DtdProcessing = DtdProcessing.Ignore;
                using (XmlReader reader = XmlReader.Create(content, settings))
                {
                    doc.Load(reader);
                }

                return doc;
            }
            catch (Exception err)
            {
                ExceptionHelper.Manage("ePUBParser:GetDocumentWithNoValidation", err);
                return null;
            }
        }

        /// <summary>
        /// Opens the file at <paramref name="filePath"/> for reading and loads it through
        /// the stream-based overload.
        /// </summary>
        /// <param name="filePath">Path of the XML file to load.</param>
        /// <returns>The loaded document, or null when opening or loading fails.</returns>
        private XmlDocument GetDocumentWithNoValidation(string filePath)
        {
            XmlDocument loaded = null;
            try
            {
                using (FileStream source = File.Open(filePath, FileMode.Open, FileAccess.Read))
                {
                    loaded = GetDocumentWithNoValidation(source);
                }
            }
            catch (Exception failure)
            {
                ExceptionHelper.Manage("ePUBParser:GetDocumentWithNoValidation", failure);
                loaded = null;
            }

            return loaded;
        }

        /// <summary>
        /// Registers the namespaces declared on <paramref name="xmlElement"/> under the
        /// fixed prefixes used by this parser's XPath queries: the default namespace as
        /// "ROOT", plus "opf" and "dc".
        /// </summary>
        /// <param name="xmlManager">Namespace manager to add the prefixes to.</param>
        /// <param name="xmlElement">Element whose xmlns declarations are read.</param>
        private void ResolveNamespaces(XmlNamespaceManager xmlManager, XmlNode xmlElement)
        {
            try
            {
                // Pairs of (declaring attribute, prefix to register), processed in order.
                string[] declarations = { "xmlns", "xmlns:opf", "xmlns:dc" };
                string[] prefixes = { "ROOT", "opf", "dc" };

                for (int index = 0; index < declarations.Length; index++)
                {
                    ResolveNamespace(xmlManager, xmlElement, declarations[index], prefixes[index]);
                }
            }
            catch (Exception failure)
            {
                ExceptionHelper.Manage("ePUBParser:ResolveNamespaces", failure);
            }
        }

        /// <summary>
        /// Adds one namespace to <paramref name="xmlManager"/> under the prefix
        /// <paramref name="xmlTag"/>, taking the URI from the attribute named
        /// <paramref name="xmlNamespace"/> on <paramref name="xmlElement"/>. Nothing is
        /// added when the attribute is absent.
        /// </summary>
        /// <param name="xmlManager">Namespace manager to update.</param>
        /// <param name="xmlElement">Element carrying the declaration.</param>
        /// <param name="xmlNamespace">Name of the declaring attribute, e.g. "xmlns" or "xmlns:dc".</param>
        /// <param name="xmlTag">Prefix to register the namespace under.</param>
        private void ResolveNamespace(XmlNamespaceManager xmlManager, XmlNode xmlElement, string xmlNamespace, string xmlTag)
        {
            try
            {
                XmlAttribute declaration = xmlElement.Attributes[xmlNamespace];
                if (declaration == null)
                {
                    return;
                }

                xmlManager.AddNamespace(xmlTag, declaration.Value);
            }
            catch (Exception failure)
            {
                ExceptionHelper.Manage("ePUBParser:ResolveNamespace", failure);
            }
        }

        /// <summary>
        /// Looks up the attribute named <paramref name="attributeName"/> on
        /// <paramref name="node"/>.
        /// </summary>
        /// <param name="node">Node to inspect.</param>
        /// <param name="attributeName">Name of the attribute to fetch.</param>
        /// <returns>The attribute, or null when it is absent or the lookup throws.</returns>
        private XmlAttribute TryGetAttribute(XmlNode node, string attributeName)
        {
            XmlAttribute found = null;
            try
            {
                found = node.Attributes[attributeName];
            }
            catch (Exception failure)
            {
                ExceptionHelper.Manage("ePUBParser:TryGetAttribute", failure);
                found = null;
            }

            return found;
        }

        /// <summary>
        /// Returns the value of the attribute named <paramref name="attributeName"/> on
        /// <paramref name="node"/>.
        /// </summary>
        /// <param name="node">Node to inspect; may be null.</param>
        /// <param name="attributeName">Name of the attribute to read.</param>
        /// <returns>
        /// The attribute value, or an empty string when the node is null, has no
        /// attribute collection, or does not carry the attribute.
        /// </returns>
        private string TryGetAttributeString(XmlNode node, string attributeName)
        {
            try
            {
                // Optional nodes and attributes are an expected case for callers, so they
                // are handled explicitly instead of through a NullReferenceException that
                // would be reported as an error.
                if (node == null || node.Attributes == null)
                {
                    return string.Empty;
                }

                XmlAttribute attribute = node.Attributes[attributeName];
                return attribute != null ? attribute.Value : string.Empty;
            }
            catch (Exception err)
            {
                ExceptionHelper.Manage("ePUBParser:TryGetAttributeString", err);
                return string.Empty;
            }
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)


Written By
Architect
France France
WPF and MVVM fan, I have practiced C# in all its forms since the beginning of the .NET Framework, not to mention C++/MFC and other software such as databases, ASP, WCF, Web & Windows services, applications, and now Core and UWP.
In my wasted hours, I am guilty of having fathered C.B.R. and its cousins C.B.R. for WinRT and UWP on the Windows store.
But apart from that, I am a great handyman ... the house, a rocket stove to heat the jacuzzi and the last one: a wood oven for pizza, bread, and everything that goes inside

https://guillaumewaser.wordpress.com/
https://fouretcompagnie.wordpress.com/

Comments and Discussions