Click here to Skip to main content
15,893,564 members
Articles / Programming Languages / Visual Basic

Page Removal and Re-Ordering in XPS Files

Rate me:
Please Sign up or sign in to vote.
4.87/5 (9 votes)
16 Feb 2012CPOL24 min read 62.9K   1.9K   43  
An article about an application and its underlying XPS utilities package which create a new XPS file from a source XPS file with pages removed and/or reordered as designated by the user
Imports System
Imports System.Collections.Generic
Imports System.Text
Imports System.Windows.Documents
Imports System.Windows.Markup
Imports System.Windows.Xps.Packaging
Imports System.Windows.Xps
Imports System.IO
Imports System.IO.Packaging
Imports System.Xml
Imports System.Printing


Public Class XPSProcessingUtilities

#Region " public interface methods "
    ''' <summary>
    ''' Returns the number of pages in the first FixedDocument of an XPS file.
    ''' </summary>
    ''' <param name="xpsFile">Path of the XPS file to inspect.</param>
    ''' <returns>The count of fixed pages in the first document of the file.</returns>
    ''' <exception cref="InvalidOperationException">The file contains no documents.</exception>
    ''' <remarks>
    ''' Known limitation: only the first document in the file is addressed by this routine.
    ''' </remarks>
    Public Function getPageCount(ByVal xpsFile As String) As Integer
        ' Counting pages never modifies the file, so open the OPC package read-only;
        ' this also lets the call succeed on files the caller has no write access to.
        Using thePackage As Package = Package.Open(xpsFile, FileMode.Open, FileAccess.Read)

            ' the XpsDocument wraps the package; disposing the package (End Using) releases the file
            Dim xpsDoc As XpsDocument = New XpsDocument(thePackage)

            Dim fixedDocSeqReader As IXpsFixedDocumentSequenceReader = xpsDoc.FixedDocumentSequenceReader

            If (fixedDocSeqReader.FixedDocuments.Count = 0) Then
                Throw New InvalidOperationException("The source XPS file does not contain any documents!!")
            End If

            ' only the first document of the sequence is considered
            Dim fixedDocReader As IXpsFixedDocumentReader = fixedDocSeqReader.FixedDocuments(0)

            Return fixedDocReader.FixedPages.Count
        End Using
    End Function

    ''' <summary>
    ''' Removes a single page from the given XPS file.
    ''' </summary>
    ''' <param name="xpsFile">Path of the XPS file to process.</param>
    ''' <param name="pageToRemove">Number of the page to remove (pages are counted from 1).</param>
    Public Sub RemovePageFromXpsFile(ByVal xpsFile As String, ByVal pageToRemove As Integer)
        ' wrap the lone page number in a one-element array so the shared routine can be reused
        buildNewXPSFile(xpsFileToProcess:=xpsFile, pageSet:=New Integer() {pageToRemove}, pageRemoval:=True)
    End Sub

    ''' <summary>
    ''' Removes a set of pages from the given XPS file; the remaining pages keep their original order.
    ''' </summary>
    ''' <param name="xpsFile">Path of the XPS file to process.</param>
    ''' <param name="pagesToRemove">Numbers of the pages to remove (pages are counted from 1).</param>
    Public Sub RemovePagesFromXpsFile(ByVal xpsFile As String, ByVal pagesToRemove() As Integer)
        ' removal mode : every page listed in the set is dropped
        Const removeListedPages As Boolean = True
        buildNewXPSFile(xpsFileToProcess:=xpsFile, pageSet:=pagesToRemove, pageRemoval:=removeListedPages)
    End Sub

    ''' <summary>
    ''' Keeps only the listed pages of the given XPS file, written in the order in which they are listed.
    ''' </summary>
    ''' <param name="xpsFile">Path of the XPS file to process.</param>
    ''' <param name="pagesToKeep">Numbers of the pages to keep (pages are counted from 1), in the desired output order.</param>
    Public Sub RetainPagesFromXpsFile(ByVal xpsFile As String, ByVal pagesToKeep() As Integer)
        ' retention mode : every page NOT listed in the set is dropped
        Const removeListedPages As Boolean = False
        buildNewXPSFile(xpsFileToProcess:=xpsFile, pageSet:=pagesToKeep, pageRemoval:=removeListedPages)
    End Sub

#End Region

#Region " the main XPS processing routine(s) "

    ' the framework of this routine comes from a post by "Jo0815" found at the following link :
    '         http://forums.microsoft.com/MSDN/ShowPost.aspx?PostID=1861661&SiteID=1 
    ' the original C# code had to be translated to VB, and was necessarily expanded in several manners, but 
    '   Jo0815's code was definitely used as the base and a good jumping off point for this utility package.
    '   Also included in the post was useful guidance for going the extra step... of removing the no-longer-needed
    '   resources which had relationships only with the deleted pages.... [So, THANK YOU, Jo!!!!!]
    '
    ' if pages are being removed, all remaining pages are written to the new document in consecutive order
    ' if pages are being retained, the pages are written to the new document in the order supplied in the pageSet
    ''' <summary>
    ''' Rewrites, in place, the first FixedDocument of an XPS file so that it lists only the wanted pages,
    ''' deletes the page parts that were dropped, and deletes the resource parts that no surviving part references.
    ''' </summary>
    ''' <param name="xpsFileToProcess">Path of the XPS file; it is opened read/write and modified in place.</param>
    ''' <param name="pageSet">
    ''' Page numbers (counted from 1). When <paramref name="pageRemoval"/> is True these are the pages to drop;
    ''' when False these are the pages to keep, written in the order given.
    ''' </param>
    ''' <param name="pageRemoval">True to remove the pages in the set; False to retain only the pages in the set.</param>
    ''' <exception cref="ArgumentNullException"><paramref name="pageSet"/> is Nothing.</exception>
    ''' <exception cref="ArgumentOutOfRangeException">
    ''' In retention mode, a requested page does not exist in the document (raised before the file is modified).
    ''' </exception>
    ''' <exception cref="InvalidOperationException">The file contains no documents.</exception>
    ''' <remarks>
    ''' Known limitation: only the first document in the file is addressed by this routine.
    ''' The regenerated FixedDocument contains one PageContent element per page carrying only a Source attribute.
    ''' </remarks>
    Private Sub buildNewXPSFile(ByVal xpsFileToProcess As String, ByVal pageSet As Integer(), ByVal pageRemoval As Boolean)

        If pageSet Is Nothing Then
            Throw New ArgumentNullException("pageSet")
        End If

        ' Open the XPS file using OPC Package...
        Using thePackage As Package = Package.Open(xpsFileToProcess, FileMode.Open, FileAccess.ReadWrite)

            Dim xpsDoc As XpsDocument = New XpsDocument(thePackage)
            Dim fixedDocSeqReader As IXpsFixedDocumentSequenceReader = xpsDoc.FixedDocumentSequenceReader

            If (fixedDocSeqReader.FixedDocuments.Count = 0) Then
                Throw New InvalidOperationException("The source XPS file does not contain any documents!!")
            End If

            ' only the first document of the sequence is processed
            Dim fixedDocReader As IXpsFixedDocumentReader = fixedDocSeqReader.FixedDocuments(0)

            ' ------------------------------------------------------------------------------------
            ' Phase 1 : read-only analysis. Nothing in the package is touched until every decision
            '           has been made, so a bad request cannot leave a half-modified file behind.
            ' ------------------------------------------------------------------------------------

            ' snapshot the page Uri's in document order (index 0 = page 1)
            Dim allPageUris As List(Of Uri) = New List(Of Uri)()
            For Each pageReader As IXpsFixedPageReader In fixedDocReader.FixedPages
                allPageUris.Add(pageReader.Uri)
            Next

            ' when retaining, every requested page is written out, so every one of them must exist
            If Not pageRemoval Then
                For Each requestedPage As Integer In pageSet
                    If requestedPage < 1 OrElse requestedPage > allPageUris.Count Then
                        Throw New ArgumentOutOfRangeException("pageSet", requestedPage, "The page to keep does not exist in the source XPS document.")
                    End If
                Next
            End If

            Dim outputPageUris As List(Of Uri) = New List(Of Uri)()
            Dim excludedPageUris As List(Of Uri) = New List(Of Uri)()
            Dim excludedPageKeys As Dictionary(Of String, Boolean) = New Dictionary(Of String, Boolean)(StringComparer.OrdinalIgnoreCase)
            ' resources referenced by a dropped page; keyed by resolved part Uri so duplicates collapse
            Dim removalCandidates As Dictionary(Of String, Uri) = New Dictionary(Of String, Uri)(StringComparer.OrdinalIgnoreCase)

            For pageNumber As Integer = 1 To allPageUris.Count
                Dim pageUri As Uri = allPageUris(pageNumber - 1)

                If shouldThisPageBeExcluded(pageNumber, pageSet, pageRemoval) Then
                    excludedPageUris.Add(pageUri)
                    excludedPageKeys(pageUri.ToString()) = True
                    collectInternalRelationshipTargets(thePackage.GetPart(pageUri).GetRelationships(), removalCandidates)
                ElseIf pageRemoval Then
                    ' removal mode : surviving pages stay in their original consecutive order
                    outputPageUris.Add(pageUri)
                End If
            Next

            If Not pageRemoval Then
                ' retention mode : pages are written in the order supplied by the caller
                For Each requestedPage As Integer In pageSet
                    outputPageUris.Add(allPageUris(requestedPage - 1))
                Next
            End If

            ' A candidate resource may only be deleted if nothing that survives still points at it.
            ' Every surviving part (not just the pages of the first document) and the package-level
            ' relationships are consulted, so resources shared with other documents are preserved.
            If removalCandidates.Count > 0 Then
                Dim targetsStillInUse As Dictionary(Of String, Uri) = New Dictionary(Of String, Uri)(StringComparer.OrdinalIgnoreCase)

                collectInternalRelationshipTargets(thePackage.GetRelationships(), targetsStillInUse)

                For Each survivingPart As PackagePart In thePackage.GetParts()
                    ' relationship (.rels) parts cannot own relationships themselves
                    If PackUriHelper.IsRelationshipPartUri(survivingPart.Uri) Then
                        Continue For
                    End If
                    ' pages about to be deleted do not count as users of a resource
                    If excludedPageKeys.ContainsKey(survivingPart.Uri.ToString()) Then
                        Continue For
                    End If
                    collectInternalRelationshipTargets(survivingPart.GetRelationships(), targetsStillInUse)
                Next

                For Each usedKey As String In targetsStillInUse.Keys
                    removalCandidates.Remove(usedKey)
                Next
            End If

            ' build the replacement FixedDocument markup in memory
            Dim memStream As MemoryStream = New MemoryStream()
            Using xmlWriter As XmlTextWriter = New XmlTextWriter(memStream, Encoding.UTF8)
                xmlWriter.WriteStartDocument()
                xmlWriter.WriteStartElement("FixedDocument", "http://schemas.microsoft.com/xps/2005/06")

                For Each pageUri As Uri In outputPageUris
                    xmlWriter.WriteStartElement("PageContent")
                    xmlWriter.WriteAttributeString("Source", pageUri.ToString())
                    xmlWriter.WriteEndElement()
                Next

                xmlWriter.WriteEndElement()
                xmlWriter.WriteEndDocument()
            End Using
            ' MemoryStream.ToArray remains usable after the writer has closed the stream
            Dim newFixedDoc() As Byte = memStream.ToArray()

            ' ------------------------------------------------------------------------------------
            ' Phase 2 : apply the changes to the package
            ' ------------------------------------------------------------------------------------

            For Each pageUri As Uri In excludedPageUris
                thePackage.DeletePart(pageUri)
            Next

            For Each removableUri As Uri In removalCandidates.Values
                Try
                    If thePackage.PartExists(removableUri) Then
                        System.Diagnostics.Debug.WriteLine("Removing no longer used resource : " + removableUri.ToString())
                        thePackage.DeletePart(removableUri)
                    End If
                Catch ex As ArgumentException
                    ' the target does not form a valid part name; leaving an orphaned resource in the
                    ' package is harmless, so report it and carry on rather than abort mid-update
                    System.Diagnostics.Debug.WriteLine("Failed to remove Part from the Package : " + removableUri.OriginalString)
                End Try
            Next

            ' re-write the FixedDocument in the XPS file
            Dim fixedDocPart As PackagePart = thePackage.GetPart(fixedDocReader.Uri)
            Using partStream As Stream = fixedDocPart.GetStream(FileMode.Create)
                partStream.Write(newFixedDoc, 0, newFixedDoc.Length)
            End Using
        End Using

    End Sub

    ' Resolves the target of every internal relationship against its source and records the resulting
    ' absolute part Uri in the supplied map (keyed by the Uri text). External targets are skipped
    ' because they live outside the package and can never be deleted from it.
    Private Sub collectInternalRelationshipTargets(ByVal relationships As IEnumerable(Of PackageRelationship), ByVal targets As Dictionary(Of String, Uri))
        For Each relationship As PackageRelationship In relationships
            If relationship.TargetMode = TargetMode.Internal Then
                Dim resolvedTarget As Uri = PackUriHelper.ResolvePartUri(relationship.SourceUri, relationship.TargetUri)
                targets(resolvedTarget.ToString()) = resolvedTarget
            End If
        Next
    End Sub

    ''' <summary>
    ''' Decides whether a page must be left out of the rebuilt document.
    ''' </summary>
    ''' <param name="currentPage">Number of the page under consideration.</param>
    ''' <param name="pageSet">Page numbers supplied by the caller (pages to remove, or pages to keep).</param>
    ''' <param name="isPageRemoval">True when pageSet lists pages to remove; False when it lists pages to keep.</param>
    ''' <returns>
    ''' In removal mode, True when the page is in the set; in retention mode, True when the page is NOT in the set.
    ''' </returns>
    Private Function shouldThisPageBeExcluded(ByVal currentPage As Integer, ByVal pageSet As Integer(), ByVal isPageRemoval As Boolean) As Boolean
        For Each listedPage As Integer In pageSet
            If listedPage = currentPage Then
                ' page is in the set : excluded when removing, included when retaining
                Return isPageRemoval
            End If
        Next

        ' page is not in the set : included when removing, excluded when retaining
        Return Not isPageRemoval
    End Function

#End Region

End Class

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer
United States United States
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions