Introduction to XPS - Part 1 of n (of too many)

Lee Humphries
Rate me:
4.79/5 (13 votes)
2 Aug 2008 · CPOL · 15 min read
145.5K
1.1K
XPS is a fixed document format derived from XAML. Learn how to use it to produce the documents you want.
xpscleaner.zip
- Program.cs
- Properties
  - AssemblyInfo.cs
- Resources
- XPSCleaner.csproj
- XPSCleaner.sln
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Printing;
using System.Printing.IndexedProperties;
using System.Text;
using System.Threading;
using System.Xml;
using System.Xml.Xsl;
using System.Xml.XPath;

// Uses ICSharpCode's SharpZipLib, available from
// http://www.icsharpcode.net/OpenSource/SharpZipLib/Default.aspx
using ICSharpCode.SharpZipLib.Zip;

namespace BillProcessCmd
{
    class Program
    {
        /// <summary>
        /// Program entry point: validates the command line and cleans the XPS file
        /// named by the first argument.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the path of the source XPS file.</param>
        [System.STAThread]
        static void Main(string[] args)
        {
            // Without a source file there is nothing to do, so just explain the usage
            if (args.Length < 1)
            {
                HelpText();
                return;
            }

            string sourceXpsFile = args[0];

            // Check for the file before doing anything else with the path, so a bad
            // name produces this message rather than an exception
            if (!File.Exists(sourceXpsFile))
            {
                Console.WriteLine("Cannot find file {0}", sourceXpsFile);
                return;
            }

            // No extra options are parsed from the command line; XpsClean is handed an empty list
            List<string> arguments = new List<string>();

            // Do the clean up
            XpsClean(sourceXpsFile, arguments);
        }

		/// <summary>
		/// Writes a cleaned copy of an XPS package next to the source file, named
		/// "&lt;source name&gt;-Clean.xps".
		/// </summary>
		/// <remarks>
		/// The work is done in three passes over the entries of the source zip:
		/// 1. entries with the same extension, size and content are recorded as duplicates;
		/// 2. .fpage files are cleaned and their .fpage.rels regenerated, and .rels/.fdoc/.fdseq
		///    files are rewritten, while the list of referenced resources is collected;
		/// 3. the remaining entries are copied across, but only where they are still referenced.
		/// The four stylesheets are loaded from a "Resources" folder relative to the current directory.
		/// Any exception is caught, written to the console and appended to "ErrorLog.txt".
		/// </remarks>
		/// <param name="sourceXpsFile">Path of the XPS file to clean.</param>
		/// <param name="arguments">Additional options; not read by this method.</param>
		private static void XpsClean(string sourceXpsFile, List<string> arguments)
		{
			// Get some basic file name bits ready
			string directoryName = Path.GetDirectoryName(sourceXpsFile) + "\\";
			directoryName = ((directoryName.Length > 2) ? directoryName : "");
			string baseFileName = directoryName + Path.GetFileNameWithoutExtension(sourceXpsFile);
			string xpsFileName = baseFileName + "-Clean.xps";
			string processingFileName = "";
			StringBuilder relRefsList = new StringBuilder();

			// Maps the (renamed) entry name of each duplicate to the (renamed) entry name of its 'original'
			Dictionary<string, string> dupFiles = new Dictionary<string, string>();

			try
			{
				// Load up the Cleanup XSLT
				XslCompiledTransform cleanupXSLT = new XslCompiledTransform();
				cleanupXSLT.Load("Resources\\XPSCleaner.xsl");
				Console.WriteLine("Cleanup XSLT Loaded.");

				// Load up the Resource Relationships XSLT
				XslCompiledTransform relsXSLT = new XslCompiledTransform();
				relsXSLT.Load("Resources\\XPSRels.xsl");
				Console.WriteLine("Resource Relationships XSLT Loaded.");

				// Load up the Resource Relationship Listing XSLT
				XslCompiledTransform relRefsXSLT = new XslCompiledTransform();
				relRefsXSLT.Load("Resources\\XPSRelRefs.xsl");
				Console.WriteLine("Resource Relationship Listing XSLT Loaded.");

				// Load up the References XSLT
				XslCompiledTransform referencesXSLT = new XslCompiledTransform();
				referencesXSLT.Load("Resources\\XPSReferences.xsl");
				Console.WriteLine("References XSLT Loaded.");

				// Open up the source XPS file (disposing it at the end of the block also closes it)
				using (ZipFile zf = new ZipFile(sourceXpsFile))
				{
					// Extract its parts
					using (ZipOutputStream s = new ZipOutputStream(File.Create(xpsFileName)))
					{
						#region First Pass over all the files to identify duplicates
						// Duplicate files will be dropped and references to them
						// altered to point to the 'original'
						foreach (ZipEntry ze1 in zf)
						{
							string ze1NewName = ze1.Name.Replace("Documents/1/", "Documents/2/");
							// Skip this file if we've already identified it as a duplicate
							if (dupFiles.ContainsKey(ze1NewName))
								continue;

							// Go back through the list to identify any duplicates
							foreach (ZipEntry ze2 in zf)
							{
								string ze2NewName = ze2.Name.Replace("Documents/1/", "Documents/2/");

								// Skip this file if it happens to be the same one
								// or is not the same type (extension)
								// or are of differing file sizes.
								// This is checked before any stream is opened so that
								// pairs which cannot match cost nothing.
								if (ze1NewName == ze2NewName ||
									Path.GetExtension(ze1NewName) != Path.GetExtension(ze2NewName) ||
									ze1.Size != ze2.Size)
									continue;

								if (ZipEntriesHaveSameContent(zf, ze1, ze2))
								{
									// This file must be identified as a duplicate
									dupFiles.Add(ze2NewName, ze1NewName);
								}
							}
						}
						#endregion

						#region Second Pass over the files performing the clean up and determining the resource files needed
						foreach (ZipEntry ze in zf)
						{
							// What file are we processing?
							processingFileName = ze.Name;

							// If this is a 'duplicate' then we just dump it
							// NOTE(review): dupFiles is keyed by the renamed ("Documents/2/") entry name while
							// this looks up the original name, so entries under "Documents/1/" never match here.
							// Left unchanged because the effect depends on the stylesheets - confirm intent.
							if (dupFiles.ContainsKey(processingFileName))
								continue;

							// We're assuming this XPS file contains only one document (number 1)
							// Which we will change to number 2

							// Clean up fpage files and generate new fpage.rels files
							if (processingFileName.StartsWith("Documents/1/Pages") && processingFileName.EndsWith(".fpage"))
							{
								#region Clean up the fpage files and generate new fpage.rels files
								// Clean up the .fpage file itself
								string entryFileName = CopyAndCleanFile(baseFileName, processingFileName, cleanupXSLT, dupFiles, zf, ze, s);

								//  Determine the temporary file names we'll use
								string relsFileName = baseFileName + "Rels." + Path.GetFileName(processingFileName);
								string processingRelsFileName = processingFileName.Replace("Documents/1/Pages/", "Documents/2/Pages/_rels/") + ".rels";

								// Generate the Associated .rels file (removing any redundant references)
								relsXSLT.Transform(entryFileName, relsFileName);
								Console.WriteLine("{0} has been generated.", processingRelsFileName);

								// Delete the cleaned file
								File.Delete(entryFileName);

								// Do a search and replace for each of the 'duplicate' file references
								ReplaceReferencesToDuplicates(relsFileName, dupFiles);

								// Add the generated rels entry to the new zip
								AddZipEntry(processingRelsFileName, s, relsFileName);

								// Identify the actual resources needed
								relRefsList = IdentifyRels(relRefsList, relRefsXSLT, relsFileName);
								Console.WriteLine("{0} Resources have been listed.", processingRelsFileName);

								// Delete the rels file
								File.Delete(relsFileName);
								#endregion
							}
							else if (processingFileName.EndsWith(".fpage.rels"))
							{
								// dump these - we've regenerated them
							}
							else if (processingFileName.StartsWith("Documents/1/") || processingFileName.Contains("_rels/") || processingFileName.EndsWith(".fdseq"))
							{
								// Identify the files that we will clean up
								bool bTransformRequired = (processingFileName.EndsWith(".rels") || processingFileName.EndsWith(".fdoc") || processingFileName.EndsWith(".fdseq"));

								if (bTransformRequired)
								{
									#region Clean up references to other files
									// Rewrite the references held in this .rels/.fdoc/.fdseq file
									string entryFileName = CopyAndCleanFile(baseFileName, processingFileName, referencesXSLT, dupFiles, zf, ze, s);

									if (processingFileName.EndsWith(".rels"))
									{
										relRefsList = IdentifyRels(relRefsList, relRefsXSLT, entryFileName);
										Console.WriteLine("{0} Resources have been listed.", processingFileName);
									}

									// Delete the cleaned file
									File.Delete(entryFileName);
									#endregion
								}
							}
						}
						#endregion

						#region Process the list of resource references to determine what's actually needed
						List<string> relsFileNames = new List<string>();

						foreach (string relFileName in relRefsList.Replace("  ", " ").ToString().Split(' '))
						{
							if (relFileName.Trim().Length == 0)
								continue;

							// take off the opening slash as this will cause confusion later
							string relativeName = relFileName.TrimStart('/');

							// Compare the stored form of the name, so each resource is listed once only
							if (!relsFileNames.Contains(relativeName))
							{
								relsFileNames.Add(relativeName);
							}
						}
						#endregion

						#region Third Pass to transfer the resource files needed
						foreach (ZipEntry ze in zf)
						{
							// What file are we processing?
							processingFileName = ze.Name;

							// If this is a 'duplicate' then we just dump it
							if (dupFiles.ContainsKey(processingFileName))
								continue;

							// Skip files we've already done
							if ((processingFileName.StartsWith("Documents/1/Pages") && processingFileName.EndsWith(".fpage")) ||
								(processingFileName.EndsWith(".fpage.rels")))
							{
								// Skip these - we've already processed them
							}
							else if (processingFileName.StartsWith("Documents/1/Resources/Fonts"))
							{
								#region Resource files that require 'moving' to the 'root' Resources folder
								string newFileName = processingFileName.Replace("Documents/1/", "");
								if (relsFileNames.Contains(newFileName))
								{
									Console.WriteLine("XPS file entry '{0}' moving to {1}", processingFileName, newFileName);
									CopyZipEntry(newFileName, s, zf, ze);
								}
								#endregion
							}
							else if (processingFileName.StartsWith("Documents/1/") || processingFileName.Contains("_rels/") || processingFileName.EndsWith(".fdseq"))
							{
								// Identify the files that were cleaned up
								bool bTransformRequired = (processingFileName.EndsWith(".rels") || processingFileName.EndsWith(".fdoc") || processingFileName.EndsWith(".fdseq"));

								if (!bTransformRequired)
								{
									#region Files that only require 'moving' to document '2'
									string newFileName = processingFileName.Replace("Documents/1/", "Documents/2/");
									if (relsFileNames.Contains(newFileName))
									{
										Console.WriteLine("XPS file entry '{0}' moving to {1}", processingFileName, newFileName);
										CopyZipEntry(newFileName, s, zf, ze);
									}
									#endregion
								}
							}
							else
							{
								#region Files we just put in the same place in the new zip
								Console.WriteLine("XPS file entry '{0}' transferred as is", processingFileName);
								CopyZipEntry(ze.Name, s, zf, ze);
								#endregion
							}
						}
						#endregion
					}

					Console.WriteLine("\nFinished clean up of {0}.", xpsFileName);
				}
			}
			catch (Exception exp)
			{
				// Keep a record of the error, including the exception message itself
				LogToFile("ErrorLog.txt", sourceXpsFile, processingFileName, string.Format("Cleaning up {0} failed\r\n{1}\r\n{2}", sourceXpsFile, processingFileName, exp.Message));
			}
		}

		/// <summary>
		/// Compares the uncompressed content of two entries of the same zip file byte for byte.
		/// </summary>
		/// <param name="zf">The zip file both entries belong to.</param>
		/// <param name="first">The first entry to compare.</param>
		/// <param name="second">The second entry to compare.</param>
		/// <returns>true when both entries yield exactly the same bytes; otherwise false.</returns>
		private static bool ZipEntriesHaveSameContent(ZipFile zf, ZipEntry first, ZipEntry second)
		{
			// Small fixed size buffers keep a lid on memory usage
			byte[] buffer1 = new byte[4096];
			byte[] buffer2 = new byte[4096];

			using (Stream zs1 = zf.GetInputStream(first))
			using (Stream zs2 = zf.GetInputStream(second))
			{
				while (true)
				{
					int count1 = ReadBlock(zs1, buffer1);
					int count2 = ReadBlock(zs2, buffer2);

					// Blocks are always filled unless the stream has ended,
					// so differing counts mean differing lengths
					if (count1 != count2)
						return false;

					// Both streams ended together without a difference being found
					if (count1 == 0)
						return true;

					// Only compare the bytes that were actually read this time round
					for (int i = 0; i < count1; i++)
					{
						if (buffer1[i] != buffer2[i])
							return false;
					}
				}
			}
		}

		/// <summary>
		/// Reads from a stream until the buffer is full or the stream ends.
		/// A single Stream.Read call may return fewer bytes than requested.
		/// </summary>
		/// <param name="source">The stream to read from.</param>
		/// <param name="buffer">The buffer to fill from index 0.</param>
		/// <returns>The number of bytes placed in the buffer; less than its length only at end of stream.</returns>
		private static int ReadBlock(Stream source, byte[] buffer)
		{
			int total = 0;

			while (total < buffer.Length)
			{
				int read = source.Read(buffer, total, buffer.Length - total);
				if (read <= 0)
					break;

				total += read;
			}

			return total;
		}

		/// <summary>
		/// Writes the command-line usage summary to the console, one line at a time.
		/// </summary>
		private static void HelpText()
		{
			string[] usageLines = new string[]
			{
				"Usage: XPSCleaner sourceFilename",
				"\tsourceFilename name (including path as necessary) of the xps file"
			};

			foreach (string usageLine in usageLines)
			{
				Console.WriteLine(usageLine);
			}
		}

		/// <summary>
		/// Runs the relationship-listing stylesheet over a file and appends its output
		/// to the running list of resource references.
		/// </summary>
		/// <param name="relRefsList">The list being built up; the transform output is appended to it.</param>
		/// <param name="relRefsXSLT">The loaded stylesheet that produces the listing.</param>
		/// <param name="relsSourceFileName">Path of the XML file to transform.</param>
		/// <returns>The same <paramref name="relRefsList"/> instance that was passed in.</returns>
		private static StringBuilder IdentifyRels(StringBuilder relRefsList, XslCompiledTransform relRefsXSLT, string relsSourceFileName)
		{
			// The output is a fragment (no declaration, no single root element required)
			XmlWriterSettings settings = new XmlWriterSettings();
			settings.Indent = true;
			settings.OmitXmlDeclaration = true;
			settings.NewLineOnAttributes = true;
			settings.ConformanceLevel = ConformanceLevel.Fragment;

			// Dispose the writer so that everything is flushed into the list and nothing is left open
			using (XmlWriter relRefs = XmlWriter.Create(relRefsList, settings))
			{
				relRefsXSLT.Transform(relsSourceFileName, relRefs);
			}

			return relRefsList;
		}

		/// <summary>
		/// Extracts a zip entry, transforms it with the given stylesheet, repoints references to
		/// duplicate files, and adds the result to the output zip under its "Documents/2/" name.
		/// </summary>
		/// <param name="baseFileName">Path prefix used to build the working file names.</param>
		/// <param name="processingFileName">Name of the entry within the source zip.</param>
		/// <param name="cleaningXSLT">The loaded stylesheet to apply to the entry.</param>
		/// <param name="dupFiles">Map of duplicate file name to the name of its 'original'.</param>
		/// <param name="zf">The source zip file.</param>
		/// <param name="ze">The entry to extract from the source zip.</param>
		/// <param name="s">The output zip stream that receives the cleaned entry.</param>
		/// <returns>
		/// Path of the cleaned working file. It is left on disk for the caller to use and delete.
		/// </returns>
		private static string CopyAndCleanFile(string baseFileName, string processingFileName, XslCompiledTransform cleaningXSLT, Dictionary <string, string> dupFiles, ZipFile zf, ZipEntry ze, ZipOutputStream s)
		{
			// Determine the file names we'll use
			string tempFileName = baseFileName + "Temp." + Path.GetFileName(processingFileName);
			string entryFileName = baseFileName + "." + Path.GetFileName(processingFileName);
			string destinationFileName = processingFileName.Replace("Documents/1/", "Documents/2/");

			Console.WriteLine("XPS file entry '{0}' updating and moving to {1}", processingFileName, destinationFileName);

			try
			{
				// Read the Zip Entry out to a file
				ReadZipEntry(tempFileName, zf, ze);

				// Alter the References in the XML
				cleaningXSLT.Transform(tempFileName, entryFileName);
				Console.WriteLine("{0} has been cleaned.", processingFileName);
			}
			finally
			{
				// The Temp file is only needed as input to the transform, so it is removed
				// whether or not the transform worked
				File.Delete(tempFileName);
			}

			// Do a search and replace for each of the 'duplicate' file references
			ReplaceReferencesToDuplicates(entryFileName, dupFiles);

			// Add the entry to the new zip
			AddZipEntry(destinationFileName, s, entryFileName);

			return entryFileName;
		}

		/// <summary>
		/// Rewrites a text file so that every mention of a 'duplicate' file name
		/// is replaced with the name of its 'original'.
		/// </summary>
		/// <param name="entryFileName">Path of the file to update in place.</param>
		/// <param name="dupFiles">Map of duplicate file name (key) to original file name (value).</param>
		private static void ReplaceReferencesToDuplicates(string entryFileName, Dictionary<string, string> dupFiles)
		{
			string[] lines = File.ReadAllLines(entryFileName);
			bool anyLineRewritten = false;

			// Take each line in turn and apply every duplicate mapping to it
			for (int lineIndex = 0; lineIndex < lines.Length; lineIndex++)
			{
				string current = lines[lineIndex];

				foreach (KeyValuePair<string, string> mapping in dupFiles)
				{
					// Nothing to do unless this line mentions the 'duplicate'
					if (!current.Contains(mapping.Key))
						continue;

					// Point the reference at the 'original' instead
					current = current.Replace(mapping.Key, mapping.Value);
					anyLineRewritten = true;
				}

				lines[lineIndex] = current;
			}

			// Leave the file untouched when there was nothing to replace
			if (!anyLineRewritten)
				return;

			File.WriteAllLines(entryFileName, lines);
		}

		/// <summary>
		/// Writes a message to the console and appends it to a log file, preceded by a
		/// header line holding the time, the source file and the entry being processed.
		/// </summary>
		/// <param name="logFileName">Path of the log file to append to.</param>
		/// <param name="origSourceFileName">Name of the source file being worked on.</param>
		/// <param name="processingFileName">Name of the entry being processed when the message arose.</param>
		/// <param name="logText">The message to record.</param>
		private static void LogToFile(string logFileName, string origSourceFileName, string processingFileName, string logText)
		{
			Console.WriteLine(logText);

			// "HH" gives a 24 hour clock; a 12 hour clock without an AM/PM marker would be ambiguous
			string header = string.Format("{0:yyyy-MMM-dd HH:mm:ss} {1} {2}\r\n", DateTime.Now, origSourceFileName, processingFileName);

			// Header and message go out in a single append
			File.AppendAllText(logFileName, header + logText + "\r\n");
		}

        /// <summary>
        /// Loads a stylesheet from a file and applies it to the given document,
        /// without any stylesheet parameters.
        /// </summary>
        /// <param name="xsltFile">Path of the XSLT file to load.</param>
        /// <param name="inputNav">Navigator over the document to transform.</param>
        /// <param name="outputFile">Path of the file that receives the result.</param>
        private static void DoTransformation(string xsltFile, XPathNavigator inputNav, string outputFile)
        {
            XslCompiledTransform stylesheet = new XslCompiledTransform();
            stylesheet.Load(xsltFile);

            // Hand over to the overload that does the work, with no parameter list
            XsltArgumentList noParameters = null;
            DoTransformation(stylesheet, inputNav, noParameters, outputFile);
        }

        /// <summary>
        /// Applies a loaded stylesheet to the whole of a document and writes the result to a file.
        /// </summary>
        /// <param name="compXSLT">The loaded stylesheet to apply.</param>
        /// <param name="inputNav">Navigator over the document; it is moved to the document root.</param>
        /// <param name="paramList">Stylesheet parameters, or null when there are none.</param>
        /// <param name="outputFile">Path of the file that receives the result.</param>
        private static void DoTransformation(XslCompiledTransform compXSLT, XPathNavigator inputNav, XsltArgumentList paramList, string outputFile)
        {
            // Always transform from the root, wherever the navigator was left
            inputNav.MoveToRoot();

            // The using block closes the output file even when the transform fails
            using (XmlWriter outputWriter = XmlWriter.Create(outputFile))
            {
                // A null parameter list is accepted, so one call covers both cases
                compXSLT.Transform(inputNav, paramList, outputWriter);
                outputWriter.Flush();
            }
        }

		/// <summary>
		/// Copies the content of an entry in the source zip into a new entry in the output zip.
		/// </summary>
		/// <param name="entryFileName">Name to give the new entry in the output zip.</param>
		/// <param name="s">The output zip stream.</param>
		/// <param name="inputZip">The source zip file.</param>
		/// <param name="inputFile">The entry in the source zip whose content is copied.</param>
		private static void CopyZipEntry(string entryFileName, ZipOutputStream s, ZipFile inputZip, ZipEntry inputFile)
		{
			// Start the new entry, stamped with the current time
			ZipEntry target = new ZipEntry(entryFileName);
			target.DateTime = DateTime.Now;
			s.PutNextEntry(target);

			// A small fixed size chunk keeps memory usage down
			byte[] chunk = new byte[4096];

			using (Stream source = inputZip.GetInputStream(inputFile))
			{
				for (;;)
				{
					int bytesRead = source.Read(chunk, 0, chunk.Length);
					s.Write(chunk, 0, bytesRead);

					// Nothing read means the end of the entry has been reached
					if (bytesRead <= 0)
						break;
				}
			}
		}

		/// <summary>
		/// Extracts an entry from the source zip to a file on disk, replacing any existing file.
		/// </summary>
		/// <param name="entryFileName">Path of the file to write.</param>
		/// <param name="inputZip">The source zip file.</param>
		/// <param name="inputFile">The entry in the source zip to extract.</param>
		private static void ReadZipEntry(string entryFileName, ZipFile inputZip, ZipEntry inputFile)
		{
			// A small fixed size buffer keeps a lid on memory usage
			byte[] buffer = new byte[4096];

			// File.Create truncates an existing file, so a longer file left over from an
			// earlier run cannot leave stale bytes after the extracted content
			using (FileStream fileStream = File.Create(entryFileName))
			{
				using (Stream inStream = inputZip.GetInputStream(inputFile))
				{
					int sourceBytes;
					while ((sourceBytes = inStream.Read(buffer, 0, buffer.Length)) > 0)
					{
						fileStream.Write(buffer, 0, sourceBytes);
					}
				}
			}
		}

		/// <summary>
		/// Adds the content of a file on disk to the output zip as a new entry.
		/// </summary>
		/// <param name="entryFileName">Name to give the new entry in the output zip.</param>
		/// <param name="s">The output zip stream.</param>
		/// <param name="inputFileName">Path of the file whose content is added.</param>
		private static void AddZipEntry(string entryFileName, ZipOutputStream s, string inputFileName)
		{
			// Start the new entry, stamped with the current time
			ZipEntry target = new ZipEntry(entryFileName);
			target.DateTime = DateTime.Now;
			s.PutNextEntry(target);

			// A small fixed size chunk keeps memory usage down
			byte[] chunk = new byte[4096];

			using (FileStream source = File.OpenRead(inputFileName))
			{
				for (;;)
				{
					int bytesRead = source.Read(chunk, 0, chunk.Length);
					s.Write(chunk, 0, bytesRead);

					// Nothing read means the end of the file has been reached
					if (bytesRead <= 0)
						break;
				}
			}
		}
	}
}
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.
License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)
Written By
Lee Humphries
Founder md8n
Timor-Leste
If it ain't broke - that can be arranged.
Introduction to XPS - Part 1 of n (of too many)

License

Comments and Discussions