|
using System;
using System.IO;
using org.apache.pdfbox.pdmodel;
using org.apache.pdfbox.util;
namespace Pdf2Text
{
class Program
{
    /// <summary>
    /// The main entry point for the application. Extracts the text of the PDF
    /// named by the first argument and writes it to the file named by the
    /// second argument, then reports the elapsed time on the console.
    /// </summary>
    /// <param name="args">
    /// <c>args[0]</c> is the input PDF path; <c>args[1]</c> is the output text
    /// file path. If fewer than two arguments are supplied, a usage message is
    /// printed and nothing else is done.
    /// </param>
    [STAThread]
    static void Main(string[] args)
    {
        // Stopwatch is unaffected by wall-clock changes (DST, NTP adjustments),
        // unlike subtracting two DateTime.Now readings.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

        if (args.Length < 2)
        {
            Console.WriteLine("Usage: PDF2TEXT <input filename (PDF)> <output filename (text)>");
            return;
        }

        // Extract the text BEFORE opening the output file: creating the
        // StreamWriter truncates any existing file, so a failed PDF load must
        // not be allowed to leave an empty or clobbered output behind.
        string text = parseUsingPDFBox(args[0]);

        using (StreamWriter sw = new StreamWriter(args[1]))
        {
            sw.WriteLine(text);
        }

        Console.WriteLine("Done. Took " + timer.Elapsed);
    }

    /// <summary>
    /// Loads the PDF at <paramref name="input"/> with PDFBox and returns the
    /// text produced by a default-configured <c>PDFTextStripper</c>.
    /// </summary>
    /// <param name="input">Path of the PDF file to read.</param>
    /// <returns>The string returned by <c>PDFTextStripper.getText</c>.</returns>
    private static string parseUsingPDFBox(string input)
    {
        PDDocument doc = PDDocument.load(input);
        try
        {
            PDFTextStripper stripper = new PDFTextStripper();
            return stripper.getText(doc);
        }
        finally
        {
            // A loaded PDDocument must be closed explicitly to release the
            // resources it holds; do so even when text extraction throws.
            doc.close();
        }
    }
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.