Click here to Skip to main content
15,892,768 members
Articles / Programming Languages / C#

Extract Text from PDF in C# (100% .NET)

Rate me:
Please Sign up or sign in to vote.
3.67/5 (60 votes)
20 May 2006 | CPOL | 1 min read | 972K | 120.4K | 174
A simple class to extract plain text from PDF documents with iTextSharp
using System;
using System.Text;
using System.IO;

namespace PdfToText
{
    /// <summary>
    /// The main entry point to the program: a console front end that hands a
    /// PDF file named on the command line to <c>PDFParser</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Validates the command line and invokes <c>PDFParser.ExtractText</c>
        /// for the given PDF file.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments; <c>args[0]</c> is the path to the PDF file,
        /// relative or absolute. With no arguments the usage text is printed.
        /// </param>
        /// <remarks>
        /// Any exception raised while processing is caught and written to the
        /// console rather than propagated.
        /// </remarks>
        static void Main(string[] args)
        {
            try
            {
                if (args.Length < 1)
                {
                    DisplayUsage();
                    return;
                }

                string file = args[0];
                if (!File.Exists(file))
                {
                    // Retry with the fully-qualified form of the path before giving up.
                    file = Path.GetFullPath(file);
                    if (!File.Exists(file))
                    {
                        Console.WriteLine("Please give in the path to the PDF file.");
                        // Nothing to parse: stop here instead of passing a
                        // non-existent path on to the parser.
                        return;
                    }
                }

                // The second argument is the input's base name with a ".txt"
                // extension and no directory part (presumably the output file,
                // resolved against the current working directory - confirm in PDFParser).
                string textFile = Path.GetFileNameWithoutExtension(file) + ".txt";

                PDFParser pdfParser = new PDFParser();
                pdfParser.ExtractText(file, textFile);
            }
            catch (Exception exc)
            {
                // Top-level handler: report the failure to the user instead of crashing.
                Console.WriteLine(exc);
            }
        }

        /// <summary>
        /// Writes the command-line usage help to the console.
        /// </summary>
        static void DisplayUsage()
        {
            Console.WriteLine();
            Console.WriteLine("Usage:\tpdftotext FILE");
            Console.WriteLine();
            Console.WriteLine("\tFILE\t the path to the PDF file, it may be relative or absolute.");
            Console.WriteLine();
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Web Developer
Romania Romania
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions