Click here to Skip to main content
Click here to Skip to main content
Articles » Web Development » ASP.NET » Howto » Downloads
 
Add your own
alternative version

From pdf files to plain text in a WebMatrix site

Gianmaria Gregori, 14 Mar 2013
How to use the PDFBox Java library in an ASP.NET Web Pages project
Pdf2Text.zip
Pdf2TextSite
App_Code
Content
Temp
UploadedFiles
@using Microsoft.Web.Helpers; 

@{
    // Page state rendered by the markup further down. Every value keeps its
    // default on a GET request and on a POST that carries no usable PDF upload.
    TimeSpan elapsed = TimeSpan.Zero;
    var fileName = "";
    var fileTitle = "";
    var fileSubject = "";
    var fileAuthor = "";
    var fileCreator = "";
    var fileProducer = "";
    var fileKeywords = "";
    DateTime fileCreation = DateTime.MinValue;
    DateTime fileModify = DateTime.MinValue;
    long fileLength = 0;

    if (IsPost) {
        // Stopwatch measures elapsed time directly; subtracting two
        // DateTime.Now readings is distorted by any system clock adjustment.
        var timer = System.Diagnostics.Stopwatch.StartNew();

        // Submitting the form without choosing a file still posts an (empty)
        // entry, so check both that an entry exists and that it has content.
        var uploadedFile = Request.Files.Count > 0 ? Request.Files[0] : null;
        if (uploadedFile != null && uploadedFile.ContentLength > 0) {
            // Keep only the file-name part of whatever path the client sent.
            fileName = Path.GetFileName(uploadedFile.FileName);
        }

        // ~/UploadedFiles lives inside the site, so saving arbitrary file types
        // there under a client-chosen name would let someone upload a script
        // (.cshtml, .aspx, ...) and request it. Only accept the .pdf extension.
        var isPdf = string.Equals(
            Path.GetExtension(fileName), ".pdf", StringComparison.OrdinalIgnoreCase);

        if (isPdf) {
            var fileSavePath = Server.MapPath("~/UploadedFiles/" + fileName);
            uploadedFile.SaveAs(fileSavePath);

            // PdfFile is declared elsewhere in the project (not visible here);
            // it exposes the document metadata and the extracted text.
            PdfFile file = new PdfFile(fileSavePath);
            fileTitle = file.Title;
            fileSubject = file.Subject;
            fileAuthor = file.Author;
            fileCreator = file.Creator;
            fileProducer = file.Producer;
            fileKeywords = file.Keywords;
            fileCreation = file.Created;
            fileModify = file.Modified;

            // Read Content once and reuse it for both the length and the file.
            // NOTE(review): the getter's cost is not visible here - confirm.
            var content = file.Content;
            fileLength = content.Length;

            // NOTE(review): every request overwrites the same ~/Temp/Content.txt,
            // so concurrent uploads can clobber each other's result. The markup
            // links to this fixed name, so it is kept unchanged here.
            var destFile = Server.MapPath("~/Temp/Content.txt");
            using (StreamWriter sw = new StreamWriter(destFile)) {
                sw.WriteLine(content);
            }
        }

        elapsed = timer.Elapsed;
    }
}

<!DOCTYPE html>

@* Upload form plus, after a POST, the metadata and extraction summary
   computed by the code block at the top of this page. *@
<html lang="en">
    <head>
        <meta charset="utf-8" />
        <title>From PDF to Text</title>
        <link href="~/favicon.ico" rel="shortcut icon" type="image/x-icon" />
        <link href="~/Content/Style.css" rel="stylesheet" type="text/css" />
        @* NOTE(review): nothing on this page calls myFunction; it looks like
           leftover sample code - confirm before removing. *@
        <script type="text/javascript">
            function myFunction()
            {
                alert("Hello World!");
            }
        </script>
    </head>
    <body>
        <h2>From PDF to Text</h2>
        <div>
            @* includeFormTag:false because the surrounding <form> is written by hand. *@
            <form enctype="multipart/form-data" method="post">
                <p><label for="fileUpload">PDF file</label></p>
                @FileUpload.GetHtml( 
                    initialNumberOfFiles:1, 
                    allowMoreFilesToBeAdded:false, 
                    includeFormTag:false, 
                    uploadText:"")
                <div>
                    <input type="submit" name="action" value="Upload" />
                </div>
            </form>
        </div>
        <hr>
        @if(IsPost){
            @* Razor HTML-encodes each @value below, so metadata taken from the
               uploaded document cannot inject markup into the page. *@
            <div>
                <h3>Uploaded file: @fileName</h3>
                <p>Title: @fileTitle</p>
                <p>Subject: @fileSubject</p>
                <p>Author: @fileAuthor</p>
                <p>Creator: @fileCreator</p>
                <p>Producer: @fileProducer</p>
                <p>Keywords: @fileKeywords</p>
                <p>Created: @fileCreation</p>
                <p>Modified: @fileModify</p>
            </div>
            <hr>
            <div>
                <h3>@fileLength characters extracted in @elapsed</h3>
                @if (fileLength > 0) {
                    var fname = "Content.txt";
                    @* location.href is a string property, not a function: it has
                       to be assigned. Calling it throws a TypeError in
                       standards-compliant browsers and the button does nothing. *@
                    <input type="button" 
                        onclick="location.href = 'download.cshtml?filename=/Temp/@fname';" value="Open">
                }
            </div>
        }
    </body>
</html>

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Gianmaria Gregori
Chief Technology Officer Federfarma Pavia
Italy
No Biography provided
Follow on   Twitter

| Advertise | Privacy | Mobile
Web03 | 2.8.140827.1 | Last Updated 15 Mar 2013
Article Copyright 2013 by Gianmaria Gregori
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid