Click here to Skip to main content
15,894,410 members
Articles / Web Development / ASP.NET

From pdf files to plain text in a WebMatrix site

Rate me:
Please Sign up or sign in to vote.
4.10/5 (3 votes)
14 Mar 2013 | CPOL | 2 min read | 22.5K | 270 | 8
How to use the PDFBox Java library in an ASP.NET Web Pages project
@using Microsoft.Web.Helpers; 

@{
    // Page-level state rendered by the markup below. Everything keeps its
    // default value when the request is not a POST or no usable file was sent.
    TimeSpan elapsed = TimeSpan.Zero;
    var fileName = "";
    var fileTitle = "";
    var fileSubject = "";
    var fileAuthor = "";
    var fileCreator = "";
    var fileProducer = "";
    var fileKeywords = "";
    DateTime fileCreation = DateTime.MinValue;
    DateTime fileModify = DateTime.MinValue;
    long fileLength = 0;

    // A form can be posted without selecting a file; in that case there is
    // nothing to save or parse, so skip processing instead of failing in SaveAs.
    var uploadedFile = (IsPost && Request.Files.Count > 0) ? Request.Files[0] : null;
    if (uploadedFile != null && uploadedFile.ContentLength > 0) {
        // Stopwatch is monotonic, unlike subtracting two DateTime.Now readings.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // GetFileName drops any client-supplied directory part so the upload
        // cannot be written outside ~/UploadedFiles/.
        fileName = Path.GetFileName(uploadedFile.FileName);

        // The upload folder is inside the site root: accept only .pdf so that a
        // posted script file (e.g. .cshtml/.aspx) is never stored where it could
        // later be requested and executed.
        var isPdf = string.Equals(Path.GetExtension(fileName), ".pdf",
                                  StringComparison.OrdinalIgnoreCase);
        if (isPdf) {
            var fileSavePath = Server.MapPath("~/UploadedFiles/" + fileName);
            uploadedFile.SaveAs(fileSavePath);

            // Read the document metadata and extracted text from the saved file.
            PdfFile file = new PdfFile(fileSavePath);
            fileTitle = file.Title;
            fileSubject = file.Subject;
            fileAuthor = file.Author;
            fileCreator = file.Creator;
            fileProducer = file.Producer;
            fileKeywords = file.Keywords;
            fileCreation = file.Created;
            fileModify = file.Modified;

            // Guard against a document for which no text content was produced.
            var content = file.Content;
            fileLength = content == null ? 0 : content.Length;

            // Store the extracted text where the "Open" button's download link
            // (/Temp/Content.txt) expects to find it.
            var destFile = Server.MapPath("~/Temp/Content.txt");
            using (StreamWriter sw = new StreamWriter(destFile)) {
                sw.WriteLine(content);
            }
        }

        elapsed = stopwatch.Elapsed;
    }
}

@* Upload form plus, after a POST, the metadata and extraction summary
   computed by the code block at the top of this page. *@
<!DOCTYPE html>

<html lang="en">
    <head>
        <meta charset="utf-8" />
        <title>From PDF to Text</title>
        <link href="~/favicon.ico" rel="shortcut icon" type="image/x-icon" />
        <link href="~/Content/Style.css" rel="stylesheet" type="text/css" />
    </head>
    <body>
        <h2>From PDF to Text</h2>
        <div>
            @* The form tag is written by hand (includeFormTag:false) so the
               submit button can live inside the same multipart form. *@
            <form enctype="multipart/form-data" method="post">
                <p><label for="fileUpload">PDF file</label></p>
                @FileUpload.GetHtml( 
                    initialNumberOfFiles:1, 
                    allowMoreFilesToBeAdded:false, 
                    includeFormTag:false, 
                    uploadText:"")
                <div>
                    <input type="submit" name="action" value="Upload" />
                </div>
            </form>
        </div>
        <hr>
        @if(IsPost){
            @* Metadata read from the uploaded document. *@
            <div>
                <h3>Uploaded file: @fileName</h3>
                <p>Title: @fileTitle</p>
                <p>Subject: @fileSubject</p>
                <p>Author: @fileAuthor</p>
                <p>Creator: @fileCreator</p>
                <p>Producer: @fileProducer</p>
                <p>Keywords: @fileKeywords</p>
                <p>Created: @fileCreation</p>
                <p>Modified: @fileModify</p>
            </div>
            <hr>
            <div>
                <h3>@fileLength characters extracted in @elapsed</h3>
                @if (fileLength > 0) {
                    var fname = "Content.txt";
                    // location.href is a string property, not a function:
                    // assign to it to navigate to the download page.
                    <input type="button" 
                        onclick="location.href = 'download.cshtml?filename=/Temp/@fname';" value="Open">
                }
            </div>
        }
    </body>
</html>

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Chief Technology Officer Federfarma Pavia
Italy
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions