Click here to Skip to main content
12,635,132 members (25,416 online)
Click here to Skip to main content
Articles » Web Development » ASP.NET » Howto » Downloads

Stats

10.5K views
208 downloads
8 bookmarked
Posted

From pdf files to plain text in a WebMatrix site

Gianmaria Gregori, 14 Mar 2013 CPOL
How to use the PDFBox Java library in an ASP.NET Web Pages project
@using Microsoft.Web.Helpers; 

@{
    // Page-level state rendered by the markup below. Every value keeps its
    // default unless a PDF upload was received and processed on this request.
    TimeSpan elapsed = TimeSpan.Zero;
    var fileName = "";
    var fileTitle = "";
    var fileSubject = "";
    var fileAuthor = "";
    var fileCreator = "";
    var fileProducer = "";
    var fileKeywords = "";
    DateTime fileCreation = DateTime.MinValue;
    DateTime fileModify = DateTime.MinValue;
    long fileLength = 0;

    if (IsPost) {
        // Stopwatch measures elapsed time independently of wall-clock changes.
        var timer = System.Diagnostics.Stopwatch.StartNew();

        // The form can be submitted without choosing a file; guard against a
        // missing or empty upload instead of failing on Request.Files[0].
        var uploadedFile = Request.Files.Count > 0 ? Request.Files[0] : null;

        // Path.GetFileName drops any directory part sent by the browser, so the
        // client cannot choose where the file lands on the server.
        var uploadedName = uploadedFile == null
            ? ""
            : (Path.GetFileName(uploadedFile.FileName) ?? "");

        // Only accept *.pdf: the file is stored under the web root using the
        // client's name, so other extensions (e.g. .cshtml) must not be saved.
        var isPdf = uploadedName.Length > 0
            && uploadedFile.ContentLength > 0
            && string.Equals(Path.GetExtension(uploadedName), ".pdf", StringComparison.OrdinalIgnoreCase);

        if (isPdf) {
            fileName = uploadedName;
            var fileSavePath = Server.MapPath("~/UploadedFiles/" + fileName);
            uploadedFile.SaveAs(fileSavePath);

            // PdfFile is a project helper defined outside this page; it exposes
            // the document metadata and the extracted text read below.
            PdfFile file = new PdfFile(fileSavePath);
            fileTitle = file.Title;
            fileSubject = file.Subject;
            fileAuthor = file.Author;
            fileCreator = file.Creator;
            fileProducer = file.Producer;
            fileKeywords = file.Keywords;
            fileCreation = file.Created;
            fileModify = file.Modified;

            // Read the extracted text once and treat a null result as empty.
            var content = file.Content ?? "";
            fileLength = content.Length;

            // The text is written to a fixed location that the download link in
            // the markup points at.
            // NOTE(review): concurrent uploads overwrite the same Content.txt.
            var destFile = Server.MapPath("~/Temp/Content.txt");
            using (StreamWriter sw = new StreamWriter(destFile)) {
                sw.WriteLine(content);
            }
        }

        elapsed = timer.Elapsed;
    }
}

@* Page markup: an upload form for a PDF file, followed (after a post that
   produced a file name) by the document metadata and a button that opens the
   extracted text through download.cshtml. *@
<!DOCTYPE html>

<html lang="en">
    <head>
        <meta charset="utf-8" />
        <title>From PDF to Text</title>
        <link href="~/favicon.ico" rel="shortcut icon" type="image/x-icon" />
        <link href="~/Content/Style.css" rel="stylesheet" type="text/css" />
    </head>
    <body>
        <h2>From PDF to Text</h2>
        <div>
            <form enctype="multipart/form-data" method="post">
                <p><label for="fileUpload">PDF file</label></p>
                @* includeFormTag is false because the surrounding form already
                   supplies the multipart encoding and the submit button. *@
                @FileUpload.GetHtml( 
                    initialNumberOfFiles:1, 
                    allowMoreFilesToBeAdded:false, 
                    includeFormTag:false, 
                    uploadText:"")
                <div>
                    <input type="submit" name="action" value="Upload" />
                </div>
            </form>
        </div>
        <hr>
        @if (IsPost && !string.IsNullOrEmpty(fileName)) {
            <div>
                <h3>Uploaded file: @fileName</h3>
                <p>Title: @fileTitle</p>
                <p>Subject: @fileSubject</p>
                <p>Author: @fileAuthor</p>
                <p>Creator: @fileCreator</p>
                <p>Producer: @fileProducer</p>
                <p>Keywords: @fileKeywords</p>
                <p>Created: @fileCreation</p>
                <p>Modified: @fileModify</p>
            </div>
            <hr>
            <div>
                <h3>@fileLength characters extracted in @elapsed</h3>
                @if (fileLength > 0) {
                    var fname = "Content.txt";
                    @* location.href is a property, not a function: assign to it
                       so the button navigates in every browser. *@
                    <input type="button" 
                        onclick="location.href = 'download.cshtml?filename=/Temp/@fname';" value="Open">
                }
            </div>
        }
        else if (IsPost) {
            <p>No PDF file was processed. Please choose a file and try again.</p>
        }
    </body>
</html>

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Gianmaria Gregori
Chief Technology Officer Federfarma Pavia
Italy
No Biography provided

You may also be interested in...

| Advertise | Privacy | Terms of Use | Mobile
Web02 | 2.8.161208.2 | Last Updated 15 Mar 2013
Article Copyright 2013 by Gianmaria Gregori
Everything else Copyright © CodeProject, 1999-2016
Layout: fixed | fluid