Click here to Skip to main content
15,886,079 members
Articles / Programming Languages / Visual Basic

PDF Viewer Control Without Acrobat Reader Installed

Rate me:
Please Sign up or sign in to vote.
4.95/5 (143 votes)
6 Oct 2009 | CPOL | 4 min read | 1.4M | 73.7K | 496
PDF document viewer control that does not require any Acrobat product to be installed
Imports System.Drawing
Imports System.Windows.Forms

''' <summary>
''' Static helpers that run the tessnet2 Tesseract wrapper over an image, either to
''' extract plain text or to build a per-word position index.
''' </summary>
Public Class TesseractOCR

  ''' <summary>
  ''' Runs OCR over a grayscale copy of <paramref name="bm"/> and returns the recognized
  ''' text, one recognized line per output line.
  ''' </summary>
  ''' <param name="bm">Image to recognize; it is passed through ImageUtil.MakeGrayscale first.</param>
  ''' <param name="language">Language code handed to the engine's Init call (see <see cref="Language"/>).</param>
  ''' <returns>
  ''' Every recognized line followed by vbCrLf, or an empty string when the engine
  ''' yields no word list.
  ''' </returns>
  Public Shared Function OCRImage(ByVal bm As System.Drawing.Image, ByVal language As String) As String
    Dim result As New System.Text.StringBuilder()
    Dim oOCR As New tessnet2.Tesseract
    Try
      ' NOTE(review): first argument is presumably the tessdata path (Nothing = engine default)
      ' and the third the numeric-only switch - confirm against the tessnet2 documentation.
      oOCR.Init(Nothing, language, False)
      ' NOTE(review): if ImageUtil.MakeGrayscale allocates a new bitmap it is never disposed
      ' here; its ownership contract is not visible from this file - confirm.
      Dim WordList As List(Of tessnet2.Word) = oOCR.doOCR(ImageUtil.MakeGrayscale(bm), Rectangle.Empty)
      ' GetPDFIndex guards against a missing word list; do the same here instead of
      ' passing Nothing on to LineCount.
      If WordList IsNot Nothing Then
        Dim LineCount As Integer = tessnet2.Tesseract.LineCount(WordList)
        For i As Integer = 0 To LineCount - 1
          result.Append(tessnet2.Tesseract.GetLineText(WordList, i)).Append(vbCrLf)
        Next
        ' Debugging aid: call OCRPaintWordBorders(bm, WordList) here to visualize the word boxes.
      End If
    Finally
      ' Release the engine even when Init or doOCR throws.
      oOCR.Dispose()
    End Try
    Return result.ToString()
  End Function

  ''' <summary>
  ''' Debug helper: outlines every word of <paramref name="WordList"/> directly on
  ''' <paramref name="img"/>. Does nothing when the word list is Nothing.
  ''' </summary>
  ''' <param name="img">Image that is drawn on in place.</param>
  ''' <param name="WordList">Words whose Left/Top/Right/Bottom bounds are outlined.</param>
  Public Shared Sub OCRPaintWordBorders(ByRef img As System.Drawing.Image, ByVal WordList As List(Of tessnet2.Word))
    If WordList Is Nothing Then
      Return
    End If

    Using g As Graphics = Graphics.FromImage(img)
      ' NOTE(review): redraws the image onto its own surface; kept from the original
      ' implementation although it looks redundant - confirm before removing.
      g.DrawImage(img, 0, 0)
      For Each word As tessnet2.Word In WordList
        ' The confidence drives the blue channel. Color.FromArgb rejects components
        ' outside 0..255, so clamp before converting.
        Dim blue As Integer = CInt(System.Math.Max(0.0R, System.Math.Min(255.0R, word.Confidence)))
        ' A Pen wraps a GDI handle; dispose each one instead of leaking one per word.
        Using borderPen As New Pen(Color.FromArgb(255, 128, blue))
          g.DrawRectangle(borderPen, word.Left, word.Top, word.Right - word.Left, word.Bottom - word.Top)
        End Using
      Next
    End Using
  End Sub

  ''' <summary>
  ''' Runs OCR over <paramref name="imgOCR"/> and returns one <see cref="PDFWordIndex"/>
  ''' per recognized word, carrying the word's bounding box, point size and text.
  ''' </summary>
  ''' <param name="imgOCR">Image to recognize; passed to the engine as-is (no grayscale conversion).</param>
  ''' <param name="language">Language code handed to the engine's Init call (see <see cref="Language"/>).</param>
  ''' <returns>The word index; empty (never Nothing) when the engine yields no word list.</returns>
  Public Shared Function GetPDFIndex(ByRef imgOCR As System.Drawing.Image, ByVal language As String) As List(Of PDFWordIndex)
    Dim index As New List(Of PDFWordIndex)
    Dim oOCR As New tessnet2.Tesseract
    Try
      oOCR.Init(Nothing, language, False)
      Dim WordList As List(Of tessnet2.Word) = oOCR.doOCR(imgOCR, Rectangle.Empty)
      If WordList IsNot Nothing Then
        For Each word As tessnet2.Word In WordList
          Dim entry As New PDFWordIndex
          entry.X = word.Left
          entry.Y = word.Top
          entry.Width = word.Right - word.Left
          entry.Height = word.Bottom - word.Top
          entry.FontSize = word.PointSize
          entry.Text = word.Text
          index.Add(entry)
        Next
      End If
    Finally
      ' The engine was previously never released on this path; dispose it like OCRImage does.
      oOCR.Dispose()
    End Try
    Return index
  End Function

  ''' <summary>
  ''' Named language codes for use as the <c>language</c> argument of the OCR methods.
  ''' </summary>
  Public Structure Language
    ' Unused instance field. NOTE(review): presumably present only because older VB
    ' compilers reject a Structure without an instance member - confirm before removing.
    Dim i As Integer
    Public Shared English As String = "eng"
    Public Shared Spanish As String = "spa"
    Public Shared German As String = "deu"
    Public Shared Italian As String = "ita"
    Public Shared French As String = "fra"
    Public Shared Dutch As String = "nld"
    Public Shared Portuguese As String = "por"
    Public Shared Vietnamese As String = "vie"
    Public Shared Basque As String = "eus"
    Public Shared Fraktur As String = "deu-f"
  End Structure

End Class

''' <summary>
''' Position, size and text of a single recognized word, as filled in by
''' TesseractOCR.GetPDFIndex.
''' </summary>
Public Class PDFWordIndex
  ''' <summary>Left (X) and top (Y) edge of the word's bounding box as reported by the OCR engine.</summary>
  Public X, Y As Integer
  ''' <summary>Extent of the bounding box: right minus left, and bottom minus top.</summary>
  Public Width, Height As Integer
  ''' <summary>Point size the OCR engine reported for the word.</summary>
  Public FontSize As Integer
  ''' <summary>Recognized text of the word.</summary>
  Public Text As String
End Class


By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer
United States
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions