<!-- Layout: a browse row (button + path box), then the OCR output viewer,
     the Extract button and a status bar that shows the caret position. -->
<Grid>
    <StackPanel Orientation="Vertical" VerticalAlignment="Top" HorizontalAlignment="Left">
        <!-- Row 1: pick the input file. -->
        <StackPanel Orientation="Horizontal" VerticalAlignment="Top" HorizontalAlignment="Left" Margin="10,10,0,0">
            <Button Name="btnBrowse" Width="60" Height="30" Content="Browse" Click="btnBrowse_Click"/>
            <TextBox Name="txtPath" Width="400" Height="30" Margin="10,0,0,0" />
        </StackPanel>
        <!-- Row 2: extracted text, the Extract trigger and the caret read-out. -->
        <StackPanel Orientation="Vertical" HorizontalAlignment="Stretch">
            <TextBox Name="txtViewer" Width="400" Height="300" HorizontalAlignment="Center" Margin="80,10,10,10" AcceptsReturn="True" SelectionChanged="txtViewer_SelectionChanged" />
            <Button Name="btnExtract" Width="60" Height="30" Content="Extract" Click="btnExtract_Click" />
            <!-- NOTE(review): the original attribute read removed="white", which is not a
                 StatusBar property; it is presumed to be a mangled Background="White". Confirm. -->
            <StatusBar Name="stsbar" Height="30" Background="White">
                <TextBlock Name="lblCursorPosition" />
            </StatusBar>
        </StackPanel>
    </StackPanel>
</Grid>
namespace TestProject
{
    // NOTE(review): the enclosing class header was missing from this snippet (methods sat
    // directly inside the namespace and the closing braces did not balance). The name
    // MainWindow is an assumption; confirm it against the x:Class of the XAML window.
    public partial class MainWindow
    {
        /// <summary>
        /// Opens the PDF at <c>m_pdfpath</c>, OCRs every page with Tesseract ("eng"),
        /// appends each page's text and per-page counts to <c>txtViewer</c>, then appends
        /// the totals and shows a completion message box.
        /// </summary>
        /// <remarks>
        /// Exceptions raised by the PDF or OCR calls are not handled here and propagate
        /// to the caller.
        /// </remarks>
        public void PdfOcr()
        {
            PDFDoc doc = PDFDoc.Open(m_pdfpath);
            int pgcount = doc.PageCount;
            int charcount = 0;
            int wordcount = 0;

            // Page numbers passed to GetPageImage are 1-based.
            for (int i = 1; i <= pgcount; i++)
            {
                string img = doc.GetPageImage(i);
                var hocr = OcrController.CreateHOCR(OcrMode.Tesseract, "eng", img);

                hDocument d = new hDocument();
                d.AddFile(hocr);

                PageDetail pg = new PageDetail();
                pg.Pageno = i;

                // Walk page -> paragraph -> line -> word and tally each level.
                foreach (var page in d.Pages)
                {
                    foreach (var para in page.Paragraphs)
                    {
                        pg.ParaCount++;
                        // Set inside the paragraph loop on purpose: Text is only
                        // assigned when the OCR page has at least one paragraph.
                        pg.Text = page.Text;

                        foreach (var line in para.Lines)
                        {
                            pg.LineCount++;

                            foreach (var words in line.Words)
                            {
                                pg.WordCount++;
                                pg.CharCount = pg.CharCount + words.Text.Length;
                            }
                        }
                    }
                }

                string pageSummary = "Pages:" + pg.Pageno + "Line : " + pg.LineCount + "|Para:" + pg.ParaCount
                    + "|Word : " + pg.WordCount + "|Char : " + pg.CharCount + Environment.NewLine;

                // AppendText adds to the existing content instead of rebuilding the
                // whole string the way "Text +=" does on every page.
                txtViewer.AppendText(pg.Text + pageSummary);

                charcount += pg.CharCount;
                wordcount += pg.WordCount;
            }

            txtViewer.AppendText("TotalPageno:" + pgcount + Environment.NewLine);

            // NOTE(review): the total adds the word count to the character count,
            // presumably to approximate one separator per word. Confirm this is intended.
            txtViewer.AppendText("TotalCharacters : " + (wordcount + charcount) + Environment.NewLine);

            MessageBox.Show("Sucessfully done...");
        }

        /// <summary>
        /// Click handler for the Extract button; runs <see cref="PdfOcr"/>.
        /// </summary>
        private void btnExtract_Click(object sender, RoutedEventArgs e)
        {
            PdfOcr();
        }

        /// <summary>
        /// Updates <c>lblCursorPosition</c> with the caret's 1-based line and column
        /// whenever the selection in <c>txtViewer</c> changes.
        /// </summary>
        private void txtViewer_SelectionChanged(object sender, RoutedEventArgs e)
        {
            int row = txtViewer.GetLineIndexFromCharacterIndex(txtViewer.CaretIndex);
            // Column = caret offset from the first character of its line.
            int col = txtViewer.CaretIndex - txtViewer.GetCharacterIndexFromLineIndex(row);
            lblCursorPosition.Text = "line" + (row + 1) + ",char" + (col + 1);
        }
    }
}
// Append msg to the viewer on the application's dispatcher at Background priority.
Action appendMessage = () => txtViewer.Text += msg;
Application.Current.Dispatcher.Invoke(DispatcherPriority.Background, appendMessage);
var
This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)