using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
// PDF reading (iTextSharp)
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
// Excel workbook writing (Spire.XLS)
using Spire.Xls;
using Spire.Xls.Charts;
using System.IO;
using iTextSharp.text;

namespace WindowsFormsApplication1
{
    /// <summary>
    /// Form that extracts the text of a PDF, copies selected fields of each
    /// line into an Excel worksheet and shows the raw text in <c>textBox1</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Workbook path used when the caller does not supply one.</summary>
        private const string DefaultOutputPath = "F:/finalexel.xls";

        /// <summary>PDF that is processed when the form loads.</summary>
        private const string DefaultInputPath = "F:/Beed-003.pdf";

        /// <summary>Row 1 holds the column titles, so data starts on row 2.</summary>
        private const int FirstDataRow = 2;

        /// <summary>Creates the form and its designer-generated controls.</summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Reads <paramref name="fileName"/> and writes the extracted fields to
        /// the default workbook path.
        /// </summary>
        /// <param name="fileName">Path of the PDF to read.</param>
        /// <returns>
        /// The concatenated text of all pages, trimmed; an empty string when the
        /// file does not exist.
        /// </returns>
        public string ReadPdfFile(string fileName)
        {
            return ReadPdfFile(fileName, DefaultOutputPath);
        }

        /// <summary>
        /// Reads <paramref name="fileName"/> page by page, writes the fields found
        /// on each line to a new workbook saved at <paramref name="outputPath"/>,
        /// and shows the raw text in <c>textBox1</c>.
        /// </summary>
        /// <param name="fileName">Path of the PDF to read.</param>
        /// <param name="outputPath">Path the workbook is saved to.</param>
        /// <returns>
        /// The concatenated text of all pages, trimmed; an empty string when the
        /// file does not exist (in which case nothing is written and
        /// <c>textBox1</c> is left untouched).
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="outputPath"/> is null or empty.
        /// </exception>
        public string ReadPdfFile(string fileName, string outputPath)
        {
            if (string.IsNullOrEmpty(outputPath))
            {
                throw new ArgumentException("An output workbook path is required.", "outputPath");
            }

            if (!System.IO.File.Exists(fileName))
            {
                return string.Empty;
            }

            StringBuilder text = new StringBuilder();
            PdfReader pdfReader = new PdfReader(fileName);
            try
            {
                Workbook workbook = new Workbook();
                Worksheet sheet = workbook.Worksheets[0];
                WriteHeaderRow(sheet);

                // The row cursors are declared outside the page loop on purpose:
                // resetting them for every page made each page overwrite the rows
                // written for the previous one.
                int idRow = FirstDataRow;
                int voterNameRow = FirstDataRow;
                int fatherNameRow = FirstDataRow;
                int houseNumberRow = FirstDataRow;
                int ageGenderRow = FirstDataRow;

                for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // The extracted text is already a .NET string. The former
                    // UTF-8 -> "Unicode to Unicode" -> UTF-8 byte round trip was
                    // at best a no-op and damaged the last character whenever the
                    // UTF-8 byte count was odd, so it is no longer applied.
                    string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);

                    foreach (string line in currentText.Split('\n'))
                    {
                        if (!line.Contains(":"))
                        {
                            // Lines without a label go to column A, except those
                            // containing the marker below.
                            if (!line.Contains("नाळ"))
                            {
                                sheet.Range["a" + idRow].Text = line;
                                idRow++;
                            }

                            continue;
                        }

                        // A line containing ':' always splits into at least two parts.
                        string[] fields = line.Split(':');
                        string label = fields[0];
                        string value = fields[1];
                        if (value.Length <= 1)
                        {
                            continue;
                        }

                        // The label literals are matched exactly as the PDF
                        // extraction yields them. NOTE(review): several look like
                        // garbled forms of the header words - confirm against a
                        // sample document before "correcting" them.
                        if (label.Contains("मतदार"))
                        {
                            sheet.Range["B" + voterNameRow].Text = value;
                            voterNameRow++;
                        }

                        if (label.Contains("ळडडऱांचे नाळ"))
                        {
                            sheet.Range["C" + fatherNameRow].Text = value;
                            fatherNameRow++;
                        }

                        if (label.Contains("घर क्रमांक"))
                        {
                            sheet.Range["D" + houseNumberRow].Text = value;
                            houseNumberRow++;
                        }

                        if (label.Contains("ळय"))
                        {
                            // Column E takes the third space-separated token of the
                            // value; column F takes the text after the second ':'.
                            // Either part may be missing on a malformed line, so
                            // each is bounds-checked instead of throwing
                            // IndexOutOfRangeException.
                            string[] valueTokens = value.Split(' ');
                            if (valueTokens.Length > 2)
                            {
                                sheet.Range["E" + ageGenderRow].Text = valueTokens[2];
                            }

                            if (fields.Length > 2)
                            {
                                sheet.Range["F" + ageGenderRow].Text = fields[2];
                            }

                            // Advance even when a part was missing so columns E and
                            // F stay aligned with each other.
                            ageGenderRow++;
                        }
                    }

                    text.Append(currentText);
                }

                // Save once, after every page has been processed.
                workbook.SaveToFile(outputPath);
            }
            finally
            {
                // Release the PDF even when extraction or saving fails.
                pdfReader.Close();
            }

            textBox1.Text = text.ToString();
            return text.ToString().Trim();
        }

        /// <summary>
        /// Writes the column titles to row 1 of <paramref name="sheet"/>: ID card
        /// number, voter's full name, father's name, house number, age, gender.
        /// </summary>
        /// <param name="sheet">Worksheet that receives the titles.</param>
        private static void WriteHeaderRow(Worksheet sheet)
        {
            sheet.Range["A1"].Text = "ओळखपत्र क्रमांक";
            sheet.Range["B1"].Text = "मतदाराचे पुर्ण नाव";
            sheet.Range["C1"].Text = "वडिलांचे नाव";
            sheet.Range["D1"].Text = "घर क्रमांक";
            sheet.Range["E1"].Text = "वय";
            sheet.Range["F1"].Text = "लिंग ";
        }

        /// <summary>
        /// Processes the default PDF when the form loads and shows the trimmed
        /// text in <c>textBox1</c>.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            textBox1.Text = ReadPdfFile(DefaultInputPath);
        }
var
This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)