Click here to Skip to main content
15,885,155 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
See more:
I am trying to read a Marathi PDF using iTextSharp, but when I fetch the text from the PDF it is not displayed properly (the Marathi characters come out garbled). How can I fix this?
C#
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
//for read pdf in C#
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
//for excel workbook
using Spire.Xls;
using Spire.Xls.Charts;
using System.IO;
using iTextSharp.text;
namespace WindowsFormsApplication1
{
    public partial class Form1 : Form
    {
        /// <summary>
        /// Creates the form and runs the designer-generated <c>InitializeComponent</c> setup.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Extracts the text of every page of a PDF, copies the recognised fields of each
        /// line into an Excel workbook saved as <c>F:/finalexel.xls</c>, and shows the
        /// extracted text in <c>textBox1</c>.
        /// </summary>
        /// <param name="fileName">Path of the PDF file to read.</param>
        /// <returns>
        /// The concatenated text of all pages, trimmed; an empty string when
        /// <paramref name="fileName"/> does not exist.
        /// </returns>
        public string ReadPdfFile(string fileName)
        {
            StringBuilder text = new StringBuilder();

            if (!System.IO.File.Exists(fileName))
            {
                return text.ToString().Trim();
            }

            PdfReader pdfReader = new PdfReader(fileName);
            try
            {
                Workbook workbook = new Workbook();
                Worksheet sheet = workbook.Worksheets[0];

                // Header row.
                sheet.Range["A1"].Text = "ओळखपत्र क्रमांक";
                sheet.Range["B1"].Text = "मतदाराचे पुर्ण नाव";
                sheet.Range["C1"].Text = "वडिलांचे नाव";
                sheet.Range["D1"].Text = "घर क्रमांक";
                sheet.Range["E1"].Text = "वय";
                sheet.Range["F1"].Text = "लिंग ";

                // The next free row of each column is tracked across ALL pages. Resetting
                // these per page made every page overwrite the rows of the previous one.
                int idRow = 2;
                int nameRow = 2;
                int fatherRow = 2;
                int houseRow = 2;
                int ageGenderRow = 2;

                for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // The extracted value is already a .NET (UTF-16) string. Round-tripping
                    // it through UTF-8 bytes re-read as UTF-16 can only corrupt it, so no
                    // encoding conversion is applied.
                    string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);

                    foreach (string line in currentText.Split('\n'))
                    {
                        if (line.Contains(":"))
                        {
                            string[] parts = line.Split(':');
                            string label = parts[0];
                            string value = parts[1];

                            if (value.Length <= 1)
                            {
                                continue;
                            }

                            // NOTE(review): several labels below look garbled; presumably they
                            // mirror the text as the extractor returns it for this PDF's font.
                            // Confirm against real extraction output before "correcting" them.
                            if (label.Contains("मतदार"))
                            {
                                sheet.Range["B" + nameRow].Text = value;
                                nameRow++;
                            }
                            if (label.Contains("ळडडऱांचे नाळ"))
                            {
                                sheet.Range["C" + fatherRow].Text = value;
                                fatherRow++;
                            }
                            if (label.Contains("घर क्रमांक"))
                            {
                                sheet.Range["D" + houseRow].Text = value;
                                houseRow++;
                            }
                            if (label.Contains("ळय"))
                            {
                                // The age is taken from the third space-separated token of the
                                // value and the gender from the text after the second colon.
                                // Either may be missing on a malformed line, so both are guarded
                                // instead of throwing IndexOutOfRangeException.
                                string[] tokens = value.Split(' ');
                                if (tokens.Length > 2)
                                {
                                    sheet.Range["E" + ageGenderRow].Text = tokens[2];
                                }
                                if (parts.Length > 2)
                                {
                                    sheet.Range["F" + ageGenderRow].Text = parts[2];
                                }
                                ageGenderRow++;
                            }
                        }
                        else if (!line.Contains("नाळ"))
                        {
                            // Lines without a colon (other than those containing the text
                            // above) are written to the first column.
                            sheet.Range["a" + idRow].Text = line;
                            idRow++;
                        }
                    }

                    text.Append(currentText);
                }

                // Save once after all pages are processed rather than once per page.
                workbook.SaveToFile("F:/finalexel.xls");
                textBox1.Text = text.ToString();
            }
            finally
            {
                // Release the PDF even when extraction or the workbook write throws.
                pdfReader.Close();
            }

            return text.ToString().Trim();
        }

        /// <summary>
        /// Load handler for the form: reads the PDF at <c>F:/Beed-003.pdf</c> and displays
        /// the returned text in <c>textBox1</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Form1_Load(object sender, EventArgs e)
        {
            string extractedText = ReadPdfFile("F:/Beed-003.pdf");
            textBox1.Text = extractedText;
        }
Posted
Comments
dan!sh 7-Jan-14 1:02am    
What is the font setting for the textbox?
Gandalf_TheWhite 7-Jan-14 1:14am    
I guess the fonts are causing the problem here. I don't think iTextSharp is able to handle Marathi fonts.
Member 12296330 27-Jul-20 14:10pm    
Hi @pailvan,
I am also doing the same, did you find your solution and if yes can you please share it?

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

  Print Answers RSS
Top Experts
Last 24hrsThis month


CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900