I'm not sure if this will help you...
I found this on another website. Also try the links provided below.
To extract all words from a PDF document, for those with Adobe Acrobat installed, such as the Standard version 9 (it may work with earlier versions, but I have not tested against them):
Here are a couple of C# members that will compile, provided you have added a reference to Acrobat.dll to your project and added "using Acrobat;" to your class:
/// <summary>
/// Opens a PDF file through the Acrobat COM automation objects and returns the text
/// produced by <c>PdDocGetText</c> for it.
/// </summary>
/// <param name="filespec">Path of the PDF file; it is resolved to a full path before opening.</param>
/// <returns>The text extracted from the document.</returns>
/// <exception cref="System.IO.IOException">
/// Acrobat reported that the file could not be opened, or no PDDoc could be obtained from it.
/// </exception>
/// <remarks>
/// The documents are closed and the Acrobat application is asked to exit even when
/// extraction throws, so a failure does not leave documents open or skip the exit call.
/// </remarks>
public static string getTextFromPDF(string filespec)
{
    Acrobat.AcroAppClass gAppClass = new Acrobat.AcroAppClass();
    try
    {
        Acrobat.AcroAVDoc avDoc = (Acrobat.AcroAVDoc)gAppClass.GetInterface("Acrobat.AcroAVDoc");
        string fullPath = System.IO.Path.GetFullPath(filespec);

        // Open reports failure through its return value rather than by throwing.
        if (!avDoc.Open(fullPath, System.IO.Path.GetFileName(filespec)))
        {
            throw new System.IO.IOException("Acrobat could not open '" + fullPath + "'.");
        }

        try
        {
            AcroPDDoc doc = (AcroPDDoc)avDoc.GetPDDoc();
            if (doc == null)
            {
                throw new System.IO.IOException("Acrobat returned no PDDoc for '" + fullPath + "'.");
            }

            try
            {
                return PdDocGetText(doc);
            }
            finally
            {
                doc.Close();
            }
        }
        finally
        {
            // 1 = close without saving, as in the original call.
            avDoc.Close(1);
        }
    }
    finally
    {
        gAppClass.Exit();
    }
}
/// <summary>
/// Collects the words of every page of <paramref name="pdDoc"/> by calling the
/// <c>getPageNumWords</c> and <c>getPageNthWord</c> methods of the document's
/// JavaScript object, and returns them concatenated in page order.
/// </summary>
/// <param name="pdDoc">The Acrobat PDDoc to read.</param>
/// <returns>
/// The concatenated words; an empty string when the document has no pages, no
/// JavaScript object is available, or no words could be read.
/// </returns>
/// <remarks>
/// Extraction is best effort: a failure while reading one page is traced and the
/// remaining pages are still processed. Words already read from the failing page are kept.
/// </remarks>
private static string PdDocGetText(AcroPDDoc pdDoc)
{
    int pages = pdDoc.GetNumPages();
    System.Text.StringBuilder pageText = new System.Text.StringBuilder();

    // The JavaScript object does not depend on the page, so it is fetched once and cached.
    // The fetch stays inside the try so that a failure remains non-fatal.
    object jso = null;

    for (int i = 0; i < pages; i++)
    {
        try
        {
            if (jso == null)
            {
                jso = pdDoc.GetJSObject();
                if (jso == null)
                {
                    continue;
                }
            }

            object[] args = new object[] { i };
            object jsNumWords = jso.GetType().InvokeMember("getPageNumWords", System.Reflection.BindingFlags.InvokeMethod, null, jso, args, null);
            int numWords = System.Convert.ToInt32(jsNumWords, System.Globalization.CultureInfo.InvariantCulture);

            // Word indices are zero-based, so the last valid index is numWords - 1.
            for (int j = 0; j < numWords; j++)
            {
                // Third argument is bStrip = false; per the Acrobat JavaScript API this keeps
                // each word's trailing punctuation and whitespace, so no separator is added here.
                object[] argsj = new object[] { i, j, false };
                string word = jso.GetType().InvokeMember("getPageNthWord", System.Reflection.BindingFlags.InvokeMethod, null, jso, argsj, null) as string;
                if (word != null)
                {
                    pageText.Append(word);
                }
            }
        }
        catch (System.Exception ex)
        {
            // Deliberately non-fatal: report the problem and continue with the next page.
            System.Diagnostics.Trace.WriteLine("PdDocGetText: failed to read page " + i + ": " + ex.Message);
        }
    }

    return pageText.ToString();
}
The above code sample has yet to be fully tested and may need improvement. Nonetheless, it is a good starting point.
For those interested in tables, rows and columns, look up the documents by Adobe, such as
http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/plugin_apps_developer_guide.pdf
around pages 130 to 136.
The link
http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/plugin_apps_developer_guide.pdf
may also be helpful for a lot of other tasks.