OCR? It sounds like you need iTextSharp. Check out its SourceForge page and read up on how to use it. Note that iTextSharp extracts text that is already embedded in the PDF; it does not perform OCR on scanned images. Here's a simple snippet to get you started with extracting text from a PDF file:
Reference: iTextSharp – read PDF file
/// <summary>
/// Extracts the embedded text of every page in a PDF file and returns it
/// as a single string, in page order.
/// </summary>
/// <param name="fileName">Path of the PDF file to read.</param>
/// <returns>
/// The concatenated text of all pages whose extracted text is not empty or
/// whitespace-only. Pages are appended back to back with no added separator.
/// </returns>
/// <exception cref="FileNotFoundException">
/// Thrown when <paramref name="fileName"/> does not refer to an existing file.
/// </exception>
public string ParsePdf(string fileName)
{
    if (!File.Exists(fileName))
    {
        // Report the offending path rather than the literal parameter name.
        throw new FileNotFoundException("The PDF file could not be found.", fileName);
    }

    using (PdfReader reader = new PdfReader(fileName))
    {
        StringBuilder sb = new StringBuilder();

        // iTextSharp page numbers are 1-based.
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            // A fresh strategy per page: SimpleTextExtractionStrategy accumulates
            // the text it has seen, so reusing one instance would make every page's
            // result include the text of all preceding pages.
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
            string text = PdfTextExtractor.GetTextFromPage(reader, page, strategy);

            if (!string.IsNullOrWhiteSpace(text))
            {
                // Append as-is: .NET strings are already Unicode, and a round trip
                // through Encoding.Default would replace any character outside the
                // system ANSI code page with '?'.
                sb.Append(text);
            }
        }

        return sb.ToString();
    }
}
}