Click here to Skip to main content
15,891,529 members
Articles / Programming Languages / C#

Document Processing Part II: Request Driven OCR

Rate me:
Please Sign up or sign in to vote.
5.00/5 (39 votes)
30 Apr 2005 · 4 min read · 228.1K   6.9K   135  
To get qualified access to paper-based information, more than plain OCR is sometimes needed. This article shows why, and offers a solution that increases OCR quality through semi-automatic table extraction.
using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;

using DocumentProcessing;
using DocumentProcessing.Model;
using DocumentProcessing.GUI;

namespace DocumentProcessing
{
	/// <summary>
	/// Support class for changing coordinate systems. All members are static
	/// helpers that translate rectangles between screen coordinates, the
	/// client coordinates of an <c>AxMODI.AxMiDocView</c> control and the
	/// page/image coordinates reported by that control.
	/// </summary>
	public class DocumentViewerSupport
	{
		/// <summary>
		/// Converts the area of a <see cref="DocumentArea"/> into a rectangle
		/// in the viewer's client coordinates.
		/// </summary>
		/// <param name="viewer">Viewer whose <c>ImageToClient</c> performs the conversion.</param>
		/// <param name="DocumentArea">Page number and image-space rectangle to convert.</param>
		/// <returns>
		/// Rectangle spanned by the converted top-left and bottom-right corners.
		/// </returns>
		/// <exception cref="ArgumentNullException">
		/// <paramref name="viewer"/> or <paramref name="DocumentArea"/> is null.
		/// </exception>
		public static Rectangle GetImageSelectionToClientRectangle(AxMODI.AxMiDocView viewer,DocumentArea DocumentArea)
		{
			if (viewer == null)
			{
				throw new ArgumentNullException("viewer");
			}
			if (DocumentArea == null)
			{
				throw new ArgumentNullException("DocumentArea");
			}

			int left, top, right, bottom;
			// Convert both corners separately and rebuild the rectangle from
			// the two converted points.
			viewer.ImageToClient(DocumentArea.Page, DocumentArea.Area.X, DocumentArea.Area.Y, out left, out top);
			viewer.ImageToClient(DocumentArea.Page, DocumentArea.Area.Right, DocumentArea.Area.Bottom, out right, out bottom);
			return new Rectangle(left, top, right - left, bottom - top);
		}

		/// <summary>
		/// Converts a rectangle given in screen coordinates into a
		/// <see cref="DocumentArea"/> (page number plus image-space rectangle).
		/// </summary>
		/// <param name="viewer">Viewer used for the coordinate conversion.</param>
		/// <param name="screenRectangle">Rectangle in screen coordinates.</param>
		/// <returns>A new <see cref="DocumentArea"/> with <c>Area</c> and <c>Page</c> set.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="viewer"/> is null.</exception>
		/// <exception cref="DocumentAreaException">
		/// The conversion failed, or the two corners of the rectangle lie on
		/// different pages.
		/// </exception>
		public static DocumentArea GetScreenToImageRectangle(AxMODI.AxMiDocView viewer,Rectangle screenRectangle)
		{
			if (viewer == null)
			{
				throw new ArgumentNullException("viewer");
			}

			int viewPage;
			Rectangle viewTarget = DocumentViewerSupport.GetScreenToImageRectangle(viewer, screenRectangle, out viewPage);

			DocumentArea context = new DocumentArea();
			context.Area = viewTarget;
			context.Page = viewPage;
			return context;
		}

		/// <summary>
		/// Converts a screen rectangle to an image-space rectangle and reports
		/// the page that both of its corners belong to.
		/// </summary>
		/// <param name="viewer">Viewer used for the coordinate conversion.</param>
		/// <param name="r">Rectangle in screen coordinates.</param>
		/// <param name="page">Receives the page reported for the rectangle's corners.</param>
		/// <returns>The rectangle in image coordinates.</returns>
		/// <exception cref="DocumentAreaException">
		/// <c>ClientToImage</c> threw, or the corners map to different pages.
		/// </exception>
		private static Rectangle GetScreenToImageRectangle(AxMODI.AxMiDocView viewer,Rectangle r, out int page)
		{
			// Only the location changes when going from screen to client
			// coordinates; the size of the rectangle is carried over as-is.
			Rectangle clientRect = new Rectangle(viewer.PointToClient(r.Location), r.Size);

			int firstPage, imgLeft, imgTop;
			int secondPage, imgRight, imgBottom;
			try
			{
				viewer.ClientToImage(clientRect.Left, clientRect.Top, out firstPage, out imgLeft, out imgTop);
				viewer.ClientToImage(clientRect.Right, clientRect.Bottom, out secondPage, out imgRight, out imgBottom);
			}
			catch (Exception)
			{
				// Any failure of the conversion is reported as an invalid frame.
				// NOTE(review): the original exception is dropped because no
				// DocumentAreaException constructor accepting an inner exception
				// is visible from this file - confirm and chain it if available.
				throw new DocumentAreaException("Invalid bounds! Frame must be within a single page!");
			}

			if (firstPage != secondPage)
			{
				throw new DocumentAreaException("Invalid bounds (2 pages selected)! Frame must be within a single page!");
			}

			page = firstPage;
			return new Rectangle(imgLeft, imgTop, imgRight - imgLeft, imgBottom - imgTop);
		}

		/// <summary>
		/// Reads the bounding rectangle of the viewer's current image selection.
		/// </summary>
		/// <param name="viewer">Viewer whose <c>ImageSelection</c> is queried.</param>
		/// <returns>
		/// A <see cref="DocumentArea"/> describing the selection, or <c>null</c>
		/// when reading the selection throws for any reason.
		/// </returns>
		public static DocumentArea GetImageSelection(AxMODI.AxMiDocView viewer)
		{
			try
			{
				int page, left, top, right, bottom;
				viewer.ImageSelection.GetBoundingRect(out page, out left, out top, out right, out bottom);

				DocumentArea selection = new DocumentArea();
				selection.Area = new Rectangle(left, top, right - left, bottom - top);
				selection.Page = page;
				return selection;
			}
			catch (Exception)
			{
				// Deliberate best-effort: callers in this class treat null as
				// "nothing selected" (presumably the viewer throws when there is
				// no selection - verify against the MODI documentation).
				return null;
			}
		}

		/// <summary>
		/// Returns the current image selection of the viewer as a rectangle in
		/// screen coordinates.
		/// </summary>
		/// <param name="viewer">Viewer whose selection is converted.</param>
		/// <returns>
		/// The selection in screen coordinates, or <see cref="Rectangle.Empty"/>
		/// when <see cref="GetImageSelection"/> yields no selection.
		/// </returns>
		public static Rectangle GetImageSelectionAreaToScreen(AxMODI.AxMiDocView viewer)
		{
			DocumentArea selection = GetImageSelection(viewer);
			if (selection == null)
			{
				return Rectangle.Empty;
			}

			// Image -> client -> screen.
			Rectangle clientRect = GetImageSelectionToClientRectangle(viewer, selection);
			return viewer.RectangleToScreen(clientRect);
		}
	}
	
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
CEO Axonic Informationssysteme GmbH, Germany
Germany

Comments and Discussions