from ctypes import *
import inspect
import math

# #####################################################################
# Interface to the Transym OCR product via the shipped DLL
# 
# Copyright 2012 The NTF Group, Sydney, Australia
#
# Author: Andrew Hunt
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of 
# this software and associated documentation files (the "Software"), to deal in the
# Software without restriction, including without limitation the rights to use, copy,
# modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the
# following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# #####################################################################


# ###################################################
# SAMPLE USAGE
#
# import transymocr
# text, confidence = transymocr.TOCR_TiffFile(SAMPLE)
#
#
# DEVELOPMENT NOTES
#
# Developed and tested with 
# - Windows 7 Pro 64-bit
# - Transym OCR version 3.3
# - Python 2.7.x
#
# CONFIGURATION
# TOCR_DLL_FILE must reference the installed TOCR DLL file
#
# ###################################################

# Path of the installed Transym OCR DLL (see CONFIGURATION above).
TOCR_DLL_FILE = "C:/Windows/SysWOW64/tocrdll.dll"
# Load the DLL with the stdcall calling convention.
# NOTE(review): the function bindings further down are looked up through
# windll.tocrDll rather than through this handle -- confirm both resolve to
# the same DLL on the target machine.
tocr = WinDLL(TOCR_DLL_FILE )


# ###################################################
# The following is the TOCR error-code header (presumably TOCRerrs.h, not TOCRuser.h) converted to Python Declarations
# ###################################################

# List of all error codes, Version 3.0.2.0

TOCR_OK = 0

# Error codes returned by an API function

class TOCR_Error:
	"""Table of TOCR API error codes plus helpers to name and raise them.

	Every class attribute whose name starts with PREFIX is treated as an error
	code.  The codes inside the ``if False:`` section are never defined at run
	time; they are kept for reference only.
	"""

	PREFIX = "TOCRERR_"

	# Name reported by str() for a code that is not in the table.
	UNKNOWN_NAME = "TOCRERR_UNKNOWN"

	TOCRERR_ILLEGALJOBNO = 1
	TOCRERR_FAILLOCKDB = 2

	TOCRERR_NOFREEJOBSLOTS = 3
	TOCRERR_FAILSTARTSERVICE = 4
	TOCRERR_FAILINITSERVICE = 5
	TOCRERR_JOBSLOTNOTINIT = 6
	TOCRERR_JOBSLOTINUSE = 7
	TOCRERR_SERVICEABORT = 8
	TOCRERR_CONNECTIONBROKEN = 9
	TOCRERR_INVALIDSTRUCTID = 10
	TOCRERR_FAILGETVERSION = 11
	TOCRERR_FAILLICENCEINF = 12
	TOCRERR_LICENCEEXCEEDED = 13

	# RESELLER codes
	TOCRERR_INCORRECTLICENCE = 14
	TOCRERR_MISMATCH = 15

	TOCRERR_JOBSLOTNOTYOURS = 16

	TOCRERR_FAILGETJOBSTATUS1 = 20
	TOCRERR_FAILGETJOBSTATUS2 = 21
	TOCRERR_FAILGETJOBSTATUS3 = 22
	TOCRERR_FAILCONVERT = 23
	TOCRERR_FAILSETCONFIG = 24
	TOCRERR_FAILGETCONFIG = 25

	TOCRERR_FAILDOJOB1 = 30
	TOCRERR_FAILDOJOB2 = 31
	TOCRERR_FAILDOJOB3 = 32
	TOCRERR_FAILDOJOB4 = 33
	TOCRERR_FAILDOJOB5 = 34
	TOCRERR_FAILDOJOB6 = 35
	TOCRERR_FAILDOJOB7 = 36
	TOCRERR_FAILDOJOB8 = 37
	TOCRERR_FAILDOJOB9 = 38
	TOCRERR_FAILDOJOB10 = 39
	TOCRERR_UNKNOWNJOBTYPE1 = 40
	TOCRERR_JOBNOTSTARTED1 = 41
	TOCRERR_FAILDUPHANDLE = 42

	TOCRERR_FAILGETJOBSTATUSMSG1 = 45
	TOCRERR_FAILGETJOBSTATUSMSG2 = 46

	TOCRERR_FAILGETNUMPAGES1 = 50
	TOCRERR_FAILGETNUMPAGES2 = 51
	TOCRERR_FAILGETNUMPAGES3 = 52
	TOCRERR_FAILGETNUMPAGES4 = 53
	TOCRERR_FAILGETNUMPAGES5 = 54

	TOCRERR_FAILGETRESULTS1 = 60
	TOCRERR_FAILGETRESULTS2 = 61
	TOCRERR_FAILGETRESULTS3 = 62
	TOCRERR_FAILGETRESULTS4 = 63
	TOCRERR_FAILALLOCMEM100 = 64
	TOCRERR_FAILALLOCMEM101 = 65
	TOCRERR_FILENOTSPECIFIED = 66
	TOCRERR_INPUTNOTSPECIFIED = 67
	TOCRERR_OUTPUTNOTSPECIFIED = 68

	TOCRERR_FAILROTATEBITMAP = 70

	TOCRERR_TWAINPARTIALACQUIRE = 80
	TOCRERR_TWAINFAILEDACQUIRE = 81
	TOCRERR_TWAINNOIMAGES = 82
	TOCRERR_TWAINSELECTDSFAILED = 83

	# Error codes which may be seen in a msgbox or console but will not be returned by an API function
	# (the block below is deliberately never executed, so these names do not exist at run time)
	if False:
		TOCRERR_INVALIDSERVICESTART = 1000
		TOCRERR_FAILSERVICEINIT = 1001
		TOCRERR_FAILLICENCE1 = 1002
		TOCRERR_FAILSERVICESTART = 1003
		TOCRERR_UNKNOWNCMD = 1004
		TOCRERR_FAILREADCOMMAND = 1005
		TOCRERR_FAILREADOPTIONS = 1006
		TOCRERR_FAILWRITEJOBSTATUS1 = 1007
		TOCRERR_FAILWRITEJOBSTATUS2 = 1008
		TOCRERR_FAILWRITETHREADH = 1009
		TOCRERR_FAILREADJOBINFO1 = 1010
		TOCRERR_FAILREADJOBINFO2 = 1011
		TOCRERR_FAILREADJOBINFO3 = 1012
		TOCRERR_FAILWRITEPROGRESS = 1013
		TOCRERR_FAILWRITEJOBSTATUSMSG = 1014
		TOCRERR_FAILWRITERESULTSSIZE = 1015
		TOCRERR_FAILWRITERESULTS = 1016
		TOCRERR_FAILWRITEAUTOORIENT = 1017
		TOCRERR_FAILLICENCE2 = 1018
		TOCRERR_FAILLICENCE3 = 1019
		TOCRERR_TOOMANYCOLUMNS = 1020
		TOCRERR_TOOMANYROWS = 1021
		TOCRERR_EXCEEDEDMAXZONE = 1022
		TOCRERR_NSTACKTOOSMALL = 1023
		TOCRERR_ALGOERR1 = 1024
		TOCRERR_ALGOERR2 = 1025
		TOCRERR_EXCEEDEDMAXCP = 1026
		TOCRERR_CANTFINDPAGE = 1027
		TOCRERR_UNSUPPORTEDIMAGETYPE = 1028
		TOCRERR_IMAGETOOWIDE = 1029
		TOCRERR_IMAGETOOLONG = 1030
		TOCRERR_UNKNOWNJOBTYPE2 = 1031
		TOCRERR_TOOWIDETOROT = 1032
		TOCRERR_TOOLONGTOROT = 1033
		TOCRERR_INVALIDPAGENO = 1034
		TOCRERR_FAILREADJOBTYPENUMBYTES = 1035
		TOCRERR_FAILREADFILENAME = 1036
		TOCRERR_FAILSENDNUMPAGES = 1037
		TOCRERR_FAILOPENCLIP = 1038
		TOCRERR_NODIBONCLIP = 1039
		TOCRERR_FAILREADDIBCLIP = 1040
		TOCRERR_FAILLOCKDIBCLIP = 1041
		TOCRERR_UNKOWNDIBFORMAT = 1042
		TOCRERR_FAILREADDIB = 1043
		TOCRERR_NOXYPPM = 1044
		TOCRERR_FAILCREATEDIB = 1045
		TOCRERR_FAILWRITEDIBCLIP = 1046
		TOCRERR_FAILALLOCMEMDIB = 1047
		TOCRERR_FAILLOCKMEMDIB = 1048
		TOCRERR_FAILCREATEFILE = 1049
		TOCRERR_FAILOPENFILE1 = 1050
		TOCRERR_FAILOPENFILE2 = 1051
		TOCRERR_FAILOPENFILE3 = 1052
		TOCRERR_FAILOPENFILE4 = 1053
		TOCRERR_FAILREADFILE1 = 1054
		TOCRERR_FAILREADFILE2 = 1055
		TOCRERR_FAILFINDDATA1 = 1056
		TOCRERR_TIFFERROR1 = 1057
		TOCRERR_TIFFERROR2 = 1058
		TOCRERR_TIFFERROR3 = 1059
		TOCRERR_TIFFERROR4 = 1060
		TOCRERR_FAILREADDIBHANDLE = 1061
		TOCRERR_PAGETOOBIG = 1062
		TOCRERR_FAILSETTHREADPRIORITY = 1063
		TOCRERR_FAILSETSRVERRORMODE = 1064

		TOCRERR_FAILREADFILENAME1 = 1070
		TOCRERR_FAILREADFILENAME2 = 1071
		TOCRERR_FAILREADFILENAME3 = 1072
		TOCRERR_FAILREADFILENAME4 = 1073
		TOCRERR_FAILREADFILENAME5 = 1074

		TOCRERR_FAILREADFORMAT1 = 1080
		TOCRERR_FAILREADFORMAT2 = 1081

		TOCRERR_FAILALLOCMEM1 = 1101
		TOCRERR_FAILALLOCMEM2 = 1102
		TOCRERR_FAILALLOCMEM3 = 1103
		TOCRERR_FAILALLOCMEM4 = 1104
		TOCRERR_FAILALLOCMEM5 = 1105
		TOCRERR_FAILALLOCMEM6 = 1106
		TOCRERR_FAILALLOCMEM7 = 1107
		TOCRERR_FAILALLOCMEM8 = 1108
		TOCRERR_FAILALLOCMEM9 = 1109
		TOCRERR_FAILALLOCMEM10 = 1110

		TOCRERR_FAILWRITEMMFH = 1150
		TOCRERR_FAILREADACK = 1151
		TOCRERR_FAILFILEMAP = 1152
		TOCRERR_FAILFILEVIEW = 1153

		# dekernfcz.cpp
		TOCRERR_BUFFEROVERFLOW1 = 2001

		# fczbreak.cpp
		TOCRERR_MAPOVERFLOW = 2002
		TOCRERR_REBREAKNEXTCALL = 2003
		TOCRERR_REBREAKNEXTDATA = 2004
		TOCRERR_REBREAKEXACTCALL = 2005
		TOCRERR_MAXZCANOVERFLOW1 = 2006
		TOCRERR_MAXZCANOVERFLOW2 = 2007
		TOCRERR_BUFFEROVERFLOW2 = 2008
		TOCRERR_NUMKCOVERFLOW = 2009
		TOCRERR_BUFFEROVERFLOW3 = 2010
		TOCRERR_BUFFEROVERFLOW4 = 2011
		TOCRERR_SEEDERROR = 2012

		# fczclassify.cpp
		TOCRERR_FCZYREF = 2020
		TOCRERR_MAXTEXTLINES1 = 2021
		TOCRERR_LINEINDEX = 2022
		TOCRERR_MAXFCZSONLINE = 2023
		TOCRERR_MEMALLOC1 = 2024
		TOCRERR_MERGEBREAK = 2025

		# fczcleanup.cpp
		TOCRERR_DKERNPRANGE1 = 2030
		TOCRERR_DKERNPRANGE2 = 2031
		TOCRERR_BUFFEROVERFLOW5 = 2032
		TOCRERR_BUFFEROVERFLOW6 = 2033

		# fczlex.cpp
		TOCRERR_FILEOPEN1 = 2040
		TOCRERR_FILEOPEN2 = 2041
		TOCRERR_FILEOPEN3 = 2042
		TOCRERR_FILEREAD1 = 2043
		TOCRERR_FILEREAD2 = 2044
		TOCRERR_SPWIDZERO = 2045
		TOCRERR_FAILALLOCMEMLEX1 = 2046
		TOCRERR_FAILALLOCMEMLEX2 = 2047

		# fczmap.cpp
		TOCRERR_BADOBWIDTH = 2050
		TOCRERR_BADROTATION = 2051

		# fcznoise.cpp
		TOCRERR_REJHIDMEMALLOC = 2055

		# fczselfref.cpp
		TOCRERR_UIDA = 2070
		TOCRERR_UIDB = 2071
		TOCRERR_ZEROUID = 2072
		TOCRERR_CERTAINTYDBNOTINIT = 2073
		TOCRERR_MEMALLOCINDEX = 2074
		TOCRERR_CERTAINTYDB_INIT = 2075
		TOCRERR_CERTAINTYDB_DELETE = 2076
		TOCRERR_CERTAINTYDB_INSERT1 = 2077
		TOCRERR_CERTAINTYDB_INSERT2 = 2078
		# NOTE(review): the next two names share the value 2079 -- confirm against the vendor header
		TOCRERR_OPENXORNEAREST = 2079
		TOCRERR_XORNEAREST = 2079

		# netproc.cpp
		TOCRERR_OPENSETTINGS = 2080
		TOCRERR_READSETTINGS1 = 2081
		TOCRERR_READSETTINGS2 = 2082
		TOCRERR_BADSETTINGS = 2083
		TOCRERR_WRITESETTINGS = 2084
		TOCRERR_MAXSCOREDIFF = 2085

		# pnnclass5.cpp
		TOCRERR_YDIMREFZERO1 = 2090
		TOCRERR_YDIMREFZERO2 = 2091
		TOCRERR_YDIMREFZERO3 = 2092
		TOCRERR_ASMFILEOPEN = 2093
		TOCRERR_ASMFILEREAD = 2094
		TOCRERR_MEMALLOCASM = 2095
		TOCRERR_MEMREALLOCASM = 2096
		TOCRERR_SDBFILEOPEN = 2097
		TOCRERR_SDBFILEREAD = 2098
		TOCRERR_SDBFILEBAD1 = 2099
		TOCRERR_SDBFILEBAD2 = 2100
		TOCRERR_MEMALLOCSDB = 2101
		TOCRERR_DEVEL1 = 2102
		TOCRERR_DEVEL2 = 2103
		TOCRERR_DEVEL3 = 2104
		TOCRERR_DEVEL4 = 2105
		TOCRERR_DEVEL5 = 2106
		TOCRERR_DEVEL6 = 2107
		TOCRERR_DEVEL7 = 2108
		TOCRERR_DEVEL8 = 2109
		TOCRERR_DEVEL9 = 2110
		TOCRERR_DEVEL10 = 2111
		TOCRERR_DEVEL11 = 2112
		TOCRERR_DEVEL12 = 2113
		TOCRERR_DEVEL13 = 2114
		TOCRERR_FILEOPEN4 = 2115
		TOCRERR_FILEOPEN5 = 2116
		TOCRERR_FILEOPEN6 = 2117
		TOCRERR_FILEREAD3 = 2118
		TOCRERR_FILEREAD4 = 2119
		TOCRERR_ZOOMGTOOBIG = 2120
		TOCRERR_ZOOMGOUTOFRANGE = 2121

		# fczoutput.cpp
		TOCRERR_MEMALLOCRESULTS = 2130

		# vsub.cpp
		TOCRERR_MEMALLOCHEAP = 2140
		TOCRERR_HEAPNOTINITIALISED = 2141
		TOCRERR_MEMLIMITHEAP = 2142
		TOCRERR_MEMREALLOCHEAP = 2143
		TOCRERR_MEMALLOCFCZBM = 2144
		TOCRERR_FCZBMOVERLAP = 2145
		TOCRERR_FCZBMLOCATION = 2146
		TOCRERR_MEMREALLOCFCZBM = 2147
		TOCRERR_MEMALLOCFCHBM = 2148
		TOCRERR_MEMREALLOCFCHBM = 2149

	# Lazily built {code: name} lookup table; filled on the first call to str().
	strings = None

	@classmethod
	def str(cls, code):
		"""Return the TOCRERR_* name for `code`.

		Codes that are not in the table yield UNKNOWN_NAME instead of raising
		KeyError, so an unexpected status from the DLL can still be reported.
		"""
		if cls.strings is None:
			cls.strings = {}
			for name, value in inspect.getmembers(cls):
				if name.startswith(cls.PREFIX):
					cls.strings[value] = name

		return cls.strings.get(code, cls.UNKNOWN_NAME)

	@classmethod
	def raiseIfError(cls, code, str=""):
		"""Raise Exception describing `code` unless it equals TOCR_OK.

		code -- status value returned by a TOCR API call
		str  -- optional extra text appended to the exception message
		"""
		if code == TOCR_OK:
			return

		raise Exception("%s = code %d. %s" % (cls.str(code), code, str))



# ###################################################
# The following is TOCRuser.h converted to Python Declarations
# ###################################################

# User constants, Version 3.3.0

TOCRJOBMSGLENGTH = 512        # max length of a job status message

TOCRMAXPPM = 78741            # max pixels per metre
TOCRMINPPM = 984              # min pixels per metre

# Setting for JobNo for TOCRSetErrorMode and TOCRGetErrorMode
TOCRDEFERRORMODE = -1         # set/get the default API error mode

# Settings for ErrorMode for TOCRSetErrorMode and TOCRGetErrorMode
TOCRERRORMODE_NONE = 0        # errors unseen (use return status of API calls)
TOCRERRORMODE_MSGBOX = 1      # errors will bring up a message box
TOCRERRORMODE_LOG = 2         # errors are sent to a log file


# Setting for TOCRShutdown
TOCRSHUTDOWNALL = -1          # stop and shutdown processing for all jobs

# Values returned by TOCRGetJobStatus JobStatus
TOCRJOBSTATUS_ERROR = -1      # an error occurred processing the last job
TOCRJOBSTATUS_BUSY = 0        # the job is still processing
TOCRJOBSTATUS_DONE = 1        # the job completed successfully
TOCRJOBSTATUS_IDLE = 2        # no job has been specified yet

# Settings for TOCRJOBINFO.JobType
TOCRJOBTYPE_TIFFFILE = 0      # TOCRJOBINFO.InputFile specifies a tiff file
TOCRJOBTYPE_DIBFILE = 1       # TOCRJOBINFO.InputFile specifies a dib (bmp) file
TOCRJOBTYPE_DIBCLIPBOARD = 2  # clipboard contains a dib (clipboard format CF_DIB)
TOCRJOBTYPE_MMFILEHANDLE = 3  # TOCRJOBINFO.PageNo specifies a handle to a memory mapped DIB file

# Settings for TOCRJOBINFO.Orientation
TOCRJOBORIENT_AUTO = 0        # detect orientation and rotate automatically
TOCRJOBORIENT_OFF = 255       # don't rotate
TOCRJOBORIENT_90 = 1          # 90 degrees clockwise rotation
TOCRJOBORIENT_180 = 2         # 180 degrees clockwise rotation
TOCRJOBORIENT_270 = 3         # 270 degrees clockwise rotation

# Values returned by TOCRGetJobDBInfo
TOCRJOBSLOT_FREE = 0          # job slot is free for use
TOCRJOBSLOT_OWNEDBYYOU = 1    # job slot is in use by your process
TOCRJOBSLOT_BLOCKEDBYYOU = 2  # blocked by own process (re-initialise)
TOCRJOBSLOT_OWNEDBYOTHER = -1 # job slot is in use by another process (can't use)
TOCRJOBSLOT_BLOCKEDBYOTHER = -2 # blocked by another process (can't use)

# Values returned in WaitAnyStatus by TOCRWaitForAnyJob
TOCRWAIT_OK = 0               # JobNo is the job that finished (get and check its JobStatus)
TOCRWAIT_SERVICEABORT = 1     # JobNo is the job that failed (re-initialise)
TOCRWAIT_CONNECTIONBROKEN = 2 # JobNo is the job that failed (re-initialise)
TOCRWAIT_FAILED = -1          # JobNo not set - check manually
TOCRWAIT_NOJOBSFOUND = -2     # JobNo not set - no running jobs found

# Settings for Mode for TOCRGetJobResultsEx
TOCRGETRESULTS_NORMAL = 0     # return results for TOCRRESULTS
TOCRGETRESULTS_EXTENDED = 1   # return results for TOCRRESULTSEX

# Values returned in ResultsInf by TOCRGetJobResults and TOCRGetJobResultsEx
TOCRGETRESULTS_NORESULTS = -1 # no results are available

# Values for TOCRConvertFormat InputFormat
TOCRCONVERTFORMAT_TIFFFILE = TOCRJOBTYPE_TIFFFILE

# Values for TOCRConvertFormat OutputFormat
TOCRCONVERTFORMAT_DIBFILE = TOCRJOBTYPE_DIBFILE
TOCRCONVERTFORMAT_MMFILEHANDLE = TOCRJOBTYPE_MMFILEHANDLE

# Values for licence features (returned by TOCRGetLicenceInfoEx)
TOCRLICENCE_STANDARD = 1      # V1 standard licence (no higher characters)
TOCRLICENCE_EURO = 2          # V2 (higher characters)
TOCRLICENCE_EUROUPGRADE = 3   # standard licence upgraded to euro (V1.4->V2)
TOCRLICENCE_V3SE = 4          # V3SE version 3 standard edition licence (no API)
TOCRLICENCE_V3SEUPGRADE = 5   # versions 1/2 upgraded to V3 standard edition (no API)
TOCRLICENCE_V3PRO = 6         # V3PRO version 3 pro licence
TOCRLICENCE_V3PROUPGRADE = 7  # versions 1/2 upgraded to version 3 pro
TOCRLICENCE_V3SEPROUPGRADE = 8 # version 3 standard edition upgraded to version 3 pro

# Values for TOCRSetConfig and TOCRGetConfig
TOCRCONFIG_DEFAULTJOB = -1    # default job number (all new jobs)
TOCRCONFIG_DLL_ERRORMODE = 0  # set the dll ErrorMode
TOCRCONFIG_SRV_ERRORMODE = 1  # set the service ErrorMode
TOCRCONFIG_SRV_THREADPRIORITY = 2 # set the service thread priority
TOCRCONFIG_DLL_MUTEXWAIT = 3  # set the dll mutex wait timeout (ms)
TOCRCONFIG_DLL_EVENTWAIT = 4  # set the dll event wait timeout (ms)
TOCRCONFIG_SRV_MUTEXWAIT = 5  # set the service mutex wait timeout (ms)
TOCRCONFIG_LOGFILE = 6        # set the log file name




# ###################################################
# TOCRdll.h converted to Python - first the structures (functions below)
# Structures are converted to c_types Structure class
# ###################################################

# TOCR declares Version 3.0.2.0

'''
typedef unsigned char       BYTE;
typedef signed short		VBBOOL;	// in VB Boolean is a signed short True is -1

typedef struct tagTOCRProcessOptions
{
	long			StructId;
	VBBOOL		InvertWholePage;
	VBBOOL		DeskewOff;
	BYTE			Orientation;
	VBBOOL		NoiseRemoveOff;
	VBBOOL		LineRemoveOff;
	VBBOOL		DeshadeOff;
	VBBOOL		InvertOff;
	VBBOOL		SectioningOn;
	VBBOOL		MergeBreakOff;
	VBBOOL		LineRejectOff;
	VBBOOL		CharacterRejectOff;
	VBBOOL		LexOff;
	VBBOOL		DisableCharacter[256];
} TOCRPROCESSOPTIONS;
'''

class TOCRPROCESSOPTIONS(Structure):
	"""ctypes mirror of the C ``TOCRPROCESSOPTIONS`` struct (OCR processing switches).

	The VBBOOL members are signed shorts; per the C typedef comment, a VB
	Boolean True is -1.  Embedded in TOCRJOBINFO as ``ProcessOptions``.
	"""
	_fields_ = [
		("StructId", c_long),
		("InvertWholePage", c_short),
		("DeskewOff", c_short),
		# BYTE is `unsigned char` in the C header.  c_ubyte has the same size
		# as c_byte but lets TOCRJOBORIENT_OFF (255) read back as 255, not -1.
		("Orientation", c_ubyte),
		("NoiseRemoveOff", c_short),
		("LineRemoveOff", c_short),
		("DeshadeOff", c_short),
		("InvertOff", c_short),
		("SectioningOn", c_short),
		("MergeBreakOff", c_short),
		("LineRejectOff", c_short),
		("CharacterRejectOff", c_short),
		("LexOff", c_short),
		("DisableCharacter", c_short * 256),
	]




'''
typedef struct tagTOCRJobInfo
{
	long		StructId;
	long		JobType;
	char		*InputFile;
	long		PageNo;
	TOCRPROCESSOPTIONS	ProcessOptions;
} TOCRJOBINFO;
'''

class TOCRJOBINFO(Structure):
	"""ctypes mirror of the C ``TOCRJOBINFO`` struct; a pointer to it is passed to TOCRDoJob.

	JobType takes one of the TOCRJOBTYPE_* constants.  InputFile is a C string
	(``char *``) and ProcessOptions embeds a TOCRPROCESSOPTIONS by value.
	"""
	_fields_ = [
		("StructId", c_long),
		("JobType", c_long),
		("InputFile", c_char_p),
		("PageNo", c_long),
		("ProcessOptions", TOCRPROCESSOPTIONS)
	]


'''
typedef struct tagTOCRRESULTSHEADER
{
	long		StructId;
	long		XPixelsPerInch;
	long		YPixelsPerInch;
	long		NumItems;
	float		MeanConfidence;
} TOCRRESULTSHEADER;
'''

class TOCRRESULTSHEADER(Structure):
	"""ctypes mirror of the C ``TOCRRESULTSHEADER`` struct.

	Used as the ``Hdr`` member of both TOCRRESULTS and TOCRRESULTSEX; the
	formatting helpers read ``NumItems`` to know how many items to walk.
	"""
	_fields_ = [
		("StructId", c_long),
		("XPixelsPerInch", c_long),
		("YPixelsPerInch", c_long),
		("NumItems", c_long),
		("MeanConfidence", c_float)
	]


'''
typedef struct tagTOCRRESULTSITEM
{
	unsigned short	StructId;
	unsigned short	OCRCha;
	float			Confidence;
	unsigned short	XPos;
	unsigned short	YPos;
	unsigned short	XDim;
	unsigned short	YDim;
} TOCRRESULTSITEM;
'''

class TOCRRESULTSITEM(Structure):
	"""ctypes mirror of the C ``TOCRRESULTSITEM`` struct (one entry of TOCRRESULTS.Item).

	The formatting helpers use ``OCRCha`` as a character code and
	``Confidence`` as that character's confidence value.
	"""
	_fields_ = [
		("StructId", c_ushort),
		("OCRCha", c_ushort),
		("Confidence", c_float),
		("XPos", c_ushort),
		("YPos", c_ushort),
		("XDim", c_ushort),
		("YDim", c_ushort)
	]



'''
typedef struct tagTOCRRESULTS
{
	TOCRRESULTSHEADER		Hdr;
	TOCRRESULTSITEM		Item[1];
} TOCRRESULTS;
'''

class TOCRRESULTS(Structure):
	"""ctypes mirror of the C ``TOCRRESULTS`` struct (header followed by result items).

	The C declaration uses ``Item[1]``; here the array is declared with a fixed
	capacity of 100000 entries.  Only the first ``Hdr.NumItems`` entries should
	be read -- TOCRGetJobResultsEx casts a buffer sized by the DLL to this
	type, so the rest of the array is not backed by that buffer.
	"""
	_fields_ = [
		("Hdr", TOCRRESULTSHEADER),
		("Item", TOCRRESULTSITEM*100000)
	]


'''
typedef struct tagTOCRRESULTSITEMEXALT
{
	unsigned short	Valid;
	unsigned short	OCRCha;
	float			Factor;
} TOCRRESULTSITEMEXALT;
'''


class TOCRRESULTSITEMEXALT(Structure):
	"""ctypes mirror of the C ``TOCRRESULTSITEMEXALT`` struct (one entry of TOCRRESULTSITEMEX.Alt)."""
	_fields_ = [
		("Valid",  c_ushort),
		("OCRCha", c_ushort),
		("Factor", c_float)
	]

'''
typedef struct tagTOCRRESULTSITEMEX
{
	unsigned short			StructId;
	unsigned short			OCRCha;
	float					Confidence;
	unsigned short			XPos;
	unsigned short			YPos;
	unsigned short			XDim;
	unsigned short			YDim;
	TOCRRESULTSITEMEXALT	Alt[5];
} TOCRRESULTSITEMEX;
'''

class TOCRRESULTSITEMEX(Structure):
	"""ctypes mirror of the C ``TOCRRESULTSITEMEX`` struct (one entry of TOCRRESULTSEX.Item).

	Same leading members as TOCRRESULTSITEM, followed by an array of five
	TOCRRESULTSITEMEXALT entries.
	"""
	_fields_ = [
		("StructId",  c_ushort),
		("OCRCha", c_ushort),
		("Confidence", c_float),
		("XPos", c_ushort),
		("YPos", c_ushort),
		("XDim", c_ushort),
		("YDim", c_ushort),
		("Alt", TOCRRESULTSITEMEXALT*5),
	]


'''
typedef struct tagTOCRRESULTSEX
{
	TOCRRESULTSHEADER	Hdr;
	TOCRRESULTSITEMEX	Item[1];
} TOCRRESULTSEX;
'''

class TOCRRESULTSEX(Structure):
	"""ctypes mirror of the C ``TOCRRESULTSEX`` struct (header followed by extended items).

	The C declaration uses ``Item[1]``; here the array is declared with a fixed
	capacity of 100000 entries.  Only the first ``Hdr.NumItems`` entries should
	be read -- TOCRGetJobResultsEx casts a buffer sized by the DLL to this
	type, so the rest of the array is not backed by that buffer.
	"""
	_fields_ = [
		("Hdr", TOCRRESULTSHEADER),
		("Item", TOCRRESULTSITEMEX*100000)
	]





# ###################################################
# TOCRdll.h converted to Python - second the functions (structures above)
# C Function Declarations are converted to a Python-callable DLL function
# Note: functions have been converted as needed (not all converted)
# ###################################################

# ##################################
#EXTERN_C long WINAPI TOCRInitialise(long *JobNo);
dll_TOCRInitialise = windll.tocrDll.TOCRInitialise
dll_TOCRInitialise.restype = c_long
dll_TOCRInitialise.argtypes = [POINTER(c_long)]

def TOCRInitialise():
	"""Call the DLL's TOCRInitialise and return the job number it writes back.

	Raises Exception (through TOCR_Error.raiseIfError) when the DLL returns a
	status other than TOCR_OK.
	"""
	job_slot = c_long()
	status = dll_TOCRInitialise(byref(job_slot))
	TOCR_Error.raiseIfError(status)
	return job_slot.value


# ##################################
#EXTERN_C long WINAPI TOCRShutdown(long JobNo);
dll_TOCRShutdown = windll.tocrDll.TOCRShutdown
dll_TOCRShutdown.restype = c_long
dll_TOCRShutdown.argtypes = [c_long]

def TOCRShutdown(JobNo):
	"""Call the DLL's TOCRShutdown for `JobNo`.

	Returns None.  Raises Exception (through TOCR_Error.raiseIfError) when the
	DLL returns a status other than TOCR_OK.
	"""
	status = dll_TOCRShutdown(c_long(JobNo))
	TOCR_Error.raiseIfError(status)
	

# ##################################
#EXTERN_C long WINAPI TOCRDoJob(long JobNo, TOCRJOBINFO *JobInfo);
dll_TOCRDoJob = windll.tocrDll.TOCRDoJob
dll_TOCRDoJob.restype = c_long
dll_TOCRDoJob.argtypes = [c_long, POINTER(TOCRJOBINFO)]

def TOCRDoJob(JobNo, JobInfo):
	"""Pass the TOCRJOBINFO structure `JobInfo` to the DLL's TOCRDoJob for `JobNo`.

	Returns None.  Raises Exception (through TOCR_Error.raiseIfError) when the
	DLL returns a status other than TOCR_OK.
	"""
	job_info_ptr = pointer(JobInfo)
	status = dll_TOCRDoJob(c_long(JobNo), job_info_ptr)
	TOCR_Error.raiseIfError(status)


# ##################################
#EXTERN_C long WINAPI TOCRWaitForJob(long JobNo, long *JobStatus);
dll_TOCRWaitForJob = windll.tocrDll.TOCRWaitForJob
dll_TOCRWaitForJob.restype = c_long
dll_TOCRWaitForJob.argtypes = [c_long, POINTER(c_long)]

def TOCRWaitForJob(JobNo):
	"""Call the DLL's TOCRWaitForJob and return the numeric job status it writes back.

	The returned value is compared against the TOCRJOBSTATUS_* constants by
	callers.  Raises Exception (through TOCR_Error.raiseIfError) when the DLL
	returns a status other than TOCR_OK.
	"""
	job_status = c_long(0)
	status = dll_TOCRWaitForJob(c_long(JobNo), byref(job_status))
	TOCR_Error.raiseIfError(status)
	return job_status.value



'''
EXTERN_C long WINAPI TOCRWaitForAnyJob(long *WaitAnyStatus, long *JobNo);
EXTERN_C long WINAPI TOCRGetJobDBInfo(long *JobSlotInf);
'''

# ##################################
# EXTERN_C long WINAPI TOCRGetJobStatus(long JobNo, long *JobStatus);
dll_TOCRGetJobStatus = windll.tocrDll.TOCRGetJobStatus
dll_TOCRGetJobStatus.restype = c_long
dll_TOCRGetJobStatus.argtypes = [c_long, POINTER(c_long)]

def TOCRGetJobStatus(JobNo):
	"""Query the DLL for the status of `JobNo` and return it as a constant *name*.

	Unlike TOCRWaitForJob, the result is the string name of the matching
	TOCRJOBSTATUS_* constant, not its numeric value.  Raises Exception for a
	non-OK DLL status or for a status value that matches no known constant.
	"""
	job_status = c_long()
	status = dll_TOCRGetJobStatus(c_long(JobNo), byref(job_status))
	TOCR_Error.raiseIfError(status)

	status_names = {
		TOCRJOBSTATUS_ERROR: "TOCRJOBSTATUS_ERROR",
		TOCRJOBSTATUS_BUSY: "TOCRJOBSTATUS_BUSY",
		TOCRJOBSTATUS_DONE: "TOCRJOBSTATUS_DONE",
		TOCRJOBSTATUS_IDLE: "TOCRJOBSTATUS_IDLE",
	}
	if job_status.value in status_names:
		return status_names[job_status.value]

	raise Exception("Error in TOCRGetJobStatus: unknown status code", job_status.value)


# ##################################
# EXTERN_C long WINAPI TOCRGetJobStatusEx(long JobNo, long *JobStatus, float *Progress, long *AutoOrientation);


# ##################################
# EXTERN_C long WINAPI TOCRGetJobStatusMsg(long JobNo, char *Msg);
dll_TOCRGetJobStatusMsg = getattr(windll.tocrDll, "TOCRGetJobStatusMsg")
dll_TOCRGetJobStatusMsg.argtypes = [c_long, c_char_p]
dll_TOCRGetJobStatusMsg.restype = c_long

def TOCRGetJobStatusMsg(JobNo):
	"""Return the job status message the DLL reports for `JobNo`.

	Raises Exception (through TOCR_Error.raiseIfError) when the DLL returns a
	status other than TOCR_OK.
	"""
	# The DLL writes into this buffer, so it must be mutable memory owned by
	# ctypes.  Wrapping a Python string in c_char_p would let the DLL scribble
	# over an immutable string object.  The capacity matches the
	# 2 * TOCRJOBMSGLENGTH characters the previous implementation provided.
	msg_buffer = create_string_buffer(2 * TOCRJOBMSGLENGTH)
	res = dll_TOCRGetJobStatusMsg(c_long(JobNo), msg_buffer)
	TOCR_Error.raiseIfError(res)
	# .value stops at the first NUL byte, i.e. returns just the message text.
	return msg_buffer.value


# ##################################
# EXTERN_C long WINAPI TOCRGetNumPages(long JobNo, char *Filename, long JobType, long *NumPages);
dll_TOCRGetNumPages = windll.tocrDll.TOCRGetNumPages
dll_TOCRGetNumPages.restype = c_long
dll_TOCRGetNumPages.argtypes = [c_long, c_char_p, c_long, POINTER(c_long)]

def TOCRGetNumPages(JobNo, Filename, JobType):
	"""Ask the DLL how many pages `Filename` contains and return that count.

	JobType is forwarded unchanged (callers pass a TOCRJOBTYPE_* constant).
	Raises Exception (through TOCR_Error.raiseIfError) when the DLL returns a
	status other than TOCR_OK.
	"""
	page_count = c_long()
	status = dll_TOCRGetNumPages(
		c_long(JobNo),
		c_char_p(Filename),
		c_long(JobType),
		byref(page_count))
	TOCR_Error.raiseIfError(status)
	return page_count.value


# ##################################
# TOCRGetJobResults is superseded by TOCRGetJobResultsEx
# EXTERN_C long WINAPI TOCRGetJobResults(long JobNo, long *ResultsInf, TOCRRESULTS *Results);
#
# EXTERN_C long WINAPI TOCRGetJobResultsEx(long JobNo, long Mode, long *ResultsInf, void *ResultsEx);
dll_TOCRGetJobResultsEx = getattr(windll.tocrDll, "TOCRGetJobResultsEx")
dll_TOCRGetJobResultsEx.argtypes = [c_long, c_long, POINTER(c_long), c_void_p]
dll_TOCRGetJobResultsEx.restype = c_long

# Note: slight modification of the DLL function.  This executes two-pass call in a single function (for convenience)
def TOCRGetJobResultsEx(JobNo, Mode):
	"""Fetch the OCR results of `JobNo` and return ``(ResultsInf, results)``.

	Mode -- TOCRGETRESULTS_NORMAL (results is a TOCRRESULTS) or
	        TOCRGETRESULTS_EXTENDED (results is a TOCRRESULTSEX).

	When the DLL reports no results (ResultsInf <= 0, e.g.
	TOCRGETRESULTS_NORESULTS) the function returns ``(ResultsInf, None)``
	instead of trying to allocate a negative-length buffer.

	Raises Exception for an unknown Mode or for a non-OK DLL status.
	"""
	# Validate Mode up front so no DLL call is made with an unsupported value.
	if Mode == TOCRGETRESULTS_NORMAL:
		results_type = TOCRRESULTS
	elif Mode == TOCRGETRESULTS_EXTENDED:
		results_type = TOCRRESULTSEX
	else:
		raise Exception("TOCRGetJobResultsEx passed unknown Mode:", str(Mode))

	# Call twice:
	# First, pass ResultsInf and a NULL buffer; the function returns the size of the results
	ResultsInf = c_long(0)
	ResultsEx = c_void_p()
	res = dll_TOCRGetJobResultsEx(c_long(JobNo), c_long(Mode), byref(ResultsInf), ResultsEx)
	TOCR_Error.raiseIfError(res)

	if ResultsInf.value <= 0:
		return ResultsInf.value, None

	# Second, pass ResultsInf with a non-zero size and ResultsEx pointing to a suitable-sized buffer
	buffer = (c_byte * ResultsInf.value)()
	res = dll_TOCRGetJobResultsEx(c_long(JobNo), c_long(Mode), byref(ResultsInf), byref(buffer))
	TOCR_Error.raiseIfError(res)

	if ResultsInf.value <= 0:
		return ResultsInf.value, None

	# The structure type declares far more items than the buffer holds; only
	# the first Hdr.NumItems entries are backed by `buffer`.
	return ResultsInf.value, cast(buffer, POINTER(results_type)).contents


# ##################################
#  Format conversion functions not implemented
# Note: TOCRConvertTIFFtoDIB Superseded by TOCRConvertFormat
# EXTERN_C long WINAPI TOCRConvertTIFFtoDIB(long JobNo, char *InputFilename, char *OutputFilename, long PageNo);
#
# EXTERN_C long WINAPI TOCRRotateMonoBitmap(long *hBmp, long Width, long Height, long Orientation);
# EXTERN_C long WINAPI TOCRConvertFormat(long JobNo, void *InputAddr, long InputFormat, void *OutputAddr, long OutputFormat, long PageNo);


# ##################################
# TOCRGetLicenceInfo Superseded by TOCRGetLicenceInfoEx
# EXTERN_C long WINAPI TOCRGetLicenceInfo(long *NumberOfJobSlots, long *Volume, long *Time, long *Remaining);
#
# EXTERN_C long WINAPI TOCRGetLicenceInfoEx(long JobNo, char *Licence, long *Volume, long *Time, long *Remaining, long *Features);
dll_TOCRGetLicenceInfoEx = windll.tocrDll.TOCRGetLicenceInfoEx
dll_TOCRGetLicenceInfoEx.restype = c_long
dll_TOCRGetLicenceInfoEx.argtypes = [
	c_long,
	c_char_p,
	POINTER(c_long),
	POINTER(c_long),
	POINTER(c_long),
	POINTER(c_long),
]

def TOCRGetLicenceInfoEx(JobNo):
	"""Query licence information for `JobNo`.

	Returns the tuple ``(Licence, Volume, Time, Remaining, Features)`` as
	written back by the DLL; Licence comes from a 20-byte character buffer.
	Raises Exception (through TOCR_Error.raiseIfError) when the DLL returns a
	status other than TOCR_OK.
	"""
	licence_buf = (c_char * 20)()
	volume = c_long()
	time_value = c_long()
	remaining = c_long()
	features = c_long()

	status = dll_TOCRGetLicenceInfoEx(
		c_long(JobNo),
		licence_buf,
		byref(volume),
		byref(time_value),
		byref(remaining),
		byref(features))
	TOCR_Error.raiseIfError(status)
	return licence_buf.value, volume.value, time_value.value, remaining.value, features.value


# ##################################
# TOCRSetErrorMode is superseded by TOCRSetConfig
# EXTERN_C long WINAPI TOCRSetErrorMode(long JobNo, long ErrorMode);
#
# EXTERN_C long WINAPI TOCRSetConfig(long JobNo, long Parameter, long Value);
dll_TOCRSetConfig = windll.tocrDll.TOCRSetConfig
dll_TOCRSetConfig.restype = c_long
dll_TOCRSetConfig.argtypes = [c_long, c_long, c_long]

def TOCRSetConfig(JobNo, Parameter, Value):
	"""Set configuration `Parameter` to the integer `Value` for `JobNo`.

	Parameter is one of the TOCRCONFIG_* constants; all three arguments are
	passed to the DLL as C longs.  Returns None.  Raises Exception (through
	TOCR_Error.raiseIfError) when the DLL returns a status other than TOCR_OK.
	"""
	call_args = (c_long(JobNo), c_long(Parameter), c_long(Value))
	status = dll_TOCRSetConfig(*call_args)
	TOCR_Error.raiseIfError(status)


# ##################################
# TOCRGetErrorMode is superseded by TOCRGetConfig
# EXTERN_C long WINAPI TOCRGetErrorMode(long JobNo, long *ErrorMode);
# 
#EXTERN_C long WINAPI TOCRGetConfig(long JobNo, long Parameter, long *Value);
dll_TOCRGetConfig = getattr(windll.tocrDll, "TOCRGetConfig")
dll_TOCRGetConfig.argtypes = [c_long, c_long, POINTER(c_long)]
dll_TOCRGetConfig.restype = c_long

def TOCRGetConfig(JobNo, Parameter):
	"""Read configuration `Parameter` for `JobNo`.

	Returns an int for the integer-valued TOCRCONFIG_* parameters (error
	modes, thread priority and the mutex/event wait timeouts) and a string for
	TOCRCONFIG_LOGFILE.

	Raises Exception for an unrecognised Parameter or for a non-OK DLL status.
	"""
	integer_parameters = (
		TOCRCONFIG_DLL_ERRORMODE,
		TOCRCONFIG_SRV_ERRORMODE,
		TOCRCONFIG_SRV_THREADPRIORITY,
		TOCRCONFIG_DLL_MUTEXWAIT,
		TOCRCONFIG_DLL_EVENTWAIT,
		TOCRCONFIG_SRV_MUTEXWAIT,
	)

	if Parameter in integer_parameters:
		Value = c_long()
		res = dll_TOCRGetConfig(c_long(JobNo),
						c_long(Parameter),
						byref(Value))
		TOCR_Error.raiseIfError(res)
		return Value.value

	if Parameter == TOCRCONFIG_LOGFILE:
		# NOTE(review): assumes the DLL writes a NUL-terminated file name into
		# the memory Value points at -- confirm against the TOCR API docs.
		Value = create_string_buffer(8192)
		# The prototype declares `long *Value`, so reinterpret the character
		# buffer's address to satisfy the declared argtypes.
		res = dll_TOCRGetConfig(c_long(JobNo),
						c_long(Parameter),
						cast(Value, POINTER(c_long)))
		TOCR_Error.raiseIfError(res)
		return Value.value

	raise Exception("TOCRGetConfig passed unknown Parameter:", str(Parameter))


# ##################################
# TWAIN Device functions not implemented
# Note: TWAIN is an API to digital imaging devices such as scanners and digital cameras
#EXTERN_C long WINAPI TOCRTWAINAcquire(long *NumberOfImages);
#EXTERN_C long WINAPI TOCRTWAINGetImages(long *GlobalMemoryDIBs);
#EXTERN_C long WINAPI TOCRTWAINSelectDS(void);
#EXTERN_C long WINAPI TOCRTWAINShowUI(VBBOOL Show);




# Convert normal results to a string
def FormatResultsNormal(results):
	"""Convert normal results to ``(text, confidence)``.

	results -- a TOCRRESULTS structure (anything exposing ``Hdr.NumItems`` and
	           ``Item[i].OCRCha`` / ``Item[i].Confidence`` works)

	text is the concatenation of every item's character.  confidence is the
	geometric mean of the non-zero item confidences; when there are none, a
	warning is printed and 1.0 is returned.
	"""
	chars = []
	logSum = 0.0
	confCount = 0

	for itemNo in range(results.Hdr.NumItems):
		item = results.Item[itemNo]
		chars.append(chr(item.OCRCha))

		if item.Confidence != 0.0:
			# Sum logarithms instead of multiplying the raw values: the running
			# product of thousands of confidences below 1 underflows to 0.0.
			logSum += math.log(item.Confidence)
			confCount += 1

	text = "".join(chars)

	if confCount == 0:
		print("WARNING: FormatResultsNormal. confCount is zero.")
		return text, 1.0

	return text, math.exp(logSum / confCount)



# Convert extended results to a string
def FormatResultsExtended(results):
	"""Convert extended results to ``(text, confidence)``.

	results -- a TOCRRESULTSEX structure (anything exposing ``Hdr.NumItems``
	           and ``Item[i].OCRCha`` / ``Item[i].Confidence`` works)

	text is the concatenation of every item's character.  confidence is the
	geometric mean of the non-zero item confidences; when there are none, a
	warning is printed and 1.0 is returned (same as FormatResultsNormal)
	instead of dividing by zero.
	"""
	chars = []
	logSum = 0.0
	confCount = 0

	for itemNo in range(results.Hdr.NumItems):
		item = results.Item[itemNo]
		chars.append(chr(item.OCRCha))

		if item.Confidence != 0.0:
			# Sum logarithms instead of multiplying the raw values: the running
			# product of thousands of confidences below 1 underflows to 0.0.
			logSum += math.log(item.Confidence)
			confCount += 1

	text = "".join(chars)

	if confCount == 0:
		print("WARNING: FormatResultsExtended. confCount is zero.")
		return text, 1.0

	return text, math.exp(logSum / confCount)

# Converts either normal or extended results to a string + confidence
def FormatResults(results):
	"""Dispatch `results` to the formatter that matches its class name.

	An object whose type is named "TOCRRESULTS" goes to FormatResultsNormal and
	one named "TOCRRESULTSEX" goes to FormatResultsExtended; both return
	``(text, confidence)``.  Any other object raises Exception.
	"""
	kind = type(results).__name__

	if kind == "TOCRRESULTS":
		return FormatResultsNormal(results)
	if kind == "TOCRRESULTSEX":
		return FormatResultsExtended(results)

	raise Exception("FormatResults passed object that is not TOCRRESULTS or TOCRRESULTSEX")


# ###################################################
# Conversion of the Examples from Transym OCR (C++ Main.cpp file)
# ###################################################

def TOCR_TiffFile(tifffile):
	"""OCR page 0 of the TIFF file `tifffile` and return ``(text, confidence)``.

	tifffile -- path of the TIFF file, passed to the DLL as a C string

	Raises Exception when any TOCR call fails, when the job does not finish
	with TOCRJOBSTATUS_DONE, or when no results are returned.  The job slot
	obtained from TOCRInitialise is released with TOCRShutdown on every path,
	including failures, so slots are not leaked.
	"""
	DEBUG = False

	# Ensure that errors are returned through the API
	TOCRSetConfig(TOCRCONFIG_DEFAULTJOB, TOCRCONFIG_DLL_ERRORMODE, TOCRERRORMODE_NONE)

	JobNo = TOCRInitialise()
	try:
		if DEBUG: print("Initialised Job No: %s" % JobNo)

		# These queries are only displayed in DEBUG mode, but they are always
		# executed so that a failure surfaces before the job starts.
		Licence, Volume, Time, Remaining, Features = TOCRGetLicenceInfoEx(JobNo)
		if DEBUG: print("Licence: %s" % Licence)

		NumPages = TOCRGetNumPages(JobNo, tifffile, TOCRJOBTYPE_TIFFFILE)
		if DEBUG: print("Number of pages: %s" % NumPages)

		error = TOCRGetConfig(JobNo, TOCRCONFIG_DLL_ERRORMODE)
		if DEBUG: print("Error mode: %s" % error)

		# Define the OCR job parameters
		# A fresh ctypes array is zero-filled, i.e. no character is disabled.
		DisableCharacterArray = (c_short * 256)()

		ProcessOptions = TOCRPROCESSOPTIONS(
			StructId = 0,
			InvertWholePage = 0,
			DeskewOff = 0,
			Orientation = TOCRJOBORIENT_AUTO,
			NoiseRemoveOff = 0,
			LineRemoveOff = 0,
			DeshadeOff = 0,
			InvertOff = 0,
			SectioningOn = 1,
			MergeBreakOff = 0,
			LineRejectOff = 0,
			CharacterRejectOff = 0,
			LexOff = 0,
			DisableCharacter = DisableCharacterArray)

		JobInfo = TOCRJOBINFO(
			StructId = 0,
			JobType = TOCRJOBTYPE_TIFFFILE,
			InputFile = tifffile,
			PageNo = 0,
			ProcessOptions = ProcessOptions)

		TOCRDoJob(JobNo, JobInfo)

		JobStatus = TOCRWaitForJob(JobNo)
		if JobStatus != TOCRJOBSTATUS_DONE:
			raise Exception("Error in TOCR_TIFF calling TOCRWaitForJob: " + TOCRGetJobStatusMsg(JobNo))

		mode = TOCRGETRESULTS_NORMAL
		#mode = TOCRGETRESULTS_EXTENDED

		size, ocrResult = TOCRGetJobResultsEx(JobNo, mode)

		if size == 0 or ocrResult is None:
			raise Exception("Error in TOCR_TIFF calling TOCRGetJobResultsEx")

		text, confidence = FormatResults(ocrResult)
		return text, confidence
	finally:
		# Always hand the job slot back, even when one of the calls above raised.
		TOCRShutdown(JobNo)



# Script entry point: OCR the bundled sample file and print the outcome.
if __name__=='__main__':
	# Imported here because timing is only needed when run as a script.
	import time

	SAMPLE = "TOCR_Sample.tif"
	print(" ----------- CONVERTING: %s -----------" % SAMPLE)

	# `timer` was previously printed without ever being assigned (NameError);
	# measure the elapsed wall-clock time of the conversion instead.
	started = time.time()
	text, confidence = TOCR_TiffFile(SAMPLE)
	timer = time.time() - started

	print("Confidence: %s" % confidence)
	print("Time taken: %.3f seconds" % timer)
	print(text)

