import re
from abbyy_sdk import AbbyyAPI # Hypothetical; replace with actual ABBYY import
# Expected currency shape: one or more digits, a dot, exactly two decimals.
_AMOUNT_RE = re.compile(r'\d+\.\d{2}')


def _clean_ocr_text(text):
    """Return *text* stripped of surrounding whitespace, or '' if it is not a string."""
    return text.strip() if isinstance(text, str) else ''


def recognize_field_with_validation(image_path, field_name):
    """Recognize a numeric (currency-like) field and validate its format.

    The image is first run through ``api.process_image`` with automatic
    orientation handling, then the field is read with ``api.recognize_field``.
    The recognized text is accepted when it looks like ``<digits>.<2 digits>``.
    If it does not, the character-reversed text is tried (e.g. ``00.056`` ->
    ``650.00``); if that fails too, ``api.recognize_field_ocr_pl`` is tried up
    to three times.

    Args:
        image_path: Path of the image to process; passed to ``process_image``.
        field_name: Name of the field to recognize; passed to the recognizers.

    Returns:
        A ``(value, confidence)`` tuple:

        * primary result valid    -> ``(text, confidence)``
        * reversed text valid     -> ``(reversed_text, confidence * 0.8)``
        * fallback result valid   -> ``(text, fallback_confidence * 0.9)``
        * nothing valid           -> ``(None, 0)``

        Returned text has surrounding whitespace stripped.
    """
    # NOTE(review): credentials are placeholders; presumably they should come
    # from configuration rather than source code -- confirm before deploying.
    api = AbbyyAPI(app_id='your_app_id', password='your_password')

    # Preprocess: detect and correct orientation.
    processed_image = api.process_image(image_path, options={'orientation': 'auto'})

    # Primary recognition.
    result = api.recognize_field(processed_image, field_name)
    value = _clean_ocr_text(result['text'])
    confidence = result['confidence']  # Often inflated

    # Normal format: accept as-is. fullmatch() (unlike match() with '$')
    # rejects a trailing newline, so only clean text is returned.
    if _AMOUNT_RE.fullmatch(value):
        return value, confidence

    # Reversed reading, e.g. '00.056' -> '650.00'. The reversed string is
    # validated against the *normal* amount pattern; the previous check
    # tested the reversed string against the reversed shape and so could
    # never succeed.
    corrected_value = value[::-1]
    if _AMOUNT_RE.fullmatch(corrected_value):
        return corrected_value, confidence * 0.8  # Penalize confidence for correction

    # Fallback to RecognizeFieldOCRPL with retries. This runs whenever the
    # checks above fail, including when the reversal did not validate.
    for _ in range(3):
        alt_result = api.recognize_field_ocr_pl(
            processed_image, field_name, options={'retries': 1}
        )
        alt_value = _clean_ocr_text(alt_result['text'])
        if _AMOUNT_RE.fullmatch(alt_value):
            return alt_value, alt_result['confidence'] * 0.9  # Slight penalty

    return None, 0  # Failed
# Usage: recognize the sample amount field and report the result together
# with the confidence after any correction penalty.
value, conf = recognize_field_with_validation(
    image_path='path/to/image.jpg',
    field_name='amount_field',
)
print(f"Recognized: {value}, Adjusted Confidence: {conf}")