OCR

  • 24 Views
  • Last Post 2 weeks ago
Rama Reddy posted this 3 weeks ago

Hi Team,

I am trying to improve the accuracy of document OCR. What more can I do to get good accuracy?

Attached Files

Order By: Standard | Newest | Votes
Rama Reddy posted this 3 weeks ago

import com.abbyy.FREngine.*; import java.nio.file.*; public class Hello { public static void main( String[] args ) { try { Hello application = new Hello(); application.Run(); } catch( Exception ex ) { displayMessage( ex.getMessage() ); } } public void Run() throws Exception { // Load ABBYY FineReader Engine loadEngine(); try{ // Process with ABBYY FineReader Engine processWithEngine(); } finally { // Unload ABBYY FineReader Engine unloadEngine(); } } private void loadEngine() throws Exception { displayMessage( "Initializing Engine..." ); engine = Engine.InitializeEngine( SamplesConfig.GetDllFolder(), SamplesConfig.GetCustomerProjectId(), SamplesConfig.GetLicensePath(), SamplesConfig.GetLicensePassword(), "", "", false ); } private void processWithEngine() { try { // Setup FREngine setupFREngine(); // Process sample image processImage(); } catch( Exception ex ) { displayMessage( ex.getMessage() ); } } private void setupFREngine() { displayMessage( "Loading predefined profile..." ); engine.LoadPredefinedProfile( "DocumentConversion_Accuracy" ); // Possible profile names are: // "DocumentConversion_Accuracy", "DocumentConversion_Speed", // "DocumentArchiving_Accuracy", "DocumentArchiving_Speed", // "BookArchiving_Accuracy", "BookArchiving_Speed", // "TextExtraction_Accuracy", "TextExtraction_Speed", // "FieldLevelRecognition", // "BarcodeRecognition_Accuracy", "BarcodeRecognition_Speed", // "HighCompressedImageOnlyPdf", // "BusinessCardsProcessing", // "EngineeringDrawingsProcessing", // "Version9Compatibility", // "Default" } private void processImage() { // String imagePath = SamplesConfig.GetSamplesFolder() + "//home//DCXMprod//ABBYY//Samples//images//Charlotta_1.jpg"; String imagePath = SamplesConfig.GetSamplesFolder() + "\\SampleImages\\ab12\\Additional BOLs_AB format 1-example-19.pdf"; try { // Don't recognize PDF file with a textual content, just copy it // Create document //engine.LoadPredefinedProfile("DocumentConversion_Accuracy"); 
//engine.CreateRecognizerParams().SetPredefinedTextLanguage("German"); //IEngine engine=null; //engine=Engine.GetEngineObject(SamplesConfig.GetDllFolder(),SamplesConfig.GetDeveloperSN()); //String profile=SamplesConfig.GetSamplesFolder() + "images/dff.ini"; //engine.LoadProfile(profile); IFRDocument document = engine.CreateFRDocument(); try { // Add image file to document displayMessage( "Loading image..." ); IPrepareImageMode imageparam=engine.CreatePrepareImageMode(); //imageparam.setEnhanceLocalContrast(true); IIntsCollection indices=engine.CreateIntsCollection(); indices.Add(0); document.AddImageFile( imagePath,imageparam, indices); //pages=document.getPages(); IDocumentProcessingParams docProcessingParams =engine.CreateDocumentProcessingParams(); IPageProcessingParams pageprocessingparams= docProcessingParams.getPageProcessingParams(); IPagePreprocessingParams pageproparams1=pageprocessingparams.getPagePreprocessingParams(); pageproparams1.setCorrectOrientation(true); pageproparams1.setCorrectInvertedImage(true); IPageAnalysisParams tabParams=pageprocessingparams.getPageAnalysisParams(); tabParams.setDetectBarcodes(true); //tabParams.setDetectTables(false); //tabParams.setEnableTextExtractionMode(true); tabParams.setAggressiveTableDetection(true); //tabParams.DetectTables=true; IRecognizerParams recognizerParams = pageprocessingparams.getRecognizerParams(); recognizerParams.setLowResolutionMode(true); IObjectsExtractionParams objparams=pageprocessingparams.getObjectsExtractionParams(); objparams.setDetectTextOnPictures(true); objparams.setRemoveGarbage(true); objparams.setEnableAggressiveTextExtraction(true); //document.Preprocess(pageproparams,objparams,null,null); //document.ProcessPages(indices,docProcessingParams); //document.Synthesize(null); //displayMessage( "Saving results..." 
); // Save results to rtf with default parameters //IRTFExportParams rtfparam=engine.CreateRTFExportParams(); //rtfparam.setKeepLines(true); //rtfparam.setPageSynthesisMode("PSM_RTFEditableCopy"); //String rtfExportPath = SamplesConfig.GetSamplesFolder() + "images/Elli_1.rtf"; //document.Export( rtfExportPath, FileExportFormatEnum.FEF_RTF, rtfparam); // Save results to pdf using 'balanced' scenario IPDFExportParams pdfParams = engine.CreatePDFExportParams(); pdfParams.setScenario( PDFExportScenarioEnum.PES_Balanced ); //String pdfExportPath = SamplesConfig.GetSamplesFolder() + "\\SampleImages\\NB\\Additional_BOLs_AB_format_1-example-22d1.pdf"; //document.Export( pdfExportPath, FileExportFormatEnum.FEF_PDF, pdfParams ); //String texExportPath = SamplesConfig.GetSamplesFolder() + "images/US/Equip3.txt"; //document.Export( texExportPath, FileExportFormatEnum.FEF_TextUnicodeDefaults, null); //document.Close(); //document.AddImageFile(pdfExportPath,imageparam,null); //document.Recognize(null,objparams); //document.Preprocess(pageproparams1,objparams,recognizerParams,null); document.Process(docProcessingParams); //document.Synthesize(null); IXLExportParams xlparam=engine.CreateXLExportParams(); xlparam.setLayoutRetentionMode(XLSXLayoutRetentionModeEnum.XLLRM_ExactLines); xlparam.setTablesOnly(true); String xlExportPath = SamplesConfig.GetSamplesFolder() + "\\SampleImages\\ab12\\AdditionalBOLs_ABformat1-example-192.xls"; document.Export( xlExportPath, FileExportFormatEnum.FEF_XLSX, xlparam); IXMLExportParams XMParams=engine.CreateXMLExportParams(); XMParams.setWriteCharAttributes(XMLCharAttributesEnum.XCA_Extended); XMParams.setWriteNondeskewedCoordinates(false); String xmlExportPath = SamplesConfig.GetSamplesFolder() + "\\SampleImages\\ab12\\AdditionalBOLs_ABformat1-example-192.xml"; document.Export( xmlExportPath, FileExportFormatEnum.FEF_XML, XMParams); } finally { // Close document document.Close(); } } catch( Exception ex ) { displayMessage( ex.getMessage() ); } } 
private void unloadEngine() throws Exception { displayMessage( "Deinitializing Engine..." ); engine = null; Engine.DeinitializeEngine(); } private static void displayMessage( String message ) { System.out.println( message ); } private IEngine engine = null; }

Rama Reddy posted this 3 weeks ago

 import com.abbyy.FREngine.*;

import java.nio.file.*;

 

public class Hello {

 

public static void main( String[] args ) {

try {

Hello application = new Hello();

application.Run();

} catch( Exception ex ) {

displayMessage( ex.getMessage() );

}

}

 

public void Run() throws Exception {

// Load ABBYY FineReader Engine

loadEngine();

try{

// Process with ABBYY FineReader Engine

processWithEngine();

} finally {

// Unload ABBYY FineReader Engine

unloadEngine();

}

}

 

private void loadEngine() throws Exception {

displayMessage( "Initializing Engine..." );

engine = Engine.InitializeEngine( SamplesConfig.GetDllFolder(), SamplesConfig.GetCustomerProjectId(), 

SamplesConfig.GetLicensePath(), SamplesConfig.GetLicensePassword(), "", "", false );

}

 

private void processWithEngine() {

try {

// Setup FREngine

setupFREngine();

 

// Process sample image

processImage();

} catch( Exception ex ) {

displayMessage( ex.getMessage() );

}

}

 

private void setupFREngine() {

displayMessage( "Loading predefined profile..." );

engine.LoadPredefinedProfile( "DocumentConversion_Accuracy" );

// Possible profile names are:

//   "DocumentConversion_Accuracy", "DocumentConversion_Speed",

//   "DocumentArchiving_Accuracy", "DocumentArchiving_Speed",

//   "BookArchiving_Accuracy", "BookArchiving_Speed",

//   "TextExtraction_Accuracy", "TextExtraction_Speed",

//   "FieldLevelRecognition",

//   "BarcodeRecognition_Accuracy", "BarcodeRecognition_Speed",

//   "HighCompressedImageOnlyPdf",

//   "BusinessCardsProcessing",

//   "EngineeringDrawingsProcessing",

//   "Version9Compatibility",

//   "Default"

}

 

private void processImage() {

// String imagePath = SamplesConfig.GetSamplesFolder() + "//home//DCXMprod//ABBYY//Samples//images//Charlotta_1.jpg";

 String imagePath = SamplesConfig.GetSamplesFolder() + "\\SampleImages\\ab12\\Additional BOLs_AB format 1-example-19.pdf";

 

try {

// Don't recognize PDF file with a textual content, just copy it

 

 

// Create document

//engine.LoadPredefinedProfile("DocumentConversion_Accuracy");

//engine.CreateRecognizerParams().SetPredefinedTextLanguage("German");

//IEngine engine=null;

//engine=Engine.GetEngineObject(SamplesConfig.GetDllFolder(),SamplesConfig.GetDeveloperSN());

//String profile=SamplesConfig.GetSamplesFolder() + "images/dff.ini";

//engine.LoadProfile(profile);

IFRDocument document = engine.CreateFRDocument();

 

try {

// Add image file to document

displayMessage( "Loading image..." );

 

IPrepareImageMode imageparam=engine.CreatePrepareImageMode();

//imageparam.setEnhanceLocalContrast(true);

 

IIntsCollection indices=engine.CreateIntsCollection();

                indices.Add(0);

document.AddImageFile( imagePath,imageparam, indices);

//pages=document.getPages();

IDocumentProcessingParams docProcessingParams =engine.CreateDocumentProcessingParams();

IPageProcessingParams pageprocessingparams= docProcessingParams.getPageProcessingParams();

 

IPagePreprocessingParams pageproparams1=pageprocessingparams.getPagePreprocessingParams();

pageproparams1.setCorrectOrientation(true);

pageproparams1.setCorrectInvertedImage(true);

 

IPageAnalysisParams tabParams=pageprocessingparams.getPageAnalysisParams();

tabParams.setDetectBarcodes(true);

//tabParams.setDetectTables(false);

                //tabParams.setEnableTextExtractionMode(true);

                tabParams.setAggressiveTableDetection(true);

                //tabParams.DetectTables=true;

 

 

IRecognizerParams recognizerParams = pageprocessingparams.getRecognizerParams();

recognizerParams.setLowResolutionMode(true);

 

IObjectsExtractionParams objparams=pageprocessingparams.getObjectsExtractionParams();

objparams.setDetectTextOnPictures(true);

objparams.setRemoveGarbage(true);

objparams.setEnableAggressiveTextExtraction(true);

//document.Preprocess(pageproparams,objparams,null,null);

//document.ProcessPages(indices,docProcessingParams);

//document.Synthesize(null);

//displayMessage( "Saving results..." );

 

// Save results to rtf with default parameters

//IRTFExportParams rtfparam=engine.CreateRTFExportParams();

//rtfparam.setKeepLines(true);

//rtfparam.setPageSynthesisMode("PSM_RTFEditableCopy");

//String rtfExportPath = SamplesConfig.GetSamplesFolder() + "images/Elli_1.rtf";

//document.Export( rtfExportPath, FileExportFormatEnum.FEF_RTF, rtfparam);

 

// Save results to pdf using 'balanced' scenario

IPDFExportParams pdfParams = engine.CreatePDFExportParams();

pdfParams.setScenario( PDFExportScenarioEnum.PES_Balanced );

 

//String pdfExportPath = SamplesConfig.GetSamplesFolder() + "\\SampleImages\\NB\\Additional_BOLs_AB_format_1-example-22d1.pdf";

//document.Export( pdfExportPath, FileExportFormatEnum.FEF_PDF, pdfParams );

 

//String texExportPath = SamplesConfig.GetSamplesFolder() + "images/US/Equip3.txt";

//document.Export( texExportPath, FileExportFormatEnum.FEF_TextUnicodeDefaults, null);

 

 

 

//document.Close();

//document.AddImageFile(pdfExportPath,imageparam,null);

//document.Recognize(null,objparams);

//document.Preprocess(pageproparams1,objparams,recognizerParams,null);

document.Process(docProcessingParams);

//document.Synthesize(null);

 

IXLExportParams xlparam=engine.CreateXLExportParams();

xlparam.setLayoutRetentionMode(XLSXLayoutRetentionModeEnum.XLLRM_ExactLines);

xlparam.setTablesOnly(true);

String xlExportPath = SamplesConfig.GetSamplesFolder() + "\\SampleImages\\ab12\\AdditionalBOLs_ABformat1-example-192.xls";

document.Export( xlExportPath, FileExportFormatEnum.FEF_XLSX, xlparam);

 

 

IXMLExportParams XMParams=engine.CreateXMLExportParams();

XMParams.setWriteCharAttributes(XMLCharAttributesEnum.XCA_Extended);

XMParams.setWriteNondeskewedCoordinates(false);

String xmlExportPath = SamplesConfig.GetSamplesFolder() + "\\SampleImages\\ab12\\AdditionalBOLs_ABformat1-example-192.xml";

document.Export( xmlExportPath, FileExportFormatEnum.FEF_XML, XMParams);

} finally {

// Close document

document.Close();

}

} catch( Exception ex ) {

displayMessage( ex.getMessage() );

}

}

 

private void unloadEngine() throws Exception {

displayMessage( "Deinitializing Engine..." );

engine = null;

Engine.DeinitializeEngine();

}

 

private static void displayMessage( String message ) {

System.out.println( message );

}

 

private IEngine engine = null;

}

Nadezhda A. Solovyeva posted this 2 weeks ago

Hi Rama, 

In most cases, OCR quality can be improved for a specific set of documents based on the type of those documents. Such settings can only be suggested after analyzing your document images.

For the generic case, please use our quality-speed suggestions to tune your parameters either for the best accuracy or for the best speed: https://abbyy.technology/en:features:ocr:quality-speed

In your code, all available generic optimizations are already turned on by setting the predefined profile to DocumentConversion_Accuracy.

 

Close