From fa65b9781086468047004b6884b09aa424fb226d Mon Sep 17 00:00:00 2001 From: Robert Knight Date: Tue, 24 May 2022 22:51:43 +0100 Subject: [PATCH] Add `clearImage` method to OCREngine/OCRClient This is of limited value at present since memory allocated to WebAssembly cannot be subsequently released without unloading the whole module. It might be useful to ensure that the state of the OCRClient is in sync with other parts of the application though. --- src/lib.cpp | 12 ++++++++---- src/ocr-client.ts | 15 +++++++++++++++ src/ocr-engine.ts | 15 +++++++++++++++ test/ocr-client-test.js | 18 ++++++++++++++++++ test/ocr-engine-test.js | 11 +++++++++++ 5 files changed, 67 insertions(+), 4 deletions(-) diff --git a/src/lib.cpp b/src/lib.cpp index d909e8f..6700b58 100644 --- a/src/lib.cpp +++ b/src/lib.cpp @@ -160,7 +160,11 @@ class OCREngine { return {}; } - void ClearImage() { tesseract_->Clear(); } + void ClearImage() { + tesseract_->Clear(); + layout_analysis_done_ = false; + ocr_done_ = false; + } std::vector GetBoundingBoxes(TextUnit unit) { if (!layout_analysis_done_) { @@ -304,12 +308,12 @@ EMSCRIPTEN_BINDINGS(ocrlib) { class_<OCREngine>("OCREngine") .constructor<>() .function("clearImage", &OCREngine::ClearImage) - .function("loadModel", &OCREngine::LoadModel) - .function("loadImage", &OCREngine::LoadImage) .function("getBoundingBoxes", &OCREngine::GetBoundingBoxes) .function("getOrientation", &OCREngine::GetOrientation) + .function("getText", &OCREngine::GetText) .function("getTextBoxes", &OCREngine::GetTextBoxes) - .function("getText", &OCREngine::GetText); + .function("loadImage", &OCREngine::LoadImage) + .function("loadModel", &OCREngine::LoadModel); value_object<OCRResult>("OCRResult").field("error", &OCRResult::error); diff --git a/src/ocr-client.ts b/src/ocr-client.ts index b4492da..026e334 100644 --- a/src/ocr-client.ts +++ b/src/ocr-client.ts @@ -149,6 +149,21 @@ export class OCRClient { return engine.loadImage(image); } + /** + * Clear the current image and text recognition 
results. + * + * This will clear the loaded image data internally, but keep the text + * recognition model loaded. + * + * At present there is no way to shrink WebAssembly memory, so this will not + * return the memory used by the image to the OS/browser. To release memory, + * the web worker needs to be shut down via {@link destroy}. + */ + async clearImage(): Promise<void> { + const engine = await this._ocrEngine; + return engine.clearImage(); + } + /** * Perform layout analysis on the current image, if not already done, and * return bounding boxes for a given unit of text. diff --git a/src/ocr-engine.ts b/src/ocr-engine.ts index 85192d7..5aecbfd 100644 --- a/src/ocr-engine.ts +++ b/src/ocr-engine.ts @@ -180,6 +180,21 @@ export class OCREngine { this._imageLoaded = true; } + /** + * Clear the current image and text recognition results. + * + * This will clear the loaded image data internally, but keep the text + * recognition model loaded. + * + * At present there is no way to shrink WebAssembly memory, so this will not + * return the memory used by the image to the OS/browser. To release memory, + * the `OCREngine` instance needs to be destroyed via {@link destroy}. + */ + clearImage() { + this._engine.clearImage(); + this._imageLoaded = false; + } + /** * Perform layout analysis on the current image, if not already done, and * return bounding boxes for a given unit of text. 
diff --git a/test/ocr-client-test.js b/test/ocr-client-test.js index 07eaf77..7f2f860 100644 --- a/test/ocr-client-test.js +++ b/test/ocr-client-test.js @@ -118,4 +118,22 @@ describe("OCRClient", () => { assert.equal(orient.rotation, 0); assert.equal(orient.confidence, 1.0); }); + + it("clears the image", async () => { + const imageData = await loadImage(resolve("./small-test-page.jpg")); + await ocr.loadImage(imageData); + await ocr.getBoundingBoxes("word"); + + await ocr.clearImage(); + + let error; + try { + await ocr.getBoundingBoxes("word"); + } catch (e) { + error = e; + } + + assert.instanceOf(error, Error); + assert.equal(error.message, "No image loaded"); + }); }); diff --git a/test/ocr-engine-test.js b/test/ocr-engine-test.js index 7c46bab..a6ac7cd 100644 --- a/test/ocr-engine-test.js +++ b/test/ocr-engine-test.js @@ -308,4 +308,15 @@ describe("OCREngine", () => { assert.equal(estimatedOrient.confidence, 1); } }); + + it("clears the image", async () => { + ocr.loadImage(emptyImage(100, 100)); + ocr.getBoundingBoxes("word"); + + ocr.clearImage(); + + assert.throws(() => { + ocr.getBoundingBoxes("word"); + }, "No image loaded"); + }); });