Add verbose arg to OCR image-ocr-extract-text (#37372)

* Added release notes
* Added unit test
* Added description to the command
* Added to the release notes
demisto · Nov 26, 2024 · 5a76ff0 · 5a76ff0
1 parent c9f9914
commit 5a76ff0
Show file tree

Hide file tree

Showing 6 changed files with 43 additions and 6 deletions.
diff --git a/Packs/ImageOCR/Integrations/ImageOCR/ImageOCR.py b/Packs/ImageOCR/Integrations/ImageOCR/ImageOCR.py
@@ -17,13 +17,18 @@ def list_languages() -> list[str]:
     return sorted(lines[1:])  # ignore first line
 
 
-def extract_text(image_path: str, languages: list[str] = None) -> str:
-    exe_params = [TESSERACT_EXE, "-v", image_path, 'stdout']
+def extract_text(image_path: str, languages: list[str] | None = None, verbose: bool = False) -> str:
+    exe_params = [TESSERACT_EXE, image_path, 'stdout']
+    if verbose:
+        exe_params.append("-v")  # the -v flag is passed only when the caller explicitly asks for it
+
     if languages:
         exe_params.extend(['-l', '+'.join(languages)])
+
     res = subprocess.run(exe_params, capture_output=True, check=True, text=True)
     if res.stderr:
         demisto.debug(f'tesseract returned ok but stderr contains warnings: {res.stderr}')
+
     return res.stdout
 
 
@@ -37,6 +42,7 @@ def list_languages_command() -> CommandResults:
 
 def extract_text_command(args: dict, instance_languages: list, skip_corrupted: bool) -> tuple[list, list]:
     langs = argToList(args.get('langs')) or instance_languages
+    verbose = argToBoolean(args.get('verbose', False))
     demisto.debug(f"Using langs settings: {langs}")
     results, errors = [], []
 
@@ -48,7 +54,7 @@ def extract_text_command(args: dict, instance_languages: list, skip_corrupted: b
                 raise DemistoException(f"Couldn't find entry id: {entry_id}")
 
             demisto.debug(f'Extracting text from file: {file_path}')
-            res = extract_text(file_path['path'], langs)
+            res = extract_text(file_path['path'], langs, verbose)
             file_entry = {'EntryID': entry_id, 'Text': res}
             results.append(
                 CommandResults(

diff --git a/Packs/ImageOCR/Integrations/ImageOCR/ImageOCR.yml b/Packs/ImageOCR/Integrations/ImageOCR/ImageOCR.yml
@@ -12,7 +12,7 @@ configuration:
   section: Connect
   advanced: true
   required: false
-- additionalinfo: If true, will not raise an error if the image is corrupted and could not be processed.
+- additionalinfo: If true, will not raise an error if the image is corrupt and could not be processed.
   display: Skip on corrupted images
   name: skip_corrupted
   section: Connect
@@ -36,8 +36,12 @@ script:
     - description: A CSV of language codes of the language to use for OCR. Overrides the default configured language list.
       isArray: true
       name: langs
-    description: Extracts text from an image.
+    - description: Enable the verbose flag to display the versions of Tesseract and the libraries it uses.
+      required: false
+      defaultValue: False
+      name: verbose
     name: image-ocr-extract-text
+    description: Extract text from images.
     outputs:
     - contextPath: File.Text
       description: Extracted text from the passed image file.

diff --git a/Packs/ImageOCR/Integrations/ImageOCR/ImageOCR_test.py b/Packs/ImageOCR/Integrations/ImageOCR/ImageOCR_test.py
@@ -52,6 +52,26 @@ def test_extract_text(image, expected_text, langs):
     assert expected_text in res
 
 
+def test_extract_text_verbose_params():
+    """
+    Given:
+     - An image with text
+
+    When:
+     - Running extract_text once with verbose=True and once with verbose=False
+
+    Then:
+     - Validate that the output contains "tesseract" only when verbose is enabled.
+    """
+    path = 'test_data/bomb.jpg'
+    res_verbose = extract_text(path, verbose=True)
+    # The verbose output is expected to mention "tesseract".
+    assert "tesseract" in res_verbose
+    # Without verbose, that marker must be absent from the returned text.
+    res_without_verbose = extract_text(path, verbose=False)
+    assert "tesseract" not in res_without_verbose
+
+
 def test_extract_text_command(mocker):
     """
     Given:

diff --git a/Packs/ImageOCR/Integrations/ImageOCR/README.md b/Packs/ImageOCR/Integrations/ImageOCR/README.md
@@ -77,6 +77,7 @@ Extracts text from an image.
 | --- | --- | --- |
 | entryid | A comma-separated list of Entry IDs of image files to process. | Required | 
 | langs | A CSV of language codes of the language to use for OCR. Overrides the default configured language list. | Optional | 
+| verbose | Enable the verbose flag to display the versions of Tesseract and the libraries it uses. | Optional | 
 
 #### Context Output
 

diff --git a/Packs/ImageOCR/ReleaseNotes/1_1_35.md b/Packs/ImageOCR/ReleaseNotes/1_1_35.md
@@ -0,0 +1,6 @@
+
+#### Integrations
+
+##### Image OCR
+
+- Added the *verbose* argument to the ***image-ocr-extract-text*** command.
diff --git a/Packs/ImageOCR/pack_metadata.json b/Packs/ImageOCR/pack_metadata.json
@@ -2,7 +2,7 @@
     "name": "Image OCR",
     "description": "Extracts text from images.",
     "support": "xsoar",
-    "currentVersion": "1.1.34",
+    "currentVersion": "1.1.35",
     "author": "Cortex XSOAR",
     "url": "https://www.paloaltonetworks.com/cortex",
     "email": "",