diff --git a/requirements/base.txt b/requirements/base.txt index d1a2bf825c..edebeca004 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -27,7 +27,7 @@ click==8.1.7 # via # nltk # python-oxmsg -cryptography==43.0.1 +cryptography==43.0.3 # via unstructured-client dataclasses-json==0.6.7 # via -r ./base.in diff --git a/requirements/extra-pdf-image.in b/requirements/extra-pdf-image.in index 611a8bc052..494f6dc4ff 100644 --- a/requirements/extra-pdf-image.in +++ b/requirements/extra-pdf-image.in @@ -11,5 +11,5 @@ google-cloud-vision effdet # Do not move to constraints.in, otherwise unstructured-inference will not be upgraded # when unstructured library is. -unstructured-inference==0.7.41 +unstructured-inference==0.8.0 unstructured.pytesseract>=0.3.12 diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt index dcae1eb627..56deab0d81 100644 --- a/requirements/extra-pdf-image.txt +++ b/requirements/extra-pdf-image.txt @@ -25,7 +25,7 @@ coloredlogs==15.0.1 # via onnxruntime contourpy==1.3.0 # via matplotlib -cryptography==43.0.1 +cryptography==43.0.3 # via # -c ./base.txt # pdfminer-six @@ -93,7 +93,7 @@ lxml==5.3.0 # via # -c ./base.txt # pikepdf -markupsafe==3.0.1 +markupsafe==3.0.2 # via jinja2 matplotlib==3.9.2 # via @@ -284,7 +284,7 @@ typing-extensions==4.12.2 # torch tzdata==2024.2 # via pandas -unstructured-inference==0.7.41 +unstructured-inference==0.8.0 # via -r ./extra-pdf-image.in unstructured-pytesseract==0.3.13 # via -r ./extra-pdf-image.in diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index 6bdca3066b..08430387f0 100644 --- a/requirements/huggingface.txt +++ b/requirements/huggingface.txt @@ -43,7 +43,7 @@ langdetect==1.0.9 # via # -c ./base.txt # -r ./huggingface.in -markupsafe==3.0.1 +markupsafe==3.0.2 # via jinja2 mpmath==1.3.0 # via sympy diff --git a/requirements/test.txt b/requirements/test.txt index 0ff54c096f..c6c2f938b6 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -241,7 +241,7 @@ wrapt==1.16.0 # vcrpy xmljson==0.2.1 # via label-studio-sdk -yarl==1.15.4 +yarl==1.15.5 # via vcrpy # The following packages are considered to be unsafe in a requirements file: diff --git a/unstructured/partition/pdf_image/inference_utils.py b/unstructured/partition/pdf_image/inference_utils.py index 81551c685d..7218eb93b9 100644 --- a/unstructured/partition/pdf_image/inference_utils.py +++ b/unstructured/partition/pdf_image/inference_utils.py @@ -98,7 +98,7 @@ def merge_text_regions(regions: TextRegions) -> TextRegion: max_x2 = regions.x2.max().astype(float) max_y2 = regions.y2.max().astype(float) - merged_text = " ".join(regions.texts) + merged_text = " ".join([text for text in regions.texts if text]) source = regions.source return TextRegion.from_coords(min_x1, min_y1, max_x2, max_y2, merged_text, source)