Skip to content

Commit

Permalink
remove redundant error catches and improve error message
Browse files (browse the repository at this point in the history)
  • Loading branch information
Coniferish committed Oct 9, 2023
1 parent d16c6d5 commit 35e82b9
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 40 deletions.
23 changes: 0 additions & 23 deletions unstructured/partition/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@

from unstructured.chunking.title import add_chunking_strategy
from unstructured.documents.elements import Element, process_metadata
from unstructured.logger import logger
from unstructured.partition.common import exactly_one
from unstructured.partition.lang import (
convert_old_ocr_languages_to_languages,
)
from unstructured.partition.pdf import partition_pdf_or_image


Expand Down Expand Up @@ -55,25 +51,6 @@ def partition_image(
"""
exactly_one(filename=filename, file=file)

if not isinstance(languages, list):
raise TypeError(
'The language parameter must be a list of language codes as strings, ex. ["eng"]',
)

if ocr_languages is not None:
if languages != ["eng"]:
raise ValueError(
"Only one of languages and ocr_languages should be specified. "
"languages is preferred. ocr_languages is marked for deprecation.",
)

else:
languages = convert_old_ocr_languages_to_languages(ocr_languages)
logger.warning(
"The ocr_languages kwarg will be deprecated in a future version of unstructured. "
"Please use languages instead.",
)

return partition_pdf_or_image(
filename=filename,
file=file,
Expand Down
22 changes: 5 additions & 17 deletions unstructured/partition/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,22 +133,6 @@ def partition_pdf(
"""
exactly_one(filename=filename, file=file)

if ocr_languages is not None:
# check if languages was set to anything not the default value
# languages and ocr_languages were therefore both provided - raise error
if languages != ["eng"]:
raise ValueError(
"Only one of languages and ocr_languages should be specified. "
"languages is preferred. ocr_languages is marked for deprecation.",
)

else:
languages = convert_old_ocr_languages_to_languages(ocr_languages)
logger.warning(
"The ocr_languages kwarg will be deprecated in a future version of unstructured. "
"Please use languages instead.",
)

return partition_pdf_or_image(
filename=filename,
file=file,
Expand Down Expand Up @@ -214,9 +198,13 @@ def partition_pdf_or_image(
# function.

if not isinstance(languages, list):
raise TypeError("The language parameter must be a list of language codes as strings.")
raise TypeError(
'The language parameter must be a list of language codes as strings, ex. ["eng"]',
)

if ocr_languages is not None:
# check if languages was set to anything not the default value
# languages and ocr_languages were therefore both provided - raise error
if languages != ["eng"]:
raise ValueError(
"Only one of languages and ocr_languages should be specified. "
Expand Down

0 comments on commit 35e82b9

Please sign in to comment.