diff --git a/Makefile b/Makefile index 19051c435b..51ede92052 100644 --- a/Makefile +++ b/Makefile @@ -324,7 +324,7 @@ check: check-src check-tests check-version ## check-src: runs linters (source only, no tests) .PHONY: check-src check-src: - ruff . --select I,UP015,UP032,UP034,UP018,COM,C4,PT,SIM,PLR0402 --ignore PT011,PT012,SIM117 + ruff . --select I,UP015,UP032,UP034,UP018,COM,C4,PT,SIM,PLR0402 --ignore COM812,PT011,PT012,SIM117 black --line-length 100 ${PACKAGE_NAME} --check flake8 ${PACKAGE_NAME} mypy ${PACKAGE_NAME} --ignore-missing-imports --check-untyped-defs diff --git a/scripts/collect_env.py b/scripts/collect_env.py index f0649147b1..6cb48e0f25 100644 --- a/scripts/collect_env.py +++ b/scripts/collect_env.py @@ -40,7 +40,7 @@ def get_os_version(): return platform.platform() -def is_python_package_installed(package_name): +def is_python_package_installed(package_name: str): """ Check if a Python package is installed @@ -57,14 +57,10 @@ def is_python_package_installed(package_name): check=True, ) - for line in result.stdout.splitlines(): - if line.lower().startswith(package_name.lower()): - return True - - return False + return any(line.lower().startswith(package_name.lower()) for line in result.stdout.splitlines()) -def is_brew_package_installed(package_name): +def is_brew_package_installed(package_name: str): """ Check if a Homebrew package is installed @@ -95,11 +91,7 @@ def is_brew_package_installed(package_name): check=True, ) - for line in result.stdout.splitlines(): - if line.lower().startswith(package_name.lower()): - return True - - return False + return any(line.lower().startswith(package_name.lower()) for line in result.stdout.splitlines()) def get_python_package_version(package_name): @@ -221,8 +213,7 @@ def main(): ): print( "PaddleOCR version: ", - get_python_package_version("paddlepaddle") - or get_python_package_version("paddleocr"), + get_python_package_version("paddlepaddle") or get_python_package_version("paddleocr"), ) else: print("PaddleOCR is not installed") diff --git a/scripts/performance/run_partition.py b/scripts/performance/run_partition.py index 4da380f02e..3710f02c64 100644 --- a/scripts/performance/run_partition.py +++ b/scripts/performance/run_partition.py @@ -13,11 +13,7 @@ file_path = sys.argv[1] strategy = sys.argv[2] - model_name = None - if len(sys.argv) > 3: - model_name = sys.argv[3] - else: - model_name = os.environ.get("PARTITION_MODEL_NAME") + model_name = sys.argv[3] if len(sys.argv) > 3 else os.environ.get("PARTITION_MODEL_NAME") result = partition(file_path, strategy=strategy, model_name=model_name) # access element in the return value to make sure we got something back, otherwise error result[1] diff --git a/unstructured/documents/html.py b/unstructured/documents/html.py index ff32ebc5a1..75299fe898 100644 --- a/unstructured/documents/html.py +++ b/unstructured/documents/html.py @@ -444,11 +444,8 @@ def _construct_text(tag_elem: etree.Element, include_tail_text: bool = True) -> return text.strip() -def _has_break_tags(tag_elem: etree.Element) -> bool: - for descendant in tag_elem.iterdescendants(): - if descendant.tag in TEXTBREAK_TAGS: - return True - return False +def _has_break_tags(tag_elem: etree._Element) -> bool: # pyright: ignore[reportPrivateUsage] + return any(descendant.tag in TEXTBREAK_TAGS for descendant in tag_elem.iterdescendants()) def _unfurl_break_tags(tag_elem: etree.Element) -> List[etree.Element]: diff --git a/unstructured/partition/json.py b/unstructured/partition/json.py index f4771da7d5..33d7d7a469 100644 --- a/unstructured/partition/json.py +++ b/unstructured/partition/json.py @@ -54,10 +54,7 @@ def partition_json( last_modification_date = get_last_modified_date_from_file(file) file_content = file.read() - if isinstance(file_content, str): - file_text = file_content - else: - file_text = file_content.decode() + file_text = file_content if isinstance(file_content, str) else file_content.decode() file.seek(0) elif text is not None: diff --git a/unstructured/staging/prodigy.py b/unstructured/staging/prodigy.py index e4d5a99ca9..ba822ce423 100644 --- a/unstructured/staging/prodigy.py +++ b/unstructured/staging/prodigy.py @@ -28,9 +28,7 @@ def _validate_prodigy_metadata( ) if isinstance(id_error_index, int): raise ValueError( - 'The key "id" is not allowed with metadata parameter at index: {index}'.format( - index=id_error_index, - ), + f'The key "id" is not allowed with metadata parameter at index: {id_error_index}' ) validated_metadata = metadata else: