From 3769abcb931f0d8143a002cb47b8c4d20da42e06 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Sat, 16 Sep 2023 16:04:31 -0700 Subject: [PATCH] fix: CI lint complaints I didn't touch any of these Python files in my PR which makes me think either the linting changed recently or PRs that fail CI have been getting merged. Anyway, happy to fix them up for the greater good :) --- Makefile | 2 +- scripts/collect_env.py | 19 +++++-------------- scripts/performance/run_partition.py | 6 +----- unstructured/documents/html.py | 7 ++----- unstructured/partition/json.py | 5 +---- unstructured/staging/prodigy.py | 4 +--- 6 files changed, 11 insertions(+), 32 deletions(-) diff --git a/Makefile b/Makefile index 19051c435b..51ede92052 100644 --- a/Makefile +++ b/Makefile @@ -324,7 +324,7 @@ check: check-src check-tests check-version ## check-src: runs linters (source only, no tests) .PHONY: check-src check-src: - ruff . --select I,UP015,UP032,UP034,UP018,COM,C4,PT,SIM,PLR0402 --ignore PT011,PT012,SIM117 + ruff . --select I,UP015,UP032,UP034,UP018,COM,C4,PT,SIM,PLR0402 --ignore COM812,PT011,PT012,SIM117 black --line-length 100 ${PACKAGE_NAME} --check flake8 ${PACKAGE_NAME} mypy ${PACKAGE_NAME} --ignore-missing-imports --check-untyped-defs diff --git a/scripts/collect_env.py b/scripts/collect_env.py index f0649147b1..6cb48e0f25 100644 --- a/scripts/collect_env.py +++ b/scripts/collect_env.py @@ -40,7 +40,7 @@ def get_os_version(): return platform.platform() -def is_python_package_installed(package_name): +def is_python_package_installed(package_name: str): """ Check if a Python package is installed @@ -57,14 +57,10 @@ def is_python_package_installed(package_name): check=True, ) - for line in result.stdout.splitlines(): - if line.lower().startswith(package_name.lower()): - return True - - return False + return any(line.lower().startswith(package_name.lower()) for line in result.stdout.splitlines()) -def is_brew_package_installed(package_name): +def is_brew_package_installed(package_name: str): """ Check if a Homebrew package is installed @@ -95,11 +91,7 @@ def is_brew_package_installed(package_name): check=True, ) - for line in result.stdout.splitlines(): - if line.lower().startswith(package_name.lower()): - return True - - return False + return any(line.lower().startswith(package_name.lower()) for line in result.stdout.splitlines()) def get_python_package_version(package_name): @@ -221,8 +213,7 @@ def main(): ): print( "PaddleOCR version: ", - get_python_package_version("paddlepaddle") - or get_python_package_version("paddleocr"), + get_python_package_version("paddlepaddle") or get_python_package_version("paddleocr"), ) else: print("PaddleOCR is not installed") diff --git a/scripts/performance/run_partition.py b/scripts/performance/run_partition.py index 4da380f02e..3710f02c64 100644 --- a/scripts/performance/run_partition.py +++ b/scripts/performance/run_partition.py @@ -13,11 +13,7 @@ file_path = sys.argv[1] strategy = sys.argv[2] - model_name = None - if len(sys.argv) > 3: - model_name = sys.argv[3] - else: - model_name = os.environ.get("PARTITION_MODEL_NAME") + model_name = sys.argv[3] if len(sys.argv) > 3 else os.environ.get("PARTITION_MODEL_NAME") result = partition(file_path, strategy=strategy, model_name=model_name) # access element in the return value to make sure we got something back, otherwise error result[1] diff --git a/unstructured/documents/html.py b/unstructured/documents/html.py index ff32ebc5a1..75299fe898 100644 --- a/unstructured/documents/html.py +++ b/unstructured/documents/html.py @@ -444,11 +444,8 @@ def _construct_text(tag_elem: etree.Element, include_tail_text: bool = True) -> return text.strip() -def _has_break_tags(tag_elem: etree.Element) -> bool: - for descendant in tag_elem.iterdescendants(): - if descendant.tag in TEXTBREAK_TAGS: - return True - return False +def _has_break_tags(tag_elem: etree._Element) -> bool: # pyright: ignore[reportPrivateUsage] + return any(descendant.tag in TEXTBREAK_TAGS for descendant in tag_elem.iterdescendants()) def _unfurl_break_tags(tag_elem: etree.Element) -> List[etree.Element]: diff --git a/unstructured/partition/json.py b/unstructured/partition/json.py index f4771da7d5..33d7d7a469 100644 --- a/unstructured/partition/json.py +++ b/unstructured/partition/json.py @@ -54,10 +54,7 @@ def partition_json( last_modification_date = get_last_modified_date_from_file(file) file_content = file.read() - if isinstance(file_content, str): - file_text = file_content - else: - file_text = file_content.decode() + file_text = file_content if isinstance(file_content, str) else file_content.decode() file.seek(0) elif text is not None: diff --git a/unstructured/staging/prodigy.py b/unstructured/staging/prodigy.py index e4d5a99ca9..ba822ce423 100644 --- a/unstructured/staging/prodigy.py +++ b/unstructured/staging/prodigy.py @@ -28,9 +28,7 @@ def _validate_prodigy_metadata( ) if isinstance(id_error_index, int): raise ValueError( - 'The key "id" is not allowed with metadata parameter at index: {index}'.format( - index=id_error_index, - ), + f'The key "id" is not allowed with metadata parameter at index: {id_error_index}' ) validated_metadata = metadata else: