From 140f32a43c987b6f000097c230eceb1b1f657b5b Mon Sep 17 00:00:00 2001
From: Victor Engmark
Date: Mon, 18 Dec 2023 14:23:45 +1300
Subject: [PATCH] refactor: PEP-8 compliance (#771)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* docs: Add missing parameter docs
* fix: Remove docs for non-existent parameter
* docs: Fix reference to renamed parameter
* refactor: Fix PEP-8 E713 "test for membership should be ‘not in’".
* refactor: Use lowercase function names
* refactor: Avoid shadowing built-in name
* refactor: Import from the same level consistently
---
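Note: a minimal sketch of the two recurring rewrites in this patch, shown
with a hypothetical `metadata` dict and `inputs` list rather than code from
this repository:

    import os

    metadata = {"driverShortName": "GTiff"}  # hypothetical example data
    inputs = ["a.tiff", "b.tif"]             # hypothetical example data

    # PEP-8 E713: state the membership test positively.
    if not "coordinateSystem" in metadata:  # before: flagged as E713
        pass
    if "coordinateSystem" not in metadata:  # after: equivalent, clearer
        pass

    # Avoid shadowing the built-in `input`: a trailing underscore keeps the
    # comprehension variable distinct while leaving builtins.input intact.
    basenames = [os.path.basename(input_) for input_ in inputs]
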
 scripts/aws/aws_helper.py               | 16 +++----
 scripts/files/files_helper.py           |  4 +-
 scripts/files/fs.py                     |  9 +++-
 scripts/files/fs_s3.py                  | 22 +++++-----
 scripts/files/tests/file_helper_test.py |  6 +--
 scripts/files/tests/file_tiff_test.py   | 10 ++---
 scripts/files/tests/fs_s3_test.py       | 55 ++++++++++++-------------
 scripts/gdal/gdal_bands.py              |  1 +
 scripts/gdal/gdal_preset.py             |  1 -
 scripts/gdal/tests/gdal_bands_test.py   |  2 +-
 scripts/standardising.py                |  2 +-
 scripts/thumbnails.py                   |  4 +-
 12 files changed, 68 insertions(+), 64 deletions(-)

diff --git a/scripts/aws/aws_helper.py b/scripts/aws/aws_helper.py
index 656b57b3f..92a360ef1 100644
--- a/scripts/aws/aws_helper.py
+++ b/scripts/aws/aws_helper.py
@@ -4,9 +4,9 @@
 from typing import Any, Dict, List, NamedTuple, Optional
 from urllib.parse import urlparse
 
-import boto3
-import botocore
+from boto3 import Session
 from botocore.credentials import AssumeRoleCredentialFetcher, DeferredRefreshableCredentials, ReadOnlyCredentials
+from botocore.session import Session as BotocoreSession
 from linz_logger import get_log
 
 from scripts.aws.aws_credential_source import CredentialSource
@@ -14,8 +14,8 @@
 S3Path = NamedTuple("S3Path", [("bucket", str), ("key", str)])
 
 aws_profile = environ.get("AWS_PROFILE")
-session = boto3.Session(profile_name=aws_profile)
-sessions: Dict[str, boto3.Session] = {}
+session = Session(profile_name=aws_profile)
+sessions: Dict[str, Session] = {}
 
 bucket_roles: List[CredentialSource] = []
 
@@ -40,14 +40,14 @@ def _init_roles() -> None:
     get_log().debug("bucket_config_loaded", config=bucket_config_path, prefix_count=len(bucket_roles))
 
 
-def _get_client_creator(local_session: boto3.Session) -> Any:
+def _get_client_creator(local_session: Session) -> Any:
     def client_creator(service_name: str, **kwargs: Any) -> Any:
         return local_session.client(service_name, **kwargs)
 
     return client_creator
 
 
-def get_session(prefix: str) -> boto3.Session:
+def get_session(prefix: str) -> Session:
    """Get an AWS session to deal with an object on `s3`.
 
     Args:
@@ -78,14 +78,14 @@ def get_session(prefix: str) -> Session:
             role_arn=cfg.roleArn,
             extra_args=extra_args,
         )
-        botocore_session = botocore.session.Session()
+        botocore_session = BotocoreSession()
 
         # pylint:disable=protected-access
         botocore_session._credentials = DeferredRefreshableCredentials(
             method="assume-role", refresh_using=fetcher.fetch_credentials
         )
 
-        current_session = boto3.Session(botocore_session=botocore_session)
+        current_session = Session(botocore_session=botocore_session)
         sessions[cfg.roleArn] = current_session
 
         get_log().info("role_assume", prefix=prefix, bucket=cfg.bucket, role_arn=cfg.roleArn)
diff --git a/scripts/files/files_helper.py b/scripts/files/files_helper.py
index f4570870d..90f3cbdd8 100644
--- a/scripts/files/files_helper.py
+++ b/scripts/files/files_helper.py
@@ -51,7 +51,7 @@ def is_tiff(path: str) -> bool:
     return path.lower().endswith((".tiff", ".tif"))
 
 
-def is_GTiff(path: str, gdalinfo_data: Optional[GdalInfo] = None) -> bool:
+def is_geotiff(path: str, gdalinfo_data: Optional[GdalInfo] = None) -> bool:
     """Verifies if a file is a GTiff based on the presence of the
     `coordinateSystem`.
 
@@ -64,7 +64,7 @@ def is_GTiff(path: str, gdalinfo_data: Optional[GdalInfo] = None) -> bool:
     """
     if not gdalinfo_data:
         gdalinfo_data = gdal_info(path)
-    if not "coordinateSystem" in gdalinfo_data:
+    if "coordinateSystem" not in gdalinfo_data:
         return False
     if gdalinfo_data["driverShortName"] == "GTiff":
         return True
diff --git a/scripts/files/fs.py b/scripts/files/fs.py
index c1d3c8ca0..2e6455ab7 100644
--- a/scripts/files/fs.py
+++ b/scripts/files/fs.py
@@ -58,6 +58,7 @@ def write_all(inputs: List[str], target: str, concurrency: Optional[int] = 4) ->
     Args:
         inputs: list of files to read
         target: target folder to write to
+        concurrency: max thread pool workers
 
     Returns:
         list of written file paths
@@ -65,7 +66,8 @@ def write_all(inputs: List[str], target: str, concurrency: Optional[int] = 4) ->
     written_tiffs: List[str] = []
     with ThreadPoolExecutor(max_workers=concurrency) as executor:
         futuress = {
-            executor.submit(write, os.path.join(target, f"{os.path.basename(input)}"), read(input)): input for input in inputs
+            executor.submit(write, os.path.join(target, f"{os.path.basename(input_)}"), read(input_)): input_
+            for input_ in inputs
         }
         for future in as_completed(futuress):
             if future.exception():
@@ -86,6 +88,7 @@ def find_sidecars(inputs: List[str], extensions: List[str], concurrency: Optiona
     Args:
         inputs: list of input files to search for extensions
         extensions: the sidecar file extensions
+        concurrency: max thread pool workers
 
     Returns:
         list of existing sidecar files
@@ -100,7 +103,9 @@ def _validate_path(path: str) -> Optional[str]:
     sidecars: List[str] = []
     with ThreadPoolExecutor(max_workers=concurrency) as executor:
         for extension in extensions:
-            futuress = {executor.submit(_validate_path, f"{os.path.splitext(input)[0]}{extension}"): input for input in inputs}
+            futuress = {
+                executor.submit(_validate_path, f"{os.path.splitext(input_)[0]}{extension}"): input_ for input_ in inputs
+            }
             for future in as_completed(futuress):
                 if future.exception():
                     get_log().warn("Find sidecar failed", error=future.exception())
diff --git a/scripts/files/fs_s3.py b/scripts/files/fs_s3.py
index af7e451ec..b72ee6009 100644
--- a/scripts/files/fs_s3.py
+++ b/scripts/files/fs_s3.py
@@ -2,8 +2,8 @@
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Generator, List, Optional, Union
 
-import boto3
-import botocore
+from boto3 import client, resource
+from botocore.exceptions import ClientError
 from linz_logger import get_log
 
 from scripts.aws.aws_helper import get_session, parse_path
@@ -25,7 +25,7 @@ def write(destination: str, source: bytes, content_type: Optional[str] = None) -
         raise Exception("The 'source' is None.")
     s3_path = parse_path(destination)
     key = s3_path.key
-    s3 = boto3.resource("s3")
+    s3 = resource("s3")
 
     try:
         s3_object = s3.Object(s3_path.bucket, key)
@@ -34,7 +34,7 @@ def write(destination: str, source: bytes, content_type: Optional[str] = None) -
         else:
             s3_object.put(Body=source)
         get_log().debug("write_s3_success", path=destination, duration=time_in_ms() - start_time)
-    except botocore.exceptions.ClientError as ce:
+    except ClientError as ce:
         get_log().error("write_s3_error", path=destination, error=f"Unable to write the file: {ce}")
         raise ce
 
@@ -55,7 +55,7 @@ def read(path: str, needs_credentials: bool = False) -> bytes:
     start_time = time_in_ms()
     s3_path = parse_path(path)
     key = s3_path.key
-    s3 = boto3.resource("s3")
+    s3 = resource("s3")
 
     try:
         if needs_credentials:
@@ -95,7 +95,7 @@
         True if the S3 Object exists
     """
     s3_path, key = parse_path(path)
-    s3 = boto3.resource("s3")
+    s3 = resource("s3")
 
     try:
         if needs_credentials:
@@ -168,7 +168,7 @@ def prefix_from_path(path: str) -> str:
     return path.replace(f"s3://{bucket_name}/", "")
 
 
-def list_json_in_uri(uri: str, s3_client: Optional[boto3.client]) -> List[str]:
+def list_json_in_uri(uri: str, s3_client: Optional[client]) -> List[str]:
     """Get the `JSON` files from a s3 path
 
     Args:
@@ -179,7 +179,7 @@ def list_json_in_uri(uri: str, s3_client: Optional[boto3.client]) -> List[str]:
         a list of JSON files
     """
     if not s3_client:
-        s3_client = boto3.client("s3")
+        s3_client = client("s3")
     files = []
     paginator = s3_client.get_paginator("list_objects_v2")
     response_iterator = paginator.paginate(Bucket=bucket_name_from_path(uri), Prefix=prefix_from_path(uri))
@@ -195,7 +195,7 @@ def list_json_in_uri(uri: str, s3_client: Optional[boto3.client]) -> List[str]:
     return files
 
 
-def _get_object(bucket: str, file_name: str, s3_client: boto3.client) -> Any:
+def _get_object(bucket: str, file_name: str, s3_client: client) -> Any:
     """Get the object from `s3`
 
     Args:
@@ -211,7 +211,7 @@ def _get_object(bucket: str, file_name: str, s3_client: boto3.client) -> Any:
 
 def get_object_parallel_multithreading(
-    bucket: str, files_to_read: List[str], s3_client: Optional[boto3.client], concurrency: int
+    bucket: str, files_to_read: List[str], s3_client: Optional[client], concurrency: int
 ) -> Generator[Any, Union[Any, BaseException], None]:
     """Get s3 objects in parallel
 
     Args:
@@ -225,7 +225,7 @@ def get_object_parallel_multithreading(
         the object when got
     """
     if not s3_client:
-        s3_client = boto3.client("s3")
+        s3_client = client("s3")
 
     with ThreadPoolExecutor(max_workers=concurrency) as executor:
         future_to_key = {executor.submit(_get_object, bucket, key, s3_client): key for key in files_to_read}
diff --git a/scripts/files/tests/file_helper_test.py b/scripts/files/tests/file_helper_test.py
index 043b5fae1..4662fe851 100644
--- a/scripts/files/tests/file_helper_test.py
+++ b/scripts/files/tests/file_helper_test.py
@@ -1,4 +1,4 @@
-from scripts.files.files_helper import is_GTiff, is_tiff
+from scripts.files.files_helper import is_geotiff, is_tiff
 from scripts.gdal.tests.gdalinfo import fake_gdal_info
 
 
@@ -21,5 +21,5 @@ def test_is_geotiff() -> None:
     gdalinfo_not_geotiff["driverShortName"] = "GTiff"
     gdalinfo_geotiff["coordinateSystem"] = {"wkt": "PROJCRS['NZGD2000 / New Zealand Transverse Mercator 2000']"}
 
-    assert is_GTiff("file.tiff", gdalinfo_geotiff) is True
-    assert is_GTiff("file.tiff", gdalinfo_not_geotiff) is False
+    assert is_geotiff("file.tiff", gdalinfo_geotiff) is True
+    assert is_geotiff("file.tiff", gdalinfo_not_geotiff) is False
diff --git a/scripts/files/tests/file_tiff_test.py b/scripts/files/tests/file_tiff_test.py
index e07730423..c93e03a18 100644
--- a/scripts/files/tests/file_tiff_test.py
+++ b/scripts/files/tests/file_tiff_test.py
@@ -66,7 +66,7 @@ def test_check_band_count_invalid_4() -> None:
     assert file_tiff.get_errors()
 
 
-def test_check_band_count_valid_1_DEM() -> None:
+def test_check_band_count_valid_1_dem() -> None:
     """
     tests check_band_count when the input layer has a valid band count
     which is 1 bands and a DEM preset
@@ -80,7 +80,7 @@
     assert not file_tiff.get_errors()
 
 
-def test_check_band_count_invalid_alpha_DEM() -> None:
+def test_check_band_count_invalid_alpha_dem() -> None:
     """
     tests check_band_count when the input layer has a valid band count
     which is 2 bands where the second band is Alpha and DEM preset
@@ -95,7 +95,7 @@
     assert file_tiff.get_errors()
 
 
-def test_check_band_count_invalid_3_DEM() -> None:
+def test_check_band_count_invalid_3_dem() -> None:
     """
     tests check_band_count when the input layer has an invalid band count
     which is 3 bands where the preset is DEM.
@@ -157,7 +157,7 @@ def test_check_color_interpretation_invalid() -> None:
     assert file_tiff.get_errors()
 
 
-def test_check_color_interpretation_valid_DEM() -> None:
+def test_check_color_interpretation_valid_dem() -> None:
     """
     tests check_color_interpretation with the correct color interpretation
     """
@@ -170,7 +170,7 @@ def test_check_color_interpretation_valid_DEM() -> None:
     assert not file_tiff.get_errors()
 
 
-def test_check_color_interpretation_invalid_DEM() -> None:
+def test_check_color_interpretation_invalid_dem() -> None:
     """
     tests check_color_interpretation with the incorrect color interpretation
     """
diff --git a/scripts/files/tests/fs_s3_test.py b/scripts/files/tests/fs_s3_test.py
index 17b77825b..60aac0bf8 100644
--- a/scripts/files/tests/fs_s3_test.py
+++ b/scripts/files/tests/fs_s3_test.py
@@ -1,11 +1,10 @@
 import json
 
-import boto3
-import botocore
-import pytest
+from boto3 import client, resource
+from botocore.exceptions import ClientError
 from moto import mock_s3
 from moto.s3.responses import DEFAULT_REGION_NAME
-from pytest import CaptureFixture
+from pytest import CaptureFixture, raises
 
 from scripts.files.files_helper import ContentType
 from scripts.files.fs_s3 import exists, read, write
@@ -13,35 +12,35 @@
 @mock_s3  # type: ignore
 def test_write() -> None:
-    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
-    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
+    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
     s3.create_bucket(Bucket="testbucket")
 
     write("s3://testbucket/test.file", b"test content")
 
-    resp = client.get_object(Bucket="testbucket", Key="test.file")
+    resp = boto3_client.get_object(Bucket="testbucket", Key="test.file")
     assert resp["Body"].read() == b"test content"
     assert resp["ContentType"] == "binary/octet-stream"
 
 
 @mock_s3  # type: ignore
 def test_write_content_type() -> None:
-    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
-    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
+    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
     s3.create_bucket(Bucket="testbucket")
 
     write("s3://testbucket/test.tiff", b"test content", ContentType.GEOTIFF.value)
 
-    resp = client.get_object(Bucket="testbucket", Key="test.tiff")
+    resp = boto3_client.get_object(Bucket="testbucket", Key="test.tiff")
     assert resp["Body"].read() == b"test content"
     assert resp["ContentType"] == ContentType.GEOTIFF.value
 
 
 @mock_s3  # type: ignore
 def test_read() -> None:
-    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
-    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
+    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
     s3.create_bucket(Bucket="testbucket")
-    client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")
+    boto3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")
 
     content = read("s3://testbucket/test.file")
 
@@ -50,7 +49,7 @@ def test_read() -> None:
 
 @mock_s3  # type: ignore
 def test_read_bucket_not_found(capsys: CaptureFixture[str]) -> None:
-    with pytest.raises(botocore.exceptions.ClientError):
+    with raises(ClientError):
         read("s3://testbucket/test.file")
 
     # python-linz-logger uses structlog which doesn't use stdlib so can't capture the logs with `caplog`
@@ -60,10 +59,10 @@ def test_read_bucket_not_found(capsys: CaptureFixture[str]) -> None:
 
 @mock_s3  # type: ignore
 def test_read_key_not_found(capsys: CaptureFixture[str]) -> None:
-    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
+    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
     s3.create_bucket(Bucket="testbucket")
 
-    with pytest.raises(botocore.exceptions.ClientError):
+    with raises(ClientError):
         read("s3://testbucket/test.file")
 
     logs = json.loads(capsys.readouterr().out)
@@ -72,10 +71,10 @@ def test_read_key_not_found(capsys: CaptureFixture[str]) -> None:
 
 @mock_s3  # type: ignore
 def test_exists() -> None:
-    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
-    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
+    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
     s3.create_bucket(Bucket="testbucket")
-    client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")
+    boto3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")
 
     file_exists = exists("s3://testbucket/test.file")
 
@@ -84,10 +83,10 @@ def test_exists() -> None:
 
 @mock_s3  # type: ignore
 def test_directory_exists() -> None:
-    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
-    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
+    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
     s3.create_bucket(Bucket="testbucket")
-    client.put_object(Bucket="testbucket", Key="hello/test.file", Body=b"test content")
+    boto3_client.put_object(Bucket="testbucket", Key="hello/test.file", Body=b"test content")
 
     directory_exists = exists("s3://testbucket/hello/")
 
@@ -105,10 +104,10 @@ def test_exists_bucket_not_exists(capsys: CaptureFixture[str]) -> None:
 
 @mock_s3  # type: ignore
 def test_exists_object_not_exists() -> None:
-    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
-    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
+    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
     s3.create_bucket(Bucket="testbucket")
-    client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")
+    boto3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")
 
     file_exists = exists("s3://testbucket/test.file")
 
@@ -117,10 +116,10 @@ def test_exists_object_not_exists() -> None:
 
 @mock_s3  # type: ignore
 def test_exists_object_starting_with_not_exists() -> None:
-    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
-    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
+    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
     s3.create_bucket(Bucket="testbucket")
-    client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")
+    boto3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")
 
     file_exists = exists("s3://testbucket/hello/another.fi")
Key="hello/another.file", Body=b"test content") + boto3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content") file_exists = exists("s3://testbucket/test.file") @@ -117,10 +116,10 @@ def test_exists_object_not_exists() -> None: @mock_s3 # type: ignore def test_exists_object_starting_with_not_exists() -> None: - s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME) - client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + s3 = resource("s3", region_name=DEFAULT_REGION_NAME) + boto3_client = client("s3", region_name=DEFAULT_REGION_NAME) s3.create_bucket(Bucket="testbucket") - client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content") + boto3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content") file_exists = exists("s3://testbucket/hello/another.fi") diff --git a/scripts/gdal/gdal_bands.py b/scripts/gdal/gdal_bands.py index def81b51d..e2f5f3cd0 100644 --- a/scripts/gdal/gdal_bands.py +++ b/scripts/gdal/gdal_bands.py @@ -28,6 +28,7 @@ def get_gdal_band_offset(file: str, info: Optional[GdalInfo] = None, preset: Opt Args: file: file to check info: optional precomputed gdalinfo + preset: "dem_lerc" preset used to differentiate single band elevation tiffs from single band historical imagery Returns: list of band mappings eg "-b 1 -b 1 -b 1" diff --git a/scripts/gdal/gdal_preset.py b/scripts/gdal/gdal_preset.py index d42a7bb0f..f0e5feca0 100644 --- a/scripts/gdal/gdal_preset.py +++ b/scripts/gdal/gdal_preset.py @@ -90,7 +90,6 @@ def get_gdal_command(preset: str, epsg: str) -> List[str]: Args: preset: gdal preset to use. Defined in `gdal.gdal_preset.py` epsg: the EPSG code of the file - convert_from: Defaults to None. Returns: a list of arguments to run `gdal_translate` diff --git a/scripts/gdal/tests/gdal_bands_test.py b/scripts/gdal/tests/gdal_bands_test.py index 187db841f..df290c1bf 100644 --- a/scripts/gdal/tests/gdal_bands_test.py +++ b/scripts/gdal/tests/gdal_bands_test.py @@ -21,7 +21,7 @@ def test_gdal_grey_bands_detection() -> None: assert " ".join(bands) == "-b 2 -b 2 -b 2 -b 1" -def test_gdal_grey_bands_DEM_detection() -> None: +def test_gdal_grey_bands_dem_detection() -> None: gdalinfo = fake_gdal_info() add_band(gdalinfo, color_interpretation="Gray") diff --git a/scripts/standardising.py b/scripts/standardising.py index c20182bb3..2f52d2e6c 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -104,7 +104,7 @@ def standardising( """Apply transformations using GDAL to the source file. Args: - file: path to the file to standardise + files: paths to the files to standardise preset: gdal preset to use. See `gdal.gdal_preset.py` source_epsg: EPSG code of the source file target_epsg: EPSG code of reprojection diff --git a/scripts/thumbnails.py b/scripts/thumbnails.py index 93109d0c8..152fb3123 100644 --- a/scripts/thumbnails.py +++ b/scripts/thumbnails.py @@ -7,7 +7,7 @@ from linz_logger import get_log -from scripts.files.files_helper import ContentType, get_file_name_from_path, is_GTiff, is_tiff +from scripts.files.files_helper import ContentType, get_file_name_from_path, is_geotiff, is_tiff from scripts.files.fs import exists, read, write from scripts.gdal import gdalinfo from scripts.gdal.gdal_helper import run_gdal @@ -47,7 +47,7 @@ def thumbnails(path: str, target: str) -> str | None: # For both GeoTIFF and TIFF (not georeferenced) this is done in 2 steps. # Why? because it hasn't been found another way to get the same visual aspect. 
diff --git a/scripts/thumbnails.py b/scripts/thumbnails.py
index 93109d0c8..152fb3123 100644
--- a/scripts/thumbnails.py
+++ b/scripts/thumbnails.py
@@ -7,7 +7,7 @@
 
 from linz_logger import get_log
 
-from scripts.files.files_helper import ContentType, get_file_name_from_path, is_GTiff, is_tiff
+from scripts.files.files_helper import ContentType, get_file_name_from_path, is_geotiff, is_tiff
 from scripts.files.fs import exists, read, write
 from scripts.gdal import gdalinfo
 from scripts.gdal.gdal_helper import run_gdal
@@ -47,7 +47,7 @@ def thumbnails(path: str, target: str) -> str | None:
     # For both GeoTIFF and TIFF (not georeferenced) this is done in 2 steps.
     # Why? because it hasn't been found another way to get the same visual aspect.
     gdalinfo_data = gdalinfo.gdal_info(source_tiff)
-    if is_GTiff(source_tiff, gdalinfo_data):
+    if is_geotiff(source_tiff, gdalinfo_data):
         get_log().info("thumbnail_generate_geotiff", path=target_thumbnail)
         run_gdal(get_thumbnail_command("jpeg", source_tiff, transitional_jpg, "50%", "50%", None, gdalinfo_data))
         run_gdal(get_thumbnail_command("jpeg", transitional_jpg, tmp_thumbnail, "30%", "30%", None, gdalinfo_data))