Skip to content

Commit

Permalink
Merge branch 'master' into dependabot/pip/mypy-1.10.0
Browse files Browse the repository at this point in the history
  • Loading branch information
l0b0 authored May 16, 2024
2 parents 1562ea9 + 2828f14 commit fad5a20
Show file tree
Hide file tree
Showing 19 changed files with 235 additions and 73 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/format-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,23 +32,23 @@ jobs:
- name: End to end test - Aerial Imagery
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true
cmp --silent "${{ runner.temp }}/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff
- name: End to end test - Elevation
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30m
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30m --create-footprints=true
cmp --silent "${{ runner.temp }}/BK39_10000_0102.tiff" ./scripts/tests/data/output/BK39_10000_0102.tiff
cmp --silent "${{ runner.temp }}/BK39_10000_0101.tiff" ./scripts/tests/data/output/BK39_10000_0101.tiff
- name: End to end test - Historical Aerial Imagery
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60m
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60m --create-footprints=true
cmp --silent "${{ runner.temp }}/BQ31_5000_0608.tiff" ./scripts/tests/data/output/BQ31_5000_0608.tiff
- name: End to end test - Cutline (Aerial Imagery)
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10m
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10m --create-footprints=true
cmp --silent "${{ runner.temp }}/cutline/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829_cut.tiff
- name: End to end test - Thumbnails (Topo50/Topo250)
Expand All @@ -59,7 +59,7 @@ jobs:
- name: End to end test - Restandardise Aerial Imagery
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true
cmp --silent "${{ runner.temp }}/restandardise/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff
- name: End to end test - Translate Ascii Files (Elevation)
Expand All @@ -69,7 +69,7 @@ jobs:
- name: End to end test - Remove empty files
run: |
docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60m
docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60m --create-footprints=true
empty_target_directory="$(find "${{ runner.temp }}/tmp-empty" -maxdepth 0 -type d -empty)"
[[ -n "$empty_target_directory" ]]
Expand Down
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# Changelog

## [4.6.0](https://github.com/linz/topo-imagery/compare/v4.5.0...v4.6.0) (2024-05-07)


### Features

* add checksum to item links in collections TDE-1138 ([#953](https://github.com/linz/topo-imagery/issues/953)) ([afea8f0](https://github.com/linz/topo-imagery/commit/afea8f0f9722d0980657385e420f040e93085eef))
* add option to create footprints ([#959](https://github.com/linz/topo-imagery/issues/959)) ([ea5c98b](https://github.com/linz/topo-imagery/commit/ea5c98baa584c5445dcbedf159d409dfeeddf7ce))


### Bug Fixes

* Make compatible with latest moto ([#949](https://github.com/linz/topo-imagery/issues/949)) ([5902df0](https://github.com/linz/topo-imagery/commit/5902df0fec6dd15bb3a89ec616a2c27136a6aec5))
* Use correct types for S3 client ([#954](https://github.com/linz/topo-imagery/issues/954)) ([2c96c13](https://github.com/linz/topo-imagery/commit/2c96c1382d521d641e2b167e4096759af9b13dce))

## [4.5.0](https://github.com/linz/topo-imagery/compare/v4.4.0...v4.5.0) (2024-03-24)


Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Run `docker run topo-imagery python standardise_validate.py --help` to get the l
- Example of local execution. This example uses the test data available on this repo and create the output will be created in a `~/tmp/` on the local machine (volume share with `Docker`):

```bash
docker run -v ${HOME}/tmp/:/tmp/:rw topo-imagery python standardise_validate.py --preset webp --from-file ./tests/data/aerial.json --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --target /tmp/ --source-epsg 2193 --target-epsg 2193 --gsd 10m
docker run -v ${HOME}/tmp/:/tmp/:rw topo-imagery python standardise_validate.py --preset webp --from-file ./tests/data/aerial.json --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --target /tmp/ --source-epsg 2193 --target-epsg 2193 --gsd 10m --create-footprints=true
```

To use an AWS test dataset (input located in an AWS S3 bucket), log into the AWS account and add the following arguments to the `docker run` command:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ ignore_missing_imports = true

[tool.poetry]
name = "topo-imagery"
version = "4.5.0"
version = "4.6.0"
description = "A collection of scripts for processing imagery"
authors = [
"Blayne Chard <[email protected]>",
Expand Down
16 changes: 15 additions & 1 deletion scripts/files/fs.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
import os
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
from typing import List, Optional
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, List, Optional

from boto3 import resource
from linz_logger import get_log

from scripts.aws.aws_helper import is_s3
from scripts.files import fs_local, fs_s3

if TYPE_CHECKING:
from mypy_boto3_s3 import S3Client
else:
S3Client = dict


def write(destination: str, source: bytes, content_type: Optional[str] = None) -> str:
"""Write a file from its source to a destination path.
Expand Down Expand Up @@ -79,6 +86,13 @@ def exists(path: str) -> bool:
return fs_local.exists(path)


def modified(path: str, s3_client: Optional[S3Client] = None) -> datetime:
    """Return the last-modified datetime for an S3 URL or a local path.

    Args:
        path: either an `s3://` URL or a local filesystem path
        s3_client: optional S3 client, only used when `path` is an S3 URL

    Returns:
        the last-modified timestamp of the addressed object/file
    """
    if not is_s3(path):
        return fs_local.modified(Path(path))
    return fs_s3.modified(fs_s3.bucket_name_from_path(path), fs_s3.prefix_from_path(path), s3_client)


def write_all(inputs: List[str], target: str, concurrency: Optional[int] = 4) -> List[str]:
"""Writes list of files to target destination using multithreading.
Expand Down
8 changes: 8 additions & 0 deletions scripts/files/fs_local.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
from datetime import datetime, timezone
from pathlib import Path


def write(destination: str, source: bytes) -> None:
Expand Down Expand Up @@ -36,3 +38,9 @@ def exists(path: str) -> bool:
True if the path exists
"""
return os.path.exists(path)


def modified(path: Path) -> datetime:
    """Return the file's last-modified time as a timezone-aware UTC datetime."""
    return datetime.fromtimestamp(os.path.getmtime(path), tz=timezone.utc)
6 changes: 6 additions & 0 deletions scripts/files/fs_s3.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from concurrent import futures
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import TYPE_CHECKING, Any, Generator, List, Optional, Union

from boto3 import client, resource
Expand Down Expand Up @@ -237,3 +238,8 @@ def get_object_parallel_multithreading(
yield key, future.result()
else:
yield key, exception


def modified(bucket_name: str, key: str, s3_client: Optional[S3Client] = None) -> datetime:
    """Get the last-modified datetime of an S3 object.

    Args:
        bucket_name: the S3 bucket name
        key: the object key within the bucket
        s3_client: optional S3 client; a new one is created when omitted
            (default added for consistency with `fs.modified`, backward-compatible)

    Returns:
        the object's `LastModified` timestamp as reported by S3
    """
    s3_client = s3_client or client("s3")
    return _get_object(bucket_name, key, s3_client)["LastModified"]
17 changes: 17 additions & 0 deletions scripts/files/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from shutil import rmtree
from tempfile import mkdtemp
from typing import Generator

import pytest


@pytest.fixture(name="setup", autouse=True)
def fixture_setup() -> Generator[str, None, None]:
    """Yield a fresh temporary directory for each test and remove it afterwards.

    See the pytest documentation for details:
    https://docs.pytest.org/en/stable/fixture.html#yield-fixtures-recommended
    """
    temporary_directory = mkdtemp()
    yield temporary_directory
    rmtree(temporary_directory)
27 changes: 11 additions & 16 deletions scripts/files/tests/fs_local_test.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,10 @@
import os
from shutil import rmtree
from tempfile import mkdtemp
from typing import Generator
from pathlib import Path

import pytest

from scripts.files.fs_local import exists, read, write


@pytest.fixture(name="setup", autouse=True)
def fixture_setup() -> Generator[str, None, None]:
"""
This function creates a temporary directory and deletes it after each test.
See following link for details:
https://docs.pytest.org/en/stable/fixture.html#yield-fixtures-recommended
"""
target = mkdtemp()
yield target
rmtree(target)
from scripts.files.fs_local import exists, modified, read, write
from scripts.tests.datetimes_test import any_epoch_datetime


@pytest.mark.dependency(name="write")
Expand Down Expand Up @@ -58,3 +45,11 @@ def test_exists(setup: str) -> None:
def test_exists_file_not_found() -> None:
found = exists("/tmp/test.file")
assert found is False


def test_should_get_modified_datetime(setup: str) -> None:
    """`modified` should report the file's mtime, not its access time."""
    file_path = Path(os.path.join(setup, "modified.file"))
    file_path.touch()
    expected_datetime = any_epoch_datetime()
    # Set distinct access/modified times so the assertion proves mtime is used
    os.utime(file_path, times=(any_epoch_datetime().timestamp(), expected_datetime.timestamp()))
    assert modified(file_path) == expected_datetime
21 changes: 20 additions & 1 deletion scripts/files/tests/fs_s3_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
from boto3 import client, resource
from botocore.exceptions import ClientError
from moto import mock_aws
from moto.core.models import DEFAULT_ACCOUNT_ID
from moto.s3.models import s3_backends
from moto.s3.responses import DEFAULT_REGION_NAME
from moto.wafv2.models import GLOBAL_REGION
from mypy_boto3_s3 import S3Client
from pytest import CaptureFixture, raises
from pytest_subtests import SubTests

from scripts.files.files_helper import ContentType
from scripts.files.fs_s3 import exists, list_files_in_uri, read, write
from scripts.files.fs_s3 import exists, list_files_in_uri, modified, read, write
from scripts.tests.datetimes_test import any_epoch_datetime


@mock_aws
Expand Down Expand Up @@ -156,3 +161,17 @@ def test_list_files_in_uri(subtests: SubTests) -> None:

with subtests.test():
assert "data/image.tiff" not in files


@mock_aws
def test_should_get_modified_datetime() -> None:
    """`modified` should return the object's LastModified timestamp from S3."""
    bucket_name = "any-bucket-name"
    key = "any-key"
    expected_datetime = any_epoch_datetime()

    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
    s3_client.create_bucket(Bucket=bucket_name)
    s3_client.put_object(Bucket=bucket_name, Key=key, Body=b"any body")
    # Reach into the moto backend to pin a deterministic LastModified value
    s3_backends[DEFAULT_ACCOUNT_ID][GLOBAL_REGION].buckets[bucket_name].keys[key].last_modified = expected_datetime

    assert modified(bucket_name, key, s3_client) == expected_datetime
32 changes: 30 additions & 2 deletions scripts/files/tests/fs_test.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import os
from pathlib import Path
from shutil import rmtree
from tempfile import mkdtemp

from boto3 import resource
from boto3 import client, resource
from moto import mock_aws
from moto.core.models import DEFAULT_ACCOUNT_ID
from moto.s3.models import s3_backends
from moto.s3.responses import DEFAULT_REGION_NAME
from moto.wafv2.models import GLOBAL_REGION
from mypy_boto3_s3 import S3Client
from pytest import CaptureFixture, raises
from pytest_subtests import SubTests

from scripts.files.fs import NoSuchFileError, read, write, write_all, write_sidecars
from scripts.files.fs import NoSuchFileError, modified, read, write, write_all, write_sidecars
from scripts.tests.datetimes_test import any_epoch_datetime


def test_read_key_not_found_local() -> None:
Expand Down Expand Up @@ -81,3 +87,25 @@ def test_write_sidecars_one_found(capsys: CaptureFixture[str], subtests: SubTest
assert "wrote_sidecar_file" in logs

rmtree(target)


@mock_aws
def test_should_get_s3_object_modified_datetime() -> None:
    """`fs.modified` should dispatch s3:// URLs to the S3 implementation."""
    bucket_name = "any-bucket-name"
    key = "any-key"
    expected_datetime = any_epoch_datetime()

    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
    s3_client.create_bucket(Bucket=bucket_name)
    s3_client.put_object(Bucket=bucket_name, Key=key, Body=b"any body")
    # Reach into the moto backend to pin a deterministic LastModified value
    s3_backends[DEFAULT_ACCOUNT_ID][GLOBAL_REGION].buckets[bucket_name].keys[key].last_modified = expected_datetime

    assert modified(f"s3://{bucket_name}/{key}", s3_client) == expected_datetime


def test_should_get_local_file_modified_datetime(setup: str) -> None:
    """`fs.modified` should dispatch plain paths to the local implementation."""
    file_path = os.path.join(setup, "modified.file")
    Path(file_path).touch()
    expected_datetime = any_epoch_datetime()
    # Set distinct access/modified times so the assertion proves mtime is used
    os.utime(file_path, times=(any_epoch_datetime().timestamp(), expected_datetime.timestamp()))
    assert modified(file_path) == expected_datetime
17 changes: 17 additions & 0 deletions scripts/json_codec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import json
from typing import Any, Dict


def dict_to_json_bytes(input_dict: Dict[str, Any]) -> bytes:
    """Serialise a `dict` to UTF-8 encoded `bytes` containing its JSON representation.

    Non-ASCII characters are kept verbatim (not `\\uXXXX`-escaped) before encoding.

    Examples:
        >>> dict_to_json_bytes({})
        b'{}'
        >>> dict_to_json_bytes({"ā": "😀"})  # Unicode code points U+0101 and U+1F600
        b'{"\xc4\x81": "\xf0\x9f\x98\x80"}'
        >>> json.loads(dict_to_json_bytes({"ā": "😀"}))
        {'ā': '😀'}
    """
    json_text = json.dumps(input_dict, ensure_ascii=False)
    return json_text.encode("utf-8")
17 changes: 9 additions & 8 deletions scripts/stac/imagery/collection.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import os
from typing import Any, Dict, List, Optional

Expand All @@ -8,6 +7,7 @@
from scripts.datetimes import format_rfc_3339_datetime_string, parse_rfc_3339_datetime
from scripts.files.files_helper import ContentType
from scripts.files.fs import write
from scripts.json_codec import dict_to_json_bytes
from scripts.stac.imagery.capture_area import generate_capture_area, gsd_to_float
from scripts.stac.imagery.metadata_constants import (
DATA_CATEGORIES,
Expand Down Expand Up @@ -95,7 +95,7 @@ def add_capture_area(self, polygons: List[shapely.geometry.shape], target: str,

# The GSD is measured in meters (e.g., `0.3m`)
capture_area_document = generate_capture_area(polygons, gsd_to_float(self.metadata["gsd"]))
capture_area_content: bytes = json.dumps(capture_area_document).encode("utf-8")
capture_area_content: bytes = dict_to_json_bytes(capture_area_document)
file_checksum = checksum.multihash_as_hex(capture_area_content)
capture_area = {
"href": f"./{CAPTURE_AREA_FILE_NAME}",
Expand Down Expand Up @@ -129,20 +129,21 @@ def add_item(self, item: Dict[Any, Any]) -> None:
item: STAC Item to add
"""
item_self_link = next((feat for feat in item["links"] if feat["rel"] == "self"), None)
file_checksum = checksum.multihash_as_hex(dict_to_json_bytes(item))
if item_self_link:
self.add_link(href=item_self_link["href"])
self.add_link(href=item_self_link["href"], file_checksum=file_checksum)
self.update_temporal_extent(item["properties"]["start_datetime"], item["properties"]["end_datetime"])
self.update_spatial_extent(item["bbox"])

def add_link(self, href: str, rel: str = "item", file_type: str = "application/json") -> None:
def add_link(self, href: str, file_checksum: str) -> None:
    """Add an item `link` to the Collection's existing `links` list.

    The link is always written with `rel` "item" and `type` "application/json".

    Args:
        href: path to the linked STAC Item
        file_checksum: checksum of the linked file (required; the previous
            docstring wrongly described it as optional)
    """
    link = {"rel": "item", "href": href, "type": "application/json", "file:checksum": file_checksum}
    self.stac["links"].append(link)

def add_providers(self, providers: List[Provider]) -> None:
"""Add a list of Providers to the existing list of `providers` of the Collection.
Expand Down Expand Up @@ -234,7 +235,7 @@ def write_to(self, destination: str) -> None:
Args:
destination: path of the destination
"""
write(destination, json.dumps(self.stac, ensure_ascii=False).encode("utf-8"), content_type=ContentType.JSON.value)
write(destination, dict_to_json_bytes(self.stac), content_type=ContentType.JSON.value)

def _title(self) -> str:
"""Generates the title for imagery and elevation datasets.
Expand Down
Loading

0 comments on commit fad5a20

Please sign in to comment.