diff --git a/.github/workflows/format-tests.yml b/.github/workflows/format-tests.yml
index 928a4fec6..765a7b6d5 100644
--- a/.github/workflows/format-tests.yml
+++ b/.github/workflows/format-tests.yml
@@ -31,28 +31,28 @@ jobs:
       - name: End to end test - Aerial Imagery
         run: |
-          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true
+          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true --current-datetime=2010-09-18T12:34:56Z
           cmp --silent "${{ runner.temp }}/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff

       - name: End to end test - Elevation
         run: |
-          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30 --create-footprints=true
+          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30 --create-footprints=true --current-datetime=2010-09-18T12:34:56Z
           cmp --silent "${{ runner.temp }}/BK39_10000_0102.tiff" ./scripts/tests/data/output/BK39_10000_0102.tiff
           cmp --silent "${{ runner.temp }}/BK39_10000_0101.tiff" ./scripts/tests/data/output/BK39_10000_0101.tiff

       - name: End to end test - Historical Aerial Imagery
         run: |
-          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60 --create-footprints=true
+          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60 --create-footprints=true --current-datetime=2010-09-18T12:34:56Z
           cmp --silent "${{ runner.temp }}/BQ31_5000_0608.tiff" ./scripts/tests/data/output/BQ31_5000_0608.tiff

       - name: End to end test - Cutline (Aerial Imagery)
         run: |
-          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10 --create-footprints=true
+          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10 --create-footprints=true --current-datetime=2010-09-18T12:34:56Z
           cmp --silent "${{ runner.temp }}/cutline/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829_cut.tiff

       - name: End to end test - Footprint
         run: |
-          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true
+          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true --current-datetime=2010-09-18T12:34:56Z
           jq 'select(.xy_coordinate_resolution == 1E-8) // error("Wrong or missing X/Y coordinate resolution")' "${{ runner.temp }}/BG35_1000_4829_footprint.geojson"
           cmp --silent <(jq "del(.features[0].properties.location, .xy_coordinate_resolution)" "${{ runner.temp }}/BG35_1000_4829_footprint.geojson") <(jq "del(.features[0].properties.location, .xy_coordinate_resolution)" ./scripts/tests/data/output/BG35_1000_4829_footprint.geojson)
@@ -64,7 +64,7 @@ jobs:
       - name: End to end test - Restandardise Aerial Imagery
         run: |
-          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true
+          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true --current-datetime=2010-09-18T12:34:56Z
           cmp --silent "${{ runner.temp }}/restandardise/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff

       - name: End to end test - Translate Ascii Files (Elevation)
@@ -74,7 +74,7 @@ jobs:
       - name: End to end test - Remove empty files
         run: |
-          docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60 --create-footprints=true
+          docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60 --create-footprints=true --current-datetime=2010-09-18T12:34:56Z
           empty_target_directory="$(find "${{ runner.temp }}/tmp-empty" -maxdepth 0 -type d -empty)"
           [[ -n "$empty_target_directory" ]]
diff --git a/scripts/stac/imagery/create_stac.py b/scripts/stac/imagery/create_stac.py
index 2cf2cc450..7a95b5318 100644
--- a/scripts/stac/imagery/create_stac.py
+++ b/scripts/stac/imagery/create_stac.py
@@ -5,10 +5,10 @@
 from linz_logger import get_log
 from shapely.geometry.base import BaseGeometry

-from scripts.datetimes import format_rfc_3339_datetime_string, utc_now
+from scripts.datetimes import utc_now
 from scripts.files import fs
 from scripts.files.files_helper import get_file_name_from_path
-from scripts.files.fs import modified, read
+from scripts.files.fs import NoSuchFileError, read
 from scripts.files.geotiff import get_extents
 from scripts.gdal.gdal_helper import gdal_info
 from scripts.gdal.gdalinfo import GdalInfo
@@ -88,8 +88,10 @@ def create_item(
     end_datetime: str,
     collection_id: str,
     gdal_version: str,
+    current_datetime: str,
     gdalinfo_result: GdalInfo | None = None,
     derived_from: list[str] | None = None,
+    odr_url: str | None = None,
 ) -> ImageryItem:
     """Create an ImageryItem (STAC) to be linked to a Collection.
@@ -99,13 +101,16 @@ def create_item(
         end_datetime: end date of the survey
         collection_id: collection id to link to the Item
         gdal_version: GDAL version
+        current_datetime: date and time for setting consistent update and/or creation timestamp
        gdalinfo_result: result of the gdalinfo command. Defaults to None.
         derived_from: list of STAC Items from where this Item is derived. Defaults to None.
+        odr_url: S3 URL of the already published files in ODR (if this is a resupply). Defaults to None.

     Returns:
         a STAC Item wrapped in ImageryItem
     """
-    item = create_base_item(asset_path, gdal_version)
+    item = create_or_load_base_item(asset_path, gdal_version, current_datetime, odr_url)
+    base_stac = item.stac.copy()

     if not gdalinfo_result:
         gdalinfo_result = gdal_info(asset_path)
@@ -131,15 +136,24 @@ def create_item(
     item.update_spatial(*get_extents(gdalinfo_result))
     item.add_collection(collection_id)

+    if item.stac != base_stac and item.stac["properties"]["updated"] != current_datetime:
+        item.stac["properties"][
+            "updated"
+        ] = current_datetime  # some of the metadata has changed, so we need to make sure the `updated` time is set correctly
+
     get_log().info("ImageryItem created", path=asset_path)
     return item


-def create_base_item(asset_path: str, gdal_version: str) -> ImageryItem:
+def create_or_load_base_item(
+    asset_path: str, gdal_version: str, current_datetime: str, odr_url: str | None = None
+) -> ImageryItem:
     """
     Args:
-        asset_path: path of the visual asset (TIFF)
+        asset_path: path with filename of the visual asset (TIFF)
         gdal_version: GDAL version string
+        current_datetime: date and time used for setting consistent update and/or creation timestamp
+        odr_url: S3 URL of the already published files in ODR (if this is a resupply). Defaults to None.

     Returns:
         An ImageryItem with basic information.
@@ -147,8 +161,6 @@ def create_base_item(asset_path: str, gdal_version: str) -> ImageryItem:
     id_ = get_file_name_from_path(asset_path)
     file_content = fs.read(asset_path)
     file_content_checksum = checksum.multihash_as_hex(file_content)
-    file_modified_datetime = format_rfc_3339_datetime_string(modified(asset_path))
-    now_string = format_rfc_3339_datetime_string(utc_now())

     if (topo_imagery_hash := os.environ.get("GIT_HASH")) is not None:
         commit_url = f"https://github.com/linz/topo-imagery/commit/{topo_imagery_hash}"
@@ -157,19 +169,28 @@ def create_base_item(asset_path: str, gdal_version: str) -> ImageryItem:

     stac_processing = STACProcessing(
         **{
-            "processing:datetime": now_string,
+            "processing:datetime": current_datetime,
             "processing:software": STACProcessingSoftware(**{"gdal": gdal_version, "linz/topo-imagery": commit_url}),
             "processing:version": os.environ.get("GIT_VERSION", "GIT_VERSION not specified"),
         }
     )

+    if odr_url:
+        try:
+            imagery_item = ImageryItem.from_file(os.path.join(odr_url, f"{id_}.json"))
+            imagery_item.update_checksum_related_metadata(file_content_checksum, stac_processing_data=stac_processing)
+            return imagery_item
+
+        except NoSuchFileError:
+            get_log().info(f"No Item is published for ID: {id_}")
+
     stac_asset = STACAsset(
         **{
             "href": os.path.join(".", os.path.basename(asset_path)),
             "file:checksum": file_content_checksum,
-            "created": file_modified_datetime,
-            "updated": file_modified_datetime,
+            "created": current_datetime,
+            "updated": current_datetime,
         }
     )
-    return ImageryItem(id_, now_string, stac_asset, stac_processing)
+    return ImageryItem(id_, stac_asset, stac_processing)
diff --git a/scripts/stac/imagery/item.py b/scripts/stac/imagery/item.py
index d3b1cfc6a..8a9f46dc8 100644
--- a/scripts/stac/imagery/item.py
+++ b/scripts/stac/imagery/item.py
@@ -1,5 +1,7 @@
+import json
 from typing import Any, TypedDict

+from scripts.files.fs import read
 from scripts.stac.link import Link, Relation
 from scripts.stac.util.STAC_VERSION import STAC_VERSION
 from scripts.stac.util.media_type import StacMediaType
@@ -26,7 +28,7 @@ class ImageryItem:
     stac: dict[str, Any]

-    def __init__(self, id_: str, now_string: str, stac_asset: STACAsset, stac_processing: STACProcessing) -> None:
+    def __init__(self, id_: str, stac_asset: STACAsset, stac_processing: STACProcessing) -> None:
         self.stac = {
             "type": "Feature",
             "stac_version": STAC_VERSION,
@@ -34,9 +36,51 @@ def __init__(self, id_: str, now_string: str, stac_asset: STACAsset, stac_proces
             "links": [Link(path=f"./{id_}.json", rel=Relation.SELF, media_type=StacMediaType.GEOJSON).stac],
             "assets": {"visual": {**stac_asset, "type": "image/tiff; application=geotiff; profile=cloud-optimized"}},
             "stac_extensions": [StacExtensions.file.value, StacExtensions.processing.value],
-            "properties": {"created": now_string, "updated": now_string, **stac_processing},
+            "properties": {"created": stac_asset["created"], "updated": stac_asset["updated"], **stac_processing},
         }

+    @classmethod
+    def from_file(cls, file_name: str) -> "ImageryItem":
+        """Create an ImageryItem from a file.
+
+        Args:
+            file_name: The s3 URL or local path of the file to load.
+
+        Returns:
+            ImageryItem: The new ImageryItem.
+ """ + file_content = read(file_name) + stac_dict_from_s3 = json.loads(file_content.decode("UTF-8")) + if (bbox := stac_dict_from_s3.get("bbox")) is not None: + stac_dict_from_s3["bbox"] = tuple(bbox) + new_item = cls( + id_=stac_dict_from_s3["id"], + stac_asset=stac_dict_from_s3["assets"]["visual"], + stac_processing=stac_dict_from_s3["properties"], + ) + new_item.stac = stac_dict_from_s3 + + return new_item + + def update_checksum_related_metadata(self, file_content_checksum: str, stac_processing_data: STACProcessing) -> None: + """Set the assets.visual.file:checksum attribute if it has changed. + If the checksum has changed, this also updates the following attributes: + assets.visual.updated + properties.updated + properties.processing:datetime + properties.processing:software + properties.processing:version + + Args: + file_content_checksum (str): the new checksum + stac_processing_data (STACProcessing): new data for the STAC processing extension attributes for this asset/item + """ + if file_content_checksum != self.stac["assets"]["visual"]["file:checksum"]: + self.stac["assets"]["visual"]["file:checksum"] = file_content_checksum + self.stac["assets"]["visual"]["updated"] = stac_processing_data["processing:datetime"] + self.stac["properties"].update(stac_processing_data) + self.stac["properties"]["updated"] = stac_processing_data["processing:datetime"] + def update_datetime(self, start_datetime: str, end_datetime: str) -> None: """Update the Item `start_datetime` and `end_datetime` property. diff --git a/scripts/stac/imagery/tests/collection_test.py b/scripts/stac/imagery/tests/collection_test.py index 03d8e19c4..fbf905b0c 100644 --- a/scripts/stac/imagery/tests/collection_test.py +++ b/scripts/stac/imagery/tests/collection_test.py @@ -117,7 +117,6 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, fake_linz_slug: } item = ImageryItem( "BR34_5000_0304", - now_string, STACAsset( **{ "href": "any href", @@ -163,7 +162,7 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, fake_linz_slug: assert collection.stac[property_name] == now_string with subtests.test(msg=f"item properties.{property_name}"): - assert item.stac["properties"][property_name] == now_string + assert item.stac["properties"][property_name] == asset_datetimes[property_name] with subtests.test(msg=f"item assets.visual.{property_name}"): assert item.stac["assets"]["visual"][property_name] == asset_datetimes[property_name] diff --git a/scripts/stac/imagery/tests/conftest.py b/scripts/stac/imagery/tests/conftest.py index 0860b2d55..aefa7573d 100644 --- a/scripts/stac/imagery/tests/conftest.py +++ b/scripts/stac/imagery/tests/conftest.py @@ -1,6 +1,6 @@ from datetime import datetime from decimal import Decimal -from typing import Iterator +from typing import Any, Iterator import pytest @@ -21,3 +21,41 @@ def fake_collection_metadata() -> Iterator[CollectionMetadata]: "geographic_description": None, } yield collection_metadata + + +@pytest.fixture +def fake_imagery_item_stac() -> dict[str, Any]: + return { + "type": "Feature", + "stac_version": "1.0.0", + "id": "empty", + "links": [{"href": "./empty.json", "rel": "self", "type": "application/geo+json"}], + "assets": { + "visual": { + "href": "any href", + "file:checksum": "my_checksum", + "created": "any created datetime", + "updated": "any processing datetime", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + } + }, + "stac_extensions": [ + "https://stac-extensions.github.io/file/v2.0.0/schema.json", + 
"https://stac-extensions.github.io/processing/v1.2.0/schema.json", + ], + "properties": { + "created": "any created datetime", + "updated": "any processing datetime", + "processing:datetime": "any processing datetime", + "processing:software": {"gdal": "any GDAL version", "linz/topo-imagery": "any topo imagery version"}, + "processing:version": "any processing version", + "start_datetime": "2021-01-27T00:00:00Z", + "end_datetime": "2021-01-29T00:00:00Z", + "datetime": None, + }, + "geometry": { + "type": "Polygon", + "coordinates": [[[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]]], + }, + "bbox": (1799667.5, 5815977.0, 1800422.5, 5814986.0), + } diff --git a/scripts/stac/imagery/tests/create_stac_test.py b/scripts/stac/imagery/tests/create_stac_test.py index 2ffd0b708..63e4316ab 100644 --- a/scripts/stac/imagery/tests/create_stac_test.py +++ b/scripts/stac/imagery/tests/create_stac_test.py @@ -2,9 +2,12 @@ from pathlib import Path from typing import cast +from pytest_subtests import SubTests + from scripts.gdal.gdalinfo import GdalInfo from scripts.stac.imagery.create_stac import create_collection, create_item from scripts.stac.imagery.metadata_constants import CollectionMetadata +from scripts.stac.imagery.tests.generators import any_multihash_as_hex def test_create_item_with_derived_from(tmp_path: Path) -> None: @@ -25,6 +28,7 @@ def test_create_item_with_derived_from(tmp_path: Path) -> None: "", "abc123", "any GDAL version", + "any current datetime", fake_gdal_info, [derived_from_path.as_posix()], ) @@ -62,6 +66,7 @@ def test_create_item_with_derived_from_datetimes(tmp_path: Path) -> None: "", "abc123", "any GDAL version", + "any current datetime", fake_gdal_info, [derived_from_path_a.as_posix(), derived_from_path_b.as_posix()], ) @@ -85,3 +90,117 @@ def test_create_collection(fake_collection_metadata: CollectionMetadata, fake_li ) assert collection.stac["id"] == collection_id + + +def test_create_item_with_published_path(tmp_path: Path) -> None: + item_name = "empty" + existing_item_file = tmp_path / f"{item_name}.json" + tiff_path = f"./scripts/tests/data/{item_name}.tiff" + + fake_gdal_info: GdalInfo = cast( + GdalInfo, {"wgs84Extent": {"type": "Polygon", "coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}} + ) + + item_from_scratch = create_item( + tiff_path, + "a start datetime", + "an end datetime", + item_name, + "any GDAL version", + "this current datetime", + fake_gdal_info, + ) + existing_item_file.write_text(json.dumps(item_from_scratch.stac)) + item_from_odr_unchanged = create_item( + tiff_path, + "a start datetime", + "an end datetime", + item_name, + "any GDAL version", + "this current datetime", + fake_gdal_info, + odr_url=tmp_path.as_posix(), + ) + assert item_from_odr_unchanged.stac == item_from_scratch.stac + + item_from_odr_changed = create_item( + tiff_path, + "another start datetime", + "another end datetime", + item_name, + "another GDAL version", + "another current datetime", + fake_gdal_info, + odr_url=tmp_path.as_posix(), + ) + del item_from_odr_changed.stac["properties"]["start_datetime"] + del item_from_odr_changed.stac["properties"]["end_datetime"] + del item_from_scratch.stac["properties"]["start_datetime"] + del item_from_scratch.stac["properties"]["end_datetime"] + assert item_from_odr_changed.stac == item_from_scratch.stac + + +def test_create_item_when_resupplying_with_new_file(subtests: SubTests, tmp_path: Path) -> None: + fake_gdal_info: GdalInfo = cast( + GdalInfo, {"wgs84Extent": {"type": "Polygon", 
"coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}} + ) + + current_datetime = "current datetime" + item = create_item( + "./scripts/tests/data/empty.tiff", + "", + "", + "abc123", + "any GDAL version", + current_datetime, + fake_gdal_info, + odr_url=tmp_path.as_posix(), + ) + + with subtests.test("created"): + assert item.stac["properties"]["created"] == current_datetime + + with subtests.test("updated"): + assert item.stac["properties"]["updated"] == current_datetime + + +def test_create_item_when_resupplying_with_changed_asset_file(subtests: SubTests, tmp_path: Path) -> None: + item_name = "empty" + original_item = tmp_path / f"{item_name}.json" + asset_file = f"./scripts/tests/data/{item_name}.tiff" + created_datetime = "created datetime" + updated_datetime = "updated datetime" + original_item_content = { + "type": "Feature", + "id": item_name, + "assets": { + "visual": { + "href": asset_file, + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "file:checksum": any_multihash_as_hex(), + "created": created_datetime, + "updated": updated_datetime, + } + }, + "properties": {"created": created_datetime, "updated": updated_datetime}, + } + + original_item.write_text(json.dumps(original_item_content)) + + current_datetime = "current datetime" + item = create_item( + "./scripts/tests/data/empty.tiff", + "", + "", + "abc123", + "any GDAL version", + current_datetime, + cast(GdalInfo, {"wgs84Extent": {"type": "Polygon", "coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}}), + odr_url=tmp_path.as_posix(), + ) + + with subtests.test(msg="assets.visual.created"): + assert item.stac["assets"]["visual"]["created"] == created_datetime + + with subtests.test(msg="assets.visual.updated"): + assert item.stac["assets"]["visual"]["updated"] == current_datetime diff --git a/scripts/stac/imagery/tests/generators.py b/scripts/stac/imagery/tests/generators.py index 93cd6da5c..7305c6c34 100644 --- a/scripts/stac/imagery/tests/generators.py +++ b/scripts/stac/imagery/tests/generators.py @@ -1,7 +1,9 @@ from datetime import datetime +from os import urandom from typing import Callable from scripts.stac.imagery.item import STACAsset, STACProcessing, STACProcessingSoftware +from scripts.stac.util.checksum import multihash_as_hex def fixed_now_function(now: datetime) -> Callable[[], datetime]: @@ -32,3 +34,7 @@ def any_stac_processing() -> STACProcessing: "processing:version": "any processing version", } ) + + +def any_multihash_as_hex() -> str: + return multihash_as_hex(urandom(64)) diff --git a/scripts/stac/imagery/tests/item_test.py b/scripts/stac/imagery/tests/item_test.py index 717b13140..5ac309b4a 100644 --- a/scripts/stac/imagery/tests/item_test.py +++ b/scripts/stac/imagery/tests/item_test.py @@ -1,6 +1,9 @@ +import json from datetime import datetime from decimal import Decimal from os import environ +from pathlib import Path +from typing import Any from unittest.mock import patch from pytest_subtests import SubTests @@ -10,7 +13,9 @@ from scripts.stac.imagery.item import ImageryItem from scripts.stac.imagery.metadata_constants import CollectionMetadata from scripts.stac.imagery.tests.generators import any_stac_asset, any_stac_processing -from scripts.tests.datetimes_test import any_epoch_datetime, any_epoch_datetime_string +from scripts.stac.link import Relation +from scripts.stac.util.media_type import StacMediaType +from scripts.tests.datetimes_test import any_epoch_datetime def test_imagery_stac_item(subtests: SubTests) -> None: @@ -29,10 +34,9 @@ def test_imagery_stac_item(subtests: 
     git_hash = "any Git hash"
     git_version = "any Git version string"
     asset = any_stac_asset()
-    now_string = any_epoch_datetime_string()
     stac_processing = any_stac_processing()
     with patch.dict(environ, {"GIT_HASH": git_hash, "GIT_VERSION": git_version}):
-        item = ImageryItem(id_, now_string, asset, stac_processing)
+        item = ImageryItem(id_, asset, stac_processing)

     item.update_spatial(geometry, bbox)
     item.update_datetime(start_datetime, end_datetime)
@@ -48,16 +52,16 @@ def test_imagery_stac_item(subtests: SubTests) -> None:
         "links": [
             {
                 "href": "./empty.json",
-                "rel": "self",
-                "type": "application/geo+json",
+                "rel": Relation.SELF,
+                "type": StacMediaType.GEOJSON,
             },
         ],
         "properties": {
-            "created": now_string,
+            "created": asset["created"],
             "datetime": None,
             "end_datetime": end_datetime,
             "start_datetime": start_datetime,
-            "updated": now_string,
+            "updated": asset["updated"],
             **stac_processing,
         },
         "stac_extensions": [
@@ -69,6 +73,42 @@ def test_imagery_stac_item(subtests: SubTests) -> None:
     }


+def test_create_item_from_file(tmp_path: Path, fake_imagery_item_stac: dict[str, Any]) -> None:
+    temp_file = tmp_path / "existing_item.json"
+    temp_file.write_text(json.dumps(fake_imagery_item_stac))
+    imagery_item = ImageryItem.from_file(str(temp_file))
+
+    assert imagery_item.stac == fake_imagery_item_stac
+
+
+def test_update_item_checksum(subtests: SubTests, tmp_path: Path, fake_imagery_item_stac: dict[str, Any]) -> None:
+    temp_file = tmp_path / "existing_item.json"
+    temp_file.write_text(json.dumps(fake_imagery_item_stac))
+
+    existing_checksum = fake_imagery_item_stac["assets"]["visual"]["file:checksum"]
+
+    new_stac_processing = any_stac_processing()
+    new_updated_date = new_stac_processing["processing:datetime"]
+    new_stac_properties = fake_imagery_item_stac["properties"].copy()
+    new_stac_properties.update(new_stac_processing)
+    new_checksum = "my_new_checksum"
+
+    imagery_item = ImageryItem.from_file(str(temp_file))
+
+    imagery_item.update_checksum_related_metadata(existing_checksum, new_stac_processing)
+    with subtests.test(msg="item.stac should not change when checksum did not change"):
+        assert imagery_item.stac == fake_imagery_item_stac
+
+    with subtests.test(msg="item.stac checksum changes to newly supplied checksum"):
+        imagery_item.update_checksum_related_metadata(new_checksum, new_stac_processing)
+        assert imagery_item.stac["assets"]["visual"]["file:checksum"] == new_checksum
+
+        assert imagery_item.stac["assets"]["visual"]["updated"] == new_updated_date
+        assert imagery_item.stac["properties"]["updated"] == new_updated_date
+
+        assert imagery_item.stac["properties"] == new_stac_properties
+
+
 # pylint: disable=duplicate-code
 def test_imagery_add_collection(fake_linz_slug: str, subtests: SubTests) -> None:
     metadata: CollectionMetadata = {
@@ -87,7 +127,7 @@ def test_imagery_add_collection(fake_linz_slug: str, subtests: SubTests) -> None
     path = "./scripts/tests/data/empty.tiff"
     id_ = get_file_name_from_path(path)
-    item = ImageryItem(id_, any_epoch_datetime_string(), any_stac_asset(), any_stac_processing())
+    item = ImageryItem(id_, any_stac_asset(), any_stac_processing())

     item.add_collection(collection.stac["id"])
diff --git a/scripts/standardise_validate.py b/scripts/standardise_validate.py
index 1c4fe78df..3000e073a 100644
--- a/scripts/standardise_validate.py
+++ b/scripts/standardise_validate.py
@@ -29,6 +29,15 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--from-file", dest="from_file", required=True, help="The path to a json file containing the input tiffs"
     )
+    parser.add_argument(
+        "--odr-url",
+        dest="odr_url",
+        help=(
+            "The s3 URL of the published dataset in ODR if this is a re-supply. "
+            "Example: 's3://nz-imagery/wellington/porirua_2024_0.1m/rgb/2193/'"
+        ),
+        required=False,
+    )
     parser.add_argument("--source-epsg", dest="source_epsg", required=True, help="The EPSG code of the source imagery")
     parser.add_argument(
         "--target-epsg",
@@ -59,6 +68,15 @@ def parse_args() -> argparse.Namespace:
         type=valid_date,
     )
     parser.add_argument("--target", dest="target", help="Target output", required=True)
+    parser.add_argument(
+        "--current-datetime",
+        dest="current_datetime",
+        help=(
+            "The datetime to be used as current datetime in the metadata. "
+            "Format: RFC 3339 UTC datetime, `YYYY-MM-DDThh:mm:ssZ`."
+        ),
+        required=True,
+    )

     return parser.parse_args()
@@ -147,8 +165,10 @@ def main() -> None:
             end_datetime,
             arguments.collection_id,
             gdal_version,
+            arguments.current_datetime,
             file.get_gdalinfo(),
             file.get_derived_from_paths(),
+            arguments.odr_url,
         )
         write(stac_item_path, dict_to_json_bytes(item.stac), content_type=ContentType.GEOJSON.value)
         get_log().info("stac_saved", path=stac_item_path)
diff --git a/scripts/tests/collection_from_items_test.py b/scripts/tests/collection_from_items_test.py
index 40ded74a2..736748f54 100644
--- a/scripts/tests/collection_from_items_test.py
+++ b/scripts/tests/collection_from_items_test.py
@@ -19,7 +19,6 @@
 from scripts.stac.imagery.item import ImageryItem
 from scripts.stac.imagery.metadata_constants import CollectionMetadata
 from scripts.stac.imagery.tests.generators import any_stac_asset, any_stac_processing
-from scripts.tests.datetimes_test import any_epoch_datetime_string

 if TYPE_CHECKING:
     from mypy_boto3_s3 import S3Client
@@ -31,7 +30,7 @@ def setup() -> Iterator[ImageryItem]:
     # Create mocked STAC Item
     with patch.dict(environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}):
-        item = ImageryItem("123", any_epoch_datetime_string(), any_stac_asset(), any_stac_processing())
+        item = ImageryItem("123", any_stac_asset(), any_stac_processing())
     geometry = {
         "type": "Polygon",
         "coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
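
Reviewer note (not part of the patch): a minimal sketch of how the reworked `create_item` entry point is driven once `--current-datetime` and `--odr-url` are threaded through `standardise_validate.py`. The fake `gdalinfo` dictionary mirrors the one used in the new tests, the datetime is the value pinned in the CI commands above, and the ODR URL is the example from the `--odr-url` help text; these values are placeholders for illustration only.

# Illustrative only: exercises the resupply path added in this diff, where create_item
# loads the already-published Item from odr_url (via ImageryItem.from_file) and only
# refreshes checksum/processing metadata when the asset content has changed.
from typing import cast

from scripts.gdal.gdalinfo import GdalInfo
from scripts.stac.imagery.create_stac import create_item

fake_gdal_info = cast(
    GdalInfo, {"wgs84Extent": {"type": "Polygon", "coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}}
)

item = create_item(
    "./scripts/tests/data/empty.tiff",  # asset_path (test fixture TIFF)
    "2023-01-01",  # start_datetime
    "2023-01-01",  # end_datetime
    "123",  # collection_id
    "any GDAL version",  # gdal_version
    "2010-09-18T12:34:56Z",  # current_datetime, as pinned in the CI jobs above
    fake_gdal_info,
    odr_url="s3://nz-imagery/wellington/porirua_2024_0.1m/rgb/2193/",  # example from the --odr-url help text
)
print(item.stac["properties"]["updated"])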