From de61f275b072125a021ea913d538d12a2b28c7d3 Mon Sep 17 00:00:00 2001
From: Paul Fouquet
Date: Thu, 19 Dec 2024 15:26:53 +1300
Subject: [PATCH] feat: update existing Collection (WIP)

---
 scripts/stac/imagery/collection.py            | 55 +++++++++++--
 scripts/stac/imagery/create_stac.py           | 25 +++---
 .../stac/imagery/tests/create_stac_test.py    | 80 +++++++++++++++++++
 3 files changed, 140 insertions(+), 20 deletions(-)

diff --git a/scripts/stac/imagery/collection.py b/scripts/stac/imagery/collection.py
index 385be888..f3bf404d 100644
--- a/scripts/stac/imagery/collection.py
+++ b/scripts/stac/imagery/collection.py
@@ -1,3 +1,4 @@
+import json
 import os
 from typing import Any
 
@@ -92,6 +93,32 @@ def __init__(
 
         self.add_providers(merge_provider_roles(providers))
 
+    @classmethod
+    def from_file(cls, file_name: str, metadata: CollectionMetadata, updated_datetime: str) -> "ImageryCollection":
+        """Load an ImageryCollection from a Collection file.
+
+        Args:
+            file_name: The s3 URL or local path of the Collection file to load.
+            metadata: The metadata used to initialise the ImageryCollection.
+            updated_datetime: The value to set as the `updated` datetime of the loaded Collection.
+
+        Returns:
+            The loaded ImageryCollection.
+        """
+        file_content = read(file_name)
+        stac_from_file = json.loads(file_content.decode("UTF-8"))
+        stac_from_file["updated"] = updated_datetime
+        collection = cls(
+            metadata=metadata,
+            created_datetime=stac_from_file["created"],
+            updated_datetime=stac_from_file["updated"],
+            linz_slug=stac_from_file["linz:slug"],
+        )
+        # Override STAC from the original collection
+        collection.stac = stac_from_file
+
+        return collection
+
     def add_capture_area(self, polygons: list[BaseGeometry], target: str, artifact_target: str = "/tmp") -> None:
         """Add the capture area of the Collection.
         The `href` or path of the capture-area.geojson is always set as the relative `./capture-area.geojson`
@@ -165,14 +192,26 @@ def add_item(self, item: dict[Any, Any]) -> None:
         """
         item_self_link = next((feat for feat in item["links"] if feat["rel"] == "self"), None)
         if item_self_link:
-            self.stac["links"].append(
-                Link(
-                    path=item_self_link["href"],
-                    rel=Relation.ITEM,
-                    media_type=StacMediaType.GEOJSON,
-                    file_content=dict_to_json_bytes(item),
-                ).stac
-            )
+            link_to_add = Link(
+                path=item_self_link["href"],
+                rel=Relation.ITEM,
+                media_type=StacMediaType.GEOJSON,
+                file_content=dict_to_json_bytes(item),
+            ).stac
+
+            # Check if the Item to add already exists in the collection
+            exist = False
+            for link in self.stac["links"]:
+                if link["href"] == link_to_add["href"]:
+                    if link.get("file:checksum") == link_to_add["file:checksum"]:
+                        exist = True
+                        break
+                    # The item has been updated (or the old link has no checksum): remove the old link
+                    self.stac["links"].remove(link)
+                    break
+
+            if not exist:
+                self.stac["links"].append(link_to_add)
 
         self.update_temporal_extent(item["properties"]["start_datetime"], item["properties"]["end_datetime"])
         self.update_spatial_extent(item["bbox"])
diff --git a/scripts/stac/imagery/create_stac.py b/scripts/stac/imagery/create_stac.py
index ae44152e..c16c173e 100644
--- a/scripts/stac/imagery/create_stac.py
+++ b/scripts/stac/imagery/create_stac.py
@@ -58,19 +58,20 @@ def create_collection(
     Returns:
         an ImageryCollection object
     """
-    existing_collection = {}
     if odr_url:
-        existing_collection = get_published_file_contents(odr_url, "collection")
-
-    collection = ImageryCollection(
-        metadata=collection_metadata,
-        created_datetime=cast(str, existing_collection.get("created", current_datetime)),
-        updated_datetime=current_datetime,
-        linz_slug=linz_slug,
-        collection_id=collection_id,
-        providers=get_providers(licensors, producers),
-        add_title_suffix=add_title_suffix,
-    )
+        collection = ImageryCollection.from_file(
+            os.path.join(odr_url, "collection.json"), collection_metadata, current_datetime
+        )
+    else:
+        collection = ImageryCollection(
+            metadata=collection_metadata,
+            created_datetime=current_datetime,
+            updated_datetime=current_datetime,
+            linz_slug=linz_slug,
+            collection_id=collection_id,
+            providers=get_providers(licensors, producers),
+            add_title_suffix=add_title_suffix,
+        )
 
     for item in stac_items:
         collection.add_item(item)
diff --git a/scripts/stac/imagery/tests/create_stac_test.py b/scripts/stac/imagery/tests/create_stac_test.py
index a3048a97..33733042 100644
--- a/scripts/stac/imagery/tests/create_stac_test.py
+++ b/scripts/stac/imagery/tests/create_stac_test.py
@@ -258,6 +258,7 @@ def test_create_collection_resupply(
         "type": "Collection",
         "stac_version": STAC_VERSION,
         "id": collection_id,
+        "linz:slug": fake_linz_slug,
         "created": created_datetime_string,
         "updated": created_datetime_string,
     }
@@ -287,6 +288,85 @@ def test_create_collection_resupply(
         assert collection.stac["updated"] == updated_datetime_string
 
 
+def test_create_collection_resupply_add_items(
+    fake_collection_metadata: CollectionMetadata, fake_linz_slug: str, subtests: SubTests, tmp_path: Path
+) -> None:
+    collection_id = "test_collection"
+    created_datetime = any_epoch_datetime()
+    created_datetime_string = format_rfc_3339_datetime_string(created_datetime)
+    existing_item_link = {
+        "rel": "item",
+        "href": "./item_a.json",
+        "type": "application/geo+json",
+        "file:checksum": "1220559708896b75aebab2bbadbc184f6b9ce22708adbb725e93bf3f08a38d2bc71e",
+    }
+
+    existing_collection_content = {
+        "type": "Collection",
+        "stac_version": STAC_VERSION,
+        "id": collection_id,
+        "linz:slug": fake_linz_slug,
+        "links": [
+            {
+                "rel": "root",
+                "href": "https://nz-imagery.s3.ap-southeast-2.amazonaws.com/catalog.json",
+                "type": "application/json",
+            },
+            {"rel": "self", "href": "./collection.json", "type": "application/json"},
+            existing_item_link,
+        ],
+        "created": created_datetime_string,
+        "updated": created_datetime_string,
+    }
+    existing_collection_path = tmp_path / "collection.json"
+    existing_collection_path.write_text(json.dumps(existing_collection_content))
+
+    item_to_add = {
+        "type": "Feature",
+        "id": "item_b",
+        "links": [
+            {"href": "./item_b.json", "rel": "self", "type": "application/geo+json"},
+            {"href": "./collection.json", "rel": "collection", "type": "application/json"},
+            {"href": "./collection.json", "rel": "parent", "type": "application/json"},
+        ],
+        "properties": {"start_datetime": "2024-09-02T12:00:00Z", "end_datetime": "2024-09-02T12:00:00Z"},
+        "bbox": [171.8256487, -34.3559317, 172.090076, -34.0291036],
+    }
+
+    item_to_add_link = {
+        "rel": "item",
+        "href": "./item_b.json",
+        "type": "application/geo+json",
+        "file:checksum": "12203040c94dda3807c4430b312e9b400604188a639f22cc8067136084662fc2618d",
+    }
+
+    updated_datetime_string = format_rfc_3339_datetime_string(created_datetime + timedelta(days=1))
+
+    collection = create_collection(
+        collection_id=collection_id,
+        linz_slug=fake_linz_slug,
+        collection_metadata=fake_collection_metadata,
+        current_datetime=updated_datetime_string,
+        producers=[],
+        licensors=[],
+        stac_items=[item_to_add],
+        item_polygons=[],
+        add_capture_dates=False,
+        uri="test",
+        odr_url=tmp_path.as_posix(),
+    )
+
+    with subtests.test("created datetime"):
+        assert collection.stac["created"] == existing_collection_content["created"]
+
+    with subtests.test("updated datetime"):
+        assert collection.stac["updated"] == updated_datetime_string
+
+    with subtests.test("links"):
+        assert item_to_add_link in collection.stac["links"]
+        assert existing_item_link in collection.stac["links"]
+
+
 def test_create_item_with_odr_url(tmp_path: Path) -> None:
     item_name = "empty"
     existing_item_file = tmp_path / f"{item_name}.json"