From 4bf9fbaa321d47cd3d1170563e141633461b076b Mon Sep 17 00:00:00 2001 From: Tobias Schmidt <13055656+schmidtnz@users.noreply.github.com> Date: Fri, 22 Nov 2024 10:06:03 +1300 Subject: [PATCH 1/3] refactor: assert asset create and update times in loop TDE-1298 (#1181) ### Motivation Refactoring to support TDE-1298 - handle subtests for all created and updated dates in a similar manner ### Modifications Changed `test_add_item` to include `assets.visual` `updated` and `created` attributes in `for` loop. ### Verification `pytest` --- scripts/stac/imagery/tests/collection_test.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/scripts/stac/imagery/tests/collection_test.py b/scripts/stac/imagery/tests/collection_test.py index cadc9ce48..f77388552 100644 --- a/scripts/stac/imagery/tests/collection_test.py +++ b/scripts/stac/imagery/tests/collection_test.py @@ -110,8 +110,10 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, fake_linz_slug: now = any_epoch_datetime() now_string = format_rfc_3339_datetime_string(now) collection = ImageryCollection(fake_collection_metadata, fixed_now_function(now), fake_linz_slug) - asset_created_datetime = any_epoch_datetime_string() - asset_updated_datetime = any_epoch_datetime_string() + asset_datetimes = { + "created": any_epoch_datetime_string(), + "updated": any_epoch_datetime_string(), + } item = ImageryItem( "BR34_5000_0304", now_string, @@ -119,8 +121,8 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, fake_linz_slug: **{ "href": "any href", "file:checksum": "any checksum", - "created": asset_created_datetime, - "updated": asset_updated_datetime, + "created": asset_datetimes["created"], + "updated": asset_datetimes["updated"], } ), any_stac_processing(), @@ -162,11 +164,8 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, fake_linz_slug: with subtests.test(msg=f"item properties.{property_name}"): assert item.stac["properties"][property_name] == 
now_string - with subtests.test(msg="item assets.visual.created"): - assert item.stac["assets"]["visual"]["created"] == asset_created_datetime - - with subtests.test(msg="item assets.visual.updated"): - assert item.stac["assets"]["visual"]["updated"] == asset_updated_datetime + with subtests.test(msg=f"item assets.visual.{property_name}"): + assert item.stac["assets"]["visual"][property_name] == asset_datetimes[property_name] def test_write_collection(fake_collection_metadata: CollectionMetadata, fake_linz_slug: str) -> None: From 119a2cdcac9bea5cc653d4f7d1087a0ea1a6287d Mon Sep 17 00:00:00 2001 From: Tobias Schmidt <13055656+schmidtnz@users.noreply.github.com> Date: Fri, 22 Nov 2024 10:13:44 +1300 Subject: [PATCH 2/3] refactor: only allow one of specific link types TDE-1298 (#1182) ### Motivation Refactoring to support TDE-1298 - only allow one link of each of the types `Collection`, `Parent`, `Self`. ### Modifications Added logic in `ImageryItem`'s `add_link` method to recognise when a link of the same type already exists and replace it. Added `__str__` method to `StacMediaType` and `Relation` types, and made sure the `Link` class will contain strings for `rel` and `type`. 
### Verification `pytest` --- scripts/stac/imagery/item.py | 9 ++++++++- scripts/stac/link.py | 11 +++++++---- scripts/stac/util/media_type.py | 3 +++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/scripts/stac/imagery/item.py b/scripts/stac/imagery/item.py index c9ca76794..d3b1cfc6a 100644 --- a/scripts/stac/imagery/item.py +++ b/scripts/stac/imagery/item.py @@ -71,4 +71,11 @@ def add_collection(self, collection_id: str) -> None: self.add_link(Link(path="./collection.json", rel=Relation.PARENT, media_type=StacMediaType.JSON)) def add_link(self, link: Link) -> None: - self.stac["links"].append(link.stac) + if self.stac.get("links") and link.stac["rel"] in [ + Relation.COLLECTION, + Relation.PARENT, + Relation.SELF, + ]: # STAC specification prescribes there can be only one of these + self.stac["links"][:] = [l for l in self.stac["links"] if l.get("rel") != link.stac["rel"]] + + self.stac.setdefault("links", []).append(link.stac) diff --git a/scripts/stac/link.py b/scripts/stac/link.py index 79b63ba2c..e380428c5 100644 --- a/scripts/stac/link.py +++ b/scripts/stac/link.py @@ -15,6 +15,9 @@ class Relation(str, Enum): DERIVED_FROM = "derived_from" """ https://github.com/radiantearth/stac-spec/blob/master/best-practices.md#derived-from-relation-derived_from""" + def __str__(self) -> str: + return self.value + # pylint: disable=too-few-public-methods class Link: @@ -22,7 +25,7 @@ class Link: Attributes: path: A string that represents the actual link in the format of an URL. - rel: A string that represents the relationship that the link has to the object it will be added to. + rel: `Relation` that represents the relationship that the link has to the object it will be added to. media_type: `StacMediaType` of the link file. file_content: Optional. The content of the file that will be used to store the checksum in `file:checksum`. It assumes using the STAC `file` extension. 
@@ -30,11 +33,11 @@ class Link: stac: dict[str, str] - def __init__(self, path: str, rel: str, media_type: StacMediaType, file_content: bytes | None = None) -> None: + def __init__(self, path: str, rel: Relation, media_type: StacMediaType, file_content: bytes | None = None) -> None: self.stac = { "href": path, - "rel": rel, - "type": media_type, + "rel": str(rel), + "type": str(media_type), } if file_content: diff --git a/scripts/stac/util/media_type.py b/scripts/stac/util/media_type.py index cd56c21f5..81ad975ef 100644 --- a/scripts/stac/util/media_type.py +++ b/scripts/stac/util/media_type.py @@ -11,3 +11,6 @@ class StacMediaType(str, Enum): For STAC Item """ + + def __str__(self) -> str: + return self.value From bc93aaffe04219a331b31cc1e5efc24a4eb2ac73 Mon Sep 17 00:00:00 2001 From: Tobias Schmidt <13055656+schmidtnz@users.noreply.github.com> Date: Fri, 22 Nov 2024 10:15:15 +1300 Subject: [PATCH 3/3] refactor: rearranged create_stac.py to support TDE-1298 (#1183) ### Motivation Rearrange function flow in `create_stac.py` to simplify implementation of TDE-1298. ### Modifications Changed which function outputs would be kept in a local variable and moved creating `stac_asset` closer to where it is used. 
### Verification `pytest` --- scripts/stac/imagery/create_stac.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/scripts/stac/imagery/create_stac.py b/scripts/stac/imagery/create_stac.py index 8748ac3c6..2cf2cc450 100644 --- a/scripts/stac/imagery/create_stac.py +++ b/scripts/stac/imagery/create_stac.py @@ -110,8 +110,6 @@ def create_item( if not gdalinfo_result: gdalinfo_result = gdal_info(asset_path) - geometry, bbox = get_extents(gdalinfo_result) - if derived_from is not None: for derived in derived_from: derived_item_content = read(derived) @@ -130,7 +128,7 @@ def create_item( ) item.update_datetime(start_datetime, end_datetime) - item.update_spatial(geometry, bbox) + item.update_spatial(*get_extents(gdalinfo_result)) item.add_collection(collection_id) get_log().info("ImageryItem created", path=asset_path) @@ -148,17 +146,8 @@ def create_base_item(asset_path: str, gdal_version: str) -> ImageryItem: """ id_ = get_file_name_from_path(asset_path) file_content = fs.read(asset_path) + file_content_checksum = checksum.multihash_as_hex(file_content) file_modified_datetime = format_rfc_3339_datetime_string(modified(asset_path)) - - stac_asset = STACAsset( - **{ - "href": os.path.join(".", os.path.basename(asset_path)), - "file:checksum": checksum.multihash_as_hex(file_content), - "created": file_modified_datetime, - "updated": file_modified_datetime, - } - ) - now_string = format_rfc_3339_datetime_string(utc_now()) if (topo_imagery_hash := os.environ.get("GIT_HASH")) is not None: @@ -174,4 +163,13 @@ def create_base_item(asset_path: str, gdal_version: str) -> ImageryItem: } ) + stac_asset = STACAsset( + **{ + "href": os.path.join(".", os.path.basename(asset_path)), + "file:checksum": file_content_checksum, + "created": file_modified_datetime, + "updated": file_modified_datetime, + } + ) + return ImageryItem(id_, now_string, stac_asset, stac_processing)