Skip to content

Commit

Permalink
refactor: Simplify ImageryItem constructor TDE-1298 (#1154)
Browse files Browse the repository at this point in the history
### Modifications

Pull out STAC asset and processing data types.

### Verification

Modified unit tests accordingly.

---------

Co-authored-by: paulfouquet <[email protected]>
  • Loading branch information
l0b0 and paulfouquet authored Nov 3, 2024
1 parent e2d9e3d commit 80620df
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 138 deletions.
61 changes: 51 additions & 10 deletions scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
import json
import os
from typing import Any

from linz_logger import get_log
from shapely.geometry.base import BaseGeometry

from scripts.datetimes import utc_now
from scripts.datetimes import format_rfc_3339_datetime_string, utc_now
from scripts.files import fs
from scripts.files.files_helper import get_file_name_from_path
from scripts.files.fs import read
from scripts.files.fs import modified, read
from scripts.files.geotiff import get_extents
from scripts.gdal.gdal_helper import gdal_info
from scripts.gdal.gdalinfo import GdalInfo
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.item import ImageryItem, STACAsset, STACProcessing, STACProcessingSoftware
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.link import Link, Relation
from scripts.stac.util import checksum
from scripts.stac.util.media_type import StacMediaType


Expand Down Expand Up @@ -77,7 +80,7 @@ def create_collection(


def create_item(
file: str,
asset_path: str,
start_datetime: str,
end_datetime: str,
collection_id: str,
Expand All @@ -88,7 +91,7 @@ def create_item(
"""Create an ImageryItem (STAC) to be linked to a Collection.
Args:
file: asset tiff file
asset_path: path of the visual asset (TIFF)
start_datetime: start date of the survey
end_datetime: end date of the survey
collection_id: collection id to link to the Item
Expand All @@ -99,15 +102,13 @@ def create_item(
Returns:
a STAC Item wrapped in ImageryItem
"""
id_ = get_file_name_from_path(file)
item = create_base_item(asset_path, gdal_version)

if not gdalinfo_result:
gdalinfo_result = gdal_info(file)
gdalinfo_result = gdal_info(asset_path)

geometry, bbox = get_extents(gdalinfo_result)

item = ImageryItem(id_, file, gdal_version, utc_now)

if derived_from is not None:
for derived in derived_from:
derived_item_content = read(derived)
Expand All @@ -129,5 +130,45 @@ def create_item(
item.update_spatial(geometry, bbox)
item.add_collection(collection_id)

get_log().info("ImageryItem created", path=file)
get_log().info("ImageryItem created", path=asset_path)
return item


def create_base_item(asset_path: str, gdal_version: str) -> ImageryItem:
    """Build an ImageryItem carrying asset and processing metadata for the file at `asset_path`.

    Args:
        asset_path: path of the visual asset (TIFF)
        gdal_version: GDAL version string

    Returns:
        An ImageryItem with basic information.
    """
    item_id = get_file_name_from_path(asset_path)
    asset_bytes = fs.read(asset_path)
    # The value returned by `modified(asset_path)` is used for both asset timestamps.
    asset_timestamp = format_rfc_3339_datetime_string(modified(asset_path))

    asset: STACAsset = {
        "href": os.path.join(".", os.path.basename(asset_path)),
        "file:checksum": checksum.multihash_as_hex(asset_bytes),
        "created": asset_timestamp,
        "updated": asset_timestamp,
    }

    # A single "now" value is shared by the processing datetime and the Item itself.
    processing_timestamp = format_rfc_3339_datetime_string(utc_now())

    # Without GIT_HASH in the environment a placeholder text is recorded instead of a commit URL.
    git_hash = os.environ.get("GIT_HASH")
    if git_hash is None:
        commit_url = "GIT_HASH not specified"
    else:
        commit_url = f"https://github.com/linz/topo-imagery/commit/{git_hash}"

    software = STACProcessingSoftware(**{"gdal": gdal_version, "linz/topo-imagery": commit_url})
    processing: STACProcessing = {
        "processing:datetime": processing_timestamp,
        "processing:software": software,
        "processing:version": os.environ.get("GIT_VERSION", "GIT_VERSION not specified"),
    }

    return ImageryItem(item_id, processing_timestamp, asset, processing)
55 changes: 21 additions & 34 deletions scripts/stac/imagery/item.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,40 @@
import os
from collections.abc import Callable
from datetime import datetime
from os import environ
from typing import Any
from typing import Any, TypedDict

from scripts.datetimes import format_rfc_3339_datetime_string
from scripts.files import fs
from scripts.files.fs import modified
from scripts.stac.link import Link, Relation
from scripts.stac.util import checksum
from scripts.stac.util.STAC_VERSION import STAC_VERSION
from scripts.stac.util.media_type import StacMediaType
from scripts.stac.util.stac_extensions import StacExtensions

# The functional TypedDict syntax is used throughout because keys such as
# "file:checksum" contain ":" and so cannot be written as class attributes.

# Fields supplied for an Item's asset entry.
STACAsset = TypedDict(
    "STACAsset",
    {
        "href": str,
        "file:checksum": str,
        "created": str,
        "updated": str,
    },
)

STACProcessingSoftware = TypedDict(
    "STACProcessingSoftware",
    {
        "gdal": str,
        "linz/topo-imagery": str,
    },
)
"""LINZ-specific fields of the STAC Processing extension"""

STACProcessing = TypedDict(
    "STACProcessing",
    {"processing:datetime": str, "processing:software": STACProcessingSoftware, "processing:version": str},
)
"""Only some of the STAC Processing extension fields are declared in this TypedDict
(full list: https://github.com/stac-extensions/processing?tab=readme-ov-file#fields)
"""


class ImageryItem:
stac: dict[str, Any]

def __init__(self, id_: str, file: str, gdal_version: str, now: Callable[[], datetime]) -> None:
file_content = fs.read(file)
file_modified_datetime = format_rfc_3339_datetime_string(modified(file))
now_string = format_rfc_3339_datetime_string(now())
if (topo_imagery_hash := environ.get("GIT_HASH")) is not None:
commit_url = f"https://github.com/linz/topo-imagery/commit/{topo_imagery_hash}"
else:
commit_url = "GIT_HASH not specified"

def __init__(self, id_: str, now_string: str, stac_asset: STACAsset, stac_processing: STACProcessing) -> None:
self.stac = {
"type": "Feature",
"stac_version": STAC_VERSION,
"id": id_,
"links": [Link(path=f"./{id_}.json", rel=Relation.SELF, media_type=StacMediaType.GEOJSON).stac],
"assets": {
"visual": {
"href": os.path.join(".", os.path.basename(file)),
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"file:checksum": checksum.multihash_as_hex(file_content),
"created": file_modified_datetime,
"updated": file_modified_datetime,
}
},
"assets": {"visual": {**stac_asset, "type": "image/tiff; application=geotiff; profile=cloud-optimized"}},
"stac_extensions": [StacExtensions.file.value, StacExtensions.processing.value],
"properties": {
"created": now_string,
"updated": now_string,
"processing:datetime": now_string,
"processing:software": {"gdal": gdal_version, "linz/topo-imagery": commit_url},
"processing:version": environ.get("GIT_VERSION", "GIT_VERSION not specified"),
},
"properties": {"created": now_string, "updated": now_string, **stac_processing},
}

def update_datetime(self, start_datetime: str, end_datetime: str) -> None:
Expand Down
48 changes: 27 additions & 21 deletions scripts/stac/imagery/tests/collection_test.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import json
import os
import tempfile
from collections.abc import Callable
from datetime import datetime, timezone
from shutil import rmtree
from tempfile import mkdtemp

Expand All @@ -17,11 +15,12 @@
from scripts.files.fs import read
from scripts.files.fs_s3 import write
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.item import ImageryItem, STACAsset
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.imagery.tests.generators import any_stac_processing, fixed_now_function
from scripts.stac.util.stac_extensions import StacExtensions
from scripts.tests.datetimes_test import any_epoch_datetime
from scripts.tests.datetimes_test import any_epoch_datetime, any_epoch_datetime_string


def test_title_description_id_created_on_init(fake_collection_metadata: CollectionMetadata, subtests: SubTests) -> None:
Expand Down Expand Up @@ -103,21 +102,25 @@ def test_interval_updated_from_existing(fake_collection_metadata: CollectionMeta
assert collection.stac["extent"]["temporal"]["interval"] == [["2021-01-27T00:00:00Z", "2021-02-20T00:00:00Z"]]


def fixed_now_function(now: datetime) -> Callable[[], datetime]:
    """Return a zero-argument callable that gives back `now` on every call."""

    def always_now() -> datetime:
        return now

    return always_now


def test_add_item(fake_collection_metadata: CollectionMetadata, subtests: SubTests) -> None:
now = any_epoch_datetime()
now_function = fixed_now_function(now)
collection = ImageryCollection(fake_collection_metadata, now_function)
item_file_path = "./scripts/tests/data/empty.tiff"
modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc)
os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
item = ImageryItem("BR34_5000_0304", item_file_path, "any GDAL version", now_function)
now_string = format_rfc_3339_datetime_string(now)
collection = ImageryCollection(fake_collection_metadata, fixed_now_function(now))
asset_created_datetime = any_epoch_datetime_string()
asset_updated_datetime = any_epoch_datetime_string()
item = ImageryItem(
"BR34_5000_0304",
now_string,
STACAsset(
**{
"href": "any href",
"file:checksum": "any checksum",
"created": asset_created_datetime,
"updated": asset_updated_datetime,
}
),
any_stac_processing(),
)
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
Expand Down Expand Up @@ -150,13 +153,16 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, subtests: SubTes

for property_name in ["created", "updated"]:
with subtests.test(msg=f"collection {property_name}"):
assert collection.stac[property_name] == format_rfc_3339_datetime_string(now)
assert collection.stac[property_name] == now_string

with subtests.test(msg=f"item properties.{property_name}"):
assert item.stac["properties"][property_name] == format_rfc_3339_datetime_string(now)
assert item.stac["properties"][property_name] == now_string

with subtests.test(msg="item assets.visual.created"):
assert item.stac["assets"]["visual"]["created"] == asset_created_datetime

with subtests.test(msg=f"item assets.visual.{property_name}"):
assert item.stac["assets"]["visual"][property_name] == "2001-02-03T04:05:06Z"
with subtests.test(msg="item assets.visual.updated"):
assert item.stac["assets"]["visual"]["updated"] == asset_updated_datetime


def test_write_collection(fake_collection_metadata: CollectionMetadata) -> None:
Expand Down
34 changes: 34 additions & 0 deletions scripts/stac/imagery/tests/generators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from datetime import datetime
from typing import Callable

from scripts.stac.imagery.item import STACAsset, STACProcessing, STACProcessingSoftware


def fixed_now_function(now: datetime) -> Callable[[], datetime]:
    """Return a zero-argument callable that always yields the given `now`."""
    return lambda: now


def any_stac_asset() -> STACAsset:
    """Return a STACAsset filled with arbitrary placeholder values."""
    # A TypedDict is a plain dict at runtime, so the literal can be returned as-is.
    return {
        "href": "any href",
        "file:checksum": "any checksum",
        "created": "any created datetime",
        "updated": "any updated datetime",
    }


def any_stac_processing() -> STACProcessing:
    """Return a STACProcessing filled with arbitrary placeholder values."""
    software = STACProcessingSoftware(**{"gdal": "any GDAL version", "linz/topo-imagery": "any topo imagery version"})
    # A TypedDict is a plain dict at runtime, so the literal can be returned as-is.
    return {
        "processing:datetime": "any processing datetime",
        "processing:software": software,
        "processing:version": "any processing version",
    }
Loading

0 comments on commit 80620df

Please sign in to comment.