Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Simplify ImageryItem constructor TDE-1298 #1154

Merged
merged 3 commits into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 51 additions & 10 deletions scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
import json
import os
from typing import Any

from linz_logger import get_log
from shapely.geometry.base import BaseGeometry

from scripts.datetimes import utc_now
from scripts.datetimes import format_rfc_3339_datetime_string, utc_now
from scripts.files import fs
from scripts.files.files_helper import get_file_name_from_path
from scripts.files.fs import read
from scripts.files.fs import modified, read
from scripts.files.geotiff import get_extents
from scripts.gdal.gdal_helper import gdal_info
from scripts.gdal.gdalinfo import GdalInfo
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.item import ImageryItem, STACAsset, STACProcessing, STACProcessingSoftware
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.link import Link, Relation
from scripts.stac.util import checksum
from scripts.stac.util.media_type import StacMediaType


Expand Down Expand Up @@ -77,7 +80,7 @@ def create_collection(


def create_item(
file: str,
asset_path: str,
start_datetime: str,
end_datetime: str,
collection_id: str,
Expand All @@ -88,7 +91,7 @@ def create_item(
"""Create an ImageryItem (STAC) to be linked to a Collection.

Args:
file: asset tiff file
asset_path: path of the visual asset (TIFF)
start_datetime: start date of the survey
end_datetime: end date of the survey
collection_id: collection id to link to the Item
Expand All @@ -99,15 +102,13 @@ def create_item(
Returns:
a STAC Item wrapped in ImageryItem
"""
id_ = get_file_name_from_path(file)
item = create_base_item(asset_path, gdal_version)

if not gdalinfo_result:
gdalinfo_result = gdal_info(file)
gdalinfo_result = gdal_info(asset_path)

geometry, bbox = get_extents(gdalinfo_result)

item = ImageryItem(id_, file, gdal_version, utc_now)

if derived_from is not None:
for derived in derived_from:
derived_item_content = read(derived)
Expand All @@ -129,5 +130,45 @@ def create_item(
item.update_spatial(geometry, bbox)
item.add_collection(collection_id)

get_log().info("ImageryItem created", path=file)
get_log().info("ImageryItem created", path=asset_path)
return item


def create_base_item(asset_path: str, gdal_version: str) -> ImageryItem:
    """Build an ImageryItem from the visual asset file and the processing context.

    The asset is read to compute its checksum, and its modification time is
    used for both the asset `created` and `updated` values. The processing
    metadata is taken from `gdal_version` and from the `GIT_HASH` and
    `GIT_VERSION` environment variables; a placeholder string is stored when
    either variable is unset.

    Args:
        asset_path: path of the visual asset (TIFF)
        gdal_version: GDAL version string

    Returns:
        An ImageryItem with basic information.
    """
    item_id = get_file_name_from_path(asset_path)
    asset_content = fs.read(asset_path)
    asset_modified = format_rfc_3339_datetime_string(modified(asset_path))

    visual_asset: STACAsset = {
        # The href is relative: only the file name of the asset is kept.
        "href": os.path.join(".", os.path.basename(asset_path)),
        "file:checksum": checksum.multihash_as_hex(asset_content),
        # The file modification time serves as both asset timestamps.
        "created": asset_modified,
        "updated": asset_modified,
    }

    # One timestamp is shared by the Item and its processing metadata.
    processed_at = format_rfc_3339_datetime_string(utc_now())

    git_hash = os.environ.get("GIT_HASH")
    commit_url = (
        "GIT_HASH not specified"
        if git_hash is None
        else f"https://github.com/linz/topo-imagery/commit/{git_hash}"
    )

    software: STACProcessingSoftware = {"gdal": gdal_version, "linz/topo-imagery": commit_url}
    processing: STACProcessing = {
        "processing:datetime": processed_at,
        "processing:software": software,
        "processing:version": os.environ.get("GIT_VERSION", "GIT_VERSION not specified"),
    }

    return ImageryItem(item_id, processed_at, visual_asset, processing)
55 changes: 21 additions & 34 deletions scripts/stac/imagery/item.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,40 @@
import os
from collections.abc import Callable
from datetime import datetime
from os import environ
from typing import Any
from typing import Any, TypedDict

from scripts.datetimes import format_rfc_3339_datetime_string
from scripts.files import fs
from scripts.files.fs import modified
from scripts.stac.link import Link, Relation
from scripts.stac.util import checksum
from scripts.stac.util.STAC_VERSION import STAC_VERSION
from scripts.stac.util.media_type import StacMediaType
from scripts.stac.util.stac_extensions import StacExtensions

# Shape of the asset dictionary handed to ImageryItem, which spreads it into
# the Item's "visual" asset entry.
# The functional TypedDict syntax is required here because keys such as
# "file:checksum" are not valid Python identifiers.
STACAsset = TypedDict("STACAsset", {"href": str, "file:checksum": str, "created": str, "updated": str})

# Value type of the "processing:software" field declared below: one string
# per software name.
STACProcessingSoftware = TypedDict("STACProcessingSoftware", {"gdal": str, "linz/topo-imagery": str})
"""STAC Processing extension LINZ specific fields"""

# Processing fields that ImageryItem spreads into the Item's "properties".
STACProcessing = TypedDict(
"STACProcessing",
{
"processing:datetime": str,
"processing:software": STACProcessingSoftware,
"processing:version": str,
},
)
"""Some of the STAC processing extension fields are not declared in this TypedDict
(https://github.com/stac-extensions/processing?tab=readme-ov-file#fields)
"""


class ImageryItem:
stac: dict[str, Any]

def __init__(self, id_: str, file: str, gdal_version: str, now: Callable[[], datetime]) -> None:
file_content = fs.read(file)
file_modified_datetime = format_rfc_3339_datetime_string(modified(file))
now_string = format_rfc_3339_datetime_string(now())
if (topo_imagery_hash := environ.get("GIT_HASH")) is not None:
commit_url = f"https://github.com/linz/topo-imagery/commit/{topo_imagery_hash}"
else:
commit_url = "GIT_HASH not specified"

def __init__(self, id_: str, now_string: str, stac_asset: STACAsset, stac_processing: STACProcessing) -> None:
self.stac = {
"type": "Feature",
"stac_version": STAC_VERSION,
"id": id_,
"links": [Link(path=f"./{id_}.json", rel=Relation.SELF, media_type=StacMediaType.GEOJSON).stac],
"assets": {
"visual": {
"href": os.path.join(".", os.path.basename(file)),
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"file:checksum": checksum.multihash_as_hex(file_content),
"created": file_modified_datetime,
"updated": file_modified_datetime,
}
},
"assets": {"visual": {**stac_asset, "type": "image/tiff; application=geotiff; profile=cloud-optimized"}},
"stac_extensions": [StacExtensions.file.value, StacExtensions.processing.value],
"properties": {
"created": now_string,
"updated": now_string,
"processing:datetime": now_string,
"processing:software": {"gdal": gdal_version, "linz/topo-imagery": commit_url},
"processing:version": environ.get("GIT_VERSION", "GIT_VERSION not specified"),
},
"properties": {"created": now_string, "updated": now_string, **stac_processing},
}

def update_datetime(self, start_datetime: str, end_datetime: str) -> None:
Expand Down
48 changes: 27 additions & 21 deletions scripts/stac/imagery/tests/collection_test.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import json
import os
import tempfile
from collections.abc import Callable
from datetime import datetime, timezone
from shutil import rmtree
from tempfile import mkdtemp

Expand All @@ -17,11 +15,12 @@
from scripts.files.fs import read
from scripts.files.fs_s3 import write
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.item import ImageryItem, STACAsset
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.imagery.tests.generators import any_stac_processing, fixed_now_function
from scripts.stac.util.stac_extensions import StacExtensions
from scripts.tests.datetimes_test import any_epoch_datetime
from scripts.tests.datetimes_test import any_epoch_datetime, any_epoch_datetime_string


def test_title_description_id_created_on_init(fake_collection_metadata: CollectionMetadata, subtests: SubTests) -> None:
Expand Down Expand Up @@ -103,21 +102,25 @@ def test_interval_updated_from_existing(fake_collection_metadata: CollectionMeta
assert collection.stac["extent"]["temporal"]["interval"] == [["2021-01-27T00:00:00Z", "2021-02-20T00:00:00Z"]]


def fixed_now_function(now: datetime) -> Callable[[], datetime]:
    """Create a replacement clock function frozen at the given datetime.

    Args:
        now: the value the returned function yields on every call

    Returns:
        a zero-argument function returning `now`
    """

    def frozen_clock() -> datetime:
        # Closes over `now`, so every call returns the very same object.
        return now

    return frozen_clock


def test_add_item(fake_collection_metadata: CollectionMetadata, subtests: SubTests) -> None:
now = any_epoch_datetime()
now_function = fixed_now_function(now)
collection = ImageryCollection(fake_collection_metadata, now_function)
item_file_path = "./scripts/tests/data/empty.tiff"
modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc)
os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
item = ImageryItem("BR34_5000_0304", item_file_path, "any GDAL version", now_function)
now_string = format_rfc_3339_datetime_string(now)
collection = ImageryCollection(fake_collection_metadata, fixed_now_function(now))
asset_created_datetime = any_epoch_datetime_string()
asset_updated_datetime = any_epoch_datetime_string()
item = ImageryItem(
"BR34_5000_0304",
now_string,
STACAsset(
**{
"href": "any href",
"file:checksum": "any checksum",
"created": asset_created_datetime,
"updated": asset_updated_datetime,
}
),
any_stac_processing(),
)
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
Expand Down Expand Up @@ -150,13 +153,16 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, subtests: SubTes

for property_name in ["created", "updated"]:
with subtests.test(msg=f"collection {property_name}"):
assert collection.stac[property_name] == format_rfc_3339_datetime_string(now)
assert collection.stac[property_name] == now_string

with subtests.test(msg=f"item properties.{property_name}"):
assert item.stac["properties"][property_name] == format_rfc_3339_datetime_string(now)
assert item.stac["properties"][property_name] == now_string

with subtests.test(msg="item assets.visual.created"):
assert item.stac["assets"]["visual"]["created"] == asset_created_datetime

with subtests.test(msg=f"item assets.visual.{property_name}"):
assert item.stac["assets"]["visual"][property_name] == "2001-02-03T04:05:06Z"
with subtests.test(msg="item assets.visual.updated"):
assert item.stac["assets"]["visual"]["updated"] == asset_updated_datetime


def test_write_collection(fake_collection_metadata: CollectionMetadata) -> None:
Expand Down
34 changes: 34 additions & 0 deletions scripts/stac/imagery/tests/generators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from datetime import datetime
from typing import Callable

from scripts.stac.imagery.item import STACAsset, STACProcessing, STACProcessingSoftware


def fixed_now_function(now: datetime) -> Callable[[], datetime]:
    """Return a zero-argument callable that always yields `now`.

    Args:
        now: the datetime every call of the returned callable produces

    Returns:
        a callable pinned to `now`, usable wherever a clock function is expected
    """
    return lambda: now


def any_stac_asset() -> STACAsset:
    """Return a STACAsset filled with arbitrary placeholder values.

    Intended for tests that need a well-formed asset but do not care about
    its content.
    """
    placeholder: STACAsset = {
        "href": "any href",
        "file:checksum": "any checksum",
        "created": "any created datetime",
        "updated": "any updated datetime",
    }
    return placeholder


def any_stac_processing() -> STACProcessing:
    """Return a STACProcessing filled with arbitrary placeholder values.

    Intended for tests that need well-formed processing metadata but do not
    care about its content.
    """
    software: STACProcessingSoftware = {
        "gdal": "any GDAL version",
        "linz/topo-imagery": "any topo imagery version",
    }
    processing: STACProcessing = {
        "processing:datetime": "any processing datetime",
        "processing:software": software,
        "processing:version": "any processing version",
    }
    return processing
Loading
Loading