Skip to content

Commit

Permalink
feat: Add topo-imagery version information to STAC TDE-1265 (#1080)
Browse files Browse the repository at this point in the history
### Motivation

As a data maintainer, geospatial data engineer or data consumer, I would
like to reference the technical configuration used to create the TIFFs
in our ODR datasets in order to be able to recreate the TIFFs or carry
out troubleshooting.

### Modifications

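Add `processing:software`, `processing:version`, and
`processing:datetime` item properties, and declare the STAC processing
extension on each item. The topo-imagery commit and version are read from
the `GIT_HASH` and `GIT_VERSION` environment variables, which the Docker
image sets from build arguments of the same names; when they are not set,
placeholder strings are written instead.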

### Verification

`pytest`

---------

Co-authored-by: Alice Fage <[email protected]>
  • Loading branch information
l0b0 and amfage authored Sep 26, 2024
1 parent df257c9 commit 2039606
Show file tree
Hide file tree
Showing 12 changed files with 92 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/format-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
- name: Build containers
run: |
docker build . --tag topo-imagery --label "github_run_id=${GITHUB_RUN_ID}"
docker build --label "github_run_id=${GITHUB_RUN_ID}" --tag topo-imagery .
- name: End to end test - Aerial Imagery
run: |
Expand Down
5 changes: 5 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ RUN /root/.local/bin/poetry bundle venv --no-ansi --no-interaction --only=main -

FROM ghcr.io/osgeo/gdal:ubuntu-small-3.9.0@sha256:d1a38af532e5d9e3991c4a6bddc2f2cb52644dc30a4eb8242101e8e23c3f83f6

ARG GIT_HASH
ENV GIT_HASH=$GIT_HASH
ARG GIT_VERSION
ENV GIT_VERSION=$GIT_VERSION

ENV TZ=Etc/UTC

# Copy just the bundle from the first stage
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ The scripts have been implemented to be run inside the Docker container only. Th
- Build the `Docker` image:

```bash
docker build . -t topo-imagery
docker build --tag=topo-imagery .
```

- Running `standardising_validate.py` script
Expand Down
4 changes: 3 additions & 1 deletion scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def create_item(
start_datetime: str,
end_datetime: str,
collection_id: str,
gdal_version: str,
gdalinfo_result: GdalInfo | None = None,
derived_from: list[str] | None = None,
) -> ImageryItem:
Expand All @@ -28,6 +29,7 @@ def create_item(
start_datetime: start date of the survey
end_datetime: end date of the survey
collection_id: collection id to link to the Item
gdal_version: GDAL version
gdalinfo_result: result of the gdalinfo command. Defaults to None.
derived_from: list of STAC Items from where this Item is derived. Defaults to None.
Expand All @@ -41,7 +43,7 @@ def create_item(

geometry, bbox = get_extents(gdalinfo_result)

item = ImageryItem(id_, file, utc_now)
item = ImageryItem(id_, file, gdal_version, utc_now)

if derived_from is not None:
for derived in derived_from:
Expand Down
18 changes: 15 additions & 3 deletions scripts/stac/imagery/item.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from collections.abc import Callable
from datetime import datetime
from os import environ
from typing import Any

from scripts.datetimes import format_rfc_3339_datetime_string
Expand All @@ -16,10 +17,15 @@
class ImageryItem:
stac: dict[str, Any]

def __init__(self, id_: str, file: str, now: Callable[[], datetime]) -> None:
def __init__(self, id_: str, file: str, gdal_version: str, now: Callable[[], datetime]) -> None:
file_content = fs.read(file)
file_modified_datetime = format_rfc_3339_datetime_string(modified(file))
now_string = format_rfc_3339_datetime_string(now())
if (topo_imagery_hash := environ.get("GIT_HASH")) is not None:
commit_url = f"https://github.com/linz/topo-imagery/commit/{topo_imagery_hash}"
else:
commit_url = "GIT_HASH not specified"

self.stac = {
"type": "Feature",
"stac_version": STAC_VERSION,
Expand All @@ -34,8 +40,14 @@ def __init__(self, id_: str, file: str, now: Callable[[], datetime]) -> None:
"updated": file_modified_datetime,
}
},
"stac_extensions": [StacExtensions.file.value],
"properties": {"created": now_string, "updated": now_string},
"stac_extensions": [StacExtensions.file.value, StacExtensions.processing.value],
"properties": {
"created": now_string,
"updated": now_string,
"processing:datetime": now_string,
"processing:software": {"gdal": gdal_version, "linz/topo-imagery": commit_url},
"processing:version": environ.get("GIT_VERSION", "GIT_VERSION not specified"),
},
}

def update_datetime(self, start_datetime: str, end_datetime: str) -> None:
Expand Down
2 changes: 1 addition & 1 deletion scripts/stac/imagery/tests/collection_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def test_add_item(metadata: CollectionMetadata, subtests: SubTests) -> None:
item_file_path = "./scripts/tests/data/empty.tiff"
modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc)
os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
item = ImageryItem("BR34_5000_0304", item_file_path, now_function)
item = ImageryItem("BR34_5000_0304", item_file_path, "any GDAL version", now_function)
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
Expand Down
11 changes: 10 additions & 1 deletion scripts/stac/imagery/tests/create_stac_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,15 @@ def test_create_item_with_derived_from(tmp_path: Path) -> None:
GdalInfo, {"wgs84Extent": {"type": "Polygon", "coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}}
)

item = create_item("./scripts/tests/data/empty.tiff", "", "", "abc123", fake_gdal_info, [derived_from_path.as_posix()])
item = create_item(
"./scripts/tests/data/empty.tiff",
"",
"",
"abc123",
"any GDAL version",
fake_gdal_info,
[derived_from_path.as_posix()],
)

assert {
"href": derived_from_path.as_posix(),
Expand Down Expand Up @@ -52,6 +60,7 @@ def test_create_item_with_derived_from_datetimes(tmp_path: Path) -> None:
"",
"",
"abc123",
"any GDAL version",
fake_gdal_info,
[derived_from_path_a.as_posix(), derived_from_path_b.as_posix()],
)
Expand Down
46 changes: 43 additions & 3 deletions scripts/stac/imagery/tests/item_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from datetime import datetime
from datetime import datetime, timezone
from decimal import Decimal
from os import environ
from unittest.mock import patch

from pytest_mock import MockerFixture
from pytest_subtests import SubTests
Expand All @@ -8,6 +10,7 @@
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.util.stac_extensions import StacExtensions
from scripts.tests.datetimes_test import any_epoch_datetime


Expand All @@ -25,7 +28,14 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None:
start_datetime = "2021-01-27T00:00:00Z"
end_datetime = "2021-01-27T00:00:00Z"

item = ImageryItem(id_, path, any_epoch_datetime)
def fake_now() -> datetime:
return datetime(1979, 1, 1, tzinfo=timezone.utc)

git_hash = "any Git hash"
git_version = "any Git version string"
gdal_version_string = "any GDAL version string"
with patch.dict(environ, {"GIT_HASH": git_hash, "GIT_VERSION": git_version}):
item = ImageryItem(id_, path, gdal_version_string, fake_now)
item.update_spatial(geometry, bbox)
item.update_datetime(start_datetime, end_datetime)
# checks
Expand All @@ -41,6 +51,26 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None:
with subtests.test():
assert item.stac["properties"]["datetime"] is None

with subtests.test():
assert (
item.stac["properties"]["created"]
== item.stac["properties"]["updated"]
== item.stac["properties"]["processing:datetime"]
== "1979-01-01T00:00:00Z"
)

with subtests.test():
assert item.stac["properties"]["processing:version"] == git_version

with subtests.test():
assert item.stac["properties"]["processing:software"] == {
"gdal": gdal_version_string,
"linz/topo-imagery": f"https://github.com/linz/topo-imagery/commit/{git_hash}",
}

with subtests.test():
assert item.stac["stac_extensions"] == [StacExtensions.file.value, StacExtensions.processing.value]

with subtests.test():
assert item.stac["geometry"]["coordinates"] == geometry["coordinates"]

Expand Down Expand Up @@ -79,7 +109,7 @@ def test_imagery_add_collection(mocker: MockerFixture, subtests: SubTests) -> No
path = "./scripts/tests/data/empty.tiff"
id_ = get_file_name_from_path(path)
mocker.patch("scripts.files.fs.read", return_value=b"")
item = ImageryItem(id_, path, any_epoch_datetime)
item = ImageryItem(id_, path, "any GDAL version", any_epoch_datetime)

item.add_collection(collection.stac["id"])

Expand All @@ -91,3 +121,13 @@ def test_imagery_add_collection(mocker: MockerFixture, subtests: SubTests) -> No

with subtests.test():
assert {"rel": "parent", "href": "./collection.json", "type": "application/json"} in item.stac["links"]


def test_should_set_fallback_version_strings(subtests: SubTests) -> None:
    """Check the placeholder strings used when no Git build information is set.

    `ImageryItem` reads `GIT_HASH` and `GIT_VERSION` from the process
    environment. This test must therefore make sure both variables are absent
    while the item is built; otherwise the result depends on where the test
    suite happens to run (for example in an environment that exports them).
    """
    # `patch.dict` snapshots the environment on entry and restores it on exit,
    # so removing the variables here cannot leak into other tests.
    with patch.dict(environ):
        environ.pop("GIT_HASH", None)
        environ.pop("GIT_VERSION", None)
        item = ImageryItem("any ID", "./scripts/tests/data/empty.tiff", "any GDAL version", any_epoch_datetime)

    with subtests.test():
        assert item.stac["properties"]["processing:software"]["linz/topo-imagery"] == "GIT_HASH not specified"

    with subtests.test():
        assert item.stac["properties"]["processing:version"] == "GIT_VERSION not specified"
1 change: 1 addition & 0 deletions scripts/stac/util/stac_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@

class StacExtensions(str, Enum):
    """STAC extension schema URLs.

    Each member's value is the versioned JSON schema URL of one extension.
    Because the class also derives from `str`, members are themselves strings.
    """

    # File extension schema, version 2.0.0.
    file = "https://stac-extensions.github.io/file/v2.0.0/schema.json"
    # Processing extension schema, version 1.2.0.
    processing = "https://stac-extensions.github.io/processing/v1.2.0/schema.json"
6 changes: 5 additions & 1 deletion scripts/standardise_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from scripts.files.file_tiff import FileTiff
from scripts.files.files_helper import SUFFIX_JSON, ContentType
from scripts.files.fs import exists, write
from scripts.gdal.gdal_helper import get_srs, get_vfs_path
from scripts.gdal.gdal_helper import get_gdal_version, get_srs, get_vfs_path
from scripts.json_codec import dict_to_json_bytes
from scripts.stac.imagery.create_stac import create_item
from scripts.standardising import run_standardising
Expand Down Expand Up @@ -107,6 +107,8 @@ def main() -> None:
if is_argo():
concurrency = 4

gdal_version = get_gdal_version()

tiff_files = run_standardising(
tile_files,
arguments.preset,
Expand All @@ -116,6 +118,7 @@ def main() -> None:
arguments.target_epsg,
arguments.gsd,
arguments.create_footprints,
gdal_version,
arguments.target,
)

Expand Down Expand Up @@ -143,6 +146,7 @@ def main() -> None:
start_datetime,
end_datetime,
arguments.collection_id,
gdal_version,
file.get_gdalinfo(),
file.get_derived_from_paths(),
)
Expand Down
5 changes: 3 additions & 2 deletions scripts/standardising.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from scripts.files.files_helper import SUFFIX_FOOTPRINT, ContentType, is_tiff
from scripts.files.fs import exists, read, write, write_all, write_sidecars
from scripts.gdal.gdal_bands import get_gdal_band_offset
from scripts.gdal.gdal_helper import EpsgNumber, gdal_info, get_gdal_version, run_gdal
from scripts.gdal.gdal_helper import EpsgNumber, gdal_info, run_gdal
from scripts.gdal.gdal_preset import (
get_alpha_command,
get_build_vrt_command,
Expand All @@ -35,6 +35,7 @@ def run_standardising(
target_epsg: str,
gsd: Decimal,
create_footprints: bool,
gdal_version: str,
target_output: str = "/tmp/",
) -> list[FileTiff]:
"""Run `standardising()` in parallel (`concurrency`).
Expand All @@ -47,6 +48,7 @@ def run_standardising(
source_epsg: EPSG code of the source file
target_epsg: EPSG code of reprojection
gsd: Ground Sample Distance in meters
gdal_version: version of GDAL used for standardising
target_output: output directory path. Defaults to "/tmp/"
Returns:
Expand All @@ -55,7 +57,6 @@ def run_standardising(
# pylint: disable-msg=too-many-arguments
start_time = time_in_ms()

gdal_version = get_gdal_version()
get_log().info("standardising_start", gdalVersion=gdal_version, fileCount=len(todo))

with Pool(concurrency) as p:
Expand Down
5 changes: 4 additions & 1 deletion scripts/tests/collection_from_items_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from collections.abc import Generator
from datetime import datetime
from decimal import Decimal
from os import environ
from unittest.mock import patch

import pytest
from boto3 import client, resource
Expand All @@ -21,7 +23,8 @@
@pytest.fixture(name="item", autouse=True)
def setup() -> Generator[ImageryItem, None, None]:
# Create mocked STAC Item
item = ImageryItem("123", "./scripts/tests/data/empty.tiff", utc_now)
with patch.dict(environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}):
item = ImageryItem("123", "./scripts/tests/data/empty.tiff", "any GDAL version", utc_now)
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
Expand Down

0 comments on commit 2039606

Please sign in to comment.