Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: TDE-452 create imagery stac item #100

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
491 changes: 425 additions & 66 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ python = "^3.8.10"
boto3 = "^1.24.12"
linz-logger = "^0.9.0"
certifi = "^2022.6.15"
py-multihash = "^2.0.1"

[tool.poetry.dev-dependencies]
black = "^22.3.0"
Expand All @@ -52,3 +53,4 @@ mypy-boto3-s3 = "^1.24.0"
pytest = "^7.1.2"
pytest-dependency = "^0.5.1"
moto = "^3.1.16"
pytest-mock = "^3.8.2"
42 changes: 39 additions & 3 deletions scripts/cli/cli_helper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import argparse
import json
from datetime import datetime
from os import environ
from typing import List

from dateutil import parser, tz
from linz_logger import get_log


Expand All @@ -26,12 +28,46 @@ def parse_source() -> List[str]:
Returns:
List[str]: A list of paths.
"""
parser = argparse.ArgumentParser()
parser.add_argument("--source", dest="source", nargs="+", required=True)
arguments = parser.parse_args()
parse = argparse.ArgumentParser()
parse.add_argument("--source", dest="source", nargs="+", required=True)
arguments = parse.parse_args()

return format_source(arguments.source)


def is_argo() -> bool:
    """Report whether the `ARGO_TEMPLATE` environment variable holds a value.

    Returns:
        bool: True when `ARGO_TEMPLATE` is set and not empty, False otherwise.
    """
    argo_template = environ.get("ARGO_TEMPLATE", "")
    return argo_template != ""


def format_date(date: datetime) -> str:
    """Format a calendar day as the UTC timestamp of its New Zealand midnight.

    Only the year, month and day of `date` are used: the day is taken as
    `00:00:00` New Zealand time and converted to UTC by
    `nzt_datetime_to_utc_datetime`.

    Args:
        date: the day to convert; its time of day is ignored.

    Returns:
        str: the UTC date and time formatted as `YYYY-MM-DDTHH:MM:SSZ`.
    """
    date_string_nz = f"{date.strftime('%Y-%m-%d')}T00:00:00.000"
    datetime_utc = nzt_datetime_to_utc_datetime(date_string_nz)
    # `%S` is the two-digit seconds directive. Lowercase `%s` is not a standard
    # Python directive: on glibc it expands to the epoch timestamp and on other
    # platforms it may be rejected, so it must not be used here.
    return datetime_utc.strftime("%Y-%m-%dT%H:%M:%S") + "Z"


def nzt_datetime_to_utc_datetime(date: str) -> datetime:
    """Read a date string as `Pacific/Auckland` local time and convert it to UTC.

    Args:
        date: a date/time string accepted by `dateutil.parser.parse`.

    Returns:
        datetime: the same instant expressed in the UTC timezone.

    Raises:
        Exception: if the string cannot be parsed as a date.
    """
    try:
        parsed = parser.parse(date)
    except parser.ParserError as err:
        raise Exception(f"Not a valid date: {err}") from err

    # Whatever timezone the parsed value carries is replaced (not converted):
    # the wall-clock time is declared to be New Zealand time.
    auckland_time = parsed.replace(tzinfo=tz.gettz("Pacific/Auckland"))
    utc_time: datetime = auckland_time.astimezone(tz.gettz("UTC"))
    return utc_time


def valid_date(s: str) -> datetime:
    """Convert a `YYYY-MM-DD` string to a datetime, for use as an argparse `type`.

    Args:
        s: the raw command line value.

    Returns:
        datetime: the parsed date at midnight, without timezone information.

    Raises:
        argparse.ArgumentTypeError: if `s` does not match `YYYY-MM-DD`.
    """
    try:
        parsed = datetime.strptime(s, "%Y-%m-%d")
    except ValueError as err:
        raise argparse.ArgumentTypeError(f"not a valid date: {s}") from err
    return parsed
54 changes: 54 additions & 0 deletions scripts/create_stac_items.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import argparse
import json
import os
from typing import List

from linz_logger import get_log

from scripts.cli.cli_helper import format_date, format_source, valid_date
from scripts.files.files_helper import get_file_name_from_path, is_tiff
from scripts.files.fs import write
from scripts.logging.time_helper import time_in_ms
from scripts.stac.imagery_stac import create_imagery_stac_item


def create_imagery_items(files: List[str], start_datetime: str, end_datetime: str) -> None:
    """Write one STAC item JSON file under `/tmp/` for every TIFF in `files`.

    Paths that `is_tiff` rejects are logged at trace level and skipped. Each item
    is saved as `/tmp/<file name>.json`, UTF-8 encoded.

    Args:
        files: the paths of the candidate files.
        start_datetime: value stored as the item's `start_datetime`.
        end_datetime: value stored as the item's `end_datetime`.
    """
    started_at = time_in_ms()
    get_log().info("create_stac_items_start", source=files)

    for tiff_path in files:
        if is_tiff(tiff_path):
            item_id = get_file_name_from_path(tiff_path)
            item = create_imagery_stac_item(item_id, tiff_path, start_datetime, end_datetime)

            payload = json.dumps(item).encode("utf-8")
            write(os.path.join("/tmp/", f"{item_id}.json"), payload)

            get_log().info("imagery_stac_item_created", source=tiff_path)
        else:
            get_log().trace("create_stac_skipped_file_not_tiff", file=tiff_path)

    elapsed = time_in_ms() - started_at
    get_log().info("create_stac_items_complete", source=files, duration=elapsed)


def main() -> None:
    """Parse the command line and create the STAC items for the given sources.

    Expects `--source` (one or more paths) plus `--start_datetime` and
    `--end_datetime`, both in `YYYY-MM-DD` format; all three are required.
    """
    date_options = (
        ("--start_datetime", "start_datetime", "start datetime in format YYYY-MM-DD"),
        ("--end_datetime", "end_datetime", "end datetime in format YYYY-MM-DD"),
    )

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--source", dest="source", nargs="+", required=True)
    for flag, destination, help_text in date_options:
        arg_parser.add_argument(flag, dest=destination, help=help_text, type=valid_date, required=True)
    cli_args = arg_parser.parse_args()

    source_files = format_source(cli_args.source)
    create_imagery_items(
        source_files,
        format_date(cli_args.start_datetime),
        format_date(cli_args.end_datetime),
    )


# Only run the CLI when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
25 changes: 25 additions & 0 deletions scripts/gdal/gdalinfo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import json
from typing import Any, Dict

from linz_logger import get_log

from scripts.gdal.gdal_helper import GDALExecutionException, run_gdal


def gdal_info(path: str) -> Dict[Any, Any]:
    """Run `gdalinfo -stats -json` on a file and return its decoded JSON output.

    Args:
        path: the file handed to `gdalinfo`.

    Returns:
        Dict[Any, Any]: the JSON document read from the process' standard output.

    Raises:
        GDALExecutionException: logged and re-raised if running the command fails.
        json.JSONDecodeError: if the standard output is not valid JSON.
        Exception: if the process wrote anything to its standard error.
    """
    try:
        process = run_gdal(["gdalinfo", "-stats", "-json"], path)
    except GDALExecutionException as gee:
        get_log().error("gdalinfo_failed", file=path, error=str(gee))
        raise gee

    try:
        info: Dict[Any, Any] = json.loads(process.stdout)
    except json.JSONDecodeError as decode_error:
        get_log().error("load_gdalinfo_result_error", file=path, error=decode_error)
        raise decode_error

    # Any output on stderr is treated as a failure, even though stdout was parsed.
    if process.stderr:
        get_log().error("Gdalinfo_error", file=path, error=str(process.stderr))
        raise Exception(f"Gdalinfo Error {str(process.stderr)}")

    return info
33 changes: 33 additions & 0 deletions scripts/stac/imagery_stac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import Any, Dict

from scripts.stac.util import checksum, geotiff
from scripts.stac.util.stac_extensions import StacExtensions

# Value written to the "stac_version" field of every item built in this module.
PYSTAC_VERSION = "1.0.0"


def create_imagery_stac_item(id_: str, path: str, start_datetime: str, end_datetime: str) -> Dict[str, Any]:
    """Build the STAC item dictionary describing one imagery file.

    The extents come from `geotiff.get_extents` and the asset checksum from
    `checksum.multihash_as_hex`, both computed from `path`.

    Args:
        id_: the item identifier, also used to build the `self` link.
        path: the location of the image, stored as the asset `href`.
        start_datetime: value of the `start_datetime` property.
        end_datetime: value of the `end_datetime` property.

    Returns:
        Dict[str, Any]: the item, ready to be serialised to JSON.
    """
    geometry, bbox = geotiff.get_extents(path)

    properties = {
        "start_datetime": start_datetime,
        "end_datetime": end_datetime,
        # The item covers a time range, so it has no single datetime.
        "datetime": None,
    }
    self_link = {"rel": "self", "href": f"./{id_}.json", "type": "application/json"}
    image_asset = {
        "href": path,
        "type": "image/tiff; application:geotiff; profile:cloud-optimized",
        "file:checksum": checksum.multihash_as_hex(path),
    }

    return {
        "type": "Feature",
        "stac_version": PYSTAC_VERSION,
        "id": id_,
        "properties": properties,
        "geometry": {"type": "Polygon", "coordinates": [geometry]},
        "bbox": bbox,
        "links": [self_link],
        "assets": {"image": image_asset},
        "stac_extensions": [StacExtensions.file.value],
    }
26 changes: 26 additions & 0 deletions scripts/stac/tests/stac_item_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from scripts.files.files_helper import get_file_name_from_path
from scripts.stac.imagery_stac import create_imagery_stac_item


def test_imagery_stac_item(mocker) -> None:  # type: ignore
    """The item exposes the id, dates, extents and checksum it was built from."""
    # Inputs of the item under test.
    path = "./test/BR34_5000_0302.tiff"
    id_ = get_file_name_from_path(path)
    start_datetime = "2021-01-27 00:00:00Z"
    end_datetime = "2021-01-27 00:00:00Z"

    # Replace the helpers that would otherwise read the file.
    expected_geometry = [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]]
    expected_bbox = [1799667.5, 5815977.0, 1800422.5, 5814986.0]
    expected_checksum = "1220cdef68d62fb912110b810e62edc53de07f7a44fb2b310db700e9d9dd58baa6b4"
    mocker.patch("scripts.stac.util.checksum.multihash_as_hex", return_value=expected_checksum)
    mocker.patch("scripts.stac.util.geotiff.get_extents", return_value=(expected_geometry, expected_bbox))

    stac = create_imagery_stac_item(id_, path, start_datetime, end_datetime)

    assert stac["id"] == id_

    properties = stac["properties"]
    assert properties["start_datetime"] == start_datetime
    assert properties["end_datetime"] == end_datetime
    assert properties["datetime"] is None

    assert stac["geometry"]["coordinates"] == [expected_geometry]
    assert stac["bbox"] == expected_bbox
    assert stac["assets"]["image"]["file:checksum"] == expected_checksum
17 changes: 17 additions & 0 deletions scripts/stac/util/checksum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import hashlib
import io

import multihash

from scripts.files import fs

# Maximum number of bytes fed to the hash on each read while computing a checksum.
CHUNK_SIZE = 1024 * 1024  # 1 MiB


def multihash_as_hex(path: str) -> str:
    """Return the SHA-256 multihash of a file as a hexadecimal string.

    Args:
        path: the file to hash, read through `fs.read`.

    Returns:
        str: the hex form of the `sha2-256` multihash of the file content.
    """
    sha256 = hashlib.sha256()
    buffer = io.BytesIO(fs.read(path))
    # `read` returns b"" once the buffer is exhausted, which ends the iteration.
    for chunk in iter(lambda: buffer.read(CHUNK_SIZE), b""):
        sha256.update(chunk)

    encoded = multihash.encode(sha256.digest(), "sha2-256")
    hex_digest: str = multihash.to_hex_string(encoded)
    return hex_digest
16 changes: 16 additions & 0 deletions scripts/stac/util/geotiff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from typing import List, Tuple

from scripts.gdal.gdalinfo import gdal_info


def get_extents(path: str) -> Tuple[List[List[float]], List[float]]:
    """Return the footprint ring and bounding box of a raster file.

    Both are derived from the `cornerCoordinates` entry of the `gdalinfo` JSON
    output for `path`.

    NOTE(review): `cornerCoordinates` appear to be expressed in the raster's own
    coordinate reference system; GeoJSON/STAC geometries are expected in WGS84 -
    confirm whether a reprojection is required.

    Args:
        path: the raster file to inspect.

    Returns:
        Tuple[List[List[float]], List[float]]: the closed ring
        `[upper left, upper right, lower right, lower left, upper left]` and the
        bounding box `[min x, min y, max x, max y]`.
    """
    corner_coordinates = gdal_info(path)["cornerCoordinates"]

    upper_left, upper_right, lower_right, lower_left = (
        [corner_coordinates[corner][0], corner_coordinates[corner][1]]
        for corner in ("upperLeft", "upperRight", "lowerRight", "lowerLeft")
    )

    # A GeoJSON polygon ring must be closed: its last position repeats the first.
    geometry = [upper_left, upper_right, lower_right, lower_left, list(upper_left)]

    # GeoJSON orders a bbox as [west, south, east, north]. Taking min/max over
    # every corner keeps that order whatever the orientation of the raster,
    # instead of assuming the upper left corner holds the smallest y.
    xs = [position[0] for position in geometry]
    ys = [position[1] for position in geometry]
    bbox = [min(xs), min(ys), max(xs), max(ys)]
    return geometry, bbox
5 changes: 5 additions & 0 deletions scripts/stac/util/stac_extensions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from enum import Enum


class StacExtensions(str, Enum):
    """Schema URLs of the STAC extensions referenced by generated items.

    Members are also `str` instances, so they compare equal to their URL.
    """

    # Schema of the "file" extension, version 2.0.0.
    file = "https://stac-extensions.github.io/file/v2.0.0/schema.json"