Skip to content

Commit

Permalink
feat!: Auto-fill GSD unit TDE-1211 (#1089)
Browse files Browse the repository at this point in the history
### Motivation

This is always in metres, so we shouldn't ask the end user to fill it
in.

### Modifications

Change GSD parameters to be decimal numbers, rather than strings

### Verification

`pytest`

---------

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Sep 26, 2024
1 parent 40adcd6 commit df257c9
Show file tree
Hide file tree
Showing 15 changed files with 72 additions and 56 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/format-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,23 @@ jobs:
- name: End to end test - Aerial Imagery
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true
cmp --silent "${{ runner.temp }}/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff
- name: End to end test - Elevation
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30m --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30 --create-footprints=true
cmp --silent "${{ runner.temp }}/BK39_10000_0102.tiff" ./scripts/tests/data/output/BK39_10000_0102.tiff
cmp --silent "${{ runner.temp }}/BK39_10000_0101.tiff" ./scripts/tests/data/output/BK39_10000_0101.tiff
- name: End to end test - Historical Aerial Imagery
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60m --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60 --create-footprints=true
cmp --silent "${{ runner.temp }}/BQ31_5000_0608.tiff" ./scripts/tests/data/output/BQ31_5000_0608.tiff
- name: End to end test - Cutline (Aerial Imagery)
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10m --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10 --create-footprints=true
cmp --silent "${{ runner.temp }}/cutline/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829_cut.tiff
- name: End to end test - Thumbnails (Topo50/Topo250)
Expand All @@ -58,7 +58,7 @@ jobs:
- name: End to end test - Restandardise Aerial Imagery
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true
cmp --silent "${{ runner.temp }}/restandardise/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff
- name: End to end test - Translate Ascii Files (Elevation)
Expand All @@ -68,7 +68,7 @@ jobs:
- name: End to end test - Remove empty files
run: |
docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60m --create-footprints=true
docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60 --create-footprints=true
empty_target_directory="$(find "${{ runner.temp }}/tmp-empty" -maxdepth 0 -type d -empty)"
[[ -n "$empty_target_directory" ]]
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Run `docker run topo-imagery python standardise_validate.py --help` to get the l
- Example of local execution. This example uses the test data available in this repo; the output will be created in `~/tmp/` on the local machine (volume shared with `Docker`):

```bash
docker run -v ${HOME}/tmp/:/tmp/:rw topo-imagery python standardise_validate.py --preset webp --from-file ./tests/data/aerial.json --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --target /tmp/ --source-epsg 2193 --target-epsg 2193 --gsd 10m --create-footprints=true
docker run -v ${HOME}/tmp/:/tmp/:rw topo-imagery python standardise_validate.py --preset webp --from-file ./tests/data/aerial.json --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --target /tmp/ --source-epsg 2193 --target-epsg 2193 --gsd 10 --create-footprints=true
```

To use an AWS test dataset (input located in an AWS S3 bucket), log into the AWS account and add the following arguments to the `docker run` command:
Expand Down
14 changes: 14 additions & 0 deletions scripts/cli/cli_helper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import argparse
import json
import warnings
from datetime import datetime
from decimal import Decimal
from os import environ
from typing import NamedTuple

Expand Down Expand Up @@ -120,3 +122,15 @@ def coalesce_multi_single(multi_items: str | None, single_item: str | None) -> l
elif single_item:
output.append(single_item)
return output


def str_to_gsd(value: str) -> Decimal:
    """Parse a GSD command-line value into a `Decimal` number of metres.

    A trailing `m` is still accepted for backwards compatibility, but it is
    deprecated and triggers a `DeprecationWarning`.

    Args:
        value: GSD as passed to the CLI, for example `0.3` (or the deprecated `0.3m`)

    Returns:
        the GSD as a `Decimal`

    Raises:
        ValueError: if `value` is not a finite decimal number greater than zero

    Examples:
        >>> str_to_gsd("0.3")
        Decimal('0.3')
    """
    number_value = value.removesuffix("m")
    if number_value != value:
        warnings.warn(
            "Specifying GSD with a trailing 'm' character will not be supported in future versions. "
            "Please use a plain decimal number like '0.3' instead.",
            DeprecationWarning,
        )

    try:
        gsd = Decimal(number_value)
    except ArithmeticError as error:
        # `decimal.InvalidOperation` is an `ArithmeticError`, which `argparse` does not translate into a
        # usage error when raised from a `type=` callable; a `ValueError` is reported cleanly instead of
        # surfacing as a traceback.
        raise ValueError(f"Invalid GSD '{value}': expected a decimal number like '0.3'") from error

    # `Decimal` also parses "NaN" and "Infinity"; check finiteness first because ordering comparisons
    # against NaN raise. A ground sample distance is a physical size, so it must be strictly positive.
    if not gsd.is_finite() or gsd <= 0:
        raise ValueError(f"Invalid GSD '{value}': expected a finite number greater than zero")

    return gsd
4 changes: 2 additions & 2 deletions scripts/collection_from_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from boto3 import client
from linz_logger import get_log

from scripts.cli.cli_helper import coalesce_multi_single, valid_date
from scripts.cli.cli_helper import coalesce_multi_single, str_to_gsd, valid_date
from scripts.datetimes import utc_now
from scripts.files.files_helper import SUFFIX_FOOTPRINT, SUFFIX_JSON
from scripts.files.fs_s3 import bucket_name_from_path, get_object_parallel_multithreading, list_files_in_uri
Expand Down Expand Up @@ -41,7 +41,7 @@ def parse_args(args: List[str] | None) -> Namespace:
required=True,
choices=HUMAN_READABLE_REGIONS.keys(),
)
parser.add_argument("--gsd", dest="gsd", help="GSD of imagery Dataset", type=str, required=True)
parser.add_argument("--gsd", dest="gsd", help="GSD of imagery Dataset, for example 0.3", type=str_to_gsd, required=True)
parser.add_argument(
"--geographic-description",
dest="geographic_description",
Expand Down
25 changes: 5 additions & 20 deletions scripts/stac/imagery/capture_area.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from decimal import Decimal
from typing import Any, Sequence

from linz_logger import get_log
Expand All @@ -8,29 +9,13 @@

from scripts.logging.time_helper import time_in_ms

DECIMAL_DEGREES_1M = 0.00001
DECIMAL_DEGREES_1M = Decimal("0.00001")
"""
Degree precision of ~1m (decimal places 5, https://en.wikipedia.org/wiki/Decimal_degrees)
"""


def gsd_to_float(gsd: str) -> float:
    """Convert a human readable GSD string into a plain float.

    Every `m` character found in the input is dropped before the conversion.

    Args:
        gsd: the GSD in the form it was given on the command line

    Returns:
        the numeric GSD value

    Examples:
        >>> gsd_to_float("0.2m")
        0.2
    """
    without_unit = gsd.replace("m", "")
    return float(without_unit)


def get_buffer_distance(gsd: float) -> float:
def get_buffer_distance(gsd: Decimal) -> float:
"""The `gsd` (in meters) is multiplied by 2 and then by the 1m degree of precision.
A `buffer factor` of 2 was decided on after experimenting with different outputs,
details of this can be found in TDE-1049.
Expand All @@ -41,7 +26,7 @@ def get_buffer_distance(gsd: float) -> float:
Returns:
buffer distance as a float
"""
return gsd * 2 * DECIMAL_DEGREES_1M
return float(gsd * 2 * DECIMAL_DEGREES_1M)


def to_feature(geometry: BaseGeometry) -> dict[str, Any]:
Expand Down Expand Up @@ -84,7 +69,7 @@ def merge_polygons(polygons: Sequence[BaseGeometry], buffer_distance: float) ->
return oriented_union_simplified


def generate_capture_area(polygons: Sequence[BaseGeometry], gsd: float) -> dict[str, Any]:
def generate_capture_area(polygons: Sequence[BaseGeometry], gsd: Decimal) -> dict[str, Any]:
"""Generate the capture area from a list of BaseGeometries.
Providing the `gsd` allows to round the geometry as we've seen some tiffs
geometry being slightly off, sometimes due to rounding issue in their
Expand Down
19 changes: 14 additions & 5 deletions scripts/stac/imagery/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from scripts.files.files_helper import ContentType
from scripts.files.fs import read, write
from scripts.json_codec import dict_to_json_bytes
from scripts.stac.imagery.capture_area import generate_capture_area, gsd_to_float
from scripts.stac.imagery.capture_area import generate_capture_area
from scripts.stac.imagery.metadata_constants import (
DATA_CATEGORIES,
DEM,
Expand Down Expand Up @@ -103,7 +103,7 @@ def add_capture_area(self, polygons: list[BaseGeometry], target: str, artifact_t
"""

# The GSD is a unit-less decimal number measured in meters (e.g., `0.3`)
capture_area_document = generate_capture_area(polygons, gsd_to_float(self.metadata["gsd"]))
capture_area_document = generate_capture_area(polygons, self.metadata["gsd"])
capture_area_content: bytes = dict_to_json_bytes(capture_area_document)
file_checksum = checksum.multihash_as_hex(capture_area_content)
capture_area = {
Expand Down Expand Up @@ -303,7 +303,13 @@ def _title(self) -> str:
raise MissingMetadataError("historic_survey_number")
return " ".join(
value
for value in [imagery_name, self.metadata["gsd"], historic_survey_number, f"({date})", lifecycle_tag]
for value in [
imagery_name,
f"{self.metadata['gsd']}{GSD_UNIT}",
historic_survey_number,
f"({date})",
lifecycle_tag,
]
if value is not None
)

Expand All @@ -316,7 +322,7 @@ def _title(self) -> str:
value
for value in [
imagery_name,
self.metadata["gsd"],
f"{self.metadata['gsd']}{GSD_UNIT}",
DATA_CATEGORIES[self.metadata["category"]],
f"({date})",
lifecycle_tag,
Expand All @@ -330,7 +336,7 @@ def _title(self) -> str:
region,
elevation_description,
"LiDAR",
self.metadata["gsd"],
f"{self.metadata['gsd']}{GSD_UNIT}",
DATA_CATEGORIES[self.metadata["category"]],
f"({date})",
lifecycle_tag,
Expand Down Expand Up @@ -376,3 +382,6 @@ def _description(self) -> str:
desc = desc + f", published as a record of the {event} event"

return desc + "."


GSD_UNIT = "m"
3 changes: 2 additions & 1 deletion scripts/stac/imagery/metadata_constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime
from decimal import Decimal
from typing import TypedDict


Expand All @@ -19,7 +20,7 @@ class CollectionMetadata(TypedDict):

category: str
region: str
gsd: str
gsd: Decimal
start_datetime: datetime
end_datetime: datetime
lifecycle: str
Expand Down
13 changes: 7 additions & 6 deletions scripts/stac/imagery/tests/capture_area_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from decimal import Decimal
from typing import cast

from shapely import get_exterior_ring, is_ccw
Expand Down Expand Up @@ -84,9 +85,9 @@ def test_generate_capture_area_rounded() -> None:

# Using GSD of 0.2m
# Generate the capture area of the polygons that don't have a gap between each other
capture_area_expected = generate_capture_area(polygons_no_gap, 0.2)
capture_area_expected = generate_capture_area(polygons_no_gap, Decimal("0.2"))
# Generate the capture area of the polygons that have a gap between each other
capture_area_result = generate_capture_area(polygon_with_gap, 0.2)
capture_area_result = generate_capture_area(polygon_with_gap, Decimal("0.2"))
print(f"Polygon no gap A: {to_feature(polygons_no_gap[0])}")
print(f"Polygon no gap B: {to_feature(polygons_no_gap[1])}")
print(f"Polygon with gap A: {to_feature(polygon_with_gap[0])}")
Expand All @@ -110,9 +111,9 @@ def test_generate_capture_area_not_rounded() -> None:

# Using GSD of 0.2m
# Generate the capture area of the polygons that don't have a gap between each other
capture_area_expected = generate_capture_area(polygons_no_gap, 0.2)
capture_area_expected = generate_capture_area(polygons_no_gap, Decimal("0.2"))
# Generate the capture area of the polygons that have a gap between each other
capture_area_result = generate_capture_area(polygon_with_gap, 0.2)
capture_area_result = generate_capture_area(polygon_with_gap, Decimal("0.2"))

print(f"Polygon no gap A: {to_feature(polygons_no_gap[0])}")
print(f"Polygon no gap B: {to_feature(polygons_no_gap[1])}")
Expand Down Expand Up @@ -146,7 +147,7 @@ def test_capture_area_orientation_polygon() -> None:
}
)
)
capture_area = generate_capture_area(polygons, 0.05)
capture_area = generate_capture_area(polygons, Decimal("0.05"))
assert is_ccw(get_exterior_ring(shape(capture_area["geometry"])))


Expand Down Expand Up @@ -185,6 +186,6 @@ def test_capture_area_orientation_multipolygon() -> None:
}
)
)
capture_area = generate_capture_area(polygons, 0.05)
capture_area = generate_capture_area(polygons, Decimal("0.05"))
mp_geom = cast(MultiPolygon, shape(capture_area["geometry"]))
assert is_ccw(get_exterior_ring(mp_geom.geoms[0]))
3 changes: 2 additions & 1 deletion scripts/stac/imagery/tests/collection_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import tempfile
from collections.abc import Callable, Generator
from datetime import datetime, timezone
from decimal import Decimal
from shutil import rmtree
from tempfile import mkdtemp

Expand Down Expand Up @@ -31,7 +32,7 @@ def setup() -> Generator[CollectionMetadata, None, None]:
metadata: CollectionMetadata = {
"category": "urban-aerial-photos",
"region": "auckland",
"gsd": "0.3m",
"gsd": Decimal("0.3"),
"start_datetime": datetime(2022, 2, 2),
"end_datetime": datetime(2022, 2, 2),
"lifecycle": "completed",
Expand Down
5 changes: 3 additions & 2 deletions scripts/stac/imagery/tests/generate_description_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections.abc import Generator
from datetime import datetime
from decimal import Decimal

import pytest

Expand All @@ -14,7 +15,7 @@ def setup() -> Generator[tuple[CollectionMetadata, CollectionMetadata], None, No
metadata_auck: CollectionMetadata = {
"category": "rural-aerial-photos",
"region": "auckland",
"gsd": "0.3m",
"gsd": Decimal("0.3"),
"start_datetime": datetime(2023, 1, 1),
"end_datetime": datetime(2023, 2, 2),
"lifecycle": "completed",
Expand All @@ -25,7 +26,7 @@ def setup() -> Generator[tuple[CollectionMetadata, CollectionMetadata], None, No
metadata_hb: CollectionMetadata = {
"category": "rural-aerial-photos",
"region": "hawkes-bay",
"gsd": "0.3m",
"gsd": Decimal("0.3"),
"start_datetime": datetime(2023, 1, 1),
"end_datetime": datetime(2023, 2, 2),
"lifecycle": "completed",
Expand Down
5 changes: 3 additions & 2 deletions scripts/stac/imagery/tests/generate_title_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections.abc import Generator
from datetime import datetime
from decimal import Decimal

import pytest

Expand All @@ -14,7 +15,7 @@ def setup() -> Generator[tuple[CollectionMetadata, CollectionMetadata], None, No
metadata_auck: CollectionMetadata = {
"category": "rural-aerial-photos",
"region": "auckland",
"gsd": "0.3m",
"gsd": Decimal("0.3"),
"start_datetime": datetime(2023, 1, 1),
"end_datetime": datetime(2023, 2, 2),
"lifecycle": "completed",
Expand All @@ -25,7 +26,7 @@ def setup() -> Generator[tuple[CollectionMetadata, CollectionMetadata], None, No
metadata_hb: CollectionMetadata = {
"category": "rural-aerial-photos",
"region": "hawkes-bay",
"gsd": "0.3m",
"gsd": Decimal("0.3"),
"start_datetime": datetime(2023, 1, 1),
"end_datetime": datetime(2023, 2, 2),
"lifecycle": "completed",
Expand Down
3 changes: 2 additions & 1 deletion scripts/stac/imagery/tests/item_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime
from decimal import Decimal

from pytest_mock import MockerFixture
from pytest_subtests import SubTests
Expand Down Expand Up @@ -64,7 +65,7 @@ def test_imagery_add_collection(mocker: MockerFixture, subtests: SubTests) -> No
metadata: CollectionMetadata = {
"category": "urban-aerial-photos",
"region": "auckland",
"gsd": "0.3m",
"gsd": Decimal("0.3"),
"start_datetime": datetime(2022, 2, 2),
"end_datetime": datetime(2022, 2, 2),
"lifecycle": "completed",
Expand Down
4 changes: 2 additions & 2 deletions scripts/standardise_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from linz_logger import get_log

from scripts.cli.cli_helper import InputParameterError, is_argo, load_input_files, valid_date
from scripts.cli.cli_helper import InputParameterError, is_argo, load_input_files, str_to_gsd, valid_date
from scripts.datetimes import format_rfc_3339_nz_midnight_datetime_string
from scripts.files.file_tiff import FileTiff
from scripts.files.files_helper import SUFFIX_JSON, ContentType
Expand Down Expand Up @@ -36,7 +36,7 @@ def parse_args() -> argparse.Namespace:
required=True,
help="The target EPSG code. If different to source the imagery will be reprojected",
)
parser.add_argument("--gsd", dest="gsd", help="GSD of imagery Dataset", type=str, required=True)
parser.add_argument("--gsd", dest="gsd", help="GSD of imagery Dataset, for example 0.3", type=str_to_gsd, required=True)
parser.add_argument(
"--create-footprints",
dest="create_footprints",
Expand Down
Loading

0 comments on commit df257c9

Please sign in to comment.