Skip to content

Commit

Permalink
feat: title and description by arguments TDE-960 (#757)
Browse files Browse the repository at this point in the history
* feat: generate title & description from inputs

* test: autogenerate title & description tests

* fix: sort imports

* fix: mypy & pylint

* fix: don't allow/account for empty string

* test: test for no empty strings

j

* fix: fix broken test

* fix: lifecycle should never be none

* fix: add choices to parameters

* fix: name variable datetime only when is a datetime type

* fix: add example for Historical Imagery Survey Number

* fix: required field doesn't need default

* fix: add nullable_str to argparse type

remove now unnecessary code

* fix: make subtypes enums

* fix: don't need to set to None

* fix: Improved way of managing event and ending with a .

* fix: remove unused param

* fix: tidy choices enum listing

* fix: gsd cannot be none

* fix: str='' is the same as str=None

* fix: typo

* fix: use enum dictionary to map regions

* fix: update tests

* fix: nit

* fix: list enums correctly

* fix: args copy paste error

* fix: rename subtype -> category

to match metadata name and basemaps

* fix: use kwargs to resolve too many args

* fix: add pseudo * to make params keyword args

* refactor: subtype -> category

* refactor: parameters and relocate

* fix: formatting

* Update scripts/stac/imagery/metadata_constants.py

Co-authored-by: Alice Fage <[email protected]>

* fix: merge conflict

---------

Co-authored-by: Alice Fage <[email protected]>
  • Loading branch information
MDavidson17 and amfage authored Dec 19, 2023
1 parent 4e84901 commit dd3e282
Show file tree
Hide file tree
Showing 7 changed files with 531 additions and 54 deletions.
70 changes: 64 additions & 6 deletions scripts/collection_from_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,67 @@
from boto3 import client
from linz_logger import get_log

from scripts.cli.cli_helper import coalesce_multi_single
from scripts.cli.cli_helper import coalesce_multi_single, valid_date
from scripts.files.fs_s3 import bucket_name_from_path, get_object_parallel_multithreading, list_json_in_uri
from scripts.logging.time_helper import time_in_ms
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.metadata_constants import (
HUMAN_READABLE_REGIONS,
CollectionTitleMetadata,
ElevationCategories,
ImageryCategories,
)
from scripts.stac.imagery.provider import Provider, ProviderRole


# pylint: disable-msg=too-many-locals
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--uri", dest="uri", help="s3 path to items and collection.json write location", required=True)
parser.add_argument("--collection-id", dest="collection_id", help="Collection ID", required=True)
parser.add_argument("--title", dest="title", help="Collection title", required=True)
parser.add_argument("--description", dest="description", help="Collection description", required=True)
parser.add_argument(
"--category",
dest="category",
help="Dataset category description",
required=True,
choices=[type.value for type in ImageryCategories] + [type.value for type in ElevationCategories],
)
parser.add_argument(
"--region",
dest="region",
help="Region of Dataset",
required=True,
choices=HUMAN_READABLE_REGIONS.keys(),
)
parser.add_argument("--gsd", dest="gsd", help="GSD of imagery Dataset", type=str, required=True)
parser.add_argument(
"--location", dest="location", help="Optional Location of dataset, e.g.- Hutt City", type=str, required=False
)
parser.add_argument(
"--start-date",
dest="start_date",
help="Start date in format YYYY-MM-DD (Inclusive)",
type=valid_date,
required=True,
)
parser.add_argument(
"--end-date", dest="end_date", help="End date in format YYYY-MM-DD (Inclusive)", type=valid_date, required=True
)
parser.add_argument("--event", dest="event", help="Event name if applicable", type=str, required=False)
parser.add_argument(
"--historic-survey-number",
dest="historic_survey_number",
help="Historic Survey Number if Applicable. E.g.- SCN8844",
type=str,
required=False,
)
parser.add_argument(
"--lifecycle",
dest="lifecycle",
help="Designating dataset status",
required=True,
choices=["under development", "preview", "ongoing", "completed", "deprecated"],
)
parser.add_argument(
"--producer",
dest="producer",
Expand All @@ -44,9 +92,19 @@ def main() -> None:
for licensor_name in coalesce_multi_single(arguments.licensor_list, arguments.licensor):
providers.append({"name": licensor_name, "roles": [ProviderRole.LICENSOR]})

collection = ImageryCollection(
title=arguments.title, description=arguments.description, collection_id=arguments.collection_id, providers=providers
)
# Inputs from the command line that the collection title and description are generated from.
title_metadata: CollectionTitleMetadata = {
    "category": arguments.category,
    "region": arguments.region,
    "gsd": arguments.gsd,
    "start_datetime": arguments.start_date,
    "end_datetime": arguments.end_date,
    # the attribute name must match dest="lifecycle" of the --lifecycle argument
    "lifecycle": arguments.lifecycle,
    "location": arguments.location,
    "event": arguments.event,
    "historic_survey_number": arguments.historic_survey_number,
}

collection = ImageryCollection(title_metadata=title_metadata, collection_id=arguments.collection_id, providers=providers)

if not uri.startswith("s3://"):
msg = f"uri is not a s3 path: {uri}"
Expand Down
120 changes: 117 additions & 3 deletions scripts/stac/imagery/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@

from scripts.files.files_helper import ContentType
from scripts.files.fs import write
from scripts.stac.imagery.metadata_constants import (
HUMAN_READABLE_REGIONS,
CollectionTitleMetadata,
ElevationCategories,
ImageryCategories,
SubtypeParameterError,
)
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.util.STAC_VERSION import STAC_VERSION

Expand All @@ -14,17 +21,22 @@ class ImageryCollection:
stac: Dict[str, Any]

def __init__(
self, title: str, description: str, collection_id: Optional[str] = None, providers: Optional[List[Provider]] = None
self,
title_metadata: CollectionTitleMetadata,
collection_id: Optional[str] = None,
providers: Optional[List[Provider]] = None,
) -> None:
if not collection_id:
collection_id = str(ulid.ULID())

self.title_metadata = title_metadata

self.stac = {
"type": "Collection",
"stac_version": STAC_VERSION,
"id": collection_id,
"title": title,
"description": description,
"title": self._title(),
"description": self._description(),
"license": "CC-BY-4.0",
"links": [{"rel": "self", "href": "./collection.json", "type": "application/json"}],
"providers": [],
Expand Down Expand Up @@ -179,3 +191,105 @@ def write_to(self, destination: str) -> None:
destination: path of the destination
"""
write(destination, json.dumps(self.stac, ensure_ascii=False).encode("utf-8"), content_type=ContentType.JSON.value)

def _title(self) -> str:
    """Build the dataset title from the collection title metadata.

    Title layouts:
        Historic Survey Number supplied (checked first, whatever the category):
            [Location / Region if no Location specified] [GSD] [Survey Number] ([Year(s)]) [?- preview]
        Satellite Imagery / Urban Aerial Photos / Rural Aerial Photos:
            [Location / Region if no Location specified] [GSD] [?Event Name] [Category] ([Year(s)]) [?- preview]
        DEM / DSM:
            [Location / Region if no Location specified] [?- Event Name] LiDAR [GSD] [Category] ([Year(s)]) [?- preview]

    Parts marked with "?" are left out when they are not set.

    Returns:
        Dataset Title

    Raises:
        SubtypeParameterError: no historic survey number is set and the category is not one of the above
    """
    metadata = self.title_metadata
    event = metadata.get("event")
    survey_number = metadata.get("historic_survey_number")

    # a single year when the capture starts and ends in the same year, otherwise a range
    first_year = metadata["start_datetime"].year
    last_year = metadata["end_datetime"].year
    years = str(first_year) if first_year == last_year else f"{first_year} - {last_year}"

    # a location, when supplied, is used in place of the human readable region name
    place = metadata.get("location") or HUMAN_READABLE_REGIONS[metadata["region"]]

    suffix = "- preview" if metadata.get("lifecycle") == "preview" else ""

    if survey_number:
        raw_title = f"{place} {metadata['gsd']} {survey_number} ({years}) {suffix}"
    else:
        category = metadata["category"]
        if category in [ImageryCategories.SATELLITE, ImageryCategories.URBAN, ImageryCategories.RURAL]:
            raw_title = f"{place} {metadata['gsd']} {event or ''} {category} ({years}) {suffix}"
        elif category in [ElevationCategories.DEM, ElevationCategories.DSM]:
            lidar_event = self._elevation_title_event(event) or ""
            raw_title = f"{place} {lidar_event} LiDAR {metadata['gsd']} {category} ({years}) {suffix}"
        else:
            raise SubtypeParameterError(category)

    # split + join collapses the gaps left by the unset optional parts
    return " ".join(raw_title.split())

def _elevation_title_event(self, event: Optional[str]) -> Optional[str]:
    """Format the optional event name for use in an elevation (DEM / DSM) title.

    Args:
        event: name of the event, if any

    Returns:
        the event name prefixed with "- ", or None when no event (None or an empty string) is given
    """
    return f"- {event}" if event else None

def _description(self) -> str:
    """Build the dataset description from the collection title metadata.

    Description layouts:
        Historic Survey Number supplied (checked first, whatever the category):
            Scanned aerial imagery within the [Region] region captured in [Year(s)].
        Satellite Imagery:
            Satellite imagery within the [Region] region captured in [Year(s)].
        Urban Aerial Photos / Rural Aerial Photos:
            Orthophotography within the [Region] region captured in the [Year(s)] flying season.
        DEM / DSM:
            [Digital Elevation Model / Digital Surface Model] within the [Region] [?- Location] region in [Year(s)].

    When an event is set, ", published as a record of the [Event] event" is added before the final full stop.

    Returns:
        Dataset Description

    Raises:
        SubtypeParameterError: no historic survey number is set and the category is not one of the above
    """
    metadata = self.title_metadata
    raw_location = metadata.get("location")
    survey_number = metadata.get("historic_survey_number")
    event = metadata.get("event")

    # a single year when the capture starts and ends in the same year, otherwise a range
    first_year = metadata["start_datetime"].year
    last_year = metadata["end_datetime"].year
    years = str(first_year) if first_year == last_year else f"{first_year} - {last_year}"

    # only the elevation descriptions mention the location
    location_part = f"- {raw_location}" if raw_location else ""

    region = HUMAN_READABLE_REGIONS[metadata["region"]]

    if survey_number:
        summary = f"Scanned aerial imagery within the {region} region captured in {years}"
    else:
        category = metadata["category"]
        if category == ImageryCategories.SATELLITE:
            summary = f"Satellite imagery within the {region} region captured in {years}"
        elif category in [ImageryCategories.URBAN, ImageryCategories.RURAL]:
            summary = f"Orthophotography within the {region} region captured in the {years} flying season"
        elif category in [ElevationCategories.DEM, ElevationCategories.DSM]:
            if category == ElevationCategories.DEM:
                model = "Digital Elevation Model"
            else:
                model = "Digital Surface Model"
            # split + join collapses the gap left when there is no location
            summary = " ".join(f"{model} within the {region} {location_part} region in {years}".split())
        else:
            raise SubtypeParameterError(category)

    event_note = f", published as a record of the {event} event" if event else ""
    return f"{summary}{event_note}."
69 changes: 69 additions & 0 deletions scripts/stac/imagery/metadata_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from datetime import datetime
from enum import Enum
from typing import Optional, TypedDict


class CollectionTitleMetadata(TypedDict):
    """Metadata a collection title and description are generated from.

    category: Dataset category, e.g. Rural Aerial Photos or DEM
    region: Region of Dataset
    gsd: Dataset Ground Sample Distance
    start_datetime: Dataset capture start date
    end_datetime: Dataset capture end date
    lifecycle: Dataset status
    Optional:
        location: Optional location of dataset, e.g. Hutt City
        event: Optional details of capture event, e.g. Cyclone Gabrielle
        historic_survey_number: Optional historic imagery survey number, e.g. SNC88445
    """

    category: str
    region: str
    gsd: str
    start_datetime: datetime
    end_datetime: datetime
    lifecycle: str
    location: Optional[str]
    event: Optional[str]
    historic_survey_number: Optional[str]


class SubtypeParameterError(Exception):
    """Error for a dataset category that is not recognised or not implemented.

    Attributes:
        message: explanation that names the offending category
    """

    def __init__(self, category: str) -> None:
        """
        Args:
            category: the category value that could not be handled
        """
        self.message = f"Unrecognised/Unimplemented Subtype Parameter: {category}"
        # pass the message on so that str(error), tracebacks and logs show it rather than the bare category
        super().__init__(self.message)


class ImageryCategories(str, Enum):
    """Categories of imagery datasets.

    Members are also `str` instances, so they compare equal to their plain string value.
    """

    SATELLITE = "Satellite Imagery"
    URBAN = "Urban Aerial Photos"
    RURAL = "Rural Aerial Photos"
    AERIAL = "Aerial Photos"
    HISTORICAL = "Scanned Aerial Photos"


class ElevationCategories(str, Enum):
    """Categories of elevation datasets.

    Members are also `str` instances, so they compare equal to their plain string value.
    """

    DEM = "DEM"
    DSM = "DSM"


# Maps a region identifier (lower case, words separated by "-") to the region name
# as it is written out in human readable text.
HUMAN_READABLE_REGIONS = {
    "antarctica": "Antarctica",
    "auckland": "Auckland",
    "bay-of-plenty": "Bay of Plenty",
    "canterbury": "Canterbury",
    "gisborne": "Gisborne",
    "global": "Global",
    "hawkes-bay": "Hawke's Bay",
    "manawatu-whanganui": "Manawatū-Whanganui",
    "marlborough": "Marlborough",
    "nelson": "Nelson",
    "new-zealand": "New Zealand",
    "northland": "Northland",
    "otago": "Otago",
    "pacific-islands": "Pacific Islands",
    "southland": "Southland",
    "taranaki": "Taranaki",
    "tasman": "Tasman",
    "waikato": "Waikato",
    "wellington": "Wellington",
    "west-coast": "West Coast",
}
Loading

0 comments on commit dd3e282

Please sign in to comment.