Skip to content

Commit

Permalink
feat!: add new fields to the Collection TDE-985 (#765)
Browse files Browse the repository at this point in the history
* feat: add new fields from LINZ STAC extension to the Collection TDE-985

* fix: tests fail

* fix: remove linz STAC extension from Collection

* wip

* feat: add stac metadata TDE-985

* build: revert pylint change

* tests: fix the tests

* fix: --geographic_desc should be --geographic_description

* fix: argument help message should not contain unnecessary -

* tests: event_name should not be in all the tests

* fix: formatting

* fix: should not break if --category is passed as human readable values

* refactor: reduce number of dict lookup

* fix: location should be geographic_description

* feat: change --location to --geographic-description

* fix: formatting

* fix: unused 'type: ignore' comment

* build: fix mypy ignore comment

* refactor: check historical imagery based on its category rather than survey number
  • Loading branch information
paulfouquet authored Jan 26, 2024
1 parent 16b5b04 commit 78016f6
Show file tree
Hide file tree
Showing 7 changed files with 243 additions and 156 deletions.
40 changes: 26 additions & 14 deletions scripts/collection_from_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,7 @@
from scripts.files.fs_s3 import bucket_name_from_path, get_object_parallel_multithreading, list_json_in_uri
from scripts.logging.time_helper import time_in_ms
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.metadata_constants import (
HUMAN_READABLE_REGIONS,
CollectionTitleMetadata,
ElevationCategories,
ImageryCategories,
)
from scripts.stac.imagery.metadata_constants import DATA_CATEGORIES, HUMAN_READABLE_REGIONS, CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole


Expand All @@ -29,7 +24,7 @@ def main() -> None:
dest="category",
help="Dataset category description",
required=True,
choices=[type.value for type in ImageryCategories] + [type.value for type in ElevationCategories],
choices=list(DATA_CATEGORIES.keys()) + list(DATA_CATEGORIES.values()),
)
parser.add_argument(
"--region",
Expand All @@ -40,7 +35,11 @@ def main() -> None:
)
parser.add_argument("--gsd", dest="gsd", help="GSD of imagery Dataset", type=str, required=True)
parser.add_argument(
"--location", dest="location", help="Optional Location of dataset, e.g.- Hutt City", type=str, required=False
"--geographic-description",
dest="geographic_description",
help="Optional Geographic Description of dataset, e.g. Hutt City",
type=str,
required=False,
)
parser.add_argument(
"--start-date",
Expand All @@ -56,7 +55,7 @@ def main() -> None:
parser.add_argument(
"--historic-survey-number",
dest="historic_survey_number",
help="Historic Survey Number if Applicable. E.g.- SCN8844",
help="Historic Survey Number if Applicable. E.g. SCN8844",
type=str,
required=False,
)
Expand Down Expand Up @@ -92,19 +91,32 @@ def main() -> None:
for licensor_name in coalesce_multi_single(arguments.licensor_list, arguments.licensor):
providers.append({"name": licensor_name, "roles": [ProviderRole.LICENSOR]})

title_metadata: CollectionTitleMetadata = {
"category": arguments.category,
# category can also be passed as human readable name (e.g. "Aerial Photos")
# Get the corresponding identifier to simplify the process
category = arguments.category
if not DATA_CATEGORIES.get(category):
for key, value in DATA_CATEGORIES.items():
if value == category:
category = key
break

collection_metadata: CollectionMetadata = {
"category": category,
"region": arguments.region,
"gsd": arguments.gsd,
"start_datetime": arguments.start_date,
"end_datetime": arguments.end_date,
"lifecycle": arguments.lifecycle,
"location": arguments.location,
"event": arguments.event,
"geographic_description": arguments.geographic_description,
"event_name": arguments.event,
"historic_survey_number": arguments.historic_survey_number,
}

collection = ImageryCollection(title_metadata=title_metadata, collection_id=arguments.collection_id, providers=providers)
collection = ImageryCollection(
metadata=collection_metadata,
collection_id=arguments.collection_id,
providers=providers,
)

if not uri.startswith("s3://"):
msg = f"uri is not a s3 path: {uri}"
Expand Down
117 changes: 73 additions & 44 deletions scripts/stac/imagery/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,16 @@
from scripts.files.files_helper import ContentType
from scripts.files.fs import write
from scripts.stac.imagery.metadata_constants import (
DATA_CATEGORIES,
DEM,
DSM,
HUMAN_READABLE_REGIONS,
CollectionTitleMetadata,
ElevationCategories,
ImageryCategories,
RURAL_AERIAL_PHOTOS,
SATELLITE_IMAGERY,
SCANNED_AERIAL_PHOTOS,
URBAN_AERIAL_PHOTOS,
CollectionMetadata,
MissingMetadataError,
SubtypeParameterError,
)
from scripts.stac.imagery.provider import Provider, ProviderRole
Expand All @@ -22,14 +28,14 @@ class ImageryCollection:

def __init__(
self,
title_metadata: CollectionTitleMetadata,
metadata: CollectionMetadata,
collection_id: Optional[str] = None,
providers: Optional[List[Provider]] = None,
) -> None:
if not collection_id:
collection_id = str(ulid.ULID())

self.title_metadata = title_metadata
self.metadata = metadata

self.stac = {
"type": "Collection",
Expand All @@ -40,8 +46,18 @@ def __init__(
"license": "CC-BY-4.0",
"links": [{"rel": "self", "href": "./collection.json", "type": "application/json"}],
"providers": [],
"linz:lifecycle": metadata["lifecycle"],
"linz:geospatial_category": metadata["category"],
"linz:region": metadata["region"],
"linz:security_classification": "unclassified",
}

# Optional metadata
if event_name := metadata.get("event_name"):
self.stac["linz:event_name"] = event_name
if geographic_description := metadata.get("geographic_description"):
self.stac["linz:geographic_description"] = geographic_description

# If the providers passed has already a LINZ provider: add its default roles to it
has_linz = False
if providers:
Expand Down Expand Up @@ -195,50 +211,59 @@ def write_to(self, destination: str) -> None:
def _title(self) -> str:
"""Generates the title for imagery and elevation datasets.
Satellite Imagery / Urban Aerial Photos / Rural Aerial Photos:
[Location / Region if no Location specified] [GSD] [?Event Name] [Data Sub-Type] ([Year(s)]) [?- Preview]
[geographic_description / Region if no geographic_description specified] [GSD] [?Event Name] [Data Sub-Type]
([Year(s)]) [?- Preview]
DEM / DSM:
[Location / Region if no Location specified] [?- Event Name] LiDAR [GSD] [Data Sub-Type] ([Year(s)]) [?- Preview]
[geographic_description / Region if no geographic_description specified] [?- Event Name] LiDAR [GSD]
[Data Sub-Type] ([Year(s)]) [?- Preview]
If Historic Survey Number:
[Location / Region if no Location specified] [GSD] [Survey Number] ([Year(s)]) [?- Preview]
[geographic_description / Region if no geographic_description specified] [GSD] [Survey Number] ([Year(s)])
[?- Preview]
Returns:
Dataset Title
"""
# format optional metadata
location = self.title_metadata.get("location")
historic_survey_number = self.title_metadata.get("historic_survey_number")
event = self.title_metadata.get("event")
geographic_description = self.metadata.get("geographic_description")
historic_survey_number = self.metadata.get("historic_survey_number")
event = self.metadata.get("event_name")

# format date for metadata
if self.title_metadata["start_datetime"].year == self.title_metadata["end_datetime"].year:
date = str(self.title_metadata["start_datetime"].year)
if self.metadata["start_datetime"].year == self.metadata["end_datetime"].year:
date = str(self.metadata["start_datetime"].year)
else:
date = f"{self.title_metadata['start_datetime'].year}-{self.title_metadata['end_datetime'].year}"
date = f"{self.metadata['start_datetime'].year}-{self.metadata['end_datetime'].year}"

# determine dataset name
if location:
name = location
if geographic_description:
name = geographic_description
else:
name = HUMAN_READABLE_REGIONS[self.title_metadata["region"]]
name = HUMAN_READABLE_REGIONS[self.metadata["region"]]

# determine if dataset is preview
if self.title_metadata.get("lifecycle") == "preview":
if self.metadata.get("lifecycle") == "preview":
preview = "- Preview"
else:
preview = None

if historic_survey_number:
return " ".join(f"{name} {self.title_metadata['gsd']} {historic_survey_number} ({date}) {preview or ''}".split())
if self.metadata["category"] == SCANNED_AERIAL_PHOTOS:
if not historic_survey_number:
raise MissingMetadataError("historic_survey_number")
return " ".join(f"{name} {self.metadata['gsd']} {historic_survey_number} ({date}) {preview or ''}".split())

if self.title_metadata["category"] in [ImageryCategories.SATELLITE, ImageryCategories.URBAN, ImageryCategories.RURAL]:
if self.metadata["category"] in [
SATELLITE_IMAGERY,
URBAN_AERIAL_PHOTOS,
RURAL_AERIAL_PHOTOS,
]:
return " ".join(
f"{name} {self.title_metadata['gsd']} {event or ''} {self.title_metadata['category']} ({date}) {preview or ''}".split() # pylint: disable=line-too-long
f"{name} {self.metadata['gsd']} {event or ''} {DATA_CATEGORIES[self.metadata['category']]} ({date}) {preview or ''}".split() # pylint: disable=line-too-long
)
if self.title_metadata["category"] in [ElevationCategories.DEM, ElevationCategories.DSM]:
if self.metadata["category"] in [DEM, DSM]:
return " ".join(
f"{name} {self._elevation_title_event(event) or ''} LiDAR {self.title_metadata['gsd']} {self.title_metadata['category']} ({date}) {preview or ''}".split() # pylint: disable=line-too-long
f"{name} {self._elevation_title_event(event) or ''} LiDAR {self.metadata['gsd']} {DATA_CATEGORIES[self.metadata['category']]} ({date}) {preview or ''}".split() # pylint: disable=line-too-long
)
raise SubtypeParameterError(self.title_metadata["category"])
raise SubtypeParameterError(self.metadata["category"])

def _elevation_title_event(self, event: Optional[str]) -> Optional[str]:
if event:
Expand All @@ -250,7 +275,8 @@ def _description(self) -> str:
Urban Aerial Photos / Rural Aerial Photos:
Orthophotography within the [Region] region captured in the [Year(s)] flying season.
DEM / DSM:
[Digital Surface Model / Digital Elevation Model] within the [region] [?- location] region in [year(s)].
[Digital Surface Model / Digital Elevation Model] within the [region]
[?- geographic_description] region in [year(s)].
Satellite Imagery:
Satellite imagery within the [Region] region captured in [Year(s)].
Historical Imagery:
Expand All @@ -260,34 +286,37 @@ def _description(self) -> str:
Dataset Description
"""
# format optional metadata
location = self.title_metadata.get("location")
historic_survey_number = self.title_metadata.get("historic_survey_number")
event = self.title_metadata.get("event")
geographic_description = self.metadata.get("geographic_description")
event = self.metadata.get("event_name")

# format date for metadata
if self.title_metadata["start_datetime"].year == self.title_metadata["end_datetime"].year:
date = str(self.title_metadata["start_datetime"].year)
if self.metadata["start_datetime"].year == self.metadata["end_datetime"].year:
date = str(self.metadata["start_datetime"].year)
else:
date = f"{self.title_metadata['start_datetime'].year}-{self.title_metadata['end_datetime'].year}"
date = f"{self.metadata['start_datetime'].year}-{self.metadata['end_datetime'].year}"

# format location for metadata description
if location:
location = f"- {location}"
# format geographic_description for metadata description
if geographic_description:
geographic_description = f"- {geographic_description}"

region = HUMAN_READABLE_REGIONS[self.title_metadata["region"]]
region = HUMAN_READABLE_REGIONS[self.metadata["region"]]

if historic_survey_number:
if self.metadata["category"] == SCANNED_AERIAL_PHOTOS:
desc = f"Scanned aerial imagery within the {region} region captured in {date}"
elif self.title_metadata["category"] == ImageryCategories.SATELLITE:
elif self.metadata["category"] == SATELLITE_IMAGERY:
desc = f"Satellite imagery within the {region} region captured in {date}"
elif self.title_metadata["category"] in [ImageryCategories.URBAN, ImageryCategories.RURAL]:
elif self.metadata["category"] in [URBAN_AERIAL_PHOTOS, RURAL_AERIAL_PHOTOS]:
desc = f"Orthophotography within the {region} region captured in the {date} flying season"
elif self.title_metadata["category"] == ElevationCategories.DEM:
desc = " ".join(f"Digital Elevation Model within the {region} {location or ''} region in {date}".split())
elif self.title_metadata["category"] == ElevationCategories.DSM:
desc = " ".join(f"Digital Surface Model within the {region} {location or ''} region in {date}".split())
elif self.metadata["category"] == DEM:
desc = " ".join(
f"Digital Elevation Model within the {region} {geographic_description or ''} region in {date}".split()
)
elif self.metadata["category"] == DSM:
desc = " ".join(
f"Digital Surface Model within the {region} {geographic_description or ''} region in {date}".split()
)
else:
raise SubtypeParameterError(self.title_metadata["category"])
raise SubtypeParameterError(self.metadata["category"])

if event:
desc = desc + f", published as a record of the {event} event"
Expand Down
38 changes: 24 additions & 14 deletions scripts/stac/imagery/metadata_constants.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
from datetime import datetime
from enum import Enum
from typing import Optional, TypedDict


class CollectionTitleMetadata(TypedDict):
class CollectionMetadata(TypedDict):
"""
region: Region of Dataset
gsd: Dataset Ground Sample Distance
start_datetime: Dataset capture start date
end_datetime: Dataset capture end date
lifecycle: Dataset status
Optional:
location: Optional location of dataset, e.g. Hutt City
geographic_description: Optional geographic_description of dataset, e.g. Hutt City
event_name: Optional details of capture event, e.g. Cyclone Gabrielle
historic_survey_number: Optional historic imagery survey number, e.g. SNC88445
"""
Expand All @@ -22,8 +21,8 @@ class CollectionTitleMetadata(TypedDict):
start_datetime: datetime
end_datetime: datetime
lifecycle: str
location: Optional[str]
event: Optional[str]
geographic_description: Optional[str]
event_name: Optional[str]
historic_survey_number: Optional[str]


Expand All @@ -32,17 +31,28 @@ def __init__(self, category: str) -> None:
self.message = f"Unrecognised/Unimplemented Subtype Parameter: {category}"


class ImageryCategories(str, Enum):
SATELLITE = "Satellite Imagery"
URBAN = "Urban Aerial Photos"
RURAL = "Rural Aerial Photos"
AERIAL = "Aerial Photos"
HISTORICAL = "Scanned Aerial Photos"
class MissingMetadataError(Exception):
    """Raised when a metadata value required for the requested operation is absent.

    Attributes:
        message: Human-readable description naming the missing metadata key.
    """

    def __init__(self, metadata: str) -> None:
        """Build the error for the given metadata key.

        Args:
            metadata: Name of the metadata key that is missing.
        """
        self.message = f"Missing metadata: {metadata}"
        # Forward the message to Exception so str(err), repr(err) and
        # tracebacks show it instead of an empty string.
        super().__init__(self.message)


class ElevationCategories(str, Enum):
DEM = "DEM"
DSM = "DSM"
# Machine-readable identifiers for the supported dataset categories.
AERIAL_PHOTOS = "aerial-photos"
SCANNED_AERIAL_PHOTOS = "scanned-aerial-photos"
RURAL_AERIAL_PHOTOS = "rural-aerial-photos"
SATELLITE_IMAGERY = "satellite-imagery"
URBAN_AERIAL_PHOTOS = "urban-aerial-photos"
DEM = "dem"
DSM = "dsm"

# Maps each category identifier to its human-readable name.
DATA_CATEGORIES = {
AERIAL_PHOTOS: "Aerial Photos",
SCANNED_AERIAL_PHOTOS: "Scanned Aerial Photos",
RURAL_AERIAL_PHOTOS: "Rural Aerial Photos",
SATELLITE_IMAGERY: "Satellite Imagery",
URBAN_AERIAL_PHOTOS: "Urban Aerial Photos",
DEM: "DEM",
DSM: "DSM",
}


HUMAN_READABLE_REGIONS = {
Expand Down
Loading

0 comments on commit 78016f6

Please sign in to comment.