Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: add new fields to the Collection TDE-985 #765

Merged
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
d47b71b
feat: add new fields from LINZ STAC extension to the Collection TDE-985
paulfouquet Dec 5, 2023
fe25ff4
fix: tests fails
paulfouquet Dec 5, 2023
0b103b7
refactor: merge master
paulfouquet Dec 20, 2023
1368e44
fix: remove linz STAC extension from Collection
paulfouquet Dec 20, 2023
d9abfcc
wip
paulfouquet Dec 29, 2023
2158a73
feat: add stac metadata TDE-985
paulfouquet Jan 9, 2024
778ebba
build: revert pylint change
paulfouquet Jan 9, 2024
74a76b4
Merge branch 'master' into feat/use-linz-stac-extension-for-collectio…
paulfouquet Jan 9, 2024
4621e11
tests: fix the tests
paulfouquet Jan 9, 2024
cb54fd4
fix: --geographic_desc should be --geographic_description
paulfouquet Jan 9, 2024
a451fa6
fix: argument help message should not contain unnecessary -
paulfouquet Jan 9, 2024
4b06f13
tests: event_name should not be in all the tests
paulfouquet Jan 9, 2024
b31a1b1
fix: formatting
paulfouquet Jan 9, 2024
383306a
fix: should not break if --category is passed as human readable values
paulfouquet Jan 10, 2024
06b5cfb
refactor: reduce number of dict lookup
paulfouquet Jan 10, 2024
0d4f91d
fix: location should be geographic_description
paulfouquet Jan 14, 2024
e7820bd
feat: change --location to --geographic-description
paulfouquet Jan 16, 2024
490dcb9
fix: formatting
paulfouquet Jan 16, 2024
501f429
fix: unused 'type: ignore' comment
paulfouquet Jan 16, 2024
fbfdd4a
Merge branch 'master' into feat/use-linz-stac-extension-for-collectio…
paulfouquet Jan 16, 2024
8f10bf0
build: fix mypy ignore comment
paulfouquet Jan 16, 2024
795f004
refactor: check historical imagery based on its category rather than …
paulfouquet Jan 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 17 additions & 10 deletions scripts/collection_from_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,7 @@
from scripts.files.fs_s3 import bucket_name_from_path, get_object_parallel_multithreading, list_json_in_uri
from scripts.logging.time_helper import time_in_ms
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.metadata_constants import (
HUMAN_READABLE_REGIONS,
CollectionTitleMetadata,
ElevationCategories,
ImageryCategories,
)
from scripts.stac.imagery.metadata_constants import DATA_CATEGORIES, HUMAN_READABLE_REGIONS, CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole


Expand All @@ -29,7 +24,7 @@ def main() -> None:
dest="category",
help="Dataset category description",
required=True,
choices=[type.value for type in ImageryCategories] + [type.value for type in ElevationCategories],
choices=DATA_CATEGORIES.keys(),
)
parser.add_argument(
"--region",
Expand All @@ -42,6 +37,13 @@ def main() -> None:
parser.add_argument(
"--location", dest="location", help="Optional Location of dataset, e.g.- Hutt City", type=str, required=False
)
parser.add_argument(
"--geographic-desc",
paulfouquet marked this conversation as resolved.
Show resolved Hide resolved
dest="geographic_description",
help="Optional Location Name, e.g.- South Island",
paulfouquet marked this conversation as resolved.
Show resolved Hide resolved
type=str,
required=False,
)
parser.add_argument(
"--start-date",
dest="start_date",
Expand Down Expand Up @@ -92,19 +94,24 @@ def main() -> None:
for licensor_name in coalesce_multi_single(arguments.licensor_list, arguments.licensor):
providers.append({"name": licensor_name, "roles": [ProviderRole.LICENSOR]})

title_metadata: CollectionTitleMetadata = {
collection_metadata: CollectionMetadata = {
"category": arguments.category,
"region": arguments.region,
"gsd": arguments.gsd,
"start_datetime": arguments.start_date,
"end_datetime": arguments.end_date,
"lifecycle": arguments.lifecycle,
"location": arguments.location,
"event": arguments.event,
"event_name": arguments.event,
"historic_survey_number": arguments.historic_survey_number,
"geographic_description": arguments.geographic_description,
}

collection = ImageryCollection(title_metadata=title_metadata, collection_id=arguments.collection_id, providers=providers)
collection = ImageryCollection(
metadata=collection_metadata,
collection_id=arguments.collection_id,
providers=providers,
)

if not uri.startswith("s3://"):
msg = f"uri is not a s3 path: {uri}"
Expand Down
80 changes: 49 additions & 31 deletions scripts/stac/imagery/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@
from scripts.files.files_helper import ContentType
from scripts.files.fs import write
from scripts.stac.imagery.metadata_constants import (
DATA_CATEGORIES,
DEM,
DSM,
HUMAN_READABLE_REGIONS,
CollectionTitleMetadata,
ElevationCategories,
ImageryCategories,
RURAL_AERIAL_PHOTOS,
SATELLITE_IMAGERY,
URBAN_AERIAL_PHOTOS,
CollectionMetadata,
SubtypeParameterError,
)
from scripts.stac.imagery.provider import Provider, ProviderRole
Expand All @@ -22,14 +26,14 @@ class ImageryCollection:

def __init__(
self,
title_metadata: CollectionTitleMetadata,
metadata: CollectionMetadata,
collection_id: Optional[str] = None,
providers: Optional[List[Provider]] = None,
) -> None:
if not collection_id:
collection_id = str(ulid.ULID())

self.title_metadata = title_metadata
self.metadata = metadata

self.stac = {
"type": "Collection",
Expand All @@ -40,8 +44,18 @@ def __init__(
"license": "CC-BY-4.0",
"links": [{"rel": "self", "href": "./collection.json", "type": "application/json"}],
"providers": [],
"linz:lifecycle": metadata["lifecycle"],
"linz:geospatial_category": metadata["category"],
"linz:region": metadata["region"],
"linz:security_classification": "unclassified",
}

# Optional metadata
if metadata.get("event_name"):
l0b0 marked this conversation as resolved.
Show resolved Hide resolved
self.stac["linz:event_name"] = metadata["event_name"]
if metadata.get("geographic_description"):
self.stac["linz:geographic_description"] = metadata["geographic_description"]

# If the providers passed has already a LINZ provider: add its default roles to it
has_linz = False
if providers:
Expand Down Expand Up @@ -205,40 +219,44 @@ def _title(self) -> str:
Dataset Title
"""
# format optional metadata
location = self.title_metadata.get("location")
historic_survey_number = self.title_metadata.get("historic_survey_number")
event = self.title_metadata.get("event")
location = self.metadata.get("location")
historic_survey_number = self.metadata.get("historic_survey_number")
event = self.metadata.get("event_name")

# format date for metadata
if self.title_metadata["start_datetime"].year == self.title_metadata["end_datetime"].year:
date = str(self.title_metadata["start_datetime"].year)
if self.metadata["start_datetime"].year == self.metadata["end_datetime"].year:
date = str(self.metadata["start_datetime"].year)
else:
date = f"{self.title_metadata['start_datetime'].year}-{self.title_metadata['end_datetime'].year}"
date = f"{self.metadata['start_datetime'].year}-{self.metadata['end_datetime'].year}"

# determine dataset name
if location:
name = location
else:
name = HUMAN_READABLE_REGIONS[self.title_metadata["region"]]
name = HUMAN_READABLE_REGIONS[self.metadata["region"]]

# determine if dataset is preview
if self.title_metadata.get("lifecycle") == "preview":
if self.metadata.get("lifecycle") == "preview":
preview = "- Preview"
else:
preview = None

if historic_survey_number:
return " ".join(f"{name} {self.title_metadata['gsd']} {historic_survey_number} ({date}) {preview or ''}".split())
return " ".join(f"{name} {self.metadata['gsd']} {historic_survey_number} ({date}) {preview or ''}".split())

if self.title_metadata["category"] in [ImageryCategories.SATELLITE, ImageryCategories.URBAN, ImageryCategories.RURAL]:
if self.metadata["category"] in [
SATELLITE_IMAGERY,
URBAN_AERIAL_PHOTOS,
RURAL_AERIAL_PHOTOS,
]:
return " ".join(
f"{name} {self.title_metadata['gsd']} {event or ''} {self.title_metadata['category']} ({date}) {preview or ''}".split() # pylint: disable=line-too-long
f"{name} {self.metadata['gsd']} {event or ''} {DATA_CATEGORIES[self.metadata['category']]} ({date}) {preview or ''}".split() # pylint: disable=line-too-long
)
if self.title_metadata["category"] in [ElevationCategories.DEM, ElevationCategories.DSM]:
if self.metadata["category"] in [DEM, DSM]:
return " ".join(
f"{name} {self._elevation_title_event(event) or ''} LiDAR {self.title_metadata['gsd']} {self.title_metadata['category']} ({date}) {preview or ''}".split() # pylint: disable=line-too-long
f"{name} {self._elevation_title_event(event) or ''} LiDAR {self.metadata['gsd']} {DATA_CATEGORIES[self.metadata['category']]} ({date}) {preview or ''}".split() # pylint: disable=line-too-long
)
raise SubtypeParameterError(self.title_metadata["category"])
raise SubtypeParameterError(self.metadata["category"])

def _elevation_title_event(self, event: Optional[str]) -> Optional[str]:
if event:
Expand All @@ -260,34 +278,34 @@ def _description(self) -> str:
Dataset Description
"""
# format optional metadata
location = self.title_metadata.get("location")
historic_survey_number = self.title_metadata.get("historic_survey_number")
event = self.title_metadata.get("event")
location = self.metadata.get("location")
historic_survey_number = self.metadata.get("historic_survey_number")
event = self.metadata.get("event_name")

# format date for metadata
if self.title_metadata["start_datetime"].year == self.title_metadata["end_datetime"].year:
date = str(self.title_metadata["start_datetime"].year)
if self.metadata["start_datetime"].year == self.metadata["end_datetime"].year:
date = str(self.metadata["start_datetime"].year)
else:
date = f"{self.title_metadata['start_datetime'].year}-{self.title_metadata['end_datetime'].year}"
date = f"{self.metadata['start_datetime'].year}-{self.metadata['end_datetime'].year}"

# format location for metadata description
if location:
location = f"- {location}"

region = HUMAN_READABLE_REGIONS[self.title_metadata["region"]]
region = HUMAN_READABLE_REGIONS[self.metadata["region"]]

if historic_survey_number:
desc = f"Scanned aerial imagery within the {region} region captured in {date}"
elif self.title_metadata["category"] == ImageryCategories.SATELLITE:
elif self.metadata["category"] == SATELLITE_IMAGERY:
desc = f"Satellite imagery within the {region} region captured in {date}"
elif self.title_metadata["category"] in [ImageryCategories.URBAN, ImageryCategories.RURAL]:
elif self.metadata["category"] in [URBAN_AERIAL_PHOTOS, RURAL_AERIAL_PHOTOS]:
desc = f"Orthophotography within the {region} region captured in the {date} flying season"
elif self.title_metadata["category"] == ElevationCategories.DEM:
elif self.metadata["category"] == DEM:
desc = " ".join(f"Digital Elevation Model within the {region} {location or ''} region in {date}".split())
elif self.title_metadata["category"] == ElevationCategories.DSM:
elif self.metadata["category"] == DSM:
desc = " ".join(f"Digital Surface Model within the {region} {location or ''} region in {date}".split())
else:
raise SubtypeParameterError(self.title_metadata["category"])
raise SubtypeParameterError(self.metadata["category"])

if event:
desc = desc + f", published as a record of the {event} event"
Expand Down
33 changes: 20 additions & 13 deletions scripts/stac/imagery/metadata_constants.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from datetime import datetime
from enum import Enum
from typing import Optional, TypedDict


class CollectionTitleMetadata(TypedDict):
class CollectionMetadata(TypedDict):
"""
region: Region of Dataset
gsd: Dataset Ground Sample Distance
Expand All @@ -14,6 +13,7 @@ class CollectionTitleMetadata(TypedDict):
location: Optional location of dataset, e.g. Hutt City
event_name: Optional details of capture event, e.g. Cyclone Gabrielle
historic_survey_number: Optional historic imagery survey number, e.g. SNC88445
geographic_description: Optional location name, e.g. South Island
"""

category: str
Expand All @@ -23,26 +23,33 @@ class CollectionTitleMetadata(TypedDict):
end_datetime: datetime
lifecycle: str
location: Optional[str]
event: Optional[str]
event_name: Optional[str]
historic_survey_number: Optional[str]
geographic_description: Optional[str]


class SubtypeParameterError(Exception):
    """Raised when a dataset category is not recognised or not implemented.

    Attributes:
        message: Human-readable explanation that includes the offending category.
    """

    def __init__(self, category: str) -> None:
        """Build the error for the given category.

        Args:
            category: The category value that could not be handled.
        """
        self.message = f"Unrecognised/Unimplemented Subtype Parameter: {category}"
        # Pass the message to the base class so that `str(error)`, logging and
        # tracebacks show the full explanation instead of only the raw category.
        super().__init__(self.message)


class ImageryCategories(str, Enum):
SATELLITE = "Satellite Imagery"
URBAN = "Urban Aerial Photos"
RURAL = "Rural Aerial Photos"
AERIAL = "Aerial Photos"
HISTORICAL = "Scanned Aerial Photos"
AERIAL_PHOTOS = "aerial-photos"
SCANNED_AERIAL_PHOTOS = "scanned-aerial-photos"
RURAL_AERIAL_PHOTOS = "rural-aerial-photos"
SATELLITE_IMAGERY = "satellite-imagery"
URBAN_AERIAL_PHOTOS = "urban-aerial-photos"
DEM = "dem"
DSM = "dsm"


class ElevationCategories(str, Enum):
DEM = "DEM"
DSM = "DSM"
# Maps each category identifier to its human-readable name.
# The keys are offered as the allowed `--category` choices and the values are
# looked up when composing the collection title.
DATA_CATEGORIES = {
AERIAL_PHOTOS: "Aerial Photos",
SCANNED_AERIAL_PHOTOS: "Scanned Aerial Photos",
RURAL_AERIAL_PHOTOS: "Rural Aerial Photos",
SATELLITE_IMAGERY: "Satellite Imagery",
URBAN_AERIAL_PHOTOS: "Urban Aerial Photos",
DEM: "DEM",
DSM: "DSM",
}


HUMAN_READABLE_REGIONS = {
Expand Down
Loading