Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: add new fields to the Collection TDE-985 #765

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
d47b71b
feat: add new fields from LINZ STAC extension to the Collection TDE-985
paulfouquet Dec 5, 2023
fe25ff4
fix: tests fails
paulfouquet Dec 5, 2023
0b103b7
refactor: merge master
paulfouquet Dec 20, 2023
1368e44
fix: remove linz STAC extension from Collection
paulfouquet Dec 20, 2023
d9abfcc
wip
paulfouquet Dec 29, 2023
2158a73
feat: add stac metadata TDE-985
paulfouquet Jan 9, 2024
778ebba
build: revert pylint change
paulfouquet Jan 9, 2024
74a76b4
Merge branch 'master' into feat/use-linz-stac-extension-for-collectio…
paulfouquet Jan 9, 2024
4621e11
tests: fix the tests
paulfouquet Jan 9, 2024
cb54fd4
fix: --geographic_desc should be --geographic_description
paulfouquet Jan 9, 2024
a451fa6
fix: argument help message should not contain unecessary -
paulfouquet Jan 9, 2024
4b06f13
tests: event_name should not be in all the tests
paulfouquet Jan 9, 2024
b31a1b1
fix: formatting
paulfouquet Jan 9, 2024
383306a
fix: should not break if --category is passed as human readable values
paulfouquet Jan 10, 2024
06b5cfb
refactor: reduce number of dict lookup
paulfouquet Jan 10, 2024
0d4f91d
fix: location should be geographic_description
paulfouquet Jan 14, 2024
e7820bd
feat: change --location to --geographic-description
paulfouquet Jan 16, 2024
490dcb9
fix: formatting
paulfouquet Jan 16, 2024
501f429
fix: unused 'type: ignore' comment
paulfouquet Jan 16, 2024
fbfdd4a
Merge branch 'master' into feat/use-linz-stac-extension-for-collectio…
paulfouquet Jan 16, 2024
8f10bf0
build: fix mypy ignore comment
paulfouquet Jan 16, 2024
795f004
refactor: check historical imagery based on its category rather than …
paulfouquet Jan 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 26 additions & 14 deletions scripts/collection_from_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,7 @@
from scripts.files.fs_s3 import bucket_name_from_path, get_object_parallel_multithreading, list_json_in_uri
from scripts.logging.time_helper import time_in_ms
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.metadata_constants import (
HUMAN_READABLE_REGIONS,
CollectionTitleMetadata,
ElevationCategories,
ImageryCategories,
)
from scripts.stac.imagery.metadata_constants import DATA_CATEGORIES, HUMAN_READABLE_REGIONS, CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole


Expand All @@ -29,7 +24,7 @@ def main() -> None:
dest="category",
help="Dataset category description",
required=True,
choices=[type.value for type in ImageryCategories] + [type.value for type in ElevationCategories],
choices=list(DATA_CATEGORIES.keys()) + list(DATA_CATEGORIES.values()),
)
parser.add_argument(
"--region",
Expand All @@ -40,7 +35,11 @@ def main() -> None:
)
parser.add_argument("--gsd", dest="gsd", help="GSD of imagery Dataset", type=str, required=True)
parser.add_argument(
"--location", dest="location", help="Optional Location of dataset, e.g.- Hutt City", type=str, required=False
"--geographic-description",
dest="geographic_description",
help="Optional Geographic Description of dataset, e.g. Hutt City",
type=str,
required=False,
)
parser.add_argument(
"--start-date",
Expand All @@ -56,7 +55,7 @@ def main() -> None:
parser.add_argument(
"--historic-survey-number",
dest="historic_survey_number",
help="Historic Survey Number if Applicable. E.g.- SCN8844",
help="Historic Survey Number if Applicable. E.g. SCN8844",
type=str,
required=False,
)
Expand Down Expand Up @@ -92,19 +91,32 @@ def main() -> None:
for licensor_name in coalesce_multi_single(arguments.licensor_list, arguments.licensor):
providers.append({"name": licensor_name, "roles": [ProviderRole.LICENSOR]})

title_metadata: CollectionTitleMetadata = {
"category": arguments.category,
# category can also be passed as human readable name (e.g. "Aerial Photos")
# Get the corresponding identifier to simplify the process
category = arguments.category
if not DATA_CATEGORIES.get(category):
for key, value in DATA_CATEGORIES.items():
if value == category:
category = key
break

collection_metadata: CollectionMetadata = {
"category": category,
"region": arguments.region,
"gsd": arguments.gsd,
"start_datetime": arguments.start_date,
"end_datetime": arguments.end_date,
"lifecycle": arguments.lifecycle,
"location": arguments.location,
"event": arguments.event,
"geographic_description": arguments.geographic_description,
"event_name": arguments.event,
"historic_survey_number": arguments.historic_survey_number,
}

collection = ImageryCollection(title_metadata=title_metadata, collection_id=arguments.collection_id, providers=providers)
collection = ImageryCollection(
metadata=collection_metadata,
collection_id=arguments.collection_id,
providers=providers,
)

if not uri.startswith("s3://"):
msg = f"uri is not a s3 path: {uri}"
Expand Down
117 changes: 73 additions & 44 deletions scripts/stac/imagery/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,16 @@
from scripts.files.files_helper import ContentType
from scripts.files.fs import write
from scripts.stac.imagery.metadata_constants import (
DATA_CATEGORIES,
DEM,
DSM,
HUMAN_READABLE_REGIONS,
CollectionTitleMetadata,
ElevationCategories,
ImageryCategories,
RURAL_AERIAL_PHOTOS,
SATELLITE_IMAGERY,
SCANNED_AERIAL_PHOTOS,
URBAN_AERIAL_PHOTOS,
CollectionMetadata,
MissingMetadataError,
SubtypeParameterError,
)
from scripts.stac.imagery.provider import Provider, ProviderRole
Expand All @@ -22,14 +28,14 @@ class ImageryCollection:

def __init__(
self,
title_metadata: CollectionTitleMetadata,
metadata: CollectionMetadata,
collection_id: Optional[str] = None,
providers: Optional[List[Provider]] = None,
) -> None:
if not collection_id:
collection_id = str(ulid.ULID())

self.title_metadata = title_metadata
self.metadata = metadata

self.stac = {
"type": "Collection",
Expand All @@ -40,8 +46,18 @@ def __init__(
"license": "CC-BY-4.0",
"links": [{"rel": "self", "href": "./collection.json", "type": "application/json"}],
"providers": [],
"linz:lifecycle": metadata["lifecycle"],
"linz:geospatial_category": metadata["category"],
"linz:region": metadata["region"],
"linz:security_classification": "unclassified",
}

# Optional metadata
if event_name := metadata.get("event_name"):
self.stac["linz:event_name"] = event_name
if geographic_description := metadata.get("geographic_description"):
self.stac["linz:geographic_description"] = geographic_description

# If the providers passed has already a LINZ provider: add its default roles to it
has_linz = False
if providers:
Expand Down Expand Up @@ -195,50 +211,59 @@ def write_to(self, destination: str) -> None:
def _title(self) -> str:
"""Generates the title for imagery and elevation datasets.
Satellite Imagery / Urban Aerial Photos / Rural Aerial Photos:
[Location / Region if no Location specified] [GSD] [?Event Name] [Data Sub-Type] ([Year(s)]) [?- Preview]
[geographic_description / Region if no geographic_description specified] [GSD] [?Event Name] [Data Sub-Type]
([Year(s)]) [?- Preview]
DEM / DSM:
[Location / Region if no Location specified] [?- Event Name] LiDAR [GSD] [Data Sub-Type] ([Year(s)]) [?- Preview]
[geographic_description / Region if no geographic_description specified] [?- Event Name] LiDAR [GSD]
[Data Sub-Type] ([Year(s)]) [?- Preview]
If Historic Survey Number:
[Location / Region if no Location specified] [GSD] [Survey Number] ([Year(s)]) [?- Preview]
[geographic_description / Region if no geographic_description specified] [GSD] [Survey Number] ([Year(s)])
[?- Preview]

Returns:
Dataset Title
"""
# format optional metadata
location = self.title_metadata.get("location")
historic_survey_number = self.title_metadata.get("historic_survey_number")
event = self.title_metadata.get("event")
geographic_description = self.metadata.get("geographic_description")
historic_survey_number = self.metadata.get("historic_survey_number")
event = self.metadata.get("event_name")

# format date for metadata
if self.title_metadata["start_datetime"].year == self.title_metadata["end_datetime"].year:
date = str(self.title_metadata["start_datetime"].year)
if self.metadata["start_datetime"].year == self.metadata["end_datetime"].year:
date = str(self.metadata["start_datetime"].year)
else:
date = f"{self.title_metadata['start_datetime'].year}-{self.title_metadata['end_datetime'].year}"
date = f"{self.metadata['start_datetime'].year}-{self.metadata['end_datetime'].year}"

# determine dataset name
if location:
name = location
if geographic_description:
name = geographic_description
else:
name = HUMAN_READABLE_REGIONS[self.title_metadata["region"]]
name = HUMAN_READABLE_REGIONS[self.metadata["region"]]

# determine if dataset is preview
if self.title_metadata.get("lifecycle") == "preview":
if self.metadata.get("lifecycle") == "preview":
preview = "- Preview"
else:
preview = None

if historic_survey_number:
return " ".join(f"{name} {self.title_metadata['gsd']} {historic_survey_number} ({date}) {preview or ''}".split())
if self.metadata["category"] == SCANNED_AERIAL_PHOTOS:
if not historic_survey_number:
raise MissingMetadataError("historic_survey_number")
return " ".join(f"{name} {self.metadata['gsd']} {historic_survey_number} ({date}) {preview or ''}".split())

if self.title_metadata["category"] in [ImageryCategories.SATELLITE, ImageryCategories.URBAN, ImageryCategories.RURAL]:
if self.metadata["category"] in [
SATELLITE_IMAGERY,
URBAN_AERIAL_PHOTOS,
RURAL_AERIAL_PHOTOS,
]:
return " ".join(
f"{name} {self.title_metadata['gsd']} {event or ''} {self.title_metadata['category']} ({date}) {preview or ''}".split() # pylint: disable=line-too-long
f"{name} {self.metadata['gsd']} {event or ''} {DATA_CATEGORIES[self.metadata['category']]} ({date}) {preview or ''}".split() # pylint: disable=line-too-long
)
if self.title_metadata["category"] in [ElevationCategories.DEM, ElevationCategories.DSM]:
if self.metadata["category"] in [DEM, DSM]:
return " ".join(
f"{name} {self._elevation_title_event(event) or ''} LiDAR {self.title_metadata['gsd']} {self.title_metadata['category']} ({date}) {preview or ''}".split() # pylint: disable=line-too-long
f"{name} {self._elevation_title_event(event) or ''} LiDAR {self.metadata['gsd']} {DATA_CATEGORIES[self.metadata['category']]} ({date}) {preview or ''}".split() # pylint: disable=line-too-long
)
raise SubtypeParameterError(self.title_metadata["category"])
raise SubtypeParameterError(self.metadata["category"])

def _elevation_title_event(self, event: Optional[str]) -> Optional[str]:
if event:
Expand All @@ -250,7 +275,8 @@ def _description(self) -> str:
Urban Aerial Photos / Rural Aerial Photos:
Orthophotography within the [Region] region captured in the [Year(s)] flying season.
DEM / DSM:
[Digital Surface Model / Digital Elevation Model] within the [region] [?- location] region in [year(s)].
[Digital Surface Model / Digital Elevation Model] within the [region]
[?- geographic_description] region in [year(s)].
Satellite Imagery:
Satellite imagery within the [Region] region captured in [Year(s)].
Historical Imagery:
Expand All @@ -260,34 +286,37 @@ def _description(self) -> str:
Dataset Description
"""
# format optional metadata
location = self.title_metadata.get("location")
historic_survey_number = self.title_metadata.get("historic_survey_number")
event = self.title_metadata.get("event")
geographic_description = self.metadata.get("geographic_description")
event = self.metadata.get("event_name")

# format date for metadata
if self.title_metadata["start_datetime"].year == self.title_metadata["end_datetime"].year:
date = str(self.title_metadata["start_datetime"].year)
if self.metadata["start_datetime"].year == self.metadata["end_datetime"].year:
date = str(self.metadata["start_datetime"].year)
else:
date = f"{self.title_metadata['start_datetime'].year}-{self.title_metadata['end_datetime'].year}"
date = f"{self.metadata['start_datetime'].year}-{self.metadata['end_datetime'].year}"

# format location for metadata description
if location:
location = f"- {location}"
# format geographic_description for metadata description
if geographic_description:
geographic_description = f"- {geographic_description}"

region = HUMAN_READABLE_REGIONS[self.title_metadata["region"]]
region = HUMAN_READABLE_REGIONS[self.metadata["region"]]

if historic_survey_number:
if self.metadata["category"] == SCANNED_AERIAL_PHOTOS:
desc = f"Scanned aerial imagery within the {region} region captured in {date}"
elif self.title_metadata["category"] == ImageryCategories.SATELLITE:
elif self.metadata["category"] == SATELLITE_IMAGERY:
desc = f"Satellite imagery within the {region} region captured in {date}"
elif self.title_metadata["category"] in [ImageryCategories.URBAN, ImageryCategories.RURAL]:
elif self.metadata["category"] in [URBAN_AERIAL_PHOTOS, RURAL_AERIAL_PHOTOS]:
desc = f"Orthophotography within the {region} region captured in the {date} flying season"
elif self.title_metadata["category"] == ElevationCategories.DEM:
desc = " ".join(f"Digital Elevation Model within the {region} {location or ''} region in {date}".split())
elif self.title_metadata["category"] == ElevationCategories.DSM:
desc = " ".join(f"Digital Surface Model within the {region} {location or ''} region in {date}".split())
elif self.metadata["category"] == DEM:
desc = " ".join(
f"Digital Elevation Model within the {region} {geographic_description or ''} region in {date}".split()
)
elif self.metadata["category"] == DSM:
desc = " ".join(
f"Digital Surface Model within the {region} {geographic_description or ''} region in {date}".split()
)
else:
raise SubtypeParameterError(self.title_metadata["category"])
raise SubtypeParameterError(self.metadata["category"])

if event:
desc = desc + f", published as a record of the {event} event"
Expand Down
38 changes: 24 additions & 14 deletions scripts/stac/imagery/metadata_constants.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
from datetime import datetime
from enum import Enum
from typing import Optional, TypedDict


class CollectionTitleMetadata(TypedDict):
class CollectionMetadata(TypedDict):
"""
region: Region of Dataset
gsd: Dataset Ground Sample Distance
start_datetime: Dataset capture start date
end_datetime: Dataset capture end date
lifecycle: Dataset status
Optional:
location: Optional location of dataset, e.g. Hutt City
geographic_description: Optional geographic description of dataset, e.g. Hutt City
event_name: Optional name of the capture event, e.g. Cyclone Gabrielle
historic_survey_number: Optional historic imagery survey number, e.g. SNC88445
"""
Expand All @@ -22,8 +21,8 @@ class CollectionTitleMetadata(TypedDict):
start_datetime: datetime
end_datetime: datetime
lifecycle: str
location: Optional[str]
event: Optional[str]
geographic_description: Optional[str]
event_name: Optional[str]
historic_survey_number: Optional[str]


Expand All @@ -32,17 +31,28 @@ def __init__(self, category: str) -> None:
self.message = f"Unrecognised/Unimplemented Subtype Parameter: {category}"


class ImageryCategories(str, Enum):
SATELLITE = "Satellite Imagery"
URBAN = "Urban Aerial Photos"
RURAL = "Rural Aerial Photos"
AERIAL = "Aerial Photos"
HISTORICAL = "Scanned Aerial Photos"
class MissingMetadataError(Exception):
    """Raised when a required metadata value is missing.

    Args:
        metadata: name of the missing metadata field.

    Attributes:
        message: formatted message, `Missing metadata: <metadata>`.
    """

    def __init__(self, metadata: str) -> None:
        self.message = f"Missing metadata: {metadata}"
        # Hand the formatted message to Exception so that str(error) and
        # tracebacks show it rather than only the bare field name.
        super().__init__(self.message)


class ElevationCategories(str, Enum):
DEM = "DEM"
DSM = "DSM"
# Category identifiers: the machine-readable values used for the
# collection's "category" metadata.
AERIAL_PHOTOS = "aerial-photos"
SCANNED_AERIAL_PHOTOS = "scanned-aerial-photos"
RURAL_AERIAL_PHOTOS = "rural-aerial-photos"
SATELLITE_IMAGERY = "satellite-imagery"
URBAN_AERIAL_PHOTOS = "urban-aerial-photos"
DEM = "dem"
DSM = "dsm"

# Maps each category identifier to its human-readable name
# (e.g. "aerial-photos" -> "Aerial Photos").
DATA_CATEGORIES = {
AERIAL_PHOTOS: "Aerial Photos",
SCANNED_AERIAL_PHOTOS: "Scanned Aerial Photos",
RURAL_AERIAL_PHOTOS: "Rural Aerial Photos",
SATELLITE_IMAGERY: "Satellite Imagery",
URBAN_AERIAL_PHOTOS: "Urban Aerial Photos",
DEM: "DEM",
DSM: "DSM",
}


HUMAN_READABLE_REGIONS = {
Expand Down
Loading