Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(core): datetime filters in guess_product_types #1222

Merged
merged 2 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 36 additions & 7 deletions eodag/api/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
UnsupportedProductType,
UnsupportedProvider,
)
from eodag.utils.rest import rfc3339_str_to_datetime
from eodag.utils.stac_reader import fetch_stac_items

if TYPE_CHECKING:
Expand Down Expand Up @@ -275,8 +276,7 @@ def build_index(self) -> None:
)

product_types_schema = Schema(
ID=fields.STORED,
alias=fields.ID,
ID=fields.ID(stored=True),
abstract=fields.TEXT,
instrument=fields.IDLIST,
platform=fields.ID,
Expand All @@ -286,8 +286,8 @@ def build_index(self) -> None:
md5=fields.ID,
license=fields.ID,
title=fields.TEXT,
missionStartDate=fields.ID,
missionEndDate=fields.ID,
missionStartDate=fields.STORED,
missionEndDate=fields.STORED,
keywords=fields.KEYWORD(analyzer=kw_analyzer),
stacCollection=fields.STORED,
)
Expand Down Expand Up @@ -960,6 +960,8 @@ def guess_product_type(
keywords: Optional[str] = None,
abstract: Optional[str] = None,
title: Optional[str] = None,
missionStartDate: Optional[str] = None,
missionEndDate: Optional[str] = None,
**kwargs: Any,
) -> List[str]:
"""
Expand Down Expand Up @@ -989,6 +991,10 @@ def guess_product_type(
:type abstract: Optional[str]
:param title: Title parameter.
:type title: Optional[str]
:param missionStartDate: start date for datetime filtering. Not used by free_text
:type missionStartDate: Optional[str]
:param missionEndDate: end date for datetime filtering. Not used by free_text
:type missionEndDate: Optional[str]
:returns: The best match for the given parameters.
:rtype: List[str]
:raises: :class:`~eodag.utils.exceptions.NoMatchingProductType`
Expand Down Expand Up @@ -1020,15 +1026,38 @@ def guess_product_type(
if filters_text:
text += f"({filters_text})"

if not text and (missionStartDate or missionEndDate):
text = "*"

with self._product_types_index.searcher() as searcher:
p = EODAGQueryParser(list(filters.keys()), self._product_types_index.schema)
query = p.parse(text)
results = searcher.search(query, limit=None)

guesses: List[str] = [r["ID"] for r in results or []]
guesses: List[Dict[str, str]] = [dict(r) for r in results or []]

# datetime filtering
if missionStartDate or missionEndDate:
guesses = [
g
for g in guesses
if (
not missionEndDate
or g.get("missionStartDate")
and rfc3339_str_to_datetime(g["missionStartDate"])
<= rfc3339_str_to_datetime(missionEndDate)
)
and (
not missionStartDate
or g.get("missionEndDate")
and rfc3339_str_to_datetime(g["missionEndDate"])
>= rfc3339_str_to_datetime(missionStartDate)
)
]

if guesses:
return guesses
return [g["ID"] for g in guesses or []]

raise NoMatchingProductType()

def search(
Expand Down Expand Up @@ -1395,7 +1424,7 @@ def search_all(
# of items_per_page if defined for the provider used.
try:
product_type = self.get_product_type_from_alias(
kwargs.get("productType", None) or self.guess_product_type(**kwargs)[0]
self.guess_product_type(**kwargs)[0]
)
except NoMatchingProductType:
product_type = GENERIC_PRODUCT_TYPE
Expand Down
4 changes: 2 additions & 2 deletions eodag/resources/product_types.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3110,7 +3110,7 @@ SATELLITE_SEA_ICE_EDGE_TYPE:
license: proprietary
title: Sea ice edge and type daily gridded data from 1978 to present derived from satellite observations
missionStartDate: "1978-10-25T00:00:00Z"
missionEndDate: "2023-05-02-02T23:59:59"
missionEndDate: "2023-05-02T23:59:59"

SATELLITE_SEA_ICE_THICKNESS:
abstract: |
Expand Down Expand Up @@ -3322,7 +3322,7 @@ SATELLITE_SEA_LEVEL_MEDITERRANEAN:
license: proprietary
title: Sea level daily gridded data from satellite observations for the Mediterranean Sea
missionStartDate: "1993-01-01T00:00:00Z"
missionEndDate: "2018-11-01T:23:59:59Z"
missionEndDate: "2018-11-01T23:59:59Z"

SEASONAL_POSTPROCESSED_SL:
abstract: |
Expand Down
2 changes: 2 additions & 0 deletions eodag/resources/stac.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@ conformance:
- https://api.stacspec.org/v1.0.0/ogcapi-features#query
- https://api.stacspec.org/v1.0.0/ogcapi-features#sort
- https://api.stacspec.org/v1.0.0/collections
- https://api.stacspec.org/v1.0.0/collection-search
- https://api.stacspec.org/v1.0.0/collection-search#free-text
- https://api.stacspec.org/v1.0.0/collection-search#advanced-free-text
- http://www.opengis.net/spec/ogcapi-common-2/1.0/conf/simple-query
- http://www.opengis.net/spec/ogcapi-features-1/1.0/conf/core
- http://www.opengis.net/spec/ogcapi-features-1/1.0/conf/oas30
- http://www.opengis.net/spec/ogcapi-features-1/1.0/conf/geojson
Expand Down
7 changes: 6 additions & 1 deletion eodag/rest/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ async def all_collections(
platform: Optional[str] = None,
instrument: Optional[str] = None,
constellation: Optional[str] = None,
datetime: Optional[str] = None,
) -> Dict[str, Any]:
"""Build STAC collections

Expand All @@ -416,7 +417,11 @@ async def _fetch() -> Dict[str, Any]:
)
collections = deepcopy(stac_config["collections"])
collections["collections"] = stac_collection.get_collection_list(
q=q, platform=platform, instrument=instrument, constellation=constellation
q=q,
platform=platform,
instrument=instrument,
constellation=constellation,
datetime=datetime,
)

# # parse f-strings
Expand Down
3 changes: 2 additions & 1 deletion eodag/rest/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,7 @@ async def collections(
platform: Optional[str] = None,
instrument: Optional[str] = None,
constellation: Optional[str] = None,
datetime: Optional[str] = None,
) -> ORJSONResponse:
"""STAC collections

Expand All @@ -581,7 +582,7 @@ async def collections(
logger.debug("URL: %s", request.url)

collections = await all_collections(
request, provider, q, platform, instrument, constellation
request, provider, q, platform, instrument, constellation, datetime
)
return ORJSONResponse(collections)

Expand Down
19 changes: 13 additions & 6 deletions eodag/rest/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
get_metadata_path,
)
from eodag.rest.config import Settings
from eodag.rest.utils.rfc3339 import str_to_interval
from eodag.utils import (
DEFAULT_MISSION_START_DATE,
deepcopy,
Expand Down Expand Up @@ -831,6 +832,7 @@ def get_collection_list(
platform: Optional[str] = None,
instrument: Optional[str] = None,
constellation: Optional[str] = None,
datetime: Optional[str] = None,
) -> List[Dict[str, Any]]:
"""Build STAC collections list

Expand All @@ -841,24 +843,29 @@ def get_collection_list(
"""
collection_model = deepcopy(self.stac_config["collection"])

start, end = str_to_interval(datetime)

all_pt = self.eodag_api.list_product_types(
provider=self.provider, fetch_providers=False
)

if collection:
product_types = [pt for pt in all_pt if collection == pt["ID"]]
elif any((q, platform, instrument, constellation)):
# product types matching filters
if any((collection, q, platform, instrument, constellation, datetime)):
try:
guessed_product_types = self.eodag_api.guess_product_type(
free_text=q,
platformSerialIdentifier=platform,
instrument=instrument,
platform=constellation,
productType=collection,
missionStartDate=start.isoformat() if start else None,
missionEndDate=end.isoformat() if end else None,
)
except NoMatchingProductType:
guessed_product_types = []
product_types = [pt for pt in all_pt if pt["ID"] in guessed_product_types]
product_types = []
else:
product_types = [
pt for pt in all_pt if pt["ID"] in guessed_product_types
]
else:
product_types = all_pt

Expand Down
9 changes: 7 additions & 2 deletions eodag/rest/types/stac_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from typing_extensions import Annotated

from eodag.rest.utils.rfc3339 import rfc3339_str_to_datetime, str_to_interval
from eodag.utils.exceptions import ValidationError

if TYPE_CHECKING:
try:
Expand Down Expand Up @@ -224,8 +225,12 @@ def validate_datetime(cls, v: str) -> str:
dates.append("..")
continue

# throws ValueError if invalid RFC 3339 string
dates.append(rfc3339_str_to_datetime(value).strftime("%Y-%m-%dT%H:%M:%SZ"))
try:
dates.append(
rfc3339_str_to_datetime(value).strftime("%Y-%m-%dT%H:%M:%SZ")
)
except ValidationError as e:
raise ValueError(e)

if dates[0] == ".." and dates[1] == "..":
raise ValueError(
Expand Down
31 changes: 1 addition & 30 deletions eodag/rest/utils/rfc3339.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,38 +16,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import re
from typing import Optional, Tuple

import dateutil.parser

# Regular expression for the RFC 3339 subset accepted by this module:
# a mandatory ``YYYY-MM-DD`` date, optionally followed by ``T`` and an
# ``HH:MM:SS`` time with optional fractional seconds and an optional
# ``Z`` or ``+HH:MM`` / ``-HH:MM`` offset. Anchored at both ends.
RFC3339_PATTERN = (
r"^(\d{4})-(\d{2})-(\d{2})"
r"(?:T(\d{2}):(\d{2}):(\d{2})(\.\d+)?"
r"(Z|([+-])(\d{2}):(\d{2}))?)?$"
)


def rfc3339_str_to_datetime(s: str) -> datetime.datetime:
    """Parse an RFC 3339 formatted string into a :class:`datetime.datetime`.

    The input is upper-cased, checked against ``RFC3339_PATTERN`` and then
    parsed. The returned value always carries ``datetime.timezone.utc`` as
    its ``tzinfo``.

    :param s: The RFC 3339 string to parse
    :type s: str

    :returns: The parsed datetime, with its timezone set to UTC
    :rtype: :class:`datetime.datetime`

    :raises: :class:`ValueError` if the string does not match the pattern
    """
    # Normalise case so that lowercase "t" / "z" designators are accepted.
    normalized = s.upper()

    # Reject anything that is not shaped like an RFC 3339 date or datetime.
    if re.match(RFC3339_PATTERN, normalized) is None:
        raise ValueError("Invalid RFC3339 datetime.")

    parsed = dateutil.parser.isoparse(normalized)
    return parsed.replace(tzinfo=datetime.timezone.utc)
from eodag.utils.rest import rfc3339_str_to_datetime


def str_to_interval(
Expand Down
30 changes: 30 additions & 0 deletions eodag/utils/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,21 @@

from __future__ import annotations

import datetime
import re
from typing import Any, Dict, Optional, Tuple

import dateutil.parser
from dateutil import tz

from eodag.utils.exceptions import ValidationError

# Regular expression for the RFC 3339 subset accepted by this module:
# a mandatory ``YYYY-MM-DD`` date, optionally followed by ``T`` and an
# ``HH:MM:SS`` time with optional fractional seconds and an optional
# ``Z`` or ``+HH:MM`` / ``-HH:MM`` offset. Anchored at both ends.
RFC3339_PATTERN = (
r"^(\d{4})-(\d{2})-(\d{2})"
r"(?:T(\d{2}):(\d{2}):(\d{2})(\.\d+)?"
r"(Z|([+-])(\d{2}):(\d{2}))?)?$"
)


def get_datetime(arguments: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
"""Get start and end dates from a dict containing `/` separated dates in `datetime` item
Expand Down Expand Up @@ -72,3 +80,25 @@ def get_date(date: Optional[str]) -> Optional[str]:
except ValueError as e:
exc = ValidationError("invalid input date: %s" % e)
raise exc


def rfc3339_str_to_datetime(s: str) -> datetime.datetime:
    """Convert a string conforming to RFC 3339 to a :class:`datetime.datetime`.

    The string is upper-cased, validated against ``RFC3339_PATTERN`` and then
    parsed. The returned datetime always carries ``datetime.timezone.utc`` as
    its ``tzinfo``.

    :param s: The string to convert to :class:`datetime.datetime`
    :type s: str

    :returns: The datetime represented by the ISO8601 (RFC 3339) formatted string
    :rtype: :class:`datetime.datetime`

    :raises: :class:`~eodag.utils.exceptions.ValidationError` if the string
             does not match the RFC 3339 pattern or is not a valid date
    """
    # Uppercase the string so lowercase "t" / "z" designators are accepted
    s = s.upper()

    # Match against RFC3339 regex.
    if not re.match(RFC3339_PATTERN, s):
        raise ValidationError("Invalid RFC3339 datetime.")

    # The regex only checks the shape: values such as month 13 or Feb 30 still
    # reach the parser, which raises ValueError. Convert it so that callers
    # only ever have to handle ValidationError, as documented.
    try:
        parsed = dateutil.parser.isoparse(s)
    except ValueError as err:
        raise ValidationError("Invalid RFC3339 datetime: %s" % err) from err

    # NOTE(review): replace() overwrites tzinfo without shifting the time, so an
    # explicit offset such as "+02:00" is discarded rather than converted to
    # UTC. Kept as-is to preserve existing behaviour - confirm this is intended.
    return parsed.replace(tzinfo=datetime.timezone.utc)
26 changes: 26 additions & 0 deletions tests/units/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1985,6 +1985,32 @@ def test_guess_product_type_with_kwargs(self):
actual = self.dag.guess_product_type(productType="foo")
self.assertEqual(actual, ["foo"])

# with dates
self.assertEqual(
self.dag.product_types_config.source["S2_MSI_L1C"]["missionStartDate"],
"2015-06-23T00:00:00Z",
)
self.assertNotIn(
"S2_MSI_L1C", self.dag.guess_product_type(missionEndDate="2015-06-01")
)
self.assertIn(
"S2_MSI_L1C", self.dag.guess_product_type(missionEndDate="2015-07-01")
)
self.assertEqual(
self.dag.product_types_config.source["S2_MSI_L2AP"]["missionStartDate"],
"2017-05-23T00:00:00Z",
)
self.assertEqual(
self.dag.product_types_config.source["S2_MSI_L2AP"]["missionEndDate"],
"2018-03-25T00:00:00Z",
)
self.assertNotIn(
"S2_MSI_L2AP", self.dag.guess_product_type(missionStartDate="2018-04-01")
)
self.assertIn(
"S2_MSI_L2AP", self.dag.guess_product_type(missionStartDate="2018-03-01")
)

def test_guess_product_type_without_kwargs(self):
"""guess_product_type must raise an exception when no kwargs are provided"""
with self.assertRaises(NoMatchingProductType):
Expand Down
3 changes: 3 additions & 0 deletions tests/units/test_http_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -1865,5 +1865,8 @@ def test_collection_free_text_search(self, guess_pt: Mock, list_pt: Mock):
platformSerialIdentifier=None,
instrument=None,
platform=None,
missionStartDate=None,
missionEndDate=None,
productType=None,
)
self.assertEqual(200, r.status_code)
Loading
Loading