Skip to content

Commit

Permalink
refactor: eodag.utils.product_types.search_product_types usage
Browse files Browse the repository at this point in the history
  • Loading branch information
sbrunato committed Mar 7, 2024
1 parent 88e2119 commit 787bfc6
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 71 deletions.
85 changes: 14 additions & 71 deletions eodag/api/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
Set,
Tuple,
Union,
cast,
)

import geojson
Expand All @@ -45,8 +44,6 @@
from whoosh import analysis, fields
from whoosh.fields import Schema
from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import QueryParser
from whoosh.searching import Results

from eodag.api.product.metadata_mapping import (
NOT_MAPPED,
Expand Down Expand Up @@ -97,6 +94,7 @@
UnsupportedProductType,
UnsupportedProvider,
)
from eodag.utils.product_types import search_product_types
from eodag.utils.stac_reader import fetch_stac_items

if TYPE_CHECKING:
Expand Down Expand Up @@ -560,9 +558,16 @@ def list_product_types(
)

if filter:
product_types = self.__apply_product_type_free_text_search_filter(
product_types, filter=filter
whooshable_filter = filter.replace(",", " OR ")
product_types_ids = search_product_types(
self._product_types_index,
title=whooshable_filter,
abstract=whooshable_filter,
keywords=whooshable_filter,
)
product_types = [
p for p in product_types if p["ID"] in product_types_ids
]
return sorted(product_types, key=itemgetter("ID"))

# Only get the product types supported by the available providers
Expand All @@ -581,56 +586,6 @@ def list_product_types(
# Return the product_types sorted in lexicographic order of their ID
return sorted(product_types, key=itemgetter("ID"))

def __apply_product_type_free_text_search_filter(
    self, product_types: List[Dict[str, Any]], filter: str
) -> List[Dict[str, Any]]:
    """Keep only the product types matching a free text search

    Every term of ``filter`` is searched in the ``title``, ``abstract`` and
    ``keywords`` fields of the product types index; a product type is kept as
    soon as it is hit by at least one of these searches.

    :param product_types: The list of product types to filter
    :type product_types: list
    :param filter: Comma separated list of search terms (ex. "EO,Earth Observation")
    :type filter: str
    :returns: The product types whose ID was found by the search, in their
              original order
    :rtype: list
    """
    # Guard clause: without any filter the input list is handed back untouched
    if not filter:
        return product_types

    index = self._product_types_index
    searchable_fields = {"title", "abstract", "keywords"}
    merged: Optional[Results] = None

    with index.searcher() as searcher:
        # One query per (term, field) couple. Hits are accumulated in a single
        # Results object: when a hit is found again by a later query, merging
        # raises its position, so that the top most hit is the one matched by
        # the highest number of queries
        for term in filter.split(","):
            for field in searchable_fields:
                query = QueryParser(field, index.schema).parse(term)  # type: ignore
                hits = cast(
                    Results, searcher.search(query, limit=None)  # type: ignore
                )
                if merged:
                    merged.upgrade_and_extend(hits)  # type: ignore
                else:
                    merged = hits

        # IDs are collected before leaving the `with` block, i.e. while the
        # searcher is still open; no hit at all means nothing can be kept
        matching_ids = {hit["ID"] for hit in merged} if merged else set()

    return [p for p in product_types if p["ID"] in matching_ids]

def fetch_product_types_list(self, provider: Optional[str] = None) -> None:
"""Fetch product types list and update if needed
Expand Down Expand Up @@ -1004,22 +959,10 @@ def guess_product_type(self, **kwargs: Any) -> List[str]:
}
if not self._product_types_index:
raise EodagError("Missing product types index")
with self._product_types_index.searcher() as searcher:
results = None
# For each search key, do a guess and then upgrade the result (i.e. when
# merging results, if a hit appears in both results, its position is raised
# to the top. This way, the top most result will be the hit that best
# matches the given queries. Put another way, this best guess is the one
# that crosses the highest number of search params from the given queries
for search_key in supported_params:
query = QueryParser(search_key, self._product_types_index.schema).parse(
kwargs[search_key]
)
if results is None:
results = searcher.search(query, limit=None)
else:
results.upgrade_and_extend(searcher.search(query, limit=None))
guesses: List[str] = [r["ID"] for r in results or []]

guesses = search_product_types(
self._product_types_index, **{p: kwargs[p] for p in supported_params}
)
if guesses:
return guesses
raise NoMatchingProductType()
Expand Down
19 changes: 19 additions & 0 deletions eodag/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,22 @@ class ProviderSortables(TypedDict):

sortables: List[str]
max_sort_params: Annotated[Optional[int], Gt(0)]


class ProductTypeProperties(TypedDict, total=False):
    """Product type properties

    Typed dictionary whose keys are the string properties of a product type.
    As the class is declared with ``total=False``, every key is optional and
    any subset of them may be given.

    It is used to type the keyword arguments accepted by
    :func:`~eodag.utils.product_types.search_product_types`.
    """

    # NOTE(review): all values are typed as plain strings; presumably these keys
    # mirror the fields of the product types index - confirm against its schema
    ID: str
    alias: str
    abstract: str
    instrument: str
    platform: str
    platformSerialIdentifier: str
    processingLevel: str
    sensorType: str
    md5: str
    license: str
    title: str
    missionStartDate: str
    missionEndDate: str
    keywords: str
70 changes: 70 additions & 0 deletions eodag/utils/product_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
# Copyright 2024, CS GROUP - France, https://www.csgroup.eu/
#
# This file is part of EODAG project
# https://www.github.com/CS-SI/EODAG
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from typing import TYPE_CHECKING, cast

from whoosh.qparser import QueryParser
from whoosh.searching import Results

from eodag.utils import Unpack

if TYPE_CHECKING:
from typing import List, Optional

from whoosh.index import Index

from eodag.types import ProductTypeProperties


def search_product_types(
    index: Index, **search_kwargs: Unpack[ProductTypeProperties]
) -> List[str]:
    """Find product types using given index and search args

    Each keyword argument is run as a separate whoosh query against the index
    field of the same name, and the hits of all the queries are merged into a
    single result.

    :param index: whoosh index in which the product types are searched
    :type index: :class:`~whoosh.index.Index`
    :param search_kwargs: whoosh-compatible search arguments, given as
                          ``<index field>=<query string>`` pairs
    :type search_kwargs: Unpack[ProductTypeProperties]
    :returns: IDs of the found product types, or an empty list if nothing
              matched or if no search argument was given
    :rtype: List[str]
    """
    with index.searcher() as searcher:
        results: Optional[Results] = None
        # For each search key, do a guess and then upgrade the result (i.e. when
        # merging results, if a hit appears in both results, its position is raised
        # to the top. This way, the top most result will be the hit that best
        # matches the given queries. Put another way, this best guess is the one
        # that crosses the highest number of search params from the given queries
        for search_key, search_value in search_kwargs.items():
            result = cast(
                Results,
                searcher.search(
                    QueryParser(search_key, index.schema).parse(search_value),
                    limit=None,
                ),
            )
            # `not results` is true both before the first query and while only
            # empty results were collected: in both cases start over from the
            # latest result instead of merging into nothing
            if not results:
                results = result
            else:
                results.upgrade_and_extend(result)

        # IDs are extracted before leaving the `with` block, i.e. while the
        # searcher is still open
        return [r["ID"] for r in results] if results else []

0 comments on commit 787bfc6

Please sign in to comment.