diff --git a/eodag/api/core.py b/eodag/api/core.py index 796f97d37..92716831e 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -34,7 +34,6 @@ Set, Tuple, Union, - cast, ) import geojson @@ -46,7 +45,6 @@ from whoosh.fields import Schema from whoosh.index import create_in, exists_in, open_dir from whoosh.qparser import QueryParser -from whoosh.searching import Results from eodag.api.product.metadata_mapping import ( NOT_MAPPED, @@ -517,10 +515,7 @@ def set_locations_conf(self, locations_conf_path: str) -> None: self.locations_config = [] def list_product_types( - self, - provider: Optional[str] = None, - fetch_providers: bool = True, - filter: Optional[str] = None, + self, provider: Optional[str] = None, fetch_providers: bool = True ) -> List[Dict[str, Any]]: """Lists supported product types. @@ -530,8 +525,6 @@ def list_product_types( :param fetch_providers: (optional) Whether to fetch providers for new product types or not :type fetch_providers: bool - :param filter: (optional) Comma separated list of free text search terms. - :type filter: str :returns: The list of the product types that can be accessed using eodag. :rtype: list(dict) :raises: :class:`~eodag.utils.exceptions.UnsupportedProvider` @@ -554,17 +547,10 @@ def list_product_types( product_type = dict(ID=product_type_id, **config) if product_type_id not in product_types: product_types.append(product_type) - else: - raise UnsupportedProvider( - f"invalid requested provider: {provider} is not (yet) supported" - ) - - if filter: - product_types = self.__apply_product_type_free_text_search_filter( - product_types, filter=filter - ) - return sorted(product_types, key=itemgetter("ID")) - + return sorted(product_types, key=itemgetter("ID")) + raise UnsupportedProvider( + f"invalid requested provider: {provider} is not (yet) supported" + ) # Only get the product types supported by the available providers for provider in self.available_providers(): current_product_type_ids = [pt["ID"] for pt in product_types] @@ -572,65 +558,14 @@ def list_product_types( [ pt for pt in self.list_product_types( - provider=provider, fetch_providers=False, filter=filter + provider=provider, fetch_providers=False ) if pt["ID"] not in current_product_type_ids ] ) - # Return the product_types sorted in lexicographic order of their ID return sorted(product_types, key=itemgetter("ID")) - def __apply_product_type_free_text_search_filter( - self, product_types: List[Dict[str, Any]], filter: str - ) -> List[Dict[str, Any]]: - """Apply the free text search filter to the given list of product types - - :param product_types: The list of product types - :type product_types: list - :param filter: Comma separated list of search terms (ex. "EO,Earth Observation") - :type filter: str - :returns: The new list of product types - :rtype: list - - """ - # Apply the free text search - if not filter: - # no filter was given -> return the original list - return product_types - fts_terms = filter.split(",") - fts_supported_params = {"title", "abstract", "keywords"} - with self._product_types_index.searcher() as searcher: - results: Optional[Results] = None - # For each search key, do a guess and then upgrade the result (i.e. when - # merging results, if a hit appears in both results, its position is raised - # to the top. This way, the top most result will be the hit that best - # matches the given queries. Put another way, this best guess is the one - # that crosses the highest number of search params from the given queries - for term in fts_terms: - for search_key in fts_supported_params: - result = cast( - Results, - searcher.search( # type: ignore - QueryParser( - search_key, self._product_types_index.schema - ).parse( # type: ignore - term - ), - limit=None, - ), - ) - if not results: - results = result - else: - results.upgrade_and_extend(result) # type: ignore - if not results: - # no result found -> intersection is empty set - return [] - - result_ids = {r["ID"] for r in results or []} - return [p for p in product_types if p["ID"] in result_ids] - def fetch_product_types_list(self, provider: Optional[str] = None) -> None: """Fetch product types list and update if needed @@ -979,9 +914,22 @@ def get_alias_from_product_type(self, product_type: str) -> str: return self.product_types_config[product_type].get("alias", product_type) - def guess_product_type(self, **kwargs: Any) -> List[str]: - """Find eodag product types codes that best match a set of search params + def guess_product_type( + self, + free_text_filter: Optional[str] = None, + intersect: bool = False, + **kwargs: Any, + ) -> List[str]: + """Find eodag product types ids that best match a set of search params + See https://whoosh.readthedocs.io/en/latest/querylang.html#the-default-query-language + for syntax. + + :param free_text_filter: whoosh compatible free text search filter used to search + `title`, `abstract` and `keywords` + :type free_text_filter: Optional[str] + :param intersect: join results for each parameter using INTERSECT instead of UNION + :type intersect: bool :param kwargs: A set of search parameters as keywords arguments :returns: The best match for the given parameters :rtype: list[str] @@ -989,6 +937,9 @@ def guess_product_type(self, **kwargs: Any) -> List[str]: """ if kwargs.get("productType", None): return [kwargs["productType"]] + free_text_search_params = ( + ["title", "abstract", "keywords"] if free_text_filter else [] + ) supported_params = { param for param in ( @@ -999,6 +950,8 @@ def guess_product_type(self, **kwargs: Any) -> List[str]: "sensorType", "keywords", "md5", + "abstract", + "title", ) if kwargs.get(param, None) is not None } @@ -1006,19 +959,35 @@ def guess_product_type(self, **kwargs: Any) -> List[str]: raise EodagError("Missing product types index") with self._product_types_index.searcher() as searcher: results = None - # For each search key, do a guess and then upgrade the result (i.e. when - # merging results, if a hit appears in both results, its position is raised - # to the top. This way, the top most result will be the hit that best + # Using `upgrade_and_extend`, for each search key, do a guess and + # then upgrade the result (i.e. when merging results, + # if a hit appears in both results, its position is raised + # to the top). This way, the top most result will be the hit that best # matches the given queries. Put another way, this best guess is the one # that crosses the highest number of search params from the given queries + + # Always use UNION to join free_text_search results + for search_key in free_text_search_params: + query = QueryParser(search_key, self._product_types_index.schema).parse( + free_text_filter + ) + if results is None: + results = searcher.search(query, limit=None) + else: + results.upgrade_and_extend(searcher.search(query, limit=None)) + + # join results from kwargs using UNION or INTERSECT for search_key in supported_params: query = QueryParser(search_key, self._product_types_index.schema).parse( kwargs[search_key] ) if results is None: results = searcher.search(query, limit=None) + elif intersect: + results.filter(searcher.search(query, limit=None)) else: results.upgrade_and_extend(searcher.search(query, limit=None)) + guesses: List[str] = [r["ID"] for r in results or []] if guesses: return guesses diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index 2dcff2058..fef6fb098 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -637,22 +637,37 @@ def __get_product_types( """ if filters is None: filters = {} + free_text_filter = filters.pop("q", None) + + # product types matching filters try: - guessed_product_types = self.eodag_api.guess_product_type(**filters) + guessed_product_types = ( + self.eodag_api.guess_product_type(**filters) if filters else [] + ) except NoMatchingProductType: guessed_product_types = [] + + # product types matching free text filter + if free_text_filter and not guessed_product_types: + whooshable_filter = " OR ".join( + [f"({x})" for x in free_text_filter.split(",")] + ) + try: + guessed_product_types = self.eodag_api.guess_product_type( + whooshable_filter + ) + except NoMatchingProductType: + guessed_product_types = [] + + # list product types with all metadata using guessed ids if guessed_product_types: product_types = [ pt - for pt in self.eodag_api.list_product_types( - provider=self.provider, filter=filters.get("q", None) - ) + for pt in self.eodag_api.list_product_types(provider=self.provider) if pt["ID"] in guessed_product_types ] else: - product_types = self.eodag_api.list_product_types( - provider=self.provider, filter=filters.get("q", None) - ) + product_types = self.eodag_api.list_product_types(provider=self.provider) return product_types def __get_collection_list( diff --git a/tests/units/test_core.py b/tests/units/test_core.py index d865ef770..b6eaa26ff 100644 --- a/tests/units/test_core.py +++ b/tests/units/test_core.py @@ -517,19 +517,7 @@ def test_list_product_types_fetch_providers(self, mock_fetch_product_types_list) self.dag.list_product_types(provider="peps", fetch_providers=True) mock_fetch_product_types_list.assert_called_once_with(self.dag, provider="peps") - def test_list_product_types_with_free_text_filter_ok(self): - """Core api must correctly return the list of supported product types""" - - product_types = self.dag.list_product_types( - fetch_providers=False, filter="ABSTRACTFOO" - ) - self.assertIsInstance(product_types, list) - for product_type in product_types: - self.assertListProductTypesRightStructure(product_type) - # There should be no repeated product type in the output - self.assertEqual(len(product_types), len(set(pt["ID"] for pt in product_types))) - - def test_list_product_types_with_free_text_filter(self): + def test_guess_product_type_with_filter(self): """Testing the search terms""" with open( @@ -538,47 +526,43 @@ def test_list_product_types_with_free_text_filter(self): ext_product_types_conf = json.load(f) self.dag.update_product_types_list(ext_product_types_conf) - # match in the abstract - product_types = self.dag.list_product_types( - fetch_providers=False, filter="ABSTRACTFOO" - ) - product_types_ids = [r["ID"] for r in product_types or []] + # Free text search: match in the abstract + filter = "ABSTRACTFOO" + product_types_ids = self.dag.guess_product_type(filter) self.assertListEqual(product_types_ids, ["foo"]) - - # passing the provider - product_types = self.dag.list_product_types( - provider="astraea_eod", fetch_providers=False, filter="ABSTRACTFOO" - ) - product_types_ids = [r["ID"] for r in product_types or []] + filter = "(ABSTRACTFOO)" + product_types_ids = self.dag.guess_product_type(filter) self.assertListEqual(product_types_ids, ["foo"]) - - # match in the abstract - product_types = self.dag.list_product_types( - fetch_providers=False, filter=" FOO THIS IS " - ) - product_types_ids = [r["ID"] for r in product_types or []] + filter = " FOO THIS IS " + product_types_ids = self.dag.guess_product_type(filter) self.assertListEqual(product_types_ids, ["foo"]) - # match in the keywords - product_types = self.dag.list_product_types( - fetch_providers=False, filter="LECTUS_BAR_KEY" - ) - product_types_ids = [r["ID"] for r in product_types or []] + # Free text search: match in the keywords + filter = "LECTUS_BAR_KEY" + product_types_ids = self.dag.guess_product_type(filter) self.assertListEqual(product_types_ids, ["bar"]) - # match in the title - product_types = self.dag.list_product_types( - fetch_providers=False, filter="COLLECTION FOOBAR" - ) - product_types_ids = [r["ID"] for r in product_types or []] + # Free text search: match in the title + filter = "COLLECTION FOOBAR" + product_types_ids = self.dag.guess_product_type(filter) self.assertListEqual(product_types_ids, ["foobar"]) - # multiple terms - product_types = self.dag.list_product_types( - fetch_providers=False, filter="FOOANDBAR,FOOBAR" + # Free text search: multiple terms + filter = "(This is FOOBAR) OR (This is BAR)" + product_types_ids = self.dag.guess_product_type(filter) + self.assertListEqual(sorted(product_types_ids), ["bar", "foobar"]) + + # Free text search: multiple terms joined with param search (UNION) + filter = "(This is FOOBAR) OR (This is BAR)" + product_types_ids = self.dag.guess_product_type(filter, title="FOO*") + self.assertListEqual(sorted(product_types_ids), ["bar", "foo", "foobar"]) + + # Free text search: multiple terms joined with param search (INTERSECT) + filter = "(This is FOOBAR) OR (This is BAR)" + product_types_ids = self.dag.guess_product_type( + filter, intersect=True, title="titleFOO*" ) - product_types_ids = [r["ID"] for r in product_types or []] - self.assertListEqual(product_types_ids, ["bar", "foo", "foobar"]) + self.assertListEqual(sorted(product_types_ids), ["foobar"]) def test_update_product_types_list(self): """Core api.update_product_types_list must update eodag product types list""" diff --git a/tests/units/test_http_server.py b/tests/units/test_http_server.py index 234c58a7a..82b337326 100644 --- a/tests/units/test_http_server.py +++ b/tests/units/test_http_server.py @@ -964,7 +964,6 @@ def test_list_product_types_ok(self, list_pt: Mock, guess_pt: Mock): """A simple request for product types with(out) a provider must succeed""" for url in ("/collections",): r = self.app.get(url) - self.assertTrue(guess_pt.called) self.assertTrue(list_pt.called) self.assertEqual(200, r.status_code) self.assertListEqual( @@ -1379,10 +1378,12 @@ def test_cql_post_search(self): ) @mock.patch("eodag.rest.core.eodag_api.list_product_types", autospec=True) - def test_collection_free_text_search(self, list_pt: Mock): + @mock.patch("eodag.rest.core.eodag_api.guess_product_type", autospec=True) + def test_collection_free_text_search(self, guess_pt: Mock, list_pt: Mock): """Test STAC Collection free-text search""" url = "/collections?q=TERM1,TERM2" r = self.app.get(url) - list_pt.assert_called_once_with(provider=None, filter="TERM1,TERM2") + list_pt.assert_called_once_with(provider=None) + guess_pt.assert_called_once_with("(TERM1) OR (TERM2)") self.assertEqual(200, r.status_code)