From 1465cfbeca408ed786f920bac35c25b965908bfd Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Mon, 30 Oct 2023 08:59:30 +0100 Subject: [PATCH 01/10] feat: add collection free text search --- eodag/rest/stac.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index de6d76c1f..df45fc8e2 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -651,6 +651,41 @@ def __get_product_types( product_types = self.eodag_api.list_product_types(provider=self.provider) return product_types + def __filter_collection_free_text_search( + self, + search_terms: List[str], + collection: Dict[str, Any], + keys: List[str] = ["title", "description", "keywords"], + ): + """Implements STAC Collection free-text search + + :param search_terms: Terms to search in the collection + :type search_terms: list + :param collection: Collection to be filtered + :type collection: dict + :param keys List of keys of the collection to which the filter is applied + :type keys: list + :returns: True if the collection matches the filter criteria, False otherwise + :rtype: bool + """ + + def check_recursively(term: str, obj: Any) -> bool: + ret = False + if type(obj) is str: + if term in obj.lower(): + ret = True + elif type(obj) is list: + for i in obj: # type: ignore + ret = check_recursively(term, i) + return ret + + search_terms = [t.strip().lower() for t in search_terms] + for term in search_terms: + for key in keys: + if check_recursively(term, collection[key]): + return True + return False + def __get_collection_list( self, filters: Optional[Dict[str, Any]] = None ) -> List[Dict[str, Any]]: @@ -709,6 +744,15 @@ def __get_collection_list( collection_list.append(product_type_collection) + if "q" in filters: + # STAC Collection free-text search + search_terms = filters["q"].split(",") + new_collection_list = [] + for collection in collection_list: + if self.__filter_collection_free_text_search(search_terms, 
collection): + new_collection_list.append(collection) + collection_list = new_collection_list + return collection_list def get_collections( From 8c5efa589816edf3fd34e840f7d86e779faf38b5 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Tue, 31 Oct 2023 09:49:07 +0100 Subject: [PATCH 02/10] feat: add tests for collection free text search --- eodag/rest/stac.py | 2 + tests/units/test_http_server.py | 102 ++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index df45fc8e2..30c204a87 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -677,6 +677,8 @@ def check_recursively(term: str, obj: Any) -> bool: elif type(obj) is list: for i in obj: # type: ignore ret = check_recursively(term, i) + if ret: + break return ret search_terms = [t.strip().lower() for t in search_terms] diff --git a/tests/units/test_http_server.py b/tests/units/test_http_server.py index db6d8f6d9..b70e5dc7c 100644 --- a/tests/units/test_http_server.py +++ b/tests/units/test_http_server.py @@ -1377,3 +1377,105 @@ def test_cql_post_search(self): } }, ) + + @mock.patch( + "eodag.rest.utils.eodag_api.guess_product_type", autospec=True, return_value=[] + ) + @mock.patch( + "eodag.rest.utils.eodag_api.list_product_types", + autospec=True, + return_value=[ + { + "ID": "S2_MSI_L1C", + "abstract": "The Level-1C product is composed of 100x100 km2 tiles " + "(ortho-images in UTM/WGS84 projection). [...]", + "instrument": "MSI", + "platform": "SENTINEL2", + "platformSerialIdentifier": ["S2A", "S2B"], + "processingLevel": "L1", + "sensorType": "OPTICAL", + "title": "SENTINEL2 Level - 1C", + }, + { + "ID": "S2_MSI_L2A", + "abstract": "The Level-2A product provides Bottom Of Atmosphere (BOA) " + "reflectance images derived from the associated Level-1C " + "products. 
[...]", + "instrument": "MSI", + "platform": "SENTINEL2", + "platformSerialIdentifier": ["S2A", "S2B"], + "processingLevel": "L2", + "sensorType": "OPTICAL", + "title": "SENTINEL2 Level-2A", + }, + { + "ID": "L57_REFLECTANCE", + "abstract": "Landsat 5,7,8 L2A data (old format) distributed by Theia " + "(2014 to 2017-03-20) using MUSCATE prototype, Lamber 93 " + "projection.", + "instrument": ["OLI", "TIRS"], + "platform": "LANDSAT", + "platformSerialIdentifier": ["L5", "L7", "L8"], + "processingLevel": "L2A", + "sensorType": "OPTICAL", + "title": "Landsat 5,7,8 Level-2A", + }, + ], + ) + def test_collection_free_text_search(self, list_pt: Mock, guess_pt: Mock): + """Test STAC Collection free-text search""" + + url = "/collections?q=NOT FOUND" + r = self.app.get(url) + self.assertTrue(guess_pt.called) + self.assertTrue(list_pt.called) + self.assertEqual(200, r.status_code) + self.assertFalse( + [ + it["title"] + for it in json.loads(r.content.decode("utf-8")).get("links", []) + if it["rel"] == "child" + ], + ) + + url = "/collections?q= LEVEL - 1C " + r = self.app.get(url) + self.assertTrue(guess_pt.called) + self.assertTrue(list_pt.called) + self.assertEqual(200, r.status_code) + self.assertListEqual( + ["S2_MSI_L1C"], + [ + it["title"] + for it in json.loads(r.content.decode("utf-8")).get("links", []) + if it["rel"] == "child" + ], + ) + + url = "/collections?q=projection,atmosphere" + r = self.app.get(url) + self.assertTrue(guess_pt.called) + self.assertTrue(list_pt.called) + self.assertEqual(200, r.status_code) + self.assertListEqual( + ["S2_MSI_L1C", "S2_MSI_L2A", "L57_REFLECTANCE"], + [ + it["title"] + for it in json.loads(r.content.decode("utf-8")).get("links", []) + if it["rel"] == "child" + ], + ) + + url = "/collections?q=l7" + r = self.app.get(url) + self.assertTrue(guess_pt.called) + self.assertTrue(list_pt.called) + self.assertEqual(200, r.status_code) + self.assertListEqual( + ["L57_REFLECTANCE"], + [ + it["title"] + for it in 
json.loads(r.content.decode("utf-8")).get("links", []) + if it["rel"] == "child" + ], + ) From 6e5247f5606f34ab835e1712d8db4ab51c5448d9 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Mon, 20 Nov 2023 15:52:04 +0100 Subject: [PATCH 03/10] fix: reimplemented free text search for collections --- eodag/api/core.py | 72 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/eodag/api/core.py b/eodag/api/core.py index 2407732d8..1081ca2d2 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -289,7 +289,7 @@ def build_index(self) -> None: product_types_schema = Schema( ID=fields.STORED, alias=fields.ID, - abstract=fields.STORED, + abstract=fields.TEXT, instrument=fields.IDLIST, platform=fields.ID, platformSerialIdentifier=fields.IDLIST, @@ -297,7 +297,7 @@ def build_index(self) -> None: sensorType=fields.ID, md5=fields.ID, license=fields.ID, - title=fields.ID, + title=fields.TEXT, missionStartDate=fields.ID, missionEndDate=fields.ID, keywords=fields.KEYWORD(analyzer=kw_analyzer), @@ -515,7 +515,7 @@ def set_locations_conf(self, locations_conf_path: str) -> None: self.locations_config = [] def list_product_types( - self, provider: Optional[str] = None, fetch_providers: bool = True + self, provider: Optional[str] = None, fetch_providers: bool = True, filter: Optional[str] = None ) -> List[Dict[str, Any]]: """Lists supported product types. @@ -525,6 +525,8 @@ def list_product_types( :param fetch_providers: (optional) Whether to fetch providers for new product types or not :type fetch_providers: bool + :param filter: (optional) Comma separated list of free text search terms. + :type filter: str :returns: The list of the product types that can be accessed using eodag. 
:rtype: list(dict) :raises: :class:`~eodag.utils.exceptions.UnsupportedProvider` @@ -547,10 +549,17 @@ def list_product_types( product_type = dict(ID=product_type_id, **config) if product_type_id not in product_types: product_types.append(product_type) - return sorted(product_types, key=itemgetter("ID")) - raise UnsupportedProvider( - f"invalid requested provider: {provider} is not (yet) supported" - ) + else: + raise UnsupportedProvider( + f"invalid requested provider: {provider} is not (yet) supported" + ) + + if filter: + product_types = self.__apply_product_type_free_text_search_filter( + product_types, filter=filter + ) + return sorted(product_types, key=itemgetter("ID")) + # Only get the product types supported by the available providers for provider in self.available_providers(): current_product_type_ids = [pt["ID"] for pt in product_types] @@ -558,14 +567,61 @@ def list_product_types( [ pt for pt in self.list_product_types( - provider=provider, fetch_providers=False + provider=provider, fetch_providers=False, filter=filter ) if pt["ID"] not in current_product_type_ids ] ) + # Return the product_types sorted in lexicographic order of their ID return sorted(product_types, key=itemgetter("ID")) + def __apply_product_type_free_text_search_filter( + self, product_types: List[str], filter: str + ) -> List[str]: + """Apply the free text search filter to the given list of product types + + :param product_types: The list of product types + :type product_types: list + :param filter: Comma separated list of search terms (ex. 
"EO,Earth Observation") + :type filter: str + :returns: The new list of product types + :rtype: list + + """ + # Apply the free text search + if not filter: + # no filter was given -> return the original list + return product_types + fts_terms = filter.split(",") + fts_supported_params = {"title", "abstract", "keywords"} + with self._product_types_index.searcher() as searcher: + results = None + # For each search key, do a guess and then upgrade the result (i.e. when + # merging results, if a hit appears in both results, its position is raised + # to the top. This way, the top most result will be the hit that best + # matches the given queries. Put another way, this best guess is the one + # that crosses the highest number of search params from the given queries + for term in fts_terms: + for search_key in fts_supported_params: + query = QueryParser( + search_key, self._product_types_index.schema + ).parse(term) + partial_result = searcher.search(query, limit=None) + if results is None: + results = partial_result + else: + results.upgrade_and_extend(partial_result) + if results is None: + # no result found -> intersection is empty set + return [] + else: + result_ids = {r["ID"] for r in results or []} + filtered_product_types = [ + p for p in product_types if p["ID"] in result_ids + ] + return filtered_product_types + def fetch_product_types_list(self, provider: Optional[str] = None) -> None: """Fetch product types list and update if needed From cf0b003126ddec5ea14486e69d73a4bacb4c6e12 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Wed, 22 Nov 2023 11:14:27 +0100 Subject: [PATCH 04/10] feat: add support for free text search of collections in server mode --- eodag/api/core.py | 5 ++-- eodag/resources/stac.yml | 1 + eodag/rest/stac.py | 54 +++++----------------------------------- 3 files changed, 9 insertions(+), 51 deletions(-) diff --git a/eodag/api/core.py b/eodag/api/core.py index 1081ca2d2..ede944583 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py 
@@ -607,11 +607,10 @@ def __apply_product_type_free_text_search_filter( query = QueryParser( search_key, self._product_types_index.schema ).parse(term) - partial_result = searcher.search(query, limit=None) if results is None: - results = partial_result + results = searcher.search(query, limit=None) else: - results.upgrade_and_extend(partial_result) + results.upgrade_and_extend(searcher.search(query, limit=None)) if results is None: # no result found -> intersection is empty set return [] diff --git a/eodag/resources/stac.yml b/eodag/resources/stac.yml index 1c408e434..bf9765e9b 100644 --- a/eodag/resources/stac.yml +++ b/eodag/resources/stac.yml @@ -62,6 +62,7 @@ conformance: - https://api.stacspec.org/v1.0.0/ogcapi-features#query - https://api.stacspec.org/v1.0.0/ogcapi-features#sort - https://api.stacspec.org/v1.0.0/collections + - https://api.stacspec.org/v1.0.0/collection-search#free-text - http://www.opengis.net/spec/ogcapi-features-1/1.0/conf/core - http://www.opengis.net/spec/ogcapi-features-1/1.0/conf/oas30 - http://www.opengis.net/spec/ogcapi-features-1/1.0/conf/geojson diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index 30c204a87..2dcff2058 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -644,50 +644,17 @@ def __get_product_types( if guessed_product_types: product_types = [ pt - for pt in self.eodag_api.list_product_types(provider=self.provider) + for pt in self.eodag_api.list_product_types( + provider=self.provider, filter=filters.get("q", None) + ) if pt["ID"] in guessed_product_types ] else: - product_types = self.eodag_api.list_product_types(provider=self.provider) + product_types = self.eodag_api.list_product_types( + provider=self.provider, filter=filters.get("q", None) + ) return product_types - def __filter_collection_free_text_search( - self, - search_terms: List[str], - collection: Dict[str, Any], - keys: List[str] = ["title", "description", "keywords"], - ): - """Implements STAC Collection free-text search - - :param 
search_terms: Terms to search in the collection - :type search_terms: list - :param collection: Collection to be filtered - :type collection: dict - :param keys List of keys of the collection to which the filter is applied - :type keys: list - :returns: True if the collection matches the filter criteria, False otherwise - :rtype: bool - """ - - def check_recursively(term: str, obj: Any) -> bool: - ret = False - if type(obj) is str: - if term in obj.lower(): - ret = True - elif type(obj) is list: - for i in obj: # type: ignore - ret = check_recursively(term, i) - if ret: - break - return ret - - search_terms = [t.strip().lower() for t in search_terms] - for term in search_terms: - for key in keys: - if check_recursively(term, collection[key]): - return True - return False - def __get_collection_list( self, filters: Optional[Dict[str, Any]] = None ) -> List[Dict[str, Any]]: @@ -746,15 +713,6 @@ def __get_collection_list( collection_list.append(product_type_collection) - if "q" in filters: - # STAC Collection free-text search - search_terms = filters["q"].split(",") - new_collection_list = [] - for collection in collection_list: - if self.__filter_collection_free_text_search(search_terms, collection): - new_collection_list.append(collection) - collection_list = new_collection_list - return collection_list def get_collections( From 3ff0167567f399613270d5e37c9ba9e68e12716e Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Wed, 22 Nov 2023 11:24:30 +0100 Subject: [PATCH 05/10] tests: add tests for free text search of collections --- .../ext_product_types_free_text_search.json | 59 ++++++++++ tests/units/test_core.py | 63 +++++++++++ tests/units/test_http_server.py | 101 +----------------- 3 files changed, 126 insertions(+), 97 deletions(-) create mode 100644 tests/resources/ext_product_types_free_text_search.json diff --git a/tests/resources/ext_product_types_free_text_search.json b/tests/resources/ext_product_types_free_text_search.json new file mode 100644 index 
000000000..498bf4423 --- /dev/null +++ b/tests/resources/ext_product_types_free_text_search.json @@ -0,0 +1,59 @@ +{ + "astraea_eod": { + "providers_config": { + "foo": { + "productType": "foo", + "metadata_mapping": { + "cloudCover": "$.null" + } + }, + "bar": { + "productType": "bar", + "metadata_mapping": { + "cloudCover": "$.null" + } + }, + "foobar": { + "productType": "foobar", + "metadata_mapping": { + "cloudCover": "$.null" + } + } + }, + "product_types_config": { + "foo": { + "abstract": "abstractFOO - This is FOO. FooAndBar", + "instrument": "Not Available", + "platform": "Not Available", + "platformSerialIdentifier": "Not Available", + "processingLevel": "Not Available", + "keywords": "suspendisse", + "license": "WTFPL", + "title": "titleFOO - Lorem FOO collection", + "missionStartDate": "2012-12-12T00:00:00.000Z" + }, + "bar": { + "abstract": "abstractBAR - This is BAR", + "instrument": "Not Available", + "platform": "Not Available", + "platformSerialIdentifier": "Not Available", + "processingLevel": "Not Available", + "keywords": "lectus,lectus_bar_key", + "license": "WTFPL", + "title": "titleBAR - Lorem BAR collection (FooAndBar)", + "missionStartDate": "2012-12-12T00:00:00.000Z" + }, + "foobar": { + "abstract": "abstract FOOBAR - This is FOOBAR", + "instrument": "Not Available", + "platform": "Not Available", + "platformSerialIdentifier": "Not Available", + "processingLevel": "Not Available", + "keywords": "tortor", + "license": "WTFPL", + "title": "titleFOOBAR - Lorem FOOBAR collection", + "missionStartDate": "2012-12-12T00:00:00.000Z" + } + } + } +} diff --git a/tests/units/test_core.py b/tests/units/test_core.py index 29ab7b818..d865ef770 100644 --- a/tests/units/test_core.py +++ b/tests/units/test_core.py @@ -517,6 +517,69 @@ def test_list_product_types_fetch_providers(self, mock_fetch_product_types_list) self.dag.list_product_types(provider="peps", fetch_providers=True) mock_fetch_product_types_list.assert_called_once_with(self.dag, 
provider="peps") + def test_list_product_types_with_free_text_filter_ok(self): + """Core api must correctly return the list of supported product types""" + + product_types = self.dag.list_product_types( + fetch_providers=False, filter="ABSTRACTFOO" + ) + self.assertIsInstance(product_types, list) + for product_type in product_types: + self.assertListProductTypesRightStructure(product_type) + # There should be no repeated product type in the output + self.assertEqual(len(product_types), len(set(pt["ID"] for pt in product_types))) + + def test_list_product_types_with_free_text_filter(self): + """Testing the search terms""" + + with open( + os.path.join(TEST_RESOURCES_PATH, "ext_product_types_free_text_search.json") + ) as f: + ext_product_types_conf = json.load(f) + self.dag.update_product_types_list(ext_product_types_conf) + + # match in the abstract + product_types = self.dag.list_product_types( + fetch_providers=False, filter="ABSTRACTFOO" + ) + product_types_ids = [r["ID"] for r in product_types or []] + self.assertListEqual(product_types_ids, ["foo"]) + + # passing the provider + product_types = self.dag.list_product_types( + provider="astraea_eod", fetch_providers=False, filter="ABSTRACTFOO" + ) + product_types_ids = [r["ID"] for r in product_types or []] + self.assertListEqual(product_types_ids, ["foo"]) + + # match in the abstract + product_types = self.dag.list_product_types( + fetch_providers=False, filter=" FOO THIS IS " + ) + product_types_ids = [r["ID"] for r in product_types or []] + self.assertListEqual(product_types_ids, ["foo"]) + + # match in the keywords + product_types = self.dag.list_product_types( + fetch_providers=False, filter="LECTUS_BAR_KEY" + ) + product_types_ids = [r["ID"] for r in product_types or []] + self.assertListEqual(product_types_ids, ["bar"]) + + # match in the title + product_types = self.dag.list_product_types( + fetch_providers=False, filter="COLLECTION FOOBAR" + ) + product_types_ids = [r["ID"] for r in product_types or []] 
+ self.assertListEqual(product_types_ids, ["foobar"]) + + # multiple terms + product_types = self.dag.list_product_types( + fetch_providers=False, filter="FOOANDBAR,FOOBAR" + ) + product_types_ids = [r["ID"] for r in product_types or []] + self.assertListEqual(product_types_ids, ["bar", "foo", "foobar"]) + def test_update_product_types_list(self): """Core api.update_product_types_list must update eodag product types list""" with open(os.path.join(TEST_RESOURCES_PATH, "ext_product_types.json")) as f: diff --git a/tests/units/test_http_server.py b/tests/units/test_http_server.py index b70e5dc7c..d465c8bff 100644 --- a/tests/units/test_http_server.py +++ b/tests/units/test_http_server.py @@ -1378,104 +1378,11 @@ def test_cql_post_search(self): }, ) - @mock.patch( - "eodag.rest.utils.eodag_api.guess_product_type", autospec=True, return_value=[] - ) - @mock.patch( - "eodag.rest.utils.eodag_api.list_product_types", - autospec=True, - return_value=[ - { - "ID": "S2_MSI_L1C", - "abstract": "The Level-1C product is composed of 100x100 km2 tiles " - "(ortho-images in UTM/WGS84 projection). [...]", - "instrument": "MSI", - "platform": "SENTINEL2", - "platformSerialIdentifier": ["S2A", "S2B"], - "processingLevel": "L1", - "sensorType": "OPTICAL", - "title": "SENTINEL2 Level - 1C", - }, - { - "ID": "S2_MSI_L2A", - "abstract": "The Level-2A product provides Bottom Of Atmosphere (BOA) " - "reflectance images derived from the associated Level-1C " - "products. 
[...]", - "instrument": "MSI", - "platform": "SENTINEL2", - "platformSerialIdentifier": ["S2A", "S2B"], - "processingLevel": "L2", - "sensorType": "OPTICAL", - "title": "SENTINEL2 Level-2A", - }, - { - "ID": "L57_REFLECTANCE", - "abstract": "Landsat 5,7,8 L2A data (old format) distributed by Theia " - "(2014 to 2017-03-20) using MUSCATE prototype, Lamber 93 " - "projection.", - "instrument": ["OLI", "TIRS"], - "platform": "LANDSAT", - "platformSerialIdentifier": ["L5", "L7", "L8"], - "processingLevel": "L2A", - "sensorType": "OPTICAL", - "title": "Landsat 5,7,8 Level-2A", - }, - ], - ) - def test_collection_free_text_search(self, list_pt: Mock, guess_pt: Mock): + @mock.patch("eodag.rest.utils.eodag_api.list_product_types", autospec=True) + def test_collection_free_text_search(self, list_pt: Mock): """Test STAC Collection free-text search""" - url = "/collections?q=NOT FOUND" - r = self.app.get(url) - self.assertTrue(guess_pt.called) - self.assertTrue(list_pt.called) - self.assertEqual(200, r.status_code) - self.assertFalse( - [ - it["title"] - for it in json.loads(r.content.decode("utf-8")).get("links", []) - if it["rel"] == "child" - ], - ) - - url = "/collections?q= LEVEL - 1C " - r = self.app.get(url) - self.assertTrue(guess_pt.called) - self.assertTrue(list_pt.called) - self.assertEqual(200, r.status_code) - self.assertListEqual( - ["S2_MSI_L1C"], - [ - it["title"] - for it in json.loads(r.content.decode("utf-8")).get("links", []) - if it["rel"] == "child" - ], - ) - - url = "/collections?q=projection,atmosphere" + url = "/collections?q=TERM1,TERM2" r = self.app.get(url) - self.assertTrue(guess_pt.called) - self.assertTrue(list_pt.called) + list_pt.assert_called_once_with(provider=None, filter="TERM1,TERM2") self.assertEqual(200, r.status_code) - self.assertListEqual( - ["S2_MSI_L1C", "S2_MSI_L2A", "L57_REFLECTANCE"], - [ - it["title"] - for it in json.loads(r.content.decode("utf-8")).get("links", []) - if it["rel"] == "child" - ], - ) - - url = 
"/collections?q=l7" - r = self.app.get(url) - self.assertTrue(guess_pt.called) - self.assertTrue(list_pt.called) - self.assertEqual(200, r.status_code) - self.assertListEqual( - ["L57_REFLECTANCE"], - [ - it["title"] - for it in json.loads(r.content.decode("utf-8")).get("links", []) - if it["rel"] == "child" - ], - ) From aff19a0e33310141586921db9092fbc29c6a7ee2 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Fri, 24 Nov 2023 10:26:22 +0100 Subject: [PATCH 06/10] fix: add new parameter 'q' and new response 'Accepted' to /collections --- eodag/resources/stac_api.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/eodag/resources/stac_api.yml b/eodag/resources/stac_api.yml index 112ad4799..5b37bd903 100644 --- a/eodag/resources/stac_api.yml +++ b/eodag/resources/stac_api.yml @@ -174,9 +174,12 @@ paths: operationId: getCollections parameters: - $ref: '#/components/parameters/provider' + - $ref: '#/components/parameters/q' responses: '200': $ref: '#/components/responses/Collections' + '202': + $ref: '#/components/responses/Accepted' '500': $ref: '#/components/responses/ServerError' /collections/{collectionId}: @@ -1913,6 +1916,12 @@ components: text/html: schema: type: string + Accepted: + description: The request has been accepted, but the data is not yet ready. Please wait a few minutes before trying again. + content: + application/json: + schema: + $ref: '#/components/schemas/exception' Collections: description: >- The feature collections shared by this API. 
From ff09d897910d1578610c2c7e644d2db3ba4148bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aubin=20Lambar=C3=A9?= Date: Tue, 6 Feb 2024 13:16:58 +0100 Subject: [PATCH 07/10] refactor: small simplifications + type hint --- eodag/api/core.py | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/eodag/api/core.py b/eodag/api/core.py index ede944583..796f97d37 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -34,6 +34,7 @@ Set, Tuple, Union, + cast, ) import geojson @@ -45,6 +46,7 @@ from whoosh.fields import Schema from whoosh.index import create_in, exists_in, open_dir from whoosh.qparser import QueryParser +from whoosh.searching import Results from eodag.api.product.metadata_mapping import ( NOT_MAPPED, @@ -515,7 +517,10 @@ def set_locations_conf(self, locations_conf_path: str) -> None: self.locations_config = [] def list_product_types( - self, provider: Optional[str] = None, fetch_providers: bool = True, filter: Optional[str] = None + self, + provider: Optional[str] = None, + fetch_providers: bool = True, + filter: Optional[str] = None, ) -> List[Dict[str, Any]]: """Lists supported product types. @@ -577,8 +582,8 @@ def list_product_types( return sorted(product_types, key=itemgetter("ID")) def __apply_product_type_free_text_search_filter( - self, product_types: List[str], filter: str - ) -> List[str]: + self, product_types: List[Dict[str, Any]], filter: str + ) -> List[Dict[str, Any]]: """Apply the free text search filter to the given list of product types :param product_types: The list of product types @@ -596,7 +601,7 @@ def __apply_product_type_free_text_search_filter( fts_terms = filter.split(",") fts_supported_params = {"title", "abstract", "keywords"} with self._product_types_index.searcher() as searcher: - results = None + results: Optional[Results] = None # For each search key, do a guess and then upgrade the result (i.e. 
when # merging results, if a hit appears in both results, its position is raised # to the top. This way, the top most result will be the hit that best @@ -604,22 +609,27 @@ def __apply_product_type_free_text_search_filter( # that crosses the highest number of search params from the given queries for term in fts_terms: for search_key in fts_supported_params: - query = QueryParser( - search_key, self._product_types_index.schema - ).parse(term) - if results is None: - results = searcher.search(query, limit=None) + result = cast( + Results, + searcher.search( # type: ignore + QueryParser( + search_key, self._product_types_index.schema + ).parse( # type: ignore + term + ), + limit=None, + ), + ) + if not results: + results = result else: - results.upgrade_and_extend(searcher.search(query, limit=None)) - if results is None: + results.upgrade_and_extend(result) # type: ignore + if not results: # no result found -> intersection is empty set return [] - else: - result_ids = {r["ID"] for r in results or []} - filtered_product_types = [ - p for p in product_types if p["ID"] in result_ids - ] - return filtered_product_types + + result_ids = {r["ID"] for r in results or []} + return [p for p in product_types if p["ID"] in result_ids] def fetch_product_types_list(self, provider: Optional[str] = None) -> None: """Fetch product types list and update if needed From 88e21190ab002d0def48c5bb13e31549376ffc18 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Thu, 29 Feb 2024 16:30:21 +0100 Subject: [PATCH 08/10] test: fix collection free-text search test after rebase --- tests/units/test_http_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/units/test_http_server.py b/tests/units/test_http_server.py index d465c8bff..234c58a7a 100644 --- a/tests/units/test_http_server.py +++ b/tests/units/test_http_server.py @@ -1378,7 +1378,7 @@ def test_cql_post_search(self): }, ) - @mock.patch("eodag.rest.utils.eodag_api.list_product_types", autospec=True) + 
@mock.patch("eodag.rest.core.eodag_api.list_product_types", autospec=True) def test_collection_free_text_search(self, list_pt: Mock): """Test STAC Collection free-text search""" From 241127f356c23e4d3475daa764b9bc012bd0fb8a Mon Sep 17 00:00:00 2001 From: Sylvain Brunato Date: Fri, 8 Mar 2024 11:41:57 +0100 Subject: [PATCH 09/10] feat: use enhanced guess_product_type for free-text-search --- eodag/api/core.py | 121 ++++++++++++-------------------- eodag/rest/stac.py | 29 ++++++-- tests/units/test_core.py | 74 ++++++++----------- tests/units/test_http_server.py | 7 +- 4 files changed, 100 insertions(+), 131 deletions(-) diff --git a/eodag/api/core.py b/eodag/api/core.py index 796f97d37..92716831e 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -34,7 +34,6 @@ Set, Tuple, Union, - cast, ) import geojson @@ -46,7 +45,6 @@ from whoosh.fields import Schema from whoosh.index import create_in, exists_in, open_dir from whoosh.qparser import QueryParser -from whoosh.searching import Results from eodag.api.product.metadata_mapping import ( NOT_MAPPED, @@ -517,10 +515,7 @@ def set_locations_conf(self, locations_conf_path: str) -> None: self.locations_config = [] def list_product_types( - self, - provider: Optional[str] = None, - fetch_providers: bool = True, - filter: Optional[str] = None, + self, provider: Optional[str] = None, fetch_providers: bool = True ) -> List[Dict[str, Any]]: """Lists supported product types. @@ -530,8 +525,6 @@ def list_product_types( :param fetch_providers: (optional) Whether to fetch providers for new product types or not :type fetch_providers: bool - :param filter: (optional) Comma separated list of free text search terms. - :type filter: str :returns: The list of the product types that can be accessed using eodag. 
:rtype: list(dict) :raises: :class:`~eodag.utils.exceptions.UnsupportedProvider` @@ -554,17 +547,10 @@ def list_product_types( product_type = dict(ID=product_type_id, **config) if product_type_id not in product_types: product_types.append(product_type) - else: - raise UnsupportedProvider( - f"invalid requested provider: {provider} is not (yet) supported" - ) - - if filter: - product_types = self.__apply_product_type_free_text_search_filter( - product_types, filter=filter - ) - return sorted(product_types, key=itemgetter("ID")) - + return sorted(product_types, key=itemgetter("ID")) + raise UnsupportedProvider( + f"invalid requested provider: {provider} is not (yet) supported" + ) # Only get the product types supported by the available providers for provider in self.available_providers(): current_product_type_ids = [pt["ID"] for pt in product_types] @@ -572,65 +558,14 @@ def list_product_types( [ pt for pt in self.list_product_types( - provider=provider, fetch_providers=False, filter=filter + provider=provider, fetch_providers=False ) if pt["ID"] not in current_product_type_ids ] ) - # Return the product_types sorted in lexicographic order of their ID return sorted(product_types, key=itemgetter("ID")) - def __apply_product_type_free_text_search_filter( - self, product_types: List[Dict[str, Any]], filter: str - ) -> List[Dict[str, Any]]: - """Apply the free text search filter to the given list of product types - - :param product_types: The list of product types - :type product_types: list - :param filter: Comma separated list of search terms (ex. 
"EO,Earth Observation") - :type filter: str - :returns: The new list of product types - :rtype: list - - """ - # Apply the free text search - if not filter: - # no filter was given -> return the original list - return product_types - fts_terms = filter.split(",") - fts_supported_params = {"title", "abstract", "keywords"} - with self._product_types_index.searcher() as searcher: - results: Optional[Results] = None - # For each search key, do a guess and then upgrade the result (i.e. when - # merging results, if a hit appears in both results, its position is raised - # to the top. This way, the top most result will be the hit that best - # matches the given queries. Put another way, this best guess is the one - # that crosses the highest number of search params from the given queries - for term in fts_terms: - for search_key in fts_supported_params: - result = cast( - Results, - searcher.search( # type: ignore - QueryParser( - search_key, self._product_types_index.schema - ).parse( # type: ignore - term - ), - limit=None, - ), - ) - if not results: - results = result - else: - results.upgrade_and_extend(result) # type: ignore - if not results: - # no result found -> intersection is empty set - return [] - - result_ids = {r["ID"] for r in results or []} - return [p for p in product_types if p["ID"] in result_ids] - def fetch_product_types_list(self, provider: Optional[str] = None) -> None: """Fetch product types list and update if needed @@ -979,9 +914,22 @@ def get_alias_from_product_type(self, product_type: str) -> str: return self.product_types_config[product_type].get("alias", product_type) - def guess_product_type(self, **kwargs: Any) -> List[str]: - """Find eodag product types codes that best match a set of search params + def guess_product_type( + self, + free_text_filter: Optional[str] = None, + intersect: bool = False, + **kwargs: Any, + ) -> List[str]: + """Find eodag product types ids that best match a set of search params + See 
https://whoosh.readthedocs.io/en/latest/querylang.html#the-default-query-language + for syntax. + + :param free_text_filter: whoosh compatible free text search filter used to search + `title`, `abstract` and `keywords` + :type free_text_filter: Optional[str] + :param intersect: join results for each parameter using INTERSECT instead of UNION + :type intersect: bool :param kwargs: A set of search parameters as keywords arguments :returns: The best match for the given parameters :rtype: list[str] @@ -989,6 +937,9 @@ def guess_product_type(self, **kwargs: Any) -> List[str]: """ if kwargs.get("productType", None): return [kwargs["productType"]] + free_text_search_params = ( + ["title", "abstract", "keywords"] if free_text_filter else [] + ) supported_params = { param for param in ( @@ -999,6 +950,8 @@ def guess_product_type(self, **kwargs: Any) -> List[str]: "sensorType", "keywords", "md5", + "abstract", + "title", ) if kwargs.get(param, None) is not None } @@ -1006,19 +959,35 @@ def guess_product_type(self, **kwargs: Any) -> List[str]: raise EodagError("Missing product types index") with self._product_types_index.searcher() as searcher: results = None - # For each search key, do a guess and then upgrade the result (i.e. when - # merging results, if a hit appears in both results, its position is raised - # to the top. This way, the top most result will be the hit that best + # Using `upgrade_and_extend`, for each search key, do a guess and + # then upgrade the result (i.e. when merging results, + # if a hit appears in both results, its position is raised + # to the top). This way, the top most result will be the hit that best # matches the given queries. 
Put another way, this best guess is the one # that crosses the highest number of search params from the given queries + + # Always use UNION to join free_text_search results + for search_key in free_text_search_params: + query = QueryParser(search_key, self._product_types_index.schema).parse( + free_text_filter + ) + if results is None: + results = searcher.search(query, limit=None) + else: + results.upgrade_and_extend(searcher.search(query, limit=None)) + + # join results from kwargs using UNION or INTERSECT for search_key in supported_params: query = QueryParser(search_key, self._product_types_index.schema).parse( kwargs[search_key] ) if results is None: results = searcher.search(query, limit=None) + elif intersect: + results.filter(searcher.search(query, limit=None)) else: results.upgrade_and_extend(searcher.search(query, limit=None)) + guesses: List[str] = [r["ID"] for r in results or []] if guesses: return guesses diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index 2dcff2058..fef6fb098 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -637,22 +637,37 @@ def __get_product_types( """ if filters is None: filters = {} + free_text_filter = filters.pop("q", None) + + # product types matching filters try: - guessed_product_types = self.eodag_api.guess_product_type(**filters) + guessed_product_types = ( + self.eodag_api.guess_product_type(**filters) if filters else [] + ) except NoMatchingProductType: guessed_product_types = [] + + # product types matching free text filter + if free_text_filter and not guessed_product_types: + whooshable_filter = " OR ".join( + [f"({x})" for x in free_text_filter.split(",")] + ) + try: + guessed_product_types = self.eodag_api.guess_product_type( + whooshable_filter + ) + except NoMatchingProductType: + guessed_product_types = [] + + # list product types with all metadata using guessed ids if guessed_product_types: product_types = [ pt - for pt in self.eodag_api.list_product_types( - provider=self.provider, 
filter=filters.get("q", None) - ) + for pt in self.eodag_api.list_product_types(provider=self.provider) if pt["ID"] in guessed_product_types ] else: - product_types = self.eodag_api.list_product_types( - provider=self.provider, filter=filters.get("q", None) - ) + product_types = self.eodag_api.list_product_types(provider=self.provider) return product_types def __get_collection_list( diff --git a/tests/units/test_core.py b/tests/units/test_core.py index d865ef770..b6eaa26ff 100644 --- a/tests/units/test_core.py +++ b/tests/units/test_core.py @@ -517,19 +517,7 @@ def test_list_product_types_fetch_providers(self, mock_fetch_product_types_list) self.dag.list_product_types(provider="peps", fetch_providers=True) mock_fetch_product_types_list.assert_called_once_with(self.dag, provider="peps") - def test_list_product_types_with_free_text_filter_ok(self): - """Core api must correctly return the list of supported product types""" - - product_types = self.dag.list_product_types( - fetch_providers=False, filter="ABSTRACTFOO" - ) - self.assertIsInstance(product_types, list) - for product_type in product_types: - self.assertListProductTypesRightStructure(product_type) - # There should be no repeated product type in the output - self.assertEqual(len(product_types), len(set(pt["ID"] for pt in product_types))) - - def test_list_product_types_with_free_text_filter(self): + def test_guess_product_type_with_filter(self): """Testing the search terms""" with open( @@ -538,47 +526,43 @@ def test_list_product_types_with_free_text_filter(self): ext_product_types_conf = json.load(f) self.dag.update_product_types_list(ext_product_types_conf) - # match in the abstract - product_types = self.dag.list_product_types( - fetch_providers=False, filter="ABSTRACTFOO" - ) - product_types_ids = [r["ID"] for r in product_types or []] + # Free text search: match in the abstract + filter = "ABSTRACTFOO" + product_types_ids = self.dag.guess_product_type(filter) self.assertListEqual(product_types_ids, 
["foo"]) - - # passing the provider - product_types = self.dag.list_product_types( - provider="astraea_eod", fetch_providers=False, filter="ABSTRACTFOO" - ) - product_types_ids = [r["ID"] for r in product_types or []] + filter = "(ABSTRACTFOO)" + product_types_ids = self.dag.guess_product_type(filter) self.assertListEqual(product_types_ids, ["foo"]) - - # match in the abstract - product_types = self.dag.list_product_types( - fetch_providers=False, filter=" FOO THIS IS " - ) - product_types_ids = [r["ID"] for r in product_types or []] + filter = " FOO THIS IS " + product_types_ids = self.dag.guess_product_type(filter) self.assertListEqual(product_types_ids, ["foo"]) - # match in the keywords - product_types = self.dag.list_product_types( - fetch_providers=False, filter="LECTUS_BAR_KEY" - ) - product_types_ids = [r["ID"] for r in product_types or []] + # Free text search: match in the keywords + filter = "LECTUS_BAR_KEY" + product_types_ids = self.dag.guess_product_type(filter) self.assertListEqual(product_types_ids, ["bar"]) - # match in the title - product_types = self.dag.list_product_types( - fetch_providers=False, filter="COLLECTION FOOBAR" - ) - product_types_ids = [r["ID"] for r in product_types or []] + # Free text search: match in the title + filter = "COLLECTION FOOBAR" + product_types_ids = self.dag.guess_product_type(filter) self.assertListEqual(product_types_ids, ["foobar"]) - # multiple terms - product_types = self.dag.list_product_types( - fetch_providers=False, filter="FOOANDBAR,FOOBAR" + # Free text search: multiple terms + filter = "(This is FOOBAR) OR (This is BAR)" + product_types_ids = self.dag.guess_product_type(filter) + self.assertListEqual(sorted(product_types_ids), ["bar", "foobar"]) + + # Free text search: multiple terms joined with param search (UNION) + filter = "(This is FOOBAR) OR (This is BAR)" + product_types_ids = self.dag.guess_product_type(filter, title="FOO*") + self.assertListEqual(sorted(product_types_ids), ["bar", "foo", 
"foobar"]) + + # Free text search: multiple terms joined with param search (INTERSECT) + filter = "(This is FOOBAR) OR (This is BAR)" + product_types_ids = self.dag.guess_product_type( + filter, intersect=True, title="titleFOO*" ) - product_types_ids = [r["ID"] for r in product_types or []] - self.assertListEqual(product_types_ids, ["bar", "foo", "foobar"]) + self.assertListEqual(sorted(product_types_ids), ["foobar"]) def test_update_product_types_list(self): """Core api.update_product_types_list must update eodag product types list""" diff --git a/tests/units/test_http_server.py b/tests/units/test_http_server.py index 234c58a7a..82b337326 100644 --- a/tests/units/test_http_server.py +++ b/tests/units/test_http_server.py @@ -964,7 +964,6 @@ def test_list_product_types_ok(self, list_pt: Mock, guess_pt: Mock): """A simple request for product types with(out) a provider must succeed""" for url in ("/collections",): r = self.app.get(url) - self.assertTrue(guess_pt.called) self.assertTrue(list_pt.called) self.assertEqual(200, r.status_code) self.assertListEqual( @@ -1379,10 +1378,12 @@ def test_cql_post_search(self): ) @mock.patch("eodag.rest.core.eodag_api.list_product_types", autospec=True) - def test_collection_free_text_search(self, list_pt: Mock): + @mock.patch("eodag.rest.core.eodag_api.guess_product_type", autospec=True) + def test_collection_free_text_search(self, guess_pt: Mock, list_pt: Mock): """Test STAC Collection free-text search""" url = "/collections?q=TERM1,TERM2" r = self.app.get(url) - list_pt.assert_called_once_with(provider=None, filter="TERM1,TERM2") + list_pt.assert_called_once_with(provider=None) + guess_pt.assert_called_once_with("(TERM1) OR (TERM2)") self.assertEqual(200, r.status_code) From 55a9a62a9b9c2d0ef3d8281c9f7a67e7b7287874 Mon Sep 17 00:00:00 2001 From: Sylvain Brunato Date: Fri, 8 Mar 2024 15:07:28 +0100 Subject: [PATCH 10/10] docs: enhanced guess_product_type --- docs/notebooks/api_user_guide/4_search.ipynb | 95 +++++++++++++------- 1 
file changed, 63 insertions(+), 32 deletions(-) diff --git a/docs/notebooks/api_user_guide/4_search.ipynb b/docs/notebooks/api_user_guide/4_search.ipynb index ade3ccabb..0337a757f 100644 --- a/docs/notebooks/api_user_guide/4_search.ipynb +++ b/docs/notebooks/api_user_guide/4_search.ipynb @@ -2228,7 +2228,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In the previous request we made use of the whoosh query language which can be used to do complex text search. It supports the boolean operators AND, OR and NOT to combine the search terms. If a space is given between two words as in the example above, this corresponds to the operator AND. Brackets '()' can also be used. The example above also shows the use of the wildcard operator '*' which can represent any numer of characters. The wildcard operator '?' always represents only one character. It is also possible to match a range of terms by using square brackets '[]' and TO, e.g. [A TO D] will match all words in the lexical range between A and D. Below you can find some examples for the different operators." + "In the previous request we made use of the [whoosh query language](https://whoosh.readthedocs.io/en/latest/querylang.html#the-default-query-language) which can be used to do complex text search. It supports the boolean operators `AND`, `OR` and `NOT` to combine the search terms. If a space is given between two words as in the example above, this corresponds to the operator AND. Brackets `()` can also be used. The example above also shows the use of the wildcard operator `*` which can represent any number of characters. The wildcard operator `?` always represents only one character. It is also possible to match a range of terms by using square brackets `[]` and TO, e.g. `[A TO D]` will match all words in the lexical range between A and D. Below you can find some examples for the different operators." 
] }, { @@ -2274,9 +2274,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "returns all product types where the platform is either LANDSAT or SENTINEL1:\n", - "\n", - "['L57_REFLECTANCE', 'LANDSAT_C2L1', 'LANDSAT_C2L2', 'LANDSAT_C2L2ALB_BT', 'LANDSAT_C2L2ALB_SR', 'LANDSAT_C2L2ALB_ST', 'LANDSAT_C2L2ALB_TA', 'LANDSAT_C2L2_SR', 'LANDSAT_C2L2_ST', 'LANDSAT_ETM_C1', 'LANDSAT_ETM_C2L1', 'LANDSAT_ETM_C2L2', 'LANDSAT_TM_C1', 'LANDSAT_TM_C2L1', 'LANDSAT_TM_C2L2', 'S1_SAR_GRD', 'S1_SAR_OCN', 'S1_SAR_RAW', 'S1_SAR_SLC']" + "returns all product types where the platform is either LANDSAT or SENTINEL1." ] }, { @@ -2319,9 +2317,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "returns all product types which contain either the keywords LANDSAT and collection2 or the keyword SAR:\n", - "\n", - "['LANDSAT_C2L1', 'LANDSAT_C2L2', 'LANDSAT_C2L2ALB_BT', 'LANDSAT_C2L2ALB_SR', 'LANDSAT_C2L2ALB_ST', 'LANDSAT_C2L2ALB_TA', 'LANDSAT_C2L2_SR', 'LANDSAT_C2L2_ST', 'LANDSAT_ETM_C2L1', 'LANDSAT_ETM_C2L2', 'LANDSAT_TM_C2L1', 'LANDSAT_TM_C2L2', 'S1_SAR_GRD', 'S1_SAR_OCN', 'S1_SAR_RAW', 'S1_SAR_SLC']" + "returns all product types which contain either the keywords LANDSAT and collection2 or the keyword SAR." ] }, { @@ -2366,9 +2362,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "returns all product types where the platformSerialIdentifier is composed of 'L' and one other character:\n", - "\n", - "['L57_REFLECTANCE', 'L8_OLI_TIRS_C1L1', 'L8_REFLECTANCE', 'LANDSAT_C2L1', 'LANDSAT_C2L2', 'LANDSAT_C2L2ALB_BT', 'LANDSAT_C2L2ALB_SR', 'LANDSAT_C2L2ALB_ST', 'LANDSAT_C2L2ALB_TA', 'LANDSAT_C2L2_SR', 'LANDSAT_C2L2_ST', 'LANDSAT_ETM_C1', 'LANDSAT_ETM_C2L1', 'LANDSAT_ETM_C2L2', 'LANDSAT_TM_C1', 'LANDSAT_TM_C2L1', 'LANDSAT_TM_C2L2']" + "returns all product types where the platformSerialIdentifier is composed of 'L' and one other character." 
] }, { @@ -2439,9 +2433,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "returns all product types where the platform is SENTINEL1, SENTINEL2 or SENTINEL3:\n", - "\n", - "['S1_SAR_GRD', 'S1_SAR_OCN', 'S1_SAR_RAW', 'S1_SAR_SLC', 'S2_MSI_L1C', 'S2_MSI_L2A', 'S2_MSI_L2A_COG', 'S2_MSI_L2A_MAJA', 'S2_MSI_L2B_MAJA_SNOW', 'S2_MSI_L2B_MAJA_WATER', 'S2_MSI_L3A_WASP', 'S3_EFR', 'S3_ERR', 'S3_LAN', 'S3_OLCI_L2LFR', 'S3_OLCI_L2LRR', 'S3_OLCI_L2WFR', 'S3_OLCI_L2WRR', 'S3_RAC', 'S3_SLSTR_L1RBT', 'S3_SLSTR_L2AOD', 'S3_SLSTR_L2FRP', 'S3_SLSTR_L2LST', 'S3_SLSTR_L2WST', 'S3_SRA', 'S3_SRA_A', 'S3_SRA_BS', 'S3_SY_AOD', 'S3_SY_SYN', 'S3_SY_V10', 'S3_SY_VG1', 'S3_SY_VGP', 'S3_WAT']" + "returns all product types where the platform is SENTINEL1, SENTINEL2 or SENTINEL3." ] }, { @@ -2454,7 +2446,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -2477,7 +2469,7 @@ " 'LANDSAT_TM_C2L2']" ] }, - "execution_count": 74, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -2491,29 +2483,68 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "['LANDSAT_C2L1', \n", - "'L57_REFLECTANCE', \n", - "'LANDSAT_C2L2', \n", - "'LANDSAT_C2L2ALB_BT', \n", - "'LANDSAT_C2L2ALB_SR', \n", - "'LANDSAT_C2L2ALB_ST', \n", - "'LANDSAT_C2L2ALB_TA', \n", - "'LANDSAT_C2L2_SR', \n", - "'LANDSAT_C2L2_ST', \n", - "'LANDSAT_ETM_C1', \n", - "'LANDSAT_ETM_C2L1', \n", - "'LANDSAT_ETM_C2L2', \n", - "'LANDSAT_TM_C1', \n", - "'LANDSAT_TM_C2L1', \n", - "'LANDSAT_TM_C2L2']" + "The product types in the result are ordered by how well they match the criteria. In the example above only the first product type (LANDSAT_C2L1) matches the second parameter (platformSerialIdentifier=\"L1\"), all other product types only match the first criterion. Therefore, it is usually best to use the first product type in the list as it will be the one that fits best." 
] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "The product types in the result are ordered by how well they match the criteria. In the example above only the first product type (LANDSAT_C2L1) matches the second parameter (platformSerialIdentifier=\"L1\"), all other product types only match the first criterion. Therefore, it is usually best to use the first product type in the list as it will be the one that fits best." + "Per-parameter guesses are joined using a `UNION` by default (`intersect=False`). This can also be changed to an intersection:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['LANDSAT_C2L1']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dag.guess_product_type(platform=\"LANDSAT\", platformSerialIdentifier=\"L1\", intersect=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[Whoosh query language](https://whoosh.readthedocs.io/en/latest/querylang.html#the-default-query-language) *free text search* can also be passed to the method; it will be used to search in `title`, `abstract` and `keywords` fields:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ERA5_SL_MONTHLY',\n", + " 'ERA5_PL_MONTHLY',\n", + " 'ERA5_LAND_MONTHLY',\n", + " 'ERA5_SL',\n", + " 'ERA5_PL',\n", + " 'GLOFAS_SEASONAL_REFORECAST',\n", + " 'SEASONAL_MONTHLY_PL',\n", + " 'SEASONAL_MONTHLY_SL']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dag.guess_product_type(\"ECMWF AND MONTHLY\")" ] }, {