From fab3a8437b55ea0b9d4b010e14111e37064eadcc Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Fri, 19 Jan 2024 16:37:03 +0100 Subject: [PATCH 01/14] feat: merge external product type metadata in server mode --- eodag/api/core.py | 45 ++++++++++++++++++++++++++++++++++++++++++++- eodag/rest/stac.py | 1 + 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/eodag/api/core.py b/eodag/api/core.py index e1a53d265..65051dd74 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -35,7 +35,10 @@ from whoosh.index import create_in, exists_in, open_dir from whoosh.qparser import QueryParser -from eodag.api.product.metadata_mapping import mtd_cfg_as_conversion_and_querypath +from eodag.api.product.metadata_mapping import ( + mtd_cfg_as_conversion_and_querypath, + properties_from_json, +) from eodag.api.search_result import SearchResult from eodag.config import ( PluginConfig, @@ -217,6 +220,45 @@ def __init__( self.set_locations_conf(locations_conf_path) self.search_errors: Set = set() + def load_external_product_types_metadata( + self, product_type_conf: Dict[str, Any] + ) -> None: + """Load external enhanced metadata for a specific product type. + Existing keys are not updated. Lists are merged also for existing keys. + + :param product_type_conf: Product type configuration to update with the external + metadata. + :type product_type_conf: Dict[str, Any] + """ + external_metadata_path = product_type_conf.get("stacCollection") + if not external_metadata_path: + return None + logger.info( + f"Fetching external enhanced metadata for product type {product_type_conf['ID']}" + ) + stac_provider_config = load_stac_provider_config() + parsable_metadata = stac_provider_config["search"]["discover_product_types"][ + "generic_product_type_parsable_metadata" + ] + parsable_metadata = mtd_cfg_as_conversion_and_querypath(parsable_metadata) + external_metadata = get_ext_product_types_conf(external_metadata_path) + ext_product_type_conf = properties_from_json( + external_metadata, + parsable_metadata, + ) + # Merge `ext_product_type_conf` into `self.product_type_conf` + for ext_cfg_k, ext_cfg_v in ext_product_type_conf.items(): + old_value = product_type_conf.get(ext_cfg_k) + if old_value is None: + product_type_conf[ext_cfg_k] = ext_cfg_v + elif ext_cfg_k in ("instrument", "platformSerialIdentifier", "keywords"): + merged_values = old_value.split(",") + new_values = ext_cfg_v.split(",") + for v in new_values: + if v not in merged_values: + merged_values.append(v) + product_type_conf[ext_cfg_k] = ",".join(merged_values) + def get_version(self) -> str: """Get eodag package version""" return pkg_resources.get_distribution("eodag").version @@ -288,6 +330,7 @@ def build_index(self) -> None: missionStartDate=fields.ID, missionEndDate=fields.ID, keywords=fields.KEYWORD(analyzer=kw_analyzer), + stacCollection=fields.STORED, ) self._product_types_index = create_in(index_dir, product_types_schema) ix_writer = self._product_types_index.writer() diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index aae31ade9..20bb57d0d 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -710,6 +710,7 @@ def __get_collection_list( providers_models.append(provider_m) # parse jsonpath + self.eodag_api.load_external_product_types_metadata(product_type) product_type_collection = jsonpath_parse_dict_items( collection_model, { From 81b34512f693dfff6f1276d6d78948f6f86fd0a0 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Thu, 1 Feb 2024 13:17:59 +0100 Subject: [PATCH 02/14] feat: load external STAC collections The metadata supported in EODAG from the external STAC collections are merged in `EODataAccessGateway.product_types_config` All the extra fields from the external STAC collections are added to the class `StacCollection` --- eodag/api/core.py | 63 ++++++++++++++++++++++++++++++++++----------- eodag/rest/core.py | 1 + eodag/rest/stac.py | 26 ++++++++++++++++++- eodag/rest/utils.py | 0 4 files changed, 74 insertions(+), 16 deletions(-) create mode 100644 eodag/rest/utils.py diff --git a/eodag/api/core.py b/eodag/api/core.py index 65051dd74..26357731b 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -220,44 +220,61 @@ def __init__( self.set_locations_conf(locations_conf_path) self.search_errors: Set = set() - def load_external_product_types_metadata( - self, product_type_conf: Dict[str, Any] - ) -> None: - """Load external enhanced metadata for a specific product type. + self.fetch_external_product_types_metadata() + + def load_external_product_type_metadata( + self, product_type_id: str, product_type_conf: Dict[str, Any] + ) -> Optional[Dict[str, Any]]: + """Loads external enhanced metadata for a given product type. + + The existing product type configuration is merged with the one from the external file and + the result is returned. + Existing keys are not updated. Lists are merged also for existing keys. + Only the keys defined in `search.discover_product_types.generic_product_type_parsable_metadata` + from `eodag/resources/stac_provider.yml` are loaded into the configuration. + :param product_type_id: Product type ID to update with the external metadata. + :type product_type_id: str :param product_type_conf: Product type configuration to update with the external metadata. :type product_type_conf: Dict[str, Any] + :returns: The enhanced product type dictionary, None if no external metadata is available + :rtype: Optional[Dict[str, Any]] """ - external_metadata_path = product_type_conf.get("stacCollection") - if not external_metadata_path: + external_stac_collection_path = product_type_conf.get("stacCollection") + if not external_stac_collection_path: return None + enhanced_product_type_conf = deepcopy(product_type_conf) logger.info( - f"Fetching external enhanced metadata for product type {product_type_conf['ID']}" + f"Fetching external enhanced product type metadata for {product_type_id}" ) stac_provider_config = load_stac_provider_config() parsable_metadata = stac_provider_config["search"]["discover_product_types"][ "generic_product_type_parsable_metadata" ] parsable_metadata = mtd_cfg_as_conversion_and_querypath(parsable_metadata) - external_metadata = get_ext_product_types_conf(external_metadata_path) - ext_product_type_conf = properties_from_json( - external_metadata, + external_stac_collection = get_ext_product_types_conf( + external_stac_collection_path + ) + # Loading external enhanced product types metadata + external_stac_collection_parsed = properties_from_json( + external_stac_collection, parsable_metadata, ) - # Merge `ext_product_type_conf` into `self.product_type_conf` - for ext_cfg_k, ext_cfg_v in ext_product_type_conf.items(): - old_value = product_type_conf.get(ext_cfg_k) + # Merge `external_stac_collection_parsed` into `enhanced_product_type_conf` + for ext_cfg_k, ext_cfg_v in external_stac_collection_parsed.items(): + old_value = enhanced_product_type_conf.get(ext_cfg_k) if old_value is None: - product_type_conf[ext_cfg_k] = ext_cfg_v + enhanced_product_type_conf[ext_cfg_k] = ext_cfg_v elif ext_cfg_k in ("instrument", "platformSerialIdentifier", "keywords"): merged_values = old_value.split(",") new_values = ext_cfg_v.split(",") for v in new_values: if v not in merged_values: merged_values.append(v) - product_type_conf[ext_cfg_k] = ",".join(merged_values) + enhanced_product_type_conf[ext_cfg_k] = ",".join(merged_values) + return enhanced_product_type_conf def get_version(self) -> str: """Get eodag package version""" @@ -612,6 +629,22 @@ def list_product_types( # Return the product_types sorted in lexicographic order of their ID return sorted(product_types, key=itemgetter("ID")) + def fetch_external_product_types_metadata(self) -> None: + """Fetch external product types metadata and update if needed""" + # load external product type metadata + for product_type_id, product_type_conf in self.product_types_config.items(): + enhanced_product_type = self.load_external_product_type_metadata( + product_type_id, + product_type_conf, + ) + if enhanced_product_type: + product_type_conf.update(enhanced_product_type) + new_md5 = obj_md5sum(self.product_types_config.source) + if new_md5 != self.product_types_config_md5: + self.product_types_config_md5 = new_md5 + # rebuild index after product types list update + self.build_index() + def fetch_product_types_list(self, provider: Optional[str] = None) -> None: """Fetch product types list and update if needed diff --git a/eodag/rest/core.py b/eodag/rest/core.py index 37cd2d06d..fd33b5835 100644 --- a/eodag/rest/core.py +++ b/eodag/rest/core.py @@ -689,6 +689,7 @@ def crunch_products( def eodag_api_init() -> None: """Init EODataAccessGateway server instance, pre-running all time consuming tasks""" eodag_api.fetch_product_types_list() + StacCollection.fetch_external_stac_collections(eodag_api) # pre-build search plugins for provider in eodag_api.available_providers(): diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index 20bb57d0d..8cb9a8fa6 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -39,6 +39,7 @@ format_metadata, get_metadata_path, ) +from eodag.config import get_ext_product_types_conf from eodag.utils import ( DEFAULT_MISSION_START_DATE, deepcopy, @@ -610,6 +611,25 @@ class StacCollection(StacCommon): :type root: str """ + # External STAC collections + ext_stac_collections: Dict[str, Dict[str, Any]] = dict() + + @classmethod + def fetch_external_stac_collections(cls, eodag_api: EODataAccessGateway) -> None: + """Load external STAC collections + + :param eodag_api: EODAG python API instance + :type eodag_api: :class:`eodag.api.core.EODataAccessGateway` + """ + list_product_types = eodag_api.list_product_types() + for product_type in list_product_types: + ext_stac_collection_path = product_type.get("stacCollection") + if not ext_stac_collection_path: + continue + logger.info(f"Fetching external STAC collection for {product_type['ID']}") + ext_stac_collection = get_ext_product_types_conf(ext_stac_collection_path) + cls.ext_stac_collections[product_type["ID"]] = ext_stac_collection + def __init__( self, url: str, @@ -710,7 +730,6 @@ def __get_collection_list( providers_models.append(provider_m) # parse jsonpath - self.eodag_api.load_external_product_types_metadata(product_type) product_type_collection = jsonpath_parse_dict_items( collection_model, { @@ -718,6 +737,11 @@ def __get_collection_list( "providers": providers_models, }, ) + # merge EODAG's collection with the external collection + product_type_id = product_type.get("_id", None) or product_type["ID"] + ext_stac_collection = self.ext_stac_collections.get(product_type_id, {}) + ext_stac_collection.update(product_type_collection) + product_type_collection = ext_stac_collection # parse f-strings format_args = deepcopy(self.stac_config) format_args["collection"] = dict( diff --git a/eodag/rest/utils.py b/eodag/rest/utils.py new file mode 100644 index 000000000..e69de29bb From 2d14001fae4b2ee595b80ee5f13cc0141dea108c Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Thu, 1 Feb 2024 13:21:16 +0100 Subject: [PATCH 03/14] refactor: moved function `EODataAccessGateway.load_external_product_type_metadata` --- eodag/api/core.py | 108 +++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/eodag/api/core.py b/eodag/api/core.py index 26357731b..782da6dc1 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -222,60 +222,6 @@ def __init__( self.fetch_external_product_types_metadata() - def load_external_product_type_metadata( - self, product_type_id: str, product_type_conf: Dict[str, Any] - ) -> Optional[Dict[str, Any]]: - """Loads external enhanced metadata for a given product type. - - The existing product type configuration is merged with the one from the external file and - the result is returned. - - Existing keys are not updated. Lists are merged also for existing keys. - Only the keys defined in `search.discover_product_types.generic_product_type_parsable_metadata` - from `eodag/resources/stac_provider.yml` are loaded into the configuration. - - :param product_type_id: Product type ID to update with the external metadata. - :type product_type_id: str - :param product_type_conf: Product type configuration to update with the external - metadata. - :type product_type_conf: Dict[str, Any] - :returns: The enhanced product type dictionary, None if no external metadata is available - :rtype: Optional[Dict[str, Any]] - """ - external_stac_collection_path = product_type_conf.get("stacCollection") - if not external_stac_collection_path: - return None - enhanced_product_type_conf = deepcopy(product_type_conf) - logger.info( - f"Fetching external enhanced product type metadata for {product_type_id}" - ) - stac_provider_config = load_stac_provider_config() - parsable_metadata = stac_provider_config["search"]["discover_product_types"][ - "generic_product_type_parsable_metadata" - ] - parsable_metadata = mtd_cfg_as_conversion_and_querypath(parsable_metadata) - external_stac_collection = get_ext_product_types_conf( - external_stac_collection_path - ) - # Loading external enhanced product types metadata - external_stac_collection_parsed = properties_from_json( - external_stac_collection, - parsable_metadata, - ) - # Merge `external_stac_collection_parsed` into `enhanced_product_type_conf` - for ext_cfg_k, ext_cfg_v in external_stac_collection_parsed.items(): - old_value = enhanced_product_type_conf.get(ext_cfg_k) - if old_value is None: - enhanced_product_type_conf[ext_cfg_k] = ext_cfg_v - elif ext_cfg_k in ("instrument", "platformSerialIdentifier", "keywords"): - merged_values = old_value.split(",") - new_values = ext_cfg_v.split(",") - for v in new_values: - if v not in merged_values: - merged_values.append(v) - enhanced_product_type_conf[ext_cfg_k] = ",".join(merged_values) - return enhanced_product_type_conf - def get_version(self) -> str: """Get eodag package version""" return pkg_resources.get_distribution("eodag").version @@ -629,6 +575,60 @@ def list_product_types( # Return the product_types sorted in lexicographic order of their ID return sorted(product_types, key=itemgetter("ID")) + def load_external_product_type_metadata( + self, product_type_id: str, product_type_conf: Dict[str, Any] + ) -> Optional[Dict[str, Any]]: + """Loads external enhanced metadata for a given product type. + + The existing product type configuration is merged with the one from the external file and + the result is returned. + + Existing keys are not updated. Lists are merged also for existing keys. + Only the keys defined in `search.discover_product_types.generic_product_type_parsable_metadata` + from `eodag/resources/stac_provider.yml` are loaded into the configuration. + + :param product_type_id: Product type ID to update with the external metadata. + :type product_type_id: str + :param product_type_conf: Product type configuration to update with the external + metadata. + :type product_type_conf: Dict[str, Any] + :returns: The enhanced product type dictionary, None if no external metadata is available + :rtype: Optional[Dict[str, Any]] + """ + external_stac_collection_path = product_type_conf.get("stacCollection") + if not external_stac_collection_path: + return None + enhanced_product_type_conf = deepcopy(product_type_conf) + logger.info( + f"Fetching external enhanced product type metadata for {product_type_id}" + ) + stac_provider_config = load_stac_provider_config() + parsable_metadata = stac_provider_config["search"]["discover_product_types"][ + "generic_product_type_parsable_metadata" + ] + parsable_metadata = mtd_cfg_as_conversion_and_querypath(parsable_metadata) + external_stac_collection = get_ext_product_types_conf( + external_stac_collection_path + ) + # Loading external enhanced product types metadata + external_stac_collection_parsed = properties_from_json( + external_stac_collection, + parsable_metadata, + ) + # Merge `external_stac_collection_parsed` into `enhanced_product_type_conf` + for ext_cfg_k, ext_cfg_v in external_stac_collection_parsed.items(): + old_value = enhanced_product_type_conf.get(ext_cfg_k) + if old_value is None: + enhanced_product_type_conf[ext_cfg_k] = ext_cfg_v + elif ext_cfg_k in ("instrument", "platformSerialIdentifier", "keywords"): + merged_values = old_value.split(",") + new_values = ext_cfg_v.split(",") + for v in new_values: + if v not in merged_values: + merged_values.append(v) + enhanced_product_type_conf[ext_cfg_k] = ",".join(merged_values) + return enhanced_product_type_conf + def fetch_external_product_types_metadata(self) -> None: """Fetch external product types metadata and update if needed""" # load external product type metadata From 63be096ce6ade94f7477dc997dc62f9b716b8551 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Fri, 2 Feb 2024 13:39:29 +0100 Subject: [PATCH 04/14] fix: minor fix in fetching and merging `stacCollection` --- eodag/api/core.py | 3 ++- eodag/rest/stac.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/eodag/api/core.py b/eodag/api/core.py index 782da6dc1..cd2ff87eb 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -36,6 +36,7 @@ from whoosh.qparser import QueryParser from eodag.api.product.metadata_mapping import ( + NOT_AVAILABLE, mtd_cfg_as_conversion_and_querypath, properties_from_json, ) @@ -624,7 +625,7 @@ def load_external_product_type_metadata( merged_values = old_value.split(",") new_values = ext_cfg_v.split(",") for v in new_values: - if v not in merged_values: + if v != NOT_AVAILABLE and v not in merged_values: merged_values.append(v) enhanced_product_type_conf[ext_cfg_k] = ",".join(merged_values) return enhanced_product_type_conf diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index 8cb9a8fa6..fdc103391 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -621,7 +621,7 @@ def fetch_external_stac_collections(cls, eodag_api: EODataAccessGateway) -> None :param eodag_api: EODAG python API instance :type eodag_api: :class:`eodag.api.core.EODataAccessGateway` """ - list_product_types = eodag_api.list_product_types() + list_product_types = eodag_api.list_product_types(fetch_providers=False) for product_type in list_product_types: ext_stac_collection_path = product_type.get("stacCollection") if not ext_stac_collection_path: From 55a08b2c5340e5648bcb872700ce0470b1f68649 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Fri, 2 Feb 2024 14:02:17 +0100 Subject: [PATCH 05/14] test: fetching and merging external enhanced product types metadata --- tests/units/test_core.py | 100 ++++++++++++++++++++++++++++++++- tests/units/test_stac_utils.py | 63 +++++++++++++++++++++ 2 files changed, 162 insertions(+), 1 deletion(-) diff --git a/tests/units/test_core.py b/tests/units/test_core.py index 2e8636691..0be6ce9b2 100644 --- a/tests/units/test_core.py +++ b/tests/units/test_core.py @@ -34,7 +34,7 @@ from shapely.geometry import LineString, MultiPolygon, Polygon from eodag import __version__ as eodag_version -from eodag.utils import GENERIC_PRODUCT_TYPE +from eodag.utils import GENERIC_PRODUCT_TYPE, deepcopy from tests import TEST_RESOURCES_PATH from tests.context import ( DEFAULT_MAX_ITEMS_PER_PAGE, @@ -1064,6 +1064,104 @@ def assertListProductTypesRightStructure(self, structure): or structure["_id"] in self.SUPPORTED_PRODUCT_TYPES ) + @mock.patch( + "eodag.api.core.get_ext_product_types_conf", + autospec=True, + return_value=json.loads( + """{ + "new_field":"New Value", + "title":"A different title for Sentinel 2 MSI Level 1C", + "summaries": { + "instruments": [ + "MSI" + ], + "platform": [ + "SENTINEL-2A", + "SENTINEL-2B" + ], + "constellation": [ + "Sentinel-2" + ], + "processing:level": [ + "L1C" + ] + } + }""" + ), + ) + def test_load_external_product_type_metadata(self, mock_get_ext_product_types_conf): + """Load the supported EODAG metadata from an external STAC collection""" + product_type_conf = deepcopy(self.dag.product_types_config["S2_MSI_L1C"]) + ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" + product_type_conf["stacCollection"] = ext_stac_collection_path + enhanced_product_type = self.dag.load_external_product_type_metadata( + "S2_MSI_L1C", + product_type_conf, + ) + mock_get_ext_product_types_conf.assert_called_with(ext_stac_collection_path) + # Fields not supported by EODAG + self.assertNotIn("new_field", enhanced_product_type) + # Merge lists + self.assertEqual( + enhanced_product_type["platformSerialIdentifier"], + "S2A,S2B,SENTINEL-2A,SENTINEL-2B", + ) + # Don't override existings keys + self.assertEqual(enhanced_product_type["title"], "SENTINEL2 Level-1C") + # The parameter passed `load_external_product_type_metadata` is not modified + del product_type_conf["stacCollection"] + self.assertDictEqual( + product_type_conf, + self.dag.product_types_config["S2_MSI_L1C"], + ) + + @mock.patch( + "eodag.api.core.get_ext_product_types_conf", + autospec=True, + return_value=json.loads( + """{ + "new_field":"New Value", + "title":"A different title for Sentinel 2 MSI Level 1C", + "summaries": { + "instruments": [ + "MSI" + ], + "platform": [ + "SENTINEL-2A", + "SENTINEL-2B" + ], + "constellation": [ + "Sentinel-2" + ], + "processing:level": [ + "L1C" + ] + } + }""" + ), + ) + def test_fetch_external_product_types_metadata( + self, mock_get_ext_product_types_conf + ): + """Updates product types config with metadata from external STAC collections""" + product_type_conf = self.dag.product_types_config["S2_MSI_L1C"] + ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" + product_type_conf["stacCollection"] = ext_stac_collection_path + self.dag.fetch_external_product_types_metadata() + mock_get_ext_product_types_conf.assert_called_with(ext_stac_collection_path) + enhanced_product_type = self.dag.product_types_config["S2_MSI_L1C"] + # Fields not supported by EODAG + self.assertNotIn("new_field", enhanced_product_type) + # Merge lists + self.assertEqual( + enhanced_product_type["platformSerialIdentifier"], + "S2A,S2B,SENTINEL-2A,SENTINEL-2B", + ) + # Don't override existings keys + self.assertEqual(enhanced_product_type["title"], "SENTINEL2 Level-1C") + # Restore the product type + del self.dag.product_types_config["S2_MSI_L1C"]["stacCollection"] + @mock.patch("eodag.api.core.open_dir", autospec=True) @mock.patch("eodag.api.core.exists_in", autospec=True, return_value=True) def test_core_object_open_index_if_exists(self, exists_in_mock, open_dir_mock): diff --git a/tests/units/test_stac_utils.py b/tests/units/test_stac_utils.py index ecef201b1..2a4c226a6 100644 --- a/tests/units/test_stac_utils.py +++ b/tests/units/test_stac_utils.py @@ -27,6 +27,7 @@ from pygeofilter.values import Geometry import eodag.rest.utils.rfc3339 as rfc3339 +from eodag.rest.stac import StacCollection from eodag.rest.types.stac_search import SearchPostRequest from eodag.rest.utils.cql_evaluate import EodagEvaluator from eodag.utils.exceptions import ValidationError @@ -330,6 +331,68 @@ def test_predicate(self): }, ) + @mock.patch("eodag.rest.stac.get_ext_product_types_conf", autospec=True) + def test_fetch_external_stac_collections( + self, mock_stac_get_ext_product_types_conf + ): + """Load external STAC collections""" + external_json = """{ + "new_field":"New Value", + "title":"A different title for Sentinel 2 MSI Level 1C", + "summaries": { + "instruments": [ + "MSI" + ], + "platform": [ + "SENTINEL-2A", + "SENTINEL-2B" + ], + "constellation": [ + "Sentinel-2" + ], + "processing:level": [ + "L1C" + ] + } + }""" + mock_stac_get_ext_product_types_conf.return_value = json.loads(external_json) + product_type_conf = self.rest_utils.eodag_api.product_types_config["S2_MSI_L1C"] + ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" + product_type_conf["stacCollection"] = ext_stac_collection_path + # using a context manager cause `self.rest_utils.get_stac_collection_by_id needs` + # to call the not-patched versione of `eodag.api.core.get_ext_product_types_conf` + with mock.patch( + "eodag.api.core.get_ext_product_types_conf", autospec=True + ) as mock_core_get_ext_product_types_conf: + mock_core_get_ext_product_types_conf.return_value = json.loads( + external_json + ) + self.rest_utils.eodag_api.fetch_external_product_types_metadata() + mock_core_get_ext_product_types_conf.assert_called_once_with( + ext_stac_collection_path + ) + StacCollection.fetch_external_stac_collections(self.rest_utils.eodag_api) + r = self.rest_utils.get_stac_collection_by_id( + url="", root="", collection_id="S2_MSI_L1C" + ) + mock_stac_get_ext_product_types_conf.assert_called_with( + ext_stac_collection_path + ) + mock_stac_get_ext_product_types_conf.call_args_list + # Fields not supported by EODAG + self.assertIn("new_field", r) + # Merge lists + self.assertEqual( + r["summaries"]["platform"][0], "S2A,S2B,SENTINEL-2A,SENTINEL-2B" + ) + # Don't override existings keys + self.assertEqual(r["title"], "SENTINEL2 Level-1C") + # Restore previous state + del self.rest_utils.eodag_api.product_types_config["S2_MSI_L1C"][ + "stacCollection" + ] + StacCollection.ext_stac_collections.clear() + def test_contains(self): attribute = ast.Attribute("test") self.assertEqual( From 7a5772e0683d844f786431f492d379267123b3d5 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Fri, 22 Mar 2024 14:22:53 +0100 Subject: [PATCH 06/14] refactor: change accessibility of the new functions in `core.py` to protected --- eodag/api/core.py | 8 ++++---- tests/units/test_core.py | 6 +++--- tests/units/test_stac_utils.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/eodag/api/core.py b/eodag/api/core.py index cd2ff87eb..37f336bb0 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -221,7 +221,7 @@ def __init__( self.set_locations_conf(locations_conf_path) self.search_errors: Set = set() - self.fetch_external_product_types_metadata() + self._fetch_external_product_types_metadata() def get_version(self) -> str: """Get eodag package version""" @@ -576,7 +576,7 @@ def list_product_types( # Return the product_types sorted in lexicographic order of their ID return sorted(product_types, key=itemgetter("ID")) - def load_external_product_type_metadata( + def _load_external_product_type_metadata( self, product_type_id: str, product_type_conf: Dict[str, Any] ) -> Optional[Dict[str, Any]]: """Loads external enhanced metadata for a given product type. @@ -630,11 +630,11 @@ def load_external_product_type_metadata( enhanced_product_type_conf[ext_cfg_k] = ",".join(merged_values) return enhanced_product_type_conf - def fetch_external_product_types_metadata(self) -> None: + def _fetch_external_product_types_metadata(self) -> None: """Fetch external product types metadata and update if needed""" # load external product type metadata for product_type_id, product_type_conf in self.product_types_config.items(): - enhanced_product_type = self.load_external_product_type_metadata( + enhanced_product_type = self._load_external_product_type_metadata( product_type_id, product_type_conf, ) diff --git a/tests/units/test_core.py b/tests/units/test_core.py index 0be6ce9b2..04e668c3d 100644 --- a/tests/units/test_core.py +++ b/tests/units/test_core.py @@ -1094,7 +1094,7 @@ def test_load_external_product_type_metadata(self, mock_get_ext_product_types_co product_type_conf = deepcopy(self.dag.product_types_config["S2_MSI_L1C"]) ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" product_type_conf["stacCollection"] = ext_stac_collection_path - enhanced_product_type = self.dag.load_external_product_type_metadata( + enhanced_product_type = self.dag._load_external_product_type_metadata( "S2_MSI_L1C", product_type_conf, ) @@ -1108,7 +1108,7 @@ def test_load_external_product_type_metadata(self, mock_get_ext_product_types_co ) # Don't override existings keys self.assertEqual(enhanced_product_type["title"], "SENTINEL2 Level-1C") - # The parameter passed `load_external_product_type_metadata` is not modified + # The parameter passed `_load_external_product_type_metadata` is not modified del product_type_conf["stacCollection"] self.assertDictEqual( product_type_conf, @@ -1147,7 +1147,7 @@ def test_fetch_external_product_types_metadata( product_type_conf = self.dag.product_types_config["S2_MSI_L1C"] ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" product_type_conf["stacCollection"] = ext_stac_collection_path - self.dag.fetch_external_product_types_metadata() + self.dag._fetch_external_product_types_metadata() mock_get_ext_product_types_conf.assert_called_with(ext_stac_collection_path) enhanced_product_type = self.dag.product_types_config["S2_MSI_L1C"] # Fields not supported by EODAG diff --git a/tests/units/test_stac_utils.py b/tests/units/test_stac_utils.py index 2a4c226a6..c859f2b84 100644 --- a/tests/units/test_stac_utils.py +++ b/tests/units/test_stac_utils.py @@ -367,7 +367,7 @@ def test_fetch_external_stac_collections( mock_core_get_ext_product_types_conf.return_value = json.loads( external_json ) - self.rest_utils.eodag_api.fetch_external_product_types_metadata() + self.rest_utils.eodag_api._fetch_external_product_types_metadata() mock_core_get_ext_product_types_conf.assert_called_once_with( ext_stac_collection_path ) From a5c116021792605d07ad95a9598d03dbfb8d709b Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Mon, 25 Mar 2024 08:30:16 +0100 Subject: [PATCH 07/14] rebase: remove `utils.py` --- eodag/rest/utils.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 eodag/rest/utils.py diff --git a/eodag/rest/utils.py b/eodag/rest/utils.py deleted file mode 100644 index e69de29bb..000000000 From 95c4a542019fdced3ca98b6470de936989076220 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Mon, 25 Mar 2024 10:47:04 +0100 Subject: [PATCH 08/14] refactor: use `stac.get_ext_stac_collection` to get the external STAC collection --- eodag/api/core.py | 3 ++- eodag/rest/stac.py | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/eodag/api/core.py b/eodag/api/core.py index 37f336bb0..c416c64c1 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -55,6 +55,7 @@ ) from eodag.plugins.manager import PluginManager from eodag.plugins.search.build_search_result import BuildPostSearchResult +from eodag.rest.stac import get_ext_stac_collection from eodag.types import model_fields_to_annotated from eodag.types.queryables import CommonQueryables from eodag.types.whoosh import EODAGQueryParser @@ -608,7 +609,7 @@ def _load_external_product_type_metadata( "generic_product_type_parsable_metadata" ] parsable_metadata = mtd_cfg_as_conversion_and_querypath(parsable_metadata) - external_stac_collection = get_ext_product_types_conf( + external_stac_collection = get_ext_stac_collection( external_stac_collection_path ) # Loading external enhanced product types metadata diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index fdc103391..d9024f3c1 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -27,6 +27,8 @@ import dateutil.parser import geojson +import orjson +import requests import shapefile from dateutil import tz from dateutil.relativedelta import relativedelta @@ -39,9 +41,10 @@ format_metadata, get_metadata_path, ) -from eodag.config import get_ext_product_types_conf from eodag.utils import ( DEFAULT_MISSION_START_DATE, + HTTP_REQ_TIMEOUT, + USER_AGENT, deepcopy, dict_items_recursive_apply, format_dict_items, @@ -49,6 +52,7 @@ jsonpath_parse_dict_items, string_to_jsonpath, update_nested_dict, + uri_to_path, urljoin, ) from eodag.utils.exceptions import ( @@ -68,6 +72,44 @@ STAC_CATALOGS_PREFIX = "catalogs" +def get_ext_stac_collection(stac_uri: str) -> Dict[str, Any]: + """Read external STAC collection + + :param stac_uri: URI to local or remote collection + :type stac_uri: str + :returns: The external STAC collection + :rtype: dict + """ + logger.info("Fetching external STAC collection from %s", stac_uri) + if stac_uri.lower().startswith("http"): + # read from remote + try: + response = requests.get( + stac_uri, headers=USER_AGENT, timeout=HTTP_REQ_TIMEOUT + ) + response.raise_for_status() + return response.json() + except requests.RequestException as e: + logger.debug(e) + logger.warning( + "Could not read remote external STAC collection from %s", stac_uri + ) + return {} + elif stac_uri.lower().startswith("file"): + stac_uri = uri_to_path(stac_uri) + + # read from local + try: + with open(stac_uri, "rb") as f: + return orjson.loads(f.read()) + except (orjson.JSONDecodeError, FileNotFoundError) as e: + logger.debug(e) + logger.warning( + "Could not read local external STAC collection from %s", stac_uri + ) + return {} + + class StacCommon: """Stac common object @@ -627,7 +669,7 @@ def fetch_external_stac_collections(cls, eodag_api: EODataAccessGateway) -> None if not ext_stac_collection_path: continue logger.info(f"Fetching external STAC collection for {product_type['ID']}") - ext_stac_collection = get_ext_product_types_conf(ext_stac_collection_path) + ext_stac_collection = get_ext_stac_collection(ext_stac_collection_path) cls.ext_stac_collections[product_type["ID"]] = ext_stac_collection def __init__( From 4eca28039c82e35be3d2e504f34c35c0092b7eb7 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Mon, 25 Mar 2024 10:49:49 +0100 Subject: [PATCH 09/14] tests: external STAC collections --- tests/units/test_core.py | 14 ++-- tests/units/test_stac_utils.py | 125 +++++++++++++++++---------------- 2 files changed, 69 insertions(+), 70 deletions(-) diff --git a/tests/units/test_core.py b/tests/units/test_core.py index 04e668c3d..2e7fc54de 100644 --- a/tests/units/test_core.py +++ b/tests/units/test_core.py @@ -1065,7 +1065,7 @@ def assertListProductTypesRightStructure(self, structure): ) @mock.patch( - "eodag.api.core.get_ext_product_types_conf", + "eodag.api.core.get_ext_stac_collection", autospec=True, return_value=json.loads( """{ @@ -1089,7 +1089,7 @@ def assertListProductTypesRightStructure(self, structure): }""" ), ) - def test_load_external_product_type_metadata(self, mock_get_ext_product_types_conf): + def test_load_external_product_type_metadata(self, mock_get_ext_stac_collection): """Load the supported EODAG metadata from an external STAC collection""" product_type_conf = deepcopy(self.dag.product_types_config["S2_MSI_L1C"]) ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" @@ -1098,7 +1098,7 @@ def test_load_external_product_type_metadata(self, mock_get_ext_product_types_co "S2_MSI_L1C", product_type_conf, ) - mock_get_ext_product_types_conf.assert_called_with(ext_stac_collection_path) + mock_get_ext_stac_collection.assert_called_with(ext_stac_collection_path) # Fields not supported by EODAG self.assertNotIn("new_field", enhanced_product_type) # Merge lists @@ -1116,7 +1116,7 @@ def test_load_external_product_type_metadata(self, mock_get_ext_product_types_co ) @mock.patch( - "eodag.api.core.get_ext_product_types_conf", + "eodag.api.core.get_ext_stac_collection", autospec=True, return_value=json.loads( """{ @@ -1140,15 +1140,13 @@ def test_load_external_product_type_metadata(self, mock_get_ext_product_types_co }""" ), ) - def test_fetch_external_product_types_metadata( - self, mock_get_ext_product_types_conf - ): + def test_fetch_external_product_types_metadata(self, mock_get_ext_stac_collection): """Updates product types config with metadata from external STAC collections""" product_type_conf = self.dag.product_types_config["S2_MSI_L1C"] ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" product_type_conf["stacCollection"] = ext_stac_collection_path self.dag._fetch_external_product_types_metadata() - mock_get_ext_product_types_conf.assert_called_with(ext_stac_collection_path) + mock_get_ext_stac_collection.assert_called_with(ext_stac_collection_path) enhanced_product_type = self.dag.product_types_config["S2_MSI_L1C"] # Fields not supported by EODAG self.assertNotIn("new_field", enhanced_product_type) diff --git a/tests/units/test_stac_utils.py b/tests/units/test_stac_utils.py index c859f2b84..2093148a6 100644 --- a/tests/units/test_stac_utils.py +++ b/tests/units/test_stac_utils.py @@ -55,6 +55,12 @@ def setUpClass(cls): cls.rest_utils = rest_utils + import eodag.rest.core as rest_core + + importlib.reload(rest_core) + + cls.rest_core = rest_core + search_results_file = os.path.join( TEST_RESOURCES_PATH, "eodag_search_result_peps.geojson" ) @@ -247,6 +253,63 @@ def test_get_datetime(self): self.assertEqual(dtstart, start) self.assertEqual(dtend, end) + @mock.patch("eodag.rest.stac.get_ext_stac_collection", autospec=True) + def test_fetch_external_stac_collections(self, mock_get_ext_stac_collection): + """Load external STAC collections""" + external_json = """{ + "new_field":"New Value", + "title":"A different title for Sentinel 2 MSI Level 1C", + "summaries": { + "instruments": [ + "MSI" + ], + "platform": [ + "SENTINEL-2A", + "SENTINEL-2B" + ], + "constellation": [ + "Sentinel-2" + ], + "processing:level": [ + "L1C" + ] + } + }""" + mock_get_ext_stac_collection.return_value = json.loads(external_json) + product_type_conf = self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"] + ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" + product_type_conf["stacCollection"] = ext_stac_collection_path + # Update EODataAccessGateway.product_types_config + # Using a context manager cause `self.rest.core.get_stac_collection_by_id needs` + # to call the not-patched version of `eodag.api.core.get_ext_stac_collection` + with mock.patch( + "eodag.api.core.get_ext_stac_collection", autospec=True + ) as mock_core_get_ext_stac_collection: + mock_core_get_ext_stac_collection.return_value = json.loads(external_json) + self.rest_core.eodag_api._fetch_external_product_types_metadata() + mock_core_get_ext_stac_collection.assert_called_once_with( + ext_stac_collection_path + ) + # Init StacCollection.ext_stac_collections + StacCollection.fetch_external_stac_collections(self.rest_core.eodag_api) + r = self.rest_core.get_stac_collection_by_id( + url="", root="", collection_id="S2_MSI_L1C" + ) + mock_get_ext_stac_collection.assert_called_with(ext_stac_collection_path) + # Fields not supported by EODAG + self.assertIn("new_field", r) + # Merge lists + self.assertEqual( + r["summaries"]["platform"][0], "S2A,S2B,SENTINEL-2A,SENTINEL-2B" + ) + # Don't override existing keys + self.assertEqual(r["title"], "SENTINEL2 Level-1C") + # Restore previous state + del self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"][ + "stacCollection" + ] + StacCollection.ext_stac_collections.clear() + class TestEodagCql2jsonEvaluator(unittest.TestCase): def setUp(self): @@ -331,68 +394,6 @@ def test_predicate(self): }, ) - @mock.patch("eodag.rest.stac.get_ext_product_types_conf", autospec=True) - def test_fetch_external_stac_collections( - self, mock_stac_get_ext_product_types_conf - ): - """Load external STAC collections""" - external_json = """{ - "new_field":"New Value", - "title":"A different title for Sentinel 2 MSI Level 1C", - "summaries": { - "instruments": [ - "MSI" - ], - "platform": [ - "SENTINEL-2A", - "SENTINEL-2B" - ], - "constellation": [ - "Sentinel-2" - ], - "processing:level": [ - "L1C" - ] - } - }""" - mock_stac_get_ext_product_types_conf.return_value = json.loads(external_json) - product_type_conf = self.rest_utils.eodag_api.product_types_config["S2_MSI_L1C"] - ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" - product_type_conf["stacCollection"] = ext_stac_collection_path - # using a context manager cause `self.rest_utils.get_stac_collection_by_id needs` - # to call the not-patched versione of `eodag.api.core.get_ext_product_types_conf` - with mock.patch( - "eodag.api.core.get_ext_product_types_conf", autospec=True - ) as mock_core_get_ext_product_types_conf: - mock_core_get_ext_product_types_conf.return_value = json.loads( - external_json - ) - self.rest_utils.eodag_api._fetch_external_product_types_metadata() - mock_core_get_ext_product_types_conf.assert_called_once_with( - ext_stac_collection_path - ) - StacCollection.fetch_external_stac_collections(self.rest_utils.eodag_api) - r = self.rest_utils.get_stac_collection_by_id( - url="", root="", collection_id="S2_MSI_L1C" - ) - mock_stac_get_ext_product_types_conf.assert_called_with( - ext_stac_collection_path - ) - mock_stac_get_ext_product_types_conf.call_args_list - # Fields not supported by EODAG - self.assertIn("new_field", r) - # Merge lists - self.assertEqual( - r["summaries"]["platform"][0], "S2A,S2B,SENTINEL-2A,SENTINEL-2B" - ) - # Don't override existings keys - self.assertEqual(r["title"], "SENTINEL2 Level-1C") - # Restore previous state - del self.rest_utils.eodag_api.product_types_config["S2_MSI_L1C"][ - "stacCollection" - ] - StacCollection.ext_stac_collections.clear() - def test_contains(self): attribute = ast.Attribute("test") self.assertEqual( From 8be87aa41080cb15e20ed3f95768454ae5db16d0 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Mon, 25 Mar 2024 13:07:31 +0100 Subject: [PATCH 10/14] refactor: move `get_ext_stac_collection` to `eodag.utils` --- eodag/api/core.py | 2 +- eodag/rest/stac.py | 42 +------------------------- eodag/utils/__init__.py | 40 +++++++++++++++++++++++++ tests/units/test_stac_utils.py | 54 ++++++++++++++++++---------------- 4 files changed, 71 insertions(+), 67 deletions(-) diff --git a/eodag/api/core.py b/eodag/api/core.py index c416c64c1..6112153ae 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -55,7 +55,6 @@ ) from eodag.plugins.manager import PluginManager from eodag.plugins.search.build_search_result import BuildPostSearchResult -from eodag.rest.stac import get_ext_stac_collection from eodag.types import model_fields_to_annotated from eodag.types.queryables import CommonQueryables from eodag.types.whoosh import EODAGQueryParser @@ -71,6 +70,7 @@ _deprecated, copy_deepcopy, deepcopy, + get_ext_stac_collection, get_geometry_from_various, makedirs, obj_md5sum, diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index d9024f3c1..f3f798aba 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -27,8 +27,6 @@ import dateutil.parser import geojson -import orjson -import requests import shapefile from dateutil import tz from dateutil.relativedelta import relativedelta @@ -48,11 +46,11 @@ deepcopy, dict_items_recursive_apply, format_dict_items, + get_ext_stac_collection, guess_file_type, jsonpath_parse_dict_items, string_to_jsonpath, update_nested_dict, - uri_to_path, urljoin, ) from eodag.utils.exceptions import ( @@ -72,44 +70,6 @@ STAC_CATALOGS_PREFIX = "catalogs" -def get_ext_stac_collection(stac_uri: str) -> Dict[str, Any]: - """Read external STAC collection - - :param stac_uri: URI to local or remote collection - :type stac_uri: str - :returns: The external STAC collection - :rtype: dict - """ - logger.info("Fetching external STAC collection from %s", stac_uri) - if stac_uri.lower().startswith("http"): - # read from remote - try: - response = requests.get( - stac_uri, headers=USER_AGENT, timeout=HTTP_REQ_TIMEOUT - ) - response.raise_for_status() - return response.json() - except requests.RequestException as e: - logger.debug(e) - logger.warning( - "Could not read remote external STAC collection from %s", stac_uri - ) - return {} - elif stac_uri.lower().startswith("file"): - stac_uri = uri_to_path(stac_uri) - - # read from local - try: - with open(stac_uri, "rb") as f: - return orjson.loads(f.read()) - except (orjson.JSONDecodeError, FileNotFoundError) as e: - logger.debug(e) - logger.warning( - "Could not read local external STAC collection from %s", stac_uri - ) - return {} - - class StacCommon: """Stac common object diff --git a/eodag/utils/__init__.py b/eodag/utils/__init__.py index b7236d540..6dd767f15 100644 --- a/eodag/utils/__init__.py +++ b/eodag/utils/__init__.py @@ -78,6 +78,8 @@ ) from urllib.request import url2pathname +import requests + if sys.version_info >= (3, 9): from typing import Annotated, get_args, get_origin # noqa else: @@ -1472,3 +1474,41 @@ def get_ssl_context(ssl_verify: bool) -> ssl.SSLContext: ctx.check_hostname = True ctx.verify_mode = ssl.CERT_REQUIRED return ctx + + +def get_ext_stac_collection(stac_uri: str) -> Dict[str, Any]: + """Read external STAC collection + + :param stac_uri: URI to local or remote collection + :type stac_uri: str + :returns: The external STAC collection + :rtype: dict + """ + logger.info("Fetching external STAC collection from %s", stac_uri) + if stac_uri.lower().startswith("http"): + # read from remote + try: + response = requests.get( + stac_uri, headers=USER_AGENT, timeout=HTTP_REQ_TIMEOUT + ) + response.raise_for_status() + return response.json() + except requests.RequestException as e: + logger.debug(e) + logger.warning( + "Could not read remote external STAC collection from %s", stac_uri + ) + return {} + elif stac_uri.lower().startswith("file"): + stac_uri = uri_to_path(stac_uri) + + # read from local + try: + with open(stac_uri, "rb") as f: + return orjson.loads(f.read()) + except (orjson.JSONDecodeError, FileNotFoundError) as e: + logger.debug(e) + logger.warning( + "Could not read local external STAC collection from %s", stac_uri + ) + return {} diff --git a/tests/units/test_stac_utils.py b/tests/units/test_stac_utils.py index 2093148a6..5fd99e0a6 100644 --- a/tests/units/test_stac_utils.py +++ b/tests/units/test_stac_utils.py @@ -253,8 +253,7 @@ def test_get_datetime(self): self.assertEqual(dtstart, start) self.assertEqual(dtend, end) - @mock.patch("eodag.rest.stac.get_ext_stac_collection", autospec=True) - def test_fetch_external_stac_collections(self, mock_get_ext_stac_collection): + def test_fetch_external_stac_collections(self): """Load external STAC collections""" external_json = """{ "new_field":"New Value", @@ -275,13 +274,11 @@ def test_fetch_external_stac_collections(self, mock_get_ext_stac_collection): ] } }""" - mock_get_ext_stac_collection.return_value = json.loads(external_json) product_type_conf = self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"] ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" product_type_conf["stacCollection"] = ext_stac_collection_path - # Update EODataAccessGateway.product_types_config - # Using a context manager cause `self.rest.core.get_stac_collection_by_id needs` - # to call the not-patched version of `eodag.api.core.get_ext_stac_collection` + + # Check if `eodag.utils.get_ext_stac_collection()` is called with right arg with mock.patch( "eodag.api.core.get_ext_stac_collection", autospec=True ) as mock_core_get_ext_stac_collection: @@ -290,25 +287,32 @@ def test_fetch_external_stac_collections(self, mock_get_ext_stac_collection): mock_core_get_ext_stac_collection.assert_called_once_with( ext_stac_collection_path ) - # Init StacCollection.ext_stac_collections - StacCollection.fetch_external_stac_collections(self.rest_core.eodag_api) - r = self.rest_core.get_stac_collection_by_id( - url="", root="", collection_id="S2_MSI_L1C" - ) - mock_get_ext_stac_collection.assert_called_with(ext_stac_collection_path) - # Fields not supported by EODAG - self.assertIn("new_field", r) - # Merge lists - self.assertEqual( - r["summaries"]["platform"][0], "S2A,S2B,SENTINEL-2A,SENTINEL-2B" - ) - # Don't override existing keys - self.assertEqual(r["title"], "SENTINEL2 Level-1C") - # Restore previous state - del self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"][ - "stacCollection" - ] - StacCollection.ext_stac_collections.clear() + + # Check if the returned STAC collection contains updated data + with mock.patch( + "eodag.rest.stac.get_ext_stac_collection", autospec=True + ) as mock_stac_get_ext_stac_collection: + mock_stac_get_ext_stac_collection.return_value = json.loads(external_json) + StacCollection.fetch_external_stac_collections(self.rest_core.eodag_api) + r = self.rest_core.get_stac_collection_by_id( + url="", root="", collection_id="S2_MSI_L1C" + ) + mock_stac_get_ext_stac_collection.assert_called_with( + ext_stac_collection_path + ) + # Fields not supported by EODAG + self.assertIn("new_field", r) + # Merge lists + self.assertEqual( + r["summaries"]["platform"][0], "S2A,S2B,SENTINEL-2A,SENTINEL-2B" + ) + # Don't override existing keys + self.assertEqual(r["title"], "SENTINEL2 Level-1C") + # Restore previous state + del self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"][ + "stacCollection" + ] + StacCollection.ext_stac_collections.clear() class TestEodagCql2jsonEvaluator(unittest.TestCase): From 0709b61cbc01eb4a5facbdad81775cdabb170098 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Tue, 23 Apr 2024 16:29:07 +0200 Subject: [PATCH 11/14] refactor: move methods from `core` to `stac` --- eodag/api/core.py | 79 +------------------ eodag/rest/core.py | 1 + eodag/rest/stac.py | 90 ++++++++++++++++++++++ tests/units/test_core.py | 98 +----------------------- tests/units/test_stac_utils.py | 136 +++++++++++++++++++++++++++++---- 5 files changed, 216 insertions(+), 188 deletions(-) diff --git a/eodag/api/core.py b/eodag/api/core.py index 6112153ae..7c35b41a6 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -35,11 +35,7 @@ from whoosh.index import create_in, exists_in, open_dir from whoosh.qparser import QueryParser -from eodag.api.product.metadata_mapping import ( - NOT_AVAILABLE, - mtd_cfg_as_conversion_and_querypath, - properties_from_json, -) +from eodag.api.product.metadata_mapping import mtd_cfg_as_conversion_and_querypath from eodag.api.search_result import SearchResult from eodag.config import ( PluginConfig, @@ -70,7 +66,6 @@ _deprecated, copy_deepcopy, deepcopy, - get_ext_stac_collection, get_geometry_from_various, makedirs, obj_md5sum, @@ -222,8 +217,6 @@ def __init__( self.set_locations_conf(locations_conf_path) self.search_errors: Set = set() - self._fetch_external_product_types_metadata() - def get_version(self) -> str: """Get eodag package version""" return pkg_resources.get_distribution("eodag").version @@ -577,76 +570,6 @@ def list_product_types( # Return the product_types sorted in lexicographic order of their ID return sorted(product_types, key=itemgetter("ID")) - def _load_external_product_type_metadata( - self, product_type_id: str, product_type_conf: Dict[str, Any] - ) -> Optional[Dict[str, Any]]: - """Loads external enhanced metadata for a given product type. - - The existing product type configuration is merged with the one from the external file and - the result is returned. - - Existing keys are not updated. Lists are merged also for existing keys. - Only the keys defined in `search.discover_product_types.generic_product_type_parsable_metadata` - from `eodag/resources/stac_provider.yml` are loaded into the configuration. - - :param product_type_id: Product type ID to update with the external metadata. - :type product_type_id: str - :param product_type_conf: Product type configuration to update with the external - metadata. - :type product_type_conf: Dict[str, Any] - :returns: The enhanced product type dictionary, None if no external metadata is available - :rtype: Optional[Dict[str, Any]] - """ - external_stac_collection_path = product_type_conf.get("stacCollection") - if not external_stac_collection_path: - return None - enhanced_product_type_conf = deepcopy(product_type_conf) - logger.info( - f"Fetching external enhanced product type metadata for {product_type_id}" - ) - stac_provider_config = load_stac_provider_config() - parsable_metadata = stac_provider_config["search"]["discover_product_types"][ - "generic_product_type_parsable_metadata" - ] - parsable_metadata = mtd_cfg_as_conversion_and_querypath(parsable_metadata) - external_stac_collection = get_ext_stac_collection( - external_stac_collection_path - ) - # Loading external enhanced product types metadata - external_stac_collection_parsed = properties_from_json( - external_stac_collection, - parsable_metadata, - ) - # Merge `external_stac_collection_parsed` into `enhanced_product_type_conf` - for ext_cfg_k, ext_cfg_v in external_stac_collection_parsed.items(): - old_value = enhanced_product_type_conf.get(ext_cfg_k) - if old_value is None: - enhanced_product_type_conf[ext_cfg_k] = ext_cfg_v - elif ext_cfg_k in ("instrument", "platformSerialIdentifier", "keywords"): - merged_values = old_value.split(",") - new_values = ext_cfg_v.split(",") - for v in new_values: - if v != NOT_AVAILABLE and v not in merged_values: - merged_values.append(v) - enhanced_product_type_conf[ext_cfg_k] = ",".join(merged_values) - return enhanced_product_type_conf - - def _fetch_external_product_types_metadata(self) -> None: - """Fetch external product types metadata and update if needed""" - # load external product type metadata - for product_type_id, product_type_conf in self.product_types_config.items(): - enhanced_product_type = self._load_external_product_type_metadata( - product_type_id, - product_type_conf, - ) - if enhanced_product_type: - product_type_conf.update(enhanced_product_type) - new_md5 = obj_md5sum(self.product_types_config.source) - if new_md5 != self.product_types_config_md5: - self.product_types_config_md5 = new_md5 - # rebuild index after product types list update - self.build_index() - def fetch_product_types_list(self, provider: Optional[str] = None) -> None: """Fetch product types list and update if needed diff --git a/eodag/rest/core.py b/eodag/rest/core.py index fd33b5835..29c5c9ddf 100644 --- a/eodag/rest/core.py +++ b/eodag/rest/core.py @@ -689,6 +689,7 @@ def crunch_products( def eodag_api_init() -> None: """Init EODataAccessGateway server instance, pre-running all time consuming tasks""" eodag_api.fetch_product_types_list() + StacCollection.fetch_external_product_types_metadata(eodag_api) StacCollection.fetch_external_stac_collections(eodag_api) # pre-build search plugins diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index f3f798aba..7162f1255 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -36,9 +36,13 @@ from eodag.api.product.metadata_mapping import ( DEFAULT_METADATA_MAPPING, + NOT_AVAILABLE, format_metadata, get_metadata_path, + mtd_cfg_as_conversion_and_querypath, + properties_from_json, ) +from eodag.config import load_stac_provider_config from eodag.utils import ( DEFAULT_MISSION_START_DATE, HTTP_REQ_TIMEOUT, @@ -49,6 +53,7 @@ get_ext_stac_collection, guess_file_type, jsonpath_parse_dict_items, + obj_md5sum, string_to_jsonpath, update_nested_dict, urljoin, @@ -632,6 +637,91 @@ def fetch_external_stac_collections(cls, eodag_api: EODataAccessGateway) -> None ext_stac_collection = get_ext_stac_collection(ext_stac_collection_path) cls.ext_stac_collections[product_type["ID"]] = ext_stac_collection + @classmethod + def _load_external_product_type_metadata( + cls, + product_type_id: str, + product_type_conf: Dict[str, Any], + parsable_metadata: Dict[str, Any], + ) -> Optional[Dict[str, Any]]: + """Loads external enhanced metadata for a given product type. + + The existing product type configuration is merged with the one from the external file and + the result is returned. + + Existing keys are not updated. Lists are merged also for existing keys. + Only the keys defined in `search.discover_product_types.generic_product_type_parsable_metadata` + from `eodag/resources/stac_provider.yml` are loaded into the configuration. + + :param product_type_id: Product type ID to update with the external metadata. + :type product_type_id: str + :param product_type_conf: Product type configuration to update with the external + metadata. + :type product_type_conf: Dict[str, Any] + :param parsable_metadata: Parsable metadata. + :type parsable_metadata: Dict[str, Any] + :returns: The enhanced product type dictionary, None if no external metadata is available + :rtype: Optional[Dict[str, Any]] + """ + external_stac_collection_path = product_type_conf.get("stacCollection") + if not external_stac_collection_path: + return None + enhanced_product_type_conf = deepcopy(product_type_conf) + logger.info( + f"Fetching external enhanced product type metadata for {product_type_id}" + ) + external_stac_collection = get_ext_stac_collection( + external_stac_collection_path + ) + # Loading external enhanced product types metadata + external_stac_collection_parsed = properties_from_json( + external_stac_collection, + parsable_metadata, + ) + # Merge `external_stac_collection_parsed` into `enhanced_product_type_conf` + for ext_cfg_k, ext_cfg_v in external_stac_collection_parsed.items(): + old_value = enhanced_product_type_conf.get(ext_cfg_k) + if old_value is None: + enhanced_product_type_conf[ext_cfg_k] = ext_cfg_v + elif ext_cfg_k in ("instrument", "platformSerialIdentifier", "keywords"): + merged_values = old_value.split(",") + new_values = ext_cfg_v.split(",") + for v in new_values: + if v != NOT_AVAILABLE and v not in merged_values: + merged_values.append(v) + enhanced_product_type_conf[ext_cfg_k] = ",".join(merged_values) + return enhanced_product_type_conf + + @classmethod + def fetch_external_product_types_metadata( + cls, eodag_api: EODataAccessGateway + ) -> None: + """Fetch external product types metadata and update EODAG if needed""" + stac_provider_config = load_stac_provider_config() + parsable_metadata = stac_provider_config["search"]["discover_product_types"][ + "generic_product_type_parsable_metadata" + ] + parsable_metadata: Dict[str, Any] = mtd_cfg_as_conversion_and_querypath( + parsable_metadata + ) + # load external product type metadata + for ( + product_type_id, + product_type_conf, + ) in eodag_api.product_types_config.items(): + enhanced_product_type = cls._load_external_product_type_metadata( + product_type_id, + product_type_conf, + parsable_metadata, + ) + if enhanced_product_type: + product_type_conf.update(enhanced_product_type) + new_md5 = obj_md5sum(eodag_api.product_types_config.source) + if new_md5 != eodag_api.product_types_config_md5: + eodag_api.product_types_config_md5 = new_md5 + # rebuild index after product types list update + eodag_api.build_index() + def __init__( self, url: str, diff --git a/tests/units/test_core.py b/tests/units/test_core.py index 2e7fc54de..2e8636691 100644 --- a/tests/units/test_core.py +++ b/tests/units/test_core.py @@ -34,7 +34,7 @@ from shapely.geometry import LineString, MultiPolygon, Polygon from eodag import __version__ as eodag_version -from eodag.utils import GENERIC_PRODUCT_TYPE, deepcopy +from eodag.utils import GENERIC_PRODUCT_TYPE from tests import TEST_RESOURCES_PATH from tests.context import ( DEFAULT_MAX_ITEMS_PER_PAGE, @@ -1064,102 +1064,6 @@ def assertListProductTypesRightStructure(self, structure): or structure["_id"] in self.SUPPORTED_PRODUCT_TYPES ) - @mock.patch( - "eodag.api.core.get_ext_stac_collection", - autospec=True, - return_value=json.loads( - """{ - "new_field":"New Value", - "title":"A different title for Sentinel 2 MSI Level 1C", - "summaries": { - "instruments": [ - "MSI" - ], - "platform": [ - "SENTINEL-2A", - "SENTINEL-2B" - ], - "constellation": [ - "Sentinel-2" - ], - "processing:level": [ - "L1C" - ] - } - }""" - ), - ) - def test_load_external_product_type_metadata(self, mock_get_ext_stac_collection): - """Load the supported EODAG metadata from an external STAC collection""" - product_type_conf = deepcopy(self.dag.product_types_config["S2_MSI_L1C"]) - ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" - product_type_conf["stacCollection"] = ext_stac_collection_path - enhanced_product_type = self.dag._load_external_product_type_metadata( - "S2_MSI_L1C", - product_type_conf, - ) - mock_get_ext_stac_collection.assert_called_with(ext_stac_collection_path) - # Fields not supported by EODAG - self.assertNotIn("new_field", enhanced_product_type) - # Merge lists - self.assertEqual( - enhanced_product_type["platformSerialIdentifier"], - "S2A,S2B,SENTINEL-2A,SENTINEL-2B", - ) - # Don't override existings keys - self.assertEqual(enhanced_product_type["title"], "SENTINEL2 Level-1C") - # The parameter passed `_load_external_product_type_metadata` is not modified - del product_type_conf["stacCollection"] - self.assertDictEqual( - product_type_conf, - self.dag.product_types_config["S2_MSI_L1C"], - ) - - @mock.patch( - "eodag.api.core.get_ext_stac_collection", - autospec=True, - return_value=json.loads( - """{ - "new_field":"New Value", - "title":"A different title for Sentinel 2 MSI Level 1C", - "summaries": { - "instruments": [ - "MSI" - ], - "platform": [ - "SENTINEL-2A", - "SENTINEL-2B" - ], - "constellation": [ - "Sentinel-2" - ], - "processing:level": [ - "L1C" - ] - } - }""" - ), - ) - def test_fetch_external_product_types_metadata(self, mock_get_ext_stac_collection): - """Updates product types config with metadata from external STAC collections""" - product_type_conf = self.dag.product_types_config["S2_MSI_L1C"] - ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" - product_type_conf["stacCollection"] = ext_stac_collection_path - self.dag._fetch_external_product_types_metadata() - mock_get_ext_stac_collection.assert_called_with(ext_stac_collection_path) - enhanced_product_type = self.dag.product_types_config["S2_MSI_L1C"] - # Fields not supported by EODAG - self.assertNotIn("new_field", enhanced_product_type) - # Merge lists - self.assertEqual( - enhanced_product_type["platformSerialIdentifier"], - "S2A,S2B,SENTINEL-2A,SENTINEL-2B", - ) - # Don't override existings keys - self.assertEqual(enhanced_product_type["title"], "SENTINEL2 Level-1C") - # Restore the product type - del self.dag.product_types_config["S2_MSI_L1C"]["stacCollection"] - @mock.patch("eodag.api.core.open_dir", autospec=True) @mock.patch("eodag.api.core.exists_in", autospec=True, return_value=True) def test_core_object_open_index_if_exists(self, exists_in_mock, open_dir_mock): diff --git a/tests/units/test_stac_utils.py b/tests/units/test_stac_utils.py index 5fd99e0a6..78f7a092d 100644 --- a/tests/units/test_stac_utils.py +++ b/tests/units/test_stac_utils.py @@ -27,9 +27,12 @@ from pygeofilter.values import Geometry import eodag.rest.utils.rfc3339 as rfc3339 +from eodag.api.product.metadata_mapping import mtd_cfg_as_conversion_and_querypath +from eodag.config import load_stac_provider_config from eodag.rest.stac import StacCollection from eodag.rest.types.stac_search import SearchPostRequest from eodag.rest.utils.cql_evaluate import EodagEvaluator +from eodag.utils import deepcopy from eodag.utils.exceptions import ValidationError from tests import TEST_RESOURCES_PATH, mock from tests.context import SearchResult @@ -253,6 +256,114 @@ def test_get_datetime(self): self.assertEqual(dtstart, start) self.assertEqual(dtend, end) + @mock.patch( + "eodag.rest.stac.get_ext_stac_collection", + autospec=True, + return_value=json.loads( + """{ + "new_field":"New Value", + "title":"A different title for Sentinel 2 MSI Level 1C", + "summaries": { + "instruments": [ + "MSI" + ], + "platform": [ + "SENTINEL-2A", + "SENTINEL-2B" + ], + "constellation": [ + "Sentinel-2" + ], + "processing:level": [ + "L1C" + ] + } + }""" + ), + ) + def test_load_external_product_type_metadata(self, mock_get_ext_stac_collection): + """Load the supported EODAG metadata from an external STAC collection""" + product_type_conf = deepcopy( + self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"] + ) + ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" + product_type_conf["stacCollection"] = ext_stac_collection_path + stac_provider_config = load_stac_provider_config() + parsable_metadata = stac_provider_config["search"]["discover_product_types"][ + "generic_product_type_parsable_metadata" + ] + parsable_metadata = mtd_cfg_as_conversion_and_querypath(parsable_metadata) + enhanced_product_type = StacCollection._load_external_product_type_metadata( + "S2_MSI_L1C", + product_type_conf, + parsable_metadata, + ) + mock_get_ext_stac_collection.assert_called_with(ext_stac_collection_path) + # Fields not supported by EODAG + self.assertNotIn("new_field", enhanced_product_type) + # Merge lists + self.assertEqual( + "S2A,S2B,SENTINEL-2A,SENTINEL-2B", + enhanced_product_type["platformSerialIdentifier"], + ) + # Don't override existings keys + self.assertEqual("SENTINEL2 Level-1C", enhanced_product_type["title"]) + # The parameter passed `_load_external_product_type_metadata` is not modified + del product_type_conf["stacCollection"] + self.assertDictEqual( + product_type_conf, + self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"], + ) + + @mock.patch( + "eodag.rest.stac.get_ext_stac_collection", + autospec=True, + return_value=json.loads( + """{ + "new_field":"New Value", + "title":"A different title for Sentinel 2 MSI Level 1C", + "summaries": { + "instruments": [ + "MSI" + ], + "platform": [ + "SENTINEL-2A", + "SENTINEL-2B" + ], + "constellation": [ + "Sentinel-2" + ], + "processing:level": [ + "L1C" + ] + } + }""" + ), + ) + def test_fetch_external_product_types_metadata(self, mock_get_ext_stac_collection): + """Updates product types config with metadata from external STAC collections""" + product_type_conf = self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"] + ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" + product_type_conf["stacCollection"] = ext_stac_collection_path + StacCollection.fetch_external_product_types_metadata(self.rest_core.eodag_api) + mock_get_ext_stac_collection.assert_called_with(ext_stac_collection_path) + enhanced_product_type = self.rest_core.eodag_api.product_types_config[ + "S2_MSI_L1C" + ] + # Fields not supported by EODAG + self.assertNotIn("new_field", enhanced_product_type) + # Merge lists + self.assertEqual( + "S2A,S2B,SENTINEL-2A,SENTINEL-2B", + enhanced_product_type["platformSerialIdentifier"], + ) + # Don't override existing keys + self.assertEqual("SENTINEL2 Level-1C", enhanced_product_type["title"]) + # Restore the product type + del self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"][ + "stacCollection" + ] + def test_fetch_external_stac_collections(self): """Load external STAC collections""" external_json = """{ @@ -278,21 +389,20 @@ def test_fetch_external_stac_collections(self): ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" product_type_conf["stacCollection"] = ext_stac_collection_path - # Check if `eodag.utils.get_ext_stac_collection()` is called with right arg - with mock.patch( - "eodag.api.core.get_ext_stac_collection", autospec=True - ) as mock_core_get_ext_stac_collection: - mock_core_get_ext_stac_collection.return_value = json.loads(external_json) - self.rest_core.eodag_api._fetch_external_product_types_metadata() - mock_core_get_ext_stac_collection.assert_called_once_with( - ext_stac_collection_path - ) - - # Check if the returned STAC collection contains updated data with mock.patch( "eodag.rest.stac.get_ext_stac_collection", autospec=True ) as mock_stac_get_ext_stac_collection: mock_stac_get_ext_stac_collection.return_value = json.loads(external_json) + + # Check if `eodag.utils.get_ext_stac_collection()` is called with right arg + StacCollection.fetch_external_product_types_metadata( + self.rest_core.eodag_api + ) + mock_stac_get_ext_stac_collection.assert_called_once_with( + ext_stac_collection_path + ) + + # Check if the returned STAC collection contains updated data StacCollection.fetch_external_stac_collections(self.rest_core.eodag_api) r = self.rest_core.get_stac_collection_by_id( url="", root="", collection_id="S2_MSI_L1C" @@ -304,10 +414,10 @@ def test_fetch_external_stac_collections(self): self.assertIn("new_field", r) # Merge lists self.assertEqual( - r["summaries"]["platform"][0], "S2A,S2B,SENTINEL-2A,SENTINEL-2B" + "S2A,S2B,SENTINEL-2A,SENTINEL-2B", r["summaries"]["platform"][0] ) # Don't override existing keys - self.assertEqual(r["title"], "SENTINEL2 Level-1C") + self.assertEqual("SENTINEL2 Level-1C", r["title"]) # Restore previous state del self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"][ "stacCollection" From 3707d6567cdee5483c07925623b3f1c31f740ba0 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Wed, 24 Apr 2024 11:57:06 +0200 Subject: [PATCH 12/14] feat: use external STAC collection only in server mode --- eodag/rest/core.py | 1 - eodag/rest/stac.py | 103 ++++----------------------------- tests/units/test_stac_utils.py | 93 ++++------------------------- 3 files changed, 22 insertions(+), 175 deletions(-) diff --git a/eodag/rest/core.py b/eodag/rest/core.py index 29c5c9ddf..fd33b5835 100644 --- a/eodag/rest/core.py +++ b/eodag/rest/core.py @@ -689,7 +689,6 @@ def crunch_products( def eodag_api_init() -> None: """Init EODataAccessGateway server instance, pre-running all time consuming tasks""" eodag_api.fetch_product_types_list() - StacCollection.fetch_external_product_types_metadata(eodag_api) StacCollection.fetch_external_stac_collections(eodag_api) # pre-build search plugins diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index 7162f1255..d5ae09c2e 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -39,10 +39,7 @@ NOT_AVAILABLE, format_metadata, get_metadata_path, - mtd_cfg_as_conversion_and_querypath, - properties_from_json, ) -from eodag.config import load_stac_provider_config from eodag.utils import ( DEFAULT_MISSION_START_DATE, HTTP_REQ_TIMEOUT, @@ -53,7 +50,6 @@ get_ext_stac_collection, guess_file_type, jsonpath_parse_dict_items, - obj_md5sum, string_to_jsonpath, update_nested_dict, urljoin, @@ -637,91 +633,6 @@ def fetch_external_stac_collections(cls, eodag_api: EODataAccessGateway) -> None ext_stac_collection = get_ext_stac_collection(ext_stac_collection_path) cls.ext_stac_collections[product_type["ID"]] = ext_stac_collection - @classmethod - def _load_external_product_type_metadata( - cls, - product_type_id: str, - product_type_conf: Dict[str, Any], - parsable_metadata: Dict[str, Any], - ) -> Optional[Dict[str, Any]]: - """Loads external enhanced metadata for a given product type. - - The existing product type configuration is merged with the one from the external file and - the result is returned. - - Existing keys are not updated. Lists are merged also for existing keys. - Only the keys defined in `search.discover_product_types.generic_product_type_parsable_metadata` - from `eodag/resources/stac_provider.yml` are loaded into the configuration. - - :param product_type_id: Product type ID to update with the external metadata. - :type product_type_id: str - :param product_type_conf: Product type configuration to update with the external - metadata. - :type product_type_conf: Dict[str, Any] - :param parsable_metadata: Parsable metadata. - :type parsable_metadata: Dict[str, Any] - :returns: The enhanced product type dictionary, None if no external metadata is available - :rtype: Optional[Dict[str, Any]] - """ - external_stac_collection_path = product_type_conf.get("stacCollection") - if not external_stac_collection_path: - return None - enhanced_product_type_conf = deepcopy(product_type_conf) - logger.info( - f"Fetching external enhanced product type metadata for {product_type_id}" - ) - external_stac_collection = get_ext_stac_collection( - external_stac_collection_path - ) - # Loading external enhanced product types metadata - external_stac_collection_parsed = properties_from_json( - external_stac_collection, - parsable_metadata, - ) - # Merge `external_stac_collection_parsed` into `enhanced_product_type_conf` - for ext_cfg_k, ext_cfg_v in external_stac_collection_parsed.items(): - old_value = enhanced_product_type_conf.get(ext_cfg_k) - if old_value is None: - enhanced_product_type_conf[ext_cfg_k] = ext_cfg_v - elif ext_cfg_k in ("instrument", "platformSerialIdentifier", "keywords"): - merged_values = old_value.split(",") - new_values = ext_cfg_v.split(",") - for v in new_values: - if v != NOT_AVAILABLE and v not in merged_values: - merged_values.append(v) - enhanced_product_type_conf[ext_cfg_k] = ",".join(merged_values) - return enhanced_product_type_conf - - @classmethod - def fetch_external_product_types_metadata( - cls, eodag_api: EODataAccessGateway - ) -> None: - """Fetch external product types metadata and update EODAG if needed""" - stac_provider_config = load_stac_provider_config() - parsable_metadata = stac_provider_config["search"]["discover_product_types"][ - "generic_product_type_parsable_metadata" - ] - parsable_metadata: Dict[str, Any] = mtd_cfg_as_conversion_and_querypath( - parsable_metadata - ) - # load external product type metadata - for ( - product_type_id, - product_type_conf, - ) in eodag_api.product_types_config.items(): - enhanced_product_type = cls._load_external_product_type_metadata( - product_type_id, - product_type_conf, - parsable_metadata, - ) - if enhanced_product_type: - product_type_conf.update(enhanced_product_type) - new_md5 = obj_md5sum(eodag_api.product_types_config.source) - if new_md5 != eodag_api.product_types_config_md5: - eodag_api.product_types_config_md5 = new_md5 - # rebuild index after product types list update - eodag_api.build_index() - def __init__( self, url: str, @@ -829,11 +740,19 @@ def __get_collection_list( "providers": providers_models, }, ) - # merge EODAG's collection with the external collection + # override EODAG's collection with the external collection product_type_id = product_type.get("_id", None) or product_type["ID"] ext_stac_collection = self.ext_stac_collections.get(product_type_id, {}) - ext_stac_collection.update(product_type_collection) - product_type_collection = ext_stac_collection + # merge "keywords" list + merged_keywords = product_type_collection.get("keywords", []) + if "keywords" in ext_stac_collection: + new_keywords = ext_stac_collection["keywords"] + for v in new_keywords: + if v != NOT_AVAILABLE and v not in merged_keywords: + merged_keywords.append(v) + product_type_collection.update(ext_stac_collection) + if merged_keywords: + product_type_collection["keywords"] = merged_keywords # parse f-strings format_args = deepcopy(self.stac_config) format_args["collection"] = dict( diff --git a/tests/units/test_stac_utils.py b/tests/units/test_stac_utils.py index 78f7a092d..9a0f7d430 100644 --- a/tests/units/test_stac_utils.py +++ b/tests/units/test_stac_utils.py @@ -315,75 +315,12 @@ def test_load_external_product_type_metadata(self, mock_get_ext_stac_collection) self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"], ) - @mock.patch( - "eodag.rest.stac.get_ext_stac_collection", - autospec=True, - return_value=json.loads( - """{ - "new_field":"New Value", - "title":"A different title for Sentinel 2 MSI Level 1C", - "summaries": { - "instruments": [ - "MSI" - ], - "platform": [ - "SENTINEL-2A", - "SENTINEL-2B" - ], - "constellation": [ - "Sentinel-2" - ], - "processing:level": [ - "L1C" - ] - } - }""" - ), - ) - def test_fetch_external_product_types_metadata(self, mock_get_ext_stac_collection): - """Updates product types config with metadata from external STAC collections""" - product_type_conf = self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"] - ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" - product_type_conf["stacCollection"] = ext_stac_collection_path - StacCollection.fetch_external_product_types_metadata(self.rest_core.eodag_api) - mock_get_ext_stac_collection.assert_called_with(ext_stac_collection_path) - enhanced_product_type = self.rest_core.eodag_api.product_types_config[ - "S2_MSI_L1C" - ] - # Fields not supported by EODAG - self.assertNotIn("new_field", enhanced_product_type) - # Merge lists - self.assertEqual( - "S2A,S2B,SENTINEL-2A,SENTINEL-2B", - enhanced_product_type["platformSerialIdentifier"], - ) - # Don't override existing keys - self.assertEqual("SENTINEL2 Level-1C", enhanced_product_type["title"]) - # Restore the product type - del self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"][ - "stacCollection" - ] - def test_fetch_external_stac_collections(self): """Load external STAC collections""" external_json = """{ "new_field":"New Value", "title":"A different title for Sentinel 2 MSI Level 1C", - "summaries": { - "instruments": [ - "MSI" - ], - "platform": [ - "SENTINEL-2A", - "SENTINEL-2B" - ], - "constellation": [ - "Sentinel-2" - ], - "processing:level": [ - "L1C" - ] - } + "keywords":["New Keyword"] }""" product_type_conf = self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"] ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" @@ -394,34 +331,26 @@ def test_fetch_external_stac_collections(self): ) as mock_stac_get_ext_stac_collection: mock_stac_get_ext_stac_collection.return_value = json.loads(external_json) - # Check if `eodag.utils.get_ext_stac_collection()` is called with right arg - StacCollection.fetch_external_product_types_metadata( - self.rest_core.eodag_api - ) - mock_stac_get_ext_stac_collection.assert_called_once_with( - ext_stac_collection_path - ) - # Check if the returned STAC collection contains updated data StacCollection.fetch_external_stac_collections(self.rest_core.eodag_api) - r = self.rest_core.get_stac_collection_by_id( + stac_coll = self.rest_core.get_stac_collection_by_id( url="", root="", collection_id="S2_MSI_L1C" ) mock_stac_get_ext_stac_collection.assert_called_with( ext_stac_collection_path ) - # Fields not supported by EODAG - self.assertIn("new_field", r) - # Merge lists + # New field + self.assertIn("new_field", stac_coll) + # Merge keywords + self.assertListEqual( + ["MSI", "SENTINEL2", "S2A,S2B", "L1", "OPTICAL", "New Keyword"], + stac_coll["keywords"], + ) + # Override existing fields self.assertEqual( - "S2A,S2B,SENTINEL-2A,SENTINEL-2B", r["summaries"]["platform"][0] + "A different title for Sentinel 2 MSI Level 1C", stac_coll["title"] ) - # Don't override existing keys - self.assertEqual("SENTINEL2 Level-1C", r["title"]) # Restore previous state - del self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"][ - "stacCollection" - ] StacCollection.ext_stac_collections.clear() From 53985008df7e7cbfe06cb95dc5e6c07cd39e4ae5 Mon Sep 17 00:00:00 2001 From: Nicola Dalpasso Date: Wed, 24 Apr 2024 12:13:18 +0200 Subject: [PATCH 13/14] feat: use external STAC collection only in server mode --- tests/units/test_stac_utils.py | 62 ---------------------------------- 1 file changed, 62 deletions(-) diff --git a/tests/units/test_stac_utils.py b/tests/units/test_stac_utils.py index 9a0f7d430..0ee4b20b8 100644 --- a/tests/units/test_stac_utils.py +++ b/tests/units/test_stac_utils.py @@ -27,12 +27,9 @@ from pygeofilter.values import Geometry import eodag.rest.utils.rfc3339 as rfc3339 -from eodag.api.product.metadata_mapping import mtd_cfg_as_conversion_and_querypath -from eodag.config import load_stac_provider_config from eodag.rest.stac import StacCollection from eodag.rest.types.stac_search import SearchPostRequest from eodag.rest.utils.cql_evaluate import EodagEvaluator -from eodag.utils import deepcopy from eodag.utils.exceptions import ValidationError from tests import TEST_RESOURCES_PATH, mock from tests.context import SearchResult @@ -256,65 +253,6 @@ def test_get_datetime(self): self.assertEqual(dtstart, start) self.assertEqual(dtend, end) - @mock.patch( - "eodag.rest.stac.get_ext_stac_collection", - autospec=True, - return_value=json.loads( - """{ - "new_field":"New Value", - "title":"A different title for Sentinel 2 MSI Level 1C", - "summaries": { - "instruments": [ - "MSI" - ], - "platform": [ - "SENTINEL-2A", - "SENTINEL-2B" - ], - "constellation": [ - "Sentinel-2" - ], - "processing:level": [ - "L1C" - ] - } - }""" - ), - ) - def test_load_external_product_type_metadata(self, mock_get_ext_stac_collection): - """Load the supported EODAG metadata from an external STAC collection""" - product_type_conf = deepcopy( - self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"] - ) - ext_stac_collection_path = "/path/to/external/stac/collections/S2_MSI_L1C.json" - product_type_conf["stacCollection"] = ext_stac_collection_path - stac_provider_config = load_stac_provider_config() - parsable_metadata = stac_provider_config["search"]["discover_product_types"][ - "generic_product_type_parsable_metadata" - ] - parsable_metadata = mtd_cfg_as_conversion_and_querypath(parsable_metadata) - enhanced_product_type = StacCollection._load_external_product_type_metadata( - "S2_MSI_L1C", - product_type_conf, - parsable_metadata, - ) - mock_get_ext_stac_collection.assert_called_with(ext_stac_collection_path) - # Fields not supported by EODAG - self.assertNotIn("new_field", enhanced_product_type) - # Merge lists - self.assertEqual( - "S2A,S2B,SENTINEL-2A,SENTINEL-2B", - enhanced_product_type["platformSerialIdentifier"], - ) - # Don't override existings keys - self.assertEqual("SENTINEL2 Level-1C", enhanced_product_type["title"]) - # The parameter passed `_load_external_product_type_metadata` is not modified - del product_type_conf["stacCollection"] - self.assertDictEqual( - product_type_conf, - self.rest_core.eodag_api.product_types_config["S2_MSI_L1C"], - ) - def test_fetch_external_stac_collections(self): """Load external STAC collections""" external_json = """{ From 425e39649957864c2875c60df233f353225e3f93 Mon Sep 17 00:00:00 2001 From: Sylvain Brunato Date: Fri, 24 May 2024 11:32:56 +0200 Subject: [PATCH 14/14] refactor: fetch_json usage --- eodag/rest/stac.py | 17 +++++++++++---- eodag/utils/__init__.py | 40 ---------------------------------- tests/units/test_stac_utils.py | 12 +++++----- 3 files changed, 18 insertions(+), 51 deletions(-) diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index d5ae09c2e..5cd0beae5 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -42,12 +42,9 @@ ) from eodag.utils import ( DEFAULT_MISSION_START_DATE, - HTTP_REQ_TIMEOUT, - USER_AGENT, deepcopy, dict_items_recursive_apply, format_dict_items, - get_ext_stac_collection, guess_file_type, jsonpath_parse_dict_items, string_to_jsonpath, @@ -57,8 +54,11 @@ from eodag.utils.exceptions import ( NoMatchingProductType, NotAvailableError, + RequestError, + TimeOutError, ValidationError, ) +from eodag.utils.requests import fetch_json if TYPE_CHECKING: from eodag.api.core import EODataAccessGateway @@ -630,7 +630,16 @@ def fetch_external_stac_collections(cls, eodag_api: EODataAccessGateway) -> None if not ext_stac_collection_path: continue logger.info(f"Fetching external STAC collection for {product_type['ID']}") - ext_stac_collection = get_ext_stac_collection(ext_stac_collection_path) + + try: + ext_stac_collection = fetch_json(ext_stac_collection_path) + except (RequestError, TimeOutError) as e: + logger.debug(e) + logger.warning( + f"Could not read remote external STAC collection from {ext_stac_collection_path}", + ) + ext_stac_collection = {} + cls.ext_stac_collections[product_type["ID"]] = ext_stac_collection def __init__( diff --git a/eodag/utils/__init__.py b/eodag/utils/__init__.py index 6dd767f15..b7236d540 100644 --- a/eodag/utils/__init__.py +++ b/eodag/utils/__init__.py @@ -78,8 +78,6 @@ ) from urllib.request import url2pathname -import requests - if sys.version_info >= (3, 9): from typing import Annotated, get_args, get_origin # noqa else: @@ -1474,41 +1472,3 @@ def get_ssl_context(ssl_verify: bool) -> ssl.SSLContext: ctx.check_hostname = True ctx.verify_mode = ssl.CERT_REQUIRED return ctx - - -def get_ext_stac_collection(stac_uri: str) -> Dict[str, Any]: - """Read external STAC collection - - :param stac_uri: URI to local or remote collection - :type stac_uri: str - :returns: The external STAC collection - :rtype: dict - """ - logger.info("Fetching external STAC collection from %s", stac_uri) - if stac_uri.lower().startswith("http"): - # read from remote - try: - response = requests.get( - stac_uri, headers=USER_AGENT, timeout=HTTP_REQ_TIMEOUT - ) - response.raise_for_status() - return response.json() - except requests.RequestException as e: - logger.debug(e) - logger.warning( - "Could not read remote external STAC collection from %s", stac_uri - ) - return {} - elif stac_uri.lower().startswith("file"): - stac_uri = uri_to_path(stac_uri) - - # read from local - try: - with open(stac_uri, "rb") as f: - return orjson.loads(f.read()) - except (orjson.JSONDecodeError, FileNotFoundError) as e: - logger.debug(e) - logger.warning( - "Could not read local external STAC collection from %s", stac_uri - ) - return {} diff --git a/tests/units/test_stac_utils.py b/tests/units/test_stac_utils.py index 0ee4b20b8..8945fcfdd 100644 --- a/tests/units/test_stac_utils.py +++ b/tests/units/test_stac_utils.py @@ -265,18 +265,16 @@ def test_fetch_external_stac_collections(self): product_type_conf["stacCollection"] = ext_stac_collection_path with mock.patch( - "eodag.rest.stac.get_ext_stac_collection", autospec=True - ) as mock_stac_get_ext_stac_collection: - mock_stac_get_ext_stac_collection.return_value = json.loads(external_json) - + "eodag.rest.stac.fetch_json", + autospec=True, + return_value=json.loads(external_json), + ) as mock_fetch_json: # Check if the returned STAC collection contains updated data StacCollection.fetch_external_stac_collections(self.rest_core.eodag_api) stac_coll = self.rest_core.get_stac_collection_by_id( url="", root="", collection_id="S2_MSI_L1C" ) - mock_stac_get_ext_stac_collection.assert_called_with( - ext_stac_collection_path - ) + mock_fetch_json.assert_called_with(ext_stac_collection_path) # New field self.assertIn("new_field", stac_coll) # Merge keywords