diff --git a/nmdc_runtime/site/repository.py b/nmdc_runtime/site/repository.py
index c0de4366..d631022e 100644
--- a/nmdc_runtime/site/repository.py
+++ b/nmdc_runtime/site/repository.py
@@ -513,8 +513,8 @@ def biosample_submission_ingest():
                 "nmdc_portal_api_client": {
                     "config": {
                         "base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
-                        "session_cookie": {
-                            "env": "NMDC_PORTAL_API_SESSION_COOKIE"
+                        "refresh_token": {
+                            "env": "NMDC_PORTAL_API_REFRESH_TOKEN"
                         },
                     }
                 }
@@ -553,8 +553,8 @@ def biosample_submission_ingest():
                 "nmdc_portal_api_client": {
                     "config": {
                         "base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
-                        "session_cookie": {
-                            "env": "NMDC_PORTAL_API_SESSION_COOKIE"
+                        "refresh_token": {
+                            "env": "NMDC_PORTAL_API_REFRESH_TOKEN"
                         },
                     }
                 }
diff --git a/nmdc_runtime/site/resources.py b/nmdc_runtime/site/resources.py
index 0ba01f42..6b776cae 100644
--- a/nmdc_runtime/site/resources.py
+++ b/nmdc_runtime/site/resources.py
@@ -371,16 +371,37 @@ def gold_api_client_resource(context: InitResourceContext):
 @dataclass
 class NmdcPortalApiClient:
     base_url: str
-    # Using a cookie for authentication is not ideal and should be replaced
-    # when this API has an another authentication method
-    session_cookie: str
+    refresh_token: str
+    access_token: Optional[str] = None
+    access_token_expires_at: Optional[datetime] = None
+
+    def _request(self, method: str, endpoint: str, **kwargs):
+        r"""
+        Submits a request to the specified API endpoint, refreshing the
+        access token first if necessary.
+        """
+        if self.access_token is None or datetime.now() > self.access_token_expires_at:
+            refresh_response = requests.post(
+                f"{self.base_url}/auth/refresh",
+                json={"refresh_token": self.refresh_token},
+            )
+            refresh_response.raise_for_status()
+            refresh_body = refresh_response.json()
+            self.access_token_expires_at = datetime.now() + timedelta(
+                seconds=refresh_body["expires_in"]
+            )
+            self.access_token = refresh_body["access_token"]
 
-    def fetch_metadata_submission(self, id: str) -> Dict[str, Any]:
-        response = requests.get(
-            f"{self.base_url}/api/metadata_submission/{id}",
-            cookies={"session": self.session_cookie},
+        headers = kwargs.pop("headers", {})
+        headers["Authorization"] = f"Bearer {self.access_token}"
+        return requests.request(
+            method, f"{self.base_url}{endpoint}", **kwargs, headers=headers
         )
+
+    def fetch_metadata_submission(self, id: str) -> Dict[str, Any]:
+        response = self._request("GET", f"/api/metadata_submission/{id}")
         response.raise_for_status()
         return response.json()
 
@@ -388,13 +409,13 @@ def fetch_metadata_submission(self, id: str) -> Dict[str, Any]:
 
 @resource(
     config_schema={
         "base_url": StringSource,
-        "session_cookie": StringSource,
+        "refresh_token": StringSource,
     }
 )
 def nmdc_portal_api_client_resource(context: InitResourceContext):
     return NmdcPortalApiClient(
         base_url=context.resource_config["base_url"],
-        session_cookie=context.resource_config["session_cookie"],
+        refresh_token=context.resource_config["refresh_token"],
     )
diff --git a/nmdc_runtime/site/translation/submission_portal_translator.py b/nmdc_runtime/site/translation/submission_portal_translator.py
index a11e9ee0..230240e4 100644
--- a/nmdc_runtime/site/translation/submission_portal_translator.py
+++ b/nmdc_runtime/site/translation/submission_portal_translator.py
@@ -13,6 +13,9 @@
 from nmdc_runtime.site.translation.translator import JSON_OBJECT, Translator
 
+BIOSAMPLE_UNIQUE_KEY_SLOT = "samp_name"
+
+
 @lru_cache
 def _get_schema_view():
     """Return a SchemaView instance representing the NMDC schema"""
@@ -98,7 +101,9 @@ def __init__(
         self.study_pi_image_url = study_pi_image_url
         self.study_funding_sources = study_funding_sources
-        self.biosample_extras = group_dicts_by_key("source_mat_id", biosample_extras)
+        self.biosample_extras = group_dicts_by_key(
+            BIOSAMPLE_UNIQUE_KEY_SLOT, biosample_extras
+        )
         self.biosample_extras_slot_mapping = group_dicts_by_key(
             "subject_id", biosample_extras_slot_mapping
         )
@@ -521,7 +526,7 @@ def _translate_biosample(
         :param default_env_package: Default value for `env_package` slot
         :return: nmdc:Biosample
         """
-        source_mat_id = sample_data[0].get("source_mat_id", "").strip()
+        biosample_key = sample_data[0].get(BIOSAMPLE_UNIQUE_KEY_SLOT, "").strip()
         slots = {
             "id": nmdc_biosample_id,
             "part_of": nmdc_study_id,
@@ -533,7 +538,7 @@ def _translate_biosample(
         slots.update(transformed_tab)
 
         if self.biosample_extras:
-            raw_extras = self.biosample_extras.get(source_mat_id)
+            raw_extras = self.biosample_extras.get(biosample_key)
             if raw_extras:
                 transformed_extras = self._transform_dict_for_class(
                     raw_extras, "Biosample", self.biosample_extras_slot_mapping
@@ -564,7 +569,9 @@ def get_database(self) -> nmdc.Database:
         sample_data = metadata_submission_data.get("sampleData", {})
         package_name = metadata_submission_data["packageName"]
 
-        sample_data_by_id = groupby("source_mat_id", concat(sample_data.values()))
+        sample_data_by_id = groupby(
+            BIOSAMPLE_UNIQUE_KEY_SLOT, concat(sample_data.values())
+        )
         nmdc_biosample_ids = self._id_minter("nmdc:Biosample", len(sample_data_by_id))
         sample_data_to_nmdc_biosample_ids = dict(
             zip(sample_data_by_id.keys(), nmdc_biosample_ids)
@@ -583,15 +590,15 @@ def get_database(self) -> nmdc.Database:
 
         if self.omics_processing_mapping:
             # If there is data from an OmicsProcessing mapping file, process it now. This part
-            # assumes that there is a column in that file with the header __biosample_source_mat_id
+            # assumes that there is a column in that file with the header __biosample_samp_name
             # that can be used to join with the sample data from the submission portal. The
-            # biosample identified by that `source_mat_id` will be referenced in the `has_input`
+            # biosample identified by that `samp_name` will be referenced in the `has_input`
             # slot of the OmicsProcessing object. If a DataObject mapping file was also provided,
             # those objects will also be generated and referenced in the `has_output` slot of the
-            # OmicsProcessing object. By keying off of the `source_mat_id` slot of the submission's
+            # OmicsProcessing object. By keying off of the `samp_name` slot of the submission's
             # sample data there is an implicit 1:1 relationship between Biosample objects and
             # OmicsProcessing objects generated here.
- join_key = "__biosample_source_mat_id" + join_key = f"__biosample_{BIOSAMPLE_UNIQUE_KEY_SLOT}" database.omics_processing_set = [] database.data_object_set = [] data_objects_by_sample_data_id = {} @@ -617,7 +624,7 @@ def get_database(self) -> nmdc.Database: or sample_data_id not in sample_data_to_nmdc_biosample_ids ): logging.warning( - f"Unrecognized biosample source_mat_id: {sample_data_id}" + f"Unrecognized biosample {BIOSAMPLE_UNIQUE_KEY_SLOT}: {sample_data_id}" ) continue nmdc_biosample_id = sample_data_to_nmdc_biosample_ids[sample_data_id] diff --git a/tests/test_api/test_endpoints.py b/tests/test_api/test_endpoints.py index a697a54a..7b29a583 100644 --- a/tests/test_api/test_endpoints.py +++ b/tests/test_api/test_endpoints.py @@ -59,6 +59,9 @@ def ensure_test_resources(mdb): mdb.jobs.replace_one( {"id": job_id}, job.model_dump(exclude_unset=True), upsert=True ) + mdb["minter.requesters"].replace_one( + {"id": site_id}, {"id": site_id}, upsert=True + ) return { "site_client": { "site_id": site_id, diff --git a/tests/test_data/test_submission_portal_translator_data.yaml b/tests/test_data/test_submission_portal_translator_data.yaml index f48b81db..2ee454b9 100644 --- a/tests/test_data/test_submission_portal_translator_data.yaml +++ b/tests/test_data/test_submission_portal_translator_data.yaml @@ -882,12 +882,12 @@ input: templates: - plant-associated omics_processing_mapping: - - __biosample_source_mat_id: UUID:e8ed34cc-32f4-4fc5-9b9f-c2699e43163c + - __biosample_samp_name: G5R1_MAIN_09MAY2016 processing_institution: JGI instrument_name: Some fancy expensive thing omics_type: Metagenome data_object_mapping: - - __biosample_source_mat_id: UUID:e8ed34cc-32f4-4fc5-9b9f-c2699e43163c + - __biosample_samp_name: G5R1_MAIN_09MAY2016 data_object_type: Metagenome Raw Reads url: http://example.com/data.fastq.gz name: Metagenome Raw Reads diff --git a/tests/test_graphs/test_submission_portal_graphs.py b/tests/test_graphs/test_submission_portal_graphs.py index a2d257ee..27eeb164 100644 --- a/tests/test_graphs/test_submission_portal_graphs.py +++ b/tests/test_graphs/test_submission_portal_graphs.py @@ -1,11 +1,12 @@ import pytest import requests_mock +from nmdc_runtime.api.db.mongo import get_mongo_db from nmdc_runtime.site.graphs import ( translate_metadata_submission_to_nmdc_schema_database, ) from nmdc_runtime.site.repository import resource_defs - +from tests.test_api.test_endpoints import ensure_test_resources MOCK_PORTAL_API_BASE = "http://www.example.com/nmdc-portal-api" MOCK_PORTAL_SUBMISSION_ID = "test-submission-id" @@ -70,10 +71,12 @@ } -@pytest.mark.xfail(reason="DagsterInvalidConfigError: Error in config for job translate_metadata_submission_to_nmdc_schema_database") def test_translate_metadata_submission_to_nmdc_schema_database(): """Smoke test for translate_metadata_submission_to_nmdc_schema_database job""" + mdb = get_mongo_db() + rs = ensure_test_resources(mdb) + job = translate_metadata_submission_to_nmdc_schema_database.to_job( resource_defs=resource_defs ) @@ -83,12 +86,26 @@ def test_translate_metadata_submission_to_nmdc_schema_database(): "config": {"username": "test"}, }, "get_submission_portal_pipeline_inputs": { - "config": { + "inputs": { "submission_id": MOCK_PORTAL_SUBMISSION_ID, - "omics_processing_mapping_file_url": "", - "data_object_mapping_file_url": "", + "biosample_extras_file_url": None, + "biosample_extras_slot_mapping_file_url": None, + "data_object_mapping_file_url": None, + "omics_processing_mapping_file_url": None, } }, + 
"translate_portal_submission_to_nmdc_schema_database": { + "inputs": { + "study_category": "research_study", + "study_doi_category": "dataset_doi", + "study_doi_provider": "jgi", + "study_funding_sources": [ + "funder 1", + "funder 2", + ], + "study_pi_image_url": "http://www.example.com/test.png", + } + } }, "resources": { "mongo": { @@ -102,15 +119,13 @@ def test_translate_metadata_submission_to_nmdc_schema_database(): "nmdc_portal_api_client": { "config": { "base_url": MOCK_PORTAL_API_BASE, - "session_cookie": "xyz", + "refresh_token": "xyz123", } }, "runtime_api_site_client": { "config": { "base_url": {"env": "API_HOST"}, - "client_id": {"env": "API_SITE_CLIENT_ID"}, - "client_secret": {"env": "API_SITE_CLIENT_SECRET"}, - "site_id": {"env": "API_SITE_ID"}, + **rs["site_client"], } }, "runtime_api_user_client": { @@ -124,6 +139,10 @@ def test_translate_metadata_submission_to_nmdc_schema_database(): } with requests_mock.mock(real_http=True) as mock: + mock.post(f"{MOCK_PORTAL_API_BASE}/auth/refresh", json={ + "access_token": "abcde", + "expires_in": 86400, + }) mock.get( f"{MOCK_PORTAL_API_BASE}/api/metadata_submission/{MOCK_PORTAL_SUBMISSION_ID}", json=MOCK_PORTAL_SUBMISSION, diff --git a/tests/test_ops/test_data_api_ops.py b/tests/test_ops/test_data_api_ops.py index b5b301dd..3081524a 100644 --- a/tests/test_ops/test_data_api_ops.py +++ b/tests/test_ops/test_data_api_ops.py @@ -7,9 +7,13 @@ from nmdc_runtime.site.ops import fetch_nmdc_portal_submission_by_id +MOCK_BASE_URL = "http://example.com/nmdc_portal" +MOCK_SUBMISSION_ID = "353d751f-cff0-4558-9051-25a87ba00d3f" + + @pytest.fixture def client_config(): - return {"base_url": "http://example.com/nmdc_portal", "session_cookie": "12345"} + return {"base_url": MOCK_BASE_URL, "refresh_token": "12345"} @pytest.fixture @@ -25,13 +29,26 @@ def op_context(client_config): def test_metadata_submission(op_context): with requests_mock.mock() as mock: + mock.post(f"{MOCK_BASE_URL}/auth/refresh", json={ + "access_token": "abcde", + "expires_in": 86400, + }) mock.get( - "http://example.com/nmdc_portal/api/metadata_submission/353d751f-cff0-4558-9051-25a87ba00d3f", - json={"id": "353d751f-cff0-4558-9051-25a87ba00d3f"}, + f"{MOCK_BASE_URL}/api/metadata_submission/{MOCK_SUBMISSION_ID}", + json={"id": MOCK_SUBMISSION_ID}, ) + # The first request should initiate an access token refresh and then fetch the submission fetch_nmdc_portal_submission_by_id( - op_context, "353d751f-cff0-4558-9051-25a87ba00d3f" + op_context, MOCK_SUBMISSION_ID ) + assert len(mock.request_history) == 2 + assert mock.request_history[0].url == f"{MOCK_BASE_URL}/auth/refresh" + assert mock.request_history[1].url == f"{MOCK_BASE_URL}/api/metadata_submission/{MOCK_SUBMISSION_ID}" - assert len(mock.request_history) == 1 + # The second request should not need to refresh the access token + fetch_nmdc_portal_submission_by_id( + op_context, "353d751f-cff0-4558-9051-25a87ba00d3f" + ) + assert len(mock.request_history) == 3 + assert mock.request_history[2].url == f"{MOCK_BASE_URL}/api/metadata_submission/{MOCK_SUBMISSION_ID}"