Skip to content

Commit

Permalink
Merge pull request #572 from microbiomedata/issue-571-portal-api-clie…
Browse files Browse the repository at this point in the history
…nt-auth

Update `NmdcPortalApiClient` with new authentication scheme
  • Loading branch information
pkalita-lbl authored Jun 24, 2024
2 parents 7a26638 + d7c4f7a commit 636b968
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 38 deletions.
8 changes: 4 additions & 4 deletions nmdc_runtime/site/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,8 +513,8 @@ def biosample_submission_ingest():
"nmdc_portal_api_client": {
"config": {
"base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
"session_cookie": {
"env": "NMDC_PORTAL_API_SESSION_COOKIE"
"refresh_token": {
"env": "NMDC_PORTAL_API_REFRESH_TOKEN"
},
}
}
Expand Down Expand Up @@ -553,8 +553,8 @@ def biosample_submission_ingest():
"nmdc_portal_api_client": {
"config": {
"base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
"session_cookie": {
"env": "NMDC_PORTAL_API_SESSION_COOKIE"
"refresh_token": {
"env": "NMDC_PORTAL_API_REFRESH_TOKEN"
},
}
}
Expand Down
39 changes: 30 additions & 9 deletions nmdc_runtime/site/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,30 +371,51 @@ def gold_api_client_resource(context: InitResourceContext):

@dataclass
class NmdcPortalApiClient:
    """Client for the NMDC portal API that authenticates with a refresh token.

    Before a request is sent, an access token is obtained (when needed) by
    POSTing ``refresh_token`` to ``{base_url}/auth/refresh``. The token and its
    expiry are cached on the instance and the token is sent as a
    ``Authorization: Bearer ...`` header on every request.
    """

    base_url: str
    refresh_token: str
    # Cached access token and the local time after which it must be refreshed.
    access_token: Optional[str] = None
    access_token_expires_at: Optional[datetime] = None

    def _access_token_needs_refresh(self) -> bool:
        """Return True when no access token is cached or the cached one expired."""
        # A token without a known expiry is treated as unusable rather than
        # comparing ``datetime.now()`` against ``None`` (which raises TypeError).
        if self.access_token is None or self.access_token_expires_at is None:
            return True
        return datetime.now() > self.access_token_expires_at

    def _refresh_access_token(self) -> None:
        """Exchange the refresh token for a new access token and cache it.

        Raises whatever ``raise_for_status`` raises when the refresh endpoint
        answers with an error status.
        """
        refresh_response = requests.post(
            f"{self.base_url}/auth/refresh",
            json={"refresh_token": self.refresh_token},
        )
        refresh_response.raise_for_status()
        refresh_body = refresh_response.json()
        self.access_token_expires_at = datetime.now() + timedelta(
            seconds=refresh_body["expires_in"]
        )
        self.access_token = refresh_body["access_token"]

    def _authorized_headers(
        self, headers: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Return a copy of ``headers`` with the bearer ``Authorization`` header set."""
        # Copy so the caller's mapping is never mutated.
        merged = dict(headers) if headers else {}
        merged["Authorization"] = f"Bearer {self.access_token}"
        return merged

    def _request(self, method: str, endpoint: str, **kwargs):
        """Submit a request to ``endpoint``, refreshing the access token first if necessary.

        :param method: HTTP method name passed to ``requests.request``
        :param endpoint: path appended to ``base_url``
        :param kwargs: extra keyword arguments forwarded to ``requests.request``;
            a ``headers`` entry is merged with the ``Authorization`` header
        :return: the response object returned by ``requests.request``
        """
        if self._access_token_needs_refresh():
            self._refresh_access_token()

        # Pop ``headers`` out of kwargs: forwarding it both inside ``**kwargs``
        # and as an explicit keyword would raise
        # "got multiple values for keyword argument 'headers'".
        headers = self._authorized_headers(kwargs.pop("headers", None))
        return requests.request(
            method, f"{self.base_url}{endpoint}", headers=headers, **kwargs
        )

    def fetch_metadata_submission(self, id: str) -> Dict[str, Any]:
        """Fetch the metadata submission with the given id and return its JSON body.

        Raises via ``raise_for_status`` when the API answers with an error status.
        """
        response = self._request("GET", f"/api/metadata_submission/{id}")
        response.raise_for_status()
        return response.json()


@resource(
    config_schema={
        "base_url": StringSource,
        "refresh_token": StringSource,
    }
)
def nmdc_portal_api_client_resource(context: InitResourceContext):
    """Build an ``NmdcPortalApiClient`` from the resource configuration.

    Reads ``base_url`` and ``refresh_token`` from ``context.resource_config``;
    the former ``session_cookie`` setting is no longer part of the schema.
    """
    resource_config = context.resource_config
    return NmdcPortalApiClient(
        base_url=resource_config["base_url"],
        refresh_token=resource_config["refresh_token"],
    )


Expand Down
25 changes: 16 additions & 9 deletions nmdc_runtime/site/translation/submission_portal_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
from nmdc_runtime.site.translation.translator import JSON_OBJECT, Translator


BIOSAMPLE_UNIQUE_KEY_SLOT = "samp_name"


@lru_cache
def _get_schema_view():
"""Return a SchemaView instance representing the NMDC schema"""
Expand Down Expand Up @@ -98,7 +101,9 @@ def __init__(
self.study_pi_image_url = study_pi_image_url
self.study_funding_sources = study_funding_sources

self.biosample_extras = group_dicts_by_key("source_mat_id", biosample_extras)
self.biosample_extras = group_dicts_by_key(
BIOSAMPLE_UNIQUE_KEY_SLOT, biosample_extras
)
self.biosample_extras_slot_mapping = group_dicts_by_key(
"subject_id", biosample_extras_slot_mapping
)
Expand Down Expand Up @@ -521,7 +526,7 @@ def _translate_biosample(
:param default_env_package: Default value for `env_package` slot
:return: nmdc:Biosample
"""
source_mat_id = sample_data[0].get("source_mat_id", "").strip()
biosample_key = sample_data[0].get(BIOSAMPLE_UNIQUE_KEY_SLOT, "").strip()
slots = {
"id": nmdc_biosample_id,
"part_of": nmdc_study_id,
Expand All @@ -533,7 +538,7 @@ def _translate_biosample(
slots.update(transformed_tab)

if self.biosample_extras:
raw_extras = self.biosample_extras.get(source_mat_id)
raw_extras = self.biosample_extras.get(biosample_key)
if raw_extras:
transformed_extras = self._transform_dict_for_class(
raw_extras, "Biosample", self.biosample_extras_slot_mapping
Expand Down Expand Up @@ -564,7 +569,9 @@ def get_database(self) -> nmdc.Database:

sample_data = metadata_submission_data.get("sampleData", {})
package_name = metadata_submission_data["packageName"]
sample_data_by_id = groupby("source_mat_id", concat(sample_data.values()))
sample_data_by_id = groupby(
BIOSAMPLE_UNIQUE_KEY_SLOT, concat(sample_data.values())
)
nmdc_biosample_ids = self._id_minter("nmdc:Biosample", len(sample_data_by_id))
sample_data_to_nmdc_biosample_ids = dict(
zip(sample_data_by_id.keys(), nmdc_biosample_ids)
Expand All @@ -583,15 +590,15 @@ def get_database(self) -> nmdc.Database:

if self.omics_processing_mapping:
# If there is data from an OmicsProcessing mapping file, process it now. This part
# assumes that there is a column in that file with the header __biosample_source_mat_id
# assumes that there is a column in that file with the header __biosample_samp_name
# that can be used to join with the sample data from the submission portal. The
# biosample identified by that `source_mat_id` will be referenced in the `has_input`
# biosample identified by that `samp_name` will be referenced in the `has_input`
# slot of the OmicsProcessing object. If a DataObject mapping file was also provided,
# those objects will also be generated and referenced in the `has_output` slot of the
# OmicsProcessing object. By keying off of the `source_mat_id` slot of the submission's
# OmicsProcessing object. By keying off of the `samp_name` slot of the submission's
# sample data there is an implicit 1:1 relationship between Biosample objects and
# OmicsProcessing objects generated here.
join_key = "__biosample_source_mat_id"
join_key = f"__biosample_{BIOSAMPLE_UNIQUE_KEY_SLOT}"
database.omics_processing_set = []
database.data_object_set = []
data_objects_by_sample_data_id = {}
Expand All @@ -617,7 +624,7 @@ def get_database(self) -> nmdc.Database:
or sample_data_id not in sample_data_to_nmdc_biosample_ids
):
logging.warning(
f"Unrecognized biosample source_mat_id: {sample_data_id}"
f"Unrecognized biosample {BIOSAMPLE_UNIQUE_KEY_SLOT}: {sample_data_id}"
)
continue
nmdc_biosample_id = sample_data_to_nmdc_biosample_ids[sample_data_id]
Expand Down
3 changes: 3 additions & 0 deletions tests/test_api/test_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ def ensure_test_resources(mdb):
mdb.jobs.replace_one(
{"id": job_id}, job.model_dump(exclude_unset=True), upsert=True
)
mdb["minter.requesters"].replace_one(
{"id": site_id}, {"id": site_id}, upsert=True
)
return {
"site_client": {
"site_id": site_id,
Expand Down
4 changes: 2 additions & 2 deletions tests/test_data/test_submission_portal_translator_data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -882,12 +882,12 @@ input:
templates:
- plant-associated
omics_processing_mapping:
- __biosample_source_mat_id: UUID:e8ed34cc-32f4-4fc5-9b9f-c2699e43163c
- __biosample_samp_name: G5R1_MAIN_09MAY2016
processing_institution: JGI
instrument_name: Some fancy expensive thing
omics_type: Metagenome
data_object_mapping:
- __biosample_source_mat_id: UUID:e8ed34cc-32f4-4fc5-9b9f-c2699e43163c
- __biosample_samp_name: G5R1_MAIN_09MAY2016
data_object_type: Metagenome Raw Reads
url: http://example.com/data.fastq.gz
name: Metagenome Raw Reads
Expand Down
37 changes: 28 additions & 9 deletions tests/test_graphs/test_submission_portal_graphs.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import pytest
import requests_mock

from nmdc_runtime.api.db.mongo import get_mongo_db
from nmdc_runtime.site.graphs import (
translate_metadata_submission_to_nmdc_schema_database,
)
from nmdc_runtime.site.repository import resource_defs

from tests.test_api.test_endpoints import ensure_test_resources

MOCK_PORTAL_API_BASE = "http://www.example.com/nmdc-portal-api"
MOCK_PORTAL_SUBMISSION_ID = "test-submission-id"
Expand Down Expand Up @@ -70,10 +71,12 @@
}


@pytest.mark.xfail(reason="DagsterInvalidConfigError: Error in config for job translate_metadata_submission_to_nmdc_schema_database")
def test_translate_metadata_submission_to_nmdc_schema_database():
"""Smoke test for translate_metadata_submission_to_nmdc_schema_database job"""

mdb = get_mongo_db()
rs = ensure_test_resources(mdb)

job = translate_metadata_submission_to_nmdc_schema_database.to_job(
resource_defs=resource_defs
)
Expand All @@ -83,12 +86,26 @@ def test_translate_metadata_submission_to_nmdc_schema_database():
"config": {"username": "test"},
},
"get_submission_portal_pipeline_inputs": {
"config": {
"inputs": {
"submission_id": MOCK_PORTAL_SUBMISSION_ID,
"omics_processing_mapping_file_url": "",
"data_object_mapping_file_url": "",
"biosample_extras_file_url": None,
"biosample_extras_slot_mapping_file_url": None,
"data_object_mapping_file_url": None,
"omics_processing_mapping_file_url": None,
}
},
"translate_portal_submission_to_nmdc_schema_database": {
"inputs": {
"study_category": "research_study",
"study_doi_category": "dataset_doi",
"study_doi_provider": "jgi",
"study_funding_sources": [
"funder 1",
"funder 2",
],
"study_pi_image_url": "http://www.example.com/test.png",
}
}
},
"resources": {
"mongo": {
Expand All @@ -102,15 +119,13 @@ def test_translate_metadata_submission_to_nmdc_schema_database():
"nmdc_portal_api_client": {
"config": {
"base_url": MOCK_PORTAL_API_BASE,
"session_cookie": "xyz",
"refresh_token": "xyz123",
}
},
"runtime_api_site_client": {
"config": {
"base_url": {"env": "API_HOST"},
"client_id": {"env": "API_SITE_CLIENT_ID"},
"client_secret": {"env": "API_SITE_CLIENT_SECRET"},
"site_id": {"env": "API_SITE_ID"},
**rs["site_client"],
}
},
"runtime_api_user_client": {
Expand All @@ -124,6 +139,10 @@ def test_translate_metadata_submission_to_nmdc_schema_database():
}

with requests_mock.mock(real_http=True) as mock:
mock.post(f"{MOCK_PORTAL_API_BASE}/auth/refresh", json={
"access_token": "abcde",
"expires_in": 86400,
})
mock.get(
f"{MOCK_PORTAL_API_BASE}/api/metadata_submission/{MOCK_PORTAL_SUBMISSION_ID}",
json=MOCK_PORTAL_SUBMISSION,
Expand Down
27 changes: 22 additions & 5 deletions tests/test_ops/test_data_api_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@
from nmdc_runtime.site.ops import fetch_nmdc_portal_submission_by_id


MOCK_BASE_URL = "http://example.com/nmdc_portal"
MOCK_SUBMISSION_ID = "353d751f-cff0-4558-9051-25a87ba00d3f"


@pytest.fixture
def client_config():
    """Return the portal API client resource config used by these tests."""
    # Only the refresh-token based config is returned; a preceding
    # session-cookie ``return`` would make this one unreachable.
    return {"base_url": MOCK_BASE_URL, "refresh_token": "12345"}


@pytest.fixture
Expand All @@ -25,13 +29,26 @@ def op_context(client_config):

def test_metadata_submission(op_context):
    """Check that a submission fetch refreshes the access token only once.

    The first fetch is expected to hit the refresh endpoint and then the
    submission endpoint; the second fetch is expected to hit only the
    submission endpoint.
    """
    refresh_url = f"{MOCK_BASE_URL}/auth/refresh"
    submission_url = f"{MOCK_BASE_URL}/api/metadata_submission/{MOCK_SUBMISSION_ID}"

    with requests_mock.mock() as mock:
        mock.post(
            refresh_url,
            json={
                "access_token": "abcde",
                "expires_in": 86400,
            },
        )
        mock.get(submission_url, json={"id": MOCK_SUBMISSION_ID})

        # The first request should initiate an access token refresh and then fetch the submission
        fetch_nmdc_portal_submission_by_id(op_context, MOCK_SUBMISSION_ID)
        assert len(mock.request_history) == 2
        assert mock.request_history[0].url == refresh_url
        assert mock.request_history[1].url == submission_url

        # The second request should not need to refresh the access token
        fetch_nmdc_portal_submission_by_id(op_context, MOCK_SUBMISSION_ID)
        assert len(mock.request_history) == 3
        assert mock.request_history[2].url == submission_url

0 comments on commit 636b968

Please sign in to comment.