diff --git a/.env.example b/.env.example
index b9505de9..03baec27 100644
--- a/.env.example
+++ b/.env.example
@@ -37,4 +37,5 @@ NEON_API_TOKEN=y
NEON_API_BASE_URL=https://data.neonscience.org/api/v0
NERSC_USERNAME=replaceme
-ORCID_CLIENT_ID=replaceme
\ No newline at end of file
+ORCID_CLIENT_ID=replaceme
+ORCID_CLIENT_SECRET=replaceme
\ No newline at end of file
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 00000000..1bc016af
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,33 @@
+name: Lint-check + Style-normalize Python files
+
+on:
+ pull_request:
+ paths:
+ - '.github/workflows/lint.yml'
+ - '**.py'
+
+
+jobs:
+ build:
+ name: lint-check and style-normalize Python files
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ ref: ${{ github.event.pull_request.head.ref }}
+ - name: Set up Python 3.10
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
+ - name: Lint with flake8 and Reformat with black
+ run: |
+ make init-lint-and-black
+ make lint
+ make black
+ - name: commit and push if reformatted
+ run: |
+ git config user.name github-actions
+ git config user.email github-actions@github.com
+ if git status --short | grep -q '\.py$'; then git add '*.py' && git commit -m "style: reformat" && git push; fi
+
diff --git a/.gitpod.yml b/.gitpod.yml
index f8ebd20d..5aa5f971 100644
--- a/.gitpod.yml
+++ b/.gitpod.yml
@@ -9,4 +9,4 @@ tasks:
- name: Start Dev on Fresh Gitpod
before: cp .env.example .env
init: docker compose up mongo --detach && make mongorestore-nmdc-dev
- command: make up-dev && docker-compose logs -f fastapi
+ command: make up-dev && docker compose logs -f fastapi
diff --git a/Makefile b/Makefile
index 6316a2fd..87c02752 100644
--- a/Makefile
+++ b/Makefile
@@ -18,14 +18,14 @@ update-deps:
update: update-deps init
up-dev:
- docker-compose up --build --force-recreate --detach --remove-orphans
+ docker compose up --build --force-recreate --detach --remove-orphans
dev-reset-db:
docker compose \
exec mongo /bin/bash -c "./app_tests/mongorestore-nmdc-testdb.sh"
up-test:
- docker-compose --file docker-compose.test.yml \
+ docker compose --file docker-compose.test.yml \
up --build --force-recreate --detach --remove-orphans
test-build:
@@ -41,6 +41,9 @@ test-run:
test: test-build test-run
+black:
+ black nmdc_runtime
+
lint:
# Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --extend-ignore=F722
@@ -48,14 +51,21 @@ lint:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 \
--statistics --extend-exclude="./build/" --extend-ignore=F722
+PIP_PINNED_FLAKE8 := $(shell grep 'flake8==' requirements/dev.txt)
+PIP_PINNED_BLACK := $(shell grep 'black==' requirements/dev.txt)
+
+init-lint-and-black:
+ pip install $(PIP_PINNED_FLAKE8)
+ pip install $(PIP_PINNED_BLACK)
+
down-dev:
- docker-compose down
+ docker compose down
down-test:
- docker-compose --file docker-compose.test.yml down
+ docker compose --file docker-compose.test.yml down
follow-fastapi:
- docker-compose logs fastapi -f
+ docker compose logs fastapi -f
fastapi-deploy-spin:
rancher kubectl rollout restart deployment/runtime-fastapi --namespace=nmdc-dev
diff --git a/nmdc_runtime/api/core/auth.py b/nmdc_runtime/api/core/auth.py
index 5e4d7c1c..85c5d5a5 100644
--- a/nmdc_runtime/api/core/auth.py
+++ b/nmdc_runtime/api/core/auth.py
@@ -6,17 +6,25 @@
from fastapi.exceptions import HTTPException
from fastapi.openapi.models import OAuthFlows as OAuthFlowsModel
from fastapi.param_functions import Form
-from fastapi.security import OAuth2, HTTPBasic, HTTPBasicCredentials
+from fastapi.security import (
+ OAuth2,
+ HTTPBasic,
+ HTTPBasicCredentials,
+ HTTPBearer,
+ HTTPAuthorizationCredentials,
+)
from fastapi.security.utils import get_authorization_scheme_param
from jose import JWTError, jwt
from passlib.context import CryptContext
from pydantic import BaseModel
+from starlette import status
from starlette.requests import Request
from starlette.status import HTTP_400_BAD_REQUEST, HTTP_401_UNAUTHORIZED
SECRET_KEY = os.getenv("JWT_SECRET_KEY")
ALGORITHM = "HS256"
ORCID_CLIENT_ID = os.getenv("ORCID_CLIENT_ID")
+ORCID_CLIENT_SECRET = os.getenv("ORCID_CLIENT_SECRET")
# https://orcid.org/.well-known/openid-configuration
# XXX do we want to live-load this?
@@ -129,15 +137,24 @@ async def __call__(self, request: Request) -> Optional[str]:
tokenUrl="token", auto_error=False
)
+bearer_scheme = HTTPBearer(scheme_name="bearerAuth", auto_error=False)
+
async def basic_credentials(req: Request):
return await HTTPBasic(auto_error=False)(req)
+async def bearer_credentials(req: Request):
+ return await HTTPBearer(scheme_name="bearerAuth", auto_error=False)(req)
+
+
class OAuth2PasswordOrClientCredentialsRequestForm:
def __init__(
self,
basic_creds: Optional[HTTPBasicCredentials] = Depends(basic_credentials),
+ bearer_creds: Optional[HTTPAuthorizationCredentials] = Depends(
+ bearer_credentials
+ ),
grant_type: str = Form(None, regex="^password$|^client_credentials$"),
username: Optional[str] = Form(None),
password: Optional[str] = Form(None),
@@ -145,14 +162,18 @@ def __init__(
client_id: Optional[str] = Form(None),
client_secret: Optional[str] = Form(None),
):
- if grant_type == "password" and (username is None or password is None):
+ if bearer_creds:
+ self.grant_type = "client_credentials"
+ self.username, self.password = None, None
+ self.scopes = scope.split()
+ self.client_id = bearer_creds.credentials
+ self.client_secret = None
+ elif grant_type == "password" and (username is None or password is None):
raise HTTPException(
status_code=HTTP_400_BAD_REQUEST,
detail="grant_type password requires username and password",
)
- if grant_type == "client_credentials" and (
- client_id is None or client_secret is None
- ):
+ elif grant_type == "client_credentials" and (client_id is None):
if basic_creds:
client_id = basic_creds.username
client_secret = basic_creds.password
diff --git a/nmdc_runtime/api/endpoints/metadata.py b/nmdc_runtime/api/endpoints/metadata.py
index 112d4098..810c6b56 100644
--- a/nmdc_runtime/api/endpoints/metadata.py
+++ b/nmdc_runtime/api/endpoints/metadata.py
@@ -239,6 +239,11 @@ async def submit_json_nmdcdb(
Submit a NMDC JSON Schema "nmdc:Database" object.
"""
+ if not permitted(user.username, "/metadata/json:submit"):
+ raise HTTPException(
+ status_code=status.HTTP_403_FORBIDDEN,
+ detail="Only specific users are allowed to submit json at this time.",
+ )
rv = validate_json(docs, mdb)
if rv["result"] == "errors":
raise HTTPException(
diff --git a/nmdc_runtime/api/endpoints/queries.py b/nmdc_runtime/api/endpoints/queries.py
index 4a272797..bc311d0f 100644
--- a/nmdc_runtime/api/endpoints/queries.py
+++ b/nmdc_runtime/api/endpoints/queries.py
@@ -34,7 +34,7 @@ def check_can_update_and_delete(user: User):
if not permitted(user.username, "/queries:run(query_cmd:DeleteCommand)"):
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
- detail="Only specific users are allowed to issue update and delete commands."
+ detail="Only specific users are allowed to issue update and delete commands.",
)
@@ -125,7 +125,8 @@ def _run_query(query, mdb) -> CommandResponse:
detail="Can only delete documents in nmdc-schema collections.",
)
delete_specs = [
- {"filter": del_statement.q, "limit": del_statement.limit} for del_statement in query.cmd.deletes
+ {"filter": del_statement.q, "limit": del_statement.limit}
+ for del_statement in query.cmd.deletes
]
for spec in delete_specs:
docs = list(mdb[collection_name].find(**spec))
@@ -148,7 +149,8 @@ def _run_query(query, mdb) -> CommandResponse:
detail="Can only update documents in nmdc-schema collections.",
)
update_specs = [
- {"filter": up_statement.q, "limit": 0 if up_statement.multi else 1} for up_statement in query.cmd.updates
+ {"filter": up_statement.q, "limit": 0 if up_statement.multi else 1}
+ for up_statement in query.cmd.updates
]
for spec in update_specs:
docs = list(mdb[collection_name].find(**spec))
diff --git a/nmdc_runtime/api/endpoints/users.py b/nmdc_runtime/api/endpoints/users.py
index 4f79e752..c174092c 100644
--- a/nmdc_runtime/api/endpoints/users.py
+++ b/nmdc_runtime/api/endpoints/users.py
@@ -2,7 +2,9 @@
from datetime import timedelta
import pymongo.database
+import requests
from fastapi import Depends, APIRouter, HTTPException, status
+from fastapi.openapi.docs import get_swagger_ui_html
from jose import jws, JWTError
from starlette.requests import Request
from starlette.responses import HTMLResponse, RedirectResponse
@@ -16,6 +18,7 @@
ORCID_JWK,
ORCID_JWS_VERITY_ALGORITHM,
credentials_exception,
+ ORCID_CLIENT_SECRET,
)
from nmdc_runtime.api.core.auth import get_password_hash
from nmdc_runtime.api.core.util import generate_secret
@@ -32,43 +35,28 @@
router = APIRouter()
-@router.get("/orcid_authorize")
-async def orcid_authorize():
- """NOTE: You want to load /orcid_authorize directly in your web browser to initiate the login redirect flow."""
- return RedirectResponse(
- f"https://orcid.org/oauth/authorize?client_id={ORCID_CLIENT_ID}"
- "&response_type=token&scope=openid&"
- f"redirect_uri={BASE_URL_EXTERNAL}/orcid_token"
- )
-
-
-@router.get("/orcid_token")
-async def redirect_uri_for_orcid_token(req: Request):
- """
- Returns a web page that will display a user's orcid jwt token for copy/paste.
-
- This route is loaded by orcid.org after a successful orcid user login.
- """
- return HTMLResponse(
- """
-
-
-
-
-
-
-
- """
+@router.get("/orcid_code", response_class=RedirectResponse)
+async def receive_orcid_code(request: Request, code: str, state: str | None = None):
+ rv = requests.post(
+ "https://orcid.org/oauth/token",
+ data=(
+ f"client_id={ORCID_CLIENT_ID}&client_secret={ORCID_CLIENT_SECRET}&"
+ f"grant_type=authorization_code&code={code}&redirect_uri={BASE_URL_EXTERNAL}/orcid_code"
+ ),
+ headers={
+ "Content-type": "application/x-www-form-urlencoded",
+ "Accept": "application/json",
+ },
)
+ token_response = rv.json()
+ response = RedirectResponse(state or request.url_for("custom_swagger_ui_html"))
+ for key in ["user_orcid", "user_name", "user_id_token"]:
+ response.set_cookie(
+ key=key,
+ value=token_response[key.replace("user_", "")],
+ max_age=2592000,
+ )
+ return response
@router.post("/token", response_model=Token)
diff --git a/nmdc_runtime/api/endpoints/util.py b/nmdc_runtime/api/endpoints/util.py
index f8279efb..b6cda5da 100644
--- a/nmdc_runtime/api/endpoints/util.py
+++ b/nmdc_runtime/api/endpoints/util.py
@@ -110,20 +110,23 @@ def list_resources(req: ListRequest, mdb: MongoDatabase, collection_name: str):
}
return rv
else:
+        # Workaround: nmdc schema collections are expected to have an 'id' field,
+        # but some (e.g. functional_annotation_agg) do not, so fall back to '_id'.
+ id_field = "id"
if "id_1" not in mdb[collection_name].index_information():
logging.warning(
f"list_resources: no index set on 'id' for collection {collection_name}"
)
+ id_field = "_id" # expected atm for functional_annotation_agg
resources = list(
mdb[collection_name].find(
filter=filter_,
projection=projection,
limit=limit,
- sort=[("id", 1)],
+ sort=[(id_field, 1)],
allow_disk_use=True,
)
)
- last_id = resources[-1]["id"]
+ last_id = resources[-1][id_field]
token = generate_one_id(mdb, "page_tokens")
mdb.page_tokens.insert_one(
{"_id": token, "ns": collection_name, "last_id": last_id}
diff --git a/nmdc_runtime/api/main.py b/nmdc_runtime/api/main.py
index 71a6863e..d403b607 100644
--- a/nmdc_runtime/api/main.py
+++ b/nmdc_runtime/api/main.py
@@ -1,19 +1,27 @@
import os
+import re
from contextlib import asynccontextmanager
from importlib import import_module
from importlib.metadata import version
+from typing import Annotated
import fastapi
+import requests
import uvicorn
-from fastapi import APIRouter, FastAPI
+from fastapi import APIRouter, FastAPI, Cookie
from fastapi.middleware.cors import CORSMiddleware
+from fastapi.openapi.docs import get_swagger_ui_html
+from fastapi.staticfiles import StaticFiles
from setuptools_scm import get_version
from starlette import status
-from starlette.responses import RedirectResponse
+from starlette.responses import RedirectResponse, HTMLResponse
from nmdc_runtime.api.analytics import Analytics
-from nmdc_runtime.util import all_docs_have_unique_id, ensure_unique_id_indexes
-from nmdc_runtime.api.core.auth import get_password_hash
+from nmdc_runtime.util import (
+ ensure_unique_id_indexes,
+ REPO_ROOT_DIR,
+)
+from nmdc_runtime.api.core.auth import get_password_hash, ORCID_CLIENT_ID
from nmdc_runtime.api.db.mongo import (
get_mongo_db,
)
@@ -249,7 +257,7 @@ def ensure_initial_resources_on_boot():
).model_dump(exclude_unset=True),
upsert=True,
)
- mdb.users.create_index("username")
+ mdb.users.create_index("username", unique=True)
site_id = os.getenv("API_SITE_ID")
runtime_site_ok = mdb.sites.count_documents(({"id": site_id})) > 0
@@ -302,16 +310,12 @@ def ensure_default_api_perms():
allowed = {
"/metadata/changesheets:submit": [
"admin",
- "dwinston",
- "mam",
- "montana",
- "pajau",
- "spatil",
],
"/queries:run(query_cmd:DeleteCommand)": [
"admin",
- "dwinston",
- "scanon",
+ ],
+ "/metadata/json:submit": [
+ "admin",
],
}
for doc in [
@@ -360,10 +364,17 @@ async def get_versions():
"\n\n"
"Dependency versions:\n\n"
f'nmdc-schema={version("nmdc_schema")}\n\n'
- "Documentation"
+ "Documentation\n\n"
+ ' '
+ f'Login with ORCiD'
+ " (note: this link is static; if you are logged in, you will see a 'locked' lock icon"
+ " in the below-right 'Authorized' button.)"
),
openapi_tags=tags_metadata,
lifespan=lifespan,
+ docs_url=None,
)
app.include_router(api_router)
@@ -376,6 +387,60 @@ async def get_versions():
allow_headers=["*"],
)
app.add_middleware(Analytics)
+app.mount(
+ "/static",
+ StaticFiles(directory=REPO_ROOT_DIR.joinpath("nmdc_runtime/static/")),
+ name="static",
+)
+
+
+@app.get("/docs", include_in_schema=False)
+def custom_swagger_ui_html(
+ user_id_token: Annotated[str | None, Cookie()] = None,
+):
+ access_token = None
+ if user_id_token:
+ # get bearer token
+ rv = requests.post(
+ url=f"{BASE_URL_EXTERNAL}/token",
+ data={
+ "client_id": user_id_token,
+ "client_secret": "",
+ "grant_type": "client_credentials",
+ },
+ headers={
+ "Content-type": "application/x-www-form-urlencoded",
+ "Accept": "application/json",
+ },
+ )
+ if rv.status_code != 200:
+ rv.reason = rv.text
+ rv.raise_for_status()
+ access_token = rv.json()["access_token"]
+
+ swagger_ui_parameters = {"withCredentials": True}
+ if access_token is not None:
+ swagger_ui_parameters.update(
+ {
+ "onComplete": f"""() => {{ ui.preauthorizeApiKey(bearerAuth, {access_token}) }}""",
+ }
+ )
+ response = get_swagger_ui_html(
+ openapi_url=app.openapi_url,
+ title=app.title,
+ oauth2_redirect_url=app.swagger_ui_oauth2_redirect_url,
+ swagger_js_url="https://cdn.jsdelivr.net/npm/swagger-ui-dist@5.9.0/swagger-ui-bundle.js",
+ swagger_css_url="https://cdn.jsdelivr.net/npm/swagger-ui-dist@5.9.0/swagger-ui.css",
+ swagger_ui_parameters=swagger_ui_parameters,
+ )
+ content = (
+ response.body.decode()
+ .replace('"', "")
+ .replace('"', "")
+ .replace("", '"')
+ .replace("", '"')
+ )
+ return HTMLResponse(content=content)
if __name__ == "__main__":
diff --git a/nmdc_runtime/api/models/object.py b/nmdc_runtime/api/models/object.py
index 17df772c..1fd5ced4 100644
--- a/nmdc_runtime/api/models/object.py
+++ b/nmdc_runtime/api/models/object.py
@@ -165,7 +165,7 @@ class DrsObjectOutBase(DrsObjectBase):
version: Optional[str] = None
@field_serializer("self_uri")
- def serialize_url(self, slf_uri: AnyUrl, _info):
+ def serialize_url(self, self_uri: AnyUrl, _info):
return str(self_uri)
diff --git a/nmdc_runtime/api/models/user.py b/nmdc_runtime/api/models/user.py
index 0a96e2eb..dd803f59 100644
--- a/nmdc_runtime/api/models/user.py
+++ b/nmdc_runtime/api/models/user.py
@@ -12,6 +12,7 @@
oauth2_scheme,
credentials_exception,
TokenData,
+ bearer_scheme,
)
from nmdc_runtime.api.db.mongo import get_mongo_db
@@ -49,6 +50,7 @@ def authenticate_user(mdb, username: str, password: str):
async def get_current_user(
token: str = Depends(oauth2_scheme),
+ bearer_credentials: str = Depends(bearer_scheme),
mdb: pymongo.database.Database = Depends(get_mongo_db),
) -> UserInDB:
if mdb.invalidated_tokens.find_one({"_id": token}):
diff --git a/nmdc_runtime/containers.py b/nmdc_runtime/containers.py
index 977bbff7..f3283730 100644
--- a/nmdc_runtime/containers.py
+++ b/nmdc_runtime/containers.py
@@ -9,7 +9,6 @@
class Container(containers.DeclarativeContainer):
-
user_queries = providers.Singleton(UserQueries)
user_service = providers.Factory(UserService, user_queries=user_queries)
diff --git a/nmdc_runtime/lib/nmdc_etl_class.py b/nmdc_runtime/lib/nmdc_etl_class.py
index 0f4ffb96..f9ca0a81 100644
--- a/nmdc_runtime/lib/nmdc_etl_class.py
+++ b/nmdc_runtime/lib/nmdc_etl_class.py
@@ -196,7 +196,6 @@ def transform_dataframe(
print_df=False,
print_dict=False,
) -> list:
-
## used for testing
if test_rows != 0:
nmdc_df = nmdc_df.head(test_rows)
diff --git a/nmdc_runtime/lib/transform_nmdc_data.py b/nmdc_runtime/lib/transform_nmdc_data.py
index 1ab2f422..afbcbe32 100644
--- a/nmdc_runtime/lib/transform_nmdc_data.py
+++ b/nmdc_runtime/lib/transform_nmdc_data.py
@@ -995,7 +995,6 @@ def make_quantity_value(nmdc_objs: list, tx_attributes: list, **kwargs) -> list:
for attribute in tx_attributes:
for obj in nmdc_objs:
if has_raw_value(obj, attribute):
-
val = getattr(obj, attribute)
## split raw value after first space
diff --git a/nmdc_runtime/site/translation/gold_translator.py b/nmdc_runtime/site/translation/gold_translator.py
index 265b8b9e..42d3fe6e 100644
--- a/nmdc_runtime/site/translation/gold_translator.py
+++ b/nmdc_runtime/site/translation/gold_translator.py
@@ -212,7 +212,7 @@ def _get_quantity_value(
return None
elif minimum_numeric_value is not None and maximum_numeric_value is None:
return nmdc.QuantityValue(
- has_raw_value=field_value,
+ has_raw_value=minimum_numeric_value,
has_numeric_value=nmdc.Double(minimum_numeric_value),
has_unit=unit,
)
diff --git a/nmdc_runtime/site/translation/neon_soil_translator.py b/nmdc_runtime/site/translation/neon_soil_translator.py
index 6de9aae8..8ed6df54 100644
--- a/nmdc_runtime/site/translation/neon_soil_translator.py
+++ b/nmdc_runtime/site/translation/neon_soil_translator.py
@@ -7,7 +7,16 @@
from nmdc_schema import nmdc
from nmdc_runtime.site.translation.translator import Translator
from nmdc_runtime.site.util import get_basename
-from nmdc_runtime.site.translation.neon_utils import _get_value_or_none, _create_controlled_identified_term_value, _create_controlled_term_value, _create_geolocation_value, _create_quantity_value, _create_timestamp_value, _create_text_value, _create_double_value
+from nmdc_runtime.site.translation.neon_utils import (
+ _get_value_or_none,
+ _create_controlled_identified_term_value,
+ _create_controlled_term_value,
+ _create_geolocation_value,
+ _create_quantity_value,
+ _create_timestamp_value,
+ _create_text_value,
+ _create_double_value,
+)
class NeonSoilDataTranslator(Translator):
@@ -126,9 +135,7 @@ def _translate_biosample(
collection_date=_create_timestamp_value(
biosample_row["collectDate"].values[0]
),
- temp=_create_quantity_value(
- biosample_row["soilTemp"].values[0], "Celsius"
- ),
+ temp=_create_quantity_value(biosample_row["soilTemp"].values[0], "Celsius"),
depth=nmdc.QuantityValue(
has_minimum_numeric_value=_get_value_or_none(
biosample_row, "sampleTopDepth"
@@ -138,13 +145,9 @@ def _translate_biosample(
),
has_unit="m",
),
- samp_collec_device=_get_value_or_none(
- biosample_row, "soilSamplingDevice"
- ),
+ samp_collec_device=_get_value_or_none(biosample_row, "soilSamplingDevice"),
soil_horizon=_get_value_or_none(biosample_row, "horizon"),
- analysis_type=_get_value_or_none(
- biosample_row, "sequenceAnalysisType"
- ),
+ analysis_type=_get_value_or_none(biosample_row, "sequenceAnalysisType"),
env_package=_create_text_value(biosample_row["sampleType"].values[0]),
nitro=_create_quantity_value(
biosample_row["nitrogenPercent"].values[0], "percent"
@@ -303,9 +306,7 @@ def _translate_library_preparation(
:return: Object that using LibraryPreparation process model.
"""
processing_institution = None
- laboratory_name = _get_value_or_none(
- library_preparation_row, "laboratoryName"
- )
+ laboratory_name = _get_value_or_none(library_preparation_row, "laboratoryName")
if laboratory_name is not None:
if re.search("Battelle", laboratory_name, re.IGNORECASE):
processing_institution = "Battelle"
@@ -356,9 +357,7 @@ def _translate_omics_processing(
has_input=processed_sample_id,
has_output=raw_data_file_data,
processing_institution=processing_institution,
- ncbi_project_name=_get_value_or_none(
- omics_processing_row, "ncbiProjectID"
- ),
+ ncbi_project_name=_get_value_or_none(omics_processing_row, "ncbiProjectID"),
omics_type=_create_controlled_term_value(
omics_processing_row["investigation_type"].values[0]
),
diff --git a/nmdc_runtime/site/translation/neon_utils.py b/nmdc_runtime/site/translation/neon_utils.py
index 97d74de3..290e0613 100644
--- a/nmdc_runtime/site/translation/neon_utils.py
+++ b/nmdc_runtime/site/translation/neon_utils.py
@@ -5,35 +5,34 @@
from nmdc_schema import nmdc
-def _get_value_or_none(
- data: pd.DataFrame, column_name: str
- ) -> Union[str, float, None]:
- """
- Get the value from the specified column in the data DataFrame.
- If the column value is NaN, return None. However, there are handlers
- for a select set of columns - horizon, qaqcStatus, sampleTopDepth,
- and sampleBottomDepth.
-
- :param data: DataFrame to read the column value from.
- :return: Either a string, float or None depending on the column/column values.
- """
- if (
- column_name in data
- and not data[column_name].isna().any()
- and not data[column_name].empty
- ):
- if column_name == "horizon":
- return f"{data[column_name].values[0]} horizon"
- elif column_name == "qaqcStatus":
- return data[column_name].values[0].lower()
- elif column_name == "sampleTopDepth":
- return float(data[column_name].values[0]) / 100
- elif column_name == "sampleBottomDepth":
- return float(data[column_name].values[0]) / 100
- else:
- return data[column_name].values[0]
+def _get_value_or_none(data: pd.DataFrame, column_name: str) -> Union[str, float, None]:
+ """
+ Get the value from the specified column in the data DataFrame.
+ If the column value is NaN, return None. However, there are handlers
+ for a select set of columns - horizon, qaqcStatus, sampleTopDepth,
+ and sampleBottomDepth.
+
+ :param data: DataFrame to read the column value from.
+ :return: Either a string, float or None depending on the column/column values.
+ """
+ if (
+ column_name in data
+ and not data[column_name].isna().any()
+ and not data[column_name].empty
+ ):
+ if column_name == "horizon":
+ return f"{data[column_name].values[0]} horizon"
+ elif column_name == "qaqcStatus":
+ return data[column_name].values[0].lower()
+ elif column_name == "sampleTopDepth":
+ return float(data[column_name].values[0]) / 100
+ elif column_name == "sampleBottomDepth":
+ return float(data[column_name].values[0]) / 100
+ else:
+ return data[column_name].values[0]
+
+ return None
- return None
def _create_controlled_identified_term_value(
id: str = None, name: str = None
@@ -47,13 +46,10 @@ def _create_controlled_identified_term_value(
"""
if id is None or name is None:
return None
- return nmdc.ControlledIdentifiedTermValue(
- term=nmdc.OntologyClass(id=id, name=name)
- )
+ return nmdc.ControlledIdentifiedTermValue(term=nmdc.OntologyClass(id=id, name=name))
-def _create_controlled_term_value(
- name: str = None
-) -> nmdc.ControlledTermValue:
+
+def _create_controlled_term_value(name: str = None) -> nmdc.ControlledTermValue:
"""
Create a ControlledIdentifiedTermValue object with the specified id and name.
@@ -66,6 +62,7 @@ def _create_controlled_term_value(
return None
return nmdc.ControlledTermValue(has_raw_value=name)
+
def _create_timestamp_value(value: str = None) -> nmdc.TimestampValue:
"""
Create a TimestampValue object with the specified value.
@@ -78,6 +75,7 @@ def _create_timestamp_value(value: str = None) -> nmdc.TimestampValue:
return None
return nmdc.TimestampValue(has_raw_value=value)
+
def _create_quantity_value(
numeric_value: Union[str, int, float] = None, unit: str = None
) -> nmdc.QuantityValue:
@@ -94,6 +92,7 @@ def _create_quantity_value(
return None
return nmdc.QuantityValue(has_numeric_value=float(numeric_value), has_unit=unit)
+
def _create_text_value(value: str = None) -> nmdc.TextValue:
"""
Create a TextValue object with the specified value.
@@ -105,6 +104,7 @@ def _create_text_value(value: str = None) -> nmdc.TextValue:
return None
return nmdc.TextValue(has_raw_value=value)
+
def _create_double_value(value: str = None) -> nmdc.Double:
"""
Create a Double object with the specified value.
@@ -117,6 +117,7 @@ def _create_double_value(value: str = None) -> nmdc.Double:
return None
return nmdc.Double(value)
+
def _create_geolocation_value(
latitude: str = None, longitude: str = None
) -> nmdc.GeolocationValue:
@@ -142,4 +143,4 @@ def _create_geolocation_value(
return nmdc.GeolocationValue(
latitude=nmdc.DecimalDegree(latitude),
longitude=nmdc.DecimalDegree(longitude),
- )
\ No newline at end of file
+ )
diff --git a/nmdc_runtime/static/ORCIDiD_icon128x128.png b/nmdc_runtime/static/ORCIDiD_icon128x128.png
new file mode 100644
index 00000000..d73bdcbe
Binary files /dev/null and b/nmdc_runtime/static/ORCIDiD_icon128x128.png differ
diff --git a/requirements/dev.txt b/requirements/dev.txt
index aca7b8cb..390e5a21 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -4,18 +4,31 @@
#
# pip-compile --allow-unsafe --output-file=requirements/dev.txt --strip-extras requirements/dev.in
#
+aiohttp==3.9.1
+ # via
+ # -c requirements/main.txt
+ # black
+aiosignal==1.3.1
+ # via
+ # -c requirements/main.txt
+ # aiohttp
+async-timeout==4.0.3
+ # via
+ # -c requirements/main.txt
+ # aiohttp
attrs==23.1.0
# via
# -c requirements/main.txt
+ # aiohttp
# cattrs
# requests-cache
-black==23.11.0
+black==23.12.0
# via
# -c requirements/main.txt
# -r requirements/dev.in
build==1.0.3
# via pip-tools
-cattrs==23.2.2
+cattrs==23.2.3
# via
# -c requirements/main.txt
# requests-cache
@@ -23,10 +36,6 @@ certifi==2023.11.17
# via
# -c requirements/main.txt
# requests
-cffi==1.16.0
- # via
- # -c requirements/main.txt
- # cryptography
charset-normalizer==3.3.2
# via
# -c requirements/main.txt
@@ -36,14 +45,10 @@ click==8.1.7
# -c requirements/main.txt
# black
# pip-tools
-coverage==7.3.2
+coverage==7.3.3
# via
# -r requirements/dev.in
# pytest-cov
-cryptography==41.0.7
- # via
- # -c requirements/main.txt
- # secretstorage
docutils==0.20.1
# via
# -c requirements/main.txt
@@ -55,11 +60,17 @@ exceptiongroup==1.2.0
# pytest
flake8==6.1.0
# via -r requirements/dev.in
+frozenlist==1.4.1
+ # via
+ # -c requirements/main.txt
+ # aiohttp
+ # aiosignal
idna==3.6
# via
# -c requirements/main.txt
# requests
-importlib-metadata==6.8.0
+ # yarl
+importlib-metadata==7.0.0
# via
# keyring
# twine
@@ -71,10 +82,6 @@ invoke==2.2.0
# via -r requirements/dev.in
jaraco-classes==3.3.0
# via keyring
-jeepney==0.8.0
- # via
- # keyring
- # secretstorage
keyring==24.3.0
# via twine
markdown-it-py==3.0.0
@@ -89,11 +96,16 @@ mdurl==0.1.2
# markdown-it-py
more-itertools==10.1.0
# via jaraco-classes
+multidict==6.0.4
+ # via
+ # -c requirements/main.txt
+ # aiohttp
+ # yarl
mypy-extensions==1.0.0
# via
# -c requirements/main.txt
# black
-nh3==0.2.14
+nh3==0.2.15
# via readme-renderer
packaging==23.2
# via
@@ -101,7 +113,7 @@ packaging==23.2
# black
# build
# pytest
-pathspec==0.11.2
+pathspec==0.12.1
# via
# -c requirements/main.txt
# black
@@ -109,7 +121,7 @@ pip-tools==7.3.0
# via -r requirements/dev.in
pkginfo==1.9.6
# via twine
-platformdirs==4.0.0
+platformdirs==4.1.0
# via
# -c requirements/main.txt
# black
@@ -120,10 +132,6 @@ pluggy==1.3.0
# pytest
pycodestyle==2.11.1
# via flake8
-pycparser==2.21
- # via
- # -c requirements/main.txt
- # cffi
pyflakes==3.1.0
# via
# -c requirements/main.txt
@@ -141,7 +149,7 @@ pytest==7.4.3
# -r requirements/dev.in
# pytest-asyncio
# pytest-cov
-pytest-asyncio==0.21.1
+pytest-asyncio==0.23.2
# via -r requirements/dev.in
pytest-cov==4.1.0
# via -r requirements/dev.in
@@ -168,8 +176,6 @@ rfc3986==2.0.0
# via twine
rich==13.7.0
# via twine
-secretstorage==3.3.3
- # via keyring
six==1.16.0
# via
# -c requirements/main.txt
@@ -186,7 +192,7 @@ tomli==2.0.1
# pytest
twine==4.0.2
# via -r requirements/dev.in
-typing-extensions==4.8.0
+typing-extensions==4.9.0
# via
# -c requirements/main.txt
# black
@@ -203,6 +209,10 @@ urllib3==1.26.18
# twine
wheel==0.42.0
# via pip-tools
+yarl==1.9.4
+ # via
+ # -c requirements/main.txt
+ # aiohttp
zipp==3.17.0
# via importlib-metadata
diff --git a/requirements/main.in b/requirements/main.in
index 7d4db727..4509db70 100644
--- a/requirements/main.in
+++ b/requirements/main.in
@@ -24,7 +24,7 @@ mkdocs-jupyter
mkdocs-material
mkdocs-mermaid2-plugin
motor
-nmdc-schema==9.2.0
+nmdc-schema==9.3.2
openpyxl
pandas
passlib[bcrypt]
diff --git a/requirements/main.txt b/requirements/main.txt
index 852180fb..0bec54d1 100644
--- a/requirements/main.txt
+++ b/requirements/main.txt
@@ -4,9 +4,13 @@
#
# pip-compile --allow-unsafe --output-file=requirements/main.txt --strip-extras requirements/main.in
#
+aiohttp==3.9.1
+ # via black
+aiosignal==1.3.1
+ # via aiohttp
alabaster==0.7.13
# via sphinx
-alembic==1.12.1
+alembic==1.13.0
# via dagster
aniso8601==9.0.1
# via graphene
@@ -23,6 +27,8 @@ anyio==3.7.1
# jupyter-server
# starlette
# watchfiles
+appnope==0.1.3
+ # via ipykernel
argon2-cffi==23.1.0
# via jupyter-server
argon2-cffi-bindings==21.2.0
@@ -33,15 +39,18 @@ asttokens==2.4.1
# via stack-data
async-lru==2.0.4
# via jupyterlab
+async-timeout==4.0.3
+ # via aiohttp
attrs==23.1.0
# via
+ # aiohttp
# cattrs
# jsonschema
# referencing
# requests-cache
autoflake==2.2.1
# via shed
-babel==2.13.1
+babel==2.14.0
# via
# jupyterlab-server
# mkdocs-material
@@ -50,7 +59,7 @@ backoff==2.2.1
# via gql
base32-lib==1.0.2
# via -r requirements/main.in
-bcrypt==4.1.1
+bcrypt==4.1.2
# via passlib
beanie==1.23.6
# via -r requirements/main.in
@@ -59,19 +68,19 @@ beautifulsoup4==4.12.2
# -r requirements/main.in
# mkdocs-mermaid2-plugin
# nbconvert
-black==23.11.0
+black==23.12.0
# via shed
bleach==6.1.0
# via nbconvert
-boto3==1.33.3
+boto3==1.34.2
# via -r requirements/main.in
-botocore==1.33.3
+botocore==1.34.2
# via
# boto3
# s3transfer
cachetools==5.3.2
# via tox
-cattrs==23.2.2
+cattrs==23.2.3
# via requests-cache
certifi==2023.11.17
# via requests
@@ -126,23 +135,23 @@ curies==0.7.4
# via
# linkml-runtime
# prefixmaps
-dagit==1.5.9
+dagit==1.5.13
# via -r requirements/main.in
-dagster==1.5.9
+dagster==1.5.13
# via
# -r requirements/main.in
# dagster-graphql
# dagster-postgres
# dagster-webserver
-dagster-graphql==1.5.9
+dagster-graphql==1.5.13
# via
# -r requirements/main.in
# dagster-webserver
-dagster-pipes==1.5.9
+dagster-pipes==1.5.13
# via dagster
-dagster-postgres==0.21.9
+dagster-postgres==0.21.13
# via -r requirements/main.in
-dagster-webserver==1.5.9
+dagster-webserver==1.5.13
# via dagit
debugpy==1.8.0
# via ipykernel
@@ -154,7 +163,7 @@ dependency-injector==4.41.0
# via -r requirements/main.in
deprecated==1.2.14
# via linkml-runtime
-distlib==0.3.7
+distlib==0.3.8
# via virtualenv
dnspython==2.4.2
# via
@@ -182,7 +191,7 @@ exceptiongroup==1.2.0
# pytest
executing==2.0.1
# via stack-data
-fastapi==0.104.1
+fastapi==0.105.0
# via -r requirements/main.in
fastjsonschema==2.19.0
# via
@@ -196,9 +205,13 @@ fnc==0.5.3
# via -r requirements/main.in
fqdn==1.5.1
# via jsonschema
-frozendict==2.3.9
+frozendict==2.3.10
# via -r requirements/main.in
-fsspec==2023.10.0
+frozenlist==1.4.1
+ # via
+ # aiohttp
+ # aiosignal
+fsspec==2023.12.2
# via universal-pathlib
ghp-import==2.1.0
# via mkdocs
@@ -219,13 +232,11 @@ graphql-relay==3.2.0
# via graphene
graphviz==0.20.1
# via linkml
-greenlet==3.0.1
- # via sqlalchemy
-grpcio==1.59.3
+grpcio==1.60.0
# via
# dagster
# grpcio-health-checking
-grpcio-health-checking==1.59.3
+grpcio-health-checking==1.60.0
# via dagster
h11==0.14.0
# via uvicorn
@@ -269,7 +280,7 @@ isodate==0.6.1
# rdflib
isoduration==20.11.0
# via jsonschema
-isort==5.12.0
+isort==5.13.2
# via shed
jedi==0.19.1
# via ipython
@@ -323,7 +334,7 @@ jsonschema==4.20.0
# linkml
# linkml-runtime
# nbformat
-jsonschema-specifications==2023.11.1
+jsonschema-specifications==2023.11.2
# via jsonschema
jupyter==1.0.0
# via -r requirements/main.in
@@ -351,14 +362,14 @@ jupyter-events==0.9.0
# via jupyter-server
jupyter-lsp==2.2.1
# via jupyterlab
-jupyter-server==2.11.1
+jupyter-server==2.12.1
# via
# jupyter-lsp
# jupyterlab
# jupyterlab-server
# notebook
# notebook-shim
-jupyter-server-terminals==0.4.4
+jupyter-server-terminals==0.5.0
# via jupyter-server
jupyterlab==4.0.9
# via
@@ -372,19 +383,19 @@ jupyterlab-server==2.25.2
# notebook
jupyterlab-widgets==3.0.9
# via ipywidgets
-jupytext==1.15.2
+jupytext==1.16.0
# via mkdocs-jupyter
lazy-model==0.2.0
# via beanie
libcst==1.1.0
# via shed
-linkml==1.6.3
+linkml==1.6.6
# via
# -r requirements/main.in
# nmdc-schema
linkml-dataops==0.1.0
# via linkml
-linkml-runtime==1.6.2
+linkml-runtime==1.6.3
# via
# -r requirements/main.in
# linkml
@@ -426,7 +437,7 @@ mkdocs==1.5.3
# mkdocs-mermaid2-plugin
mkdocs-jupyter==0.24.6
# via -r requirements/main.in
-mkdocs-material==9.4.14
+mkdocs-material==9.5.2
# via
# -r requirements/main.in
# mkdocs-jupyter
@@ -439,14 +450,16 @@ motor==3.3.2
# -r requirements/main.in
# beanie
multidict==6.0.4
- # via yarl
+ # via
+ # aiohttp
+ # yarl
mypy-extensions==1.0.0
# via
# black
# typing-inspect
nbclient==0.9.0
# via nbconvert
-nbconvert==7.11.0
+nbconvert==7.12.0
# via
# jupyter
# jupyter-server
@@ -459,7 +472,7 @@ nbformat==5.9.2
# nbconvert
nest-asyncio==1.5.8
# via ipykernel
-nmdc-schema==9.2.0
+nmdc-schema==9.3.2
# via -r requirements/main.in
notebook==7.0.6
# via jupyter
@@ -487,6 +500,7 @@ packaging==23.2
# jupyter-server
# jupyterlab
# jupyterlab-server
+ # jupytext
# mkdocs
# nbconvert
# pyproject-api
@@ -498,7 +512,7 @@ packaging==23.2
# tox
paginate==0.5.6
# via mkdocs-material
-pandas==2.1.3
+pandas==2.1.4
# via
# -r requirements/main.in
# terminusdb-client
@@ -510,7 +524,7 @@ parso==0.8.3
# via jedi
passlib==1.7.4
# via -r requirements/main.in
-pathspec==0.11.2
+pathspec==0.12.1
# via
# black
# mkdocs
@@ -518,7 +532,7 @@ pendulum==2.1.2
# via dagster
pexpect==4.9.0
# via ipython
-platformdirs==4.0.0
+platformdirs==4.1.0
# via
# black
# jupyter-core
@@ -542,7 +556,7 @@ prefixmaps==0.2.0
# linkml-runtime
prometheus-client==0.19.0
# via jupyter-server
-prompt-toolkit==3.0.41
+prompt-toolkit==3.0.43
# via
# ipython
# jupyter-console
@@ -669,7 +683,7 @@ pyyaml==6.0.1
# uvicorn
pyyaml-env-tag==0.1
# via mkdocs
-pyzmq==25.1.1
+pyzmq==25.1.2
# via
# ipykernel
# jupyter-client
@@ -696,7 +710,7 @@ rdflib-shim==1.0.3
# pyshex
# pyshexc
# sparqlslurper
-referencing==0.31.1
+referencing==0.32.0
# via
# jsonschema
# jsonschema-specifications
@@ -742,12 +756,10 @@ rpds-py==0.13.2
rsa==4.9
# via python-jose
ruamel-yaml==0.18.5
- # via
- # linkml-dataops
- # nmdc-schema
+ # via linkml-dataops
ruamel-yaml-clib==0.2.8
# via ruamel-yaml
-s3transfer==0.8.2
+s3transfer==0.9.0
# via boto3
semver==3.0.2
# via -r requirements/main.in
@@ -890,7 +902,7 @@ typeguard==2.13.3
# via terminusdb-client
types-python-dateutil==2.8.19.14
# via arrow
-typing-extensions==4.8.0
+typing-extensions==4.9.0
# via
# alembic
# async-lru
@@ -929,7 +941,7 @@ uvicorn==0.24.0.post1
# dagster-webserver
uvloop==0.19.0
# via uvicorn
-virtualenv==20.24.7
+virtualenv==20.25.0
# via tox
watchdog==3.0.0
# via
@@ -946,7 +958,7 @@ webencodings==0.5.1
# via
# bleach
# tinycss2
-websocket-client==1.6.4
+websocket-client==1.7.0
# via jupyter-server
websockets==12.0
# via uvicorn
@@ -958,8 +970,10 @@ xlrd==2.0.1
# via -r requirements/main.in
xlsxwriter==3.1.9
# via -r requirements/main.in
-yarl==1.9.3
- # via gql
+yarl==1.9.4
+ # via
+ # aiohttp
+ # gql
# The following packages are considered to be unsafe in a requirements file:
setuptools==69.0.2
diff --git a/tests/test_data/test_neon_soil_data_translator.py b/tests/test_data/test_neon_soil_data_translator.py
index 2e9ce938..f60144f2 100644
--- a/tests/test_data/test_neon_soil_data_translator.py
+++ b/tests/test_data/test_neon_soil_data_translator.py
@@ -1,7 +1,12 @@
from io import StringIO
import pytest
from nmdc_runtime.site.translation.neon_soil_translator import NeonSoilDataTranslator
-from nmdc_runtime.site.translation.neon_utils import (_create_controlled_identified_term_value, _create_controlled_term_value, _create_timestamp_value, _get_value_or_none)
+from nmdc_runtime.site.translation.neon_utils import (
+ _create_controlled_identified_term_value,
+ _create_controlled_term_value,
+ _create_timestamp_value,
+ _get_value_or_none,
+)
import pandas as pd
# Mock data for testing
@@ -824,15 +829,11 @@ def test_get_value_or_none(self):
# specific handler for depth slot
expected_minimum_depth = 0.0
- actual_minimum_depth = _get_value_or_none(
- test_biosample, "sampleTopDepth"
- )
+ actual_minimum_depth = _get_value_or_none(test_biosample, "sampleTopDepth")
assert expected_minimum_depth == actual_minimum_depth
expected_maximum_depth = 0.295
- actual_maximum_depth = _get_value_or_none(
- test_biosample, "sampleBottomDepth"
- )
+ actual_maximum_depth = _get_value_or_none(test_biosample, "sampleBottomDepth")
assert expected_maximum_depth == actual_maximum_depth
expected_sample_id = "BLAN_005-M-8-0-20200713"