run black
Jing committed May 30, 2024
1 parent 6ef3cc7 commit 6f01714
Showing 51 changed files with 3,723 additions and 2,511 deletions.
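
Note for readers: every change in this commit is mechanical reformatting by the Black code formatter; no runtime behavior changes. As a rough sketch (not from the repo), the kind of rewrite shown in the first hunk below can be reproduced with Black's Python API, assuming a recent Black release with the default 88-character line length:

# Sketch: reproduce one of this commit's rewrites via Black's API.
# Assumes `pip install black`; black.Mode() defaults to 88-char lines.
import black

src = (
    'activity: Literal[\n'
    '    "metagenome_sequencing_activity_set"\n'
    '] = "metagenome_sequencing_activity_set"\n'
)

# format_str() reparses and reprints the snippet; Black releases from
# roughly 24.x onward prefer parenthesizing a long right-hand side,
# which is the style this commit applies.
print(black.format_str(src, mode=black.Mode()))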
23 changes: 14 additions & 9 deletions components/nmdc_runtime/workflow/spec.py
@@ -25,9 +25,9 @@ class Sequencing(Workflow):
     enabled: bool = True
     git_repo: str = ""
     version: str = "1.0.0"
-    activity: Literal[
-        "metagenome_sequencing_activity_set"
-    ] = "metagenome_sequencing_activity_set"
+    activity: Literal["metagenome_sequencing_activity_set"] = (
+        "metagenome_sequencing_activity_set"
+    )
     predecessor: str = ""
     id_type: str = ""
     input_prefix: str = ""
@@ -69,9 +69,9 @@ class MetagenomeAnnotation(Workflow):
     git_repo: str = "https://github.com/microbiomedata/mg_annotation"
     version: str = "1.0.0"
     wdl: str = "annotation_full.wdl"
-    activity: Literal[
-        "metagenome_annotation_activity_set"
-    ] = "metagenome_annotation_activity_set"
+    activity: Literal["metagenome_annotation_activity_set"] = (
+        "metagenome_annotation_activity_set"
+    )
     predecessor: str = "MetagenomeAssembly"
     input_prefix: str = "annotation"
     id_type: str = "mgann"
@@ -152,19 +152,24 @@ class ReadBasedAnalysis(Workflow):
     git_repo: str = "https://github.com/microbiomedata/ReadbasedAnalysis"
     version: str = "1.0.2"
     wdl: str = "ReadbasedAnalysis.wdl"
-    activity: Literal[
-        "read_based_taxonomy_analysis_activity_set"
-    ] = "read_based_taxonomy_analysis_activity_set"
+    activity: Literal["read_based_taxonomy_analysis_activity_set"] = (
+        "read_based_taxonomy_analysis_activity_set"
+    )
     predecessor: str = "Read QC Analysis"
     input_prefix: str = "nmdc_rba"
     id_type: str = "mgrba"
     inputs: ReadBasedAnalysisInputs = ReadBasedAnalysisInputs()


 class WorkflowModel(BaseModel):
-    workflow: ReadQcAnalysis | MetagenomeAssembly | MAGs | ReadBasedAnalysis | Sequencing | MetagenomeAnnotation = Field(
-        ..., discriminator="activity"
-    )
+    workflow: (
+        ReadQcAnalysis
+        | MetagenomeAssembly
+        | MAGs
+        | ReadBasedAnalysis
+        | Sequencing
+        | MetagenomeAnnotation
+    ) = Field(..., discriminator="activity")


 def get_all_workflows() -> list[Workflow]:
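
Aside: the WorkflowModel change above is pure formatting, but the field it touches is a Pydantic discriminated (tagged) union: every member class carries a Literal `activity` value, and `Field(..., discriminator="activity")` tells Pydantic to dispatch on that tag instead of trying each member in turn. A minimal self-contained sketch of the same pattern, with hypothetical Qc/Assembly classes that are not from this repo:

from typing import Literal, Union

from pydantic import BaseModel, Field


class Qc(BaseModel):
    # This Literal field is the "tag" identifying the variant.
    activity: Literal["qc_activity_set"] = "qc_activity_set"
    min_quality: int = 30


class Assembly(BaseModel):
    activity: Literal["assembly_activity_set"] = "assembly_activity_set"
    assembler: str = "spades"


class Model(BaseModel):
    # Pydantic reads "activity" from the input to pick the right class.
    workflow: Union[Qc, Assembly] = Field(..., discriminator="activity")


m = Model(workflow={"activity": "assembly_activity_set", "assembler": "megahit"})
assert isinstance(m.workflow, Assembly)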
@@ -3,5 +3,6 @@
 Workflow Execution Activies are a map of relevant data a user would like to have with
 regards to job execution or instantiation within their local system.
 """
+
 from .core import ActivityService, init_activity_service
 from .spec import Database, WorkflowExecutionActivity
@@ -1,4 +1,5 @@
 """Core functionality of the activity service module."""
+
 from dataclasses import fields
 from typing import Any, TypedDict
 from uuid import uuid1
35 changes: 15 additions & 20 deletions demo/metadata_discovery/indexing.ipynb
@@ -42,7 +42,7 @@
     "    \"https://localhost:9200\",\n",
     "    # docker cp nmdc-elasticsearch_es01_1:/usr/share/elasticsearch/config/certs/ca/ca.crt .\n",
     "    ca_certs=\"ca.crt\",\n",
-    "    basic_auth=(\"elastic\", ELASTIC_PASSWORD)\n",
+    "    basic_auth=(\"elastic\", ELASTIC_PASSWORD),\n",
     ")\n",
     "\n",
     "client.info().body"
@@ -69,13 +69,13 @@
     "\n",
     "docs = []\n",
     "rv = requests.get(\"https://api.microbiomedata.org/biosamples?page=1&per_page=200\")\n",
-    "docs.extend(rv.json()['results'])\n",
+    "docs.extend(rv.json()[\"results\"])\n",
     "rv = requests.get(\"https://api.microbiomedata.org/biosamples?page=2&per_page=200\")\n",
-    "docs.extend(rv.json()['results'])\n",
+    "docs.extend(rv.json()[\"results\"])\n",
     "rv = requests.get(\"https://api.microbiomedata.org/biosamples?page=3&per_page=200\")\n",
-    "docs.extend(rv.json()['results'])\n",
+    "docs.extend(rv.json()[\"results\"])\n",
     "rv = requests.get(\"https://api.microbiomedata.org/biosamples?page=4&per_page=200\")\n",
-    "docs.extend(rv.json()['results'])\n",
+    "docs.extend(rv.json()[\"results\"])\n",
     "len(docs)"
    ]
   },
@@ -133,12 +133,12 @@
    "source": [
     "from typing import Dict\n",
     "\n",
-    "ecosystem_paths : Dict[str, int] = {}\n",
+    "ecosystem_paths: Dict[str, int] = {}\n",
     "for row in df_ecosystem_paths.itertuples():\n",
     "    _path = \" > \".join([str(e) for e in row[2:]])\n",
     "    _id = row[1]\n",
     "    ecosystem_paths[_path] = _id\n",
-    "    \n",
+    "\n",
     "assert len(df_ecosystem_paths) == len(ecosystem_paths)"
    ]
   },
@@ -286,7 +286,7 @@
    "outputs": [],
    "source": [
     "def curie_purl(curie):\n",
-    "    prefix, n = curie.split(':', maxsplit=1)\n",
+    "    prefix, n = curie.split(\":\", maxsplit=1)\n",
     "    return f\"http://purl.obolibrary.org/obo/{prefix}_{n}\""
    ]
   },
@@ -314,15 +314,9 @@
     "\n",
     "for c, purl in curie_purl.items():\n",
     "    if c.startswith(\"ENVO:\"):\n",
-    "        curie_label[c] = str(g_envo.value(\n",
-    "            subject=URIRef(purl),\n",
-    "            predicate=RDFS.label\n",
-    "        ))\n",
+    "        curie_label[c] = str(g_envo.value(subject=URIRef(purl), predicate=RDFS.label))\n",
     "    elif c.startswith(\"PO:\"):\n",
-    "        curie_label[c] = str(g_po.value(\n",
-    "            subject=URIRef(purl),\n",
-    "            predicate=RDFS.label\n",
-    "        ))\n",
+    "        curie_label[c] = str(g_po.value(subject=URIRef(purl), predicate=RDFS.label))\n",
     "    else:\n",
     "        raise ValueError(\"Unknown CURIE prefix\")"
    ]
@@ -377,12 +371,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "index_name = 'biosamples'\n",
+    "index_name = \"biosamples\"\n",
     "\n",
     "if client.indices.exists(index=index_name):\n",
     "    client.indices.delete(index=index_name)\n",
     "\n",
     "import json\n",
+    "\n",
     "with open(f\"{index_name}.json\") as f:\n",
     "    index_body = json.load(f)\n",
     "\n",
@@ -398,7 +393,7 @@
    "source": [
     "from elasticsearch.helpers import bulk\n",
     "\n",
-    "bulk(client, [dict(_id=d[\"id\"], _index=index_name, **d)for d in docs])"
+    "bulk(client, [dict(_id=d[\"id\"], _index=index_name, **d) for d in docs])"
    ]
   },
   {
@@ -419,7 +414,7 @@
    "outputs": [],
    "source": [
     "for doc in docs:\n",
-    "    print(doc['ecosystem_path'])"
+    "    print(doc[\"ecosystem_path\"])"
    ]
   },
   {
@@ -430,7 +425,7 @@
    "outputs": [],
    "source": [
     "for doc in docs:\n",
-    "    print(doc['mixs_triad'])"
+    "    print(doc[\"mixs_triad\"])"
    ]
   },
   {
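
Aside: the `bulk(...)` call reformatted above uses the `elasticsearch.helpers.bulk` convention in which each action dict mixes routing metadata (`_index`, `_id`) with the document's own fields. A hedged sketch of that pattern, with placeholder connection details and documents that are not from the notebook:

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

# Placeholder connection details mirroring the notebook's local TLS setup.
client = Elasticsearch(
    "https://localhost:9200",
    ca_certs="ca.crt",
    basic_auth=("elastic", "changeme"),  # placeholder password
)

docs = [
    {"id": "nmdc:bsm-1", "name": "sample one"},
    {"id": "nmdc:bsm-2", "name": "sample two"},
]

# Each action combines metadata (_index, _id) with the document body;
# bulk() chunks the actions and returns (n_succeeded, errors).
n_ok, errors = bulk(client, [dict(_id=d["id"], _index="biosamples", **d) for d in docs])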
43 changes: 27 additions & 16 deletions demo/metadata_migration/main.ipynb
@@ -34,13 +34,13 @@
     "    host=os.getenv(\"MONGO_HOST\"),\n",
     "    username=os.getenv(\"MONGO_USERNAME\"),\n",
     "    password=os.getenv(\"MONGO_PASSWORD\"),\n",
-    "    directConnection=True, # connect to host as a standalone, rather than to entire replicaset\n",
+    "    directConnection=True,  # connect to host as a standalone, rather than to entire replicaset\n",
     ")\n",
     "reader_client = MongoClient(\n",
     "    host=os.getenv(\"MONGO_HOST\"),\n",
     "    username=os.getenv(\"MONGO_READONLY_USERNAME\"),\n",
     "    password=os.getenv(\"MONGO_READONLY_PASSWORD\"),\n",
-    "    directConnection=True, # connect to host as a standalone, rather than to entire replicaset\n",
+    "    directConnection=True,  # connect to host as a standalone, rather than to entire replicaset\n",
     ")\n",
     "\n",
     "mdb_src = reader_client[os.getenv(\"MONGO_DBNAME\")]\n",
@@ -84,18 +84,25 @@
     "import fastjsonschema\n",
     "from toolz import dissoc\n",
     "\n",
+    "\n",
     "def strip_oid(doc):\n",
     "    return dissoc(doc, \"_id\")\n",
     "\n",
+    "\n",
     "def nmdc_schema_collection_names() -> set:\n",
     "    return {\n",
-    "        k for k, v in get_nmdc_jsonschema_dict()[\"$defs\"][\"Database\"][\"properties\"].items()\n",
-    "        if v.get(\"items\",{}).get(\"$ref\")\n",
+    "        k\n",
+    "        for k, v in get_nmdc_jsonschema_dict()[\"$defs\"][\"Database\"][\n",
+    "            \"properties\"\n",
+    "        ].items()\n",
+    "        if v.get(\"items\", {}).get(\"$ref\")\n",
     "    }\n",
     "\n",
+    "\n",
     "def present_src_collections(mdb) -> list:\n",
     "    return sorted(\n",
-    "        n for n in (nmdc_schema_collection_names() & set(mdb_src.list_collection_names()))\n",
+    "        n\n",
+    "        for n in (nmdc_schema_collection_names() & set(mdb_src.list_collection_names()))\n",
     "        if mdb_src[n].estimated_document_count()\n",
     "    )"
    ]
@@ -121,6 +128,7 @@
     "\n",
     "from nmdc_runtime.util import get_nmdc_jsonschema_dict\n",
     "\n",
+    "\n",
     "def without_id_patterns(nmdc_jsonschema):\n",
     "    rv = deepcopy(nmdc_jsonschema)\n",
     "    for cls_, spec in rv[\"$defs\"].items():\n",
@@ -197,7 +205,7 @@
     "        raise Exception(f\"needs `term` or `has_raw_value`\")\n",
     "    if not (m := re.search(id_pattern, v[\"has_raw_value\"])):\n",
     "        raise Exception(f'{v[\"has_raw_value\"]} does not match a known ID pattern')\n",
-    "    \n",
+    "\n",
     "    return assoc(v, \"term\", {\"id\": v[\"has_raw_value\"]})\n",
     "\n",
     "\n",
@@ -206,7 +214,7 @@
     "        raise Exception(\"list expected\")\n",
     "    if not all(\":\" in elt for elt in v):\n",
     "        raise Exception(\"CURIEs expected\")\n",
-    "    \n",
+    "\n",
     "    rv = []\n",
     "    for elt in v:\n",
     "        prefix, localpart = elt.split(\":\", maxsplit=1)\n",
@@ -219,7 +227,7 @@
     "        raise Exception(\"list expected\")\n",
     "    if not all(\":\" in elt for elt in v):\n",
     "        raise Exception(\"CURIEs expected\")\n",
-    "    \n",
+    "\n",
     "    rv = []\n",
     "    for elt in v:\n",
     "        prefix, localpart = elt.split(\":\", maxsplit=1)\n",
@@ -243,7 +251,7 @@
     "def ensure_depth_via_depth2(v, d):\n",
     "    if \"depth\" not in d:\n",
     "        raise Exception(\"no `depth` field\")\n",
-    "    \n",
+    "\n",
     "    depth = d[\"depth\"]\n",
     "    return {\"depth\": depth, \"depth2\": None}\n",
     "\n",
@@ -281,8 +289,7 @@
     "\n",
     "def rename_num_tRNA(v):\n",
     "    return [\n",
-    "        change_fieldname(elt, \"num_tRNA\", \"num_t_rna\")\n",
-    "        for elt in v if \"num_tRNA\" in elt\n",
+    "        change_fieldname(elt, \"num_tRNA\", \"num_t_rna\") for elt in v if \"num_tRNA\" in elt\n",
     "    ]\n",
     "\n",
     "\n",
@@ -340,7 +347,7 @@
     "    \"emsl\": \"emsl_biosample_identifiers\",\n",
     "    \"gold\": \"gold_biosample_identifiers\",\n",
     "    \"igsn\": \"igsn_biosample_identifiers\",\n",
-    "    \"img.taxon\": \"img_identifiers\"\n",
+    "    \"img.taxon\": \"img_identifiers\",\n",
     "}\n",
     "\n",
     "\n",
@@ -350,10 +357,14 @@
     "\n",
     "def replace_fields(d, context):\n",
     "    assert \"collection_name\" in context\n",
-    "    for fieldname, replacement in fieldname_replacements[context[\"collection_name\"]].items():\n",
+    "    for fieldname, replacement in fieldname_replacements[\n",
+    "        context[\"collection_name\"]\n",
+    "    ].items():\n",
     "        if fieldname in d:\n",
     "            if isinstance(replacement, list):\n",
-    "                assert all(callable(r) for r in replacement), \"replacement-list must be all functions\"\n",
+    "                assert all(\n",
+    "                    callable(r) for r in replacement\n",
+    "                ), \"replacement-list must be all functions\"\n",
     "                for rfun in replacement:\n",
     "                    n_params = len(signature(rfun).parameters)\n",
     "                    if n_params == 1:\n",
@@ -381,8 +392,8 @@
     "        except fastjsonschema.JsonSchemaException as e:\n",
     "            print(d[\"id\"])\n",
     "            print(e)\n",
-    "            #pprint(d)\n",
-    "            #raise e\n",
+    "            # pprint(d)\n",
+    "            # raise e\n",
     "            return None\n",
     "        validated.append(d)\n",
     "    return validated"
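
Aside: the validation loop whose comments were reformatted above follows fastjsonschema's compile-once/validate-many pattern, catching `JsonSchemaException` per document so one bad record does not abort the batch. A toy sketch of the same shape (the schema here is illustrative, not the NMDC schema):

import fastjsonschema

# Compile once; the returned callable is reused for every document.
validate = fastjsonschema.compile(
    {
        "type": "object",
        "properties": {"id": {"type": "string"}},
        "required": ["id"],
    }
)

docs = [{"id": "nmdc:bsm-1"}, {"id": 42}]  # second document is invalid

validated = []
for d in docs:
    try:
        validate(d)
    except fastjsonschema.JsonSchemaException as e:
        print(f"skipping {d!r}: {e}")
        continue
    validated.append(d)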