run black
Jing committed May 30, 2024
1 parent 6ef3cc7 commit 6f01714
Showing 51 changed files with 3,723 additions and 2,511 deletions.
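
Note for readers: every change in this commit is mechanical reformatting by the Black code formatter; no runtime behavior changes. As a rough sketch (not from the repo), the kind of rewrite shown in the first hunk below can be reproduced with Black's Python API, assuming a recent Black release with the default 88-character line length:

# Sketch: reproduce one of this commit's rewrites via Black's API.
# Assumes `pip install black`; black.Mode() defaults to 88-char lines.
import black

src = (
    'activity: Literal[\n'
    '    "metagenome_sequencing_activity_set"\n'
    '] = "metagenome_sequencing_activity_set"\n'
)

# format_str() reparses and reprints the snippet; Black releases from
# roughly 24.x onward prefer parenthesizing a long right-hand side,
# which is the style this commit applies.
print(black.format_str(src, mode=black.Mode()))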
23 changes: 14 additions & 9 deletions components/nmdc_runtime/workflow/spec.py
@@ -25,9 +25,9 @@ class Sequencing(Workflow):
     enabled: bool = True
     git_repo: str = ""
     version: str = "1.0.0"
-    activity: Literal[
-        "metagenome_sequencing_activity_set"
-    ] = "metagenome_sequencing_activity_set"
+    activity: Literal["metagenome_sequencing_activity_set"] = (
+        "metagenome_sequencing_activity_set"
+    )
     predecessor: str = ""
     id_type: str = ""
     input_prefix: str = ""
@@ -69,9 +69,9 @@ class MetagenomeAnnotation(Workflow):
     git_repo: str = "https://github.com/microbiomedata/mg_annotation"
     version: str = "1.0.0"
     wdl: str = "annotation_full.wdl"
-    activity: Literal[
-        "metagenome_annotation_activity_set"
-    ] = "metagenome_annotation_activity_set"
+    activity: Literal["metagenome_annotation_activity_set"] = (
+        "metagenome_annotation_activity_set"
+    )
     predecessor: str = "MetagenomeAssembly"
     input_prefix: str = "annotation"
     id_type: str = "mgann"
@@ -152,19 +152,24 @@ class ReadBasedAnalysis(Workflow):
     git_repo: str = "https://github.com/microbiomedata/ReadbasedAnalysis"
     version: str = "1.0.2"
     wdl: str = "ReadbasedAnalysis.wdl"
-    activity: Literal[
-        "read_based_taxonomy_analysis_activity_set"
-    ] = "read_based_taxonomy_analysis_activity_set"
+    activity: Literal["read_based_taxonomy_analysis_activity_set"] = (
+        "read_based_taxonomy_analysis_activity_set"
+    )
     predecessor: str = "Read QC Analysis"
     input_prefix: str = "nmdc_rba"
     id_type: str = "mgrba"
     inputs: ReadBasedAnalysisInputs = ReadBasedAnalysisInputs()


 class WorkflowModel(BaseModel):
-    workflow: ReadQcAnalysis | MetagenomeAssembly | MAGs | ReadBasedAnalysis | Sequencing | MetagenomeAnnotation = Field(
-        ..., discriminator="activity"
-    )
+    workflow: (
+        ReadQcAnalysis
+        | MetagenomeAssembly
+        | MAGs
+        | ReadBasedAnalysis
+        | Sequencing
+        | MetagenomeAnnotation
+    ) = Field(..., discriminator="activity")


 def get_all_workflows() -> list[Workflow]:
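
Aside: the WorkflowModel change above is pure formatting, but the field it touches is a Pydantic discriminated (tagged) union: every member class carries a Literal `activity` value, and `Field(..., discriminator="activity")` tells Pydantic to dispatch on that tag instead of trying each member in turn. A minimal self-contained sketch of the same pattern, with hypothetical Qc/Assembly classes that are not from this repo:

from typing import Literal, Union

from pydantic import BaseModel, Field


class Qc(BaseModel):
    # This Literal field is the "tag" identifying the variant.
    activity: Literal["qc_activity_set"] = "qc_activity_set"
    min_quality: int = 30


class Assembly(BaseModel):
    activity: Literal["assembly_activity_set"] = "assembly_activity_set"
    assembler: str = "spades"


class Model(BaseModel):
    # Pydantic reads "activity" from the input to pick the right class.
    workflow: Union[Qc, Assembly] = Field(..., discriminator="activity")


m = Model(workflow={"activity": "assembly_activity_set", "assembler": "megahit"})
assert isinstance(m.workflow, Assembly)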
@@ -3,5 +3,6 @@
 Workflow Execution Activies are a map of relevant data a user would like to have with
 regards to job execution or instantiation within their local system.
 """
+
 from .core import ActivityService, init_activity_service
 from .spec import Database, WorkflowExecutionActivity
@@ -1,4 +1,5 @@
 """Core functionality of the activity service module."""
+
 from dataclasses import fields
 from typing import Any, TypedDict
 from uuid import uuid1
35 changes: 15 additions & 20 deletions demo/metadata_discovery/indexing.ipynb
@@ -42,7 +42,7 @@
     "    \"https://localhost:9200\",\n",
     "    # docker cp nmdc-elasticsearch_es01_1:/usr/share/elasticsearch/config/certs/ca/ca.crt .\n",
     "    ca_certs=\"ca.crt\",\n",
-    "    basic_auth=(\"elastic\", ELASTIC_PASSWORD)\n",
+    "    basic_auth=(\"elastic\", ELASTIC_PASSWORD),\n",
     ")\n",
     "\n",
     "client.info().body"
@@ -69,13 +69,13 @@
     "\n",
     "docs = []\n",
     "rv = requests.get(\"https://api.microbiomedata.org/biosamples?page=1&per_page=200\")\n",
-    "docs.extend(rv.json()['results'])\n",
+    "docs.extend(rv.json()[\"results\"])\n",
     "rv = requests.get(\"https://api.microbiomedata.org/biosamples?page=2&per_page=200\")\n",
-    "docs.extend(rv.json()['results'])\n",
+    "docs.extend(rv.json()[\"results\"])\n",
     "rv = requests.get(\"https://api.microbiomedata.org/biosamples?page=3&per_page=200\")\n",
-    "docs.extend(rv.json()['results'])\n",
+    "docs.extend(rv.json()[\"results\"])\n",
     "rv = requests.get(\"https://api.microbiomedata.org/biosamples?page=4&per_page=200\")\n",
-    "docs.extend(rv.json()['results'])\n",
+    "docs.extend(rv.json()[\"results\"])\n",
     "len(docs)"
    ]
   },
@@ -133,12 +133,12 @@
    "source": [
     "from typing import Dict\n",
     "\n",
-    "ecosystem_paths : Dict[str, int] = {}\n",
+    "ecosystem_paths: Dict[str, int] = {}\n",
     "for row in df_ecosystem_paths.itertuples():\n",
     "    _path = \" > \".join([str(e) for e in row[2:]])\n",
     "    _id = row[1]\n",
     "    ecosystem_paths[_path] = _id\n",
-    "    \n",
+    "\n",
     "assert len(df_ecosystem_paths) == len(ecosystem_paths)"
    ]
   },
@@ -286,7 +286,7 @@
    "outputs": [],
    "source": [
     "def curie_purl(curie):\n",
-    "    prefix, n = curie.split(':', maxsplit=1)\n",
+    "    prefix, n = curie.split(\":\", maxsplit=1)\n",
     "    return f\"http://purl.obolibrary.org/obo/{prefix}_{n}\""
    ]
   },
@@ -314,15 +314,9 @@
     "\n",
     "for c, purl in curie_purl.items():\n",
     "    if c.startswith(\"ENVO:\"):\n",
-    "        curie_label[c] = str(g_envo.value(\n",
-    "            subject=URIRef(purl),\n",
-    "            predicate=RDFS.label\n",
-    "        ))\n",
+    "        curie_label[c] = str(g_envo.value(subject=URIRef(purl), predicate=RDFS.label))\n",
     "    elif c.startswith(\"PO:\"):\n",
-    "        curie_label[c] = str(g_po.value(\n",
-    "            subject=URIRef(purl),\n",
-    "            predicate=RDFS.label\n",
-    "        ))\n",
+    "        curie_label[c] = str(g_po.value(subject=URIRef(purl), predicate=RDFS.label))\n",
     "    else:\n",
     "        raise ValueError(\"Unknown CURIE prefix\")"
    ]
@@ -377,12 +371,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "index_name = 'biosamples'\n",
+    "index_name = \"biosamples\"\n",
     "\n",
     "if client.indices.exists(index=index_name):\n",
     "    client.indices.delete(index=index_name)\n",
     "\n",
     "import json\n",
+    "\n",
     "with open(f\"{index_name}.json\") as f:\n",
     "    index_body = json.load(f)\n",
     "\n",
@@ -398,7 +393,7 @@
    "source": [
     "from elasticsearch.helpers import bulk\n",
     "\n",
-    "bulk(client, [dict(_id=d[\"id\"], _index=index_name, **d)for d in docs])"
+    "bulk(client, [dict(_id=d[\"id\"], _index=index_name, **d) for d in docs])"
    ]
   },
   {
@@ -419,7 +414,7 @@
    "outputs": [],
    "source": [
     "for doc in docs:\n",
-    "    print(doc['ecosystem_path'])"
+    "    print(doc[\"ecosystem_path\"])"
    ]
   },
   {
@@ -430,7 +425,7 @@
    "outputs": [],
    "source": [
     "for doc in docs:\n",
-    "    print(doc['mixs_triad'])"
+    "    print(doc[\"mixs_triad\"])"
    ]
   },
   {
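
Aside: the `bulk(...)` call reformatted above uses the `elasticsearch.helpers.bulk` convention in which each action dict mixes routing metadata (`_index`, `_id`) with the document's own fields. A hedged sketch of that pattern, with placeholder connection details and documents that are not from the notebook:

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

# Placeholder connection details mirroring the notebook's local TLS setup.
client = Elasticsearch(
    "https://localhost:9200",
    ca_certs="ca.crt",
    basic_auth=("elastic", "changeme"),  # placeholder password
)

docs = [
    {"id": "nmdc:bsm-1", "name": "sample one"},
    {"id": "nmdc:bsm-2", "name": "sample two"},
]

# Each action combines metadata (_index, _id) with the document body;
# bulk() chunks the actions and returns (n_succeeded, errors).
n_ok, errors = bulk(client, [dict(_id=d["id"], _index="biosamples", **d) for d in docs])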
43 changes: 27 additions & 16 deletions demo/metadata_migration/main.ipynb
@@ -34,13 +34,13 @@
     "    host=os.getenv(\"MONGO_HOST\"),\n",
     "    username=os.getenv(\"MONGO_USERNAME\"),\n",
     "    password=os.getenv(\"MONGO_PASSWORD\"),\n",
-    "    directConnection=True, # connect to host as a standalone, rather than to entire replicaset\n",
+    "    directConnection=True,  # connect to host as a standalone, rather than to entire replicaset\n",
     ")\n",
     "reader_client = MongoClient(\n",
     "    host=os.getenv(\"MONGO_HOST\"),\n",
     "    username=os.getenv(\"MONGO_READONLY_USERNAME\"),\n",
     "    password=os.getenv(\"MONGO_READONLY_PASSWORD\"),\n",
-    "    directConnection=True, # connect to host as a standalone, rather than to entire replicaset\n",
+    "    directConnection=True,  # connect to host as a standalone, rather than to entire replicaset\n",
     ")\n",
     "\n",
     "mdb_src = reader_client[os.getenv(\"MONGO_DBNAME\")]\n",
@@ -84,18 +84,25 @@
     "import fastjsonschema\n",
     "from toolz import dissoc\n",
     "\n",
+    "\n",
     "def strip_oid(doc):\n",
     "    return dissoc(doc, \"_id\")\n",
     "\n",
+    "\n",
     "def nmdc_schema_collection_names() -> set:\n",
     "    return {\n",
-    "        k for k, v in get_nmdc_jsonschema_dict()[\"$defs\"][\"Database\"][\"properties\"].items()\n",
-    "        if v.get(\"items\",{}).get(\"$ref\")\n",
+    "        k\n",
+    "        for k, v in get_nmdc_jsonschema_dict()[\"$defs\"][\"Database\"][\n",
+    "            \"properties\"\n",
+    "        ].items()\n",
+    "        if v.get(\"items\", {}).get(\"$ref\")\n",
     "    }\n",
     "\n",
+    "\n",
     "def present_src_collections(mdb) -> list:\n",
     "    return sorted(\n",
-    "        n for n in (nmdc_schema_collection_names() & set(mdb_src.list_collection_names()))\n",
+    "        n\n",
+    "        for n in (nmdc_schema_collection_names() & set(mdb_src.list_collection_names()))\n",
     "        if mdb_src[n].estimated_document_count()\n",
     "    )"
    ]
@@ -121,6 +128,7 @@
     "\n",
     "from nmdc_runtime.util import get_nmdc_jsonschema_dict\n",
     "\n",
+    "\n",
     "def without_id_patterns(nmdc_jsonschema):\n",
     "    rv = deepcopy(nmdc_jsonschema)\n",
     "    for cls_, spec in rv[\"$defs\"].items():\n",
@@ -197,7 +205,7 @@
     "        raise Exception(f\"needs `term` or `has_raw_value`\")\n",
     "    if not (m := re.search(id_pattern, v[\"has_raw_value\"])):\n",
     "        raise Exception(f'{v[\"has_raw_value\"]} does not match a known ID pattern')\n",
-    "    \n",
+    "\n",
     "    return assoc(v, \"term\", {\"id\": v[\"has_raw_value\"]})\n",
     "\n",
     "\n",
@@ -206,7 +214,7 @@
     "        raise Exception(\"list expected\")\n",
     "    if not all(\":\" in elt for elt in v):\n",
     "        raise Exception(\"CURIEs expected\")\n",
-    "    \n",
+    "\n",
     "    rv = []\n",
     "    for elt in v:\n",
     "        prefix, localpart = elt.split(\":\", maxsplit=1)\n",
@@ -219,7 +227,7 @@
     "        raise Exception(\"list expected\")\n",
     "    if not all(\":\" in elt for elt in v):\n",
     "        raise Exception(\"CURIEs expected\")\n",
-    "    \n",
+    "\n",
     "    rv = []\n",
     "    for elt in v:\n",
     "        prefix, localpart = elt.split(\":\", maxsplit=1)\n",
@@ -243,7 +251,7 @@
     "def ensure_depth_via_depth2(v, d):\n",
     "    if \"depth\" not in d:\n",
     "        raise Exception(\"no `depth` field\")\n",
-    "    \n",
+    "\n",
     "    depth = d[\"depth\"]\n",
     "    return {\"depth\": depth, \"depth2\": None}\n",
     "\n",
@@ -281,8 +289,7 @@
     "\n",
     "def rename_num_tRNA(v):\n",
     "    return [\n",
-    "        change_fieldname(elt, \"num_tRNA\", \"num_t_rna\")\n",
-    "        for elt in v if \"num_tRNA\" in elt\n",
+    "        change_fieldname(elt, \"num_tRNA\", \"num_t_rna\") for elt in v if \"num_tRNA\" in elt\n",
     "    ]\n",
     "\n",
     "\n",
@@ -340,7 +347,7 @@
     "    \"emsl\": \"emsl_biosample_identifiers\",\n",
     "    \"gold\": \"gold_biosample_identifiers\",\n",
     "    \"igsn\": \"igsn_biosample_identifiers\",\n",
-    "    \"img.taxon\": \"img_identifiers\"\n",
+    "    \"img.taxon\": \"img_identifiers\",\n",
     "}\n",
     "\n",
     "\n",
@@ -350,10 +357,14 @@
     "\n",
     "def replace_fields(d, context):\n",
     "    assert \"collection_name\" in context\n",
-    "    for fieldname, replacement in fieldname_replacements[context[\"collection_name\"]].items():\n",
+    "    for fieldname, replacement in fieldname_replacements[\n",
+    "        context[\"collection_name\"]\n",
+    "    ].items():\n",
     "        if fieldname in d:\n",
     "            if isinstance(replacement, list):\n",
-    "                assert all(callable(r) for r in replacement), \"replacement-list must be all functions\"\n",
+    "                assert all(\n",
+    "                    callable(r) for r in replacement\n",
+    "                ), \"replacement-list must be all functions\"\n",
     "                for rfun in replacement:\n",
     "                    n_params = len(signature(rfun).parameters)\n",
     "                    if n_params == 1:\n",
@@ -381,8 +392,8 @@
     "        except fastjsonschema.JsonSchemaException as e:\n",
     "            print(d[\"id\"])\n",
     "            print(e)\n",
-    "            #pprint(d)\n",
-    "            #raise e\n",
+    "            # pprint(d)\n",
+    "            # raise e\n",
     "            return None\n",
     "        validated.append(d)\n",
     "    return validated"
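
Aside: the validation loop whose comments were reformatted above follows fastjsonschema's compile-once/validate-many pattern, catching `JsonSchemaException` per document so one bad record does not abort the batch. A toy sketch of the same shape (the schema here is illustrative, not the NMDC schema):

import fastjsonschema

# Compile once; the returned callable is reused for every document.
validate = fastjsonschema.compile(
    {
        "type": "object",
        "properties": {"id": {"type": "string"}},
        "required": ["id"],
    }
)

docs = [{"id": "nmdc:bsm-1"}, {"id": 42}]  # second document is invalid

validated = []
for d in docs:
    try:
        validate(d)
    except fastjsonschema.JsonSchemaException as e:
        print(f"skipping {d!r}: {e}")
        continue
    validated.append(d)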