From aae9723dad39c2c08072ffe20daffa49292fd603 Mon Sep 17 00:00:00 2001 From: Donny Winston Date: Thu, 21 Nov 2024 20:35:18 +0100 Subject: [PATCH 1/7] todo: progress --- Makefile | 2 +- ...lidation_referential_integrity_check.ipynb | 187 +++++++++++------- 2 files changed, 122 insertions(+), 67 deletions(-) diff --git a/Makefile b/Makefile index f3779003..85ba107b 100644 --- a/Makefile +++ b/Makefile @@ -101,7 +101,7 @@ mongorestore-nmdc-db: mkdir -p /tmp/remote-mongodump/nmdc # SSH into the remote server, stream the dump directory as a gzipped tar archive, and extract it locally. ssh -i ~/.ssh/nersc ${NERSC_USERNAME}@dtn01.nersc.gov \ - 'tar -czf - -C /global/cfs/projectdirs/m3408/nmdc-mongodumps/dump_nmdc-prod_2024-07-29_20-12-07/nmdc .' \ + 'tar -czf - -C /global/cfs/projectdirs/m3408/nmdc-mongodumps/dump_nmdc-prod_2024-11-20_20-12-02/nmdc .' \ | tar -xzv -C /tmp/remote-mongodump/nmdc mongorestore -v -h localhost:27018 -u admin -p root --authenticationDatabase=admin \ --drop --nsInclude='nmdc.*' --dir /tmp/remote-mongodump diff --git a/docs/nb/bulk_validation_referential_integrity_check.ipynb b/docs/nb/bulk_validation_referential_integrity_check.ipynb index 06a01ec8..b616da32 100644 --- a/docs/nb/bulk_validation_referential_integrity_check.ipynb +++ b/docs/nb/bulk_validation_referential_integrity_check.ipynb @@ -37,7 +37,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "mongodb://localhost:27018\n" + "localhost:27018\n" ] } ], @@ -93,7 +93,7 @@ { "data": { "text/plain": [ - "'11.0.0rc22'" + "'11.1.0'" ] }, "execution_count": 3, @@ -161,7 +161,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'study_set', 'workflow_execution_set', 'material_processing_set', 'instrument_set', 'data_object_set', 'configuration_set', 'biosample_set', 'functional_annotation_agg', 'calibration_set', 'processed_sample_set', 'field_research_site_set', 'data_generation_set'}\n" + "{'data_object_set', 'functional_annotation_agg', 'material_processing_set', 'workflow_execution_set', 'calibration_set', 'data_generation_set', 'configuration_set', 'processed_sample_set', 'instrument_set', 'biosample_set', 'study_set', 'field_research_site_set'}\n" ] } ], @@ -279,7 +279,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "829039e5-7abe-4c50-ba44-e384b45b7535", "metadata": { "scrolled": true @@ -288,12 +288,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6c88577a3a9342808d3bbc0e3707a95a", + "model_id": "d798cf56b8b541598d246c023543d29a", "version_major": 2, "version_minor": 0 }, "text/plain": [ - " 0%| | 0/2351449 [00:00 List[str]:\n", + " r\"\"\"\n", + " Determine the slot's \"effective\" range, by taking into account its `any_of` constraints (if defined).\n", + "\n", + " Note: The `any_of` constraints constrain the slot's \"effective\" range beyond that described by the\n", + " induced slot definition's `range` attribute. `SchemaView` does not seem to provide the result\n", + " of applying those additional constraints, so we do it manually here (if any are defined).\n", + " Reference: https://github.com/orgs/linkml/discussions/2101#discussion-6625646\n", + "\n", + " Reference: https://linkml.io/linkml-model/latest/docs/any_of/\n", + " \"\"\"\n", + "\n", + " # Initialize the list to be empty.\n", + " names_of_eligible_target_classes = []\n", + "\n", + " # If the `any_of` constraint is defined on this slot, use that instead of the `range`.\n", + " if \"any_of\" in slot_definition and len(slot_definition.any_of) > 0:\n", + " for slot_expression in slot_definition.any_of:\n", + " # Use the slot expression's `range` to get the specified eligible class name\n", + " # and the names of all classes that inherit from that eligible class.\n", + " if slot_expression.range in schema_view.all_classes():\n", + " own_and_descendant_class_names = schema_view.class_descendants(slot_expression.range)\n", + " names_of_eligible_target_classes.extend(own_and_descendant_class_names)\n", + " else:\n", + " # Use the slot's `range` to get the specified eligible class name\n", + " # and the names of all classes that inherit from that eligible class.\n", + " if slot_definition.range in schema_view.all_classes():\n", + " own_and_descendant_class_names = schema_view.class_descendants(slot_definition.range)\n", + " names_of_eligible_target_classes.extend(own_and_descendant_class_names)\n", + "\n", + " # Remove duplicate class names.\n", + " names_of_eligible_target_classes = list(set(names_of_eligible_target_classes))\n", + "\n", + " return names_of_eligible_target_classes" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "id": "98fbfdff-51d6-42c5-9448-3b4616a2c9cb", "metadata": {}, "outputs": [], @@ -345,13 +395,13 @@ "document_reference_ranged_slots = defaultdict(list)\n", "for cls_name, slot_map in cls_slot_map.items():\n", " for slot_name, slot in slot_map.items():\n", - " if str(slot.range) in document_referenceable_ranges:\n", + " if set(get_names_of_classes_in_effective_range_of_slot(schema_view, slot)) & document_referenceable_ranges:\n", " document_reference_ranged_slots[cls_name].append(slot_name)" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "id": "d253c567-533f-440f-8376-03a6e1e905cf", "metadata": {}, "outputs": [], @@ -371,7 +421,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 40, "id": "b2e618f3-78b9-42b6-8ea9-63d080b1b0f6", "metadata": { "scrolled": true @@ -380,12 +430,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f63a4ce942bc4278b3e99a5a87b0155c", + "model_id": "158c79049a2c43c6b04904bc325946ec", "version_major": 2, "version_minor": 0 }, "text/plain": [ - " 0%| | 0/2351449 [00:00 Date: Thu, 21 Nov 2024 21:58:02 +0100 Subject: [PATCH 2/7] todo: progress --- ...lidation_referential_integrity_check.ipynb | 183 +++--------------- 1 file changed, 26 insertions(+), 157 deletions(-) diff --git a/docs/nb/bulk_validation_referential_integrity_check.ipynb b/docs/nb/bulk_validation_referential_integrity_check.ipynb index b616da32..bfe16603 100644 --- a/docs/nb/bulk_validation_referential_integrity_check.ipynb +++ b/docs/nb/bulk_validation_referential_integrity_check.ipynb @@ -517,7 +517,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 55, "id": "d0374653-c074-4a87-aef8-24323a5a63b3", "metadata": {}, "outputs": [], @@ -530,11 +530,11 @@ " for doc in mdb.alldocs.find(limit=limit):\n", " # Iterate over each key/value pair in the dictionary (document).\n", " for field, value in doc.items():\n", - " if field in (\"_id\", \"id\", \"type\"):\n", + " if field.startswith(\"_\") or field in (\"id\", \"type\"):\n", " continue\n", " acceptable_slot_classes = get_names_of_classes_in_effective_range_of_slot(\n", " schema_view,\n", - " cls_slot_map[doc[\"type\"][field],\n", + " cls_slot_map[doc[\"type\"][5:]][field],\n", " )\n", " if not isinstance(value, list):\n", " value = [value]\n", @@ -556,7 +556,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 56, "id": "103d70b6-24ab-41bd-8b7f-d2faaa028bdf", "metadata": { "scrolled": true @@ -565,7 +565,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "483900e4d6bf4e46ab4a36ae0fb3f8a1", + "model_id": "bebb0ddb0055428f875494dbc0412d88", "version_major": 2, "version_minor": 0 }, @@ -580,25 +580,26 @@ "source": [ "from pprint import pprint\n", "\n", + "alldocs_ids = set(mdb.alldocs.distinct(\"id\"))\n", + "\n", "def doc_field_value_errors(assertions):\n", " errors = {\"not_found\": [], \"invalid_type\": []}\n", - " # collect a list of assertions that concern a specific referenced \"id\".\n", + " # group assertions by referenced \"id\" value.\n", " assertions_by_referenced_id_value = defaultdict(list)\n", " for a in assertions:\n", " assertions_by_referenced_id_value[a[\"value\"]].append(a)\n", - " # find the claimed type for every document id that is referenced by another document.\n", + " # associate each referenced document id with its type.\n", " doc_id_types = {}\n", " for d in list(mdb.alldocs.find({\"id\": {\"$in\": list(assertions_by_referenced_id_value.keys())}}, {\"_id\": 0, \"id\": 1, \"type\": 1})):\n", " doc_id_types[d[\"id\"]] = d[\"type\"]\n", "\n", " for id_value, id_value_assertions in assertions_by_referenced_id_value.items():\n", - " if id_value not in doc_id_types:\n", + " if id_value not in alldocs_ids:\n", " errors[\"not_found\"].extend(id_value_assertions)\n", " else:\n", " for a in id_value_assertions:\n", - " # check that the observed type (or any of its ancestors) for this id reference\n", - " # is in fact allowed by the referring slot's schema definition.\n", - " if not (set(a[\"acceptable_slot_classes\"]) & set(doc_id_types[a[\"value\"]])):\n", + " # check that the document-reported type for this id reference is kosher as per the referring slot's schema definition.\n", + " if doc_id_types[a[\"value\"]][5:] not in a[\"acceptable_slot_classes\"]:\n", " errors[\"invalid_type\"].append(a)\n", "\n", " return errors\n", @@ -633,17 +634,17 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 57, "id": "e01450d1-3369-4fc5-80be-9787e00a6597", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(4, 0)" + "(12, 0)" ] }, - "execution_count": 30, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -655,170 +656,38 @@ "# results with v10.5.5: (33, 6900)" ] }, - { - "cell_type": "markdown", - "id": "54a560df", - "metadata": {}, - "source": [ - "Display a few errors from one of the lists, as an example." - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "afd25543-1cb3-4887-9aba-0086d4b998a6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'nmdc:dobj-11-achfhn33', 'nmdc:dobj-11-dpnhb305'}" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "{e[\"value\"] for e in errors[\"not_found\"]}" - ] - }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 73, "id": "a25857f4-e26e-4896-9e5f-607e7b4bb07c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "nmdc:wfmgan-11-c516q834.1 has_input nmdc:dobj-11-dpnhb305\n", - "nmdc:wfmgan-11-yzp9eq74.1 has_input nmdc:dobj-11-achfhn33\n", - "nmdc:wfmgan-11-c516q834.1 has_input nmdc:dobj-11-dpnhb305\n", - "nmdc:wfmgan-11-yzp9eq74.1 has_input nmdc:dobj-11-achfhn33\n" - ] - } - ], - "source": [ - "for e in errors[\"not_found\"]:\n", - " print(e[\"id\"], e['field'], e['value'])" - ] - }, - { - "cell_type": "markdown", - "id": "2bd191cd", - "metadata": {}, - "source": [ - "Display an example `invalid_type` errors for each of the set of expected types that are not being found:" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "f8788551-a9b1-4915-a23d-74cfcbe62ec1", - "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[]" + "{'nmdc:wfmag-11-8s9xk838.1 / has_input / nmdc:dobj-11-gxgpbv06',\n", + " 'nmdc:wfmag-11-8s9xk838.1 / has_input / nmdc:dobj-11-kr8ev105',\n", + " 'nmdc:wfmag-11-8s9xk838.1 / has_input / nmdc:dobj-11-whq9ph06',\n", + " 'nmdc:wfmag-11-dchy6q29.1 / has_input / nmdc:dobj-11-1wzar939',\n", + " 'nmdc:wfmag-11-dchy6q29.1 / has_input / nmdc:dobj-11-fg28a080',\n", + " 'nmdc:wfmag-11-dchy6q29.1 / has_input / nmdc:dobj-11-s4hp2x64'}" ] }, - "execution_count": 39, + "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "errors[\"invalid_type\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "33516e3c-f10d-4c30-942b-0d01d06082f9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'id': 'nmdc:dobj-11-enyrng31', 'id_is_nmdc_id': True, 'field': 'was_generated_by', 'value': 'nmdc:omprc-11-2et99h53', 'slot_range': 'WorkflowExecution'}\n" - ] - } - ], - "source": [ - "slot_range_examples = {}\n", - "for e in errors[\"invalid_type\"]:\n", - " slot_range_examples[e[\"slot_range\"]] = e\n", - "\n", - "for ex in slot_range_examples.values():\n", - " print(ex)" + "set(f\"{e['id']} / {e['field']} / {e['value']}\" for e in errors[\"not_found\"])" ] }, { "cell_type": "markdown", - "id": "d4abec53", + "id": "b149872d-5814-4a73-ac5e-cc75fb578a01", "metadata": {}, "source": [ - "Spot check one of those errors." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "29ec7e82-d079-4525-bd7b-d770fd69d788", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'_id': ObjectId('66edad78007ef07eb670a09d'),\n", - " 'id': 'nmdc:omprc-11-sxze4w22',\n", - " 'has_input': ['nmdc:bsm-11-978cs285'],\n", - " 'has_output': ['nmdc:dobj-11-1epz0d53'],\n", - " 'associated_studies': ['nmdc:sty-11-28tm5d36'],\n", - " 'instrument_used': ['nmdc:inst-14-mwrrj632'],\n", - " 'type': ['MassSpectrometry',\n", - " 'DataGeneration',\n", - " 'PlannedProcess',\n", - " 'NamedThing']}" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# OmicsProcessing is not subclass of Activity\n", - "mdb.alldocs.find_one({\"id\": \"nmdc:omprc-11-sxze4w22\"})" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "802290e0-58dd-4fbd-835a-c9928006819d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'_id': ObjectId('66edad78007ef07eb67078c8'),\n", - " 'id': 'nmdc:procsm-11-v5sykd35',\n", - " 'type': ['ProcessedSample', 'MaterialEntity', 'NamedThing']}" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# ProcessedSample is not subclass of Biosample\n", - "mdb.alldocs.find_one({\"id\": \"nmdc:procsm-11-v5sykd35\"})" + "TODO: why the double assertions?" ] } ], From 700c27c505e7adf7d5aed48d46fe2aad7760710d Mon Sep 17 00:00:00 2001 From: Donny Winston Date: Tue, 26 Nov 2024 20:07:00 +0100 Subject: [PATCH 3/7] fix: ThreadPoolExecutor is evil? closes #576 --- Makefile | 2 +- ...lidation_referential_integrity_check.ipynb | 234 ++---------------- 2 files changed, 28 insertions(+), 208 deletions(-) diff --git a/Makefile b/Makefile index 85ba107b..8d49d064 100644 --- a/Makefile +++ b/Makefile @@ -101,7 +101,7 @@ mongorestore-nmdc-db: mkdir -p /tmp/remote-mongodump/nmdc # SSH into the remote server, stream the dump directory as a gzipped tar archive, and extract it locally. ssh -i ~/.ssh/nersc ${NERSC_USERNAME}@dtn01.nersc.gov \ - 'tar -czf - -C /global/cfs/projectdirs/m3408/nmdc-mongodumps/dump_nmdc-prod_2024-11-20_20-12-02/nmdc .' \ + 'tar -czf - -C /global/cfs/projectdirs/m3408/nmdc-mongodumps/dump_nmdc-prod_2024-11-25_20-12-02/nmdc .' \ | tar -xzv -C /tmp/remote-mongodump/nmdc mongorestore -v -h localhost:27018 -u admin -p root --authenticationDatabase=admin \ --drop --nsInclude='nmdc.*' --dir /tmp/remote-mongodump diff --git a/docs/nb/bulk_validation_referential_integrity_check.ipynb b/docs/nb/bulk_validation_referential_integrity_check.ipynb index bfe16603..9defa5e5 100644 --- a/docs/nb/bulk_validation_referential_integrity_check.ipynb +++ b/docs/nb/bulk_validation_referential_integrity_check.ipynb @@ -140,15 +140,7 @@ "id": "bcb5802b-8205-49b7-8784-dc137baff1a0", "metadata": {}, "source": [ - "## Check for errors in the database" - ] - }, - { - "cell_type": "markdown", - "id": "1ab96cda-30ab-4e93-a0b1-3a936599305d", - "metadata": {}, - "source": [ - "The `nmdc_schema_collection_names` function returns the populated (having at least one document) set-intersection of (a) the set of collection names present in the Mongo database and (b) the set of Database slots in the schema that correspond to a collection (defined as being multivalued and values being inlined as a list)." + "## Create slot mappings" ] }, { @@ -156,18 +148,9 @@ "execution_count": 5, "id": "1d76b70e-4412-4b17-9db9-322ac791859a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'data_object_set', 'functional_annotation_agg', 'material_processing_set', 'workflow_execution_set', 'calibration_set', 'data_generation_set', 'configuration_set', 'processed_sample_set', 'instrument_set', 'biosample_set', 'study_set', 'field_research_site_set'}\n" - ] - } - ], + "outputs": [], "source": [ - "collection_names = get_nonempty_nmdc_schema_collection_names(mdb)\n", - "print(collection_names)" + "collection_names = populated_schema_collection_names_with_id_field(mdb) # `get_nonempty_nmdc_schema_collection_names` to include \"functional_annotation_agg\"" ] }, { @@ -211,107 +194,6 @@ "}" ] }, - { - "cell_type": "code", - "execution_count": 8, - "id": "12e7d00e-0ec4-45de-b0da-1b618ef7e80b", - "metadata": {}, - "outputs": [], - "source": [ - "def collect_errors(note_doc_field_errors):\n", - " errors = {\"bad_type\": [], \"no_type\": [], \"bad_slot\": [], \"is_null\": []}\n", - " n_docs_total = sum(mdb[coll_name].estimated_document_count() for coll_name in collection_names)\n", - " pbar = tqdm(total=n_docs_total)\n", - " n_errors_cache = 0\n", - " for coll_name in sorted(collection_names):\n", - " cls_names = collection_name_to_class_names[coll_name]\n", - " pbar.set_description(f\"processing {coll_name}...\")\n", - " # Iterate over each document (as a dictionary) in this collection.\n", - " for doc in mdb[coll_name].find():\n", - " doc = dissoc(doc, \"_id\")\n", - " \n", - " # Ensure we know the document's type.\n", - " cls_name = None\n", - " cls_type_match = re.match(r\"^nmdc:(?P.+)\", doc.get(\"type\", \"\"))\n", - " if cls_type_match is not None:\n", - " cls_name = cls_type_match.group(\"name\")\n", - " if cls_name not in cls_names:\n", - " errors[\"bad_type\"].append(f\"{coll_name} doc {doc['id']}: doc type {cls_name} not in those allowed for {coll_name}, i.e. {cls_names}.\")\n", - " cls_name = None\n", - " elif len(cls_names) == 1:\n", - " cls_name = cls_names[0]\n", - " else:\n", - " errors[\"no_type\"].append(f\"{coll_name} doc {doc['id']}: 'type' not set.\")\n", - "\n", - " if cls_name is not None: \n", - " slot_map = cls_slot_map[cls_name]\n", - " # Iterate over each key/value pair in the dictionary (document).\n", - " for field, value in doc.items():\n", - " if field in slot_map:\n", - " if not isinstance(value, list):\n", - " value = [value]\n", - " for v in value:\n", - " note_doc_field_errors(value=v,field=field,doc=doc,coll_name=coll_name,errors=errors) \n", - " else:\n", - " errors[\"bad_slot\"].append(f\"{coll_name} doc {doc['id']}: field '{field}' not a valid slot\")\n", - " pbar.update(1)\n", - " n_errors = sum([len(v) for v in errors.values()])\n", - " if n_errors > n_errors_cache:\n", - " print(f\"{n_errors} errors so far...\")\n", - " n_errors_cache = n_errors\n", - " pbar.close()\n", - " return errors" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "14afb4c6-b0b7-4fd7-8e2f-13c682c74409", - "metadata": {}, - "outputs": [], - "source": [ - "def note_doc_field_errors(value=None, field=None, doc=None, coll_name=None, errors=None):\n", - " # No fields should be null-valued.\n", - " # Example of how this may happen: JSON serialization from pydantic models may set optional fields to `null`.\n", - " if value is None:\n", - " errors[\"is_null\"].append(f\"{coll_name} doc {doc['id']}: field {field} is null.\")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "829039e5-7abe-4c50-ba44-e384b45b7535", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d798cf56b8b541598d246c023543d29a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/27762723 [00:00 Date: Wed, 20 Nov 2024 14:44:08 -0800 Subject: [PATCH 4/7] Drop the old tests2 dir --- tests2/__init__.py | 0 tests2/domain/__init__.py | 0 tests2/domain/service/__init__.py | 0 tests2/domain/service/test_user_service.py | 57 ---------------------- 4 files changed, 57 deletions(-) delete mode 100644 tests2/__init__.py delete mode 100644 tests2/domain/__init__.py delete mode 100644 tests2/domain/service/__init__.py delete mode 100644 tests2/domain/service/test_user_service.py diff --git a/tests2/__init__.py b/tests2/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests2/domain/__init__.py b/tests2/domain/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests2/domain/service/__init__.py b/tests2/domain/service/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests2/domain/service/test_user_service.py b/tests2/domain/service/test_user_service.py deleted file mode 100644 index 529ac81c..00000000 --- a/tests2/domain/service/test_user_service.py +++ /dev/null @@ -1,57 +0,0 @@ -from typing import Any, List - -import pytest - -from nmdc_runtime.domain.users.queriesInterface import IUserQueries -from nmdc_runtime.domain.users.userSchema import ( - UserAuth, - UserUpdate, - UserOut, -) - -from nmdc_runtime.domain.users.userService import UserService - -USER_OUT = UserOut( - email="test+email@test.com", -) - - -class UserQueriesDummy(IUserQueries): - async def create(self, user: Any) -> UserOut: - return USER_OUT - - async def update(self, old_user: Any, new_user: Any) -> UserOut: - return USER_OUT - - -@pytest.fixture -def user_out() -> UserOut: - return USER_OUT - - -@pytest.fixture -def user_schema() -> UserAuth: - return UserAuth( - username="bob", - password="test", - ) - - -@pytest.fixture -def user_update_schema() -> UserUpdate: - return UserUpdate( - email="test@test.com", - full_name="test", - password="test", - ) - - -class TestUserService: - @pytest.mark.asyncio - async def test_user_create_valid( - self, user_out: UserOut, user_schema: UserAuth - ) -> None: - user_service = UserService(UserQueriesDummy()) - - result = await user_service.create_user(user_schema) - assert result == UserOut(email="test+email@test.com") From 39e04448db96fd7e9096395ed2f16800a7ae00f5 Mon Sep 17 00:00:00 2001 From: Shreyas Cholia Date: Mon, 25 Nov 2024 03:59:55 -0800 Subject: [PATCH 5/7] Update to the newer syntax for getting utcnow() (#788) --- demo/metadata_migration/notebooks/bookkeeper.py | 4 ++-- nmdc_runtime/api/core/auth.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/demo/metadata_migration/notebooks/bookkeeper.py b/demo/metadata_migration/notebooks/bookkeeper.py index ad3cb918..97ccfb49 100644 --- a/demo/metadata_migration/notebooks/bookkeeper.py +++ b/demo/metadata_migration/notebooks/bookkeeper.py @@ -1,6 +1,6 @@ from typing import Optional from enum import Enum -from datetime import datetime +from datetime import datetime, timezone from pymongo import MongoClient from nmdc_schema.migrators.migrator_base import MigratorBase @@ -47,7 +47,7 @@ def __init__( @staticmethod def get_current_timestamp() -> str: r"""Returns an ISO 8601 timestamp (string) representing the current time in UTC.""" - utc_now = datetime.utcnow() + utc_now = datetime.now(timezone.utc) iso_utc_now = utc_now.isoformat() return iso_utc_now # e.g. "2024-02-21T04:31:03.115107" diff --git a/nmdc_runtime/api/core/auth.py b/nmdc_runtime/api/core/auth.py index 94685d2e..820f3dc0 100644 --- a/nmdc_runtime/api/core/auth.py +++ b/nmdc_runtime/api/core/auth.py @@ -1,5 +1,5 @@ import os -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from typing import Optional, Dict from fastapi import Depends @@ -101,9 +101,9 @@ def get_password_hash(password): def create_access_token(data: dict, expires_delta: Optional[timedelta] = None): to_encode = data.copy() if expires_delta: - expire = datetime.utcnow() + expires_delta + expire = datetime.now(timezone.utc) + expires_delta else: - expire = datetime.utcnow() + timedelta(minutes=15) + expire = datetime.now(timezone.utc) + timedelta(minutes=15) to_encode.update({"exp": expire}) encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM) return encoded_jwt From 2da475c22da6de932d2692886142833759952413 Mon Sep 17 00:00:00 2001 From: eecavanna <134325062+eecavanna@users.noreply.github.com> Date: Sat, 23 Nov 2024 11:30:35 -0800 Subject: [PATCH 6/7] Add "Translators" subsystem as option in PR template (#767) --- .github/pull_request_template.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index f6f96b9f..b0c8634b 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -55,6 +55,7 @@ In this branch, I... - [ ] Minter - [ ] Dagster - [ ] Project documentation (in the `docs` directory) +- [ ] Translators (metadata ingest pipelines) - [ ] MongoDB migrations - [ ] Other From d79f6eb06a3e5341620f11ab22dec3bdf700c90b Mon Sep 17 00:00:00 2001 From: eecavanna <134325062+eecavanna@users.noreply.github.com> Date: Mon, 25 Nov 2024 10:45:17 -0800 Subject: [PATCH 7/7] Remove reference to nonexistent `tests2` directory --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5da4e771..964124d4 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ url="https://github.com/microbiomedata/nmdc-runtime", packages=find_packages( include=["nmdc_runtime*", "components*"], - exclude=["tests", "tests2"], + exclude=["tests"], ), use_scm_version=True, setup_requires=["setuptools_scm"],