From ae0d96a6165c345c5abf18b11375a20e05e54e8d Mon Sep 17 00:00:00 2001
From: eecavanna <eecavanna@users.noreply.github.com>
Date: Tue, 17 Dec 2024 17:43:03 -0800
Subject: [PATCH 01/11] Add test where specified Study has no Biosamples

---
 tests/test_api/test_endpoints.py | 47 ++++++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/tests/test_api/test_endpoints.py b/tests/test_api/test_endpoints.py
index acfc81bf..47868fd2 100644
--- a/tests/test_api/test_endpoints.py
+++ b/tests/test_api/test_endpoints.py
@@ -29,19 +29,23 @@
 from nmdc_runtime.util import REPO_ROOT_DIR, ensure_unique_id_indexes
 
 
-def ensure_schema_collections_and_alldocs():
-    # Return if `alldocs` collection has already been materialized.
+def ensure_schema_collections_and_alldocs(force_refresh_of_alldocs: bool = False):
+    r"""
+    This function can be used to ensure things (?) about schema-described collections and the "alldocs" collection.
+
+    :param bool force_refresh_of_alldocs: Whether you want to force a refresh of the "alldocs" collection,
+                                          regardless of whether it is empty of not. By default, this function
+                                          will only refresh the "alldocs" collection if it is empty.
+    """
+
+    # Return if `alldocs` collection has already been materialized, and caller does not want to force a refresh of it.
     mdb = get_mongo_db()
-    if mdb.alldocs.estimated_document_count() > 0:
+    if mdb.alldocs.estimated_document_count() > 0 and not force_refresh_of_alldocs:
         print(
             "ensure_schema_collections_and_alldocs: `alldocs` collection already materialized"
         )
         return
 
-    # FIXME: Seed the database with documents that would be included in an `alldocs` collection,
-    #        such that the `/data_objects/study/{study_id}` endpoint (which uses that collection)
-    #        would return some data. Currently, we are practically _not testing_ that endpoint.
-
     ensure_unique_id_indexes(mdb)
     print("materializing alldocs...")
     materialize_alldocs(
@@ -438,8 +442,6 @@ def test_find_data_objects_for_nonexistent_study(api_site_client):
 
     Note: The `api_site_client` fixture's `request` method will raise an exception if the server responds with
           an unsuccessful status code.
-
-    TODO: Add tests focused on the situation where the `Study` _does_ exist.
     """
     ensure_schema_collections_and_alldocs()
     with pytest.raises(requests.exceptions.HTTPError):
@@ -449,6 +451,33 @@ def test_find_data_objects_for_nonexistent_study(api_site_client):
         )
 
 
+def test_find_data_objects_for_study_having_none(api_site_client):
+    # Seed the test database with a study having no associated data objects.
+    mdb = get_mongo_db()
+    study_id = "nmdc:sty-00-beeeeeef"
+    study_dict = {
+        "id": study_id,
+        "type": "nmdc:Study",
+        "study_category": "research_study",
+    }
+    mdb.get_collection(name="study_set").replace_one(
+        {"id": study_id}, study_dict, upsert=True
+    )
+
+    # Update the `alldocs` collection, which is a cache used by the endpoint under test.
+    ensure_schema_collections_and_alldocs(force_refresh_of_alldocs=True)
+
+    # Confirm the endpoint responds with no data objects.
+    response = api_site_client.request("GET", f"/data_objects/study/{study_id}")
+    assert response.status_code == 200
+    data_objects_by_biosample = response.json()
+    assert len(data_objects_by_biosample) == 0
+
+    # Clean up: Delete the documents we created within this test, from the database.
+    mdb.get_collection(name="study_set").delete_one({"id": study_id})
+    mdb.get_collection(name="alldocs").delete_many({})
+
+
 def test_find_planned_processes(api_site_client):
     mdb = get_mongo_db()
     database_dict = json.loads(

From dd17e34628b207d3d42619b7c649bf9e6e6af93e Mon Sep 17 00:00:00 2001
From: eecavanna <eecavanna@users.noreply.github.com>
Date: Tue, 17 Dec 2024 17:43:46 -0800
Subject: [PATCH 02/11] Document time-saving tip about pointing pytest at a
 specific module

---
 Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Makefile b/Makefile
index a3509b69..f458db60 100644
--- a/Makefile
+++ b/Makefile
@@ -43,6 +43,8 @@ test-dbinit:
 	docker compose --file docker-compose.test.yml \
 		exec mongo /bin/bash -c "/mongorestore-nmdc-testdb.sh"
 
+# Tip: If you append a file path to this "recipe", pytest will run only the tests defined in that file.
+#      For example, append `tests/test_api/test_endpoints.py` to have pytest only run the endpoint tests.
 test-run:
 	docker compose --file docker-compose.test.yml run test
 

From 1877587daf2958a3e8c5e22f8f03bbd04b568550 Mon Sep 17 00:00:00 2001
From: eecavanna <eecavanna@users.noreply.github.com>
Date: Tue, 17 Dec 2024 18:11:17 -0800
Subject: [PATCH 03/11] Add test where specified Study has 1 DataObject via 1
 MassSpectrometry

---
 tests/test_api/test_endpoints.py | 75 ++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/tests/test_api/test_endpoints.py b/tests/test_api/test_endpoints.py
index 47868fd2..b2ace4f2 100644
--- a/tests/test_api/test_endpoints.py
+++ b/tests/test_api/test_endpoints.py
@@ -478,6 +478,81 @@ def test_find_data_objects_for_study_having_none(api_site_client):
     mdb.get_collection(name="alldocs").delete_many({})
 
 
+def test_find_data_objects_for_study_having_one(api_site_client):
+    # Seed the test database with a study having one associated data object.
+    mdb = get_mongo_db()
+    study_id = "nmdc:sty-00-studio"
+    study_dict = {
+        "id": study_id,
+        "type": "nmdc:Study",
+        "study_category": "research_study",
+    }
+    mdb.get_collection(name="study_set").replace_one(
+        {"id": study_id}, study_dict, upsert=True
+    )
+    biosample_id = "nmdc:bsm-00-campione"
+    biosample_dict = {
+            "id": biosample_id,
+            "type": "nmdc:Biosample",
+            "associated_studies": [study_id],
+            "env_broad_scale": {
+                "term": {"type": "nmdc:OntologyClass", "id": "ENVO:000000"},
+                "type": "nmdc:ControlledIdentifiedTermValue"
+            },
+            "env_local_scale": {
+                "term": {"type": "nmdc:OntologyClass", "id": "ENVO:000000"},
+                "type": "nmdc:ControlledIdentifiedTermValue"
+            },
+            "env_medium": {
+                "term": {"type": "nmdc:OntologyClass", "id": "ENVO:000000"},
+                "type": "nmdc:ControlledIdentifiedTermValue"
+            }
+        }
+    mdb.get_collection(name="biosample_set").replace_one(
+        {"id": biosample_id}, biosample_dict, upsert=True
+    )
+    data_object_id = "nmdc:dobj-00-oggetto"
+    data_object_dict = {
+        "id": data_object_id,
+        "name": "Some name",
+        "description": "Some description",
+        "type": "nmdc:DataObject",
+    }
+    mdb.get_collection(name="data_object_set").replace_one(
+        {"id": data_object_id}, data_object_dict, upsert=True
+    )
+    # Note: The `MassSpectrometry` class inherits from the (abstract) `DataGeneration` class.
+    # Reference: https://microbiomedata.github.io/nmdc-schema/MassSpectrometry/
+    mass_spectrometry_id = "nmdc:dgms-00-spettro"
+    mass_spectrometry_dict = {
+        "id": mass_spectrometry_id,
+        "type": "nmdc:MassSpectrometry",
+        "analyte_category": "metaproteome",
+        "associated_studies": [study_id],
+        "has_input": [biosample_id],
+        "has_output": [data_object_id],
+    }
+    mdb.get_collection(name="data_generation_set").replace_one(
+        {"id": mass_spectrometry_id}, mass_spectrometry_dict, upsert=True
+    )
+
+    # Update the `alldocs` collection, which is a cache used by the endpoint under test.
+    ensure_schema_collections_and_alldocs(force_refresh_of_alldocs=True)
+
+    # Confirm the endpoint responds with no data objects.
+    response = api_site_client.request("GET", f"/data_objects/study/{study_id}")
+    assert response.status_code == 200
+    data_objects_by_biosample = response.json()
+    assert len(data_objects_by_biosample) == 1
+
+    # Clean up: Delete the documents we created within this test, from the database.
+    mdb.get_collection(name="study_set").delete_one({"id": study_id})
+    mdb.get_collection(name="biosample_set").delete_one({"id": biosample_id})
+    mdb.get_collection(name="data_generation_set").delete_one({"id": mass_spectrometry_id})
+    mdb.get_collection(name="data_object_set").delete_one({"id": data_object_id})
+    mdb.get_collection(name="alldocs").delete_many({})
+
+
 def test_find_planned_processes(api_site_client):
     mdb = get_mongo_db()
     database_dict = json.loads(

From ebb037c37fdd9946861b16a630c8cc693b528754 Mon Sep 17 00:00:00 2001
From: eecavanna <eecavanna@users.noreply.github.com>
Date: Tue, 17 Dec 2024 18:25:54 -0800
Subject: [PATCH 04/11] Assert more details about the API response payload

---
 tests/test_api/test_endpoints.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_api/test_endpoints.py b/tests/test_api/test_endpoints.py
index b2ace4f2..24114d6d 100644
--- a/tests/test_api/test_endpoints.py
+++ b/tests/test_api/test_endpoints.py
@@ -544,6 +544,9 @@ def test_find_data_objects_for_study_having_one(api_site_client):
     assert response.status_code == 200
     data_objects_by_biosample = response.json()
     assert len(data_objects_by_biosample) == 1
+    assert data_objects_by_biosample[0]["biosample_id"] == biosample_id
+    assert len(data_objects_by_biosample[0]["data_objects"]) == 1
+    assert data_objects_by_biosample[0]["data_objects"][0]["id"] == data_object_id
 
     # Clean up: Delete the documents we created within this test, from the database.
     mdb.get_collection(name="study_set").delete_one({"id": study_id})

From ca82e4283ca5086459ba943853cda4cecfdef61e Mon Sep 17 00:00:00 2001
From: eecavanna <eecavanna@users.noreply.github.com>
Date: Tue, 17 Dec 2024 18:26:40 -0800
Subject: [PATCH 05/11] Fix inaccurate comment

---
 tests/test_api/test_endpoints.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_api/test_endpoints.py b/tests/test_api/test_endpoints.py
index 24114d6d..f68e2330 100644
--- a/tests/test_api/test_endpoints.py
+++ b/tests/test_api/test_endpoints.py
@@ -539,7 +539,7 @@ def test_find_data_objects_for_study_having_one(api_site_client):
     # Update the `alldocs` collection, which is a cache used by the endpoint under test.
     ensure_schema_collections_and_alldocs(force_refresh_of_alldocs=True)
 
-    # Confirm the endpoint responds with no data objects.
+    # Confirm the endpoint responds with the data object we inserted above.
     response = api_site_client.request("GET", f"/data_objects/study/{study_id}")
     assert response.status_code == 200
     data_objects_by_biosample = response.json()

From e520bd0cf789f357a93701652b0e315bee50a6f2 Mon Sep 17 00:00:00 2001
From: Donny Winston <donny@polyneme.xyz>
Date: Thu, 19 Dec 2024 19:20:53 +0100
Subject: [PATCH 06/11] style: format

---
 tests/test_api/test_endpoints.py | 36 +++++++++++++++++---------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/tests/test_api/test_endpoints.py b/tests/test_api/test_endpoints.py
index f68e2330..005d7030 100644
--- a/tests/test_api/test_endpoints.py
+++ b/tests/test_api/test_endpoints.py
@@ -492,22 +492,22 @@ def test_find_data_objects_for_study_having_one(api_site_client):
     )
     biosample_id = "nmdc:bsm-00-campione"
     biosample_dict = {
-            "id": biosample_id,
-            "type": "nmdc:Biosample",
-            "associated_studies": [study_id],
-            "env_broad_scale": {
-                "term": {"type": "nmdc:OntologyClass", "id": "ENVO:000000"},
-                "type": "nmdc:ControlledIdentifiedTermValue"
-            },
-            "env_local_scale": {
-                "term": {"type": "nmdc:OntologyClass", "id": "ENVO:000000"},
-                "type": "nmdc:ControlledIdentifiedTermValue"
-            },
-            "env_medium": {
-                "term": {"type": "nmdc:OntologyClass", "id": "ENVO:000000"},
-                "type": "nmdc:ControlledIdentifiedTermValue"
-            }
-        }
+        "id": biosample_id,
+        "type": "nmdc:Biosample",
+        "associated_studies": [study_id],
+        "env_broad_scale": {
+            "term": {"type": "nmdc:OntologyClass", "id": "ENVO:000000"},
+            "type": "nmdc:ControlledIdentifiedTermValue",
+        },
+        "env_local_scale": {
+            "term": {"type": "nmdc:OntologyClass", "id": "ENVO:000000"},
+            "type": "nmdc:ControlledIdentifiedTermValue",
+        },
+        "env_medium": {
+            "term": {"type": "nmdc:OntologyClass", "id": "ENVO:000000"},
+            "type": "nmdc:ControlledIdentifiedTermValue",
+        },
+    }
     mdb.get_collection(name="biosample_set").replace_one(
         {"id": biosample_id}, biosample_dict, upsert=True
     )
@@ -551,7 +551,9 @@ def test_find_data_objects_for_study_having_one(api_site_client):
     # Clean up: Delete the documents we created within this test, from the database.
     mdb.get_collection(name="study_set").delete_one({"id": study_id})
     mdb.get_collection(name="biosample_set").delete_one({"id": biosample_id})
-    mdb.get_collection(name="data_generation_set").delete_one({"id": mass_spectrometry_id})
+    mdb.get_collection(name="data_generation_set").delete_one(
+        {"id": mass_spectrometry_id}
+    )
     mdb.get_collection(name="data_object_set").delete_one({"id": data_object_id})
     mdb.get_collection(name="alldocs").delete_many({})
 

From 48ec4bf83bb2da0a16c702472e4c97646ce6b9e9 Mon Sep 17 00:00:00 2001
From: Donny Winston <donny@polyneme.xyz>
Date: Thu, 19 Dec 2024 19:24:28 +0100
Subject: [PATCH 07/11] feat: clarify docstring; ensure indexes even if no
 refresh

---
 tests/test_api/test_endpoints.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/test_api/test_endpoints.py b/tests/test_api/test_endpoints.py
index 005d7030..fb8df1d5 100644
--- a/tests/test_api/test_endpoints.py
+++ b/tests/test_api/test_endpoints.py
@@ -31,22 +31,21 @@
 
 def ensure_schema_collections_and_alldocs(force_refresh_of_alldocs: bool = False):
     r"""
-    This function can be used to ensure things (?) about schema-described collections and the "alldocs" collection.
+    This function can be used to ensure properties of schema-described collections and the "alldocs" collection.
 
     :param bool force_refresh_of_alldocs: Whether you want to force a refresh of the "alldocs" collection,
                                           regardless of whether it is empty of not. By default, this function
                                           will only refresh the "alldocs" collection if it is empty.
     """
-
-    # Return if `alldocs` collection has already been materialized, and caller does not want to force a refresh of it.
     mdb = get_mongo_db()
+    ensure_unique_id_indexes(mdb)
+    # Return if `alldocs` collection has already been materialized, and caller does not want to force a refresh of it.
     if mdb.alldocs.estimated_document_count() > 0 and not force_refresh_of_alldocs:
         print(
             "ensure_schema_collections_and_alldocs: `alldocs` collection already materialized"
         )
         return
 
-    ensure_unique_id_indexes(mdb)
     print("materializing alldocs...")
     materialize_alldocs(
         build_op_context(

From 59817c14e2e56fcaad48bf0c8cd27124a68b29d2 Mon Sep 17 00:00:00 2001
From: Donny Winston <donny@polyneme.xyz>
Date: Thu, 19 Dec 2024 19:24:53 +0100
Subject: [PATCH 08/11] Update tests/test_api/test_endpoints.py

Co-authored-by: eecavanna <134325062+eecavanna@users.noreply.github.com>
---
 tests/test_api/test_endpoints.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_api/test_endpoints.py b/tests/test_api/test_endpoints.py
index fb8df1d5..d7e62d8e 100644
--- a/tests/test_api/test_endpoints.py
+++ b/tests/test_api/test_endpoints.py
@@ -34,7 +34,7 @@ def ensure_schema_collections_and_alldocs(force_refresh_of_alldocs: bool = False
     This function can be used to ensure properties of schema-described collections and the "alldocs" collection.
 
     :param bool force_refresh_of_alldocs: Whether you want to force a refresh of the "alldocs" collection,
-                                          regardless of whether it is empty of not. By default, this function
+                                          regardless of whether it is empty or not. By default, this function
                                           will only refresh the "alldocs" collection if it is empty.
     """
     mdb = get_mongo_db()

From d7f6a53f6ff65f6b2a70b2781012a8def5e7843e Mon Sep 17 00:00:00 2001
From: Donny Winston <donny@polyneme.xyz>
Date: Thu, 19 Dec 2024 20:07:35 +0100
Subject: [PATCH 09/11] feat: use real example metadata

---
 Makefile                         |   2 +-
 tests/test_api/test_endpoints.py | 116 +++++++++++++++++++++----------
 2 files changed, 79 insertions(+), 39 deletions(-)

diff --git a/Makefile b/Makefile
index f458db60..3089f211 100644
--- a/Makefile
+++ b/Makefile
@@ -103,7 +103,7 @@ mongorestore-nmdc-db:
 	mkdir -p /tmp/remote-mongodump/nmdc
 	# Optionally, manually update MONGO_REMOTE_DUMP_DIR env var:
 	# ```bash
-	# export MONGO_REMOTE_DUMP_DIR=$(ssh -i ~/.ssh/nersc -q ${NERSC_USERNAME}@dtn01.nersc.gov 'bash -s ' < get_latest_nmdc_prod_dump_dir.sh 2>/dev/null)
+	# export MONGO_REMOTE_DUMP_DIR=$(ssh -i ~/.ssh/nersc -q ${NERSC_USERNAME}@dtn01.nersc.gov 'bash -s ' < util/get_latest_nmdc_prod_dump_dir.sh 2>/dev/null)
 	# ```
 	# Rsync the remote dump directory items of interest:
 	rsync -av --exclude='_*' --exclude='fs\.*' \
diff --git a/tests/test_api/test_endpoints.py b/tests/test_api/test_endpoints.py
index d7e62d8e..148408a6 100644
--- a/tests/test_api/test_endpoints.py
+++ b/tests/test_api/test_endpoints.py
@@ -26,7 +26,7 @@
     mongo_resource,
     RuntimeApiUserClient,
 )
-from nmdc_runtime.util import REPO_ROOT_DIR, ensure_unique_id_indexes
+from nmdc_runtime.util import REPO_ROOT_DIR, ensure_unique_id_indexes, validate_json
 
 
 def ensure_schema_collections_and_alldocs(force_refresh_of_alldocs: bool = False):
@@ -459,6 +459,8 @@ def test_find_data_objects_for_study_having_none(api_site_client):
         "type": "nmdc:Study",
         "study_category": "research_study",
     }
+    assert validate_json({"study_set": [study_dict]}, mdb)["result"] != "errors"
+
     mdb.get_collection(name="study_set").replace_one(
         {"id": study_id}, study_dict, upsert=True
     )
@@ -480,60 +482,93 @@ def test_find_data_objects_for_study_having_none(api_site_client):
 def test_find_data_objects_for_study_having_one(api_site_client):
     # Seed the test database with a study having one associated data object.
     mdb = get_mongo_db()
-    study_id = "nmdc:sty-00-studio"
+    study_id = "nmdc:sty-11-r2h77870"
     study_dict = {
         "id": study_id,
         "type": "nmdc:Study",
         "study_category": "research_study",
     }
-    mdb.get_collection(name="study_set").replace_one(
-        {"id": study_id}, study_dict, upsert=True
-    )
-    biosample_id = "nmdc:bsm-00-campione"
+    fakes = set()
+    assert validate_json({"study_set": [study_dict]}, mdb)["result"] != "errors"
+    if mdb.get_collection(name="study_set").find_one({"id": study_id}) is None:
+        mdb.get_collection(name="study_set").insert_one(study_dict)
+        fakes.add("study")
+    biosample_id = "nmdc:bsm-11-6zd5nb38"
     biosample_dict = {
         "id": biosample_id,
-        "type": "nmdc:Biosample",
-        "associated_studies": [study_id],
         "env_broad_scale": {
-            "term": {"type": "nmdc:OntologyClass", "id": "ENVO:000000"},
+            "has_raw_value": "ENVO_00000446",
+            "term": {
+                "id": "ENVO:00000446",
+                "name": "terrestrial biome",
+                "type": "nmdc:OntologyClass",
+            },
             "type": "nmdc:ControlledIdentifiedTermValue",
         },
         "env_local_scale": {
-            "term": {"type": "nmdc:OntologyClass", "id": "ENVO:000000"},
+            "has_raw_value": "ENVO_00005801",
+            "term": {
+                "id": "ENVO:00005801",
+                "name": "rhizosphere",
+                "type": "nmdc:OntologyClass",
+            },
             "type": "nmdc:ControlledIdentifiedTermValue",
         },
         "env_medium": {
-            "term": {"type": "nmdc:OntologyClass", "id": "ENVO:000000"},
+            "has_raw_value": "ENVO_00001998",
+            "term": {
+                "id": "ENVO:00001998",
+                "name": "soil",
+                "type": "nmdc:OntologyClass",
+            },
             "type": "nmdc:ControlledIdentifiedTermValue",
         },
+        "type": "nmdc:Biosample",
+        "associated_studies": [study_id],
     }
-    mdb.get_collection(name="biosample_set").replace_one(
-        {"id": biosample_id}, biosample_dict, upsert=True
+    assert validate_json({"biosample_set": [biosample_dict]}, mdb)["result"] != "errors"
+    if mdb.get_collection(name="biosample_set").find_one({"id": biosample_id}) is None:
+        mdb.get_collection(name="biosample_set").insert_one(biosample_dict)
+        fakes.add("biosample")
+
+    data_generation_id = "nmdc:omprc-11-nmtj1g51"
+    data_generation_dict = {
+        "id": data_generation_id,
+        "has_input": [biosample_id],
+        "type": "nmdc:NucleotideSequencing",
+        "analyte_category": "metagenome",
+        "associated_studies": [study_id],
+    }
+    assert (
+        validate_json({"data_generation_set": [data_generation_dict]}, mdb)["result"]
+        != "errors"
     )
-    data_object_id = "nmdc:dobj-00-oggetto"
+    if (
+        mdb.get_collection(name="data_generation_set").find_one(
+            {"id": data_generation_id}
+        )
+        is None
+    ):
+        mdb.get_collection(name="data_generation_set").insert_one(data_generation_dict)
+        fakes.add("data_generation")
+
+    data_object_id = "nmdc:dobj-11-cpv4y420"
     data_object_dict = {
         "id": data_object_id,
-        "name": "Some name",
-        "description": "Some description",
+        "name": "Raw sequencer read data",
+        "description": "Metagenome Raw Reads for nmdc:omprc-11-nmtj1g51",
         "type": "nmdc:DataObject",
     }
-    mdb.get_collection(name="data_object_set").replace_one(
-        {"id": data_object_id}, data_object_dict, upsert=True
-    )
-    # Note: The `MassSpectrometry` class inherits from the (abstract) `DataGeneration` class.
-    # Reference: https://microbiomedata.github.io/nmdc-schema/MassSpectrometry/
-    mass_spectrometry_id = "nmdc:dgms-00-spettro"
-    mass_spectrometry_dict = {
-        "id": mass_spectrometry_id,
-        "type": "nmdc:MassSpectrometry",
-        "analyte_category": "metaproteome",
-        "associated_studies": [study_id],
-        "has_input": [biosample_id],
-        "has_output": [data_object_id],
-    }
-    mdb.get_collection(name="data_generation_set").replace_one(
-        {"id": mass_spectrometry_id}, mass_spectrometry_dict, upsert=True
+    assert (
+        validate_json({"data_object_set": [data_object_dict]}, mdb)["result"]
+        != "errors"
     )
+    if (
+        mdb.get_collection(name="data_object_set").find_one({"id": data_object_id})
+        is None
+    ):
+        mdb.get_collection(name="data_object_set").insert_one(data_object_dict)
+        fakes.add("data_object")
 
     # Update the `alldocs` collection, which is a cache used by the endpoint under test.
     ensure_schema_collections_and_alldocs(force_refresh_of_alldocs=True)
@@ -548,12 +583,17 @@ def test_find_data_objects_for_study_having_one(api_site_client):
     assert data_objects_by_biosample[0]["data_objects"][0]["id"] == data_object_id
 
     # Clean up: Delete the documents we created within this test, from the database.
-    mdb.get_collection(name="study_set").delete_one({"id": study_id})
-    mdb.get_collection(name="biosample_set").delete_one({"id": biosample_id})
-    mdb.get_collection(name="data_generation_set").delete_one(
-        {"id": mass_spectrometry_id}
-    )
-    mdb.get_collection(name="data_object_set").delete_one({"id": data_object_id})
+    if "study" in fakes:
+        mdb.get_collection(name="study_set").delete_one({"id": study_id})
+    if "biosample" in fakes:
+        mdb.get_collection(name="biosample_set").delete_one({"id": biosample_id})
+    if "data_generation" in fakes:
+        mdb.get_collection(name="data_generation_set").delete_one(
+            {"id": data_generation_id}
+        )
+    if "data_object" in fakes:
+        mdb.get_collection(name="data_object_set").delete_one({"id": data_object_id})
+
     mdb.get_collection(name="alldocs").delete_many({})
 
 

From 13585782f2996db0ab5d71f77bfdfa927ac58b51 Mon Sep 17 00:00:00 2001
From: Donny Winston <donny@polyneme.xyz>
Date: Thu, 19 Dec 2024 20:25:39 +0100
Subject: [PATCH 10/11] fix: more resilient assertion

---
 tests/test_api/test_endpoints.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/test_api/test_endpoints.py b/tests/test_api/test_endpoints.py
index 148408a6..f525cb45 100644
--- a/tests/test_api/test_endpoints.py
+++ b/tests/test_api/test_endpoints.py
@@ -577,10 +577,13 @@ def test_find_data_objects_for_study_having_one(api_site_client):
     response = api_site_client.request("GET", f"/data_objects/study/{study_id}")
     assert response.status_code == 200
     data_objects_by_biosample = response.json()
-    assert len(data_objects_by_biosample) == 1
-    assert data_objects_by_biosample[0]["biosample_id"] == biosample_id
-    assert len(data_objects_by_biosample[0]["data_objects"]) == 1
-    assert data_objects_by_biosample[0]["data_objects"][0]["id"] == data_object_id
+    assert any(
+        biosample_data_objects["biosample_id"] == biosample_id
+        and any(
+            do["id"] == data_object_id for do in biosample_data_objects["data_objects"]
+        )
+        for biosample_data_objects in data_objects_by_biosample
+    )
 
     # Clean up: Delete the documents we created within this test, from the database.
     if "study" in fakes:

From d98ddfac7cb913cb8d0d50c681bc2d54631a00c2 Mon Sep 17 00:00:00 2001
From: Donny Winston <donny@polyneme.xyz>
Date: Thu, 19 Dec 2024 21:25:53 +0100
Subject: [PATCH 11/11] fix: add workflow_execution

---
 tests/test_api/test_endpoints.py | 33 ++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/tests/test_api/test_endpoints.py b/tests/test_api/test_endpoints.py
index f525cb45..26e5ce66 100644
--- a/tests/test_api/test_endpoints.py
+++ b/tests/test_api/test_endpoints.py
@@ -570,6 +570,35 @@ def test_find_data_objects_for_study_having_one(api_site_client):
         mdb.get_collection(name="data_object_set").insert_one(data_object_dict)
         fakes.add("data_object")
 
+    workflow_execution_id = "nmdc:wfmsa-11-fqq66x60.1"
+    workflow_execution_dict = {
+        "id": workflow_execution_id,
+        "started_at_time": "2023-03-24T02:02:59.479107+00:00",
+        "ended_at_time": "2023-03-24T02:02:59.479129+00:00",
+        "was_informed_by": data_generation_id,
+        "execution_resource": "JGI",
+        "git_url": "https://github.com/microbiomedata/RawSequencingData",
+        "has_input": [biosample_id],
+        "has_output": [data_object_id],
+        "type": "nmdc:MetagenomeSequencing",
+    }
+    assert (
+        validate_json({"workflow_execution_set": [workflow_execution_dict]}, mdb)[
+            "result"
+        ]
+        != "errors"
+    )
+    if (
+        mdb.get_collection(name="workflow_execution_set").find_one(
+            {"id": workflow_execution_id}
+        )
+        is None
+    ):
+        mdb.get_collection(name="workflow_execution_set").insert_one(
+            workflow_execution_dict
+        )
+        fakes.add("workflow_execution")
+
     # Update the `alldocs` collection, which is a cache used by the endpoint under test.
     ensure_schema_collections_and_alldocs(force_refresh_of_alldocs=True)
 
@@ -596,6 +625,10 @@ def test_find_data_objects_for_study_having_one(api_site_client):
         )
     if "data_object" in fakes:
         mdb.get_collection(name="data_object_set").delete_one({"id": data_object_id})
+    if "workflow_execution" in fakes:
+        mdb.get_collection(name="workflow_execution_set").delete_one(
+            {"id": workflow_execution_id}
+        )
 
     mdb.get_collection(name="alldocs").delete_many({})