From f3b5897c1c10527cdd39469b639caf2ba5fa1998 Mon Sep 17 00:00:00 2001 From: Sujay Patil Date: Mon, 22 Jan 2024 15:30:25 -0800 Subject: [PATCH] fix tests for soil and benthic water ingest pipelines --- .../test_neon_benthic_data_translator.py | 35 +++++++++++++------ .../test_neon_soil_data_translator.py | 30 +++++++++++++--- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/tests/test_data/test_neon_benthic_data_translator.py b/tests/test_data/test_neon_benthic_data_translator.py index 700b4fb0..6350b79b 100644 --- a/tests/test_data/test_neon_benthic_data_translator.py +++ b/tests/test_data/test_neon_benthic_data_translator.py @@ -1,13 +1,10 @@ +from io import StringIO import pytest from nmdc_runtime.site.translation.neon_benthic_translator import ( NeonBenthicDataTranslator, ) import pandas as pd -from linkml_runtime.dumpers import json_dumper -from nmdc_runtime.util import validate_json -from tests.conftest import get_mongo_test_db - # Mock data for testing benthic_data = { "mms_benthicMetagenomeSequencing": pd.DataFrame( @@ -131,11 +128,34 @@ ), } +def neon_envo_mappings_file(): + tsv_data = """neon_nlcd_value\tmrlc_edomvd_before_hyphen\tmrlc_edomv\tenvo_alt_id\tenvo_id\tenvo_label\tenv_local_scale\tsubCLassOf and part of path to biome\tother justification\tbiome_label\tbiome_id\tenv_broad_scale +deciduousForest\tDeciduous Forest\t41\tNLCD:41\tENVO:01000816\tarea of deciduous forest\tarea of deciduous forest [ENVO:01000816]\t --subCLassOf-->terretrial environmental zone--part of-->\t\tterrestrial biome\tENVO:00000448\tterrestrial biome [ENVO:00000448]""" + + return pd.read_csv(StringIO(tsv_data), delimiter="\t") + + +def neon_raw_data_file_mappings_file(): + tsv_data_dna = """dnaSampleID\tsequencerRunID\tinternalLabID\trawDataFileName\trawDataFileDescription\trawDataFilePath\tcheckSum +WLOU.20180726.AMC.EPILITHON.1-DNA1\tHWVWKBGX7\tAquaticPlate6WellA5\tBMI_HWVWKBGX7_AquaticPlate6WellA5_R2.fastq.gz\tR2 metagenomic archive of fastq files\thttps://storage.neonscience.org/neon-microbial-raw-seq-files/2023/BMI_HWVWKBGX7_mms_R2/BMI_HWVWKBGX7_AquaticPlate6WellA5_R2.fastq.gz\t16c11600c77818979b11a05ce7899d6c +WLOU.20180726.AMC.EPILITHON.1-DNA1\tHWVWKBGX7\tAquaticPlate6WellA5\tBMI_HWVWKBGX7_AquaticPlate6WellA5_R1.fastq.gz\tR1 metagenomic archive of fastq files\thttps://storage.neonscience.org/neon-microbial-raw-seq-files/2023/BMI_HWVWKBGX7_mms_R1/BMI_HWVWKBGX7_AquaticPlate6WellA5_R1.fastq.gz\t378052f3aeb3d587e3f94588247e7bda""" + + return pd.read_csv(StringIO(tsv_data_dna), delimiter="\t") + + +def site_code_mapping(): + return {"WLOU": "USA: Colorado, West St Louis Creek"} + class TestNeonBenthicDataTranslator: @pytest.fixture def translator(self, test_minter): - return NeonBenthicDataTranslator(benthic_data, id_minter=test_minter) + return NeonBenthicDataTranslator(benthic_data=benthic_data, + site_code_mapping=site_code_mapping(), + neon_envo_mappings_file=neon_envo_mappings_file(), + neon_raw_data_file_mappings_file=neon_raw_data_file_mappings_file(), + id_minter=test_minter + ) def test_get_database(self, translator): database = translator.get_database() @@ -183,8 +203,3 @@ def test_get_database(self, translator): for omics_processing in omics_processing_list: omics_processing_input = omics_processing.has_input assert omics_processing_input == lib_prep_output - - mongo_db = get_mongo_test_db() - validation_result = validate_json(json_dumper.to_dict(database), mongo_db) - assert validation_result == {"result": "All Okay!"} - \ No newline at end of file diff --git a/tests/test_data/test_neon_soil_data_translator.py b/tests/test_data/test_neon_soil_data_translator.py index e63e24f2..2e9ce938 100644 --- a/tests/test_data/test_neon_soil_data_translator.py +++ b/tests/test_data/test_neon_soil_data_translator.py @@ -1,3 +1,4 @@ +from io import StringIO import pytest from nmdc_runtime.site.translation.neon_soil_translator import NeonSoilDataTranslator from nmdc_runtime.site.translation.neon_utils import (_create_controlled_identified_term_value, _create_controlled_term_value, _create_timestamp_value, _get_value_or_none) @@ -772,23 +773,42 @@ ), } +def neon_envo_mappings_file(): + tsv_data = """neon_nlcd_value\tmrlc_edomvd_before_hyphen\tmrlc_edomv\tenvo_alt_id\tenvo_id\tenvo_label\tenv_local_scale\tsubCLassOf and part of path to biome\tother justification\tbiome_label\tbiome_id\tenv_broad_scale +deciduousForest\tDeciduous Forest\t41\tNLCD:41\tENVO:01000816\tarea of deciduous forest\tarea of deciduous forest [ENVO:01000816]\t --subCLassOf-->terretrial environmental zone--part of-->\t\tterrestrial biome\tENVO:00000448\tterrestrial biome [ENVO:00000448]""" + + return pd.read_csv(StringIO(tsv_data), delimiter="\t") + + +def neon_raw_data_file_mappings_file(): + tsv_data_dna = """dnaSampleID\tsequencerRunID\tinternalLabID\trawDataFileName\trawDataFileDescription\trawDataFilePath\tcheckSum +BLAN_005-M-20200713-COMP-DNA1\tHVT2HBGXJ\t20S_08_0661\tBMI_HVT2HBGXJ_20S_08_0661_R1.fastq.gz\tR1 metagenomic archive of fastq files\thttps://storage.neonscience.org/neon-microbial-raw-seq-files/2021/BMI_HVT2HBGXJ_mms_R1/BMI_HVT2HBGXJ_20S_08_0661_R1.fastq.gz\t8b5794e91b1e79e02f1a3e7ef53a73b3 +BLAN_005-M-20200713-COMP-DNA1\tHVT2HBGXJ\t20S_08_0661\tBMI_HVT2HBGXJ_20S_08_0661_R2.fastq.gz\tR2 metagenomic archive of fastq files\thttps://storage.neonscience.org/neon-microbial-raw-seq-files/2021/BMI_HVT2HBGXJ_mms_R2/BMI_HVT2HBGXJ_20S_08_0661_R2.fastq.gz\t44dc66147143a6eb1e806defa7f3706e""" + + return pd.read_csv(StringIO(tsv_data_dna), delimiter="\t") + class TestNeonDataTranslator: @pytest.fixture def translator(self, test_minter): - return NeonSoilDataTranslator(mms_data, sls_data, id_minter=test_minter) + return NeonSoilDataTranslator(mms_data=mms_data, + sls_data=sls_data, + neon_envo_mappings_file=neon_envo_mappings_file(), + neon_raw_data_file_mappings_file=neon_raw_data_file_mappings_file(), + id_minter=test_minter + ) - def test_missing_mms_table(self): + def test_missing_mms_table(self, test_minter): # Test behavior when mms data is missing a table with pytest.raises( ValueError, match="missing one of the metagenomic microbe soil tables" ): - NeonSoilDataTranslator({}, sls_data) + NeonSoilDataTranslator({}, sls_data, neon_envo_mappings_file(), neon_raw_data_file_mappings_file(), id_minter=test_minter) - def test_missing_sls_table(self): + def test_missing_sls_table(self, test_minter): # Test behavior when sls data is missing a table with pytest.raises(ValueError, match="missing one of the soil periodic tables"): - NeonSoilDataTranslator(mms_data, {}) + NeonSoilDataTranslator(mms_data, {}, neon_envo_mappings_file(), neon_raw_data_file_mappings_file(), id_minter=test_minter) def test_get_value_or_none(self): # use one biosample record to test this method