Skip to content

Commit

Permalink
fix tests for soil and benthic water ingest pipelines
Browse files Browse the repository at this point in the history
  • Loading branch information
sujaypatil96 committed Jan 22, 2024
1 parent 8d9e279 commit f3b5897
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 15 deletions.
35 changes: 25 additions & 10 deletions tests/test_data/test_neon_benthic_data_translator.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
from io import StringIO
import pytest
from nmdc_runtime.site.translation.neon_benthic_translator import (
NeonBenthicDataTranslator,
)
import pandas as pd

from linkml_runtime.dumpers import json_dumper
from nmdc_runtime.util import validate_json
from tests.conftest import get_mongo_test_db

# Mock data for testing
benthic_data = {
"mms_benthicMetagenomeSequencing": pd.DataFrame(
Expand Down Expand Up @@ -131,11 +128,34 @@
),
}

def neon_envo_mappings_file() -> pd.DataFrame:
    """Return a small in-memory NEON NLCD -> ENVO mapping table for tests.

    The table is parsed from an inline tab-separated string (one header row,
    one ``deciduousForest`` data row), so the test needs no file or network
    access.

    Returns:
        A DataFrame with one row and the twelve columns named in the header.
    """
    # NOTE: the literal below is runtime data; keep it byte-for-byte
    # (including the misspellings in the column name and path value).
    tsv_data = """neon_nlcd_value\tmrlc_edomvd_before_hyphen\tmrlc_edomv\tenvo_alt_id\tenvo_id\tenvo_label\tenv_local_scale\tsubCLassOf and part of path to biome\tother justification\tbiome_label\tbiome_id\tenv_broad_scale
deciduousForest\tDeciduous Forest\t41\tNLCD:41\tENVO:01000816\tarea of deciduous forest\tarea of deciduous forest [ENVO:01000816]\t --subCLassOf-->terretrial environmental zone--part of-->\t\tterrestrial biome\tENVO:00000448\tterrestrial biome [ENVO:00000448]"""

    return pd.read_csv(StringIO(tsv_data), delimiter="\t")


def neon_raw_data_file_mappings_file() -> pd.DataFrame:
    """Return a small in-memory raw-data-file mapping table for tests.

    The table is parsed from an inline tab-separated string holding the R2
    and R1 fastq entries for a single ``dnaSampleID``.

    Returns:
        A DataFrame with two rows and the seven columns named in the header.
    """
    # NOTE: the literal below is runtime data; keep it byte-for-byte.
    tsv_data_dna = """dnaSampleID\tsequencerRunID\tinternalLabID\trawDataFileName\trawDataFileDescription\trawDataFilePath\tcheckSum
WLOU.20180726.AMC.EPILITHON.1-DNA1\tHWVWKBGX7\tAquaticPlate6WellA5\tBMI_HWVWKBGX7_AquaticPlate6WellA5_R2.fastq.gz\tR2 metagenomic archive of fastq files\thttps://storage.neonscience.org/neon-microbial-raw-seq-files/2023/BMI_HWVWKBGX7_mms_R2/BMI_HWVWKBGX7_AquaticPlate6WellA5_R2.fastq.gz\t16c11600c77818979b11a05ce7899d6c
WLOU.20180726.AMC.EPILITHON.1-DNA1\tHWVWKBGX7\tAquaticPlate6WellA5\tBMI_HWVWKBGX7_AquaticPlate6WellA5_R1.fastq.gz\tR1 metagenomic archive of fastq files\thttps://storage.neonscience.org/neon-microbial-raw-seq-files/2023/BMI_HWVWKBGX7_mms_R1/BMI_HWVWKBGX7_AquaticPlate6WellA5_R1.fastq.gz\t378052f3aeb3d587e3f94588247e7bda"""

    return pd.read_csv(StringIO(tsv_data_dna), delimiter="\t")


def site_code_mapping() -> dict:
    """Return the site-code -> location-name mapping used by the benthic tests.

    A fresh dict is built on every call, so a caller that mutates the result
    cannot affect other tests.
    """
    return {"WLOU": "USA: Colorado, West St Louis Creek"}


class TestNeonBenthicDataTranslator:
@pytest.fixture
def translator(self, test_minter):
    """Build the NeonBenthicDataTranslator under test from in-memory mock data.

    ``test_minter`` is requested as a fixture and passed through as the
    ``id_minter`` (presumably defined in the project's conftest; verify).
    """
    # The stale positional-only construction that preceded this call has been
    # dropped: it returned first and left the keyword-argument form unreachable.
    return NeonBenthicDataTranslator(
        benthic_data=benthic_data,
        site_code_mapping=site_code_mapping(),
        neon_envo_mappings_file=neon_envo_mappings_file(),
        neon_raw_data_file_mappings_file=neon_raw_data_file_mappings_file(),
        id_minter=test_minter,
    )

def test_get_database(self, translator):
database = translator.get_database()
Expand Down Expand Up @@ -183,8 +203,3 @@ def test_get_database(self, translator):
for omics_processing in omics_processing_list:
omics_processing_input = omics_processing.has_input
assert omics_processing_input == lib_prep_output

mongo_db = get_mongo_test_db()
validation_result = validate_json(json_dumper.to_dict(database), mongo_db)
assert validation_result == {"result": "All Okay!"}

30 changes: 25 additions & 5 deletions tests/test_data/test_neon_soil_data_translator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from io import StringIO
import pytest
from nmdc_runtime.site.translation.neon_soil_translator import NeonSoilDataTranslator
from nmdc_runtime.site.translation.neon_utils import (_create_controlled_identified_term_value, _create_controlled_term_value, _create_timestamp_value, _get_value_or_none)
Expand Down Expand Up @@ -772,23 +773,42 @@
),
}

def neon_envo_mappings_file() -> pd.DataFrame:
    """Return a small in-memory NEON NLCD -> ENVO mapping table for tests.

    The table is parsed from an inline tab-separated string (one header row,
    one ``deciduousForest`` data row), so the test needs no file or network
    access.

    Returns:
        A DataFrame with one row and the twelve columns named in the header.
    """
    # NOTE: the literal below is runtime data; keep it byte-for-byte
    # (including the misspellings in the column name and path value).
    tsv_data = """neon_nlcd_value\tmrlc_edomvd_before_hyphen\tmrlc_edomv\tenvo_alt_id\tenvo_id\tenvo_label\tenv_local_scale\tsubCLassOf and part of path to biome\tother justification\tbiome_label\tbiome_id\tenv_broad_scale
deciduousForest\tDeciduous Forest\t41\tNLCD:41\tENVO:01000816\tarea of deciduous forest\tarea of deciduous forest [ENVO:01000816]\t --subCLassOf-->terretrial environmental zone--part of-->\t\tterrestrial biome\tENVO:00000448\tterrestrial biome [ENVO:00000448]"""

    return pd.read_csv(StringIO(tsv_data), delimiter="\t")


def neon_raw_data_file_mappings_file() -> pd.DataFrame:
    """Return a small in-memory raw-data-file mapping table for tests.

    The table is parsed from an inline tab-separated string holding the R1
    and R2 fastq entries for a single ``dnaSampleID``.

    Returns:
        A DataFrame with two rows and the seven columns named in the header.
    """
    # NOTE: the literal below is runtime data; keep it byte-for-byte.
    tsv_data_dna = """dnaSampleID\tsequencerRunID\tinternalLabID\trawDataFileName\trawDataFileDescription\trawDataFilePath\tcheckSum
BLAN_005-M-20200713-COMP-DNA1\tHVT2HBGXJ\t20S_08_0661\tBMI_HVT2HBGXJ_20S_08_0661_R1.fastq.gz\tR1 metagenomic archive of fastq files\thttps://storage.neonscience.org/neon-microbial-raw-seq-files/2021/BMI_HVT2HBGXJ_mms_R1/BMI_HVT2HBGXJ_20S_08_0661_R1.fastq.gz\t8b5794e91b1e79e02f1a3e7ef53a73b3
BLAN_005-M-20200713-COMP-DNA1\tHVT2HBGXJ\t20S_08_0661\tBMI_HVT2HBGXJ_20S_08_0661_R2.fastq.gz\tR2 metagenomic archive of fastq files\thttps://storage.neonscience.org/neon-microbial-raw-seq-files/2021/BMI_HVT2HBGXJ_mms_R2/BMI_HVT2HBGXJ_20S_08_0661_R2.fastq.gz\t44dc66147143a6eb1e806defa7f3706e"""

    return pd.read_csv(StringIO(tsv_data_dna), delimiter="\t")


class TestNeonDataTranslator:
@pytest.fixture
def translator(self, test_minter):
    """Build the NeonSoilDataTranslator under test from in-memory mock data.

    ``test_minter`` is requested as a fixture and passed through as the
    ``id_minter`` (presumably defined in the project's conftest; verify).
    """
    # The stale two-argument construction that preceded this call has been
    # dropped: it returned first and left the keyword-argument form unreachable.
    return NeonSoilDataTranslator(
        mms_data=mms_data,
        sls_data=sls_data,
        neon_envo_mappings_file=neon_envo_mappings_file(),
        neon_raw_data_file_mappings_file=neon_raw_data_file_mappings_file(),
        id_minter=test_minter,
    )

def test_missing_mms_table(self, test_minter):
    """Constructing the translator with an empty mms dict must raise ValueError."""
    # Test behavior when mms data is missing a table
    with pytest.raises(
        ValueError, match="missing one of the metagenomic microbe soil tables"
    ):
        # Only the updated call is kept; the stale two-argument call that ran
        # first inside this block would have stopped this one from executing.
        NeonSoilDataTranslator(
            {},
            sls_data,
            neon_envo_mappings_file(),
            neon_raw_data_file_mappings_file(),
            id_minter=test_minter,
        )

def test_missing_sls_table(self, test_minter):
    """Constructing the translator with an empty sls dict must raise ValueError."""
    # Test behavior when sls data is missing a table
    with pytest.raises(ValueError, match="missing one of the soil periodic tables"):
        # Only the updated call is kept; the stale two-argument call that ran
        # first inside this block would have stopped this one from executing.
        NeonSoilDataTranslator(
            mms_data,
            {},
            neon_envo_mappings_file(),
            neon_raw_data_file_mappings_file(),
            id_minter=test_minter,
        )

def test_get_value_or_none(self):
# use one biosample record to test this method
Expand Down

0 comments on commit f3b5897

Please sign in to comment.