Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding support for importing samtools/bcftools stats (#1139) #1140

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 75 additions & 32 deletions cases_import/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
)
from cases_import.proto import Assay, FileDesignation, get_case_name_from_family_payload
from cases_qc.io import dragen as io_dragen
from cases_qc.io import samtools as io_samtools
from cases_qc.models import CaseQc
from seqmeta.models import TargetBedFile
from varfish.utils import JSONField
Expand Down Expand Up @@ -341,18 +342,14 @@ class DragenQcImportExecutor(FileImportExecutorBase):
def __init__(self, case: Case):
super().__init__(case.project)
self.case = case
#: Map the extended detailed type to the handler function.
self.handlers = {
"x-dragen-qc-cnv-metrics": self._import_dragen_qc_cnv_metrics,
#: Map the extended detailed type to the handler function, per-sample.
self.handlers_individual = {
"x-dragen-qc-fragment-length-hist": self._import_dragen_qc_fragment_length_hist,
"x-dragen-qc-mapping-metrics": self._import_dragen_qc_mapping_metrics,
"x-dragen-qc-ploidy-estimation-metrics": self._import_dragen_qc_ploidy_estimation_metrics,
"x-dragen-qc-roh-metrics": self._import_dragen_qc_roh_metrics,
"x-dragen-qc-sv-metrics": self._import_dragen_qc_sv_metrics,
"x-dragen-qc-time-metrics": self._import_dragen_qc_time_metrics,
"x-dragen-qc-trimmer-metrics": self._import_dragen_qc_trimmer_metrics,
"x-dragen-qc-vc-hethom-ratio-metrics": self._import_dragen_qc_vc_hethom_ratio_metrics,
"x-dragen-qc-vc-metrics": self._import_dragen_qc_vc_metrics,
"x-dragen-qc-wgs-contig-mean-cov": self._import_dragen_qc_wgs_contig_mean_cov,
"x-dragen-qc-wgs-coverage-metrics": self._import_dragen_qc_wgs_coverage_metrics,
"x-dragen-qc-wgs-fine-hist": self._import_dragen_qc_wgs_fine_hist,
Expand All @@ -362,17 +359,33 @@ def __init__(self, case: Case):
"x-dragen-qc-region-coverage-fine-hist": self._import_dragen_qc_region_coverage_fine_hist,
"x-dragen-qc-region-coverage-hist": self._import_dragen_qc_region_coverage_hist,
"x-dragen-qc-region-coverage-overall-mean-cov": self._import_dragen_qc_region_coverage_overall_mean_cov,
"x-samtools-qc-samtools-flagstat": self._import_samtools_qc_samtools_flagstat,
"x-samtools-qc-samtools-idxstats": self._import_samtools_qc_samtools_idxstats,
"x-samtools-qc-samtools-stats": self._import_samtools_qc_samtools_stats,
}
#: Map the extended detailed type to the handler function, for pedigree.
self.handlers_pedigree = {
"x-dragen-qc-cnv-metrics": self._import_dragen_qc_cnv_metrics,
"x-dragen-qc-sv-metrics": self._import_dragen_qc_sv_metrics,
"x-dragen-qc-vc-hethom-ratio-metrics": self._import_dragen_qc_vc_hethom_ratio_metrics,
"x-dragen-qc-vc-metrics": self._import_dragen_qc_vc_metrics,
"x-samtools-qc-bcftools-stats": self._import_samtools_qc_bcftools_stats,
}

def run(self):
caseqc = CaseQc.objects.create(case=self.case)
pedigree = self.case.pedigree_obj
for external_file in pedigree.pedigreeexternalfile_set.all():
self._import_externalfile(external_file, caseqc)
for individual in pedigree.individual_set.all():
for external_file in IndividualExternalFile.objects.filter(individual=individual):
self._import_external_file(individual.name, external_file, caseqc)
self._import_externalfile(external_file, caseqc, individual_name=individual.name)

def _import_external_file(
self, individual_name: str, external_file: IndividualExternalFile, caseqc: CaseQc
def _import_externalfile(
self,
external_file: IndividualExternalFile,
caseqc: CaseQc,
individual_name: str | None = None,
):
"""Import quality metrics from external file, if any.

Expand All @@ -389,21 +402,19 @@ def _import_external_file(
return # no detailed type

base_mimetype, x_detailed_type = mimetype.split("+", 1)
if x_detailed_type not in self.handlers:
maps = (self.handlers_individual, self.handlers_pedigree)
if not any(x_detailed_type in map for map in maps):
return # no handler configured

if base_mimetype != "text/csv":
return # only CSV supported

self.handlers[x_detailed_type](individual_name, external_file, caseqc)
if x_detailed_type in self.handlers_individual:
self.handlers_individual[x_detailed_type](individual_name, external_file, caseqc)
else:
assert x_detailed_type in self.handlers_pedigree
self.handlers_pedigree[x_detailed_type](external_file, caseqc)

def _import_dragen_qc_cnv_metrics(
self, individual_name: str, external_file: IndividualExternalFile, caseqc: CaseQc
):
sample_name = external_file.identifier_map.get(individual_name, individual_name)
def _import_dragen_qc_cnv_metrics(self, external_file: PedigreeExternalFile, caseqc: CaseQc):
with self.fs.open(external_file.path, "rt") as inputf:
io_dragen.load_cnv_metrics(
sample=sample_name,
input_file=inputf,
caseqc=caseqc,
)
Expand Down Expand Up @@ -452,13 +463,9 @@ def _import_dragen_qc_roh_metrics(
caseqc=caseqc,
)

def _import_dragen_qc_sv_metrics(
self, individual_name: str, external_file: IndividualExternalFile, caseqc: CaseQc
):
sample_name = external_file.identifier_map.get(individual_name, individual_name)
def _import_dragen_qc_sv_metrics(self, external_file: PedigreeExternalFile, caseqc: CaseQc):
with self.fs.open(external_file.path, "rt") as inputf:
io_dragen.load_sv_metrics(
sample=sample_name,
input_file=inputf,
caseqc=caseqc,
)
Expand Down Expand Up @@ -486,23 +493,17 @@ def _import_dragen_qc_trimmer_metrics(
)

def _import_dragen_qc_vc_hethom_ratio_metrics(
self, individual_name: str, external_file: IndividualExternalFile, caseqc: CaseQc
self, external_file: PedigreeExternalFile, caseqc: CaseQc
):
sample_name = external_file.identifier_map.get(individual_name, individual_name)
with self.fs.open(external_file.path, "rt") as inputf:
io_dragen.load_vc_hethom_ratio_metrics(
sample=sample_name,
input_file=inputf,
caseqc=caseqc,
)

def _import_dragen_qc_vc_metrics(
self, individual_name: str, external_file: IndividualExternalFile, caseqc: CaseQc
):
sample_name = external_file.identifier_map.get(individual_name, individual_name)
def _import_dragen_qc_vc_metrics(self, external_file: PedigreeExternalFile, caseqc: CaseQc):
with self.fs.open(external_file.path, "rt") as inputf:
io_dragen.load_vc_metrics(
sample=sample_name,
input_file=inputf,
caseqc=caseqc,
)
Expand Down Expand Up @@ -614,6 +615,48 @@ def _import_dragen_qc_region_coverage_overall_mean_cov(
caseqc=caseqc,
)

def _import_samtools_qc_bcftools_stats(
    self,
    external_file: PedigreeExternalFile,
    caseqc: CaseQc,
):
    """Import ``bcftools stats`` output from a pedigree-level external file.

    The file at ``external_file.path`` is opened in text mode through
    ``self.fs`` and the open stream is handed to
    ``io_samtools.load_bcftools_stats`` together with ``caseqc``.  No sample
    name is passed to the loader.
    """
    with self.fs.open(external_file.path, "rt") as stats_stream:
        io_samtools.load_bcftools_stats(input_file=stats_stream, caseqc=caseqc)

def _import_samtools_qc_samtools_flagstat(
    self, individual_name: str, external_file: IndividualExternalFile, caseqc: CaseQc
):
    """Import ``samtools flagstat`` output for a single individual.

    The sample name given to the loader is looked up in
    ``external_file.identifier_map`` under ``individual_name``; when the map
    has no such entry, ``individual_name`` itself is used.

    :param individual_name: name of the individual the file belongs to
    :param external_file: per-individual external file record; ``path`` and
        ``identifier_map`` are read
    :param caseqc: QC record that is passed through to the loader
    """
    # This handler is dispatched per individual, so the record is an
    # ``IndividualExternalFile`` (not a pedigree-level file).
    sample_name = external_file.identifier_map.get(individual_name, individual_name)
    with self.fs.open(external_file.path, "rt") as inputf:
        io_samtools.load_samtools_flagstat(
            sample=sample_name,
            input_file=inputf,
            caseqc=caseqc,
        )

def _import_samtools_qc_samtools_idxstats(
    self, individual_name: str, external_file: IndividualExternalFile, caseqc: CaseQc
):
    """Import ``samtools idxstats`` output for a single individual.

    The sample name given to the loader is looked up in
    ``external_file.identifier_map`` under ``individual_name``; when the map
    has no such entry, ``individual_name`` itself is used.

    :param individual_name: name of the individual the file belongs to
    :param external_file: per-individual external file record; ``path`` and
        ``identifier_map`` are read
    :param caseqc: QC record that is passed through to the loader
    """
    # This handler is dispatched per individual, so the record is an
    # ``IndividualExternalFile`` (not a pedigree-level file).
    sample_name = external_file.identifier_map.get(individual_name, individual_name)
    with self.fs.open(external_file.path, "rt") as inputf:
        io_samtools.load_samtools_idxstats(
            sample=sample_name,
            input_file=inputf,
            caseqc=caseqc,
        )

def _import_samtools_qc_samtools_stats(
    self, individual_name: str, external_file: IndividualExternalFile, caseqc: CaseQc
):
    """Import ``samtools stats`` output for a single individual.

    The sample name given to the loader is looked up in
    ``external_file.identifier_map`` under ``individual_name``; when the map
    has no such entry, ``individual_name`` itself is used.

    :param individual_name: name of the individual the file belongs to
    :param external_file: per-individual external file record; ``path`` and
        ``identifier_map`` are read
    :param caseqc: QC record that is passed through to the loader
    """
    # This handler is dispatched per individual, so the record is an
    # ``IndividualExternalFile`` (not a pedigree-level file).
    sample_name = external_file.identifier_map.get(individual_name, individual_name)
    with self.fs.open(external_file.path, "rt") as inputf:
        io_samtools.load_samtools_stats(
            sample=sample_name,
            input_file=inputf,
            caseqc=caseqc,
        )


class CaseImportBackgroundJobExecutor:
"""Implementation of ``CaseImportBackgroundJob`` execution."""
Expand Down
4 changes: 2 additions & 2 deletions cases_import/tests/data/singleton_dragen_qc.yaml
Git LFS file not shown
3 changes: 3 additions & 0 deletions cases_import/tests/data/singleton_samtools_qc.yaml
Git LFS file not shown
92 changes: 76 additions & 16 deletions cases_import/tests/test_models_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,10 @@ def setUp(self):
@mock.patch("cases_qc.io.dragen.load_region_overall_mean_cov")
def test_run(
self,
load_region_overall_mean_cov,
load_region_hist,
load_region_fine_hist,
load_region_coverage_metrics,
mock_load_region_overall_mean_cov,
mock_load_region_hist,
mock_load_region_fine_hist,
mock_load_region_coverage_metrics,
mock_load_wgs_overall_mean_cov,
mock_load_wgs_hist_metrics,
mock_load_wgs_fine_hist,
Expand Down Expand Up @@ -133,7 +133,6 @@ def test_run(
caseqc = CaseQc.objects.first()

mock_load_cnv_metrics.assert_called_once_with(
sample="NA12878-PCRF450-1",
input_file=mock.ANY,
caseqc=caseqc,
)
Expand Down Expand Up @@ -183,7 +182,6 @@ def test_run(
)

mock_load_sv_metrics.assert_called_once_with(
sample="NA12878-PCRF450-1",
input_file=mock.ANY,
caseqc=caseqc,
)
Expand Down Expand Up @@ -213,7 +211,6 @@ def test_run(
)

mock_load_vc_hethom_ratio_metrics.assert_called_once_with(
sample="NA12878-PCRF450-1",
input_file=mock.ANY,
caseqc=caseqc,
)
Expand All @@ -223,7 +220,6 @@ def test_run(
)

mock_load_vc_metrics.assert_called_once_with(
sample="NA12878-PCRF450-1",
input_file=mock.ANY,
caseqc=caseqc,
)
Expand Down Expand Up @@ -282,55 +278,119 @@ def test_run(
os.path.realpath("cases_qc/tests/data/sample.wgs_overall_mean_cov.csv"),
)

load_region_overall_mean_cov.assert_called_once_with(
mock_load_region_overall_mean_cov.assert_called_once_with(
sample="NA12878-PCRF450-1",
region_name="region-3",
input_file=mock.ANY,
caseqc=caseqc,
)
self.assertEqual(
load_region_overall_mean_cov.call_args[1]["input_file"].name,
mock_load_region_overall_mean_cov.call_args[1]["input_file"].name,
os.path.realpath(
"cases_qc/tests/data/sample.qc-coverage-region-3_overall_mean_cov.csv"
),
)

load_region_hist.assert_called_once_with(
mock_load_region_hist.assert_called_once_with(
sample="NA12878-PCRF450-1",
region_name="region-3",
input_file=mock.ANY,
caseqc=caseqc,
)
self.assertEqual(
load_region_hist.call_args[1]["input_file"].name,
mock_load_region_hist.call_args[1]["input_file"].name,
os.path.realpath("cases_qc/tests/data/sample.qc-coverage-region-3_hist.csv"),
)

load_region_fine_hist.assert_called_once_with(
mock_load_region_fine_hist.assert_called_once_with(
sample="NA12878-PCRF450-1",
region_name="region-3",
input_file=mock.ANY,
caseqc=caseqc,
)
self.assertEqual(
load_region_fine_hist.call_args[1]["input_file"].name,
mock_load_region_fine_hist.call_args[1]["input_file"].name,
os.path.realpath("cases_qc/tests/data/sample.qc-coverage-region-3_fine_hist.csv"),
)

load_region_coverage_metrics.assert_called_once_with(
mock_load_region_coverage_metrics.assert_called_once_with(
sample="NA12878-PCRF450-1",
region_name="region-3",
input_file=mock.ANY,
caseqc=caseqc,
)
self.assertEqual(
load_region_coverage_metrics.call_args[1]["input_file"].name,
mock_load_region_coverage_metrics.call_args[1]["input_file"].name,
os.path.realpath(
"cases_qc/tests/data/sample.qc-coverage-region-3_coverage_metrics.csv"
),
)


class ImportCreateWithSamtoolsQcTest(ExecutorTestMixin, TestCaseSnapshot, TestCase):
    """Executor test for action=create with Samtools QC external files.

    The executor itself is run for real; the ``cases_qc.io.samtools`` loader
    functions for stats, flagstat and bcftools-stats are patched so that only
    the arguments they receive are checked.
    """

    def setUp(self):
        self.maxDiff = None
        phenopacket_path = "cases_import/tests/data/singleton_samtools_qc.yaml"
        self._setUpExecutor(
            CaseImportAction.ACTION_CREATE,
            fac_kwargs={"path_phenopacket_yaml": phenopacket_path},
        )

    @mock.patch("cases_qc.io.samtools.load_bcftools_stats")
    @mock.patch("cases_qc.io.samtools.load_samtools_flagstat")
    @mock.patch("cases_qc.io.samtools.load_samtools_stats")
    def test_run(
        self,
        mock_load_samtools_stats,
        mock_load_samtools_flagstat,
        mock_load_bcftools_stats,
    ):
        """Test import of a case with full set of Samtools QC files."""
        # Nothing exists before the executor has run.
        for model in (Case, CaseQc):
            self.assertEqual(model.objects.count(), 0)

        self.executor.run()

        # Exactly one case and one QC record are created by the run.
        for model in (Case, CaseQc):
            self.assertEqual(model.objects.count(), 1)
        caseqc = CaseQc.objects.first()

        # (loader mock, loader-specific keyword arguments, expected input path);
        # the bcftools loader is the only one called without a sample name.
        expectations = (
            (
                mock_load_samtools_stats,
                {"sample": "NA12878-PCRF450-1"},
                "cases_qc/tests/data/sample.samtools-stats.txt",
            ),
            (
                mock_load_samtools_flagstat,
                {"sample": "NA12878-PCRF450-1"},
                "cases_qc/tests/data/sample.samtools-flagstat.txt",
            ),
            (
                mock_load_bcftools_stats,
                {},
                "cases_qc/tests/data/sample.bcftools-stats.txt",
            ),
        )
        for loader_mock, extra_kwargs, data_path in expectations:
            loader_mock.assert_called_once_with(
                input_file=mock.ANY,
                caseqc=caseqc,
                **extra_kwargs,
            )
            opened_file = loader_mock.call_args[1]["input_file"]
            self.assertEqual(opened_file.name, os.path.realpath(data_path))


class ImportUpdateTest(ExecutorTestMixin, TestCaseSnapshot, TestCase):
"""Test the executor with action=update"""

Expand Down
Loading
Loading