From 73c1cf7102455c426fa9a048ccabf318d99207ba Mon Sep 17 00:00:00 2001 From: Bede Constantinides Date: Mon, 16 Dec 2024 13:42:30 +0000 Subject: [PATCH] Error msg for mismatched read counts, update log output for stdout, test --- src/hostile/lib.py | 26 ++++++++++++++++++++------ src/hostile/util.py | 4 +++- tests/test_all.py | 10 +++++----- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/hostile/lib.py b/src/hostile/lib.py index 48b6fc8..b40fb88 100644 --- a/src/hostile/lib.py +++ b/src/hostile/lib.py @@ -46,6 +46,7 @@ def gather_stats( aligner: str, invert: bool, index: str, + stdout: bool, ) -> list[dict[str, str | int | float | list[str]]]: stats = [] for fastq1 in fastqs: @@ -64,7 +65,12 @@ def gather_stats( proportion_removed = float(0) options = [ k - for k, v in {"rename": rename, "reorder": reorder, "invert": invert}.items() + for k, v in { + "invert": invert, + "rename": rename, + "reorder": reorder, + "stdout": stdout, + }.items() if v ] report = SampleReport( @@ -93,6 +99,7 @@ def gather_stats_paired( aligner: str, index: str, invert: bool, + stdout: bool, ) -> list[dict[str, str | int | float]]: stats = [] for fastq1, fastq2 in fastqs: @@ -113,7 +120,12 @@ def gather_stats_paired( proportion_removed = float(0) options = [ k - for k, v in {"rename": rename, "reorder": reorder, "invert": invert}.items() + for k, v in { + "invert": invert, + "rename": rename, + "reorder": reorder, + "stdout": stdout, + }.items() if v ] stats.append( @@ -126,10 +138,10 @@ def gather_stats_paired( fastq2_in_name=fastq2.name, fastq1_in_path=str(fastq1), fastq2_in_path=str(fastq2), - fastq1_out_name=fastq1_out_path.name, - fastq2_out_name=fastq2_out_path.name, - fastq1_out_path=str(fastq1_out_path), - fastq2_out_path=str(fastq2_out_path), + fastq1_out_name=fastq1_out_path.name if not stdout else None, + fastq2_out_name=fastq2_out_path.name if not stdout else None, + fastq1_out_path=str(fastq1_out_path) if not stdout else None, + fastq2_out_path=str(fastq2_out_path) if not stdout else None, reads_in=n_reads_in, reads_out=n_reads_out, reads_removed=n_reads_removed, @@ -191,6 +203,7 @@ def clean_fastqs( aligner=aligner.name, index=index, invert=invert, + stdout=stdout, ) util.fix_empty_fastqs(stats) logging.info("Cleaning complete") @@ -256,6 +269,7 @@ def clean_paired_fastqs( aligner=aligner.name, index=index, invert=invert, + stdout=stdout, ) util.fix_empty_fastqs(stats) logging.info("Cleaning complete") diff --git a/src/hostile/util.py b/src/hostile/util.py index 4b44146..9c364ae 100644 --- a/src/hostile/util.py +++ b/src/hostile/util.py @@ -72,6 +72,8 @@ def handle_alignment_exceptions(exception: subprocess.CalledProcessError) -> Non logging.debug(f"stderr: {exception.stderr}") alignment_successful = False stream_empty = False + if "Error, fewer reads in file specified" in exception.stderr: # Bowtie2 + raise RuntimeError("fastq1 and fastq2 contain different numbers of reads") if 'Failed to read header for "-"' in exception.stderr: stream_empty = True if "overall alignment rate" in exception.stderr: # Bowtie2 @@ -83,7 +85,7 @@ def handle_alignment_exceptions(exception: subprocess.CalledProcessError) -> Non pass else: logging.error( - f"Hostile encountered a problem. Check available RAM and storage\n" + f"Hostile encountered a problem. Details below\n" f"pipeline stdout:\n{exception.stdout}\n" f"pipeline stderr:\n{exception.stderr}\n" ) diff --git a/tests/test_all.py b/tests/test_all.py index b99c419..dd4cb6a 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -656,8 +656,9 @@ def test_stats_options(tmp_path): rename=True, reorder=True, force=True, + stdout=True, ) - assert ["rename", "reorder", "invert"] == stats[0]["options"] + assert {"rename", "reorder", "invert", "stdout"} == set(stats[0]["options"]) def test_fixing_empty_fastqs_single(tmp_path): @@ -690,18 +691,17 @@ def test_fixing_empty_fastqs_paired(tmp_path): def test_mismatched_number_of_reads_bowtie2(tmp_path): """This has caused sinister errors in the wild, yet is handled gracefully here""" - with pytest.raises(subprocess.CalledProcessError): + with pytest.raises(RuntimeError): lib.clean_paired_fastqs( fastqs=[ ( data_dir / "sars-cov-2_100_1.fastq.gz", - data_dir / "sars-cov-2_50_2.fastq.gz", + data_dir / "sars-cov-2_1_2.fastq", ), ], aligner=lib.ALIGNER.bowtie2, index=data_dir / "sars-cov-2/sars-cov-2", - out_dir=tmp_path, - force=True, + stdout=True, )