From 637e387e8693a3c5962ee4d218e859dfc7a33998 Mon Sep 17 00:00:00 2001 From: Bede Constantinides Date: Mon, 16 Dec 2024 17:50:41 +0000 Subject: [PATCH] Smart thread count allocation between alignment and compression --- README.md | 95 +++++++++++++++++++++++------------------ src/hostile/__init__.py | 2 +- src/hostile/aligner.py | 30 +++++++------ src/hostile/cli.py | 2 +- src/hostile/lib.py | 32 ++++++++------ src/hostile/util.py | 32 +++++++++----- 6 files changed, 111 insertions(+), 82 deletions(-) diff --git a/README.md b/README.md index b1b5ddc..b5c322c 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,13 @@ # Hostile -Hostile accurately removes host sequences from short and long read (meta)genomes, consuming single-read or paired `fastq[.gz]` input. Batteries are included – a human reference genome is downloaded when run for the first time. Hostile is precise by default, removing an [order of magnitude fewer microbial reads](https://log.bede.im/2023/08/29/precise-host-read-removal.html#evaluating-accuracy) than existing approaches while removing >99.5% of real human reads from 1000 Genomes Project samples. For the best possible retention of microbial reads, use an existing index masked against bacterial and/or viral genomes, or make your own using the built-in masking utility. Read headers can be replaced with integers (using `--rename`) for privacy and smaller FASTQs. Heavy lifting is done with fast existing tools (Minimap2/Bowtie2 and Samtools). Bowtie2 is the default aligner for short (paired) reads while Minimap2 is default aligner for long reads. In benchmarks, bacterial Illumina reads were decontaminated at 32Mbp/s (210k reads/sec) and bacterial ONT reads at 22Mbp/s, using 8 alignment threads. By default, Hostile requires 4GB of RAM for decontaminating short reads and 13GB for long reads (Minimap2). 
Further information and benchmarks can be found in the [paper](https://doi.org/10.1093/bioinformatics/btad728) and [blog post](https://log.bede.im/2023/08/29/precise-host-read-removal.html). Please open an issue to report problems or otherwise [reach](https://bsky.app/profile/bedec.bsky.social) [out](mailto:b@bede.im) for help and advice. +Hostile accurately removes host sequences from short and long read (meta)genomes, consuming single-read or paired `fastq[.gz]` input. Batteries are included – a human reference genome is downloaded when run for the first time. Hostile is precise by default, removing an [order of magnitude fewer microbial reads](https://log.bede.im/2023/08/29/precise-host-read-removal.html#evaluating-accuracy) than existing approaches while removing >99.5% of real human reads from 1000 Genomes Project samples. For the best possible retention of microbial reads, use an existing index masked against bacterial and/or viral genomes, or make your own using the built-in masking utility. Read headers can be replaced with integers (using `--rename`) for privacy and smaller FASTQs. Heavy lifting is done with fast existing tools (Minimap2/Bowtie2 and Samtools). In benchmarks, bacterial Illumina reads were decontaminated at 32Mbp/s (210k reads/sec) and bacterial ONT reads at 22Mbp/s, using 8 alignment threads. In typical use, Hostile requires 4GB of RAM for decontaminating short reads (Bowtie2) and 13GB for long reads (Minimap2). Further information and benchmarks can be found in the [paper](https://doi.org/10.1093/bioinformatics/btad728) and [blog post](https://log.bede.im/2023/08/29/precise-host-read-removal.html). Please open an issue to report problems or otherwise [reach](https://bsky.app/profile/bedec.bsky.social) [out](mailto:b@bede.im) for help and advice. 
## Indexes -The default index `human-t2t-hla` comprises [T2T-CHM13v2.0](https://www.ncbi.nlm.nih.gov/assembly/11828891) and [IPD-IMGT/HLA](https://www.ebi.ac.uk/ipd/imgt/hla/) v3.51, and is downloaded automatically when running Hostile unless another index is specified. Marginally higher microbial sequence retention may be possible using masked indexes. The index `human-t2t-hla-argos985` is masked against [985 reference grade bacterial genomes](https://www.ncbi.nlm.nih.gov/bioproject/231221) including common human pathogens, while `human-t2t-hla.argos-bacteria-985_rs-viral-202401_ml-phage-202401` is further masked against all known virus and phage genomes. The latter should be used when retention of viral sequences is a priority. To use a standard index, simply pass its name as the value of the `--index` argument, which takes care of downloading and caching the relevant index. [Object storage](https://objectstorage.uk-london-1.oraclecloud.com/n/lrbvkel2wjot/b/human-genome-bucket/o) is provided by the [ModMedMicro research unit](https://www.expmedndm.ox.ac.uk/modernising-medical-microbiology) at the University of Oxford. Custom indexes are also supported (see below). +The default index `human-t2t-hla` comprises [T2T-CHM13v2.0](https://www.ncbi.nlm.nih.gov/assembly/11828891) and [IPD-IMGT/HLA](https://www.ebi.ac.uk/ipd/imgt/hla/) v3.51, and is downloaded automatically when running Hostile unless another index is specified. Higher microbial sequence retention may be possible using masked indexes, which are very easy to use. The index `human-t2t-hla-argos985` is masked against [985 reference grade bacterial genomes](https://www.ncbi.nlm.nih.gov/bioproject/231221) including common human pathogens, while `human-t2t-hla.argos-bacteria-985_rs-viral-202401_ml-phage-202401` is further masked against all known virus and phage genomes. The latter should be used when retention of viral sequences is a priority. 
To use a standard index, simply pass its name as the value of the `--index` argument, which takes care of downloading and caching the relevant index. [Object storage](https://objectstorage.uk-london-1.oraclecloud.com/n/lrbvkel2wjot/b/human-genome-bucket/o) is provided by the [ModMedMicro research unit](https://www.expmedndm.ox.ac.uk/modernising-medical-microbiology) at the University of Oxford. Custom indexes are also supported (see below). | Name | Composition | Date | Masked positions | | :----------------------------------------------------------: | :----------------------------------------------------------: | ------- | ---------------------- | @@ -50,7 +50,24 @@ A [Biocontainer image](https://biocontainers.pro/tools/hostile) is also availabl -## Using non-default (including custom) indexes +## Getting started + +```bash +# Long reads +hostile clean --fastq1 long.fastq.gz # Creates long.clean.fastq.gz +hostile clean --fastq1 long.fastq.gz --index mouse-mm39 # Use mouse index +hostile clean --fastq1 long.fastq.gz --stdout > long.clean.fastq # Send to stdout +hostile clean --invert --fastq1 long.fastq.gz # Keep only host reads + +# Short reads +hostile clean --fastq1 short.r1.fq.gz --aligner bowtie2 # Single/unpaired +hostile clean --fastq1 short.r1.fq.gz --fastq2 short.r2.fq.gz # Paired +hostile clean --fastq1 short.r1.fq.gz --fastq2 short.r2.fq.gz --stdout > clean.fq # Send interleaved read pairs to stdout +``` + + + +## Custom indexes - To download ahead of time and cache the default index (`human-t2t-hla`), run `hostile fetch` - To list available standard indexes, run `hostile fetch --list` @@ -111,93 +128,89 @@ options: -**Long reads, default index** +**Long reads** Writes compressed fastq.gz files to current working directory, sends log to stdout ```bash $ hostile clean --fastq1 tests/data/tuberculosis_1_1.fastq.gz -INFO: Hostile version 1.0.0. Mode: long read (Minimap2) -INFO: Found cached standard index human-t2t-hla +INFO: Hostile v2.0.0. 
Mode: long read (Minimap2) +INFO: Found cached standard index human-t2t-hla (MMI available) INFO: Cleaning… INFO: Cleaning complete [ { - "version": "1.0.0", + "version": "2.0.0", "aligner": "minimap2", "index": "human-t2t-hla", "options": [], "fastq1_in_name": "tuberculosis_1_1.fastq.gz", "fastq1_in_path": "/Users/bede/Research/git/hostile/tests/data/tuberculosis_1_1.fastq.gz", - "fastq1_out_name": "tuberculosis_1_1.clean.fastq.gz", - "fastq1_out_path": "/Users/bede/Research/git/hostile/tuberculosis_1_1.clean.fastq.gz", "reads_in": 1, "reads_out": 1, "reads_removed": 0, - "reads_removed_proportion": 0.0 + "reads_removed_proportion": 0.0, + "fastq1_out_name": "tuberculosis_1_1.clean.fastq.gz", + "fastq1_out_path": "/Users/bede/Research/git/hostile/tuberculosis_1_1.clean.fastq.gz" } ] - ``` -**Long reads, default index, stream reads to stdout** - -Sends uncompressed FASTQ to stdout, log to stderr +**Long reads, send reads to stdout** ```bash -$ hostile clean --fastq1 tests/data/tuberculosis_1_1.fastq.gz -INFO: Hostile version 1.0.0. Mode: long read (Minimap2) -INFO: Found cached standard index human-t2t-hla +$ hostile clean --fastq1 tests/data/tuberculosis_1_1.fastq.gz --stdout > out.fastq +INFO: Hostile v2.0.0. 
Mode: long read (Minimap2) +INFO: Found cached standard index human-t2t-hla (MMI available) INFO: Cleaning… INFO: Cleaning complete [ { - "version": "1.0.0", + "version": "2.0.0", "aligner": "minimap2", "index": "human-t2t-hla", - "options": [], + "options": [ + "stdout" + ], "fastq1_in_name": "tuberculosis_1_1.fastq.gz", "fastq1_in_path": "/Users/bede/Research/git/hostile/tests/data/tuberculosis_1_1.fastq.gz", - "fastq1_out_name": "tuberculosis_1_1.clean.fastq.gz", - "fastq1_out_path": "/Users/bede/Research/git/hostile/tuberculosis_1_1.clean.fastq.gz", "reads_in": 1, "reads_out": 1, "reads_removed": 0, "reads_removed_proportion": 0.0 } ] - ``` -**Short paired reads, default index** +**Short paired reads** ```bash $ hostile clean --fastq1 human_1_1.fastq.gz --fastq2 human_1_2.fastq.gz --aligner bowtie2 -INFO: Hostile version 1.0.0. Mode: paired short read (Bowtie2) -INFO: Found cached standard index human-t2t-hla -INFO: Cleaning… -INFO: Cleaning complete +14:40:51 INFO: Hostile v2.0.0. Mode: paired short read (Bowtie2) +14:40:51 INFO: Found cached standard index human-t2t-hla +14:40:51 INFO: Cleaning… +14:40:52 INFO: Cleaning complete [ { - "version": "1.0.0", + "version": "2.0.0", "aligner": "bowtie2", "index": "human-t2t-hla", "options": [], "fastq1_in_name": "human_1_1.fastq.gz", - "fastq1_in_path": "/Users/bede/human_1_1.fastq.gz", - "fastq1_out_name": "human_1_1.clean_1.fastq.gz", - "fastq1_out_path": "/Users/bede/human_1_1.clean_1.fastq.gz", + "fastq1_in_path": "/Users/bede/Research/git/hostile/tests/data/human_1_1.fastq.gz", "reads_in": 2, "reads_out": 0, "reads_removed": 2, "reads_removed_proportion": 1.0, "fastq2_in_name": "human_1_2.fastq.gz", - "fastq2_in_path": "/Users/bede/human_1_2.fastq.gz", + "fastq2_in_path": "/Users/bede/Research/git/hostile/tests/data/human_1_2.fastq.gz", + "fastq1_out_name": "human_1_1.clean_1.fastq.gz", + "fastq1_out_path": "/Users/bede/Research/git/hostile/human_1_1.clean_1.fastq.gz", "fastq2_out_name": 
"human_1_2.clean_2.fastq.gz", - "fastq2_out_path": "/Users/bede/human_1_2.clean_2.fastq.gz" + "fastq2_out_path": "/Users/bede/Research/git/hostile/human_1_2.clean_2.fastq.gz" } ] ``` @@ -206,30 +219,28 @@ INFO: Cleaning complete ```bash $ hostile clean --fastq1 human_1_1.fastq.gz --fastq2 human_1_2.fastq.gz --aligner bowtie2 --index human-t2t-hla-argos985 > log.json -INFO: Hostile version 1.0.0. Mode: paired short read (Bowtie2) -INFO: Found cached standard index human-t2t-hla +INFO: Hostile v2.0.0. Mode: paired short read (Bowtie2) +INFO: Found cached standard index human-t2t-hla-argos985 INFO: Cleaning… INFO: Cleaning complete ``` -**Short single/unpaired reads, save log** +**Short single/unpaired reads, compress with Zstandard** -By default, single/unpaired fastqs are assumed to be long reads. Override this by specifying `--aligner bowtie2` when decontaminating unpaired short reads. +By default, single/unpaired fastqs are assumed to be long reads. Be sure to override this with `--aligner bowtie2` when decontaminating single/unpaired short reads. ```bash -$ hostile clean --aligner bowtie2 --fastq1 tests/data/human_1_1.fastq.gz > log.json -INFO: Hostile version 1.0.0. Mode: short read (Bowtie2) -INFO: Found cached standard index human-t2t-hla +$ hostile clean --fastq1 human_1_1.fastq.gz --aligner bowtie2 --stdout | zstd > human_1_1.clean.fastq.zst +INFO: Hostile v2.0.0. 
Mode: paired short read (Bowtie2) +INFO: Found cached standard index human-t2t-hla-argos985 INFO: Cleaning… INFO: Cleaning complete ``` - - ## Python usage ```python diff --git a/src/hostile/__init__.py b/src/hostile/__init__.py index 1a609c5..3409fe5 100644 --- a/src/hostile/__init__.py +++ b/src/hostile/__init__.py @@ -1,3 +1,3 @@ """Accurate host read removal""" -__version__ = "1.1.0" +__version__ = "2.0.0" diff --git a/src/hostile/aligner.py b/src/hostile/aligner.py index a4709cf..22120ff 100644 --- a/src/hostile/aligner.py +++ b/src/hostile/aligner.py @@ -126,7 +126,8 @@ def gen_clean_cmd( rename: bool, reorder: bool, aligner_args: str, - threads: int, + aligner_threads: int, + compression_threads: int, force: bool, ) -> str: fastq, out_dir = Path(fastq), Path(out_dir) @@ -161,7 +162,7 @@ def gen_clean_cmd( "{MMI_PATH}": str(mmi_path), "{FASTQ}": str(fastq), "{ALIGNER_ARGS}": str(aligner_args), - "{THREADS}": str(threads), + "{ALIGNER_THREADS}": str(aligner_threads), } if self.name == "Minimap2" and not mmi_path.is_file(): @@ -174,9 +175,9 @@ def gen_clean_cmd( # If we are streaming output, write to stdout instead of a file if stdout: - fastq_cmd = "samtools fastq --threads 4 -c 6 -0 -" # write to stdout + fastq_cmd = "samtools fastq --threads 0 -c 6 -0 -" # write to stdout else: - fastq_cmd = f"samtools fastq --threads 4 -c 6 -0 '{fastq_out_path}'" + fastq_cmd = f"samtools fastq --threads {compression_threads} -c 6 -0 '{fastq_out_path}'" cmd = ( # Align, stream reads to stdout in SAM format @@ -208,7 +209,8 @@ def gen_paired_clean_cmd( rename: bool, reorder: bool, aligner_args: str, - threads: int, + aligner_threads: int, + compression_threads: int, force: bool, ) -> str: fastq1, fastq2, out_dir = Path(fastq1), Path(fastq2), Path(out_dir) @@ -259,7 +261,7 @@ def gen_paired_clean_cmd( "{FASTQ1}": str(fastq1), "{FASTQ2}": str(fastq2), "{ALIGNER_ARGS}": str(aligner_args), - "{THREADS}": str(threads), + "{ALIGNER_THREADS}": str(aligner_threads), } if self.name == 
"Minimap2": @@ -276,9 +278,9 @@ def gen_paired_clean_cmd( alignment_cmd = alignment_cmd.replace(k, cmd_template[k]) if stdout: - fastq_cmd = "samtools fastq --threads 4 -c 6 -N -0 -" + fastq_cmd = "samtools fastq --threads 0 -c 6 -N -0 -" else: - fastq_cmd = f"samtools fastq --threads 4 -c 6 -N -1 '{fastq1_out_path}' -2 '{fastq2_out_path}' -0 /dev/null -s /dev/null" + fastq_cmd = f"samtools fastq --threads {compression_threads} -c 6 -N -1 '{fastq1_out_path}' -2 '{fastq2_out_path}' -0 /dev/null -s /dev/null" cmd = ( f"{alignment_cmd}" @@ -302,12 +304,12 @@ def gen_paired_clean_cmd( data_dir=util.CACHE_DIR, single_cmd=( "'{BIN_PATH}' -x '{INDEX_PATH}' -U '{FASTQ}'" - " -k 1 --mm -p {THREADS} {ALIGNER_ARGS}" + " -k 1 --mm -p {ALIGNER_THREADS} {ALIGNER_ARGS}" ), single_unindexed_cmd="", paired_cmd=( "{BIN_PATH} -x '{INDEX_PATH}' -1 '{FASTQ1}' -2 '{FASTQ2}'" - " -k 1 --mm -p {THREADS} {ALIGNER_ARGS}" + " -k 1 --mm -p {ALIGNER_THREADS} {ALIGNER_ARGS}" ), paired_unindexed_cmd="", ), @@ -315,10 +317,10 @@ def gen_paired_clean_cmd( name="Minimap2", bin_path=Path("minimap2"), data_dir=util.CACHE_DIR, - single_cmd="'{BIN_PATH}' -ax map-ont --secondary no -t {THREADS} {ALIGNER_ARGS} '{MMI_PATH}' '{FASTQ}'", - single_unindexed_cmd="'{BIN_PATH}' -ax map-ont --secondary no -t {THREADS} {ALIGNER_ARGS} -d '{MMI_PATH}' '{INDEX_PATH}' '{FASTQ}'", - paired_cmd="'{BIN_PATH}' -ax sr --secondary no -t {THREADS} {ALIGNER_ARGS} '{MMI_PATH}' '{FASTQ1}' '{FASTQ2}'", - paired_unindexed_cmd="'{BIN_PATH}' -ax sr --secondary no -t {THREADS} {ALIGNER_ARGS} -d '{MMI_PATH}' '{INDEX_PATH}' '{FASTQ1}' '{FASTQ2}'", + single_cmd="'{BIN_PATH}' -ax map-ont --secondary no -t {ALIGNER_THREADS} {ALIGNER_ARGS} '{MMI_PATH}' '{FASTQ}'", + single_unindexed_cmd="'{BIN_PATH}' -ax map-ont --secondary no -t {ALIGNER_THREADS} {ALIGNER_ARGS} -d '{MMI_PATH}' '{INDEX_PATH}' '{FASTQ}'", + paired_cmd="'{BIN_PATH}' -ax sr --secondary no -t {ALIGNER_THREADS} {ALIGNER_ARGS} '{MMI_PATH}' '{FASTQ1}' '{FASTQ2}'", + 
paired_unindexed_cmd="'{BIN_PATH}' -ax sr --secondary no -t {ALIGNER_THREADS} {ALIGNER_ARGS} -d '{MMI_PATH}' '{INDEX_PATH}' '{FASTQ1}' '{FASTQ2}'", ), }, ) diff --git a/src/hostile/cli.py b/src/hostile/cli.py index 035a725..20ac04c 100644 --- a/src/hostile/cli.py +++ b/src/hostile/cli.py @@ -30,7 +30,7 @@ def clean( reorder: bool = False, out_dir: Path = util.CWD, stdout: bool = False, - threads: int = util.THREADS, + threads: int = util.CPU_COUNT, force: bool = False, aligner_args: str = "", offline: bool = False, diff --git a/src/hostile/lib.py b/src/hostile/lib.py index b02d309..5b76a83 100644 --- a/src/hostile/lib.py +++ b/src/hostile/lib.py @@ -161,17 +161,20 @@ def clean_fastqs( stdout: bool = False, aligner: ALIGNER = ALIGNER.minimap2, aligner_args: str = "", - threads: int = util.THREADS, + threads: int = util.CPU_COUNT, force: bool = False, offline: bool = False, ): - logging.debug(f"clean_fastqs() {threads=}") + aligner_threads, compression_threads = util.allocate_threads(threads, stdout=stdout) + logging.debug( + f"clean_fastqs() {threads=} {aligner_threads=} {compression_threads=}" + ) logging.debug(f"{util.CACHE_DIR=}") logging.debug(f"{util.INDEX_REPOSITORY_URL=}") if aligner == ALIGNER.bowtie2: - logging.info(f"Hostile version {__version__}. Mode: short read (Bowtie2)") + logging.info(f"Hostile v{__version__}. Mode: short read (Bowtie2)") elif aligner == ALIGNER.minimap2: - logging.info(f"Hostile version {__version__}. Mode: long read (Minimap2)") + logging.info(f"Hostile v{__version__}. 
Mode: long read (Minimap2)") fastqs = [Path(path).absolute() for path in fastqs] if not all(fastq.is_file() for fastq in fastqs): raise FileNotFoundError("One or more fastq files do not exist") @@ -187,7 +190,8 @@ def clean_fastqs( rename=rename, reorder=reorder, aligner_args=aligner_args, - threads=threads, + aligner_threads=aligner_threads, + compression_threads=compression_threads, force=force, ) for fastq in fastqs @@ -220,21 +224,20 @@ def clean_paired_fastqs( stdout: bool = False, aligner: ALIGNER = ALIGNER.bowtie2, aligner_args: str = "", - threads: int = util.THREADS, + threads: int = util.CPU_COUNT, force: bool = False, offline: bool = False, ): - logging.debug(f"clean_paired_fastqs() {threads=}") + aligner_threads, compression_threads = util.allocate_threads(threads, stdout=stdout) + logging.debug( + f"clean_paired_fastqs() {threads=} {aligner_threads=} {compression_threads=}" + ) logging.debug(f"{util.CACHE_DIR=}") logging.debug(f"{util.INDEX_REPOSITORY_URL=}") if aligner == ALIGNER.bowtie2: - logging.info( - f"Hostile version {__version__}. Mode: paired short read (Bowtie2)" - ) + logging.info(f"Hostile v{__version__}. Mode: paired short read (Bowtie2)") elif aligner == ALIGNER.minimap2: - logging.info( - f"Hostile version {__version__}. Mode: paired short read (Minimap2)" - ) + logging.info(f"Hostile v{__version__}. 
Mode: paired short read (Minimap2)") fastqs = [ (Path(path1).absolute(), Path(path2).absolute()) for path1, path2 in fastqs ] @@ -253,7 +256,8 @@ def clean_paired_fastqs( rename=rename, reorder=reorder, aligner_args=aligner_args, - threads=threads, + aligner_threads=aligner_threads, + compression_threads=compression_threads, force=force, ) for fastq_pair in fastqs diff --git a/src/hostile/util.py b/src/hostile/util.py index 9c364ae..d589335 100644 --- a/src/hostile/util.py +++ b/src/hostile/util.py @@ -18,15 +18,25 @@ from tqdm import tqdm -def choose_default_thread_count(cpu_count: int) -> int: - """Choose a sensible number of threads for alignment""" - cpu_count = int(cpu_count) - if cpu_count <= 1: - return 1 - elif 1 < cpu_count < 17: - return int(cpu_count / 2) - else: - return 10 +def allocate_threads(cpu_count: int, stdout: bool = False) -> tuple[int, int]: + """Return (alignment_threads, compression_threads) chosen from cpu_count; compression_threads is 0 for a single CPU or when stdout is True""" + cpu_count = max(1, int(cpu_count)) # Coerce to int and treat anything below 1 as a single CPU + + if cpu_count == 1: + return 1, 1 - 1 # NOTE(review): the trailing "- 1" presumably converts a total into samtools' count of additional threads (confirm) + + if stdout: + alignment_threads = min(30, max(1, cpu_count - 1)) + return alignment_threads, 1 - 1 + + if cpu_count > 32: + return 22, 10 - 1 + + # Give roughly two thirds of the CPUs to alignment; the remainder (capped at 10, then minus 1) goes to compression + alignment_threads = max(1, (2 * cpu_count) // 3) + compression_threads = min(10, max(1, cpu_count - alignment_threads)) - 1 + + return alignment_threads, compression_threads CWD = Path.cwd() @@ -37,7 +47,7 @@ def choose_default_thread_count(cpu_count: int) -> int: ) CPU_COUNT = multiprocessing.cpu_count() -THREADS = choose_default_thread_count(CPU_COUNT) +# THREADS = allocate_threads(CPU_COUNT) DEFAULT_INDEX_REPOSITORY_URL = "https://objectstorage.uk-london-1.oraclecloud.com/n/lrbvkel2wjot/b/human-genome-bucket/o" INDEX_REPOSITORY_URL = os.environ.get( "HOSTILE_REPOSITORY_URL", DEFAULT_INDEX_REPOSITORY_URL @@ -72,6 +82,8 @@ def handle_alignment_exceptions(exception: subprocess.CalledProcessError) -> 
Non logging.debug(f"stderr: {exception.stderr}") alignment_successful = False stream_empty = False + if "function mm_idx_load" in exception.stderr: # Minimap2 index corruption: raise only when stderr actually mentions mm_idx_load + raise RuntimeError("Minimap2 index appears corrupted, run hostile index purge") if "Error, fewer reads in file specified" in exception.stderr: # Bowtie2 raise RuntimeError("fastq1 and fastq2 contain different numbers of reads") if 'Failed to read header for "-"' in exception.stderr: