Skip to content

Commit

Permalink
Adds --aligner-args option for passing arguments to the aligner
Browse files Browse the repository at this point in the history
  • Loading branch information
bede committed Nov 22, 2023
1 parent 3bb8435 commit cff46d0
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 6 deletions.
8 changes: 6 additions & 2 deletions src/hostile/aligner.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def gen_clean_cmd(
index: Path | None,
rename: bool,
sort_by_name: bool,
aligner_args: str,
threads: int,
force: bool,
) -> str:
Expand All @@ -90,6 +91,7 @@ def gen_clean_cmd(
"{REF_ARCHIVE_PATH}": str(self.ref_archive_path),
"{INDEX_PATH}": str(self.idx_path),
"{FASTQ}": str(fastq),
"{ALIGNER_ARGS}": str(aligner_args),
"{THREADS}": str(threads),
}
alignment_cmd = self.cmd
Expand All @@ -99,7 +101,7 @@ def gen_clean_cmd(
rename_cmd = (
# ' | awk \'BEGIN{{FS=OFS="\\t"}} {{$1=int(NR)" "; print $0}}\''
# Skips header lines (starting with @) and begins counter from first record
'| awk \'BEGIN {{ FS=OFS="\\t"; line_count=0 }} /^@/ {{ next }} {{ $1=int(line_count+1)" "; print $0; line_count++ }}\''
' | awk \'BEGIN {{ FS=OFS="\\t"; line_count=0 }} /^@/ {{ next }} {{ $1=int(line_count+1)" "; print $0; line_count++ }}\''
if rename
else ""
)
Expand Down Expand Up @@ -129,6 +131,7 @@ def gen_paired_clean_cmd(
index: Path | None,
rename: bool,
sort_by_name: bool,
aligner_args: str,
threads: int,
force: bool,
) -> str:
Expand All @@ -154,14 +157,15 @@ def gen_paired_clean_cmd(
"{INDEX_PATH}": str(self.idx_path),
"{FASTQ1}": str(fastq1),
"{FASTQ2}": str(fastq2),
"{ALIGNER_ARGS}": str(aligner_args),
"{THREADS}": str(threads),
}
alignment_cmd = self.paired_cmd
for k in cmd_template.keys():
alignment_cmd = alignment_cmd.replace(k, cmd_template[k])
sort_cmd = " | samtools sort -n -O sam -@ 6 -m 1G" if sort_by_name else ""
rename_cmd = (
# f' | awk \'BEGIN{{FS=OFS="\\t"; start=0}} /^@/{{next}} !start && !/^@/{{start=1}} start{{$1=int((NR+1)/2)" "; print $0}}\''
# ' | awk \'BEGIN{{FS=OFS="\\t"; start=0}} /^@/{{next}} !start && !/^@/{{start=1}} start{{$1=int((NR+1)/2)" "; print $0}}\''
# Skips header lines (starting with @) and begins counter from first record
' | awk \'BEGIN {{ FS=OFS="\\t"; start=0; line_count=1 }} /^@/ {{ next }} !start && !/^@/ {{ start=1 }} start {{ $1=int((line_count+1)/2)" "; print $0; line_count++ }}\''
if rename
Expand Down
4 changes: 4 additions & 0 deletions src/hostile/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def clean(
sort_by_name: bool = False,
out_dir: Path = lib.CWD,
threads: int = lib.THREADS,
aligner_args: str = "",
force: bool = False,
debug: bool = False,
) -> None:
Expand All @@ -40,6 +41,7 @@ def clean(
:arg sort_by_name: sort reads by name
:arg out_dir: path to output directory
:arg threads: number of threads to use
:arg aligner_args: additional arguments for alignment
:arg force: overwrite existing output files
:arg debug: show debug messages
"""
Expand All @@ -64,6 +66,7 @@ def clean(
sort_by_name=sort_by_name,
out_dir=out_dir,
aligner=aligner_paired,
aligner_args=aligner_args,
threads=threads,
force=force,
)
Expand All @@ -75,6 +78,7 @@ def clean(
sort_by_name=sort_by_name,
out_dir=out_dir,
aligner=aligner_unpaired,
aligner_args=aligner_args,
threads=threads,
force=force,
)
Expand Down
15 changes: 11 additions & 4 deletions src/hostile/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,13 @@
# cdn_base_url="http://localhost:8000", # python -m http.server
cdn_base_url=f"https://objectstorage.uk-london-1.oraclecloud.com/n/lrbvkel2wjot/b/human-genome-bucket/o",
data_dir=XDG_DATA_DIR,
cmd=("{BIN_PATH} -x '{INDEX_PATH}' -U '{FASTQ}'" " -k 1 --mm -p {THREADS}"),
cmd=(
"{BIN_PATH} -x '{INDEX_PATH}' -U '{FASTQ}'"
" -k 1 --mm -p {THREADS} {ALIGNER_ARGS}"
),
paired_cmd=(
"{BIN_PATH} -x '{INDEX_PATH}' -1 '{FASTQ1}' -2 '{FASTQ2}'"
" -k 1 --mm -p {THREADS}"
" -k 1 --mm -p {THREADS} {ALIGNER_ARGS}"
),
idx_archive_fn="human-t2t-hla.tar",
idx_name="human-t2t-hla",
Expand All @@ -58,8 +61,8 @@
# cdn_base_url="http://localhost:8000", # python -m http.server
cdn_base_url=f"https://objectstorage.uk-london-1.oraclecloud.com/n/lrbvkel2wjot/b/human-genome-bucket/o",
data_dir=XDG_DATA_DIR,
cmd="{BIN_PATH} -ax map-ont -m 40 --secondary no -t {THREADS} '{REF_ARCHIVE_PATH}' '{FASTQ}'",
paired_cmd="{BIN_PATH} -ax sr -m 40 --secondary no -t {THREADS} '{REF_ARCHIVE_PATH}' '{FASTQ1}' '{FASTQ2}'",
cmd="{BIN_PATH} -ax map-ont -m 40 --secondary no -t {THREADS} {ALIGNER_ARGS} '{REF_ARCHIVE_PATH}' '{FASTQ}'",
paired_cmd="{BIN_PATH} -ax sr -m 40 --secondary no -t {THREADS} {ALIGNER_ARGS} '{REF_ARCHIVE_PATH}' '{FASTQ1}' '{FASTQ2}'",
ref_archive_fn="human-t2t-hla.fa.gz",
idx_name="human-t2t-hla.fa.gz",
),
Expand Down Expand Up @@ -201,6 +204,7 @@ def clean_fastqs(
sort_by_name: bool = False,
out_dir: Path = CWD,
aligner: ALIGNER = ALIGNER.minimap2,
aligner_args: str = "",
threads: int = THREADS,
force: bool = False,
):
Expand All @@ -220,6 +224,7 @@ def clean_fastqs(
index=index,
rename=rename,
sort_by_name=sort_by_name,
aligner_args=aligner_args,
threads=threads,
force=force,
)
Expand All @@ -242,6 +247,7 @@ def clean_paired_fastqs(
sort_by_name: bool = False,
out_dir: Path = CWD,
aligner: ALIGNER = ALIGNER.bowtie2,
aligner_args: str = "",
threads: int = THREADS,
force: bool = False,
):
Expand All @@ -262,6 +268,7 @@ def clean_paired_fastqs(
index=index,
rename=rename,
sort_by_name=sort_by_name,
aligner_args=aligner_args,
threads=threads,
force=force,
)
Expand Down
34 changes: 34 additions & 0 deletions tests/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,3 +506,37 @@ def test_paired_sort_rename():
assert fifth_line_1 == "@2 /1"
assert fifth_line_2 == "@2 /2"
shutil.rmtree(out_dir, ignore_errors=True)


def test_minimap2_aligner_args():
    """Clean a single FASTQ with minimap2 while passing extra aligner args.

    Supplies ``-x asm5`` through ``aligner_args`` and checks the
    ``reads_out`` count reported for the first (and only) input file.
    """
    try:
        stats = lib.clean_fastqs(
            fastqs=[data_dir / "sars-cov-2_100_1.fastq.gz"],
            aligner=lib.ALIGNER.minimap2,
            index=data_dir / "sars-cov-2/sars-cov-2.fasta.gz",
            sort_by_name=True,
            out_dir=out_dir,
            aligner_args="-x asm5",  # Lets everything through
            force=True,
        )
        assert stats[0]["reads_out"] == 50
    finally:
        # Remove the output directory even when the call or the assertion
        # fails, so a failing run does not leave files behind.
        shutil.rmtree(out_dir, ignore_errors=True)


def test_bowtie2_aligner_args():
    """Clean a FASTQ pair with bowtie2 while passing extra aligner args.

    Supplies ``--ignore-quals`` through ``aligner_args`` with renaming and
    name sorting enabled, then checks the ``reads_out`` count reported for
    the first (and only) input pair.
    """
    try:
        stats = lib.clean_paired_fastqs(
            fastqs=[
                (
                    data_dir / "sars-cov-2_100_1.fastq.gz",
                    data_dir / "sars-cov-2_100_2.fastq.gz",
                ),
            ],
            aligner=lib.ALIGNER.bowtie2,
            index=data_dir / "sars-cov-2/sars-cov-2",
            rename=True,
            sort_by_name=True,
            out_dir=out_dir,
            aligner_args="--ignore-quals",
            force=True,
        )
        assert stats[0]["reads_out"] == 8
    finally:
        # Remove the output directory even when the call or the assertion
        # fails, so a failing run does not leave files behind.
        shutil.rmtree(out_dir, ignore_errors=True)

0 comments on commit cff46d0

Please sign in to comment.