Skip to content

Commit

Permalink
refactor: fgbio read clipping (#545)
Browse files Browse the repository at this point in the history
* change primer file formatting

* change read clipping

* fmt

* fix primer clipping plot

* fix primer clipping plot

* fix

* fix

* typo

* test
  • Loading branch information
alethomas authored Jul 1, 2022
1 parent 4a40003 commit 711045a
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 39 deletions.
51 changes: 18 additions & 33 deletions workflow/rules/read_clipping.smk
Original file line number Diff line number Diff line change
Expand Up @@ -21,63 +21,48 @@ rule samtools_sort:
"0.74.0/bio/samtools/sort"


rule bed_to_bedpe:
rule bed_to_tsv:
input:
check_bed_for_URL(config["preprocessing"]["amplicon-primers"]),
output:
"resources/primer.bedpe",
"resources/primer.tsv",
log:
"logs/bed-to-bedpe.log",
conda:
"../envs/python.yaml"
script:
"../scripts/bed-to-bedpe.py"
"../scripts/bed-to-tsv.py"


rule bamclipper:
rule trim_primers_fgbio:
input:
bam="results/{date}/read-sorted/{read_type}~position/{sample}.initial.bam",
bai="results/{date}/read-sorted/{read_type}~position/{sample}.initial.bam.bai",
bedpe="resources/primer.bedpe",
output:
temp(
"results/{date}/read-clipping/softclipped/{read_type}/{sample}/{sample}.initial.primerclipped.bam"
ref="resources/genomes/{reference}.fasta".format(
reference=config["preprocessing"]["amplicon-reference"]
),
params:
output_dir=get_output_dir,
cwd=lambda w: os.getcwd(),
bed_path=lambda w, input: os.path.join(os.getcwd(), input.bedpe),
bam=lambda w, input: os.path.basename(input.bam),
primers="resources/primer.tsv",
output:
"results/{date}/read-clipping/hardclipped/{read_type}/{sample}/{sample}.bam",
log:
"logs/{date}/bamclipper/{read_type}/{sample}.log",
"logs/{date}/fgbio_primer_clipping/{read_type}/{sample}.log",
conda:
"../envs/bamclipper.yaml"
threads: 6
"../envs/fgbio.yaml"
shell:
"(cp {input.bam} {params.output_dir} &&"
" cp {input.bai} {params.output_dir} &&"
" cd {params.output_dir} &&"
" bamclipper.sh -b {params.bam} -p {params.bed_path} -n {threads} -u 5 -d 5) "
" > {params.cwd}/{log} 2>&1"
"fgbio TrimPrimers -i {input.bam} -p {input.primers} -o {output} -H true -r {input.ref} > {log} 2>&1"


rule fgbio:
rule filter_bam_fgbio:
input:
bam="results/{date}/read-clipping/softclipped/{read_type}/{sample}/{sample}.initial.primerclipped.bam",
bai="results/{date}/read-clipping/softclipped/{read_type}/{sample}/{sample}.initial.primerclipped.bam.bai",
ref="resources/genomes/{reference}.fasta".format(
reference=config["preprocessing"]["amplicon-reference"]
),
bam="results/{date}/read-clipping/hardclipped/{read_type}/{sample}/{sample}.bam",
output:
temp(
"results/{date}/read-clipping/hardclipped/{read_type}/{sample}/{sample}.bam"
),
"results/{date}/read-clipping/hc_filtered/{read_type}/{sample}/{sample}.bam",
log:
"logs/{date}/fgbio/{read_type}/{sample}.log",
"logs/{date}/fgbio_filter_bam/{read_type}/{sample}.log",
conda:
"../envs/fgbio.yaml"
shell:
"fgbio --sam-validation-stringency=LENIENT ClipBam -i {input.bam} -o {output} -H true -r {input.ref} > {log} 2>&1"
"fgbio FilterBam -i {input.bam} -o {output} --min-insert-size 100 --remove-single-end-mappings > {log} 2>&1"


rule samtools_fastq_pe:
Expand Down Expand Up @@ -132,7 +117,7 @@ rule plot_primer_clipping:
),
params:
samples=lambda wildcards: get_samples_for_date(wildcards.date),
bedpe="resources/primer.bedpe",
bedpe="resources/primer.tsv",
log:
"logs/{date}/plot-primer-clipping.log",
conda:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@
df_sense["chrom"],
df_sense["start"],
df_sense["end"],
df_antisense["chrom"],
df_antisense["start"],
df_antisense["end"],
]
headers = ["chrom1", "start1", "end1", "chrom2", "start2", "end2"]
headers = ["chrom", "left_start", "left_end", "right_start", "right_end"]
df_bedpe = pd.concat(data, axis=1, keys=headers)
df_bedpe.to_csv(snakemake.output[0], header=None, index=None, sep="\t", mode="a")
df_bedpe.to_csv(snakemake.output[0], index=None, sep="\t", mode="a")
8 changes: 5 additions & 3 deletions workflow/scripts/plot-primer-clipping.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
from intervaltree import IntervalTree

# read primer bedpe to df
PRIMER = pd.read_csv(snakemake.params.get("bedpe", ""), delimiter="\t", header=None)
PRIMER.drop(PRIMER.columns[[0, 3]], axis=1, inplace=True)
PRIMER = pd.read_csv(snakemake.params.get("bedpe", ""), delimiter="\t", header=0)
print(PRIMER)
PRIMER.drop(PRIMER.columns[[0]], axis=1, inplace=True)
print(PRIMER)
PRIMER.columns = ["p1_start", "p1_end", "p2_start", "p2_end"]

# convert df to interval trees
Expand Down Expand Up @@ -116,7 +118,7 @@ def count_intervals(file):
"uncut primer within",
"cut primer exact",
"cut primer within",
"no mathing win",
"no matching win",
],
}
)
Expand Down

0 comments on commit 711045a

Please sign in to comment.