Commit

Merge branch 'main' into dev

hoelzer authored Nov 8, 2024
2 parents 1b952d8 + 6385b01 commit 61c9f31

Showing 6 changed files with 143 additions and 14 deletions.
17 changes: 17 additions & 0 deletions bin/bam_stats.sh
@@ -0,0 +1,17 @@
#!/bin/bash

FILE=${1}
#LIST=$2

#STAT=$(basename $(basename ${LIST}) .txt)
echo "Filename"$'\t'"Total"$'\t'"mapped_reads"$'\t'"unmapped_reads"$'\t'"proportion_mapped"$'\t'"proportion_unmapped" > ${FILE}.mappedstats.txt


mapped_reads=$(samtools stats ${FILE}.sorted.bam | grep ^SN | awk 'NR == 7 {print $4}')
unmapped_reads=$(samtools stats ${FILE}.sorted.bam | grep ^SN | awk 'NR == 9 {print $4}')
total=$(samtools stats ${FILE}.sorted.bam | grep ^SN | awk 'NR == 1 {print $5}')
proportion_mapped=$(awk "BEGIN {printf \"%.2f\n\", $mapped_reads*100/$total}")
proportion_unmapped=$(awk "BEGIN {printf \"%.2f\n\", $unmapped_reads*100/$total}")
printf "%s\t%s\t%s\t%s\t%s\t%s\n" $FILE $total $mapped_reads $unmapped_reads $proportion_mapped $proportion_unmapped >> ${FILE}.mappedstats.txt
# remove bam file
#rm -rf /MIGE/01_DATA/02_MAPPING/${FILE}*.{bam,bai}
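Note that bam_stats.sh selects its values from the samtools stats SN block by absolute line position (NR == 1, NR == 7, NR == 9), which only holds as long as samtools keeps that exact layout. A hedged alternative (a sketch, not part of this commit) would pick the same fields by their SN labels instead:

    #!/bin/bash
    # Sketch of a label-based variant (hypothetical, not committed): pull the same
    # counters from `samtools stats` by field label rather than line number.
    FILE=${1}
    stats=$(samtools stats "${FILE}.sorted.bam" | grep ^SN)
    total=$(echo "$stats" | awk -F'\t' '$2 == "raw total sequences:" {print $3}')
    mapped_reads=$(echo "$stats" | awk -F'\t' '$2 == "reads mapped:" {print $3}')
    unmapped_reads=$(echo "$stats" | awk -F'\t' '$2 == "reads unmapped:" {print $3}')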
10 changes: 7 additions & 3 deletions clean.nf
@@ -181,10 +181,9 @@ lib_pairedness = params.input_type == 'illumina' ? 'paired' : 'single'

include { prepare_contamination } from './workflows/prepare_contamination_wf' addParams( tool: tool )
include { check_own as prepare_keep } from './modules/prepare_contamination'

include { clean } from './workflows/clean_wf' addParams( tool: tool, lib_pairedness: lib_pairedness )
include { keep } from './workflows/keep_wf' addParams( tool: tool, lib_pairedness: lib_pairedness )

include { summarize } from './workflows/summarize_wf'
include { qc } from './workflows/qc_wf'

/**************************
@@ -196,7 +195,12 @@ workflow {
contamination = prepare_contamination.out

clean(input_ch, contamination, nanoControlBedChannel, 'map-to-remove')


if (!params.bbduk) {
bam_sort = clean.out.sort_bam_ch
summarize(bam_sort)
}

if (params.keep){
prepare_keep(keepFastaChannel)
keep_fasta = prepare_keep.out
16 changes: 8 additions & 8 deletions modules/prepare_contamination.nf
@@ -18,29 +18,29 @@ process download_host {
"""
case $host in
hsa)
wget 'ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz' -O host-temp.fa.gz
wget 'ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz' -O host-temp.fa.gz || curl 'ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz' -o host-temp.fa.gz
;;
mmu)
wget 'ftp://ftp.ensembl.org/pub/release-99/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.primary_assembly.fa.gz' -O host-temp.fa.gz
wget 'ftp://ftp.ensembl.org/pub/release-99/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.primary_assembly.fa.gz' -O host-temp.fa.gz || curl 'ftp://ftp.ensembl.org/pub/release-99/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.primary_assembly.fa.gz' -o host-temp.fa.gz
;;
cli)
wget 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/337/935/GCF_000337935.1_Cliv_1.0/GCF_000337935.1_Cliv_1.0_genomic.fna.gz' -O host-temp.fa.gz
wget 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/337/935/GCF_000337935.1_Cliv_1.0/GCF_000337935.1_Cliv_1.0_genomic.fna.gz' -O host-temp.fa.gz || curl 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/337/935/GCF_000337935.1_Cliv_1.0/GCF_000337935.1_Cliv_1.0_genomic.fna.gz' -o host-temp.fa.gz
;;
csa)
wget 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/409/795/GCF_000409795.2_Chlorocebus_sabeus_1.1/GCF_000409795.2_Chlorocebus_sabeus_1.1_genomic.fna.gz' -O host-temp.fa.gz
wget 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/409/795/GCF_000409795.2_Chlorocebus_sabeus_1.1/GCF_000409795.2_Chlorocebus_sabeus_1.1_genomic.fna.gz' -O host-temp.fa.gz || curl 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/409/795/GCF_000409795.2_Chlorocebus_sabeus_1.1/GCF_000409795.2_Chlorocebus_sabeus_1.1_genomic.fna.gz' -o host-temp.fa.gz
;;
gga)
wget 'ftp://ftp.ensembl.org/pub/release-99/fasta/gallus_gallus/dna/Gallus_gallus.GRCg6a.dna.toplevel.fa.gz' -O host-temp.fa.gz
wget 'ftp://ftp.ensembl.org/pub/release-99/fasta/gallus_gallus/dna/Gallus_gallus.GRCg6a.dna.toplevel.fa.gz' -O host-temp.fa.gz || curl 'ftp://ftp.ensembl.org/pub/release-99/fasta/gallus_gallus/dna/Gallus_gallus.GRCg6a.dna.toplevel.fa.gz' -o host-temp.fa.gz
;;
eco)
wget 'ftp://ftp.ensemblgenomes.org/pub/release-45/bacteria//fasta/bacteria_90_collection/escherichia_coli_k_12/dna/Escherichia_coli_k_12.ASM80076v1.dna.toplevel.fa.gz' -O host-temp.fa.gz
wget 'ftp://ftp.ensemblgenomes.org/pub/release-45/bacteria//fasta/bacteria_90_collection/escherichia_coli_k_12/dna/Escherichia_coli_k_12.ASM80076v1.dna.toplevel.fa.gz' -O host-temp.fa.gz || curl 'ftp://ftp.ensemblgenomes.org/pub/release-45/bacteria//fasta/bacteria_90_collection/escherichia_coli_k_12/dna/Escherichia_coli_k_12.ASM80076v1.dna.toplevel.fa.gz' -o host-temp.fa.gz
;;
sc2)
wget 'https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3?download=true' -O host-temp.fa
wget 'https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3?download=true' -O host-temp.fa || curl 'https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3?download=true' -o host-temp.fa
gzip host-temp.fa
;;
t2t)
wget 'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/009/914/755/GCA_009914755.4_T2T-CHM13v2.0/GCA_009914755.4_T2T-CHM13v2.0_genomic.fna.gz' -O host-temp.fa.gz
wget 'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/009/914/755/GCA_009914755.4_T2T-CHM13v2.0/GCA_009914755.4_T2T-CHM13v2.0_genomic.fna.gz' -O host-temp.fa.gz || curl 'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/009/914/755/GCA_009914755.4_T2T-CHM13v2.0/GCA_009914755.4_T2T-CHM13v2.0_genomic.fna.gz' -o host-temp.fa.gz
;;
*)
echo "Unknown host ($host)."
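The added `|| curl` fallbacks mirror each wget call. Plain curl reports success even on an HTTP error status and does not follow redirects; a hedged variant (a sketch, not part of this commit; the `fetch` helper name is hypothetical) would centralise the fallback and pass `-fL` so curl behaves like wget in those cases:

    #!/bin/bash
    # Hypothetical helper, not in this commit: try wget, fall back to curl.
    # -f makes curl fail on HTTP errors, -L makes it follow redirects.
    fetch () {
        local url=$1 out=$2
        wget "$url" -O "$out" || curl -fL "$url" -o "$out"
    }
    fetch 'https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3?download=true' host-temp.fa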
83 changes: 83 additions & 0 deletions modules/summarize.nf
@@ -0,0 +1,83 @@
process BAM_STATISTICS {

label 'minimap2'

publishDir (

path: "${params.output}/intermediate",
mode: params.publish_dir_mode,
pattern: "*.mappedstats.txt",
enabled: !params.no_intermediate,
saveAs: { fn ->
fn.startsWith("keep_") ? "map-to-keep/${fn.replaceAll(~'^keep_', '')}" : "map-to-remove/${fn}"
}
)

input:
tuple val(name), val(type), path(bam)


output:
path("*.mappedstats.txt")


script:
"""
bam_stats.sh ${name}
"""

stub:
"""
touch ${bam.baseName}.mappedstats.txt
"""
}


process COMBINE_BAM_STATISTICS {

label 'minimap2'

publishDir (

path: "${params.output}/intermediate",
mode: params.publish_dir_mode,
pattern: "combined_bam_stats.txt",
enabled: !params.no_intermediate,
saveAs: { fn ->
fn.startsWith("keep_") ? "map-to-keep/${fn.replaceAll(~'^keep_', '')}" : "map-to-remove/${fn}"
}
)

tag { 'combine bam statistics files'}


input:
path(bam_statistics_files)


output:
path("combined_bam_stats.txt")


script:
"""
BAM_STATISTICS_FILES=(${bam_statistics_files})
for index in \${!BAM_STATISTICS_FILES[@]}; do
BAM_STATISTICS_FILE=\${BAM_STATISTICS_FILES[\$index]}
# add header line if first file
if [[ \$index -eq 0 ]]; then
echo "\$(head -1 \${BAM_STATISTICS_FILE})" >> combined_bam_stats.txt
fi
echo "\$(awk 'FNR==2 {print}' \${BAM_STATISTICS_FILE})" >> combined_bam_stats.txt
done
"""

stub:
"""
touch combined_bam_stats.txt
"""
}
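The combine loop above writes the header line once (from the first staged file) and then the data row of every *.mappedstats.txt. Assuming each of those files holds exactly one header line followed by one data line, as bam_stats.sh produces, an equivalent single-pass sketch (not part of this commit) would be:

    # Print the header only from the first file, then the second line of every file.
    awk 'FNR == 1 && NR == 1 {print} FNR == 2 {print}' *.mappedstats.txt > combined_bam_stats.txt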
8 changes: 5 additions & 3 deletions workflows/clean_wf.nf
@@ -22,8 +22,9 @@ workflow clean {
flagstats = Channel.empty()
out_reads = bbduk.out.cleaned_reads.concat(bbduk.out.contaminated_reads)
bams_bai = Channel.empty()
} else {

sort_bam_ch = Channel.empty()
}
else {
if ( params.bwa ) {
bwa_index(contamination)
bwa(input, bwa_index.out, contamination) | sort_bam | index_bam | ( idxstats_from_bam & flagstats_from_bam )
@@ -32,7 +33,6 @@
minimap2(input, contamination) | sort_bam | index_bam | ( idxstats_from_bam & flagstats_from_bam )
split_bam(minimap2.out.bam)
}

contamination_bam = split_bam.out.mapped
cleaned_bam = split_bam.out.unmapped
if ( params.control && 'dcs' in params.control.split(',') && params.dcs_strict ) {
@@ -57,6 +57,7 @@
idxstats = idxstats_from_bam.out
flagstats = flagstats_from_bam.out
out_reads = fastq_from_bam.out
sort_bam_ch = sort_bam.out
}

emit:
@@ -65,4 +66,5 @@
flagstats
out_reads
bams_bai
sort_bam_ch
}
23 changes: 23 additions & 0 deletions workflows/summarize_wf.nf
@@ -0,0 +1,23 @@
include { BAM_STATISTICS; COMBINE_BAM_STATISTICS } from '../modules/summarize'

workflow summarize {

take:
sorted_bam

main:
BAM_STATISTICS(sorted_bam)

collected_bam_statistics_ch = BAM_STATISTICS.out.collect( sort: {a, b -> a[0].getBaseName() <=> b[0].getBaseName()} )


COMBINE_BAM_STATISTICS(collected_bam_statistics_ch)

// define output
bamstats = BAM_STATISTICS.out
summarystats = COMBINE_BAM_STATISTICS.out

emit:
bamstats
summarystats
}
