Skip to content

Commit

Permalink
Merge pull request #66 from hoelzer/fix/concat-carefully
Browse files Browse the repository at this point in the history
Avoid zcat'ing *.gz when output file is also a .gz file.
  • Loading branch information
MarieLataretu authored Nov 15, 2023
2 parents 91865cb + 307ea83 commit e87a382
Showing 1 changed file with 26 additions and 25 deletions.
51 changes: 26 additions & 25 deletions modules/prepare_contamination.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,31 @@ process download_host {

script:
"""
if [ $host == 'hsa' ]; then
wget ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
zcat *.gz | bgzip -@ ${task.cpus} -c > ${host}.fa.gz
fi
if [ $host == 'mmu' ]; then
wget ftp://ftp.ensembl.org/pub/release-99/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.primary_assembly.fa.gz
zcat *.gz | bgzip -@ ${task.cpus} -c > ${host}.fa.gz
fi
if [ $host == 'cli' ]; then
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/337/935/GCF_000337935.1_Cliv_1.0/GCF_000337935.1_Cliv_1.0_genomic.fna.gz
zcat *.gz | bgzip -@ ${task.cpus} -c > ${host}.fa.gz
fi
if [ $host == 'csa' ]; then
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/409/795/GCF_000409795.2_Chlorocebus_sabeus_1.1/GCF_000409795.2_Chlorocebus_sabeus_1.1_genomic.fna.gz
zcat *.gz | bgzip -@ ${task.cpus} -c > ${host}.fa.gz
fi
if [ $host == 'gga' ]; then
wget ftp://ftp.ensembl.org/pub/release-99/fasta/gallus_gallus/dna/Gallus_gallus.GRCg6a.dna.toplevel.fa.gz
zcat *.gz | bgzip -@ ${task.cpus} -c > ${host}.fa.gz
fi
if [ $host == 'eco' ]; then
wget ftp://ftp.ensemblgenomes.org/pub/release-45/bacteria//fasta/bacteria_90_collection/escherichia_coli_k_12/dna/Escherichia_coli_k_12.ASM80076v1.dna.toplevel.fa.gz
zcat *.gz | bgzip -@ ${task.cpus} -c > ${host}.fa.gz
fi
# Download the reference genome for the requested host code. Every branch
# writes to the same fixed temporary name (host-temp.fa.gz), so the
# recompression step below reads exactly one known input and never globs
# over its own .gz output.
case $host in
hsa)
wget ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz -O host-temp.fa.gz
;;
mmu)
wget ftp://ftp.ensembl.org/pub/release-99/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.primary_assembly.fa.gz -O host-temp.fa.gz
;;
cli)
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/337/935/GCF_000337935.1_Cliv_1.0/GCF_000337935.1_Cliv_1.0_genomic.fna.gz -O host-temp.fa.gz
;;
csa)
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/409/795/GCF_000409795.2_Chlorocebus_sabeus_1.1/GCF_000409795.2_Chlorocebus_sabeus_1.1_genomic.fna.gz -O host-temp.fa.gz
;;
gga)
wget ftp://ftp.ensembl.org/pub/release-99/fasta/gallus_gallus/dna/Gallus_gallus.GRCg6a.dna.toplevel.fa.gz -O host-temp.fa.gz
;;
eco)
wget ftp://ftp.ensemblgenomes.org/pub/release-45/bacteria//fasta/bacteria_90_collection/escherichia_coli_k_12/dna/Escherichia_coli_k_12.ASM80076v1.dna.toplevel.fa.gz -O host-temp.fa.gz
;;
*)
# Nothing was downloaded for an unrecognised host code. Abort here: the
# pipeline below reports only the exit status of bgzip, so a failing zcat
# on the missing input would go unnoticed and leave an empty archive behind.
echo "Unknown host ($host)." >&2
exit 1
;;
esac
# Decompress the download and re-compress it with bgzip, using the CPUs
# allotted to this task, into the file named after the host code.
zcat host-temp.fa.gz | bgzip -@ ${task.cpus} -c > ${host}.fa.gz
"""
stub:
"""
Expand Down Expand Up @@ -108,4 +109,4 @@ process concat_contamination {
"""
touch db.fa.gz db.fa.fai
"""
}
}

0 comments on commit e87a382

Please sign in to comment.