Skip to content

Commit

Permalink
Merge pull request #2 from jfy133/upstream-for-indel
Browse files Browse the repository at this point in the history
Upstream
  • Loading branch information
jfy133 authored Jun 8, 2022
2 parents de88adc + 1fc0d3d commit e9c2965
Show file tree
Hide file tree
Showing 92 changed files with 1,812 additions and 254 deletions.
1 change: 0 additions & 1 deletion modules/bamtools/convert/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
## TODO nf-core: Delete / customise this example output
- out:
type: file
description: The data in the asked format (bed, fasta, fastq, json, pileup, sam, yaml)
Expand Down
2 changes: 1 addition & 1 deletion modules/bcftools/concat/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ process BCFTOOLS_CONCAT {
'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }"

input:
tuple val(meta), path(vcfs)
tuple val(meta), path(vcfs), path(tbi)

output:
tuple val(meta), path("*.gz"), emit: vcf
Expand Down
5 changes: 5 additions & 0 deletions modules/bcftools/concat/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ input:
description: |
List containing 2 or more vcf files
e.g. [ 'file1.vcf', 'file2.vcf' ]
- tbi:
type: files
description: |
List containing 2 or more index files (optional)
e.g. [ 'file1.tbi', 'file2.tbi' ]
output:
- meta:
type: map
Expand Down
2 changes: 1 addition & 1 deletion modules/bcftools/roh/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ process BCFTOOLS_ROH {

input:
tuple val(meta), path(vcf), path(tbi)
path af_file
tuple path(af_file), path(af_file_tbi)
path genetic_map
path regions_file
path samples_file
Expand Down
3 changes: 3 additions & 0 deletions modules/bcftools/roh/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ input:
- af_file:
type: file
description: "Read allele frequencies from a tab-delimited file containing the columns: CHROM\tPOS\tREF,ALT\tAF."
- af_file_tbi:
type: file
description: "tbi index of af_file."
- genetic_map:
type: file
description: "Genetic map in the format also required by IMPUTE2."
Expand Down
180 changes: 150 additions & 30 deletions modules/cnvkit/batch/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ process CNVKIT_BATCH {
input:
tuple val(meta), path(tumor), path(normal)
path fasta
path fasta_fai
path targets
path reference

Expand All @@ -28,48 +29,167 @@ process CNVKIT_BATCH {
script:
def args = task.ext.args ?: ''

def tumor_exists = tumor ? true : false
def normal_exists = normal ? true : false

// execute samtools only when cram files are input; cnvkit runs natively on cram but is prohibitively slow
// input pair is assumed to have same extension if both exist
def is_cram = tumor.Extension == "cram" ? true : false
def tumor_out = is_cram ? tumor.BaseName + ".bam" : "${tumor}"
def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false
def normal_cram = normal_exists && normal.Extension == "cram" ? true : false
def tumor_bam = tumor_exists && tumor.Extension == "bam" ? true : false
def normal_bam = normal_exists && normal.Extension == "bam" ? true : false

def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}"

// do not run samtools on normal samples in tumor_only mode
def normal_exists = normal ? true: false
// tumor_only mode does not need fasta & target
// instead it requires a pre-computed reference.cnn which is built from fasta & target
def (normal_out, normal_args, fasta_args) = ["", "", ""]
def fai_reference = fasta_fai ? "--fai-reference ${fasta_fai}" : ""

if (normal_exists){
def normal_prefix = normal.BaseName
normal_out = is_cram ? "${normal_prefix}" + ".bam" : "${normal}"
normal_args = normal_prefix ? "--normal $normal_out" : ""
normal_out = normal_cram ? "${normal_prefix}" + ".bam" : "${normal}"
fasta_args = fasta ? "--fasta $fasta" : ""

// germline mode
// normal samples must be input without a flag
// requires flag --normal to be empty []
if(!tumor_exists){
tumor_out = "${normal_prefix}" + ".bam"
normal_args = "--normal "
}
// somatic mode
else {
normal_args = normal_prefix ? "--normal $normal_out" : ""
}
}

def target_args = targets ? "--targets $targets" : ""
def reference_args = reference ? "--reference $reference" : ""

"""
if $is_cram; then
samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out
if $normal_exists; then
samtools view -T $fasta $normal -@ $task.cpus -o $normal_out
fi
fi
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
// somatic_mode cram_input
if (tumor_cram && normal_cram){
"""
samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out
samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $normal_out
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// somatic_mode bam_input
else if (tumor_bam && normal_bam){
"""
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// tumor_only_mode cram_input
else if(tumor_cram && !normal_exists){
"""
samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// tumor_only bam_input
else if(tumor_bam && !normal_exists){
"""
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// germline mode cram_input
// normal_args must be --normal []
else if (normal_cram && !tumor_exists){
"""
samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $tumor_out
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// germline mode bam_input
else if (normal_bam && !tumor_exists){
"""
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}

}
4 changes: 4 additions & 0 deletions modules/cnvkit/batch/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ input:
type: file
description: |
Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided)
- fasta_fai:
type: file
description: |
Input reference genome fasta index (optional, but recommended for cram_input)
- targetfile:
type: file
description: |
Expand Down
60 changes: 45 additions & 15 deletions modules/deeptools/bamcoverage/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ process DEEPTOOLS_BAMCOVERAGE {
tag "$meta.id"
label 'process_low'

conda (params.enable_conda ? "bioconda::deeptools=3.5.1" : null)
conda (params.enable_conda ? "bioconda::deeptools=3.5.1 bioconda::samtools=1.15.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0':
'quay.io/biocontainers/deeptools:3.5.1--py_0' }"
'https://depot.galaxyproject.org/singularity/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:2c687053c0252667cca265c9f4118f2c205a604c-0':
'quay.io/biocontainers/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:2c687053c0252667cca265c9f4118f2c205a604c-0' }"

input:
tuple val(meta), path(input), path(input_index)
path(fasta)
path(fasta_fai)

output:
tuple val(meta), path("*.bigWig") , emit: bigwig, optional: true
Expand All @@ -22,16 +24,44 @@ process DEEPTOOLS_BAMCOVERAGE {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}.bigWig"

"""
bamCoverage \\
--bam $input \\
$args \\
--numberOfProcessors ${task.cpus} \\
--outFileName ${prefix}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g")
END_VERSIONS
"""
// cram_input is currently not working with deeptools
// therefore it's required to convert cram to bam first
def is_cram = input.Extension == "cram" ? true : false
def input_out = is_cram ? input.BaseName + ".bam" : "${input}"
def fai_reference = fasta_fai ? "--fai-reference ${fasta_fai}" : ""

if (is_cram){
"""
samtools view -T $fasta $input $fai_reference -@ $task.cpus -o $input_out
samtools index -b $input_out -@ $task.cpus
bamCoverage \\
--bam $input_out \\
$args \\
--numberOfProcessors ${task.cpus} \\
--outFileName ${prefix}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g")
END_VERSIONS
"""

}
else {
"""
bamCoverage \\
--bam $input_out \\
$args \\
--numberOfProcessors ${task.cpus} \\
--outFileName ${prefix}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g")
END_VERSIONS
"""
}

}
9 changes: 9 additions & 0 deletions modules/deeptools/bamcoverage/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ input:
type: file
description: BAM/CRAM index file
pattern: "*.{bai,crai}"
- fasta:
type: file
description: Reference file the CRAM file was created with (required with CRAM input)
pattern: "*.{fasta,fa}"
- fasta_fai:
type: file
description: Index of the reference file (optional, but recommended)
pattern: "*.{fai}"

output:
- meta:
Expand All @@ -47,3 +55,4 @@ output:

authors:
- "@FriederikeHanssen"
- "@SusiJo"
4 changes: 2 additions & 2 deletions modules/ensemblvep/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ RUN conda env create -f /environment.yml && conda clean -a
# Setup default ARG variables
ARG GENOME=GRCh38
ARG SPECIES=homo_sapiens
ARG VEP_VERSION=104
ARG VEP_TAG=104.3
ARG VEP_VERSION=105
ARG VEP_TAG=105.0

# Add conda installation dir to PATH (instead of doing 'conda activate')
ENV PATH /opt/conda/envs/nf-core-vep-${VEP_TAG}/bin:$PATH
Expand Down
12 changes: 6 additions & 6 deletions modules/ensemblvep/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ build_push() {
docker push nfcore/vep:${VEP_TAG}.${GENOME}
}

build_push "GRCh37" "homo_sapiens" "104" "104.3"
build_push "GRCh38" "homo_sapiens" "104" "104.3"
build_push "GRCm38" "mus_musculus" "102" "104.3"
build_push "GRCm39" "mus_musculus" "104" "104.3"
build_push "CanFam3.1" "canis_lupus_familiaris" "104" "104.3"
build_push "WBcel235" "caenorhabditis_elegans" "104" "104.3"
build_push "GRCh37" "homo_sapiens" "105" "105.0"
build_push "GRCh38" "homo_sapiens" "105" "105.0"
build_push "GRCm38" "mus_musculus" "102" "105.0"
build_push "GRCm39" "mus_musculus" "105" "105.0"
build_push "CanFam3.1" "canis_lupus_familiaris" "104" "105.0"
build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0"
Loading

0 comments on commit e9c2965

Please sign in to comment.