Skip to content

Commit

Permalink
Merge pull request nf-core#568 from MaxUlysse/vcftools
Browse files Browse the repository at this point in the history
Add VCFtools
  • Loading branch information
Szilveszter Juhos authored Apr 16, 2018
2 parents 81cc16e + f722f79 commit 68f0d74
Show file tree
Hide file tree
Showing 13 changed files with 161 additions and 18 deletions.
5 changes: 3 additions & 2 deletions buildContainers.nf
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ def defineContainersList(){
'snpeff',
'snpeffgrch37',
'snpeffgrch38',
'vcftools',
'vepgrch37',
'vepgrch38'
]
Expand All @@ -211,8 +212,8 @@ def helpMessage() {
log.info " Default: all"
log.info " Possible values:"
log.info " all, fastqc, freebayes, gatk, igvtools, multiqc, mutect1"
log.info " picard, qualimap, r-base, runallelecount, sarek"
log.info " snpeff, snpeffgrch37, snpeffgrch38, vepgrch37, vepgrch38"
log.info " picard, qualimap, r-base, runallelecount, sarek, snpeff"
log.info " snpeffgrch37, snpeffgrch38, vcftools, vepgrch37, vepgrch38"
log.info " --docker: Build containers using Docker"
log.info " --help"
log.info " you're reading it"
Expand Down
1 change: 1 addition & 0 deletions configuration/containers.config
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,6 @@ process {
$RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"}
$RunStrelka.container = "${params.repository}/sarek:${params.tag}"
$RunStrelkaBP.container = "${params.repository}/sarek:${params.tag}"
$RunVcftools.container = "${params.repository}/vcftools:${params.tag}"
$RunVEP.container = {params.genome == 'GRCh38' ? "${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"}
}
1 change: 1 addition & 0 deletions configuration/singularity-path.config
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,6 @@ process {
$RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"}
$RunStrelka.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunStrelkaBP.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunVcftools.container = "${params.containerPath}/vcftools-${params.tag}.img"
$RunVEP.container = {params.genome == 'GRCh38' ? "${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"}
}
15 changes: 15 additions & 0 deletions containers/vcftools/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Image providing vcftools for Sarek, built on top of nfcore/base.
FROM nfcore/base:latest

LABEL author="Maxime Garcia" \
description="vcftools image used in Sarek 2.0" \
maintainer="[email protected]"

# Ship the conda environment specification at the image root.
COPY environment.yml /

# Create the environment described in /environment.yml, then run conda clean
# in the same layer.
RUN conda env create -f /environment.yml && conda clean -a

# Put the environment's bin directory ahead of the inherited PATH.
ENV PATH=/opt/conda/envs/sarek-vcftools-2.0/bin:$PATH
9 changes: 9 additions & 0 deletions containers/vcftools/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# You can use this file to create a conda environment:
# conda env create -f environment.yml
# Environment name. The Dockerfile in this directory adds
# /opt/conda/envs/sarek-vcftools-2.0/bin to PATH, so keep the two in sync.
name: sarek-vcftools-2.0
# Channels conda may pull packages from.
# NOTE(review): presumably searched in the order listed (first = highest priority) - confirm for the conda version in the base image.
channels:
- defaults
- conda-forge
- bioconda
# Packages installed into the environment; vcftools is pinned to 0.1.15.
dependencies:
- vcftools=0.1.15
1 change: 1 addition & 0 deletions doc/BUILD.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ nextflow run . [--docker] [--singularity] [--containerPath <path>] [--push] [--c
- `snpeff` this container serves as a base for `snpeffgrch37` and `snpeffgrch38`
- `snpeffgrch37`
- `snpeffgrch38`
- `vcftools`
- `vepgrch37`
- `vepgrch38`

Expand Down
11 changes: 11 additions & 0 deletions doc/CONTAINERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ For processing + germline variant calling + Reports:
- [picard](#picard-)
- [qualimap](#qualimap-)
- [sarek](#sarek-)
- [vcftools](#vcftools-)

For processing + somatic variant calling + Reports:
- [fastqc](#fastqc-)
Expand All @@ -21,6 +22,7 @@ For processing + somatic variant calling + Reports:
- [r-base](#r-base-)
- [runallelecount](#runallelecount-)
- [sarek](#sarek-)
- [vcftools](#vcftools-)

For annotation for GRCh37, you will need:
- [snpeffgrch37](#snpeffgrch37-)
Expand Down Expand Up @@ -104,6 +106,12 @@ A container named after the process is made for each process. If a container can
- Contain **[snpEff][snpeff-link]** 4.3i
- Contain GRCh38.86

## vcftools [![vcftools-docker status][vcftools-docker-badge]][vcftools-docker-link]

- Based on `nfcore/base:latest`
- Contain **[vcftools][vcftools-link]** 0.1.15


## vepgrch37 [![vepgrch37-docker status][vepgrch37-docker-badge]][vepgrch37-docker-link]

- Based on `willmclaren/ensembl-vep:release_90.6`
Expand Down Expand Up @@ -169,6 +177,9 @@ A container named after the process is made for each process. If a container can
[snpeffgrch38-docker-badge]: https://img.shields.io/docker/automated/maxulysse/snpeffgrch38.svg
[snpeffgrch38-docker-link]: https://hub.docker.com/r/maxulysse/snpeffgrch38
[strelka-link]: https://github.com/Illumina/strelka
[vcftools-docker-badge]: https://img.shields.io/docker/automated/maxulysse/vcftools.svg
[vcftools-docker-link]: https://hub.docker.com/r/maxulysse/vcftools
[vcftools-link]: https://vcftools.github.io/index.html
[vep-docker-badge]: https://img.shields.io/docker/automated/maxulysse/vep.svg
[vep-docker-link]: https://hub.docker.com/r/maxulysse/vep
[vep-link]: https://github.com/Ensembl/ensembl-vep
Expand Down
1 change: 1 addition & 0 deletions doc/PROCESS.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ We divide them for the moment into 5 main steps:
- RunSamtoolsStats - Run Samtools stats on recalibrated BAM files
- RunBamQC - Run qualimap BamQC on recalibrated BAM files
- RunBcftoolsStats - Run BCFTools stats on vcf files
- RunVcftools - Run VCFTools on vcf files

## Annotation:

Expand Down
63 changes: 57 additions & 6 deletions germlineVC.nf
Original file line number Diff line number Diff line change
Expand Up @@ -399,8 +399,8 @@ process ConcatVCF {
file(genomeIndex) from Channel.value(referenceMap.genomeIndex)

output:
set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz") into vcfConcatenated
file("*.vcf.gz.tbi") into vcfConcatenatedTbi
set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfConcatenated


when: ( 'haplotypecaller' in tools || 'mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC

Expand Down Expand Up @@ -453,8 +453,9 @@ process ConcatVCF {

if (params.verbose) vcfConcatenated = vcfConcatenated.view {
"Variant Calling output:\n\
Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\
File : ${it[4].fileName}"
Tool : ${it[0]}\tID : ${it[1]}\tSample: ${it[2]}\n\
Files : ${it[4].fileName}\n\
Index : ${it[5].fileName}"
}

process RunSingleStrelka {
Expand Down Expand Up @@ -549,7 +550,11 @@ if (params.verbose) singleMantaOutput = singleMantaOutput.view {
Index : ${it[4].fileName}"
}

vcfForBCFtools = Channel.empty().mix(
vcfForQC = Channel.empty().mix(
vcfConcatenated.map {
variantcaller, idPatient, idSampleNormal, idSampleTumor, vcf, tbi ->
[variantcaller, vcf]
},
singleStrelkaOutput.map {
variantcaller, idPatient, idSample, vcf, tbi ->
[variantcaller, vcf[1]]
Expand All @@ -559,6 +564,8 @@ vcfForBCFtools = Channel.empty().mix(
[variantcaller, vcf[2]]
})

(vcfForBCFtools, vcfForVCFtools) = vcfForQC.into(2)

process RunBcftoolsStats {
tag {vcf}

Expand All @@ -585,6 +592,49 @@ if (params.verbose) bcfReport = bcfReport.view {

bcfReport.close()

// Run VCFtools on every VCF received from vcfForVCFtools and publish the
// files it produces under directoryMap.vcftools.
process RunVcftools {
// Label each task with the VCF it processes
tag {vcf}

// Publish the outputs into the VCFTools report directory using mode 'link'
publishDir directoryMap.vcftools, mode: 'link'

input:
// variantCaller travels with the VCF but is not referenced inside this process
set variantCaller, file(vcf) from vcfForVCFtools

output:
// Every file matching "<vcf.baseName>.*" is sent to vcfReport
file ("${vcf.baseName}.*") into vcfReport

// Not executed when params.noReports is set
when: !params.noReports

// Four separate vcftools invocations on the same gzipped VCF
// (--relatedness2, --TsTv-by-count, --TsTv-by-qual, --FILTER-summary),
// all sharing the same --out prefix.
// NOTE(review): presumably each invocation writes <prefix>.log, so only the last log survives - confirm.
script:
"""
vcftools \
--gzvcf ${vcf} \
--relatedness2 \
--out ${vcf.baseName}
vcftools \
--gzvcf ${vcf} \
--TsTv-by-count \
--out ${vcf.baseName}
vcftools \
--gzvcf ${vcf} \
--TsTv-by-qual \
--out ${vcf.baseName}
vcftools \
--gzvcf ${vcf} \
--FILTER-summary \
--out ${vcf.baseName}
"""
}

if (params.verbose) vcfReport = vcfReport.view {
"VCFTools stats report:\n\
File : [${it.fileName}]"
}

vcfReport.close()
/*
================================================================================
= F U N C T I O N S =
Expand Down Expand Up @@ -646,10 +696,11 @@ def defineDirectoryMap() {
'bamQC' : "${params.outDir}/Reports/bamQC",
'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats",
'vcftools' : "${params.outDir}/Reports/VCFTools",
'ascat' : "${params.outDir}/VariantCalling/Ascat",
'freebayes' : "${params.outDir}/VariantCalling/FreeBayes",
'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller",
'gvcf-hc' : "${params.outDir}/VariantCalling/HaplotypeCallerGVCF",
'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller",
'manta' : "${params.outDir}/VariantCalling/Manta",
'mutect1' : "${params.outDir}/VariantCalling/MuTect1",
'mutect2' : "${params.outDir}/VariantCalling/MuTect2",
Expand Down
7 changes: 5 additions & 2 deletions runMultiQC.nf
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ process GenerateMultiQCconfig {
echo "- 'samtools'" >> multiqc_config.yaml
echo "- 'qualimap'" >> multiqc_config.yaml
echo "- 'bcftools'" >> multiqc_config.yaml
echo "- 'vcftools'" >> multiqc_config.yaml
echo "- 'snpeff'" >> multiqc_config.yaml
"""
}
Expand All @@ -106,6 +107,7 @@ reportsForMultiQC = Channel.empty()
Channel.fromPath("${directoryMap.markDuplicatesQC}/*"),
Channel.fromPath("${directoryMap.samtoolsStats}/*"),
Channel.fromPath("${directoryMap.snpeffReports}/*"),
Channel.fromPath("${directoryMap.vcftools}/*"),
multiQCconfig
).collect()

Expand Down Expand Up @@ -148,10 +150,11 @@ def defineDirectoryMap() {
'bamQC' : "${params.outDir}/Reports/bamQC",
'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
'fastQC' : "${params.outDir}/Reports/FastQC",
'snpeffReports' : "${params.outDir}/Reports/SnpEff",
'markDuplicatesQC' : "${params.outDir}/Reports/MarkDuplicates",
'multiQC' : "${params.outDir}/Reports/MultiQC",
'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats"
'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats",
'snpeffReports' : "${params.outDir}/Reports/SnpEff",
'vcftools' : "${params.outDir}/Reports/VCFTools"
]
}

Expand Down
8 changes: 4 additions & 4 deletions scripts/do_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ do
esac
done

if [ $GENOME = smallGRCh37 ]
if [[ $GENOME = smallGRCh37 ]]
then
# Remap the small test genome to GRCh37. A shell assignment takes no leading
# "$" and no spaces around "="; "$GENOME = GRCh37" would instead try to run
# the current value of GENOME as a command.
GENOME=GRCh37
fi
Expand All @@ -55,10 +55,10 @@ function toLower() {
echo $1 | tr '[:upper:]' '[:lower:]'
}

if [ $TOOL = docker ] && [ GRCh37,GRCh38 =~ $GENOME ]
if [[ $TOOL = docker ]] && [[ GRCh37,GRCh38 =~ $GENOME ]]
then
nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,sarek,snpeff
nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,sarek,snpeff,vcftools
nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers snpeff$(toLower ${GENOME}),vep$(toLower ${GENOME})
else
nextflow run buildContainers.nf -profile ${PROFILE} --verbose --singularity --repository ${REPOSITORY} --tag ${TAG} --containerPath containers/ --containers fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,sarek,snpeff$(toLower ${GENOME}),vep$(toLower ${GENOME})
nextflow run buildContainers.nf -profile ${PROFILE} --verbose --singularity --repository ${REPOSITORY} --tag ${TAG} --containerPath containers/ --containers fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,sarek,snpeff$(toLower ${GENOME}),vcftools,vep$(toLower ${GENOME})
fi
2 changes: 1 addition & 1 deletion scripts/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ fi

if [[ ALL,STEP =~ $TEST ]]
then
run_wrapper --germline --sample $SAMPLE
run_wrapper --germline --sampleDir data/tiny/tiny/normal
run_wrapper --germline --step realign --noReports
run_wrapper --germline --step recalibrate --noReports
clean_repo
Expand Down
55 changes: 52 additions & 3 deletions somaticVC.nf
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,7 @@ process ConcatVCF {
file(genomeIndex) from Channel.value(referenceMap.genomeIndex)

output:
set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz") into vcfConcatenated
file("*.vcf.gz.tbi") into vcfConcatenatedTbi
set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfConcatenated

when: ('mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC

Expand Down Expand Up @@ -777,7 +776,11 @@ if (params.verbose) ascatOutput = ascatOutput.view {
(strelkaIndels, strelkaSNVS) = strelkaOutput.into(2)
(mantaSomaticSV, mantaDiploidSV) = mantaOutput.into(2)

vcfForBCFtools = Channel.empty().mix(
vcfForQC = Channel.empty().mix(
vcfConcatenated.map {
variantcaller, idPatient, idSampleNormal, idSampleTumor, vcf, tbi ->
[variantcaller, vcf]
},
mantaDiploidSV.map {
variantcaller, idPatient, idSampleNormal, idSampleTumor, vcf, tbi ->
[variantcaller, vcf[2]]
Expand All @@ -799,6 +802,8 @@ vcfForBCFtools = Channel.empty().mix(
[variantcaller, vcf[1]]
})

(vcfForBCFtools, vcfForVCFtools) = vcfForQC.into(2)

process RunBcftoolsStats {
tag {vcf}

Expand All @@ -825,6 +830,49 @@ if (params.verbose) bcfReport = bcfReport.view {

bcfReport.close()

// Run VCFtools on every VCF received from vcfForVCFtools and publish the
// files it produces under directoryMap.vcftools.
process RunVcftools {
// Label each task with the VCF it processes
tag {vcf}

// Publish the outputs into the VCFTools report directory using mode 'link'
publishDir directoryMap.vcftools, mode: 'link'

input:
// variantCaller travels with the VCF but is not referenced inside this process
set variantCaller, file(vcf) from vcfForVCFtools

output:
// Every file matching "<vcf.baseName>.*" is sent to vcfReport
file ("${vcf.baseName}.*") into vcfReport

// Not executed when params.noReports is set
when: !params.noReports

// Four separate vcftools invocations on the same gzipped VCF
// (--relatedness2, --TsTv-by-count, --TsTv-by-qual, --FILTER-summary),
// all sharing the same --out prefix.
// NOTE(review): presumably each invocation writes <prefix>.log, so only the last log survives - confirm.
script:
"""
vcftools \
--gzvcf ${vcf} \
--relatedness2 \
--out ${vcf.baseName}
vcftools \
--gzvcf ${vcf} \
--TsTv-by-count \
--out ${vcf.baseName}
vcftools \
--gzvcf ${vcf} \
--TsTv-by-qual \
--out ${vcf.baseName}
vcftools \
--gzvcf ${vcf} \
--FILTER-summary \
--out ${vcf.baseName}
"""
}

if (params.verbose) vcfReport = vcfReport.view {
"VCFTools stats report:\n\
File : [${it.fileName}]"
}

vcfReport.close()
/*
================================================================================
= F U N C T I O N S =
Expand Down Expand Up @@ -886,6 +934,7 @@ def defineDirectoryMap() {
'bamQC' : "${params.outDir}/Reports/bamQC",
'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats",
'vcftools' : "${params.outDir}/Reports/VCFTools",
'ascat' : "${params.outDir}/VariantCalling/Ascat",
'freebayes' : "${params.outDir}/VariantCalling/FreeBayes",
'manta' : "${params.outDir}/VariantCalling/Manta",
Expand Down

0 comments on commit 68f0d74

Please sign in to comment.