Skip to content
This repository has been archived by the owner on Jan 27, 2020. It is now read-only.

Add VCFtools #568

Merged
merged 9 commits into from
Apr 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions buildContainers.nf
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ def defineContainersList(){
'snpeff',
'snpeffgrch37',
'snpeffgrch38',
'vcftools',
'vepgrch37',
'vepgrch38'
]
Expand All @@ -211,8 +212,8 @@ def helpMessage() {
log.info " Default: all"
log.info " Possible values:"
log.info " all, fastqc, freebayes, gatk, igvtools, multiqc, mutect1"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be mutect2 I guess

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, mutect2 is shipped with the GATK container

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, of course. Shall I merge this first, or shall I wait for #567?

log.info " picard, qualimap, r-base, runallelecount, sarek"
log.info " snpeff, snpeffgrch37, snpeffgrch38, vepgrch37, vepgrch38"
log.info " picard, qualimap, r-base, runallelecount, sarek, snpeff"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have fastqc, multiqc, qualimap, and vcftools as separate containers, and these are all QC tools. Should we keep them separate, or can we merge these containers sometime?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That could be a good idea, I'll think about it

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

log.info " snpeffgrch37, snpeffgrch38, vcftools, vepgrch37, vepgrch38"
log.info " --docker: Build containers using Docker"
log.info " --help"
log.info " you're reading it"
Expand Down
1 change: 1 addition & 0 deletions configuration/containers.config
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,6 @@ process {
$RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"}
$RunStrelka.container = "${params.repository}/sarek:${params.tag}"
$RunStrelkaBP.container = "${params.repository}/sarek:${params.tag}"
$RunVcftools.container = "${params.repository}/vcftools:${params.tag}"
$RunVEP.container = {params.genome == 'GRCh38' ? "${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"}
}
1 change: 1 addition & 0 deletions configuration/singularity-path.config
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,6 @@ process {
$RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"}
$RunStrelka.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunStrelkaBP.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunVcftools.container = "${params.containerPath}/vcftools-${params.tag}.img"
$RunVEP.container = {params.genome == 'GRCh38' ? "${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"}
}
15 changes: 15 additions & 0 deletions containers/vcftools/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# vcftools image for Sarek 2.0, layered on top of the nf-core base image.
FROM nfcore/base:latest

LABEL author="Maxime Garcia" \
      description="vcftools image used in Sarek 2.0" \
      maintainer="[email protected]"

# The conda environment specification is copied to the image root.
COPY environment.yml /

# Create the environment from that specification and run `conda clean -a`
# within the same RUN instruction.
RUN conda env create -f /environment.yml && conda clean -a

# Prepend the environment's bin directory to PATH; the directory name must
# match the `name:` field of environment.yml.
ENV PATH /opt/conda/envs/sarek-vcftools-2.0/bin:$PATH
9 changes: 9 additions & 0 deletions containers/vcftools/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# You can use this file to create a conda environment:
# conda env create -f environment.yml
#
# NOTE: the environment name below is also hard-coded in the PATH exported by
# containers/vcftools/Dockerfile (/opt/conda/envs/sarek-vcftools-2.0/bin);
# keep both in sync when renaming the environment.
name: sarek-vcftools-2.0
channels:
- defaults
- conda-forge
- bioconda
dependencies:
# Pinned vcftools release; doc/CONTAINERS.md advertises the same version.
- vcftools=0.1.15
1 change: 1 addition & 0 deletions doc/BUILD.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ nextflow run . [--docker] [--singularity] [--containerPath <path>] [--push] [--c
- `snpeff` this container serves as a base for `snpeffgrch37` and `snpeffgrch38`
- `snpeffgrch37`
- `snpeffgrch38`
- `vcftools`
- `vepgrch37`
- `vepgrch38`

Expand Down
11 changes: 11 additions & 0 deletions doc/CONTAINERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ For processing + germline variant calling + Reports:
- [picard](#picard-)
- [qualimap](#qualimap-)
- [sarek](#sarek-)
- [vcftools](#vcftools-)

For processing + somatic variant calling + Reports:
- [fastqc](#fastqc-)
Expand All @@ -21,6 +22,7 @@ For processing + somatic variant calling + Reports:
- [r-base](#r-base-)
- [runallelecount](#runallelecount-)
- [sarek](#sarek-)
- [vcftools](#vcftools-)

For annotation for GRCh37, you will need:
- [snpeffgrch37](#snpeffgrch37-)
Expand Down Expand Up @@ -104,6 +106,12 @@ A container named after the process is made for each process. If a container can
- Contain **[snpEff][snpeff-link]** 4.3i
- Contain GRCh38.86

## vcftools [![vcftools-docker status][vcftools-docker-badge]][vcftools-docker-link]

- Based on `nfcore/base:latest`
- Contain **[vcftools][vcftools-link]** 0.1.15


## vepgrch37 [![vepgrch37-docker status][vepgrch37-docker-badge]][vepgrch37-docker-link]

- Based on `willmclaren/ensembl-vep:release_90.6`
Expand Down Expand Up @@ -169,6 +177,9 @@ A container named after the process is made for each process. If a container can
[snpeffgrch38-docker-badge]: https://img.shields.io/docker/automated/maxulysse/snpeffgrch38.svg
[snpeffgrch38-docker-link]: https://hub.docker.com/r/maxulysse/snpeffgrch38
[strelka-link]: https://github.com/Illumina/strelka
[vcftools-docker-badge]: https://img.shields.io/docker/automated/maxulysse/vcftools.svg
[vcftools-docker-link]: https://hub.docker.com/r/maxulysse/vcftools
[vcftools-link]: https://vcftools.github.io/index.html
[vep-docker-badge]: https://img.shields.io/docker/automated/maxulysse/vep.svg
[vep-docker-link]: https://hub.docker.com/r/maxulysse/vep
[vep-link]: https://github.com/Ensembl/ensembl-vep
Expand Down
1 change: 1 addition & 0 deletions doc/PROCESS.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ We divide them for the moment into 5 main steps:
- RunSamtoolsStats - Run Samtools stats on recalibrated BAM files
- RunBamQC - Run qualimap BamQC on recalibrated BAM files
- RunBcftoolsStats - Run BCFTools stats on vcf files
- RunVcftools - Run VCFTools on vcf files

## Annotation:

Expand Down
63 changes: 57 additions & 6 deletions germlineVC.nf
Original file line number Diff line number Diff line change
Expand Up @@ -399,8 +399,8 @@ process ConcatVCF {
file(genomeIndex) from Channel.value(referenceMap.genomeIndex)

output:
set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz") into vcfConcatenated
file("*.vcf.gz.tbi") into vcfConcatenatedTbi
set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfConcatenated


when: ( 'haplotypecaller' in tools || 'mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC

Expand Down Expand Up @@ -453,8 +453,9 @@ process ConcatVCF {

if (params.verbose) vcfConcatenated = vcfConcatenated.view {
"Variant Calling output:\n\
Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\
File : ${it[4].fileName}"
Tool : ${it[0]}\tID : ${it[1]}\tSample: ${it[2]}\n\
Files : ${it[4].fileName}\n\
Index : ${it[5].fileName}"
}

process RunSingleStrelka {
Expand Down Expand Up @@ -549,7 +550,11 @@ if (params.verbose) singleMantaOutput = singleMantaOutput.view {
Index : ${it[4].fileName}"
}

vcfForBCFtools = Channel.empty().mix(
vcfForQC = Channel.empty().mix(
vcfConcatenated.map {
variantcaller, idPatient, idSampleNormal, idSampleTumor, vcf, tbi ->
[variantcaller, vcf]
},
singleStrelkaOutput.map {
variantcaller, idPatient, idSample, vcf, tbi ->
[variantcaller, vcf[1]]
Expand All @@ -559,6 +564,8 @@ vcfForBCFtools = Channel.empty().mix(
[variantcaller, vcf[2]]
})

(vcfForBCFtools, vcfForVCFtools) = vcfForQC.into(2)

process RunBcftoolsStats {
tag {vcf}

Expand All @@ -585,6 +592,49 @@ if (params.verbose) bcfReport = bcfReport.view {

bcfReport.close()

// RunVcftools: run VCFtools on every [variantCaller, vcf] pair emitted by
// vcfForVCFtools and collect the files it writes as QC reports.
process RunVcftools {
// Label each task with the VCF it is processing.
tag {vcf}

// Reports are published into directoryMap.vcftools using publishDir mode 'link'.
publishDir directoryMap.vcftools, mode: 'link'

input:
// variantCaller is received with the VCF but is not used in the script below.
set variantCaller, file(vcf) from vcfForVCFtools

output:
// Every file whose name starts with "<vcf.baseName>." is sent to vcfReport.
// NOTE(review): this glob also matches the staged input name; presumably
// Nextflow leaves input files out of glob outputs by default - confirm.
file ("${vcf.baseName}.*") into vcfReport

// Skipped entirely when params.noReports is set.
when: !params.noReports

// The script calls vcftools four times on the same gzipped VCF (--gzvcf),
// once per statistic: --relatedness2, --TsTv-by-count, --TsTv-by-qual and
// --FILTER-summary. All four calls share the same --out prefix
// (vcf.baseName); presumably vcftools appends a statistic-specific suffix
// to that prefix - confirm against the VCFtools manual.
script:
"""
vcftools \
--gzvcf ${vcf} \
--relatedness2 \
--out ${vcf.baseName}

vcftools \
--gzvcf ${vcf} \
--TsTv-by-count \
--out ${vcf.baseName}

vcftools \
--gzvcf ${vcf} \
--TsTv-by-qual \
--out ${vcf.baseName}

vcftools \
--gzvcf ${vcf} \
--FILTER-summary \
--out ${vcf.baseName}
"""
}

// When params.verbose is set, print the file name of each item emitted on
// vcfReport while passing the items through unchanged.
if (params.verbose) vcfReport = vcfReport.view {
"VCFTools stats report:\n\
File : [${it.fileName}]"
}

// NOTE(review): no downstream consumer of vcfReport is visible in this part
// of the file; the channel is closed here, mirroring bcfReport.close() above.
vcfReport.close()
/*
================================================================================
= F U N C T I O N S =
Expand Down Expand Up @@ -646,10 +696,11 @@ def defineDirectoryMap() {
'bamQC' : "${params.outDir}/Reports/bamQC",
'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats",
'vcftools' : "${params.outDir}/Reports/VCFTools",
'ascat' : "${params.outDir}/VariantCalling/Ascat",
'freebayes' : "${params.outDir}/VariantCalling/FreeBayes",
'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller",
'gvcf-hc' : "${params.outDir}/VariantCalling/HaplotypeCallerGVCF",
'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller",
'manta' : "${params.outDir}/VariantCalling/Manta",
'mutect1' : "${params.outDir}/VariantCalling/MuTect1",
'mutect2' : "${params.outDir}/VariantCalling/MuTect2",
Expand Down
7 changes: 5 additions & 2 deletions runMultiQC.nf
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ process GenerateMultiQCconfig {
echo "- 'samtools'" >> multiqc_config.yaml
echo "- 'qualimap'" >> multiqc_config.yaml
echo "- 'bcftools'" >> multiqc_config.yaml
echo "- 'vcftools'" >> multiqc_config.yaml
echo "- 'snpeff'" >> multiqc_config.yaml
"""
}
Expand All @@ -106,6 +107,7 @@ reportsForMultiQC = Channel.empty()
Channel.fromPath("${directoryMap.markDuplicatesQC}/*"),
Channel.fromPath("${directoryMap.samtoolsStats}/*"),
Channel.fromPath("${directoryMap.snpeffReports}/*"),
Channel.fromPath("${directoryMap.vcftools}/*"),
multiQCconfig
).collect()

Expand Down Expand Up @@ -148,10 +150,11 @@ def defineDirectoryMap() {
'bamQC' : "${params.outDir}/Reports/bamQC",
'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
'fastQC' : "${params.outDir}/Reports/FastQC",
'snpeffReports' : "${params.outDir}/Reports/SnpEff",
'markDuplicatesQC' : "${params.outDir}/Reports/MarkDuplicates",
'multiQC' : "${params.outDir}/Reports/MultiQC",
'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats"
'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats",
'snpeffReports' : "${params.outDir}/Reports/SnpEff",
'vcftools' : "${params.outDir}/Reports/VCFTools"
]
}

Expand Down
8 changes: 4 additions & 4 deletions scripts/do_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ do
esac
done

if [ $GENOME = smallGRCh37 ]
# The small test genome is handled as GRCh37 from here on: the container
# names built further down (snpeff$(toLower ${GENOME}), vep$(toLower ${GENOME}))
# are derived from $GENOME.
# Fix: the previous `$GENOME = GRCh37` was not an assignment - it expanded to
# the command `smallGRCh37 = GRCh37`, which fails and leaves GENOME unchanged.
if [[ $GENOME = smallGRCh37 ]]
then
  GENOME=GRCh37
fi
Expand All @@ -55,10 +55,10 @@ function toLower() {
echo $1 | tr '[:upper:]' '[:lower:]'
}

if [ $TOOL = docker ] && [ GRCh37,GRCh38 =~ $GENOME ]
if [[ $TOOL = docker ]] && [[ GRCh37,GRCh38 =~ $GENOME ]]
then
nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,sarek,snpeff
nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,sarek,snpeff,vcftools
nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers snpeff$(toLower ${GENOME}),vep$(toLower ${GENOME})
else
nextflow run buildContainers.nf -profile ${PROFILE} --verbose --singularity --repository ${REPOSITORY} --tag ${TAG} --containerPath containers/ --containers fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,sarek,snpeff$(toLower ${GENOME}),vep$(toLower ${GENOME})
nextflow run buildContainers.nf -profile ${PROFILE} --verbose --singularity --repository ${REPOSITORY} --tag ${TAG} --containerPath containers/ --containers fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,sarek,snpeff$(toLower ${GENOME}),vcftools,vep$(toLower ${GENOME})
fi
2 changes: 1 addition & 1 deletion scripts/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ fi

if [[ ALL,STEP =~ $TEST ]]
then
run_wrapper --germline --sample $SAMPLE
run_wrapper --germline --sampleDir data/tiny/tiny/normal
run_wrapper --germline --step realign --noReports
run_wrapper --germline --step recalibrate --noReports
clean_repo
Expand Down
55 changes: 52 additions & 3 deletions somaticVC.nf
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,7 @@ process ConcatVCF {
file(genomeIndex) from Channel.value(referenceMap.genomeIndex)

output:
set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz") into vcfConcatenated
file("*.vcf.gz.tbi") into vcfConcatenatedTbi
set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfConcatenated

when: ('mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC

Expand Down Expand Up @@ -777,7 +776,11 @@ if (params.verbose) ascatOutput = ascatOutput.view {
(strelkaIndels, strelkaSNVS) = strelkaOutput.into(2)
(mantaSomaticSV, mantaDiploidSV) = mantaOutput.into(2)

vcfForBCFtools = Channel.empty().mix(
vcfForQC = Channel.empty().mix(
vcfConcatenated.map {
variantcaller, idPatient, idSampleNormal, idSampleTumor, vcf, tbi ->
[variantcaller, vcf]
},
mantaDiploidSV.map {
variantcaller, idPatient, idSampleNormal, idSampleTumor, vcf, tbi ->
[variantcaller, vcf[2]]
Expand All @@ -799,6 +802,8 @@ vcfForBCFtools = Channel.empty().mix(
[variantcaller, vcf[1]]
})

(vcfForBCFtools, vcfForVCFtools) = vcfForQC.into(2)

process RunBcftoolsStats {
tag {vcf}

Expand All @@ -825,6 +830,49 @@ if (params.verbose) bcfReport = bcfReport.view {

bcfReport.close()

// RunVcftools: run VCFtools on every [variantCaller, vcf] pair emitted by
// vcfForVCFtools and collect the files it writes as QC reports.
process RunVcftools {
// Label each task with the VCF it is processing.
tag {vcf}

// Reports are published into directoryMap.vcftools using publishDir mode 'link'.
publishDir directoryMap.vcftools, mode: 'link'

input:
// variantCaller is received with the VCF but is not used in the script below.
set variantCaller, file(vcf) from vcfForVCFtools

output:
// Every file whose name starts with "<vcf.baseName>." is sent to vcfReport.
// NOTE(review): this glob also matches the staged input name; presumably
// Nextflow leaves input files out of glob outputs by default - confirm.
file ("${vcf.baseName}.*") into vcfReport

// Skipped entirely when params.noReports is set.
when: !params.noReports

// The script calls vcftools four times on the same gzipped VCF (--gzvcf),
// once per statistic: --relatedness2, --TsTv-by-count, --TsTv-by-qual and
// --FILTER-summary. All four calls share the same --out prefix
// (vcf.baseName); presumably vcftools appends a statistic-specific suffix
// to that prefix - confirm against the VCFtools manual.
script:
"""
vcftools \
--gzvcf ${vcf} \
--relatedness2 \
--out ${vcf.baseName}

vcftools \
--gzvcf ${vcf} \
--TsTv-by-count \
--out ${vcf.baseName}

vcftools \
--gzvcf ${vcf} \
--TsTv-by-qual \
--out ${vcf.baseName}

vcftools \
--gzvcf ${vcf} \
--FILTER-summary \
--out ${vcf.baseName}
"""
}

// When params.verbose is set, print the file name of each item emitted on
// vcfReport while passing the items through unchanged.
if (params.verbose) vcfReport = vcfReport.view {
"VCFTools stats report:\n\
File : [${it.fileName}]"
}

// NOTE(review): no downstream consumer of vcfReport is visible in this part
// of the file; the channel is closed here, mirroring bcfReport.close() above.
vcfReport.close()
/*
================================================================================
= F U N C T I O N S =
Expand Down Expand Up @@ -886,6 +934,7 @@ def defineDirectoryMap() {
'bamQC' : "${params.outDir}/Reports/bamQC",
'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats",
'vcftools' : "${params.outDir}/Reports/VCFTools",
'ascat' : "${params.outDir}/VariantCalling/Ascat",
'freebayes' : "${params.outDir}/VariantCalling/FreeBayes",
'manta' : "${params.outDir}/VariantCalling/Manta",
Expand Down