Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace some params #324

Merged
merged 2 commits into from
Jan 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
profile: [test_split_fastq, test_targeted, test_trimming, test_no_gatk_spark, test_umi_tso, test_umi_qiaseq]
profile: [test_split_fastq, test_targeted, test_trimming, test_use_gatk_spark, test_umi_tso, test_umi_qiaseq]
steps:
- uses: actions/checkout@v2
- name: Install Nextflow
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/sarek -profile test_no_gatk_spark
* nextflow run nf-core/sarek -profile test_use_gatk_spark
*/

includeConfig 'test.config'

params {
no_gatk_spark = true
use_gatk_spark = true
}
4 changes: 2 additions & 2 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
* `test_annotation`
* A profile with a complete configuration for automated testing
* Input data is a `VCF` for testing annotation
* `test_no_gatk_spark`
* `test_use_gatk_spark`
* A profile with a complete configuration for automated testing
* Specify `--no_gatk_spark`
* Specify `--use_gatk_spark`
* `test_split_fastq`
* A profile with a complete configuration for automated testing
* Specify `--split_fastq 500`
Expand Down
34 changes: 17 additions & 17 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def helpMessage() {
Preprocessing:
--markdup_java_options [str] Establish values for markDuplicates memory consumption
Default: ${params.markdup_java_options}
--no_gatk_spark [bool] Disable usage of GATK Spark implementation of their tools in local mode
--use_gatk_spark [bool] Enable usage of GATK Spark implementation of their tools in local mode
--save_bam_mapped [bool] Save Mapped BAMs
--skip_markduplicates [bool] Skip MarkDuplicates

Expand All @@ -93,7 +93,7 @@ def helpMessage() {
Default: ${params.cf_ploidy}
--cf_window [int] Control-FREEC window size
Default: Disabled
--no_gvcf [bool] No g.vcf output from GATK HaplotypeCaller
--generate_gvcf [bool] Enable g.vcf output from GATK HaplotypeCaller
--no_strelka_bp [bool] Will not use Manta candidateSmallIndels for Strelka (not recommended by Best Practices)
--pon [file] Panel-of-normals VCF (bgzipped) for GATK Mutect2 / Sentieon TNscope
See: https://software.broadinstitute.org/gatk/documentation/tooldocs/current/org_broadinstitute_hellbender_tools_walkers_mutect_CreateSomaticPanelOfNormals.php
Expand Down Expand Up @@ -414,7 +414,7 @@ if (params.split_fastq) summary['Reads in fastq'] = p

summary['MarkDuplicates'] = "Options"
summary['Java options'] = params.markdup_java_options
summary['GATK Spark'] = params.no_gatk_spark ? 'No' : 'Yes'
summary['GATK Spark'] = params.use_gatk_spark ? 'Yes' : 'No'

summary['Save BAMs mapped'] = params.save_bam_mapped ? 'Yes' : 'No'
summary['Skip MarkDuplicates'] = params.skip_markduplicates ? 'Yes' : 'No'
Expand All @@ -432,7 +432,7 @@ if ('controlfreec' in tools) {
if (params.cf_ploidy) summary['ploidy'] = params.cf_ploidy
}

if ('haplotypecaller' in tools) summary['GVCF'] = params.no_gvcf ? 'No' : 'Yes'
if ('haplotypecaller' in tools) summary['GVCF'] = params.generate_gvcf ? 'Yes' : 'No'
if ('strelka' in tools && 'manta' in tools) summary['Strelka BP'] = params.no_strelka_bp ? 'No' : 'Yes'
if (params.pon && ('mutect2' in tools || (params.sentieon && 'tnscope' in tools))) summary['Panel of normals'] = params.pon

Expand Down Expand Up @@ -1387,7 +1387,18 @@ process MarkDuplicates {
script:
markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\""
metrics = 'markduplicates' in skipQC ? '' : "-M ${idSample}.bam.metrics"
if (params.no_gatk_spark)
if (params.use_gatk_spark)
"""
gatk --java-options ${markdup_java_options} \
MarkDuplicatesSpark \
-I ${idSample}.bam \
-O ${idSample}.md.bam \
${metrics} \
--tmp-dir . \
--create-output-bam-index true \
--spark-master local[${task.cpus}]
"""
else
"""
gatk --java-options ${markdup_java_options} \
MarkDuplicates \
Expand All @@ -1401,17 +1412,6 @@ process MarkDuplicates {

mv ${idSample}.md.bai ${idSample}.md.bam.bai
"""
else
"""
gatk --java-options ${markdup_java_options} \
MarkDuplicatesSpark \
-I ${idSample}.bam \
-O ${idSample}.md.bam \
${metrics} \
--tmp-dir . \
--create-output-bam-index true \
--spark-master local[${task.cpus}]
"""
}

(tsv_bam_duplicates_marked, tsv_bam_duplicates_marked_sample) = tsv_bam_duplicates_marked.into(2)
Expand Down Expand Up @@ -2010,7 +2010,7 @@ process HaplotypeCaller {

gvcfHaplotypeCaller = gvcfHaplotypeCaller.groupTuple(by:[0, 1, 2])

if (params.no_gvcf) gvcfHaplotypeCaller.close()
if (!params.generate_gvcf) gvcfHaplotypeCaller.close()
else gvcfHaplotypeCaller = gvcfHaplotypeCaller.dump(tag:'GVCF HaplotypeCaller')

// STEP GATK HAPLOTYPECALLER.2
Expand Down
22 changes: 11 additions & 11 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ params {
// Preprocessing
aligner = 'bwa-mem'
markdup_java_options = '"-Xms4000m -Xmx7g"' // Established values for markDuplicates memory consumption, see https://github.com/SciLifeLab/Sarek/pull/689 for details
no_gatk_spark = null // GATK Spark implementation of their tools in local mode used by default
use_gatk_spark = false // GATK Spark implementation of their tools in local mode is not used by default
save_bam_mapped = null // Mapped BAMs not saved
skip_markduplicates = null // Do not skip markDuplicates by default

Expand All @@ -52,7 +52,7 @@ params {
cf_coeff = "0.05" // default value for Control-FREEC
cf_ploidy = "2" // you can use 2,3,4
cf_window = null // by default we are not using this in Control-FREEC
no_gvcf = null // g.vcf are produced by HaplotypeCaller
generate_gvcf = null // g.vcf are not produced by HaplotypeCaller by default
no_strelka_bp = null // Strelka will use Manta candidateSmallIndels if available
pon = false // No default PON (Panel of Normals) file for GATK Mutect2 / Sentieon TNscope
pon_index = false // No default PON index for GATK Mutect2 / Sentieon TNscope
Expand Down Expand Up @@ -140,15 +140,15 @@ profiles {
podman {
podman.enabled = true
}
test { includeConfig 'conf/test.config' }
test_annotation { includeConfig 'conf/test_annotation.config' }
test_no_gatk_spark { includeConfig 'conf/test_no_gatk_spark.config' }
test_split_fastq { includeConfig 'conf/test_split_fastq.config' }
test_targeted { includeConfig 'conf/test_targeted.config' }
test_tool { includeConfig 'conf/test_tool.config' }
test_trimming { includeConfig 'conf/test_trimming.config' }
test_umi_tso { includeConfig 'conf/test_umi_tso.config' }
test_umi_qiaseq { includeConfig 'conf/test_umi_qiaseq.config' }
test { includeConfig 'conf/test.config' }
test_annotation { includeConfig 'conf/test_annotation.config' }
test_use_gatk_spark { includeConfig 'conf/test_use_gatk_spark.config' }
test_split_fastq { includeConfig 'conf/test_split_fastq.config' }
test_targeted { includeConfig 'conf/test_targeted.config' }
test_tool { includeConfig 'conf/test_tool.config' }
test_trimming { includeConfig 'conf/test_trimming.config' }
test_umi_tso { includeConfig 'conf/test_umi_tso.config' }
test_umi_qiaseq { includeConfig 'conf/test_umi_qiaseq.config' }
}

// Load genomes.config or igenomes.config
Expand Down
17 changes: 8 additions & 9 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,10 @@
"help_text": "See https://github.com/SciLifeLab/Sarek/pull/689",
"hidden": true
},
"no_gatk_spark": {
"type": "string",
"default": "null",
"fa_icon": "fas fa-ban",
"description": "Disable usage of GATK Spark implementation"
"use_gatk_spark": {
"type": "boolean",
"fa_icon": "fas fa-forward",
"description": "Enable usage of GATK Spark implementation"
},
"save_bam_mapped": {
"type": "boolean",
Expand All @@ -230,7 +229,7 @@
},
"skip_markduplicates": {
"type": "boolean",
"fa_icon": "fas fa-forward",
"fa_icon": "fas fa-fast-forward",
"description": "Skip GATK MarkDuplicates",
"help_text": "This parameter will also save the mapped BAMs, to enable restarting from step prepare_recalibration"
}
Expand Down Expand Up @@ -275,10 +274,10 @@
"description": "Overwrite Control-FREEC window size",
"help_text": "It is recommended to use a window size of 0 for exome data"
},
"no_gvcf": {
"generate_gvcf": {
"type": "boolean",
"fa_icon": "fas fa-ban",
"description": "No g.vcf output from GATK HaplotypeCaller"
"fa_icon": "fas fa-copy",
"description": "Generate g.vcf output from GATK HaplotypeCaller"
},
"no_strelka_bp": {
"type": "boolean",
Expand Down