From e4a6b9c0a81c6a07ee1849fba24fcc9e646f5bfe Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 15 Dec 2018 22:14:13 +0100 Subject: [PATCH 1/5] Add Changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb016e5e7..9d20de272 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unpublished / Dev Branch] +### `Added` +* [#111](https://github.com/nf-core/eager/pull/111) - Allow [Zipped FastA reference input](https://github.com/nf-core/eager/issues/91) + ### `Fixed` * [#110](https://github.com/nf-core/eager/pull/110) - Fix for [MultiQC Missing Second FastQC report](https://github.com/nf-core/eager/issues/107) From 67e10ad77c035114bf1c0b0a1496031c96090b85 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 15 Dec 2018 22:38:47 +0100 Subject: [PATCH 2/5] Make gzipped input great again! --- main.nf | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/main.nf b/main.nf index 8899fea60..9ca7d8dd3 100644 --- a/main.nf +++ b/main.nf @@ -216,9 +216,37 @@ output_docs = file("$baseDir/docs/output.md") wherearemyfiles = file("$baseDir/assets/where_are_my_files.txt") // Validate inputs +if(!"${params.fasta}".indexOf(".gz")){ Channel.fromPath("${params.fasta}") .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"} .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper; ch_fasta_for_circularmapper_index;ch_fasta_for_bwamem_mapping} +} else { +//Put the zip into a channel, then unzip it and forward to downstream processes. DONT unzip in all steps, this is inefficient as NXF links the files anyways from work to work dir +Channel.fromPath("${params.fasta}") + .ifEmpty { exit 1, "No genome specified! 
Please specify one with --fasta"} + .set {ch_unzip_fasta} + +process unzip_reference{ + tag "$zipfasta" + + input: + file zipfasta from ch_unzip_fasta + + output: + file "*.fasta" into (ch_fasta_for_bwa_indexing, ch_fasta_for_faidx_indexing, ch_fasta_for_dict_indexing, ch_fasta_for_bwa_mapping, ch_fasta_for_damageprofiler, ch_fasta_for_qualimap, ch_fasta_for_pmdtools, ch_fasta_for_circularmapper, ch_fasta_for_circularmapper_index,ch_fasta_for_bwamem_mapping) + + script: + """ + pigz -f -d -p ${task.cpus} $zipfasta + """ + } +} + + + + + + //Index files provided? Then check whether they are correct and complete if (params.aligner != 'bwa' && !params.circularmapper && !params.bwamem){ From a8257c132c3958d40b959581f583d5888b7b187f Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 15 Dec 2018 22:43:44 +0100 Subject: [PATCH 3/5] Add proper testcase for zipped FastA input --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 70e563719..5ee8220ea 100644 --- a/.travis.yml +++ b/.travis.yml @@ -48,5 +48,7 @@ script: - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --circularmapper --circulartarget 'NC_007596.2' # Test running with BWA Mem - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --bwamem --bwa_index results/reference_genome/bwa_index/ + # Test with zipped reference input + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --fasta 'https://raw.githubusercontent.com/nf-core/test-datasets/eager2/reference/Test.fasta.gz' # Test basic pipeline with Conda too - travis_wait 25 nextflow run ${TRAVIS_BUILD_DIR} -profile test,conda --pairedEnd --bwa_index results/reference_genome/bwa_index/ \ No newline at end of file From 47ad6f45bf0a70322519fd1995701ac3dfc367fa Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 15 Dec 2018 22:44:57 +0100 Subject: [PATCH 4/5] Document me :-) --- docs/configuration/reference_genomes.md | 1 - docs/usage.md | 2 +- 2 files 
changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/configuration/reference_genomes.md b/docs/configuration/reference_genomes.md index b0ac327a8..324cfef07 100644 --- a/docs/configuration/reference_genomes.md +++ b/docs/configuration/reference_genomes.md @@ -10,7 +10,6 @@ Read [Adding your own system](adding_your_own.md) to find out how to set up cust ## Adding paths to a config file Specifying long paths every time you run the pipeline is a pain. To make this easier, the pipeline comes configured to understand reference genome keywords which correspond to preconfigured paths, meaning that you can just specify `--genome ID` when running the pipeline. ->>>>>>> TEMPLATE Note that this genome key can also be specified in a config file if you always use the same genome. diff --git a/docs/usage.md b/docs/usage.md index 8a90a3a25..053406816 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -136,7 +136,7 @@ If you prefer, you can specify the full path to your reference genome when you r ```bash --fasta '[path to Fasta reference]' ``` -> If you don't specify appropriate `--bwa_index`, `--fasta_index` parameters, the pipeline will create these indices for you automatically. Note, that saving these for later has to be turned on using `--saveReference`. +> If you don't specify appropriate `--bwa_index`, `--fasta_index` parameters, the pipeline will create these indices for you automatically. Note that saving these for later has to be turned on using `--saveReference`. You may also specify the path to a gzipped (`*.gz` file extension) FastA as reference genome - this will be uncompressed by the pipeline automatically for you. 
### `--genome` (using iGenomes) From 0d2241dcf5077fe9124bf33609965f0b85948a8c Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 15 Dec 2018 23:03:05 +0100 Subject: [PATCH 5/5] Handle unzipping more logically --- main.nf | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/main.nf b/main.nf index 9ca7d8dd3..bcd6e9756 100644 --- a/main.nf +++ b/main.nf @@ -216,30 +216,30 @@ output_docs = file("$baseDir/docs/output.md") wherearemyfiles = file("$baseDir/assets/where_are_my_files.txt") // Validate inputs -if(!"${params.fasta}".indexOf(".gz")){ -Channel.fromPath("${params.fasta}") - .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"} - .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper; ch_fasta_for_circularmapper_index;ch_fasta_for_bwamem_mapping} -} else { -//Put the zip into a channel, then unzip it and forward to downstream processes. DONT unzip in all steps, this is inefficient as NXF links the files anyways from work to work dir -Channel.fromPath("${params.fasta}") - .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"} - .set {ch_unzip_fasta} +if("${params.fasta}".endsWith(".gz")){ + //Put the gzipped FastA into a channel, unzip it once and forward it to downstream processes. DONT unzip in all steps, this is inefficient as NXF links the files anyways from work to work dir. The unzipped output is declared as the input file name without its '.gz' suffix, so references such as *.fa.gz or *.fna.gz are captured too + Channel.fromPath("${params.fasta}") + .ifEmpty { exit 1, "No genome specified! 
Please specify one with --fasta"} + .set {ch_unzip_fasta} -process unzip_reference{ - tag "$zipfasta" + process unzip_reference{ + tag "$zipfasta" - input: - file zipfasta from ch_unzip_fasta + input: + file zipfasta from ch_unzip_fasta - output: - file "*.fasta" into (ch_fasta_for_bwa_indexing, ch_fasta_for_faidx_indexing, ch_fasta_for_dict_indexing, ch_fasta_for_bwa_mapping, ch_fasta_for_damageprofiler, ch_fasta_for_qualimap, ch_fasta_for_pmdtools, ch_fasta_for_circularmapper, ch_fasta_for_circularmapper_index,ch_fasta_for_bwamem_mapping) + output: + file "${zipfasta.baseName}" into (ch_fasta_for_bwa_indexing, ch_fasta_for_faidx_indexing, ch_fasta_for_dict_indexing, ch_fasta_for_bwa_mapping, ch_fasta_for_damageprofiler, ch_fasta_for_qualimap, ch_fasta_for_pmdtools, ch_fasta_for_circularmapper, ch_fasta_for_circularmapper_index,ch_fasta_for_bwamem_mapping) - script: - """ - pigz -f -d -p ${task.cpus} $zipfasta - """ - } + script: + """ + pigz -f -d -p ${task.cpus} $zipfasta + """ + } + } else { + Channel.fromPath("${params.fasta}") + .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"} + .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper; ch_fasta_for_circularmapper_index;ch_fasta_for_bwamem_mapping} }