From e4a6b9c0a81c6a07ee1849fba24fcc9e646f5bfe Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Sat, 15 Dec 2018 22:14:13 +0100
Subject: [PATCH 1/5] Add Changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cb016e5e7..9d20de272 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [Unpublished / Dev Branch]
 
+### `Added`
+* [#111](https://github.com/nf-core/eager/pull/111) - Allow [Zipped FastA reference input](https://github.com/nf-core/eager/issues/91)
+
 ### `Fixed`
 * [#110](https://github.com/nf-core/eager/pull/110) - Fix for [MultiQC Missing Second FastQC report](https://github.com/nf-core/eager/issues/107)
 

From 67e10ad77c035114bf1c0b0a1496031c96090b85 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Sat, 15 Dec 2018 22:38:47 +0100
Subject: [PATCH 2/5] Make gzipped input great again!

---
 main.nf | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/main.nf b/main.nf
index 8899fea60..9ca7d8dd3 100644
--- a/main.nf
+++ b/main.nf
@@ -216,9 +216,37 @@ output_docs = file("$baseDir/docs/output.md")
 wherearemyfiles = file("$baseDir/assets/where_are_my_files.txt")
 
 // Validate inputs
+if(!"${params.fasta}".indexOf(".gz")){
 Channel.fromPath("${params.fasta}")
     .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"}
     .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper; ch_fasta_for_circularmapper_index;ch_fasta_for_bwamem_mapping}
+} else {
+//Put the zip into a channel, then unzip it and forward to downstream processes. DONT unzip in all steps, this is inefficient as NXF links the files anyways from work to work dir
+Channel.fromPath("${params.fasta}")
+        .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"}
+        .set {ch_unzip_fasta}
+
+process unzip_reference{
+    tag "$zipfasta"
+
+    input:
+    file zipfasta from ch_unzip_fasta
+
+    output:
+    file "*.fasta" into (ch_fasta_for_bwa_indexing, ch_fasta_for_faidx_indexing, ch_fasta_for_dict_indexing,  ch_fasta_for_bwa_mapping, ch_fasta_for_damageprofiler, ch_fasta_for_qualimap, ch_fasta_for_pmdtools, ch_fasta_for_circularmapper, ch_fasta_for_circularmapper_index,ch_fasta_for_bwamem_mapping)
+
+    script:
+    """
+    pigz -f -d -p ${task.cpus} $zipfasta
+    """
+    }
+}
+    
+
+
+
+
+
 
 //Index files provided? Then check whether they are correct and complete
 if (params.aligner != 'bwa' && !params.circularmapper && !params.bwamem){

From a8257c132c3958d40b959581f583d5888b7b187f Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Sat, 15 Dec 2018 22:43:44 +0100
Subject: [PATCH 3/5] Add proper testcase for zipped FastA input

---
 .travis.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 70e563719..5ee8220ea 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -48,5 +48,7 @@ script:
   - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --circularmapper --circulartarget 'NC_007596.2'
   # Test running with BWA Mem
   - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --bwamem --bwa_index results/reference_genome/bwa_index/
+  # Test with zipped reference input
+  - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --fasta 'https://raw.githubusercontent.com/nf-core/test-datasets/eager2/reference/Test.fasta.gz'
   # Test basic pipeline with Conda too 
   - travis_wait 25 nextflow run ${TRAVIS_BUILD_DIR} -profile test,conda --pairedEnd --bwa_index results/reference_genome/bwa_index/
\ No newline at end of file

From 47ad6f45bf0a70322519fd1995701ac3dfc367fa Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Sat, 15 Dec 2018 22:44:57 +0100
Subject: [PATCH 4/5] Document me :-)

---
 docs/configuration/reference_genomes.md | 1 -
 docs/usage.md                           | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/configuration/reference_genomes.md b/docs/configuration/reference_genomes.md
index b0ac327a8..324cfef07 100644
--- a/docs/configuration/reference_genomes.md
+++ b/docs/configuration/reference_genomes.md
@@ -10,7 +10,6 @@ Read [Adding your own system](adding_your_own.md) to find out how to set up cust
 ## Adding paths to a config file
 Specifying long paths every time you run the pipeline is a pain.
 To make this easier, the pipeline comes configured to understand reference genome keywords which correspond to preconfigured paths, meaning that you can just specify `--genome ID` when running the pipeline.
->>>>>>> TEMPLATE
 
 Note that this genome key can also be specified in a config file if you always use the same genome.
 
diff --git a/docs/usage.md b/docs/usage.md
index 8a90a3a25..053406816 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -136,7 +136,7 @@ If you prefer, you can specify the full path to your reference genome when you r
 ```bash
 --fasta '[path to Fasta reference]'
 ```
-> If you don't specify appropriate `--bwa_index`, `--fasta_index` parameters, the pipeline will create these indices for you automatically. Note, that saving these for later has to be turned on using `--saveReference`.
+> If you don't specify appropriate `--bwa_index`, `--fasta_index` parameters, the pipeline will create these indices for you automatically. Note that saving these for later has to be turned on using `--saveReference`. You may also specify the path to a gzipped (`*.gz` file extension) FastA as the reference genome - the pipeline will automatically uncompress it for you.
 
 ### `--genome` (using iGenomes)
 

From 0d2241dcf5077fe9124bf33609965f0b85948a8c Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Sat, 15 Dec 2018 23:03:05 +0100
Subject: [PATCH 5/5] Handle unzipping more logically

---
 main.nf | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/main.nf b/main.nf
index 9ca7d8dd3..bcd6e9756 100644
--- a/main.nf
+++ b/main.nf
@@ -216,30 +216,30 @@ output_docs = file("$baseDir/docs/output.md")
 wherearemyfiles = file("$baseDir/assets/where_are_my_files.txt")
 
 // Validate inputs
-if(!"${params.fasta}".indexOf(".gz")){
-Channel.fromPath("${params.fasta}")
-    .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"}
-    .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper; ch_fasta_for_circularmapper_index;ch_fasta_for_bwamem_mapping}
-} else {
-//Put the zip into a channel, then unzip it and forward to downstream processes. DONT unzip in all steps, this is inefficient as NXF links the files anyways from work to work dir
-Channel.fromPath("${params.fasta}")
-        .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"}
-        .set {ch_unzip_fasta}
+if("${params.fasta}".endsWith(".gz")){
+    // Gzipped reference: decompress it ONCE in a dedicated process and forward the result to all downstream channels (do NOT unzip in every step - Nextflow links the file between work dirs anyway). The output is declared as the input name minus its ".gz" suffix, so references named *.fa.gz / *.fna.gz work as well as *.fasta.gz.
+    Channel.fromPath("${params.fasta}")
+            .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"}
+            .set {ch_unzip_fasta}
 
-process unzip_reference{
-    tag "$zipfasta"
+    process unzip_reference{
+        tag "$zipfasta"
 
-    input:
-    file zipfasta from ch_unzip_fasta
+        input:
+        file zipfasta from ch_unzip_fasta
 
-    output:
-    file "*.fasta" into (ch_fasta_for_bwa_indexing, ch_fasta_for_faidx_indexing, ch_fasta_for_dict_indexing,  ch_fasta_for_bwa_mapping, ch_fasta_for_damageprofiler, ch_fasta_for_qualimap, ch_fasta_for_pmdtools, ch_fasta_for_circularmapper, ch_fasta_for_circularmapper_index,ch_fasta_for_bwamem_mapping)
+        output:
+        file "${zipfasta.baseName}" into (ch_fasta_for_bwa_indexing, ch_fasta_for_faidx_indexing, ch_fasta_for_dict_indexing,  ch_fasta_for_bwa_mapping, ch_fasta_for_damageprofiler, ch_fasta_for_qualimap, ch_fasta_for_pmdtools, ch_fasta_for_circularmapper, ch_fasta_for_circularmapper_index,ch_fasta_for_bwamem_mapping)
 
-    script:
-    """
-    pigz -f -d -p ${task.cpus} $zipfasta
-    """
-    }
+        script:
+        """
+        pigz -f -d -p ${task.cpus} $zipfasta
+        """
+    }
+} else { // Uncompressed reference: fan it out directly to every consumer channel.
+    Channel.fromPath("${params.fasta}")
+        .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"}
+        .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper; ch_fasta_for_circularmapper_index;ch_fasta_for_bwamem_mapping}
 }