fmalmeida · fmalmeida · Feb 16, 2024 · Oct 29, 2023 · Oct 29, 2023 · Oct 29, 2023
diff --git a/.github/workflows/test_pr_hybrid_docker.yml b/.github/workflows/test_pr_hybrid_docker.yml
@@ -28,10 +28,10 @@ jobs:
 
     - name: Run tests for hybrid
       run: |
-        nextflow run main.nf -profile docker,test,hybrid,pacbio
+        nextflow run main.nf -profile docker,test,hybrid,pacbio --skip_haslr
         rm -r work .nextflow*
 
     - name: View results
       run: |
         sudo apt-get install -y tree
-        tree hybrid_test
+        tree hybrid_test_pacbio
diff --git a/.../workflows/test_pr_hybrid_singularity.yml → .../workflows/test_pr_hybrid_singularity.bkp b/.../workflows/test_pr_hybrid_singularity.yml → .../workflows/test_pr_hybrid_singularity.bkp
diff --git a/...orkflows/test_pr_illumina_singularity.yml → ...orkflows/test_pr_illumina_singularity.bkp b/...orkflows/test_pr_illumina_singularity.yml → ...orkflows/test_pr_illumina_singularity.bkp
diff --git a/...kflows/test_pr_lreads_singularity_ont.yml → ...kflows/test_pr_lreads_singularity_ont.bkp b/...kflows/test_pr_lreads_singularity_ont.yml → ...kflows/test_pr_lreads_singularity_ont.bkp
diff --git a/assets/hybrid_test_ont.yml b/assets/hybrid_test_ont.yml
@@ -1,9 +1,8 @@
-samplesheet:
-
+samplesheet:  
   - id: ont_hybrid
     nanopore:  https://github.com/fmalmeida/test_datasets/raw/main/ecoli_ont_15X.fastq.gz
     genome_size: 1m
     illumina: 
       - https://github.com/fmalmeida/test_datasets/raw/main/ecoli_illumina_15X_1.fastq.gz
       - https://github.com/fmalmeida/test_datasets/raw/main/ecoli_illumina_15X_2.fastq.gz
-    hybrid_strategy: both
+    hybrid_strategy: both
diff --git a/assets/hybrid_test_pacbio.yml b/assets/hybrid_test_pacbio.yml
@@ -1,9 +1,8 @@
-samplesheet:
-
+samplesheet:  
   - id: pacbio_hybrid
     pacbio:  https://github.com/fmalmeida/test_datasets/raw/main/ecoli_pacbio_15X.fastq.gz
     genome_size: 1m
     illumina: 
       - https://github.com/fmalmeida/test_datasets/raw/main/ecoli_illumina_15X_1.fastq.gz
       - https://github.com/fmalmeida/test_datasets/raw/main/ecoli_illumina_15X_2.fastq.gz
-    hybrid_strategy: both
+    hybrid_strategy: both
diff --git a/conf/defaults.config b/conf/defaults.config
@@ -56,14 +56,16 @@ params {
 
 // Select the appropriate shasta config to use for assembly
 // Since shasta v0.8 (Oct/2021) this parameter is now mandatory.
+// You can check availability at: https://paoloshasta.github.io/shasta/Configurations.html
   shasta_config = "Nanopore-Oct2021"
 
 // Tells the pipeline to interpret the long reads as "corrected" long reads.
-// This will activate (if available) the options for corrected reads in the
-// assemblers: -corrected (in canu), --pacbio-corr|--nano-corr (in flye), etc.
-// Be cautious when using this parameter. If your reads are not corrected, and
+// This will activate (if available) the options for corrected or even high
+// quality (hq) reads in the assemblers.
+// Be cautious when using this parameter. If your reads are not corrected|hq, and
 // you use this parameter, you will probably do not generate any contig.
-  corrected_long_reads = false
+  corrected_longreads    = false
+  high_quality_longreads = false
 
 // This parameter below (hybrid_strategy) is to select the hybrid strategies adopted by the pipeline.
 // Read the documentation https://mpgap.readthedocs.io/en/latest/manual.html to know more about the hybrid strategies.
@@ -134,7 +136,7 @@ params {
 
 // Max resource options
 // Defaults only, expecting to be overwritten
-  max_memory                 = '14.GB'
+  max_memory                 = '20.GB'
   max_cpus                   = 6
   max_time                   = '40.h'
 

diff --git a/conf/docker.config b/conf/docker.config
@@ -4,4 +4,4 @@ singularity.enabled     = false
 docker.enabled          = true
 docker.runOptions       = '-u \$(id -u):\$(id -g)'
 // fixOwnership            = true
-process.container       = "fmalmeida/mpgap:v3.2"
+process.container       = "fmalmeida/mpgap@sha256:0439466a52a3aef70c3e3b2b8ba5504bf167db2437a7fbb85d40f94c95a67fb9"
diff --git a/conf/singularity.config b/conf/singularity.config
@@ -3,4 +3,4 @@ params.selected_profile = "singularity"
 docker.enabled          = false
 singularity.enabled     = true
 singularity.autoMounts  = true
-process.container       = "docker://fmalmeida/mpgap:v3.2"
+process.container       = "docker://fmalmeida/mpgap@sha256:0439466a52a3aef70c3e3b2b8ba5504bf167db2437a7fbb85d40f94c95a67fb9"
diff --git a/docs/manual.md b/docs/manual.md
@@ -116,7 +116,8 @@ However, they can also be set in a sample-specific manner. If a sample has a val
 | :--------------------------------------- | :------- | :------ | :---------- |
 | `--wtdbg2_technology`    | :material-close: | ont/sq           | It tells the pipeline which technology the long reads are, which is required for wtdbg2. Options are: `ont` for Nanopore reads, `rs` for PacBio RSII, `sq` for PacBio Sequel, `ccs` for PacBio CCS reads. With not wanted, consider using `--skip_wtdbg2` |
 | `--shasta_config`        | :material-close: | Nanopore-Oct2021 | It tells the pipeline which shasta pre-set configuration to use when assembling nanopore reads. Please read the [shasta configuration manual page](https://chanzuckerberg.github.io/shasta/Configurations.html) to know the available models |
-| `--corrected_long_reads` | :material-check: | False            | It tells the pipeline to interpret the input long reads as "corrected". This will activate (if available) the options for corrected reads in the assemblers. For example: `-corrected` (in canu), `--pacbio-corr|--nano-corr` (in flye), etc. Be cautious when using this parameter. If your reads are not corrected, and you use this parameter, you will probably do not generate any contig |
+| `--corrected_longreads` | :material-check: | False            | It tells the pipeline to interpret the input long reads as "corrected". This will activate (if available) the options for corrected reads in the assemblers. For example: `-corrected` (in canu), `--pacbio-corr|--nano-corr` (in flye), etc. Be cautious when using this parameter. If your reads are not corrected and you use this parameter, you will probably not generate any contigs |
+| `--high_quality_longreads` | :material-check: | False          | It tells the pipeline to interpret the input long reads as "high quality" (HiFi). This will activate (if available) the options for high quality (HiFi) reads in the assemblers. For example: `-corrected` (in canu), `--pacbio-hifi|--nano-hq` (in flye), etc. Be cautious when using this parameter. If your reads are not high quality and you use this parameter, you will probably not generate any contigs. It cannot be used together with `--corrected_longreads` |
 
 ### Long reads polishers
 

diff --git a/environment.yml b/environment.yml
@@ -3,60 +3,60 @@
 name: mpgap-3.2
 
 channels:
-  - conda-forge
-  - bioconda
-  - defaults
-  - anaconda
+- conda-forge
+- bioconda
+- defaults
+- anaconda
 
 dependencies:
-  ## python pip dependencies
-  - pip
-
-  ## anaconda main channel
-  - main::openssl
-
-  ## anaconda packages
-  - anaconda::python=3.8
-
-  ## conda-forge packages
-  - conda-forge::gcc
-  - conda-forge::hdf5=1.10                        # nanopolish is not loading it
-  - conda-forge::hdf5-static                      # nanopolish is not loading it
-  - conda-forge::zlib                             # for nanopolish
+## python pip dependencies
+- pip
 
-  ## bioconda packages
-  - bioconda::htslib
-  - bioconda::blast=2.12.0
-  - bioconda::racon=1.4.20
-  - bioconda::pilon=1.24
-  - bioconda::canu=2.2
-  - bioconda::spades=3.15.3
-  - bioconda::unicycler=0.5.0
-  - bioconda::mummer
-  - bioconda::bowtie2
-  - bioconda::freebayes
-  - bioconda::minimap2=2.22
-  - bioconda::nanopolish=0.13.2
-  - bioconda::pbgcpp=2.0.2
-  - bioconda::pbmm2=1.7.0
-  - bioconda::polypolish=0.5.0
-  - bioconda::flye=2.9
-  - bioconda::quast=5.2.0
-  - bioconda::miniasm=0.3_r179
-  - bioconda::seqtk=1.3
-  - bioconda::shovill=1.1.0
-  - bioconda::shasta=0.11
-  # - bioconda::trycycler=0.5.4
-  - bioconda::haslr=0.8a1
-  # - biocconda::homopolish=0.4.1
-  - bioconda::raven-assembler=1.6.1
-  - bioconda::multiqc>=1.14
-  - bioconda::csvtk=0.23.0
-  - bioconda::wtdbg=2.5
-  - bioconda::medaka
+## anaconda main channel
+- main::openssl
 
-  # for medaka > 1.4
-  - bioconda::samtools>=1.11
-  - bioconda::tabix>=1.11
-  - bioconda::bcftools>=1.11
-  - bioconda::mappy
+## anaconda packages
+- anaconda::python=3.8
+
+## conda-forge packages
+- conda-forge::gcc
+- conda-forge::hdf5=1.10 # nanopolish is not loading it
+- conda-forge::hdf5-static # nanopolish is not loading it
+- conda-forge::zlib # for nanopolish
+
+## bioconda packages
+- bioconda::htslib
+- bioconda::blast=2.12.0
+- bioconda::racon=1.4.20
+- bioconda::pilon=1.24
+- bioconda::canu=2.2
+- bioconda::spades=3.15.3
+- bioconda::unicycler=0.5.0
+- bioconda::mummer
+- bioconda::bowtie2
+- bioconda::freebayes
+- bioconda::minimap2=2.22
+- bioconda::nanopolish=0.13.2
+- bioconda::pbgcpp=2.0.2
+- bioconda::pbmm2=1.7.0
+- bioconda::polypolish=0.5.0
+- bioconda::flye=2.9.2
+- bioconda::quast=5.2.0
+- bioconda::miniasm=0.3_r179
+- bioconda::seqtk=1.3
+- bioconda::shovill=1.1.0
+- bioconda::shasta=0.11
+# - bioconda::trycycler=0.5.4
+- bioconda::haslr=0.8a1
+# - biocconda::homopolish=0.4.1
+- bioconda::raven-assembler=1.6.1
+- bioconda::multiqc>=1.14
+- bioconda::csvtk=0.23.0
+- bioconda::wtdbg=2.5
+- bioconda::medaka=1.11.1
+
+# for medaka > 1.4
+- bioconda::samtools>=1.11
+- bioconda::tabix>=1.11
+- bioconda::bcftools>=1.11
+- bioconda::mappy
diff --git a/example_samplesheet.yml b/example_samplesheet.yml
@@ -14,30 +14,30 @@
 # A template (with the correct fields, syntax and indentation) is given below:
 
 samplesheet:
-  - id: sample_1
-    illumina: 
-      - dataset/reads_1.fastq.gz
-      - dataset/reads_2.fastq.gz
-  - id: sample_2
-    illumina:
-      - dataset/reads_unpaired.fastq.gz
-  - id: sample_3
-    illumina:
-      - dataset/reads_1.fastq.gz
-      - dataset/reads_2.fastq.gz
-      - dataset/pairs_merged.fastq.gz
-  - id: sample_4
-    nanopore: dataset/ont_reads.fastq.gz
-    corrected_long_reads: true
-    nanopolish_fast5: dataset/kleb/fast5_pass
-    genome_size: 5.5m
-  - id: sample_5
-    pacbio: dataset/pacbio_reads.fastq.gz
-    pacbio_bam: dataset/pacbio_reads.subreads.bam
-    wtdbg2_technology: rs  
-  - id: sample_6
-    illumina:
-      - dataset/reads_1.fastq.gz
-      - dataset/reads_2.fastq.gz
-    nanopore: dataset/ont_reads.fastq.gz
-    hybrid_strategy: both
+- id: sample_1
+  illumina:
+  - dataset/reads_1.fastq.gz
+  - dataset/reads_2.fastq.gz
+- id: sample_2
+  illumina:
+  - dataset/reads_unpaired.fastq.gz
+- id: sample_3
+  illumina:
+  - dataset/reads_1.fastq.gz
+  - dataset/reads_2.fastq.gz
+  - dataset/pairs_merged.fastq.gz
+- id: sample_4
+  nanopore: dataset/ont_reads.fastq.gz
+  corrected_longreads: true
+  nanopolish_fast5: dataset/kleb/fast5_pass
+  genome_size: 5.5m
+- id: sample_5
+  pacbio: dataset/pacbio_reads.fastq.gz
+  pacbio_bam: dataset/pacbio_reads.subreads.bam
+  wtdbg2_technology: rs
+- id: sample_6
+  illumina:
+  - dataset/reads_1.fastq.gz
+  - dataset/reads_2.fastq.gz
+  nanopore: dataset/ont_reads.fastq.gz
+  hybrid_strategy: both
diff --git a/lib/WorkflowMpGAP.groovy b/lib/WorkflowMpGAP.groovy
@@ -19,6 +19,11 @@ class WorkflowMpGAP {
             log.error "ERROR!\nA major error has occurred!\n\t==>  Parameter --hybrid_strategy must be either 1, 2 or both.\n\t Online documentation is available at: https://mpgap.readthedocs.io/en/latest/\nPlease, read the docs.\nCheers."
             System.exit(1)
         }
+
+        if (params.corrected_longreads && params.high_quality_longreads) {
+            log.error "ERROR!\nA major error has occurred!\n\t==>  Parameters --corrected_longreads and --high_quality_longreads were used at the same time. These activate assembler configurations for reads of different quality levels. Cannot be used at the same time ( uncorrected < corrected < high_quality ).\nCheers."
+            System.exit(1)
+        }
     }
 
 }
diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md
@@ -6,6 +6,8 @@ The tracking for changes started in v2.
 
 * Update unicyler to v0.5.0
 * Adjust Pilon polishing module to select how many rounds of polishing to run. Default is 4.
+* Adjust `raven` module to allow pre-set `-k` and `-w` values for corrected/high-quality longreads, while allowing user modification
+* Increase default `--max_memory` value to 20.GB.
 * Add a directory called `final_assemblies` in the main output directory holding all the assemblies generated in the pipeline execution.
 * Updated documentation as discussed in [[#58](https://github.com/fmalmeida/MpGAP/issues/58)] and [[#57](https://github.com/fmalmeida/MpGAP/issues/57)].
 

diff --git a/modules/Hybrid/haslr_hybrid.nf b/modules/Hybrid/haslr_hybrid.nf
diff --git a/modules/Hybrid/pilon_polish.nf b/modules/Hybrid/pilon_polish.nf
diff --git a/modules/Hybrid/polypolish.nf b/modules/Hybrid/polypolish.nf
diff --git a/modules/Hybrid/spades_hybrid.nf b/modules/Hybrid/spades_hybrid.nf
diff --git a/modules/Hybrid/unicycler_hybrid.nf b/modules/Hybrid/unicycler_hybrid.nf
diff --git a/modules/LongReads/canu.nf b/modules/LongReads/canu.nf
diff --git a/modules/LongReads/flye.nf b/modules/LongReads/flye.nf