diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3f201885..4d37a278 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,6 +29,7 @@ jobs:
         ANALYSIS:
           - "test_screening"
           - "test_targeted"
+          - "test_umis"
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v3
@@ -40,4 +41,4 @@ jobs:
 
       - name: Run pipeline with test data (${{ matrix.ANALYSIS }})
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.ANALYSIS }},docker --outdir ./results_targeted
+          nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.ANALYSIS }},docker --outdir ./results_${{ matrix.ANALYSIS }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 09385ad7..ccb1efd4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - Add new parameters --reference and --protospacer ([#45](https://github.com/nf-core/crisprseq/pull/45))
+- Add UMI clustering to crisprseq-targeted ([#24](https://github.com/nf-core/crisprseq/pull/24))
 
 ### Fixed
 
diff --git a/README.md b/README.md
index 9cd521aa..86ecafe6 100644
--- a/README.md
+++ b/README.md
@@ -38,11 +38,19 @@ For crispr targeted:
 2. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
 3. Adapter trimming ([`Cutadapt`](http://dx.doi.org/10.14806/ej.17.1.200))
 4. Quality filtering ([`Seqtk`](https://github.com/lh3/seqtk))
-5. Read mapping:
+5. UMI clustering (optional):
+   a. Extract UMI sequences (Python script)
+   b. Cluster UMI sequences ([`Vsearch`](https://github.com/torognes/vsearch))
+   c. Obtain the most abundant UMI sequence for each cluster ([`Vsearch`](https://github.com/torognes/vsearch))
+   d. Obtain a consensus for each cluster ([`minimap2`](https://github.com/lh3/minimap2))
+   e. Polish consensus sequence ([`racon`](https://github.com/lbcb-sci/racon))
+   f. Repeat a second rand of consensus + polishing (`minimap2` + `racon`)
+   g. Obtain the final consensus of each cluster ([Medaka](https://nanoporetech.github.io/medaka/index.html))
+6. Read mapping:
    - ([`minimap2`](https://github.com/lh3/minimap2), _default_)
    - ([`bwa`](http://bio-bwa.sourceforge.net/))
    - ([`bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml))
-6. CIGAR parsing for edit calling ([`R`](https://www.r-project.org/))
+7. CIGAR parsing for edit calling ([`R`](https://www.r-project.org/))
 
 For crispr screening:
 
@@ -62,7 +70,9 @@ For crispr screening:
 3. Download the pipeline and test it on a minimal dataset with a single command:
 
    ```bash
-   nextflow run nf-core/crisprseq -profile test,YOURPROFILE --outdir <OUTDIR>
+   nextflow run nf-core/crisprseq -profile test_screening,YOURPROFILE --outdir <OUTDIR>
+   # or
+   nextflow run nf-core/crisprseq -profile test_targeted,YOURPROFILE --outdir <OUTDIR>
    ```
 
    Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.
diff --git a/bin/extract_umis.py b/bin/extract_umis.py
index 78802c9f..e4792d0f 100755
--- a/bin/extract_umis.py
+++ b/bin/extract_umis.py
@@ -1,6 +1,10 @@
 #!/usr/bin/env python3
-# FROM: pipeline-umi-amplicon distributed by ONT https://github.com/nanoporetech/pipeline-umi-amplicon
-# https://github.com/nanoporetech/pipeline-umi-amplicon/blob/69ec3907879aea406b5fb02e3db83b579bfb8b45/lib/umi_amplicon_tools/extract_umis.py
+#
+# This code is obtained from Oxford Nanopore Technologies pipeline-umi-amplicon
+# Distributed under the Mozilla Public License, v. 2.0
+#
+# Original source code: https://github.com/nanoporetech/pipeline-umi-amplicon/blob/69ec3907879aea406b5fb02e3db83b579bfb8b45/lib/umi_amplicon_tools/extract_umis.py
+#
 
 import argparse
 import logging
diff --git a/conf/modules.config b/conf/modules.config
index ad961c41..47155bcb 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -127,7 +127,7 @@ process {
         ]
     }
 
-    withName: SEQTK_SEQ {
+    withName: SEQTK_SEQ_MASK {
         ext.args = '-q 20 -L 80 -n N'
         publishDir = [
             path: { "${params.outdir}/preprocessing/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
@@ -140,6 +140,18 @@ process {
         ext.args = '--max-error 3 --adapter-length 250 --fwd-context ""'
     }
 
+    withName: VSEARCH_CLUSTER {
+        ext.args = "--minseqlength ${params.vsearch_minseqlength} --maxseqlength ${params.vsearch_maxseqlength} --qmask none --clusterout_sort --gapopen 0E/5I --gapext 0E/2I --mismatch -8 --match 6 --iddef 0 --minwordmatches 0 --qmask none -id ${params.vsearch_id}"
+        ext.args2 = '--cluster_fast'
+        ext.args3 = '--clusters'
+    }
+
+
+    withName: VSEARCH_SORT {
+        ext.args = '--topn 1'
+        ext.prefix = { "${fasta.baseName}_top" }
+    }
+
     withName: MERGING_SUMMARY {
         publishDir = [
             path: { "${params.outdir}/summary/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
@@ -148,7 +160,43 @@ process {
         ]
     }
 
-    withName: DUMMY_FINAL_UMI {
+    withName: MINIMAP2_ALIGN_UMI_1 {
+        ext.args = '-x map-ont'
+        ext.prefix = { "${reads.baseName}_cycle1" }
+        publishDir = [
+            path: { "${params.outdir}/minimap2_umi" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: MINIMAP2_ALIGN_UMI_2 {
+        ext.args = '-x map-ont'
+        ext.prefix = { "${reads.baseName}_cycle2" }
+        publishDir = [
+            path: { "${params.outdir}/minimap2_umi" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: RACON_1 {
+        ext.args = '-t 4 -m 8 -x -6 -g -8 -w 500 --no-trimming'
+        ext.prefix = { "${reads.baseName}_cycle1" }
+    }
+
+    withName: RACON_2 {
+        ext.args = '-t 4 -m 8 -x -6 -g -8 -w 500 --no-trimming'
+        ext.prefix = { "${reads.baseName}_cycle2" }
+    }
+
+    withName: MEDAKA {
+        ext.args = '-m r941_min_high_g303'
+        ext.prefix = { "${reads.baseName}_medakaConsensus" }
+    }
+
+    withName: SEQTK_SEQ_FATOFQ {
+        ext.args = '-F "#"'
         publishDir = [
             path: { "${params.outdir}/preprocessing/UMI" },
             mode: params.publish_dir_mode,
diff --git a/conf/test_umis.config b/conf/test_umis.config
new file mode 100644
index 00000000..6efc80d8
--- /dev/null
+++ b/conf/test_umis.config
@@ -0,0 +1,29 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests (with UMIs option)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/crisprseq -profile test,<conda/docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Test profile UMIs'
+    config_profile_description = 'Minimal test dataset to check pipeline function with UMIs option'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+    input          = 'https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata-edition/samplesheet_test_umis.csv'
+    analysis       = 'targeted'
+    umi_clustering = true
+
+    // Aligner
+    aligner = 'minimap2'
+}
diff --git a/docs/output/targeted.md b/docs/output/targeted.md
index 16ffaf8a..b04ca3d8 100644
--- a/docs/output/targeted.md
+++ b/docs/output/targeted.md
@@ -15,14 +15,18 @@ The directories listed below will be created in the results directory after the
 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
 
 - [Preprocessing](#preprocessing)
-  - [Sequences](#sequences) - Input sequence preparation (reference, protospacer, template)
+  - [sequences](#sequences) - Input sequence preparation (reference, protospacer, template)
   - [cat](#cat) - Concatenate sample fastq files if required
-  - [Pear](#pear) - Join double-end reads if required
-  - [FastQC](#fastqc) - Read Quality Control
-  - [Adapters](#adapters) - Find adapters (Overrepresented sequences) in reads
-  - [Cutadapt](#cutadapt) - Trim adapters
-  - [Seqtk](#seqtk) - Mask low-quality bases
-  <!-- -UMI(#umi) -->
+  - [pear](#pear) - Join double-end reads if required
+  - [fastqc](#fastqc) - Read Quality Control
+  - [adapters](#adapters) - Find adapters (Overrepresented sequences) in reads
+  - [cutadapt](#cutadapt) - Trim adapters
+  - [seqtk](#seqtk) - Mask low-quality bases
+- [UMI clustering](#umi-clustering)
+  - [vsearch](#vsearch)
+  - [minimap2 umi](#minimap2-umi)
+  - [racon](#racon)
+  - [medaka](#medaka)
 - [Mapping](#mapping)
   - [minimap2](#minimap2) - Mapping reads to reference
   - [BWA](#bwa) - Mapping reads to reference
@@ -143,7 +147,56 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m
 
 [Seqtk](https://github.com/lh3/seqtk) masks (converts to Ns) bases with quality lower than 20 and removes sequences shorter than 80 bases.
 
-<!-- ### UMI -->
+## UMI clustering
+
+### vsearch
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `vsearch/`
+  - `*_clusters*`: Contains all UMI sequences which clustered together.
+  - `*_clusters*_top.fasta`: Contains the most abundant UMI sequence from the cluster.
+
+</details>
+
+[VSEARCH](https://github.com/torognes/vsearch) is a versatile open-source tool which includes chimera detection, clustering, dereplication and rereplication, extraction, FASTA/FASTQ/SFF file processing, masking, orienting, pair-wise alignment, restriction site cutting, searching, shuffling, sorting, subsampling, and taxonomic classification of amplicon sequences for metagenomics, genomics, and population genetics. `vsearch/clsuter` can cluster sequences using a single-pass, greedy centroid-based clustering algorithm. `vsearch/sort` can sort fasta entries by decreasing abundance (`--sortbysize`) or sequence length (`--sortbylength`).
+
+### minimap2_umi
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `minimap2_umi/`
+  - `*_sequences_clycle[1,2].paf`: Alignment of the cluster sequences against the top UMi sequence in paf format.
+
+</details>
+
+[Minimap2](https://github.com/lh3/minimap2) is a sequence alignment program that aligns DNA sequences against a reference database.
+
+### racon
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `racon/`
+  - `*_sequences_clycle[1,2]_assembly_consensus.fasta.gz`: Consensus sequence obtained from the cluster multiple sequence alignment.
+
+</details>
+
+[Racon](https://github.com/lbcb-sci/racon) is an ultrafast consensus module for raw de novo genome assembly of long uncorrected reads.
+
+### medaka
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `medaka/`
+  - `*_medakaConsensus.fasta`: Final consensus sequence of each UMI cluster. Obtained after two rounds of minimap2 + racon.
+
+</details>
+
+[Medaka](https://nanoporetech.github.io/medaka/index.html) is a tool to create consensus sequences and variant calls from nanopore sequencing data.
 
 ## Mapping
 
diff --git a/docs/usage/targeted.md b/docs/usage/targeted.md
index 8f415d0b..51e7a60f 100644
--- a/docs/usage/targeted.md
+++ b/docs/usage/targeted.md
@@ -55,6 +55,26 @@ chr6,chr6-61942198-61942498_R1.fastq.gz,,CAA...GGA,TTTTATGATATTTATCTTTT,TTC...CA
 
 An [example samplesheet](https://nf-co.re/crisprseq/1.0/assets/samplesheet.csv) has been provided with the pipeline.
 
+## Optional pipeline steps
+
+### Trimming of overrepresented sequences
+
+To trim the overrepresented sequences found with FastQC from the reads, use the parameter `--overrepresented`.
+Such sequences are not trimmed by default.
+When using the `--overrepresented` parameter, Cutadapt is used to trim overrepresented sequences from the input FASTQ files.
+
+### UMI clustering
+
+If the provided samples were sequenced using umi-molecular identifiers (UMIs), use the parameter `--umi_clustering` in order to run the clustering steps.
+
+1. Extract UMI sequences (Python script)
+2. Cluster UMI sequences ([`Vsearch`](https://github.com/torognes/vsearch))
+3. Obtain the most abundant UMI sequence for each cluster ([`Vsearch`](https://github.com/torognes/vsearch))
+4. Obtain a consensus for each cluster ([`minimap2`](https://github.com/lh3/minimap2))
+5. Polish consensus sequence ([`racon`](https://github.com/lbcb-sci/racon))
+6. Repeat a second rand of consensus + polishing (`minimap2` + `racon`)
+7. Obtain the final consensus of each cluster ([Medaka](https://nanoporetech.github.io/medaka/index.html))
+
 ## Other input parameters
 
 ### Reference
diff --git a/modules.json b/modules.json
index 8af5a345..94b62c4a 100644
--- a/modules.json
+++ b/modules.json
@@ -52,6 +52,12 @@
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                         "installed_by": ["modules"]
                     },
+                    "medaka": {
+                        "branch": "master",
+                        "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+                        "installed_by": ["modules"],
+                        "patch": "modules/nf-core/medaka/medaka.diff"
+                    },
                     "mageck/count": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
@@ -76,6 +82,11 @@
                         "installed_by": ["modules"],
                         "patch": "modules/nf-core/minimap2/align/minimap2-align.diff"
                     },
+                    "minimap2/index": {
+                        "branch": "master",
+                        "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+                        "installed_by": ["modules"]
+                    },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba",
@@ -86,6 +97,16 @@
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                         "installed_by": ["modules"]
                     },
+                    "racon": {
+                        "branch": "master",
+                        "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+                        "installed_by": ["modules"]
+                    },
+                    "samtools/faidx": {
+                        "branch": "master",
+                        "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+                        "installed_by": ["modules"]
+                    },
                     "samtools/index": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
@@ -95,6 +116,17 @@
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                         "installed_by": ["modules"]
+                    },
+                    "vsearch/cluster": {
+                        "branch": "master",
+                        "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+                        "installed_by": ["modules"],
+                        "patch": "modules/nf-core/vsearch/cluster/vsearch-cluster.diff"
+                    },
+                    "vsearch/sort": {
+                        "branch": "master",
+                        "git_sha": "e7801603532df26b4bb4ef324ca2c39f7a4d0eee",
+                        "installed_by": ["modules"]
                     }
                 }
             },
diff --git a/modules/local/dummy_final_umi.nf b/modules/local/dummy_final_umi.nf
deleted file mode 100644
index 64069410..00000000
--- a/modules/local/dummy_final_umi.nf
+++ /dev/null
@@ -1,55 +0,0 @@
-process DUMMY_FINAL_UMI {
-    tag "$meta.id"
-    label 'process_single'
-
-    conda "conda-forge::p7zip==16.02"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/p7zip:16.02' :
-        'quay.io/biocontainers/p7zip:16.02' }"
-
-    input:
-    tuple val(meta), path(reads)
-
-    output:
-    tuple val(meta), path("*_clustered.fastq.gz"), emit: dummy
-    path "versions.yml"                       , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    touch ${prefix}_clustered.fastq
-
-    7za \\
-        a \\
-        $args \\
-        ${prefix}_clustered.fastq.gz \\
-        ${prefix}_clustered.fastq
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        7za: \$(echo \$(7za --help) | sed 's/.*p7zip Version //; s/(.*//')
-    END_VERSIONS
-    """
-}
-
-
-
-process summary_reads{
-
-    input:
-    set sampleID, file(summary), file(finalReads) from summaryReport.join(readsSummary)
-
-    output:
-    set val(sampleID), file(summary) into summaryReportReads
-
-    script:
-    """
-    final_count=`expr \$(cat $finalReads | wc -l) / 4`
-    echo "clustered-reads," \$final_count >> ${sampleID}_summary.csv
-    """
-
-    }
diff --git a/modules/local/extract_umis.nf b/modules/local/extract_umis.nf
index 4f9be2a3..d6c8fb44 100644
--- a/modules/local/extract_umis.nf
+++ b/modules/local/extract_umis.nf
@@ -11,9 +11,9 @@ process EXTRACT_UMIS {
     tuple val(meta), path(reads)
 
     output:
-    tuple val(meta), path(fasta) , emit: fasta
-    tuple val(meta), path(tsv)   , emit: tsv
-    path "versions.yml"          , emit: versions
+    tuple val(meta), path("*.fasta") , emit: fasta
+    tuple val(meta), path("*.tsv")   , emit: tsv
+    path "versions.yml"              , emit: versions
 
     script:
     def args = task.ext.args ?: ''
diff --git a/modules/nf-core/medaka/main.nf b/modules/nf-core/medaka/main.nf
new file mode 100644
index 00000000..80e4c065
--- /dev/null
+++ b/modules/nf-core/medaka/main.nf
@@ -0,0 +1,51 @@
+process MEDAKA {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "bioconda::medaka=1.4.4"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/medaka:1.4.4--py38h130def0_0' :
+        'biocontainers/medaka:1.4.4--py38h130def0_0' }"
+
+    input:
+    tuple val(meta), path(reads), path(assembly)
+
+    output:
+    tuple val(meta), path("*_medakaConsensus.fasta"), emit: assembly
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    if [[ "${reads.extension}" == "gz" ]]; then
+        gzip -df $reads
+        reads=$reads.baseName
+    else
+        reads=$reads
+    fi
+    if [[ "${assembly.extension}" == "gz" ]]; then
+        gzip -df $assembly
+        assembly=$assembly.baseName
+    else
+        assembly=$assembly
+    fi
+
+    medaka_consensus \\
+        -t $task.cpus \\
+        $args \\
+        -i \$reads \\
+        -d \$assembly \\
+        -o ./
+
+    mv consensus.fasta ${prefix}.fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/medaka/medaka.diff b/modules/nf-core/medaka/medaka.diff
new file mode 100644
index 00000000..7f8752ac
--- /dev/null
+++ b/modules/nf-core/medaka/medaka.diff
@@ -0,0 +1,49 @@
+Changes in module 'nf-core/medaka'
+--- modules/nf-core/medaka/main.nf
++++ modules/nf-core/medaka/main.nf
+@@ -11,8 +11,8 @@
+     tuple val(meta), path(reads), path(assembly)
+ 
+     output:
+-    tuple val(meta), path("*.fa.gz"), emit: assembly
+-    path "versions.yml"             , emit: versions
++    tuple val(meta), path("*_medakaConsensus.fasta"), emit: assembly
++    path "versions.yml"                             , emit: versions
+ 
+     when:
+     task.ext.when == null || task.ext.when
+@@ -21,16 +21,27 @@
+     def args = task.ext.args ?: ''
+     def prefix = task.ext.prefix ?: "${meta.id}"
+     """
++    if [[ "${reads.extension}" == "gz" ]]; then
++        gzip -df $reads
++        reads=$reads.baseName
++    else
++        reads=$reads
++    fi
++    if [[ "${assembly.extension}" == "gz" ]]; then
++        gzip -df $assembly
++        assembly=$assembly.baseName
++    else
++        assembly=$assembly
++    fi
++
+     medaka_consensus \\
+         -t $task.cpus \\
+         $args \\
+-        -i $reads \\
+-        -d $assembly \\
++        -i \$reads \\
++        -d \$assembly \\
+         -o ./
+ 
+-    mv consensus.fasta ${prefix}.fa
+-
+-    gzip -n ${prefix}.fa
++    mv consensus.fasta ${prefix}.fasta
+ 
+     cat <<-END_VERSIONS > versions.yml
+     "${task.process}":
+
+************************************************************
diff --git a/modules/nf-core/medaka/meta.yml b/modules/nf-core/medaka/meta.yml
new file mode 100644
index 00000000..ed124d61
--- /dev/null
+++ b/modules/nf-core/medaka/meta.yml
@@ -0,0 +1,47 @@
+name: medaka
+description: A tool to create consensus sequences and variant calls from nanopore sequencing data
+keywords:
+  - assembly
+  - polishing
+  - nanopore
+tools:
+  - medaka:
+      description: Neural network sequence error correction.
+      homepage: https://nanoporetech.github.io/medaka/index.html
+      documentation: https://nanoporetech.github.io/medaka/index.html
+      tool_dev_url: https://github.com/nanoporetech/medaka
+
+      licence: ["Mozilla Public License 2.0"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: List of input nanopore fasta/FastQ files
+      pattern: "*.{fasta,fa,fastq,fastq.gz,fq,fq.gz}"
+  - assembly:
+      type: file
+      description: Genome assembly
+      pattern: "*.{fasta,fa}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - assembly:
+      type: file
+      description: Polished genome assembly
+      pattern: "*.fa.gz"
+
+authors:
+  - "@avantonder"
diff --git a/modules/nf-core/minimap2/index/main.nf b/modules/nf-core/minimap2/index/main.nf
new file mode 100644
index 00000000..7a1bb227
--- /dev/null
+++ b/modules/nf-core/minimap2/index/main.nf
@@ -0,0 +1,34 @@
+process MINIMAP2_INDEX {
+    label 'process_medium'
+
+    // Note: the versions here need to match the versions used in minimap2/align
+    conda "bioconda::minimap2=2.24"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/minimap2:2.24--h7132678_1' :
+        'biocontainers/minimap2:2.24--h7132678_1' }"
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path("*.mmi"), emit: index
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    minimap2 \\
+        -t $task.cpus \\
+        -d ${fasta.baseName}.mmi \\
+        $args \\
+        $fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        minimap2: \$(minimap2 --version 2>&1)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/minimap2/index/meta.yml b/modules/nf-core/minimap2/index/meta.yml
new file mode 100644
index 00000000..b58f35c6
--- /dev/null
+++ b/modules/nf-core/minimap2/index/meta.yml
@@ -0,0 +1,40 @@
+name: minimap2_index
+description: Provides fasta index required by minimap2 alignment.
+keywords:
+  - index
+  - fasta
+  - reference
+tools:
+  - minimap2:
+      description: |
+        A versatile pairwise aligner for genomic and spliced nucleotide sequences.
+      homepage: https://github.com/lh3/minimap2
+      documentation: https://github.com/lh3/minimap2#uguide
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: |
+        Reference database in FASTA format.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - index:
+      type: file
+      description: Minimap2 fasta index.
+      pattern: "*.mmi"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@yuukiiwa"
+  - "@drpatelh"
diff --git a/modules/nf-core/racon/main.nf b/modules/nf-core/racon/main.nf
new file mode 100644
index 00000000..f8ed3691
--- /dev/null
+++ b/modules/nf-core/racon/main.nf
@@ -0,0 +1,38 @@
+process RACON {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "bioconda::racon=1.4.20"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/racon:1.4.20--h9a82719_1' :
+        'quay.io/biocontainers/racon:1.4.20--h9a82719_1' }"
+
+    input:
+    tuple val(meta), path(reads), path(assembly), path(paf)
+
+    output:
+    tuple val(meta), path('*_assembly_consensus.fasta.gz') , emit: improved_assembly
+    path "versions.yml"          , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    racon -t "$task.cpus" \\
+        "${reads}" \\
+        "${paf}" \\
+        $args \\
+        "${assembly}" > \\
+        ${prefix}_assembly_consensus.fasta
+
+    gzip -n ${prefix}_assembly_consensus.fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        racon: \$( racon --version 2>&1 | sed 's/^.*v//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/racon/meta.yml b/modules/nf-core/racon/meta.yml
new file mode 100644
index 00000000..2e7737d9
--- /dev/null
+++ b/modules/nf-core/racon/meta.yml
@@ -0,0 +1,52 @@
+name: racon
+description: Consensus module for raw de novo DNA assembly of long uncorrected reads
+keywords:
+  - assembly
+  - pacbio
+  - nanopore
+  - polish
+tools:
+  - racon:
+      description: Ultrafast consensus module for raw de novo genome assembly of long uncorrected reads.
+      homepage: https://github.com/lbcb-sci/racon
+      documentation: https://github.com/lbcb-sci/racon
+      tool_dev_url: https://github.com/lbcb-sci/racon
+      doi: 10.1101/gr.214270.116
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: List of input FastQ files. Racon expects single end reads
+      pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
+  - assembly:
+      type: file
+      description: Genome assembly to be improved
+      pattern: "*.{fasta,fa}"
+  - paf:
+      type: file
+      description: Alignment in PAF format
+      pattern: "*.paf"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - improved_assembly:
+      type: file
+      description: Improved genome assembly
+      pattern: "*_assembly_consensus.fasta.gz"
+
+authors:
+  - "@avantonder"
diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf
new file mode 100644
index 00000000..4dd0e5b0
--- /dev/null
+++ b/modules/nf-core/samtools/faidx/main.nf
@@ -0,0 +1,44 @@
+process SAMTOOLS_FAIDX {
+    tag "$fasta"
+    label 'process_single'
+
+    conda "bioconda::samtools=1.17"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
+        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path ("*.fai"), emit: fai
+    tuple val(meta), path ("*.gzi"), emit: gzi, optional: true
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    samtools \\
+        faidx \\
+        $args \\
+        $fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch ${fasta}.fai
+    cat <<-END_VERSIONS > versions.yml
+
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml
new file mode 100644
index 00000000..fe2fe9a1
--- /dev/null
+++ b/modules/nf-core/samtools/faidx/meta.yml
@@ -0,0 +1,47 @@
+name: samtools_faidx
+description: Index FASTA file
+keywords:
+  - index
+  - fasta
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: FASTA file
+      pattern: "*.{fa,fasta}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fai:
+      type: file
+      description: FASTA index file
+      pattern: "*.{fai}"
+  - gzi:
+      type: file
+      description: Optional gzip index file for compressed inputs
+      pattern: "*.gzi"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@ewels"
+  - "@phue"
diff --git a/modules/nf-core/vsearch/cluster/main.nf b/modules/nf-core/vsearch/cluster/main.nf
new file mode 100644
index 00000000..413f70c9
--- /dev/null
+++ b/modules/nf-core/vsearch/cluster/main.nf
@@ -0,0 +1,66 @@
+process VSEARCH_CLUSTER {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "bioconda::vsearch=2.21.1 bioconda::samtools=1.16.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-53dae514294fca7b44842b784ed85a5303ac2d80:7b3365d778c690ca79bc85aaaeb86bb39a2dec69-0':
+        'quay.io/biocontainers/mulled-v2-53dae514294fca7b44842b784ed85a5303ac2d80:7b3365d778c690ca79bc85aaaeb86bb39a2dec69-0' }"
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path('*.aln.gz')                , optional: true, emit: aln
+    tuple val(meta), path('*.biom.gz')               , optional: true, emit: biom
+    tuple val(meta), path('*.mothur.tsv.gz')         , optional: true, emit: mothur
+    tuple val(meta), path('*.otu.tsv.gz')            , optional: true, emit: otu
+    tuple val(meta), path('*.bam')                   , optional: true, emit: bam
+    tuple val(meta), path('*.out.tsv.gz')            , optional: true, emit: out
+    tuple val(meta), path('*.blast.tsv.gz')          , optional: true, emit: blast
+    tuple val(meta), path('*.uc.tsv.gz')             , optional: true, emit: uc
+    tuple val(meta), path('*.centroids.fasta.gz')    , optional: true, emit: centroids
+    tuple val(meta), path('*_clusters*')             , optional: true, emit: clusters
+    tuple val(meta), path('*.profile.txt.gz')        , optional: true, emit: profile
+    tuple val(meta), path('*.msa.fasta.gz')          , optional: true, emit: msa
+    path "versions.yml"                              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def args3 = task.ext.args3 ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    if (!args2.contains("--cluster_fast") && !args2.contains("--cluster_size") && !args2.contains("--cluster_smallmem") && !args2.contains("--cluster_unoise") ) {
+            error "Unknown clustering option provided (${args2})"
+        }
+    def out_ext = args3.contains("--alnout") ? ".aln" :
+                    args3.contains("--biomout") ? ".biom" :
+                    args3.contains("--blast6out") ? ".blast.tsv" :
+                    args3.contains("--centroids") ? ".centroids.fasta" :
+                    args3.contains("--clusters") ? "_clusters" :
+                    args3.contains("--mothur_shared_out") ? ".mothur.tsv" :
+                    args3.contains("--msaout") ? ".msa.fasta" :
+                    args3.contains("--otutabout") ? ".otu.tsv" :
+                    args3.contains("--profile") ? ".profile.txt" :
+                    args3.contains("--samout") ? ".sam" :
+                    args3.contains("--uc") ? "uc.tsv" :
+                    args3.contains("--userout") ? "out.tsv" :
+                    ""
+    if (out_ext == "") { error "Unknown output file format provided (${args3})" }
+    """
+    vsearch \\
+        $args2 $fasta \\
+        $args3 ${prefix}${out_ext} \\
+        --threads $task.cpus \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/vsearch/cluster/meta.yml b/modules/nf-core/vsearch/cluster/meta.yml
new file mode 100644
index 00000000..5cc620cb
--- /dev/null
+++ b/modules/nf-core/vsearch/cluster/meta.yml
@@ -0,0 +1,68 @@
+name: "vsearch_cluster"
+description: Cluster sequences using a single-pass, greedy centroid-based clustering algorithm.
+keywords:
+  - vsearch
+  - clustering
+tools:
+  - vsearch:
+      description: VSEARCH is a versatile open-source tool for microbiome analysis, including chimera detection, clustering, dereplication and rereplication, extraction, FASTA/FASTQ/SFF file processing, masking, orienting, pair-wise alignment, restriction site cutting, searching, shuffling, sorting, subsampling, and taxonomic classification of amplicon sequences for metagenomics, genomics, and population genetics. (USEARCH alternative)
+      homepage: https://github.com/torognes/vsearch
+      documentation: https://github.com/torognes/vsearch/releases/download/v2.21.1/vsearch_manual.pdf
+      tool_dev_url: https://github.com/torognes/vsearch
+      doi: 10.7717/peerj.2584
+      licence: ["GPL v3-or-later OR BSD-2-clause"]
+
+input:
+  - meta:
+      type: map
+      description: Groovy Map containing sample information e.g. [ id:'test' ]
+  - fasta:
+      type: file
+      description: Sequences to cluster in FASTA format
+      pattern: "*.{fasta,fa,fasta.gz,fa.gz}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - aln:
+      type: file
+      description: Results in pairwise alignment format
+      pattern: "*.aln.gz"
+  - biom:
+      type: file
+      description: Results in an OTU table in the biom version 1.0 file format
+      pattern: "*.biom.gz"
+  - mothur:
+      type: file
+      description: Results in an OTU table in the mothur ’shared’ tab-separated plain text file format
+      pattern: "*.mothur.tsv.gz"
+  - otu:
+      type: file
+      description: Results in an OTU table in the classic tab-separated plain text format
+      pattern: "*.otu.tsv.gz"
+  - bam:
+      type: file
+      description: Results written in bam format
+      pattern: "*.bam"
+  - out:
+      type: file
+      description: Results in tab-separated output, columns defined by user
+      pattern: "*.out.tsv.gz"
+  - blast:
+      type: file
+      description: Tab delimited results in blast-like tabular format
+      pattern: "*.blast.tsv.gz"
+  - uc:
+      type: file
+      description: Tab delimited results in a uclust-like format with 10 columns
+      pattern: "*.uc.gz"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@mirpedrol"
diff --git a/modules/nf-core/vsearch/cluster/vsearch-cluster.diff b/modules/nf-core/vsearch/cluster/vsearch-cluster.diff
new file mode 100644
index 00000000..ff6808ac
--- /dev/null
+++ b/modules/nf-core/vsearch/cluster/vsearch-cluster.diff
@@ -0,0 +1,59 @@
+Changes in module 'nf-core/vsearch/cluster'
+--- modules/nf-core/vsearch/cluster/main.nf
++++ modules/nf-core/vsearch/cluster/main.nf
+@@ -20,7 +20,7 @@
+     tuple val(meta), path('*.blast.tsv.gz')          , optional: true, emit: blast
+     tuple val(meta), path('*.uc.tsv.gz')             , optional: true, emit: uc
+     tuple val(meta), path('*.centroids.fasta.gz')    , optional: true, emit: centroids
+-    tuple val(meta), path('*.clusters.fasta.gz')     , optional: true, emit: clusters
++    tuple val(meta), path('*_clusters*')             , optional: true, emit: clusters
+     tuple val(meta), path('*.profile.txt.gz')        , optional: true, emit: profile
+     tuple val(meta), path('*.msa.fasta.gz')          , optional: true, emit: msa
+     path "versions.yml"                              , emit: versions
+@@ -37,16 +37,16 @@
+     if (!args2.contains("--cluster_fast") && !args2.contains("--cluster_size") && !args2.contains("--cluster_smallmem") && !args2.contains("--cluster_unoise") ) {
+             error "Unknown clustering option provided (${args2})"
+         }
+-    def out_ext = args3.contains("--alnout") ? "aln" :
+-                    args3.contains("--biomout") ? "biom" :
+-                    args3.contains("--blast6out") ? "blast.tsv" :
+-                    args3.contains("--centroids") ? "centroids.fasta" :
+-                    args3.contains("--clusters") ? "clusters.fasta" :
+-                    args3.contains("--mothur_shared_out") ? "mothur.tsv" :
+-                    args3.contains("--msaout") ? "msa.fasta" :
+-                    args3.contains("--otutabout") ? "otu.tsv" :
+-                    args3.contains("--profile") ? "profile.txt" :
+-                    args3.contains("--samout") ? "sam" :
++    def out_ext = args3.contains("--alnout") ? ".aln" :
++                    args3.contains("--biomout") ? ".biom" :
++                    args3.contains("--blast6out") ? ".blast.tsv" :
++                    args3.contains("--centroids") ? ".centroids.fasta" :
++                    args3.contains("--clusters") ? "_clusters" :
++                    args3.contains("--mothur_shared_out") ? ".mothur.tsv" :
++                    args3.contains("--msaout") ? ".msa.fasta" :
++                    args3.contains("--otutabout") ? ".otu.tsv" :
++                    args3.contains("--profile") ? ".profile.txt" :
++                    args3.contains("--samout") ? ".sam" :
+                     args3.contains("--uc") ? "uc.tsv" :
+                     args3.contains("--userout") ? "out.tsv" :
+                     ""
+@@ -54,16 +54,9 @@
+     """
+     vsearch \\
+         $args2 $fasta \\
+-        $args3 ${prefix}.${out_ext} \\
++        $args3 ${prefix}${out_ext} \\
+         --threads $task.cpus \\
+         $args
+-
+-    if [[ $args3 != "--samout" ]]
+-    then
+-        gzip -n ${prefix}.${out_ext}
+-    else
+-        samtools view -T $fasta -S -b ${prefix}.${out_ext} > ${prefix}.bam
+-    fi
+ 
+     cat <<-END_VERSIONS > versions.yml
+     "${task.process}":
+
+************************************************************
diff --git a/modules/nf-core/vsearch/sort/main.nf b/modules/nf-core/vsearch/sort/main.nf
new file mode 100644
index 00000000..a6f06535
--- /dev/null
+++ b/modules/nf-core/vsearch/sort/main.nf
@@ -0,0 +1,47 @@
+process VSEARCH_SORT {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "bioconda::vsearch=2.21.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--h95f258a_0':
+        'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }"
+
+    input:
+    tuple val(meta), path(fasta)
+    val sort_arg
+
+    output:
+    tuple val(meta), path("*.fasta"), emit: fasta
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    if ("$fasta" == "${prefix}.fasta") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
+    """
+    vsearch \\
+        $sort_arg $fasta \\
+        --threads $task.cpus \\
+        --output ${prefix}.fasta \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g;s/,.*//g;s/^v//;s/_.*//')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch ${prefix}.fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g;s/,.*//g;s/^v//;s/_.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/vsearch/sort/meta.yml b/modules/nf-core/vsearch/sort/meta.yml
new file mode 100644
index 00000000..a527b7f1
--- /dev/null
+++ b/modules/nf-core/vsearch/sort/meta.yml
@@ -0,0 +1,49 @@
+name: "vsearch_sort"
+description: Sort fasta entries by decreasing abundance (--sortbysize) or sequence length (--sortbylength).
+keywords:
+  - vsearch/sort
+  - vsearch
+  - sort
+  - amplicon sequences
+  - metagenomics
+  - genomics
+  - population genetics
+tools:
+  - vsearch:
+      description: VSEARCH is a versatile open-source tool for microbiome analysis, including chimera detection, clustering, dereplication and rereplication, extraction, FASTA/FASTQ/SFF file processing, masking, orienting, pair-wise alignment, restriction site cutting, searching, shuffling, sorting, subsampling, and taxonomic classification of amplicon sequences for metagenomics, genomics, and population genetics. (USEARCH alternative)
+      homepage: https://github.com/torognes/vsearch
+      documentation: https://github.com/torognes/vsearch/releases/download/v2.21.1/vsearch_manual.pdf
+      tool_dev_url: https://github.com/torognes/vsearch
+      doi: 10.7717/peerj.2584
+      licence: ["GPL v3-or-later OR BSD-2-clause"]
+
+input:
+  - meta:
+      type: map
+      description: Groovy Map containing sample information e.g. [ id:'test' ]
+  - fasta:
+      type: file
+      description: Sequences to be sorted in FASTA format
+      pattern: "*.{fasta,fa,fasta.gz,fa.gz}"
+  - sort_arg:
+      type: string
+      description: Argument to provide to sort algorithm. Sort by abundance with --sortbysize or by sequence length with --sortbylength.
+      enums: ["--sortbysize", "--sortbylength"]
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: Sorted FASTA file
+      pattern: "*.{fasta}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@mirpedrol"
diff --git a/nextflow.config b/nextflow.config
index a1fa8021..1db28f3e 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -22,6 +22,18 @@ params {
     min_reads                  = 30
     min_targeted_genes         = 3
 
+    // Pipeline steps
+    overrepresented            = false
+    umi_clustering             = false
+
+    // UMI parameters
+    umi_bin_size               = 1
+    medaka_model               = 'r941_min_high_g303'
+
+    // Vsearch options
+    vsearch_minseqlength       = 55
+    vsearch_maxseqlength       = 57
+    vsearch_id                 = 0.99
 
     // References
     genome                     = null
@@ -110,6 +122,7 @@ profiles {
     docker {
         docker.enabled         = true
         docker.userEmulation   = true
+        docker.registry        = 'quay.io'
         singularity.enabled    = false
         podman.enabled         = false
         shifter.enabled        = false
@@ -154,6 +167,7 @@ profiles {
     }
     test_targeted  { includeConfig 'conf/test_targeted.config' }
     test_full      { includeConfig 'conf/test_full.config' }
+    test_umis      { includeConfig 'conf/test_umis.config' }
     test_screening { includeConfig 'conf/test_screening.config' }
 }
 
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b38ebd95..f599d796 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -48,15 +48,55 @@
                 }
             }
         },
+        "targeted_pipeline_steps": {
+            "title": "targeted pipeline steps",
+            "type": "object",
+            "description": "Alternative pipeline steps to include in the targeted analysis.",
+            "default": "",
+            "properties": {
+                "overrepresented": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-sort-numeric-up-alt",
+                    "description": "Trim overrepresented sequences from reads (cutadapt)"
+                },
+                "umi_clustering": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-layer-group",
+                    "description": "If the sample contains umi-molecular identifyers (UMIs), run the UMI extraction, clustering and consensus steps."
+                }
+            },
+            "fa_icon": "fas fa-shoe-prints"
+        },
+        "umi_parameters": {
+            "title": "UMI parameters",
+            "type": "object",
+            "description": "Parameters regarding umi molecular identifiers (UMIs)",
+            "default": "",
+            "properties": {
+                "umi_bin_size": {
+                    "type": "integer",
+                    "default": 1,
+                    "description": "Minimum size of a UMI cluster.",
+                    "fa_icon": "fas fa-sort-amount-down"
+                },
+                "medaka_model": {
+                    "type": "string",
+                    "default": "r941_min_high_g360",
+                    "fa_icon": "fas fa-font",
+                    "description": "Medaka model (-m) to use according to the basecaller used."
+                }
+            },
+            "fa_icon": "fas fa-layer-group"
+        },
         "targeted_parameters": {
             "title": "Targeted parameters",
             "type": "object",
-            "description": "",
+            "description": "Parameters used for alignment processes",
             "default": "",
             "properties": {
                 "aligner": {
                     "type": "string",
-                    "description": "Aligner program to use",
+                    "description": "Aligner program to use.",
                     "default": "minimap2",
                     "fa_icon": "fas fa-align-justify",
                     "enum": ["minimap2", "bwa", "bowtie2"]
@@ -66,7 +106,42 @@
                     "fa_icon": "fas fa-grip-lines",
                     "description": "Provide the same protospacer sequence for all samples. Will override protospacer sequences provided by an input samplesheet."
                 }
-            }
+            },
+            "fa_icon": "fas fa-align-justify"
+        },
+        "vsearch_parameters": {
+            "title": "Vsearch parameters",
+            "type": "object",
+            "description": "Parameters to use in Vsearch processes",
+            "default": "",
+            "properties": {
+                "vsearch_minseqlength": {
+                    "type": "integer",
+                    "default": 55,
+                    "fa_icon": "fas fa-minus",
+                    "description": "Vsearch minimum sequence length.",
+                    "help_text": "Discard sequences shorter than vsearch_minseqlength.",
+                    "minimum": 1
+                },
+                "vsearch_maxseqlength": {
+                    "type": "integer",
+                    "default": 57,
+                    "fa_icon": "fas fa-plus",
+                    "description": "Vsearch maximum sequence length.",
+                    "help_text": "Discard sequences longer than vsearch_minseqlength.",
+                    "minimum": 1
+                },
+                "vsearch_id": {
+                    "type": "number",
+                    "default": 0.99,
+                    "fa_icon": "fas fa-equals",
+                    "description": "Vsearch pairwise identity threshold.",
+                    "help_text": "Do not add the target to the cluster if the pairwise identity with the centroid is lower than id. The pairwise identity is defined as the number of (matching columns) / (alignment length - terminal gaps).",
+                    "minimum": 0,
+                    "maximum": 1
+                }
+            },
+            "fa_icon": "fas fa-layer-group"
         },
         "screening_parameters": {
             "title": "Screening parameters",
@@ -98,12 +173,12 @@
                 "min_reads": {
                     "type": "number",
                     "description": "a filter threshold value for sgRNAs, based on their average counts in the control sample",
-                    "default": 30.0
+                    "default": 30
                 },
                 "min_targeted_genes": {
                     "type": "number",
                     "description": "Minimal number of different genes targeted by sgRNAs in a biased segment in order for the corresponding counts to be corrected",
-                    "default": 3.0
+                    "default": 3
                 }
             }
         },
@@ -326,6 +401,13 @@
                     "description": "Show all params when using `--help`",
                     "hidden": true,
                     "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
+                },
+                "schema_ignore_params": {
+                    "type": "string",
+                    "default": "genomes",
+                    "description": "Ignore JSON schema validation of the following params",
+                    "fa_icon": "fas fa-ban",
+                    "hidden": true
                 }
             }
         }
@@ -334,9 +416,27 @@
         {
             "$ref": "#/definitions/input_output_options"
         },
+        {
+            "$ref": "#/definitions/targeted_pipeline_steps"
+        },
+        {
+            "$ref": "#/definitions/umi_parameters"
+        },
+        {
+            "$ref": "#/definitions/targeted_pipeline_steps"
+        },
+        {
+            "$ref": "#/definitions/umi_parameters"
+        },
         {
             "$ref": "#/definitions/targeted_parameters"
         },
+        {
+            "$ref": "#/definitions/vsearch_parameters"
+        },
+        {
+            "$ref": "#/definitions/vsearch_parameters"
+        },
         {
             "$ref": "#/definitions/screening_parameters"
         },
diff --git a/templates/merging_summary.py b/templates/merging_summary.py
index 0eb1239a..804a5be3 100755
--- a/templates/merging_summary.py
+++ b/templates/merging_summary.py
@@ -6,22 +6,28 @@
 
 with gzip.open("${raw_reads[0]}", "rt") as handle:
     raw_reads_count = len(list(SeqIO.parse(handle, "fastq")))
-if "$assembled_reads" == "null":
+
+if "$assembled_reads" == "null_a":
     assembled_reads_count = 0
 else:
     with gzip.open("$assembled_reads", "rt") as handle:
         assembled_reads_count = len(list(SeqIO.parse(handle, "fastq")))  # Merged reads R1+R2
+
 with gzip.open("$trimmed_reads", "rt") as handle:
     trimmed_reads_count = len(list(SeqIO.parse(handle, "fastq")))  # Filtered reads
 
-with open("$trimmed_adapters", "r") as handle:
-    for line in handle:
-        if line.startswith("Reads with adapters"):
-            for field in "".join(line.split(",")).split():
-                if field.isdigit():
-                    adapters_count = field  # reads with adapters
-                if "%" in field:
-                    adapters_percentage = field  # percentage of reads with adapters: ex. "(100.0%)"
+if "$trimmed_adapters" == "null_t":
+    adapters_count = 0
+    adapters_percentage = "(0.0%)"
+else:
+    with open("$trimmed_adapters", "r") as handle:
+        for line in handle:
+            if line.startswith("Reads with adapters"):
+                for field in "".join(line.split(",")).split():
+                    if field.isdigit():
+                        adapters_count = field  # reads with adapters
+                    if "%" in field:
+                        adapters_percentage = field  # percentage of reads with adapters: ex. "(100.0%)"
 
 if "$task.ext.prefix" != "null":
     prefix = "$task.ext.prefix"
@@ -35,7 +41,7 @@
         f"merged-reads, {assembled_reads_count} ({round(assembled_reads_count * 100 / raw_reads_count,1)}%)\\n"
     )
     output_file.write(f"reads-with-adapters, {adapters_count} {adapters_percentage}\\n")
-    if "$assembled_reads" == "null":
+    if "$assembled_reads" == "null_a":
         output_file.write(
             f"quality-filtered-reads, {trimmed_reads_count} ({round(trimmed_reads_count * 100 / raw_reads_count,1)}%)\\n"
         )
diff --git a/workflows/crisprseq_targeted.nf b/workflows/crisprseq_targeted.nf
index eeb155e6..d78bda2e 100644
--- a/workflows/crisprseq_targeted.nf
+++ b/workflows/crisprseq_targeted.nf
@@ -41,17 +41,16 @@ include { INPUT_CHECK                      } from '../subworkflows/local/input_c
 //
 // MODULE
 //
-include { FIND_ADAPTERS                    } from '../modules/local/find_adapters'
-include { EXTRACT_UMIS                     } from '../modules/local/extract_umis'
-include { SEQ_TO_FILE as SEQ_TO_FILE_REF   } from '../modules/local/seq_to_file'
-include { SEQ_TO_FILE as SEQ_TO_FILE_TEMPL } from '../modules/local/seq_to_file'
-include { ORIENT_REFERENCE                 } from '../modules/local/orient_reference'
-include { CIGAR_PARSER                     } from '../modules/local/cigar_parser'
-include { MERGING_SUMMARY                  } from '../modules/local/merging_summary'
-include { CLUSTERING_SUMMARY               } from '../modules/local/clustering_summary'
-include { ALIGNMENT_SUMMARY                } from '../modules/local/alignment_summary'
-include { TEMPLATE_REFERENCE               } from '../modules/local/template_reference'
-include { DUMMY_FINAL_UMI                  } from '../modules/local/dummy_final_umi'
+include { FIND_ADAPTERS                                   } from '../modules/local/find_adapters'
+include { EXTRACT_UMIS                                    } from '../modules/local/extract_umis'
+include { SEQ_TO_FILE as SEQ_TO_FILE_REF                  } from '../modules/local/seq_to_file'
+include { SEQ_TO_FILE as SEQ_TO_FILE_TEMPL                } from '../modules/local/seq_to_file'
+include { ORIENT_REFERENCE                                } from '../modules/local/orient_reference'
+include { CIGAR_PARSER                                    } from '../modules/local/cigar_parser'
+include { MERGING_SUMMARY                                 } from '../modules/local/merging_summary'
+include { CLUSTERING_SUMMARY                              } from '../modules/local/clustering_summary'
+include { ALIGNMENT_SUMMARY                               } from '../modules/local/alignment_summary'
+include { TEMPLATE_REFERENCE                              } from '../modules/local/template_reference'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -67,17 +66,66 @@ include { MULTIQC                                       } from '../modules/nf-co
 include { CUSTOM_DUMPSOFTWAREVERSIONS                   } from '../modules/nf-core/custom/dumpsoftwareversions/main'
 include { PEAR                                          } from '../modules/nf-core/pear/main'
 include { CAT_FASTQ                                     } from '../modules/nf-core/cat/fastq/main'
-include { SEQTK_SEQ                                     } from '../modules/nf-core/seqtk/seq/main'
+include { SEQTK_SEQ as SEQTK_SEQ_MASK                   } from '../modules/nf-core/seqtk/seq/main'
+include { SEQTK_SEQ as SEQTK_SEQ_FATOFQ                 } from '../modules/nf-core/seqtk/seq/main'
+include { VSEARCH_CLUSTER                               } from '../modules/nf-core/vsearch/cluster/main'
+include { VSEARCH_SORT                                  } from '../modules/nf-core/vsearch/sort/main'
+include { RACON as RACON_1                              } from '../modules/nf-core/racon/main'
+include { RACON as RACON_2                              } from '../modules/nf-core/racon/main'
 include { BOWTIE2_ALIGN                                 } from '../modules/nf-core/bowtie2/align/main'
 include { BOWTIE2_BUILD                                 } from '../modules/nf-core/bowtie2/build/main'
 include { BWA_MEM                                       } from '../modules/nf-core/bwa/mem/main'
 include { BWA_INDEX                                     } from '../modules/nf-core/bwa/index/main'
 include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_ORIGINAL     } from '../modules/nf-core/minimap2/align/main'
+include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_UMI_1        } from '../modules/nf-core/minimap2/align/main'
+include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_UMI_2        } from '../modules/nf-core/minimap2/align/main'
 include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_TEMPLATE     } from '../modules/nf-core/minimap2/align/main'
+include { SAMTOOLS_FAIDX                                } from '../modules/nf-core/samtools/faidx/main'
+include { MINIMAP2_INDEX                                } from '../modules/nf-core/minimap2/index/main'
+include { MEDAKA                                        } from '../modules/nf-core/medaka/main'
 include { CUTADAPT                                      } from '../modules/nf-core/cutadapt/main'
 include { SAMTOOLS_INDEX                                } from '../modules/nf-core/samtools/index/main'
 
 
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    DEFINE GROOVY FUNCTIONS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+def umi_to_sequence(cluster1) {
+    String line1
+    String sequences = ""
+    String sequence1
+    String id1
+    cluster1.withReader {
+        while ( line1=it.readLine() ) {
+            if (line1.startsWith(">")) {
+                sequence1 = (line1 =~ /;seq=(.*$)/)[0][1]
+                id1 = (line1 =~ /(>.*?);/)[0][1]
+                sequences = sequences + id1 + "\n" + sequence1 + "\n"
+            }
+        }
+    }
+    return sequences
+}
+
+def umi_to_sequence_centroid(cluster) {
+    String line
+    String sequence
+    String id
+    cluster.withReader {
+        while ( line=it.readLine() ) {
+            if (line.startsWith(">")) {
+                sequence = (line =~ /;seq=(.*$)/)[0][1]
+                id = (line =~ /(>.*?);/)[0][1]
+                return id.replace(">", ">centroid_") + "\n" + sequence
+            }
+        }
+    }
+}
+
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     RUN MAIN WORKFLOW
@@ -226,103 +274,311 @@ workflow CRISPRSEQ_TARGETED {
     )
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())
 
-    //
-    // MODULE: Find overrepresented sequences
-    FIND_ADAPTERS (
-        FASTQC.out.zip
-    )
-    .adapters
-    .join(ch_pear_fastq)
-    .groupTuple(by: [0])
-    // Separate samples by containing overrepresented sequences or not
-    .branch {
-        meta, adapter_seqs, reads ->
-            no_adapters: adapter_seqs[0].size() == 0
-                return [ meta, reads[0] ]
-            adapters   : adapter_seqs[0].size() > 0
-                return [ meta, reads[0], adapter_seqs[0] ]
-    }
-    .set { ch_adapter_seqs }
+    ch_trimmed = Channel.empty()
 
+    if (params.overrepresented) {
+        //
+        // MODULE: Find overrepresented sequences
+        FIND_ADAPTERS (
+            FASTQC.out.zip
+        )
+        .adapters
+        .join(ch_pear_fastq)
+        .groupTuple(by: [0])
+        // Separate samples by containing overrepresented sequences or not
+        .branch {
+            meta, adapter_seqs, reads ->
+                no_adapters: adapter_seqs[0].size() == 0
+                    return [ meta, reads[0] ]
+                adapters   : adapter_seqs[0].size() > 0
+                    return [ meta, reads[0], adapter_seqs[0] ]
+        }
+        .set { ch_adapter_seqs }
 
-    //
-    // MODULE: Trim adapter sequences
-    //
-    CUTADAPT (
-        ch_adapter_seqs.adapters
-    )
-    ch_versions = ch_versions.mix(CUTADAPT.out.versions)
 
-    ch_adapter_seqs.no_adapters
-    .mix(CUTADAPT.out.reads)
-    .groupTuple(by: [0])
-    .map {
-        meta, fastq ->
-                return [ meta, fastq.flatten() ]
+        //
+        // MODULE: Trim adapter sequences
+        //
+        CUTADAPT (
+            ch_adapter_seqs.adapters
+        )
+        ch_versions = ch_versions.mix(CUTADAPT.out.versions)
+
+        ch_adapter_seqs.no_adapters
+        .mix(CUTADAPT.out.reads)
+        .groupTuple(by: [0])
+        .map {
+            meta, fastq ->
+                    return [ meta, fastq.flatten() ]
+        }
+        .set{ ch_trimmed }
+    } else {
+        ch_trimmed = ch_pear_fastq
     }
-    .set{ ch_trimmed }
+
 
     //
     // MODULE: Mask (convert to Ns) bases with quality lower than 20 and remove sequences shorter than 80
     //
-    SEQTK_SEQ (
+    SEQTK_SEQ_MASK (
         ch_trimmed
     )
-    ch_versions = ch_versions.mix(SEQTK_SEQ.out.versions)
-
+    ch_versions = ch_versions.mix(SEQTK_SEQ_MASK.out.versions)
 
-    //
-    // MODULE: Summary of merged reads
-    //
-    MERGING_SUMMARY {
+    if (params.overrepresented) {
         ch_cat_fastq.paired
             .mix(ch_cat_fastq.single)
             .join(PEAR.out.assembled, remainder: true)
-            .join(SEQTK_SEQ.out.fastx)
+            .join(SEQTK_SEQ_MASK.out.fastx)
             .join(CUTADAPT.out.log)
+            .set { ch_merging_summary_data }
+    } else {
+        ch_cat_fastq.paired
+            .mix(ch_cat_fastq.single)
+            .join(PEAR.out.assembled, remainder: true)
+            .join(SEQTK_SEQ_MASK.out.fastx)
+            .combine(Channel.value("null"))
             .map { meta, reads, assembled, masked, trimmed ->
                 if (assembled == null) {
-                    dummy = file('null')
-                    return [ meta, reads, dummy, masked, trimmed ]
-                } else {
-                    return [ meta, reads, assembled, masked, trimmed ]
+                    assembled = file('null_a')
                 }
+                if (trimmed == "null") {
+                    trimmed = file('null_t')
+                }
+                return [ meta, reads, assembled, masked, trimmed ]
             }
+            .set { ch_merging_summary_data }
     }
 
-
-    /*
-    The UMI clustering step is posponed until the next release, the steps to be implemented are listed below:
-
     //
-    // MODULE: Extract UMI sequences
+    // MODULE: Summary of merged reads
     //
-    EXTRACT_UMIS (
-        SEQTK_SEQ.out.fastx
-    )
+    MERGING_SUMMARY {
+        ch_merging_summary_data
+    }
+
+
+    if (params.umi_clustering) {
+        //
+        // MODULE: Extract UMI sequences
+        //
+        EXTRACT_UMIS (
+            SEQTK_SEQ_MASK.out.fastx
+        )
+
+
+        //
+        // MODULE: Cluster UMIs
+        //
+        VSEARCH_CLUSTER (
+            EXTRACT_UMIS.out.fasta
+        )
+
+        //  Obtain a file with UBS (UBI bin size) and UMI ID
+        VSEARCH_CLUSTER.out.clusters
+        .transpose()
+        .collectFile( storeDir:params.outdir ) {
+            it ->
+                [ "${it[0].id}_ubs.txt", "${it[1].countFasta()}\t${it[1].baseName}\n" ]
+        }
+
+        // Branch the clusters into the ones containing only one sequence and the ones containing more than one sequences
+        VSEARCH_CLUSTER.out.clusters
+        .transpose()
+        .branch{
+            meta, cluster ->
+                single: cluster.countFasta() >= params.umi_bin_size && cluster.countFasta() == 1
+                    return [meta, cluster]
+                cluster: cluster.countFasta() >= params.umi_bin_size && cluster.countFasta() > 1
+                    return [meta, cluster]
+        }
+        .set{ ch_umi_bysize }
+
+        // Get the correspondent fasta sequencences from single clusters
+        ch_umi_bysize.single
+        .tap{ meta_channel }
+        .map{ meta, cluster ->
+            fasta_line = umi_to_sequence(cluster)
+            [meta, cluster.baseName, fasta_line]
+        }
+        .collectFile() { meta, name, fasta ->
+            [ "${name}_consensus.fasta", fasta ]
+        }
+        .map{ new_file ->
+            [new_file.baseName, new_file]
+        }
+        .join(meta_channel
+            .map { meta, original_file ->
+                ["${original_file.baseName}_consensus", meta]
+            })
+        .map{ file_name, new_file, meta ->
+            [meta, new_file]
+        }
+        .set{ ch_single_clusters_consensus }
+
+        //
+        // MODULE: Obtain most abundant UMI in cluster
+        //
+        VSEARCH_SORT(
+            ch_umi_bysize.cluster,
+            Channel.value("--sortbysize")
+        )
 
-    Modules to implement:
+        // Get the correspondent fasta sequencences from top cluster sequences
+        // Replaces the sequence name adding the "centroid_" prefix to avoid having two sequences with the same name in following steps
+        VSEARCH_SORT.out.fasta // [[id:sample_id, ...], sample_top.fasta]
+        .tap{ meta_channel_2 } // [[id:sample_id, ...], sample_top.fasta]
+        .map{ meta, fasta ->
+            fasta_line = umi_to_sequence_centroid(fasta)
+            [meta, fasta.baseName, fasta_line] // [[id:sample_id, ...], sample_top, >centroid_...]
+        }
+        .collectFile(cache:true,sort:true) { meta, name, fasta ->
+            [ "${name}.fasta", fasta ] // >centroid_... -> sample_top.fasta
+        }
+        .dump(tag:"collectfile output")
+        .map{ new_file ->
+            [new_file.baseName, new_file] // Substring is removing "_top" added by VSEARCH_SORT // [sample, sample_top.fasta]
+        }
+        .join(meta_channel_2
+            .map { meta, original_file ->
+                ["${original_file.baseName}", meta] // Substring is removing "_top" added by VSEARCH_SORT // [sample, [id:sample_id, ...]]
+            }) // [sample, sample_top.fasta, [id:sample_id, ...]]
+        .map{ file_name, new_file, meta ->
+            [meta + [cluster_id: file_name[0..-5]], new_file] // Add cluster ID to meta map // [[id:sample_id, ..., cluster_id:sample], sample_top.fasta]
+        }
+        .set{ ch_top_clusters_sequence } // [[id:sample_id, ..., cluster_id:sample], sample_top.fasta]
+
+
+        // Get the correspondent fasta sequencences from UMI clusters
+        ch_umi_bysize.cluster // [[id:sample_id, ...], sample]
+        .tap{ meta_channel_3 } // [[id:sample_id, ...], sample]
+        .map{ meta, cluster ->
+            fasta_line = umi_to_sequence(cluster)
+            [meta, cluster.baseName, fasta_line] // [[id:sample_id, ...], sample, >...]
+        }
+        .dump(tag:'map allreads output')
+        .collectFile(cache:true,sort:true) { meta, name, fasta ->
+            [ "${name}_sequences.fasta", fasta ] // >... -> sample_sequences.fasta
+        }
+        .dump(tag:"collectfile allreads output")
+        .map{ new_file ->
+            [new_file.baseName[0..-11], new_file] // Substring is removing "_sequences" added by collectFile // [sample, sample_sequences.fasta]
+        }
+        .join(meta_channel_3
+            .map { meta, original_file ->
+                ["${original_file.baseName}", meta] // [sample, [id:sample_id, ...]]
+            }) // [sample, sample_sequences.fasta, [id:sample_id, ...]]
+        .map{ file_name, new_file, meta ->
+            [meta + [cluster_id: file_name], new_file] // Add cluster ID to meta map // [[id:sample_id, ..., cluster_id:sample], sample_sequences.fasta]
+        }
+        .set{ ch_clusters_sequence }
 
-    vsearch
-    get_ubs
-    top_read
-    polishing: minimap2, racon
-    consensus
-    join_reads
-    fa2fq
 
-    */
+        // Cluster consensus & polishing
+        // Two cycles of minimap2 + racon
 
-    // Dummy process simulating the last UMi clustering step to obtain clusters as fastq
-    DUMMY_FINAL_UMI {
-        SEQTK_SEQ.out.fastx
+        //
+        // MODULE: Mapping with minimap2 - cycle 1
+        //
+        // Map each cluster against the top read (most abundant UMI) in the cluster
+        MINIMAP2_ALIGN_UMI_1 (
+            ch_clusters_sequence
+                .join(ch_top_clusters_sequence),
+            false, //output in paf format
+            false,
+            false
+        )
+
+        MINIMAP2_ALIGN_UMI_1.out.paf.dump(tag:'minimap_unfiltered')
+
+        // Only continue with clusters that have aligned sequences
+        MINIMAP2_ALIGN_UMI_1.out.paf
+            .filter{ it[1].countLines() > 0 }
+            .set{ ch_minimap_1 }
+
+        //
+        // MODULE: Improve top read from UMI cluster using cluster consensus - cycle 1
+        //
+        RACON_1 (
+            ch_clusters_sequence
+                .join(ch_top_clusters_sequence)
+                .join(ch_minimap_1)
+        )
+
+        //
+        // MODULE: Mapping with minimap2 - cycle 2
+        //
+        // Map each cluster against the top read (most abundant UMI) in the cluster
+        MINIMAP2_ALIGN_UMI_2 (
+            ch_clusters_sequence
+                .join(RACON_1.out.improved_assembly),
+            false, //output in paf format
+            false,
+            false
+        )
+
+        // Only continue with clusters that have aligned sequences
+        MINIMAP2_ALIGN_UMI_2.out.paf
+            .filter{ it[1].countLines() > 0 }
+            .set{ ch_minimap_2 }
+
+        //
+        // MODULE: Improve top read from UMI cluster using cluster consensus - cycle 2
+        //
+        RACON_2 (
+            ch_clusters_sequence
+                .join(RACON_1.out.improved_assembly)
+                .join(ch_minimap_2)
+        )
+
+
+        //
+        // MODULE: Obtain a consensus sequence
+        //
+        MEDAKA (
+            ch_clusters_sequence
+                .join(RACON_2.out.improved_assembly)
+        )
+
+        // Collect all consensus UMI sequences into one single file per sample
+        MEDAKA.out.assembly
+        .tap{ meta_channel_4 }
+        .map{ meta, file ->
+            file_content = file.getText()
+            [meta, file_content] // [[id:sample_id, ...], consensus_content]
+        }
+        .collectFile() { meta, file ->
+            [ "${meta.id}_consensus.fasta", file ]
+        }
+        .map{ new_file ->
+            [new_file.baseName, new_file]
+        }
+        .join(meta_channel_4
+            .map{ meta, consensus ->
+                ["${meta.id}_consensus", meta]
+            }
+        )
+        .map{ name, file, meta ->
+            [meta - meta.subMap('cluster_id'), file]
+        }
+        .set{ ch_umi_consensus }
+
+
+        //
+        // MODULE: Convert fasta to fastq
+        //
+        SEQTK_SEQ_FATOFQ (
+            ch_umi_consensus
+        )
     }
 
+    ch_preprocess_reads = params.umi_clustering ? SEQTK_SEQ_FATOFQ.out.fastx : SEQTK_SEQ_MASK.out.fastx
+
     //
     // MODULE: Summary of clustered reads
     //
     CLUSTERING_SUMMARY (
-        SEQTK_SEQ.out.fastx
+        ch_preprocess_reads
             .join(MERGING_SUMMARY.out.summary)
     )
 
@@ -332,7 +588,7 @@ workflow CRISPRSEQ_TARGETED {
     //
     if (params.aligner == "minimap2") {
         MINIMAP2_ALIGN_ORIGINAL (
-            SEQTK_SEQ.out.fastx
+            ch_preprocess_reads
                 .join(ORIENT_REFERENCE.out.reference),
             true,
             false,
@@ -351,7 +607,7 @@ workflow CRISPRSEQ_TARGETED {
         )
         ch_versions = ch_versions.mix(BWA_INDEX.out.versions)
         BWA_MEM (
-            SEQTK_SEQ.out.fastx,
+            ch_preprocess_reads,
             BWA_INDEX.out.index,
             true
         )
@@ -368,7 +624,7 @@ workflow CRISPRSEQ_TARGETED {
         )
         ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions)
         BOWTIE2_ALIGN (
-            SEQTK_SEQ.out.fastx,
+            ch_preprocess_reads,
             BOWTIE2_BUILD.out.index,
             false,
             true