Skip to content

Commit

Permalink
fast5_to_pod5 and dorado (not demultiplexed)
Browse files Browse the repository at this point in the history
  • Loading branch information
yuukiiwa committed Oct 17, 2023
1 parent 2020960 commit 1867595
Show file tree
Hide file tree
Showing 14 changed files with 340 additions and 65 deletions.
48 changes: 27 additions & 21 deletions conf/test.config
Original file line number Diff line number Diff line change
@@ -1,33 +1,39 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/nanoseq -profile test_nobc_dx,<docker/singularity>
*/
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/nanoseq -profile test,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/

params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources
max_cpus = 2
max_memory = 6.GB
max_time = 12.h
// Limit resources so that this can run on GitHub Actions
max_cpus = 10
max_memory = '6.GB'
max_time = '6.h'

// Input data to perform demultiplexing
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_nobc_dx.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.fa'
gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.gtf'
run_nanolyse = true
protocol = 'DNA'
// Input data to perform both basecalling and demultiplexing
input = 'https://raw.githubusercontent.com/yuukiiwa/test-datasets/nanoseq/3.2/samplesheet/samplesheet_bc_dx.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa'
protocol = 'cDNA'
flowcell = 'FLO-MIN106'
kit = 'SQK-DCS109'
barcode_kit = 'NBD103/NBD104'
input_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/fastq/nondemultiplexed/sample_nobc_dx.fastq.gz'
skip_bigwig = true
skip_bigbed = true
trim_barcodes=true
output_demultiplex_fast5 = true
run_nanolyse = true
skip_quantification = true
skip_fusion_analysis= true
skip_modification_analysis=true
aligner = 'graphmap2'

// This variable is just for reference and isn't actually required for the tests
// Files are downloaded and staged using the "GetTestData" process
input_path = '/home/wanyk/3.2/test-datasets/fast5/barcoded_multi/'
}
35 changes: 35 additions & 0 deletions conf/test_bc_nodx.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/nanoseq -profile test_bc_nodx,<docker/singularity>
*/

params {
    // Profile identification
    config_profile_name        = 'Test profile'
    config_profile_description = 'Minimal test dataset to check pipeline function'

    // Resource ceilings applied while this test profile is active
    max_cpus   = 10
    max_memory = 6.GB
    max_time   = 12.h

    // Samplesheet, reference and sequencing chemistry for the
    // "basecalling without demultiplexing" test
    input    = 'https://raw.githubusercontent.com/yuukiiwa/test-datasets/nanoseq/3.2/samplesheet/samplesheet_bc_nodx.csv'
    fasta    = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa'
    protocol = 'cDNA'
    flowcell = 'FLO-MIN106'
    kit      = 'SQK-DCS108'

    // Pipeline stages switched off for this test
    skip_demultiplexing        = true
    skip_bigwig                = true
    skip_bigbed                = true
    skip_quantification        = true
    skip_fusion_analysis       = true
    skip_modification_analysis = true

    // Kept for reference only; it isn't actually required for the tests.
    // Files are downloaded and staged using the "GetTestData" process.
    input_path = '/home/wanyk/3.2/test-datasets/fast5/nonbarcoded_multi/'
}
33 changes: 33 additions & 0 deletions conf/test_nobc_dx.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/nanoseq -profile test_nobc_dx,<docker/singularity>
*/

params {
    // Profile identification
    config_profile_name        = 'Test profile'
    config_profile_description = 'Minimal test dataset to check pipeline function'

    // Resource ceilings applied while this test profile is active
    max_cpus   = 2
    max_memory = 6.GB
    max_time   = 12.h

    // Samplesheet and reference files for the demultiplexing-only test
    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_nobc_dx.csv'
    fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.fa'
    gtf   = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.gtf'

    // Reads are supplied as a non-demultiplexed FASTQ, so basecalling is skipped
    input_path       = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/fastq/nondemultiplexed/sample_nobc_dx.fastq.gz'
    protocol         = 'DNA'
    barcode_kit      = 'NBD103/NBD104'
    skip_basecalling = true
    run_nanolyse     = true

    // Pipeline stages switched off for this test
    skip_bigwig                = true
    skip_bigbed                = true
    skip_quantification        = true
    skip_fusion_analysis       = true
    skip_modification_analysis = true
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ params {
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_nobc_nodx_noaln.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_1-17550000.fa'
gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_1-17500000.gtf'
skip_basecalling = true
protocol = 'directRNA'
skip_demultiplexing = true
skip_alignment = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ params {
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_nobc_nodx_rnamod.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/modification_transcriptome_subset.fa'
gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/modification_transcriptome_subset.gtf'
skip_basecalling = true
protocol = 'directRNA'
run_nanolyse = true
skip_bigbed = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ params {
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.fa'
gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.gtf'
protocol = 'directRNA'
skip_basecalling = true
skip_demultiplexing = true
skip_fusion_analysis= true
skip_modification_analysis=true
Expand Down
1 change: 1 addition & 0 deletions conf/test_nodx_vc.config → conf/test_nobc_nodx_vc.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ params {
// Input data to skip demultiplexing and variant call
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_nobc_nodx_vc.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa'
skip_basecalling = true
protocol = 'DNA'
skip_quantification = true
skip_demultiplexing = true
Expand Down
39 changes: 39 additions & 0 deletions conf/test_withpull.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/nanoseq -profile test,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/

params {
    // Profile identification
    config_profile_name        = 'Test profile'
    config_profile_description = 'Minimal test dataset to check pipeline function'

    // Resource ceilings, kept small so that this can run on GitHub Actions
    max_cpus   = 2
    max_memory = '6.GB'
    max_time   = '6.h'

    // Samplesheet, reference and sequencing chemistry for the
    // combined basecalling + demultiplexing test
    input       = 'https://raw.githubusercontent.com/yuukiiwa/test-datasets/nanoseq/3.2/samplesheet/samplesheet_bc_dx.csv'
    fasta       = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa'
    protocol    = 'cDNA'
    flowcell    = 'FLO-MIN106'
    kit         = 'SQK-DCS109'
    barcode_kit = 'EXP-NBD103'

    // Options enabled for this test
    trim_barcodes            = true
    output_demultiplex_fast5 = true
    run_nanolyse             = true

    // Pipeline stages switched off for this test
    skip_quantification        = true
    skip_fusion_analysis       = true
    skip_modification_analysis = true

    // Kept for reference only; it isn't actually required for the tests.
    // Files are downloaded and staged using the "GetTestData" process.
    input_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/fast5/barcoded/'
}
38 changes: 38 additions & 0 deletions modules/local/dorado.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
 * Basecall raw signal data with `dorado basecaller` on CPU and emit a gzipped FASTQ.
 *
 * Inputs
 *   input_path    : staged path passed to `dorado basecaller` as the data argument
 *   meta          : metadata value (accepted for interface compatibility; not read by the script)
 *   dorado_config : staged config path (accepted for interface compatibility; not read by the script)
 *   dorado_model  : staged model path; used only when params.dorado_model points at an existing local path
 *
 * Outputs
 *   fastq    : basecall.fastq.gz
 *   versions : versions.yml holding the dorado version string
 */
process DORADO {
    label 'process_medium'

    container "docker.io/ontresearch/dorado"

    input:
    path(input_path)
    val meta
    path dorado_config
    path dorado_model

    output:
    path "*.fastq.gz"   , emit: fastq
    path "versions.yml" , emit: versions

    script:
    // NOTE(review): the model name in the original script was garbled to
    // "[email protected]" (e-mail obfuscation of a "<name>@<version>" token). The
    // fallback below is a presumed replacement -- confirm the intended default
    // model for the supported flowcells before merging.
    def fallback_model = 'dna_r10.4.1_e8.2_400bps_hac@v4.1.0'

    // A model supplied as an existing local path is staged through the
    // `dorado_model` input and used as-is; a model name (or no model at all)
    // is fetched with `dorado download` into the task directory first.
    def model_is_local = params.dorado_model && file(params.dorado_model).exists()
    def model_arg      = model_is_local ? "./$dorado_model" : (params.dorado_model ?: fallback_model)
    def download_cmd   = model_is_local ? "" : "dorado download --model $model_arg"
    """
    $download_cmd
    dorado basecaller $model_arg $input_path --device cpu --emit-fastq > basecall.fastq
    gzip basecall.fastq

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        dorado: \$(echo \$(dorado --version 2>&1) | sed -r 's/.{81}//')
    END_VERSIONS
    """
}

26 changes: 26 additions & 0 deletions modules/local/fast5_to_pod5.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
 * Convert FAST5 input into a single POD5 file with `pod5 convert fast5`.
 *
 * Input
 *   input_path : staged FAST5 path handed to the converter
 *
 * Output
 *   pod5 : the `pod5/` directory containing `converted.pod5`
 *
 * A versions.yml is written into the task directory but is not declared as an
 * output here, so the set of output channels is unchanged for existing callers.
 */
process FAST5_TO_POD5 {
    label 'process_medium'

    // NOTE(review): these conda packages (r-base, bambu, bsgenome) do not look
    // related to the `pod5` command this process runs -- confirm and replace
    // with a package that actually provides pod5.
    conda "conda-forge::r-base=4.0.3 bioconda::bioconductor-bambu=3.0.8 bioconda::bioconductor-bsgenome=1.66.0"
    container "docker.io/yuukiiwa/pod5:0.2.4"

    input:
    path input_path

    output:
    path "pod5/" , emit: pod5

    when:
    task.ext.when == null || task.ext.when

    script:
    // `def` keeps the variable local to this task instead of leaking into the
    // shared script binding.
    def output_name = "pod5/converted.pod5"
    """
    # Ensure the declared output directory exists before converting.
    mkdir -p pod5
    pod5 convert fast5 $input_path --output $output_name

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pod5: \$(pod5 --version 2>&1 | awk '{print \$NF}')
    END_VERSIONS
    """
}
2 changes: 1 addition & 1 deletion modules/local/get_test_data.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ process GET_TEST_DATA {
container "docker.io/yuukiiwa/git:latest"

output:
path "test-datasets/fast5/$barcoded/*" , emit: ch_input_fast5s_path
path "test-datasets/fast5/$barcoded/" , emit: ch_input_fast5_dir_path
path "test-datasets/modification_fast5_fastq/", emit: ch_input_dir_path
path "versions.yml" , emit: versions

Expand Down
3 changes: 2 additions & 1 deletion modules/nf-core/qcat/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 17 additions & 6 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,24 @@ params {
gtf = null


// Options: Demultiplexing
// Options: Basecalling and Demultiplexing
input_path = null
flowcell = null
kit = null
barcode_kit = null
barcode_both_ends = false
trim_barcodes = false
dorado_config = null
dorado_model = null
dorado_gpu = false
dorado_gpu_runners = 6
dorado_cpu_threads = 1
gpu_device = 'auto'
gpu_cluster_options = null
output_demultiplex_fast5 = false
qcat_min_score = 60
qcat_detect_middle = false
skip_basecalling = false
skip_demultiplexing = false

// Options: Raw read cleaning
Expand Down Expand Up @@ -221,12 +230,14 @@ profiles {
executor.cpus = 16
executor.memory = 60.GB
}
test { includeConfig 'conf/test.config' }
test_full { includeConfig 'conf/test_full.config' }
test { includeConfig 'conf/test.config' }
test_nodx_stringtie { includeConfig 'conf/test_nodx_stringtie.config' }
test_nodx_noaln { includeConfig 'conf/test_nodx_noaln.config' }
test_nodx_vc { includeConfig 'conf/test_nodx_vc.config' }
test_nodx_rnamod { includeConfig 'conf/test_nodx_rnamod.config' }
test_bc_nodx { includeConfig 'conf/test_bc_nodx.config' }
test_nobc_dx { includeConfig 'conf/test_nobc_dx.config' }
test_nobc_nodx_stringtie { includeConfig 'conf/test_nobc_nodx_stringtie.config' }
test_nobc_nodx_noaln { includeConfig 'conf/test_nobc_nodx_noaln.config' }
test_nobc_nodx_vc { includeConfig 'conf/test_nobc_nodx_vc.config' }
test_nobc_nodx_rnamod { includeConfig 'conf/test_nobc_nodx_rnamod.config' }
}


Expand Down
Loading

0 comments on commit 1867595

Please sign in to comment.