forked from nf-core/modules
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added cleanworkdirs and rmemptyfastqs functions (nf-core#4)
- Loading branch information
Showing
13 changed files
with
299 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file added
BIN
+691 KB
assets/test-datasets/genomics/sarscov2/illumina/fastq/cleaned.fastq.gz
Binary file not shown.
Binary file added
BIN
+32 Bytes
assets/test-datasets/genomics/sarscov2/illumina/fastq/empty.fastq.gz
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
// create a function to identify work dirs to clean | ||
def getWorkDirs(ch_to_clean, ch_downstream, print_output) { | ||
// combine channel to clean and dependent channel to clean only channels that have an output | ||
def ch_double_workdir1 = Channel.empty() | ||
def ch_double_workdir2 = Channel.empty() | ||
def ch_branch = ch_to_clean.combine(ch_downstream, by:0) | ||
.branch { it -> | ||
double_ch: it.size() > 3 | ||
single_ch: true | ||
} | ||
ch_double_workdir1 = ch_branch.double_ch | ||
.map { meta, ch_to_clean1, ch_to_clean2, ch_dep -> | ||
return [ [ meta ], ch_to_clean1, ch_dep ] | ||
} | ||
ch_double_workdir2 = ch_branch.double_ch | ||
.map { meta, ch_to_clean1, ch_to_clean2, ch_dep -> | ||
return [ [ meta ], ch_to_clean2, ch_dep ] | ||
} | ||
def ch_workdirs = ch_double_workdir1.mix(ch_double_workdir2).mix(ch_branch.single_ch) | ||
// filter to retain work directory | ||
.map { meta, files_to_clean, dependent_files -> | ||
// do not clean directory if it is not a work directory | ||
if (( files_to_clean =~ /(^.*\/work\/[^\/]+\/[^\/]+\/).*/ )) { | ||
def dir_to_clean = ( files_to_clean =~ /(^.*\/work\/[^\/]+\/[^\/]+\/).*/ )[0][1] | ||
return [ [ id: meta.id ], dir_to_clean ] | ||
} | ||
} | ||
// remove redundancy | ||
.unique() | ||
|
||
// print output for tests, otherwise should be false | ||
if (print_output) { | ||
ch_workdirs.view() | ||
} | ||
return ch_workdirs | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
name: "functions_getworkdirs" | ||
description: A function for extracting work directories from Nextflow channels. | ||
keywords: | ||
- function | ||
- work | ||
- clean | ||
input: | ||
- ch_to_clean: | ||
type: channel | ||
description: A Nextflow channel containing files to be cleaned. | ||
- ch_downstream: | ||
type: channel | ||
description: A Nextflow channel containing files produced downstream of files that will be cleaned. | ||
output: | ||
- ch_workdirs: | ||
type: channel | ||
description: A nextflow channel containing the meta.id and the work directory path to be cleaned. | ||
authors: | ||
- "@CarsonJM" | ||
maintainers: | ||
- "@CarsonJM" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
nextflow_function { | ||
|
||
name "Test getWorkDirs" | ||
script "../main.nf" | ||
|
||
test("getWorkDirs single") { | ||
function "getWorkDirs" | ||
when { | ||
function { | ||
""" | ||
input[0] = Channel.of( | ||
[ | ||
[ id:'test' ], // meta map | ||
"/somepath/work/12/hashforworkdirtoclean/somefile.txt" | ||
] | ||
) | ||
input[1] = Channel.of( | ||
[ | ||
[ id:'test' ], // meta map | ||
"/somepath/work/34/hashforworkdirdownstream/someotherfile.txt" | ||
] | ||
) | ||
input[2] = true | ||
""" | ||
} | ||
} | ||
then { | ||
assertAll ( | ||
{ assert function.success }, | ||
{ assert snapshot(function.stdout).match() } | ||
) | ||
} | ||
} | ||
|
||
test("getWorkDirs double") { | ||
function "getWorkDirs" | ||
when { | ||
function { | ||
""" | ||
input[0] = Channel.of( | ||
[ | ||
[ id:'test' ], // meta map | ||
"/somepath/work/12/hashforworkdirtoclean/somefile.txt" | ||
], | ||
[ | ||
[ id:'test' ], // meta map | ||
"/somepath/work/24/hashforworkdirtoclean/somefile.txt" | ||
] | ||
) | ||
input[1] = Channel.of( | ||
[ | ||
[ id:'test' ], // meta map | ||
"/somepath/work/34/hashforworkdirdownstream/someotherfile.txt" | ||
] | ||
) | ||
input[2] = true | ||
""" | ||
} | ||
} | ||
then { | ||
assertAll ( | ||
{ assert function.success }, | ||
{ assert snapshot(function.stdout).match() } | ||
) | ||
} | ||
} | ||
} |
27 changes: 27 additions & 0 deletions
27
modules/nf-core/functions/getworkdirs/tests/main.nf.test.snap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
{ | ||
"getWorkDirs double": { | ||
"content": [ | ||
[ | ||
"[[id:test], /somepath/work/12/hashforworkdirtoclean/]", | ||
"[[id:test], /somepath/work/24/hashforworkdirtoclean/]" | ||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.4" | ||
}, | ||
"timestamp": "2024-09-02T08:00:08.332845613" | ||
}, | ||
"getWorkDirs single": { | ||
"content": [ | ||
[ | ||
"[[id:test], /somepath/work/12/hashforworkdirtoclean/]" | ||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.4" | ||
}, | ||
"timestamp": "2024-09-02T08:00:03.219909745" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
functions/getworkdirs: | ||
- "modules/nf-core/functions/getworkdirs/**" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// create a function that filters channels to remove empty fastq files | ||
def rmEmptyFastQs(ch_fastqs, print_output) { | ||
def ch_nonempty_fastqs = ch_fastqs | ||
.filter { meta, fastq -> | ||
if ( meta.single_end ) { | ||
try { | ||
fastq.countFastq(limit: 10) > 1 | ||
} catch (java.util.zip.ZipException e) { | ||
log.warn "[HoffLab/phageannotator]: ${fastq} is not in GZIP format, this is likely because it was cleaned with --remove_intermediate_files" | ||
true | ||
} catch (EOFException) { | ||
log.warn "[HoffLab/phageannotator]: ${fastq} has an EOFException, this is likely an empty gzipped file." | ||
} | ||
} else { | ||
try { | ||
fastq[0].countLines( limit: 10 ) > 1 | ||
} catch (java.util.zip.ZipException e) { | ||
log.warn "[HoffLab/phageannotator]: ${fastq} is not in GZIP format, this is likely because it was cleaned with --remove_intermediate_files" | ||
true | ||
} catch (EOFException) { | ||
log.warn "[HoffLab/phageannotator]: ${fastq[0]} has an EOFException, this is likely an empty gzipped file." | ||
} | ||
} | ||
} | ||
|
||
if (print_output) { | ||
ch_nonempty_fastqs.view() | ||
} | ||
return ch_nonempty_fastqs | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
name: "functions_rmemptyfastqs" | ||
description: A function for removing empty FastQ files from Nextflow channels. | ||
keywords: | ||
- function | ||
- work | ||
- empty | ||
- fastq | ||
input: | ||
- ch_fastqs: | ||
type: channel | ||
description: A Nextflow channel containing FastQ files. | ||
output: | ||
- ch_nonempty_fastqs: | ||
type: channel | ||
description: A Nextflow channel containing only channel objects with non-empty FastQ files. | ||
authors: | ||
- "@CarsonJM" | ||
maintainers: | ||
- "@CarsonJM" |
63 changes: 63 additions & 0 deletions
63
modules/nf-core/functions/rmemptyfastqs/tests/main.nf.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
nextflow_function { | ||
|
||
name "Test rmEmptyFastQs" | ||
script "../main.nf" | ||
|
||
|
||
test("paired end + cleaned.fasta.gz") { | ||
function "rmEmptyFastQs" | ||
when { | ||
function { | ||
""" | ||
input[0] = Channel.of( | ||
[ | ||
[ id:'nonempty', single_end:false], // meta map | ||
[ | ||
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkifExists: true), | ||
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkifExists: true) | ||
] | ||
], | ||
[ | ||
[ id:'cleaned', single_end:true ], // meta map | ||
file("${projectDir}/assets/test-datasets/genomics/sarscov2/illumina/fastq/cleaned.fastq.gz", checkIfExists: true) | ||
] | ||
) | ||
input[1] = true | ||
""" | ||
} | ||
} | ||
then { | ||
assertAll ( | ||
{ assert function.success }, | ||
{ assert snapshot(function.stdout).match() } | ||
) | ||
} | ||
} | ||
|
||
test("single end + empty.fastq.gz") { | ||
function "rmEmptyFastQs" | ||
when { | ||
function { | ||
""" | ||
input[0] = Channel.of( | ||
[ | ||
[ id:'nonempty', single_end: true], // meta map | ||
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkifExists: true) | ||
], | ||
[ | ||
[ id:'empty', single_end:true ], // meta map | ||
file("${projectDir}/assets/test-datasets/genomics/sarscov2/illumina/fastq/empty.fastq.gz", checkIfExists: true) | ||
] | ||
) | ||
input[1] = true | ||
""" | ||
} | ||
} | ||
then { | ||
assertAll ( | ||
{ assert function.success }, | ||
{ assert snapshot(function.stdout).match() } | ||
) | ||
} | ||
} | ||
} |
27 changes: 27 additions & 0 deletions
27
modules/nf-core/functions/rmemptyfastqs/tests/main.nf.test.snap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
{ | ||
"paired end + cleaned.fasta.gz": { | ||
"content": [ | ||
[ | ||
"[[id:nonempty, single_end:false], [/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz, /nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz]]", | ||
"[[id:cleaned, single_end:true], /mmfs1/gscratch/pedslabs_hoffman/carsonjm/nf-microbe/modules/assets/test-datasets/genomics/sarscov2/illumina/fastq/cleaned.fastq.gz]" | ||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.4" | ||
}, | ||
"timestamp": "2024-09-02T08:13:09.384239509" | ||
}, | ||
"single end + empty.fastq.gz": { | ||
"content": [ | ||
[ | ||
"[[id:nonempty, single_end:true], /nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz]" | ||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.4" | ||
}, | ||
"timestamp": "2024-09-02T08:13:15.124346278" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
functions/getworkdirs: | ||
- "modules/nf-core/functions/getworkdirs/**" |