Skip to content

Commit

Permalink
Added cleanworkdirs and rmemptyfastqs functions (nf-core#4)
Browse files Browse the repository at this point in the history
* Added cleanworkdirs and rmemptyfastqs functions

* CI Fix attempt #1

* Fix CI attempt #2
  • Loading branch information
CarsonJM authored Sep 2, 2024
1 parent c8f7f48 commit 7579fc0
Show file tree
Hide file tree
Showing 13 changed files with 299 additions and 14 deletions.
19 changes: 5 additions & 14 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ on:
type: choice
options:
- "ubuntu-latest"
- "self-hosted"
default: "self-hosted"

# Cancel if a newer run is started
concurrency:
Expand Down Expand Up @@ -135,7 +133,7 @@ jobs:
echo ${{ steps.outputs.outputs.subworkflows }}
nf-core-lint-modules:
runs-on: ${{ github.event.inputs.runners || 'self-hosted' }}
runs-on: ${{ github.event.inputs.runners }}
name: nf-core-lint-modules
needs: [pytest-changes, nf-test-changes]
if: ${{ (needs.pytest-changes.outputs.modules != '[]') || ( needs.nf-test-changes.outputs.modules != '[]') }}
Expand Down Expand Up @@ -182,7 +180,7 @@ jobs:

nf-core-lint-subworkflows:
runs-on: ubuntu-latest
name: nf-core-lint-modules
name: nf-core-lint-subworkflows
needs: [pytest-changes, nf-test-changes]
if: ${{ (needs.pytest-changes.outputs.subworkflows != '[]') || ( needs.nf-test-changes.outputs.subworkflows != '[]') }}
strategy:
Expand Down Expand Up @@ -219,7 +217,7 @@ jobs:
run: nf-core subworkflows lint ${{ matrix.tags }}

pytest:
runs-on: ${{ github.event.inputs.runners || 'self-hosted' }}
runs-on: ${{ github.event.inputs.runners }}
name: pytest
needs: [pytest-changes]
if: needs.pytest-changes.outputs.tags != '[]'
Expand Down Expand Up @@ -485,15 +483,15 @@ jobs:
!${{ github.workspace }}/.singularity
nf-test:
runs-on: ${{ github.event.inputs.runners || 'self-hosted' }}
runs-on: ${{ github.event.inputs.runners }}
name: nf-test
needs: [nf-test-changes]
if: ( needs.nf-test-changes.outputs.paths != '[]' )
strategy:
fail-fast: false
matrix:
path: ["${{ fromJson(needs.nf-test-changes.outputs.paths) }}"]
profile: [conda, docker_self_hosted, singularity]
profile: [conda, docker, singularity]
exclude:
- path: modules/nf-core/nf-test
- profile: conda
Expand Down Expand Up @@ -707,13 +705,6 @@ jobs:
SENTIEON_LICSRVR_IP: ${{ secrets.SENTIEON_LICSRVR_IP }}
SENTIEON_AUTH_MECH: "GitHub Actions - token"
run: |
# use "docker_self_hosted" if it runs on self-hosted runner and matrix.profile=docker
if [ "${{ matrix.profile }}" == "docker" ]; then
PROFILE="docker_self_hosted"
else
PROFILE=${{ matrix.profile }}
fi
NFT_WORKDIR=~ \
nf-test test \
--profile=${{ matrix.profile }} \
Expand Down
Binary file not shown.
Binary file not shown.
36 changes: 36 additions & 0 deletions modules/nf-core/functions/getworkdirs/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// create a function to identify work dirs to clean
def getWorkDirs(ch_to_clean, ch_downstream, print_output) {
// combine channel to clean and dependent channel to clean only channels that have an output
def ch_double_workdir1 = Channel.empty()
def ch_double_workdir2 = Channel.empty()
def ch_branch = ch_to_clean.combine(ch_downstream, by:0)
.branch { it ->
double_ch: it.size() > 3
single_ch: true
}
ch_double_workdir1 = ch_branch.double_ch
.map { meta, ch_to_clean1, ch_to_clean2, ch_dep ->
return [ [ meta ], ch_to_clean1, ch_dep ]
}
ch_double_workdir2 = ch_branch.double_ch
.map { meta, ch_to_clean1, ch_to_clean2, ch_dep ->
return [ [ meta ], ch_to_clean2, ch_dep ]
}
def ch_workdirs = ch_double_workdir1.mix(ch_double_workdir2).mix(ch_branch.single_ch)
// filter to retain work directory
.map { meta, files_to_clean, dependent_files ->
// do not clean directory if it is not a work directory
if (( files_to_clean =~ /(^.*\/work\/[^\/]+\/[^\/]+\/).*/ )) {
def dir_to_clean = ( files_to_clean =~ /(^.*\/work\/[^\/]+\/[^\/]+\/).*/ )[0][1]
return [ [ id: meta.id ], dir_to_clean ]
}
}
// remove redundancy
.unique()

// print output for tests, otherwise should be false
if (print_output) {
ch_workdirs.view()
}
return ch_workdirs
}
21 changes: 21 additions & 0 deletions modules/nf-core/functions/getworkdirs/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name: "functions_getworkdirs"
description: A function for extracting work directories from Nextflow channels.
keywords:
- function
- work
- clean
input:
- ch_to_clean:
type: channel
description: A Nextflow channel containing files to be cleaned.
- ch_downstream:
type: channel
description: A Nextflow channel containing files produced downstream of files that will be cleaned.
output:
- ch_workdirs:
type: channel
description: A nextflow channel containing the meta.id and the work directory path to be cleaned.
authors:
- "@CarsonJM"
maintainers:
- "@CarsonJM"
67 changes: 67 additions & 0 deletions modules/nf-core/functions/getworkdirs/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
nextflow_function {

name "Test getWorkDirs"
script "../main.nf"

test("getWorkDirs single") {
function "getWorkDirs"
when {
function {
"""
input[0] = Channel.of(
[
[ id:'test' ], // meta map
"/somepath/work/12/hashforworkdirtoclean/somefile.txt"
]
)
input[1] = Channel.of(
[
[ id:'test' ], // meta map
"/somepath/work/34/hashforworkdirdownstream/someotherfile.txt"
]
)
input[2] = true
"""
}
}
then {
assertAll (
{ assert function.success },
{ assert snapshot(function.stdout).match() }
)
}
}

test("getWorkDirs double") {
function "getWorkDirs"
when {
function {
"""
input[0] = Channel.of(
[
[ id:'test' ], // meta map
"/somepath/work/12/hashforworkdirtoclean/somefile.txt"
],
[
[ id:'test' ], // meta map
"/somepath/work/24/hashforworkdirtoclean/somefile.txt"
]
)
input[1] = Channel.of(
[
[ id:'test' ], // meta map
"/somepath/work/34/hashforworkdirdownstream/someotherfile.txt"
]
)
input[2] = true
"""
}
}
then {
assertAll (
{ assert function.success },
{ assert snapshot(function.stdout).match() }
)
}
}
}
27 changes: 27 additions & 0 deletions modules/nf-core/functions/getworkdirs/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"getWorkDirs double": {
"content": [
[
"[[id:test], /somepath/work/12/hashforworkdirtoclean/]",
"[[id:test], /somepath/work/24/hashforworkdirtoclean/]"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-02T08:00:08.332845613"
},
"getWorkDirs single": {
"content": [
[
"[[id:test], /somepath/work/12/hashforworkdirtoclean/]"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-02T08:00:03.219909745"
}
}
2 changes: 2 additions & 0 deletions modules/nf-core/functions/getworkdirs/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
functions/getworkdirs:
- "modules/nf-core/functions/getworkdirs/**"
30 changes: 30 additions & 0 deletions modules/nf-core/functions/rmemptyfastqs/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// create a function that filters channels to remove empty fastq files
def rmEmptyFastQs(ch_fastqs, print_output) {
def ch_nonempty_fastqs = ch_fastqs
.filter { meta, fastq ->
if ( meta.single_end ) {
try {
fastq.countFastq(limit: 10) > 1
} catch (java.util.zip.ZipException e) {
log.warn "[HoffLab/phageannotator]: ${fastq} is not in GZIP format, this is likely because it was cleaned with --remove_intermediate_files"
true
} catch (EOFException) {
log.warn "[HoffLab/phageannotator]: ${fastq} has an EOFException, this is likely an empty gzipped file."
}
} else {
try {
fastq[0].countLines( limit: 10 ) > 1
} catch (java.util.zip.ZipException e) {
log.warn "[HoffLab/phageannotator]: ${fastq} is not in GZIP format, this is likely because it was cleaned with --remove_intermediate_files"
true
} catch (EOFException) {
log.warn "[HoffLab/phageannotator]: ${fastq[0]} has an EOFException, this is likely an empty gzipped file."
}
}
}

if (print_output) {
ch_nonempty_fastqs.view()
}
return ch_nonempty_fastqs
}
19 changes: 19 additions & 0 deletions modules/nf-core/functions/rmemptyfastqs/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
name: "functions_rmemptyfastqs"
description: A function for removing empty FastQ files from Nextflow channels.
keywords:
- function
- work
- empty
- fastq
input:
- ch_fastqs:
type: channel
description: A Nextflow channel containing FastQ files.
output:
- ch_nonempty_fastqs:
type: channel
description: A Nextflow channel containing only channel objects with non-empty FastQ files.
authors:
- "@CarsonJM"
maintainers:
- "@CarsonJM"
63 changes: 63 additions & 0 deletions modules/nf-core/functions/rmemptyfastqs/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
nextflow_function {

name "Test rmEmptyFastQs"
script "../main.nf"


test("paired end + cleaned.fasta.gz") {
function "rmEmptyFastQs"
when {
function {
"""
input[0] = Channel.of(
[
[ id:'nonempty', single_end:false], // meta map
[
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkifExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkifExists: true)
]
],
[
[ id:'cleaned', single_end:true ], // meta map
file("${projectDir}/assets/test-datasets/genomics/sarscov2/illumina/fastq/cleaned.fastq.gz", checkIfExists: true)
]
)
input[1] = true
"""
}
}
then {
assertAll (
{ assert function.success },
{ assert snapshot(function.stdout).match() }
)
}
}

test("single end + empty.fastq.gz") {
function "rmEmptyFastQs"
when {
function {
"""
input[0] = Channel.of(
[
[ id:'nonempty', single_end: true], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkifExists: true)
],
[
[ id:'empty', single_end:true ], // meta map
file("${projectDir}/assets/test-datasets/genomics/sarscov2/illumina/fastq/empty.fastq.gz", checkIfExists: true)
]
)
input[1] = true
"""
}
}
then {
assertAll (
{ assert function.success },
{ assert snapshot(function.stdout).match() }
)
}
}
}
27 changes: 27 additions & 0 deletions modules/nf-core/functions/rmemptyfastqs/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"paired end + cleaned.fasta.gz": {
"content": [
[
"[[id:nonempty, single_end:false], [/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz, /nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz]]",
"[[id:cleaned, single_end:true], /mmfs1/gscratch/pedslabs_hoffman/carsonjm/nf-microbe/modules/assets/test-datasets/genomics/sarscov2/illumina/fastq/cleaned.fastq.gz]"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-02T08:13:09.384239509"
},
"single end + empty.fastq.gz": {
"content": [
[
"[[id:nonempty, single_end:true], /nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz]"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-02T08:13:15.124346278"
}
}
2 changes: 2 additions & 0 deletions modules/nf-core/functions/rmemptyfastqs/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
functions/getworkdirs:
- "modules/nf-core/functions/getworkdirs/**"

0 comments on commit 7579fc0

Please sign in to comment.