Skip to content

Commit

Permalink
Add process datasets workflow script
Browse files Browse the repository at this point in the history
  • Loading branch information
KaiWaldrant committed Jul 11, 2024
1 parent b0b4273 commit 787920d
Showing 1 changed file with 55 additions and 0 deletions.
55 changes: 55 additions & 0 deletions src/workflows/process_datasets/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
include { findArgumentSchema } from "${meta.resources_dir}/helper.nf"

// Entry workflow: passes `params` and `meta.config` to findStates and pipes
// the result into meta.workflow.run, invoked with the option
// `auto: [publish: "state"]`.
// NOTE(review): findStates and meta are not defined or included in the
// visible part of this file -- presumably they are supplied by the
// Viash-generated wrapper; confirm.
// NOTE(review): `publish: "state"` presumably asks the runner to publish the
// final state of each item -- confirm against the Viash documentation.
workflow auto {
findStates(params, meta.config)
| meta.workflow.run(
auto: [publish: "state"]
)
}

// Main workflow. Each channel item is handled as an (id, state) pair and goes
// through four stages:
//   1. check_dataset_schema -- check state.input against the "input"
//      argument schema looked up in meta.config
//   2. filter               -- drop items whose check did not exit with code 0
//   3. process_dataset      -- run on the dataset that passed the check
//   4. setState             -- keep only the three output_* keys in the state
workflow run_wf {
  take:
    input_ch

  main:
    output_ch = input_ch

      | check_dataset_schema.run(
        fromState: { id, state ->
          // Write the schema of the "input" argument to a temporary YAML
          // file so it can be handed to the checker as a file argument.
          def inputSchema = findArgumentSchema(meta.config, "input")
          def schemaFile = tempFile("schema.yaml")
          writeYaml(inputSchema, schemaFile)
          ["input": state.input, "schema": schemaFile]
        },
        toState: { id, output, state ->
          // The checker's output is read as YAML; an exit_code of 0 is
          // treated as "dataset passed the check".
          def report = readYaml(output.output)
          def passed = report["exit_code"] == 0
          // Items that failed get a null "dataset" entry, which the
          // filter stage below uses to discard them.
          def checkedDataset = passed ? state.input : null
          state + ["dataset": checkedDataset]
        }
      )

      // discard every item that did not pass the schema check
      | filter { id, state -> state.dataset != null }

      | process_dataset.run(
        fromState: [input: "dataset"],
        toState: [
          output_train: "output_train",
          output_test: "output_test",
          output_solution: "output_solution"
        ]
      )

      // reduce the state to the three output entries
      | setState(["output_train", "output_test", "output_solution"])

  emit:
    output_ch
}

0 comments on commit 787920d

Please sign in to comment.