Skip to content

Commit

Permalink
update scripts and wf
Browse files Browse the repository at this point in the history
  • Loading branch information
rcannood committed Sep 24, 2024
1 parent 21faea1 commit 38f6a76
Show file tree
Hide file tree
Showing 9 changed files with 36 additions and 35 deletions.
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
@@ -1 +1 @@
process.container = 'nextflow/bash:latest'
process.container = 'nextflow/bash:latest'
7 changes: 3 additions & 4 deletions scripts/create_resources/process_10x_xenium.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ param_list:
segmentation_id: [cell, nucleus]
- id: "10x_xenium/2023_10x_mouse_brain_xenium/rep2"
input: https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_1/Xenium_V1_FF_Mouse_Brain_MultiSection_2_outs.zip
input: https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_2/Xenium_V1_FF_Mouse_Brain_MultiSection_2_outs.zip
dataset_name: "Xenium V1 Fresh Frozen Mouse Brain replicate 2"
dataset_url: "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard"
dataset_summary: "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1)."
Expand All @@ -32,7 +32,7 @@ param_list:
segmentation_id: [cell, nucleus]
- id: "10x_xenium/2023_10x_mouse_brain_xenium/rep3"
input: https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_1/Xenium_V1_FF_Mouse_Brain_MultiSection_3_outs.zip
input: https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_3/Xenium_V1_FF_Mouse_Brain_MultiSection_3_outs.zip
dataset_name: "Xenium V1 Fresh Frozen Mouse Brain replicate 3"
dataset_url: "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard"
dataset_summary: "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1)."
Expand All @@ -45,13 +45,12 @@ output_state: "\$id/state.yaml"
publish_dir: "$publish_dir"
HERE

tw launch openproblems-bio/task_ist_preprocessing \
tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
--revision build/main \
--pull-latest \
--main-script target/nextflow/datasets/workflows/process_tenx_xenium/main.nf \
--workspace 53907369739130 \
--compute-env 6TeIFgV5OY4pJCk8I0bfOh \
--params-file /tmp/params.yaml \
--entry-name auto \
--config common/nextflow_helpers/labels_tw.config \
--labels datasets,10x_xenium
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,12 @@ output_state: "\$id/state.yaml"
publish_dir: "$publish_dir"
HERE

tw launch openproblems-bio/task_ist_preprocessing \
tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
--revision build/main \
--pull-latest \
--main-script target/nextflow/datasets/workflows/process_allen_brain_cell_atlas/main.nf \
--workspace 53907369739130 \
--compute-env 6TeIFgV5OY4pJCk8I0bfOh \
--params-file /tmp/params.yaml \
--entry-name auto \
--config common/nextflow_helpers/labels_tw.config \
--labels datasets,allen_brain_cell_atlas
11 changes: 8 additions & 3 deletions scripts/create_resources/process_datasets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,26 @@ set -e
# Location of the raw, already-converted datasets (read by the workflow) and
# the destination prefix under which the processed datasets are published.
input_dir="s3://openproblems-data/resources/datasets"
publish_dir="s3://openproblems-data/resources/task_ist_preprocessing/datasets"

# Write the workflow parameters to the file that is handed to `tw launch`
# via --params-file.
#
# The heredoc delimiter is unquoted on purpose: $input_dir and $publish_dir
# are expanded by the shell now, whereas the escaped \$id is written out as a
# literal "$id" placeholder in the generated YAML.
#
# Every spatial replicate is paired with the same single-cell reference.
# Each entry needs a unique id, because the output paths below are derived
# from it; the third entry is rep3 (it was previously a duplicate of rep2).
cat > /tmp/params.yaml << HERE
param_list:
  - id: "mouse_brain_combined/rep1"
    input_sp: "$input_dir/10x_xenium/2023_10x_mouse_brain_xenium/rep1/dataset.zarr"
    input_sc: "$input_dir/allen_brain_cell_atlas/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad"
  - id: "mouse_brain_combined/rep2"
    input_sp: "$input_dir/10x_xenium/2023_10x_mouse_brain_xenium/rep2/dataset.zarr"
    input_sc: "$input_dir/allen_brain_cell_atlas/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad"
  - id: "mouse_brain_combined/rep3"
    input_sp: "$input_dir/10x_xenium/2023_10x_mouse_brain_xenium/rep3/dataset.zarr"
    input_sc: "$input_dir/allen_brain_cell_atlas/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad"
output_sc: "\$id/output_sc.h5ad"
output_sp: "\$id/output_sp.zarr"
output_state: "\$id/state.yaml"
publish_dir: "$publish_dir"
HERE

tw launch openproblems-bio/task_ist_preprocessing \
tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
--revision build/main \
--pull-latest \
--main-script target/nextflow/workflows/process_datasets/main.nf \
Expand All @@ -34,4 +39,4 @@ tw launch openproblems-bio/task_ist_preprocessing \
--params-file /tmp/params.yaml \
--entry-name auto \
--config common/nextflow_helpers/labels_tw.config \
--labels datasets,10x_xenium
--labels task_ist_preprocessing,process_datasets
13 changes: 2 additions & 11 deletions scripts/run_benchmark/run_full_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,6 @@ cd "$REPO_ROOT"
# please refer to the nextflow information for more details:
# https://www.nextflow.io/docs/latest/

# remove this when you have implemented the script
echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it."
echo " Step 1: replace 'task_template' with the name of the task in the following command."
echo " Step 2: replace the rename keys parameters to fit your run_benchmark inputs"
echo " Step 3: replace the settings parameter to fit your run_benchmark outputs"
echo " Step 4: remove this message"
exit 1

set -e

echo "Running benchmark on test data"
Expand All @@ -31,14 +23,13 @@ publish_dir="resources/results/${RUN_ID}"
# write the parameters to file
cat > /tmp/params.yaml << HERE
input_states: resources/datasets/**/state.yaml
rename_keys: 'input_train:output_train;input_test:output_test;input_solution:output_solution'
rename_keys: 'input_sc:output_sc;input_sp:output_sp'
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

# run the benchmark
nextflow run openproblems-bio/task_template \
--revision build/main \
nextflow run . \
-main-script target/nextflow/workflows/run_benchmark/main.nf \
-profile docker \
-resume \
Expand Down
24 changes: 11 additions & 13 deletions scripts/run_benchmark/run_full_seqeracloud.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,21 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

# remove this when you have implemented the script
echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it."
echo " Step 1: replace 'task_template' with the name of the task in the following command."
echo " Step 2: replace the rename keys parameters to fit your run_benchmark inputs"
echo " Step 3: replace the settings parameter to fit your run_benchmark outputs"
echo " Step 4: remove this message"
exit 1

set -e

# generate a unique id
RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
publish_dir="s3://openproblems-data/resources/task_template/results/${RUN_ID}"
publish_dir="s3://openproblems-data/resources/temp_ist_preprocessing/results/${RUN_ID}"

# write the parameters to file
cat > /tmp/params.yaml << HERE
input_states: s3://openproblems-data/resources/task_template/datasets/**/state.yaml
rename_keys: 'input_train:output_train;input_test:output_test;input_solution:output_solution'
input_states: s3://openproblems-data/resources/temp_ist_preprocessing/datasets/**/state.yaml
rename_keys: 'input_sc:output_sc;input_sp:output_sp'
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

tw launch https://github.com/openproblems-bio/task_template.git \
tw launch https://github.com/openproblems-bio/temp_ist_preprocessing.git \
--revision build/main \
--pull-latest \
--main-script target/nextflow/workflows/run_benchmark/main.nf \
Expand All @@ -37,4 +29,10 @@ tw launch https://github.com/openproblems-bio/task_template.git \
--params-file /tmp/params.yaml \
--entry-name auto \
--config common/nextflow_helpers/labels_tw.config \
--labels task_template,full
--labels temp_ist_preprocessing,full

# Preview a download of the benchmark results from S3 into the local
# resources/ directory, using the "op" AWS CLI profile. Because of --dryrun
# the AWS CLI only prints the operations it would perform; nothing is copied.
# NOTE(review): other scripts in this change use the name
# "task_ist_preprocessing"; confirm that the "temp_ist_preprocessing" prefix
# used here is intended.
aws s3 sync \
s3://openproblems-data/resources/temp_ist_preprocessing/results \
resources/temp_ist_preprocessing/results \
--profile op \
--dryrun
6 changes: 6 additions & 0 deletions scripts/run_benchmark/run_test_seqeracloud.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,9 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
--params-file /tmp/params.yaml \
--config common/nextflow_helpers/labels_tw.config \
--labels task_template,test

# Preview a download of the test-run results from the temporary S3 location
# into the local temp_results/ directory, using the "op" AWS CLI profile.
# Because of --dryrun the AWS CLI only prints the operations it would
# perform; nothing is copied.
aws s3 sync \
s3://openproblems-nextflow/temp/results \
temp_results \
--profile op \
--dryrun
3 changes: 3 additions & 0 deletions src/workflows/run_benchmark/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,9 @@ workflow run_wf {
| extract_uns_metadata.run(
key: "extract_uns_scores",
fromState: [input: "output_metric"],
args: [
uns_length_cutoff: 100
],
toState: { id, output, state ->
state + [
score_uns: readYaml(output.output).uns
Expand Down
2 changes: 1 addition & 1 deletion src/workflows/run_benchmark/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ set -e

# export TOWER_WORKSPACE_ID=53907369739130

DATASETS_DIR="resources_test/task_template"
DATASETS_DIR="resources_test/task_ist_preprocessing"
OUTPUT_DIR="output/temp"

if [ ! -d "$OUTPUT_DIR" ]; then
Expand Down

0 comments on commit 38f6a76

Please sign in to comment.