diff --git a/CHANGELOG.md b/CHANGELOG.md index afdf427..6cd7577 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,10 +34,13 @@ * Small changes to api file names (PR #13). +* Update test_resources path in components (PR #18). + ## BUG FIXES * Update the nextflow workflow dependencies (PR #17). +* Fix paths in scripts (PR #18). ## transfer from openproblems-v2 repository diff --git a/README.md b/README.md index c5def4a..05210b1 100644 --- a/README.md +++ b/README.md @@ -72,13 +72,15 @@ flowchart LR A subset of the common dataset. -Example file: `resources_test/common/pancreas/dataset.h5ad` +Example file: +`resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad` Format:
AnnData object + obs: 'batch' layers: 'counts' uns: 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism' @@ -90,6 +92,7 @@ Data structure: | Slot | Type | Description | |:---|:---|:---| +| `obs["batch"]` | `string` | (*Optional*) Batch information. | | `layers["counts"]` | `integer` | Raw counts. | | `uns["dataset_id"]` | `string` | A unique identifier for the dataset. | | `uns["dataset_name"]` | `string` | Nicely formatted name. | @@ -121,7 +124,8 @@ Arguments: The subset of molecules used for the test dataset -Example file: `resources_test/denoising/pancreas/test.h5ad` +Example file: +`resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad` Format: @@ -155,7 +159,8 @@ Data structure: The subset of molecules used for the training dataset -Example file: `resources_test/denoising/pancreas/train.h5ad` +Example file: +`resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad` Format: @@ -229,7 +234,8 @@ Arguments: A denoised dataset as output by a method. -Example file: `resources_test/denoising/pancreas/denoised.h5ad` +Example file: +`resources_test/task_denoising/cxg_mouse_pancreas_atlas/denoised.h5ad` Format: @@ -257,7 +263,8 @@ Data structure: File indicating the score of a metric. -Example file: `resources_test/denoising/pancreas/score.h5ad` +Example file: +`resources_test/task_denoising/cxg_mouse_pancreas_atlas/score.h5ad` Format: diff --git a/_viash.yaml b/_viash.yaml index 3a3b5ff..a31e89c 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -44,8 +44,8 @@ info: test_resources: - type: s3 - path: s3://openproblems-data/resources_test/denoising/ - dest: resources_test/denoising + path: s3://openproblems-data/resources_test/task_denoising/ + dest: resources_test/task_denoising - type: s3 path: s3://openproblems-data/resources_test/common/ dest: resources_test/common diff --git a/scripts/create_resources/resources.sh b/scripts/create_resources/resources.sh index a289f00..ae15999 100755 --- a/scripts/create_resources/resources.sh +++ b/scripts/create_resources/resources.sh @@ -14,7 +14,7 @@ output_state: "$id/state.yaml" publish_dir: s3://openproblems-data/resources/denoising/datasets HERE -tw launch https://github.com/openproblems-bio/task_template.git \ +tw launch https://github.com/openproblems-bio/task_denoising.git \ --revision build/main \ --pull-latest \ --main-script target/nextflow/workflows/process_datasets/main.nf \ diff --git a/scripts/create_resources/test_resources.sh b/scripts/create_resources/test_resources.sh index 980d179..866c924 100755 --- a/scripts/create_resources/test_resources.sh +++ b/scripts/create_resources/test_resources.sh @@ -6,15 +6,10 @@ REPO_ROOT=$(git rev-parse --show-toplevel) # ensure that the command below is run from the root of the repository cd "$REPO_ROOT" -# # remove this when you have implemented the script -# echo "TODO: replace the commands in this script with the sequence of components that you need to run to generate test_resources." -# echo " Inside this script, you will need to place commands to generate example files for each of the 'src/api/file_*.yaml' files." -# exit 1 - set -e RAW_DATA=resources_test/common -DATASET_DIR=resources_test/denoising +DATASET_DIR=resources_test/task_denoising mkdir -p $DATASET_DIR @@ -22,31 +17,29 @@ mkdir -p $DATASET_DIR viash run src/data_processors/process_dataset/config.vsh.yaml -- \ --input $RAW_DATA/cxg_mouse_pancreas_atlas/dataset.h5ad \ --output_train $DATASET_DIR/cxg_mouse_pancreas_atlas/train.h5ad \ - --output_test $DATASET_DIR/cxg_mouse_pancreas_atlas/test.h5ad \ - --output_solution $DATASET_DIR/cxg_mouse_pancreas_atlas/solution.h5ad + --output_test $DATASET_DIR/cxg_mouse_pancreas_atlas/test.h5ad # run one method viash run src/methods/magic/config.vsh.yaml -- \ - --input_train $DATASET_DIR/pancreas/train.h5ad \ - --output $DATASET_DIR/pancreas/denoised.h5ad + --input_train $DATASET_DIR/cxg_mouse_pancreas_atlas/train.h5ad \ + --output $DATASET_DIR/cxg_mouse_pancreas_atlas/denoised.h5ad # run one metric viash run src/metrics/poisson/config.vsh.yaml -- \ - --input_denoised $DATASET_DIR/pancreas/denoised.h5ad \ - --input_test $DATASET_DIR/pancreas/test.h5ad \ - --output $DATASET_DIR/pancreas/score.h5ad + --input_prediction $DATASET_DIR/cxg_mouse_pancreas_atlas/denoised.h5ad \ + --input_test $DATASET_DIR/cxg_mouse_pancreas_atlas/test.h5ad \ + --output $DATASET_DIR/cxg_mouse_pancreas_atlas/score.h5ad # write manual state.yaml. this is not actually necessary but you never know it might be useful cat > $DATASET_DIR/cxg_mouse_pancreas_atlas/state.yaml << HERE id: cxg_mouse_pancreas_atlas train: !file train.h5ad test: !file test.h5ad -solution: !file solution.h5ad prediction: !file denoised.h5ad score: !file score.h5ad HERE # only run this if you have access to the openproblems-data bucket -# aws s3 sync --profile op \ -# "$DATASET_DIR" s3://openproblems-data/resources_test/denoising \ -# --delete --dryrun +aws s3 sync --profile OP \ + "$DATASET_DIR" s3://openproblems-data/resources_test/task_denoising \ + --delete --dryrun diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh index a85bf75..30015fb 100755 --- a/scripts/run_benchmark/run_test_local.sh +++ b/scripts/run_benchmark/run_test_local.sh @@ -21,7 +21,7 @@ nextflow run . \ -resume \ -c common/nextflow_helpers/labels_ci.config \ --id cxg_mouse_pancreas_atlas \ - --input_train resources_test/denoising/cxg_mouse_pancreas_atlas/train.h5ad \ - --input_test resources_test/denoising/cxg_mouse_pancreas_atlas/test.h5ad \ + --input_train resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad \ + --input_test resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad \ --output_state state.yaml \ --publish_dir "$publish_dir" diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml index 0378baa..d64b5b0 100644 --- a/src/api/comp_control_method.yaml +++ b/src/api/comp_control_method.yaml @@ -29,5 +29,5 @@ test_resources: - type: python_script path: /common/component_tests/check_config.py - path: /common/library.bib - - path: /resources_test/denoising/pancreas - dest: resources_test/denoising/pancreas \ No newline at end of file + - path: /resources_test/task_denoising/cxg_mouse_pancreas_atlas + dest: resources_test/task_denoising/cxg_mouse_pancreas_atlas \ No newline at end of file diff --git a/src/api/comp_data_processor.yaml b/src/api/comp_data_processor.yaml index d3d24bb..a500c27 100644 --- a/src/api/comp_data_processor.yaml +++ b/src/api/comp_data_processor.yaml @@ -22,5 +22,5 @@ arguments: test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - - path: /resources_test/common/pancreas - dest: resources_test/common/pancreas + - path: /resources_test/common/cxg_mouse_pancreas_atlas + dest: resources_test/common/cxg_mouse_pancreas_atlas diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml index ef04c12..bfbd4fd 100644 --- a/src/api/comp_method.yaml +++ b/src/api/comp_method.yaml @@ -21,5 +21,5 @@ test_resources: - type: python_script path: /common/component_tests/check_config.py - path: /common/library.bib - - path: /resources_test/denoising/pancreas - dest: resources_test/denoising/pancreas \ No newline at end of file + - path: /resources_test/task_denoising/cxg_mouse_pancreas_atlas + dest: resources_test/task_denoising/cxg_mouse_pancreas_atlas \ No newline at end of file diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml index 354d0f4..bfccf93 100644 --- a/src/api/comp_metric.yaml +++ b/src/api/comp_metric.yaml @@ -25,5 +25,5 @@ test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - path: /common/library.bib - - path: /resources_test/denoising/pancreas - dest: resources_test/denoising/pancreas + - path: /resources_test/task_denoising/cxg_mouse_pancreas_atlas + dest: resources_test/task_denoising/cxg_mouse_pancreas_atlas diff --git a/src/api/file_common_dataset.yaml b/src/api/file_common_dataset.yaml index 8ad021f..e6174b9 100644 --- a/src/api/file_common_dataset.yaml +++ b/src/api/file_common_dataset.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/common/pancreas/dataset.h5ad" +example: "resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad" label: "Common Dataset" summary: A subset of the common dataset. info: diff --git a/src/api/file_prediction.yaml b/src/api/file_prediction.yaml index e732d66..ecb55ff 100644 --- a/src/api/file_prediction.yaml +++ b/src/api/file_prediction.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/denoising/pancreas/denoised.h5ad" +example: "resources_test/task_denoising/cxg_mouse_pancreas_atlas/denoised.h5ad" label: "Denoised data" summary: A denoised dataset as output by a method. info: diff --git a/src/api/file_score.yaml b/src/api/file_score.yaml index 3e80f6e..103a7cd 100644 --- a/src/api/file_score.yaml +++ b/src/api/file_score.yaml @@ -1,5 +1,5 @@ type: file -example: resources_test/denoising/pancreas/score.h5ad +example: resources_test/task_denoising/cxg_mouse_pancreas_atlas/score.h5ad label: Score summary: "File indicating the score of a metric." info: diff --git a/src/api/file_test.yaml b/src/api/file_test.yaml index 10dab87..1c6d202 100644 --- a/src/api/file_test.yaml +++ b/src/api/file_test.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/denoising/pancreas/test.h5ad" +example: "resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad" label: "Test data" summary: The subset of molecules used for the test dataset info: diff --git a/src/api/file_train.yaml b/src/api/file_train.yaml index 0d12edb..6b60dc1 100644 --- a/src/api/file_train.yaml +++ b/src/api/file_train.yaml @@ -1,5 +1,5 @@ type: file -example: "resources_test/denoising/pancreas/train.h5ad" +example: "resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad" label: "Training data" summary: The subset of molecules used for the training dataset info: diff --git a/src/control_methods/perfect_denoising/script.py b/src/control_methods/perfect_denoising/script.py index b957cd5..3ed780c 100644 --- a/src/control_methods/perfect_denoising/script.py +++ b/src/control_methods/perfect_denoising/script.py @@ -2,8 +2,8 @@ ## VIASH START par = { - 'input_train': 'resources_test/denoising/pancreas/train.h5ad', - 'input_test': 'resources_test/denoising/pancreas/test.h5ad', + 'input_train': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad', + 'input_test': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad', 'output': 'output_PD.h5ad', } meta = { diff --git a/src/data_processors/process_dataset/script.py b/src/data_processors/process_dataset/script.py index 0c0c7e1..65201a0 100644 --- a/src/data_processors/process_dataset/script.py +++ b/src/data_processors/process_dataset/script.py @@ -5,7 +5,7 @@ ## VIASH START par = { - 'input': "resources_test/common/pancreas/dataset.h5ad", + 'input': "resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad", 'output_train': "train.h5ad", 'output_test': "test.h5ad", 'train_frac': 0.9, diff --git a/src/methods/alra/script.R b/src/methods/alra/script.R index df9cc10..9f7536a 100644 --- a/src/methods/alra/script.R +++ b/src/methods/alra/script.R @@ -4,7 +4,7 @@ library(ALRA, warn.conflicts = FALSE) ## VIASH START par <- list( - input_train = "resources_test/denoising/pancreas/train.h5ad", + input_train = "resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad", norm = "log", output = "output.h5ad" ) diff --git a/src/methods/dca/script.py b/src/methods/dca/script.py index 7a5ea5e..a045ad4 100644 --- a/src/methods/dca/script.py +++ b/src/methods/dca/script.py @@ -3,7 +3,7 @@ ## VIASH START par = { - 'input_train': 'resources_test/denoising/pancreas/train.h5ad', + 'input_train': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad', 'output': 'output_dca.h5ad', 'epochs': 300, } diff --git a/src/methods/knn_smoothing/script.py b/src/methods/knn_smoothing/script.py index fd2fe6c..0fea7ed 100644 --- a/src/methods/knn_smoothing/script.py +++ b/src/methods/knn_smoothing/script.py @@ -3,7 +3,7 @@ ## VIASH START par = { - 'input_train': 'resources_test/denoising/pancreas/train.h5ad', + 'input_train': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad', 'output': 'output_knn.h5ad', } meta = { diff --git a/src/methods/magic/script.py b/src/methods/magic/script.py index b419153..2ca832d 100644 --- a/src/methods/magic/script.py +++ b/src/methods/magic/script.py @@ -7,7 +7,7 @@ ## VIASH START par = { - "input_train": "resources_test/denoising/pancreas/train.h5ad", + "input_train": "resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad", "output": "output_magic.h5ad", "solver": "exact", "norm": "sqrt", diff --git a/src/methods/saver/script.R b/src/methods/saver/script.R index 373133e..8c6c320 100644 --- a/src/methods/saver/script.R +++ b/src/methods/saver/script.R @@ -5,7 +5,7 @@ library(Matrix, warn.conflicts = FALSE) ## VIASH START par <- list( - input_train = "resources_test/denoising/pancreas/train.h5ad", + input_train = "resources_test/task_denoising/cxg_mouse_pancreas_atlas/train.h5ad", norm = "log", output = "output.h5ad" ) diff --git a/src/metrics/mse/script.py b/src/metrics/mse/script.py index fca9c88..f5212b2 100644 --- a/src/metrics/mse/script.py +++ b/src/metrics/mse/script.py @@ -5,8 +5,8 @@ ## VIASH START par = { - 'input_test': 'resources_test/denoising/pancreas/test.h5ad', - 'input_prediction': 'resources_test/denoising/pancreas/denoised.h5ad', + 'input_test': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad', + 'input_prediction': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/denoised.h5ad', 'output': 'output_mse.h5ad' } meta = { diff --git a/src/metrics/poisson/script.py b/src/metrics/poisson/script.py index d0000c6..d8e2408 100644 --- a/src/metrics/poisson/script.py +++ b/src/metrics/poisson/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { - 'input_prediction': 'output_magic.h5ad', - 'input_test': 'output_test.h5ad', + 'input_test': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/test.h5ad', + 'input_prediction': 'resources_test/task_denoising/cxg_mouse_pancreas_atlas/denoised.h5ad', 'output': 'output_poisson.h5ad' } meta = {