diff --git a/src/api/comp_process_dataset.yaml b/src/api/comp_process_dataset.yaml
new file mode 100644
index 0000000..90bf545
--- /dev/null
+++ b/src/api/comp_process_dataset.yaml
@@ -0,0 +1,32 @@
+functionality:
+  namespace: "label_projection"
+  info:
+    type: process_dataset
+    type_info:
+      label: Data processor
+      summary: A label projection dataset processor.
+      description: |
+        A component for processing a Common Dataset into a task-specific dataset.
+  arguments:
+    - name: "--input"
+      __merge__: file_common_dataset.yaml
+      direction: input
+      required: true
+    - name: "--output_train"
+      __merge__: file_train.yaml
+      direction: output
+      required: true
+    - name: "--output_test"
+      __merge__: file_test.yaml
+      direction: output
+      required: true
+    - name: "--output_solution"
+      __merge__: file_solution.yaml
+      direction: output
+      required: true
+  test_resources:
+    - path: /resources_test/common/pancreas
+      dest: resources_test/common/pancreas
+    - type: python_script
+      path: /common/component_tests/run_and_check_output.py
+
diff --git a/src/api/file_common_dataset.yaml b/src/api/file_common_dataset.yaml
new file mode 100644
index 0000000..0a5a05f
--- /dev/null
+++ b/src/api/file_common_dataset.yaml
@@ -0,0 +1,41 @@
+#TODO: Change to the required and/or optional fields of the anndata
+type: file
+example: "resources_test/common/pancreas/dataset.h5ad"
+info:
+  label: "Common Dataset"
+  summary: A subset of the common dataset.
+  slots:
+    layers:
+      - type: integer
+        name: counts
+        description: Raw counts
+        required: true
+    uns:
+      - type: string
+        name: dataset_id
+        description: "A unique identifier for the dataset"
+        required: true
+      - name: dataset_name
+        type: string
+        description: Nicely formatted name.
+        required: true
+      - type: string
+        name: dataset_url
+        description: Link to the original source of the dataset.
+        required: false
+      - name: dataset_reference
+        type: string
+        description: Bibtex reference of the paper in which the dataset was published.
+        required: false
+      - name: dataset_summary
+        type: string
+        description: Short description of the dataset.
+        required: true
+      - name: dataset_description
+        type: string
+        description: Long description of the dataset.
+        required: true
+      - name: dataset_organism
+        type: string
+        description: The organism of the sample in the dataset.
+        required: false