openproblems-bio · KaiWaldrant · Jun 21, 2024 · Jun 7, 2024 · Jun 7, 2024 · Jun 7, 2024
diff --git a/.github/build.yaml b/.github/build.yaml
@@ -0,0 +1,122 @@
+# Workflow that builds the components and publishes the build output and container images.
+name: build
+
+on:
+  # Run on every push to the main branch.
+  push:
+    branches: [ 'main' ]
+  # Allow manual runs; both inputs are optional and fall back to `build-${BRANCH_NAME}`.
+  workflow_dispatch:
+    inputs:
+      target_branch:
+        description: 'Branch to deploy to. If not specified, `build-${BRANCH_NAME}` will be used.'
+        required: false
+      version:
+        description: 'Version name to use for the build. If not specified, `build-${BRANCH_NAME}` will be used.'
+        required: false
+
+jobs:
+  # phase 1
+  # Builds all components, deploys the build output to the target branch, and
+  # emits the matrix of docker components consumed by the `build` job.
+  list:
+    runs-on: ubuntu-latest
+
+    outputs:
+      target_branch: ${{ steps.defaults.outputs.target_branch }}
+      version: ${{ steps.defaults.outputs.version }}
+      component_matrix: ${{ steps.set_matrix.outputs.matrix }}
+
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        submodules: 'recursive'
+
+    - uses: viash-io/viash-actions/setup@v5
+
+    - name: Determine version tag from branch name
+      id: defaults
+      # The workflow_dispatch inputs are handed over through the environment
+      # rather than interpolated into the script, so their contents are never
+      # parsed as shell code or split on whitespace.
+      env:
+        VERSION_INPUT: ${{ github.event.inputs.version }}
+        TARGET_BRANCH_INPUT: ${{ github.event.inputs.target_branch }}
+      run: |
+        BRANCH_NAME="${GITHUB_REF#refs/heads/}"
+
+        # Both values fall back to `build-<branch>` when the input is empty
+        # (always the case for push events).
+        VERSION="${VERSION_INPUT:-build-$BRANCH_NAME}"
+        echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+
+        TARGET_BRANCH="${TARGET_BRANCH_INPUT:-build-$BRANCH_NAME}"
+        echo "target_branch=$TARGET_BRANCH" >> "$GITHUB_OUTPUT"
+
+    - name: Remove target folder from .gitignore
+      run: |
+        # allow publishing the target folder
+        # (matches the entry with or without a leading slash: `target` or `/target`)
+        sed -i -E '/^\/?target/d' .gitignore
+
+    - uses: viash-io/viash-actions/ns-build@v5
+      with:
+        # Same top-level `.version` field that _viash.yaml sets to 'dev'.
+        config_mod: .version := '${{ steps.defaults.outputs.version }}'
+        parallel: true
+
+    - name: Deploy to target branch
+      uses: peaceiris/actions-gh-pages@v4
+      with:
+        github_token: ${{ secrets.GITHUB_TOKEN }}
+        publish_dir: .
+        publish_branch: ${{ steps.defaults.outputs.target_branch }}
+
+    - id: ns_list
+      uses: viash-io/viash-actions/ns-list@v5
+      with:
+        platform: docker
+        src: src
+        format: json
+
+    - id: set_matrix
+      run: |
+        # One matrix entry per component: a display name plus the directory
+        # that contains its config file. Assigning to a variable first makes
+        # the step fail if jq fails, instead of writing an empty matrix.
+        matrix=$(jq -c '[ .[] |
+          {
+            "name": (.namespace + "/" + .name),
+            "dir": (.info.config | capture("^(?<dir>.*\/)").dir)
+          }
+        ]' "${{ steps.ns_list.outputs.output_file }}")
+        echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
+
+  # phase 2
+  # Builds and pushes the docker image of each component listed by the `list` job.
+  build:
+    needs: list
+
+    runs-on: ubuntu-latest
+
+    strategy:
+      # Keep building the remaining components when one of them fails.
+      fail-fast: false
+      matrix:
+        component: ${{ fromJson(needs.list.outputs.component_matrix) }}
+
+    steps:
+    # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.'
+    - uses: data-intuitive/reclaim-the-bytes@v2
+
+    - uses: actions/checkout@v4
+      with:
+        # Check out submodules as the other build/test jobs do, so that files
+        # living in the `common` submodule are present when building.
+        submodules: 'recursive'
+
+    - uses: viash-io/viash-actions/setup@v5
+
+    - name: Build container
+      uses: viash-io/viash-actions/ns-build@v5
+      with:
+        # Same top-level `.version` field that _viash.yaml sets to 'dev'.
+        config_mod: .version := '${{ needs.list.outputs.version }}'
+        platform: docker
+        src: ${{ matrix.component.dir }}
+        setup: build
+
+    - name: Login to container registry
+      uses: docker/login-action@v3
+      with:
+        registry: ghcr.io
+        username: ${{ secrets.GTHB_USER }}
+        password: ${{ secrets.GTHB_PAT }}
+
+    - name: Push container
+      uses: viash-io/viash-actions/ns-build@v5
+      with:
+        config_mod: .version := '${{ needs.list.outputs.version }}'
+        platform: docker
+        src: ${{ matrix.component.dir }}
+        setup: push
diff --git a/.github/test.yaml b/.github/test.yaml
@@ -0,0 +1,109 @@
+# Workflow that runs `viash test` on the components selected by the `list` job.
+name: test
+
+on:
+  # Run for pull requests and for pushes to any branch; the first job decides
+  # whether a push-triggered run should be skipped.
+  pull_request:
+  push:
+    branches: [ '**' ]
+
+jobs:
+  # Decides whether the rest of the workflow should run; exposes the verdict as `run_ci`.
+  run_ci_check_job:
+    runs-on: ubuntu-latest
+    outputs:
+      run_ci: ${{ steps.github_cli.outputs.check }}
+    steps:
+      - name: 'Check if branch has an existing pull request and the trigger was a push'
+        id: github_cli
+        run: |
+          # Repository and branch name come from the runner's default environment
+          # variables and are quoted, so an unusual branch name cannot be parsed
+          # as shell code.
+          pull_request=$(gh pr list -R "$GITHUB_REPOSITORY" -H "$GITHUB_REF_NAME" --json url --state open --limit 1 | jq '.[0].url')
+          # If the branch has a PR and this run was triggered by a push event, do not run
+          # (unless it is the main branch or the commit message contains 'ci force').
+          # The two expressions below render to the literals `true`/`false`.
+          if [[ "$pull_request" != "null" && "$GITHUB_REF_NAME" != "main" && "${{ github.event_name == 'push' }}" == "true" && "${{ !contains(github.event.head_commit.message, 'ci force') }}" == "true" ]]; then
+            echo "check=false" >> "$GITHUB_OUTPUT"
+          else
+            echo "check=true" >> "$GITHUB_OUTPUT"
+          fi
+        env:
+          GITHUB_TOKEN: ${{ secrets.GTHB_PAT }}
+
+  # phase 1
+  # Syncs and caches the test resources, then emits the matrix of components to test.
+  list:
+    needs: run_ci_check_job
+    env:
+      s3_bucket: s3://openproblems-data/resources_test/cell_cell_communication
+    runs-on: ubuntu-latest
+    if: ${{ needs.run_ci_check_job.outputs.run_ci == 'true' }}
+
+    outputs:
+      matrix: ${{ steps.set_matrix.outputs.matrix }}
+      cache_key: ${{ steps.cache.outputs.cache_key }}
+
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+        submodules: 'recursive'
+
+    - uses: viash-io/viash-actions/setup@v5
+
+    - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5
+      id: cache
+      with:
+        # `with:` values are not shell-expanded, so the job-level variable is
+        # read through the `env` context instead of as `$s3_bucket`.
+        s3_bucket: ${{ env.s3_bucket }}
+        dest_path: resources
+        cache_key_prefix: resources__
+
+    - id: ns_list
+      uses: viash-io/viash-actions/ns-list@v5
+      with:
+        platform: docker
+        format: json
+
+    - id: ns_list_filtered
+      uses: viash-io/viash-actions/project/detect-changed-components@v5
+      with:
+        input_file: "${{ steps.ns_list.outputs.output_file }}"
+
+    - id: set_matrix
+      run: |
+        # One matrix entry per component: a display name plus the path to its
+        # config file. Assigning to a variable first makes the step fail if jq
+        # fails, instead of writing an empty matrix.
+        matrix=$(jq -c '[ .[] |
+          {
+            "name": (.namespace + "/" + .name),
+            "config": .info.config
+          }
+        ]' "${{ steps.ns_list_filtered.outputs.output_file }}")
+        echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
+
+  # phase 2
+  # Runs `viash test` once per entry of the matrix produced by the `list` job.
+  viash_test:
+    runs-on: ubuntu-latest
+    needs: list
+    # Skip the job entirely when there is nothing to test.
+    if: ${{ needs.list.outputs.matrix != '' && needs.list.outputs.matrix != '[]' }}
+
+    strategy:
+      matrix:
+        component: ${{ fromJson(needs.list.outputs.matrix) }}
+      # A failing component must not cancel the tests of the others.
+      fail-fast: false
+
+    steps:
+    # Free up disk space first; without this the runner hits 'no space left on device.'
+    - uses: data-intuitive/reclaim-the-bytes@v2
+
+    - uses: actions/checkout@v4
+      with:
+        submodules: 'recursive'
+
+    - uses: viash-io/viash-actions/setup@v5
+
+    # Bring in the `resources` folder under the cache key published by the `list` job.
+    - name: Cache resources data
+      timeout-minutes: 10
+      uses: actions/cache@v4
+      with:
+        key: ${{ needs.list.outputs.cache_key }}
+        path: resources
+
+    - name: Run test
+      timeout-minutes: 30
+      run: |
+        VIASH_TEMP=$RUNNER_TEMP/viash viash test "${{ matrix.component.config }}" --cpus 2 --memory "16gb"
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+# Downloaded resources, pipeline work files and build output
+resources
+work
+.nextflow*
+target
+# Editor and OS files
+.vscode
+.DS_Store
+# Run outputs, traces and notebook checkpoints
+output
+trace-*
+.ipynb_checkpoints
diff --git a/INSTRUCTIONS.md b/INSTRUCTIONS.md
@@ -0,0 +1,73 @@
+# Instructions
+
+This is a guide on what to do after you have created a new task repository from the template. More in depth information about how to create a new task can be found in the [OpenProblems Documentation](https://openproblems.bio/documentation/create_task/).
+
+## First things first
+
+* Update the `_viash.yaml` file with the correct task information.
+* Update the `src/api/task_info.yaml` file with the information you have provided in the task issue.
+
+## Resources
+
+The OpenProblems team has provided some test resources that can be used to test the task. These resources are stored in the `resources` folder. The `scripts/download_resources.sh` script can be used to download these resources.
+
+If these resources are not sufficient, you can add more resources to the `resources` folder. The `scripts/download_resources.sh` script can be updated to download these resources.
+
+
+
+
+
+<!-- Add to readme 
+* update _viash.yaml
+* update src/api/task_info.yaml
+* update scripts/download_resources
+-->
+
+#!/bin/bash
+
+echo "This script is not supposed to be run directly."
+echo "Please run the script step-by-step."
+exit 1
+
+# sync resources
+scripts/download_resources.sh
+
+# create a new component
+method_id="my_metric"
+method_lang="python" # change this to "r" if need be
+
+common/create_component/create_component -- \
+  --language "$method_lang" \
+  --name "$method_id"
+
+# TODO: fill in required fields in src/task/methods/$method_id/config.vsh.yaml
+# TODO: edit src/task/methods/$method_id/script.py (or script.R)
+
+# test the component
+viash test src/task/methods/$method_id/config.vsh.yaml
+
+# rebuild the container (only if you change something to the docker platform)
+# You can reduce the memory and cpu allotted to jobs in _viash.yaml by modifying .runners[.type == "nextflow"].config.labels
+viash run src/task/methods/$method_id/config.vsh.yaml -- \
+  ---setup cachedbuild ---verbose
+
+# run the method (using parquet as input)
+viash run src/task/methods/$method_id/config.vsh.yaml -- \
+  --de_train "resources/neurips-2023-kaggle/de_train.parquet" \
+  --id_map "resources/neurips-2023-kaggle/id_map.csv" \
+  --output "output/prediction.parquet"
+
+# run the method (using h5ad as input)
+viash run src/task/methods/$method_id/config.vsh.yaml -- \
+  --de_train_h5ad "resources/neurips-2023-kaggle/2023-09-12_de_by_cell_type_train.h5ad" \
+  --id_map "resources/neurips-2023-kaggle/id_map.csv" \
+  --output "output/prediction.parquet"
+
+# run evaluation metric
+viash run src/task/metrics/mean_rowwise_error/config.vsh.yaml -- \
+  --de_test "resources/neurips-2023-kaggle/de_test.parquet" \
+  --prediction "output/prediction.parquet" \
+  --output "output/score.h5ad"
+
+# print the contents of `.uns` of the score file (score on kaggle test dataset)
+python -c 'import anndata; print(anndata.read_h5ad("output/score.h5ad").uns)'
diff --git a/README.md b/README.md
@@ -1,2 +1,27 @@
-# task-template
-This repo is a template to create a new task that has the correct files and structure needed to start a new task.
+# Task Template
+
+This repo is a template to create a new task for the OpenProblems v2. This repo contains several example files and components that can be used when updated with the task info.
+
+> [!WARNING] 
+> This README will be overwritten when performing the `create_task_readme` script.
+
+## Create a repository from this template
+
+> [!IMPORTANT] 
+> Before creating a new repository, make sure you are part of the OpenProblems task team. You will be added to the team after you have created an issue for the task and have received the go-ahead to create it.
+> For more information on how to create a new task, check out the [Create a new task](https://openproblems.bio/documentation/create_task/) documentation.
+
+The instructions below will guide you through creating a new repository from this template ([creating-a-repository-from-a-template](https://docs.github.com/en/repositories/creating-and-managing-repositories/creating-a-repository-from-a-template#creating-a-repository-from-a-template)).
+
+
+* Click the "Use this template" button on the top right of the repository.
+* Use the Owner dropdown menu to select the `openproblems-bio` account.
+* Type a name for your repository (task_...), and a description.
+* Set the repository visibility to public.
+* Click "Create repository from template".
+
+## What to do next
+
+Check out the [instructions](INSTRUCTIONS.md) for more information on how to update the example files and components. These instructions also contain information on how to build out the task and basic commands.
+
+For more information on the OpenProblems v2, check out the [Documentation](https://openproblems.bio/documentation/) on the Open Problems website.
diff --git a/_viash.yaml b/_viash.yaml
@@ -0,0 +1,19 @@
+# Viash version used for this project.
+viash_version: 0.9.0-RC4
+
+# Task information; the `<task_template>` placeholders are meant to be replaced
+# with the actual task name.
+name: <task_template>
+description: |
+  An OpenProblems benchmark on single-cell cell-cell communication.
+license: MIT
+keywords: [single-cell, cell-cell communication, openproblems, benchmark]
+links:
+  issue_tracker: https://github.com/openproblems-bio/<task_template>/issues
+  repository: https://github.com/openproblems-bio/<task_template>
+  docker_registry: ghcr.io/openproblems-bio
+
+# Config mods: set the version to 'dev', use ';' as separator for multiple-value
+# arguments, and configure the nextflow runner (tag, output simplification,
+# memory/cpu/time labels, error strategy).
+# NOTE(review): presumably applied to every component config in the project - confirm.
+config_mods: |
+  .version := 'dev'
+  .arguments[.multiple == true].multiple_sep := ';'
+  .runners[.type == "nextflow"].directives.tag := "$id"
+  .runners[.type == "nextflow"].auto.simplifyOutput := false
+  .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" }
+  .runners[.type == "nextflow"].config.script := "process.errorStrategy = 'ignore'"
diff --git a/common b/common
diff --git a/main.nf b/main.nf
@@ -0,0 +1,3 @@
+// Placeholder entry workflow: it only prints a notice and runs no pipeline steps.
+workflow {
+    print("This is a dummy placeholder for pipeline execution. Please use the corresponding nf files for running pipelines.")
+}
diff --git a/nextflow.config b/nextflow.config
@@ -0,0 +1 @@
+// Container image set for processes in this config scope.
+process.container = 'nextflow/bash:latest'
diff --git a/scripts/add_a_control_method.sh b/scripts/add_a_control_method.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Creates a new control method component under src/control_methods/,
+# based on the control method API file.
+
+# Edit these two values before running.
+name="my_control_method"
+lang="python" # use "r" for an R component
+
+# Destination directory of the new component.
+out_dir="src/control_methods/$name"
+
+common/create_component/create_component \
+  --language "$lang" \
+  --name "$name" \
+  --api_file src/api/comp_control_method.yaml \
+  --output "$out_dir"