From 64336761b39ff94612ec35847e101ffe9d2881e0 Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 11:21:16 +0200
Subject: [PATCH 01/17] WIP fix

---
 common                                         |   2 +-
 output_mse.h5ad                                | Bin 0 -> 20784 bytes
 src/api/comp_process_dataset.yaml              |   2 +-
 .../no_denoising/config.vsh.yaml               |   2 +-
 .../perfect_denoising/config.vsh.yaml          |   2 +-
 src/methods/alra/config.vsh.yaml               |   2 +-
 src/methods/knn_smoothing/config.vsh.yaml      |   2 +-
 src/methods/magic/config.vsh.yaml              |   2 +-
 src/methods/saver/config.vsh.yaml              |   2 +-
 src/metrics/mse/config.vsh.yaml                |   4 +++-
 src/metrics/mse/script.py                      |   4 ++--
 src/metrics/poisson/config.vsh.yaml            |   5 +++--
 src/process_dataset/config.vsh.yaml            |   2 +-
 src/workflows/process_datasets/config.vsh.yaml |   2 +-
 14 files changed, 18 insertions(+), 15 deletions(-)
 create mode 100644 output_mse.h5ad

diff --git a/common b/common
index ecbb47c..b182188 160000
--- a/common
+++ b/common
@@ -1 +1 @@
-Subproject commit ecbb47ca0cb36e9350760cf126d5c7e3125f26de
+Subproject commit b18218898334d69f5a740785187c443c92fe781e
diff --git a/output_mse.h5ad b/output_mse.h5ad
new file mode 100644
index 0000000000000000000000000000000000000000..57135a5984114776e959dd313c9a1f8a2d10d3f3
GIT binary patch
literal 20784
zcmeHPJ#ZUE6kgd05s4FQ2#|!o3nW1S|0pH|lLiIbL4o`jCq+8vES;^JNGEZ3%HV>T
[~20 kB of base85-encoded binary data omitted; see "Bin 0 -> 20784 bytes" in the diffstat above]

literal 0
HcmV?d00001

diff --git a/src/api/comp_process_dataset.yaml b/src/api/comp_process_dataset.yaml
index 520ef54..b5e7416 100644
--- a/src/api/comp_process_dataset.yaml
+++ b/src/api/comp_process_dataset.yaml
@@ -1,4 +1,4 @@
-namespace: ""
+namespace: "process_dataset"
 info:
   type: process_dataset
   type_info:
diff --git a/src/control_methods/no_denoising/config.vsh.yaml b/src/control_methods/no_denoising/config.vsh.yaml
index dcfd530..366a0f0 100644
--- a/src/control_methods/no_denoising/config.vsh.yaml
+++ b/src/control_methods/no_denoising/config.vsh.yaml
@@ -16,7 +16,7 @@ resources:
 
 engines:
   - type: docker
-    image: ghcr.io/openproblems-bio/base_python:1.0.4
+    image: ghcr.io/openproblems-bio/base_images/python:1.1.0
 
 runners:
   - type: executable
diff --git a/src/control_methods/perfect_denoising/config.vsh.yaml b/src/control_methods/perfect_denoising/config.vsh.yaml
index ea4def4..f4c5766 100644
--- a/src/control_methods/perfect_denoising/config.vsh.yaml
+++ b/src/control_methods/perfect_denoising/config.vsh.yaml
@@ -17,7 +17,7 @@ resources:
 
 engines:
   - type: docker
-    image: ghcr.io/openproblems-bio/base_python:1.0.4
+    image: ghcr.io/openproblems-bio/base_images/python:1.1.0
 
 runners:
   - type: executable
diff --git a/src/methods/alra/config.vsh.yaml b/src/methods/alra/config.vsh.yaml
index 0113796..32ddc79 100644
--- a/src/methods/alra/config.vsh.yaml
+++ b/src/methods/alra/config.vsh.yaml
@@ -33,7 +33,7 @@ resources:
     path: script.R
 engines:
   - type: docker
-    image: ghcr.io/openproblems-bio/base_r:1.0.4
+    image: ghcr.io/openproblems-bio/base_images/r:1.1.0
     setup:
       - type: r
         cran: [ Matrix, rsvd ]
diff --git a/src/methods/knn_smoothing/config.vsh.yaml b/src/methods/knn_smoothing/config.vsh.yaml
index d0b1ade..7ab61ff 100644
--- a/src/methods/knn_smoothing/config.vsh.yaml
+++ b/src/methods/knn_smoothing/config.vsh.yaml
@@ -30,7 +30,7 @@ resources:
 
 engines:
   - type: docker
-    image: ghcr.io/openproblems-bio/base_python:1.0.4
+    image: ghcr.io/openproblems-bio/base_images/python:1.1.0
     setup:
       - type: python
         packages:
diff --git a/src/methods/magic/config.vsh.yaml b/src/methods/magic/config.vsh.yaml
index e1760a1..cee7046 100644
--- a/src/methods/magic/config.vsh.yaml
+++ b/src/methods/magic/config.vsh.yaml
@@ -53,7 +53,7 @@ resources:
     path: script.py
 engines:
   - type: docker
-    image: ghcr.io/openproblems-bio/base_python:1.0.4
+    image: ghcr.io/openproblems-bio/base_images/python:1.1.0
     setup:
       - type: python
         pip: [scprep, magic-impute, scipy, scikit-learn<1.2]
diff --git a/src/methods/saver/config.vsh.yaml b/src/methods/saver/config.vsh.yaml
index 44a6841..5ab4ce0 100644
--- a/src/methods/saver/config.vsh.yaml
+++ b/src/methods/saver/config.vsh.yaml
@@ -23,7 +23,7 @@ resources:
     path: script.R
 engines:
   - type: docker
-    image: ghcr.io/openproblems-bio/base_r:1.0.4
+    image: ghcr.io/openproblems-bio/base_images/r:1.1.0
     setup:
       - type: r
         github: mohuangx/SAVER
diff --git a/src/metrics/mse/config.vsh.yaml b/src/metrics/mse/config.vsh.yaml
index 2dd3ef9..59e55fb 100644
--- a/src/metrics/mse/config.vsh.yaml
+++ b/src/metrics/mse/config.vsh.yaml
@@ -21,9 +21,11 @@ engines:
     image: ghcr.io/openproblems-bio/base_python:1.0.4
     setup:
       - type: python
-        packages:
+        pypi:
           - scikit-learn
           - scprep
+          - numpy<2.0
+          - pandas<2.0
 runners:
   - type: executable
   - type: nextflow
diff --git a/src/metrics/mse/script.py b/src/metrics/mse/script.py
index 556e109..acb5381 100644
--- a/src/metrics/mse/script.py
+++ b/src/metrics/mse/script.py
@@ -6,7 +6,7 @@
 ## VIASH START
 par = {
   'input_test': 'resources_test/denoising/pancreas/test.h5ad',
-  'input_denoised': 'resources_test/denoising/pancreas/magic.h5ad',
+  'input_denoised': 'resources_test/denoising/pancreas/denoised.h5ad',
   'output': 'output_mse.h5ad'
 }
 meta = {
@@ -26,7 +26,7 @@
 # scaling and transformation
 target_sum = 10000
 
-sc.pp.normalize_total(test_data, target_sum)
+sc.pp.normalize_total(test_data, target_sum=target_sum)
 sc.pp.log1p(test_data)
 
 sc.pp.normalize_total(denoised_data, target_sum)
diff --git a/src/metrics/poisson/config.vsh.yaml b/src/metrics/poisson/config.vsh.yaml
index f36054d..73b02d3 100644
--- a/src/metrics/poisson/config.vsh.yaml
+++ b/src/metrics/poisson/config.vsh.yaml
@@ -18,10 +18,11 @@ resources:
     path: script.py
 engines:
   - type: docker
-    image: ghcr.io/openproblems-bio/base_python:1.0.4
+    image: ghcr.io/openproblems-bio/base_images/python:1.1.0
     setup:
       - type: python
-        pip: scprep
+        pypi:
+          - scprep
 runners:
   - type: executable
   - type: nextflow
diff --git a/src/process_dataset/config.vsh.yaml b/src/process_dataset/config.vsh.yaml
index 4a2f111..0ae398a 100644
--- a/src/process_dataset/config.vsh.yaml
+++ b/src/process_dataset/config.vsh.yaml
@@ -25,7 +25,7 @@ resources:
   - path: helper.py
 engines:
   - type: docker
-    image: ghcr.io/openproblems-bio/base_python:1.0.4
+    image: ghcr.io/openproblems-bio/base_images/python:1.1.0
     setup:
       - type: python
         packages:
diff --git a/src/workflows/process_datasets/config.vsh.yaml b/src/workflows/process_datasets/config.vsh.yaml
index b0a596e..22765f2 100644
--- a/src/workflows/process_datasets/config.vsh.yaml
+++ b/src/workflows/process_datasets/config.vsh.yaml
@@ -28,6 +28,6 @@ dependencies:
       type: github
      repo: openproblems-bio/openproblems-v2
      tag: main_build
-  - name: process_dataset
+  - name: process_dataset/process_dataset
 runners:
   - type: nextflow
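The `normalize_total` fix in PATCH 01 deserves a remark: newer scanpy releases move the parameters after `adata` to keyword-only, so the positional call `sc.pp.normalize_total(test_data, target_sum)` warns or fails depending on the installed version. A minimal sketch of the intended call pattern (the toy matrix below is illustrative and not part of the benchmark data):

import anndata as ad
import numpy as np
import scanpy as sc

# Stand-in for the test/denoised AnnData objects built from layers["counts"].
adata = ad.AnnData(X=np.random.poisson(1.0, size=(50, 20)).astype("float32"))

target_sum = 10000

# sc.pp.normalize_total(adata, target_sum)           # positional: deprecated in recent scanpy
sc.pp.normalize_total(adata, target_sum=target_sum)  # keyword form used by the patched script
sc.pp.log1p(adata)

Note that the parallel call on `denoised_data` is still positional in this hunk's context lines.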
From b77d44473b5739258afd35b65947f5e4b11a1ab8 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Mon, 1 Jul 2024 11:31:58 +0200
Subject: [PATCH 02/17] fix argument names

---
 src/metrics/mse/script.py     | 2 +-
 src/metrics/poisson/script.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/metrics/mse/script.py b/src/metrics/mse/script.py
index acb5381..a00d006 100644
--- a/src/metrics/mse/script.py
+++ b/src/metrics/mse/script.py
@@ -15,7 +15,7 @@
 ## VIASH END
 
 print("Load data", flush=True)
-input_denoised = ad.read_h5ad(par['input_denoised'], backed="r")
+input_denoised = ad.read_h5ad(par['input_prediction'], backed="r")
 input_test = ad.read_h5ad(par['input_test'], backed="r")
 
 test_data = ad.AnnData(X=input_test.layers["counts"], dtype="float")
diff --git a/src/metrics/poisson/script.py b/src/metrics/poisson/script.py
index 859ef06..2a13dbf 100644
--- a/src/metrics/poisson/script.py
+++ b/src/metrics/poisson/script.py
@@ -14,7 +14,7 @@
 ## VIASH END
 
 print("Load Data", flush=True)
-input_denoised = ad.read_h5ad(par['input_denoised'], backed="r")
+input_denoised = ad.read_h5ad(par['input_prediction'], backed="r")
 input_test = ad.read_h5ad(par['input_test'], backed="r")
 
 test_data = scprep.utils.toarray(input_test.layers["counts"])

From 945d2831010265da6554269a25b863b2389b7516 Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 14:31:20 +0200
Subject: [PATCH 03/17] update arguments

---
 src/metrics/mse/script.py     | 2 +-
 src/metrics/poisson/script.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/metrics/mse/script.py b/src/metrics/mse/script.py
index a00d006..375219f 100644
--- a/src/metrics/mse/script.py
+++ b/src/metrics/mse/script.py
@@ -6,7 +6,7 @@
 ## VIASH START
 par = {
   'input_test': 'resources_test/denoising/pancreas/test.h5ad',
-  'input_denoised': 'resources_test/denoising/pancreas/denoised.h5ad',
+  'input_prediction': 'resources_test/denoising/pancreas/denoised.h5ad',
   'output': 'output_mse.h5ad'
 }
 meta = {
diff --git a/src/metrics/poisson/script.py b/src/metrics/poisson/script.py
index 2a13dbf..1604d0c 100644
--- a/src/metrics/poisson/script.py
+++ b/src/metrics/poisson/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-  'input_denoised': 'output_magic.h5ad',
+  'input_prediction': 'output_magic.h5ad',
   'input_test': 'output_test.h5ad',
   'output': 'output_poisson.h5ad'
 }
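Patches 02 and 03 jointly rename the metric input from `input_denoised` to `input_prediction`: patch 02 updates the `par` lookups in the script bodies, patch 03 the VIASH placeholder dictionaries. The resulting read pattern, assembled from the hunks above (a sketch for orientation only; the downstream scoring code is unchanged):

import anndata as ad

par = {
  'input_test': 'resources_test/denoising/pancreas/test.h5ad',
  'input_prediction': 'resources_test/denoising/pancreas/denoised.h5ad',
  'output': 'output_mse.h5ad'
}

# The component argument is now 'input_prediction'; the local variable
# name 'input_denoised' is kept unchanged inside both metric scripts.
input_denoised = ad.read_h5ad(par['input_prediction'], backed="r")
input_test = ad.read_h5ad(par['input_test'], backed="r")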
From 468276b9d3d27e37e7f4674f9038ffcfae56334c Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 14:33:15 +0200
Subject: [PATCH 04/17] update docker mse

---
 src/metrics/mse/config.vsh.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/metrics/mse/config.vsh.yaml b/src/metrics/mse/config.vsh.yaml
index 59e55fb..2e0285d 100644
--- a/src/metrics/mse/config.vsh.yaml
+++ b/src/metrics/mse/config.vsh.yaml
@@ -24,8 +24,6 @@ engines:
         pypi:
           - scikit-learn
           - scprep
-          - numpy<2.0
-          - pandas<2.0
 runners:
   - type: executable
   - type: nextflow

From b48590634820a7304fd010a0cb715f3a01aae073 Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 14:34:29 +0200
Subject: [PATCH 05/17] update CI

---
 .github/workflows/build.yaml | 121 +++--------------------------
 .github/workflows/test.yaml  | 112 ++------------------------
 2 files changed, 14 insertions(+), 219 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index b6b568d..b33cbdb 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -1,122 +1,21 @@
-name: build
+name: Build
 
 on:
   push:
     branches: [ 'main' ]
   workflow_dispatch:
     inputs:
-      target_branch:
-        description: 'Branch to deploy to. If not specified, `build-${BRANCH_NAME}` will be used.'
-        required: false
       version:
-        description: 'Version name to use for the build. If not specified, `build-${BRANCH_NAME}` will be used.'
+        description: |
+          The version of the project to build. Example: `1.0.3`.
+
+          If not provided, a development build with a version name
+          based on the branch name will be built. Otherwise, a release
+          build with the provided version will be built.
         required: false
 
 jobs:
-  # phase 1
-  list:
-    runs-on: ubuntu-latest
-
-    outputs:
-      target_branch: ${{ steps.defaults.outputs.target_branch }}
-      version: ${{ steps.defaults.outputs.version }}
-      component_matrix: ${{ steps.set_matrix.outputs.matrix }}
-
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: 'recursive'
-
-      - uses: viash-io/viash-actions/setup@v5
-
-      - name: Determine version tag from branch name
-        id: defaults
-        run: |
-          BRANCH_NAME=$(echo $GITHUB_REF | sed 's/refs\/heads\///')
-
-          VERSION=${{ github.event.inputs.version }}
-          if [ -z "$VERSION" ]; then
-            VERSION="build-$BRANCH_NAME"
-          fi
-          echo "version=$VERSION" >> $GITHUB_OUTPUT
-
-          TARGET_BRANCH=${{ github.event.inputs.target_branch }}
-          if [ -z "$TARGET_BRANCH" ]; then
-            TARGET_BRANCH="build-$BRANCH_NAME"
-          fi
-          echo "target_branch=$TARGET_BRANCH" >> $GITHUB_OUTPUT
-
-      - name: Remove target folder from .gitignore
-        run: |
-          # allow publishing the target folder
-          sed -i '/^\/target.*/d' .gitignore
-
-      - uses: viash-io/viash-actions/ns-build@v5
-        with:
-          config_mod: .version := '${{ steps.defaults.outputs.version }}'
-          parallel: true
-
-      - name: Deploy to target branch
-        uses: peaceiris/actions-gh-pages@v4
-        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-          publish_dir: .
-          publish_branch: ${{ steps.defaults.outputs.target_branch }}
-
-      - id: ns_list
-        uses: viash-io/viash-actions/ns-list@v5
-        with:
-          platform: docker
-          src: src
-          format: json
-
-      - id: set_matrix
-        run: |
-          echo "matrix=$(jq -c '[ .[] |
-            {
-              "name": (.namespace + "/" + .name),
-              "dir": .info.config | capture("^(?<dir>.*\/)").dir
-            }
-          ]' ${{ steps.ns_list.outputs.output_file }} )" >> $GITHUB_OUTPUT
-
-  # phase 2
   build:
-    needs: list
-
-    runs-on: ubuntu-latest
-
-    strategy:
-      fail-fast: false
-      matrix:
-        component: ${{ fromJson(needs.list.outputs.component_matrix) }}
-
-    steps:
-      # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.'
-      - uses: data-intuitive/reclaim-the-bytes@v2
-
-      - uses: actions/checkout@v4
-
-      - uses: viash-io/viash-actions/setup@v5
-
-      - name: Build container
-        uses: viash-io/viash-actions/ns-build@v5
-        with:
-          config_mod: .version := '${{ needs.list.outputs.version }}'
-          platform: docker
-          src: ${{ matrix.component.dir }}
-          setup: build
-
-      - name: Login to container registry
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ secrets.GTHB_USER }}
-          password: ${{ secrets.GTHB_PAT }}
-
-      - name: Push container
-        uses: viash-io/viash-actions/ns-build@v5
-        with:
-          config_mod: .version := '${{ needs.list.outputs.version }}'
-          platform: docker
-          src: ${{ matrix.component.dir }}
-          setup: push
\ No newline at end of file
+    uses: openproblems-bio/actions/.github/workflows/build.yml@main
+    with:
+      version: ${{ github.event.inputs.version }}
\ No newline at end of file
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 0abad5c..96811dd 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,113 +1,9 @@
-name: test
+name: Test
 
 on:
-  pull_request:
   push:
-    branches: [ '**' ]
+  pull_request:
 
 jobs:
-  run_ci_check_job:
-    runs-on: ubuntu-latest
-    outputs:
-      run_ci: ${{ steps.github_cli.outputs.check }}
-    steps:
-      - name: 'Check if branch has an existing pull request and the trigger was a push'
-        id: github_cli
-        run: |
-          pull_request=$(gh pr list -R ${{ github.repository }} -H ${{ github.ref_name }} --json url --state open --limit 1 | jq '.[0].url')
-          # If the branch has a PR and this run was triggered by a push event, do not run
-          if [[ "$pull_request" != "null" && "$GITHUB_REF_NAME" != "main" && "${{ github.event_name == 'push' }}" == "true" && "${{ !contains(github.event.head_commit.message, 'ci force') }}" == "true" ]]; then
-            echo "check=false" >> $GITHUB_OUTPUT
-          else
-            echo "check=true" >> $GITHUB_OUTPUT
-          fi
-        env:
-          GITHUB_TOKEN: ${{ secrets.GTHB_PAT }}
-
-  # phase 1
-  list:
-    needs: run_ci_check_job
-    env:
-      s3_bucket: s3://openproblems-data/resources_test
-    runs-on: ubuntu-latest
-    if: ${{ needs.run_ci_check_job.outputs.run_ci == 'true' }}
-
-    outputs:
-      matrix: ${{ steps.set_matrix.outputs.matrix }}
-      cache_key: ${{ steps.cache.outputs.cache_key }}
-
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          submodules: 'recursive'
-
-      - uses: christian-ci/action-yaml-github-output@v2
-        with:
-          file_path: _viash.yaml
-
-      - uses: viash-io/viash-actions/setup@v5
-
-      - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5
-        id: cache
-        with:
-          s3_bucket: $s3_bucket/$NAME
-          dest_path: resources
-          cache_key_prefix: resources__
-
-      - id: ns_list
-        uses: viash-io/viash-actions/ns-list@v5
-        with:
-          engine: docker
-          format: json
-
-      - id: ns_list_filtered
-        uses: viash-io/viash-actions/project/detect-changed-components@v5
-        with:
-          input_file: "${{ steps.ns_list.outputs.output_file }}"
-
-      - id: set_matrix
-        run: |
-          echo "matrix=$(jq -c '[ .[] |
-            {
-              "name": (.namespace + "/" + .name),
-              "config": .info.config
-            }
-          ]' ${{ steps.ns_list_filtered.outputs.output_file }} )" >> $GITHUB_OUTPUT
-
-  # phase 2
-  viash_test:
-    needs: list
-    if: ${{ needs.list.outputs.matrix != '[]' && needs.list.outputs.matrix != '' }}
-    runs-on: ubuntu-latest
-
-    strategy:
-      fail-fast: false
-      matrix:
-        component: ${{ fromJson(needs.list.outputs.matrix) }}
-
-    steps:
-      # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.'
-      - uses: data-intuitive/reclaim-the-bytes@v2
-
-      - uses: actions/checkout@v4
-        with:
-          submodules: 'recursive'
-
-      - uses: viash-io/viash-actions/setup@v5
-
-      # use cache
-      - name: Cache resources data
-        uses: actions/cache@v4
-        timeout-minutes: 10
-        with:
-          path: resources
-          key: ${{ needs.list.outputs.cache_key }}
-
-      - name: Run test
-        timeout-minutes: 30
-        run: |
-          VIASH_TEMP=$RUNNER_TEMP/viash viash test \
-            "${{ matrix.component.config }}" \
-            --cpus 2 \
-            --memory "16gb"
+  build:
+    uses: openproblems-bio/actions/.github/workflows/test.yml@main
\ No newline at end of file

From ac87da61aa1526cf84b480743b2b2d7cde8ca41b Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 14:44:58 +0200
Subject: [PATCH 06/17] add s3 to _viash

---
 _viash.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/_viash.yaml b/_viash.yaml
index 75b6ab7..05032f7 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -10,6 +10,13 @@ links:
   repository: https://github.com/openproblems-bio/task_denoising
   docker_registry: ghcr.io/openproblems-bio
 
+info:
+  test_resources:
+    - type: s3
+      path: s3://openproblems-data/resources_test/denoising
+      dest: test_resources
+
+
 version: dev
 
 config_mods: |

From a11efcb49e75e55f006184818dc4af3c495eea8b Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 15:04:05 +0200
Subject: [PATCH 07/17] WIP fix s3 path

---
 _viash.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_viash.yaml b/_viash.yaml
index 05032f7..8ca91e2 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -13,7 +13,7 @@ links:
 info:
   test_resources:
     - type: s3
-      path: s3://openproblems-data/resources_test/denoising
+      path: s3://openproblems-data/resources_test/denoising/
       dest: test_resources
 
 

From c0335c8834e3147daaa45196c11739ea7bd12ad6 Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 15:23:40 +0200
Subject: [PATCH 08/17] update common resource submodule

---
 common | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common b/common
index b182188..ecbb47c 160000
--- a/common
+++ b/common
@@ -1 +1 @@
-Subproject commit b18218898334d69f5a740785187c443c92fe781e
+Subproject commit ecbb47ca0cb36e9350760cf126d5c7e3125f26de

From 5bd68a1c7f3074d9a1c6dfcc67bab2dfc8e1d889 Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 15:38:41 +0200
Subject: [PATCH 09/17] update common submodule

---
 common | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common b/common
index ecbb47c..95dcc63 160000
--- a/common
+++ b/common
@@ -1 +1 @@
-Subproject commit ecbb47ca0cb36e9350760cf126d5c7e3125f26de
+Subproject commit 95dcc63c124ab358ce7a7c48f916c51d55181172

From 95a23315b1a3f6538717b0fd1c35685f1de52448 Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 16:41:40 +0200
Subject: [PATCH 10/17] update viash resource dest CI

---
 _viash.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_viash.yaml b/_viash.yaml
index 8ca91e2..aad3555 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -14,7 +14,7 @@ info:
   test_resources:
     - type: s3
       path: s3://openproblems-data/resources_test/denoising/
-      dest: test_resources
+      dest: resources_test
 
 
 version: dev

From 472b3c74b509215b5ac0e2099beca1b030acd4a7 Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 16:42:13 +0200
Subject: [PATCH 11/17] Add temp fix for findStates

---
 src/workflows/run_benchmark/main.nf | 130 ++++++++++++++++++++++++++--
 1 file changed, 124 insertions(+), 6 deletions(-)

diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index 8b8f6eb..72009ed 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -1,5 +1,5 @@
 workflow auto {
-  findStates(params, meta.config)
+  findStatesTemp(params, meta.config)
     | meta.workflow.run(
       auto: [publish: "state"]
     )
@@ -57,14 +57,14 @@ workflow run_wf {
 
       // use the 'filter' argument to only run a defined method or all methods
       filter: { id, state, comp ->
-        def method_check = !state.method_ids || state.method_ids.contains(comp.config.functionality.name)
+        def method_check = !state.method_ids || state.method_ids.contains(comp.config.name)
 
         method_check
       },
 
       // define a new 'id' by appending the method name to the dataset id
      id: { id, state, comp ->
-        id + "." + comp.config.functionality.name
+        id + "." + comp.config.name
      },
      // use 'fromState' to fetch the arguments the component requires from the overall state
      fromState: [
@@ -74,7 +74,7 @@ workflow run_wf {
       // use 'toState' to publish that component's outputs to the overall state
       toState: { id, output, state, comp ->
         state + [
-          method_id: comp.config.functionality.name,
+          method_id: comp.config.name,
           method_output: output.output
         ]
       }
@@ -84,7 +84,7 @@ workflow run_wf {
     | runEach(
       components: metrics,
       id: { id, state, comp ->
-        id + "." + comp.config.functionality.name
+        id + "." + comp.config.name
       },
       // use 'fromState' to fetch the arguments the component requires from the overall state
       fromState: [
@@ -94,7 +94,7 @@ workflow run_wf {
       // use 'toState' to publish that component's outputs to the overall state
       toState: { id, output, state, comp ->
         state + [
-          metric_id: comp.config.functionality.name,
+          metric_id: comp.config.name,
           metric_output: output.output
         ]
       }
@@ -181,4 +181,122 @@ workflow run_wf {
 
   emit:
   output_ch
-}
\ No newline at end of file
+}
+
+
+// temp fix for rename_keys typo
+
+def findStatesTemp(Map params, Map config) {
+  def auto_config = deepClone(config)
+  def auto_params = deepClone(params)
+
+  auto_config = auto_config.clone()
+  // override arguments
+  auto_config.argument_groups = []
+  auto_config.arguments = [
+    [
+      type: "string",
+      name: "--id",
+      description: "A dummy identifier",
+      required: false
+    ],
+    [
+      type: "file",
+      name: "--input_states",
+      example: "/path/to/input/directory/**/state.yaml",
+      description: "Path to input directory containing the datasets to be integrated.",
+      required: true,
+      multiple: true,
+      multiple_sep: ";"
+    ],
+    [
+      type: "string",
+      name: "--filter",
+      example: "foo/.*/state.yaml",
+      description: "Regex to filter state files by path.",
+      required: false
+    ],
+    // to do: make this a yaml blob?
+    [
+      type: "string",
+      name: "--rename_keys",
+      example: ["newKey1:oldKey1", "newKey2:oldKey2"],
+      description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.",
+      required: false,
+      multiple: true,
+      multiple_sep: ";"
+    ],
+    [
+      type: "string",
+      name: "--settings",
+      example: '{"output_dataset": "dataset.h5ad", "k": 10}',
+      description: "Global arguments as a JSON glob to be passed to all components.",
+      required: false
+    ]
+  ]
+  if (!(auto_params.containsKey("id"))) {
+    auto_params["id"] = "auto"
+  }
+
+  // run auto config through processConfig once more
+  auto_config = processConfig(auto_config)
+
+  workflow findStatesTempWf {
+    helpMessage(auto_config)
+
+    output_ch =
+      channelFromParams(auto_params, auto_config)
+        | flatMap { autoId, args ->
+
+          def globalSettings = args.settings ? readYamlBlob(args.settings) : [:]
+
+          // look for state files in input dir
+          def stateFiles = args.input_states
+
+          // filter state files by regex
+          if (args.filter) {
+            stateFiles = stateFiles.findAll{ stateFile ->
+              def stateFileStr = stateFile.toString()
+              def matcher = stateFileStr =~ args.filter
+              matcher.matches()}
+          }
+
+          // read in states
+          def states = stateFiles.collect { stateFile ->
+            def state_ = readTaggedYaml(stateFile)
+            [state_.id, state_]
+          }
+
+          // construct renameMap
+          if (args.rename_keys) {
+            def renameMap = args.rename_keys.collectEntries{renameString ->
+              def split = renameString.split(":")
+              assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey;newKey:oldKey'"
+              split
+            }
+
+            // rename keys in state, only let states through which have all keys
+            // also add global settings
+            states = states.collectMany{id, state ->
+              def newState = [:]
+
+              for (key in renameMap.keySet()) {
+                def origKey = renameMap[key]
+                if (!(state.containsKey(origKey))) {
+                  return []
+                }
+                newState[key] = state[origKey]
+              }
+
+              [[id, globalSettings + newState]]
+            }
+          }
+
+          states
+        }
+    emit:
+    output_ch
+  }
+
+  return findStatesTempWf
+}
\ No newline at end of file
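The core of the `findStatesTemp` workaround above is its `--rename_keys` handling: the argument is declared with `multiple_sep: ";"`, each entry is split on `:` into a new/old key pair, and a state only passes through when every original key is present. A plain-Python restatement of the Groovy logic, for illustration only (the function name below is ours, not part of the codebase):

def rename_state_keys(state, rename_keys):
    # 'newKey:oldKey' entries joined by ';', matching multiple_sep above.
    rename_map = dict(entry.split(":") for entry in rename_keys.split(";"))
    # Mirror the collectMany branch: drop states missing any original key.
    if any(old_key not in state for old_key in rename_map.values()):
        return None
    return {new_key: state[old_key] for new_key, old_key in rename_map.items()}

state = {'id': 'pancreas', 'output_train': 'train.h5ad', 'output_test': 'test.h5ad'}
print(rename_state_keys(state, 'input_train:output_train;input_test:output_test'))
# {'input_train': 'train.h5ad', 'input_test': 'test.h5ad'}

This is also why patches 12 and 13 below switch the CLI and params-file values from comma- to semicolon-separated entries.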
From f071413c1888556a6bdf38c22439108f8d8b30a4 Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 16:49:51 +0200
Subject: [PATCH 12/17] add fix to process_datasets

---
 src/workflows/process_datasets/main.nf  | 119 +++++++++++++++++++++++-
 src/workflows/run_benchmark/run_test.sh |   4 +-
 2 files changed, 120 insertions(+), 3 deletions(-)

diff --git a/src/workflows/process_datasets/main.nf b/src/workflows/process_datasets/main.nf
index 4437206..7a3f952 100644
--- a/src/workflows/process_datasets/main.nf
+++ b/src/workflows/process_datasets/main.nf
@@ -1,7 +1,7 @@
 include { findArgumentSchema } from "${meta.resources_dir}/helper.nf"
 
 workflow auto {
-  findStates(params, meta.config)
+  findStatesTemp(params, meta.config)
     | meta.workflow.run(
       auto: [publish: "state"]
     )
@@ -52,3 +52,120 @@ workflow run_wf {
   emit:
   output_ch
 }
+
+// temp fix for rename_keys typo
+
+def findStatesTemp(Map params, Map config) {
+  def auto_config = deepClone(config)
+  def auto_params = deepClone(params)
+
+  auto_config = auto_config.clone()
+  // override arguments
+  auto_config.argument_groups = []
+  auto_config.arguments = [
+    [
+      type: "string",
+      name: "--id",
+      description: "A dummy identifier",
+      required: false
+    ],
+    [
+      type: "file",
+      name: "--input_states",
+      example: "/path/to/input/directory/**/state.yaml",
+      description: "Path to input directory containing the datasets to be integrated.",
+      required: true,
+      multiple: true,
+      multiple_sep: ";"
+    ],
+    [
+      type: "string",
+      name: "--filter",
+      example: "foo/.*/state.yaml",
+      description: "Regex to filter state files by path.",
+      required: false
+    ],
+    // to do: make this a yaml blob?
+    [
+      type: "string",
+      name: "--rename_keys",
+      example: ["newKey1:oldKey1", "newKey2:oldKey2"],
+      description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.",
+      required: false,
+      multiple: true,
+      multiple_sep: ";"
+    ],
+    [
+      type: "string",
+      name: "--settings",
+      example: '{"output_dataset": "dataset.h5ad", "k": 10}',
+      description: "Global arguments as a JSON glob to be passed to all components.",
+      required: false
+    ]
+  ]
+  if (!(auto_params.containsKey("id"))) {
+    auto_params["id"] = "auto"
+  }
+
+  // run auto config through processConfig once more
+  auto_config = processConfig(auto_config)
+
+  workflow findStatesTempWf {
+    helpMessage(auto_config)
+
+    output_ch =
+      channelFromParams(auto_params, auto_config)
+        | flatMap { autoId, args ->
+
+          def globalSettings = args.settings ? readYamlBlob(args.settings) : [:]
+
+          // look for state files in input dir
+          def stateFiles = args.input_states
+
+          // filter state files by regex
+          if (args.filter) {
+            stateFiles = stateFiles.findAll{ stateFile ->
+              def stateFileStr = stateFile.toString()
+              def matcher = stateFileStr =~ args.filter
+              matcher.matches()}
+          }
+
+          // read in states
+          def states = stateFiles.collect { stateFile ->
+            def state_ = readTaggedYaml(stateFile)
+            [state_.id, state_]
+          }
+
+          // construct renameMap
+          if (args.rename_keys) {
+            def renameMap = args.rename_keys.collectEntries{renameString ->
+              def split = renameString.split(":")
+              assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey;newKey:oldKey'"
+              split
+            }
+
+            // rename keys in state, only let states through which have all keys
+            // also add global settings
+            states = states.collectMany{id, state ->
+              def newState = [:]
+
+              for (key in renameMap.keySet()) {
+                def origKey = renameMap[key]
+                if (!(state.containsKey(origKey))) {
+                  return []
+                }
+                newState[key] = state[origKey]
+              }
+
+              [[id, globalSettings + newState]]
+            }
+          }
+
+          states
+        }
+    emit:
+    output_ch
+  }
+
+  return findStatesTempWf
+}
\ No newline at end of file
diff --git a/src/workflows/run_benchmark/run_test.sh b/src/workflows/run_benchmark/run_test.sh
index feffe23..1f4559c 100755
--- a/src/workflows/run_benchmark/run_test.sh
+++ b/src/workflows/run_benchmark/run_test.sh
@@ -17,13 +17,13 @@ fi
 export NXF_VER=22.04.5
 
 nextflow run . \
-  -main-script target/nextflow/denoising/workflows/run_benchmark/main.nf \
+  -main-script target/nextflow/workflows/run_benchmark/main.nf \
   -profile docker \
   -resume \
   -entry auto \
   -c common/nextflow_helpers/labels_ci.config \
   --input_states "$DATASETS_DIR/**/state.yaml" \
-  --rename_keys 'input_train:output_train,input_test:output_test' \
+  --rename_keys 'input_train:output_train;input_test:output_test' \
   --settings '{"output_scores": "scores.yaml", "output_dataset_info": "dataset_info.yaml", "output_method_configs": "method_configs.yaml", "output_metric_configs": "metric_configs.yaml", "output_task_info": "task_info.yaml"}' \
   --publish_dir "$OUTPUT_DIR" \
   --output_state "state.yaml"

From 2cd26a8dab2f9312213ad808e442abde0069ef84 Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 16:53:57 +0200
Subject: [PATCH 13/17] update separator to ;

---
 scripts/run_benchmark.sh      | 2 +-
 scripts/run_benchmark_test.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/run_benchmark.sh b/scripts/run_benchmark.sh
index c832bdc..b87015f 100755
--- a/scripts/run_benchmark.sh
+++ b/scripts/run_benchmark.sh
@@ -6,7 +6,7 @@ publish_dir="s3://openproblems-data/resources/denoising/results/${RUN_ID}"
 # make sure only log_cp10k is used
 cat > /tmp/params.yaml << HERE
 input_states: s3://openproblems-data/resources/denoising/datasets/**/log_cp10k/state.yaml
-rename_keys: 'input_train:output_train,input_test:output_test'
+rename_keys: 'input_train:output_train;input_test:output_test'
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
diff --git a/scripts/run_benchmark_test.sh b/scripts/run_benchmark_test.sh
index f45a0b9..4c60d6f 100755
--- a/scripts/run_benchmark_test.sh
+++ b/scripts/run_benchmark_test.sh
@@ -2,7 +2,7 @@
 
 cat > /tmp/params.yaml << 'HERE'
 input_states: s3://openproblems-data/resources_test/denoising/**/state.yaml
-rename_keys: 'input_train:output_train,input_test:output_test'
+rename_keys: 'input_train:output_train;input_test:output_test'
 output_state: "state.yaml"
 publish_dir: s3://openproblems-nextflow/temp/denoising/
 HERE

From 33c19fe108df1da2180f6af9b7c47dd214ddeae8 Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 16:54:20 +0200
Subject: [PATCH 14/17] Change metric arg in workflow

---
 src/workflows/run_benchmark/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index 72009ed..dbebfd7 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -89,7 +89,7 @@ workflow run_wf {
       // use 'fromState' to fetch the arguments the component requires from the overall state
       fromState: [
         input_test: "input_test",
-        input_denoised: "method_output"
+        input_prediction: "method_output"
       ],
       // use 'toState' to publish that component's outputs to the overall state
       toState: { id, output, state, comp ->

From 47820b7a79190224f8397df9caebdd8b22777299 Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 17:11:44 +0200
Subject: [PATCH 15/17] update resources dest

---
 _viash.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_viash.yaml b/_viash.yaml
index aad3555..f2c60c5 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -14,7 +14,7 @@ info:
   test_resources:
     - type: s3
       path: s3://openproblems-data/resources_test/denoising/
-      dest: resources_test
+      dest: resources_test/denoising
 
 
 version: dev

From 8b6c209731426854163036aaafb4f88bfc34a37e Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Mon, 1 Jul 2024 17:14:29 +0200
Subject: [PATCH 16/17] add common resources s3 path to _viash

---
 _viash.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/_viash.yaml b/_viash.yaml
index f2c60c5..95b984c 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -15,6 +15,9 @@ info:
     - type: s3
       path: s3://openproblems-data/resources_test/denoising/
       dest: resources_test/denoising
+    - type: s3
+      path: s3://openproblems-data/resources_test/common/
+      dest: resources_test/common
 
 
 version: dev

From 92443d57e5e7b03cf7acfc74aafd9f55c316f927 Mon Sep 17 00:00:00 2001
From: Kai Waldrant
Date: Wed, 3 Jul 2024 10:29:43 +0200
Subject: [PATCH 17/17] update mse docker

---
 src/metrics/mse/config.vsh.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/metrics/mse/config.vsh.yaml b/src/metrics/mse/config.vsh.yaml
index 2e0285d..6dd69d1 100644
--- a/src/metrics/mse/config.vsh.yaml
+++ b/src/metrics/mse/config.vsh.yaml
@@ -18,7 +18,7 @@ resources:
     path: script.py
 engines:
   - type: docker
-    image: ghcr.io/openproblems-bio/base_python:1.0.4
+    image: ghcr.io/openproblems-bio/base_images/python:1.1.0
     setup:
       - type: python
         pypi: