diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b290e090..a108be04 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,20 +1,27 @@ -{ - "name": "nfcore", - "image": "nfcore/gitpod:latest", - "remoteUser": "gitpod", - "runArgs": ["--privileged"], - - // Configure tool-specific properties. - "customizations": { - // Configure properties specific to VS Code. - "vscode": { - // Set *default* container specific settings.json values on container create. - "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python" - }, - - // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } - } -} +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", + "python.formatting.yapfPath": "/opt/conda/bin/yapf", + "python.linting.flake8Path": "/opt/conda/bin/flake8", + "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", + "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", + "python.linting.pylintPath": "/opt/conda/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} \ No newline at end of file diff --git a/.editorconfig b/.editorconfig index dd9ffa53..2218e654 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,37 +1,33 @@ -root = true - -[*] -charset = utf-8 -end_of_line = lf -insert_final_newline = true -trim_trailing_whitespace = true -indent_size = 4 -indent_style = space - -[*.{md,yml,yaml,html,css,scss,js}] -indent_size = 2 - -# These files are edited and tested upstream in nf-core/modules -[/modules/nf-core/**] -charset = unset -end_of_line = unset -insert_final_newline = unset -trim_trailing_whitespace = unset -indent_style = unset -[/subworkflows/nf-core/**] -charset = unset -end_of_line = unset -insert_final_newline = unset -trim_trailing_whitespace = unset -indent_style = unset - -[/assets/email*] -indent_size = unset - -# ignore Readme -[README.md] -indent_style = unset - -# ignore python -[*.{py,md}] -indent_style = unset +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space + +[*.{md,yml,yaml,html,css,scss,js}] +indent_size = 2 + +# These files are edited and tested upstream in nf-core/modules +[/modules/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset + +[/assets/email*] +indent_size = unset + +# ignore python and markdown +[*.{py,md}] +indent_style = unset diff --git a/.gitattributes b/.gitattributes index 7a2dabc2..f7d9a706 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,4 @@ -*.config linguist-language=nextflow -*.nf.test 
linguist-language=nextflow
-modules/nf-core/** linguist-generated
-subworkflows/nf-core/** linguist-generated
+*.config linguist-language=nextflow
+*.nf.test linguist-language=nextflow
+modules/nf-core/** linguist-generated
+subworkflows/nf-core/** linguist-generated
diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml
old mode 100644
new mode 100755
index 191fabd2..897c2047
--- a/.github/.dockstore.yml
+++ b/.github/.dockstore.yml
@@ -1,6 +1,6 @@
-# Dockstore config version, not pipeline version
-version: 1.2
-workflows:
-  - subclass: nfl
-    primaryDescriptorPath: /nextflow.config
-    publish: True
+# Dockstore config version, not pipeline version
+version: 1.2
+workflows:
+  - subclass: nfl
+    primaryDescriptorPath: /nextflow.config
+    publish: True
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
old mode 100644
new mode 100755
index f34ebfd5..c5cc5c5a
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -1,125 +1,125 @@
-# nf-core/proteinfold: Contributing Guidelines
-
-Hi there!
-Many thanks for taking an interest in improving nf-core/proteinfold.
-
-We try to manage the required tasks for nf-core/proteinfold using GitHub issues, so you probably came to this page when creating one.
-Please use the pre-filled template to save time.
-
-However, don't be put off by this template - other more general issues and suggestions are welcome!
-Contributions to the code are even more welcome ;)
-
-> [!NOTE]
-> If you need help using or modifying nf-core/proteinfold then the best place to ask is on the nf-core Slack [#proteinfold](https://nfcore.slack.com/channels/proteinfold) channel ([join our Slack here](https://nf-co.re/join/slack)).
-
-## Contribution workflow
-
-If you'd like to write some code for nf-core/proteinfold, the standard workflow is as follows:
-
-1. Check that there isn't already an issue about your idea in the [nf-core/proteinfold issues](https://github.com/nf-core/proteinfold/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this
-2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/proteinfold repository](https://github.com/nf-core/proteinfold) to your GitHub account
-3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions)
-4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10).
-5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged
-
-If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/).
-
-## Tests
-
-You have the option to test your changes locally by running the pipeline. To receive warnings about process selectors and other `debug` information, it is recommended to use the `debug` profile. Execute all the tests with the following command:
-
-```bash
-nf-test test --profile debug,test,docker --verbose
-```
-
-When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
-Typically, pull requests are only fully reviewed when these tests are passing, though of course we can help out before then.
-
-There are typically two types of tests that run:
-
-### Lint tests
-
-`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to.
-To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint <pipeline-directory>` command.
-
-If any failures or warnings are encountered, please follow the listed URL for more documentation.
-
-### Pipeline tests
-
-Each `nf-core` pipeline should be set up with a minimal set of test data.
-`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully.
-If there are any failures then the automated tests fail.
-These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code.
-
-## Patch
-
-:warning: Only in the unlikely and regretful event of a release happening with a bug.
-
-- On your own fork, make a new branch `patch` based on `upstream/master`.
-- Fix the bug, and bump version (X.Y.Z+1).
-- A PR should be made on `master` from patch to directly fix this particular bug.
-
-## Getting help
-
-For further information/help, please consult the [nf-core/proteinfold documentation](https://nf-co.re/proteinfold/usage) and don't hesitate to get in touch on the nf-core Slack [#proteinfold](https://nfcore.slack.com/channels/proteinfold) channel ([join our Slack here](https://nf-co.re/join/slack)).
-
-## Pipeline contribution conventions
-
-To make the nf-core/proteinfold code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written.
-
-### Adding a new step
-
-If you wish to contribute a new step, please use the following coding standards:
-
-1. Define the corresponding input channel into your new process from the expected previous process channel
-2. Write the process block (see below).
-3. Define the output channel if needed (see below).
-4. Add any new parameters to `nextflow.config` with a default (see below).
-5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool).
-6. Add sanity checks and validation for all relevant parameters.
-7. Perform local tests to validate that the new code works as expected.
-8. If applicable, add a new test command in `.github/workflows/ci.yml`.
-9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module.
-10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`.
-
-### Default values
-
-Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope.
-
-Once there, use `nf-core schema build` to add to `nextflow_schema.json`.
-
-### Default processes resource requirements
-
-Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline.
An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single-core process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.
-
-The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block.
-
-### Naming schemes
-
-Please use the following naming schemes, to make it easy to understand what is going where.
-
-- initial process channel: `ch_output_from_<process>`
-- intermediate and terminal channels: `ch_<previous>_for_<next>`
-
-### Nextflow version bumping
-
-If you are using a new feature from core Nextflow, you may bump the minimum required version of Nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]`
-
-### Images and figures
-
-For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines).
-
-## GitHub Codespaces
-
-This repo includes a devcontainer configuration which will create a GitHub Codespace for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal.
-
-To get started:
-
-- Open the repo in [Codespaces](https://github.com/nf-core/proteinfold/codespaces)
-- Tools installed
-  - nf-core
-  - Nextflow
-
-Devcontainer specs:
-
-- [DevContainer config](.devcontainer/devcontainer.json)
+# nf-core/proteinfold: Contributing Guidelines
+
+Hi there!
+Many thanks for taking an interest in improving nf-core/proteinfold.
+
+We try to manage the required tasks for nf-core/proteinfold using GitHub issues, so you probably came to this page when creating one.
+Please use the pre-filled template to save time.
+
+However, don't be put off by this template - other more general issues and suggestions are welcome!
+Contributions to the code are even more welcome ;)
+
+> [!NOTE]
+> If you need help using or modifying nf-core/proteinfold then the best place to ask is on the nf-core Slack [#proteinfold](https://nfcore.slack.com/channels/proteinfold) channel ([join our Slack here](https://nf-co.re/join/slack)).
+
+## Contribution workflow
+
+If you'd like to write some code for nf-core/proteinfold, the standard workflow is as follows:
+
+1. Check that there isn't already an issue about your idea in the [nf-core/proteinfold issues](https://github.com/nf-core/proteinfold/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this
+2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/proteinfold repository](https://github.com/nf-core/proteinfold) to your GitHub account
+3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions)
+4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10).
+5.
Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged
+
+If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/).
+
+## Tests
+
+You have the option to test your changes locally by running the pipeline. To receive warnings about process selectors and other `debug` information, it is recommended to use the `debug` profile. Execute all the tests with the following command:
+
+```bash
+nextflow run . -profile debug,test,docker --outdir <OUTDIR>
+```
+
+When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
+Typically, pull requests are only fully reviewed when these tests are passing, though of course we can help out before then.
+
+There are typically two types of tests that run:
+
+### Lint tests
+
+`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to.
+To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint <pipeline-directory>` command.
+
+If any failures or warnings are encountered, please follow the listed URL for more documentation.
+
+### Pipeline tests
+
+Each `nf-core` pipeline should be set up with a minimal set of test data.
+`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully.
+If there are any failures then the automated tests fail.
+These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code.
+
+## Patch
+
+:warning: Only in the unlikely and regretful event of a release happening with a bug.
+
+- On your own fork, make a new branch `patch` based on `upstream/master`.
+- Fix the bug, and bump version (X.Y.Z+1).
+- A PR should be made on `master` from patch to directly fix this particular bug.
+
+## Getting help
+
+For further information/help, please consult the [nf-core/proteinfold documentation](https://nf-co.re/proteinfold/usage) and don't hesitate to get in touch on the nf-core Slack [#proteinfold](https://nfcore.slack.com/channels/proteinfold) channel ([join our Slack here](https://nf-co.re/join/slack)).
+
+## Pipeline contribution conventions
+
+To make the nf-core/proteinfold code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written.
+
+### Adding a new step
+
+If you wish to contribute a new step, please use the following coding standards:
+
+1. Define the corresponding input channel into your new process from the expected previous process channel
+2. Write the process block (see below).
+3. Define the output channel if needed (see below).
+4. Add any new parameters to `nextflow.config` with a default (see below).
+5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool).
+6. Add sanity checks and validation for all relevant parameters.
+7. Perform local tests to validate that the new code works as expected.
+8. If applicable, add a new test command in `.github/workflows/ci.yml`.
+9.
Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module.
+10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`.
+
+### Default values
+
+Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope.
+
+Once there, use `nf-core schema build` to add to `nextflow_schema.json`.
+
+### Default processes resource requirements
+
+Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single-core process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.
+
+The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block.
+
+### Naming schemes
+
+Please use the following naming schemes, to make it easy to understand what is going where.
+
+- initial process channel: `ch_output_from_<process>`
+- intermediate and terminal channels: `ch_<previous>_for_<next>`
+
+### Nextflow version bumping
+
+If you are using a new feature from core Nextflow, you may bump the minimum required version of Nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]`
+
+### Images and figures
+
+For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines).
+
+## GitHub Codespaces
+
+This repo includes a devcontainer configuration which will create a GitHub Codespace for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal.
+
+To get started:
+
+- Open the repo in [Codespaces](https://github.com/nf-core/proteinfold/codespaces)
+- Tools installed
+  - nf-core
+  - Nextflow
+
+Devcontainer specs:
+
+- [DevContainer config](.devcontainer/devcontainer.json)
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
old mode 100644
new mode 100755
index 257da826..6b7a5e93
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -1,50 +1,50 @@
-name: Bug report
-description: Report something that is broken or incorrect
-labels: bug
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Before you post this issue, please check the documentation:
-
-        - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting)
-        - [nf-core/proteinfold pipeline documentation](https://nf-co.re/proteinfold/usage)
-
-  - type: textarea
-    id: description
-    attributes:
-      label: Description of the bug
-      description: A clear and concise description of what the bug is.
-    validations:
-      required: true
-
-  - type: textarea
-    id: command_used
-    attributes:
-      label: Command used and terminal output
-      description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal.
- render: console - placeholder: | - $ nextflow run ... - - Some output where something broke - - - type: textarea - id: files - attributes: - label: Relevant files - description: | - Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. - Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. - - - type: textarea - id: system - attributes: - label: System information - description: | - * Nextflow version _(eg. 23.04.0)_ - * Hardware _(eg. HPC, Desktop, Cloud)_ - * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ - * OS _(eg. CentOS Linux, macOS, Linux Mint)_ - * Version of nf-core/proteinfold _(eg. 1.1, 1.5, 1.8.2)_ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [nf-core/proteinfold pipeline documentation](https://nf-co.re/proteinfold/usage) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 23.04.0)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of nf-core/proteinfold _(eg. 
1.1, 1.5, 1.8.2)_
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
old mode 100644
new mode 100755
index 06ed5d1c..c7641bd6
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,7 +1,7 @@
-contact_links:
-  - name: Join nf-core
-    url: https://nf-co.re/join
-    about: Please join the nf-core community here
-  - name: "Slack #proteinfold channel"
-    url: https://nfcore.slack.com/channels/proteinfold
-    about: Discussion about the nf-core/proteinfold pipeline
+contact_links:
+  - name: Join nf-core
+    url: https://nf-co.re/join
+    about: Please join the nf-core community here
+  - name: "Slack #proteinfold channel"
+    url: https://nfcore.slack.com/channels/proteinfold
+    about: Discussion about the nf-core/proteinfold pipeline
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
old mode 100644
new mode 100755
index 545e2122..f2f3494a
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -1,11 +1,11 @@
-name: Feature request
-description: Suggest an idea for the nf-core/proteinfold pipeline
-labels: enhancement
-body:
-  - type: textarea
-    id: description
-    attributes:
-      label: Description of feature
-      description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered.
-    validations:
-      required: true
+name: Feature request
+description: Suggest an idea for the nf-core/proteinfold pipeline
+labels: enhancement
+body:
+  - type: textarea
+    id: description
+    attributes:
+      label: Description of feature
+      description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered.
+    validations:
+      required: true
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
old mode 100644
new mode 100755
index 3dde701c..eaf632ad
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,26 +1,26 @@
-
-
-## PR checklist
-
-- [ ] This comment contains a description of changes (with reason).
-- [ ] If you've fixed a bug or added code that should be tested, add tests!
-- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/proteinfold/tree/master/.github/CONTRIBUTING.md)
-- [ ] If necessary, also make a PR on the nf-core/proteinfold _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
-- [ ] Make sure your code lints (`nf-core lint`).
-- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`).
-- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`).
-- [ ] Usage Documentation in `docs/usage.md` is updated.
-- [ ] Output Documentation in `docs/output.md` is updated.
-- [ ] `CHANGELOG.md` is updated.
-- [ ] `README.md` is updated (including new tool citations and authors/contributors).
+
+
+## PR checklist
+
+- [ ] This comment contains a description of changes (with reason).
+- [ ] If you've fixed a bug or added code that should be tested, add tests!
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/proteinfold/tree/master/.github/CONTRIBUTING.md)
+- [ ] If necessary, also make a PR on the nf-core/proteinfold _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
+- [ ] Make sure your code lints (`nf-core lint`).
+- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
+- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`).
+- [ ] Usage Documentation in `docs/usage.md` is updated.
+- [ ] Output Documentation in `docs/output.md` is updated.
+- [ ] `CHANGELOG.md` is updated.
+- [ ] `README.md` is updated (including new tool citations and authors/contributors).
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
old mode 100644
new mode 100755
index eef3ae69..e11d0aba
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -1,49 +1,51 @@
-name: nf-core AWS full size tests
-# This workflow is triggered on published releases.
-# It can be additionally triggered manually with the GitHub Actions workflow dispatch button.
-# It runs the -profile 'test_full' on AWS Batch
-
-on:
-  release:
-    types: [published]
-  workflow_dispatch:
-jobs:
-  run-tower:
-    name: Run AWS AlphaFold2 full monomer tests
-    if: github.repository == 'nf-core/proteinfold'
-    runs-on: ubuntu-latest
-    # Do a full-scale run on each of the modes
-    strategy:
-      matrix:
-        mode:
-          [
-            "_alphafold2_standard",
-            "_alphafold2_split",
-            "_alphafold2_multimer",
-            "_colabfold_local",
-            "_colabfold_webserver",
-            "_colabfold_multimer",
-            "_esmfold",
-            "_esmfold_multimer",
-          ]
-    steps:
-      - name: Launch workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          revision: ${{ github.sha }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/proteinfold/work-${{ github.sha }}
-          parameters: |
-            {
-              "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}",
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/proteinfold/results-${{ github.sha }}/mode_${{ matrix.mode }}"
-            }
-          profiles: test_full_${{ matrix.mode }},aws_tower
-      - uses: actions/upload-artifact@v4
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
+name: nf-core AWS full size tests
+# This workflow is triggered on published releases.
+# It can be additionally triggered manually with the GitHub Actions workflow dispatch button.
+# It runs the -profile 'test_full' on AWS Batch
+
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+jobs:
+  run-platform:
+    name: Run AWS full tests
+    if: github.repository == 'nf-core/proteinfold'
+    runs-on: ubuntu-latest
+    # Do a full-scale run on each of the modes
+    strategy:
+      matrix:
+        mode:
+          [
+            "alphafold2_standard",
+            "alphafold2_split",
+            "alphafold2_multimer",
+            "colabfold_local",
+            "colabfold_webserver",
+            "colabfold_multimer",
+            "esmfold",
+            "esmfold_multimer",
+          ]
+    steps:
+      - name: Launch workflow via Seqera Platform
+        uses: seqeralabs/action-tower-launch@v2
+        with:
+          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
+          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
+          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
+          revision: ${{ github.sha }}
+          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/proteinfold/work-${{ github.sha }}
+          parameters: |
+            {
+              "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}",
+              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/proteinfold/results-${{ github.sha }}/mode_${{ matrix.mode }}"
+            }
+          profiles: test_full_${{ matrix.mode }}
+
+      - uses: actions/upload-artifact@v4
+        if: success() || failure()
+        with:
+          name: Seqera Platform debug log file
+          path: |
+            seqera_platform_action_*.log
+            seqera_platform_action_*.json
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
old mode 100644
new mode 100755
index 70e47cdf..c2c66d4b
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -1,33 +1,33 @@
-name: nf-core AWS test
-# This workflow can be triggered manually with the GitHub Actions workflow dispatch button.
-# It runs the -profile 'test' on AWS Batch
-
-on:
-  workflow_dispatch:
-jobs:
-  run-tower:
-    name: Run AWS tests
-    if: github.repository == 'nf-core/proteinfold'
-    runs-on: ubuntu-latest
-    steps:
-      # Launch workflow using Tower CLI tool action
-      - name: Launch workflow via tower
-        uses: seqeralabs/action-tower-launch@v2
-        with:
-          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
-          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
-          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          revision: ${{ github.sha }}
-          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/proteinfold/work-${{ github.sha }}
-          parameters: |
-            {
-              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/proteinfold/results-test-${{ github.sha }}"
-            }
-          profiles: test
-
-      - uses: actions/upload-artifact@v4
-        with:
-          name: Tower debug log file
-          path: |
-            tower_action_*.log
-            tower_action_*.json
+name: nf-core AWS test
+# This workflow can be triggered manually with the GitHub Actions workflow dispatch button.
+# It runs the -profile 'test' on AWS Batch
+
+on:
+  workflow_dispatch:
+jobs:
+  run-platform:
+    name: Run AWS tests
+    if: github.repository == 'nf-core/proteinfold'
+    runs-on: ubuntu-latest
+    steps:
+      # Launch workflow using Seqera Platform CLI tool action
+      - name: Launch workflow via Seqera Platform
+        uses: seqeralabs/action-tower-launch@v2
+        with:
+          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
+          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
+          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
+          revision: ${{ github.sha }}
+          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/proteinfold/work-${{ github.sha }}
+          parameters: |
+            {
+              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/proteinfold/results-test-${{ github.sha }}"
+            }
+          profiles: test
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: Seqera Platform debug log file
+          path: |
+            seqera_platform_action_*.log
+            seqera_platform_action_*.json
diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
old mode 100644
new mode 100755
index cabcdbdd..0190ac32
--- a/.github/workflows/branch.yml
+++ b/.github/workflows/branch.yml
@@ -1,44 +1,44 @@
-name: nf-core branch protection
-# This workflow is triggered on PRs to the master branch on the repository
-# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev`
-on:
-  pull_request_target:
-    branches: [master]
-
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    steps:
-      # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches
-      - name: Check PRs
-        if: github.repository == 'nf-core/proteinfold'
-        run: |
-          { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/proteinfold ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
-
-      # If the above check failed, post a comment on the PR explaining the failure
-      # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
-      - name: Post PR comment
-        if: failure()
-        uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2
-        with:
-          message: |
-            ## This PR is against the `master` branch :x:
-
-            * Do not close this PR
-            * Click _Edit_ and change the `base` to `dev`
-            * This CI test will remain failed until you push a new commit
-
-            ---
-
-            Hi @${{ github.event.pull_request.user.login }},
-
-            It looks like this pull request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch.
-            The `master` branch on nf-core repositories should always contain code from the latest release.
-            Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch.
-
-            You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page.
-            Note that even after this, the test will continue to show as failing until you push a new commit.
-
-            Thanks again for your contribution!
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
-          allow-repeats: false
+name: nf-core branch protection
+# This workflow is triggered on PRs to the master branch on the repository
+# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev`
+on:
+  pull_request_target:
+    branches: [master]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches
+      - name: Check PRs
+        if: github.repository == 'nf-core/proteinfold'
+        run: |
+          { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/proteinfold ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
+
+      # If the above check failed, post a comment on the PR explaining the failure
+      # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
+      - name: Post PR comment
+        if: failure()
+        uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2
+        with:
+          message: |
+            ## This PR is against the `master` branch :x:
+
+            * Do not close this PR
+            * Click _Edit_ and change the `base` to `dev`
+            * This CI test will remain failed until you push a new commit
+
+            ---
+
+            Hi @${{ github.event.pull_request.user.login }},
+
+            It looks like this pull request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch.
+            The `master` branch on nf-core repositories should always contain code from the latest release.
+            Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch.
+
+            You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page.
+            Note that even after this, the test will continue to show as failing until you push a new commit.
+
+            Thanks again for your contribution!
+ repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml old mode 100644 new mode 100755 index 17260c5d..eb80e919 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,107 +1,52 @@ -name: nf-core CI -# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors -on: - push: - branches: - - dev - pull_request: - release: - types: [published] - -env: - NXF_ANSI_LOG: false - -concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" - cancel-in-progress: true - -jobs: - test: - name: Run pipeline with test data - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }}" - runs-on: ubuntu-latest - strategy: - matrix: - NXF_VER: - - "23.04.0" - - "latest-everything" - steps: - - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 - with: - version: "${{ matrix.NXF_VER }}" - - - name: Disk space cleanup - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - - name: Run pipeline with test data - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results - - test_alphafold2_split: - name: Test alphafold2 split workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in alphafold2 split mode - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_alphafold2_split,docker --outdir ./results - - test_colabfold_local: - name: Test Colabfold local workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in colabfold_local mode - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_colabfold_local,docker --outdir ./results - - test_colabfold_webserver: - name: Test Colabfold webserver workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in colabfold_webserver mode - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_colabfold_webserver,docker --outdir ./results - - test_esmfold: - name: Test ESMFold workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in esmfold mode - run: | - 
nextflow run ${GITHUB_WORKSPACE} -profile test_esmfold,docker --outdir ./results +name: nf-core CI +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +env: + NXF_ANSI_LOG: false + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + +jobs: + test: + name: Run pipeline with test data + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "23.04.0" + - "latest-everything" + parameters: + - "test" + - "test_alphafold2_split" + - "test_alphafold2_download" + - "test_colabfold_local" + - "test_colabfold_webserver" + - "test_colabfold_download" + - "test_esmfold" + + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Run pipeline with test data ${{ matrix.parameters }} profile + run: | + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.parameters }},docker --outdir ./results_${{ matrix.parameters }} diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml old mode 100644 new mode 100755 index 0b6b1f27..519c3ab3 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -1,24 +1,24 @@ -name: "Close user-tagged issues and PRs" -on: - schedule: - - cron: "0 0 * * 0" # Once a week - -jobs: - clean-up: - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - steps: - - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 - with: - stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." - stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." - close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." - days-before-stale: 30 - days-before-close: 20 - days-before-pr-close: -1 - any-of-labels: "awaiting-changes,awaiting-feedback" - exempt-issue-labels: "WIP" - exempt-pr-labels: "WIP" - repo-token: "${{ secrets.GITHUB_TOKEN }}" +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." 
+ close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml old mode 100644 new mode 100755 index 08622fd5..ffc6a39b --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -1,72 +1,86 @@ -name: Test successful pipeline download with 'nf-core download' - -# Run the workflow when: -# - dispatched manually -# - when a PR is opened or reopened to master branch -# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. -on: - workflow_dispatch: - inputs: - testbranch: - description: "The specific branch you wish to utilize for the test execution of nf-core download." - required: true - default: "dev" - pull_request: - types: - - opened - branches: - - master - pull_request_target: - branches: - - master - -env: - NXF_ANSI_LOG: false - -jobs: - download: - runs-on: ubuntu-latest - steps: - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 - - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 - with: - python-version: "3.11" - architecture: "x64" - - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 - with: - singularity-version: 3.8.3 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install git+https://github.com/nf-core/tools.git@dev - - - name: Get the repository name and current branch set as environment variable - run: | - echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} - echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} - echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} - - - name: Download the pipeline - env: - NXF_SINGULARITY_CACHEDIR: ./ - run: | - nf-core download ${{ env.REPO_LOWERCASE }} \ - --revision ${{ env.REPO_BRANCH }} \ - --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ - --compress "none" \ - --container-system 'singularity' \ - --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ - --container-cache-utilisation 'amend' \ - --download-configuration - - - name: Inspect download - run: tree ./${{ env.REPOTITLE_LOWERCASE }} - - - name: Run the downloaded pipeline - env: - NXF_SINGULARITY_CACHEDIR: ./ - NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." 
+        required: true
+        default: "dev"
+  pull_request:
+    types:
+      - opened
+      - edited
+      - synchronize
+    branches:
+      - master
+  pull_request_target:
+    branches:
+      - master
+
+env:
+  NXF_ANSI_LOG: false
+
+jobs:
+  download:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Install Nextflow
+        uses: nf-core/setup-nextflow@v2
+
+      - name: Disk space cleanup
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+
+      - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
+        with:
+          python-version: "3.12"
+          architecture: "x64"
+      - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7
+        with:
+          singularity-version: 3.8.3
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install git+https://github.com/nf-core/tools.git
+
+      - name: Get the repository name and current branch set as environment variable
+        run: |
+          echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV}
+          echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV}
+          echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV}
+
+      - name: Download the pipeline
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./
+        run: |
+          nf-core download ${{ env.REPO_LOWERCASE }} \
+            --revision ${{ env.REPO_BRANCH }} \
+            --outdir ./${{ env.REPOTITLE_LOWERCASE }} \
+            --compress "none" \
+            --container-system 'singularity' \
+            --container-library "quay.io" -l "docker.io" -l "ghcr.io" \
+            --container-cache-utilisation 'amend' \
+            --download-configuration
+
+      - name: Inspect download
+        run: tree ./${{ env.REPOTITLE_LOWERCASE }}
+
+      - name: Run the downloaded pipeline (stub)
+        id: stub_run_pipeline
+        continue-on-error: true
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./
+          NXF_SINGULARITY_HOME_MOUNT: true
+        run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results
+      - name: Run the downloaded pipeline (stub run not supported)
+        id: run_pipeline
+        if: ${{ steps.stub_run_pipeline.outcome == 'failure' }}
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./
+          NXF_SINGULARITY_HOME_MOUNT: true
+        run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results
diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml
old mode 100644
new mode 100755
index f459fc21..f79d761f
--- a/.github/workflows/fix-linting.yml
+++ b/.github/workflows/fix-linting.yml
@@ -1,89 +1,89 @@
-name: Fix linting from a comment
-on:
-  issue_comment:
-    types: [created]
-
-jobs:
-  fix-linting:
-    # Only run if comment is on a PR with the main repo, and if it contains the magic keywords
-    if: >
-      contains(github.event.comment.html_url, '/pull/') &&
-      contains(github.event.comment.body, '@nf-core-bot fix linting') &&
-      github.repository == 'nf-core/proteinfold'
-    runs-on: ubuntu-latest
-    steps:
-      # Use the @nf-core-bot token to check out so we can push later
-      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
-        with:
-          token: ${{ secrets.nf_core_bot_auth_token }}
-
-      # indication that the linting is being fixed
-      - name: React on comment
-        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
-        with:
-          comment-id: ${{ github.event.comment.id }}
-          reactions: eyes
-
-      # Action runs on the issue comment, so we don't get the PR by default
-      # Use the gh cli to check out the PR
-      - name: Checkout Pull Request
-        run: gh pr checkout ${{
github.event.issue.number }}
-        env:
-          GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}
-
-      # Install and run pre-commit
-      - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
-        with:
-          python-version: 3.11
-
-      - name: Install pre-commit
-        run: pip install pre-commit
-
-      - name: Run pre-commit
-        id: pre-commit
-        run: pre-commit run --all-files
-        continue-on-error: true
-
-      # indication that the linting has finished
-      - name: react if linting finished successfully
-        if: steps.pre-commit.outcome == 'success'
-        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
-        with:
-          comment-id: ${{ github.event.comment.id }}
-          reactions: "+1"
-
-      - name: Commit & push changes
-        id: commit-and-push
-        if: steps.pre-commit.outcome == 'failure'
-        run: |
-          git config user.email "core@nf-co.re"
-          git config user.name "nf-core-bot"
-          git config push.default upstream
-          git add .
-          git status
-          git commit -m "[automated] Fix code linting"
-          git push
-
-      - name: react if linting errors were fixed
-        id: react-if-fixed
-        if: steps.commit-and-push.outcome == 'success'
-        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
-        with:
-          comment-id: ${{ github.event.comment.id }}
-          reactions: hooray
-
-      - name: react if linting errors were not fixed
-        if: steps.commit-and-push.outcome == 'failure'
-        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
-        with:
-          comment-id: ${{ github.event.comment.id }}
-          reactions: confused
-
-      - name: react if linting errors were not fixed
-        if: steps.commit-and-push.outcome == 'failure'
-        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
-        with:
-          issue-number: ${{ github.event.issue.number }}
-          body: |
-            @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually.
-            See [CI log](https://github.com/nf-core/proteinfold/actions/runs/${{ github.run_id }}) for more details.
+name: Fix linting from a comment
+on:
+  issue_comment:
+    types: [created]
+
+jobs:
+  fix-linting:
+    # Only run if comment is on a PR with the main repo, and if it contains the magic keywords
+    if: >
+      contains(github.event.comment.html_url, '/pull/') &&
+      contains(github.event.comment.body, '@nf-core-bot fix linting') &&
+      github.repository == 'nf-core/proteinfold'
+    runs-on: ubuntu-latest
+    steps:
+      # Use the @nf-core-bot token to check out so we can push later
+      - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
+        with:
+          token: ${{ secrets.nf_core_bot_auth_token }}
+
+      # indication that the linting is being fixed
+      - name: React on comment
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: eyes
+
+      # Action runs on the issue comment, so we don't get the PR by default
+      # Use the gh cli to check out the PR
+      - name: Checkout Pull Request
+        run: gh pr checkout ${{ github.event.issue.number }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}
+
+      # Install and run pre-commit
+      - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
+        with:
+          python-version: "3.12"
+
+      - name: Install pre-commit
+        run: pip install pre-commit
+
+      - name: Run pre-commit
+        id: pre-commit
+        run: pre-commit run --all-files
+        continue-on-error: true
+
+      # indication that the linting has finished
+      - name: react if linting finished successfully
+        if: steps.pre-commit.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: "+1"
+
+      - name: Commit & push changes
+        id: commit-and-push
+        if: steps.pre-commit.outcome == 'failure'
+        run: |
+          git config user.email "core@nf-co.re"
+          git config user.name "nf-core-bot"
+          git config push.default upstream
+          git add .
+          git status
+          git commit -m "[automated] Fix code linting"
+          git push
+
+      - name: react if linting errors were fixed
+        id: react-if-fixed
+        if: steps.commit-and-push.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: hooray
+
+      - name: react if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: confused
+
+      - name: react if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          issue-number: ${{ github.event.issue.number }}
+          body: |
+            @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually.
+            See [CI log](https://github.com/nf-core/proteinfold/actions/runs/${{ github.run_id }}) for more details.
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
old mode 100644
new mode 100755
index 073e1876..e2acf04d
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -1,69 +1,68 @@
-name: nf-core linting
-# This workflow is triggered on pushes and PRs to the repository.
-# It runs the `nf-core lint` and markdown lint tests to ensure
-# that the code meets the nf-core guidelines.
-on: - push: - branches: - - dev - pull_request: - release: - types: [published] - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - - name: Set up Python 3.11 - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 - with: - python-version: 3.11 - cache: "pip" - - - name: Install pre-commit - run: pip install pre-commit - - - name: Run pre-commit - run: pre-commit run --all-files - - nf-core: - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 - - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 - with: - python-version: "3.11" - architecture: "x64" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install nf-core - - - name: Run nf-core lint - env: - GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - - - name: Save PR number - if: ${{ always() }} - run: echo ${{ github.event.pull_request.number }} > PR_number.txt - - - name: Upload linting log file artifact - if: ${{ always() }} - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 - with: - name: linting-logs - path: | - lint_log.txt - lint_results.md - PR_number.txt +name: nf-core linting +# This workflow is triggered on pushes and PRs to the repository. +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. 
+on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files + + nf-core: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core + + - name: Run nf-core lint + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml old mode 100644 new mode 100755 index b706875f..4684f321 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -1,28 +1,28 @@ -name: nf-core linting comment -# This workflow is triggered after the linting action is complete -# It posts an automated comment to the PR, even if the PR is coming from a fork - -on: - workflow_run: - workflows: ["nf-core linting"] - -jobs: - test: - runs-on: ubuntu-latest - steps: - - name: Download lint results - uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 - with: - workflow: linting.yml - workflow_conclusion: completed - - - name: Get PR number - id: pr_number - run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - - - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - number: ${{ steps.pr_number.outputs.pr_number }} - path: linting-logs/lint_results.md +name: nf-core linting comment +# This workflow is triggered after the linting action is complete +# It posts an automated comment to the PR, even if the PR is coming from a fork + +on: + workflow_run: + workflows: ["nf-core linting"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 + with: + workflow: linting.yml + workflow_conclusion: completed + + - name: Get PR number + id: pr_number + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + + - name: Post PR comment + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md diff --git 
a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml old mode 100644 new mode 100755 index d468aeaa..1213f28c --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -1,75 +1,75 @@ -name: release-announcements -# Automatic release toot and tweet anouncements -on: - release: - types: [published] - workflow_dispatch: - -jobs: - toot: - runs-on: ubuntu-latest - steps: - - name: get topics and convert to hashtags - id: get_topics - run: | - curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' >> $GITHUB_OUTPUT - - - uses: rzr/fediverse-action@master - with: - access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} - host: "mstdn.science" # custom host if not "mastodon.social" (default) - # GitHub event payload - # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release - message: | - Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! - - Please see the changelog: ${{ github.event.release.html_url }} - - ${{ steps.get_topics.outputs.GITHUB_OUTPUT }} #nfcore #openscience #nextflow #bioinformatics - - send-tweet: - runs-on: ubuntu-latest - - steps: - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 - with: - python-version: "3.10" - - name: Install dependencies - run: pip install tweepy==4.14.0 - - name: Send tweet - shell: python - run: | - import os - import tweepy - - client = tweepy.Client( - access_token=os.getenv("TWITTER_ACCESS_TOKEN"), - access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), - consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), - consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), - ) - tweet = os.getenv("TWEET") - client.create_tweet(text=tweet) - env: - TWEET: | - Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! - - Please see the changelog: ${{ github.event.release.html_url }} - TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} - TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} - TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} - TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} - - bsky-post: - runs-on: ubuntu-latest - steps: - - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 - with: - post: | - Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
-
-            Please see the changelog: ${{ github.event.release.html_url }}
-        env:
-          BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }}
-          BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }}
-  #
+name: release-announcements
+# Automatic release toot and tweet announcements
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+jobs:
+  toot:
+    runs-on: ubuntu-latest
+    steps:
+      - name: get topics and convert to hashtags
+        id: get_topics
+        run: |
+          echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT
+
+      - uses: rzr/fediverse-action@master
+        with:
+          access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }}
+          host: "mstdn.science" # custom host if not "mastodon.social" (default)
+          # GitHub event payload
+          # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release
+          message: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+
+            ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics
+
+  send-tweet:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
+        with:
+          python-version: "3.10"
+      - name: Install dependencies
+        run: pip install tweepy==4.14.0
+      - name: Send tweet
+        shell: python
+        run: |
+          import os
+          import tweepy
+
+          client = tweepy.Client(
+              access_token=os.getenv("TWITTER_ACCESS_TOKEN"),
+              access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"),
+              consumer_key=os.getenv("TWITTER_CONSUMER_KEY"),
+              consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"),
+          )
+          tweet = os.getenv("TWEET")
+          client.create_tweet(text=tweet)
+        env:
+          TWEET: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+          TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }}
+          TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }}
+          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
+          TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
+
+  bsky-post:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0
+        with:
+          post: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+ + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitignore b/.gitignore index 5124c9ac..c4bbae0f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,8 @@ -.nextflow* -work/ -data/ -results/ -.DS_Store -testing/ -testing* -*.pyc +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc diff --git a/.gitpod.yml b/.gitpod.yml index 105a1821..15f5a5f7 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,20 +1,20 @@ -image: nfcore/gitpod:latest -tasks: - - name: Update Nextflow and setup pre-commit - command: | - pre-commit install --install-hooks - nextflow self-update - - name: unset JAVA_TOOL_OPTIONS - command: | - unset JAVA_TOOL_OPTIONS - -vscode: - extensions: # based on nf-core.nf-core-extensionpack - - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting - - oderwat.indent-rainbow # Highlight indentation level - - streetsidesoftware.code-spell-checker # Spelling checker for source code - - charliermarsh.ruff # Code linter Ruff +image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS + +vscode: + extensions: # based on nf-core.nf-core-extensionpack + - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + # - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index cfe39173..90f30aaa 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,6 +1,6 @@ -repository_type: pipeline -lint: - files_unchanged: - - .github/ISSUE_TEMPLATE/bug_report.yml - - pyproject.toml - multiqc_config: false +repository_type: pipeline +nf_core_version: "2.14.1" +lint: + files_unchanged: + - .github/CONTRIBUTING.md + multiqc_config: false \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index af57081f..18a79c35 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,10 +1,13 @@ -repos: - - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v3.1.0" - hooks: - - id: prettier - - repo: https://github.com/editorconfig-checker/editorconfig-checker.python - rev: "2.7.3" - hooks: - - id: editorconfig-checker - alias: ec +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.1.0" + hooks: + - id: prettier + additional_dependencies: + - prettier@3.2.5 + + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec \ No newline at end of file diff --git 
a/.prettierignore b/.prettierignore index 437d763d..b8056736 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,12 +1,12 @@ -email_template.html -adaptivecard.json -slackreport.json -.nextflow* -work/ -data/ -results/ -.DS_Store -testing/ -testing* -*.pyc -bin/ +email_template.html +adaptivecard.json +slackreport.json +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc +bin/ diff --git a/.prettierrc.yml b/.prettierrc.yml index c81f9a76..64d9f384 100644 --- a/.prettierrc.yml +++ b/.prettierrc.yml @@ -1 +1 @@ -printWidth: 120 +printWidth: 120 diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dfac881..d881e505 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,40 +1,106 @@ -# nf-core/proteinfold: Changelog - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## 1.1.0dev - [date] - -### Enhancements & fixes - -- [[#80](https://github.com/nf-core/proteinfold/pull/80)] - Add `accelerator` directive to GPU processes when `params.use_gpu` is true. -- [[#81](https://github.com/nf-core/proteinfold/pull/81)] - Support multiline fasta for colabfold multimer predictions. -- [[#89](https://github.com/nf-core/proteinfold/pull/89)] - Fix issue with excessive symlinking in the pdb_mmcif database. -- [[PR #91](https://github.com/nf-core/proteinfold/pull/91)] - Update ColabFold version to 1.5.2 and AlphaFold version to 2.3.2 -- [[PR #92](https://github.com/nf-core/proteinfold/pull/92)] - Add ESMFold workflow to the pipeline. -- Update metro map to include ESMFold workflow. -- Update modules to remove quay from container url. -- [[nf-core/tools#2286](https://github.com/nf-core/tools/issues/2286)] - Set default container registry outside profile scope. -- [[PR #97](https://github.com/nf-core/proteinfold/pull/97)] - Fix issue with uniref30 missing path when using the full BFD database in AlphaFold. -- [[PR #100](https://github.com/nf-core/proteinfold/pull/100)] - Update containers for AlphaFold2 and ColabFold local modules. -- [[PR #105](https://github.com/nf-core/proteinfold/pull/105)] - Update COLABFOLD_BATCH docker container, metro map figure and nextflow schema description. -- [[PR #106](https://github.com/nf-core/proteinfold/pull/106)] - Add `singularity.registry = 'quay.io'` and bump NF version to 23.04.0 -- [[#108](https://github.com/nf-core/proteinfold/issues/108)] - Fix gunzip error when providing too many files when downloading PDBMMCIF database. -- [[PR #111](https://github.com/nf-core/proteinfold/pull/111)] - Update pipeline template to [nf-core/tools 2.9](https://github.com/nf-core/tools/releases/tag/2.9). -- [[PR #112](https://github.com/nf-core/rnaseq/pull/112)] - Use `nf-validation` plugin for parameter and samplesheet validation. -- [[#113](https://github.com/nf-core/proteinfold/pull/113)] - Include esmfold dbs for full data sets. -- [[PR #114](https://github.com/nf-core/rnaseq/pull/114)] - Update paths to test dbs. -- [[PR #117](https://github.com/nf-core/proteinfold/pull/117)] - Update pipeline template to [nf-core/tools 2.10](https://github.com/nf-core/tools/releases/tag/2.10). -- [[PR #132](https://github.com/nf-core/proteinfold/pull/132)] - Remove `lib/` directory. -- [[#135](https://github.com/nf-core/proteinfold/issues/135)] - Reduce Alphafold Docker images sizes. -- [[#115](https://github.com/nf-core/proteinfold/issues/115)] - Throw message error when profile conda is used. 
-- [[#131](https://github.com/nf-core/proteinfold/issues/131)] - Add esmfold small tests.
-- [[#144](https://github.com/nf-core/proteinfold/issues/144)] - Force value channels when providing dbs (downloaded) in `main.nf` to enable the processing of multiple samples.
-
-## 1.0.0 - White Silver Reebok
-
-Initial release of nf-core/proteinfold, created with the [nf-core](https://nf-co.re/) template.
-
-### Enhancements & fixes
-
-- Updated pipeline template to [nf-core/tools 2.7.2](https://github.com/nf-core/tools/releases/tag/2.7.2)
+# nf-core/proteinfold: Changelog
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30
+
+### Enhancements & fixes
+
+- Minor patch release to fix the MultiQC report.
+
+## [[1.1.0](https://github.com/nf-core/proteinfold/releases/tag/1.1.0)] - 2025-06-25
+
+### Credits
+
+Special thanks to the following for their contributions to the release:
+
+- [Adam Talbot](https://github.com/adamrtalbot)
+- [Athanasios Baltzis](https://github.com/athbaltzis)
+- [Björn Langer](https://github.com/bjlang)
+- [Igor Trujnara](https://github.com/itrujnara)
+- [Matthias Hörtenhuber](https://github.com/mashehu)
+- [Maxime Garcia](https://github.com/maxulysse)
+- [Júlia Mir Pedrol](https://github.com/mirpedrol)
+- [Ziad Al-Bkhetan](https://github.com/ziadbkh)
+
+Thank you to everyone else that has contributed by reporting bugs, enhancements or in any other way, shape or form.
+
+### Enhancements & fixes
+
+- [[#80](https://github.com/nf-core/proteinfold/pull/80)] - Add `accelerator` directive to GPU processes when `params.use_gpu` is true.
+- [[#81](https://github.com/nf-core/proteinfold/pull/81)] - Support multiline fasta for colabfold multimer predictions.
+- [[#89](https://github.com/nf-core/proteinfold/pull/89)] - Fix issue with excessive symlinking in the pdb_mmcif database.
+- [[PR #91](https://github.com/nf-core/proteinfold/pull/91)] - Update ColabFold version to 1.5.2 and AlphaFold version to 2.3.2
+- [[PR #92](https://github.com/nf-core/proteinfold/pull/92)] - Add ESMFold workflow to the pipeline.
+- Update metro map to include ESMFold workflow.
+- Update modules to remove quay from container url.
+- [[nf-core/tools#2286](https://github.com/nf-core/tools/issues/2286)] - Set default container registry outside profile scope.
+- [[PR #97](https://github.com/nf-core/proteinfold/pull/97)] - Fix issue with uniref30 missing path when using the full BFD database in AlphaFold.
+- [[PR #100](https://github.com/nf-core/proteinfold/pull/100)] - Update containers for AlphaFold2 and ColabFold local modules.
+- [[PR #105](https://github.com/nf-core/proteinfold/pull/105)] - Update COLABFOLD_BATCH docker container, metro map figure and nextflow schema description.
+- [[PR #106](https://github.com/nf-core/proteinfold/pull/106)] - Add `singularity.registry = 'quay.io'` and bump NF version to 23.04.0
+- [[#108](https://github.com/nf-core/proteinfold/issues/108)] - Fix gunzip error when providing too many files when downloading PDBMMCIF database.
+- [[PR #111](https://github.com/nf-core/proteinfold/pull/111)] - Update pipeline template to [nf-core/tools 2.9](https://github.com/nf-core/tools/releases/tag/2.9).
+- [[PR #112](https://github.com/nf-core/proteinfold/pull/112)] - Use `nf-validation` plugin for parameter and samplesheet validation.
+- [[#113](https://github.com/nf-core/proteinfold/pull/113)] - Include esmfold dbs for full data sets.
+- [[PR #114](https://github.com/nf-core/proteinfold/pull/114)] - Update paths to test dbs.
+- [[PR #117](https://github.com/nf-core/proteinfold/pull/117)] - Update pipeline template to [nf-core/tools 2.10](https://github.com/nf-core/tools/releases/tag/2.10).
+- [[PR #132](https://github.com/nf-core/proteinfold/pull/132)] - Remove `lib/` directory.
+- [[#135](https://github.com/nf-core/proteinfold/issues/135)] - Reduce AlphaFold Docker image sizes.
+- [[#115](https://github.com/nf-core/proteinfold/issues/115)] - Throw an error message when profile conda is used.
+- [[#131](https://github.com/nf-core/proteinfold/issues/131)] - Add esmfold small tests.
+- [[#144](https://github.com/nf-core/proteinfold/issues/144)] - Force value channels when providing dbs (downloaded) in `main.nf` to enable the processing of multiple samples.
+- [[#147](https://github.com/nf-core/proteinfold/issues/147)] - Update modules to latest version.
+- [[#145](https://github.com/nf-core/proteinfold/issues/145)] - Implement test to check the processes/subworkflows triggered when downloading the databases.
+- [[#130](https://github.com/nf-core/proteinfold/issues/130)] - Add `--skip_multiqc` parameter.
+- [[PR #154](https://github.com/nf-core/proteinfold/pull/154)] - Update pipeline template to [nf-core/tools 2.14.1](https://github.com/nf-core/tools/releases/tag/2.14.1).
+- [[#148](https://github.com/nf-core/proteinfold/issues/148)] - Update Colabfold DBs.
+- [[PR #159](https://github.com/nf-core/proteinfold/pull/159)] - Update `mgnify` paths to new available version.
+- [[PR #163](https://github.com/nf-core/proteinfold/pull/163)] - Fix full test CI.
+- [[#150](https://github.com/nf-core/proteinfold/issues/150)] - Add thanks to the AWS Open Data Sponsorship program in `README.md`.
+- [[PR #166](https://github.com/nf-core/proteinfold/pull/166)] - Create two separate parameters for the number of recycles in ColabFold and ESMFold.
+
+### Parameters
+
+| Old parameter         | New parameter                            |
+| --------------------- | ---------------------------------------- |
+| `--uniclust30`        |                                          |
+| `--bfd`               | `--bfd_link`                             |
+| `--small_bfd`         | `--small_bfd_link`                       |
+| `--alphafold2_params` | `--alphafold2_params_link`               |
+| `--mgnify`            | `--mgnify_link`                          |
+| `--pdb70`             | `--pdb70_link`                           |
+| `--pdb_mmcif`         | `--pdb_mmcif_link`                       |
+| `--pdb_obsolete`      | `--pdb_obsolete_link`                    |
+| `--uniref90`          | `--uniref90_link`                        |
+| `--pdb_seqres`        | `--pdb_seqres_link`                      |
+| `--uniprot_sprot`     | `--uniprot_sprot_link`                   |
+| `--uniprot_trembl`    | `--uniprot_trembl_link`                  |
+| `--uniclust30_path`   | `--uniref30_alphafold2_path`             |
+| `--uniref30`          | `--uniref30_colabfold_link`              |
+| `--uniref30_path`     | `--uniref30_colabfold_path`              |
+| `--num_recycle`       | `--num_recycles_colabfold`               |
+|                       | `--num_recycles_esmfold`                 |
+|                       | `--uniref30_alphafold2_link`             |
+|                       | `--esmfold_db`                           |
+|                       | `--esmfold_model_preset`                 |
+|                       | `--esmfold_3B_v1`                        |
+|                       | `--esm2_t36_3B_UR50D`                    |
+|                       | `--esm2_t36_3B_UR50D_contact_regression` |
+|                       | `--esmfold_params_path`                  |
+|                       | `--skip_multiqc`                         |
+
+> **NB:** Parameter has been **updated** if both old and new parameter information is present.
+> **NB:** Parameter has been **added** if just the new parameter information is present.
+> **NB:** Parameter has been **removed** if parameter information isn't present.
+ +## 1.0.0 - White Silver Reebok + +Initial release of nf-core/proteinfold, created with the [nf-core](https://nf-co.re/) template. + +### Enhancements & fixes + +- Updated pipeline template to [nf-core/tools 2.7.2](https://github.com/nf-core/tools/releases/tag/2.7.2) diff --git a/CITATIONS.md b/CITATIONS.md index 1b1f9291..6b2b81e4 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -1,51 +1,51 @@ -# nf-core/proteinfold: Citations - -## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) - -> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. - -## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) - -> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. - -## Pipeline tools - -- [AlphaFold](https://pubmed.ncbi.nlm.nih.gov/34265844/) - Jumper J et al. Highly accurate protein structure prediction with AlphaFold. Nature. 2021 Aug;596(7873):583-589. doi: 10.1038/s41586-021-03819-2. PubMed PMID: 34265844; PubMed Central PMCID: PMC8371605. - -- [ColabFold](https://pubmed.ncbi.nlm.nih.gov/35637307/) - Mirdita M, Schütze K, Moriwaki Y, Heo L, Ovchinnikov S, Steinegger M. ColabFold: making protein folding accessible to all. Nat Methods. 2022 Jun;19(6):679-682. doi: 10.1038/s41592-022-01488-1. Epub 2022 May 30. PMID: 35637307; PMCID: PMC9184281. - -- [MMseqs2](https://pubmed.ncbi.nlm.nih.gov/26743509/) - Hauser M, Steinegger M, Söding J. MMseqs software suite for fast and deep clustering and searching of large protein sequence sets. Bioinformatics. 2016 May 1;32(9):1323-30. doi: 10.1093/bioinformatics/btw006. Epub 2016 Jan 6. PubMed PMID: 26743509. - -- [ESMFold](https://pubmed.ncbi.nlm.nih.gov/36927031/) - Lin Z, Akin H, Rao R, Hie B, Zhu Z, Lu W, Smetanin N, Verkuil R, Kabeli O, Shmueli Y, Dos Santos Costa A, Fazel-Zarandi M, Sercu T, Candido S, Rives A. Evolutionary-scale prediction of atomic-level protein structure with a language model. Science. 2023 Mar 17;379(6637):1123-1130. doi: 10.1126/science.ade2574. Epub 2023 Mar 16. PMID: 36927031. - - > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. - -- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) - - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. - -## Software packaging/containerisation tools - -- [Anaconda](https://anaconda.com) - - > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. - -- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) - - > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. 
-
-- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/)
-
-  > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671.
-
-- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241)
-
-  > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241.
-
-- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/)
-
-  > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
+# nf-core/proteinfold: Citations
+
+## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/)
+
+> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031.
+
+## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/)
+
+> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311.
+
+## Pipeline tools
+
+- [AlphaFold](https://pubmed.ncbi.nlm.nih.gov/34265844/)
+  Jumper J et al. Highly accurate protein structure prediction with AlphaFold. Nature. 2021 Aug;596(7873):583-589. doi: 10.1038/s41586-021-03819-2. PubMed PMID: 34265844; PubMed Central PMCID: PMC8371605.
+
+- [ColabFold](https://pubmed.ncbi.nlm.nih.gov/35637307/)
+  Mirdita M, Schütze K, Moriwaki Y, Heo L, Ovchinnikov S, Steinegger M. ColabFold: making protein folding accessible to all. Nat Methods. 2022 Jun;19(6):679-682. doi: 10.1038/s41592-022-01488-1. Epub 2022 May 30. PMID: 35637307; PMCID: PMC9184281.
+
+- [MMseqs2](https://pubmed.ncbi.nlm.nih.gov/26743509/)
+  Hauser M, Steinegger M, Söding J. MMseqs software suite for fast and deep clustering and searching of large protein sequence sets. Bioinformatics. 2016 May 1;32(9):1323-30. doi: 10.1093/bioinformatics/btw006. Epub 2016 Jan 6. PubMed PMID: 26743509.
+
+- [ESMFold](https://pubmed.ncbi.nlm.nih.gov/36927031/)
+  Lin Z, Akin H, Rao R, Hie B, Zhu Z, Lu W, Smetanin N, Verkuil R, Kabeli O, Shmueli Y, Dos Santos Costa A, Fazel-Zarandi M, Sercu T, Candido S, Rives A. Evolutionary-scale prediction of atomic-level protein structure with a language model. Science. 2023 Mar 17;379(6637):1123-1130. doi: 10.1126/science.ade2574. Epub 2023 Mar 16. PMID: 36927031.
+
+- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
+
+  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
+ +## Software packaging/containerisation tools + +- [Anaconda](https://anaconda.com) + + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index c089ec78..7bc7e9fe 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,182 +1,182 @@ -# Code of Conduct at nf-core (v1.4) - -## Our Pledge - -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - -- Age -- Ability -- Body size -- Caste -- Familial status -- Gender identity and expression -- Geographical location -- Level of experience -- Nationality and national origins -- Native language -- Neurodiversity -- Race or ethnicity -- Religion -- Sexual identity and orientation -- Socioeconomic status - -Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance. - -## Preamble - -:::note -This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. -::: - -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). - -Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. - -nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. 
- -We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. - -Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. - -We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. - -Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. - -## Our Responsibilities - -Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. - -The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. - -Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. - -## When and where does this Code of Conduct apply? - -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - -- Communicating with an official project email address. -- Communicating with community members within the nf-core Slack channel. -- Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. -- Representing nf-core on social media. This includes both official and personal accounts. - -## nf-core cares 😊 - -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - -- Ask for consent before sharing another community member’s personal information (including photographs) on social media. -- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) -- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. 
If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) -- Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) -- Focus on what is best for the team and the community. (When in doubt, ask) -- Accept feedback, yet be unafraid to question, deliberate, and learn. -- Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) -- Take breaks when you feel like you need them. -- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) - -## nf-core frowns on 😕 - -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - -- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. -- “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. -- Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. - -### Online Trolling - -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. - -All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. - -## Procedures for reporting CoC violations - -If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. - -You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. - -Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. - -All reports will be handled with the utmost discretion and confidentiality. 
- -You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: - -- Your contact information. -- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. -- The behaviour that was in violation and the circumstances surrounding the incident. -- The approximate time of the behaviour (if different than the time the report was made). -- Other people involved in the incident, if applicable. -- If you believe the incident is ongoing. -- If there is a publicly available record (e.g. mailing list record, a screenshot). -- Any additional information. - -After you file a report, one or more members of our Safety Team will contact you to follow up on your report. - -## Who will read and handle reports - -All reports will be read and handled by the members of the Safety Team at nf-core. - -If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. - -To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. - -## Reviewing reports - -After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. - -The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. - -In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. - -Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. - -## Confidentiality - -All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. - -We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. - -## Enforcement - -Actions taken by the nf-core’s Safety Team may include, but are not limited to: - -- Asking anyone to stop a behaviour. -- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. -- Removing access to the gather.town and Slack, either temporarily or permanently. -- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. -- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. 
-- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. -- No action. - -## Attribution and Acknowledgements - -- The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) -- The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) -- The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) -- The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) - -## Changelog - -### v1.4 - February 8th, 2022 - -- Included a new member of the Safety Team. Corrected a typographical error in the text. - -### v1.3 - December 10th, 2021 - -- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. - -### v1.2 - November 12th, 2021 - -- Removed information specific to reporting CoC violations at the Hackathon in October 2021. - -### v1.1 - October 14th, 2021 - -- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. - -### v1.0 - March 15th, 2021 - -- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. +# Code of Conduct at nf-core (v1.4) + +## Our Pledge + +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: + +- Age +- Ability +- Body size +- Caste +- Familial status +- Gender identity and expression +- Geographical location +- Level of experience +- Nationality and national origins +- Native language +- Neurodiversity +- Race or ethnicity +- Religion +- Sexual identity and orientation +- Socioeconomic status + +Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance. + +## Preamble + +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: + +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. + +nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. + +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. 
+ +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. + +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. + +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. + +## Our Responsibilities + +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. + +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. + +## When and where does this Code of Conduct apply? + +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): + +- Communicating with an official project email address. +- Communicating with community members within the nf-core Slack channel. +- Participating in hackathons organised by nf-core (both online and in-person events). +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. +- Representing nf-core on social media. This includes both official and personal accounts. + +## nf-core cares 😊 + +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): + +- Ask for consent before sharing another community member’s personal information (including photographs) on social media. +- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) +- Engage with and enquire after others. 
(This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) +- Focus on what is best for the team and the community. (When in doubt, ask) +- Accept feedback, yet be unafraid to question, deliberate, and learn. +- Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) +- Take breaks when you feel like you need them. +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) + +## nf-core frowns on 😕 + +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: + +- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. +- “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. +- Spamming or trolling of individuals on social media. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. + +### Online Trolling + +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. + +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. + +## Procedures for reporting CoC violations + +If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. + +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. 
names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. + +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. + +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. 
+ +## Attribution and Acknowledgements + +- The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) +- The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) +- The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) +- The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) + +## Changelog + +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 + +- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/LICENSE b/LICENSE index 93555944..8ed51f3a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,21 @@ -MIT License - -Copyright (c) Athanasios Baltzis, Jose Espinosa-Carrasco, Harshil Patel - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +MIT License + +Copyright (c) Athanasios Baltzis, Jose Espinosa-Carrasco, Harshil Patel + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index d1eed3a2..eaffd5a1 100644 --- a/README.md +++ b/README.md @@ -1,178 +1,179 @@ -

- - - nf-core/proteinfold - -

- -[![GitHub Actions CI Status](https://github.com/nf-core/proteinfold/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/ci.yml) -[![GitHub Actions Linting Status](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinfold/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) -[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) - -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) -[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) -[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/proteinfold) - -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinfold-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinfold)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) - -## Introduction - -**nf-core/proteinfold** is a bioinformatics best-practice analysis pipeline for Protein 3D structure prediction. - -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! - -On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/proteinfold/results). - -## Pipeline summary - -![Alt text](docs/images/nf-core-proteinfold_metro_map_1.1.0.png?raw=true "nf-core-proteinfold 1.1.0 metro map") - -1. 
Choice of protein structure prediction method: - - i. [AlphaFold2](https://github.com/deepmind/alphafold) - - ii. [AlphaFold2 split](https://github.com/luisas/alphafold_split) - AlphaFold2 MSA computation and model inference in separate processes - - iii. [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 API server followed by ColabFold - - iv. [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 local search followed by ColabFold - - v. [ESMFold](https://github.com/facebookresearch/esm) - -## Usage - -> [!NOTE] -> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - -Now, you can run the pipeline using: - -```bash -nextflow run nf-core/proteinfold \ - -profile \ - --input samplesheet.csv \ - --outdir -``` - -The pipeline takes care of downloading the required databases and parameters required by AlphaFold2, Colabfold or ESMFold. In case you have already downloaded the required files, you can skip this step by providing the path using the corresponding parameter [`--alphafold2_db`], [`--colabfold_db`] or [`--esmfold_db`] - -- Typical command to run AlphaFold2 mode: - - ```console - nextflow run nf-core/proteinfold \ - --input samplesheet.csv \ - --outdir \ - --mode alphafold2 \ - --alphafold2_db \ - --full_dbs \ - --alphafold2_model_preset monomer \ - --use_gpu \ - -profile - ``` - -- Typical command to run AlphaFold2 splitting the MSA from the prediction execution: - - ```console - nextflow run nf-core/proteinfold \ - --input samplesheet.csv \ - --outdir \ - --mode alphafold2 \ - --alphafold2_mode split_msa_prediction \ - --alphafold2_db \ - --full_dbs \ - --alphafold2_model_preset monomer \ - --use_gpu \ - -profile - ``` - -- Typical command to run colabfold_local mode: - - ```console - nextflow run nf-core/proteinfold \ - --input samplesheet.csv \ - --outdir \ - --mode colabfold \ - --colabfold_server local \ - --colabfold_db \ - --num_recycle 3 \ - --use_amber \ - --colabfold_model_preset "AlphaFold2-ptm" \ - --use_gpu \ - --db_load_mode 0 - -profile - ``` - -- Typical command to run colabfold_webserver mode: - - ```console - nextflow run nf-core/proteinfold \ - --input samplesheet.csv \ - --outdir \ - --mode colabfold \ - --colabfold_server webserver \ - --host_url \ - --colabfold_db \ - --num_recycle 3 \ - --use_amber \ - --colabfold_model_preset "AlphaFold2-ptm" \ - --use_gpu \ - -profile - ``` - - > **Warning** - > If you aim to carry out a large amount of predictions using the colabfold_webserver mode, please setup and use your own custom MMSeqs2 API Server. You can find instructions [here](https://github.com/sokrypton/ColabFold/tree/main/MsaServer). - -- Typical command to run esmfold mode: - - ```console - nextflow run nf-core/proteinfold \ - --input samplesheet.csv \ - --outdir \ - --mode esmfold \ - --esmfold_model_preset \ - --esmfold_db \ - --num_recycles 4 \ - --use_gpu \ - -profile - ``` - -> [!WARNING] -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). 
- -For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) and the [parameter documentation](https://nf-co.re/proteinfold/parameters). - -## Pipeline output - -To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinfold/results) tab on the nf-core website pipeline page. -For more details about the output files and reports, please refer to the -[output documentation](https://nf-co.re/proteinfold/output). - -## Credits - -nf-core/proteinfold was originally written by Athanasios Baltzis ([@athbaltzis](https://github.com/athbaltzis)), Jose Espinosa-Carrasco ([@JoseEspinosa](https://github.com/JoseEspinosa)), Luisa Santus ([@luisas](https://github.com/luisas)) and Leila Mansouri ([@l-mansouri](https://github.com/l-mansouri)) from [The Comparative Bioinformatics Group](https://www.crg.eu/en/cedric_notredame) at [The Centre for Genomic Regulation, Spain](https://www.crg.eu/) under the umbrella of the [BovReg project](https://www.bovreg.eu/) and Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/). - -We thank the following people for their extensive assistance in the development of this pipeline: - -Many thanks to others who have helped out and contributed along the way too, including (but not limited to): Norman Goodacre and Waleed Osman from Interline Therapeutics ([@interlinetx](https://github.com/interlinetx)), Martin Steinegger ([@martin-steinegger](https://github.com/martin-steinegger)) and Raoul J.P. Bonnal ([@rjpbonnal](https://github.com/rjpbonnal)) - -## Contributions and Support - -If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). - -For further information or help, don't hesitate to get in touch on the [Slack `#proteinfold` channel](https://nfcore.slack.com/channels/proteinfold) (you can join with [this invite](https://nf-co.re/join/slack)). - -## Citations - -If you use nf-core/proteinfold for your analysis, please cite it using the following doi: [10.5281/zenodo.7437038](https://doi.org/10.5281/zenodo.7437038) - -An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. - -You can cite the `nf-core` publication as follows: - -> **The nf-core framework for community-curated bioinformatics pipelines.** -> -> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. -> -> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). +

+ + + nf-core/proteinfold + +

+ +[![GitHub Actions CI Status](https://github.com/nf-core/proteinfold/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinfold/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) + +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinfold) + +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinfold-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinfold)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) + +## Introduction + +**nf-core/proteinfold** is a bioinformatics best-practice analysis pipeline for Protein 3D structure prediction. + +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! + +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/proteinfold/results). + +## Pipeline summary + +![Alt text](docs/images/nf-core-proteinfold_metro_map_1.1.0.png?raw=true "nf-core-proteinfold 1.1.0 metro map") + +1. 
Choice of protein structure prediction method:
+
+   i. [AlphaFold2](https://github.com/deepmind/alphafold) - Regular AlphaFold2 (MSA computation and model inference in the same process)
+
+   ii. [AlphaFold2 split](https://github.com/luisas/alphafold_split) - AlphaFold2 with the MSA computation and model inference in separate processes
+
+   iii. [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 API server followed by ColabFold
+
+   iv. [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 local search followed by ColabFold
+
+   v. [ESMFold](https://github.com/facebookresearch/esm) - Regular ESMFold
+
+## Usage
+
+> [!NOTE]
+> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.
+
+Now, you can run the pipeline using:
+
+```bash
+nextflow run nf-core/proteinfold \
+   -profile <docker/singularity/.../institute> \
+   --input samplesheet.csv \
+   --outdir <OUTDIR>
+```
+
+The pipeline takes care of downloading the databases and parameters required by AlphaFold2, ColabFold or ESMFold. If you have already downloaded the required files, you can skip this step by providing the path to the databases via the corresponding parameter ([`--alphafold2_db`], [`--colabfold_db`] or [`--esmfold_db`]). Please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) to check the directory structure you need to provide for each of the databases.
+
+- The typical command to run AlphaFold2 mode is shown below:
+
+  ```console
+  nextflow run nf-core/proteinfold \
+      --input samplesheet.csv \
+      --outdir <OUTDIR> \
+      --mode alphafold2 \
+      --alphafold2_db <ALPHAFOLD2_DB_PATH> \
+      --full_dbs <true/false> \
+      --alphafold2_model_preset monomer \
+      --use_gpu <true/false> \
+      -profile <docker/singularity/.../institute>
+  ```
+
+- The typical command to run AlphaFold2 in split mode, separating the MSA computation from the model inference, is shown below:
+
+  ```console
+  nextflow run nf-core/proteinfold \
+      --input samplesheet.csv \
+      --outdir <OUTDIR> \
+      --mode alphafold2 \
+      --alphafold2_mode split_msa_prediction \
+      --alphafold2_db <ALPHAFOLD2_DB_PATH> \
+      --full_dbs <true/false> \
+      --alphafold2_model_preset monomer \
+      --use_gpu <true/false> \
+      -profile <docker/singularity/.../institute>
+  ```
+
+- The typical command to run colabfold_local mode is shown below:
+
+  ```console
+  nextflow run nf-core/proteinfold \
+      --input samplesheet.csv \
+      --outdir <OUTDIR> \
+      --mode colabfold \
+      --colabfold_server local \
+      --colabfold_db <COLABFOLD_DB_PATH> \
+      --num_recycles_colabfold 3 \
+      --use_amber <true/false> \
+      --colabfold_model_preset "AlphaFold2-ptm" \
+      --use_gpu <true/false> \
+      --db_load_mode 0 \
+      -profile <docker/singularity/.../institute>
+  ```
+
+- The typical command to run colabfold_webserver mode is shown below:
+
+  ```console
+  nextflow run nf-core/proteinfold \
+      --input samplesheet.csv \
+      --outdir <OUTDIR> \
+      --mode colabfold \
+      --colabfold_server webserver \
+      --host_url <CUSTOM_MMSEQS2_API_SERVER_URL> \
+      --colabfold_db <COLABFOLD_DB_PATH> \
+      --num_recycles_colabfold 3 \
+      --use_amber <true/false> \
+      --colabfold_model_preset "AlphaFold2-ptm" \
+      --use_gpu <true/false> \
+      -profile <docker/singularity/.../institute>
+  ```
+
+  > [!WARNING]
+  > If you aim to carry out a large number of predictions using the colabfold_webserver mode, please set up and use your own custom MMseqs2 API server. You can find instructions [here](https://github.com/sokrypton/ColabFold/tree/main/MsaServer).
+
+- The esmfold mode can be run using the command below:
+
+  ```console
+  nextflow run nf-core/proteinfold \
+      --input samplesheet.csv \
+      --outdir <OUTDIR> \
+      --mode esmfold \
+      --esmfold_model_preset <monomer/multimer> \
+      --esmfold_db <ESMFOLD_DB_PATH> \
+      --num_recycles_esmfold 4 \
+      --use_gpu <true/false> \
+      -profile <docker/singularity/.../institute>
+  ```
+
+> [!WARNING]
+> Please provide pipeline parameters via the CLI or the Nextflow `-params-file` option. Custom config files, including those provided by the `-c` Nextflow option, can be used to provide any configuration _**except for parameters**_;
+> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
+
+For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) and the [parameter documentation](https://nf-co.re/proteinfold/parameters).
+
+## Pipeline output
+
+To see the results of an example test run with a full-size dataset, refer to the [results](https://nf-co.re/proteinfold/results) tab on the nf-core website pipeline page.
+For more details about the output files and reports, please refer to the
+[output documentation](https://nf-co.re/proteinfold/output).
+
+## Credits
+
+nf-core/proteinfold was originally written by Athanasios Baltzis ([@athbaltzis](https://github.com/athbaltzis)), Jose Espinosa-Carrasco ([@JoseEspinosa](https://github.com/JoseEspinosa)), Luisa Santus ([@luisas](https://github.com/luisas)) and Leila Mansouri ([@l-mansouri](https://github.com/l-mansouri)) from [The Comparative Bioinformatics Group](https://www.crg.eu/en/cedric_notredame) at [The Centre for Genomic Regulation, Spain](https://www.crg.eu/) under the umbrella of the [BovReg project](https://www.bovreg.eu/) and Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/).
+
+Many thanks to others who have helped out and contributed along the way too, including (but not limited to): Norman Goodacre and Waleed Osman from Interline Therapeutics ([@interlinetx](https://github.com/interlinetx)), Martin Steinegger ([@martin-steinegger](https://github.com/martin-steinegger)) and Raoul J.P. Bonnal ([@rjpbonnal](https://github.com/rjpbonnal)).
+
+We would also like to thank the AWS Open Data Sponsorship Program for generously providing the resources necessary to host the data used in the testing, development, and deployment of nf-core/proteinfold.
+
+## Contributions and Support
+
+If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).
+
+For further information or help, don't hesitate to get in touch on the [Slack `#proteinfold` channel](https://nfcore.slack.com/channels/proteinfold) (you can join with [this invite](https://nf-co.re/join/slack)).
+
+## Citations
+
+If you use nf-core/proteinfold for your analysis, please cite it using the following doi: [10.5281/zenodo.7437038](https://doi.org/10.5281/zenodo.7437038)
+
+An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
+
+You can cite the `nf-core` publication as follows:
+
+> **The nf-core framework for community-curated bioinformatics pipelines.**
+>
+> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.
+>
+> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).
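For reference, the `--input` samplesheet used in the commands above is a two-column CSV mapping a sequence name (no spaces allowed) to its FASTA file, as enforced by `assets/schema_input.json` in this diff. A minimal sketch, mirroring the bundled `assets/samplesheet.csv` (the FASTA locations are the test-data URLs and can be replaced by local paths):

```csv
sequence,fasta
T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta
T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta
```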
diff --git a/assets/NO_FILE b/assets/NO_FILE new file mode 100644 index 00000000..e69de29b diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json index 03c90229..81ff5d4f 100644 --- a/assets/adaptivecard.json +++ b/assets/adaptivecard.json @@ -1,67 +1,67 @@ -{ - "type": "message", - "attachments": [ - { - "contentType": "application/vnd.microsoft.card.adaptive", - "contentUrl": null, - "content": { - "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", - "msteams": { - "width": "Full" - }, - "type": "AdaptiveCard", - "version": "1.2", - "body": [ - { - "type": "TextBlock", - "size": "Large", - "weight": "Bolder", - "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", - "text": "nf-core/proteinfold v${version} - ${runName}", - "wrap": true - }, - { - "type": "TextBlock", - "spacing": "None", - "text": "Completed at ${dateComplete} (duration: ${duration})", - "isSubtle": true, - "wrap": true - }, - { - "type": "TextBlock", - "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", - "wrap": true - }, - { - "type": "TextBlock", - "text": "The command used to launch the workflow was as follows:", - "wrap": true - }, - { - "type": "TextBlock", - "text": "${commandLine}", - "isSubtle": true, - "wrap": true - } - ], - "actions": [ - { - "type": "Action.ShowCard", - "title": "Pipeline Configuration", - "card": { - "type": "AdaptiveCard", - "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", - "body": [ - { - "type": "FactSet", - "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> - ] - } - ] - } - } - ] - } - } - ] -} +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/proteinfold v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. 
The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/dummy_db b/assets/dummy_db index 8d4a2dcf..cba8b596 100644 --- a/assets/dummy_db +++ b/assets/dummy_db @@ -1 +1 @@ -# This file is empty just use for testing purposes +# This file is empty just use for testing purposes diff --git a/assets/email_template.html b/assets/email_template.html index 1323cf83..455151d3 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -1,53 +1,53 @@ - - - - - - - - nf-core/proteinfold Pipeline Report - - -
- - - -

nf-core/proteinfold ${version}

-

Run Name: $runName

- -<% if (!success){ - out << """ -
-

nf-core/proteinfold execution completed unsuccessfully!

-

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

-

The full error message was:

-
${errorReport}
-
- """ -} else { - out << """ -
- nf-core/proteinfold execution completed successfully! -
- """ -} -%> - -

The workflow was completed at $dateComplete (duration: $duration)

-

The command used to launch the workflow was as follows:

-
$commandLine
- -

Pipeline Configuration:

- - - <% out << summary.collect{ k,v -> "" }.join("\n") %> - -
$k
$v
- -

nf-core/proteinfold

-

https://github.com/nf-core/proteinfold

- -
- - - + + + + + + + + nf-core/proteinfold Pipeline Report + + +
+ + + +

nf-core/proteinfold ${version}

+

Run Name: $runName

+ +<% if (!success){ + out << """ +
+

nf-core/proteinfold execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

+

The full error message was:

+
${errorReport}
+
+ """ +} else { + out << """ +
+ nf-core/proteinfold execution completed successfully! +
+ """ +} +%> + +

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
+ +

Pipeline Configuration:

+ + + <% out << summary.collect{ k,v -> "" }.join("\n") %> + +
$k
$v
+ +

nf-core/proteinfold

+

https://github.com/nf-core/proteinfold

+ +
+ + + diff --git a/assets/email_template.txt b/assets/email_template.txt index f22fa3a0..cb29babe 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -1,39 +1,39 @@ ----------------------------------------------------- - ,--./,-. - ___ __ __ __ ___ /,-._.--~\\ - |\\ | |__ __ / ` / \\ |__) |__ } { - | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, - `._,._,' - nf-core/proteinfold ${version} ----------------------------------------------------- -Run Name: $runName - -<% if (success){ - out << "## nf-core/proteinfold execution completed successfully! ##" -} else { - out << """#################################################### -## nf-core/proteinfold execution completed unsuccessfully! ## -#################################################### -The exit status of the task that caused the workflow execution to fail was: $exitStatus. -The full error message was: - -${errorReport} -""" -} %> - - -The workflow was completed at $dateComplete (duration: $duration) - -The command used to launch the workflow was as follows: - - $commandLine - - - -Pipeline Configuration: ------------------------ -<% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> - --- -nf-core/proteinfold -https://github.com/nf-core/proteinfold +---------------------------------------------------- + ,--./,-. + ___ __ __ __ ___ /,-._.--~\\ + |\\ | |__ __ / ` / \\ |__) |__ } { + | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, + `._,._,' + nf-core/proteinfold ${version} +---------------------------------------------------- +Run Name: $runName + +<% if (success){ + out << "## nf-core/proteinfold execution completed successfully! ##" +} else { + out << """#################################################### +## nf-core/proteinfold execution completed unsuccessfully! ## +#################################################### +The exit status of the task that caused the workflow execution to fail was: $exitStatus. +The full error message was: + +${errorReport} +""" +} %> + + +The workflow was completed at $dateComplete (duration: $duration) + +The command used to launch the workflow was as follows: + + $commandLine + + + +Pipeline Configuration: +----------------------- +<% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> + +-- +nf-core/proteinfold +https://github.com/nf-core/proteinfold diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 8a911c26..3929fe41 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -1,29 +1,29 @@ -id: "nf-core-proteinfold-methods-description" -description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." -section_name: "nf-core/proteinfold Methods Description" -section_href: "https://github.com/nf-core/proteinfold" -plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object -data: | -

Methods

-

Data was processed using nf-core/proteinfold v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

-

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

-
${workflow.commandLine}
-

${tool_citations}

-

References

-
    -
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • -
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • -
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • -
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • - ${tool_bibliography} -
-
-
Notes:
-
    - ${nodoi_text} -
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • -
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • -
-
+id: "nf-core-proteinfold-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/proteinfold Methods Description" +section_href: "https://github.com/nf-core/proteinfold" +plot_type: "html" +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | +

Methods

+

Data was processed using nf-core/proteinfold v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

${tool_citations}

+

References

+
    +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography} +
+
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index f6acb16a..8d4c388e 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,22 +1,22 @@ -report_comment: > - This report has been generated by the nf-core/proteinfold - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - "nf-core-proteinfold-methods-description": - order: -1000 - software_versions: - order: -1001 - "nf-core-proteinfold-summary": - order: -1002 - -export_plots: true - -# Run only these modules -run_modules: - - custom_content - - run_alphafold2 - - run_alphafold2_pred - - colabfold_batch - -disable_version_detection: true +report_comment: > + This report has been generated by the nf-core/proteinfold + analysis pipeline. For information about how to interpret these results, please see the + documentation. +report_section_order: + "nf-core-proteinfold-methods-description": + order: -1000 + software_versions: + order: -1001 + "nf-core-proteinfold-summary": + order: -1002 + +export_plots: true + +# Run only these modules +run_modules: + - custom_content + - run_alphafold2 + - run_alphafold2_pred + - colabfold_batch + +disable_version_detection: true diff --git a/assets/proteinfold_template.html b/assets/proteinfold_template.html new file mode 100755 index 00000000..a10ab65a --- /dev/null +++ b/assets/proteinfold_template.html @@ -0,0 +1,908 @@ + + + + + + + Protein structure prediction + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+ + + + + +
+ +
+ + +
+
+
+
+
+
<50
+
70
+
90+
+
+
+
+ +
+

+ Alphafold produces a + + per-residue confidence score (pLDDT) + + between 0 and 100. Some regions below 50 pLDDT may be unstructured in isolation. +

+
+
+ + + + + + +
+ +
+ +
+ + +
+
+
Information
+ +
+
+
Program: *prog_name*
+
ID: *sample_name*
+
+
Average:
+
+ +
+
Navigation
+ + +
+
+ Scroll up/down + to zoom in and out +
+
+ Click + drag + to rotate the structure +
+
+ CTRL + click + drag + to move the structure +
+
+ Click + an atom to bring it into focus +
+
+
+
+ + +
+
+
Toggle representations
+
+ + + + +
+
+ +
+
+
+
Actions
+
+ + + +
+
+
+
Download
+ +
+ + +
+
+
+
+
+
+
+ +
+
+
+
+
+
+
+ + +
+
+
Sequence Coverage
+
+
+ +
+ +
+
+ +
+
pLDDT
+
+ +
+
+
+
+
+ + +
+ + +
+
+
+ + +
+
+

+ The Australian BioCommons + is supported by + Bioplatforms Australia +

+

+ Bioplatforms Australia + is enabled by + NCRIS +

+
+
+
+ + + + diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 467fdcf0..f79e08a7 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sequence,fasta -T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta -T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta +sequence,fasta +T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta +T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta diff --git a/assets/schema_input.json b/assets/schema_input.json index b16e3ae5..2a0d9991 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,26 +1,26 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/proteinfold/master/assets/schema_input.json", - "title": "nf-core/proteinfold pipeline - params.input schema", - "description": "Schema for the file provided with params.input", - "type": "array", - "items": { - "type": "object", - "properties": { - "sequence": { - "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Sequence name must be provided and cannot contain spaces", - "meta": ["id"] - }, - "fasta": { - "type": "string", - "format": "file-path", - "exists": true, - "pattern": "^\\S+\\.fa(sta)?$", - "errorMessage": "Fasta file must be provided, cannot contain spaces and must have extension '.fa' or '.fasta'" - } - }, - "required": ["sequence", "fasta"] - } -} +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/proteinfold/master/assets/schema_input.json", + "title": "nf-core/proteinfold pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sequence": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sequence name must be provided and cannot contain spaces", + "meta": ["id"] + }, + "fasta": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.fa(sta)?$", + "errorMessage": "Fasta file must be provided, cannot contain spaces and must have extension '.fa' or '.fasta'" + } + }, + "required": ["sequence", "fasta"] + } +} diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index 6ad96c8f..93966621 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -1,53 +1,53 @@ -To: $email -Subject: $subject -Mime-Version: 1.0 -Content-Type: multipart/related;boundary="nfcoremimeboundary" - ---nfcoremimeboundary -Content-Type: text/html; charset=utf-8 - -$email_html - ---nfcoremimeboundary -Content-Type: image/png;name="nf-core-proteinfold_logo.png" -Content-Transfer-Encoding: base64 -Content-ID: -Content-Disposition: inline; filename="nf-core-proteinfold_logo_light.png" - -<% out << new File("$projectDir/assets/nf-core-proteinfold_logo_light.png"). - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' ) %> - -<% -if (mqcFile){ -def mqcFileObj = new File("$mqcFile") -if (mqcFileObj.length() < mqcMaxSize){ -out << """ ---nfcoremimeboundary -Content-Type: text/html; name=\"multiqc_report\" -Content-Transfer-Encoding: base64 -Content-ID: -Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" - -${mqcFileObj. - bytes. 
- encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' )} -""" -}} -%> - ---nfcoremimeboundary-- +To: $email +Subject: $subject +Mime-Version: 1.0 +Content-Type: multipart/related;boundary="nfcoremimeboundary" + +--nfcoremimeboundary +Content-Type: text/html; charset=utf-8 + +$email_html + +--nfcoremimeboundary +Content-Type: image/png;name="nf-core-proteinfold_logo.png" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: inline; filename="nf-core-proteinfold_logo_light.png" + +<% out << new File("$projectDir/assets/nf-core-proteinfold_logo_light.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> + +<% +if (mqcFile){ +def mqcFileObj = new File("$mqcFile") +if (mqcFileObj.length() < mqcMaxSize){ +out << """ +--nfcoremimeboundary +Content-Type: text/html; name=\"multiqc_report\" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" + +${mqcFileObj. + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} +""" +}} +%> + +--nfcoremimeboundary-- diff --git a/assets/slackreport.json b/assets/slackreport.json index ac9960e2..db71c89f 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -1,34 +1,34 @@ -{ - "attachments": [ - { - "fallback": "Plain-text summary of the attachment.", - "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/proteinfold ${version} - ${runName}", - "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", - "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", - "fields": [ - { - "title": "Command used to launch the workflow", - "value": "```${commandLine}```", - "short": false - } - <% - if (!success) { %> - , - { - "title": "Full error message", - "value": "```${errorReport}```", - "short": false - }, - { - "title": "Pipeline configuration", - "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? 
("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", - "short": false - } - <% } - %> - ], - "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" - } - ] -} +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/proteinfold ${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/extract_output.py b/bin/extract_output.py new file mode 100755 index 00000000..a055180b --- /dev/null +++ b/bin/extract_output.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +import pickle +import os, sys +import argparse + +def read_pkl(id, pkl_files): + for pkl_file in pkl_files: + dict_data = pickle.load(open(pkl_file,'rb')) + #print(dict_data.keys()) + if pkl_file.endswith("features.pkl"): + with open (f"{id}_msa.tsv", "w") as out_f: + for val in dict_data['msa']: + out_f.write("\t".join([str(x) for x in val]) + "\n") + else: + model_id = os.path.basename(pkl_file).replace("result_model_", "").replace("_pred_0.pkl", "") + with open (f"{id}_lddt_{model_id}.tsv", "w") as out_f: + out_f.write("\t".join([str(x) for x in dict_data['plddt']]) + "\n") + + +parser = argparse.ArgumentParser() +parser.add_argument('--pkls',dest='pkls',required=True, nargs="+") +parser.add_argument('--name',dest='name') +parser.add_argument('--output_dir',dest='output_dir') +parser.set_defaults(output_dir='') +parser.set_defaults(name='') +args = parser.parse_args() + +read_pkl(args.name, args.pkls) diff --git a/bin/generat_plots.py b/bin/generat_plots.py new file mode 100755 index 00000000..26494c63 --- /dev/null +++ b/bin/generat_plots.py @@ -0,0 +1,389 @@ +#!/usr/bin/env python + +import os +from matplotlib import pyplot as plt +import argparse +from collections import OrderedDict +import base64 +import os +from collections import OrderedDict +import plotly.graph_objects as go +from plotly.subplots import make_subplots +import re +#from Bio import PDB + +def generate_output_images(msa_path, plddt_data, name, out_dir, in_type, generate_tsv): + msa = [] + if not msa_path.endswith("NO_FILE"): + with open(msa_path, 'r') as in_file: + for line in in_file: + msa.append([int(x) for x in line.strip().split()]) + + seqid = [] 
+ for sequence in msa: + matches = [1.0 if first == other else 0.0 for first, other in zip(msa[0], sequence)] + seqid.append(sum(matches) / len(matches)) + + seqid_sort = sorted(range(len(seqid)), key=seqid.__getitem__) + + non_gaps = [] + for sequence in msa: + non_gaps.append([float(num != 21) if num != 21 else float('nan') for num in sequence]) + + sorted_non_gaps = [non_gaps[i] for i in seqid_sort] + final = [] + for sorted_seq, identity in zip(sorted_non_gaps, [seqid[i] for i in seqid_sort]): + final.append([value * identity if not isinstance(value, str) else value for value in sorted_seq]) + + # ################################################################## + plt.figure(figsize=(14, 14), dpi=100) + # ################################################################## + plt.title("Sequence coverage", fontsize=30, pad=36) + plt.imshow(final, + interpolation='nearest', aspect='auto', + cmap="rainbow_r", vmin=0, vmax=1, origin='lower') + + column_counts = [0] * len(msa[0]) + for col in range(len(msa[0])): + for row in msa: + if row[col] != 21: + column_counts[col] += 1 + + plt.plot(column_counts, color='black') + plt.xlim(-0.5, len(msa[0]) - 0.5) + plt.ylim(-0.5, len(msa) - 0.5) + + plt.tick_params(axis='both', which='both', labelsize=18) + + cbar = plt.colorbar() + cbar.set_label("Sequence identity to query", fontsize=24, labelpad=24) + cbar.ax.tick_params(labelsize=18) + plt.xlabel("Positions", fontsize=24, labelpad=24) + plt.ylabel("Sequences", fontsize=24, labelpad=36) + plt.savefig(f"{out_dir}/{name+('_' if name else '')}seq_coverage.png") + + # ################################################################## + + plddt_per_model = OrderedDict() + output_data = plddt_data + + if generate_tsv == "y": + for plddt_path in output_data: + with open(plddt_path, 'r') as in_file: + plddt_per_model[os.path.basename(plddt_path)[:-4]] = [float(x) for x in in_file.read().strip().split()] + else: + for i, plddt_values_str in enumerate(output_data): + plddt_per_model[i] = [] + plddt_per_model[i] = [float(x) for x in plddt_values_str.strip().split()] + + # plt.figure(figsize=(14, 14), dpi=100) + # plt.title("Predicted LDDT per position") + # for model_name, value_plddt in plddt_per_model.items(): + # plt.plot(value_plddt, label=model_name) + # plt.ylim(0, 100) + # plt.ylabel("Predicted LDDT") + # plt.xlabel("Positions") + # plt.savefig(f"{out_dir}/{name+('_' if name else '')}coverage_LDDT.png") + + # # split into figures + # i = 0 + # for model_name, value_plddt in plddt_per_model.items(): + # plt.figure(figsize=(14, 14), dpi=100) + # plt.title("Predicted LDDT per position") + # plt.plot(value_plddt, label=model_name) + # plt.ylim(0, 100) + # plt.ylabel("Predicted LDDT") + # plt.xlabel("Positions") + # plt.savefig(f"{out_dir}/{name+('_' if name else '')}coverage_LDDT_{i}.png") + # i += 1 + + fig = go.Figure() + for idx, (model_name, value_plddt) in enumerate(plddt_per_model.items()): + rank_label = f"Ranked {idx}" + fig.add_trace(go.Scatter( + x=list(range(len(value_plddt))), + y=value_plddt, + mode='lines', + name=rank_label, + text=[f"({i}, {value:.2f})" for i, value in enumerate(value_plddt)], + hoverinfo='text' + )) + fig.update_layout( + title=dict( + text='Predicted LDDT per position', + x=0.5, + xanchor='center' + ), + xaxis=dict( + title='Positions', + showline=True, + linecolor='black', + gridcolor='WhiteSmoke' + ), + yaxis=dict( + title='Predicted LDDT', + range=[0, 100], + minallowed=0, + maxallowed=100, + showline=True, + linecolor='black', + gridcolor='WhiteSmoke' + ), + 
legend=dict(
+            yanchor="bottom",
+            y=0,
+            xanchor="right",
+            x=1.3
+        ),
+        plot_bgcolor='white',
+        width=600,
+        height=600,
+        modebar_remove=['toImage', 'zoomIn', 'zoomOut']
+    )
+    html_content = fig.to_html(full_html=False, include_plotlyjs='cdn', config={'displayModeBar': True, 'displaylogo': False, 'scrollZoom': True})
+
+    with open(f"{out_dir}/{name+('_' if name else '')}coverage_LDDT.html", "w") as out_file:
+        out_file.write(html_content)
+
+    ##################################################################
+    # PAE plotting, kept for reference but currently not executed.
+    """
+    num_models = 5 # columns
+    num_runs_per_model = math.ceil(len(model_names)/num_models)
+    fig = plt.figure(figsize=(3 * num_models, 2 * num_runs_per_model), dpi=100)
+    for n, (model_name, value) in enumerate(pae_plddt_per_model.items()):
+        plt.subplot(num_runs_per_model, num_models, n + 1)
+        plt.title(model_name)
+        plt.imshow(value["pae"], label=model_name, cmap="bwr", vmin=0, vmax=30)
+        plt.colorbar()
+    fig.tight_layout()
+    plt.savefig(f"{out_dir}/{name+('_' if name else '')}PAE.png")
+    """
+    ##################################################################
+
+def generate_plots(msa_path, plddt_paths, name, out_dir):
+    msa = []
+    with open(msa_path, 'r') as in_file:
+        for line in in_file:
+            msa.append([int(x) for x in line.strip().split()])
+
+    seqid = []
+    for sequence in msa:
+        matches = [1.0 if first == other else 0.0 for first, other in zip(msa[0], sequence)]
+        seqid.append(sum(matches) / len(matches))
+
+    seqid_sort = sorted(range(len(seqid)), key=seqid.__getitem__)
+
+    non_gaps = []
+    for sequence in msa:
+        non_gaps.append([float(num != 21) if num != 21 else float('nan') for num in sequence])
+
+    sorted_non_gaps = [non_gaps[i] for i in seqid_sort]
+    final = []
+    for sorted_seq, identity in zip(sorted_non_gaps, [seqid[i] for i in seqid_sort]):
+        final.append([value * identity if not isinstance(value, str) else value for value in sorted_seq])
+
+    # Plotting sequence coverage using Plotly
+    fig = go.Figure()
+    fig.add_trace(go.Heatmap(
+        z=final,
+        colorscale="Rainbow",
+        zmin=0,
+        zmax=1,
+    ))
+    fig.update_layout(
+        title="Sequence coverage",
+        xaxis_title="Positions",
+        yaxis_title="Sequences"
+    )
+    # Plotly figures have no savefig(); static export goes through
+    # write_image(), which requires the kaleido package.
+    fig.write_image(f"{out_dir}/{name+('_' if name else '')}seq_coverage.png")
+
+    # Plotting predicted LDDT per position using Plotly, one figure per model
+    plddt_per_model = OrderedDict()
+    plddt_paths.sort()
+    for plddt_path in plddt_paths:
+        with open(plddt_path, 'r') as in_file:
+            plddt_per_model[os.path.basename(plddt_path)[:-4]] = [float(x) for x in in_file.read().strip().split()]
+
+    i = 0
+    for model_name, value_plddt in plddt_per_model.items():
+        fig = go.Figure()
+        fig.add_trace(go.Scatter(
+            x=list(range(len(value_plddt))),
+            y=value_plddt,
+            mode='lines',
+            name=model_name
+        ))
+        fig.update_layout(title="Predicted LDDT per Position")
+        fig.write_image(f"{out_dir}/{name+('_' if name else '')}coverage_LDDT_{i}.png")
+        i += 1
+
+def align_structures(structures):
+    # Currently a no-op that returns the input structures unchanged; the
+    # Biopython-based superposition below is kept for future use.
+    return structures
+    """
+    parser = PDB.PDBParser(QUIET=True)
+    structures = 
[parser.get_structure(f'Structure_{i}', pdb) for i, pdb in enumerate(structures)] + + ref_structure = structures[0] + ref_atoms = [atom for atom in ref_structure.get_atoms()] + + super_imposer = PDB.Superimposer() + aligned_structures = [structures[0]] # Include the reference structure in the list + + for i, structure in enumerate(structures[1:], start=1): + target_atoms = [atom for atom in structure.get_atoms()] + + super_imposer.set_atoms(ref_atoms, target_atoms) + super_imposer.apply(structure.get_atoms()) + + aligned_structure = f'aligned_structure_{i}.pdb' + io = PDB.PDBIO() + io.set_structure(structure) + io.save(aligned_structure) + aligned_structures.append(aligned_structure) + + return aligned_structures + """ + +def pdb_to_lddt(pdb_files, generate_tsv): + pdb_files_sorted = pdb_files + pdb_files_sorted.sort() + + output_lddt = [] + averages = [] + + for pdb_file in pdb_files_sorted: + plddt_values = [] + seen_lines = set() + + with open(pdb_file, 'r') as infile: + for line in infile: + columns = line.split() + if len(columns) >= 11: + key = f"{columns[5]}\t{columns[10]}" + if key not in seen_lines: + seen_lines.add(key) + plddt_values.append(float(columns[10])) + + # Calculate the average PLDDT value for the current file + if plddt_values: + avg_plddt = sum(plddt_values) / len(plddt_values) + averages.append(avg_plddt) + else: + averages.append(0.0) + + if generate_tsv == "y": + output_file = f"{pdb_file.replace('.pdb', '')}_plddt.tsv" + with open(output_file, 'w') as outfile: + outfile.write(" ".join(map(str, plddt_values)) + "\n") + output_lddt.append(output_file) + else: + plddt_values_string = " ".join(map(str, plddt_values)) + output_lddt.append(plddt_values_string) + + return output_lddt, averages + +print("Starting...") +parser = argparse.ArgumentParser() +parser.add_argument('--type', dest='in_type') +parser.add_argument('--generate_tsv', choices=['y', 'n'], required=True, dest='generate_tsv') +parser.add_argument('--msa', dest='msa',required=True) +parser.add_argument('--pdb', dest='pdb',required=True, nargs="+") +parser.add_argument('--name', dest='name') +parser.add_argument('--output_dir',dest='output_dir') +parser.add_argument('--html_template',dest='html_template') +parser.set_defaults(output_dir='') +parser.set_defaults(in_type='ESM-FOLD') +parser.set_defaults(name='') +args = parser.parse_args() + +lddt_data, lddt_averages = pdb_to_lddt(args.pdb, args.generate_tsv) + +generate_output_images(args.msa, lddt_data, args.name, args.output_dir, args.in_type, args.generate_tsv) +#generate_plots(args.msa, args.plddt, args.name, args.output_dir) + +print("generating html report...") +structures = args.pdb +structures.sort() +aligned_structures = align_structures(structures) + +## Ziad uncomment this +""" +io = PDB.PDBIO() +ref_structure_path = 'aligned_structure_0.pdb' +io.set_structure(aligned_structures[0]) +io.save(ref_structure_path) +aligned_structures[0] = ref_structure_path +""" +alphafold_template = open(args.html_template, "r").read() +alphafold_template = alphafold_template.replace(f"*sample_name*", args.name) +alphafold_template = alphafold_template.replace(f"*prog_name*", args.in_type) + +args_pdb_array_js = ",\n".join([f'"{model}"' for model in structures]) +alphafold_template = re.sub( + r'const MODELS = \[.*?\];', # Match the existing MODELS array in HTML template + f'const MODELS = [\n {args_pdb_array_js}\n];', # Replace with the new array + alphafold_template, + flags=re.DOTALL, +) + +averages_js_array = f"const LDDT_AVERAGES = {lddt_averages};" 
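+# Inject the per-model average pLDDT values into the HTML report by replacing
+# the empty LDDT_AVERAGES placeholder array in the template's embedded JavaScript.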
+alphafold_template = alphafold_template.replace("const LDDT_AVERAGES = [];", averages_js_array) + +i = 0 +for structure in aligned_structures: + alphafold_template = alphafold_template.replace(f"*_data_ranked_{i}.pdb*", open(structure, "r").read().replace("\n", "\\n")) + i += 1 + +if True: + if not args.msa.endswith("NO_FILE"): + with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}seq_coverage.png", "rb") as in_file: + alphafold_template = alphafold_template.replace("seq_coverage.png", f"data:image/png;base64,{base64.b64encode(in_file.read()).decode('utf-8')}") + + # with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}seq_coverage.html", "r") as in_file: + # seq_cov_html = in_file.read() + # alphafold_template = alphafold_template.replace("
", seq_cov_html) + + else: + pattern = r'
.*?(.*?)*?
\s*' + alphafold_template = re.sub(pattern, '', alphafold_template, flags=re.DOTALL) + + # alphafold_template = alphafold_template.replace("seq_coverage.png","") + + # for i in range(0, len(args.plddt)): + # with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}coverage_LDDT_{i}.png", "rb") as in_file: + # alphafold_template = alphafold_template.replace(f"coverage_LDDT_{i}.png", f"data:image/png;base64,{base64.b64encode(in_file.read()).decode('utf-8')}") + + # for i in range(0, len(args.plddt)): + # with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}coverage_LDDT_{i}.html", "r") as in_file: + # lddt_html = in_file.read() + # alphafold_template = alphafold_template.replace("
", lddt_html) + + with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}coverage_LDDT.html", "r") as in_file: + lddt_html = in_file.read() + alphafold_template = alphafold_template.replace("
", lddt_html) + +""" +with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}seq_coverage.html", "r") as in_file: + alphafold_template = alphafold_template.replace(f"seq_coverage.png", f"{in_file.read()}") + +for i in range(0, 5): + with open(f"{args.output_dir}/{args.name + ('_' if args.name else '')}coverage_LDDT_{i}.html", "r") as in_file: + alphafold_template = alphafold_template.replace(f"coverage_LDDT_{i}.png", f"{in_file.read()}") + +""" + +with open(f"{args.output_dir}/{args.name}_{args.in_type}_report.html", "w") as out_file: + out_file.write(alphafold_template) diff --git a/conf/base.config b/conf/base.config index f13f56b2..f1ad4660 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,65 +1,62 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - nf-core/proteinfold Nextflow base config file -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - A 'blank slate' config file, appropriate for general use on most high performance - compute environments. Assumes that all software is installed and available on - the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. ----------------------------------------------------------------------------------------- -*/ - -process { - - // TODO nf-core: Check the defaults for all processes - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - - errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' - - // Process-specific resource requirements - // NOTE - Please try and re-use the labels below as much as possible. - // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. - // If possible, it would be nice to keep the same label naming convention when - // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. 
- // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors - withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } - } - withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } - } - withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } - } - withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } - } - withLabel:error_ignore { - errorStrategy = 'ignore' - } - withLabel:error_retry { - errorStrategy = 'retry' - maxRetries = 2 - } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/proteinfold Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- +*/ + +process { + + // TODO nf-core: Check the defaults for all processes + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // TODO nf-core: Customise requirements for specific processes. 
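+    //          For example, a local module opts into one of the tiers below simply by
+    //          declaring the matching label (a sketch, not a module from this pipeline):
+    //
+    //              process MY_LOCAL_TOOL {
+    //                  label 'process_medium'
+    //                  ...
+    //              }
+    //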
+ // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } +} diff --git a/conf/dbs.config b/conf/dbs.config index d8a9c126..22201a02 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -1,58 +1,58 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for databases links -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines pointers to the DBS publicly available that store models parametrisations - and data. ----------------------------------------------------------------------------------------- -*/ -params { - - // AlphaFold2 links - bfd_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz' - small_bfd_link = 'https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz' - alphafold2_params_link = 'https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar' - mgnify_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz' - pdb70_link = 'http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz' - pdb_mmcif_link = 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' //'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ - pdb_obsolete_link = 'ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat' - uniref30_alphafold2_link = 'https://storage.googleapis.com/alphafold-databases/v2.3/UniRef30_2021_03.tar.gz' - uniref90_link = 'ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz' - pdb_seqres_link = 'ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt' - uniprot_sprot_link = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' - uniprot_trembl_link = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' - - // Alphafold paths - bfd_path = "${params.alphafold2_db}/bfd/*" - small_bfd_path = "${params.alphafold2_db}/small_bfd/*" - alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" - mgnify_path = "${params.alphafold2_db}/mgnify/*" - pdb70_path = "${params.alphafold2_db}/pdb70/**" - 
pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/*" - uniref30_alphafold2_path = "${params.alphafold2_db}/uniref30/*" - uniref90_path = "${params.alphafold2_db}/uniref90/*" - pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" - uniprot_path = "${params.alphafold2_db}/uniprot/*" - - // Colabfold links - colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' - uniref30_colabfold_link = 'https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2202.tar.gz' - - // Colabfold paths - colabfold_db_path = "${params.colabfold_db}/colabfold_envdb_202108" - uniref30_colabfold_path = "${params.colabfold_db}/uniref30_2202" - colabfold_alphafold2_params_tags = [ - "alphafold2_multimer_v1" : "alphafold_params_colab_2021-10-27", - "alphafold2_multimer_v2" : "alphafold_params_colab_2022-03-02", - "alphafold2_multimer_v3" : "alphafold_params_colab_2022-12-06", - "alphafold2_ptm" : "alphafold_params_2021-07-14" - ] - - // Esmfold links - esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' - esm2_t36_3B_UR50D = 'https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt' - esm2_t36_3B_UR50D_contact_regression = 'https://dl.fbaipublicfiles.com/fair-esm/regression/esm2_t36_3B_UR50D-contact-regression.pt' - - // Esmfold paths - esmfold_params_path = "${params.esmfold_db}/*" -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for databases links +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines pointers to the DBS publicly available that store models parametrisations + and data. +---------------------------------------------------------------------------------------- +*/ +params { + + // AlphaFold2 links + bfd_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz' + small_bfd_link = 'https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz' + alphafold2_params_link = 'https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar' + mgnify_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz' + pdb70_link = 'http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz' + pdb_mmcif_link = 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' //'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ + pdb_obsolete_link = 'ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat' + uniref30_alphafold2_link = 'https://storage.googleapis.com/alphafold-databases/v2.3/UniRef30_2021_03.tar.gz' + uniref90_link = 'ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz' + pdb_seqres_link = 'ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt' + uniprot_sprot_link = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' + uniprot_trembl_link = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' + + // Alphafold paths + bfd_path = "${params.alphafold2_db}/bfd/*" + small_bfd_path = "${params.alphafold2_db}/small_bfd/*" + alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" + mgnify_path = "${params.alphafold2_db}/mgnify/*" + pdb70_path 
= "${params.alphafold2_db}/pdb70/**" + pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/*" + uniref30_alphafold2_path = "${params.alphafold2_db}/uniref30/*" + uniref90_path = "${params.alphafold2_db}/uniref90/*" + pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" + uniprot_path = "${params.alphafold2_db}/uniprot/*" + + // Colabfold links + colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' + uniref30_colabfold_link = 'https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2202.tar.gz' + + // Colabfold paths + colabfold_db_path = "${params.colabfold_db}/colabfold_envdb_202108" + uniref30_colabfold_path = "${params.colabfold_db}/uniref30_2202" + colabfold_alphafold2_params_tags = [ + "alphafold2_multimer_v1" : "alphafold_params_colab_2021-10-27", + "alphafold2_multimer_v2" : "alphafold_params_colab_2022-03-02", + "alphafold2_multimer_v3" : "alphafold_params_colab_2022-12-06", + "alphafold2_ptm" : "alphafold_params_2021-07-14" + ] + + // Esmfold links + esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' + esm2_t36_3B_UR50D = 'https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt' + esm2_t36_3B_UR50D_contact_regression = 'https://dl.fbaipublicfiles.com/fair-esm/regression/esm2_t36_3B_UR50D-contact-regression.pt' + + // Esmfold paths + esmfold_params_path = "${params.esmfold_db}/*" +} diff --git a/conf/gadi.config b/conf/gadi.config new file mode 100755 index 00000000..80bbd361 --- /dev/null +++ b/conf/gadi.config @@ -0,0 +1,58 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. 
+
+    Use as follows:
+        nextflow run nf-core/proteinfold -profile gadi --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+
+if (params.use_gpu) { singularity.runOptions = '--nv' }
+
+singularity.cacheDir = "/g/data/if89/singularity_cache/"
+singularity.autoMounts = true
+
+params {
+    mode                   = 'alphafold2'
+    alphafold2_mode        = 'split_msa_prediction'
+    input                  = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv'
+    alphafold2_db          = '/g/data/if89/proteinfold_dbs/alphafold2_dbs/'
+    use_dgxa100            = false
+    esmfold_params_path    = '/g/data/if89/proteinfold_dbs/esmfold_dbs/checkpoints'
+    colabfold_db           = "/g/data/if89/proteinfold_dbs/colabfold_dbs"
+    colabfold_server       = 'local'
+    colabfold_model_preset = 'alphafold2_ptm'
+
+}
+
+process {
+    storage = "gdata/if89+scratch/${params.project}+gdata/${params.project}"
+
+    if (params.use_gpu) {
+        withName: 'RUN_ALPHAFOLD2_PRED|RUN_ALPHAFOLD2|RUN_ESMFOLD' {
+            if (params.use_dgxa100) {
+                queue = "dgxa100"
+                cpus  = 16
+            } else {
+                queue = "gpuvolta"
+                cpus  = 12
+            }
+            gpus = 1
+        }
+    }
+
+    withName: 'MMSEQS_COLABFOLDSEARCH' {
+        memory = 300.GB
+        time   = 16.h
+    }
+
+    withName: 'FOLDSEEK_EASYSEARCH' {
+        memory = 128.GB
+        time   = 16.h
+    }
+
+}
\ No newline at end of file
diff --git a/conf/igenomes.config b/conf/igenomes.config
new file mode 100644
index 00000000..8f5beea9
--- /dev/null
+++ b/conf/igenomes.config
@@ -0,0 +1,440 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for iGenomes paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines reference genomes using iGenome paths.
+    Can be used by any config that customises the base path using:
+        $params.igenomes_base / --igenomes_base
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    // illumina iGenomes reference file paths
+    genomes {
+        'GRCh37' {
+            fasta       = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa"
+            bwa         = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/"
+            bowtie2     = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/"
+            star        = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/"
+            bismark     = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/"
+            gtf         = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf"
+            bed12       = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed"
+            readme      = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt"
+            mito_name   = "MT"
+            macs_gsize  = "2.7e9"
+            blacklist   = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed"
+        }
+        'GRCh38' {
+            fasta       = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa"
+            bwa         = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/"
+            bowtie2     = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/"
+            star        = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/"
+            bismark     = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/"
+            gtf         = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf"
+            bed12       = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed"
+            mito_name   = "chrM"
+            
macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + mito_name = "Mt" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + mito_name = "MT" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mito_name = "MtDNA" + macs_gsize = "9e7" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + mito_name = "MT" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mito_name = "M" + macs_gsize = "1.2e8" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = 
"${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" + mito_name = "MT" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + mito_name = 
"MT" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + mito_name = "MT" + } + 'Rnor_5.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.2e7" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.21e7" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" + mito_name = "MT" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "9e7" + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + mito_name = "chrM" + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.37e9" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.2e8" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" + mito_name = "chrM" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + 
bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" + mito_name = "chrM" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" + mito_name = "chrM" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" + readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.2e7" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" + mito_name = "chrM" + } + } +} diff --git a/conf/modules.config b/conf/modules.config index 955501f3..80c72f08 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,62 +1,54 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. ----------------------------------------------------------------------------------------- -*/ - -// -// General configuration options -// - -process { - publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - - withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } -} - -// -// DBs and parameters preparation options -// - -process { - withName: 'UNTAR' { - ext.args2 = '--no-same-owner' - publishDir = [ - path: {"${params.outdir}/DBs/${params.mode}/${params.alphafold2_mode}"}, - mode: 'symlink', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: 'ARIA2' { - publishDir = [ - enabled: false - ] - } - - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } - publishDir = [ - path: { "${params.outdir}/multiqc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +// +// General configuration options +// + +process { + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] +} + +// +// DBs and parameters preparation options +// + +process { + withName: 'UNTAR' { + ext.args2 = '--no-same-owner' + publishDir = [ + path: {"${params.outdir}/DBs/${params.mode}/${params.alphafold2_mode}"}, + mode: 'symlink', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: 'ARIA2' { + publishDir = [ + enabled: false + ] + } + + withName: 'MULTIQC' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + +} diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config index 9a266160..0e8a4b87 100644 --- a/conf/modules_alphafold2.config +++ b/conf/modules_alphafold2.config @@ -1,66 +1,66 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. ----------------------------------------------------------------------------------------- -*/ - -// -// General configuration options -// - -process { - withName: 'GUNZIP|COMBINE_UNIPROT|DOWNLOAD_PDBMMCIF' { - publishDir = [ - path: {"${params.outdir}/DBs/${params.mode}/${params.alphafold2_mode}"}, - mode: 'symlink', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } -} - -if (params.alphafold2_mode == 'standard') { - process { - withName: 'RUN_ALPHAFOLD2' { - if(params.use_gpu) { accelerator = 1 } - ext.args = [ - params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false', - params.max_template_date ? "--max_template_date ${params.max_template_date}" : '' - ].join(' ').trim() - publishDir = [ - path: { "${params.outdir}/${params.mode}/${params.alphafold2_mode}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - } -} - - -if (params.alphafold2_mode == 'split_msa_prediction') { - process { - withName: 'RUN_ALPHAFOLD2_MSA' { - ext.args = params.max_template_date ? "--max_template_date ${params.max_template_date}" : '' - publishDir = [ - path: { "${params.outdir}/${params.mode}/${params.alphafold2_mode}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'RUN_ALPHAFOLD2_PRED' { - if(params.use_gpu) { accelerator = 1 } - ext.args = params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false' - publishDir = [ - path: { "${params.outdir}/${params.mode}/${params.alphafold2_mode}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +// +// General configuration options +// + +process { + withName: 'GUNZIP|COMBINE_UNIPROT|DOWNLOAD_PDBMMCIF|ARIA2_PDB_SEQRES' { + publishDir = [ + path: {"${params.outdir}/DBs/${params.mode}/${params.alphafold2_mode}"}, + mode: 'symlink', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } +} + +if (params.alphafold2_mode == 'standard') { + process { + withName: 'RUN_ALPHAFOLD2' { + if(params.use_gpu) { accelerator = 1 } + ext.args = [ + params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false', + params.max_template_date ? "--max_template_date ${params.max_template_date}" : '' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/${params.mode}/${params.alphafold2_mode}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + } +} + + +if (params.alphafold2_mode == 'split_msa_prediction') { + process { + withName: 'RUN_ALPHAFOLD2_MSA' { + ext.args = params.max_template_date ? "--max_template_date ${params.max_template_date}" : '' + publishDir = [ + path: { "${params.outdir}/${params.mode}/${params.alphafold2_mode}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'RUN_ALPHAFOLD2_PRED' { + if(params.use_gpu) { accelerator = 1 } + ext.args = params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false' + publishDir = [ + path: { "${params.outdir}/${params.mode}/${params.alphafold2_mode}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config index 3a6d2e23..3fc13664 100644 --- a/conf/modules_colabfold.config +++ b/conf/modules_colabfold.config @@ -1,66 +1,67 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. ----------------------------------------------------------------------------------------- -*/ - -if (params.colabfold_server == 'webserver') { - process { - withName: 'COLABFOLD_BATCH' { - ext.args = [ - params.use_gpu ? '--use-gpu-relax' : '', - params.use_amber ? '--amber' : '', - params.use_templates ? '--templates' : '', - params.host_url ? "--host-url ${params.host_url}" : '' - ].join(' ').trim() - publishDir = [ - path: { "${params.outdir}/${params.mode}/${params.colabfold_server}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - pattern: '*.*' - ] - } - } -} - -if (params.colabfold_server == 'local') { - process { - withName: '.*:MMSEQS_.*' { - publishDir = [ - enabled: false - ] - } - withName: 'MMSEQS_CREATEINDEX' { - ext.args = '--remove-tmp-files 1' - publishDir = [ - enabled: false - ] - } - withName: 'MMSEQS_COLABFOLDSEARCH' { - ext.args = { params.db_load_mode ? "--db-load-mode ${params.db_load_mode}" : '' } - publishDir = [ - enabled: false - ] - } - withName: 'COLABFOLD_BATCH' { - if(params.use_gpu) { accelerator = 1 } - ext.args = [ - params.use_gpu ? '--use-gpu-relax' : '', - params.use_amber ? '--amber' : '', - params.use_templates ? '--templates' : '' - ].join(' ').trim() - publishDir = [ - path: { "${params.outdir}/${params.mode}/${params.colabfold_server}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - pattern: '*.*' - ] - } - } -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +if (params.colabfold_server == 'webserver') { + process { + withName: 'COLABFOLD_BATCH' { + ext.args = [ + params.use_gpu ? '--use-gpu-relax' : '', + params.use_amber ? '--amber' : '', + params.use_templates ? '--templates' : '', + params.host_url ? "--host-url ${params.host_url}" : '' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/${params.mode}/${params.colabfold_server}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: '*.*' + ] + } + } +} + +if (params.colabfold_server == 'local') { + process { + withName: '.*:MMSEQS_.*' { + publishDir = [ + enabled: false + ] + } + withName: 'MMSEQS_CREATEINDEX' { + ext.args = '--remove-tmp-files 1' + ext.args2 = '*_seq.tsv' + publishDir = [ + enabled: false + ] + } + withName: 'MMSEQS_COLABFOLDSEARCH' { + ext.args = { params.db_load_mode ? "--db-load-mode ${params.db_load_mode}" : '' } + publishDir = [ + enabled: false + ] + } + withName: 'COLABFOLD_BATCH' { + if(params.use_gpu) { accelerator = 1 } + ext.args = [ + params.use_gpu ? '--use-gpu-relax' : '', + params.use_amber ? '--amber' : '', + params.use_templates ? '--templates' : '' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/${params.mode}/${params.colabfold_server}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: '*.*' + ] + } + } +} diff --git a/conf/modules_esmfold.config b/conf/modules_esmfold.config index 81b3048f..be22a5ec 100644 --- a/conf/modules_esmfold.config +++ b/conf/modules_esmfold.config @@ -1,23 +1,23 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. ----------------------------------------------------------------------------------------- -*/ - -process { - withName: 'RUN_ESMFOLD' { - ext.args = {params.use_gpu ? '' : '--cpu-only'} - publishDir = [ - path: { "${params.outdir}/${params.mode}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - pattern: '*.*' - ] - } -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + withName: 'RUN_ESMFOLD' { + ext.args = {params.use_gpu ? '' : '--cpu-only'} + publishDir = [ + path: { "${params.outdir}/${params.mode}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: '*.*' + ] + } +} diff --git a/conf/test.config b/conf/test.config index 4001a162..e71e652d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,35 +1,35 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - - Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir - ----------------------------------------------------------------------------------------- -*/ - -stubRun = true - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data to test alphafold2 analysis - mode = 'alphafold2' - alphafold2_mode = 'standard' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' - alphafold2_db = "${projectDir}/assets/dummy_db_dir" -} - -process { - withName: 'RUN_ALPHAFOLD2' { - container = 'biocontainers/gawk:5.1.0' - } -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
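+
+    A concrete invocation might look like the following (the docker profile and
+    the results directory here are illustrative choices, not requirements):
+      nextflow run nf-core/proteinfold -profile test,docker --outdir results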
+ + Use as follows: + nextflow run nf-core/proteinfold -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +//stubRun = true + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data to test alphafold2 analysis + mode = 'alphafold2' + alphafold2_mode = 'standard' + input = params.pipelines_testdata_base_path + '/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + alphafold2_db = "${projectDir}/assets/dummy_db_dir" +} + +process { + withName: 'RUN_ALPHAFOLD2' { + container = 'biocontainers/gawk:5.1.0' + } +} diff --git a/conf/test_alphafold_download.config b/conf/test_alphafold_download.config new file mode 100755 index 00000000..dce0cc02 --- /dev/null +++ b/conf/test_alphafold_download.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + Use as follows: + nextflow run nf-core/proteinfold -profile test_alphafold2_download, --outdir +---------------------------------------------------------------------------------------- +*/ + +stubRun = true + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data to test alphafold2 analysis + mode = 'alphafold2' + alphafold2_mode = 'standard' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' +} + +process { + withName: 'ARIA2|UNTAR|RUN_ALPHAFOLD2' { + container = 'biocontainers/gawk:5.1.0' + } +} \ No newline at end of file diff --git a/conf/test_alphafold_split.config b/conf/test_alphafold_split.config index 1bc651f6..64667f8f 100644 --- a/conf/test_alphafold_split.config +++ b/conf/test_alphafold_split.config @@ -1,35 +1,35 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir - ----------------------------------------------------------------------------------------- -*/ - -stubRun = true - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data to test alphafold2 splitting MSA from prediction analysis - mode = 'alphafold2' - alphafold2_mode = 'split_msa_prediction' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' - alphafold2_db = "${projectDir}/assets/dummy_db_dir" -} - -process { - withName: 'RUN_ALPHAFOLD2_MSA|RUN_ALPHAFOLD2_PRED' { - container = 'biocontainers/gawk:5.1.0' - } -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/proteinfold -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +stubRun = true + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data to test alphafold2 splitting MSA from prediction analysis + mode = 'alphafold2' + alphafold2_mode = 'split_msa_prediction' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + alphafold2_db = "${projectDir}/assets/dummy_db_dir" +} + +process { + withName: 'RUN_ALPHAFOLD2_MSA|RUN_ALPHAFOLD2_PRED' { + container = 'biocontainers/gawk:5.1.0' + } +} diff --git a/conf/test_colabfold_download.config b/conf/test_colabfold_download.config new file mode 100755 index 00000000..96a664c1 --- /dev/null +++ b/conf/test_colabfold_download.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
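+    It exercises the ColabFold database-download path under stub execution:
+    ARIA2, UNTAR and COLABFOLD_BATCH all run from a lightweight gawk container,
+    so no real downloads or predictions are performed.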
+ Use as follows: + nextflow run nf-core/proteinfold -profile test_colabfold_download, --outdir +---------------------------------------------------------------------------------------- +*/ + +stubRun = true + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data to test colabfold analysis + mode = 'colabfold' + colabfold_server = 'webserver' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' +} + +process { + withName: 'ARIA2|UNTAR|COLABFOLD_BATCH' { + container = 'biocontainers/gawk:5.1.0' + } +} \ No newline at end of file diff --git a/conf/test_colabfold_local.config b/conf/test_colabfold_local.config index 43c04b4c..d2558ba2 100644 --- a/conf/test_colabfold_local.config +++ b/conf/test_colabfold_local.config @@ -1,34 +1,34 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir ----------------------------------------------------------------------------------------- -*/ - -stubRun = true - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data to test colabfold with the colabfold webserver analysis - mode = 'colabfold' - colabfold_server = 'local' - colabfold_db = "${projectDir}/assets/dummy_db_dir" - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' -} - -process { - withName: 'MMSEQS_COLABFOLDSEARCH|COLABFOLD_BATCH' { - container = 'biocontainers/gawk:5.1.0' - } -} - +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
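+    As in the other minimal profiles, stub execution and a dummy database
+    (assets/dummy_db_dir) keep this test light enough for CI runners.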
+ Use as follows: + nextflow run nf-core/proteinfold -profile test_colabfold_local, --outdir +---------------------------------------------------------------------------------------- +*/ + +stubRun = true + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data to test colabfold with the colabfold webserver analysis + mode = 'colabfold' + colabfold_server = 'local' + colabfold_db = "${projectDir}/assets/dummy_db_dir" + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' +} + +process { + withName: 'MMSEQS_COLABFOLDSEARCH|COLABFOLD_BATCH' { + container = 'biocontainers/gawk:5.1.0' + } +} + diff --git a/conf/test_colabfold_webserver.config b/conf/test_colabfold_webserver.config index 99ebf182..d8424e23 100644 --- a/conf/test_colabfold_webserver.config +++ b/conf/test_colabfold_webserver.config @@ -1,33 +1,33 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir ----------------------------------------------------------------------------------------- -*/ - -stubRun = true - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data to test colabfold with a local server analysis - mode = 'colabfold' - colabfold_server = 'webserver' - colabfold_db = "${projectDir}/assets/dummy_db_dir" - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' -} - -process { - withName: 'COLABFOLD_BATCH' { - container = 'biocontainers/gawk:5.1.0' - } -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
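+    Although it targets the remote webserver, the profile still points
+    colabfold_db at the dummy database so the stubbed run stays self-contained.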
+ Use as follows: + nextflow run nf-core/proteinfold -profile test_colabfold_webserver, --outdir +---------------------------------------------------------------------------------------- +*/ + +stubRun = true + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data to test colabfold with a local server analysis + mode = 'colabfold' + colabfold_server = 'webserver' + colabfold_db = "${projectDir}/assets/dummy_db_dir" + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' +} + +process { + withName: 'COLABFOLD_BATCH' { + container = 'biocontainers/gawk:5.1.0' + } +} diff --git a/conf/test_esmfold.config b/conf/test_esmfold.config index 38202ac8..856f2090 100644 --- a/conf/test_esmfold.config +++ b/conf/test_esmfold.config @@ -1,33 +1,33 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir ----------------------------------------------------------------------------------------- -*/ - -stubRun = true - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data to test esmfold - mode = 'esmfold' - esmfold_db = "${projectDir}/assets/dummy_db_dir" - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' -} - -process { - withName: 'RUN_ESMFOLD' { - container = 'quay.io/biocontainers/gawk:5.1.0' - } -} - +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
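+    ESMFold runs in stub mode against a dummy database, with RUN_ESMFOLD
+    executed from a gawk container rather than the real ESMFold image.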
+ Use as follows: + nextflow run nf-core/proteinfold -profile test_esmfold, --outdir +---------------------------------------------------------------------------------------- +*/ + +stubRun = true + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data to test esmfold + mode = 'esmfold' + esmfold_db = "${projectDir}/assets/dummy_db_dir" + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' +} + +process { + withName: 'RUN_ESMFOLD' { + container = 'quay.io/biocontainers/gawk:5.1.0' + } +} + diff --git a/conf/test_full.config b/conf/test_full.config index 2c8a4fae..2fe08d0f 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -1,22 +1,22 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running full-size tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a full size pipeline test. - - Use as follows: - nextflow run nf-core/proteinfold -profile test_full, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' - - // Input data for full test of alphafold standard mode - mode = 'alphafold2' - alphafold2_mode = 'standard' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' - alphafold2_db = 's3://proteinfold-dataset/test-data/db/alphafold_mini' -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/proteinfold -profile test_full, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Input data for full test of alphafold standard mode + mode = 'alphafold2' + alphafold2_mode = 'standard' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + alphafold2_db = 's3://proteinfold-dataset/test-data/db/alphafold_mini' +} diff --git a/conf/test_full_alphafold_multimer.config b/conf/test_full_alphafold_multimer.config index 2f8b0627..a9665b35 100644 --- a/conf/test_full_alphafold_multimer.config +++ b/conf/test_full_alphafold_multimer.config @@ -1,23 +1,23 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running full-size tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a full size pipeline test. 
- - Use as follows: - nextflow run nf-core/proteinfold -profile test_full_alphafold_multimer, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' - - // Input data for full test of alphafold standard mode - mode = 'alphafold2' - alphafold2_mode = 'standard' - alphafold2_model_preset = 'multimer' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv' - alphafold2_db = 's3://proteinfold-dataset/test-data/db/alphafold_mini' -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/proteinfold -profile test_full_alphafold_multimer, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Input data for full test of alphafold standard mode + mode = 'alphafold2' + alphafold2_mode = 'standard' + alphafold2_model_preset = 'multimer' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv' + alphafold2_db = 's3://proteinfold-dataset/test-data/db/alphafold_mini' +} diff --git a/conf/test_full_alphafold_split.config b/conf/test_full_alphafold_split.config index 9cb378c2..9f2a202f 100644 --- a/conf/test_full_alphafold_split.config +++ b/conf/test_full_alphafold_split.config @@ -1,22 +1,22 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running full-size tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a full size pipeline test. - - Use as follows: - nextflow run nf-core/proteinfold -profile test_full_alphafold2_split, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' - - // Input data to test colabfold with a local server analysis - mode = 'alphafold2' - alphafold2_mode = 'split_msa_prediction' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' - alphafold2_db = 's3://proteinfold-dataset/test-data/db/alphafold_mini' -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. 
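+
+    It separates MSA generation from structure prediction
+    (alphafold2_mode = 'split_msa_prediction') and runs against the
+    alphafold_mini database hosted on S3.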
+ + Use as follows: + nextflow run nf-core/proteinfold -profile test_full_alphafold2_split, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Input data to test colabfold with a local server analysis + mode = 'alphafold2' + alphafold2_mode = 'split_msa_prediction' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + alphafold2_db = 's3://proteinfold-dataset/test-data/db/alphafold_mini' +} diff --git a/conf/test_full_colabfold_local.config b/conf/test_full_colabfold_local.config index 90f1c811..0f1db9eb 100644 --- a/conf/test_full_colabfold_local.config +++ b/conf/test_full_colabfold_local.config @@ -1,29 +1,29 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running full-size tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a full size pipeline test. - - Use as follows: - nextflow run nf-core/proteinfold -profile test_full_colabfold_local, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Full test profile for colabfold using colabfold server' - config_profile_description = 'Minimal test dataset to check pipeline function' - - - // Input data to test colabfold with a local server analysis - mode = 'colabfold' - colabfold_server = 'local' - colabfold_model_preset = 'alphafold2_ptm' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' - colabfold_db = 's3://proteinfold-dataset/test-data/db/colabfold_mini' -} -process { - withName:MMSEQS_COLABFOLDSEARCH { - memory = 16.GB - } -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. 
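+
+    Note that the process block at the bottom of this file also raises the
+    memory available to MMSEQS_COLABFOLDSEARCH to 16.GB.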
+ + Use as follows: + nextflow run nf-core/proteinfold -profile test_full_colabfold_local, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile for colabfold using colabfold server' + config_profile_description = 'Minimal test dataset to check pipeline function' + + + // Input data to test colabfold with a local server analysis + mode = 'colabfold' + colabfold_server = 'local' + colabfold_model_preset = 'alphafold2_ptm' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + colabfold_db = 's3://proteinfold-dataset/test-data/db/colabfold_mini' +} +process { + withName:MMSEQS_COLABFOLDSEARCH { + memory = 16.GB + } +} diff --git a/conf/test_full_colabfold_webserver.config b/conf/test_full_colabfold_webserver.config index a9db381a..8834d336 100644 --- a/conf/test_full_colabfold_webserver.config +++ b/conf/test_full_colabfold_webserver.config @@ -1,23 +1,23 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running full-size tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a full size pipeline test. - - Use as follows: - nextflow run nf-core/proteinfold -profile test_full_colabfold_webserver, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Full test profile for colabfold using colabfold server' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data for full test of colabfold with Colabfold server - mode = 'colabfold' - colabfold_server = 'webserver' - colabfold_model_preset = 'alphafold2_ptm' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' - colabfold_db = 's3://proteinfold-dataset/test-data/db/colabfold_mini' -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. 
+ + Use as follows: + nextflow run nf-core/proteinfold -profile test_full_colabfold_webserver, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile for colabfold using colabfold server' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data for full test of colabfold with Colabfold server + mode = 'colabfold' + colabfold_server = 'webserver' + colabfold_model_preset = 'alphafold2_ptm' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + colabfold_db = 's3://proteinfold-dataset/test-data/db/colabfold_mini' +} diff --git a/conf/test_full_colabfold_webserver_multimer.config b/conf/test_full_colabfold_webserver_multimer.config index 612a1221..9abdf616 100644 --- a/conf/test_full_colabfold_webserver_multimer.config +++ b/conf/test_full_colabfold_webserver_multimer.config @@ -1,23 +1,23 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running full-size tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a full size pipeline test. - - Use as follows: - nextflow run nf-core/proteinfold -profile test_full_colabfold_webserver, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Full test profile for colabfold using colabfold server' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data for full test of colabfold with Colabfold server - mode = 'colabfold' - colabfold_server = 'webserver' - colabfold_model_preset = 'alphafold2_multimer_v3' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv' - colabfold_db = 's3://proteinfold-dataset/test-data/db/colabfold_mini' -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. 
+ + Use as follows: + nextflow run nf-core/proteinfold -profile test_full_colabfold_webserver, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile for colabfold using colabfold server' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data for full test of colabfold with Colabfold server + mode = 'colabfold' + colabfold_server = 'webserver' + colabfold_model_preset = 'alphafold2_multimer_v3' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv' + colabfold_db = 's3://proteinfold-dataset/test-data/db/colabfold_mini' +} diff --git a/conf/test_full_esmfold.config b/conf/test_full_esmfold.config index a3919070..ccf59905 100644 --- a/conf/test_full_esmfold.config +++ b/conf/test_full_esmfold.config @@ -1,22 +1,22 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running full-size tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a full size pipeline test. - - Use as follows: - nextflow run nf-core/proteinfold -profile test_full_colabfold_webserver, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Full test profile for esmfold monomer' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data for full test of esmfold monomer - mode = 'esmfold' - esmfold_model_preset = 'monomer' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' - esmfold_db = 's3://proteinfold-dataset/test-data/db/esmfold' -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/proteinfold -profile test_full_esmfold, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile for esmfold monomer' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data for full test of esmfold monomer + mode = 'esmfold' + esmfold_model_preset = 'monomer' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + //esmfold_db = 's3://proteinfold-dataset/test-data/db/esmfold' +} diff --git a/conf/test_full_esmfold_multimer.config b/conf/test_full_esmfold_multimer.config index 36445443..7832acd9 100644 --- a/conf/test_full_esmfold_multimer.config +++ b/conf/test_full_esmfold_multimer.config @@ -1,22 +1,22 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running full-size tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a full size pipeline test. 
- - Use as follows: - nextflow run nf-core/proteinfold -profile test_full_colabfold_webserver, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Full test profile for esmfold multimer' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data for full test of esmfold multimer - mode = 'esmfold' - esmfold_model_preset = 'multimer' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv' - esmfold_db = 's3://proteinfold-dataset/test-data/db/esmfold' -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/proteinfold -profile test_full_esmfold_multimer, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile for esmfold multimer' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data for full test of esmfold multimer + mode = 'esmfold' + esmfold_model_preset = 'multimer' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv' + esmfold_db = 's3://proteinfold-dataset/test-data/db/esmfold' +} diff --git a/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_msa b/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_msa old mode 100644 new mode 100755 index af184d3a..75ad01ec --- a/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_msa +++ b/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_msa @@ -1,58 +1,58 @@ -FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu18.04 - -LABEL authors="Luisa Santus, Athanasios Baltzis, Jose Espinosa-Carrasco, Leila Mansouri" \ - title="nfcore/proteinfold_alphafold2_msa" \ - Version="1.1.0" \ - description="Docker image containing all software requirements to run the RUN_ALPHAFOLD2_MSA module using the nf-core/proteinfold pipeline" - -# Use bash to support string substitution. -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -# Add env variables -ENV LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH" -ENV PATH="/conda/bin:$PATH" - -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ - build-essential \ - cmake \ - cuda-command-line-tools-11-1 \ - git \ - hmmer \ - kalign \ - tzdata \ - wget \ - && rm -rf /var/lib/apt/lists/* \ - && apt-get autoremove -y \ - && apt-get clean - -# Clone AlphaFold2 -RUN git clone https://github.com/cbcrg/alphafold.git /app/alphafold && \ - cd /app/alphafold && \ - git checkout 1b3170e9409472ec8ad044f9935c92bedd7b4674 && \ - cd - - -# Compile HHsuite from source -RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ - && mkdir /tmp/hh-suite/build \ - && cd /tmp/hh-suite/build \ - && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite -DHAVE_AVX2=1 .. 
\ - && make -j 4 && make install \ - && ln -s /opt/hhsuite/bin/* /usr/bin \ - && cd - && rm -rf /tmp/hh-suite - -# Install Miniconda package manager -RUN wget -q -P /tmp \ - https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ - && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /conda \ - && rm /tmp/Miniconda3-latest-Linux-x86_64.sh - -# Install conda packages -RUN /conda/bin/conda install -y -c conda-forge \ - pip \ - python=3.10 \ - && conda clean --all --force-pkgs-dirs --yes - -# Install pip packages -RUN pip3 install --upgrade pip --no-cache-dir \ - && pip3 install -r /app/alphafold/requirements_msa.txt --no-cache-dir +FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu18.04 + +LABEL authors="Luisa Santus, Athanasios Baltzis, Jose Espinosa-Carrasco, Leila Mansouri" \ + title="nfcore/proteinfold_alphafold2_msa" \ + Version="1.1.0" \ + description="Docker image containing all software requirements to run the RUN_ALPHAFOLD2_MSA module using the nf-core/proteinfold pipeline" + +# Use bash to support string substitution. +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Add env variables +ENV LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.4/lib64:$LD_LIBRARY_PATH" +ENV PATH="/conda/bin:$PATH" + +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + build-essential \ + cmake \ + cuda-command-line-tools-11-1 \ + git \ + hmmer \ + kalign \ + tzdata \ + wget \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get autoremove -y \ + && apt-get clean + +# Clone AlphaFold2 +RUN git clone https://github.com/cbcrg/alphafold.git /app/alphafold && \ + cd /app/alphafold && \ + git checkout 1b3170e9409472ec8ad044f9935c92bedd7b4674 && \ + cd - + +# Compile HHsuite from source +RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ + && mkdir /tmp/hh-suite/build \ + && cd /tmp/hh-suite/build \ + && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite -DHAVE_AVX2=1 .. \ + && make -j 4 && make install \ + && ln -s /opt/hhsuite/bin/* /usr/bin \ + && cd - && rm -rf /tmp/hh-suite + +# Install Miniconda package manager +RUN wget -q -P /tmp \ + https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /conda \ + && rm /tmp/Miniconda3-latest-Linux-x86_64.sh + +# Install conda packages +RUN /conda/bin/conda install -y -c conda-forge \ + pip \ + python=3.10 \ + && conda clean --all --force-pkgs-dirs --yes + +# Install pip packages +RUN pip3 install --upgrade pip --no-cache-dir \ + && pip3 install -r /app/alphafold/requirements_msa.txt --no-cache-dir diff --git a/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_split b/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_split old mode 100644 new mode 100755 index 2c7fd102..e078aa50 --- a/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_split +++ b/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_split @@ -1,79 +1,79 @@ -FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu18.04 - -LABEL authors="Athanasios Baltzis, Jose Espinosa-Carrasco, Leila Mansouri" \ - title="nfcore/proteinfold_alphafold2_split" \ - Version="1.1.0" \ - description="Docker image containing all software requirements to run the RUN_ALPHAFOLD2_PRED module using the nf-core/proteinfold pipeline" - -# Use bash to support string substitution. 
-SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -# Add env variables -ENV LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH" -ENV PATH="/conda/bin:$PATH" - -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ - build-essential \ - cmake \ - cuda-command-line-tools-11-1 \ - git \ - hmmer \ - kalign \ - tzdata \ - wget \ - && rm -rf /var/lib/apt/lists/* \ - && apt-get autoremove -y \ - && apt-get clean - -# Clone AlphaFold2 -RUN git clone https://github.com/cbcrg/alphafold.git /app/alphafold && \ - cd /app/alphafold && \ - git checkout 1b3170e9409472ec8ad044f9935c92bedd7b4674 && \ - cd - - -# Compile HHsuite from source -RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ - && mkdir /tmp/hh-suite/build \ - && cd /tmp/hh-suite/build \ - && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite -DHAVE_AVX2=1 .. \ - && make -j 4 && make install \ - && ln -s /opt/hhsuite/bin/* /usr/bin \ - && cd - && rm -rf /tmp/hh-suite - -# Install Miniconda package manager -RUN wget -q -P /tmp \ - https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ - && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /conda \ - && rm /tmp/Miniconda3-latest-Linux-x86_64.sh - -# Install conda packages -RUN /conda/bin/conda install -y -c conda-forge \ - openmm=7.7.0 \ - cudatoolkit==11.1.1 \ - pdbfixer \ - pip \ - python=3.10 \ - && conda clean --all --force-pkgs-dirs --yes - -RUN wget -q -P /app/alphafold/alphafold/common/ \ - https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt - -# Install pip packages. -RUN pip3 install --upgrade pip --no-cache-dir \ - && pip3 install -r /app/alphafold/requirements.txt --no-cache-dir \ - && pip3 install --upgrade --no-cache-dir \ - jax==0.3.25 \ - jaxlib==0.3.25+cuda11.cudnn805 \ - -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html - -RUN sed -i "s|alphafold/common/stereo_chemical_props.txt|/app/alphafold/alphafold/common/stereo_chemical_props.txt|g" /app/alphafold/alphafold/common/residue_constants.py - -# Add SETUID bit to the ldconfig binary so that non-root users can run it. -RUN chmod u+s /sbin/ldconfig.real - -# We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk -# with Debian. See https://github.com/NVIDIA/nvidia-docker/issues/1399 for -# details. -RUN cd /app/alphafold -RUN ldconfig +FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu18.04 + +LABEL authors="Athanasios Baltzis, Jose Espinosa-Carrasco, Leila Mansouri" \ + title="nfcore/proteinfold_alphafold2_split" \ + Version="1.1.0" \ + description="Docker image containing all software requirements to run the RUN_ALPHAFOLD2_PRED module using the nf-core/proteinfold pipeline" + +# Use bash to support string substitution. 
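+# (pipefail makes a RUN step fail as soon as any command in a pipeline fails,
+# rather than reporting only the exit status of the last command.)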
+SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Add env variables +ENV LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.4/lib64:$LD_LIBRARY_PATH" +ENV PATH="/conda/bin:$PATH" + +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + build-essential \ + cmake \ + cuda-command-line-tools-11-1 \ + git \ + hmmer \ + kalign \ + tzdata \ + wget \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get autoremove -y \ + && apt-get clean + +# Clone AlphaFold2 +RUN git clone https://github.com/cbcrg/alphafold.git /app/alphafold && \ + cd /app/alphafold && \ + git checkout 1b3170e9409472ec8ad044f9935c92bedd7b4674 && \ + cd - + +# Compile HHsuite from source +RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ + && mkdir /tmp/hh-suite/build \ + && cd /tmp/hh-suite/build \ + && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite -DHAVE_AVX2=1 .. \ + && make -j 4 && make install \ + && ln -s /opt/hhsuite/bin/* /usr/bin \ + && cd - && rm -rf /tmp/hh-suite + +# Install Miniconda package manager +RUN wget -q -P /tmp \ + https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /conda \ + && rm /tmp/Miniconda3-latest-Linux-x86_64.sh + +# Install conda packages +RUN /conda/bin/conda install -y -c conda-forge \ + openmm=7.7.0 \ + cudatoolkit==11.1.1 \ + pdbfixer \ + pip \ + python=3.10 \ + && conda clean --all --force-pkgs-dirs --yes + +RUN wget -q -P /app/alphafold/alphafold/common/ \ + https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt + +# Install pip packages. +RUN pip3 install --upgrade pip --no-cache-dir \ + && pip3 install -r /app/alphafold/requirements.txt --no-cache-dir \ + && pip3 install --upgrade --no-cache-dir \ + jax==0.3.25 \ + jaxlib==0.3.25+cuda11.cudnn805 \ + -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html + +RUN sed -i "s|alphafold/common/stereo_chemical_props.txt|/app/alphafold/alphafold/common/stereo_chemical_props.txt|g" /app/alphafold/alphafold/common/residue_constants.py + +# Add SETUID bit to the ldconfig binary so that non-root users can run it. +RUN chmod u+s /sbin/ldconfig.real + +# We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk +# with Debian. See https://github.com/NVIDIA/nvidia-docker/issues/1399 for +# details. +RUN cd /app/alphafold +RUN ldconfig diff --git a/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_standard b/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_standard old mode 100644 new mode 100755 index e7b49f5a..4187eed4 --- a/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_standard +++ b/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_standard @@ -1,79 +1,79 @@ -FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu18.04 - -LABEL authors="Athanasios Baltzis, Jose Espinosa-Carrasco, Leila Mansouri" \ - title="nfcore/proteinfold_alphafold2_standard" \ - Version="1.1.0" \ - description="Docker image containing all software requirements to run the RUN_ALPHAFOLD2 module using the nf-core/proteinfold pipeline" - -# Use bash to support string substitution. 
-SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -# Add env variables -ENV LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH" -ENV PATH="/conda/bin:$PATH" - -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ - build-essential \ - cmake \ - cuda-command-line-tools-11-4 \ - git \ - hmmer \ - kalign \ - tzdata \ - wget \ - && rm -rf /var/lib/apt/lists/* \ - && apt-get autoremove -y \ - && apt-get clean - -# Clone AlphaFold2 -RUN git clone https://github.com/deepmind/alphafold.git /app/alphafold && \ - cd /app/alphafold && \ - git checkout 7c9114c8423ac9db981d8365168464bab09b3e54 && \ - cd - - -# Compile HHsuite from source -RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ - && mkdir /tmp/hh-suite/build \ - && cd /tmp/hh-suite/build \ - && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite -DHAVE_AVX2=1 .. \ - && make -j 4 && make install \ - && ln -s /opt/hhsuite/bin/* /usr/bin \ - && cd - && rm -rf /tmp/hh-suite - -# Install Miniconda package manager -RUN wget -q -P /tmp \ - https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ - && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /conda \ - && rm /tmp/Miniconda3-latest-Linux-x86_64.sh - -# Install conda packages -RUN /conda/bin/conda install -y -c conda-forge \ - openmm=7.7.0 \ - cudatoolkit==11.1.1 \ - pdbfixer \ - pip \ - python=3.10 \ - && conda clean --all --force-pkgs-dirs --yes - -RUN wget -q -P /app/alphafold/alphafold/common/ \ - https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt - -# Install pip packages. -RUN pip3 install --upgrade pip --no-cache-dir \ - && pip3 install -r /app/alphafold/requirements.txt --no-cache-dir \ - && pip3 install --upgrade --no-cache-dir \ - jax==0.3.25 \ - jaxlib==0.3.25+cuda11.cudnn805 \ - -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html - -RUN sed -i "s|alphafold/common/stereo_chemical_props.txt|/app/alphafold/alphafold/common/stereo_chemical_props.txt|g" /app/alphafold/alphafold/common/residue_constants.py - -# Add SETUID bit to the ldconfig binary so that non-root users can run it. -RUN chmod u+s /sbin/ldconfig.real - -# We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk -# with Debian. See https://github.com/NVIDIA/nvidia-docker/issues/1399 for -# details. -RUN cd /app/alphafold -RUN ldconfig +FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu18.04 + +LABEL authors="Athanasios Baltzis, Jose Espinosa-Carrasco, Leila Mansouri" \ + title="nfcore/proteinfold_alphafold2_standard" \ + Version="1.1.0" \ + description="Docker image containing all software requirements to run the RUN_ALPHAFOLD2 module using the nf-core/proteinfold pipeline" + +# Use bash to support string substitution. 
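+# (The SHELL instruction below applies to every subsequent RUN step, so all
+# later commands in this image execute under bash with pipefail enabled.)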
+SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Add env variables +ENV LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.4/lib64:$LD_LIBRARY_PATH" +ENV PATH="/conda/bin:$PATH" + +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + build-essential \ + cmake \ + cuda-command-line-tools-11-4 \ + git \ + hmmer \ + kalign \ + tzdata \ + wget \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get autoremove -y \ + && apt-get clean + +# Clone AlphaFold2 +RUN git clone https://github.com/deepmind/alphafold.git /app/alphafold && \ + cd /app/alphafold && \ + git checkout 7c9114c8423ac9db981d8365168464bab09b3e54 && \ + cd - + +# Compile HHsuite from source +RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ + && mkdir /tmp/hh-suite/build \ + && cd /tmp/hh-suite/build \ + && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite -DHAVE_AVX2=1 .. \ + && make -j 4 && make install \ + && ln -s /opt/hhsuite/bin/* /usr/bin \ + && cd - && rm -rf /tmp/hh-suite + +# Install Miniconda package manager +RUN wget -q -P /tmp \ + https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /conda \ + && rm /tmp/Miniconda3-latest-Linux-x86_64.sh + +# Install conda packages +RUN /conda/bin/conda install -y -c conda-forge \ + openmm=7.7.0 \ + cudatoolkit==11.1.1 \ + pdbfixer \ + pip \ + python=3.10 \ + && conda clean --all --force-pkgs-dirs --yes + +RUN wget -q -P /app/alphafold/alphafold/common/ \ + https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt + +# Install pip packages. +RUN pip3 install --upgrade pip --no-cache-dir \ + && pip3 install -r /app/alphafold/requirements.txt --no-cache-dir \ + && pip3 install --upgrade --no-cache-dir \ + jax==0.3.25 \ + jaxlib==0.3.25+cuda11.cudnn805 \ + -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html + +RUN sed -i "s|alphafold/common/stereo_chemical_props.txt|/app/alphafold/alphafold/common/stereo_chemical_props.txt|g" /app/alphafold/alphafold/common/residue_constants.py + +# Add SETUID bit to the ldconfig binary so that non-root users can run it. +RUN chmod u+s /sbin/ldconfig.real + +# We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk +# with Debian. See https://github.com/NVIDIA/nvidia-docker/issues/1399 for +# details. +RUN cd /app/alphafold +RUN ldconfig diff --git a/dockerfiles/Dockerfile_nfcore-proteinfold_colabfold b/dockerfiles/Dockerfile_nfcore-proteinfold_colabfold old mode 100644 new mode 100755 index 6e639b50..5b5f9eb9 --- a/dockerfiles/Dockerfile_nfcore-proteinfold_colabfold +++ b/dockerfiles/Dockerfile_nfcore-proteinfold_colabfold @@ -1,36 +1,37 @@ -FROM nvidia/cuda:11.4.2-cudnn8-runtime-ubuntu18.04 -LABEL authors="Athanasios Baltzis, Leila Mansouri" \ - title="nfcore/proteinfold_colabfold" \ - Version="1.1.0" \ - description="Docker image containing all software requirements to run the COLABFOLD_BATCH module using the nf-core/proteinfold pipeline" - -ENV PATH="/localcolabfold/colabfold-conda/bin:$PATH" -ENV LD_LIBRARY_PATH="/localcolabfold/colabfold-conda/lib:/usr/local/cuda/lib64" -ENV PYTHONPATH="/localcolabfold/colabfold-conda/lib" -ENV PATH="/MMseqs2/build/bin:$PATH" - -# Use bash to support string substitution. 
-RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - build-essential \ - cuda-command-line-tools-11-4 \ - git \ - hmmer \ - kalign \ - tzdata \ - wget \ - curl \ - cmake \ - && rm -rf /var/lib/apt/lists/* - -RUN cd / \ - && wget https://raw.githubusercontent.com/YoshitakaMo/localcolabfold/fb0598ebbd8227096a052d3684a549ddf33d06bb/install_colabbatch_linux.sh \ - && sed -i "/colabfold.download/d" install_colabbatch_linux.sh \ - && sed -i "s|cudatoolkit==.*\sopenmm|cudatoolkit==11.1.1 openmm|g" install_colabbatch_linux.sh \ - && bash install_colabbatch_linux.sh - -RUN /localcolabfold/colabfold-conda/bin/python3.10 -m pip install tensorflow-cpu==2.11.0 - -#Silence download of the AlphaFold2 params -RUN sed -i "s|download_alphafold_params(|#download_alphafold_params(|g" /localcolabfold/colabfold-conda/lib/python3.10/site-packages/colabfold/batch.py - +FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu18.04 + +LABEL authors="Athanasios Baltzis, Jose Espinosa-Carrasco, Leila Mansouri" \ + title="nfcore/proteinfold_colabfold" \ + Version="1.1.0" \ + description="Docker image containing all software requirements to run the COLABFOLD_BATCH module using the nf-core/proteinfold pipeline" + +ENV PATH="/localcolabfold/colabfold-conda/bin:$PATH" +ENV LD_LIBRARY_PATH="/localcolabfold/colabfold-conda/lib:/usr/local/cuda/lib64" +ENV PYTHONPATH="/localcolabfold/colabfold-conda/lib" +ENV PATH="/MMseqs2/build/bin:$PATH" + +# Use bash to support string substitution. +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + cuda-command-line-tools-11-4 \ + git \ + hmmer \ + kalign \ + tzdata \ + wget \ + curl \ + cmake \ + && rm -rf /var/lib/apt/lists/* + +RUN cd / \ + && wget https://raw.githubusercontent.com/YoshitakaMo/localcolabfold/82a3635/install_colabbatch_linux.sh \ + && sed -i "/colabfold.download/d" install_colabbatch_linux.sh \ + && sed -i "s|cudatoolkit==.*\sopenmm|cudatoolkit==11.1.1 openmm|g" install_colabbatch_linux.sh \ + && bash install_colabbatch_linux.sh + +RUN /localcolabfold/colabfold-conda/bin/python3.10 -m pip install tensorflow-cpu==2.11.0 + +#Silence download of the AlphaFold2 params +RUN sed -i "s|download_alphafold_params(|#download_alphafold_params(|g" /localcolabfold/colabfold-conda/lib/python3.10/site-packages/colabfold/batch.py +RUN sed -i "s|if args\.num_models|#if args\.num_models|g" /localcolabfold/colabfold-conda/lib/python3.10/site-packages/colabfold/batch.py diff --git a/dockerfiles/Dockerfile_nfcore-proteinfold_esmfold b/dockerfiles/Dockerfile_nfcore-proteinfold_esmfold old mode 100644 new mode 100755 index af2cd993..eb6d3915 --- a/dockerfiles/Dockerfile_nfcore-proteinfold_esmfold +++ b/dockerfiles/Dockerfile_nfcore-proteinfold_esmfold @@ -1,48 +1,48 @@ -FROM nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04 -LABEL authors="Athanasios Baltzis" \ - title="nfcore/proteinfold_esmfold" \ - Version="1.1.0" \ - description="Docker image containing all software requirements to run ESMFold using the nf-core/proteinfold pipeline" - -# Add env variables -ENV LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.0/lib64:/conda/lib/python3.9/site-packages/nvidia/cusparse/lib:$LD_LIBRARY_PATH" -ENV PATH="/conda/bin:$PATH" - -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - 
build-essential \ - cuda-command-line-tools-11-0 \ - nvidia-cuda-dev \ - wget \ - git \ - && rm -rf /var/lib/apt/lists/* - -# Install Miniconda package manager -RUN wget -q -P /tmp \ - https://repo.anaconda.com/miniconda/Miniconda3-py39_23.3.1-0-Linux-x86_64.sh \ - && bash /tmp/Miniconda3-py39_23.3.1-0-Linux-x86_64.sh -b -p /conda \ - && rm /tmp/Miniconda3-py39_23.3.1-0-Linux-x86_64.sh - -# Install ESMFold dependencies -RUN cd / && /conda/bin/conda update -qy conda \ - && /conda/bin/conda install -y -c conda-forge pip python -RUN /conda/bin/pip install --no-cache-dir git+https://github.com/facebookresearch/esm.git -RUN /conda/bin/pip install --no-cache-dir "fair-esm[esmfold]" -RUN /conda/bin/pip install --no-cache-dir \ - torch==1.13.1 \ - torchvision==0.14.1 \ - pytorch_lightning==1.5.10 \ - biopython==1.79 \ - deepspeed==0.5.9 \ - dm-tree==0.1.6 \ - ml-collections==0.1.0 \ - numpy==1.21.2 \ - PyYAML==5.4.1 \ - requests==2.26.0 \ - scipy==1.7.1 \ - tqdm==4.62.2 \ - typing-extensions==3.10.0.2 \ - wandb==0.12.21 -RUN /conda/bin/pip uninstall -y nvidia_cublas_cu11 -RUN /conda/bin/pip install --no-cache-dir 'dllogger @ git+https://github.com/NVIDIA/dllogger.git' -RUN /conda/bin/pip install --no-cache-dir 'openfold @ git+https://github.com/aqlaboratory/openfold.git@4b41059694619831a7db195b7e0988fc4ff3a307' +FROM nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04 +LABEL authors="Athanasios Baltzis" \ + title="nfcore/proteinfold_esmfold" \ + Version="1.1.0" \ + description="Docker image containing all software requirements to run ESMFold using the nf-core/proteinfold pipeline" + +# Add env variables +ENV LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.0/lib64:/conda/lib/python3.9/site-packages/nvidia/cusparse/lib:$LD_LIBRARY_PATH" +ENV PATH="/conda/bin:$PATH" + +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + cuda-command-line-tools-11-0 \ + nvidia-cuda-dev \ + wget \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Install Miniconda package manager +RUN wget -q -P /tmp \ + https://repo.anaconda.com/miniconda/Miniconda3-py39_23.3.1-0-Linux-x86_64.sh \ + && bash /tmp/Miniconda3-py39_23.3.1-0-Linux-x86_64.sh -b -p /conda \ + && rm /tmp/Miniconda3-py39_23.3.1-0-Linux-x86_64.sh + +# Install ESMFold dependencies +RUN cd / && /conda/bin/conda update -qy conda \ + && /conda/bin/conda install -y -c conda-forge pip python +RUN /conda/bin/pip install --no-cache-dir git+https://github.com/facebookresearch/esm.git +RUN /conda/bin/pip install --no-cache-dir "fair-esm[esmfold]" +RUN /conda/bin/pip install --no-cache-dir \ + torch==1.13.1 \ + torchvision==0.14.1 \ + pytorch_lightning==1.5.10 \ + biopython==1.79 \ + deepspeed==0.5.9 \ + dm-tree==0.1.6 \ + ml-collections==0.1.0 \ + numpy==1.21.2 \ + PyYAML==5.4.1 \ + requests==2.26.0 \ + scipy==1.7.1 \ + tqdm==4.62.2 \ + typing-extensions==3.10.0.2 \ + wandb==0.12.21 +RUN /conda/bin/pip uninstall -y nvidia_cublas_cu11 +RUN /conda/bin/pip install --no-cache-dir 'dllogger @ git+https://github.com/NVIDIA/dllogger.git' +RUN /conda/bin/pip install --no-cache-dir 'openfold @ git+https://github.com/aqlaboratory/openfold.git@4b41059694619831a7db195b7e0988fc4ff3a307' diff --git a/docs/README.md b/docs/README.md old mode 100644 new mode 100755 index b1d38ab0..34d96034 --- a/docs/README.md +++ b/docs/README.md @@ -1,10 +1,10 @@ -# nf-core/proteinfold: Documentation - -The nf-core/proteinfold documentation is split into the following pages: - -- 
[Usage](usage.md)
-  - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags.
-- [Output](output.md)
-  - An overview of the different results produced by the pipeline and how to interpret them.
-
-You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re)
+# nf-core/proteinfold: Documentation
+
+The nf-core/proteinfold documentation is split into the following pages:
+
+- [Usage](usage.md)
+  - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags.
+- [Output](output.md)
+  - An overview of the different results produced by the pipeline and how to interpret them.
+
+You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re)
diff --git a/docs/images/T1024_LmrP____408_residues__PAE_mqc.png b/docs/images/T1024_LmrP____408_residues__PAE_mqc.png
old mode 100644
new mode 100755
diff --git a/docs/images/T1024_LmrP____408_residues__coverage_mqc.png b/docs/images/T1024_LmrP____408_residues__coverage_mqc.png
old mode 100644
new mode 100755
diff --git a/docs/images/T1024_LmrP____408_residues__plddt_mqc.png b/docs/images/T1024_LmrP____408_residues__plddt_mqc.png
old mode 100644
new mode 100755
diff --git a/docs/images/nf-core-proteinfold_logo_dark.png b/docs/images/nf-core-proteinfold_logo_dark.png
old mode 100644
new mode 100755
diff --git a/docs/images/nf-core-proteinfold_logo_light.png b/docs/images/nf-core-proteinfold_logo_light.png
old mode 100644
new mode 100755
diff --git a/docs/images/nf-core-proteinfold_metro_map.png b/docs/images/nf-core-proteinfold_metro_map.png
old mode 100644
new mode 100755
diff --git a/docs/images/nf-core-proteinfold_metro_map.svg b/docs/images/nf-core-proteinfold_metro_map.svg
old mode 100644
new mode 100755
index ff895d9e..3834799e
--- a/docs/images/nf-core-proteinfold_metro_map.svg
+++ b/docs/images/nf-core-proteinfold_metro_map.svg
@@ -1,1267 +1,1267 @@
-[flattened SVG text omitted: metro-map diagram labels — Samplesheet, INPUT CHECK, PREPARE AF2, AF2 MSA, AF2 PRED, PREPARE COLABFOLD, COLABFOLD MSA, COLABFOLD PRED, ColabFold Webserver/Local, Standard AlphaFold2 (AF2), AlphaFold2 (AF2) Split, LEGEND (fasta, DB, params, csv, PDB)]
+[flattened SVG text omitted: the same diagram re-added; visible text content unchanged]
diff --git a/docs/images/nf-core-proteinfold_metro_map_1.1.0.png b/docs/images/nf-core-proteinfold_metro_map_1.1.0.png
old mode 100644
new mode 100755
diff --git a/docs/images/nf-core-proteinfold_metro_map_1.1.0.svg b/docs/images/nf-core-proteinfold_metro_map_1.1.0.svg
old mode 100644
new mode 100755
index 372a70dc..634f46e1
--- a/docs/images/nf-core-proteinfold_metro_map_1.1.0.svg
+++ b/docs/images/nf-core-proteinfold_metro_map_1.1.0.svg
@@ -1,1640 +1,1640 @@
-[flattened SVG text omitted: v.1.1.0 metro-map diagram labels — as above plus ESMFold, PREPARE ESMFOLD and ESMFOLD]
+[flattened SVG text omitted: the same diagram re-added; visible text content unchanged]
diff --git a/docs/images/nf-core-proteinfold_metro_map_1.1.0_transp.png b/docs/images/nf-core-proteinfold_metro_map_1.1.0_transp.png
old mode 100644
new mode 100755
diff --git a/docs/output.md b/docs/output.md
old mode 100644
new mode 100755
index 29d2337c..947bd57e
--- a/docs/output.md
+++ b/docs/output.md
@@ -1,211 +1,213 @@
-# nf-core/proteinfold: Output
-
-## Introduction
-
-This document describes the output produced by the pipeline.
-
-Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline.
- -## Pipeline overview - -The pipeline is built using [Nextflow](https://www.nextflow.io/) and predicts protein structures using the following methods: - -- [AlphaFold2](https://github.com/deepmind/alphafold) -- [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 (API server or local search) followed by ColabFold -- [ESMFold](https://github.com/facebookresearch/esm) - -See main [README.md](https://github.com/nf-core/proteinfold/blob/master/README.md) for a condensed overview of the steps in the pipeline, and the bioinformatics tools used at each step. - -The directories listed below will be created in the output directory after the pipeline has finished. All paths are relative to the top-level results directory. - -### AlphaFold2 - -
-Output files - -- `AlphaFold2/` - - `/` that contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings - - `.alphafold.pdb` that is the structure with the highest pLDDT score (ranked first) - - `_plddt_mqc.tsv` that presents the pLDDT scores per residue for each of the 5 predicted models -- `DBs/` that contains symbolic links to the downloaded database and parameter files - -
- -Below you can find an indicative example of the TSV file with the pLDDT scores per residue for each of the 5 predicted models produced by AlphaFold2, which is included in the MultiQC report: - -| Positions | rank_0 | rank_1 | rank_2 | rank_3 | rank_4 | -| --------- | ------ | ------ | ------ | ------ | ------ | -| 1 | 66.17 | 60.61 | 60.32 | 64.20 | 65.31 | -| 2 | 78.01 | 74.20 | 73.11 | 77.36 | 78.46 | -| 3 | 82.16 | 78.16 | 76.70 | 80.20 | 80.68 | -| 4 | 86.03 | 82.78 | 81.88 | 82.19 | 83.93 | -| 5 | 88.08 | 84.38 | 84.73 | 85.58 | 87.70 | -| 6 | 89.37 | 86.06 | 86.31 | 86.84 | 88.52 | -| 7 | 91.27 | 88.27 | 88.09 | 87.01 | 88.67 | -| 8 | 91.28 | 89.42 | 90.17 | 87.47 | 90.07 | -| 9 | 93.10 | 90.09 | 92.86 | 90.70 | 93.41 | -| 10 | 93.23 | 91.42 | 93.07 | 90.13 | 92.91 | -| 11 | 94.12 | 92.44 | 93.00 | 89.90 | 92.97 | -| 12 | 95.15 | 93.63 | 94.25 | 92.66 | 94.38 | -| 13 | 95.09 | 93.75 | 94.36 | 92.54 | 94.95 | -| 14 | 94.08 | 92.72 | 93.43 | 90.31 | 93.63 | -| 15 | 94.34 | 93.77 | 93.31 | 91.72 | 93.57 | -| 16 | 95.56 | 94.62 | 94.46 | 93.55 | 95.20 | -| 17 | 95.54 | 94.75 | 94.65 | 93.61 | 95.37 | -| 18 | 93.91 | 93.89 | 93.30 | 91.33 | 92.95 | -| 19 | 95.48 | 95.78 | 94.48 | 93.95 | 95.05 | -| 20 | 95.96 | 95.46 | 95.14 | 94.01 | 95.83 | -| 21 | 94.06 | 94.06 | 93.13 | 91.69 | 93.54 | -| 22 | 92.98 | 93.28 | 91.14 | 88.80 | 91.25 | -| 23 | 95.28 | 95.13 | 93.39 | 91.48 | 93.56 | -| 24 | 93.41 | 93.38 | 92.32 | 89.85 | 92.40 | -| 25 | 90.88 | 91.40 | 88.60 | 85.67 | 87.65 | -| 26 | 89.30 | 88.90 | 84.58 | 83.11 | 84.52 | -| 27 | 91.96 | 90.95 | 89.04 | 86.42 | 87.77 | -| 28 | 91.20 | 90.68 | 88.71 | 86.43 | 87.62 | -| 29 | 88.01 | 87.53 | 85.83 | 83.11 | 84.95 | -| 30 | 81.29 | 83.72 | 77.75 | 75.76 | 74.84 | -| 31 | 87.14 | 86.92 | 82.10 | 82.32 | 78.74 | -| 32 | 92.34 | 90.13 | 89.04 | 88.31 | 86.49 | -| 33 | 91.70 | 88.94 | 85.52 | 85.94 | 81.75 | -| 34 | 90.11 | 88.23 | 84.33 | 85.47 | 80.01 | -| 35 | 93.35 | 91.49 | 90.60 | 89.40 | 87.10 | -| 36 | 94.15 | 92.47 | 90.17 | 90.48 | 86.77 | -| 37 | 93.40 | 92.01 | 86.38 | 87.84 | 80.11 | -| 38 | 92.79 | 89.97 | 89.31 | 88.55 | 85.15 | -| 39 | 94.66 | 91.29 | 92.74 | 90.67 | 90.30 | -| 40 | 95.98 | 93.58 | 94.30 | 91.69 | 90.73 | -| 41 | 94.94 | 92.57 | 88.31 | 88.40 | 80.33 | -| 42 | 92.89 | 91.03 | 84.03 | 85.31 | 74.66 | -| 43 | 94.54 | 93.44 | 86.50 | 84.91 | 76.68 | -| 44 | 96.93 | 95.23 | 92.42 | 91.98 | 86.11 | -| 45 | 94.40 | 92.27 | 87.40 | 89.02 | 79.44 | -| 46 | 91.74 | 90.94 | 81.35 | 84.88 | 74.93 | -| 47 | 96.19 | 94.46 | 90.51 | 89.82 | 84.51 | -| 48 | 94.84 | 93.04 | 91.02 | 91.57 | 87.72 | -| 49 | 91.23 | 89.34 | 86.10 | 87.63 | 82.12 | -| 50 | 91.64 | 89.58 | 84.93 | 85.88 | 79.38 | - -### ColabFold - -
-Output files - -- `colabfold/webserver/` or `colabfold/local/` based on the selected mode that contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs and scores, prediction metadata, logs and section timings -- `DBs/` that contains symbolic links to the downloaded database and parameter files - -
- -Below you can find some indicative examples of the output images produced by ColabFold, which are included in the MultiQC report: - -#### Sequence coverage - -![Alt text](../docs/images/T1024_LmrP____408_residues__coverage_mqc.png?raw=true "T1024_coverage") - -#### predicted Local Distance Difference Test (pLDDT) - -![Alt text](../docs/images/T1024_LmrP____408_residues__plddt_mqc.png?raw=true "T1024_coverage") - -#### Predicted Aligned Error (PAE) - -![Alt text](../docs/images/T1024_LmrP____408_residues__PAE_mqc.png?raw=true "T1024_coverage") - -### ESMFold - -
-Output files - -- `esmfold/` - - `.pdb` that is the structure with the highest pLDDT score (ranked first) - - `_plddt_mqc.tsv` that presents the pLDDT scores per residue for each of the 5 predicted models -- `DBs/` that contains symbolic links to the downloaded database and parameter files - -
- -Below you can find an indicative example of the TSV file with the pLDDT scores per atom for predicted model produced by ESMFold, which is included in the MultiQC report: - -| Atom_serial_number | Atom_name | Residue_name | Residue_sequence_number | pLDDT | -| ------------------ | --------- | ------------ | ----------------------- | ----- | -| 1 | N | VAL | 1 | 44.77 | -| 2 | CA | VAL | 1 | 47.23 | -| 3 | C | VAL | 1 | 46.66 | -| 4 | CB | VAL | 1 | 41.88 | -| 5 | O | VAL | 1 | 45.75 | -| 6 | CG1 | VAL | 1 | 39.15 | -| 7 | CG2 | VAL | 1 | 39.59 | -| 8 | N | THR | 2 | 49.89 | -| 9 | CA | THR | 2 | 51.41 | -| 10 | C | THR | 2 | 50.21 | -| 11 | CB | THR | 2 | 43.84 | -| 12 | O | THR | 2 | 47.36 | -| 13 | CG2 | THR | 2 | 35.32 | -| 14 | OG1 | THR | 2 | 40.12 | -| 15 | N | VAL | 3 | 51.40 | -| 16 | CA | VAL | 3 | 54.38 | -| 17 | C | VAL | 3 | 52.10 | -| 18 | CB | VAL | 3 | 48.50 | -| 19 | O | VAL | 3 | 52.58 | -| 20 | CG1 | VAL | 3 | 38.75 | -| 21 | CG2 | VAL | 3 | 39.26 | -| 22 | N | ASP | 4 | 52.00 | -| 23 | CA | ASP | 4 | 53.92 | -| 24 | C | ASP | 4 | 52.33 | -| 25 | CB | ASP | 4 | 46.82 | -| 26 | O | ASP | 4 | 51.28 | -| 27 | CG | ASP | 4 | 42.89 | -| 28 | OD1 | ASP | 4 | 45.89 | -| 29 | OD2 | ASP | 4 | 53.61 | -| 30 | N | ASP | 5 | 56.10 | -| 31 | CA | ASP | 5 | 56.97 | -| 32 | C | ASP | 5 | 55.75 | -| 33 | CB | ASP | 5 | 50.34 | -| 34 | O | ASP | 5 | 54.18 | -| 35 | CG | ASP | 5 | 45.82 | -| 36 | OD1 | ASP | 5 | 50.03 | -| 37 | OD2 | ASP | 5 | 58.01 | -| 38 | N | LEU | 6 | 56.50 | -| 39 | CA | LEU | 6 | 58.34 | -| 40 | C | LEU | 6 | 55.81 | -| 41 | CB | LEU | 6 | 52.46 | -| 42 | O | LEU | 6 | 54.42 | -| 43 | CG | LEU | 6 | 49.17 | -| 44 | CD1 | LEU | 6 | 44.31 | -| 45 | CD2 | LEU | 6 | 47.07 | -| 46 | N | VAL | 7 | 57.23 | -| 47 | CA | VAL | 7 | 57.68 | -| 48 | C | VAL | 7 | 57.39 | -| 49 | CB | VAL | 7 | 52.74 | -| 50 | O | VAL | 7 | 56.46 | - -### MultiQC report - -
-Output files - -- `multiqc` - - multiqc_report.html: A standalone HTML file that can be viewed in your web browser. - - multiqc_data/: Directory containing parsed statistics from the different tools used in the pipeline. - - multiqc_plots/: Directory containing static images from the report in various formats. - -
- -[MultiQC](https://multiqc.info/docs/) is a visualisation tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available within the report data directory. - -Results generated by MultiQC collate pipeline QC from AlphaFold2 or ColabFold. - -The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see http://multiqc.info. - -### Pipeline information - -
-Output files - -- `pipeline_info/` - - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. - - Parameters used by the pipeline run: `params.json`. - -
-
-[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage.
+# nf-core/proteinfold: Output
+
+## Introduction
+
+This document describes the output produced by the pipeline.
+
+Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline.
+
+## Pipeline overview
+
+The pipeline is built using [Nextflow](https://www.nextflow.io/) and predicts protein structures using the following methods:
+
+- [AlphaFold2](https://github.com/deepmind/alphafold)
+- [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 (API server or local search) followed by ColabFold
+- [ESMFold](https://github.com/facebookresearch/esm)
+
+See the main [README.md](https://github.com/nf-core/proteinfold/blob/master/README.md) for a condensed overview of the steps in the pipeline, and the bioinformatics tools used at each step.
+
+The directories listed below will be created in the output directory after the pipeline has finished. All paths are relative to the top-level results directory.
+
+### AlphaFold2
+
+<details>
+<summary>Output files</summary>
+
+- `AlphaFold2/`
+  - `<SEQUENCE NAME>/` that contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings
+  - `<SEQUENCE NAME>.alphafold.pdb` that is the structure with the highest pLDDT score (ranked first)
+  - `<SEQUENCE NAME>_plddt_mqc.tsv` that presents the pLDDT scores per residue for each of the 5 predicted models
+- `DBs/` that contains symbolic links to the downloaded database and parameter files
+
+</details>
+
+Below you can find an indicative example of the TSV file with the pLDDT scores per residue for each of the 5 predicted models produced by AlphaFold2, which is included in the MultiQC report (a command-line summary sketch follows the table):
+
+| Positions | rank_0 | rank_1 | rank_2 | rank_3 | rank_4 |
+| --------- | ------ | ------ | ------ | ------ | ------ |
+| 1 | 66.17 | 60.61 | 60.32 | 64.20 | 65.31 |
+| 2 | 78.01 | 74.20 | 73.11 | 77.36 | 78.46 |
+| 3 | 82.16 | 78.16 | 76.70 | 80.20 | 80.68 |
+| 4 | 86.03 | 82.78 | 81.88 | 82.19 | 83.93 |
+| 5 | 88.08 | 84.38 | 84.73 | 85.58 | 87.70 |
+| 6 | 89.37 | 86.06 | 86.31 | 86.84 | 88.52 |
+| 7 | 91.27 | 88.27 | 88.09 | 87.01 | 88.67 |
+| 8 | 91.28 | 89.42 | 90.17 | 87.47 | 90.07 |
+| 9 | 93.10 | 90.09 | 92.86 | 90.70 | 93.41 |
+| 10 | 93.23 | 91.42 | 93.07 | 90.13 | 92.91 |
+| 11 | 94.12 | 92.44 | 93.00 | 89.90 | 92.97 |
+| 12 | 95.15 | 93.63 | 94.25 | 92.66 | 94.38 |
+| 13 | 95.09 | 93.75 | 94.36 | 92.54 | 94.95 |
+| 14 | 94.08 | 92.72 | 93.43 | 90.31 | 93.63 |
+| 15 | 94.34 | 93.77 | 93.31 | 91.72 | 93.57 |
+| 16 | 95.56 | 94.62 | 94.46 | 93.55 | 95.20 |
+| 17 | 95.54 | 94.75 | 94.65 | 93.61 | 95.37 |
+| 18 | 93.91 | 93.89 | 93.30 | 91.33 | 92.95 |
+| 19 | 95.48 | 95.78 | 94.48 | 93.95 | 95.05 |
+| 20 | 95.96 | 95.46 | 95.14 | 94.01 | 95.83 |
+| 21 | 94.06 | 94.06 | 93.13 | 91.69 | 93.54 |
+| 22 | 92.98 | 93.28 | 91.14 | 88.80 | 91.25 |
+| 23 | 95.28 | 95.13 | 93.39 | 91.48 | 93.56 |
+| 24 | 93.41 | 93.38 | 92.32 | 89.85 | 92.40 |
+| 25 | 90.88 | 91.40 | 88.60 | 85.67 | 87.65 |
+| 26 | 89.30 | 88.90 | 84.58 | 83.11 | 84.52 |
+| 27 | 91.96 | 90.95 | 89.04 | 86.42 | 87.77 |
+| 28 | 91.20 | 90.68 | 88.71 | 86.43 | 87.62 |
+| 29 | 88.01 | 87.53 | 85.83 | 83.11 | 84.95 |
+| 30 | 81.29 | 83.72 | 77.75 | 75.76 | 74.84 |
+| 31 | 87.14 | 86.92 | 82.10 | 82.32 | 78.74 |
+| 32 | 92.34 | 90.13 | 89.04 | 88.31 | 86.49 |
+| 33 | 91.70 | 88.94 | 85.52 | 85.94 | 81.75 |
+| 34 | 90.11 | 88.23 | 84.33 | 85.47 | 80.01 |
+| 35 | 93.35 | 91.49 | 90.60 | 89.40 | 87.10 |
+| 36 | 94.15 | 92.47 | 90.17 | 90.48 | 86.77 |
+| 37 | 93.40 | 92.01 | 86.38 | 87.84 | 80.11 |
+| 38 | 92.79 | 89.97 | 89.31 | 88.55 | 85.15 |
+| 39 | 94.66 | 91.29 | 92.74 | 90.67 | 90.30 |
+| 40 | 95.98 | 93.58 | 94.30 | 91.69 | 90.73 |
+| 41 | 94.94 | 92.57 | 88.31 | 88.40 | 80.33 |
+| 42 | 92.89 | 91.03 | 84.03 | 85.31 | 74.66 |
+| 43 | 94.54 | 93.44 | 86.50 | 84.91 | 76.68 |
+| 44 | 96.93 | 95.23 | 92.42 | 91.98 | 86.11 |
+| 45 | 94.40 | 92.27 | 87.40 | 89.02 | 79.44 |
+| 46 | 91.74 | 90.94 | 81.35 | 84.88 | 74.93 |
+| 47 | 96.19 | 94.46 | 90.51 | 89.82 | 84.51 |
+| 48 | 94.84 | 93.04 | 91.02 | 91.57 | 87.72 |
+| 49 | 91.23 | 89.34 | 86.10 | 87.63 | 82.12 |
+| 50 | 91.64 | 89.58 | 84.93 | 85.88 | 79.38 |
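+
+As a quick summary of these scores, the per-model mean pLDDT can be computed directly from the TSV on the command line. The snippet below is only an illustrative sketch (it is not produced by the pipeline) and assumes a tab-separated file named `T1024_plddt_mqc.tsv` with the column layout shown above:
+
+```bash
+# Mean pLDDT per ranked model (columns 2-6 hold rank_0..rank_4)
+awk -F'\t' 'NR > 1 { for (i = 2; i <= 6; i++) sum[i] += $i; n++ }
+  END { for (i = 2; i <= 6; i++) printf "rank_%d mean pLDDT: %.2f\n", i - 2, sum[i] / n }' \
+  T1024_plddt_mqc.tsv
+```
+
+### ColabFold
+
+<details>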
+<summary>Output files</summary>
+
+- `colabfold/webserver/` or `colabfold/local/` based on the selected mode that contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs and scores, prediction metadata, logs and section timings
+- `DBs/` that contains symbolic links to the downloaded database and parameter files
+
+</details>
+
+Below you can find some indicative examples of the output images produced by ColabFold, which are included in the MultiQC report:
+
+#### Sequence coverage
+
+![Alt text](../docs/images/T1024_LmrP____408_residues__coverage_mqc.png?raw=true "T1024_coverage")
+
+#### predicted Local Distance Difference Test (pLDDT)
+
+![Alt text](../docs/images/T1024_LmrP____408_residues__plddt_mqc.png?raw=true "T1024_plddt")
+
+#### Predicted Aligned Error (PAE)
+
+![Alt text](../docs/images/T1024_LmrP____408_residues__PAE_mqc.png?raw=true "T1024_PAE")
+
+### ESMFold
+
+<details>
+<summary>Output files</summary>
+
+- `esmfold/`
+  - `<SEQUENCE NAME>.pdb` that is the predicted structure
+  - `<SEQUENCE NAME>_plddt_mqc.tsv` that presents the pLDDT scores per atom for the predicted model
+- `DBs/` that contains symbolic links to the downloaded database and parameter files
+
+</details>
+
+Below you can find an indicative example of the TSV file with the pLDDT scores per atom for the predicted model produced by ESMFold, which is included in the MultiQC report (a per-residue summary sketch follows the table):
+
+| Atom_serial_number | Atom_name | Residue_name | Residue_sequence_number | pLDDT |
+| ------------------ | --------- | ------------ | ----------------------- | ----- |
+| 1 | N | VAL | 1 | 44.77 |
+| 2 | CA | VAL | 1 | 47.23 |
+| 3 | C | VAL | 1 | 46.66 |
+| 4 | CB | VAL | 1 | 41.88 |
+| 5 | O | VAL | 1 | 45.75 |
+| 6 | CG1 | VAL | 1 | 39.15 |
+| 7 | CG2 | VAL | 1 | 39.59 |
+| 8 | N | THR | 2 | 49.89 |
+| 9 | CA | THR | 2 | 51.41 |
+| 10 | C | THR | 2 | 50.21 |
+| 11 | CB | THR | 2 | 43.84 |
+| 12 | O | THR | 2 | 47.36 |
+| 13 | CG2 | THR | 2 | 35.32 |
+| 14 | OG1 | THR | 2 | 40.12 |
+| 15 | N | VAL | 3 | 51.40 |
+| 16 | CA | VAL | 3 | 54.38 |
+| 17 | C | VAL | 3 | 52.10 |
+| 18 | CB | VAL | 3 | 48.50 |
+| 19 | O | VAL | 3 | 52.58 |
+| 20 | CG1 | VAL | 3 | 38.75 |
+| 21 | CG2 | VAL | 3 | 39.26 |
+| 22 | N | ASP | 4 | 52.00 |
+| 23 | CA | ASP | 4 | 53.92 |
+| 24 | C | ASP | 4 | 52.33 |
+| 25 | CB | ASP | 4 | 46.82 |
+| 26 | O | ASP | 4 | 51.28 |
+| 27 | CG | ASP | 4 | 42.89 |
+| 28 | OD1 | ASP | 4 | 45.89 |
+| 29 | OD2 | ASP | 4 | 53.61 |
+| 30 | N | ASP | 5 | 56.10 |
+| 31 | CA | ASP | 5 | 56.97 |
+| 32 | C | ASP | 5 | 55.75 |
+| 33 | CB | ASP | 5 | 50.34 |
+| 34 | O | ASP | 5 | 54.18 |
+| 35 | CG | ASP | 5 | 45.82 |
+| 36 | OD1 | ASP | 5 | 50.03 |
+| 37 | OD2 | ASP | 5 | 58.01 |
+| 38 | N | LEU | 6 | 56.50 |
+| 39 | CA | LEU | 6 | 58.34 |
+| 40 | C | LEU | 6 | 55.81 |
+| 41 | CB | LEU | 6 | 52.46 |
+| 42 | O | LEU | 6 | 54.42 |
+| 43 | CG | LEU | 6 | 49.17 |
+| 44 | CD1 | LEU | 6 | 44.31 |
+| 45 | CD2 | LEU | 6 | 47.07 |
+| 46 | N | VAL | 7 | 57.23 |
+| 47 | CA | VAL | 7 | 57.68 |
+| 48 | C | VAL | 7 | 57.39 |
+| 49 | CB | VAL | 7 | 52.74 |
+| 50 | O | VAL | 7 | 56.46 |
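+
+Because ESMFold reports pLDDT per atom, a per-residue profile can be derived by averaging over the atoms of each residue. The snippet below is only an illustrative sketch (it is not produced by the pipeline) and assumes a tab-separated file named `T1026_plddt_mqc.tsv` with the column layout shown above:
+
+```bash
+# Mean pLDDT per residue: group by column 4 (Residue_sequence_number), average column 5 (pLDDT)
+awk -F'\t' 'NR > 1 { sum[$4] += $5; count[$4]++ }
+  END { for (r in sum) printf "%s\t%.2f\n", r, sum[r] / count[r] }' \
+  T1026_plddt_mqc.tsv | sort -n
+```
+
+### MultiQC report
+
+<details>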
+<summary>Output files</summary>
+
+- `multiqc/`
+  - `multiqc_report.html`: A standalone HTML file that can be viewed in your web browser.
+  - `multiqc_data/`: Directory containing parsed statistics from the different tools used in the pipeline.
+  - `multiqc_plots/`: Directory containing static images from the report in various formats.
+
+</details>
+
+[MultiQC](https://multiqc.info/docs/) is a visualisation tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available within the report data directory.
+
+Results generated by MultiQC collate pipeline QC from AlphaFold2, ColabFold or ESMFold.
+
+The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see http://multiqc.info.
+
+### Pipeline information
+
+<details>
+<summary>Output files</summary>
+
+- `pipeline_info/`
+  - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
+  - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
+  - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
+  - Parameters used by the pipeline run: `params.json`.
+
+</details>
+ +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/usage.md b/docs/usage.md old mode 100644 new mode 100755 index 9544d587..3c473d6b --- a/docs/usage.md +++ b/docs/usage.md @@ -1,575 +1,592 @@ -# nf-core/proteinfold: Usage - -## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/proteinfold/usage](https://nf-co.re/proteinfold/usage) - -> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ - -## Introduction - - - -## Samplesheet input - -You will need to create a samplesheet with information about the sequences you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 2 columns, and a header row as shown in the examples below. - -```bash ---input '[path to samplesheet file]' -``` - -### Full samplesheet - -The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the table below. - -A final samplesheet file may look something like the one below. This is for 2 sequences. - -```csv title="samplesheet.csv" -sequence,fasta -T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta -T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta -``` - -| Column | Description | -| ---------- | --------------------------------------------------------------------------------------------------- | -| `sequence` | Custom sequence name. Spaces in sequence names are automatically converted to underscores (`_`). | -| `fasta` | Full path to fasta file for the provided sequence. File has to have the extension ".fasta" or "fa". | - -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. 
- -## Running the pipeline - -The typical commands for running the pipeline on AlphaFold2, Colabfold and ESMFold modes are as follows: - -```csv title="samplesheet.csv" -nextflow run nf-core/proteinfold \ - --input samplesheet.csv \ - --outdir \ - --mode alphafold2 \ - --alphafold2_db \ - --full_dbs \ - --alphafold2_model_preset monomer \ - --use_gpu \ - -profile -``` - -```console -nextflow run nf-core/proteinfold \ - --input samplesheet.csv \ - --outdir \ - --mode alphafold2 \ - --alphafold2_mode split_msa_prediction \ - --alphafold2_db \ - --full_dbs \ - --alphafold2_model_preset monomer \ - --use_gpu \ - -profile -``` - -If you specify the `--alphafold2_db ` parameter, the directory structure of your path should be like this: - -``` -├── mgnify -│   └── mgy_clusters_2018_12.fa -├── alphafold_params_2022-03-02 -│   ├── LICENSE -│   ├── params_model_1_multimer.npz -│   ├── params_model_1_multimer_v2.npz -│   ├── params_model_1.npz -│   ├── params_model_1_ptm.npz -│   ├── params_model_2_multimer.npz -│   ├── params_model_2_multimer_v2.npz -│   ├── params_model_2.npz -│   ├── params_model_2_ptm.npz -│   ├── params_model_3_multimer.npz -│   ├── params_model_3_multimer_v2.npz -│   ├── params_model_3.npz -│   ├── params_model_3_ptm.npz -│   ├── params_model_4_multimer.npz -│   ├── params_model_4_multimer_v2.npz -│   ├── params_model_4.npz -│   ├── params_model_4_ptm.npz -│   ├── params_model_5_multimer.npz -│   ├── params_model_5_multimer_v2.npz -│   ├── params_model_5.npz -│   └── params_model_5_ptm.npz -├── pdb70 -│   └── pdb70_from_mmcif_200916 -│   ├── md5sum -│   ├── pdb70_a3m.ffdata -│   ├── pdb70_a3m.ffindex -│   ├── pdb70_clu.tsv -│   ├── pdb70_cs219.ffdata -│   ├── pdb70_cs219.ffindex -│   ├── pdb70_hhm.ffdata -│   ├── pdb70_hhm.ffindex -│   └── pdb_filter.dat -├── pdb_mmcif -│   ├── mmcif_files -│   │   ├── 1g6g.cif -│   │   ├── 1go4.cif -│   │   ├── 1isn.cif -│   │   ├── 1kuu.cif -│   │   ├── 1m7s.cif -│   │   ├── 1mwq.cif -│   │   ├── 1ni5.cif -│   │   ├── 1qgd.cif -│   │   ├── 1tp9.cif -│   │   ├── 1wa9.cif -│   │   ├── 1ye5.cif -│   │   ├── 1yhl.cif -│   │   ├── 2bjd.cif -│   │   ├── 2bo9.cif -│   │   ├── 2e7t.cif -│   │   ├── 2fyg.cif -│   │   ├── 2j0q.cif -│   │   ├── 2jcq.cif -│   │   ├── 2m4k.cif -│   │   ├── 2n9o.cif -│   │   ├── 2nsx.cif -│   │   ├── 2w4u.cif -│   │   ├── 2wd6.cif -│   │   ├── 2wh5.cif -│   │   ├── 2wji.cif -│   │   ├── 2yu3.cif -│   │   ├── 3cw2.cif -│   │   ├── 3d45.cif -│   │   ├── 3gnz.cif -│   │   ├── 3j0a.cif -│   │   ├── 3jaj.cif -│   │   ├── 3mzo.cif -│   │   ├── 3nrn.cif -│   │   ├── 3piv.cif -│   │   ├── 3pof.cif -│   │   ├── 3pvd.cif -│   │   ├── 3q45.cif -│   │   ├── 3qh6.cif -│   │   ├── 3rg2.cif -│   │   ├── 3sxe.cif -│   │   ├── 3uai.cif -│   │   ├── 3uid.cif -│   │   ├── 3wae.cif -│   │   ├── 3wt1.cif -│   │   ├── 3wtr.cif -│   │   ├── 3wy2.cif -│   │   ├── 3zud.cif -│   │   ├── 4bix.cif -│   │   ├── 4bzx.cif -│   │   ├── 4c1n.cif -│   │   ├── 4cej.cif -│   │   ├── 4chm.cif -│   │   ├── 4fzo.cif -│   │   ├── 4i1f.cif -│   │   ├── 4ioa.cif -│   │   ├── 4j6o.cif -│   │   ├── 4m9q.cif -│   │   ├── 4mal.cif -│   │   ├── 4nhe.cif -│   │   ├── 4o2w.cif -│   │   ├── 4pzo.cif -│   │   ├── 4qlx.cif -│   │   ├── 4uex.cif -│   │   ├── 4zm4.cif -│   │   ├── 4zv1.cif -│   │   ├── 5aj4.cif -│   │   ├── 5frs.cif -│   │   ├── 5hwo.cif -│   │   ├── 5kbk.cif -│   │   ├── 5odq.cif -│   │   ├── 5u5t.cif -│   │   ├── 5wzq.cif -│   │   ├── 5x9z.cif -│   │   ├── 5xe5.cif -│   │   ├── 5ynv.cif -│   │   ├── 5yud.cif -│   │   ├── 5z5c.cif -│   │   ├── 5zb3.cif -│   
│   ├── 5zlg.cif -│   │   ├── 6a6i.cif -│   │   ├── 6az3.cif -│   │   ├── 6ban.cif -│   │   ├── 6g1f.cif -│   │   ├── 6ix4.cif -│   │   ├── 6jwp.cif -│   │   ├── 6ng9.cif -│   │   ├── 6ojj.cif -│   │   ├── 6s0x.cif -│   │   ├── 6sg9.cif -│   │   ├── 6vi4.cif -│   │   └── 7sp5.cif -│   └── obsolete.dat -├── pdb_seqres -│   └── pdb_seqres.txt -├── small_bfd -│   └── bfd-first_non_consensus_sequences.fasta -├── uniclust30 -│   └── uniclust30_2018_08 -│   ├── uniclust30_2018_08_a3m_db -> uniclust30_2018_08_a3m.ffdata -│   ├── uniclust30_2018_08_a3m_db.index -│   ├── uniclust30_2018_08_a3m.ffdata -│   ├── uniclust30_2018_08_a3m.ffindex -│   ├── uniclust30_2018_08.cs219 -│   ├── uniclust30_2018_08_cs219.ffdata -│   ├── uniclust30_2018_08_cs219.ffindex -│   ├── uniclust30_2018_08.cs219.sizes -│   ├── uniclust30_2018_08_hhm_db -> uniclust30_2018_08_hhm.ffdata -│   ├── uniclust30_2018_08_hhm_db.index -│   ├── uniclust30_2018_08_hhm.ffdata -│   ├── uniclust30_2018_08_hhm.ffindex -│   └── uniclust30_2018_08_md5sum -├── uniprot -│   └── uniprot.fasta -└── uniref90 - └── uniref90.fasta -``` - -```console -nextflow run nf-core/proteinfold \ - --input samplesheet.csv \ - --outdir \ - --mode colabfold \ - --colabfold_server local \ - --colabfold_db \ - --num_recycle 3 \ - --use_amber \ - --colabfold_model_preset "AlphaFold2-ptm" \ - --use_gpu \ - --db_load_mode 0 - -profile -``` - -```console -nextflow run nf-core/proteinfold \ - --input samplesheet.csv \ - --outdir \ - --mode colabfold - --colabfold_server webserver \ - --host_url \ - --colabfold_db \ - --num_recycle 3 \ - --use_amber \ - --colabfold_model_preset "AlphaFold2-ptm" \ - --use_gpu \ - -profile -``` - -If you specify the `--colabfold_db ` parameter, the directory structure of your path should be like this: - -``` -├── colabfold_envdb_202108 -│   ├── colabfold_envdb_202108_db.0 -│   ├── colabfold_envdb_202108_db.1 -│   ├── colabfold_envdb_202108_db.10 -│   ├── colabfold_envdb_202108_db.11 -│   ├── colabfold_envdb_202108_db.12 -│   ├── colabfold_envdb_202108_db.13 -│   ├── colabfold_envdb_202108_db.14 -│   ├── colabfold_envdb_202108_db.15 -│   ├── colabfold_envdb_202108_db.2 -│   ├── colabfold_envdb_202108_db.3 -│   ├── colabfold_envdb_202108_db.4 -│   ├── colabfold_envdb_202108_db.5 -│   ├── colabfold_envdb_202108_db.6 -│   ├── colabfold_envdb_202108_db.7 -│   ├── colabfold_envdb_202108_db.8 -│   ├── colabfold_envdb_202108_db.9 -│   ├── colabfold_envdb_202108_db_aln.0 -│   ├── colabfold_envdb_202108_db_aln.1 -│   ├── colabfold_envdb_202108_db_aln.10 -│   ├── colabfold_envdb_202108_db_aln.11 -│   ├── colabfold_envdb_202108_db_aln.12 -│   ├── colabfold_envdb_202108_db_aln.13 -│   ├── colabfold_envdb_202108_db_aln.14 -│   ├── colabfold_envdb_202108_db_aln.15 -│   ├── colabfold_envdb_202108_db_aln.2 -│   ├── colabfold_envdb_202108_db_aln.3 -│   ├── colabfold_envdb_202108_db_aln.4 -│   ├── colabfold_envdb_202108_db_aln.5 -│   ├── colabfold_envdb_202108_db_aln.6 -│   ├── colabfold_envdb_202108_db_aln.7 -│   ├── colabfold_envdb_202108_db_aln.8 -│   ├── colabfold_envdb_202108_db_aln.9 -│   ├── colabfold_envdb_202108_db_aln.dbtype -│   ├── colabfold_envdb_202108_db_aln.index -│   ├── colabfold_envdb_202108_db.dbtype -│   ├── colabfold_envdb_202108_db_h -│   ├── colabfold_envdb_202108_db_h.dbtype -│   ├── colabfold_envdb_202108_db_h.index -│   ├── colabfold_envdb_202108_db.idx -│   ├── colabfold_envdb_202108_db.idx.dbtype -│   ├── colabfold_envdb_202108_db.idx.index -│   ├── colabfold_envdb_202108_db.index -│   ├── colabfold_envdb_202108_db_seq.0 -│   
├── colabfold_envdb_202108_db_seq.1 -│   ├── colabfold_envdb_202108_db_seq.10 -│   ├── colabfold_envdb_202108_db_seq.11 -│   ├── colabfold_envdb_202108_db_seq.12 -│   ├── colabfold_envdb_202108_db_seq.13 -│   ├── colabfold_envdb_202108_db_seq.14 -│   ├── colabfold_envdb_202108_db_seq.15 -│   ├── colabfold_envdb_202108_db_seq.2 -│   ├── colabfold_envdb_202108_db_seq.3 -│   ├── colabfold_envdb_202108_db_seq.4 -│   ├── colabfold_envdb_202108_db_seq.5 -│   ├── colabfold_envdb_202108_db_seq.6 -│   ├── colabfold_envdb_202108_db_seq.7 -│   ├── colabfold_envdb_202108_db_seq.8 -│   ├── colabfold_envdb_202108_db_seq.9 -│   ├── colabfold_envdb_202108_db_seq.dbtype -│   ├── colabfold_envdb_202108_db_seq_h -> colabfold_envdb_202108_db_h -│   ├── colabfold_envdb_202108_db_seq_h.dbtype -> colabfold_envdb_202108_db_h.dbtype -│   ├── colabfold_envdb_202108_db_seq_h.index -> colabfold_envdb_202108_db_h.index -│   ├── colabfold_envdb_202108_db_seq.index -├── params -│   ├── alphafold_params_2021-07-14 -│   │   ├── LICENSE -│   │   ├── params_model_1.npz -│   │   ├── params_model_1_ptm.npz -│   │   ├── params_model_2.npz -│   │   ├── params_model_2_ptm.npz -│   │   ├── params_model_3.npz -│   │   ├── params_model_3_ptm.npz -│   │   ├── params_model_4.npz -│   │   ├── params_model_4_ptm.npz -│   │   ├── params_model_5.npz -│   │   └── params_model_5_ptm.npz -│   └── alphafold_params_colab_2022-03-02 -│   ├── LICENSE -│   ├── params_model_1_multimer_v2.npz -│   ├── params_model_1.npz -│   ├── params_model_2_multimer_v2.npz -│   ├── params_model_2.npz -│   ├── params_model_2_ptm.npz -│   ├── params_model_3_multimer_v2.npz -│   ├── params_model_3.npz -│   ├── params_model_4_multimer_v2.npz -│   ├── params_model_4.npz -│   ├── params_model_5_multimer_v2.npz -│   └── params_model_5.npz -└── uniref30_2202 - ├── uniref30_2202_db.0 - ├── uniref30_2202_db.1 - ├── uniref30_2202_db.2 - ├── uniref30_2202_db.3 - ├── uniref30_2202_db.4 - ├── uniref30_2202_db.5 - ├── uniref30_2202_db.6 - ├── uniref30_2202_db.7 - ├── uniref30_2202_db_aln.0 - ├── uniref30_2202_db_aln.1 - ├── uniref30_2202_db_aln.2 - ├── uniref30_2202_db_aln.3 - ├── uniref30_2202_db_aln.4 - ├── uniref30_2202_db_aln.5 - ├── uniref30_2202_db_aln.6 - ├── uniref30_2202_db_aln.7 - ├── uniref30_2202_db_aln.dbtype - ├── uniref30_2202_db_aln.index - ├── uniref30_2202_db.dbtype - ├── uniref30_2202_db_h - ├── uniref30_2202_db_h.dbtype - ├── uniref30_2202_db_h.index - ├── uniref30_2202_db.idx - ├── uniref30_2202_db.idx.dbtype - ├── uniref30_2202_db.idx.index - ├── uniref30_2202_db.index - ├── uniref30_2202_db_seq.0 - ├── uniref30_2202_db_seq.1 - ├── uniref30_2202_db_seq.2 - ├── uniref30_2202_db_seq.3 - ├── uniref30_2202_db_seq.4 - ├── uniref30_2202_db_seq.5 - ├── uniref30_2202_db_seq.6 - ├── uniref30_2202_db_seq.7 - ├── uniref30_2202_db_seq.dbtype - ├── uniref30_2202_db_seq_h -> uniref30_2202_db_h - ├── uniref30_2202_db_seq_h.dbtype -> uniref30_2202_db_h.dbtype - ├── uniref30_2202_db_seq_h.index -> uniref30_2202_db_h.index - └── uniref30_2202_db_seq.index -``` - -```console -nextflow run nf-core/proteinfold \ - --input samplesheet.csv \ - --outdir \ - --mode esmfold - --esmfold_db \ - --num_recycles 4 \ - --esmfold_model_preset \ - --use_gpu \ - -profile -``` - -If you specify the `--esmfold_db ` parameter, the directory structure of your path should be like this: - -```console -└── checkpoints - ├── esm2_t36_3B_UR50D-contact-regression.pt - ├── esm2_t36_3B_UR50D.pt - └── esmfold_3B_v1.pt -``` - -This will launch the pipeline with the `docker` configuration profile. 
See below for more information about profiles. - -Note that the pipeline will create the following files in your working directory: - -```bash -work # Directory containing the nextflow working files - # Finished results in specified location (defined with --outdir) -.nextflow_log # Log file from Nextflow -# Other nextflow hidden files, eg. history of pipeline runs and old logs. -``` - -If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. - -Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. - -:::warning -Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). -::: - -The above pipeline run specified with a params file in yaml format: - -```bash -nextflow run nf-core/proteinfold -profile docker -params-file params.yaml -``` - -with `params.yaml` containing: - -```yaml -input: './samplesheet.csv' -outdir: './results/' -genome: 'GRCh37' -<...> -``` - -You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). - -### Updating the pipeline - -When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: - -```bash -nextflow pull nf-core/proteinfold -``` - -### Reproducibility - -It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. - -First, go to the [nf-core/proteinfold releases page](https://github.com/nf-core/proteinfold/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. - -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. - -To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. - -:::tip -If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. -::: - -## Core Nextflow arguments - -:::note -These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). -::: - -### `-profile` - -Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. 
- -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. - -:::info -We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. -::: - -The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). - -Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! -They are loaded in sequence, so later profiles can overwrite earlier profiles. - -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. - -- `test` - - A profile with a complete configuration for automated testing - - Includes links to test data so needs no other parameters -- `docker` - - A generic configuration profile to be used with [Docker](https://docker.com/) -- `singularity` - - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) -- `podman` - - A generic configuration profile to be used with [Podman](https://podman.io/) -- `shifter` - - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) -- `charliecloud` - - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) -- `apptainer` - - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) -- `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. - -### `-resume` - -Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). - -You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. - -### `-c` - -Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. - -## Custom configuration - -### Resource requests - -Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. 
For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. - -To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. - -### Custom Containers - -In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date. - -To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. - -### Custom Tool Arguments - -A pipeline might not always support every possible argument or option of a particular tool used in pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. - -To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. - -### nf-core/configs - -In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. - -See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. - -If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). - -## Use of shared file systems - -Given that the AlphaFold2 and the ColabFold modes (except for the ColabFold webserver option) rely on huge databases to infer the predictions, the execution of the pipeline is recommended to take place on shared file systems so as to avoid high latency caused during staging this data. For instance, if you work on AWS, you might consider using an Amazon FSx file system. - -## Azure Resource Requests - -To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. 
-We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. - -Note that the choice of VM size depends on your quota and the overall workload during the analysis. -For a thorough list, please refer the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). - -## Running in the background - -Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. - -The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file. - -Alternatively, you can use `screen` / `tmux` or similar tool to create a detached session which you can log back into at a later time. -Some HPC setups also allow you to run nextflow within a cluster job submitted your job scheduler (from where it submits more jobs). - -## Nextflow memory requirements - -In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. -We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): - -```bash -NXF_OPTS='-Xms1g -Xmx4g' -``` +# nf-core/proteinfold: Usage + +## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/proteinfold/usage](https://nf-co.re/proteinfold/usage) + +> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ + +## Introduction + + + +## Samplesheet input + +You will need to create a samplesheet with information about the sequences you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 2 columns, and a header row as shown in the examples below. + +```bash +--input '[path to samplesheet file]' +``` + +### Full samplesheet + +A sample of the final samplesheet file for two sequences is shown below: + +```csv title="samplesheet.csv" +sequence,fasta +T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta +T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta +``` + +The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the table below: + +| Column | Description | +| ---------- | --------------------------------------------------------------------------------------------------- | +| `sequence` | Custom sequence name. Spaces in sequence names are automatically converted to underscores (`_`). | +| `fasta` | Full path to fasta file for the provided sequence. File has to have the extension ".fasta" or "fa". | + +An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. + +## Running the pipeline + +The typical commands for running the pipeline on AlphaFold2, Colabfold and ESMFold modes are shown below. 
+
+The regular AlphaFold2 mode can be run using this command:
+
+```bash
+nextflow run nf-core/proteinfold \
+    --input samplesheet.csv \
+    --outdir <OUTDIR> \
+    --mode alphafold2 \
+    --alphafold2_db <DB_PATH> \
+    --full_dbs \
+    --alphafold2_model_preset monomer \
+    --use_gpu \
+    -profile <docker>
+```
+
+To run AlphaFold2 in the mode that splits the MSA calculation from the model inference, use the `--alphafold2_mode split_msa_prediction` parameter, as shown below:
+
+```bash
+nextflow run nf-core/proteinfold \
+    --input samplesheet.csv \
+    --outdir <OUTDIR> \
+    --mode alphafold2 \
+    --alphafold2_mode split_msa_prediction \
+    --alphafold2_db <DB_PATH> \
+    --full_dbs \
+    --alphafold2_model_preset monomer \
+    --use_gpu \
+    -profile <docker>
+```
+
+To provide pre-downloaded AlphaFold2 databases and parameters, specify the `--alphafold2_db <DB_PATH>` parameter; the directory structure of that path should be like this:
+
+<details>
+Directory structure +```console +├── alphafold_params_2022-12-06 +│ ├── LICENSE +│ ├── params_model_1_multimer.npz +│ ├── params_model_1_multimer_v2.npz +│ ├── params_model_1_multimer_v3.npz +│ ├── params_model_1.npz +│ ├── params_model_1_ptm.npz +│ ├── params_model_2_multimer.npz +│ ├── params_model_2_multimer_v2.npz +│ ├── params_model_2_multimer_v3.npz +│ ├── params_model_2.npz +│ ├── params_model_2_ptm.npz +│ ├── params_model_3_multimer.npz +│ ├── params_model_3_multimer_v2.npz +│ ├── params_model_3_multimer_v3.npz +│ ├── params_model_3.npz +│ ├── params_model_3_ptm.npz +│ ├── params_model_4_multimer.npz +│ ├── params_model_4_multimer_v2.npz +│ ├── params_model_4_multimer_v3.npz +│ ├── params_model_4.npz +│ ├── params_model_4_ptm.npz +│ ├── params_model_5_multimer.npz +│ ├── params_model_5_multimer_v2.npz +│ ├── params_model_5_multimer_v3.npz +│ ├── params_model_5.npz +│ └── params_model_5_ptm.npz +├── mgnify +│ └── mgy_clusters_2022_05.fa +├── pdb70 +│ └── pdb70_from_mmcif_200916 +│ ├── md5sum +│ ├── pdb70_a3m.ffdata +│ ├── pdb70_a3m.ffindex +│ ├── pdb70_clu.tsv +│ ├── pdb70_cs219.ffdata +│ ├── pdb70_cs219.ffindex +│ ├── pdb70_hhm.ffdata +│ ├── pdb70_hhm.ffindex +│ └── pdb_filter.dat +├── pdb_mmcif +│ ├── mmcif_files +│ │ ├── 1g6g.cif +│ │ ├── 1go4.cif +│ │ ├── 1isn.cif +│ │ ├── 1kuu.cif +│ │ ├── 1m7s.cif +│ │ ├── 1mwq.cif +│ │ ├── 1ni5.cif +│ │ ├── 1qgd.cif +│ │ ├── 1tp9.cif +│ │ ├── 1wa9.cif +│ │ ├── 1ye5.cif +│ │ ├── 1yhl.cif +│ │ ├── 2bjd.cif +│ │ ├── 2bo9.cif +│ │ ├── 2e7t.cif +│ │ ├── 2fyg.cif +│ │ ├── 2j0q.cif +│ │ ├── 2jcq.cif +│ │ ├── 2m4k.cif +│ │ ├── 2n9o.cif +│ │ ├── 2nsx.cif +│ │ ├── 2w4u.cif +│ │ ├── 2wd6.cif +│ │ ├── 2wh5.cif +│ │ ├── 2wji.cif +│ │ ├── 2yu3.cif +│ │ ├── 3cw2.cif +│ │ ├── 3d45.cif +│ │ ├── 3gnz.cif +│ │ ├── 3j0a.cif +│ │ ├── 3jaj.cif +│ │ ├── 3mzo.cif +│ │ ├── 3nrn.cif +│ │ ├── 3piv.cif +│ │ ├── 3pof.cif +│ │ ├── 3pvd.cif +│ │ ├── 3q45.cif +│ │ ├── 3qh6.cif +│ │ ├── 3rg2.cif +│ │ ├── 3sxe.cif +│ │ ├── 3uai.cif +│ │ ├── 3uid.cif +│ │ ├── 3wae.cif +│ │ ├── 3wt1.cif +│ │ ├── 3wtr.cif +│ │ ├── 3wy2.cif +│ │ ├── 3zud.cif +│ │ ├── 4bix.cif +│ │ ├── 4bzx.cif +│ │ ├── 4c1n.cif +│ │ ├── 4cej.cif +│ │ ├── 4chm.cif +│ │ ├── 4fzo.cif +│ │ ├── 4i1f.cif +│ │ ├── 4ioa.cif +│ │ ├── 4j6o.cif +│ │ ├── 4m9q.cif +│ │ ├── 4mal.cif +│ │ ├── 4nhe.cif +│ │ ├── 4o2w.cif +│ │ ├── 4pzo.cif +│ │ ├── 4qlx.cif +│ │ ├── 4uex.cif +│ │ ├── 4zm4.cif +│ │ ├── 4zv1.cif +│ │ ├── 5aj4.cif +│ │ ├── 5frs.cif +│ │ ├── 5hwo.cif +│ │ ├── 5kbk.cif +│ │ ├── 5odq.cif +│ │ ├── 5u5t.cif +│ │ ├── 5wzq.cif +│ │ ├── 5x9z.cif +│ │ ├── 5xe5.cif +│ │ ├── 5ynv.cif +│ │ ├── 5yud.cif +│ │ ├── 5z5c.cif +│ │ ├── 5zb3.cif +│ │ ├── 5zlg.cif +│ │ ├── 6a6i.cif +│ │ ├── 6az3.cif +│ │ ├── 6ban.cif +│ │ ├── 6g1f.cif +│ │ ├── 6ix4.cif +│ │ ├── 6jwp.cif +│ │ ├── 6ng9.cif +│ │ ├── 6ojj.cif +│ │ ├── 6s0x.cif +│ │ ├── 6sg9.cif +│ │ ├── 6vi4.cif +│ │ └── 7sp5.cif +│ └── obsolete.dat +├── pdb_seqres +│ └── pdb_seqres.txt +├── small_bfd +│ └── bfd-first_non_consensus_sequences.fasta +├── uniprot +│ └── uniprot.fasta +├── uniref30 +│ ├── UniRef30_2021_03_a3m.ffdata +│ ├── UniRef30_2021_03_a3m.ffindex +│ ├── UniRef30_2021_03_cs219.ffdata +│ ├── UniRef30_2021_03_cs219.ffindex +| ├── UniRef30_2021_03_hhm.ffdata +│ └── UniRef30_2021_03_hhm.ffindex +└── uniref90 + └── uniref90.fasta +``` +
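+
+Rather than writing out every flag, any of the commands above can also be captured in a parameters file and passed to Nextflow with `-params-file`. The sketch below is illustrative only (the paths and values are placeholders to adapt to your setup):
+
+```bash
+# Write an illustrative params file; the keys mirror the flags shown above
+cat > params.yaml << 'EOF'
+input: './samplesheet.csv'
+outdir: './results/'
+mode: 'alphafold2'
+alphafold2_db: '/path/to/alphafold2/dbs'
+full_dbs: true
+alphafold2_model_preset: 'monomer'
+use_gpu: true
+EOF
+
+nextflow run nf-core/proteinfold -profile docker -params-file params.yaml
+```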
+
+ColabFold mode, running the MMseqs2 searches locally (`--colabfold_server local`), can be run using the following command:
+
+```bash
+nextflow run nf-core/proteinfold \
+    --input samplesheet.csv \
+    --outdir <OUTDIR> \
+    --mode colabfold \
+    --colabfold_server local \
+    --colabfold_db <COLABFOLD_DB_PATH> \
+    --num_recycles_colabfold 3 \
+    --use_amber <true/false> \
+    --colabfold_model_preset "AlphaFold2-ptm" \
+    --use_gpu <true/false> \
+    --db_load_mode 0 \
+    -profile <docker/singularity/podman/shifter/charliecloud/apptainer/conda>
+```
+
+The command to run ColabFold using the ColabFold webserver is shown below:
+
+```bash
+nextflow run nf-core/proteinfold \
+    --input samplesheet.csv \
+    --outdir <OUTDIR> \
+    --mode colabfold \
+    --colabfold_server webserver \
+    --host_url <CUSTOM_MMSEQS2_API_SERVER_URL> \
+    --colabfold_db <COLABFOLD_DB_PATH> \
+    --num_recycles_colabfold 3 \
+    --use_amber <true/false> \
+    --colabfold_model_preset "AlphaFold2-ptm" \
+    --use_gpu <true/false> \
+    -profile <docker/singularity/podman/shifter/charliecloud/apptainer/conda>
+```
+
+If you specify the `--colabfold_db <COLABFOLD_DB_PATH>` parameter, the directory structure of that path should look like this:
+
+Directory structure +```console +├── colabfold_envdb_202108 +│ ├── colabfold_envdb_202108_db.0 +│ ├── colabfold_envdb_202108_db.1 +│ ├── colabfold_envdb_202108_db.10 +│ ├── colabfold_envdb_202108_db.11 +│ ├── colabfold_envdb_202108_db.12 +│ ├── colabfold_envdb_202108_db.13 +│ ├── colabfold_envdb_202108_db.14 +│ ├── colabfold_envdb_202108_db.15 +│ ├── colabfold_envdb_202108_db.2 +│ ├── colabfold_envdb_202108_db.3 +│ ├── colabfold_envdb_202108_db.4 +│ ├── colabfold_envdb_202108_db.5 +│ ├── colabfold_envdb_202108_db.6 +│ ├── colabfold_envdb_202108_db.7 +│ ├── colabfold_envdb_202108_db.8 +│ ├── colabfold_envdb_202108_db.9 +│ ├── colabfold_envdb_202108_db_aln.0 +│ ├── colabfold_envdb_202108_db_aln.1 +│ ├── colabfold_envdb_202108_db_aln.10 +│ ├── colabfold_envdb_202108_db_aln.11 +│ ├── colabfold_envdb_202108_db_aln.12 +│ ├── colabfold_envdb_202108_db_aln.13 +│ ├── colabfold_envdb_202108_db_aln.14 +│ ├── colabfold_envdb_202108_db_aln.15 +│ ├── colabfold_envdb_202108_db_aln.2 +│ ├── colabfold_envdb_202108_db_aln.3 +│ ├── colabfold_envdb_202108_db_aln.4 +│ ├── colabfold_envdb_202108_db_aln.5 +│ ├── colabfold_envdb_202108_db_aln.6 +│ ├── colabfold_envdb_202108_db_aln.7 +│ ├── colabfold_envdb_202108_db_aln.8 +│ ├── colabfold_envdb_202108_db_aln.9 +│ ├── colabfold_envdb_202108_db_aln.dbtype +│ ├── colabfold_envdb_202108_db_aln.index +│ ├── colabfold_envdb_202108_db.dbtype +│ ├── colabfold_envdb_202108_db_h +│ ├── colabfold_envdb_202108_db_h.dbtype +│ ├── colabfold_envdb_202108_db_h.index +│ ├── colabfold_envdb_202108_db.idx +│ ├── colabfold_envdb_202108_db.idx.dbtype +│ ├── colabfold_envdb_202108_db.idx.index +│ ├── colabfold_envdb_202108_db.index +│ ├── colabfold_envdb_202108_db_seq.0 +│ ├── colabfold_envdb_202108_db_seq.1 +│ ├── colabfold_envdb_202108_db_seq.10 +│ ├── colabfold_envdb_202108_db_seq.11 +│ ├── colabfold_envdb_202108_db_seq.12 +│ ├── colabfold_envdb_202108_db_seq.13 +│ ├── colabfold_envdb_202108_db_seq.14 +│ ├── colabfold_envdb_202108_db_seq.15 +│ ├── colabfold_envdb_202108_db_seq.2 +│ ├── colabfold_envdb_202108_db_seq.3 +│ ├── colabfold_envdb_202108_db_seq.4 +│ ├── colabfold_envdb_202108_db_seq.5 +│ ├── colabfold_envdb_202108_db_seq.6 +│ ├── colabfold_envdb_202108_db_seq.7 +│ ├── colabfold_envdb_202108_db_seq.8 +│ ├── colabfold_envdb_202108_db_seq.9 +│ ├── colabfold_envdb_202108_db_seq.dbtype +│ ├── colabfold_envdb_202108_db_seq_h -> colabfold_envdb_202108_db_h +│ ├── colabfold_envdb_202108_db_seq_h.dbtype -> colabfold_envdb_202108_db_h.dbtype +│ ├── colabfold_envdb_202108_db_seq_h.index -> colabfold_envdb_202108_db_h.index +│ ├── colabfold_envdb_202108_db_seq.index +├── params +│ ├── alphafold_params_2021-07-14 +│ │ ├── LICENSE +│ │ ├── params_model_1.npz +│ │ ├── params_model_1_ptm.npz +│ │ ├── params_model_2.npz +│ │ ├── params_model_2_ptm.npz +│ │ ├── params_model_3.npz +│ │ ├── params_model_3_ptm.npz +│ │ ├── params_model_4.npz +│ │ ├── params_model_4_ptm.npz +│ │ ├── params_model_5.npz +│ │ └── params_model_5_ptm.npz +│ └── alphafold_params_colab_2022-12-06 +│ ├── LICENSE +│ ├── params_model_1_multimer_v3.npz +│ ├── params_model_1.npz +│ ├── params_model_2_multimer_v3.npz +│ ├── params_model_2.npz +│ ├── params_model_2_ptm.npz +│ ├── params_model_3_multimer_v3.npz +│ ├── params_model_3.npz +│ ├── params_model_4_multimer_v3.npz +│ ├── params_model_4.npz +│ ├── params_model_5_multimer_v3.npz +│ └── params_model_5.npz +└── uniref30_2302 + ├── uniref30_2302_aln.tsv + ├── uniref30_2302_db.0 + ├── uniref30_2302_db.1 + ├── uniref30_2302_db.2 + ├── uniref30_2302_db.3 + ├── 
uniref30_2302_db.4 + ├── uniref30_2302_db.5 + ├── uniref30_2302_db.6 + ├── uniref30_2302_db.7 + ├── uniref30_2302_db_aln.0 + ├── uniref30_2302_db_aln.1 + ├── uniref30_2302_db_aln.2 + ├── uniref30_2302_db_aln.3 + ... + ├── uniref30_2302_db_aln.97 + ├── uniref30_2302_db_aln.98 + ├── uniref30_2302_db_aln.99 + ├── uniref30_2302_db_aln.dbtype + ├── uniref30_2302_db_aln.index + ├── uniref30_2302_db.dbtype + ├── uniref30_2302_db_h + ├── uniref30_2302_db_h.dbtype + ├── uniref30_2302_db_h.index + ├── uniref30_2302_db.idx + ├── uniref30_2302_db.idx.dbtype + ├── uniref30_2302_db.idx.index + ├── uniref30_2302_db.idx_mapping + ├── uniref30_2302_db.idx_taxonomy + ├── uniref30_2302_db.index + ├── uniref30_2302_db_mapping + ├── uniref30_2302_db_seq.0 + ├── uniref30_2302_db_seq.1 + ├── uniref30_2302_db_seq.2 + ├── uniref30_2302_db_seq.3 + ... + ├── uniref30_2302_db_seq.97 + ├── uniref30_2302_db_seq.98 + ├── uniref30_2302_db_seq.99 + ├── uniref30_2302_db_seq.dbtype + ├── uniref30_2302_db_seq_h -> uniref30_2302_db_h + ├── uniref30_2302_db_seq_h.dbtype -> uniref30_2302_db_h.dbtype + ├── uniref30_2302_db_seq_h.index -> uniref30_2302_db_h.index + └── uniref30_2302_db_seq.index +``` +
+
+The ESMFold mode can be run using the command below:
+
+```bash
+nextflow run nf-core/proteinfold \
+    --input samplesheet.csv \
+    --outdir <OUTDIR> \
+    --mode esmfold \
+    --esmfold_db <ESMFOLD_DB_PATH> \
+    --num_recycles_esmfold 4 \
+    --esmfold_model_preset <monomer/multimer> \
+    --use_gpu <true/false> \
+    -profile <docker/singularity/podman/shifter/charliecloud/apptainer/conda>
+```
+
+If you specify the `--esmfold_db <ESMFOLD_DB_PATH>` parameter, the directory structure of that path should look like this:
+
+```console
+└── checkpoints
+    ├── esm2_t36_3B_UR50D-contact-regression.pt
+    ├── esm2_t36_3B_UR50D.pt
+    └── esmfold_3B_v1.pt
+```
+
+Running any of the commands above with `-profile docker` will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
+
+Note that the pipeline will create the following files in your working directory:
+
+```bash
+work                # Directory containing the nextflow working files
+<OUTDIR>            # Finished results in specified location (defined with --outdir)
+.nextflow.log       # Log file from Nextflow
+# Other nextflow hidden files, eg. history of pipeline runs and old logs.
+```
+
+If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file.
+
+Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`.
+
+:::warning
+Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
+:::
+
+The above pipeline run specified with a params file in yaml format:
+
+```bash
+nextflow run nf-core/proteinfold -profile docker -params-file params.yaml
+```
+
+with `params.yaml` containing:
+
+```yaml
+input: './samplesheet.csv'
+outdir: './results/'
+mode: 'alphafold2'
+<...>
+```
+
+You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
+
+### Updating the pipeline
+
+When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
+
+```bash
+nextflow pull nf-core/proteinfold
+```
+
+### Reproducibility
+
+It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
+
+First, go to the [nf-core/proteinfold releases page](https://github.com/nf-core/proteinfold/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag.
+
+This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports.
+
+To further assist in reproducibility, you can share and reuse [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.
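+
+For example, a sketch of a run that pins a release and reuses a saved params file (the release number and file name here are illustrative, not prescriptive):
+
+```bash
+# Pin the pipeline to a specific release and reuse previously saved parameters
+nextflow run nf-core/proteinfold \
+    -r 1.1.0 \
+    -profile docker \
+    -params-file params.yaml
+```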
+
+:::tip
+If you wish to share such a params file (for example, to upload as supplementary material for an academic publication), make sure NOT to include cluster-specific paths to files or institution-specific profiles.
+:::
+
+## Core Nextflow arguments
+
+:::note
+These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen).
+:::
+
+### `-profile`
+
+Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.
+
+Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below.
+
+:::info
+We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
+:::
+
+The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation).
+
+Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important!
+They are loaded in sequence, so later profiles can overwrite earlier profiles.
+
+If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines depending on the compute environment.
+
+- `test`
+  - A profile with a complete configuration for automated testing
+  - Includes links to test data so needs no other parameters
+- `docker`
+  - A generic configuration profile to be used with [Docker](https://docker.com/)
+- `singularity`
+  - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/)
+- `podman`
+  - A generic configuration profile to be used with [Podman](https://podman.io/)
+- `shifter`
+  - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/)
+- `charliecloud`
+  - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
+- `apptainer`
+  - A generic configuration profile to be used with [Apptainer](https://apptainer.org/)
+- `wave`
+  - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later).
+- `conda`
+  - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer.
+
+### `-resume`
+
+Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only must the names be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html).
+
+You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names.
+
+### `-c`
+
+Specify the path to a specific config file (this is a core Nextflow option). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information.
+
+## Custom configuration
+
+### Resource requests
+
+Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped.
+
+To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website.
+
+### Custom Containers
+
+In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline-specified version may be out of date.
+
+To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website.
+
+### Custom Tool Arguments
+
+A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default.
+
+To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website.
+
+### nf-core/configs
+
+In most cases, you will only need to create a custom config as a one-off, but if you and others within your organisation are likely to be running nf-core pipelines regularly with the same settings, it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this, please test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile.
+
+See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files.
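+
+As a sketch of such a custom config, the snippet below raises the resources for this pipeline's `RUN_ALPHAFOLD2` process; the values are illustrative assumptions, not recommendations, and should be adapted to your data and infrastructure:
+
+```groovy
+// custom.config -- pass with `-c custom.config`; values below are examples only
+process {
+    withName: 'RUN_ALPHAFOLD2' {
+        cpus   = 16
+        memory = 64.GB
+        time   = 48.h
+    }
+}
+```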
+
+If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs).
+
+## Use of shared file systems
+
+Given that the AlphaFold2 and ColabFold modes (except for the ColabFold webserver option) rely on huge databases to infer predictions, we recommend running the pipeline on a shared file system to avoid the high latency that staging this data can otherwise cause. For instance, if you work on AWS, you might consider using an Amazon FSx file system.
+
+## Azure Resource Requests
+
+To be used with the `azurebatch` profile by specifying `-profile azurebatch`.
+We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default, but this can be changed if required.
+
+Note that the choice of VM size depends on your quota and the overall workload during the analysis.
+For a thorough list, please refer to [Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes).
+
+## Running in the background
+
+Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished.
+
+The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file.
+
+Alternatively, you can use `screen` / `tmux` or a similar tool to create a detached session which you can log back into at a later time.
+Some HPC setups also allow you to run Nextflow within a cluster job submitted to your job scheduler (from where it submits more jobs).
+
+## Nextflow memory requirements
+
+In some cases, the Nextflow Java virtual machines can start to request a large amount of memory.
+We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`):
+
+```bash
+NXF_OPTS='-Xms1g -Xmx4g'
+```
diff --git a/main.nf b/main.nf
index 5b46e229..4e2093d7 100644
--- a/main.nf
+++ b/main.nf
@@ -1,229 +1,229 @@
-#!/usr/bin/env nextflow
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    nf-core/proteinfold
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Github : https://github.com/nf-core/proteinfold
-    Website: https://nf-co.re/proteinfold
-    Slack  : https://nfcore.slack.com/channels/proteinfold
-----------------------------------------------------------------------------------------
-*/
-
-nextflow.enable.dsl = 2
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-if (params.mode == "alphafold2") {
-    include { PREPARE_ALPHAFOLD2_DBS } from './subworkflows/local/prepare_alphafold2_dbs'
-    include { ALPHAFOLD2             } from './workflows/alphafold2'
-} else if (params.mode == "colabfold") {
-    include { PREPARE_COLABFOLD_DBS } from './subworkflows/local/prepare_colabfold_dbs'
-    include { COLABFOLD             } from './workflows/colabfold'
-} else if (params.mode == "esmfold") {
-    include { PREPARE_ESMFOLD_DBS } from './subworkflows/local/prepare_esmfold_dbs'
-    include { ESMFOLD             } from './workflows/esmfold'
-}
-
-include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline'
-include { PIPELINE_COMPLETION     } from './subworkflows/local/utils_nfcore_proteinfold_pipeline'
-include { getColabfoldAlphafold2Params } from './subworkflows/local/utils_nfcore_proteinfold_pipeline'
-include { getColabfoldAlphafold2ParamsPath } from './subworkflows/local/utils_nfcore_proteinfold_pipeline'
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    COLABFOLD PARAMETER VALUES
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-params.colabfold_alphafold2_params_link = getColabfoldAlphafold2Params()
-params.colabfold_alphafold2_params_path = getColabfoldAlphafold2ParamsPath()
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    NAMED WORKFLOWS FOR PIPELINE
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-//
-// WORKFLOW: Run main analysis pipeline
-//
-workflow NFCORE_PROTEINFOLD {
-
-    main:
-    ch_multiqc  = Channel.empty()
-    ch_versions = Channel.empty()
-
-    //
-    // WORKFLOW: Run alphafold2
-    //
-    if(params.mode == "alphafold2") {
-        //
-        // SUBWORKFLOW: Prepare Alphafold2 DBs
-        //
-        PREPARE_ALPHAFOLD2_DBS (
-            params.alphafold2_db,
-            params.full_dbs,
-            params.bfd_path,
-            params.small_bfd_path,
-            params.alphafold2_params_path,
-            params.mgnify_path,
-            params.pdb70_path,
-            params.pdb_mmcif_path,
-            params.uniref30_alphafold2_path,
-            params.uniref90_path,
-            params.pdb_seqres_path,
-            params.uniprot_path,
-            params.bfd_link,
-            params.small_bfd_link,
-            params.alphafold2_params_link,
-            params.mgnify_link,
-            params.pdb70_link,
-            params.pdb_mmcif_link,
-            params.pdb_obsolete_link,
-            params.uniref30_alphafold2_link,
-            params.uniref90_link,
-            params.pdb_seqres_link,
-            params.uniprot_sprot_link,
-            params.uniprot_trembl_link
-        )
-        ch_versions = ch_versions.mix(PREPARE_ALPHAFOLD2_DBS.out.versions)
- - // - // WORKFLOW: Run nf-core/alphafold2 workflow - // - ALPHAFOLD2 ( - ch_versions, - params.full_dbs, - params.alphafold2_mode, - params.alphafold2_model_preset, - PREPARE_ALPHAFOLD2_DBS.out.params.first(), - PREPARE_ALPHAFOLD2_DBS.out.bfd.ifEmpty([]).first(), - PREPARE_ALPHAFOLD2_DBS.out.small_bfd.ifEmpty([]).first(), - PREPARE_ALPHAFOLD2_DBS.out.mgnify.first(), - PREPARE_ALPHAFOLD2_DBS.out.pdb70.first(), - PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif.first(), - PREPARE_ALPHAFOLD2_DBS.out.uniref30.first(), - PREPARE_ALPHAFOLD2_DBS.out.uniref90.first(), - PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres.first(), - PREPARE_ALPHAFOLD2_DBS.out.uniprot.first() - ) - ch_multiqc = ALPHAFOLD2.out.multiqc_report - ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) - } - - // - // WORKFLOW: Run colabfold - // - else if(params.mode == "colabfold") { - // - // SUBWORKFLOW: Prepare Colabfold DBs - // - PREPARE_COLABFOLD_DBS ( - params.colabfold_db, - params.colabfold_server, - params.colabfold_alphafold2_params_path, - params.colabfold_db_path, - params.uniref30_colabfold_path, - params.colabfold_alphafold2_params_link, - params.colabfold_db_link, - params.uniref30_colabfold_link, - params.create_colabfold_index - ) - ch_versions = ch_versions.mix(PREPARE_COLABFOLD_DBS.out.versions) - - // - // WORKFLOW: Run nf-core/colabfold workflow - // - COLABFOLD ( - ch_versions, - params.colabfold_model_preset, - PREPARE_COLABFOLD_DBS.out.params.first(), - PREPARE_COLABFOLD_DBS.out.colabfold_db.first(), - PREPARE_COLABFOLD_DBS.out.uniref30.first(), - params.num_recycle - ) - ch_multiqc = COLABFOLD.out.multiqc_report - ch_versions = ch_versions.mix(COLABFOLD.out.versions) - } - - // - // WORKFLOW: Run esmfold - // - else if(params.mode == "esmfold") { - // - // SUBWORKFLOW: Prepare esmfold DBs - // - PREPARE_ESMFOLD_DBS ( - params.esmfold_db, - params.esmfold_params_path, - params.esmfold_3B_v1, - params.esm2_t36_3B_UR50D, - params.esm2_t36_3B_UR50D_contact_regression - ) - ch_versions = ch_versions.mix(PREPARE_ESMFOLD_DBS.out.versions) - - // - // WORKFLOW: Run nf-core/esmfold workflow - // - ESMFOLD ( - ch_versions, - PREPARE_ESMFOLD_DBS.out.params, - params.num_recycle - ) - ch_multiqc = ESMFOLD.out.multiqc_report - ch_versions = ch_versions.mix(ESMFOLD.out.versions) - } - emit: - multiqc_report = ch_multiqc // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [version1, version2, ...] 
-} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow { - - main: - // - // SUBWORKFLOW: Run initialisation tasks - // - PIPELINE_INITIALISATION ( - params.version, - params.help, - params.validate_params, - params.monochrome_logs, - args, - params.outdir - ) - - // - // WORKFLOW: Run main workflow - // - NFCORE_PROTEINFOLD () - - // - // SUBWORKFLOW: Run completion tasks - // - PIPELINE_COMPLETION ( - params.email, - params.email_on_fail, - params.plaintext_email, - params.outdir, - params.monochrome_logs, - params.hook_url, - NFCORE_PROTEINFOLD.out.multiqc_report - ) -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ +#!/usr/bin/env nextflow +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/proteinfold +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Github : https://github.com/nf-core/proteinfold + Website: https://nf-co.re/proteinfold + Slack : https://nfcore.slack.com/channels/proteinfold +---------------------------------------------------------------------------------------- +*/ + +nextflow.enable.dsl = 2 + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { PREPARE_ALPHAFOLD2_DBS } from './subworkflows/local/prepare_alphafold2_dbs' +include { ALPHAFOLD2 } from './workflows/alphafold2' +include { PREPARE_COLABFOLD_DBS } from './subworkflows/local/prepare_colabfold_dbs' +include { COLABFOLD } from './workflows/colabfold' +include { PREPARE_ESMFOLD_DBS } from './subworkflows/local/prepare_esmfold_dbs' +include { ESMFOLD } from './workflows/esmfold' + +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' +include { getColabfoldAlphafold2Params } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' +include { getColabfoldAlphafold2ParamsPath } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' + +include { FOLDSEEK_EASYSEARCH } from './modules/nf-core/foldseek/easysearch' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COLABFOLD PARAMETER VALUES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +params.colabfold_alphafold2_params_link = getColabfoldAlphafold2Params() +params.colabfold_alphafold2_params_path = getColabfoldAlphafold2ParamsPath() + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOWS FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// WORKFLOW: Run main analysis pipeline +// +workflow NFCORE_PROTEINFOLD { + + main: + ch_multiqc = Channel.empty() + ch_versions = Channel.empty() + ch_pred_pdb = Channel.empty() + // + // WORKFLOW: Run alphafold2 + // + if(params.mode.toLowerCase().split(",").contains("alphafold2")) { + // + // SUBWORKFLOW: Prepare Alphafold2 DBs + // + // WORKFLOW: Run 
nf-core/alphafold2 workflow + // + + ch_params = Channel.fromPath( params.alphafold2_params_path ) + ch_mgnify = Channel.fromPath( params.mgnify_path ) + ch_pdb70 = Channel.fromPath( params.pdb70_path, type: 'dir' ) + ch_mmcif_files = Channel.fromPath( params.pdb_mmcif_path, type: 'dir' ) + ch_mmcif_obsolete = Channel.fromPath( params.pdb_mmcif_path, type: 'file' ) + ch_mmcif = ch_mmcif_files.mix(ch_mmcif_obsolete) + ch_uniref30 = Channel.fromPath( params.uniref30_alphafold2_path, type: 'any' ) + ch_uniref90 = Channel.fromPath( params.uniref90_path ) + ch_pdb_seqres = Channel.fromPath( params.pdb_seqres_path ) + ch_uniprot = Channel.fromPath( params.uniprot_path ) + ch_small_bfd = Channel.fromPath( params.small_bfd_path) + ch_bfd = Channel.fromPath( params.bfd_path) + + + + + ALPHAFOLD2 ( + params.full_dbs, + params.alphafold2_mode, + params.alphafold2_model_preset, + ch_params.toList(), + ch_bfd.ifEmpty([]).first(), + ch_small_bfd.ifEmpty([]).first(), + ch_mgnify.first(), + ch_pdb70.first(), + ch_mmcif.toList(), + ch_uniref30.toList(), + ch_uniref90.first(), + ch_pdb_seqres.first(), + ch_uniprot.first() + ) + ch_pred_pdb = ch_pred_pdb.mix(ALPHAFOLD2.out.pdb) + ch_multiqc = ALPHAFOLD2.out.multiqc_report + ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) + } + + // + // WORKFLOW: Run colabfold + // + if(params.mode.toLowerCase().split(",").contains("colabfold")) { + // + // SUBWORKFLOW: Prepare Colabfold DBs + // + PREPARE_COLABFOLD_DBS ( + params.colabfold_db, + params.colabfold_server, + params.colabfold_alphafold2_params_path, + params.colabfold_db_path, + params.uniref30_colabfold_path, + params.colabfold_alphafold2_params_link, + params.colabfold_db_link, + params.uniref30_colabfold_link, + params.create_colabfold_index + ) + ch_versions = ch_versions.mix(PREPARE_COLABFOLD_DBS.out.versions) + + // + // WORKFLOW: Run nf-core/colabfold workflow + // + COLABFOLD ( + ch_versions, + params.colabfold_model_preset, + PREPARE_COLABFOLD_DBS.out.params.first(), + PREPARE_COLABFOLD_DBS.out.colabfold_db.first(), + PREPARE_COLABFOLD_DBS.out.uniref30.first(), + params.num_recycles_colabfold + ) + ch_pred_pdb = ch_pred_pdb.mix(Channel.empty()) + ch_multiqc = COLABFOLD.out.multiqc_report + ch_versions = ch_versions.mix(COLABFOLD.out.versions) + } + + // + // WORKFLOW: Run esmfold + // + if(params.mode.toLowerCase().split(",").contains("esmfold")) { + // + // SUBWORKFLOW: Prepare esmfold DBs + // + /*PREPARE_ESMFOLD_DBS ( + params.esmfold_db, + params.esmfold_params_path, + params.esmfold_3B_v1, + params.esm2_t36_3B_UR50D, + params.esm2_t36_3B_UR50D_contact_regression + )*/ + + //ch_versions = ch_versions.mix(PREPARE_ESMFOLD_DBS.out.versions) + + // + // WORKFLOW: Run nf-core/esmfold workflow + // + Channel.fromPath(params.esmfold_params_path).view() + ESMFOLD ( + ch_versions, + Channel.fromPath(params.esmfold_params_path), + params.num_recycles_esmfold + ) + + ch_pred_pdb = ch_pred_pdb.mix(ESMFOLD.out.pdb) + ch_multiqc = ESMFOLD.out.multiqc_report + ch_versions = ch_versions.mix(ESMFOLD.out.versions) + } + + if (params.foldseek_search == "easysearch"){ + ch_foldseek_db = channel.value([["id": params.foldseek_db], file(params.foldseek_db_path, checkIfExists: true)]) + + FOLDSEEK_EASYSEARCH( + ch_pred_pdb, + ch_foldseek_db + ) + } + emit: + multiqc_report = ch_multiqc // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [version1, version2, ...] 
+} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow { + + main: + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_PROTEINFOLD () + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_PROTEINFOLD.out.multiqc_report + ) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/modules.json b/modules.json index 3e924e45..f7ed277a 100644 --- a/modules.json +++ b/modules.json @@ -1,62 +1,67 @@ -{ - "name": "nf-core/proteinfold", - "homePage": "https://github.com/nf-core/proteinfold", - "repos": { - "https://github.com/nf-core/modules.git": { - "modules": { - "nf-core": { - "aria2": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "gunzip": { - "branch": "master", - "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": ["modules"] - }, - "mmseqs/createindex": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "mmseqs/tsv2exprofiledb": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "multiqc": { - "branch": "master", - "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": ["modules"] - }, - "untar": { - "branch": "master", - "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": ["modules"], - "patch": "modules/nf-core/untar/untar.diff" - } - } - }, - "subworkflows": { - "nf-core": { - "utils_nextflow_pipeline": { - "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] - }, - "utils_nfcore_pipeline": { - "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] - }, - "utils_nfvalidation_plugin": { - "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] - } - } - } - } - } -} +{ + "name": "nf-core/proteinfold", + "homePage": "https://github.com/nf-core/proteinfold", + "repos": { + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "fastqc": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "foldseek/easysearch": { + "branch": "master", + "git_sha": "9bfc81874554e87740bcb3e5e07acf0a153c9ecb", + "installed_by": ["modules"] + }, + "gunzip": { + "branch": "master", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "installed_by": ["modules"] + }, + "mmseqs/createindex": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "mmseqs/tsv2exprofiledb": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": 
"b7ebe95761cd389603f9cc0e0dc384c0f663815a", + "installed_by": ["modules"] + }, + "untar": { + "branch": "master", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "installed_by": ["modules"], + "patch": "modules/nf-core/untar/untar.diff" + } + } + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + } + } + } + } + } +} diff --git a/modules/local/colabfold_batch.nf b/modules/local/colabfold_batch.nf index 28f26274..90e4b84d 100644 --- a/modules/local/colabfold_batch.nf +++ b/modules/local/colabfold_batch.nf @@ -1,61 +1,61 @@ -process COLABFOLD_BATCH { - tag "$meta.id" - label 'process_medium' - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error("Local COLABFOLD_BATCH module does not support Conda. Please use Docker / Singularity / Podman instead.") - } - - container "nf-core/proteinfold_colabfold:1.1.0" - - input: - tuple val(meta), path(fasta) - val colabfold_model_preset - path ('params/*') - path ('colabfold_db/*') - path ('uniref30/*') - val numRec - - output: - path ("*") , emit: pdb - path ("*_mqc.png") , emit: multiqc - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def VERSION = '1.5.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - - """ - ln -r -s params/alphafold_params_*/* params/ - colabfold_batch \\ - $args \\ - --num-recycle ${numRec} \\ - --data \$PWD \\ - --model-type ${colabfold_model_preset} \\ - ${fasta} \\ - \$PWD - for i in `find *_relaxed_rank_001*.pdb`; do cp \$i `echo \$i | sed "s|_relaxed_rank_|\t|g" | cut -f1`"_colabfold.pdb"; done - for i in `find *.png -maxdepth 0`; do cp \$i \${i%'.png'}_mqc.png; done - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - colabfold_batch: $VERSION - END_VERSIONS - """ - - stub: - def VERSION = '1.5.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ - touch ./"${fasta.baseName}"_colabfold.pdb - touch ./"${fasta.baseName}"_mqc.png - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - colabfold_batch: $VERSION - END_VERSIONS - """ -} +process COLABFOLD_BATCH { + tag "$meta.id" + label 'process_medium' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local COLABFOLD_BATCH module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_colabfold:1.1.1" + + input: + tuple val(meta), path(fasta) + val colabfold_model_preset + path ('params/*') + path ('colabfold_db/*') + path ('uniref30/*') + val numRec + + output: + path ("*") , emit: pdb + path ("*_mqc.png") , emit: multiqc + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def VERSION = '1.5.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + + """ + ln -r -s params/alphafold_params_*/* params/ + colabfold_batch \\ + $args \\ + --num-recycle ${numRec} \\ + --data \$PWD \\ + --model-type ${colabfold_model_preset} \\ + ${fasta} \\ + \$PWD + for i in `find *_relaxed_rank_001*.pdb`; do cp \$i `echo \$i | sed "s|_relaxed_rank_|\t|g" | cut -f1`"_colabfold.pdb"; done + for i in `find *.png -maxdepth 0`; do cp \$i \${i%'.png'}_mqc.png; done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + colabfold_batch: $VERSION + END_VERSIONS + """ + + stub: + def VERSION = '1.5.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ./"${fasta.baseName}"_colabfold.pdb + touch ./"${fasta.baseName}"_mqc.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + colabfold_batch: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/combine_uniprot.nf b/modules/local/combine_uniprot.nf index 7f4637b3..eaec3d48 100644 --- a/modules/local/combine_uniprot.nf +++ b/modules/local/combine_uniprot.nf @@ -1,42 +1,42 @@ -process COMBINE_UNIPROT { - label 'process_single' - - conda "conda-forge::sed=4.7" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" - - input: - path uniprot_sprot - path uniprot_trembl - - output: - path ('uniprot.fasta'), emit: ch_db - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - set -e - - cat ${uniprot_sprot} >> ${uniprot_trembl} - mv ${uniprot_trembl} uniprot.fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') - END_VERSIONS - """ - - stub: - """ - touch uniprot.fasta - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') - END_VERSIONS - """ -} +process COMBINE_UNIPROT { + label 'process_single' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + path uniprot_sprot + path uniprot_trembl + + output: + path ('uniprot.fasta'), emit: ch_db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + set -e + + cat ${uniprot_sprot} >> ${uniprot_trembl} + mv ${uniprot_trembl} uniprot.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch uniprot.fasta + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') + END_VERSIONS + """ +} diff --git a/modules/local/download_pdbmmcif.nf b/modules/local/download_pdbmmcif.nf index fef63755..12681c02 100644 --- a/modules/local/download_pdbmmcif.nf +++ b/modules/local/download_pdbmmcif.nf @@ -1,77 +1,78 @@ -/* - * Download PDB MMCIF database - */ -process DOWNLOAD_PDBMMCIF { - label 'process_low' - label 'error_retry' - - conda "bioconda::aria2=1.36.0 conda-forge::rsync=3.2.7" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-4a7c46784ad871c48746744c6b8dbc5d0a97b9ca:33e61a87922824f8afcecf88a7717a2d4cb514e9-0' : - 'biocontainers/mulled-v2-4a7c46784ad871c48746744c6b8dbc5d0a97b9ca:33e61a87922824f8afcecf88a7717a2d4cb514e9-0' }" - - input: - val source_url_pdb_mmcif - val source_url_pdb_obsolete - - output: - path ('*') , emit: ch_db - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - set -euo pipefail - - mkdir raw - - rsync \\ - --recursive \\ - --links \\ - --perms \\ - --times \\ - --compress \\ - --info=progress2 \\ - --delete \\ - --port=33444 \\ - $source_url_pdb_mmcif \\ - raw - - echo "Unzipping all mmCIF files..." - find ./raw -type f -name '*.[gG][zZ]' -exec gunzip {} \\; - - echo "Flattening all mmCIF files..." - mkdir mmcif_files - find ./raw -type d -empty -delete # Delete empty directories. - for subdir in ./raw/*; do - mv "\${subdir}/"*.cif ./mmcif_files - done - - # Delete empty download directory structure. - find ./raw -type d -empty -delete - - aria2c \\ - $source_url_pdb_obsolete - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sed: \$(echo \$(sed --version 2>&1) | head -1 | sed 's/^.*GNU sed) //; s/ .*\$//') - rsync: \$(rsync --version | head -1 | sed 's/^rsync version //; s/ protocol version [[:digit:]]*//') - aria2c: \$( aria2c -v | head -1 | sed 's/aria2 version //' ) - END_VERSIONS - """ - - stub: - """ - touch obsolete.dat - mkdir mmcif_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') - END_VERSIONS - """ -} +/* + * Download PDB MMCIF database + */ +process DOWNLOAD_PDBMMCIF { + tag "${source_url_pdb_mmcif}--${source_url_pdb_obsolete}" + label 'process_low' + label 'error_retry' + + conda "bioconda::aria2=1.36.0 conda-forge::rsync=3.2.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-4a7c46784ad871c48746744c6b8dbc5d0a97b9ca:33e61a87922824f8afcecf88a7717a2d4cb514e9-0' : + 'biocontainers/mulled-v2-4a7c46784ad871c48746744c6b8dbc5d0a97b9ca:33e61a87922824f8afcecf88a7717a2d4cb514e9-0' }" + + input: + val source_url_pdb_mmcif + val source_url_pdb_obsolete + + output: + path ('*') , emit: ch_db + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + set -euo pipefail + + mkdir raw + + rsync \\ + --recursive \\ + --links \\ + --perms \\ + --times \\ + --compress \\ + --info=progress2 \\ + --delete \\ + --port=33444 \\ + $source_url_pdb_mmcif \\ + raw + + echo "Unzipping all mmCIF files..." + find ./raw -type f -name '*.[gG][zZ]' -exec gunzip {} \\; + + echo "Flattening all mmCIF files..." + mkdir mmcif_files + find ./raw -type d -empty -delete # Delete empty directories. + for subdir in ./raw/*; do + mv "\${subdir}/"*.cif ./mmcif_files + done + + # Delete empty download directory structure. + find ./raw -type d -empty -delete + + aria2c \\ + $source_url_pdb_obsolete + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | head -1 | sed 's/^.*GNU sed) //; s/ .*\$//') + rsync: \$(rsync --version | head -1 | sed 's/^rsync version //; s/ protocol version [[:digit:]]*//') + aria2c: \$( aria2c -v | head -1 | sed 's/aria2 version //' ) + END_VERSIONS + """ + + stub: + """ + touch obsolete.dat + mkdir mmcif_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') + END_VERSIONS + """ +} diff --git a/modules/local/extract_outputs.nf b/modules/local/extract_outputs.nf new file mode 100644 index 00000000..fafbfec6 --- /dev/null +++ b/modules/local/extract_outputs.nf @@ -0,0 +1,42 @@ +process EXTRACT_OUTPUTS { + tag "$id" + label 'process_single' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2 module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_standard:dev" + + input: + tuple val(id), path(pkl_files) + + output: + tuple val(id), path ("*_msa.tsv"), optional: true, emit: msa_info + tuple val(id), path ("*_lddt_*.tsv"), optional: true, emit: lddt_info + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + script: + def args = task.ext.args ?: '' + //#for pkl_file in [\"${"\", \"".join(pkl_files)}\"]: + """ + #!/usr/bin/env python + import pickle + import os, sys + for pkl_file in [\"${pkl_files.join("\", \"")}\"]: + dict_data = pickle.load(open(pkl_file,'rb')) + if pkl_file.endswith("features.pkl"): + with open ("${id}_msa.tsv", "w") as out_f: + for val in dict_data['msa']: + out_f.write("\\t".join([str(x) for x in val]) + "\\n") + else: + model_id = os.path.basename(pkl_file).replace("result_model_", "").replace("_pred_0.pkl", "") + with open (f"${id}_lddt_{model_id}.tsv", "w") as out_f: + out_f.write("\\t".join([str(x) for x in dict_data['plddt']]) + "\\n") + with open ("versions.yml", "w") as version_file: + version_file.write("\\"${task.process}\\":\\n python: {}\\n".format(sys.version.split()[0].strip())) + """ +} diff --git a/modules/local/generat_report.nf b/modules/local/generat_report.nf new file mode 100644 index 00000000..1902ac3f --- /dev/null +++ b/modules/local/generat_report.nf @@ -0,0 +1,30 @@ +process GENERATE_REPORT { + tag "${meta.id}" + label 'process_single' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" + conda "bioconda::multiqc=1.21" + + input: + tuple val(meta_msa), path(msa) + tuple val(meta), path(lddt) + tuple val(meta), path(pdb) + path(template) + val(output_type) + + output: + tuple val(meta), path ("*report.html"), emit: report + tuple val(meta), path ("*.png"), optional: true, emit: images + tuple val(meta), path ("*_LDDT.html"), emit: lddt_images + //path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + script: + def args = task.ext.args ?: '' + + """ + generat_plots.py --type ${output_type} --msa ${msa} --plddt ${lddt.join(' ')} --pdb ${pdb.join(' ')} --html_template ${template} --output_dir ./ --name ${meta.id} + """ +} diff --git a/modules/local/mmseqs_colabfoldsearch.nf b/modules/local/mmseqs_colabfoldsearch.nf index 978a627e..c3afc9fd 100644 --- a/modules/local/mmseqs_colabfoldsearch.nf +++ b/modules/local/mmseqs_colabfoldsearch.nf @@ -1,57 +1,58 @@ -process MMSEQS_COLABFOLDSEARCH { - tag "$meta.id" - label 'process_high_memory' - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error("Local MMSEQS_COLABFOLDSEARCH module does not support Conda. Please use Docker / Singularity / Podman instead.") - } - - container "nf-core/proteinfold_colabfold:1.1.0" - - input: - tuple val(meta), path(fasta) - path ('db/params') - path colabfold_db - path uniref30 - - output: - tuple val(meta), path("${meta.id}.a3m"), emit: a3m - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def VERSION = '1.5.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
- - """ - ln -r -s $uniref30/uniref30_* ./db - ln -r -s $colabfold_db/colabfold_envdb* ./db - - /localcolabfold/colabfold-conda/bin/colabfold_search \\ - $args \\ - --threads $task.cpus ${fasta} \\ - ./db \\ - "result/" - - cp result/0.a3m ${meta.id}.a3m - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - colabfold_search: $VERSION - END_VERSIONS - """ - - stub: - def VERSION = '1.5.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ - touch ${meta.id}.a3m - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - colabfold_search: $VERSION - END_VERSIONS - """ -} +process MMSEQS_COLABFOLDSEARCH { + tag "$meta.id" + label 'process_high_memory' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local MMSEQS_COLABFOLDSEARCH module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_colabfold:1.1.1" + + input: + tuple val(meta), path(fasta) + path ('db/params') + path colabfold_db + path uniref30 + + output: + tuple val(meta), path("**.a3m"), emit: a3m + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def VERSION = '1.5.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + + """ + ln -r -s $uniref30/uniref30_* ./db + ln -r -s $colabfold_db/colabfold_envdb* ./db + + /localcolabfold/colabfold-conda/bin/colabfold_search \\ + $args \\ + --threads $task.cpus ${fasta} \\ + ./db \\ + "result/" + + cp result/0.a3m ${meta.id}.a3m + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + colabfold_search: $VERSION + END_VERSIONS + """ + + stub: + def VERSION = '1.5.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + mkdir results + touch results/${meta.id}.a3m + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + colabfold_search: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/multifasta_to_csv.nf b/modules/local/multifasta_to_csv.nf index d5d68fbf..5ea369cc 100644 --- a/modules/local/multifasta_to_csv.nf +++ b/modules/local/multifasta_to_csv.nf @@ -1,40 +1,40 @@ -process MULTIFASTA_TO_CSV { - tag "$meta.id" - label 'process_single' - - conda "conda-forge::sed=4.7" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("input.csv"), emit: input_csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - awk '/^>/ {printf("\\n%s\\n",\$0);next; } { printf("%s",\$0);} END {printf("\\n");}' ${fasta} > single_line.fasta - echo -e id,sequence'\\n'${meta.id},`awk '!/^>/ {print \$0}' single_line.fasta | tr '\\n' ':' | sed 's/:\$//' | sed 's/^://'` > input.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') - END_VERSIONS - """ - - stub: - """ - touch input.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') - END_VERSIONS - """ -} +process MULTIFASTA_TO_CSV { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("input.csv"), emit: input_csv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + awk '/^>/ {printf("\\n%s\\n",\$0);next; } { printf("%s",\$0);} END {printf("\\n");}' ${fasta} > single_line.fasta + echo -e id,sequence'\\n'${meta.id},`awk '!/^>/ {print \$0}' single_line.fasta | tr '\\n' ':' | sed 's/:\$//' | sed 's/^://'` > input.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch input.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/multifasta_to_singlefasta.nf b/modules/local/multifasta_to_singlefasta.nf index e9744416..e95737b7 100644 --- a/modules/local/multifasta_to_singlefasta.nf +++ b/modules/local/multifasta_to_singlefasta.nf @@ -1,40 +1,40 @@ -process MULTIFASTA_TO_SINGLEFASTA { - tag "$meta.id" - label 'process_single' - - conda "conda-forge::sed=4.7" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("${meta.id}.fasta"), emit: input_fasta - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - awk '/^>/ {printf("\\n%s\\n",\$0);next; } { printf("%s",\$0);} END {printf("\\n");}' ${fasta} > single_line.fasta - echo -e '>'${meta.id}'\\n'`awk '!/^>/ {print \$0}' single_line.fasta | tr '\\n' ':' | sed 's/:\$//' | sed 's/^://'` > ${meta.id}.fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') - END_VERSIONS - """ - - stub: - """ - touch ${meta.id}.fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') - END_VERSIONS - """ -} +process MULTIFASTA_TO_SINGLEFASTA { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("${meta.id}.fasta"), emit: input_fasta + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + awk '/^>/ {printf("\\n%s\\n",\$0);next; } { printf("%s",\$0);} END {printf("\\n");}' ${fasta} > single_line.fasta + echo -e '>'${meta.id}'\\n'`awk '!/^>/ {print \$0}' single_line.fasta | tr '\\n' ':' | sed 's/:\$//' | sed 's/^://'` > ${meta.id}.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 5607712d..cf37fa00 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -1,93 +1,104 @@ -/* - * Run Alphafold2 - */ -process RUN_ALPHAFOLD2 { - tag "$meta.id" - label 'process_medium' - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error("Local RUN_ALPHAFOLD2 module does not support Conda. Please use Docker / Singularity / Podman instead.") - } - - container "nf-core/proteinfold_alphafold2_standard:dev" - - input: - tuple val(meta), path(fasta) - val db_preset - val alphafold2_model_preset - path ('params/*') - path ('bfd/*') - path ('small_bfd/*') - path ('mgnify/*') - path ('pdb70/*') - path ('pdb_mmcif/*') - path ('uniref30/*') - path ('uniref90/*') - path ('pdb_seqres/*') - path ('uniprot/*') - - output: - path ("${fasta.baseName}*") - path "*_mqc.tsv", emit: multiqc - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def db_preset = db_preset ? 
"full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=./uniref30/UniRef30_2021_03" : - "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" - if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=./pdb_seqres/pdb_seqres.txt --uniprot_database_path=./uniprot/uniprot.fasta " - } - else { - alphafold2_model_preset += " --pdb70_database_path=./pdb70/pdb70_from_mmcif_200916/pdb70 " - } - """ - if [ -f pdb_seqres/pdb_seqres.txt ] - then sed -i "/^\\w*0/d" pdb_seqres/pdb_seqres.txt - fi - if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi - python3 /app/alphafold/run_alphafold.py \ - --fasta_paths=${fasta} \ - --model_preset=${alphafold2_model_preset} \ - --db_preset=${db_preset} \ - --output_dir=\$PWD \ - --data_dir=\$PWD \ - --uniref90_database_path=./uniref90/uniref90.fasta \ - --mgnify_database_path=./mgnify/mgy_clusters_2018_12.fa \ - --template_mmcif_dir=./pdb_mmcif/mmcif_files \ - --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ - --random_seed=53343 \ - $args - - cp "${fasta.baseName}"/ranked_0.pdb ./"${fasta.baseName}".alphafold.pdb - cd "${fasta.baseName}" - awk '{print \$6"\\t"\$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv - for i in 1 2 3 4 - do awk '{print \$6"\\t"\$11}' ranked_\$i.pdb | uniq | awk '{print \$2}' > ranked_"\$i"_plddt.tsv - done - paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv - echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv - cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv - cd .. - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python3 --version | sed 's/Python //g') - END_VERSIONS - """ - - stub: - """ - touch ./"${fasta.baseName}".alphafold.pdb - touch ./"${fasta.baseName}"_mqc.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') - END_VERSIONS - """ -} +/* + * Run Alphafold2 + */ +process RUN_ALPHAFOLD2 { + tag "$meta.id" + label 'process_medium' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2 module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_standard:1.1.1" + + input: + tuple val(meta), path(fasta) + val db_preset + val alphafold2_model_preset + path ('params/*') + path ('bfd/*') + path ('small_bfd/*') + path ('mgnify/*') + path ('pdb70/*') + path ('pdb_mmcif/*') + path ('uniref30/*') + path ('uniref90/*') + path ('pdb_seqres/*') + path ('uniprot/*') + + output: + tuple val(meta), path ("${fasta.baseName}*"), emit: af_out + tuple val(meta), path ("${fasta.baseName}/${fasta.baseName}*tsv"), emit: af_out_tsv + tuple val(meta), path ("${fasta.baseName}/ranked*pdb"), emit: af_out_pdb + path "*_mqc.tsv", emit: multiqc + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def db_preset = db_preset ? 
"full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=./uniref30/UniRef30_2021_03" : + "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" + if (alphafold2_model_preset == 'multimer') { + alphafold2_model_preset += " --pdb_seqres_database_path=./pdb_seqres/pdb_seqres.txt --uniprot_database_path=./uniprot/uniprot.fasta " + } + else { + alphafold2_model_preset += " --pdb70_database_path=./pdb70/pdb70_from_mmcif_200916/pdb70 " + } + """ + #if [ -f pdb_seqres/pdb_seqres.txt ] + # then sed -i "/^\\w*0/d" pdb_seqres/pdb_seqres.txt + #fi + if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi + python3 /app/alphafold/run_alphafold.py \ + --fasta_paths=${fasta} \ + --model_preset=${alphafold2_model_preset} \ + --db_preset=${db_preset} \ + --output_dir=\$PWD \ + --data_dir=\$PWD \ + --uniref90_database_path=./uniref90/uniref90.fasta \ + --mgnify_database_path=./mgnify/mgy_clusters_2018_12.fa \ + --template_mmcif_dir=./pdb_mmcif/mmcif_files \ + --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ + --random_seed=53343 \ + $args + + cp "${fasta.baseName}"/ranked_0.pdb ./"${fasta.baseName}".alphafold.pdb + cd "${fasta.baseName}" + awk '{print \$6"\\t"\$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv + for i in 1 2 3 4 + do awk '{print \$6"\\t"\$11}' ranked_\$i.pdb | uniq | awk '{print \$2}' > ranked_"\$i"_plddt.tsv + done + paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv + echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv + cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv + + extract_output.py --name ${fasta.baseName} \\ + --pkls result_model_1_pred_0.pkl \\ + result_model_2_pred_0.pkl \\ + result_model_3_pred_0.pkl \\ + result_model_4_pred_0.pkl \\ + result_model_5_pred_0.pkl \\ + features.pkl + + cd .. + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + touch ./"${fasta.baseName}".alphafold.pdb + touch ./"${fasta.baseName}"_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') + END_VERSIONS + """ +} diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 6c9bfcf8..d0ec5725 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -1,81 +1,81 @@ -/* - * Run Alphafold2 MSA - */ -process RUN_ALPHAFOLD2_MSA { - tag "$meta.id" - label 'process_medium' - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error("Local RUN_ALPHAFOLD2_MSA module does not support Conda. 
Please use Docker / Singularity / Podman instead.") - } - - container "nf-core/proteinfold_alphafold2_msa:dev" - - input: - tuple val(meta), path(fasta) - val db_preset - val alphafold2_model_preset - path ('params/*') - path ('bfd/*') - path ('small_bfd/*') - path ('mgnify/*') - path ('pdb70/*') - path ('pdb_mmcif/*') - path ('uniref30/*') - path ('uniref90/*') - path ('pdb_seqres/*') - path ('uniprot/*') - - output: - path ("${fasta.baseName}*") - path ("${fasta.baseName}.features.pkl"), emit: features - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=./uniref30/UniRef30_2021_03" : - "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" - if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=./pdb_seqres/pdb_seqres.txt --uniprot_database_path=./uniprot/uniprot.fasta " - } - else { - alphafold2_model_preset += " --pdb70_database_path=./pdb70/pdb70_from_mmcif_200916/pdb70 " - } - """ - if [ -f pdb_seqres/pdb_seqres.txt ] - then sed -i "/^\\w*0/d" pdb_seqres/pdb_seqres.txt - fi - python3 /app/alphafold/run_msa.py \ - --fasta_paths=${fasta} \ - --model_preset=${alphafold2_model_preset} \ - --db_preset=${db_preset} \ - --output_dir=\$PWD \ - --data_dir=\$PWD \ - --uniref90_database_path=./uniref90/uniref90.fasta \ - --mgnify_database_path=./mgnify/mgy_clusters_2018_12.fa \ - --template_mmcif_dir=./pdb_mmcif/mmcif_files \ - --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ - $args - - cp "${fasta.baseName}"/features.pkl ./"${fasta.baseName}".features.pkl - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python3 --version | sed 's/Python //g') - END_VERSIONS - """ - - stub: - """ - touch ./"${fasta.baseName}".features.pkl - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') - END_VERSIONS - """ -} +/* + * Run Alphafold2 MSA + */ +process RUN_ALPHAFOLD2_MSA { + tag "$meta.id" + label 'process_medium' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2_MSA module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_msa:1.1.1" + + input: + tuple val(meta), path(fasta) + val db_preset + val alphafold2_model_preset + path ('params/*') + path ('bfd/*') + path ('small_bfd/*') + path ('mgnify/*') + path ('pdb70/*') + path ('pdb_mmcif/*') + path ('uniref30/*') + path ('uniref90/*') + path ('pdb_seqres/*') + path ('uniprot/*') + + output: + tuple val(meta), path ("${fasta.baseName}*") + tuple val(meta), path ("${fasta.baseName}.features.pkl"), emit: features + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def db_preset = db_preset ? 
"full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=./uniref30/UniRef30_2021_03" : + "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" + if (alphafold2_model_preset == 'multimer') { + alphafold2_model_preset += " --pdb_seqres_database_path=./pdb_seqres/pdb_seqres.txt --uniprot_database_path=./uniprot/uniprot.fasta " + } + else { + alphafold2_model_preset += " --pdb70_database_path=./pdb70/pdb70_from_mmcif_200916/pdb70 " + } + """ + #if [ -f pdb_seqres/pdb_seqres.txt ] + # then sed -i "/^\\w*0/d" pdb_seqres/pdb_seqres.txt + #fi + python3 /app/alphafold/run_msa.py \ + --fasta_paths=${fasta} \ + --model_preset=${alphafold2_model_preset} \ + --db_preset=${db_preset} \ + --output_dir=\$PWD \ + --data_dir=\$PWD \ + --uniref90_database_path=./uniref90/uniref90.fasta \ + --mgnify_database_path=./mgnify/mgy_clusters_2018_12.fa \ + --template_mmcif_dir=./pdb_mmcif/mmcif_files \ + --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ + $args + + cp "${fasta.baseName}"/features.pkl ./"${fasta.baseName}".features.pkl + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + touch ./"${fasta.baseName}".features.pkl + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') + END_VERSIONS + """ +} diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index ee9983c5..b3713e35 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -1,79 +1,90 @@ -/* - * Run Alphafold2 PRED - */ -process RUN_ALPHAFOLD2_PRED { - tag "$meta.id" - label 'process_medium' - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error("Local RUN_ALPHAFOLD2_PRED module does not support Conda. Please use Docker / Singularity / Podman instead.") - } - - container "nf-core/proteinfold_alphafold2_split:dev" - - input: - tuple val(meta), path(fasta) - val db_preset - val alphafold2_model_preset - path ('params/*') - path ('bfd/*') - path ('small_bfd/*') - path ('mgnify/*') - path ('pdb70/*') - path ('pdb_mmcif/*') - path ('uniref30/*') - path ('uniref90/*') - path ('pdb_seqres/*') - path ('uniprot/*') - path msa - - output: - path ("${fasta.baseName}*") - path "*_mqc.tsv", emit: multiqc - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi - python3 /app/alphafold/run_predict.py \ - --fasta_paths=${fasta} \ - --model_preset=${alphafold2_model_preset} \ - --output_dir=\$PWD \ - --data_dir=\$PWD \ - --random_seed=53343 \ - --msa_path=${msa} \ - $args - - cp "${fasta.baseName}"/ranked_0.pdb ./"${fasta.baseName}".alphafold.pdb - cd "${fasta.baseName}" - awk '{print \$6"\\t"\$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv - for i in 1 2 3 4 - do awk '{print \$6"\\t"\$11}' ranked_\$i.pdb | uniq | awk '{print \$2}' > ranked_"\$i"_plddt.tsv - done - paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv - echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv - cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv - cd .. 
- - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python3 --version | sed 's/Python //g') - END_VERSIONS - """ - - stub: - """ - touch ./"${fasta.baseName}".alphafold.pdb - touch ./"${fasta.baseName}"_mqc.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') - END_VERSIONS - """ -} +/* + * Run Alphafold2 PRED + */ +process RUN_ALPHAFOLD2_PRED { + tag "$meta.id" + label 'process_medium' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2_PRED module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_split:1.1.1" + + input: + tuple val(meta), path(fasta) + val db_preset + val alphafold2_model_preset + path ('params/*') + path ('bfd/*') + path ('small_bfd/*') + path ('mgnify/*') + path ('pdb70/*') + path ('pdb_mmcif/*') + path ('uniref30/*') + path ('uniref90/*') + path ('pdb_seqres/*') + path ('uniprot/*') + path msa + + output: + tuple val(meta), path ("${fasta.baseName}*") + tuple val(meta), path ("${fasta.baseName}/${fasta.baseName}*tsv"), emit: af_out_tsv + tuple val(meta), path ("${fasta.baseName}/ranked*pdb"), emit: af_out_pdb + path "*_mqc.tsv", emit: multiqc + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi + python3 /app/alphafold/run_predict.py \ + --fasta_paths=${fasta} \ + --model_preset=${alphafold2_model_preset} \ + --output_dir=\$PWD \ + --data_dir=\$PWD \ + --random_seed=53343 \ + --msa_path=${msa} \ + $args + + cp "${fasta.baseName}"/ranked_0.pdb ./"${fasta.baseName}".alphafold.pdb + cd "${fasta.baseName}" + awk '{print \$6"\\t"\$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv + for i in 1 2 3 4 + do awk '{print \$6"\\t"\$11}' ranked_\$i.pdb | uniq | awk '{print \$2}' > ranked_"\$i"_plddt.tsv + done + paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv + echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv + cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv + + extract_output.py --name ${fasta.baseName} \\ + --pkls result_model_1_pred_0.pkl \\ + result_model_2_pred_0.pkl \\ + result_model_3_pred_0.pkl \\ + result_model_4_pred_0.pkl \\ + result_model_5_pred_0.pkl \\ + ../${fasta.baseName}.features.pkl + + cd .. + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + touch ./"${fasta.baseName}".alphafold.pdb + touch ./"${fasta.baseName}"_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') + END_VERSIONS + """ +} diff --git a/modules/local/run_esmfold.nf b/modules/local/run_esmfold.nf index 5f7a25ce..d3dc2d24 100644 --- a/modules/local/run_esmfold.nf +++ b/modules/local/run_esmfold.nf @@ -1,57 +1,58 @@ -process RUN_ESMFOLD { - tag "$meta.id" - label 'process_medium' - - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error("Local RUN_ESMFOLD module does not support Conda. 
Please use Docker / Singularity / Podman instead.") - } - - container "nf-core/proteinfold_esmfold:1.1.0" - - input: - tuple val(meta), path(fasta) - path ('./checkpoints/') - val numRec - - output: - path ("${fasta.baseName}*.pdb"), emit: pdb - path ("${fasta.baseName}_plddt_mqc.tsv"), emit: multiqc - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - - """ - esm-fold \ - -i ${fasta} \ - -o \$PWD \ - -m \$PWD \ - --num-recycles ${numRec} \ - $args - - awk '{print \$2"\\t"\$3"\\t"\$4"\\t"\$6"\\t"\$11}' "${fasta.baseName}"*.pdb | grep -v 'N/A' | uniq > plddt.tsv - echo -e Atom_serial_number"\\t"Atom_name"\\t"Residue_name"\\t"Residue_sequence_number"\\t"pLDDT > header.tsv - cat header.tsv plddt.tsv > "${fasta.baseName}"_plddt_mqc.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - esm-fold: $VERSION - END_VERSIONS - """ - - stub: - def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ - touch ./"${fasta.baseName}".pdb - touch ./"${fasta.baseName}"_plddt_mqc.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - esm-fold: $VERSION - END_VERSIONS - """ -} +process RUN_ESMFOLD { + tag "$meta.id" + label 'process_medium' + + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ESMFOLD module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_esmfold:1.1.1" + + input: + tuple val(meta), path(fasta) + path (esm_fold_parms) + val numRec + + output: + tuple val(meta), path ("${fasta.baseName}*.pdb"), emit: pdb + tuple val(meta), path ("${fasta.baseName}_plddt_mqc.tsv"), emit: multiqc + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + + """ + esm-fold \ + -i ${fasta} \ + -o \$PWD \ + -m \$PWD \ + --num-recycles ${numRec} \ + $args + + mv *.pdb "${fasta.baseName}".pdb + awk '{print \$2"\\t"\$3"\\t"\$4"\\t"\$6"\\t"\$11}' "${fasta.baseName}"*.pdb | grep -v 'N/A' | uniq > plddt.tsv + echo -e Atom_serial_number"\\t"Atom_name"\\t"Residue_name"\\t"Residue_sequence_number"\\t"pLDDT > header.tsv + cat header.tsv plddt.tsv > "${fasta.baseName}"_plddt_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + esm-fold: $VERSION + END_VERSIONS + """ + + stub: + def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
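+        // The version string is pinned by hand because the esm-fold CLI does
+        // not report one; keep it in sync with the container tag above.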
+ """ + touch ./"${fasta.baseName}".pdb + touch ./"${fasta.baseName}"_plddt_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + esm-fold: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/z_run_alphafold2.nf b/modules/local/z_run_alphafold2.nf new file mode 100644 index 00000000..41a0d24f --- /dev/null +++ b/modules/local/z_run_alphafold2.nf @@ -0,0 +1,57 @@ +/* + * Run Alphafold2 + */ +process RUN_ALPHAFOLD2 { + tag "$meta.id" + label 'process_medium' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2 module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_standard:1.1.1" + + input: + tuple val(meta), path(fasta) + val db_preset + val alphafold2_model_preset + path ('params/*') + path ('bfd/*') + path ('small_bfd/*') + path ('mgnify/*') + path ('pdb70/*') + path ('pdb_mmcif/*') + path ('uniref30/*') + path ('uniref90/*') + path ('pdb_seqres/*') + path ('uniprot/*') + + output: + tuple val(meta), path ("${fasta.baseName}*"), emit: af_out + tuple val(meta), path ("${fasta.baseName}/${fasta.baseName}*tsv"), emit: af_out_tsv + tuple val(meta), path ("${fasta.baseName}/ranked*pdb"), emit: af_out_pdb + path "*_mqc.tsv", emit: multiqc + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + + """ + cp -r /mnt/d/01852933ca43cd53eb240fcf350f32/* ./ + + """ + + stub: + """ + touch ./"${fasta.baseName}".alphafold.pdb + touch ./"${fasta.baseName}"_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') + END_VERSIONS + """ +} diff --git a/modules/local/z_run_esmfold.nf b/modules/local/z_run_esmfold.nf new file mode 100644 index 00000000..44475aec --- /dev/null +++ b/modules/local/z_run_esmfold.nf @@ -0,0 +1,45 @@ +process RUN_ESMFOLD { + tag "$meta.id" + label 'process_medium' + + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ESMFOLD module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_esmfold:1.1.1" + + input: + tuple val(meta), path(fasta) + path (esm_fold_parms) + val numRec + + output: + tuple val(meta), path ("${fasta.baseName}*.pdb"), emit: pdb + tuple val(meta), path ("${fasta.baseName}_plddt_mqc.tsv"), emit: multiqc + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + + """ + cp -r /mnt/d/01852933ca43cd53eb240fcf350f32/* ./ + cp T1026.1_plddt_mqc.tsv T1026_plddt_mqc.tsv + + """ + + stub: + def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + touch ./"${fasta.baseName}".pdb + touch ./"${fasta.baseName}"_plddt_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + esm-fold: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/aria2/environment.yml b/modules/nf-core/aria2/environment.yml new file mode 100755 index 00000000..4a3d95e9 --- /dev/null +++ b/modules/nf-core/aria2/environment.yml @@ -0,0 +1,7 @@ +name: aria2 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::aria2=1.36.0 \ No newline at end of file diff --git a/modules/nf-core/aria2/main.nf b/modules/nf-core/aria2/main.nf index af595a9f..3bf9735e 100644 --- a/modules/nf-core/aria2/main.nf +++ b/modules/nf-core/aria2/main.nf @@ -1,38 +1,47 @@ - -process ARIA2 { - tag "$source_url" - label 'process_single' - - conda "conda-forge::aria2=1.36.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/aria2:1.36.0' : - 'biocontainers/aria2:1.36.0' }" - - input: - val source_url - - output: - path ("$downloaded_file"), emit: downloaded_file - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - downloaded_file = source_url.split("/")[-1] - - """ - set -e - - aria2c \\ - --check-certificate=false \\ - $args \\ - $source_url - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - aria2: \$(echo \$(aria2c --version 2>&1) | grep 'aria2 version' | cut -f3 -d ' ') - END_VERSIONS - """ -} +process ARIA2 { + tag "$source_url" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/aria2:1.36.0' : + 'biocontainers/aria2:1.36.0' }" + + input: + tuple val(meta), val(source_url) + + output: + tuple val(meta), path("$downloaded_file"), emit: downloaded_file + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + downloaded_file = source_url.split("/")[-1] + + """ + aria2c \\ + --check-certificate=false \\ + $args \\ + $source_url + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + aria2: \$(echo \$(aria2c --version 2>&1) | grep 'aria2 version' | cut -f3 -d ' ') + END_VERSIONS + """ + + stub: + downloaded_file = source_url.split("/")[-1] + + """ + touch ${downloaded_file} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + aria2: \$(echo \$(aria2c --version 2>&1) | grep 'aria2 version' | cut -f3 -d ' ') + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/aria2/meta.yml b/modules/nf-core/aria2/meta.yml index 64c2a524..578ac7c2 100644 --- a/modules/nf-core/aria2/meta.yml +++ b/modules/nf-core/aria2/meta.yml @@ -1,30 +1,30 @@ -name: "aria2" -description: CLI Download utility -keywords: - - download -tools: - - "aria2": - description: "aria2 is a lightweight multi-protocol & multi-source, cross platform download utility operated in command-line. It supports HTTP/HTTPS, FTP, SFTP, BitTorrent and Metalink." 
- - tool_dev_url: "https://github.com/aria2/aria2/" - - licence: "['GPL v2']" - -input: - - source_url: - type: url - description: Source URL to be downloaded - pattern: "{http,https}*" - -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - downloaded_file: - type: file - description: Downloaded files from source - pattern: "*.*" - -authors: - - "@JoseEspinosa" +name: "aria2" +description: CLI Download utility +keywords: + - download +tools: + - "aria2": + description: "aria2 is a lightweight multi-protocol & multi-source, cross platform download utility operated in command-line. It supports HTTP/HTTPS, FTP, SFTP, BitTorrent and Metalink." + + tool_dev_url: "https://github.com/aria2/aria2/" + + licence: "['GPL v2']" + +input: + - source_url: + type: url + description: Source URL to be downloaded + pattern: "{http,https}*" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - downloaded_file: + type: file + description: Downloaded files from source + pattern: "*.*" + +authors: + - "@JoseEspinosa" diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 00000000..e2ed3f80 --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf new file mode 100644 index 00000000..83d696ee --- /dev/null +++ b/modules/nf-core/fastqc/main.nf @@ -0,0 +1,55 @@ +process FASTQC { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? 
[[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + + fastqc \\ + $args \\ + --threads $task.cpus \\ + $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml new file mode 100644 index 00000000..833a6f30 --- /dev/null +++ b/modules/nf-core/fastqc/meta.yml @@ -0,0 +1,57 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 00000000..a9ef2e26 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,212 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("sarscov2 single-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
+            // <div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_single") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert 
process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_multiple") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match("fastqc_stub") } + ) + } + } + +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 00000000..fab8640e --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "fastqc_versions_interleaved": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + 
] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" + }, + "fastqc_versions_custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:41:14.576531" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 00000000..646e3824 --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/foldseek/easysearch/environment.yml b/modules/nf-core/foldseek/easysearch/environment.yml new file mode 100644 index 00000000..ebc2c484 --- /dev/null +++ b/modules/nf-core/foldseek/easysearch/environment.yml @@ -0,0 +1,7 @@ +name: foldseek_easysearch +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::foldseek=9.427df8a diff --git a/modules/nf-core/foldseek/easysearch/main.nf b/modules/nf-core/foldseek/easysearch/main.nf new file mode 100644 index 00000000..a5a5aa9f --- /dev/null +++ b/modules/nf-core/foldseek/easysearch/main.nf @@ -0,0 +1,51 @@ +process FOLDSEEK_EASYSEARCH { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/foldseek:9.427df8a--pl5321hb365157_1': + 'biocontainers/foldseek:9.427df8a--pl5321hb365157_1' }" + + input: + tuple val(meta) , path(pdb) + tuple val(meta_db), path(db) + + output: + tuple val(meta), path("${meta.id}.m8"), emit: aln + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + foldseek \\ + easy-search \\ + ${pdb} \\ + ${db}/${meta_db.id} \\ + ${prefix}.m8 \\ + tmpFolder \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + foldseek: \$(foldseek --help | grep Version | sed 's/.*Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.m8 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + foldseek: \$(foldseek --help | grep Version | sed 's/.*Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/foldseek/easysearch/meta.yml b/modules/nf-core/foldseek/easysearch/meta.yml new file mode 100644 index 00000000..dcf44388 --- /dev/null +++ b/modules/nf-core/foldseek/easysearch/meta.yml @@ -0,0 +1,54 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "foldseek_easysearch" +description: Search for protein structural hits against a foldseek database of protein structures +keywords: + - protein + - structure + - comparisons +tools: + - "foldseek": + description: "Foldseek: fast and accurate protein structure search" + homepage: "https://search.foldseek.com/search" + documentation: "https://github.com/steineggerlab/foldseek" + tool_dev_url: "https://github.com/steineggerlab/foldseek" + doi: "10.1038/s41587-023-01773-0" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - pdb: + type: file + description: Protein structure(s) in PDB, mmCIF or mmJSON format to compare against a foldseek database (also works with folder input) + pattern: "*.{pdb,mmcif,mmjson}" + - meta_db: + type: map + description: | + Groovy Map containing sample information for the foldseek db + e.g. `[ id:'test', single_end:false ]` + - db: + type: directory + description: foldseek database from protein structures + pattern: "*" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - aln: + type: file + description: | + Structural comparisons file output + Query, Target, Identity, Alignment length, Mismatches, Gap openings, + Query start, Query end, Target start, Target end, E-value, Bit score + pattern: "*.{m8}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@vagkaratzas" diff --git a/modules/nf-core/foldseek/easysearch/tests/main.nf.test b/modules/nf-core/foldseek/easysearch/tests/main.nf.test new file mode 100644 index 00000000..5b3d6361 --- /dev/null +++ b/modules/nf-core/foldseek/easysearch/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + + name "Test Process FOLDSEEK_EASYSEARCH" + script "../main.nf" + process "FOLDSEEK_EASYSEARCH" + tag "modules" + tag "modules_nfcore" + tag "foldseek" + tag "foldseek/createdb" + tag "foldseek/easysearch" + + setup { + run("FOLDSEEK_CREATEDB") { + script "../../createdb/main.nf" + process { + """ + input[0] = [ [ id:'test_db' ], [ file(params.modules_testdata_base_path + 'proteomics/pdb/1tim.pdb', checkIfExists: true) ] ] + """ + } + } + } + + test("proteomics - pdb") { + + when { + process { + """ + input[0] = [ [ id:'test_search' ], [ file(params.modules_testdata_base_path + 'proteomics/pdb/8tim.pdb', checkIfExists: true) ] ] + input[1] = FOLDSEEK_CREATEDB.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.aln.get(0).get(1)).readLines().contains("8tim_A\t1tim_A\t0.967\t247\t8\t0\t1\t247\t1\t247\t1.152E-43\t1523") }, + { assert process.out.versions } + ) + } + + } + + test("proteomics - pdb -stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test_search' ], [ file(params.modules_testdata_base_path + 'proteomics/pdb/8tim.pdb', checkIfExists: true) ] ] + input[1] = FOLDSEEK_CREATEDB.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/foldseek/easysearch/tests/main.nf.test.snap b/modules/nf-core/foldseek/easysearch/tests/main.nf.test.snap new file mode 100644 index 00000000..cd65740a --- /dev/null +++ b/modules/nf-core/foldseek/easysearch/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "proteomics - pdb -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_search" + }, + "test_search.m8:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,ddc75b2e08b63a7082ecad353073fd3b" + ], + "aln": [ + [ + { + "id": "test_search" + }, + "test_search.m8:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ddc75b2e08b63a7082ecad353073fd3b" + ] + } + ], + "timestamp": "2024-07-02T13:55:57.915188646" + } +} \ No newline at end of file diff --git a/modules/nf-core/foldseek/easysearch/tests/tags.yml b/modules/nf-core/foldseek/easysearch/tests/tags.yml new file mode 100644 index 00000000..c25a6301 --- /dev/null +++ 
b/modules/nf-core/foldseek/easysearch/tests/tags.yml @@ -0,0 +1,3 @@ +foldseek/easysearch: + - modules/nf-core/foldseek/easysearch/** + - modules/nf-core/foldseek/createdb/** diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index e7189d2f..6dd7c66d 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -1,44 +1,44 @@ -process GUNZIP { - tag "$archive" - label 'process_single' - - conda "conda-forge::sed=4.7" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" - - input: - tuple val(meta), path(archive) - - output: - tuple val(meta), path("$gunzip"), emit: gunzip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - gunzip = archive.toString() - '.gz' - """ - gunzip \\ - -f \\ - $args \\ - $archive - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') - END_VERSIONS - """ - - stub: - gunzip = archive.toString() - '.gz' - """ - touch $gunzip - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') - END_VERSIONS - """ -} +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + gunzip = archive.toString() - '.gz' + """ + gunzip \\ + -f \\ + $args \\ + $archive + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 4cdcdf4c..5346b39f 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -1,35 +1,35 @@ -name: gunzip -description: Compresses and decompresses files. -keywords: - - gunzip - - compression - - decompression -tools: - - gunzip: - description: | - gzip is a file format and a software application used for file compression and decompression. - documentation: https://www.gnu.org/software/gzip/manual/gzip.html - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. [ id:'test', single_end:false ] - - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" -output: - - gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@jfy133" +name: gunzip +description: Compresses and decompresses files. 
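+# NOTE: the module itself only decompresses (its script runs `gunzip -f`);
+# the "compresses" half of this description is inherited from the gzip tool.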
+keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/mmseqs/createindex/main.nf b/modules/nf-core/mmseqs/createindex/main.nf index f5c75e6d..4cd605a1 100644 --- a/modules/nf-core/mmseqs/createindex/main.nf +++ b/modules/nf-core/mmseqs/createindex/main.nf @@ -1,47 +1,47 @@ -process MMSEQS_CREATEINDEX { - tag "$db" - label 'process_high' - - conda "bioconda::mmseqs2=14.7e284" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mmseqs2:14.7e284--pl5321hf1761c0_0': - 'biocontainers/mmseqs2:14.7e284--pl5321hf1761c0_0' }" - - input: - path db - - output: - path(db) , emit: db_indexed - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - DB_PATH_NAME=\$(find -L "$db/" -name "*_seq.tsv" | sed 's/_seq\\.tsv\$//') - - mmseqs createindex \\ - \${DB_PATH_NAME} \\ - tmp1 \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') - END_VERSIONS - """ - - stub: - """ - DB_PATH_NAME=\$(find -L "$db/" -name "*_seq.tsv" | sed 's/_seq\\.tsv\$//') - - touch "\${DB_PATH_NAME}.idx" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') - END_VERSIONS - """ -} +process MMSEQS_CREATEINDEX { + tag "$db" + label 'process_high' + + conda "bioconda::mmseqs2=14.7e284" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mmseqs2:14.7e284--pl5321hf1761c0_0': + 'biocontainers/mmseqs2:14.7e284--pl5321hf1761c0_0' }" + + input: + path db + + output: + path(db) , emit: db_indexed + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + DB_PATH_NAME=\$(find -L "$db/" -name "*_seq.tsv" | sed 's/_seq\\.tsv\$//') + + mmseqs createindex \\ + \${DB_PATH_NAME} \\ + tmp1 \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ + + stub: + """ + DB_PATH_NAME=\$(find -L "$db/" -name "*_seq.tsv" | sed 's/_seq\\.tsv\$//') + + touch "\${DB_PATH_NAME}.idx" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mmseqs/createindex/meta.yml b/modules/nf-core/mmseqs/createindex/meta.yml index 1d50c235..9c77f72f 100644 --- a/modules/nf-core/mmseqs/createindex/meta.yml +++ b/modules/nf-core/mmseqs/createindex/meta.yml @@ -1,38 +1,38 @@ -name: "mmseqs_createindex" -description: Creates sequence index for mmseqs database -keywords: - - protein sequence - - databases - - clustering - - searching - - indexing -tools: - - "mmseqs": - description: "MMseqs2: ultra fast and sensitive sequence search and clustering suite" - homepage: "https://github.com/soedinglab/MMseqs2" - documentation: "https://mmseqs.com/latest/userguide.pdf" - tool_dev_url: "https://github.com/soedinglab/MMseqs2" - doi: "10.1093/bioinformatics/btw006" - licence: "['GPL v3']" - -input: - - db: - type: directory - description: | - Directory containing the DB to be indexed - pattern: "*" - -output: - - versions: - type: file - description: | - File containing software versions - pattern: "versions.yml" - - db_indexed: - type: directory - description: | - Directory containing the DB and the generated indexes - pattern: "*" - -authors: - - "@JoseEspinosa" +name: "mmseqs_createindex" +description: Creates sequence index for mmseqs database +keywords: + - protein sequence + - databases + - clustering + - searching + - indexing +tools: + - "mmseqs": + description: "MMseqs2: ultra fast and sensitive sequence search and clustering suite" + homepage: "https://github.com/soedinglab/MMseqs2" + documentation: "https://mmseqs.com/latest/userguide.pdf" + tool_dev_url: "https://github.com/soedinglab/MMseqs2" + doi: "10.1093/bioinformatics/btw006" + licence: "['GPL v3']" + +input: + - db: + type: directory + description: | + Directory containing the DB to be indexed + pattern: "*" + +output: + - versions: + type: file + description: | + File containing software versions + pattern: "versions.yml" + - db_indexed: + type: directory + description: | + Directory containing the DB and the generated indexes + pattern: "*" + +authors: + - "@JoseEspinosa" diff --git a/modules/nf-core/mmseqs/tsv2exprofiledb/main.nf b/modules/nf-core/mmseqs/tsv2exprofiledb/main.nf index b10017b1..3edd9caf 100644 --- a/modules/nf-core/mmseqs/tsv2exprofiledb/main.nf +++ b/modules/nf-core/mmseqs/tsv2exprofiledb/main.nf @@ -1,47 +1,47 @@ -process MMSEQS_TSV2EXPROFILEDB { - tag "$db" - label 'process_high' - - conda "bioconda::mmseqs2=14.7e284" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mmseqs2:14.7e284--pl5321hf1761c0_0': - 'biocontainers/mmseqs2:14.7e284--pl5321hf1761c0_0' }" - - input: - path db - - output: - path (db) , emit: db_exprofile - path "versions.yml", emit: versions - // INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - DB_PATH_NAME=\$(find -L "$db/" -name "*_seq.tsv" | sed 's/_seq\\.tsv\$//') - - mmseqs tsv2exprofiledb \\ - \${DB_PATH_NAME} \\ - "\${DB_PATH_NAME}_db" \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') - END_VERSIONS - """ - - stub: - """ - DB_PATH_NAME=\$(find -L "$db/" -name "*_seq.tsv" | sed 's/_seq\\.tsv\$//') - - touch "\${DB_PATH_NAME}_db" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') - END_VERSIONS - """ -} +process MMSEQS_TSV2EXPROFILEDB { + tag "$db" + label 'process_high' + + conda "bioconda::mmseqs2=14.7e284" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mmseqs2:14.7e284--pl5321hf1761c0_0': + 'biocontainers/mmseqs2:14.7e284--pl5321hf1761c0_0' }" + + input: + path db + + output: + path (db) , emit: db_exprofile + path "versions.yml", emit: versions + // INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + DB_PATH_NAME=\$(find -L "$db/" -name "*_seq.tsv" | sed 's/_seq\\.tsv\$//') + + mmseqs tsv2exprofiledb \\ + \${DB_PATH_NAME} \\ + "\${DB_PATH_NAME}_db" \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ + + stub: + """ + DB_PATH_NAME=\$(find -L "$db/" -name "*_seq.tsv" | sed 's/_seq\\.tsv\$//') + + touch "\${DB_PATH_NAME}_db" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mmseqs/tsv2exprofiledb/meta.yml b/modules/nf-core/mmseqs/tsv2exprofiledb/meta.yml index fe01b1d8..8f20d06a 100644 --- a/modules/nf-core/mmseqs/tsv2exprofiledb/meta.yml +++ b/modules/nf-core/mmseqs/tsv2exprofiledb/meta.yml @@ -1,38 +1,38 @@ -name: "mmseqs_tsv2exprofiledb" -description: Conversion of expandable profile to databases to the MMseqs2 databases format -keywords: - - protein sequence - - databases - - clustering - - searching - - indexing -tools: - - "mmseqs": - description: "MMseqs2: ultra fast and sensitive sequence search and clustering suite" - homepage: "https://github.com/soedinglab/MMseqs2" - documentation: "https://mmseqs.com/latest/userguide.pdf" - tool_dev_url: "https://github.com/soedinglab/MMseqs2" - doi: "10.1093/bioinformatics/btw006" - licence: "['GPL v3']" - -input: - - db: - type: directory - description: | - Directory containing the DB to be indexed - pattern: "*" - -output: - - versions: - type: file - description: | - File containing software versions - pattern: "versions.yml" - - db_indexed: - type: directory - description: | - Directory containing the DB and the generated indexes - pattern: "*" - -authors: - - "@JoseEspinosa" +name: "mmseqs_tsv2exprofiledb" +description: Conversion of expandable profile to databases to the MMseqs2 databases format +keywords: + - protein 
sequence + - databases + - clustering + - searching + - indexing +tools: + - "mmseqs": + description: "MMseqs2: ultra fast and sensitive sequence search and clustering suite" + homepage: "https://github.com/soedinglab/MMseqs2" + documentation: "https://mmseqs.com/latest/userguide.pdf" + tool_dev_url: "https://github.com/soedinglab/MMseqs2" + doi: "10.1093/bioinformatics/btw006" + licence: "['GPL v3']" + +input: + - db: + type: directory + description: | + Directory containing the DB to be indexed + pattern: "*" + +output: + - versions: + type: file + description: | + File containing software versions + pattern: "versions.yml" + - db_indexed: + type: directory + description: | + Directory containing the DB and the generated indexes + pattern: "*" + +authors: + - "@JoseEspinosa" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index ca39fb67..d701bd3b 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,7 +1,7 @@ -name: multiqc -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::multiqc=1.21 +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.21 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 47ac352f..c3f35062 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,55 +1,55 @@ -process MULTIQC { - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : - 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" - - input: - path multiqc_files, stageAs: "?/*" - path(multiqc_config) - path(extra_multiqc_config) - path(multiqc_logo) - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def config = multiqc_config ? "--config $multiqc_config" : '' - def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' - def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' - """ - multiqc \\ - --force \\ - $args \\ - $config \\ - $extra_config \\ - $logo \\ - . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ - - stub: - """ - mkdir multiqc_data - touch multiqc_plots - touch multiqc_report.html - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} +process MULTIQC { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $extra_config \\ + $logo \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + mkdir multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 45a9bc35..f50f7641 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,58 +1,58 @@ -name: multiqc -description: Aggregate results from bioinformatics analyses across many samples into a single report -keywords: - - QC - - bioinformatics tools - - Beautiful stand-alone HTML report -tools: - - multiqc: - description: | - MultiQC searches a given directory for analysis logs and compiles a HTML report. - It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. - homepage: https://multiqc.info/ - documentation: https://multiqc.info/docs/ - licence: ["GPL-3.0-or-later"] -input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. - pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" -output: - - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - type: directory - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@abhi18av" - - "@bunop" - - "@drpatelh" - - "@jfy133" -maintainers: - - "@abhi18av" - - "@bunop" - - "@drpatelh" - - "@jfy133" +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples into a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. 
+ homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] +input: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" +output: + - report: + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + type: file + description: Plots created by MultiQC + pattern: "*_data" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index f1c4242e..9d34ee91 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -1,84 +1,84 @@ -nextflow_process { - - name "Test Process MULTIQC" - script "../main.nf" - process "MULTIQC" - - tag "modules" - tag "modules_nfcore" - tag "multiqc" - - test("sarscov2 single-end [fastqc]") { - - when { - process { - """ - input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) - input[1] = [] - input[2] = [] - input[3] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, - { assert process.out.data[0] ==~ ".*/multiqc_data" }, - { assert snapshot(process.out.versions).match("multiqc_versions_single") } - ) - } - - } - - test("sarscov2 single-end [fastqc] [config]") { - - when { - process { - """ - input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) - input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) - input[2] = [] - input[3] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, - { assert process.out.data[0] ==~ ".*/multiqc_data" }, - { assert snapshot(process.out.versions).match("multiqc_versions_config") } - ) - } - } - - test("sarscov2 single-end [fastqc] - stub") { - - options "-stub" - - when { - process { - """ - input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) - input[1] = [] - input[2] = [] - input[3] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.report.collect { file(it).getName() } + - process.out.data.collect { file(it).getName() } + - process.out.plots.collect { file(it).getName() } + - process.out.versions ).match("multiqc_stub") } - ) - } - - } -} +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end 
[fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index bfebd802..0a6829e6 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -1,41 +1,41 @@ -{ - "multiqc_versions_single": { - "content": [ - [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-29T08:48:55.657331" - }, - "multiqc_stub": { - "content": [ - [ - "multiqc_report.html", - "multiqc_data", - "multiqc_plots", - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-29T08:49:49.071937" - }, - "multiqc_versions_config": { - "content": [ - [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-29T08:49:25.457567" - } +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:48:55.657331" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:49.071937" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:25.457567" + } } \ No newline at end of file diff --git 
a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml index bea6c0d3..0500fde5 100644 --- a/modules/nf-core/multiqc/tests/tags.yml +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -1,2 +1,2 @@ -multiqc: - - modules/nf-core/multiqc/** +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 8cd1856c..b0da43e2 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -1,63 +1,63 @@ -process UNTAR { - tag "$archive" - label 'process_single' - - conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" - - input: - tuple val(meta), path(archive) - - output: - tuple val(meta), path("$prefix"), emit: untar - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) - - """ - mkdir $prefix - - ## Ensures --strip-components only applied when top level of tar contents is a directory - ## If just files or multiple directories, place all in prefix - if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then - tar \\ - -C $prefix --strip-components 1 \\ - -xavf \\ - $args \\ - $archive \\ - $args2 - else - tar \\ - -C $prefix \\ - -xavf \\ - $args \\ - $archive \\ - $args2 - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS - """ - - stub: - prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) - """ - mkdir $prefix - touch ${prefix}/file.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS - """ -} +process UNTAR { + tag "$archive" + label 'process_single' + + conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir $prefix + touch ${prefix}/file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml index db241a6e..f0aeb7dc 100644 --- a/modules/nf-core/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -1,41 +1,41 @@ -name: untar -description: Extract files. -keywords: - - untar - - uncompress - - extract -tools: - - untar: - description: | - Extract tar.gz files. - documentation: https://www.gnu.org/software/tar/manual/ - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - archive: - type: file - description: File to be untar - pattern: "*.{tar}.{gz}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - untar: - type: directory - description: Directory containing contents of archive - pattern: "*/" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@matthdsm" - - "@jfy133" +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - untar: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 2bd14507..48362776 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,366 +1,379 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - nf-core/proteinfold Nextflow config file -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Default config options for all compute environments ----------------------------------------------------------------------------------------- -*/ - -// Global default params, used in configs -params { - - // Input options - input = null - mode = 'alphafold2' // {alphafold2, colabfold, esmfold} - use_gpu = false - - // Alphafold2 parameters - alphafold2_mode = "standard" - max_template_date = "2020-05-14" - full_dbs = false // true full_dbs, false reduced_dbs - alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} - alphafold2_db = null - - // Alphafold2 links - bfd_link = null - small_bfd_link = null - alphafold2_params_link = null - mgnify_link = null - pdb70_link = null - pdb_mmcif_link = null - pdb_obsolete_link = null - uniref30_alphafold2_link = null - uniref90_link = null - pdb_seqres_link = null - uniprot_sprot_link = null - uniprot_trembl_link = null - - // Alphafold2 paths - bfd_path = null - small_bfd_path = null - alphafold2_params_path = null - mgnify_path = null - pdb70_path = null - pdb_mmcif_path = null - uniref30_alphafold2_path = null - uniref90_path = null - pdb_seqres_path = null - uniprot_path = null - - // Colabfold parameters - colabfold_server = "webserver" - colabfold_model_preset = "alphafold2_ptm" // {'auto', 'alphafold2', 'alphafold2_ptm', 'alphafold2_multimer_v1', 'alphafold2_multimer_v2', 'alphafold2_multimer_v3'} - num_recycle = 3 - use_amber = true - colabfold_db = null - db_load_mode = 0 - host_url = null - use_templates = true - create_colabfold_index = false - - // Colabfold links - colabfold_db_link = null - uniref30_colabfold_link = null - - // Colabfold paths - colabfold_db_path = null - uniref30_colabfold_path = null - - // Esmfold parameters - esmfold_db = null - esmfold_model_preset = "monomer" - num_recycles = 4 - - // Esmfold links - esmfold_3B_v1 = null - esm2_t36_3B_UR50D = null - esm2_t36_3B_UR50D_contact_regression = null - - // Esmfold paths - esmfold_params_path = null - - // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null - - // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false - - // Config options - config_profile_name = null - config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null - - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' - - // Schema validation default options - 
validationFailUnrecognisedParams = false - validationLenientMode = false - validationSchemaIgnoreParams = '' - validationShowHiddenParams = false - validate_params = true - -} - -// Load base.config by default for all pipelines -includeConfig 'conf/base.config' - -// Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} - -// Load nf-core/proteinfold custom profiles from different institutions. -try { - includeConfig "${params.custom_config_base}/pipeline/proteinfold.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config/proteinfold profiles: ${params.custom_config_base}/pipeline/proteinfold.config") -} - -profiles { - debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false - nextflow.enable.configProcessNamesValidation = true - } - conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - channels = ['conda-forge', 'bioconda', 'defaults'] - apptainer.enabled = false - } - mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - docker { - docker.enabled = true - docker.userEmulation = true - if (params.use_gpu) { - docker.runOptions = '--gpus all' - } else { - docker.runOptions = '-u $(id -u):$(id -g)' - } - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - arm { - if (params.use_gpu) { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64 --gpus all' - } else { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' - } - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - if (params.use_gpu) { singularity.runOptions = '--nv' } - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false - } - apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB - } - test { includeConfig 'conf/test.config' } - test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } - test_colabfold_local { includeConfig 'conf/test_colabfold_local.config' } - test_colabfold_webserver { includeConfig 
'conf/test_colabfold_webserver.config' } - test_esmfold { includeConfig 'conf/test_esmfold.config' } - test_full { includeConfig 'conf/test_full.config' } - test_full_alphafold2_standard { includeConfig 'conf/test_full.config' } - test_full_alphafold2_split { includeConfig 'conf/test_full_alphafold_split.config' } - test_full_alphafold2_multimer { includeConfig 'conf/test_full_alphafold_multimer.config' } - test_full_colabfold_local { includeConfig 'conf/test_full_colabfold_local.config' } - test_full_colabfold_webserver { includeConfig 'conf/test_full_colabfold_webserver.config' } - test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } - test_full_esmfold { includeConfig 'conf/test_full_esmfold.config' } - test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } -} - -// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile -// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled -// Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' - -// Nextflow plugins -plugins { - id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet -} - -// Export these variables to prevent local Python/R libraries from conflicting with those in the container -// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. -// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. - -env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" -} - -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] - -// Disable process selector warnings by default. Use debug profile to enable warnings. 
-nextflow.enable.configProcessNamesValidation = false - -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') -timeline { - enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" -} -report { - enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" -} -trace { - enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" -} -dag { - enabled = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" -} - -manifest { - name = 'nf-core/proteinfold' - author = """Athanasios Baltzis, Jose Espinosa-Carrasco, Harshil Patel""" - homePage = 'https://github.com/nf-core/proteinfold' - description = """Protein 3D structure prediction pipeline""" - mainScript = 'main.nf' - nextflowVersion = '!>=23.04.0' - version = '1.1.0dev' - doi = '10.5281/zenodo.7629996' -} - -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' - -// Load modules config for pipeline specific modes -if (params.mode == 'alphafold2') { - includeConfig 'conf/modules_alphafold2.config' -} else if (params.mode == 'colabfold') { - includeConfig 'conf/modules_colabfold.config' -} else if (params.mode == 'esmfold') { - includeConfig 'conf/modules_esmfold.config' -} - -// Load links to DBs and parameters -includeConfig 'conf/dbs.config' - -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" - return obj - } - } -} - - - +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/proteinfold Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ + +// Global default params, used in configs +params { + + // Input options + input = null + mode = 'alphafold2' // {alphafold2, colabfold, esmfold} + use_gpu = false + + // Alphafold2 parameters + alphafold2_mode = "standard" + max_template_date = "2020-05-14" + full_dbs = false // true full_dbs, false reduced_dbs + alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} + alphafold2_db = null + + // Alphafold2 links + bfd_link = null + small_bfd_link = null + alphafold2_params_link = null + mgnify_link = null + pdb70_link = null + pdb_mmcif_link = null + pdb_obsolete_link = null + uniref30_alphafold2_link = null + uniref90_link = null + pdb_seqres_link = null + uniprot_sprot_link = null + uniprot_trembl_link = null + + // Alphafold2 paths + bfd_path = null + small_bfd_path = null + alphafold2_params_path = null + mgnify_path = null + pdb70_path = null + pdb_mmcif_path = null + uniref30_alphafold2_path = null + uniref90_path = null + pdb_seqres_path = null + uniprot_path = null + + // Colabfold parameters + colabfold_server = "webserver" + colabfold_model_preset = "alphafold2_ptm" // {'auto', 'alphafold2', 'alphafold2_ptm', 'alphafold2_multimer_v1', 'alphafold2_multimer_v2', 'alphafold2_multimer_v3'} + num_recycles_colabfold = 3 + use_amber = true + colabfold_db = null + db_load_mode = 0 + host_url = null + use_templates = true + create_colabfold_index = false + + // Colabfold links + colabfold_db_link = null + uniref30_colabfold_link = null + + // Colabfold paths + colabfold_db_path = null + uniref30_colabfold_path = null + + // Esmfold parameters + esmfold_db = null + esmfold_model_preset = "monomer" + num_recycles_esmfold = 4 + + // Esmfold links + esmfold_3B_v1 = null + esm2_t36_3B_UR50D = null + esm2_t36_3B_UR50D_contact_regression = null + + // Esmfold paths + esmfold_params_path = null + + // Process skipping options + skip_multiqc = false + + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null + + // Boilerplate options + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + + // Config options + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null + + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' + + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = '' + validationShowHiddenParams = false + validate_params = true + +} + +// Load base.config by default for all pipelines +includeConfig 
'conf/base.config' + +// Load nf-core custom profiles from different Institutions +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} + +// Load nf-core/proteinfold custom profiles from different institutions. +try { + includeConfig "${params.custom_config_base}/pipeline/proteinfold.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config/proteinfold profiles: ${params.custom_config_base}/pipeline/proteinfold.config") +} +profiles { + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + docker { + docker.enabled = true + docker.userEmulation = true + if (params.use_gpu) { + docker.runOptions = '--gpus all' + } else { + docker.runOptions = '-u $(id -u):$(id -g)' + } + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + arm { + if (params.use_gpu) { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64 --gpus all' + } else { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + if (params.use_gpu) { singularity.runOptions = '--nv' } + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + podman { + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + shifter { + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + charliecloud { + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + gitpod { + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB + } + test { includeConfig 'conf/test.config' } + test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } + test_alphafold2_download { includeConfig 'conf/test_alphafold_download.config' } + test_colabfold_local { includeConfig 'conf/test_colabfold_local.config' } + test_colabfold_webserver { includeConfig 
'conf/test_colabfold_webserver.config' } + test_colabfold_download { includeConfig 'conf/test_colabfold_download.config' } + test_esmfold { includeConfig 'conf/test_esmfold.config' } + test_full { includeConfig 'conf/test_full.config' } + test_full_alphafold2_standard { includeConfig 'conf/test_full.config' } + test_full_alphafold2_split { includeConfig 'conf/test_full_alphafold_split.config' } + test_full_alphafold2_multimer { includeConfig 'conf/test_full_alphafold_multimer.config' } + test_full_colabfold_local { includeConfig 'conf/test_full_colabfold_local.config' } + test_full_colabfold_webserver { includeConfig 'conf/test_full_colabfold_webserver.config' } + test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } + test_full_esmfold { includeConfig 'conf/test_full_esmfold.config' } + test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } + gadi { + includeConfig 'https://raw.githubusercontent.com/nf-core/configs/master/conf/nci_gadi.config' + includeConfig 'conf/gadi.config' + } +} + +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. + +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" +} + +// Capture exit codes from upstream processes when piping +process.shell = ['/bin/bash', '-euo', 'pipefail'] + +// Disable process selector warnings by default. Use debug profile to enable warnings. 
+nextflow.enable.configProcessNamesValidation = false + +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +timeline { + enabled = true + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" +} +report { + enabled = true + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" +} +trace { + enabled = true + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" +} +dag { + enabled = true + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" +} + +manifest { + name = 'nf-core/proteinfold' + author = """Athanasios Baltzis, Jose Espinosa-Carrasco, Harshil Patel""" + homePage = 'https://github.com/nf-core/proteinfold' + description = """Protein 3D structure prediction pipeline""" + mainScript = 'main.nf' + nextflowVersion = '!>=23.04.0' + version = '1.1.1' + doi = '10.5281/zenodo.7629996' +} + +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + +// Load modules config for pipeline specific modes +if (params.mode == 'alphafold2') { + includeConfig 'conf/modules_alphafold2.config' +} else if (params.mode == 'colabfold') { + includeConfig 'conf/modules_colabfold.config' +} else if (params.mode == 'esmfold') { + includeConfig 'conf/modules_esmfold.config' +} + +// Load links to DBs and parameters +includeConfig 'conf/dbs.config' + +// Function to ensure that resource requirements don't go beyond +// a maximum limit +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } + } +} diff --git a/nextflow_schema.json b/nextflow_schema.json index d2ef75da..7f19ff41 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,671 +1,693 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/proteinfold/master/nextflow_schema.json", - "title": "nf-core/proteinfold pipeline parameters", - "description": "Protein 3D structure prediction pipeline", - "type": "object", - "definitions": { - "input_output_options": { - "title": "Global options", - "type": "object", - "fa_icon": "fas fa-coins", - "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], - "properties": { - "input": { - "type": "string", - "format": "file-path", - "exists": true, - "schema": "assets/schema_input.json", - "mimetype": "text/csv", - "pattern": "^\\S+\\.csv$", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. 
Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/proteinfold/usage#samplesheet-input).",
- "fa_icon": "fas fa-file-csv"
- },
- "outdir": {
- "type": "string",
- "format": "directory-path",
- "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
- "fa_icon": "fas fa-folder-open"
- },
- "mode": {
- "type": "string",
- "default": "alphafold2",
- "description": "Specifies the mode in which the pipeline will be run",
- "enum": ["alphafold2", "colabfold", "esmfold"],
- "fa_icon": "fas fa-cogs"
- },
- "use_gpu": {
- "type": "boolean",
- "description": "Run on GPUs (true) or CPUs (false, the default)",
- "fa_icon": "fas fa-microchip"
- },
- "email": {
- "type": "string",
- "description": "Email address for completion summary.",
- "fa_icon": "fas fa-envelope",
- "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
- "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
- },
- "multiqc_title": {
- "type": "string",
- "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.",
- "fa_icon": "fas fa-file-signature"
- }
- }
- },
- "alphafold2_options": {
- "title": "Alphafold2 options",
- "type": "object",
- "fa_icon": "fas fa-dna",
- "description": "Alphafold2 options.",
- "properties": {
- "max_template_date": {
- "type": "string",
- "default": "2020-05-14",
- "description": "Maximum date of the PDB templates used by 'AlphaFold2' mode",
- "fa_icon": "fas fa-calendar-check"
- },
- "alphafold2_db": {
- "type": "string",
- "format": "path",
- "exists": true,
- "description": "Specifies the DB and PARAMS path used by 'AlphaFold2' mode",
- "fa_icon": "fas fa-database"
- },
- "full_dbs": {
- "type": "boolean",
- "default": false,
- "description": "If true, uses the full version of the BFD database; otherwise it uses its reduced version, small BFD",
- "fa_icon": "fas fa-battery-full"
- },
- "alphafold2_mode": {
- "type": "string",
- "default": "standard",
- "description": "Specifies the mode in which Alphafold2 will be run",
- "enum": ["standard", "split_msa_prediction"],
- "fa_icon": "fas fa-exchange-alt"
- },
- "alphafold2_model_preset": {
- "type": "string",
- "default": "monomer",
- "description": "Model preset for 'AlphaFold2' mode",
- "enum": ["monomer", "monomer_casp14", "monomer_ptm", "multimer"],
- "fa_icon": "fas fa-stream"
- }
- }
- },
- "colabfold_options": {
- "title": "Colabfold options",
- "type": "object",
- "fa_icon": "fas fa-coins",
- "description": "Colabfold options.",
- "properties": {
- "colabfold_db": {
- "type": "string",
- "format": "path",
- "exists": true,
- "description": "Specifies the PARAMS and DB path used by 'colabfold' mode",
- "fa_icon": "fas fa-folder-open"
- },
- "colabfold_server": {
- "type": "string",
- "default": "webserver",
- "description": "Specifies the MSA server used by Colabfold",
- "enum": ["webserver", "local"],
- "fa_icon": "fas fa-server"
- },
- "colabfold_model_preset": {
- "type": "string",
- "default": "alphafold2_ptm",
- "description": "Model preset for 'colabfold' mode",
- "enum": [
- "auto",
- "alphafold2",
- "alphafold2_ptm",
- "alphafold2_multimer_v1",
- "alphafold2_multimer_v2",
-
"alphafold2_multimer_v3" - ], - "fa_icon": "fas fa-stream" - }, - "num_recycle": { - "type": "integer", - "default": 3, - "description": "Number of recycles", - "fa_icon": "fas fa-recycle" - }, - "use_amber": { - "type": "boolean", - "default": true, - "description": "Use Amber minimization to refine the predicted structures", - "fa_icon": "fas fa-compress-alt" - }, - "db_load_mode": { - "type": "integer", - "default": 0, - "description": "Specify the way that MMSeqs2 will load the required databases in memory", - "fa_icon": "fas fa-download", - "enum": [0, 1, 2, 3] - }, - "host_url": { - "type": "string", - "description": "Specify your custom MMSeqs2 API server url", - "fa_icon": "fas fa-link" - }, - "use_templates": { - "type": "boolean", - "default": true, - "description": "Use PDB templates", - "fa_icon": "fas fa-paste" - }, - "create_colabfold_index": { - "type": "boolean", - "description": "Create databases indexes when running colabfold_local mode", - "fa_icon": "fas fa-bezier-curve" - } - } - }, - "esmfold_options": { - "title": "Esmfold options", - "type": "object", - "fa_icon": "fas fa-coins", - "description": "Esmfold options.", - "properties": { - "esmfold_db": { - "type": "string", - "format": "path", - "exists": true, - "description": "Specifies the PARAMS path used by 'esmfold' mode", - "fa_icon": "fas fa-folder-open" - }, - "num_recycles": { - "type": "integer", - "default": 4, - "description": "Specifies the number of recycles used by Esmfold", - "fa_icon": "fas fa-server" - }, - "esmfold_model_preset": { - "type": "string", - "description": "Specifies whether is a 'monomer' or 'multimer' prediction", - "enum": ["monomer", "multimer"], - "fa_icon": "fas fa-stream" - } - } - }, - "institutional_config_options": { - "title": "Institutional config options", - "type": "object", - "fa_icon": "fas fa-university", - "description": "Parameters used to describe centralised config profiles. These should not be edited.", - "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", - "properties": { - "custom_config_version": { - "type": "string", - "description": "Git commit id for Institutional configs.", - "default": "master", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "custom_config_base": { - "type": "string", - "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "hidden": true, - "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog" - }, - "config_profile_name": { - "type": "string", - "description": "Institutional config name.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_description": { - "type": "string", - "description": "Institutional config description.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_contact": { - "type": "string", - "description": "Institutional config contact information.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_url": { - "type": "string", - "description": "Institutional config URL link.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - } - } - }, - "max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`"
- }
- }
- },
- "alphafold2_dbs_and_parameters_link_options": {
- "title": "Alphafold2 DBs and parameters links options",
- "type": "object",
- "fa_icon": "fas fa-database",
- "description": "Parameters used to provide the links to the DBs and parameters public resources to Alphafold2.",
- "properties": {
- "bfd_link": {
- "type": "string",
- "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz",
- "description": "Link to BFD database",
- "fa_icon": "fas fa-link"
- },
- "small_bfd_link": {
- "type": "string",
- "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz",
- "description": "Link to a reduced version of the BFD database",
- "fa_icon": "fas fa-link"
- },
- "alphafold2_params_link": {
- "type": "string",
- "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar",
- "description": "Link to the Alphafold2 parameters",
- "fa_icon": "fas fa-link"
- },
- "mgnify_link": {
- "type": "string",
- "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz",
- "description": "Link to the MGnify database",
- "fa_icon": "fas fa-link"
- },
- "pdb70_link": {
- "type": "string",
- "default": "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz",
- "description": "Link to the PDB70 database",
- "fa_icon": "fas fa-link"
- },
- "pdb_mmcif_link": {
- "type": "string",
- "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/",
- "description": "Link to the PDB mmCIF database",
- "fa_icon": "fas fa-link"
- },
- "pdb_obsolete_link": {
- "type": "string",
- "default": "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat",
- "description": "Link to the PDB obsolete database",
- "fa_icon": "fas fa-link"
- },
- "uniref30_alphafold2_link": {
- "type": "string",
- "default": "https://storage.googleapis.com/alphafold-databases/v2.3/UniRef30_2021_03.tar.gz",
- "description": "Link to the UniRef30 database",
- "fa_icon": "fas fa-link"
- },
- "uniref90_link": {
- "type": "string",
- "default": "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz",
- "description": "Link to the UniRef90 database",
- "fa_icon": "fas fa-link"
- },
- "pdb_seqres_link": {
- "type": "string",
- "default": "ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt",
- "description": "Link to the PDB SEQRES database",
- "fa_icon": "fas fa-link"
- },
- "uniprot_sprot_link": {
- "type": "string",
- "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz",
- "description": "Link to the SwissProt UniProt database",
- "fa_icon": "fas fa-link"
- },
- "uniprot_trembl_link": {
- "type": "string",
- "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz",
- "description": "Link to the TrEMBL UniProt database",
- "fa_icon": "fas fa-link"
- }
- }
- },
- "alphafold2_dbs_and_parameters_path_options": {
- "title": "Alphafold2 DBs and parameters paths options",
- "type": "object",
- "fa_icon": "fas fa-database",
- "description": "Parameters used to provide the paths to the DBs and parameters for Alphafold2.",
- "properties": {
- "bfd_path": {
- "type": "string",
- "description": "Path to BFD database",
- "fa_icon": "fas fa-folder-open"
- },
- "small_bfd_path": {
- "type": "string",
- "description": "Path to a reduced version of the BFD database",
- "fa_icon": "fas fa-folder-open"
- },
- "alphafold2_params_path": {
- "type": "string",
- "description": "Path to the Alphafold2 parameters",
- "fa_icon": "fas fa-folder-open"
- },
- "mgnify_path": {
- "type": "string",
- "description": "Path to the MGnify database",
- "fa_icon": "fas fa-folder-open"
- },
- "pdb70_path": {
- "type": "string",
- "description": "Path to the PDB70 database",
- "fa_icon": "fas fa-folder-open"
- },
- "pdb_mmcif_path": {
- "type": "string",
- "description": "Path to the PDB mmCIF database",
- "fa_icon": "fas fa-folder-open"
- },
- "uniref30_alphafold2_path": {
- "type": "string",
- "description": "Path to the UniRef30 database",
- "fa_icon": "fas fa-folder-open"
- },
- "uniref90_path": {
- "type": "string",
- "description": "Path to the UniRef90 database",
- "fa_icon": "fas fa-folder-open"
- },
- "pdb_seqres_path": {
- "type": "string",
- "description": "Path to the PDB SEQRES database",
- "fa_icon": "fas fa-folder-open"
- },
- "uniprot_path": {
- "type": "string",
- "description": "Path to UniProt database containing the SwissProt and the TrEMBL databases",
- "fa_icon": "fas fa-folder-open"
- }
- }
- },
- "colabfold_dbs_and_parameters_link_options": {
- "title": "Colabfold DBs and parameters links options",
- "type": "object",
- "description": "Parameters used to provide the links to the DBs and parameters public resources to Colabfold.",
- "fa_icon": "fas fa-database",
- "properties": {
- "colabfold_db_link": {
- "type": "string",
- "default": "http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz",
- "description": "Link to the Colabfold database",
- "fa_icon": "fas fa-link"
- },
- "uniref30_colabfold_link": {
- "type": "string",
- "default": "https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2202.tar.gz",
- "description": "Link to the UniRef30 database",
- "fa_icon": "fas fa-link"
- },
- "colabfold_alphafold2_params_link": {
- "type": "string",
- "description": "Link to the Alphafold2 parameters for Colabfold",
- "fa_icon": "fas fa-link"
- }
- }
- },
- "colabfold_dbs_and_parameters_path_options": {
- "title": "Colabfold DBs and parameters paths options",
- "type": "object",
- "description": "Parameters used to provide the paths to the DBs and parameters for Colabfold.",
- "fa_icon": "fas fa-database",
- "properties": {
- "colabfold_db_path": {
- "type": "string",
- "description": "Path to the Colabfold database",
- "fa_icon": "fas fa-folder-open"
- },
- "uniref30_colabfold_path": {
- "type": "string",
- "description": "Path to the UniRef30 database",
- "fa_icon": "fas fa-folder-open"
- },
- "colabfold_alphafold2_params_path": {
- "type": "string",
- "description": "Path to the Alphafold2 parameters for Colabfold",
- "fa_icon": "fas fa-folder-open"
- },
- "colabfold_alphafold2_params_tags": {
- "type": "string",
- "description": "Dictionary with Alphafold2 parameters tags",
- "fa_icon": "fas fa-stream"
- }
- }
- },
- "esmfold_parameters_link_options": {
- "title": "Esmfold parameters links options",
- "type": "object",
- "description": "Parameters used to provide the links to the parameters public resources to Esmfold.",
- "fa_icon": "fas fa-database",
- "properties": {
- "esmfold_3B_v1": {
- "type": "string",
- "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt",
- "description": "Link to the Esmfold 3B-v1 model",
- "fa_icon": "fas fa-link"
- },
- "esm2_t36_3B_UR50D": {
- "type": "string",
- "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt",
- "description": "Link to the Esmfold t36-3B-UR50D model",
- "fa_icon": "fas fa-link"
- },
- "esm2_t36_3B_UR50D_contact_regression": {
- "type": "string",
- "default": "https://dl.fbaipublicfiles.com/fair-esm/regression/esm2_t36_3B_UR50D-contact-regression.pt",
- "description": "Link to the Esmfold t36-3B-UR50D-contact-regression model",
- "fa_icon": "fas fa-link"
- }
- }
- },
- "esmfold_parameters_path_options": {
- "title": "Esmfold parameters paths options",
- "type": "object",
- "description": "Parameters used to provide the paths to the parameters for Esmfold.",
- "fa_icon": "fas fa-database",
- "properties": {
- "esmfold_params_path": {
- "type": "string",
- "description": "Path to the Esmfold parameters",
- "fa_icon": "fas fa-folder-open"
- }
- }
- },
- "generic_options": {
- "title": "Generic options",
- "type": "object",
- "fa_icon": "fas fa-file-import",
- "description": "Less common options for the pipeline, typically set in a config file.",
- "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
- "properties": {
- "help": {
- "type": "boolean",
- "description": "Display help text.",
- "fa_icon": "fas fa-question-circle",
- "hidden": true
- },
- "version": {
- "type": "boolean",
- "description": "Display version and exit.",
- "fa_icon": "fas fa-question-circle",
- "hidden": true
- },
- "publish_dir_mode": {
- "type": "string",
- "default": "copy",
- "description": "Method used to save pipeline results to output directory.",
- "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
- "fa_icon": "fas fa-copy",
- "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
- "hidden": true
- },
- "email_on_fail": {
- "type": "string",
- "description": "Email address for completion summary, only when pipeline fails.",
- "fa_icon": "fas fa-exclamation-triangle",
- "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
- "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.",
- "hidden": true
- },
- "plaintext_email": {
- "type": "boolean",
- "description": "Send plain-text email instead of HTML.",
- "fa_icon": "fas fa-remove-format",
- "hidden": true
- },
- "max_multiqc_email_size": {
- "type": "string",
- "description": "File size limit when attaching MultiQC reports to summary emails.",
- "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
- "default": "25.MB",
- "fa_icon": "fas fa-file-upload",
- "hidden": true
- },
- "monochrome_logs": {
- "type": "boolean",
- "description": "Do not use coloured log outputs.",
- "fa_icon": "fas fa-palette",
- "hidden": true
- },
- "hook_url": {
- "type": "string",
- "description": "Incoming hook URL for messaging service",
- "fa_icon": "fas fa-people-group",
- "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
- "hidden": true
- },
- "multiqc_config": {
- "type": "string",
- "format": "file-path",
- "exists": true,
- "mimetype": "text/plain",
- "description": "Custom config file to supply to MultiQC.",
- "fa_icon": "fas fa-cog",
- "hidden": true
- },
- "multiqc_logo": {
- "type": "string",
- "format": "file-path",
- "exists": true,
- "mimetype": "text/plain",
- "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file",
- "fa_icon": "fas fa-image",
- "hidden": true
- },
- "multiqc_methods_description": {
- "type": "string",
- "format": "file-path",
- "exists": true,
- "mimetype": "text/plain",
- "description": "Custom MultiQC yaml file containing HTML including a methods description.",
- "fa_icon": "fas fa-cog"
- },
- "validate_params": {
- "type": "boolean",
- "description": "Boolean whether to validate parameters against the schema at runtime",
- "default": true,
- "fa_icon": "fas fa-check-square",
- "hidden": true
- },
- "validationShowHiddenParams": {
- "type": "boolean",
- "fa_icon": "far fa-eye-slash",
- "description": "Show all params when using `--help`",
- "hidden": true,
- "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
- },
- "validationFailUnrecognisedParams": {
- "type": "boolean",
- "fa_icon": "far fa-check-circle",
- "description": "Validation of parameters fails when an unrecognised parameter is found.",
- "hidden": true,
- "help_text": "By default, when an unrecognised parameter is found, it returns a warning."
- },
- "validationLenientMode": {
- "type": "boolean",
- "fa_icon": "far fa-check-circle",
- "description": "Validation of parameters in lenient mode.",
- "hidden": true,
- "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
- } - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/alphafold2_options" - }, - { - "$ref": "#/definitions/colabfold_options" - }, - { - "$ref": "#/definitions/esmfold_options" - }, - { - "$ref": "#/definitions/institutional_config_options" - }, - { - "$ref": "#/definitions/max_job_request_options" - }, - { - "$ref": "#/definitions/alphafold2_dbs_and_parameters_link_options" - }, - { - "$ref": "#/definitions/alphafold2_dbs_and_parameters_path_options" - }, - { - "$ref": "#/definitions/colabfold_dbs_and_parameters_link_options" - }, - { - "$ref": "#/definitions/colabfold_dbs_and_parameters_path_options" - }, - { - "$ref": "#/definitions/esmfold_parameters_link_options" - }, - { - "$ref": "#/definitions/esmfold_parameters_path_options" - }, - { - "$ref": "#/definitions/generic_options" - } - ] -} +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/proteinfold/master/nextflow_schema.json", + "title": "nf-core/proteinfold pipeline parameters", + "description": "Protein 3D structure prediction pipeline", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Global options", + "type": "object", + "fa_icon": "fas fa-coins", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["input", "outdir"], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/proteinfold/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv" + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "mode": { + "type": "string", + "default": "alphafold2", + "description": "Specifies the mode in which the pipeline will be run", + "fa_icon": "fas fa-cogs" + }, + "use_gpu": { + "type": "boolean", + "description": "Run on CPUs (default) or GPUs", + "fa_icon": "fas fa-microchip" + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "multiqc_title": { + "type": "string", + "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", + "fa_icon": "fas fa-file-signature" + } + } + }, + "alphafold2_options": { + "title": "Alphafold2 options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Alphafold2 options.", + "properties": { + "max_template_date": { + "type": "string", + "default": "2020-05-14", + "description": "Maximum date of the PDB templates used by 'AlphaFold2' mode", + "fa_icon": "fas fa-calendar-check" + }, + "alphafold2_db": { + "type": "string", + "format": "path", + "exists": true, + "description": "Specifies the DB and PARAMS path used by 'AlphaFold2' mode", + "fa_icon": "fas fa-database" + }, + "full_dbs": { + "type": "boolean", + "default": false, + "description": "If true, uses the full version of the BFD database; otherwise it uses its reduced version, small BFD", + "fa_icon": "fas fa-battery-full" + }, + "alphafold2_mode": { + "type": "string", + "default": "standard", + "description": "Specifies the mode in which Alphafold2 will be run", + "enum": ["standard", "split_msa_prediction"], + "fa_icon": "fas fa-exchange-alt" + }, + "alphafold2_model_preset": { + "type": "string", + "default": "monomer", + "description": "Model preset for 'AlphaFold2' mode", + "enum": ["monomer", "monomer_casp14", "monomer_ptm", "multimer"], + "fa_icon": "fas fa-stream" + } + } + }, + "colabfold_options": { + "title": "Colabfold options", + "type": "object", + "fa_icon": "fas fa-coins", + "description": "Colabfold options.", + "properties": { + "colabfold_db": { + "type": "string", + "format": "path", + "exists": true, + "description": "Specifies the PARAMS and DB path used by 'colabfold' mode", + "fa_icon": "fas fa-folder-open" + }, + "colabfold_server": { + "type": "string", + "default": "webserver", + "description": "Specifies the MSA server used by Colabfold", + "enum": ["webserver", "local"], + "fa_icon": "fas fa-server" + }, + "colabfold_model_preset": { + "type": "string", + "default": "alphafold2_ptm", + "description": "Model preset for 'colabfold' mode", + "enum": [ + "auto", + "alphafold2", + "alphafold2_ptm", + "alphafold2_multimer_v1", + "alphafold2_multimer_v2", + "alphafold2_multimer_v3" + ], + "fa_icon": "fas fa-stream" + }, + "num_recycles_colabfold": { + "type": "integer", + "default": 3, + "description": "Number of recycles for Colabfold", + "fa_icon": "fas fa-recycle" + }, + "use_amber": { + "type": "boolean", + "default": true, + "description": "Use Amber minimization to refine the predicted structures", + "fa_icon": "fas fa-compress-alt" + }, + "db_load_mode": { + "type": "integer", + "default": 0, + "description": "Specify the way that MMSeqs2 will load the required databases in memory", + "fa_icon": "fas fa-download", + "enum": [0, 1, 2, 3] + }, + "host_url": { + "type": "string", + "description": "Specify your custom MMSeqs2 API server URL", + "fa_icon": "fas fa-link" + }, + "use_templates": { + "type": "boolean", + "default": true, + "description": "Use PDB templates", + "fa_icon": "fas fa-paste" + }, + "create_colabfold_index": { + "type": "boolean", + "description": "Create database indexes when running Colabfold in local mode", + "fa_icon": "fas fa-bezier-curve" + } + } + }, + "esmfold_options": { + "title": "Esmfold options", + "type": "object", + "fa_icon": "fas fa-coins", + "description": "Esmfold options.", + "properties": { + "esmfold_db": { + "type": "string", + "format": "path", + "exists": true, + "description": "Specifies the PARAMS path used by 'esmfold' mode", + "fa_icon": "fas
fa-folder-open" + }, + "num_recycles_esmfold": { + "type": "integer", + "default": 4, + "description": "Specifies the number of recycles used by Esmfold", + "fa_icon": "fas fa-server" + }, + "esmfold_model_preset": { + "type": "string", + "description": "Specifies whether is a 'monomer' or 'multimer' prediction", + "enum": ["monomer", "multimer"], + "fa_icon": "fas fa-stream" + } + } + }, + "process_skipping_options": { + "title": "Process skipping options", + "type": "object", + "fa_icon": "fas fa-fast-forward", + "description": "Options to skip various steps within the workflow.", + "properties": { + "skip_multiqc": { + "type": "boolean", + "description": "Skip MultiQC.", + "fa_icon": "fas fa-fast-forward" + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + }, + "max_job_request_options": { + "title": "Max job request options", + "type": "object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. 
See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "properties": { + "max_cpus": { + "type": "integer", + "description": "Maximum number of CPUs that can be requested for any single job.", + "default": 16, + "fa_icon": "fas fa-microchip", + "hidden": true, + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + }, + "max_memory": { + "type": "string", + "description": "Maximum amount of memory that can be requested for any single job.", + "default": "128.GB", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "hidden": true, + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + }, + "max_time": { + "type": "string", + "description": "Maximum amount of time that can be requested for any single job.", + "default": "240.h", + "fa_icon": "far fa-clock", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", + "hidden": true, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + } + } + }, + "alphafold2_dbs_and_parameters_link_options": { + "title": "Alphafold2 DBs and parameters link options", + "type": "object", + "fa_icon": "fas fa-database", + "description": "Parameters used to provide the links to the public resources hosting the DBs and parameters used by Alphafold2.", + "properties": { + "bfd_link": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz", + "description": "Link to the BFD database", + "fa_icon": "fas fa-link" + }, + "small_bfd_link": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz", + "description": "Link to a reduced version of the BFD database", + "fa_icon": "fas fa-link" + }, + "alphafold2_params_link": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar", + "description": "Link to the Alphafold2 parameters", + "fa_icon": "fas fa-link" + }, + "mgnify_link": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/v2.3/mgy_clusters_2022_05.fa.gz", + "description": "Link to the MGnify database", + "fa_icon": "fas fa-link" + }, + "pdb70_link": { + "type": "string", + "default": "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz", + "description": "Link to the PDB70 database", + "fa_icon": "fas fa-link" + }, + "pdb_mmcif_link": { + "type": "string", + "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/", + "description": "Link to the PDB mmCIF database", + "fa_icon": "fas fa-link" + }, + "pdb_obsolete_link": { + "type": "string", + "default": "https://files.wwpdb.org/pub/pdb/data/status/obsolete.dat", + "description": "Link to the PDB obsolete database", + "fa_icon": "fas fa-link" + }, + "uniref30_alphafold2_link": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/v2.3/UniRef30_2021_03.tar.gz", + "description": "Link to the UniRef30 database", + "fa_icon": "fas fa-link" + }, + "uniref90_link": { + "type": "string", + "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz", + "description": "Link to the UniRef90 database",
+ "fa_icon": "fas fa-link" + }, + "pdb_seqres_link": { + "type": "string", + "default": "https://files.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt", + "description": "Link to the PDB SEQRES database", + "fa_icon": "fas fa-link" + }, + "uniprot_sprot_link": { + "type": "string", + "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz", + "description": "Link to the SwissProt UniProt database", + "fa_icon": "fas fa-link" + }, + "uniprot_trembl_link": { + "type": "string", + "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz", + "description": "Link to the TrEMBL UniProt database", + "fa_icon": "fas fa-link" + } + } + }, + "alphafold2_dbs_and_parameters_path_options": { + "title": "Alphafold2 DBs and parameters links options", + "type": "object", + "fa_icon": "fas fa-database", + "description": "Parameters used to provide the paths to the DBs and parameters for Alphafold2.", + "properties": { + "bfd_path": { + "type": "string", + "description": "Path to BFD dababase", + "fa_icon": "fas fa-folder-open" + }, + "small_bfd_path": { + "type": "string", + "description": "Path to a reduced version of the BFD database", + "fa_icon": "fas fa-folder-open" + }, + "alphafold2_params_path": { + "type": "string", + "description": "Path to the Alphafold2 parameters", + "fa_icon": "fas fa-folder-open" + }, + "mgnify_path": { + "type": "string", + "description": "Path to the MGnify database", + "fa_icon": "fas fa-folder-open" + }, + "pdb70_path": { + "type": "string", + "description": "Path to the PDB70 database", + "fa_icon": "fas fa-folder-open" + }, + "pdb_mmcif_path": { + "type": "string", + "description": "Path to the PDB mmCIF database", + "fa_icon": "fas fa-folder-open" + }, + "uniref30_alphafold2_path": { + "type": "string", + "description": "Path to the Uniref30 database", + "fa_icon": "fas fa-folder-open" + }, + "uniref90_path": { + "type": "string", + "description": "Path to the UniRef90 database", + "fa_icon": "fas fa-folder-open" + }, + "pdb_seqres_path": { + "type": "string", + "description": "Path to the PDB SEQRES database", + "fa_icon": "fas fa-folder-open" + }, + "uniprot_path": { + "type": "string", + "description": "Path to UniProt database containing the SwissProt and the TrEMBL databases", + "fa_icon": "fas fa-folder-open" + } + } + }, + "colabfold_dbs_and_parameters_link_options": { + "title": "Colabfold DBs and parameters links options", + "type": "object", + "description": "Parameters used to provide the links to the DBs and parameters public resources to Colabfold.", + "fa_icon": "fas fa-database", + "properties": { + "colabfold_db_link": { + "type": "string", + "default": "http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz", + "description": "Link to the Colabfold database", + "fa_icon": "fas fa-link" + }, + "uniref30_colabfold_link": { + "type": "string", + "default": "https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2302.tar.gz", + "description": "Link to the UniRef30 database", + "fa_icon": "fas fa-link" + }, + "colabfold_alphafold2_params_link": { + "type": "string", + "description": "Link to the Alphafold2 parameters for Colabfold", + "fa_icon": "fas fa-link" + } + } + }, + "colabfold_dbs_and_parameters_path_options": { + "title": "Colabfold DBs and parameters links options", + "type": "object", + "description": "Parameters used to provide the links to the DBs and parameters public resources to Colabfold.", + "fa_icon": 
"fas fa-database", + "properties": { + "colabfold_db_path": { + "type": "string", + "description": "Link to the Colabfold database", + "fa_icon": "fas fa-folder-open" + }, + "uniref30_colabfold_path": { + "type": "string", + "description": "Link to the UniRef30 database", + "fa_icon": "fas fa-folder-open" + }, + "colabfold_alphafold2_params_path": { + "type": "string", + "description": "Link to the Alphafold2 parameters for Colabfold", + "fa_icon": "fas fa-folder-open" + }, + "colabfold_alphafold2_params_tags": { + "type": "string", + "description": "Dictionary with Alphafold2 parameters tags", + "fa_icon": "fas fa-stream" + } + } + }, + "esmfold_parameters_link_options": { + "title": "Esmfold parameters links options", + "type": "object", + "description": "Parameters used to provide the links to the parameters public resources to Esmfold.", + "fa_icon": "fas fa-database", + "properties": { + "esmfold_3B_v1": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt", + "description": "Link to the Esmfold 3B-v1 model", + "fa_icon": "fas fa-link" + }, + "esm2_t36_3B_UR50D": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt", + "description": "Link to the Esmfold t36-3B-UR50D model", + "fa_icon": "fas fa-link" + }, + "esm2_t36_3B_UR50D_contact_regression": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/regression/esm2_t36_3B_UR50D-contact-regression.pt", + "description": "Link to the Esmfold t36-3B-UR50D-contact-regression model", + "fa_icon": "fas fa-link" + } + } + }, + "esmfold_parameters_path_options": { + "title": "Esmfold parameters links options", + "type": "object", + "description": "Parameters used to provide the links to the parameters public resources to Esmfold.", + "fa_icon": "fas fa-database", + "properties": { + "esmfold_params_path": { + "type": "string", + "description": "Link to the Esmfold parameters", + "fa_icon": "fas fa-folder-open" + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "max_multiqc_email_size": { + "type": "string", + "description": "File size limit when attaching MultiQC reports to summary emails.", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "default": "25.MB", + "fa_icon": "fas fa-file-upload", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, + "multiqc_config": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "description": "Custom config file to supply to MultiQC.", + "fa_icon": "fas fa-cog", + "hidden": true + }, + "multiqc_logo": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "validationShowHiddenParams": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true, + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
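The hidden generic and resource-cap parameters defined in this schema are typically set in a user-supplied Nextflow config file rather than on the command line. A minimal sketch, assuming a `custom.config` file (the file name and all values here are illustrative, not part of this diff):

```groovy
// custom.config -- illustrative only; pass to the pipeline with:
//   nextflow run nf-core/proteinfold -c custom.config ...
params {
    // Cap per-process resource requests (see max_job_request_options)
    max_cpus   = 8
    max_memory = '64.GB'
    max_time   = '48.h'

    // Generic options (see generic_options)
    publish_dir_mode = 'copy'
    monochrome_logs  = true
}
```

Values must match the corresponding `pattern` fields in the schema, e.g. memory as `<number>.GB` and time as `<number>.h`.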
+ }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/alphafold2_options" + }, + { + "$ref": "#/definitions/colabfold_options" + }, + { + "$ref": "#/definitions/esmfold_options" + }, + { + "$ref": "#/definitions/process_skipping_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/alphafold2_dbs_and_parameters_link_options" + }, + { + "$ref": "#/definitions/alphafold2_dbs_and_parameters_path_options" + }, + { + "$ref": "#/definitions/colabfold_dbs_and_parameters_link_options" + }, + { + "$ref": "#/definitions/colabfold_dbs_and_parameters_path_options" + }, + { + "$ref": "#/definitions/esmfold_parameters_link_options" + }, + { + "$ref": "#/definitions/esmfold_parameters_path_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/pyproject.toml b/pyproject.toml index 56110621..1f406b5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,15 +1,15 @@ -# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. -# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. -[tool.ruff] -line-length = 120 -target-version = "py38" -cache-dir = "~/.cache/ruff" - -[tool.ruff.lint] -select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] - -[tool.ruff.lint.isort] -known-first-party = ["nf_core"] - -[tool.ruff.lint.per-file-ignores] -"__init__.py" = ["E402", "F401"] +# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
+[tool.ruff] +line-length = 120 +target-version = "py38" +cache-dir = "~/.cache/ruff" + +[tool.ruff.lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] + +[tool.ruff.lint.isort] +known-first-party = ["nf_core"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/aria2_uncompress.nf b/subworkflows/local/aria2_uncompress.nf index b2e8a0ca..072089b9 100644 --- a/subworkflows/local/aria2_uncompress.nf +++ b/subworkflows/local/aria2_uncompress.nf @@ -1,30 +1,30 @@ -// -// Download with aria2 and uncompress the data if needed -// - -include { UNTAR } from '../../modules/nf-core/untar/main' -include { GUNZIP } from '../../modules/nf-core/gunzip/main' -include { ARIA2 } from '../../modules/nf-core/aria2/main' - - -workflow ARIA2_UNCOMPRESS { - take: - source_url // url - - main: - ARIA2 ( - source_url - ) - ch_db = Channel.empty() - - if (source_url.toString().endsWith('.tar') || source_url.toString().endsWith('.tar.gz')) { - ch_db = UNTAR ( ARIA2.out.downloaded_file.flatten().map{ [ [:], it ] } ).untar.map{ it[1] } - } else if (source_url.toString().endsWith('.gz')) { - ch_db = GUNZIP ( ARIA2.out.downloaded_file.flatten().map{ [ [:], it ] } ).gunzip.map { it[1] } - } - - emit: - db = ch_db // channel: [ db ] - versions = ARIA2.out.versions // channel: [ versions.yml ] -} - +// +// Download with aria2 and uncompress the data if needed +// + +include { UNTAR } from '../../modules/nf-core/untar/main' +include { GUNZIP } from '../../modules/nf-core/gunzip/main' +include { ARIA2 } from '../../modules/nf-core/aria2/main' + + +workflow ARIA2_UNCOMPRESS { + take: + source_url // url + + main: + ARIA2 ( + source_url + ) + ch_db = Channel.empty() + + if (source_url.toString().endsWith('.tar') || source_url.toString().endsWith('.tar.gz')) { + ch_db = UNTAR ( ARIA2.out.downloaded_file.flatten().map{ [ [:], it ] } ).untar.map{ it[1] } + } else if (source_url.toString().endsWith('.gz')) { + ch_db = GUNZIP ( ARIA2.out.downloaded_file.flatten().map{ [ [:], it ] } ).gunzip.map { it[1] } + } + + emit: + db = ch_db // channel: [ db ] + versions = ARIA2.out.versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/local/prepare_alphafold2_dbs.nf b/subworkflows/local/prepare_alphafold2_dbs.nf index 2a9fb46b..83e55e3f 100644 --- a/subworkflows/local/prepare_alphafold2_dbs.nf +++ b/subworkflows/local/prepare_alphafold2_dbs.nf @@ -1,161 +1,161 @@ -// -// Download all the required AlphaFold 2 databases and parameters -// - -include { - ARIA2_UNCOMPRESS as ARIA2_ALPHAFOLD2_PARAMS - ARIA2_UNCOMPRESS as ARIA2_BFD - ARIA2_UNCOMPRESS as ARIA2_SMALL_BFD - ARIA2_UNCOMPRESS as ARIA2_MGNIFY - ARIA2_UNCOMPRESS as ARIA2_PDB70 - ARIA2_UNCOMPRESS as ARIA2_UNIREF30 - ARIA2_UNCOMPRESS as ARIA2_UNIREF90 - ARIA2_UNCOMPRESS as ARIA2_UNIPROT_SPROT - ARIA2_UNCOMPRESS as ARIA2_UNIPROT_TREMBL } from './aria2_uncompress' - -include { ARIA2 } from '../../modules/nf-core/aria2/main' -include { COMBINE_UNIPROT } from '../../modules/local/combine_uniprot' -include { DOWNLOAD_PDBMMCIF } from '../../modules/local/download_pdbmmcif' - -workflow PREPARE_ALPHAFOLD2_DBS { - - take: - alphafold2_db // directory: path to alphafold2 DBs - full_dbs // boolean: Use full databases (otherwise reduced version) - bfd_path // directory: /path/to/bfd/ - small_bfd_path // directory: /path/to/small_bfd/ - alphafold2_params_path // directory: /path/to/alphafold2/params/ - mgnify_path // directory: /path/to/mgnify/ - pdb70_path // directory: /path/to/pdb70/ - pdb_mmcif_path // directory: 
/path/to/pdb_mmcif/ - uniref30_alphafold2_path // directory: /path/to/uniref30/alphafold2/ - uniref90_path // directory: /path/to/uniref90/ - pdb_seqres_path // directory: /path/to/pdb_seqres/ - uniprot_path // directory: /path/to/uniprot/ - bfd_link // string: Specifies the link to download bfd - small_bfd_link // string: Specifies the link to download small_bfd - alphafold2_params_link // string: Specifies the link to download alphafold2_params - mgnify_link // string: Specifies the link to download mgnify - pdb70_link // string: Specifies the link to download pdb70 - pdb_mmcif_link // string: Specifies the link to download pdb_mmcif - pdb_obsolete_link // string: Specifies the link to download pdb_obsolete - uniref30_alphafold2_link // string: Specifies the link to download uniref30_alphafold2 - uniref90_link // string: Specifies the link to download uniref90 - pdb_seqres_link // string: Specifies the link to download pdb_seqres - uniprot_sprot_link // string: Specifies the link to download uniprot_sprot - uniprot_trembl_link // string: Specifies the link to download uniprot_trembl - - main: - ch_bfd = Channel.empty() - ch_small_bfd = Channel.empty() - ch_versions = Channel.empty() - - - if (alphafold2_db) { - if (full_dbs) { - ch_bfd = file( bfd_path ) - ch_small_bfd = file( "${projectDir}/assets/dummy_db" ) - } - else { - ch_bfd = file( "${projectDir}/assets/dummy_db" ) - ch_small_bfd = file( small_bfd_path ) - } - - ch_params = file( alphafold2_params_path ) - ch_mgnify = file( mgnify_path ) - ch_pdb70 = file( pdb70_path, type: 'dir' ) - ch_mmcif_files = file( pdb_mmcif_path, type: 'dir' ) - ch_mmcif_obsolete = file( pdb_mmcif_path, type: 'file' ) - ch_mmcif = ch_mmcif_files + ch_mmcif_obsolete - ch_uniref30 = file( uniref30_alphafold2_path, type: 'any' ) - ch_uniref90 = file( uniref90_path ) - ch_pdb_seqres = file( pdb_seqres_path ) - ch_uniprot = file( uniprot_path ) - } - else { - if (full_dbs) { - ARIA2_BFD( - bfd_link - ) - ch_bfd = ARIA2_BFD.out.db - ch_versions = ch_versions.mix(ARIA2_BFD.out.versions) - } else { - ARIA2_SMALL_BFD( - small_bfd_link - ) - ch_small_bfd = ARIA2_SMALL_BFD.out.db - ch_versions = ch_versions.mix(ARIA2_SMALL_BFD.out.versions) - } - - ARIA2_ALPHAFOLD2_PARAMS( - alphafold2_params_link - ) - ch_params = ARIA2_ALPHAFOLD2_PARAMS.out.db - ch_versions = ch_versions.mix(ARIA2_ALPHAFOLD2_PARAMS.out.versions) - - ARIA2_MGNIFY( - mgnify_link - ) - ch_mgnify = ARIA2_MGNIFY.out.db - ch_versions = ch_versions.mix(ARIA2_MGNIFY.out.versions) - - ARIA2_PDB70( - pdb70_link - ) - ch_pdb70 = ARIA2_PDB70.out.db - ch_versions = ch_versions.mix(ARIA2_PDB70.out.versions) - - DOWNLOAD_PDBMMCIF( - pdb_mmcif_link, - pdb_obsolete_link - ) - ch_mmcif = DOWNLOAD_PDBMMCIF.out.ch_db - ch_versions = ch_versions.mix(DOWNLOAD_PDBMMCIF.out.versions) - - ARIA2_UNIREF30( - uniref30_alphafold2_link - ) - ch_uniref30 = ARIA2_UNIREF30.out.db - ch_versions = ch_versions.mix(ARIA2_UNIREF30.out.versions) - - ARIA2_UNIREF90( - uniref90_link - ) - ch_uniref90 = ARIA2_UNIREF90.out.db - ch_versions = ch_versions.mix(ARIA2_UNIREF90.out.versions) - - ARIA2 ( - pdb_seqres_link - ) - ch_pdb_seqres = ARIA2.out.downloaded_file - ch_versions = ch_versions.mix(ARIA2.out.versions) - - ARIA2_UNIPROT_SPROT( - uniprot_sprot_link - ) - ch_versions = ch_versions.mix(ARIA2_UNIPROT_SPROT.out.versions) - ARIA2_UNIPROT_TREMBL( - uniprot_trembl_link - ) - ch_versions = ch_versions.mix(ARIA2_UNIPROT_TREMBL.out.versions) - COMBINE_UNIPROT ( - ARIA2_UNIPROT_SPROT.out.db, - ARIA2_UNIPROT_TREMBL.out.db - ) - ch_uniprot = 
COMBINE_UNIPROT.out.ch_db - ch_version = ch_versions.mix(COMBINE_UNIPROT.out.versions) - } - - emit: - bfd = ch_bfd - small_bfd = ch_small_bfd - params = ch_params - mgnify = ch_mgnify - pdb70 = ch_pdb70 - pdb_mmcif = ch_mmcif - uniref30 = ch_uniref30 - uniref90 = ch_uniref90 - pdb_seqres = ch_pdb_seqres - uniprot = ch_uniprot - versions = ch_versions -} +// +// Download all the required AlphaFold 2 databases and parameters +// + +include { + ARIA2_UNCOMPRESS as ARIA2_ALPHAFOLD2_PARAMS + ARIA2_UNCOMPRESS as ARIA2_BFD + ARIA2_UNCOMPRESS as ARIA2_SMALL_BFD + ARIA2_UNCOMPRESS as ARIA2_MGNIFY + ARIA2_UNCOMPRESS as ARIA2_PDB70 + ARIA2_UNCOMPRESS as ARIA2_UNIREF30 + ARIA2_UNCOMPRESS as ARIA2_UNIREF90 + ARIA2_UNCOMPRESS as ARIA2_UNIPROT_SPROT + ARIA2_UNCOMPRESS as ARIA2_UNIPROT_TREMBL } from './aria2_uncompress' + +include { ARIA2 } from '../../modules/nf-core/aria2/main' +include { COMBINE_UNIPROT } from '../../modules/local/combine_uniprot' +include { DOWNLOAD_PDBMMCIF } from '../../modules/local/download_pdbmmcif' + +workflow PREPARE_ALPHAFOLD2_DBS { + + take: + alphafold2_db // directory: path to alphafold2 DBs + full_dbs // boolean: Use full databases (otherwise reduced version) + bfd_path // directory: /path/to/bfd/ + small_bfd_path // directory: /path/to/small_bfd/ + alphafold2_params_path // directory: /path/to/alphafold2/params/ + mgnify_path // directory: /path/to/mgnify/ + pdb70_path // directory: /path/to/pdb70/ + pdb_mmcif_path // directory: /path/to/pdb_mmcif/ + uniref30_alphafold2_path // directory: /path/to/uniref30/alphafold2/ + uniref90_path // directory: /path/to/uniref90/ + pdb_seqres_path // directory: /path/to/pdb_seqres/ + uniprot_path // directory: /path/to/uniprot/ + bfd_link // string: Specifies the link to download bfd + small_bfd_link // string: Specifies the link to download small_bfd + alphafold2_params_link // string: Specifies the link to download alphafold2_params + mgnify_link // string: Specifies the link to download mgnify + pdb70_link // string: Specifies the link to download pdb70 + pdb_mmcif_link // string: Specifies the link to download pdb_mmcif + pdb_obsolete_link // string: Specifies the link to download pdb_obsolete + uniref30_alphafold2_link // string: Specifies the link to download uniref30_alphafold2 + uniref90_link // string: Specifies the link to download uniref90 + pdb_seqres_link // string: Specifies the link to download pdb_seqres + uniprot_sprot_link // string: Specifies the link to download uniprot_sprot + uniprot_trembl_link // string: Specifies the link to download uniprot_trembl + + main: + ch_bfd = Channel.empty() + ch_small_bfd = Channel.empty() + ch_versions = Channel.empty() + + + if (alphafold2_db) { + if (full_dbs) { + ch_bfd = file( bfd_path ) + ch_small_bfd = file( "${projectDir}/assets/dummy_db" ) + } + else { + ch_bfd = file( "${projectDir}/assets/dummy_db" ) + ch_small_bfd = file( small_bfd_path ) + } + + ch_params = file( alphafold2_params_path ) + ch_mgnify = file( mgnify_path ) + ch_pdb70 = file( pdb70_path, type: 'dir' ) + ch_mmcif_files = file( pdb_mmcif_path, type: 'dir' ) + ch_mmcif_obsolete = file( pdb_mmcif_path, type: 'file' ) + ch_mmcif = ch_mmcif_files + ch_mmcif_obsolete + ch_uniref30 = file( uniref30_alphafold2_path, type: 'any' ) + ch_uniref90 = file( uniref90_path ) + ch_pdb_seqres = file( pdb_seqres_path ) + ch_uniprot = file( uniprot_path ) + } + else { + if (full_dbs) { + ARIA2_BFD( + bfd_link + ) + ch_bfd = ARIA2_BFD.out.db + ch_versions = ch_versions.mix(ARIA2_BFD.out.versions) + } else { + ARIA2_SMALL_BFD( + 
small_bfd_link + ) + ch_small_bfd = ARIA2_SMALL_BFD.out.db + ch_versions = ch_versions.mix(ARIA2_SMALL_BFD.out.versions) + } + + ARIA2_ALPHAFOLD2_PARAMS( + alphafold2_params_link + ) + ch_params = ARIA2_ALPHAFOLD2_PARAMS.out.db + ch_versions = ch_versions.mix(ARIA2_ALPHAFOLD2_PARAMS.out.versions) + + ARIA2_MGNIFY( + mgnify_link + ) + ch_mgnify = ARIA2_MGNIFY.out.db + ch_versions = ch_versions.mix(ARIA2_MGNIFY.out.versions) + + ARIA2_PDB70( + pdb70_link + ) + ch_pdb70 = ARIA2_PDB70.out.db + ch_versions = ch_versions.mix(ARIA2_PDB70.out.versions) + + DOWNLOAD_PDBMMCIF( + pdb_mmcif_link, + pdb_obsolete_link + ) + ch_mmcif = DOWNLOAD_PDBMMCIF.out.ch_db + ch_versions = ch_versions.mix(DOWNLOAD_PDBMMCIF.out.versions) + + ARIA2_UNIREF30( + uniref30_alphafold2_link + ) + ch_uniref30 = ARIA2_UNIREF30.out.db + ch_versions = ch_versions.mix(ARIA2_UNIREF30.out.versions) + + ARIA2_UNIREF90( + uniref90_link + ) + ch_uniref90 = ARIA2_UNIREF90.out.db + ch_versions = ch_versions.mix(ARIA2_UNIREF90.out.versions) + + ARIA2 ( + pdb_seqres_link + ) + ch_pdb_seqres = ARIA2.out.downloaded_file + ch_versions = ch_versions.mix(ARIA2.out.versions) + + ARIA2_UNIPROT_SPROT( + uniprot_sprot_link + ) + ch_versions = ch_versions.mix(ARIA2_UNIPROT_SPROT.out.versions) + ARIA2_UNIPROT_TREMBL( + uniprot_trembl_link + ) + ch_versions = ch_versions.mix(ARIA2_UNIPROT_TREMBL.out.versions) + COMBINE_UNIPROT ( + ARIA2_UNIPROT_SPROT.out.db, + ARIA2_UNIPROT_TREMBL.out.db + ) + ch_uniprot = COMBINE_UNIPROT.out.ch_db + ch_versions = ch_versions.mix(COMBINE_UNIPROT.out.versions) + } + + emit: + bfd = ch_bfd + small_bfd = ch_small_bfd + params = ch_params + mgnify = ch_mgnify + pdb70 = ch_pdb70 + pdb_mmcif = ch_mmcif + uniref30 = ch_uniref30 + uniref90 = ch_uniref90 + pdb_seqres = ch_pdb_seqres + uniprot = ch_uniprot + versions = ch_versions +} diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 5979ceb5..4b1dde0f 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -1,92 +1,92 @@ -// -// Download all the required databases and params by Colabfold -// -include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_COLABFOLDDB } from '../../modules/nf-core/mmseqs/createindex/main' -include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_UNIPROT30 } from '../../modules/nf-core/mmseqs/createindex/main' - -include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_PARAMS } from './aria2_uncompress' -include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_DB } from './aria2_uncompress' -include { ARIA2_UNCOMPRESS as ARIA2_UNIREF30 } from './aria2_uncompress' -include { MMSEQS_TSV2EXPROFILEDB as MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB } from '../../modules/nf-core/mmseqs/tsv2exprofiledb/main' -include { MMSEQS_TSV2EXPROFILEDB as MMSEQS_TSV2EXPROFILEDB_UNIPROT30 } from '../../modules/nf-core/mmseqs/tsv2exprofiledb/main' - -workflow PREPARE_COLABFOLD_DBS { - - take: - colabfold_db // directory: path/to/colabfold/DBs and params - colabfold_server // string: Specifies the server to use for colabfold - colabfold_alphafold2_params_path // directory: /path/to/colabfold/alphafold2/params/ - colabfold_db_path // directory: /path/to/colabfold/db/ - uniref30_colabfold_path // directory: /path/to/uniref30/colabfold/ - colabfold_alphafold2_params_link // string: Specifies the link to download colabfold alphafold2 params - colabfold_db_link // string: Specifies the link to download colabfold db - uniref30_colabfold_link // string: Specifies the link to download uniref30 -
create_colabfold_index // boolean: Create index for colabfold db - - main: - ch_params = Channel.empty() - ch_colabfold_db = Channel.empty() - ch_uniref30 = Channel.empty() - ch_versions = Channel.empty() - - if (colabfold_db) { - ch_params = file( colabfold_alphafold2_params_path, type: 'any' ) - if (colabfold_server == 'local') { - ch_colabfold_db = file( colabfold_db_path, type: 'any' ) - ch_uniref30 = file( uniref30_colabfold_path , type: 'any' ) - } - } - else { - ARIA2_COLABFOLD_PARAMS ( - colabfold_alphafold2_params_link - ) - ch_params = ARIA2_COLABFOLD_PARAMS.out.db - ch_versions = ch_versions.mix(ARIA2_COLABFOLD_PARAMS.out.versions) - - if (params.colabfold_server == 'local') { - ARIA2_COLABFOLD_DB ( - colabfold_db_link - ) - ch_versions = ch_versions.mix(ARIA2_COLABFOLD_DB.out.versions) - - MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB ( - ARIA2_COLABFOLD_DB.out.db - ) - ch_colabfold_db = MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB.out.db_exprofile - ch_versions = ch_versions.mix(MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB.out.versions) - - if (params.create_colabfold_index) { - MMSEQS_CREATEINDEX_COLABFOLDDB ( - MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB.out.db_exprofile - ) - ch_colabfold_db = MMSEQS_CREATEINDEX_COLABFOLDDB.out.db_indexed - ch_versions = ch_versions.mix(MMSEQS_CREATEINDEX_COLABFOLDDB.out.versions) - } - - ARIA2_UNIREF30( - uniref30_colabfold_link - ) - ch_versions = ch_versions.mix(ARIA2_UNIREF30.out.versions) - - MMSEQS_TSV2EXPROFILEDB_UNIPROT30 ( - ARIA2_UNIREF30.out.db - ) - ch_uniref30 = MMSEQS_TSV2EXPROFILEDB_UNIPROT30.out.db_exprofile - ch_versions = ch_versions.mix(MMSEQS_TSV2EXPROFILEDB_UNIPROT30.out.versions) - - if (create_colabfold_index) { - MMSEQS_CREATEINDEX_UNIPROT30 ( - MMSEQS_TSV2EXPROFILEDB_UNIPROT30.out.db_exprofile - ) - ch_uniref30 = MMSEQS_CREATEINDEX_UNIPROT30.out.db_indexed - ch_versions = ch_versions.mix(MMSEQS_CREATEINDEX_UNIPROT30.out.versions) - } - } - } - - emit: - params = ch_params - colabfold_db = ch_colabfold_db - uniref30 = ch_uniref30 - versions = ch_versions -} +// +// Download all the required databases and params by Colabfold +// +include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_COLABFOLDDB } from '../../modules/nf-core/mmseqs/createindex/main' +include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_UNIPROT30 } from '../../modules/nf-core/mmseqs/createindex/main' + +include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_PARAMS } from './aria2_uncompress' +include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_DB } from './aria2_uncompress' +include { ARIA2_UNCOMPRESS as ARIA2_UNIREF30 } from './aria2_uncompress' +include { MMSEQS_TSV2EXPROFILEDB as MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB } from '../../modules/nf-core/mmseqs/tsv2exprofiledb/main' +include { MMSEQS_TSV2EXPROFILEDB as MMSEQS_TSV2EXPROFILEDB_UNIPROT30 } from '../../modules/nf-core/mmseqs/tsv2exprofiledb/main' + +workflow PREPARE_COLABFOLD_DBS { + + take: + colabfold_db // directory: path/to/colabfold/DBs and params + colabfold_server // string: Specifies the server to use for colabfold + colabfold_alphafold2_params_path // directory: /path/to/colabfold/alphafold2/params/ + colabfold_db_path // directory: /path/to/colabfold/db/ + uniref30_colabfold_path // directory: /path/to/uniref30/colabfold/ + colabfold_alphafold2_params_link // string: Specifies the link to download colabfold alphafold2 params + colabfold_db_link // string: Specifies the link to download colabfold db + uniref30_colabfold_link // string: Specifies the link to download uniref30 + create_colabfold_index // boolean: Create index for colabfold db + + 
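For orientation, the `take:` declarations above map one-to-one onto pipeline params of the same names in `nextflow_schema.json`. A sketch of a possible call site follows; the call site itself is assumed for illustration and is not part of this hunk (the real wiring lives in the pipeline's main workflow):

```groovy
// Hypothetical call site for PREPARE_COLABFOLD_DBS -- argument order must
// match the take: block above; all param names come from nextflow_schema.json.
PREPARE_COLABFOLD_DBS (
    params.colabfold_db,
    params.colabfold_server,
    params.colabfold_alphafold2_params_path,
    params.colabfold_db_path,
    params.uniref30_colabfold_path,
    params.colabfold_alphafold2_params_link,
    params.colabfold_db_link,
    params.uniref30_colabfold_link,
    params.create_colabfold_index
)
ch_colabfold_params = PREPARE_COLABFOLD_DBS.out.params // channel of params files
```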
main: + ch_params = Channel.empty() + ch_colabfold_db = Channel.empty() + ch_uniref30 = Channel.empty() + ch_versions = Channel.empty() + + if (colabfold_db) { + ch_params = file( colabfold_alphafold2_params_path, type: 'any' ) + if (colabfold_server == 'local') { + ch_colabfold_db = file( colabfold_db_path, type: 'any' ) + ch_uniref30 = file( uniref30_colabfold_path , type: 'any' ) + } + } + else { + ARIA2_COLABFOLD_PARAMS ( + colabfold_alphafold2_params_link + ) + ch_params = ARIA2_COLABFOLD_PARAMS.out.db + ch_versions = ch_versions.mix(ARIA2_COLABFOLD_PARAMS.out.versions) + + if (params.colabfold_server == 'local') { + ARIA2_COLABFOLD_DB ( + colabfold_db_link + ) + ch_versions = ch_versions.mix(ARIA2_COLABFOLD_DB.out.versions) + + MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB ( + ARIA2_COLABFOLD_DB.out.db + ) + ch_colabfold_db = MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB.out.db_exprofile + ch_versions = ch_versions.mix(MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB.out.versions) + + if (params.create_colabfold_index) { + MMSEQS_CREATEINDEX_COLABFOLDDB ( + MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB.out.db_exprofile + ) + ch_colabfold_db = MMSEQS_CREATEINDEX_COLABFOLDDB.out.db_indexed + ch_versions = ch_versions.mix(MMSEQS_CREATEINDEX_COLABFOLDDB.out.versions) + } + + ARIA2_UNIREF30( + uniref30_colabfold_link + ) + ch_versions = ch_versions.mix(ARIA2_UNIREF30.out.versions) + + MMSEQS_TSV2EXPROFILEDB_UNIPROT30 ( + ARIA2_UNIREF30.out.db + ) + ch_uniref30 = MMSEQS_TSV2EXPROFILEDB_UNIPROT30.out.db_exprofile + ch_versions = ch_versions.mix(MMSEQS_TSV2EXPROFILEDB_UNIPROT30.out.versions) + + if (create_colabfold_index) { + MMSEQS_CREATEINDEX_UNIPROT30 ( + MMSEQS_TSV2EXPROFILEDB_UNIPROT30.out.db_exprofile + ) + ch_uniref30 = MMSEQS_CREATEINDEX_UNIPROT30.out.db_indexed + ch_versions = ch_versions.mix(MMSEQS_CREATEINDEX_UNIPROT30.out.versions) + } + } + } + + emit: + params = ch_params + colabfold_db = ch_colabfold_db + uniref30 = ch_uniref30 + versions = ch_versions +} diff --git a/subworkflows/local/prepare_esmfold_dbs.nf b/subworkflows/local/prepare_esmfold_dbs.nf index 39c39370..a2e248fb 100644 --- a/subworkflows/local/prepare_esmfold_dbs.nf +++ b/subworkflows/local/prepare_esmfold_dbs.nf @@ -1,41 +1,41 @@ -// -// Download all the required Esmfold parameters -// - -include { ARIA2 as ARIA2_ESMFOLD_3B_V1 } from '../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_ESM2_T36_3B_UR50D } from '../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION } from '../../modules/nf-core/aria2/main' - -workflow PREPARE_ESMFOLD_DBS { - - take: - esmfold_db // directory: /path/to/esmfold/db/ - esmfold_params_path // directory: /path/to/esmfold/params/ - esmfold_3B_v1 // string: Specifies the link to download esmfold 3B v1 - esm2_t36_3B_UR50D // string: Specifies the link to download esm2 t36 3B UR50D - esm2_t36_3B_UR50D_contact_regression // string: Specifies the link to download esm2 t36 3B UR50D contact regression - - main: - ch_versions = Channel.empty() - - if (esmfold_db) { - ch_params = file( esmfold_params_path, type: 'file' ) - } - else { - ARIA2_ESMFOLD_3B_V1 ( - esmfold_3B_v1 - ) - ARIA2_ESM2_T36_3B_UR50D ( - esm2_t36_3B_UR50D - ) - ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION ( - esm2_t36_3B_UR50D_contact_regression - ) - ch_params = ARIA2_ESMFOLD_3B_V1.out.downloaded_file.mix(ARIA2_ESM2_T36_3B_UR50D.out.downloaded_file,ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION.out.downloaded_file).collect() - ch_versions = ch_versions.mix(ARIA2_ESMFOLD_3B_V1.out.versions) - } - - emit: - params = ch_params - 
versions = ch_versions -} +// +// Download all the required Esmfold parameters +// + +include { ARIA2 as ARIA2_ESMFOLD_3B_V1 } from '../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_ESM2_T36_3B_UR50D } from '../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION } from '../../modules/nf-core/aria2/main' + +workflow PREPARE_ESMFOLD_DBS { + + take: + esmfold_db // directory: /path/to/esmfold/db/ + esmfold_params_path // directory: /path/to/esmfold/params/ + esmfold_3B_v1 // string: Specifies the link to download esmfold 3B v1 + esm2_t36_3B_UR50D // string: Specifies the link to download esm2 t36 3B UR50D + esm2_t36_3B_UR50D_contact_regression // string: Specifies the link to download esm2 t36 3B UR50D contact regression + + main: + ch_versions = Channel.empty() + + if (esmfold_db) { + ch_params = file( esmfold_params_path, type: 'file' ) + } + else { + ARIA2_ESMFOLD_3B_V1 ( + esmfold_3B_v1 + ) + ARIA2_ESM2_T36_3B_UR50D ( + esm2_t36_3B_UR50D + ) + ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION ( + esm2_t36_3B_UR50D_contact_regression + ) + ch_params = ARIA2_ESMFOLD_3B_V1.out.downloaded_file.mix(ARIA2_ESM2_T36_3B_UR50D.out.downloaded_file,ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION.out.downloaded_file).collect() + ch_versions = ch_versions.mix(ARIA2_ESMFOLD_3B_V1.out.versions) + } + + emit: + params = ch_params + versions = ch_versions +} diff --git a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf index 90b2f395..ca69c33e 100644 --- a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf @@ -1,194 +1,206 @@ -// -// Subworkflow with functionality specific to the nf-core/proteinfold pipeline -// - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' -include { paramsSummaryMap } from 'plugin/nf-validation' -include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' -include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' -include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' - -/* -======================================================================================== - SUBWORKFLOW TO INITIALISE PIPELINE -======================================================================================== -*/ - -workflow PIPELINE_INITIALISATION { - - take: - version // boolean: Display version and exit - help // boolean: Display help text - validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs - nextflow_cli_args // array: List of positional nextflow CLI args - outdir // string: The output directory where the results will be saved - - main: - // - // Print version and exit if required and dump pipeline parameters to JSON file - // - 
UTILS_NEXTFLOW_PIPELINE ( - version, - true, - outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 - ) - - // - // Validate parameters and generate parameter summary to stdout - // - pre_help_text = nfCoreLogo(monochrome_logs) - post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) - def String workflow_command = "nextflow run ${workflow.manifest.name} -profile <PROFILE> --input samplesheet.csv --outdir <OUTDIR>" - UTILS_NFVALIDATION_PLUGIN ( - help, - workflow_command, - pre_help_text, - post_help_text, - validate_params, - "nextflow_schema.json" - ) - - // - // Check config provided to the pipeline - // - UTILS_NFCORE_PIPELINE ( - nextflow_cli_args - ) -} - -/* -======================================================================================== - SUBWORKFLOW FOR PIPELINE COMPLETION -======================================================================================== -*/ - -workflow PIPELINE_COMPLETION { - - take: - email // string: email address - email_on_fail // string: email address sent on pipeline failure - plaintext_email // boolean: Send plain-text email instead of HTML - outdir // path: Path to output directory where results will be published - monochrome_logs // boolean: Disable ANSI colour codes in log output - hook_url // string: hook URL for notifications - multiqc_report // string: Path to MultiQC report - - main: - - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - - // - // Completion email and summary - // - workflow.onComplete { - if (email || email_on_fail) { - completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) - } - - completionSummary(monochrome_logs) - - if (hook_url) { - imNotification(summary_params, hook_url) - } - } -} - -/* -======================================================================================== - FUNCTIONS -======================================================================================== -*/ - -// -// Get link to Colabfold Alphafold2 parameters -// -def getColabfoldAlphafold2Params() { - def link = null - if (params.colabfold_alphafold2_params_tags) { - if (params.colabfold_alphafold2_params_tags.containsKey(params.colabfold_model_preset.toString())) { - link = "https://storage.googleapis.com/alphafold/" + params.colabfold_alphafold2_params_tags[ params.colabfold_model_preset.toString() ] + '.tar' - } - } - return link -} - -// -// Get path to Colabfold Alphafold2 parameters -// -def getColabfoldAlphafold2ParamsPath() { - def path = null - params.colabfold_model_preset.toString() - if (params.colabfold_alphafold2_params_tags) { - if (params.colabfold_alphafold2_params_tags.containsKey(params.colabfold_model_preset.toString())) { - path = "${params.colabfold_db}/params/" + params.colabfold_alphafold2_params_tags[ params.colabfold_model_preset.toString() ] - } - } - return path -} - -// -// Generate methods description for MultiQC -// -def toolCitationText() { - // TODO nf-core: Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "."
- ].join(' ').trim() - - return citation_text -} - -def toolBibliographyText() { - // TODO nf-core: Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", - "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" - ].join(' ').trim() - - return reference_text -} - -def methodsDescriptionText(mqc_methods_yaml) { - // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = workflow.toMap() - meta["manifest_map"] = workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "<em>If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used. </em>
  • " - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - // meta["tool_bibliography"] = toolBibliographyText() - def methods_text = mqc_methods_yaml.text - - def engine = new groovy.text.SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html.toString() -} +// +// Subworkflow with functionality specific to the nf-core/proteinfold pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + + main: + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of 
HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Get link to Colabfold Alphafold2 parameters +// +def getColabfoldAlphafold2Params() { + def link = null + if (params.colabfold_alphafold2_params_tags) { + if (params.colabfold_alphafold2_params_tags.containsKey(params.colabfold_model_preset.toString())) { + link = "https://storage.googleapis.com/alphafold/" + params.colabfold_alphafold2_params_tags[ params.colabfold_model_preset.toString() ] + '.tar' + } + } + return link +} + +// +// Get path to Colabfold Alphafold2 parameters +// +def getColabfoldAlphafold2ParamsPath() { + def path = null + params.colabfold_model_preset.toString() + if (params.colabfold_alphafold2_params_tags) { + if (params.colabfold_alphafold2_params_tags.containsKey(params.colabfold_model_preset.toString())) { + path = "${params.colabfold_db}/params/" + params.colabfold_alphafold2_params_tags[ params.colabfold_model_preset.toString() ] + } + } + return path +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<em>If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used. </em>
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf index ac31f28f..20d52ae2 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -1,126 +1,126 @@ -// -// Subworkflow with functionality that may be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput -import nextflow.extension.FilesEx - -/* -======================================================================================== - SUBWORKFLOW DEFINITION -======================================================================================== -*/ - -workflow UTILS_NEXTFLOW_PIPELINE { - - take: - print_version // boolean: print version - dump_parameters // boolean: dump parameters - outdir // path: base directory used to publish pipeline results - check_conda_channels // boolean: check conda channels - - main: - - // - // Print workflow version and exit on --version - // - if (print_version) { - log.info "${workflow.manifest.name} ${getWorkflowVersion()}" - System.exit(0) - } - - // - // Dump pipeline parameters to a JSON file - // - if (dump_parameters && outdir) { - dumpParametersToJSON(outdir) - } - - // - // When running with Conda, warn if channels have not been set-up appropriately - // - if (check_conda_channels) { - checkCondaChannels() - } - - emit: - dummy_emit = true -} - -/* -======================================================================================== - FUNCTIONS -======================================================================================== -*/ - -// -// Generate version string -// -def getWorkflowVersion() { - String version_string = "" - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string -} - -// -// Dump pipeline parameters to a JSON file -// -def dumpParametersToJSON(outdir) { - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def filename = "params_${timestamp}.json" - def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") - def jsonStr = JsonOutput.toJson(params) - temp_pf.text = JsonOutput.prettyPrint(jsonStr) - - FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") - temp_pf.delete() -} - -// -// When running with -profile conda, warn if channels have not been set-up appropriately -// -def checkCondaChannels() { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." 
- return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. - def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } -} +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml index e5c3a0a8..5d192cd6 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -1,38 +1,38 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "UTILS_NEXTFLOW_PIPELINE" -description: Subworkflow with functionality that may be useful for any Nextflow pipeline -keywords: - - utility - - pipeline - - initialise - - version -components: [] -input: - - print_version: - type: boolean - description: | - Print the version of the pipeline and exit - - dump_parameters: - type: boolean - description: | - Dump the parameters of the pipeline to a JSON file - - output_directory: - type: directory - description: Path to output dir to write JSON file to. - pattern: "results/" - - check_conda_channel: - type: boolean - description: | - Check if the conda channel priority is correct. 
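# [Editor's note] A hedged sketch (assumed values, not part of this metadata file) of passing the
# four inputs documented above when calling the subworkflow from a pipeline script:
#   UTILS_NEXTFLOW_PIPELINE ( params.version, true, params.outdir, workflow.profile.contains('conda') )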
-output: - - dummy_emit: - type: boolean - description: | - Dummy emit to make nf-core subworkflows lint happy -authors: - - "@adamrtalbot" - - "@drpatelh" -maintainers: - - "@adamrtalbot" - - "@drpatelh" - - "@maxulysse" +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test index 68718e4f..705a63c0 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -1,54 +1,54 @@ - -nextflow_function { - - name "Test Functions" - script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" - config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" - tag 'subworkflows' - tag 'utils_nextflow_pipeline' - tag 'subworkflows/utils_nextflow_pipeline' - - test("Test Function getWorkflowVersion") { - - function "getWorkflowVersion" - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function dumpParametersToJSON") { - - function "dumpParametersToJSON" - - when { - function { - """ - // define inputs of the function here. Example: - input[0] = "$outputDir" - """.stripIndent() - } - } - - then { - assertAll( - { assert function.success } - ) - } - } - - test("Test Function checkCondaChannels") { - - function "checkCondaChannels" - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } -} + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap index e3f0baf4..954db08d 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -1,20 +1,20 @@ -{ - "Test Function getWorkflowVersion": { - "content": [ - "v9.9.9" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:02:05.308243" - }, - "Test Function checkCondaChannels": { - "content": null, - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:02:12.425833" - } +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } } \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test index ca964ce8..415d37b1 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -1,111 +1,111 @@ -nextflow_workflow { - - name "Test Workflow UTILS_NEXTFLOW_PIPELINE" - script "../main.nf" - config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" - workflow "UTILS_NEXTFLOW_PIPELINE" - tag 'subworkflows' - tag 'utils_nextflow_pipeline' - tag 'subworkflows/utils_nextflow_pipeline' - - test("Should run no inputs") { - - when { - workflow { - """ - print_version = false - dump_parameters = false - outdir = null - check_conda_channels = false - - input[0] = print_version - input[1] = dump_parameters - input[2] = outdir - input[3] = check_conda_channels - """ - } - } - - then { - assertAll( - { assert workflow.success } - ) - } - } - - test("Should print version") { - - when { - workflow { - """ - print_version = true - dump_parameters = false - outdir = null - check_conda_channels = false - - input[0] = print_version - input[1] = dump_parameters - input[2] = outdir - input[3] = check_conda_channels - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } - ) - } - } - - test("Should dump params") { - - when { - workflow { - """ - print_version = false - dump_parameters = true - outdir = 'results' - check_conda_channels = false - - input[0] = false - input[1] = true - input[2] = outdir - input[3] = false - """ - } - } - - then { - assertAll( - { assert workflow.success } - ) - } - } - - test("Should not create params JSON if no output directory") { - - when { - workflow { - """ - print_version = false - dump_parameters = true - outdir = null - check_conda_channels = false - - input[0] = false - input[1] = true - input[2] = outdir - input[3] = false - """ - } - } - - then { - assertAll( - { assert 
workflow.success } - ) - } - } -} +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config index d0a926bf..7025bc40 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -1,9 +1,9 @@ -manifest { - name = 'nextflow_workflow' - author = """nf-core""" - homePage = 'https://127.0.0.1' - description = """Dummy pipeline""" - nextflowVersion = '!>=23.04.0' - version = '9.9.9' - doi = 'https://doi.org/10.5281/zenodo.5070524' -} +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml index f8476112..0ed583d2 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -1,2 +1,2 @@ -subworkflows/utils_nextflow_pipeline: - - subworkflows/nf-core/utils_nextflow_pipeline/** +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a8b55d6f..1c33aa1f 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -1,440 +1,440 @@ -// -// Subworkflow with utility functions specific to the nf-core pipeline template -// - -import org.yaml.snakeyaml.Yaml -import nextflow.extension.FilesEx - 
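// [Editor's note] A standalone illustration (editor's sketch, assumed values) of the version-string
// convention implemented by getWorkflowVersion() below: a 'v' prefix is added when missing, and the
// short git SHA is appended when the run comes from a repository:
//
//     def version = '1.1.0'
//     def commit  = 'abc1234def5678'
//     def tag = (version[0] != 'v' ? 'v' : '') + version + (commit ? "-g${commit.substring(0, 7)}" : '')
//     assert tag == 'v1.1.0-gabc1234'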
-/* -======================================================================================== - SUBWORKFLOW DEFINITION -======================================================================================== -*/ - -workflow UTILS_NFCORE_PIPELINE { - - take: - nextflow_cli_args - - main: - valid_config = checkConfigProvided() - checkProfileProvided(nextflow_cli_args) - - emit: - valid_config -} - -/* -======================================================================================== - FUNCTIONS -======================================================================================== -*/ - -// -// Warn if a -profile or Nextflow config has not been provided to run the pipeline -// -def checkConfigProvided() { - valid_config = true - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - valid_config = false - } - return valid_config -} - -// -// Exit pipeline if --profile contains spaces -// -def checkProfileProvided(nextflow_cli_args) { - if (workflow.profile.endsWith(',')) { - error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + - "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" - } - if (nextflow_cli_args[0]) { - log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + - "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" - } -} - -// -// Citation string for pipeline -// -def workflowCitation() { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - " ${workflow.manifest.doi}\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" -} - -// -// Generate workflow version string -// -def getWorkflowVersion() { - String version_string = "" - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string -} - -// -// Get software versions for pipeline -// -def processVersionsFromYAML(yaml_file) { - Yaml yaml = new Yaml() - versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } - return yaml.dumpAsMap(versions).trim() -} - -// -// Get workflow version for pipeline -// -def workflowVersionToYAML() { - return """ - Workflow: - $workflow.manifest.name: ${getWorkflowVersion()} - Nextflow: $workflow.nextflow.version - """.stripIndent().trim() -} - -// -// Get channel of software versions used in pipeline in YAML format -// -def softwareVersionsToYAML(ch_versions) { - return ch_versions - .unique() - .map { processVersionsFromYAML(it) } - .unique() - .mix(Channel.of(workflowVersionToYAML())) -} - -// -// Get workflow summary for MultiQC -// -def paramsSummaryMultiqc(summary_params) { - def summary_section = '' - for (group in summary_params.keySet()) { - def group_params = summary_params.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

<p style=\"font-size:110%\"><b>$group</b></p>\n" - summary_section += "    <dl class=\"dl-horizontal\">\n" - for (param in group_params.keySet()) { - summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" - } - summary_section += "    </dl>
    \n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - - return yaml_file_text -} - -// -// nf-core logo -// -def nfCoreLogo(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) -} - -// -// Return dashed line -// -def dashedLine(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" -} - -// -// ANSII colours used for terminal logging -// -def logColours(monochrome_logs=true) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes -} - -// -// Attach the multiqc report to email -// -def attachMultiqcReport(multiqc_report) { - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - return mqc_report -} - -// -// Construct and send completion email -// -def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = getWorkflowVersion() - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = attachMultiqcReport(multiqc_report) - - // Check if we are only sending emails on failure - def 
email_address = email - if (!email && email_on_fail && !workflow.success) { - email_address = email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("${workflow.projectDir}/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("${workflow.projectDir}/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(monochrome_logs) - if (email_address) { - try { - if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") - sendmail_tf.withWriter { w -> w << sendmail_html } - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); - output_hf.delete() - - // Write summary e-mail TXT to a file - def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); - output_tf.delete() -} - -// -// Print pipeline summary on completion -// -def completionSummary(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } -} - -// -// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack -// -def imNotification(summary_params, hook_url) { - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = getWorkflowVersion() - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("${workflow.projectDir}/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } -} +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. 
`-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " ${workflow.manifest.doi}\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

<p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += "    </dl>
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def 
email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g.
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml index d08d2434..735224a0 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -1,24 +1,24 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "UTILS_NFCORE_PIPELINE" -description: Subworkflow with utility functions specific to the nf-core pipeline template -keywords: - - utility - - pipeline - - initialise - - version -components: [] -input: - - nextflow_cli_args: - type: list - description: | - Nextflow CLI positional arguments -output: - - success: - type: boolean - description: | - Dummy output to indicate success -authors: - - "@adamrtalbot" -maintainers: - - "@adamrtalbot" - - "@maxulysse" +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test index 1dc317f8..c22adcb6 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -1,134 +1,134 @@ - -nextflow_function { - - name "Test Functions" - script "../main.nf" - config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" - tag "subworkflows" - tag "subworkflows_nfcore" - tag "utils_nfcore_pipeline" - tag "subworkflows/utils_nfcore_pipeline" - - test("Test Function checkConfigProvided") { - - function "checkConfigProvided" - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function checkProfileProvided") { - - function "checkProfileProvided" - - when { - function { - """ - input[0] = [] - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function workflowCitation") { - - function "workflowCitation" - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function nfCoreLogo") { - - function "nfCoreLogo" - - when { - function { - """ - input[0] = false - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function dashedLine") { - - function "dashedLine" - - when { - function { - """ - input[0] = false - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function without logColours") { - - function "logColours" - - when { - function { - """ - input[0] = true - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - 
test("Test Function with logColours") { - function "logColours" - - when { - function { - """ - input[0] = false - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } -} + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap index 1037232c..e1843841 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -1,166 +1,166 @@ -{ - "Test Function checkProfileProvided": { - "content": null, - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:03.360873" - }, - "Test Function checkConfigProvided": { - "content": [ - true - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:02:59.729647" - }, - "Test Function nfCoreLogo": { - "content": [ - "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:10.562934" - }, - "Test 
Function workflowCitation": { - "content": [ - "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:07.019761" - }, - "Test Function without logColours": { - "content": [ - { - "reset": "", - "bold": "", - "dim": "", - "underlined": "", - "blink": "", - "reverse": "", - "hidden": "", - "black": "", - "red": "", - "green": "", - "yellow": "", - "blue": "", - "purple": "", - "cyan": "", - "white": "", - "bblack": "", - "bred": "", - "bgreen": "", - "byellow": "", - "bblue": "", - "bpurple": "", - "bcyan": "", - "bwhite": "", - "ublack": "", - "ured": "", - "ugreen": "", - "uyellow": "", - "ublue": "", - "upurple": "", - "ucyan": "", - "uwhite": "", - "iblack": "", - "ired": "", - "igreen": "", - "iyellow": "", - "iblue": "", - "ipurple": "", - "icyan": "", - "iwhite": "", - "biblack": "", - "bired": "", - "bigreen": "", - "biyellow": "", - "biblue": "", - "bipurple": "", - "bicyan": "", - "biwhite": "" - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:17.969323" - }, - "Test Function dashedLine": { - "content": [ - "-\u001b[2m----------------------------------------------------\u001b[0m-" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:14.366181" - }, - "Test Function with logColours": { - "content": [ - { - "reset": "\u001b[0m", - "bold": "\u001b[1m", - "dim": "\u001b[2m", - "underlined": "\u001b[4m", - "blink": "\u001b[5m", - "reverse": "\u001b[7m", - "hidden": "\u001b[8m", - "black": "\u001b[0;30m", - "red": "\u001b[0;31m", - "green": "\u001b[0;32m", - "yellow": "\u001b[0;33m", - "blue": "\u001b[0;34m", - "purple": "\u001b[0;35m", - "cyan": "\u001b[0;36m", - "white": "\u001b[0;37m", - "bblack": "\u001b[1;30m", - "bred": "\u001b[1;31m", - "bgreen": "\u001b[1;32m", - "byellow": "\u001b[1;33m", - "bblue": "\u001b[1;34m", - "bpurple": "\u001b[1;35m", - "bcyan": "\u001b[1;36m", - "bwhite": "\u001b[1;37m", - "ublack": "\u001b[4;30m", - "ured": "\u001b[4;31m", - "ugreen": "\u001b[4;32m", - "uyellow": "\u001b[4;33m", - "ublue": "\u001b[4;34m", - "upurple": "\u001b[4;35m", - "ucyan": "\u001b[4;36m", - "uwhite": "\u001b[4;37m", - "iblack": "\u001b[0;90m", - "ired": "\u001b[0;91m", - "igreen": "\u001b[0;92m", - "iyellow": "\u001b[0;93m", - "iblue": "\u001b[0;94m", - "ipurple": "\u001b[0;95m", - "icyan": "\u001b[0;96m", - "iwhite": "\u001b[0;97m", - "biblack": "\u001b[1;90m", - "bired": "\u001b[1;91m", - "bigreen": "\u001b[1;92m", - "biyellow": "\u001b[1;93m", - "biblue": "\u001b[1;94m", - "bipurple": "\u001b[1;95m", - "bicyan": "\u001b[1;96m", - "biwhite": "\u001b[1;97m" - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:21.714424" - } +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n 
\u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": 
"23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } } \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test index 8940d32d..6d62ff3c 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -1,29 +1,29 @@ -nextflow_workflow { - - name "Test Workflow UTILS_NFCORE_PIPELINE" - script "../main.nf" - config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" - workflow "UTILS_NFCORE_PIPELINE" - tag "subworkflows" - tag "subworkflows_nfcore" - tag "utils_nfcore_pipeline" - tag "subworkflows/utils_nfcore_pipeline" - - test("Should run without failures") { - - when { - workflow { - """ - input[0] = [] - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot(workflow.out).match() } - ) - } - } -} +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap index 859d1030..f055845e 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -1,19 +1,19 @@ -{ - "Should run without failures": { - "content": [ - { - "0": [ - true - ], - "valid_config": [ - true - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:25.726491" - } +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } } \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config index d0a926bf..7025bc40 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -1,9 +1,9 @@ -manifest { - name = 'nextflow_workflow' - author = """nf-core""" - homePage = 'https://127.0.0.1' - description = """Dummy pipeline""" - nextflowVersion = '!>=23.04.0' - version = '9.9.9' - doi = 'https://doi.org/10.5281/zenodo.5070524' -} +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml index ac8523c9..b1c7127f 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -1,2 +1,2 @@ 
-subworkflows/utils_nfcore_pipeline: - - subworkflows/nf-core/utils_nfcore_pipeline/** +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf index 2585b65d..49bb879b 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -1,62 +1,62 @@ -// -// Subworkflow that uses the nf-validation plugin to render help text and parameter summary -// - -/* -======================================================================================== - IMPORT NF-VALIDATION PLUGIN -======================================================================================== -*/ - -include { paramsHelp } from 'plugin/nf-validation' -include { paramsSummaryLog } from 'plugin/nf-validation' -include { validateParameters } from 'plugin/nf-validation' - -/* -======================================================================================== - SUBWORKFLOW DEFINITION -======================================================================================== -*/ - -workflow UTILS_NFVALIDATION_PLUGIN { - - take: - print_help // boolean: print help - workflow_command // string: default command used to run pipeline - pre_help_text // string: string to be printed before help text and summary log - post_help_text // string: string to be printed after help text and summary log - validate_params // boolean: validate parameters - schema_filename // path: JSON schema file, null to use default value - - main: - - log.debug "Using schema file: ${schema_filename}" - - // Default values for strings - pre_help_text = pre_help_text ?: '' - post_help_text = post_help_text ?: '' - workflow_command = workflow_command ?: '' - - // - // Print help message if needed - // - if (print_help) { - log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text - System.exit(0) - } - - // - // Print parameter summary to stdout - // - log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text - - // - // Validate parameters relative to the parameter JSON schema - // - if (validate_params){ - validateParameters(parameters_schema: schema_filename) - } - - emit: - dummy_emit = true -} +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default command used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug
"Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml index 3d4a6b04..3f9ac684 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -1,44 +1,44 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "UTILS_NFVALIDATION_PLUGIN" -description: Use nf-validation to initiate and validate a pipeline -keywords: - - utility - - pipeline - - initialise - - validation -components: [] -input: - - print_help: - type: boolean - description: | - Print help message and exit - - workflow_command: - type: string - description: | - The command to run the workflow e.g. "nextflow run main.nf" - - pre_help_text: - type: string - description: | - Text to print before the help message - - post_help_text: - type: string - description: | - Text to print after the help message - - validate_params: - type: boolean - description: | - Validate the parameters and error if invalid. - - schema_filename: - type: string - description: | - The filename of the schema to validate against. -output: - - dummy_emit: - type: boolean - description: | - Dummy emit to make nf-core subworkflows lint happy -authors: - - "@adamrtalbot" -maintainers: - - "@adamrtalbot" - - "@maxulysse" +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. 
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test index 5784a33f..39561d0a 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -1,200 +1,200 @@ -nextflow_workflow { - - name "Test Workflow UTILS_NFVALIDATION_PLUGIN" - script "../main.nf" - workflow "UTILS_NFVALIDATION_PLUGIN" - tag "subworkflows" - tag "subworkflows_nfcore" - tag "plugin/nf-validation" - tag "'plugin/nf-validation'" - tag "utils_nfvalidation_plugin" - tag "subworkflows/utils_nfvalidation_plugin" - - test("Should run nothing") { - - when { - - params { - monochrome_logs = true - test_data = '' - } - - workflow { - """ - help = false - workflow_command = null - pre_help_text = null - post_help_text = null - validate_params = false - schema_filename = "$moduleTestDir/nextflow_schema.json" - - input[0] = help - input[1] = workflow_command - input[2] = pre_help_text - input[3] = post_help_text - input[4] = validate_params - input[5] = schema_filename - """ - } - } - - then { - assertAll( - { assert workflow.success } - ) - } - } - - test("Should run help") { - - - when { - - params { - monochrome_logs = true - test_data = '' - } - workflow { - """ - help = true - workflow_command = null - pre_help_text = null - post_help_text = null - validate_params = false - schema_filename = "$moduleTestDir/nextflow_schema.json" - - input[0] = help - input[1] = workflow_command - input[2] = pre_help_text - input[3] = post_help_text - input[4] = validate_params - input[5] = schema_filename - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert workflow.exitStatus == 0 }, - { assert workflow.stdout.any { it.contains('Input/output options') } }, - { assert workflow.stdout.any { it.contains('--outdir') } } - ) - } - } - - test("Should run help with command") { - - when { - - params { - monochrome_logs = true - test_data = '' - } - workflow { - """ - help = true - workflow_command = "nextflow run noorg/doesntexist" - pre_help_text = null - post_help_text = null - validate_params = false - schema_filename = "$moduleTestDir/nextflow_schema.json" - - input[0] = help - input[1] = workflow_command - input[2] = pre_help_text - input[3] = post_help_text - input[4] = validate_params - input[5] = schema_filename - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert workflow.exitStatus == 0 }, - { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, - { assert workflow.stdout.any { it.contains('Input/output options') } }, - { assert workflow.stdout.any { it.contains('--outdir') } } - ) - } - } - - test("Should run help with extra text") { - - - when { - - params { - monochrome_logs = true - test_data = '' - } - workflow { - """ - help = true - workflow_command = "nextflow run noorg/doesntexist" - pre_help_text = "pre-help-text" - post_help_text = "post-help-text" - validate_params = false - schema_filename = "$moduleTestDir/nextflow_schema.json" - - input[0] = help - input[1] = workflow_command - input[2] = pre_help_text - input[3] = post_help_text - input[4] = validate_params - input[5] = schema_filename - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert 
workflow.exitStatus == 0 }, - { assert workflow.stdout.any { it.contains('pre-help-text') } }, - { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, - { assert workflow.stdout.any { it.contains('Input/output options') } }, - { assert workflow.stdout.any { it.contains('--outdir') } }, - { assert workflow.stdout.any { it.contains('post-help-text') } } - ) - } - } - - test("Should validate params") { - - when { - - params { - monochrome_logs = true - test_data = '' - outdir = 1 - } - workflow { - """ - help = false - workflow_command = null - pre_help_text = null - post_help_text = null - validate_params = true - schema_filename = "$moduleTestDir/nextflow_schema.json" - - input[0] = help - input[1] = workflow_command - input[2] = pre_help_text - input[3] = post_help_text - input[4] = validate_params - input[5] = schema_filename - """ - } - } - - then { - assertAll( - { assert workflow.failed }, - { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } - ) - } - } -} +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs 
= true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json index 7626c1c9..2384acc5 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -1,96 +1,96 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", - "title": ". pipeline parameters", - "description": "", - "type": "object", - "definitions": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data and save output data.", - "required": ["outdir"], - "properties": { - "validate_params": { - "type": "boolean", - "description": "Validate parameters?", - "default": true, - "hidden": true - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - }, - "test_data_base": { - "type": "string", - "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", - "description": "Base for test data directory", - "hidden": true - }, - "test_data": { - "type": "string", - "description": "Fake test data param", - "hidden": true - } - } - }, - "generic_options": { - "title": "Generic options", - "type": "object", - "fa_icon": "fas fa-file-import", - "description": "Less common options for the pipeline, typically set in a config file.", - "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", - "properties": { - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, - "version": { - "type": "boolean", - "description": "Display version and exit.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, - "logo": { - "type": "boolean", - "default": true, - "description": "Display nf-core logo in console output.", - "fa_icon": "fas fa-image", - "hidden": true - }, - "singularity_pull_docker_container": { - "type": "boolean", - "description": "Pull Singularity container from Docker?", - "hidden": true - }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], - "hidden": true - }, - "monochrome_logs": { - "type": "boolean", - "description": "Use monochrome_logs", - "hidden": true - } - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/generic_options" - } - ] -} +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml index 60b1cfff..9d2972c2 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -1,2 +1,2 @@ -subworkflows/utils_nfvalidation_plugin: - - subworkflows/nf-core/utils_nfvalidation_plugin/** +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/tower.yml b/tower.yml old mode 100644 new mode 100755 index 787aedfe..a068cb79 --- a/tower.yml +++ b/tower.yml @@ -1,5 +1,8 @@ -reports: - multiqc_report.html: - display: "MultiQC HTML report" - samplesheet.csv: - display: "Auto-created samplesheet with collated metadata and FASTQ paths" +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" + "*_report.html": + display: "Predicted structures" + \ No newline at end of file diff --git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index c85e672b..aa0bd540 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -1,183 +1,232 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Loaded from modules/local/ -// -include { RUN_ALPHAFOLD2 } from '../modules/local/run_alphafold2' -include { RUN_ALPHAFOLD2_MSA } from '../modules/local/run_alphafold2_msa' -include { RUN_ALPHAFOLD2_PRED } from '../modules/local/run_alphafold2_pred' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { MULTIQC } from '../modules/nf-core/multiqc/main' - -// -// SUBWORKFLOW: Consisting entirely of nf-core/modules -// -include { paramsSummaryMap } from 'plugin/nf-validation' -include { fromSamplesheet } from 'plugin/nf-validation' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow ALPHAFOLD2 { - - take: - ch_versions // channel: [ path(versions.yml) ] - full_dbs // boolean: Use full databases (otherwise reduced version) - alphafold2_mode // string: Mode to run Alphafold2 in - alphafold2_model_preset // string: Specifies the model preset to use for Alphafold2 - ch_alphafold2_params // channel:
path(alphafold2_params) - ch_bfd // channel: path(bfd) - ch_small_bfd // channel: path(small_bfd) - ch_mgnify // channel: path(mgnify) - ch_pdb70 // channel: path(pdb70) - ch_pdb_mmcif // channel: path(pdb_mmcif) - ch_uniref30 // channel: path(uniref30) - ch_uniref90 // channel: path(uniref90) - ch_pdb_seqres // channel: path(pdb_seqres) - ch_uniprot // channel: path(uniprot) - - main: - ch_multiqc_files = Channel.empty() - - // - // Create input channel from input file provided through params.input - // - Channel - .fromSamplesheet("input") - .set { ch_fasta } - - if (alphafold2_model_preset != 'multimer') { - ch_fasta - .map { - meta, fasta -> - [ meta, fasta.splitFasta(file:true) ] - } - .transpose() - .set { ch_fasta } - } - - if (alphafold2_mode == 'standard') { - // - // SUBWORKFLOW: Run Alphafold2 standard mode - // - RUN_ALPHAFOLD2 ( - ch_fasta, - full_dbs, - alphafold2_model_preset, - ch_alphafold2_params, - ch_bfd, - ch_small_bfd, - ch_mgnify, - ch_pdb70, - ch_pdb_mmcif, - ch_uniref30, - ch_uniref90, - ch_pdb_seqres, - ch_uniprot - ) - ch_multiqc_rep = RUN_ALPHAFOLD2.out.multiqc.collect() - ch_versions = ch_versions.mix(RUN_ALPHAFOLD2.out.versions) - - } else if (alphafold2_mode == 'split_msa_prediction') { - // - // SUBWORKFLOW: Run Alphafold2 split mode, MSA and predicition - // - RUN_ALPHAFOLD2_MSA ( - ch_fasta, - full_dbs, - alphafold2_model_preset, - ch_alphafold2_params, - ch_bfd, - ch_small_bfd, - ch_mgnify, - ch_pdb70, - ch_pdb_mmcif, - ch_uniref30, - ch_uniref90, - ch_pdb_seqres, - ch_uniprot - ) - ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_MSA.out.versions) - - RUN_ALPHAFOLD2_PRED ( - ch_fasta, - full_dbs, - alphafold2_model_preset, - ch_alphafold2_params, - ch_bfd, - ch_small_bfd, - ch_mgnify, - ch_pdb70, - ch_pdb_mmcif, - ch_uniref30, - ch_uniref90, - ch_pdb_seqres, - ch_uniprot, - RUN_ALPHAFOLD2_MSA.out.features - ) - ch_multiqc_rep = RUN_ALPHAFOLD2_PRED.out.multiqc.collect() - ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_PRED.out.versions) - } - - // - // Collate and save software versions - // - softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_collated_versions } - - // - // MODULE: MultiQC - // - ch_multiqc_report = Channel.empty() - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - ch_multiqc_report = MULTIQC.out.report.toList() - - emit: - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Loaded from modules/local/ +// +include { RUN_ALPHAFOLD2 } from '../modules/local/run_alphafold2' +include { RUN_ALPHAFOLD2_MSA } from '../modules/local/run_alphafold2_msa' +include { RUN_ALPHAFOLD2_PRED } from '../modules/local/run_alphafold2_pred' +include { EXTRACT_OUTPUTS } from '../modules/local/extract_outputs' +include { GENERATE_REPORT } from '../modules/local/generat_report' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { MULTIQC } from '../modules/nf-core/multiqc/main' + +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow ALPHAFOLD2 { + + take: + full_dbs // boolean: Use full databases (otherwise reduced version) + alphafold2_mode // string: Mode to run Alphafold2 in + alphafold2_model_preset // string: Specifies the model preset to use for Alphafold2 + ch_alphafold2_params // channel: path(alphafold2_params) + ch_bfd // channel: path(bfd) + ch_small_bfd // channel: path(small_bfd) + ch_mgnify // channel: path(mgnify) + ch_pdb70 // channel: path(pdb70) + ch_pdb_mmcif // channel: path(pdb_mmcif) + ch_uniref30 // channel: path(uniref30) + ch_uniref90 // channel: path(uniref90) + ch_pdb_seqres // channel: path(pdb_seqres) + ch_uniprot // channel: path(uniprot) + + main: + ch_multiqc_files = Channel.empty() + ch_versions = 
Channel.empty() + + // + // Create input channel from input file provided through params.input + // + + Channel + .fromSamplesheet("input") + .set { ch_fasta } + + if (alphafold2_model_preset != 'multimer') { + ch_fasta + .map { + meta, fasta -> + [ meta, fasta.splitFasta(file:true) ] + } + .transpose() + .set { ch_fasta } + } + ch_alphafold_outputs = Channel.empty() + + if (alphafold2_mode == 'standard') { + // + // SUBWORKFLOW: Run Alphafold2 standard mode + // + //full_dbs.view() + //ch_alphafold2_params.view() + + RUN_ALPHAFOLD2 ( + ch_fasta, + full_dbs, + alphafold2_model_preset, + ch_alphafold2_params, + ch_bfd, + ch_small_bfd, + ch_mgnify, + ch_pdb70, + ch_pdb_mmcif, + ch_uniref30, + ch_uniref90, + ch_pdb_seqres, + ch_uniprot + ) + RUN_ALPHAFOLD2.out.af_out_tsv + .map{[it[0], it[1].findAll{ it.getName().contains("_lddt_")}]} + .set{ch_af_out_lddt} + + RUN_ALPHAFOLD2.out.af_out_tsv + .map{[it[0], it[1].findAll{ it.getName().contains("_msa.tsv")}]} + .set{ch_af_out_msa} + + RUN_ALPHAFOLD2.out.af_out_pdb.set{ch_af_out_pdb} + + ch_multiqc_rep = RUN_ALPHAFOLD2.out.multiqc.collect() + ch_versions = ch_versions.mix(RUN_ALPHAFOLD2.out.versions) + + + } else if (alphafold2_mode == 'split_msa_prediction') { + // + // SUBWORKFLOW: Run Alphafold2 split mode, MSA and prediction + // + RUN_ALPHAFOLD2_MSA ( + ch_fasta, + full_dbs, + alphafold2_model_preset, + ch_alphafold2_params, + ch_bfd, + ch_small_bfd, + ch_mgnify, + ch_pdb70, + ch_pdb_mmcif, + ch_uniref30, + ch_uniref90, + ch_pdb_seqres, + ch_uniprot + ) + ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_MSA.out.versions) + + RUN_ALPHAFOLD2_MSA.out.features + .map{[it[0].id, it[0], it[1]]} + .join( + ch_fasta + .map{[it[0].id, it[0], it[1]]} + ) + .set{ch_af_all} + + RUN_ALPHAFOLD2_PRED ( + ch_af_all.map{[it[3], it[4]]}, + full_dbs, + alphafold2_model_preset, + ch_alphafold2_params, + ch_bfd, + ch_small_bfd, + ch_mgnify, + ch_pdb70, + ch_pdb_mmcif, + ch_uniref30, + ch_uniref90, + ch_pdb_seqres, + ch_uniprot, + ch_af_all.map{it[2]} + ) + ch_multiqc_rep = RUN_ALPHAFOLD2_PRED.out.multiqc.map{ it[1] }.collect() + ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_PRED.out.versions) + RUN_ALPHAFOLD2_PRED.out.af_out_tsv + .map{[it[0], it[1].findAll{ it.getName().contains("_lddt_")}]} + .set{ch_af_out_lddt} + + RUN_ALPHAFOLD2_PRED.out.af_out_tsv + .map{[it[0], it[1].findAll{ it.getName().contains("_msa.tsv")}]} + .set{ch_af_out_msa} + + RUN_ALPHAFOLD2_PRED.out.af_out_pdb.set{ch_af_out_pdb} + + } + + GENERATE_REPORT( + ch_af_out_msa, + ch_af_out_lddt, + ch_af_out_pdb, + Channel.fromPath("$projectDir/assets/proteinfold_template.html").first(), + Channel.value("ALPHAFOLD2") + ) + + + + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } + + // + // MODULE: MultiQC + // + ch_multiqc_report = Channel.empty() + if (!params.skip_multiqc) { + ch_multiqc_report = Channel.empty() + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ?
Channel.fromPath( params.multiqc_logo ) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + ch_multiqc_report = MULTIQC.out.report.toList() + } + + emit: + pdb = ch_af_out_pdb + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index dd38fd0f..345b0623 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -1,172 +1,175 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Loaded from modules/local/ -// -include { COLABFOLD_BATCH } from '../modules/local/colabfold_batch' -include { MMSEQS_COLABFOLDSEARCH } from '../modules/local/mmseqs_colabfoldsearch' -include { MULTIFASTA_TO_CSV } from '../modules/local/multifasta_to_csv' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { MULTIQC } from '../modules/nf-core/multiqc/main' - -// -// SUBWORKFLOW: Consisting entirely of nf-core/modules -// -include { paramsSummaryMap } from 'plugin/nf-validation' -include { fromSamplesheet } from 'plugin/nf-validation' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow COLABFOLD { - - take: - ch_versions // channel: [ path(versions.yml) ] - colabfold_model_preset // string: Specifies the model preset to use for colabfold - ch_colabfold_params // channel: path(colabfold_params) - ch_colabfold_db // channel: path(colabfold_db) - ch_uniref30 // channel: path(uniref30) - num_recycle // int: Number of recycles for 
esmfold - - main: - ch_multiqc_files = Channel.empty() - - // - // Create input channel from input file provided through params.input - // - Channel - .fromSamplesheet("input") - .set { ch_fasta } - - if (params.colabfold_server == 'webserver') { - // - // MODULE: Run colabfold - // - if (params.colabfold_model_preset != 'alphafold2_ptm' && params.colabfold_model_preset != 'alphafold2') { - MULTIFASTA_TO_CSV( - ch_fasta - ) - ch_versions = ch_versions.mix(MULTIFASTA_TO_CSV.out.versions) - COLABFOLD_BATCH( - MULTIFASTA_TO_CSV.out.input_csv, - colabfold_model_preset, - ch_colabfold_params, - [], - [], - num_recycle - ) - ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) - } else { - COLABFOLD_BATCH( - ch_fasta, - colabfold_model_preset, - ch_colabfold_params, - [], - [], - num_recycle - ) - ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) - } - - } else if (params.colabfold_server == 'local') { - // - // MODULE: Run mmseqs - // - if (params.colabfold_model_preset != 'AlphaFold2-ptm') { - MULTIFASTA_TO_CSV( - ch_fasta - ) - ch_versions = ch_versions.mix(MULTIFASTA_TO_CSV.out.versions) - MMSEQS_COLABFOLDSEARCH ( - MULTIFASTA_TO_CSV.out.input_csv, - ch_colabfold_params, - ch_colabfold_db, - ch_uniref30 - ) - ch_versions = ch_versions.mix(MMSEQS_COLABFOLDSEARCH.out.versions) - } else { - MMSEQS_COLABFOLDSEARCH ( - ch_fasta, - ch_colabfold_params, - ch_colabfold_db, - ch_uniref30 - ) - ch_versions = ch_versions.mix(MMSEQS_COLABFOLDSEARCH.out.versions) - } - - // - // MODULE: Run colabfold - // - COLABFOLD_BATCH( - MMSEQS_COLABFOLDSEARCH.out.a3m, - colabfold_model_preset, - ch_colabfold_params, - ch_colabfold_db, - ch_uniref30, - num_recycle - ) - ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) - } - - // - // Collate and save software versions - // - softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_collated_versions } - - // - // MODULE: MultiQC - // - ch_multiqc_report = Channel.empty() - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
-    ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description))
-
-    ch_multiqc_files = Channel.empty()
-    ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
-    ch_multiqc_files = ch_multiqc_files.mix(COLABFOLD_BATCH.out.multiqc.collect())
-
-    MULTIQC (
-        ch_multiqc_files.collect(),
-        ch_multiqc_config.toList(),
-        ch_multiqc_custom_config.toList(),
-        ch_multiqc_logo.toList()
-    )
-    ch_multiqc_report = MULTIQC.out.report.toList()
-
-    emit:
-    multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html
-    versions = ch_versions // channel: [ path(versions.yml) ]
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    THE END
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT LOCAL MODULES/SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+//
+// MODULE: Loaded from modules/local/
+//
+include { COLABFOLD_BATCH } from '../modules/local/colabfold_batch'
+include { MMSEQS_COLABFOLDSEARCH } from '../modules/local/mmseqs_colabfoldsearch'
+include { MULTIFASTA_TO_CSV } from '../modules/local/multifasta_to_csv'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT NF-CORE MODULES/SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+//
+// MODULE: Installed directly from nf-core/modules
+//
+include { MULTIQC } from '../modules/nf-core/multiqc/main'
+
+//
+// SUBWORKFLOW: Consisting entirely of nf-core/modules
+//
+include { paramsSummaryMap } from 'plugin/nf-validation'
+include { fromSamplesheet } from 'plugin/nf-validation'
+include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    RUN MAIN WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+workflow COLABFOLD {
+
+    take:
+    ch_versions // channel: [ path(versions.yml) ]
+    colabfold_model_preset // string: Specifies the model preset to use for colabfold
+    ch_colabfold_params // channel: path(colabfold_params)
+    ch_colabfold_db // channel: path(colabfold_db)
+    ch_uniref30 // channel: path(uniref30)
+    num_recycles // int: Number of recycles for colabfold
+
+    main:
+    ch_multiqc_files = Channel.empty()
+
+    //
+    // Create input channel from input file provided through params.input
+    //
+    Channel
+        .fromSamplesheet("input")
+        .set { ch_fasta }
+
+    if (params.colabfold_server == 'webserver') {
+        //
+        // MODULE: Run colabfold
+        //
+        if (params.colabfold_model_preset != 'alphafold2_ptm' && params.colabfold_model_preset != 'alphafold2') {
+            MULTIFASTA_TO_CSV(
+                ch_fasta
+            )
+            ch_versions = 
ch_versions.mix(MULTIFASTA_TO_CSV.out.versions) + COLABFOLD_BATCH( + MULTIFASTA_TO_CSV.out.input_csv, + colabfold_model_preset, + ch_colabfold_params, + [], + [], + num_recycles + ) + ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) + } else { + COLABFOLD_BATCH( + ch_fasta, + colabfold_model_preset, + ch_colabfold_params, + [], + [], + num_recycles + ) + ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) + } + + } else if (params.colabfold_server == 'local') { + // + // MODULE: Run mmseqs + // + if (params.colabfold_model_preset != 'alphafold2_ptm') { + MULTIFASTA_TO_CSV( + ch_fasta + ) + ch_versions = ch_versions.mix(MULTIFASTA_TO_CSV.out.versions) + MMSEQS_COLABFOLDSEARCH ( + MULTIFASTA_TO_CSV.out.input_csv, + ch_colabfold_params, + ch_colabfold_db, + ch_uniref30 + ) + ch_versions = ch_versions.mix(MMSEQS_COLABFOLDSEARCH.out.versions) + } else { + MMSEQS_COLABFOLDSEARCH ( + ch_fasta, + ch_colabfold_params, + ch_colabfold_db, + ch_uniref30 + ) + ch_versions = ch_versions.mix(MMSEQS_COLABFOLDSEARCH.out.versions) + } + + // + // MODULE: Run colabfold + // + COLABFOLD_BATCH( + MMSEQS_COLABFOLDSEARCH.out.a3m, + colabfold_model_preset, + ch_colabfold_params, + ch_colabfold_db, + ch_uniref30, + num_recycles + ) + ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) + } + + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } + + // + // MODULE: MultiQC + // + ch_multiqc_report = Channel.empty() + if (!params.skip_multiqc) { + ch_multiqc_report = Channel.empty() + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(COLABFOLD_BATCH.out.multiqc.collect()) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + ch_multiqc_report = MULTIQC.out.report.toList() + } + + emit: + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/esmfold.nf b/workflows/esmfold.nf index 4bf7e2a4..68aafede 100644 --- a/workflows/esmfold.nf +++ b/workflows/esmfold.nf @@ -1,120 +1,143 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Loaded from modules/local/ -// -include { RUN_ESMFOLD } from '../modules/local/run_esmfold' -include { MULTIFASTA_TO_SINGLEFASTA } from '../modules/local/multifasta_to_singlefasta' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { MULTIQC } from '../modules/nf-core/multiqc/main' - -// -// SUBWORKFLOW: Consisting entirely of nf-core/modules -// -include { paramsSummaryMap } from 'plugin/nf-validation' -include { fromSamplesheet } from 'plugin/nf-validation' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow ESMFOLD { - - take: - ch_versions // channel: [ path(versions.yml) ] - ch_esmfold_params // directory: /path/to/esmfold/params/ - ch_num_recycle // int: Number of recycles for esmfold - - main: - ch_multiqc_files = Channel.empty() - - // - // Create input channel from input file provided through params.input - // - Channel - .fromSamplesheet("input") - .set { ch_fasta } - - // - // MODULE: Run esmfold - // - if (params.esmfold_model_preset != 'monomer') { - MULTIFASTA_TO_SINGLEFASTA( - ch_fasta - ) - ch_versions = ch_versions.mix(MULTIFASTA_TO_SINGLEFASTA.out.versions) - RUN_ESMFOLD( - MULTIFASTA_TO_SINGLEFASTA.out.input_fasta, - ch_esmfold_params, - ch_num_recycle - ) - ch_versions = 
ch_versions.mix(RUN_ESMFOLD.out.versions) - } else { - RUN_ESMFOLD( - ch_fasta, - ch_esmfold_params, - ch_num_recycle - ) - ch_versions = ch_versions.mix(RUN_ESMFOLD.out.versions) - } - - // - // Collate and save software versions - // - softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_collated_versions } - - // - // MODULE: MultiQC - // - ch_multiqc_report = Channel.empty() - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_methods_description)) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(RUN_ESMFOLD.out.multiqc.collect()) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - ch_multiqc_report = MULTIQC.out.report.toList() - emit: - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Loaded from modules/local/ +// +include { RUN_ESMFOLD } from '../modules/local/run_esmfold' +include { MULTIFASTA_TO_SINGLEFASTA } from '../modules/local/multifasta_to_singlefasta' +include { GENERATE_REPORT } from '../modules/local/generat_report' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { MULTIQC } from '../modules/nf-core/multiqc/main' + + +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' 
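+
+//
+// Input shape note (illustrative): `Channel.fromSamplesheet("input")` emits one
+// [ meta, fasta ] tuple per samplesheet row, e.g.
+//
+//   [ [id:'T1024'], /path/to/T1024.fasta ]
+//
+// The channel manipulations below assume this shape (`it[0]` = meta, `it[1]` = file).
+//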
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    RUN MAIN WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+workflow ESMFOLD {
+
+    take:
+    ch_versions // channel: [ path(versions.yml) ]
+    ch_esmfold_params // directory: /path/to/esmfold/params/
+    ch_num_recycles // int: Number of recycles for esmfold
+
+    main:
+    ch_multiqc_files = Channel.empty()
+
+    //
+    // Create input channel from input file provided through params.input
+    //
+    Channel
+        .fromSamplesheet("input")
+        .set { ch_fasta }
+
+    //
+    // MODULE: Run esmfold
+    //
+    if (params.esmfold_model_preset != 'monomer') {
+        MULTIFASTA_TO_SINGLEFASTA(
+            ch_fasta
+        )
+        ch_versions = ch_versions.mix(MULTIFASTA_TO_SINGLEFASTA.out.versions)
+        RUN_ESMFOLD(
+            MULTIFASTA_TO_SINGLEFASTA.out.input_fasta,
+            ch_esmfold_params,
+            ch_num_recycles
+        )
+        ch_versions = ch_versions.mix(RUN_ESMFOLD.out.versions)
+    } else {
+        RUN_ESMFOLD(
+            ch_fasta,
+            ch_esmfold_params,
+            ch_num_recycles
+        )
+        ch_versions = ch_versions.mix(RUN_ESMFOLD.out.versions)
+    }
+
+    // Key MultiQC tables and PDB outputs by meta.id so records from the
+    // same sample are paired before report generation
+    RUN_ESMFOLD.out.multiqc
+        .map{[it[0].id, it[0], it[1]]}
+        .join(
+            RUN_ESMFOLD.out.pdb
+                .map{[it[0].id, it[0], it[1]]}
+        ).set{ch_all}
+
+    GENERATE_REPORT(
+        Channel.value([["id":"TEMP"], file("$projectDir/assets/NO_FILE")]), // placeholder MSA entry: ESMFold does not produce an MSA
+        ch_all.map{[it[1], [it[2]]]},
+        ch_all.map{[it[3], [it[4]]]},
+        Channel.fromPath("$projectDir/assets/proteinfold_template.html", checkIfExists:true).first(),
+        Channel.value("ESM-FOLD")
+    )
+
+    //
+    // Collate and save software versions
+    //
+    softwareVersionsToYAML(ch_versions)
+        .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true)
+        .set { ch_collated_versions }
+
+    //
+    // MODULE: MultiQC
+    //
+    ch_multiqc_report = Channel.empty()
+    if (!params.skip_multiqc) {
+        ch_multiqc_report = Channel.empty()
+        ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+        ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty()
+        ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty()
+        summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
+        ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
+        ch_multiqc_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_methods_description)) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(RUN_ESMFOLD.out.multiqc.map{it[1]}.collect()) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + ch_multiqc_report = MULTIQC.out.report.toList() + } + + emit: + pdb = RUN_ESMFOLD.out.pdb + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/
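+
+//
+// Usage sketch (illustrative, assuming the caller prepares the take: channels):
+//
+//   workflow {
+//       ESMFOLD ( ch_versions, ch_esmfold_params, ch_num_recycles )
+//       ESMFOLD.out.pdb.view { meta, pdb -> "predicted ${meta.id}: ${pdb}" }
+//   }
+//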