diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe3..b290e090 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -10,15 +10,7 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", - "python.formatting.yapfPath": "/opt/conda/bin/yapf", - "python.linting.flake8Path": "/opt/conda/bin/flake8", - "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", - "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. diff --git a/.editorconfig b/.editorconfig index 8719a7f9..89dd3564 100644 --- a/.editorconfig +++ b/.editorconfig @@ -18,17 +18,32 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset + +# These files are edited and tested upstream in pfr/modules +[/modules/pfr/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +[/subworkflows/pfr/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset -# ignore Readme -[README.md] -indent_style = unset - -# ignore python -[*.{py}] +# ignore python and markdown +[*.{py,md}] indent_style = unset # ignore perl diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index edd71437..b8059542 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -23,8 +23,11 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests -You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to -receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. +You can test your changes locally by running the pipeline. To receive warnings about process selectors and other debug information, it is recommended to use the `debug` profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -81,7 +84,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. 
A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 50053a1f..3da295ca 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -16,7 +16,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/plant-food-r - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/plant-food-research-open/assemblyqc/tree/master/.github/CONTRIBUTING.md) -- [ ] Make sure your code lints (`nf-core lint`) and (`pre-commit run --all`) +- [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 752ea1bf..9186934e 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -19,7 +19,7 @@ jobs: # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v2 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4ff4e1f8..89f347df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,19 +25,32 @@ jobs: matrix: NXF_VER: - "23.04.0" - RUN_CONFIG: - - "-profile test,docker" - - "-profile docker -c ./tests/stub/stub.config -stub" - - "-profile docker -params-file ./tests/invalid/params.json" + TEST_PARAMS: + - minimal + - invalid + - stub + include: + - OPTION_STUB: "" + - OPTION_STUB: "-stub" + TEST_PARAMS: stub steps: - name: Check out pipeline code - uses: actions/checkout@v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 with: version: ${{ matrix.NXF_VER }} + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Run pipeline with test data run: | - nextflow run ${GITHUB_WORKSPACE} --outdir ./results ${{ matrix.RUN_CONFIG }} + nextflow run \ + ${GITHUB_WORKSPACE} \ + --outdir ./results \ + -profile docker \ + -params-file \ + ./tests/${{ matrix.TEST_PARAMS }}/params.json \ + ${{ matrix.OPTION_STUB }} diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 
e37cfda5..0b6b1f27 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v9 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 69574376..2d20d644 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -6,9 +6,16 @@ name: Test successful pipeline download with 'nf-core download' # - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. on: workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." + required: true + default: "dev" pull_request: types: - opened + - edited + - synchronize branches: - master pull_request_target: @@ -20,16 +27,19 @@ env: jobs: download: - runs-on: ["self-hosted"] + runs-on: ubuntu-latest steps: - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@v5 + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - - uses: eWaterCycle/setup-singularity@v7 + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 with: singularity-version: 3.8.3 @@ -42,13 +52,13 @@ jobs: run: | echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} - echo "REPO_BRANCH=${GITHUB_REF#refs/heads/}" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} - name: Download the pipeline env: NXF_SINGULARITY_CACHEDIR: ./ run: | - nf-core download ${{ env.REPO_LOWERCASE }} \ + nf-core download ${{ env.REPO_LOWERCASE }} \ --revision ${{ env.REPO_BRANCH }} \ --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ --compress "none" \ @@ -60,8 +70,17 @@ jobs: - name: Inspect download run: tree ./${{ env.REPOTITLE_LOWERCASE }} - - name: Run the downloaded pipeline + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true env: NXF_SINGULARITY_CACHEDIR: ./ NXF_SINGULARITY_HOME_MOUNT: true run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ steps.stub_run_pipeline.outcome == 'failure' }} + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 41963bec..31ca9786 100644 --- a/.github/workflows/fix-linting.yml +++ 
b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,25 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - name: Set up Python 3.11 - uses: actions/setup-python@v5 + # Install and run pre-commit + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 - cache: "pip" + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit - name: Run pre-commit - run: pre-commit run --all-files || echo "status=fail" >> $GITHUB_ENV + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true + + # indication that the linting has finished + - name: react if linting finished successfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit & push changes - if: env.status == 'fail' + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . git status - git commit -m "[automated] Fix linting with pre-commit" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: comment if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/plant-food-research-open/assemblyqc/actions/runs/${{ github.run_id }}) for more details. 
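The fix-linting workflow above relies on a standard GitHub Actions pattern: a step marked `continue-on-error: true` records its result in `steps.<id>.outcome`, and later steps key their `if:` conditions off that value. A minimal, self-contained sketch of the same pattern in isolation (hypothetical step ids and commands, not part of this PR):

```yaml
name: outcome-gating-example
on: workflow_dispatch

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      # Simulated lint step: it fails, but continue-on-error keeps the job alive
      - id: lint
        run: exit 1
        continue-on-error: true

      # Runs only because the step above recorded outcome == 'failure';
      # this step's own outcome can gate further follow-up steps the same way
      - id: autofix
        if: steps.lint.outcome == 'failure'
        run: echo "attempting automated fixes"
```

Note that `steps.<id>.outcome` reflects a step's result before `continue-on-error` is applied, while `steps.<id>.conclusion` reflects the result after; the gating above depends on that distinction.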
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index f807f5e2..05605419 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,13 +14,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - name: Set up Python 3.11 - uses: actions/setup-python@v5 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 - cache: "pip" + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit @@ -28,42 +27,61 @@ jobs: - name: Run pre-commit run: pre-commit run --all-files - # nf-core: - # runs-on: ubuntu-latest - # steps: - # - name: Check out pipeline code - # uses: actions/checkout@v4 + nf-core: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - # - name: Install Nextflow - # uses: nf-core/setup-nextflow@v1 + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 - # - uses: actions/setup-python@v5 - # with: - # python-version: "3.11" - # architecture: "x64" + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" - # - name: Install dependencies - # run: | - # python -m pip install --upgrade pip - # pip install nf-core + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core - # - name: Run nf-core lint - # env: - # GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - # run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + - name: Run nf-core lint + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: | + nf-core -l lint_log.txt lint \ + --dir ${GITHUB_WORKSPACE} \ + --markdown lint_results.md \ + --key actions_ci \ + --key actions_schema_validation \ + --key base_config \ + --key files_exist \ + --key files_unchanged \ + --key merge_markers \ + --key modules_config \ + --key nextflow_config \ + --key nfcore_yml \ + --key pipeline_name_conventions \ + --key pipeline_todos \ + --key readme \ + --key schema_description \ + --key schema_lint \ + --key schema_params \ + --key system_exit - # - name: Save PR number - # if: ${{ always() }} - # run: echo ${{ github.event.pull_request.number }} > PR_number.txt + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt - # - name: Upload linting log file artifact - # if: ${{ always() }} - # uses: actions/upload-artifact@v4 - # with: - # name: linting-logs - # path: | - # lint_log.txt - # lint_results.md - # PR_number.txt + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 147bcd10..40acc23f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: 
dawidd6/action-download-artifact@v3 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.gitpod.yml b/.gitpod.yml index 363d5b1d..105a1821 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -10,13 +10,11 @@ tasks: vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 61d19b4d..87685f2a 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -7,17 +7,22 @@ lint: - .github/ISSUE_TEMPLATE/config.yml - .github/workflows/awstest.yml - .github/workflows/awsfulltest.yml + - assets/multiqc_config.yml + - conf/igenomes.config files_unchanged: - CODE_OF_CONDUCT.md - assets/nf-core-assemblyqc_logo_light.png - docs/images/nf-core-assemblyqc_logo_light.png - docs/images/nf-core-assemblyqc_logo_dark.png - - .github/ISSUE_TEMPLATE/bug_report.yml - multiqc_config: - - report_comment + - docs/README.md + - .github/CONTRIBUTING.md + - .github/workflows/linting.yml + - LICENSE nextflow_config: - manifest.name - manifest.homePage + template_strings: False +nf_core_version: 2.14.1 repository_type: pipeline template: prefix: plant-food-research-open diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bc85d767..9b0780ba 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,6 +3,9 @@ repos: rev: "v3.1.0" hooks: - id: prettier + additional_dependencies: + - prettier@3.2.5 + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python rev: "2.7.3" hooks: diff --git a/CHANGELOG.md b/CHANGELOG.md index dc2e51c8..9ae1f69e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,53 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v2.0.0 - [04-June-2024] + +### `Added` + +1. Updated nf-core/template to 2.14.1 +2. Removed release-announcements GitHub workflow +3. Added a list of nf-core contributors +4. Added a launcher script for local testing `local_assemblyqc` +5. Added a custom `BUNDLELINKS` module which respects direction when bundling `DNADIFF` links [#82](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/82) +6. 
Added the ability to create a linear synteny plot in addition to the circos plot [#74](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/74) +7. Updated modules and sub-workflows: `BWA/INDEX`, `BWA/MEM`, `CAT/CAT`, `CUSTOM/RESTOREGFFIDS`, `CUSTOM/SHORTENFASTAIDS`, `GT/GFF3`, `GT/GFF3VALIDATOR`, `GT/STAT`, `LTRFINDER`, `LTRHARVEST`, `LTRRETRIEVER/LAI`, `LTRRETRIEVER/LTRRETRIEVER`, `SAMBLASTER`, `FASTA_LTRRETRIEVER_LAI`, `FASTQ_BWA_MEM_SAMBLASTER`, `GFF3_VALIDATE`, `CUSTOM/SRATOOLSNCBISETTINGS`, `FASTP`, `FASTQC`, `UNTAR`, `SEQKIT/SEQ`, `SEQKIT/SORT`, `FASTA_EXPLORE_SEARCH_PLOT_TIDK` +8. Added the `contamination_stops_pipeline` flag which, when set to `false`, allows the pipeline to continue even if contamination is detected. Its default value is `true` [#54](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/54) +9. Now fasta ids are sorted in natural order for the HiC module [#76](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/76) +10. Now using `FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS` for SRA downloads +11. Added `MERQURY` module [#85](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/85) +12. Replaced `GFF3_VALIDATE` sub-workflow with `GFF3_GT_GFF3_GFF3VALIDATOR_STAT` +13. Replaced local `BUSCO` module with `FASTA_GXF_BUSCO_PLOT` sub-workflow [#75](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/75) +14. Replaced local `NCBI_FCS_ADAPTOR` with nf-core module and updated to 0.5.0 which includes additional adaptors for PacBio and Nanopore technologies [#55](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/55) +15. Added PLOTSR [#77](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/77) +16. Added [JADWOS01](https://www.ncbi.nlm.nih.gov/datasets/genome/GCA_016859245.1/) assembly to xrefsheet for successfully running PLOTSR +17. Now detecting duplicate sequences with `SEQKIT/RMDUP` [#64](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/64) + +### `Fixed` + +1. Fixed a bug which caused NCBI_FCS_GX to not resume [#80](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/80) +2. Restored the original version of `nf-core/subworkflows/fastq_trim_fastp_fastqc` +3. Fixed nf-core linting +4. Updated `tower.yml` +5. Updated LICENSE copyright to Copyright (c) 2024 The New Zealand Institute for Plant and Food Research Limited [#81](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/81) +6. `RUNASSEMBLYVISUALIZER` is now single-threaded for successful execution on both Linux and macOS +7. Fixed Java memory overflow issues in `RUNASSEMBLYVISUALIZER` +8. Updated `FASTA_LTRRETRIEVER_LAI` to fix a pipeline crash when `ch_monoploid_seqs` was `[ meta, [] ]` [#83](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/83) +9. Improved input assembly documentation [#86](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/86) +10. Added assembly tag to synteny warning message regarding missing `synteny_labels` file +11. Now copying files in `NCBI_FCS_GX_SETUP_SAMPLE` rather than symlinking in an attempt to support NextFlow Fusion + +### `Dependencies` + +1. NextFlow!>=23.04.0 +2. nf-validation@1.1.3 + +### `Deprecated` + +1. Removed `CIRCOS_BUNDLELINKS` module +2. Now the default value of `synteny_plot_1_vs_all` is `false` +3. 
Replaced module `CUSTOM/CHECKGFF3FASTACORRESPONDENCE` with a local Groovy function in `GFF3_GT_GFF3_GFF3VALIDATOR_STAT` sub-workflow + ## 1.4 - [04-Mar-2024] ### `Added` diff --git a/CITATION.cff b/CITATION.cff index efb1b209..23125eae 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -21,7 +21,7 @@ authors: - family-names: "Deng" given-names: "Cecilia" title: "AssemblyQC: A NextFlow pipeline for evaluating assembly quality" -version: 1.4 +version: 2.0.0 date-released: 2024-02-12 url: "https://github.com/Plant-Food-Research-Open/assembly_qc" doi: 10.5281/zenodo.10647870 diff --git a/CITATIONS.md b/CITATIONS.md index 63cbdd2b..fbd8036d 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -40,6 +40,10 @@ > Manni M, Berkeley MR, Seppey M, Simão FA, Zdobnov EM. 2021. BUSCO Update: Novel and Streamlined Workflows along with Broader and Deeper Phylogenetic Coverage for Scoring of Eukaryotic, Prokaryotic, and Viral Genomes, Molecular Biology and Evolution, Volume 38, Issue 10, October 2021, Pages 4647–4654, +- GFFREAD, [MIT](https://github.com/gpertea/gffread/blob/master/LICENSE) + + > Pertea G, Pertea M. GFF Utilities: GffRead and GffCompare. F1000Res. 2020 Apr 28;9:ISCB Comm J-304. doi: . PMID: 32489650; PMCID: PMC7222033. + - TIDK, [MIT](https://github.com/tolkit/telomeric-identifier/blob/main/LICENSE) > @@ -112,6 +116,22 @@ > Marçais G, Delcher AL, Phillippy AM, Coston R, Salzberg SL, Zimin A. 2018. MUMmer4: A fast and versatile genome alignment system. PLoS Comput Biol. 2018 Jan 26;14(1):e1005944. doi: . PMID: 29373581; PMCID: PMC5802927. +- PLOTSR, [MIT](https://github.com/schneebergerlab/plotsr/blob/master/LICENSE) + + > Goel M, Schneeberger K. 2022. plotsr: visualizing structural similarities and rearrangements between multiple genomes. Bioinformatics. 2022 May 13;38(10):2922-2926. doi: . PMID: 35561173; PMCID: PMC9113368. + +- SYRI, [MIT](https://github.com/schneebergerlab/syri/blob/master/LICENSE) + + > Goel M, Sun H, Jiao WB, Schneeberger K. 2019. SyRI: finding genomic rearrangements and local sequence differences from whole-genome assemblies. Genome Biol. 2019 Dec 16;20(1):277. doi: . PMID: 31842948; PMCID: PMC6913012. + +- MINIMAP2, [MIT](https://github.com/lh3/minimap2/blob/master/LICENSE.txt) + + > Li H. 2021. New strategies to improve minimap2 alignment accuracy, Bioinformatics, Volume 37, Issue 23, December 2021, Pages 4572–4574, doi: + +- MERQURY, [United States Government Work](https://github.com/marbl/merqury?tab=License-1-ov-file#readme) + + > Rhie, A., Walenz, B.P., Koren, S. et al. 2020. Merqury: reference-free quality, completeness, and phasing assessment for genome assemblies. Genome Biol 21, 245. 
doi: + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/LICENSE b/LICENSE index 96e3eb88..2ef2204c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Usman Rashid, Ken Smith, Ross Crowhurst, Chen Wu, Marcus Davy +Copyright (c) 2024 The New Zealand Institute for Plant and Food Research Limited Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index ba0154c5..74f56c0d 100644 --- a/README.md +++ b/README.md @@ -1,65 +1,88 @@ -[![GitHub Actions CI Status](https://github.com/plant-food-research-open/assemblyqc/workflows/nf-core%20CI/badge.svg)](https://github.com/plant-food-research-open/assemblyqc/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/plant-food-research-open/assemblyqc/workflows/nf-core%20linting/badge.svg)](https://github.com/plant-food-research-open/assemblyqc/actions?query=workflow%3A%22nf-core+linting%22)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.10647870-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.10647870) +[![GitHub Actions CI Status](https://github.com/plant-food-research-open/assemblyqc/actions/workflows/ci.yml/badge.svg)](https://github.com/plant-food-research-open/assemblyqc/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/plant-food-research-open/assemblyqc/actions/workflows/linting.yml/badge.svg)](https://github.com/plant-food-research-open/assemblyqc/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.10647870-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.10647870) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda ❌](http://img.shields.io/badge/run%20with-conda%20❌-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/plant-food-research-open/assemblyqc) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/plant-food-research-open/assemblyqc) ## Introduction -**plant-food-research-open/assemblyqc** is a [NextFlow](https://www.nextflow.io/docs/latest/index.html) pipeline which evaluates assembly quality with multiple QC tools and presents the results in a unified html report. The tools are shown in the [Pipeline Flowchart](#pipeline-flowchart) and their version are listed in [CITATIONS.md](./CITATIONS.md). +**plant-food-research-open/assemblyqc** is a [NextFlow](https://www.nextflow.io/docs/latest/index.html) pipeline which evaluates assembly quality with multiple QC tools and presents the results in a unified html report. The tools are shown in the [Pipeline Flowchart](#pipeline-flowchart) and their references are listed in [CITATIONS.md](./CITATIONS.md). 
## Pipeline Flowchart ```mermaid +%%{init: { + 'theme': 'base', + 'themeVariables': { + 'fontSize': '52px', + 'primaryColor': '#9A6421', + 'primaryTextColor': '#ffffff', + 'primaryBorderColor': '#9A6421', + 'lineColor': '#B180A8', + 'secondaryColor': '#455C58', + 'tertiaryColor': '#ffffff' + } +}}%% flowchart LR - forEachTag(For each\nAssembly) --> VALIDATE_FORMAT[VALIDATE FORMAT] - - VALIDATE_FORMAT --> ncbiFCS[NCBI FCS\nADAPTOR] - ncbiFCS --> Check{Check} - - VALIDATE_FORMAT --> ncbiGX[NCBI FCS GX] - ncbiGX --> Check - Check --> |Clean|Run(Run) - - Check --> |Contamination|Skip(Skip All) - Skip --> REPORT - - VALIDATE_FORMAT --> GFF_STATS[GENOMETOOLS GT STAT] - - Run --> ASS_STATS[ASSEMBLATHON STATS] - Run --> BUSCO - Run --> TIDK - Run --> LTRRETRIEVER - LTRRETRIEVER --> LAI - Run --> KRAKEN2 - Run --> HIC_CONTACT_MAP[HIC CONTACT MAP] - Run --> SYNTENY - - ASS_STATS --> REPORT - GFF_STATS --> REPORT - BUSCO --> REPORT - TIDK --> REPORT - LAI --> REPORT - KRAKEN2 --> REPORT - HIC_CONTACT_MAP --> REPORT - SYNTENY --> REPORT + forEachTag(Assembly) ==> VALIDATE_FORMAT[VALIDATE FORMAT] + + VALIDATE_FORMAT ==> ncbiFCS[NCBI FCS\nADAPTOR] + ncbiFCS ==> Check{Check} + + VALIDATE_FORMAT ==> ncbiGX[NCBI FCS GX] + ncbiGX ==> Check + Check ==> |Clean|Run(Run) + + Check ==> |Contamination|Skip(Skip All) + Skip ==> REPORT + + VALIDATE_FORMAT ==> GFF_STATS[GENOMETOOLS GT STAT] + + Run ==> ASS_STATS[ASSEMBLATHON STATS] + Run ==> BUSCO + Run ==> TIDK + Run ==> LAI + Run ==> KRAKEN2 + Run ==> HIC_CONTACT_MAP[HIC CONTACT MAP] + Run ==> MUMMER + Run ==> MINIMAP2 + Run ==> MERQURY + + MUMMER ==> CIRCOS + MUMMER ==> DOTPLOT + + MINIMAP2 ==> PLOTSR + + ASS_STATS ==> REPORT + GFF_STATS ==> REPORT + BUSCO ==> REPORT + TIDK ==> REPORT + LAI ==> REPORT + KRAKEN2 ==> REPORT + HIC_CONTACT_MAP ==> REPORT + CIRCOS ==> REPORT + DOTPLOT ==> REPORT + PLOTSR ==> REPORT + MERQURY ==> REPORT ``` -- [FASTA VALIDATION](https://github.com/GallVp/fasta_validator) -- [GFF3 VALIDATION](https://github.com/genometools/genometools) +- [FASTA VALIDATOR](https://github.com/linsalrob/fasta_validator) + [SEQKIT RMDUP](https://github.com/shenwei356/seqkit): FASTA validation +- [GENOMETOOLS GT GFF3VALIDATOR](https://genometools.org/tools/gt_gff3validator.html): GFF3 validation - [ASSEMBLATHON STATS](https://github.com/PlantandFoodResearch/assemblathon2-analysis/blob/a93cba25d847434f7eadc04e63b58c567c46a56d/assemblathon_stats.pl): Assembly statistics -- [GENOMETOOLS GT STAT](https://github.com/genometools/genometools): Annotation statistics +- [GENOMETOOLS GT STAT](https://genometools.org/tools/gt_stat.html): Annotation statistics - [NCBI FCS ADAPTOR](https://github.com/ncbi/fcs): Adaptor contamination pass/fail - [NCBI FCS GX](https://github.com/ncbi/fcs): Foreign organism contamination pass/fail -- [BUSCO](https://gitlab.com/ezlab/busco/-/tree/master): Gene-space completeness estimation +- [BUSCO](https://gitlab.com/ezlab/busco): Gene-space completeness estimation - [TIDK](https://github.com/tolkit/telomeric-identifier): Telomere repeat identification - [LAI](https://github.com/oushujun/LTR_retriever/blob/master/LAI): Continuity of repetitive sequences - [KRAKEN2](https://github.com/DerrickWood/kraken2): Taxonomy classification -- [HIC CONTACT MAP](https://github.com/igvteam/juicebox-web): Alignment and visualisation of HiC data -- SYNTENY: Synteny analysis using [MUMMER](https://github.com/mummer4/mummer) and [CIRCOS](http://circos.ca/documentation/) +- [HIC CONTACT MAP](https://github.com/igvteam/juicebox.js): Alignment and 
visualisation of HiC data +- [MUMMER](https://github.com/mummer4/mummer) → [CIRCOS](http://circos.ca/documentation/) + [DOTPLOT](https://plotly.com) & [MINIMAP2](https://github.com/lh3/minimap2) → [PLOTSR](https://github.com/schneebergerlab/plotsr): Synteny analysis +- [MERQURY](https://github.com/marbl/merqury): K-mer completeness, consensus quality and phasing assessment ## Usage @@ -68,13 +91,10 @@ Refer to [usage](./docs/usage.md), [parameters](./docs/parameters.md) and [outpu > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. -Prepare an `assemblysheet.csv` file with following columns representing target assemblies and associated meta-data. See an example [assemblysheet.csv](./assets/assemblysheet.csv) +Prepare an `assemblysheet.csv` file with the following columns representing target assemblies and associated meta-data. - `tag:` A unique tag which represents the target assembly throughout the pipeline and in the final report - `fasta:` FASTA file -- `gff3 [Optional]:` GFF3 annotation file if available -- `monoploid_ids [Optional]:` A txt file listing the IDs used to calculate LAI in monoploid mode if necessary -- `synteny_labels [Optional]:` A two column tsv file listing fasta sequence ids (first column) and labels for the synteny plots (second column) when performing synteny analysis Now, you can run the pipeline using: @@ -89,7 +109,7 @@ nextflow run plant-food-research-open/assemblyqc \ > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -### Quick Start for Plant&Food Users +### Plant&Food Users Download the pipeline to your `/workspace/$USER` folder. Change the parameters defined in the [pfr/params.json](./pfr/params.json) file. Submit the pipeline to SLURM for execution. @@ -99,17 +119,51 @@ sbatch ./pfr_assemblyqc ## Credits -plant-food-research-open/assemblyqc was originally written by Usman Rashid and Ken Smith. Ross Crowhurst, Chen Wu and Marcus Davy generously contributed their QC scripts. +plant-food-research-open/assemblyqc was originally written by Usman Rashid ([@gallvp](https://github.com/gallvp)) and Ken Smith ([@hzlnutspread](https://github.com/hzlnutspread)). + +Ross Crowhurst ([@rosscrowhurst](https://github.com/rosscrowhurst)), Chen Wu ([@christinawu2008](https://github.com/christinawu2008)) and Marcus Davy ([@mdavy86](https://github.com/mdavy86)) generously contributed their QC scripts. + +Mahesh Binzer-Panchal ([@mahesh-panchal](https://github.com/mahesh-panchal)) helped port the pipeline modules and sub-workflows to [nf-core](https://nf-co.re) schema. 
We thank the following people for their extensive assistance in the development of this pipeline: -- Cecilia Deng [@CeciliaDeng](https://github.com/CeciliaDeng) -- Chen Wu [@christinawu2008](https://github.com/christinawu2008) -- Jason Shiller [@jasonshiller](https://github.com/jasonshiller) -- Marcus Davy [@mdavy86](https://github.com/mdavy86) -- Ross Crowhurst [@rosscrowhurst](https://github.com/rosscrowhurst) -- Susan Thomson [@cflsjt](https://github.com/cflsjt) -- Ting-Hsuan Chen [@ting-hsuan-chen](https://github.com/ting-hsuan-chen) +- [Cecilia Deng](https://github.com/CeciliaDeng) +- [Ignacio Carvajal](https://github.com/ignacio3437) +- [Jason Shiller](https://github.com/jasonshiller) +- [Sarah Bailey](https://github.com/SarahBailey1998) +- [Susan Thomson](https://github.com/cflsjt) +- [Ting-Hsuan Chen](https://github.com/ting-hsuan-chen) + +The pipeline uses nf-core modules contributed by the following authors: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## Contributions and Support @@ -117,7 +171,9 @@ If you would like to contribute to this pipeline, please see the [contributing g ## Citations -If you use plant-food-research-open/assemblyqc for your analysis, please cite it using the following doi: [10.5281/zenodo.10647870](https://doi.org/10.5281/zenodo.10647870) +If you use plant-food-research-open/assemblyqc for your analysis, please cite it as: + +> Rashid, U., Wu, C., Shiller, J., Smith, K., Crowhurst, R., Davy, M., Chen, T.-H., Thomson, S., & Deng, C. (2024). AssemblyQC: A NextFlow pipeline for evaluating assembly quality (2.0.0). Zenodo. https://doi.org/10.5281/zenodo.10647870 An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/assets/assemblysheetv2.csv b/assets/assemblysheetv2.csv new file mode 100644 index 00000000..505e312c --- /dev/null +++ b/assets/assemblysheetv2.csv @@ -0,0 +1,2 @@ +tag,fasta,gff3,monoploid_ids,synteny_labels,reads_1 +FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz,https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/tests/stub/FI1.monoploid.seqs.txt,https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/tests/stub/FI1.seq.labels.tsv,SRR8238189 diff --git a/assets/schema_input.json b/assets/schema_input.json index 2aab4188..f415c25e 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -47,6 +47,89 @@ "maxLength": 0 } ] + }, + "reads_1": { + "errorMessage": "reads_1 should be a SRA ID for paired FASTQ files or FASTX file path without spaces and must have extension '.f(a|asta|as|sa|na|astq|q)' or '.f(a|asta|as|sa|na|astq|q).gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^SR\\w+$|^\\S+\\.f(a|asta|as|sa|na|astq|q)(\\.gz)?$" + }, + { + "type": "string", + "maxLength": 0 + } + ] + }, + "reads_2": { + "errorMessage": "FASTX file path cannot contain spaces and must have extension '.f(a|asta|as|sa|na|astq|q)' or '.f(a|asta|as|sa|na|astq|q).gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.f(a|asta|as|sa|na|astq|q)(\\.gz)?$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "dependentRequired": ["reads_1"] + }, + "maternal_reads_1": { + "errorMessage": "maternal_reads_1 should be a SRA ID for paired FASTQ files or FASTX file path without spaces and must have extension 
'.f(a|asta|as|sa|na|astq|q)' or '.f(a|asta|as|sa|na|astq|q).gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^SR\\w+$|^\\S+\\.f(a|asta|as|sa|na|astq|q)(\\.gz)?$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "dependentRequired": ["reads_1", "paternal_reads_1"] + }, + "maternal_reads_2": { + "errorMessage": "FASTX file path cannot contain spaces and must have extension '.f(a|asta|as|sa|na|astq|q)' or '.f(a|asta|as|sa|na|astq|q).gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.f(a|asta|as|sa|na|astq|q)(\\.gz)?$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "dependentRequired": ["maternal_reads_1"] + }, + "paternal_reads_1": { + "errorMessage": "paternal_reads_1 should be a SRA ID for paired FASTQ files or FASTX file path without spaces and must have extension '.f(a|asta|as|sa|na|astq|q)' or '.f(a|asta|as|sa|na|astq|q).gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^SR\\w+$|^\\S+\\.f(a|asta|as|sa|na|astq|q)(\\.gz)?$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "dependentRequired": ["reads_1", "maternal_reads_1"] + }, + "paternal_reads_2": { + "errorMessage": "FASTX file path cannot contain spaces and must have extension '.f(a|asta|as|sa|na|astq|q)' or '.f(a|asta|as|sa|na|astq|q).gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.f(a|asta|as|sa|na|astq|q)(\\.gz)?$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "dependentRequired": ["paternal_reads_1"] } }, "required": ["tag", "fasta"] diff --git a/assets/xrefsheet.csv b/assets/xrefsheet.csv index 5c218707..e945d3e4 100644 --- a/assets/xrefsheet.csv +++ b/assets/xrefsheet.csv @@ -1,2 +1,3 @@ tag,fasta,synteny_labels TT_2021a,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/950/295/GCA_021950295.1_ASM2195029v1/GCA_021950295.1_ASM2195029v1_genomic.fna.gz,https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/tests/stub/TT_2021a.seq.labels.tsv +JAD,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/859/245/GCA_016859245.1_ASM1685924v1/GCA_016859245.1_ASM1685924v1_genomic.fna.gz,https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/tests/stub/JAD.seq.labels.tsv diff --git a/bin/assemblyqc.py b/bin/assemblyqc.py index fc42eeed..b5c73f56 100755 --- a/bin/assemblyqc.py +++ b/bin/assemblyqc.py @@ -24,7 +24,8 @@ from report_modules.parsers.lai_parser import parse_lai_folder from report_modules.parsers.kraken2_parser import parse_kraken2_folder from report_modules.parsers.hic_parser import parse_hic_folder -from report_modules.parsers.circos_parser import parse_circos_folder +from report_modules.parsers.synteny_parser import parse_synteny_folder +from report_modules.parsers.merqury_parser import parse_merqury_folder if __name__ == "__main__": params_dict, params_table = parse_params_json("params_json.json") @@ -42,11 +43,16 @@ data_from_tools = {**data_from_tools, **parse_assemblathon_stats_folder()} data_from_tools = {**data_from_tools, **parse_genometools_gt_stat_folder()} data_from_tools = {**data_from_tools, **parse_busco_folder()} + data_from_tools = { + **data_from_tools, + **parse_busco_folder("busco_gff_outputs", "BUSCO_GFF"), + } data_from_tools = {**data_from_tools, **parse_tidk_folder()} data_from_tools = {**data_from_tools, **parse_lai_folder()} data_from_tools = {**data_from_tools, **parse_kraken2_folder()} data_from_tools = {**data_from_tools, **parse_hic_folder()} - data_from_tools = {**data_from_tools, **parse_circos_folder()} + data_from_tools = {**data_from_tools, **parse_synteny_folder()} + data_from_tools 
= {**data_from_tools, **parse_merqury_folder()} with open("software_versions.yml", "r") as f: versions_from_ch_versions = yaml.safe_load(f) diff --git a/bin/bundlelinks.py b/bin/bundlelinks.py new file mode 100755 index 00000000..1cc28a46 --- /dev/null +++ b/bin/bundlelinks.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python3 + +import argparse + + +def log(verbose, message): + if verbose: + print(message) + + +def dir_sign(end, start): + if abs(int(end) - int(start)) == 0: + return 1 + sign = (int(end) - int(start)) / abs(int(end) - int(start)) + return sign + + +def is_same_direction(link, bundle, verbose): + _, ref_start, ref_end, _, target_start, target_end = link + ( + _, + bundle_ref_start, + bundle_ref_end, + _, + bundle_target_start, + bundle_target_end, + ) = bundle + + link_ref_dir = dir_sign(ref_end, ref_start) + link_target_dir = dir_sign(target_end, target_start) + + bundle_ref_dir = dir_sign(bundle_ref_end, bundle_ref_start) + bundle_target_dir = dir_sign(bundle_target_end, bundle_target_start) + + log( + verbose, + f"Compared directions l: {link_ref_dir},{link_target_dir} and b: {bundle_ref_dir},{bundle_target_dir}", + ) + + if link_ref_dir == bundle_ref_dir and link_target_dir == bundle_target_dir: + return True + + return False + + +def within_max_gap(link, bundle, max_gap): + _, ref_start, ref_end, _, target_start, target_end = link + ( + _, + bundle_ref_start, + bundle_ref_end, + _, + bundle_target_start, + bundle_target_end, + ) = bundle + + ref_start = int(ref_start) + ref_end = int(ref_end) + target_start = int(target_start) + target_end = int(target_end) + + bundle_ref_start = int(bundle_ref_start) + bundle_ref_end = int(bundle_ref_end) + bundle_target_start = int(bundle_target_start) + bundle_target_end = int(bundle_target_end) + + ref_within_max_gap = ( + abs(ref_start - bundle_ref_start) <= max_gap + or abs(ref_start - bundle_ref_end) <= max_gap + or abs(ref_end - bundle_ref_start) <= max_gap + or abs(ref_end - bundle_ref_end) <= max_gap + ) + + target_within_max_gap = ( + abs(target_start - bundle_target_start) <= max_gap + or abs(target_start - bundle_target_end) <= max_gap + or abs(target_end - bundle_target_start) <= max_gap + or abs(target_end - bundle_target_end) <= max_gap + ) + + return ref_within_max_gap and target_within_max_gap + + +def get_bundle_directions(bundle): + ( + _, + bundle_ref_start, + bundle_ref_end, + _, + bundle_target_start, + bundle_target_end, + ) = bundle + + return ( + dir_sign(bundle_ref_end, bundle_ref_start), + dir_sign(bundle_target_end, bundle_target_start), + ) + + +def add_link_to_bundle(link, bundle, verbose): + ref_dir, target_dir = get_bundle_directions(bundle) + + log(verbose, f"Bundle directions l: {ref_dir},{target_dir}") + + _, ref_start, ref_end, _, target_start, target_end = link + + ref_start = int(ref_start) + ref_end = int(ref_end) + target_start = int(target_start) + target_end = int(target_end) + + ( + ref, + bundle_ref_start, + bundle_ref_end, + target, + bundle_target_start, + bundle_target_end, + ) = bundle + + bundle_ref_start = int(bundle_ref_start) + bundle_ref_end = int(bundle_ref_end) + bundle_target_start = int(bundle_target_start) + bundle_target_end = int(bundle_target_end) + + updated_bundle_ref_start = None + updated_bundle_ref_end = None + updated_bundle_target_start = None + updated_bundle_target_end = None + + if ref_dir > 0: + updated_bundle_ref_start = min(ref_start, bundle_ref_start) + updated_bundle_ref_end = max(ref_end, bundle_ref_end) + else: + updated_bundle_ref_start = max(ref_start, 
bundle_ref_start) + updated_bundle_ref_end = min(ref_end, bundle_ref_end) + + if target_dir > 0: + updated_bundle_target_start = min(target_start, bundle_target_start) + updated_bundle_target_end = max(target_end, bundle_target_end) + else: + updated_bundle_target_start = max(target_start, bundle_target_start) + updated_bundle_target_end = min(target_end, bundle_target_end) + + return [ + ref, + updated_bundle_ref_start, + updated_bundle_ref_end, + target, + updated_bundle_target_start, + updated_bundle_target_end, + ] + + +def bundle_len(bundle): + ( + _, + bundle_ref_start, + bundle_ref_end, + _, + bundle_target_start, + bundle_target_end, + ) = bundle + + return min( + abs(int(bundle_ref_end) - int(bundle_ref_start)), + abs(int(bundle_target_end) - int(bundle_target_start)), + ) + + +def bundle_links(input_file, output_file, max_gap, min_bundle_size, verbose): + bundles = {} + nlinks = {} + current_bundle_num = 0 + + with open(input_file, "r") as f: + for line in f: + link = line.strip().split("\t") + ref, _, _, target, _, _ = link + + log(verbose, f"Link: {link}") + + link_key = f"{ref}:{target}" + possible_bundles_to_add_to = [ + k for k in bundles.keys() if k.startswith(f"{link_key}:") + ] + + log(verbose, f"Possible bundles are: {possible_bundles_to_add_to}") + + # Check if the current link can be added to an existing bundle + link_added = False + for k in possible_bundles_to_add_to: + bundle = bundles[k] + + log(verbose, f"Checking against bundle: {bundle}") + + if not is_same_direction(link, bundle, verbose): + log(verbose, "Bundle and link have different directions") + continue + + if not within_max_gap(link, bundle, max_gap): + log(verbose, f"Bundle and link are not within {max_gap}") + continue + + # Add link to bundle + updated_bundle = add_link_to_bundle(link, bundle, verbose) + bundles[k] = updated_bundle + link_added = True + nlinks[k] += 1 + log(verbose, "Added link to bundle") + log(verbose, f"Updated bundle: {updated_bundle}") + break + + # Create a new bundle for the current link + if not link_added: + current_bundle_num += 1 + bundles[link_key + f":{current_bundle_num}"] = link + nlinks[link_key + f":{current_bundle_num}"] = 0 + log(verbose, f"Created bundle: {link}") + + # Filter out bundles smaller than the minimum bundle size + filtered_bundles = { + key: bundle + for key, bundle in bundles.items() + if bundle_len(bundle) >= min_bundle_size + } + + # Write the filtered bundles to the output file + with open(output_file, "w") as f: + for k, bundle in filtered_bundles.items(): + bundle_nlinks = nlinks[k] + f.write( + "\t".join([str(v) for v in bundle] + [f"nlinks={bundle_nlinks}"]) + "\n" + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Bundle links", + epilog="Author: ChatGPT, Usman Rashid", + ) + parser.add_argument("input_file", help="Input TSV file containing links") + parser.add_argument("output_file", help="Output TSV file to write bundled links") + parser.add_argument( + "--max_gap", + type=int, + default=1_000_000, + help="Maximum gap allowed between links for bundling", + ) + parser.add_argument( + "--min_bundle_size", + type=int, + default=1_000, + help="Minimum size of a bundle to retain", + ) + + parser.add_argument("--verbose", help="Print info messages", action="store_true") + + args = parser.parse_args() + + bundle_links( + args.input_file, + args.output_file, + args.max_gap, + args.min_bundle_size, + args.verbose, + ) diff --git a/bin/linearsynteny.py b/bin/linearsynteny.py new file mode 100755 index 00000000..6bb44eb9 
--- /dev/null +++ b/bin/linearsynteny.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 + +import plotly.graph_objects as go +import pandas as pd +import argparse + + +def load_data(data_filename, karyotype_ref_file_name, karyotype_target_filename): + data = pd.read_csv(data_filename, sep="\t", header=None) + data.columns = [ + "ref", + "ref_start", + "ref_stop", + "target", + "target_start", + "target_stop", + "color", + ] + + karyotype_ref = pd.read_csv(karyotype_ref_file_name, sep="\t", header=None) + karyotype_ref.columns = ["chr", "name", "name1", "zero", "size", "color"] + karyotype_target = pd.read_csv(karyotype_target_filename, sep="\t", header=None) + karyotype_target.columns = ["chr", "name", "name1", "zero", "size", "color"] + return data, karyotype_ref, karyotype_target + + +def insert_offsets_and_return_dict(karyotype_df): + karyotype_df["offset"] = karyotype_df["size"].cumsum().shift(fill_value=0) + return dict(zip(karyotype_df["name"], karyotype_df["offset"])) + + +def insert_midpoints_and_return_dict(karyotype_df): + karyotype_df["midpoint"] = (karyotype_df["size"] / 2.0) + karyotype_df["offset"] + return dict(zip(karyotype_df["name"], karyotype_df["midpoint"])) + + +def insert_figure_data( + data, offsets_ref, offsets_target, midpoints_ref, midpoints_target, fig +): + for index, row in data.iterrows(): + x_ = [i + offsets_ref[row["ref"]] for i in [row["ref_start"], row["ref_stop"]]] + + y_ = [ + j + offsets_target[row["target"]] + for j in [row["target_start"], row["target_stop"]] + ] + + midpoint_x = midpoints_ref[row["ref"]] + midpoint_y = midpoints_target[row["target"]] + + fig.add_trace( + go.Scatter( + x=x_, + y=y_, + mode="lines", + line=dict( + color=f"rgba{(row['color'].replace('color=', '').replace('0.5)', '1)'))}", + width=2, + ), + name=f"{index}: {row['target']}:{row['ref']}", + legendgroup=f"{row['target']}:{row['ref']}", + legendgrouptitle=dict(text=f"{row['target']}:{row['ref']}"), + ) + ) + + fig.add_trace( + go.Scatter( + x=[midpoint_x], + y=[0], + xaxis="x2", + line=dict(color="#ffffff"), + showlegend=False, + ) + ) + fig.add_trace( + go.Scatter( + x=[0], + y=[midpoint_y], + yaxis="y2", + line=dict(color="#ffffff"), + showlegend=False, + ) + ) + + +def format_figure( + karyotype_ref, + karyotype_target, + offsets_ref, + offsets_target, + midpoints_ref, + midpoints_target, + fig, +): + + xaxis_range = [0, list(offsets_ref.values())[-1] + list(karyotype_ref["size"])[-1]] + yaxis_range = [ + 0, + list(offsets_target.values())[-1] + list(karyotype_target["size"])[-1], + ] + + fig.update_layout( + xaxis=dict( + range=xaxis_range, + ), + yaxis=dict(range=yaxis_range), + hovermode="closest", + xaxis2=dict( + overlaying="x", + scaleanchor="x1", + range=xaxis_range, + tickmode="array", + tickvals=list(offsets_ref.values()), + ticktext=list(offsets_ref.keys()), + side="top", + ), + yaxis2=dict( + overlaying="y", + scaleanchor="y1", + range=yaxis_range, + tickmode="array", + tickvals=list(offsets_target.values()), + ticktext=list(offsets_target.keys()), + autoshift=True, + anchor="free", + ), + paper_bgcolor="rgba(0,0,0,0)", + plot_bgcolor="rgba(0,0,0,0)", + showlegend=False, + ) + + fig.update_xaxes( + showgrid=True, + zeroline=False, + gridcolor="rgba(0, 0, 0, 0.1)", + griddash="dashdot", + ) + fig.update_yaxes( + showgrid=True, + zeroline=False, + gridcolor="rgba(0, 0, 0, 0.1)", + griddash="dashdot", + ) + fig["layout"]["yaxis1"]["showgrid"] = False + fig["layout"]["xaxis1"]["showgrid"] = False + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + 
description="Create a linear synteny plot from `nucmer/dnadiff/circos bundlelinks` bundles", + epilog="Author: Usman Rashid", + ) + parser.add_argument("-v", "--version", action="version", version="%(prog)s 0.1") + parser.add_argument( + "--output", + type=str, + default="synteny_plot.html", + required=False, + help="Output filename", + ) + parser.add_argument("bundlelinks", type=argparse.FileType("r")) + parser.add_argument("karyotype_ref", type=argparse.FileType("r")) + parser.add_argument("karyotype_target", type=argparse.FileType("r")) + + args = parser.parse_args() + + data_filename = args.bundlelinks + karyotype_ref_file_name = args.karyotype_ref + karyotype_target_filename = args.karyotype_target + output_filename = args.output + + data, karyotype_ref, karyotype_target = load_data( + data_filename, karyotype_ref_file_name, karyotype_target_filename + ) + + offsets_ref = insert_offsets_and_return_dict(karyotype_ref) + offsets_target = insert_offsets_and_return_dict(karyotype_target) + + midpoints_ref = insert_midpoints_and_return_dict(karyotype_ref) + midpoints_target = insert_midpoints_and_return_dict(karyotype_target) + + fig = go.Figure() + + insert_figure_data( + data, offsets_ref, offsets_target, midpoints_ref, midpoints_target, fig + ) + + format_figure( + karyotype_ref, + karyotype_target, + offsets_ref, + offsets_target, + midpoints_ref, + midpoints_target, + fig, + ) + + fig.write_html(output_filename) diff --git a/bin/report_modules/parsers/busco_parser.py b/bin/report_modules/parsers/busco_parser.py index 74c6fb05..44d52d0f 100644 --- a/bin/report_modules/parsers/busco_parser.py +++ b/bin/report_modules/parsers/busco_parser.py @@ -11,70 +11,64 @@ class BuscoParser: def __init__(self, file_data): - self.file_data = file_data - self.stats_dict = {} + self.file_text = file_data def parse_report(self): - self.stats_dict["version"] = self.get_busco_version(self.file_data) - self.stats_dict["lineage"] = self.get_lineage_dataset(self.file_data) - self.stats_dict["created"] = self.get_creation_date(self.file_data) - self.stats_dict["mode"] = self.get_run_mode(self.file_data) - self.stats_dict["predictor"] = self.get_gene_predictor(self.file_data) - self.stats_dict["search_percentages"] = self.get_busco_percentages( - self.file_data + stats_dict = {} + stats_dict["version"] = self.get_busco_version() + stats_dict["lineage"] = self.get_lineage_dataset() + stats_dict["created"] = self.get_creation_date() + stats_dict["mode"] = self.get_run_mode() + stats_dict["predictor"] = self.get_gene_predictor() + stats_dict["search_percentages"] = self.get_busco_percentages() + (stats_dict["results_dict"], stats_dict["results_table"]) = ( + self.get_busco_result_table() ) - self.stats_dict["dependencies"] = self.get_deps_and_versions(self.file_data) - self.stats_dict["results_table"] = self.get_busco_result_table(self.file_data) - - # include busco results dictionary for use in json dump - self.stats_dict["results_dict"] = self.get_busco_result_dict(self.file_data) - # include dependencies dictionary for use in json dump - self.stats_dict["dependencies_dict"] = self.get_deps_and_versions_dict( - self.file_data + + (stats_dict["dependencies_dict"], stats_dict["dependencies"]) = ( + self.get_deps_and_versions() ) - return self.stats_dict + return stats_dict - def get_busco_version(self, data): + def get_busco_version(self): p = re.compile("BUSCO version is: (.*)") - result = p.search(data).group(1).strip() + result = p.search(self.file_text).group(1).strip() return result - def 
get_lineage_dataset(self, data): + def get_lineage_dataset(self): p = re.compile("The lineage dataset is: (.*)") - result = p.search(data).group(1).split()[0] + result = p.search(self.file_text).group(1).split()[0] return result - def get_creation_date(self, data): + def get_creation_date(self): p = re.compile("The lineage dataset is: (.*)") - result = p.search(data) + result = p.search(self.file_text) result = result.group(1).split()[3][:-1] return result - def get_run_mode(self, data): + def get_run_mode(self): p = re.compile("BUSCO was run in mode: (.*)") - result = p.search(data).group(1) + result = p.search(self.file_text).group(1) return result - def get_gene_predictor(self, data): + def get_gene_predictor(self): p = re.compile("Gene predictor used: (.*)") - gene_predictor = p.search(data) + gene_predictor = p.search(self.file_text) if gene_predictor == None: return "None" result = gene_predictor.group(1) - q = re.compile(f"{gene_predictor.group(1)}: (.*)") - predictor_version = q.search(data) return result - def get_busco_percentages(self, data): + def get_busco_percentages(self): p = re.compile("C:(.*)") - result = p.search(data).group(0).strip() + result = p.search(self.file_text).group(0).strip() return result - def get_deps_and_versions(self, file_data): - list_of_lines = file_data.split("\n") + def get_deps_and_versions(self): + list_of_lines = self.file_text.split("\n") for index, line in enumerate(list_of_lines): if "Dependencies and versions" in line: all_deps = ( @@ -88,44 +82,28 @@ def get_deps_and_versions(self, file_data): dependency = dep.split(":")[0] version = dep.split(":")[1].strip() dep_dict[f"{dependency}"] = f"{version}" + df = pd.DataFrame(dep_dict.items(), columns=["Dependency", "Version"]) col_names = ["Dependency", "Version"] table = tabulate( df, headers=col_names, tablefmt="html", numalign="left", showindex=False ) - return table - - # get dependencies dictionary instead of table to use in json dump - def get_deps_and_versions_dict(self, file_data): - list_of_lines = file_data.split("\n") - for index, line in enumerate(list_of_lines): - if "Dependencies and versions" in line: - all_deps = ( - "".join(list_of_lines[max(0, index + 1) : len(list_of_lines) - 2]) - .replace("\t", "\n") - .strip() - ) - - dep_dict = {} - for dep in all_deps.splitlines(): - dependency = dep.split(":")[0] - version = dep.split(":")[1].strip() - dep_dict[f"{dependency}"] = f"{version}" - return dep_dict + return (dep_dict, table) - def get_busco_result_table(self, file_data): - list_of_lines = file_data.split("\n") + def get_busco_result_table(self): + list_of_lines = self.file_text.split("\n") + end_index = len(list_of_lines) - 1 for index, line in enumerate(list_of_lines): - if "Assembly Statistics" in line: - stats_index = index + if ("Assembly Statistics" in line) or ("Dependencies and versions" in line): + end_index = index break results_dict = {} for index, line in enumerate(list_of_lines): if "C:" in line: - for i in range(index + 1, stats_index - 1): + for i in range(index + 1, end_index - 1): number = list_of_lines[i].split("\t")[1] descr = list_of_lines[i].split("\t")[2] @@ -135,28 +113,10 @@ def get_busco_result_table(self, file_data): table = tabulate( df, headers=col_names, tablefmt="html", numalign="left", showindex=False ) - return table + return (results_dict, table) - # get results dictionary instead of table to use in json dump - def get_busco_result_dict(self, file_data): - list_of_lines = file_data.split("\n") - for index, line in enumerate(list_of_lines): - if 
"Assembly Statistics" in line: - stats_index = index - - results_dict = {} - for index, line in enumerate(list_of_lines): - if "C:" in line: - for i in range(index + 1, stats_index - 1): - number = list_of_lines[i].split("\t")[1] - descr = list_of_lines[i].split("\t")[2] - results_dict[f"{descr}"] = f"{number}" - - return results_dict - - -def parse_busco_folder(folder_name="busco_outputs"): +def parse_busco_folder(folder_name="busco_outputs", data_key="BUSCO"): dir = os.getcwdb().decode() busco_folder_path = Path(f"{dir}/{folder_name}") @@ -172,27 +132,28 @@ def parse_busco_folder(folder_name="busco_outputs"): ext = str(plot_path).split(".")[-1] busco_plot_url = f"data:image/{ext};base64,{base64_utf8_str}" - data = {"BUSCO": []} + data = {data_key: []} for file in list_of_files: - file_data = "" with open(file, "r") as file: - lines = file.readlines() - for line in lines: - file_data += line - parser = BuscoParser(file_data) + file_text = file.read() + + parser = BuscoParser(file_text) + file_tokens = re.findall( r"short_summary.specific.([\w]+).([\w]+)_([a-zA-Z0-9]+).txt", os.path.basename(str(file)), )[0] + stats = { "hap": file_tokens[1], "lineage": file_tokens[0], **parser.parse_report(), } - data["BUSCO"].append(stats) - data["BUSCO"] = sort_list_of_results(data["BUSCO"], "hap") - data["BUSCO"][0]["busco_plot"] = busco_plot_url + data[data_key].append(stats) + + data[data_key] = sort_list_of_results(data[data_key], "hap") + data[data_key][0]["busco_plot"] = busco_plot_url return data diff --git a/bin/report_modules/parsers/circos_parser.py b/bin/report_modules/parsers/circos_parser.py deleted file mode 100644 index 0df2d037..00000000 --- a/bin/report_modules/parsers/circos_parser.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -from pathlib import Path -import base64 -import re - -from report_modules.parsers.parsing_commons import sort_list_of_results - - -def parse_circos_folder(folder_name="circos_outputs"): - dir = os.getcwdb().decode() - circos_folder_path = Path(f"{dir}/{folder_name}") - - if not os.path.exists(circos_folder_path): - return {} - - list_of_plot_files = [item for item in circos_folder_path.glob("*.png")] - - data = {"CIRCOS": []} - - for plot_path in list_of_plot_files: - file_tokens = re.findall( - r"([\w]+).on.([\w]+).([\w]+).png", - os.path.basename(str(plot_path)), - )[0] - - if os.path.getsize(plot_path) == 0: - data["CIRCOS"].append( - { - "tag.on.tag": f"{file_tokens[0]} : {file_tokens[1]} : {file_tokens[2]}", - "circos_plot": "", - "is_plot_empty": True, - } - ) - continue - - binary_fc = open(plot_path, "rb").read() - base64_utf8_str = base64.b64encode(binary_fc).decode("utf-8") - ext = str(plot_path).split(".")[-1] - plot_url = f"data:image/{ext}+xml;base64,{base64_utf8_str}" - - data["CIRCOS"].append( - { - "tag.on.tag": f"{file_tokens[0]} : {file_tokens[1]} : {file_tokens[2]}", - "circos_plot": plot_url, - "is_plot_empty": False, - } - ) - - if len(data["CIRCOS"]) < 1: - return {} - - return {"CIRCOS": sort_list_of_results(data["CIRCOS"], "tag.on.tag")} diff --git a/bin/report_modules/parsers/fasta_validate_parser.py b/bin/report_modules/parsers/fasta_validate_parser.py index 786978ec..e572348f 100644 --- a/bin/report_modules/parsers/fasta_validate_parser.py +++ b/bin/report_modules/parsers/fasta_validate_parser.py @@ -17,6 +17,16 @@ def parse_fasta_validate_folder(folder_name="fastavalidator_logs"): data = {"FASTA_VALIDATE": []} for log_path in list_of_log_files: + + if str(log_path).endswith(".seqkit.rmdup.log"): + data["FASTA_VALIDATE"].append( + { + 
"hap": os.path.basename(log_path).replace(".seqkit.rmdup.log", ""), + "validation_log": "FASTA validation failed due to presence of duplicate sequences", + } + ) + continue + with open(log_path, "r") as f: log_lines = [f"

{l}

" for l in f.readlines()] diff --git a/bin/report_modules/parsers/merqury_parser.py b/bin/report_modules/parsers/merqury_parser.py new file mode 100644 index 00000000..fb584611 --- /dev/null +++ b/bin/report_modules/parsers/merqury_parser.py @@ -0,0 +1,85 @@ +import base64 +import os + +from report_modules.parsers.parsing_commons import sort_list_of_results +from tabulate import tabulate +from pathlib import Path +import pandas as pd + + +def load_image_as_base64_str(file_name, optional=False): + + if optional and not os.path.exists(file_name): + return None + + with open(file_name, "rb") as f: + binary_fc = f.read() + + base64_utf8_str = base64.b64encode(binary_fc).decode("utf-8") + return f"data:image/png+xml;base64,{base64_utf8_str}" + + +def parse_merqury_folder(folder_name="merqury_outputs"): + dir = os.getcwdb().decode() + merqury_folder_path = Path(f"{dir}/{folder_name}") + + if not os.path.exists(merqury_folder_path): + return {} + + data = {"MERQURY": []} + + completeness_stats_paths = [ + item for item in merqury_folder_path.glob("*.completeness.stats") + ] + + for completeness_stats_path in completeness_stats_paths: + + individual_id = os.path.basename(str(completeness_stats_path)).split( + ".completeness.stats" + )[0] + haplotypes = individual_id.split("-and-") + + completeness_stats_table = pd.read_csv(completeness_stats_path, sep="\t", header=None) + qv_stats_table = pd.read_csv(f"{folder_name}/{individual_id}.qv", sep="\t", header=None) + + data["MERQURY"].append( + { + "individual_id": individual_id, + "completeness_stats_table": completeness_stats_table.to_dict("records"), + "completeness_stats_table_html": tabulate( + completeness_stats_table, + headers=["Assembly", "Region", "Found", "Total", "% Covered"], + tablefmt="html", + numalign="left", + showindex=False, + ), + "qv_stats_table": qv_stats_table.to_dict("records"), + "qv_stats_table_html": tabulate( + qv_stats_table, + headers=["Assembly", "No Support", "Total", "QV", "Error %"], + tablefmt="html", + numalign="left", + showindex=False, + ), + "hap_plots": [ + { + "hap": hap, + "plot": load_image_as_base64_str( + f"{folder_name}/{individual_id}.{hap}.spectra-cn.fl.png" + ), + } + for hap in haplotypes + ], + "asm_plot": load_image_as_base64_str( + f"{folder_name}/{individual_id}.spectra-asm.fl.png" + ), + "plot": load_image_as_base64_str( + f"{folder_name}/{individual_id}.spectra-cn.fl.png", True + ), + "hapmers_blob": load_image_as_base64_str( + f"{folder_name}/{individual_id}.hapmers.blob.png", True + ), + } + ) + + return {"MERQURY": sort_list_of_results(data["MERQURY"], "individual_id")} diff --git a/bin/report_modules/parsers/ncbi_fcs_adaptor_parser.py b/bin/report_modules/parsers/ncbi_fcs_adaptor_parser.py index f896577f..8451d767 100644 --- a/bin/report_modules/parsers/ncbi_fcs_adaptor_parser.py +++ b/bin/report_modules/parsers/ncbi_fcs_adaptor_parser.py @@ -14,7 +14,7 @@ def parse_ncbi_fcs_adaptor_folder(folder_name="ncbi_fcs_adaptor_reports"): if not os.path.exists(reports_folder_path): return {} - list_of_report_files = reports_folder_path.glob("*.tsv") + list_of_report_files = reports_folder_path.glob("*.txt") data = {"NCBI_FCS_ADAPTOR": []} @@ -22,7 +22,7 @@ def parse_ncbi_fcs_adaptor_folder(folder_name="ncbi_fcs_adaptor_reports"): report_table = pd.read_csv(report_path, sep="\t") file_tokens = re.findall( - r"([\w]+)_fcs_adaptor_report.tsv", + r"([\w]+)\.fcs_adaptor_report.txt", os.path.basename(str(report_path)), )[0] diff --git a/bin/report_modules/parsers/synteny_parser.py 
b/bin/report_modules/parsers/synteny_parser.py new file mode 100644 index 00000000..d25d82e9 --- /dev/null +++ b/bin/report_modules/parsers/synteny_parser.py @@ -0,0 +1,206 @@ +from tabulate import tabulate +from pathlib import Path +import pandas as pd +import base64 +import os +import re + +from report_modules.parsers.parsing_commons import sort_list_of_results + + +def parse_synteny_circos(folder_name="synteny_outputs"): + dir = os.getcwdb().decode() + circos_folder_path = Path(f"{dir}/{folder_name}") + + if not os.path.exists(circos_folder_path): + return {} + + list_of_plot_files = [item for item in circos_folder_path.glob("*.png")] + + data = {"SYNTENY_CIRCOS": []} + + for plot_path in list_of_plot_files: + base_name = os.path.basename(str(plot_path)) + + if base_name == "plotsr.png": + continue + + file_tokens = re.findall( + r"([\w]+).on.([\w]+).([\w]+).png", + base_name, + )[0] + + if os.path.getsize(plot_path) == 0: + data["SYNTENY_CIRCOS"].append( + { + "tag.on.tag": f"{file_tokens[0]} : {file_tokens[1]} : {file_tokens[2]}", + "circos_plot": "", + "is_plot_empty": True, + } + ) + continue + + binary_fc = open(plot_path, "rb").read() + base64_utf8_str = base64.b64encode(binary_fc).decode("utf-8") + ext = str(plot_path).split(".")[-1] + plot_url = f"data:image/{ext}+xml;base64,{base64_utf8_str}" + + data["SYNTENY_CIRCOS"].append( + { + "tag.on.tag": f"{file_tokens[0]} : {file_tokens[1]} : {file_tokens[2]}", + "circos_plot": plot_url, + "is_plot_empty": False, + } + ) + + if len(data["SYNTENY_CIRCOS"]) < 1: + return {} + + return { + "SYNTENY_CIRCOS": sort_list_of_results(data["SYNTENY_CIRCOS"], "tag.on.tag") + } + + +def parse_synteny_dotplot(folder_name="synteny_outputs"): + dir = os.getcwdb().decode() + circos_folder_path = Path(f"{dir}/{folder_name}") + + if not os.path.exists(circos_folder_path): + return {} + + list_of_plot_files = [item for item in circos_folder_path.glob("*.html")] + + data = {"SYNTENY_DOTPLOT": []} + + for plot_path in list_of_plot_files: + file_tokens = re.findall( + r"([\w]+).on.([\w]+).([\w]+).html", + os.path.basename(str(plot_path)), + )[0] + + if os.path.getsize(plot_path) == 0: + data["SYNTENY_DOTPLOT"].append( + { + "tag.on.tag": f"{file_tokens[0]} : {file_tokens[1]} : {file_tokens[2]}", + "plot": "", + "plot_folder": "", + "is_plot_empty": True, + } + ) + continue + + plot_filename = os.path.basename(str(plot_path)) + + data["SYNTENY_DOTPLOT"].append( + { + "tag.on.tag": f"{file_tokens[0]} : {file_tokens[1]} : {file_tokens[2]}", + "plot": plot_filename, + "plot_folder": plot_filename.replace(".html", ""), + "is_plot_empty": False, + } + ) + + if len(data["SYNTENY_DOTPLOT"]) < 1: + return {} + + return { + "SYNTENY_DOTPLOT": sort_list_of_results(data["SYNTENY_DOTPLOT"], "tag.on.tag") + } + + +def parse_synteny_plotsr(folder_name="synteny_outputs"): + dir = os.getcwdb().decode() + plotsr_folder_path = Path(f"{dir}/{folder_name}") + + if not os.path.exists(plotsr_folder_path): + return {} + + list_of_error_files = [item for item in plotsr_folder_path.glob("*.error.log")] + + data = {"SYNTENY_PLOTSR": []} + + error_comparisons = [] + + for error_log_path in list_of_error_files: + base_name = os.path.basename(str(error_log_path)) + + file_tokens = re.findall( + r"([\w]+).on.([\w]+).error.log", + base_name, + )[0] + + error_comparisons.append((file_tokens[0], file_tokens[1])) + + plot_url = None + plotsr_png_path = Path(f"{dir}/{folder_name}/plotsr.png") + if os.path.exists(plotsr_png_path): + binary_fc = open(plotsr_png_path, "rb").read() + 
base64_utf8_str = base64.b64encode(binary_fc).decode("utf-8") + ext = str(plotsr_png_path).split(".")[-1] + plot_url = f"data:image/{ext}+xml;base64,{base64_utf8_str}" + + if error_comparisons == [] and plot_url == None: + return {} + + data["SYNTENY_PLOTSR"].append( + { + "error_message": ( + None + if error_comparisons == [] + else "Note: Syri failed to detect structural rearrangements for following comparisons: " + + ", ".join( + [f"{target} with reference to {ref}" for (target, ref) in error_comparisons] + ) + + '. This may be due to known Syri limitations. See: GitHub/Syri/Limitations' + ), + "plotsr_png": plot_url, + "labels_table": None, + "labels_table_html": None, + } + ) + + if plot_url == None: + return data + + list_of_label_files = [item for item in plotsr_folder_path.glob("*.plotsr.csv")] + labels_table = pd.DataFrame() + + for labels_path in list_of_label_files: + base_name = os.path.basename(str(labels_path)) + + file_token = re.findall( + r"([\w]+).plotsr.csv", + base_name, + )[0] + + _labels_table = pd.read_csv(labels_path, header=None, sep="\t") + _labels_table = _labels_table.set_axis([file_token, "Labels"], axis=1) + _labels_table = _labels_table[["Labels", file_token]] + + if labels_table.empty: + labels_table = _labels_table + continue + + labels_table = pd.concat([labels_table, _labels_table[[file_token]]], axis=1) + + if labels_table.empty: + return data + + data["SYNTENY_PLOTSR"][0]["labels_table"] = labels_table.to_dict("records") + data["SYNTENY_PLOTSR"][0]["labels_table_html"] = tabulate( + labels_table, + headers="keys", + tablefmt="html", + numalign="left", + showindex=False, + ) + + return data + + +def parse_synteny_folder(folder_name="synteny_outputs"): + circos_data = parse_synteny_circos(folder_name) + dotplot_data = parse_synteny_dotplot(folder_name) + plotsr_data = parse_synteny_plotsr(folder_name) + + return {**circos_data, **dotplot_data, **plotsr_data} diff --git a/bin/report_modules/templates/base.html b/bin/report_modules/templates/base.html index bb753a67..11fe7d92 100644 --- a/bin/report_modules/templates/base.html +++ b/bin/report_modules/templates/base.html @@ -3,54 +3,146 @@
-        AssemblyQC {{
-        all_stats_dicts['VERSIONS']['Workflow']['plant-food-research-open/assemblyqc'] }}
+        AssemblyQC {{ all_stats_dicts['VERSIONS']['Workflow']['plant-food-research-open/assemblyqc'] }}
+
+        {% if 'FASTA_VALIDATE' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'GFF3_VALIDATE' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'NCBI_FCS_ADAPTOR' in all_stats_dicts %} [nav link] {% endif %}
-        {% endif %} {% if 'NCBI_FCS_GX' in all_stats_dicts %}{% endif %} {% if 'ASSEMBLATHON_STATS' in all_stats_dicts %}{% endif %} {% if 'GENOMETOOLS_GT_STAT' in all_stats_dicts %}{% endif %} {% if 'BUSCO' in all_stats_dicts %}{% endif %} {% if 'TIDK' in all_stats_dicts %}{% endif %} {% if 'LAI' in all_stats_dicts %}{% endif %} {% if 'KRAKEN2' in all_stats_dicts %}{% endif %} {% if 'HIC' in all_stats_dicts %}{% endif %} {% if 'CIRCOS' in all_stats_dicts %}{% endif %}
+        {% if 'NCBI_FCS_GX' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'ASSEMBLATHON_STATS' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'GENOMETOOLS_GT_STAT' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'BUSCO' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'BUSCO_GFF' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'TIDK' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'LAI' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'KRAKEN2' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'HIC' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'SYNTENY_CIRCOS' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'SYNTENY_DOTPLOT' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'SYNTENY_PLOTSR' in all_stats_dicts %} [nav link] {% endif %}
+        {% if 'MERQURY' in all_stats_dicts %} [nav link] {% endif %}
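Aside: the `{% if '<KEY>' in all_stats_dicts %}` guards above decide which sections get a navigation entry in the rendered report. A minimal sketch of that behaviour, assuming a Jinja2 environment is available; the `[Merqury]`/`[Plotsr]` labels and the sample data are made up, only the key names come from the template:

```python
# Minimal sketch of the section guards above; illustrative only.
from jinja2 import Template

nav = Template(
    "{% if 'MERQURY' in all_stats_dicts %}[Merqury]{% endif %}"
    "{% if 'SYNTENY_PLOTSR' in all_stats_dicts %}[Plotsr]{% endif %}"
)

# A section's nav entry appears only when its parser contributed data.
print(nav.render(all_stats_dicts={"MERQURY": []}))  # -> [Merqury]
```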
-    {% include 'params/params.html' %} {% include 'tools/tools.html' %} {% if 'FASTA_VALIDATE' in all_stats_dicts %}{%
-    include 'fasta_validate/fasta_validate.html' %}{% endif %}
-
-    {% if 'GFF3_VALIDATE' in all_stats_dicts %}{% include 'gff3_validate/gff3_validate.html' %}{% endif %}
-
-    {% if 'NCBI_FCS_ADAPTOR' in all_stats_dicts %}{%
-    include 'ncbi_fcs_adaptor/ncbi_fcs_adaptor.html' %}{% endif %} {% if 'NCBI_FCS_GX' in all_stats_dicts %}{%
-    include 'ncbi_fcs_gx/ncbi_fcs_gx.html' %}{% endif %} {% if 'ASSEMBLATHON_STATS' in all_stats_dicts %}{% include
-    'assemblathon_stats/assemblathon_stats.html' %}{% endif %} {% if 'GENOMETOOLS_GT_STAT' in all_stats_dicts %}{%
-    include 'genometools_gt_stat/genometools_gt_stat.html' %}{% endif %} {% if 'BUSCO' in all_stats_dicts %}{%
-    include 'busco/busco.html' %}{% endif %} {% if 'TIDK' in all_stats_dicts %}{% include 'tidk/tidk.html' %}{%
-    endif %} {% if 'LAI' in all_stats_dicts %}{% include 'lai/lai.html' %}{% endif %} {% if 'KRAKEN2' in
-    all_stats_dicts %}{% include 'kraken2/kraken2.html' %}{% endif %} {% if 'HIC' in all_stats_dicts %}{% include
-    'hic/hic.html' %}{% endif %} {% if 'CIRCOS' in all_stats_dicts %}{% include 'circos/circos.html' %}{% endif %}
+
+    {% include 'params/params.html' %}
+    {% include 'tools/tools.html' %}
+
+    {% if 'FASTA_VALIDATE' in all_stats_dicts %}
+    {%include 'fasta_validate/fasta_validate.html' %}
+    {% endif %}
+
+    {% if 'GFF3_VALIDATE' in all_stats_dicts %}
+    {% include 'gff3_validate/gff3_validate.html' %}
+    {% endif %}
+
+    {% if 'NCBI_FCS_ADAPTOR' in all_stats_dicts %}
+    {% include 'ncbi_fcs_adaptor/ncbi_fcs_adaptor.html' %}
+    {% endif %}
+
+    {% if 'NCBI_FCS_GX' in all_stats_dicts %}
+    {% include 'ncbi_fcs_gx/ncbi_fcs_gx.html' %}
+    {% endif %}
+
+    {% if 'ASSEMBLATHON_STATS' in all_stats_dicts %}
+    {% include 'assemblathon_stats/assemblathon_stats.html' %}
+    {% endif %}
+
+    {% if 'GENOMETOOLS_GT_STAT' in all_stats_dicts %}
+    {% include 'genometools_gt_stat/genometools_gt_stat.html' %}
+    {% endif %}
+
+    {% if 'BUSCO' in all_stats_dicts %}
+    {% include 'busco/busco.html' %}
+    {% endif %}
+
+    {% if 'BUSCO_GFF' in all_stats_dicts %}
+    {% include 'busco_gff/busco_gff.html' %}
+    {% endif %}
+
+    {% if 'TIDK' in all_stats_dicts %}
+    {% include 'tidk/tidk.html' %}
+    {% endif %}
+
+    {% if 'LAI' in all_stats_dicts %}
+    {% include 'lai/lai.html' %}
+    {% endif %}
+
+    {% if 'KRAKEN2' in all_stats_dicts %}
+    {% include 'kraken2/kraken2.html' %}
+    {% endif %}
+
+    {% if 'HIC' in all_stats_dicts %}
+    {% include 'hic/hic.html' %}
+    {% endif %}
+
+    {% if 'SYNTENY_CIRCOS' in all_stats_dicts %}
+    {% include 'synteny_circos/synteny_circos.html' %}
+    {% endif %}
+
+    {% if 'SYNTENY_DOTPLOT' in all_stats_dicts %}
+    {% include 'synteny_dotplot/synteny_dotplot.html' %}
+    {% endif %}
+
+    {% if 'SYNTENY_PLOTSR' in all_stats_dicts %}
+    {% include 'synteny_plotsr/synteny_plotsr.html' %}
+    {% endif %}
+
+    {% if 'MERQURY' in all_stats_dicts %}
+    {% include 'merqury/merqury.html' %}
+    {% endif %}

     {% include 'js.html' %}
diff --git a/bin/report_modules/templates/busco/busco.html b/bin/report_modules/templates/busco/busco.html
index 4f6f9b3a..20e8fc04 100644
--- a/bin/report_modules/templates/busco/busco.html
+++ b/bin/report_modules/templates/busco/busco.html
@@ -11,7 +11,7 @@
     viral genomes. arXiv:2106.11799 [q-bio] [Internet]. Available from:
     arxiv.org/abs/2106.11799

-    Version: {{ all_stats_dicts['VERSIONS']['BUSCO']['busco'] }}
+    Version: {{ all_stats_dicts['VERSIONS']['BUSCO_ASSEMBLY']['busco'] }}
{% include 'busco/dropdown.html' %} {% include 'busco/summary_contents.html' %} {% include 'busco/report_contents.html' %}
diff --git a/bin/report_modules/templates/busco_gff/busco_gff.html b/bin/report_modules/templates/busco_gff/busco_gff.html
new file mode 100644
index 00000000..654a59c9
--- /dev/null
+++ b/bin/report_modules/templates/busco_gff/busco_gff.html
@@ -0,0 +1,25 @@
+    [section markup]
diff --git a/bin/report_modules/templates/busco_gff/dropdown.html b/bin/report_modules/templates/busco_gff/dropdown.html
new file mode 100644
index 00000000..982255f5
--- /dev/null
+++ b/bin/report_modules/templates/busco_gff/dropdown.html
@@ -0,0 +1,13 @@
+    [dropdown menu markup]
diff --git a/bin/report_modules/templates/busco_gff/report_contents.html b/bin/report_modules/templates/busco_gff/report_contents.html
new file mode 100644
index 00000000..55be556f
--- /dev/null
+++ b/bin/report_modules/templates/busco_gff/report_contents.html
@@ -0,0 +1,77 @@
+{% for item in range(all_stats_dicts["BUSCO_GFF"]|length) %}
+    [report card markup]
+{% endfor %}
diff --git a/bin/report_modules/templates/busco_gff/summary_contents.html b/bin/report_modules/templates/busco_gff/summary_contents.html
new file mode 100644
index 00000000..d99344d2
--- /dev/null
+++ b/bin/report_modules/templates/busco_gff/summary_contents.html
@@ -0,0 +1,37 @@
+    Summary
+
+    [table: Annotation | Lineage | Percentages]
+    {% for item in range(all_stats_dicts["BUSCO_GFF"]|length) %}
+    [row: {{ all_stats_dicts['BUSCO_GFF'][item]['hap'] }} | {{ all_stats_dicts['BUSCO_GFF'][item]['lineage'] }} | {{ all_stats_dicts['BUSCO_GFF'][item]['search_percentages'] }}]
+    {% endfor %}
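Aside: the `search_percentages` value rendered in this summary table is produced by `BuscoParser.get_busco_percentages()` in the parser diff above. A small sketch with a made-up BUSCO short-summary line; the regex is the one used in the parser, everything else is illustrative:

```python
import re

# Hypothetical line from a BUSCO short_summary file; the real input comes
# from short_summary.specific.<lineage>.<tag>_<id>.txt files.
file_text = "\t***** Results: *****\n\n\tC:98.5%[S:97.1%,D:1.4%],F:0.5%,M:1.0%,n:2326\n"

# Same pattern as BuscoParser.get_busco_percentages(): grab the whole C:... line.
percentages = re.compile("C:(.*)").search(file_text).group(0).strip()
print(percentages)  # C:98.5%[S:97.1%,D:1.4%],F:0.5%,M:1.0%,n:2326
```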
diff --git a/bin/report_modules/templates/circos/dropdown.html b/bin/report_modules/templates/circos/dropdown.html
deleted file mode 100644
index 0cdafbb7..00000000
--- a/bin/report_modules/templates/circos/dropdown.html
+++ /dev/null
@@ -1,10 +0,0 @@
-    [dropdown menu markup]
diff --git a/bin/report_modules/templates/gff3_validate/gff3_validate.html b/bin/report_modules/templates/gff3_validate/gff3_validate.html
index 01fa2754..d43fb8db 100644
--- a/bin/report_modules/templates/gff3_validate/gff3_validate.html
+++ b/bin/report_modules/templates/gff3_validate/gff3_validate.html
@@ -1,22 +1,17 @@
diff --git a/bin/report_modules/templates/merqury/dropdown.html b/bin/report_modules/templates/merqury/dropdown.html
new file mode 100644
index 00000000..5c61274c
--- /dev/null
+++ b/bin/report_modules/templates/merqury/dropdown.html
@@ -0,0 +1,10 @@
+    [dropdown menu markup]
diff --git a/bin/report_modules/templates/merqury/merqury.html b/bin/report_modules/templates/merqury/merqury.html
new file mode 100644
index 00000000..528fb47f
--- /dev/null
+++ b/bin/report_modules/templates/merqury/merqury.html
@@ -0,0 +1,17 @@
+    [section markup]
diff --git a/bin/report_modules/templates/merqury/report_contents.html b/bin/report_modules/templates/merqury/report_contents.html
new file mode 100644
index 00000000..63ec32d8
--- /dev/null
+++ b/bin/report_modules/templates/merqury/report_contents.html
@@ -0,0 +1,61 @@
+{% set vars = {'is_first': True} %}
+{% for item in range(all_stats_dicts["MERQURY"]|length) %}
+{% set active_text = 'display: block' if vars.is_first else 'display: none' %}
+    {{ all_stats_dicts['MERQURY'][item]['individual_id'] }}
+
+    Completeness stats
+    {{ all_stats_dicts['MERQURY'][item]['completeness_stats_table_html'] }}
+
+    Consensus quality QV stats
+    {{ all_stats_dicts['MERQURY'][item]['qv_stats_table_html'] }}
+
+    Spectra-asm
+    [img: all_stats_dicts['MERQURY'][item]['asm_plot']]
+
+    {% for subitem in range(all_stats_dicts["MERQURY"][item]['hap_plots']|length) %}
+    {{ all_stats_dicts['MERQURY'][item]['hap_plots'][subitem]['hap'] }} spectra-cn
+    [img: all_stats_dicts['MERQURY'][item]['hap_plots'][subitem]['plot']]
+    {% endfor %}
+
+    {% if all_stats_dicts['MERQURY'][item]['plot'] is not none %}
+    Spectra-cn
+    [img: all_stats_dicts['MERQURY'][item]['plot']]
+    {% endif %}
+
+    {% if all_stats_dicts['MERQURY'][item]['hapmers_blob'] is not none %}
+    Hapmers Blob
+    [img: all_stats_dicts['MERQURY'][item]['hapmers_blob']]
+    {% endif %}
+
+{% if vars.update({'is_first': False}) %}
+{% endif %}
+{% endfor %}
diff --git a/bin/report_modules/templates/ncbi_fcs_adaptor/ncbi_fcs_adaptor.html b/bin/report_modules/templates/ncbi_fcs_adaptor/ncbi_fcs_adaptor.html
index e62678c9..4c95333f 100644
--- a/bin/report_modules/templates/ncbi_fcs_adaptor/ncbi_fcs_adaptor.html
+++ b/bin/report_modules/templates/ncbi_fcs_adaptor/ncbi_fcs_adaptor.html
@@ -5,7 +5,7 @@

https://github.com/ncbi/fcs

-    Version: {{ all_stats_dicts['VERSIONS']['NCBI_FCS_ADAPTOR']['av_screen_x'] }}
+    Version: {{ all_stats_dicts['VERSIONS']['FCS_FCSADAPTOR']['FCS-adaptor'] }}
{% include 'ncbi_fcs_adaptor/dropdown.html' %} {% include 'ncbi_fcs_adaptor/report_contents.html' %}
diff --git a/bin/report_modules/templates/synteny_circos/dropdown.html b/bin/report_modules/templates/synteny_circos/dropdown.html
new file mode 100644
index 00000000..9111391c
--- /dev/null
+++ b/bin/report_modules/templates/synteny_circos/dropdown.html
@@ -0,0 +1,10 @@
+    [dropdown menu markup]
diff --git a/bin/report_modules/templates/circos/report_contents.html b/bin/report_modules/templates/synteny_circos/report_contents.html
similarity index 60%
rename from bin/report_modules/templates/circos/report_contents.html
rename to bin/report_modules/templates/synteny_circos/report_contents.html
index e3355169..e1d975ef 100644
--- a/bin/report_modules/templates/circos/report_contents.html
+++ b/bin/report_modules/templates/synteny_circos/report_contents.html
@@ -1,15 +1,15 @@
-{% set vars = {'is_first': True} %} {% for item in range(all_stats_dicts["CIRCOS"]|length) %} {% set active_text =
+{% set vars = {'is_first': True} %} {% for item in range(all_stats_dicts["SYNTENY_CIRCOS"]|length) %} {% set active_text =
 'display: block' if vars.is_first else 'display: none' %}
-    {{ all_stats_dicts['CIRCOS'][item]['tag.on.tag'] }}
+    {{ all_stats_dicts['SYNTENY_CIRCOS'][item]['tag.on.tag'] }}

-    {% if all_stats_dicts['CIRCOS'][item]['is_plot_empty'] %}
+    {% if all_stats_dicts['SYNTENY_CIRCOS'][item]['is_plot_empty'] %}

     Either no alignments were detected between the target and reference assemblies, or all the alignment
@@ -18,7 +18,7 @@
     {% else %}
-        [img: all_stats_dicts['CIRCOS'][item]['circos_plot']]
+        [img: all_stats_dicts['SYNTENY_CIRCOS'][item]['circos_plot']]
     {% endif %}
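Aside: the `{% set vars = {'is_first': True} %}` / `{% if vars.update(...) %}` pairing used in this template (and in `merqury/report_contents.html` above) is the standard Jinja2 workaround for the fact that `{% set %}` inside a loop does not persist across iterations: `dict.update()` returns `None`, so the `{% if %}` renders nothing while still mutating the dict. A runnable sketch with made-up tags:

```python
# Sketch of the "show only the first item" idiom used in these templates.
from jinja2 import Template

t = Template(
    "{% set vars = {'is_first': True} %}"
    "{% for tag in tags %}"
    "{{ tag }}:{{ 'block' if vars.is_first else 'none' }} "
    "{% if vars.update({'is_first': False}) %}{% endif %}"
    "{% endfor %}"
)

# Only the first plot is rendered visible; the rest start hidden.
print(t.render(tags=["a.on.b", "c.on.b"]))  # -> a.on.b:block c.on.b:none
```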
diff --git a/bin/report_modules/templates/circos/circos.html b/bin/report_modules/templates/synteny_circos/synteny_circos.html
similarity index 85%
rename from bin/report_modules/templates/circos/circos.html
rename to bin/report_modules/templates/synteny_circos/synteny_circos.html
index 4b9c8c1b..d7547707 100644
--- a/bin/report_modules/templates/circos/circos.html
+++ b/bin/report_modules/templates/synteny_circos/synteny_circos.html
@@ -1,4 +1,4 @@
-