diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..7907016c --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,52 @@ +name: Tests + +on: [pull_request, workflow_call] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + tests: + name: Test with py${{ matrix.python-version }} + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v4 + - name: Set up mamba + uses: conda-incubator/setup-miniconda@v2 + with: + activate-environment: test + auto-activate-base: false + python-version: ${{ matrix.python-version }} + miniforge-version: latest + miniforge-variant: Mambaforge + use-mamba: true + mamba-version: "*" + - name: Install test dependencies + shell: bash -el {0} + run: | + mamba install -c conda-forge -c bioconda bcftools pip pytest pytest-cov + - name: Upgrade pip and install our package + shell: bash -el {0} + run: | + python -m pip install --upgrade pip setuptools wheel setuptools-scm + pip install -e . + - name: Test with pytest + shell: bash -el {0} + run: | + python -m pytest --cov=. 
--cov-report term-missing --cov-fail-under 89 --doctest-modules --junitxml=junit/test-results-${{ matrix.python-version }}.xml + - name: Test command line + shell: bash -el {0} + run: | + ./test/cmdline_tests.sh + - name: Upload pytest test results + uses: actions/upload-artifact@v4 # v3 is retired on github.com; v4 requires unique artifact names, which the matrix suffix below provides + with: + name: pytest-results-${{ matrix.python-version }} + path: junit/test-results-${{ matrix.python-version }}.xml + # Use always() to always run this step to publish test results when there are test failures + if: ${{ always() }} diff --git a/.gitignore b/.gitignore index fcb2afaf..4010ead1 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ c57* build/ dist/ env.yaml +trtools/version.py # Files generated by examples NA12878_eh_reader.vcf.gz diff --git a/.readthedocs.yml b/.readthedocs.yml index a8601014..7d0baa34 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -5,9 +5,15 @@ # Required version: 2 +build: + os: "ubuntu-22.04" + tools: + python: "mambaforge-22.9" + # Build documentation in the docs/ directory with Sphinx sphinx: configuration: doc/conf.py + fail_on_warning: true # Optionally build your docs in additional formats such as PDF formats: diff --git a/.readthedocs_conda_env.yml b/.readthedocs_conda_env.yml index 4d4ac8e6..14838a5c 100644 --- a/.readthedocs_conda_env.yml +++ b/.readthedocs_conda_env.yml @@ -2,7 +2,7 @@ name: trtools_3.6 channels: - conda-forge - bioconda - - defaults + - nodefaults dependencies: - python=3.6 - sphinx=3.0.4 @@ -16,4 +16,5 @@ dependencies: - scikit-learn - sphinx-autodoc-typehints - cyvcf2 - + - sphinx_rtd_theme + - statsmodels diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 503e49ff..00000000 --- a/.travis.yml +++ /dev/null @@ -1,55 +0,0 @@ -language: python - -jobs: - include: - - os: linux - python: 3.5 - - os: linux - python: 3.6 - - os: osx - osx_image: xcode12 - language: generic - env: PYTHON=36 - before_install: - - pip3 install virtualenv - - virtualenv -p python3 ~/venv - - source 
~/venv/bin/activate - - python --version - - pip install -U --upgrade pip - - pip install -U pytest - - pip install -U pytest-cov - - pip install -U codecov - - pip install -U virtualenv - - wget https://github.com/samtools/bcftools/releases/download/1.10.2/bcftools-1.10.2.tar.bz2 - - tar -jvxf bcftools-1.10.2.tar.bz2 - - cd bcftools-1.10.2 && make && sudo make install && cd .. - - wget https://github.com/samtools/htslib/releases/download/1.10.2/htslib-1.10.2.tar.bz2 - - tar -jvxf htslib-1.10.2.tar.bz2 - - cd htslib-1.10.2 && make && sudo make install && cd .. - -before_install: # for linux jobs - - sudo apt-get update - # attempt to get cyvcf2 working, it requires a SHA256 'symbol' - - sudo apt-get -y install -y libcrypto++-dev libssl-dev libcurl4-openssl-dev - - python --version - - pip install -U --upgrade pip - - pip install -U pytest - - pip install -U pytest-cov - - pip install -U codecov - - pip install -U virtualenv - - wget https://github.com/samtools/bcftools/releases/download/1.10.2/bcftools-1.10.2.tar.bz2 - - tar -jvxf bcftools-1.10.2.tar.bz2 - - cd bcftools-1.10.2 && make && sudo make install && cd .. - - wget https://github.com/samtools/htslib/releases/download/1.10.2/htslib-1.10.2.tar.bz2 - - tar -jvxf htslib-1.10.2.tar.bz2 - - cd htslib-1.10.2 && make && sudo make install && cd .. - -install: - - pip install . - -script: - - python -m pytest --cov=. - - sh test/cmdline_tests.sh - -after_success: - - codecov diff --git a/Dockerfile b/Dockerfile index 17adeabf..1dd9c86b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,7 +41,7 @@ RUN ldconfig WORKDIR .. # Download and install TRTools -RUN git clone https://github.com/gymreklab/TRTools +RUN git clone https://github.com/gymrek-lab/TRTools WORKDIR TRTools RUN python3 setup.py install WORKDIR .. 
diff --git a/PUBLISHING.rst b/PUBLISHING.rst index a27ae5ed..fdf9e6f3 100644 --- a/PUBLISHING.rst +++ b/PUBLISHING.rst @@ -22,7 +22,7 @@ New Dependencies ---------------- If you've added dependencies to trtools or its tests, those dependencies should be listed in - * setup.py + * pyproject.toml * the .readthedocs_conda_env.yml file in the root of the repository that's used for building TRTool's Read The Docs webpage. * the appropriate section of the bioconda recipe (see below) @@ -31,12 +31,12 @@ If you've added dependencies to trtools or its tests, those dependencies should Publishing Steps ---------------- -Once changes have been made to develop that are ready to be published, first choose the new version number. Then set up the environment you're going to publish TRTools from: +Once changes have been made to develop that are ready to be published, first choose the new version number according to `semantic versioning `_. Then set up the environment you're going to publish TRTools from: #. Create a clean environment. #. Install setuptools with version >= 40.8.0 -#. Additionally, install ``pytest``, ``wheel`` and ``twine`` -#. Clone the `trtools repo `_ +#. Additionally, install ``pytest``, ``wheel``, ``build``, and ``twine`` +#. Clone the `trtools repo `_ #. Check out the develop branch #. Run :code:`pip install --upgrade pip && pip install -e .` @@ -45,26 +45,16 @@ Then go through the steps of merging the changes into the master branch: #. Run :code:`pytest` and make sure all the tests pass. Then run :code:`./test/cmdline_tests.sh` and make sure those tests pass. #. Change the 'Unreleased Changes' section of :code:`RELEASE_NOTES.rst` to the new version number. #. Check if any changes have been made that have not yet been documented in the release notes. If so, document them. -#. Update the version number in setup.py -#. Run ``python setup.py sdist bdist_wheel`` (this ensures that trtools/version.py contains the updated version number) -#. 
Commit the changes to setup.py and trtools/version.py and push them. #. Submit a pull request from develop into master on the github webiste. -#. If the code review and travis checks pass, merge the pull request. +#. If the code review checks pass, merge the pull request. #. Tag the merge commit with the package version in vX.Y.Z format. (For more details on tagging, see `below`) Then go through the steps of publishing the changed code to PyPI: -1. :code:`cd` into the root of your clone of the trtools repo, checkout master and pull the latest change. +1. :code:`cd` into the root of your clone of the trtools repo, checkout master and pull the latest change. Note that the most recent commit *must* be tagged. 2. Run :code:`rm -rf build dist *.egg-info` to make sure all previous build artifacts are removed -3. Run :code:`python setup.py sdist bdist_wheel` to build the package. - - This will create the warning:: - - UserWarning: Unknown distribution option: 'license_file' warnings.warn(msg) - - You can ignore this warning: the 'license_file' option is necessary for creating the build artifacts - -4. Run :code:`twine upload dist/*` to upload the build to PyPI +3. Run :code:`python -m build` to build the package with the version number you just tagged. (Note: you might need to install ``build`` first.) +4. Run :code:`twine upload dist/*` to upload the distribution to PyPI Lastly, the change needs to be published to bioconda. diff --git a/README.rst b/README.rst index 792fbdef..5a1880dd 100644 --- a/README.rst +++ b/README.rst @@ -2,12 +2,12 @@ .. a location that the doc/index.rst uses for including this file .. before_header -.. image:: https://travis-ci.org/gymreklab/TRTools.svg?branch=master - :target: https://travis-ci.org/gymreklab/TRTools +.. image:: https://github.com/gymrek-lab/trtools/workflows/Tests/badge.svg + :target: https://github.com/gymrek-lab/trtools/actions/workflows/tests.yml -.. 
image:: https://codecov.io/gh/gymreklab/TRTools/branch/master/graph/badge.svg - :target: https://codecov.io/gh/gymreklab/TRTools +.. image:: https://codecov.io/gh/gymrek-lab/TRTools/branch/master/graph/badge.svg + :target: https://codecov.io/gh/gymrek-lab/TRTools .. a location that the doc/index.rst uses for including this file @@ -33,13 +33,13 @@ With conda :: - conda install -c bioconda trtools + conda install -c conda-forge -c bioconda trtools Optionally install :code:`bcftools` which is used to prepare input files for TRTools by running: :: - conda install -c bioconda bcftools + conda install -c conda-forge -c bioconda bcftools Note: Bioconda only supports python versions 3.6-3.8 currently, so that is all TRTools supports in conda. @@ -64,10 +64,10 @@ Note: TRTools installation may fail for pip version 10.0.1, hence the need to up From source ^^^^^^^^^^^ -To install from source (only recommended for development) download the TRTools repository from `github `_, +To install from source (only recommended for development) download the TRTools repository from `github `_, checkout the branch you're interested in, and run the following command from the base directory of the repo. e.g.:: - git clone https://github.com/gymreklab/TRTools + git clone https://github.com/gymrek-lab/TRTools cd TRTools/ pip install --upgrade pip pip install -e . @@ -127,7 +127,7 @@ Development Notes Contact Us ---------- -Please submit an issue on the `trtools github `_ +Please submit an issue on the `trtools github `_ .. _Contributing: @@ -135,7 +135,7 @@ Contributing ------------ We appreciate contributions to TRTools. If you would like to contribute a fix or new feature, follow these guidelines: -1. Consider `discussing `_ your solution with us first so we can provide help or feedback if necessary. +1. Consider `discussing `_ your solution with us first so we can provide help or feedback if necessary. #. Install TRTools from source `as above `_. #. 
Additionally, install :code:`pytest`, `pytest-cov `_ and :code:`sphinx>=3` in your environment. #. Fork the TRTools repository. diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 619e9179..488fcbfe 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -79,7 +79,7 @@ Misc: Bug fixes: -* https://github.com/gymreklab/TRTools/issues/146 fixed record positions being compared twice +* https://github.com/gymrek-lab/TRTools/issues/146 fixed record positions being compared twice * CompareSTR: Decision on which records are comparable is now based on data from harmonized TRRecords, and not from the records directly from VCF readers. Thanks to this, HipSTR records which have different starting positions, but position of their repeat is at the same position are compared correctly (harmonization step removes this difference). @@ -92,7 +92,7 @@ Bug fixes: Bug fixes: -* https://github.com/gymreklab/TRTools/issues/143 Fix HipstrMinSuppReads filter when +* https://github.com/gymrek-lab/TRTools/issues/143 Fix HipstrMinSuppReads filter when there are called samples but none have ALLREADS 4.0.0 diff --git a/doc/VIGNETTE-AFREQ.rst b/doc/VIGNETTE-AFREQ.rst index 6bb839e7..ab5be707 100644 --- a/doc/VIGNETTE-AFREQ.rst +++ b/doc/VIGNETTE-AFREQ.rst @@ -3,7 +3,7 @@ Plotting allele length distributions by population group Tools used: mergeSTR, statSTR -This vignette shows how to use :code:`mergeSTR` to merge two VCF files and :code:`statSTR` to plot allele frequencies across different sample groups for an example TR locus. It uses the example VCF files :code:`ceu_ex.vcf.gz` and :code:`yri_ex.vcf.gz` available at https://github.com/gymreklab/TRTools/tree/master/example-files. These VCFs were generated by GangSTR on samples sequenced by the 1000 Genomes Project. They have already been sorted and indexed. +This vignette shows how to use :code:`mergeSTR` to merge two VCF files and :code:`statSTR` to plot allele frequencies across different sample groups for an example TR locus. 
It uses the example VCF files :code:`ceu_ex.vcf.gz` and :code:`yri_ex.vcf.gz` available at https://github.com/gymrek-lab/TRTools/tree/master/example-files. These VCFs were generated by GangSTR on samples sequenced by the 1000 Genomes Project. They have already been sorted and indexed. After downloading the VCF files, we can use :code:`mergeSTR` to merge them into a single VCF:: diff --git a/doc/VIGNETTE-COMPARE-CALLERS.rst b/doc/VIGNETTE-COMPARE-CALLERS.rst index 452ed6f3..e1bc948d 100644 --- a/doc/VIGNETTE-COMPARE-CALLERS.rst +++ b/doc/VIGNETTE-COMPARE-CALLERS.rst @@ -3,7 +3,7 @@ Comparing TR calls across different genotypers Tools used: mergeSTR, compareSTR -This vignette shows how to use :code:`mergeSTR` to merge VCFs from multiple samples into a single VCF, and :code:`compareSTR` to compare VCF files generated by different genotypers (HipSTR and ExpansionHunter) using the same set of reference TRs. In this example, we use VCF files available at https://github.com/gymreklab/TRTools/tree/master/example-files: +This vignette shows how to use :code:`mergeSTR` to merge VCFs from multiple samples into a single VCF, and :code:`compareSTR` to compare VCF files generated by different genotypers (HipSTR and ExpansionHunter) using the same set of reference TRs. In this example, we use VCF files available at https://github.com/gymrek-lab/TRTools/tree/master/example-files: * :code:`NA12878_chr21_eh.sorted.vcf.gz`, :code:`NA12891_chr21_eh.sorted.vcf.gz`, and :code:`NA12892_chr21_eh.sorted.vcf.gz` generated using ExpansionHunter on three separate samples * :code:`trio_chr21_hipstr.sorted.vcf.gz` generated using HipSTR run jointly on all three samples. 
diff --git a/doc/VIGNETTE-COMPARE.rst b/doc/VIGNETTE-COMPARE.rst index 5b49f293..2810edd5 100644 --- a/doc/VIGNETTE-COMPARE.rst +++ b/doc/VIGNETTE-COMPARE.rst @@ -3,7 +3,7 @@ Comparing TR calls across different parameter sets Tools used: compareSTR -This vignette shows how to use :code:`compareSTR` to compare two VCF files generated using the same set of reference TRs. In this example, we use VCF files :code:`c57_ex1.vcf.gz` and :code:`c57_ex2.vcf.gz` available at https://github.com/gymreklab/TRTools/tree/master/example-files. These VCF files were generated by GangSTR on a mouse dataset using two different sets of stutter parameters. +This vignette shows how to use :code:`compareSTR` to compare two VCF files generated using the same set of reference TRs. In this example, we use VCF files :code:`c57_ex1.vcf.gz` and :code:`c57_ex2.vcf.gz` available at https://github.com/gymrek-lab/TRTools/tree/master/example-files. These VCF files were generated by GangSTR on a mouse dataset using two different sets of stutter parameters. To run :code:`compareSTR`:: diff --git a/doc/VIGNETTE-FILTER-QC.rst b/doc/VIGNETTE-FILTER-QC.rst index d687a12b..a695ea63 100644 --- a/doc/VIGNETTE-FILTER-QC.rst +++ b/doc/VIGNETTE-FILTER-QC.rst @@ -3,7 +3,7 @@ Filtering and QC of VCFs Tools used: dumpSTR, qcSTR -This vignette shows how to use :code:`dumpSTR` to filter a VCF and :code:`qcSTR` to visualize some basic QC metrics. For this example, we use the file :code:`trio_chr21_popstr.sorted.vcf.gz` available at https://github.com/gymreklab/TRTools/tree/master/example-files. This file was generated on samples NA12878, NA12891, and NA12892 using popSTR. +This vignette shows how to use :code:`dumpSTR` to filter a VCF and :code:`qcSTR` to visualize some basic QC metrics. For this example, we use the file :code:`trio_chr21_popstr.sorted.vcf.gz` available at https://github.com/gymrek-lab/TRTools/tree/master/example-files. This file was generated on samples NA12878, NA12891, and NA12892 using popSTR. 
First, let's perform some filtering on the VCF:: diff --git a/doc/VIGNETTE-STATSTR.rst b/doc/VIGNETTE-STATSTR.rst index aea2f513..7704d1e5 100644 --- a/doc/VIGNETTE-STATSTR.rst +++ b/doc/VIGNETTE-STATSTR.rst @@ -3,7 +3,7 @@ Computing per-locus TR statistics Tools used: mergeSTR, statSTR -This vignette shows how to use :code:`mergeSTR` to merge two VCF files and :code:`statSTR` to compute statistics across different sample groups for an example TR locus. It uses the example VCF files :code:`ceu_ex.vcf.gz` and :code:`yri_ex.vcf.gz` available at https://github.com/gymreklab/TRTools/tree/master/example-files. These VCFs were generated by GangSTR on samples sequenced by the 1000 Genomes Project. They have already been sorted and indexed. +This vignette shows how to use :code:`mergeSTR` to merge two VCF files and :code:`statSTR` to compute statistics across different sample groups for an example TR locus. It uses the example VCF files :code:`ceu_ex.vcf.gz` and :code:`yri_ex.vcf.gz` available at https://github.com/gymrek-lab/TRTools/tree/master/example-files. These VCFs were generated by GangSTR on samples sequenced by the 1000 Genomes Project. They have already been sorted and indexed. 
After downloading the VCF files, we can use :code:`mergeSTR` to merge them into a single VCF:: diff --git a/doc/conf.py b/doc/conf.py index 060dfa32..0ac50598 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -13,7 +13,6 @@ import os import sys sys.path.insert(0, os.path.abspath('..')) -import trtools.version # -- Project information ----------------------------------------------------- @@ -21,10 +20,6 @@ copyright = '2020, Gymreklab' author = 'Gymreklab' -# The full version, including alpha/beta/rc tags -version = trtools.version.version -release = trtools.version.version - master_doc = 'index' # -- General configuration --------------------------------------------------- @@ -73,9 +68,6 @@ 'searchbox.html' ] } -html_theme_options = { - 'fixed_sidebar' : True -} # Add any paths that contain custom static files (such as style sheets) here, diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..3b5e6cc7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,56 @@ +[build-system] +requires = ["setuptools>=61", "setuptools-scm>=8"] # [project] metadata needs setuptools>=61; version_file below needs setuptools-scm>=8 +build-backend = "setuptools.build_meta" + +[project] +name = "trtools" +authors = [ + {name = "Melissa Gymrek", email = "mgymrek@ucsd.edu"}, + {name = "Gymrek Lab"}, +] +description = "Toolkit for genome-wide analysis of STRs" +readme = "README.rst" +requires-python = ">=3.5" +license = {text = "MIT"} +classifiers = [ + "Programming Language :: Python :: 3.5", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Bio-Informatics", +] +dependencies = [ + "cyvcf2", + "matplotlib", + "numpy", + "pandas", + "pybedtools", + "pysam", + "scikit-learn", + "scipy", + "statsmodels", +] +dynamic = ["version"] + +[tool.setuptools] +packages = ["trtools"] +script-files = ["trtools/testsupport/test_trtools.sh", "scripts/trtools_prep_beagle_vcf.sh"] +license-files = ["LICENSE.txt"] + +[tool.setuptools_scm] +# generated automatically by 
setuptools when running pip install -e or python -m build +version_file = "trtools/version.py" + +[project.scripts] +dumpSTR = "trtools.dumpSTR:run" +mergeSTR = "trtools.mergeSTR:run" +statSTR = "trtools.statSTR:run" +compareSTR = "trtools.compareSTR:run" +qcSTR = "trtools.qcSTR:run" +associaTR = "trtools.associaTR:run" + +[project.urls] +Homepage = "https://trtools.readthedocs.org" +Documentation = "https://trtools.readthedocs.org" +Repository = "https://github.com/gymrek-lab/trtools.git" +Changelog = "https://github.com/gymrek-lab/trtools/blob/master/RELEASE_NOTES.rst" diff --git a/setup.py b/setup.py deleted file mode 100644 index bfb4deb8..00000000 --- a/setup.py +++ /dev/null @@ -1,72 +0,0 @@ -import os -from setuptools import setup, find_packages - -DESCRIPTION = "Toolkit for genome-wide analysis of STRs" -LONG_DESCRIPTION = DESCRIPTION -NAME = "trtools" -AUTHOR = "Melissa Gymrek" -AUTHOR_EMAIL = "mgymrek@ucsd.edu" -MAINTAINER = "Melissa Gymrek" -MAINTAINER_EMAIL = "mgymrek@ucsd.edu" -DOWNLOAD_URL = 'http://github.com/gymreklab/TRTools' -LICENSE = 'MIT' - -# version-keeping code based on pybedtools -curdir = os.path.abspath(os.path.dirname(__file__)) -MAJ = 5 -MIN = 0 -REV = 2 -VERSION = '%d.%d.%d' % (MAJ, MIN, REV) -with open(os.path.join(curdir, 'trtools/version.py'), 'w') as fout: - fout.write( - "\n".join(["", - "# THIS FILE IS GENERATED FROM SETUP.PY", - "version = '{version}'", - "__version__ = version"]).format(version=VERSION) - ) - -setup(name=NAME, - version=VERSION, - description=DESCRIPTION, - long_description=LONG_DESCRIPTION, - author=AUTHOR, - author_email=AUTHOR_EMAIL, - maintainer=MAINTAINER, - maintainer_email=MAINTAINER_EMAIL, - url=DOWNLOAD_URL, - download_url=DOWNLOAD_URL, - license=LICENSE, - python_requires='>=3.5', - packages=find_packages(), - include_package_data=True, - license_file="LICENSE.txt", - scripts=[ - "trtools/testsupport/test_trtools.sh", - 'scripts/trtools_prep_beagle_vcf.sh' - ], - entry_points={ - 'console_scripts': [ 
- 'dumpSTR=trtools.dumpSTR:run', - 'mergeSTR=trtools.mergeSTR:run', - 'statSTR=trtools.statSTR:run', - 'compareSTR=trtools.compareSTR:run', - 'qcSTR=trtools.qcSTR:run', - 'associaTR=trtools.associaTR:run' - ], - }, - install_requires=['cyvcf2', - 'matplotlib', - 'numpy', - 'pandas', - 'pybedtools', - 'pysam', - 'scikit-learn', - 'scipy', - 'statsmodels'], - classifiers=['Development Status :: 4 - Beta',\ - 'Programming Language :: Python :: 3.5',\ - 'License :: OSI Approved :: MIT License',\ - 'Operating System :: OS Independent',\ - 'Intended Audience :: Science/Research',\ - 'Topic :: Scientific/Engineering :: Bio-Informatics'] - ) diff --git a/trtools/__init__.py b/trtools/__init__.py index 1f567094..e6d1bc83 100644 --- a/trtools/__init__.py +++ b/trtools/__init__.py @@ -1,2 +1,4 @@ -from .version import __version__ - +try: + from .version import __version__ +except ModuleNotFoundError: + __version__ = "unknown" diff --git a/trtools/associaTR/README.rst b/trtools/associaTR/README.rst index 2e0a7946..14df88e5 100644 --- a/trtools/associaTR/README.rst +++ b/trtools/associaTR/README.rst @@ -127,7 +127,7 @@ Example Commands ---------------- Below is an :code:`associaTR` example. For this example no TRs causally impact the simulated phenotype. -Data files for this example can be found at https://github.com/gymreklab/TRTools/tree/master/example-files:: +Data files for this example can be found at https://github.com/gymrek-lab/TRTools/tree/master/example-files:: associaTR \ association_results.tsv \ diff --git a/trtools/compareSTR/README.rst b/trtools/compareSTR/README.rst index 5b264aa3..e66b35f5 100644 --- a/trtools/compareSTR/README.rst +++ b/trtools/compareSTR/README.rst @@ -95,7 +95,7 @@ CompareSTR requires input files to be compressed and indexed. Use the following Example Commands ---------------- -Below are :code:`compareSTR` examples using VCFs from supported TR genotypers. 
Data files can be found at https://github.com/gymreklab/TRTools/tree/master/example-files:: +Below are :code:`compareSTR` examples using VCFs from supported TR genotypers. Data files can be found at https://github.com/gymrek-lab/TRTools/tree/master/example-files:: # AdVNTR (comparing a file against itself. Not very interesting. Just for demonstration) # Note, you first need to reheader files to add required contig lines to VCF headers diff --git a/trtools/compareSTR/compareSTR.py b/trtools/compareSTR/compareSTR.py index 77fef1ff..c8ad324b 100644 --- a/trtools/compareSTR/compareSTR.py +++ b/trtools/compareSTR/compareSTR.py @@ -29,7 +29,7 @@ import trtools.utils.utils as utils from trtools import __version__ -from typing import List, Any, Callable, Tuple, Optional +from typing import List, Any, Callable, Optional def GetFormatFields(format_fields, format_binsizes, format_fileoption, vcfreaders): diff --git a/trtools/dumpSTR/README.rst b/trtools/dumpSTR/README.rst index 1ec44f20..18bcee94 100644 --- a/trtools/dumpSTR/README.rst +++ b/trtools/dumpSTR/README.rst @@ -150,7 +150,7 @@ DumpSTR outputs the following files: Example Commands ---------------- -Below are :code:`dumpSTR` examples using VCFs from supported TR genotypers. Data files can be found at https://github.com/gymreklab/TRTools/tree/master/example-files:: +Below are :code:`dumpSTR` examples using VCFs from supported TR genotypers. Data files can be found at https://github.com/gymrek-lab/TRTools/tree/master/example-files:: # AdVNTR dumpSTR --vcf NA12878_chr21_advntr.sorted.vcf.gz --advntr-min-call-DP 100 --out test_dumpstr_advntr diff --git a/trtools/mergeSTR/README.rst b/trtools/mergeSTR/README.rst index 3ffc0883..b648a07f 100644 --- a/trtools/mergeSTR/README.rst +++ b/trtools/mergeSTR/README.rst @@ -84,7 +84,7 @@ MergeSTR requires the input file to be compressed and indexed. Use the following Example Commands ---------------- -Below are :code:`mergeSTR` examples using VCFs from supported TR genotypers. 
Data files can be found at https://github.com/gymreklab/TRTools/tree/master/example-files:: +Below are :code:`mergeSTR` examples using VCFs from supported TR genotypers. Data files can be found at https://github.com/gymrek-lab/TRTools/tree/master/example-files:: # AdVNTR # Note, you first need to reheader files to add required contig lines to VCF headers diff --git a/trtools/qcSTR/README.rst b/trtools/qcSTR/README.rst index 1f485c78..538113ea 100644 --- a/trtools/qcSTR/README.rst +++ b/trtools/qcSTR/README.rst @@ -149,7 +149,7 @@ These additional options can be used to customize reference bias plots. Example Commands ---------------- -Below are :code:`qcSTR` examples using VCFs from supported TR genotypers. Data files can be found at https://github.com/gymreklab/TRTools/tree/master/example-files:: +Below are :code:`qcSTR` examples using VCFs from supported TR genotypers. Data files can be found at https://github.com/gymrek-lab/TRTools/tree/master/example-files:: # AdVNTR qcSTR --vcf NA12878_chr21_advntr.sorted.vcf.gz --out test_qc_advntr diff --git a/trtools/statSTR/README.rst b/trtools/statSTR/README.rst index 8fd85660..dc256ec9 100644 --- a/trtools/statSTR/README.rst +++ b/trtools/statSTR/README.rst @@ -78,7 +78,7 @@ If multiple sample groups are specified, instead there is one additional column Example Commands ---------------- -Below are :code:`statSTR` examples using VCFs from supported TR genotypers. Data files can be found at https://github.com/gymreklab/TRTools/tree/master/example-files:: +Below are :code:`statSTR` examples using VCFs from supported TR genotypers. 
Data files can be found at https://github.com/gymrek-lab/TRTools/tree/master/example-files:: # AdVNTR statSTR --vcf NA12878_chr21_advntr.sorted.vcf.gz \ diff --git a/trtools/testsupport/sample_vcfs/associaTR/generate_traits.py b/trtools/testsupport/sample_vcfs/associaTR/generate_traits.py index 1ec68e81..80861ad8 100755 --- a/trtools/testsupport/sample_vcfs/associaTR/generate_traits.py +++ b/trtools/testsupport/sample_vcfs/associaTR/generate_traits.py @@ -1,11 +1,14 @@ #!/usr/bin/env python3 +import pathlib import numpy as np import numpy.random seed = 2 -with open('samples.txt') as samples_file: +SCRIPT_DIR = pathlib.Path(__file__).parent.resolve() + +with open(SCRIPT_DIR / 'samples.txt') as samples_file: samples = np.array([int(sample.strip()) for sample in samples_file.readlines() if 'IID' not in sample]) n_samples = len(samples) diff --git a/trtools/testsupport/sample_vcfs/associaTR/make_dosages.py b/trtools/testsupport/sample_vcfs/associaTR/make_dosages.py index ed9ca996..1d8fa699 100755 --- a/trtools/testsupport/sample_vcfs/associaTR/make_dosages.py +++ b/trtools/testsupport/sample_vcfs/associaTR/make_dosages.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import random +import pathlib import cyvcf2 import numpy as np @@ -9,12 +10,14 @@ random.seed(11) +SCRIPT_DIR = pathlib.Path(__file__).parent.resolve() + # biallelic -vcf = cyvcf2.VCF('many_samples_biallelic.vcf.gz') +vcf = cyvcf2.VCF(str(SCRIPT_DIR / 'many_samples_biallelic.vcf.gz')) samples = vcf.samples -with open('gp_dosages.tsv', 'w') as gp_out, open('ap1_dosages.tsv', 'w') as ap1_out, open('ap2_dosages.tsv', 'w') as ap2_out: +with open(str(SCRIPT_DIR / 'gp_dosages.tsv'), 'w') as gp_out, open(str(SCRIPT_DIR / 'ap1_dosages.tsv'), 'w') as ap1_out, open(str(SCRIPT_DIR / 'ap2_dosages.tsv'), 'w') as ap2_out: for var in vcf: gp_out.write('{}\t{}\t{}'.format(var.CHROM, var.POS, var.POS)) ap1_out.write('{}\t{}\t{}'.format(var.CHROM, var.POS, var.POS)) @@ -88,14 +91,14 @@ 'tabix -f 
many_samples_biallelic_dosages.vcf.gz ' '"' ) -sp.run(cmd, shell = True, check=True) +sp.run(cmd, shell = True, check=True, cwd=str(SCRIPT_DIR)) # multiallelic -vcf = cyvcf2.VCF('many_samples_multiallelic.vcf.gz') +vcf = cyvcf2.VCF(str(SCRIPT_DIR / 'many_samples_multiallelic.vcf.gz')) samples = vcf.samples -with open('ap1_multi_dosages.tsv', 'w') as ap1_out, open('ap2_multi_dosages.tsv', 'w') as ap2_out: +with open(str(SCRIPT_DIR / 'ap1_multi_dosages.tsv'), 'w') as ap1_out, open(str(SCRIPT_DIR / 'ap2_multi_dosages.tsv'), 'w') as ap2_out: for var in vcf: ap1_out.write('{}\t{}\t{}'.format(var.CHROM, var.POS, var.POS)) ap2_out.write('{}\t{}\t{}'.format(var.CHROM, var.POS, var.POS)) @@ -139,5 +142,5 @@ 'tabix -f many_samples_multiallelic_dosages.vcf.gz ' '"' ) -sp.run(cmd, shell = True, check=True) +sp.run(cmd, shell = True, check=True, cwd=str(SCRIPT_DIR)) diff --git a/trtools/testsupport/test_trtools.sh b/trtools/testsupport/test_trtools.sh index 2e836e5c..bc6945e4 100755 --- a/trtools/testsupport/test_trtools.sh +++ b/trtools/testsupport/test_trtools.sh @@ -16,7 +16,7 @@ if [ ! -d "$TMP" ] ; then mkdir $TMP pushd $TMP git init . 
- git remote add origin -f https://github.com/gymreklab/TRTools.git + git remote add origin -f https://github.com/gymrek-lab/TRTools.git git pull origin master popd echo "Download done" diff --git a/trtools/utils/utils.py b/trtools/utils/utils.py index 3324422f..cec3307c 100644 --- a/trtools/utils/utils.py +++ b/trtools/utils/utils.py @@ -186,7 +186,7 @@ def GetEntropy(allele_freqs: Dict[Any, float]) -> float: Examples -------- >>> GetEntropy({0:0.5, 1:0.5}) - 1 + 1.0 """ if not ValidateAlleleFreqs(allele_freqs): return np.nan @@ -209,7 +209,7 @@ def GetMean(allele_freqs): Examples -------- - >>> GetMean({0:0, 1:1}) + >>> GetMean({0:0.5, 1:0.5}) 0.5 """ if not ValidateAlleleFreqs(allele_freqs): @@ -356,7 +356,7 @@ def GetCanonicalMotif(repseq): Examples -------- >>> GetCanonicalMotif("TG") - "AC" + 'AC' """ repseq = repseq.upper() # Get canonical sequence of each strand @@ -389,7 +389,7 @@ def GetCanonicalOneStrand(repseq): Examples -------- >>> GetCanonicalOneStrand("CAG") - "AGC" + 'AGC' """ repseq = repseq.upper() size = len(repseq) @@ -421,7 +421,7 @@ def ReverseComplement(seq): Examples -------- >>> ReverseComplement("AGGCT") - "AGCCT" + 'AGCCT' """ seq = seq.upper() newseq = "" diff --git a/trtools/version.py b/trtools/version.py deleted file mode 100644 index e4741ae5..00000000 --- a/trtools/version.py +++ /dev/null @@ -1,4 +0,0 @@ - -# THIS FILE IS GENERATED FROM SETUP.PY -version = '5.0.2' -__version__ = version \ No newline at end of file