diff --git a/.github/workflows/conventional-prs.yml b/.github/workflows/conventional-prs.yml new file mode 100644 index 0000000..17c8812 --- /dev/null +++ b/.github/workflows/conventional-prs.yml @@ -0,0 +1,18 @@ +name: PR +on: + pull_request_target: + types: + - opened + - reopened + - edited + - synchronize + +jobs: + title-format: + runs-on: ubuntu-latest + steps: + - uses: amannn/action-semantic-pull-request@v3.4.0 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + validateSingleCommit: true diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 08c1218..c8391d0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -21,16 +21,22 @@ jobs: formatting: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 - - name: Setup black environment + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install dependencies run: | - conda create -c conda-forge -y -q --name black black==22.3.0 + pip install black==22.3.0 + pip freeze + - name: Check formatting run: | - export PATH="/usr/share/miniconda/bin:$PATH" - source activate black black --check --diff -l 100 . 
+ - name: Comment PR if: github.event_name == 'pull_request' && failure() uses: marocchino/sticky-pull-request-comment@v1.1.0 @@ -49,14 +55,12 @@ jobs: - '3.10' needs: formatting steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 # we need tags for versioneer to work + - uses: actions/checkout@v4 - name: Install Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ matrix.python-version }} + python-version: "${{ matrix.python-version }}" - name: Install dependencies run: | diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml new file mode 100644 index 0000000..75c30d7 --- /dev/null +++ b/.github/workflows/release-please.yml @@ -0,0 +1,47 @@ +on: + push: + branches: + - main + +name: release-please + +jobs: + release-please: + runs-on: ubuntu-latest + steps: + + - uses: GoogleCloudPlatform/release-please-action@v3 + id: release + with: + release-type: python + package-name: vcfpy + token: ${{ secrets.BOT_TOKEN }} + extra-files: | + setup.py + setup.cfg + vcfpy/_version.py + + - uses: actions/checkout@v4 + if: ${{ steps.release.outputs.release_created }} + with: + fetch-depth: 0 + + - name: Set up Python + if: ${{ steps.release.outputs.release_created }} + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Build package + if: ${{ steps.release.outputs.release_created }} + run: | + python -m pip install --upgrade pip + pip install -U setuptools + python setup.py sdist + + - name: Publish to PyPI + if: ${{ steps.release.outputs.release_created }} + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..e213d68 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,169 @@ +# Changelog + +## [0.13.6](https://github.com/bihealth/vcfpy/compare/v0.13.5...v0.13.6) (2022-11-28) + +- Fixing bug in `setup.py` that prevented `pysam` dependency to be 
loaded (#150). + +## v0.13.5 (2022-11-13) + +- Treat `.bgz` files the same as `.gz` (#145, \#149) + +## v0.13.4 (2022-04-13) + +- Switching to Github Actions for CI +- Fix INFO flag raises TypeError (#146) + +## v0.13.3 (2020-09-14) + +- Adding `Record.update_calls`. +- Making `Record.{format,calls}` use list when empty + +## v0.13.2 (2020-08-20) + +- Adding `Call.set_genotype()`. + +## v0.13.1 (2020-08-20) + +- Fixed `Call.ploidy`. +- Fixed `Call.is_variant`. + +## v0.13.0 (2020-07-10) + +- Fixing bug in case `GT` describes only one allele. +- Proper escaping of colon and semicolon (or the lack of escaping) in + `INFO` and `FORMAT`. + +## v0.12.2 (2020-04-29) + +- Fixing bug in case `GT` describes only one allele. + +## v0.12.1 (2019-03-08) + +- Not warning on `PASS` filter if not defined in header. + +## v0.12.0 (2019-01-29) + +- Fixing tests for Python \>=3.6 +- Fixing CI, improving tox integration. +- Applying `black` formatting. +- Replacing Makefile with more minimal one. +- Removing some linting errors from flake8. +- Adding support for reading VCF without `FORMAT` or any sample + column. +- Adding support for writing headers and records without `FORMAT` and + any sample columns. + +## v0.11.2 (2018-04-16) + +- Removing `pip` module from `setup.py` which is not recommended + anyway. + +## v0.11.1 (2018-03-06) + +- Working around problem in HTSJDK output with incomplete `FORMAT` + fields (#127). Writing out `.` instead of keeping trailing empty + records empty. + +## v0.11.0 (2017-11-22) + +- The field `FORMAT/FT` is now expected to be a semicolon-separated + string. Internally, we will handle it as a list. +- Switching from warning helper utility code to Python `warnings` + module. +- Return `str` in case of problems with parsing value. + +## v0.10.0 (2017-02-27) + +- Extending API to allow for reading subsets of records. 
(Writing for + sample subsets or reordered samples is possible through using the + appropriate `names` list in the `SamplesInfos` for the `Writer`). +- Deep-copying header lines and samples infos on `Writer` construction +- Using `samples` attribute from `Header` in `Reader` and `Writer` + instead of passing explicitly + +## 0.9.0 (2017-02-26) + +- Restructuring of requirements.txt files +- Fixing parsing of no-call `GT` fields + +## 0.8.1 (2017-02-08) + +- PEP8 style adjustments +- Using versioneer for versioning +- Using `requirements*.txt` files now from setup.py +- Fixing dependency on cyordereddict to be for Python \<3.6 instead of + \<3.5 +- Jumping by samtools coordinate string now also allowed + +## 0.8.0 (2016-10-31) + +- Adding `Header.has_header_line` for querying existence of header + line +- `Header.add_*_line` return a `bool` now indicating any conflicts +- Construction of Writer uses samples within header and no extra + parameter (breaks API) + +## 0.7.0 (2016-09-25) + +- Smaller improvements and fixes to documentation +- Adding Codacy coverage and static code analysis results to README +- Various smaller code cleanup triggered by Codacy results +- Adding `__eq__`, `__neq__` and `__hash__` to data types (where + applicable) + +## 0.6.0 (2016-09-25) + +- Refining implementation for breakend and symbolic allele class +- Removing `record.SV_CODES` +- Refactoring parser module a bit to make the code cleaner +- Fixing small typos and problems in documentation + +## 0.5.0 (2016-09-24) + +- Deactivating warnings on record parsing by default because of + performance +- Adding validation for `INFO` and `FORMAT` fields on reading (#8) +- Adding predefined `INFO` and `FORMAT` fields to `pyvcf.header` (#32) + +## 0.4.1 (2016-09-22) + +- Initially enabling codeclimate + +## 0.4.0 (2016-09-22) + +- Exporting constants for encoding variant types +- Exporting genotype constants `HOM_REF`, `HOM_ALT`, `HET` +- Implementing `Call.is_phased`, 
`Call.is_variant`, + `Call.is_phased`, `Call.is_hom_ref`, `Call.is_hom_alt` +- Removing `Call.phased` (breaks API, next release is 0.4.0) +- Adding tests, fixing bugs for methods of `Call` + +## 0.3.1 (2016-09-21) + +- Work around `FORMAT/FT` being a string; this is done so in the Delly + output + +## 0.3.0 (2016-09-21) + +- `Reader` and `Writer` can now be used as context manager (with + `with`) +- Including license in documentation, including Biopython license +- Adding support for writing bgzf files (taken from Biopython) +- Adding support for parsing arrays in header lines +- Removing `example-4.1-bnd.vcf` example file because v4.1 tumor + derival lacks `ID` field +- Adding `AltAlleleHeaderLine`, `MetaHeaderLine`, + `PedigreeHeaderLine`, and `SampleHeaderLine` +- Renaming `SimpleHeaderFile` to `SimpleHeaderLine` +- Warn on missing `FILTER` entries on parsing +- Reordered parameters in `from_stream` and `from_file` (#18) +- Renamed `from_file` to `from_stream` (#18) +- Renamed `Reader.jump_to` to `Reader.fetch` +- Adding `header_without_lines` function +- Generally extending API to make it easier to use +- Upgrading dependencies, enabling pyup-bot +- Greatly extending documentation + +## 0.2.1 (2016-09-19) + +- First release on PyPI diff --git a/HISTORY.rst b/HISTORY.rst deleted file mode 100644 index 051d14d..0000000 --- a/HISTORY.rst +++ /dev/null @@ -1,178 +0,0 @@ -======= -History -======= - -v0.13.6 (2022-11-28) --------------------- - -- Fixing bug in ``setup.py`` that prevented ``pysam`` dependency to be loaded (#150). - -v0.13.5 (2022-11-13) --------------------- - -- Treat ``.bgz`` files the same as ``.gz`` (#145, #149) - -v0.13.4 (2022-04-13) --------------------- - -- Switching to Github Actions for CI -- Fix INFO flag raises TypeError (#146) - -v0.13.3 (2020-09-14) --------------------- - -- Adding ``Record.update_calls``. 
-- Making ``Record.{format,calls}`` use list when empty - -v0.13.2 (2020-08-20) --------------------- - -- Adding ``Call.set_genotype()``. - -v0.13.1 (2020-08-20) --------------------- - -- Fixed ``Call.ploidy``. -- Fixed ``Call.is_variant``. - -v0.13.0 (2020-07-10) --------------------- - -* Fixing bug in case ``GT`` describes only one allele. -* Proper escaping of colon and semicolon (or the lack of escaping) in ``INFO`` and ``FORMAT``. - -v0.12.2 (2020-04-29) --------------------- - -* Fixing bug in case ``GT`` describes only one allele. - -v0.12.1 (2019-03-08) --------------------- - -* Not warning on ``PASS`` filter if not defined in header. - -v0.12.0 (2019-01-29) --------------------- - -* Fixing tests for Python >=3.6 -* Fixing CI, improving tox integration. -* Applying ``black`` formatting. -* Replacing Makefile with more minimal one. -* Removing some linting errors from flake8. -* Adding support for reading VCF without ``FORMAT`` or any sample column. -* Adding support for writing headers and records without ``FORMAT`` and any sample columns. - -v0.11.2 (2018-04-16) --------------------- - -* Removing ``pip`` module from ``setup.py`` which is not recommended anyway. - -v0.11.1 (2018-03-06) --------------------- - -* Working around problem in HTSJDK output with incomplete ``FORMAT`` fields (#127). - Writing out ``.`` instead of keeping trailing empty records empty. - -v0.11.0 (2017-11-22) --------------------- - -* The field ``FORMAT/FT`` is now expected to be a semicolon-separated string. - Internally, we will handle it as a list. -* Switching from warning helper utility code to Python ``warnings`` module. -* Return ``str`` in case of problems with parsing value. - -v0.10.0 (2017-02-27) --------------------- - -* Extending API to allow for reading subsets of records. - (Writing for sample subsets or reordered samples is possible through using the appropriate ``names`` list in the ``SamplesInfos`` for the ``Writer``). 
-* Deep-copying header lines and samples infos on ``Writer`` construction -* Using ``samples`` attribute from ``Header`` in ``Reader`` and ``Writer`` instead of passing explicitely - -0.9.0 (2017-02-26) ------------------- - -* Restructuring of requirements.txt files -* Fixing parsing of no-call ``GT`` fields - -0.8.1 (2017-02-08) ------------------- - -* PEP8 style adjustments -* Using versioneer for versioning -* Using ``requirements*.txt`` files now from setup.py -* Fixing dependency on cyordereddict to be for Python <3.6 instead of <3.5 -* Jumping by samtools coordinate string now also allowed - -0.8.0 (2016-10-31) ------------------- - -* Adding ``Header.has_header_line`` for querying existence of header line -* ``Header.add_*_line`` return a ``bool`` no indicating any conflicts -* Construction of Writer uses samples within header and no extra parameter (breaks API) - -0.7.0 (2016-09-25) ------------------- - -* Smaller improvements and fixes to documentation -* Adding Codacy coverage and static code analysis results to README -* Various smaller code cleanup triggered by Codacy results -* Adding ``__eq__``, ``__neq__`` and ``__hash__`` to data types (where applicable) - -0.6.0 (2016-09-25 ------------------ - -* Refining implementation for breakend and symbolic allele class -* Removing ``record.SV_CODES`` -* Refactoring parser module a bit to make the code cleaner -* Fixing small typos and problems in documentation - -0.5.0 (2016-09-24) ------------------- - -* Deactivating warnings on record parsing by default because of performance -* Adding validation for ``INFO`` and ``FORMAT`` fields on reading (#8) -* Adding predefined ``INFO`` and ``FORMAT`` fields to ``pyvcf.header`` (#32) - -0.4.1 (2016-09-22) ------------------- - -* Initially enabling codeclimate - -0.4.0 (2016-09-22) ------------------- - -* Exporting constants for encoding variant types -* Exporting genotype constants ``HOM_REF``, ``HOM_ALT``, ``HET`` -* Implementing ``Call.is_phased``, 
``Call.is_het``, ``Call.is_variant``, ``Call.is_phased``, ``Call.is_hom_ref``, ``Call.is_hom_alt`` -* Removing ``Call.phased`` (breaks API, next release is 0.4.0) -* Adding tests, fixing bugs for methods of ``Call`` - -0.3.1 (2016-09-21) ------------------- - -* Work around ``FORMAT/FT`` being a string; this is done so in the Delly output - -0.3.0 (2016-09-21) ------------------- - -* ``Reader`` and ``Writer`` can now be used as context manager (with ``with``) -* Including license in documentation, including Biopython license -* Adding support for writing bgzf files (taken from Biopython) -* Adding support for parsing arrays in header lines -* Removing ``example-4.1-bnd.vcf`` example file because v4.1 tumor derival lacks ``ID`` field -* Adding ``AltAlleleHeaderLine``, ``MetaHeaderLine``, ``PedigreeHeaderLine``, and ``SampleHeaderLine`` -* Renaming ``SimpleHeaderFile`` to ``SimpleHeaderLine`` -* Warn on missing ``FILTER`` entries on parsing -* Reordered parameters in ``from_stream`` and ``from_file`` (#18) -* Renamed ``from_file`` to ``from_stream`` (#18) -* Renamed ``Reader.jump_to`` to ``Reader.fetch`` -* Adding ``header_without_lines`` function -* Generally extending API to make it esier to use -* Upgrading dependencies, enabling pyup-bot -* Greatly extending documentation - -0.2.1 (2016-09-19) ------------------- - -* First release on PyPI diff --git a/MANIFEST.in b/MANIFEST.in index dda48ac..b45effc 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,9 +1,9 @@ include AUTHORS.rst include CONTRIBUTING.rst -include HISTORY.rst +include CHANGELOG.md include LICENSE -include README.rst +include README.md recursive-include tests * recursive-exclude * __pycache__ diff --git a/README.md b/README.md new file mode 100644 index 0000000..3856a03 --- /dev/null +++ b/README.md @@ -0,0 +1,43 @@ +[![pypi](https://img.shields.io/pypi/v/vcfpy.svg)](https://pypi.python.org/pypi/vcfpy) 
+[![bioconda](https://img.shields.io/conda/dn/bioconda/vcfpy.svg?label=Bioconda)](https://bioconda.github.io/recipes/vcfpy/README.html) +[![CI](https://github.com/bihealth/vcfpy/actions/workflows/main.yml/badge.svg)](https://github.com/bihealth/vcfpy/actions/workflows/main.yml) +[![Documentation Status](https://readthedocs.org/projects/vcfpy/badge/?version=latest)](https://vcfpy.readthedocs.io/en/latest/?badge=latest) +[![Publication in The Journal of Open Source Software](http://joss.theoj.org/papers/edae85d90ea8a49843dbaaa109e47cba/status.svg)](http://joss.theoj.org/papers/10.21105/joss.00085) + +# VCFPy + +Python 3 VCF library with good support for both reading and writing + +- Free software: MIT license +- Documentation: <https://vcfpy.readthedocs.io>. + +## Features + +- Support for reading and writing VCF v4.3 +- Interface to `INFO` and `FORMAT` fields is based on `OrderedDict` allows for easier modification than PyVCF (also I find this more pythonic) +- Read (and jump in) and write BGZF files just using `vcfpy` + +## Why another VCF parser for Python! + +I've been using PyVCF with quite some success in the past. However, the +main bottleneck of PyVCF is when you want to modify the per-sample +genotype information. There are some issues in the tracker of PyVCF but +none of them can really be considered solved. I tried several hours to +solve these problems within PyVCF but this never got far or towards a +complete rewrite... + +For this reason, VCFPy was born and here it is! + +## What's the State? + +VCFPy is the result of two full days of development plus some +maintenance work later now (right now). I'm using it in several projects +but it is not as battle-tested as PyVCF. + +## Why Python 3 Only? + +As I'm only using Python 3 code, I see no advantage in carrying around +support for legacy Python 2 and maintaining it. At a later point when +VCFPy is known to be stable, Python 2 support might be added if someone +contributes a pull request. 
+ diff --git a/README.rst b/README.rst deleted file mode 100644 index 49ab08c..0000000 --- a/README.rst +++ /dev/null @@ -1,64 +0,0 @@ -===== -VCFPy -===== - - -.. image:: https://img.shields.io/pypi/v/vcfpy.svg - :target: https://pypi.python.org/pypi/vcfpy - -.. image:: https://img.shields.io/conda/dn/bioconda/vcfpy.svg?label=Bioconda - :target: https://bioconda.github.io/recipes/vcfpy/README.html - -.. image:: https://img.shields.io/travis/bihealth/vcfpy.svg - :target: https://travis-ci.org/bihealth/vcfpy - -.. image:: https://readthedocs.org/projects/vcfpy/badge/?version=latest - :target: https://vcfpy.readthedocs.io/en/latest/?badge=latest - :alt: Documentation Status - -.. image:: https://api.codacy.com/project/badge/Grade/cfe741307ec34e8fb90dfe37e84a2519 - :target: https://www.codacy.com/app/manuel-holtgrewe/vcfpy?utm_source=github.com&utm_medium=referral&utm_content=bihealth/vcfpy&utm_campaign=Badge_Grade - :alt: Codacy Analysis - -.. image:: https://api.codacy.com/project/badge/Coverage/cfe741307ec34e8fb90dfe37e84a2519 - :alt: Codacy Coverage - :target: https://www.codacy.com/app/manuel-holtgrewe/vcfpy?utm_source=github.com&utm_medium=referral&utm_content=bihealth/vcfpy&utm_campaign=Badge_Coverage - -.. image:: http://joss.theoj.org/papers/edae85d90ea8a49843dbaaa109e47cba/status.svg - :alt: Publication in The Journal of Open Source Software - :target: http://joss.theoj.org/papers/10.21105/joss.00085 - -Python 3 VCF library with good support for both reading and writing - -* Free software: MIT license -* Documentation: https://vcfpy.readthedocs.io. - - -Features --------- - -- Support for reading and writing VCF v4.3 -- Interface to ``INFO`` and ``FORMAT`` fields is based on ``OrderedDict`` allows for easier modification than PyVCF (also I find this more pythonic) -- Read (and jump in) and write BGZF files just using ``vcfpy`` - -Why another VCF parser for Python! 
----------------------------------- - -I've been using PyVCF with quite some success in the past. -However, the main bottleneck of PyVCF is when you want to modify the per-sample genotype information. -There are some issues in the tracker of PyVCF but none of them can really be considered solved. -I tried several hours to solve these problems within PyVCF but this never got far or towards a complete rewrite... - -For this reason, VCFPy was born and here it is! - -What's the State? ------------------ - -VCFPy is the result of two full days of development plus some maintenance work later now (right now). -I'm using it in several projects but it is not as battle-tested as PyVCF. - -Why Python 3 Only? ------------------- - -As I'm only using Python 3 code, I see no advantage in carrying around support for legacy Python 2 and maintaining it. -At a later point when VCFPy is known to be stable, Python 2 support might be added if someone contributes a pull request. diff --git a/docs/history.rst b/docs/history.rst index 2506499..8e33606 100644 --- a/docs/history.rst +++ b/docs/history.rst @@ -1 +1 @@ -.. include:: ../HISTORY.rst +See ``CHANGELOG.md`` file. diff --git a/setup.py b/setup.py index 6fba090..5ff25f9 100644 --- a/setup.py +++ b/setup.py @@ -23,10 +23,10 @@ def parse_requirements(path): return requirements -with open("README.rst") as readme_file: +with open("README.md") as readme_file: readme = readme_file.read() -with open("HISTORY.rst") as history_file: +with open("CHANGELOG.md") as history_file: history = history_file.read() base_reqs = parse_requirements("requirements.txt") @@ -52,6 +52,7 @@ def parse_requirements(path): version=version, description=("Python 3 VCF library with good support for both reading and writing"), long_description=readme + "\n\n" + history, + long_description_content_type="text/markdown", author="Manuel Holtgrewe", author_email="manuel.holtgrewe@bih-charite.de", url="https://github.com/bihealth/vcfpy",