Skip to content

Commit

Permalink
Merge master branch
Browse files Browse the repository at this point in the history
  • Loading branch information
Jakob37 committed Oct 18, 2024
2 parents 98935d6 + cc7cdf0 commit 5127fb0
Show file tree
Hide file tree
Showing 11 changed files with 172 additions and 48 deletions.
39 changes: 39 additions & 0 deletions .github/workflows/build_and_publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: Publish to PyPI

on:
release:
types:
- created

jobs:
build-n-publish:
name: Build and publish Python distribution to PyPI
runs-on: ubuntu-latest
permissions:
id-token: write
steps:
- name: Check out git repository
uses: actions/checkout@v4

- name: Set up Python 3.8
uses: actions/setup-python@v5
with:
python-version: 3.8

- name: Install build tools
run: >-
python -m
pip install
wheel
twine
--user
- name: Build a binary wheel and a source tarball
run: >-
python
setup.py
sdist
bdist_wheel
- name: Publish distribution 📦 to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
15 changes: 15 additions & 0 deletions .github/workflows/keep_a_changelog.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: "Changelog Reminder"
on:
pull_request:
types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled]

jobs:
# Enforces the update of a changelog file on every pull request
changelog:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: dangoslen/changelog-enforcer@v3
with:
changeLogPath: 'CHANGELOG.md'
skipLabels: 'Skip-Changelog'
11 changes: 9 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,16 @@ Please add a new candidate release at the top after changing the latest one. Fee

Try to use the following format:

## [3.8.1]
## [unreleased]
- Fixed wrong models when chromosome X was named `chrX` and not `X`
- Added GitHub Actions workflows for automatic publishing to PyPI on release, and keep a changelog reminder ([#136](https://github.com/Clinical-Genomics/genmod/pull/136))
- Optional user defined threshold and penalty for compound scoring

## [3.8.3]

- Fixed unstable compounds order in models output ([#134](https://github.com/Clinical-Genomics/genmod/pull/134))
- Added `six` to requirements.txt and setup.py ([#134](https://github.com/Clinical-Genomics/genmod/pull/134))

## [3.8.0]
- Rank score normalisation

Expand All @@ -25,4 +32,4 @@ Try to use the following format:
- Adds support for escaped characters in FORMAT description header strings

### Fixed
- Documentation about cli options `strict` and `phased`
- Documentation about cli options `strict` and `phased`
2 changes: 1 addition & 1 deletion genmod/annotate_models/genetic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def check_genetic_models(variant_batch, families, phased = False,

# Only check X-linked for the variants in the X-chromosome:
# For X-linked we do not need to check the other models
if variant['CHROM'] == 'X':
if variant['CHROM'] in ['X', 'chrX']:
if check_X_recessive(variant, family, strict):
variant['inheritance_models'][family_id]['XR'] = True
for individual_id in individuals:
Expand Down
2 changes: 2 additions & 0 deletions genmod/score_variants/compound_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,8 @@ def run(self):
new_compound = "{0}>{1}".format(compound_id, compound_score)
scored_compound_list.append(new_compound)

# Sort compound variants lexicographically
scored_compound_list.sort()
new_compound_string = "{0}:{1}".format(
compound_family_id, '|'.join(scored_compound_list))

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ configobj == 5.0.8
intervaltree == 3.1.0
extract_vcf == 0.5
vcftoolbox == 1.5.1
six == 1.16.0
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
long_description = 'Tool for annotating patterns of genetic inheritance in Variant Call Format (VCF) files.'

setup(name='genmod',
version='3.8.2',
version='3.8.3',
description='Annotate genetic inheritance models in variant files',
author = 'Mans Magnusson',
author_email = '[email protected]',
Expand All @@ -36,7 +36,8 @@
'configobj >= 5.0.8',
'intervaltree >= 3.1.0',
'extract_vcf >= 0.5',
'vcftoolbox >= 1.5.1'
'vcftoolbox >= 1.5.1',
'six >= 1.16.0',
],
packages=find_packages(
exclude=('tests*', 'docs', 'examples', 'configs')
Expand Down
21 changes: 21 additions & 0 deletions tests/fixtures/test_vcf_regions_with_chr.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
##fileformat=VCFv4.1
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=Annotation,Number=.,Type=String,Description="Annotates what feature(s) this variant belongs to.">
##contig=<ID=1,length=249250621,assembly=b37>
##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT father mother proband father_2 mother_2 proband_2
chr1 879537 . T C 100 PASS MQ=1;Annotation=SAMD11,NOC2L GT:AD:GQ 0/1:10,10:60 0/1:10,10:60 1/1:10,10:60 0/0:10,10:60 0/1:10,10:60 1/1:10,10:60
chr1 879541 . G A 100 PASS MQ=1;Annotation=SAMD11,NOC2L GT:AD:GQ ./. 0/1:10,10:60 1/1:10,10:60 ./. 0/1:10,10:60 0/1:10,10:60
chr1 879595 . C T 100 PASS MQ=1;Annotation=SAMD11,NOC2L GT:AD:GQ 0/1:10,10:60 0/0:10,10:60 1/1:10,10:60 0/1:10,10:60 0/0:10,10:60 0/1:10,10:60
chr1 879676 . G A 100 PASS MQ=1;Annotation=SAMD11,NOC2L GT:AD:GQ 0/1:10,10:60 1/1:10,10:60 1/1:10,10:60 0/1:10,10:60 0/1:10,10:60 0/1:10,10:60
chr1 879911 . G A 100 PASS MQ=1;Annotation=SAMD11,NOC2L GT:AD:GQ 0/1:10,10:60 0/0:10,10:60 0/1:10,10:60 0/1:10,10:60 0/0:10,10:60 0/1:10,10:60
chr1 880012 . A G 100 PASS MQ=1;Annotation=NOC2L GT:AD:GQ 0/0:10,10:60 0/1:10,10:60 0/1:10,10:60 0/0:10,10:60 0/1:10,10:60 0/1:10,10:60
chr1 880086 . T C 100 PASS MQ=1;Annotation=NOC2L GT:AD:GQ 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60
chr1 880199 . G A 100 PASS MQ=1;Annotation=NOC2L GT:AD:GQ 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60
chr1 880217 . T G 100 PASS MQ=1;Annotation=NOC2L GT:AD:GQ 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60
chr10 76154051 . A G 100 PASS MQ=1;Annotation=ADK GT:AD:GQ 0/0:10,10:60 0/1:10,10:60 0/1:10,10:60 0/0:10,10:60 0/1:10,10:60 0/1:10,10:60
chr10 76154073 . T G 100 PASS MQ=1;Annotation=ADK GT:AD:GQ 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60 0/0:10,10:60 0/0:10,10:60 0/1:10,10:60
chr10 76154074 . C G 100 PASS MQ=1;Annotation=ADK GT:AD:GQ ./. 0/1:10,10:60 0/1:10,10:60 0/1:10,10:60 0/1:10,10:60 0/1:10,10:60
chr10 76154076 . G C 100 PASS MQ=1;Annotation=ADK GT:AD:GQ ./. 0/0:10,10:60 0/1:10,10:60 ./. 0/0:10,10:60 0/1:10,10:60
chrX 302253 . CCCTCCTGCCCCT C 100 PASS MQ=1;Annotation=PPP2R3B GT:AD:GQ 0/0:10,10:60 0/1:10,10:60 1/1:10,10:60 0/0:10,10:60 1/1:10,10:60 1/1:10,10:60
chrM 302253 . CCCTCCTGCCCCT C 100 PASS MQ=1 GT:AD:GQ 0/0:10,10:60 0/1:10,10:60 1/1:10,10:60 0/0:10,10:60 1/1:10,10:60 1/1:10,10:60
43 changes: 41 additions & 2 deletions tests/functionality/test_annotate_models.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,44 @@
from genmod.commands import models_command
from tempfile import NamedTemporaryFile
from typing import Dict, Union
from click.testing import CliRunner

ANNOTATED_VCF_FILE = "tests/fixtures/test_vcf_annotated.vcf"
VCF_FILE = "tests/fixtures/test_vcf_regions.vcf"
VCF_FILE_WITH_CHR = "tests/fixtures/test_vcf_regions_with_chr.vcf"
FAMILY_FILE = "tests/fixtures/recessive_trio.ped"
BAD_FAMILY_FILE = "tests/fixtures/annotate_models/one_ind.ped"
EMPTY_VCF_FILE = "tests/fixtures/empty.vcf"

from genmod import logger
from genmod.log import init_log
from test_utils import generate_variants_from_file

init_log(logger, loglevel="INFO")

def _generate_genetic_models_string_from_file(file_path: str) -> str:
"""
Yield genetic model string from VCF.
:param file_path: VCF to be read
"""
for variant in generate_variants_from_file(file_path=file_path):
genetic_models_entry: str = variant['info_dict'].get('GeneticModels', '')
for family_genetic_models in genetic_models_entry.split(','):
family_genetic_models = family_genetic_models.split(':')
if len(family_genetic_models) > 1: # Not all variants will have a model
genetic_models: str = family_genetic_models[1]
yield genetic_models

def _run_model_command(vcf_file):
"""Helper function to run models_command and return output as a list."""
runner = CliRunner()
result = runner.invoke(models_command, [vcf_file, '-f', FAMILY_FILE])
assert result.exit_code == 0, f"Command failed with exit code: {result.exit_code}"

with NamedTemporaryFile(delete=False) as temp_file:
temp_file.write(result.stdout_bytes)
temp_file.seek(0) # Move back to the start of the file
return list(_generate_genetic_models_string_from_file(temp_file.name))

def test_genmod_annotate_models_no_family():
"""docstring for test_genmod_annotate_models"""
Expand Down Expand Up @@ -42,7 +70,7 @@ def test_genmod_annotate_models_empty_vcf():
FAMILY_FILE
]
)

print(result.output)
assert result.exit_code == 0

Expand All @@ -67,5 +95,16 @@ def test_annotate_models_lacking_ind():
BAD_FAMILY_FILE
]
)

assert result.exit_code == 1

def test_annotate_models_chr_prefix():
"""Test that genetic models are identical for VCF with and without 'chr' prefix."""

# Get models from both VCF files
models_list = _run_model_command(VCF_FILE)
models_list_with_chr = _run_model_command(VCF_FILE_WITH_CHR)

assert result.exit_code == 1
# Assert that the lists of models are identical
assert len(models_list) > 0 and len(models_list_with_chr) > 0, "No models in VCFs"
assert models_list == models_list_with_chr, "Models differ between VCF files."
43 changes: 2 additions & 41 deletions tests/functionality/test_score_variants_ranks_score_is_float.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,19 @@
import pytest
from tempfile import NamedTemporaryFile
from typing import Dict, Union
from click.testing import CliRunner

from genmod.commands import score_command, score_compounds_command
from genmod.vcf_tools import HeaderParser, get_variant_dict, get_info_dict
from test_utils import generate_variants_from_file

ANNOTATED_VCF_FILE = "tests/fixtures/test_vcf_annotated.vcf"
SCORE_CONFIG = "tests/fixtures/score_variants/genmod_example.ini"


def _parse_variant_file(file_path: str) -> HeaderParser:
"""
Parse VCF header fields
:param file_path: VCF to be read
:raises ValueError: in case file is empty
"""
with open(file_path, 'r') as variant_file:
head = HeaderParser()
for line_index, line in enumerate(variant_file):
line = line.rstrip()
if line.startswith('#'):
if line.startswith('##'):
head.parse_meta_data(line)
else:
head.parse_header_line(line)
else:
break
if line_index == 0:
raise ValueError('Expected contents in file, got none')
return head


def _generate_variants_from_file(file_path: str) -> Dict[str, Union[str, int, float]]:
"""
Yield variants from VCF file.
:param file_path: VCF to be read
"""
header = _parse_variant_file(file_path=file_path)
with open(file_path, 'r') as variant_file:
for line in variant_file:
if line.startswith('#'):
continue
variant: Dict[str, str] = get_variant_dict(line, header.header)
variant['info_dict'] = get_info_dict(variant['INFO'])
yield variant


def _generate_rank_score_strings_from_file(file_path: str) -> str:
"""
Yield rank score strings from VCF.
:param file_path: VCF to be read
"""
for variant in _generate_variants_from_file(file_path=file_path):
for variant in generate_variants_from_file(file_path=file_path):
rank_score_entry: str = variant['info_dict'].get('RankScore', '')
for family_rank_score in rank_score_entry.split(','):
family_rank_score = family_rank_score.split(':')
Expand Down
38 changes: 38 additions & 0 deletions tests/functionality/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import Dict, Union
from genmod.vcf_tools import HeaderParser, get_variant_dict, get_info_dict

def parse_variant_file(file_path: str) -> HeaderParser:
"""
Parse VCF header fields
:param file_path: VCF to be read
:raises ValueError: in case file is empty
"""
with open(file_path, 'r') as variant_file:
head = HeaderParser()
for line_index, line in enumerate(variant_file):
line = line.rstrip()
if line.startswith('#'):
if line.startswith('##'):
head.parse_meta_data(line)
else:
head.parse_header_line(line)
else:
break
if line_index == 0:
raise ValueError('Expected contents in file, got none')
return head

def generate_variants_from_file(file_path: str) -> Dict[str, Union[str, int, float]]:
"""
Yield variants from VCF file.
:param file_path: VCF to be read
"""
header = parse_variant_file(file_path=file_path)
with open(file_path, 'r') as variant_file:
for line in variant_file:
if line.startswith('#'):
continue
variant: Dict[str, str] = get_variant_dict(line, header.header)
variant['info_dict'] = get_info_dict(variant['INFO'])
yield variant

0 comments on commit 5127fb0

Please sign in to comment.