Skip to content

Commit

Permalink
Add test case for checking RankScore is float in processed VCF
Browse files Browse the repository at this point in the history
Going forward, the expected behavior is that RankScore
is to be a float-type value in VCFs.

Signed-off-by: Tor Björgen <[email protected]>
  • Loading branch information
torbjorgen committed Dec 4, 2023
1 parent 5b2fac9 commit bc15a2f
Showing 1 changed file with 143 additions and 0 deletions.
143 changes: 143 additions & 0 deletions tests/functionality/test_score_variants_ranks_score_is_float.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
from tempfile import NamedTemporaryFile
from typing import Any, Dict, Iterator, Union

import pytest
from click.testing import CliRunner

from genmod.commands import score_command, score_compounds_command
from genmod.vcf_tools import HeaderParser, get_variant_dict, get_info_dict

# Input VCF handed to the 'score' command in test_rankscore_is_float_type.
# Relative path, so it is resolved against the current working directory.
ANNOTATED_VCF_FILE = "tests/fixtures/test_vcf_annotated.vcf"
# Config file handed to the 'score' command through its '-c' option.
SCORE_CONFIG = "tests/fixtures/score_variants/genmod_example.ini"


def _parse_variant_file(file_path: str) -> HeaderParser:
    """
    Parse VCF header fields.

    Lines are consumed from the top of the file and fed to a HeaderParser
    until the first line that does not start with '#' is found.

    :param file_path: VCF to be read
    :return: HeaderParser fed with the leading '#'-prefixed lines
    :raises ValueError: in case file is empty
    """
    with open(file_path, 'r') as variant_file:
        head = HeaderParser()
        # Tracked with an explicit flag: an enumerate() index would be unbound
        # for an empty file and 0 for a perfectly valid one-line file.
        file_has_content = False
        for line in variant_file:
            file_has_content = True
            line = line.rstrip()
            if not line.startswith('#'):
                # First non-header line reached, stop reading
                break
            if line.startswith('##'):
                head.parse_meta_data(line)
            else:
                head.parse_header_line(line)
        if not file_has_content:
            raise ValueError('Expected contents in file, got none')
    return head


def _generate_variants_from_file(file_path: str) -> Iterator[Dict[str, Any]]:
    """
    Yield variants from VCF file.

    Every yielded variant is the dict built by get_variant_dict for one
    non-'#' line, extended with an 'info_dict' entry that holds the result of
    get_info_dict applied to the variant's 'INFO' value.

    :param file_path: VCF to be read
    :return: Iterator over one variant dict per non-header line
    """
    header = _parse_variant_file(file_path=file_path)
    with open(file_path, 'r') as variant_file:
        for line in variant_file:
            if line.startswith('#'):
                # Header lines were already handled by _parse_variant_file
                continue
            # Values are not only strings: 'info_dict' stores whatever
            # get_info_dict returns, hence the Any value type.
            variant: Dict[str, Any] = get_variant_dict(line, header.header)
            variant['info_dict'] = get_info_dict(variant['INFO'])
            yield variant


def _generate_rank_score_strings_from_file(file_path: str) -> Iterator[str]:
    """
    Yield rank score strings from VCF.

    The 'RankScore' INFO entry is read as comma separated
    '<family id>:<rank score>' items, and the rank score part of every item
    is yielded. A variant without a 'RankScore' entry, or an item without a
    ':' separator, yields an empty string instead of raising IndexError, so
    the problem surfaces when the yielded value is validated.

    :param file_path: VCF to be read
    :return: Iterator over the rank score part of each RankScore item
    """
    for variant in _generate_variants_from_file(file_path=file_path):
        rank_score_entry: str = variant['info_dict'].get('RankScore', '')
        for family_rank_score in rank_score_entry.split(','):
            # fields[0] is the family id, fields[1] the rank score
            fields = family_rank_score.split(':')
            yield fields[1] if len(fields) > 1 else ''


def _check_rankscore_string_is_float(rank_score_string: str):
"""
Check to see if a rank score string contains a floating point value.
:param rank_score_string: A string to be checked
:raises AssertionError: In case rank_score_string is not a float-type value
"""
# Check decimal point presence
assert '.' in rank_score_string
# Check all numerical digits, might contain period and minus sign.
assert rank_score_string \
.replace('.', '') \
.replace('-','') \
.isdigit()
# Check successfully parsed to float
assert isinstance(float(rank_score_string), float)


def test_check_rankscore_string_is_float():
    """
    Verify that the rank score float checker accepts float-formatted strings
    and rejects everything else with an AssertionError.
    """
    # GIVEN rank score strings that are float formatted ...
    accepted_strings = ('0.0', '132.1', '132.10', '-10.0')
    # ... and rank score strings that are not
    rejected_strings = ('', '132', 'b', '0', '.', '-')

    # WHEN running the method under test
    # THEN expect the float formatted strings to pass silently
    for accepted in accepted_strings:
        _check_rankscore_string_is_float(accepted)

    # THEN expect every other string to trip an assertion
    for rejected in rejected_strings:
        with pytest.raises(AssertionError):
            _check_rankscore_string_is_float(rejected)


def test_rankscore_is_float_type():
    """
    Test to make sure RankScore value is float
    provided by both the CLI command 'score' and 'compound'.

    The output of each command is saved to a temporary file which is then
    re-read to check every rank score string. The temporary files are managed
    by context managers so they are closed and removed deterministically,
    also when an assertion fails.
    """
    runner = CliRunner()

    # GIVEN some VCF file to be ranked
    # WHEN computing rank score
    result = runner.invoke(score_command, [
        ANNOTATED_VCF_FILE,
        '-c',
        SCORE_CONFIG,
    ])
    assert result.exit_code == 0
    with NamedTemporaryFile(mode='w') as scored_vcf:
        scored_vcf.write(result.stdout_bytes.decode('utf-8'))  # Save processed VCF to file
        scored_vcf.flush()  # Readers open the file by name, so push the buffer to disk
        # THEN expect all rank scores to be float type
        for rank_score_string in _generate_rank_score_strings_from_file(file_path=scored_vcf.name):
            _check_rankscore_string_is_float(rank_score_string)

        # GIVEN some ranked VCF file, run compound scoring (which modifies the RankScore)
        # WHEN computing compound score
        # (the scored VCF has to stay open here, it is the input of this command)
        result = runner.invoke(score_compounds_command, [
            scored_vcf.name,
        ])
        assert result.exit_code == 0
        with NamedTemporaryFile(mode='w') as compound_vcf:
            compound_vcf.write(result.stdout_bytes.decode('utf-8'))  # Save processed VCF to file
            compound_vcf.flush()
            # THEN expect all rank scores (including modified compound scores) to be float type
            for rank_score_string in _generate_rank_score_strings_from_file(file_path=compound_vcf.name):
                _check_rankscore_string_is_float(rank_score_string)

0 comments on commit bc15a2f

Please sign in to comment.