NOTE(review): this patch was restored from a whitespace-collapsed copy.
Indentation and blank context lines of the two genmod/ hunks are reconstructed
to match the hunk line counts -- TODO confirm against the repository. The blob
id on the "index" line of the new test file predates the edits made to it here;
regenerate the patch with `git diff` before relying on it for a 3-way apply.

diff --git a/genmod/commands/score_variants.py b/genmod/commands/score_variants.py
index e9dd9c1..e2a7931 100755
--- a/genmod/commands/score_variants.py
+++ b/genmod/commands/score_variants.py
@@ -194,7 +194,9 @@ def score(context, variant_file, family_id, family_file, family_type, score_conf
 
                 category_scores_max += category_score_max
                 category_scores.append(str(category_score))
-            
+
+            rank_score = float(rank_score) # Export rank score as float type
+
             # Normalize ranks score (across all categories)
             rank_score_normalized: float = as_normalized_max_min(score=float(rank_score),
                                                                  min_score_value=category_scores_min,
diff --git a/genmod/score_variants/compound_scorer.py b/genmod/score_variants/compound_scorer.py
index 30a4ca0..a11aa71 100755
--- a/genmod/score_variants/compound_scorer.py
+++ b/genmod/score_variants/compound_scorer.py
@@ -270,6 +270,7 @@ def run(self):
                         new_compound_string = "{0}:{1}".format(
                             compound_family_id, '|'.join(scored_compound_list))
 
+                        current_rank_score = float(current_rank_score) # Export rank score as float type
                         new_rank_score_string = "{0}:{1}".format(compound_family_id, current_rank_score)
 
                         # variant['info_dict']['IndividualRankScore'] = current_rank_score_string
diff --git a/tests/functionality/test_score_variants_ranks_score_is_float.py b/tests/functionality/test_score_variants_ranks_score_is_float.py
new file mode 100644
index 0000000..d51584e
--- /dev/null
+++ b/tests/functionality/test_score_variants_ranks_score_is_float.py
@@ -0,0 +1,138 @@
+import re
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Any, Dict, Iterator
+
+import pytest
+from click.testing import CliRunner
+
+from genmod.commands import score_command, score_compounds_command
+from genmod.vcf_tools import HeaderParser, get_variant_dict, get_info_dict
+
+ANNOTATED_VCF_FILE = "tests/fixtures/test_vcf_annotated.vcf"
+SCORE_CONFIG = "tests/fixtures/score_variants/genmod_example.ini"
+
+# Optional minus sign, then digits around exactly one decimal point.
+FLOAT_STRING_PATTERN = re.compile(r'-?(?:\d+\.\d*|\.\d+)')
+
+
+def _parse_variant_file(file_path: str) -> HeaderParser:
+    """
+    Parse VCF header fields
+    :param file_path: VCF to be read
+    :raises ValueError: in case file is empty
+    """
+    head = HeaderParser()
+    is_empty = True
+    with open(file_path, 'r', encoding='utf-8') as variant_file:
+        for line in variant_file:
+            is_empty = False
+            line = line.rstrip()
+            if line.startswith('##'):
+                head.parse_meta_data(line)
+            elif line.startswith('#'):
+                head.parse_header_line(line)
+            else:
+                break  # Header ends at the first variant line
+    # Track emptiness explicitly, a loop index is unbound for an empty file
+    # and is also zero for a file holding a single line.
+    if is_empty:
+        raise ValueError('Expected contents in file, got none')
+    return head
+
+
+def _generate_variants_from_file(file_path: str) -> Iterator[Dict[str, Any]]:
+    """
+    Yield variants from VCF file.
+    :param file_path: VCF to be read
+    """
+    header = _parse_variant_file(file_path=file_path)
+    with open(file_path, 'r', encoding='utf-8') as variant_file:
+        for line in variant_file:
+            if line.startswith('#'):
+                continue
+            variant: Dict[str, Any] = get_variant_dict(line, header.header)
+            variant['info_dict'] = get_info_dict(variant['INFO'])
+            yield variant
+
+
+def _generate_rank_score_strings_from_file(file_path: str) -> Iterator[str]:
+    """
+    Yield rank score strings from VCF.
+    An empty string is yielded for an entry without a score,
+    e.g. when a variant lacks the RankScore annotation.
+    :param file_path: VCF to be read
+    """
+    for variant in _generate_variants_from_file(file_path=file_path):
+        rank_score_entry: str = variant['info_dict'].get('RankScore', '')
+        for family_rank_score in rank_score_entry.split(','):
+            # Entry is read as '<family_id>:<rank_score>'
+            _, _, rank_score = family_rank_score.partition(':')
+            yield rank_score
+
+
+def _check_rankscore_string_is_float(rank_score_string: str):
+    """
+    Check to see if a rank score string contains a floating point value.
+    :param rank_score_string: A string to be checked
+    :raises AssertionError: In case rank_score_string is not a float-type value
+    """
+    # Check decimal point presence
+    assert '.' in rank_score_string, \
+        f'Missing decimal point in {rank_score_string!r}'
+    # Check the whole string is one optionally negative decimal number,
+    # strings like '1.2.3' must fail here and not as ValueError in float().
+    assert FLOAT_STRING_PATTERN.fullmatch(rank_score_string), \
+        f'Not a decimal number: {rank_score_string!r}'
+    # Check successfully parsed to float
+    assert isinstance(float(rank_score_string), float)
+
+
+def test_check_rankscore_string_is_float():
+    """
+    Test for checking integrity of rank score float check method.
+    """
+    # GIVEN some rank score strings
+    # WHEN running the method under test
+    # THEN expect it to behave as expected in positive case
+    for valid_string in ('0.0', '132.1', '132.10', '-10.0'):
+        _check_rankscore_string_is_float(valid_string)
+
+    # THEN expect it to behave as expected in negative case
+    for invalid_string in ('', '132', 'b', '0', '.', '-', '1.2.3', '1-2.3'):
+        with pytest.raises(AssertionError):
+            _check_rankscore_string_is_float(invalid_string)
+
+
+def test_rankscore_is_float_type():
+    """
+    Test to make sure RankScore value is float
+    provided by both the CLI command 'score' and 'compound'.
+    """
+    runner = CliRunner()
+    # Intermediate VCFs are kept in a temporary directory, each file is
+    # closed after writing and before being read back by name.
+    with TemporaryDirectory() as temporary_directory:
+        # GIVEN some VCF file to be ranked
+        scored_vcf = Path(temporary_directory) / 'scored.vcf'
+        # WHEN computing rank score
+        result = runner.invoke(score_command, [
+            ANNOTATED_VCF_FILE,
+            '-c',
+            SCORE_CONFIG,
+        ])
+        assert result.exit_code == 0
+        scored_vcf.write_bytes(result.stdout_bytes)  # Save processed VCF to file
+        # THEN expect all rank scores to be float type
+        for rank_score_string in _generate_rank_score_strings_from_file(file_path=str(scored_vcf)):
+            _check_rankscore_string_is_float(rank_score_string)
+
+        # GIVEN some ranked VCF file, run compound scoring (which modifies the RankScore)
+        compound_scored_vcf = Path(temporary_directory) / 'compound_scored.vcf'
+        # WHEN computing compound score
+        result = runner.invoke(score_compounds_command, [str(scored_vcf)])
+        assert result.exit_code == 0
+        compound_scored_vcf.write_bytes(result.stdout_bytes)  # Save processed VCF to file
+        # THEN expect all rank scores (including modified compound scores) to be float type
+        for rank_score_string in _generate_rank_score_strings_from_file(file_path=str(compound_scored_vcf)):
+            _check_rankscore_string_is_float(rank_score_string)