diff --git a/CHANGELOG.md b/CHANGELOG.md index 27b7d0b..71ea875 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ Please add a new candidate release at the top after changing the latest one. Fee Try to use the following format: +## [3.8.1] +- Optional user defined threshold and penalty for compound scoring + ## [3.8.0] - Rank score normalisation diff --git a/genmod/commands/score_compounds.py b/genmod/commands/score_compounds.py index 5765589..3a27f3c 100755 --- a/genmod/commands/score_compounds.py +++ b/genmod/commands/score_compounds.py @@ -45,8 +45,8 @@ is_flag=True, help='If variants are annotated with the Variant Effect Predictor.' ) -@click.option('--threshold', type=int, help="If no other variants below this threshold, the variant is penalized", default=9) -@click.option('--penalty', type=int, help="If not together with other variant above the threshold defined by --threshold, this penalty is applied", default=6) +@click.option('--threshold', type=int, help="Threshold for model-dependent penalty if no compounds with passing score", default=9) +@click.option('--penalty', type=int, help="Penalty applied together with --threshold", default=6) @click.pass_context def compound(context, variant_file, silent, outfile, vep, threshold: int, penalty: int, processes, temp_dir): """ @@ -155,7 +155,7 @@ def compound(context, variant_file, silent, outfile, vep, threshold: int, penalt vep = vep, results_queue=results ) - + logger.debug("Put stop signs in the variant queue") for i in range(num_scorers): variant_queue.put(None) diff --git a/genmod/score_variants/compound_scorer.py b/genmod/score_variants/compound_scorer.py index 04c39b4..3d90c65 100755 --- a/genmod/score_variants/compound_scorer.py +++ b/genmod/score_variants/compound_scorer.py @@ -184,7 +184,6 @@ def run(self): #TODO check if correct family id # Right now we assume that there is only one family in the vcf - family_id = family_rank_score[0] rank_score = float(family_rank_score[-1]) @@ -193,7 +192,7 @@ def run(self): # Per variant, find rank score max min values used for normalization variant_rankscore_normalization_bounds: Dict[str, Tuple] = \ self._get_rankscore_normalization_bounds(variant_batch) - + #We now have a dictionary with variant ids and rank scores, per rank_score_type for variant_id in variant_batch: # If the variants only follow AR_comp (and AD for single individual families) @@ -235,15 +234,9 @@ def run(self): logger.debug("Checking compounds for family {0}".format( compound_family_id)) - #Loop through compounds to check if they are only low scored for compound_id in compound_list: - - - compound_rank_score = rank_scores[rank_score_type][compound_id] - - if compound_rank_score > get_rank_score(rank_score_type=rank_score_type, threshold=self.threshold, min_rank_score_value=variant_rankscore_normalization_bounds[variant_id][0], @@ -253,7 +246,6 @@ def run(self): logger.debug("Setting only_low to {0}".format(only_low)) if (correct_score and only_low): - logger.debug("correcting rank score for {0}".format( variant_id)) current_rank_score -= get_rank_score_as_magnitude(rank_score_type=rank_score_type, @@ -288,10 +280,6 @@ def run(self): variant['info_dict'][f'{rank_score_type}'] = new_rank_score_string variant['info_dict'][f'Compounds{rank_score_type.strip("RankScore")}'] = new_compound_string - # print(variant["info_dict"]) - # raise ValueError(variant) - # import sys - variant = replace_vcf_info( keyword=f'{rank_score_type}', annotation = new_rank_score_string, @@ -312,7 +300,6 @@ def run(self): annotation=new_compound_string, variant_dict=variant ) - logger.debug("Putting variant in results_queue") self.results_queue.put(variant)