From f02a6f8c4f91360b34c591e233cfd3fec3aa4ea9 Mon Sep 17 00:00:00 2001 From: Jakob Willforss Date: Thu, 24 Oct 2024 07:52:13 +0200 Subject: [PATCH] Add parameters for penalty and threshold (#138) * Add parameters for penalty and threshold * Cleaning things up * Cleaning up and changelog --- CHANGELOG.md | 1 + genmod/commands/score_compounds.py | 6 +++++- genmod/score_variants/compound_scorer.py | 9 ++++++--- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a0767e..f455062 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Try to use the following format: ## [unreleased] - Fixed wrong models when chromosome X was named `chrX` and not `X` - Added GitHub Actions workflows for automatic publishing to PyPI on release, and keep a changelog reminder ([#136](https://github.com/Clinical-Genomics/genmod/pull/136)) +- Optional user defined threshold and penalty for compound scoring ## [3.8.3] diff --git a/genmod/commands/score_compounds.py b/genmod/commands/score_compounds.py index 15c2826..3a27f3c 100755 --- a/genmod/commands/score_compounds.py +++ b/genmod/commands/score_compounds.py @@ -45,8 +45,10 @@ is_flag=True, help='If variants are annotated with the Variant Effect Predictor.' ) +@click.option('--threshold', type=int, help="Threshold for model-dependent penalty if no compounds with passing score", default=9) +@click.option('--penalty', type=int, help="Penalty applied together with --threshold", default=6) @click.pass_context -def compound(context, variant_file, silent, outfile, vep, processes, temp_dir): +def compound(context, variant_file, silent, outfile, vep, threshold: int, penalty: int, processes, temp_dir): """ Score compound variants in a vcf file based on their rank score. """ @@ -110,6 +112,8 @@ def compound(context, variant_file, silent, outfile, vep, processes, temp_dir): task_queue=variant_queue, results_queue=results, individuals=individuals, + threshold=threshold, + penalty=penalty, ) for i in range(num_scorers) ] diff --git a/genmod/score_variants/compound_scorer.py b/genmod/score_variants/compound_scorer.py index 388aad7..e90387c 100755 --- a/genmod/score_variants/compound_scorer.py +++ b/genmod/score_variants/compound_scorer.py @@ -90,7 +90,7 @@ class CompoundScorer(Process): the results queue. """ - def __init__(self, task_queue, results_queue, individuals): + def __init__(self, task_queue, results_queue, individuals, threshold: int, penalty: int): """ Initialize the VariantAnnotator @@ -119,6 +119,9 @@ def __init__(self, task_queue, results_queue, individuals): logger.debug("Setting up individuals") self.individuals = individuals + self.threshold = threshold + self.penalty = penalty + if len(self.individuals) == 1: self.models = ['AR_comp', 'AR_comp_dn', 'AD', 'AD_dn'] else: @@ -235,7 +238,7 @@ def run(self): for compound_id in compound_list: compound_rank_score = rank_scores[rank_score_type][compound_id] if compound_rank_score > get_rank_score(rank_score_type=rank_score_type, - threshold=9, + threshold=self.threshold, min_rank_score_value=variant_rankscore_normalization_bounds[variant_id][0], max_rank_score_value=variant_rankscore_normalization_bounds[variant_id][1] ): @@ -246,7 +249,7 @@ def run(self): logger.debug("correcting rank score for {0}".format( variant_id)) current_rank_score -= get_rank_score_as_magnitude(rank_score_type=rank_score_type, - rank_score=6, + rank_score=self.penalty, min_rank_score_value=variant_rankscore_normalization_bounds[variant_id][0], max_rank_score_value=variant_rankscore_normalization_bounds[variant_id][1] )