From 2c3fb3f1a38f0ee30c63b6fb02cd2243dec79183 Mon Sep 17 00:00:00 2001 From: Ali Darius Khan Date: Wed, 1 Feb 2023 12:52:37 +0000 Subject: [PATCH 01/20] Replace rotamer classification with score from tortoize --- iris_validation/__init__.py | 4 +- iris_validation/_defs.py | 89 +++++++++++++++++++++-------- iris_validation/graphics/panel.py | 8 ++- iris_validation/metrics/__init__.py | 40 ++++++++----- iris_validation/metrics/chain.py | 16 ++++-- iris_validation/metrics/model.py | 21 +++++-- iris_validation/metrics/residue.py | 17 +++++- iris_validation/metrics/series.py | 9 ++- 8 files changed, 144 insertions(+), 60 deletions(-) diff --git a/iris_validation/__init__.py b/iris_validation/__init__.py index e54a19a..4557198 100644 --- a/iris_validation/__init__.py +++ b/iris_validation/__init__.py @@ -16,7 +16,7 @@ def generate_report(latest_model_path, previous_distpred_path=None, run_covariance=False, run_molprobity=False, - calculate_rama_z=True, + calculate_tortoize=True, multiprocessing=True, wrap_in_html=True, output_dir=None): @@ -32,7 +32,7 @@ def generate_report(latest_model_path, distpred_paths, run_covariance, run_molprobity, - calculate_rama_z, + calculate_tortoize, multiprocessing) model_series_data = model_series.get_raw_data() panel = Panel(model_series_data) diff --git a/iris_validation/_defs.py b/iris_validation/_defs.py index c17c1a4..31da21a 100644 --- a/iris_validation/_defs.py +++ b/iris_validation/_defs.py @@ -31,8 +31,10 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False + 'is_rama_z' : False, + 'is_rama_classification': False, + 'is_rota_z' : False, + 'is_rota_classification': False }, { 'id' : 1, 'type' : 'continuous', @@ -43,8 +45,10 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False + 'is_rama_z' : False, + 'is_rama_classification': False, + 'is_rota_z' : False, + 'is_rota_classification': False }, { 'id' : 2, 'type' : 'continuous', @@ -55,8 +59,10 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False + 'is_rama_z' : False, + 'is_rama_classification': False, + 'is_rota_z' : False, + 'is_rota_classification': False }, { 'id' : 3, 'type' : 'continuous', @@ -67,8 +73,10 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': True, - 'is_rama_z': False, - 'is_rama_classification': False + 'is_rama_z' : False, + 'is_rama_classification': False, + 'is_rota_z' : False, + 'is_rota_classification': False }, { 'id' : 4, 'type' : 'continuous', @@ -79,8 +87,10 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': True, - 'is_rama_z': False, - 'is_rama_classification': False + 'is_rama_z' : False, + 'is_rama_classification': False, + 'is_rota_z' : False, + 'is_rota_classification': False }, { 'id' : 5, 'type' : 'continuous', @@ -91,8 +101,10 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': True, - 'is_rama_z': False, - 'is_rama_classification': False + 'is_rama_z' : False, + 'is_rama_classification': False, + 'is_rota_z' : False, + 'is_rota_classification': False }, { 'id' : 6, 'type' : 'continuous', @@ -103,8 +115,10 @@ 'is_covariance' : True, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False + 'is_rama_z' : False, + 'is_rama_classification': False, + 'is_rota_z' : False, + 'is_rota_classification': False }, { 'id' : 7, 'type' : 'continuous', @@ -115,8 +129,24 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z': True, - 'is_rama_classification': False + 'is_rama_z' : True, + 'is_rama_classification': False, + 'is_rota_z' : False, + 'is_rota_classification': False + }, + { 'id' : 8, + 'type' : 'continuous', + 'long_name' : 'Rotamer z-score', + 'short_name' : 'Rota Z', + 'ring_color' : COLORS['GREY'], + 'polarity' : 1, + 'is_covariance' : False, + 'is_molprobity' : False, + 'is_reflections': False, + 'is_rama_z' : False, + 'is_rama_classification': False, + 'is_rota_z' : True, + 'is_rota_classification': False } ) @@ -134,8 +164,10 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False + 'is_rama_z' : False, + 'is_rama_classification': False, + 'is_rota_z' : False, + 'is_rota_classification': True }, { 'id' : 1, 'type' : 'discrete', @@ -151,8 +183,10 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': True + 'is_rama_z' : False, + 'is_rama_classification': True, + 'is_rota_z' : False, + 'is_rota_classification': False }, { 'id' : 2, 'type' : 'discrete', @@ -168,8 +202,10 @@ 'is_covariance' : False, 'is_molprobity' : True, 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False + 'is_rama_z' : False, + 'is_rama_classification': False, + 'is_rota_z' : False, + 'is_rota_classification': False }, { 'id' : 3, 'type' : 'discrete', @@ -183,8 +219,10 @@ 'is_covariance' : True, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False + 'is_rama_z' : False, + 'is_rama_classification': False, + 'is_rota_z' : False, + 'is_rota_classification': False } ) @@ -200,7 +238,8 @@ CONTINUOUS_METRICS[4], CONTINUOUS_METRICS[5], CONTINUOUS_METRICS[6], - CONTINUOUS_METRICS[7] ] + CONTINUOUS_METRICS[7], + CONTINUOUS_METRICS[8] ] RESIDUE_VIEW_BOXES = [ DISCRETE_METRICS[0], DISCRETE_METRICS[1], diff --git a/iris_validation/graphics/panel.py b/iris_validation/graphics/panel.py index 7f7bb3f..5bc3503 100644 --- a/iris_validation/graphics/panel.py +++ b/iris_validation/graphics/panel.py @@ -47,9 +47,13 @@ def _verify_chosen_metrics(self): del metric_list[metric_index] elif (metric_list[metric_index]['is_reflections'] and not self.data[0]['has_reflections']): del metric_list[metric_index] - elif (metric_list[metric_index]['is_rama_z'] and not self.data[0]['has_rama_z']): + elif (metric_list[metric_index]['is_rama_z'] and not self.data[0]['has_tortoize']): del metric_list[metric_index] - elif (metric_list[metric_index]['is_rama_classification'] and self.data[0]['has_rama_z']): + elif (metric_list[metric_index]['is_rama_classification'] and self.data[0]['has_tortoize']): + del metric_list[metric_index] + elif (metric_list[metric_index]['is_rota_z'] and not self.data[0]['has_tortoize']): + del metric_list[metric_index] + elif (metric_list[metric_index]['is_rota_classification'] and self.data[0]['has_tortoize']): del metric_list[metric_index] def _generate_javascript(self): diff --git a/iris_validation/metrics/__init__.py b/iris_validation/metrics/__init__.py index e241386..208d8d8 100644 --- a/iris_validation/metrics/__init__.py +++ b/iris_validation/metrics/__init__.py @@ -1,5 +1,6 @@ from multiprocessing import Process, Queue +import collections import subprocess import json import clipper @@ -176,7 +177,8 @@ def _get_covariance_data(model_path, def _get_tortoize_data(model_path, model_id=None, out_queue=None): - rama_z_data = {} + tortoize_datum = collections.namedtuple('tortoize_datum', ['rama_z', 'rota_z']) + tortoize_data = collections.defaultdict(tortoize_datum) try: tortoize_process = subprocess.Popen( f'tortoize {model_path}', @@ -190,13 +192,15 @@ def _get_tortoize_data(model_path, model_id=None, out_queue=None): tortoize_dict = json.loads(tortoize_output) residues = tortoize_dict["model"]["1"]["residues"] for res in residues: - chain_rama_z_data = rama_z_data.setdefault(res['pdb']['strandID'], {}) - chain_rama_z_data[res['pdb']['seqNum']] = res['ramachandran']['z-score'] - + chain_tortoize_data = tortoize_data.setdefault(res['pdb']['strandID'], {}) + chain_tortoize_data[res['pdb']['seqNum']] = tortoize_datum( + rama_z=res['ramachandran']['z-score'], + rota_z=None if ('torsion' not in res or res['torsion']['z-score'] > 3) else res['torsion']['z-score']) if out_queue is not None: - out_queue.put(('rama_z', model_id, rama_z_data)) + out_queue.put(('tortoize', model_id, tortoize_data)) + + return tortoize_data - return rama_z_data def metrics_model_series_from_files(model_paths, reflections_paths=None, @@ -204,7 +208,7 @@ def metrics_model_series_from_files(model_paths, distpred_paths=None, run_covariance=False, run_molprobity=False, - calculate_rama_z=True, + calculate_tortoize=True, multiprocessing=True): try: if isinstance(model_paths, str): @@ -227,7 +231,7 @@ def metrics_model_series_from_files(model_paths, all_covariance_data = [ ] all_molprobity_data = [ ] all_reflections_data = [ ] - all_rama_z_data = [ ] + all_tortoize_data = [ ] num_queued = 0 results_queue = Queue() for model_id, file_paths in enumerate(zip(*path_lists)): @@ -237,7 +241,7 @@ def metrics_model_series_from_files(model_paths, covariance_data = None molprobity_data = None reflections_data = None - rama_z_data = None + tortoize_data = None if run_covariance: if multiprocessing: p = Process(target=_get_covariance_data, @@ -268,7 +272,7 @@ def metrics_model_series_from_files(model_paths, num_queued += 1 else: reflections_data = _get_reflections_data(model_path, reflections_path) - if calculate_rama_z: + if calculate_tortoize: if multiprocessing: p = Process(target=_get_tortoize_data, args=(model_path,), @@ -277,13 +281,13 @@ def metrics_model_series_from_files(model_paths, p.start() num_queued += 1 else: - rama_z_data = _get_tortoize_data(model_path) + tortoize_data = _get_tortoize_data(model_path) all_minimol_data.append(minimol) all_covariance_data.append(covariance_data) all_molprobity_data.append(molprobity_data) all_reflections_data.append(reflections_data) - all_rama_z_data.append(rama_z_data) + all_tortoize_data.append(tortoize_data) if multiprocessing: for _ in range(num_queued): @@ -294,11 +298,17 @@ def metrics_model_series_from_files(model_paths, all_molprobity_data[model_id] = result if result_type == 'reflections': all_reflections_data[model_id] = result - if result_type == 'rama_z': - all_rama_z_data[model_id] = result + if result_type == 'tortoize': + all_tortoize_data[model_id] = result metrics_models = [ ] - for model_id, model_data in enumerate(zip(all_minimol_data, all_covariance_data, all_molprobity_data, all_reflections_data, all_rama_z_data)): + for model_id, model_data in enumerate(zip( + all_minimol_data, + all_covariance_data, + all_molprobity_data, + all_reflections_data, + all_tortoize_data)): + metrics_model = MetricsModel(*model_data) metrics_models.append(metrics_model) diff --git a/iris_validation/metrics/chain.py b/iris_validation/metrics/chain.py index 9ee0ab6..7adf6e1 100644 --- a/iris_validation/metrics/chain.py +++ b/iris_validation/metrics/chain.py @@ -2,13 +2,21 @@ class MetricsChain(): - def __init__(self, mmol_chain, parent_model=None, covariance_data=None, molprobity_data=None, density_scores=None, rama_z_data=None): + def __init__( + self, + mmol_chain, + parent_model=None, + covariance_data=None, + molprobity_data=None, + density_scores=None, + tortoize_data=None): + self.minimol_chain = mmol_chain self.parent_model = parent_model self.covariance_data = covariance_data self.molprobity_data = molprobity_data self.density_scores = density_scores - self.rama_z_data = rama_z_data + self.tortoize_data = tortoize_data self._index = -1 self.residues = [ ] @@ -22,7 +30,7 @@ def __init__(self, mmol_chain, parent_model=None, covariance_data=None, molprobi residue_covariance_data = None if covariance_data is None else covariance_data[seq_num] residue_molprobity_data = None if molprobity_data is None else molprobity_data[seq_num] residue_density_scores = None if density_scores is None else density_scores[seq_num] - residue_rama_z_score = None if rama_z_data is None else rama_z_data.get(seq_num, None) + residue_tortoize_scores = None if tortoize_data is None else tortoize_data.get(seq_num, None) residue = MetricsResidue( mmol_residue, residue_index, @@ -32,7 +40,7 @@ def __init__(self, mmol_chain, parent_model=None, covariance_data=None, molprobi residue_covariance_data, residue_molprobity_data, residue_density_scores, - residue_rama_z_score) + residue_tortoize_scores) self.residues.append(residue) for residue_index, residue in enumerate(self.residues): diff --git a/iris_validation/metrics/model.py b/iris_validation/metrics/model.py index 530c196..1d36fa2 100644 --- a/iris_validation/metrics/model.py +++ b/iris_validation/metrics/model.py @@ -4,12 +4,19 @@ class MetricsModel(): - def __init__(self, mmol_model, covariance_data=None, molprobity_data=None, reflections_data=None, rama_z_data=None): + def __init__( + self, + mmol_model, + covariance_data=None, + molprobity_data=None, + reflections_data=None, + tortoize_data=None): + self.minimol_model = mmol_model self.covariance_data = covariance_data self.molprobity_data = molprobity_data self.reflections_data = reflections_data - self.rama_z_data = rama_z_data + self.tortoize_data = tortoize_data self._index = -1 self.minimol_chains = list(mmol_model.model()) @@ -27,8 +34,14 @@ def __init__(self, mmol_model, covariance_data=None, molprobity_data=None, refle chain_covariance_data = None if covariance_data is None else covariance_data[chain_id] chain_molprobity_data = None if molprobity_data is None else molprobity_data[chain_id] chain_density_scores = None if self.density_scores is None else self.density_scores[chain_id] - chain_rama_z_data = None if rama_z_data is None else rama_z_data[chain_id] - chain = MetricsChain(mmol_chain, self, chain_covariance_data, chain_molprobity_data, chain_density_scores, chain_rama_z_data) + chain_tortoize_data = None if (tortoize_data is None) else tortoize_data[chain_id] + chain = MetricsChain( + mmol_chain, + self, + chain_covariance_data, + chain_molprobity_data, + chain_density_scores, + chain_tortoize_data) chain.remove_non_aa_residues() self.chains.append(chain) diff --git a/iris_validation/metrics/residue.py b/iris_validation/metrics/residue.py index 86b628d..c4638de 100644 --- a/iris_validation/metrics/residue.py +++ b/iris_validation/metrics/residue.py @@ -7,7 +7,18 @@ class MetricsResidue(): - def __init__(self, mmol_residue, index_in_chain=None, previous_residue=None, next_residue=None, parent_chain=None, covariance_data=None, molprobity_data=None, density_scores=None, rama_z_score=None): + def __init__( + self, + mmol_residue, + index_in_chain=None, + previous_residue=None, + next_residue=None, + parent_chain=None, + covariance_data=None, + molprobity_data=None, + density_scores=None, + tortoize_scores=None): + self.minimol_residue = mmol_residue self.initialised_with_context = index_in_chain is not None self.index_in_chain = index_in_chain @@ -17,7 +28,8 @@ def __init__(self, mmol_residue, index_in_chain=None, previous_residue=None, nex self.covariance_data = covariance_data self.molprobity_data = molprobity_data self.density_scores = density_scores - self.rama_z = rama_z_score + self.rama_z = None if not tortoize_scores else tortoize_scores.rama_z + self.rota_z = None if not tortoize_scores else tortoize_scores.rota_z self.atoms = list(mmol_residue) self.sequence_number = int(mmol_residue.seqnum()) @@ -107,4 +119,3 @@ def __init__(self, mmol_residue, index_in_chain=None, previous_residue=None, nex self.mainchain_fit_score_percentile = percentile_calculator.get_percentile(4, self.mainchain_fit_score) self.sidechain_fit_score_percentile = percentile_calculator.get_percentile(5, self.sidechain_fit_score) self.covariance_score_percentile = percentile_calculator.get_percentile(6, self.covariance_score) - # self.rama_z_score_percentile = percentile_calculator.get_percentile(7, self.rama_z) diff --git a/iris_validation/metrics/series.py b/iris_validation/metrics/series.py index e2fddf2..a79deba 100644 --- a/iris_validation/metrics/series.py +++ b/iris_validation/metrics/series.py @@ -73,8 +73,7 @@ def get_raw_data(self): has_covariance = self.metrics_models[0].covariance_data is not None has_molprobity = self.metrics_models[0].molprobity_data is not None has_reflections = self.metrics_models[0].reflections_data is not None - has_rama_z = self.metrics_models[0].rama_z_data is not None - has_rama_classification = not has_rama_z + has_tortoize = self.metrics_models[0].tortoize_data is not None raw_data = [ ] for chain_id, chain_set in self.chain_sets.items(): @@ -85,8 +84,7 @@ def get_raw_data(self): 'has_covariance' : has_covariance, 'has_molprobity' : has_molprobity, 'has_reflections' : has_reflections, - 'has_rama_z' : has_rama_z, - 'has_rama_classification': has_rama_classification, + 'has_tortoize' : has_tortoize, 'aligned_length' : aligned_length, 'residue_seqnos' : [ ], 'residue_codes' : [ ], @@ -131,7 +129,8 @@ def get_raw_data(self): residue.mainchain_fit_score, residue.sidechain_fit_score, residue.covariance_score, - residue.rama_z) + residue.rama_z, + residue.rota_z) residue_percentile_values = (residue.avg_b_factor_percentile, residue.max_b_factor_percentile, residue.std_b_factor_percentile, From 5aaaf4d338b6c8a9311c8a376354d472088fbe0f Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Wed, 22 Mar 2023 10:04:53 +0000 Subject: [PATCH 02/20] add options to read data from external json file --- iris_validation/__init__.py | 65 ++-- iris_validation/graphics/chain.py | 477 ++++++++++++++++++---------- iris_validation/graphics/panel.py | 440 +++++++++++++++---------- iris_validation/metrics/__init__.py | 327 ++++++++++++------- iris_validation/metrics/chain.py | 133 ++++++-- iris_validation/metrics/model.py | 101 +++++- iris_validation/metrics/residue.py | 146 +++++++-- iris_validation/metrics/series.py | 169 ++++++---- 8 files changed, 1240 insertions(+), 618 deletions(-) diff --git a/iris_validation/__init__.py b/iris_validation/__init__.py index e54a19a..8695ab9 100644 --- a/iris_validation/__init__.py +++ b/iris_validation/__init__.py @@ -1,39 +1,48 @@ import os -import subprocess -import json + +# import subprocess +# import json from iris_validation.graphics import Panel from iris_validation.metrics import metrics_model_series_from_files -def generate_report(latest_model_path, - latest_reflections_path=None, - latest_sequence_path=None, - latest_distpred_path=None, - previous_model_path=None, - previous_reflections_path=None, - previous_sequence_path=None, - previous_distpred_path=None, - run_covariance=False, - run_molprobity=False, - calculate_rama_z=True, - multiprocessing=True, - wrap_in_html=True, - output_dir=None): +def generate_report( + latest_model_path, + latest_reflections_path=None, + latest_sequence_path=None, + latest_distpred_path=None, + previous_model_path=None, + previous_reflections_path=None, + previous_sequence_path=None, + previous_distpred_path=None, + run_covariance=False, + run_molprobity=False, + calculate_rama_z=False, + multiprocessing=True, + latest_model_metrics_json=None, + previous_model_metrics_json=None, + wrap_in_html=True, + output_dir=None, +): model_paths = (previous_model_path, latest_model_path) reflections_paths = (previous_reflections_path, latest_reflections_path) sequence_paths = (previous_sequence_path, latest_sequence_path) distpred_paths = (previous_distpred_path, latest_distpred_path) - - model_series = metrics_model_series_from_files(model_paths, - reflections_paths, - sequence_paths, - distpred_paths, - run_covariance, - run_molprobity, - calculate_rama_z, - multiprocessing) + model_json_paths = (previous_model_metrics_json, latest_model_metrics_json) + + model_series = metrics_model_series_from_files( + model_paths, + reflections_paths, + sequence_paths, + distpred_paths, + run_covariance, + run_molprobity, + calculate_rama_z, + model_json_paths, + multiprocessing, + ) model_series_data = model_series.get_raw_data() panel = Panel(model_series_data) panel_string = panel.dwg.tostring() @@ -47,6 +56,8 @@ def generate_report(latest_model_path, if not os.path.isdir(output_dir): os.mkdir(output_dir) - extension = 'html' if wrap_in_html else 'svg' - with open(os.path.join(output_dir, f'report.{extension}'), 'w', encoding='utf8') as outfile: + extension = "html" if wrap_in_html else "svg" + with open( + os.path.join(output_dir, f"report.{extension}"), "w", encoding="utf8" + ) as outfile: outfile.write(panel_string) diff --git a/iris_validation/graphics/chain.py b/iris_validation/graphics/chain.py index 1a02e06..e170fc1 100644 --- a/iris_validation/graphics/chain.py +++ b/iris_validation/graphics/chain.py @@ -7,213 +7,324 @@ from iris_validation._defs import COLORS, CHAIN_VIEW_RINGS, CHAIN_VIEW_GAP_ANGLE -class ChainView(): +class ChainView: def __init__(self, data, chain_index, canvas_size=(1000, 1000), hidden=False): self.data = data + print(data) self.chain_index = chain_index self.canvas_size = canvas_size self.hidden = hidden self.dwg = None - self.cfa_cache = { } + self.cfa_cache = {} self.num_rings = len(CHAIN_VIEW_RINGS) - self.num_versions = self.data['num_versions'] - self.num_segments = self.data['aligned_length'] + self.num_versions = self.data["num_versions"] + self.num_segments = self.data["aligned_length"] self.center = (self.canvas_size[0] // 2, self.canvas_size[1] // 2) self.full_radius = round(min(self.canvas_size) / 2 - 10, 2) self.division_size = round(self.full_radius / (self.num_rings + 2), 2) self.angle_delta = (2 * pi - CHAIN_VIEW_GAP_ANGLE) / self.num_segments - self.svg_id = f'iris-chain-view-{self.chain_index}' + self.svg_id = f"iris-chain-view-{self.chain_index}" self._draw() def _coords_from_angle(self, angle, radius, gap=True): gap_angle = CHAIN_VIEW_GAP_ANGLE if gap else 0.0 - arg_string = str([ self.center, angle, radius, gap_angle ]) + arg_string = str([self.center, angle, radius, gap_angle]) if arg_string in self.cfa_cache: coords = self.cfa_cache[arg_string] else: - result_x = self.center[0] + radius * sin(angle + gap_angle/2) - result_y = self.center[1] - radius * cos(angle + gap_angle/2) + result_x = self.center[0] + radius * sin(angle + gap_angle / 2) + result_y = self.center[1] - radius * cos(angle + gap_angle / 2) coords = (round(result_x, 1), round(result_y, 1)) self.cfa_cache[arg_string] = coords return coords def _draw(self): # Initialise drawing - self.dwg = svgwrite.Drawing(profile='full') + self.dwg = svgwrite.Drawing(profile="full") # Set HTML attributes - self.dwg.attribs['viewBox'] = '0 0 ' + ' '.join([ str(x) for x in self.canvas_size ]) - self.dwg.attribs['id'] = self.svg_id + self.dwg.attribs["viewBox"] = "0 0 " + " ".join( + [str(x) for x in self.canvas_size] + ) + self.dwg.attribs["id"] = self.svg_id if self.hidden: - self.dwg.attribs['style'] = 'display: none;' + self.dwg.attribs["style"] = "display: none;" # Draw background - self.dwg.add(self.dwg.circle(r=self.full_radius, - center=self.center, - fill=COLORS['WHITE'], - fill_opacity=1, - stroke_opacity=0)) - + self.dwg.add( + self.dwg.circle( + r=self.full_radius, + center=self.center, + fill=COLORS["WHITE"], + fill_opacity=1, + stroke_opacity=0, + ) + ) # Draw data rings for ring_id, ring_metric in enumerate(CHAIN_VIEW_RINGS): self._add_ring(ring_id, ring_metric) # Draw missing-data shade - for version_id, residue_validities in enumerate(self.data['residue_validities']): - group_opacity = 1 if version_id == self.num_versions-1 else 0 - shade_group = self.dwg.g(id=f'{self.svg_id}-shade-{version_id}', opacity=group_opacity) + for version_id, residue_validities in enumerate( + self.data["residue_validities"] + ): + group_opacity = 1 if version_id == self.num_versions - 1 else 0 + shade_group = self.dwg.g( + id=f"{self.svg_id}-shade-{version_id}", opacity=group_opacity + ) for segment_id, residue_valid in enumerate(residue_validities): if not residue_valid: - shade_group.add(self.dwg.polygon([ self.center, - self._coords_from_angle(self.angle_delta * segment_id, self.full_radius+5), - self._coords_from_angle(self.angle_delta * (segment_id+1), self.full_radius+5) ], - stroke_opacity=0, - fill=COLORS['L_PINK'], - fill_opacity=1)) + shade_group.add( + self.dwg.polygon( + [ + self.center, + self._coords_from_angle( + self.angle_delta * segment_id, self.full_radius + 5 + ), + self._coords_from_angle( + self.angle_delta * (segment_id + 1), + self.full_radius + 5, + ), + ], + stroke_opacity=0, + fill=COLORS["L_PINK"], + fill_opacity=1, + ) + ) self.dwg.add(shade_group) # Draw outer rings - self.dwg.add(self.dwg.circle(r=self.full_radius-24, - center=self.center, - fill_opacity=0, - stroke=COLORS['BLACK'], - stroke_width=1, - stroke_opacity=0.5)) - self.dwg.add(self.dwg.circle(r=self.full_radius-8, - center=self.center, - fill_opacity=0, - stroke=COLORS['BLACK'], - stroke_width=1, - stroke_opacity=0.5)) - for i in range(self.num_segments+1): - self.dwg.add(self.dwg.line(self._coords_from_angle(self.angle_delta*i, self.full_radius-24), - self._coords_from_angle(self.angle_delta*i, self.full_radius-8), - stroke=COLORS['BLACK'], - stroke_width=1, - stroke_opacity=0.5)) + self.dwg.add( + self.dwg.circle( + r=self.full_radius - 24, + center=self.center, + fill_opacity=0, + stroke=COLORS["BLACK"], + stroke_width=1, + stroke_opacity=0.5, + ) + ) + self.dwg.add( + self.dwg.circle( + r=self.full_radius - 8, + center=self.center, + fill_opacity=0, + stroke=COLORS["BLACK"], + stroke_width=1, + stroke_opacity=0.5, + ) + ) + for i in range(self.num_segments + 1): + self.dwg.add( + self.dwg.line( + self._coords_from_angle( + self.angle_delta * i, self.full_radius - 24 + ), + self._coords_from_angle(self.angle_delta * i, self.full_radius - 8), + stroke=COLORS["BLACK"], + stroke_width=1, + stroke_opacity=0.5, + ) + ) # Draw segment selector - center_point = self.angle_delta*0.5 - selector_points = (self._coords_from_angle(center_point, self.full_radius-16), - self._coords_from_angle(center_point-0.02, self.full_radius-8), - self._coords_from_angle(center_point-0.02, self.full_radius+8), - self._coords_from_angle(center_point+0.02, self.full_radius+8), - self._coords_from_angle(center_point+0.02, self.full_radius-8)) - self.dwg.add(self.dwg.polygon(selector_points, - stroke=COLORS['BLACK'], - stroke_width=2, - stroke_opacity=1, - fill=COLORS['GREY'], - fill_opacity=0.2, - id=f'{self.svg_id}-residue-selector')) + center_point = self.angle_delta * 0.5 + selector_points = ( + self._coords_from_angle(center_point, self.full_radius - 16), + self._coords_from_angle(center_point - 0.02, self.full_radius - 8), + self._coords_from_angle(center_point - 0.02, self.full_radius + 8), + self._coords_from_angle(center_point + 0.02, self.full_radius + 8), + self._coords_from_angle(center_point + 0.02, self.full_radius - 8), + ) + self.dwg.add( + self.dwg.polygon( + selector_points, + stroke=COLORS["BLACK"], + stroke_width=2, + stroke_opacity=1, + fill=COLORS["GREY"], + fill_opacity=0.2, + id=f"{self.svg_id}-residue-selector", + ) + ) # Draw interaction segments for segment_id in range(self.num_segments): - self.dwg.add(self.dwg.polygon([ self.center, - self._coords_from_angle(self.angle_delta * segment_id, self.full_radius+5), - self._coords_from_angle(self.angle_delta * (segment_id+1), self.full_radius+5) ], - stroke=COLORS['BLACK'], - stroke_width=1, - stroke_opacity=0, - fill=COLORS['L_GREY'], - fill_opacity=0, - onmousedown=f'handleSegment(1, {segment_id});', - onmouseover=f'handleSegment(2, {segment_id});', - onmouseup=f'handleSegment(3, {segment_id});', - id=f'{self.svg_id}-interaction-segment-{segment_id}')) - self.dwg.add(self.dwg.circle(r=1.5*self.division_size, - center=self.center, - fill=COLORS['WHITE'], - fill_opacity=1, - stroke_opacity=0)) + self.dwg.add( + self.dwg.polygon( + [ + self.center, + self._coords_from_angle( + self.angle_delta * segment_id, self.full_radius + 5 + ), + self._coords_from_angle( + self.angle_delta * (segment_id + 1), self.full_radius + 5 + ), + ], + stroke=COLORS["BLACK"], + stroke_width=1, + stroke_opacity=0, + fill=COLORS["L_GREY"], + fill_opacity=0, + onmousedown=f"handleSegment(1, {segment_id});", + onmouseover=f"handleSegment(2, {segment_id});", + onmouseup=f"handleSegment(3, {segment_id});", + id=f"{self.svg_id}-interaction-segment-{segment_id}", + ) + ) + self.dwg.add( + self.dwg.circle( + r=1.5 * self.division_size, + center=self.center, + fill=COLORS["WHITE"], + fill_opacity=1, + stroke_opacity=0, + ) + ) # Draw center text - self.dwg.add(self.dwg.text(text='Iris', - insert=(self.center[0], self.center[1]-24), - font_size=1.5*16, - font_family='Arial', - font_weight='bold', - text_anchor='middle', - alignment_baseline='central')) - self.dwg.add(self.dwg.text(text='Chain ' + self.data['chain_id'], - insert=(self.center[0], self.center[1]+16), - font_size=16, - font_family='Arial', - text_anchor='middle', - alignment_baseline='central')) - if self.data['has_molprobity']: - self.dwg.add(self.dwg.text(text='MolProbity', - insert=(self.center[0], self.center[1]+48), - font_size=16, - font_family='Arial', - text_anchor='middle', - alignment_baseline='central', - fill=COLORS['L_GREY'])) + self.dwg.add( + self.dwg.text( + text="Iris", + insert=(self.center[0], self.center[1] - 24), + font_size=1.5 * 16, + font_family="Arial", + font_weight="bold", + text_anchor="middle", + alignment_baseline="central", + ) + ) + self.dwg.add( + self.dwg.text( + text="Chain " + self.data["chain_id"], + insert=(self.center[0], self.center[1] + 16), + font_size=16, + font_family="Arial", + text_anchor="middle", + alignment_baseline="central", + ) + ) + if self.data["has_molprobity"]: + self.dwg.add( + self.dwg.text( + text="MolProbity", + insert=(self.center[0], self.center[1] + 48), + font_size=16, + font_family="Arial", + text_anchor="middle", + alignment_baseline="central", + fill=COLORS["L_GREY"], + ) + ) def _add_ring(self, ring_id, metric): - datapoints = self.data[metric['type'] + '_values'][metric['id']] + datapoints = self.data[metric["type"] + "_values"][metric["id"]] # Draw axes ring_base_radius = (ring_id + 2) * self.division_size - self.dwg.add(self.dwg.circle(r=ring_base_radius, - center=self.center, - fill_opacity=0, - stroke=metric['ring_color'], - stroke_width=1, - stroke_opacity=1)) - self.dwg.add(self.dwg.polyline([ self._coords_from_angle((CHAIN_VIEW_GAP_ANGLE/25)*(i-(20-1)/2), ring_base_radius, gap=False) for i in range(20) ], - stroke=metric['ring_color'], - stroke_width=3, - stroke_opacity=1, - fill_opacity=0)) - self.dwg.add(self.dwg.text(text=metric['short_name'], - insert=self._coords_from_angle(0, ring_base_radius+12, gap=False), - font_size=16, - font_family='Arial', - text_anchor='middle', - alignment_baseline='central')) + self.dwg.add( + self.dwg.circle( + r=ring_base_radius, + center=self.center, + fill_opacity=0, + stroke=metric["ring_color"], + stroke_width=1, + stroke_opacity=1, + ) + ) + self.dwg.add( + self.dwg.polyline( + [ + self._coords_from_angle( + (CHAIN_VIEW_GAP_ANGLE / 25) * (i - (20 - 1) / 2), + ring_base_radius, + gap=False, + ) + for i in range(20) + ], + stroke=metric["ring_color"], + stroke_width=3, + stroke_opacity=1, + fill_opacity=0, + ) + ) + self.dwg.add( + self.dwg.text( + text=metric["short_name"], + insert=self._coords_from_angle(0, ring_base_radius + 12, gap=False), + font_size=16, + font_family="Arial", + text_anchor="middle", + alignment_baseline="central", + ) + ) - if metric['type'] == 'discrete': + if metric["type"] == "discrete": for version_id, version_datapoints in enumerate(datapoints): - version_ring_segments = [ ] + version_ring_segments = [] for segment_id, datapoint in enumerate(version_datapoints): segment_length = 10 - segment_color = metric['seq_colors'][-1] + segment_color = metric["seq_colors"][-1] segment_opacity = 1 - if datapoint is not None and 0 <= datapoint < len(metric['seq_colors']): - segment_color = metric['seq_colors'][datapoint] - if segment_color == metric['seq_colors'][-1]: + if datapoint is not None and 0 <= datapoint < len( + metric["seq_colors"] + ): + segment_color = metric["seq_colors"][datapoint] + if segment_color == metric["seq_colors"][-1]: segment_opacity = 0.5 - segment_points = (self._coords_from_angle(self.angle_delta * (segment_id), - ring_base_radius - segment_length), - self._coords_from_angle(self.angle_delta * (segment_id), - ring_base_radius + segment_length), - self._coords_from_angle(self.angle_delta * (segment_id+1), - ring_base_radius + segment_length), - self._coords_from_angle(self.angle_delta * (segment_id+1), - ring_base_radius - segment_length)) - version_ring_segments.append((segment_points, segment_color, segment_opacity)) - group_opacity = 1 if version_id == self.num_versions-1 else 0 - segment_group = self.dwg.g(id=f'{self.svg_id}-discrete-{version_id}-{ring_id}', opacity=group_opacity) - for segment_points, segment_color, segment_opacity in version_ring_segments: - segment_group.add(self.dwg.polyline(segment_points, - stroke_width=0, - stroke_opacity=0, - fill=segment_color, - fill_opacity=segment_opacity)) + segment_points = ( + self._coords_from_angle( + self.angle_delta * (segment_id), + ring_base_radius - segment_length, + ), + self._coords_from_angle( + self.angle_delta * (segment_id), + ring_base_radius + segment_length, + ), + self._coords_from_angle( + self.angle_delta * (segment_id + 1), + ring_base_radius + segment_length, + ), + self._coords_from_angle( + self.angle_delta * (segment_id + 1), + ring_base_radius - segment_length, + ), + ) + version_ring_segments.append( + (segment_points, segment_color, segment_opacity) + ) + group_opacity = 1 if version_id == self.num_versions - 1 else 0 + segment_group = self.dwg.g( + id=f"{self.svg_id}-discrete-{version_id}-{ring_id}", + opacity=group_opacity, + ) + for ( + segment_points, + segment_color, + segment_opacity, + ) in version_ring_segments: + segment_group.add( + self.dwg.polyline( + segment_points, + stroke_width=0, + stroke_opacity=0, + fill=segment_color, + fill_opacity=segment_opacity, + ) + ) self.dwg.add(segment_group) - elif metric['type'] == 'continuous': + elif metric["type"] == "continuous": # Get mean metric value - all_valid_values = [ ] + all_valid_values = [] for version_datapoints in datapoints: for datapoint in version_datapoints: if datapoint is None: continue - value = datapoint * metric['polarity'] + value = datapoint * metric["polarity"] all_valid_values.append(value) ring_avg = 0 if len(all_valid_values) == 0: @@ -221,36 +332,44 @@ def _add_ring(self, ring_id, metric): ring_avg = sum(all_valid_values) / len(all_valid_values) # Calculate deltas from the ring average - deltas = [ ] + deltas = [] for version_datapoints in datapoints: - version_deltas = [ ] + version_deltas = [] for datapoint in version_datapoints: delta = None if datapoint is not None: - value = datapoint * metric['polarity'] + value = datapoint * metric["polarity"] delta = value - ring_avg version_deltas.append(delta) deltas.append(version_deltas) # Calculate average negative delta in the latest dataset - latest_negative_deltas = [ x for x in deltas[-1] if x is not None and x < 0 ] + latest_negative_deltas = [x for x in deltas[-1] if x is not None and x < 0] avg_negative_delta = 0 if len(latest_negative_deltas) > 0: - avg_negative_delta = sum(latest_negative_deltas) / len(latest_negative_deltas) + avg_negative_delta = sum(latest_negative_deltas) / len( + latest_negative_deltas + ) # Subtract the average negative delta from all deltas to calculate 'magnitudes' - magnitudes = [ ] - all_valid_magnitudes = [ ] + magnitudes = [] + all_valid_magnitudes = [] for version_deltas in deltas: - version_magnitudes = [ x - avg_negative_delta if x is not None else None for x in version_deltas ] - all_valid_magnitudes += [ x for x in version_magnitudes if x is not None ] + version_magnitudes = [ + x - avg_negative_delta if x is not None else None + for x in version_deltas + ] + all_valid_magnitudes += [x for x in version_magnitudes if x is not None] magnitudes.append(version_magnitudes) - magnitude_min, magnitude_max = (min(all_valid_magnitudes), max(all_valid_magnitudes)) + magnitude_min, magnitude_max = ( + min(all_valid_magnitudes), + max(all_valid_magnitudes), + ) # Calculate plot magnitudes - plot_magnitudes = [ ] + plot_magnitudes = [] for version_magnitudes in magnitudes: - version_plot_magnitudes = [ ] + version_plot_magnitudes = [] for magnitude in version_magnitudes: if magnitude is None: version_plot_magnitudes.append(None) @@ -264,10 +383,12 @@ def _add_ring(self, ring_id, metric): plot_magnitudes.append(version_plot_magnitudes) # Calculate plot point coordinates - line_points = [ ] + line_points = [] for version_plot_magnitudes in plot_magnitudes: - version_line_points = [ ] - zero_point = self._coords_from_angle(self.angle_delta*0.5, ring_base_radius) + version_line_points = [] + zero_point = self._coords_from_angle( + self.angle_delta * 0.5, ring_base_radius + ) version_line_points.append(zero_point) for segment_id, plot_magnitude in enumerate(version_plot_magnitudes): angle = self.angle_delta * (segment_id + 0.5) @@ -279,27 +400,39 @@ def _add_ring(self, ring_id, metric): line_points.append(version_line_points) # Draw line - baseline_circle_points = [ ] + baseline_circle_points = [] baseline_point_resolution = 200 for point_id in range(baseline_point_resolution + 1): - point_angle = (baseline_point_resolution - point_id) * (2*pi - CHAIN_VIEW_GAP_ANGLE) / baseline_point_resolution - baseline_circle_points.append(self._coords_from_angle(point_angle, ring_base_radius)) + point_angle = ( + (baseline_point_resolution - point_id) + * (2 * pi - CHAIN_VIEW_GAP_ANGLE) + / baseline_point_resolution + ) + baseline_circle_points.append( + self._coords_from_angle(point_angle, ring_base_radius) + ) plot_points = line_points[-1] + baseline_circle_points - ring_line = self.dwg.polyline(plot_points, - stroke=metric['ring_color'], - stroke_width=2, - stroke_opacity=1, - fill=metric['ring_color'], - fill_opacity=0.2) + ring_line = self.dwg.polyline( + plot_points, + stroke=metric["ring_color"], + stroke_width=2, + stroke_opacity=1, + fill=metric["ring_color"], + fill_opacity=0.2, + ) for version_id, version_line_points in enumerate(line_points): plot_points = version_line_points + baseline_circle_points - points_string = ' '.join([ ','.join([ str(x) for x in point ]) for point in plot_points ]) - animation = Animate(values=None, - dur='250ms', - begin='indefinite', - fill='freeze', - attributeName='points', - to=points_string, - id=f'{self.svg_id}-animation-{version_id}-{ring_id}') + points_string = " ".join( + [",".join([str(x) for x in point]) for point in plot_points] + ) + animation = Animate( + values=None, + dur="250ms", + begin="indefinite", + fill="freeze", + attributeName="points", + to=points_string, + id=f"{self.svg_id}-animation-{version_id}-{ring_id}", + ) ring_line.add(animation) self.dwg.add(ring_line) diff --git a/iris_validation/graphics/panel.py b/iris_validation/graphics/panel.py index 7f7bb3f..ac27010 100644 --- a/iris_validation/graphics/panel.py +++ b/iris_validation/graphics/panel.py @@ -7,15 +7,21 @@ from iris_validation.graphics.chain import ChainView from iris_validation.graphics.residue import ResidueView -from iris_validation._defs import COLORS, CHAIN_VIEW_RINGS, RESIDUE_VIEW_BOXES, RESIDUE_VIEW_BARS, CHAIN_VIEW_GAP_ANGLE +from iris_validation._defs import ( + COLORS, + CHAIN_VIEW_RINGS, + RESIDUE_VIEW_BOXES, + RESIDUE_VIEW_BARS, + CHAIN_VIEW_GAP_ANGLE, +) -JS_PATH = os.path.join(os.path.dirname(__file__), 'js') -JS_CONSTANTS_PATH = os.path.join(JS_PATH, 'constants.js') -JS_INTERACTION_PATH = os.path.join(JS_PATH, 'interaction.js') +JS_PATH = os.path.join(os.path.dirname(__file__), "js") +JS_CONSTANTS_PATH = os.path.join(JS_PATH, "constants.js") +JS_INTERACTION_PATH = os.path.join(JS_PATH, "interaction.js") -class Panel(): +class Panel: def __init__(self, data, canvas_size=(1500, 1000)): self.data = data self.canvas_size = canvas_size @@ -24,10 +30,10 @@ def __init__(self, data, canvas_size=(1500, 1000)): self.javascript = None self.chain_views = None self.residue_view = None - self.num_models = self.data[0]['num_versions'] - self.chain_ids = [ chain_data['chain_id'] for chain_data in self.data ] - self.swtich_colors = [ COLORS['VL_GREY'], COLORS['CYAN'] ] - self.svg_id = 'iris-panel' + self.num_models = self.data[0]["num_versions"] + self.chain_ids = [chain_data["chain_id"] for chain_data in self.data] + self.swtich_colors = [COLORS["VL_GREY"], COLORS["CYAN"]] + self.svg_id = "iris-panel" self._verify_chosen_metrics() self._generate_javascript() @@ -39,49 +45,66 @@ def _verify_chosen_metrics(self): global CHAIN_VIEW_RINGS, RESIDUE_VIEW_BOXES, RESIDUE_VIEW_BARS for metric_list in (CHAIN_VIEW_RINGS, RESIDUE_VIEW_BOXES, RESIDUE_VIEW_BARS): if not isinstance(metric_list, list): - raise ValueError('Chosen metrics in the _defs.py file must be lists') + raise ValueError("Chosen metrics in the _defs.py file must be lists") for metric_index in reversed(range(len(metric_list))): - if (metric_list[metric_index]['is_covariance'] and not self.data[0]['has_covariance']): + if ( + metric_list[metric_index]["is_covariance"] + and not self.data[0]["has_covariance"] + ): del metric_list[metric_index] - elif (metric_list[metric_index]['is_molprobity'] and not self.data[0]['has_molprobity']): + elif ( + metric_list[metric_index]["is_molprobity"] + and not self.data[0]["has_molprobity"] + ): del metric_list[metric_index] - elif (metric_list[metric_index]['is_reflections'] and not self.data[0]['has_reflections']): + elif ( + metric_list[metric_index]["is_reflections"] + and not self.data[0]["has_reflections"] + ): del metric_list[metric_index] - elif (metric_list[metric_index]['is_rama_z'] and not self.data[0]['has_rama_z']): + elif ( + metric_list[metric_index]["is_rama_z"] + and not self.data[0]["has_rama_z"] + ): del metric_list[metric_index] - elif (metric_list[metric_index]['is_rama_classification'] and self.data[0]['has_rama_z']): + elif ( + metric_list[metric_index]["is_rama_classification"] + and self.data[0]["has_rama_z"] + ): del metric_list[metric_index] def _generate_javascript(self): json_data = json.dumps(self.data) num_chains = len(self.chain_ids) - bar_metric_ids = [ metric['id'] for metric in RESIDUE_VIEW_BARS ] - box_metric_ids = [ metric['id'] for metric in RESIDUE_VIEW_BOXES ] - box_colors = json.dumps([ metric['seq_colors'] for metric in RESIDUE_VIEW_BOXES ]) - box_labels = json.dumps([ metric['seq_labels'] for metric in RESIDUE_VIEW_BOXES ]) + bar_metric_ids = [metric["id"] for metric in RESIDUE_VIEW_BARS] + box_metric_ids = [metric["id"] for metric in RESIDUE_VIEW_BOXES] + box_colors = json.dumps([metric["seq_colors"] for metric in RESIDUE_VIEW_BOXES]) + box_labels = json.dumps([metric["seq_labels"] for metric in RESIDUE_VIEW_BOXES]) gap_degrees = CHAIN_VIEW_GAP_ANGLE * 180 / math.pi - with open(JS_CONSTANTS_PATH, 'r', encoding='utf8') as infile: + with open(JS_CONSTANTS_PATH, "r", encoding="utf8") as infile: js_constants = infile.read() - with open(JS_INTERACTION_PATH, 'r', encoding='utf8') as infile: + with open(JS_INTERACTION_PATH, "r", encoding="utf8") as infile: js_interation = infile.read() - js_constants = js_constants.format(model_data=json_data, - num_chains=num_chains, - bar_metric_ids=bar_metric_ids, - box_metric_ids=box_metric_ids, - box_colors=box_colors, - box_labels=box_labels, - gap_degrees=gap_degrees, - chain_selector_colors=self.swtich_colors) + js_constants = js_constants.format( + model_data=json_data, + num_chains=num_chains, + bar_metric_ids=bar_metric_ids, + box_metric_ids=box_metric_ids, + box_colors=box_colors, + box_labels=box_labels, + gap_degrees=gap_degrees, + chain_selector_colors=self.swtich_colors, + ) self.javascript = js_constants + js_interation def _generate_subviews(self): - self.chain_views = [ ] + self.chain_views = [] for chain_index, chain_data in enumerate(self.data): - chain_view = ChainView(chain_data, chain_index, hidden=chain_index>0).dwg + chain_view = ChainView(chain_data, chain_index, hidden=chain_index > 0).dwg self.chain_views.append(chain_view) self.residue_view = ResidueView().dwg @@ -91,167 +114,260 @@ def _draw(self): view_title_font = 24 button_width = 38 button_height = 32 - view_width, view_height = [ dim - view_border for dim in self.canvas_size ] - view_divider_x = round(2/3 * view_width, 2) - chain_view_bounds = (view_border, - view_border, - view_divider_x - round(middle_gap/2, 2), - view_height) - residue_view_bounds = (view_divider_x + round(middle_gap/2, 2), - view_border, - view_width, - view_height) + view_width, view_height = [dim - view_border for dim in self.canvas_size] + view_divider_x = round(2 / 3 * view_width, 2) + chain_view_bounds = ( + view_border, + view_border, + view_divider_x - round(middle_gap / 2, 2), + view_height, + ) + residue_view_bounds = ( + view_divider_x + round(middle_gap / 2, 2), + view_border, + view_width, + view_height, + ) # Initialise drawing - self.dwg = svgwrite.Drawing(profile='full') + self.dwg = svgwrite.Drawing(profile="full") # Disable text selection - self.dwg.attribs['style'] = 'user-select: none;' + self.dwg.attribs["style"] = "user-select: none;" # Draw background - self.dwg.add(self.dwg.polygon(points=[ (0, 0), - (0, self.canvas_size[1]), - (self.canvas_size[0], self.canvas_size[1]), - (self.canvas_size[0], 0) ], - fill=COLORS['WHITE'], - fill_opacity=1, - stroke_opacity=0)) + self.dwg.add( + self.dwg.polygon( + points=[ + (0, 0), + (0, self.canvas_size[1]), + (self.canvas_size[0], self.canvas_size[1]), + (self.canvas_size[0], 0), + ], + fill=COLORS["WHITE"], + fill_opacity=1, + stroke_opacity=0, + ) + ) # Set HTML attributes - self.dwg.attribs['viewBox'] = '0 0 ' + ' '.join([ str(x) for x in self.canvas_size ]) - self.dwg.attribs['id'] = self.svg_id + self.dwg.attribs["viewBox"] = "0 0 " + " ".join( + [str(x) for x in self.canvas_size] + ) + self.dwg.attribs["id"] = self.svg_id # Add JavaScript self.dwg.defs.add(self.dwg.script(content=self.javascript)) # View titles and divider lines - self.dwg.add(self.dwg.text(text='Chain', - insert=(chain_view_bounds[0], chain_view_bounds[1]+view_title_font), - font_size=view_title_font, - font_family='Arial')) - - self.dwg.add(self.dwg.text(text='Residue', - insert=(residue_view_bounds[0], residue_view_bounds[1]+view_title_font), - font_size=view_title_font, - font_family='Arial', - id=f'{self.svg_id}-residue-summary')) - - self.dwg.add(self.dwg.line((chain_view_bounds[0], chain_view_bounds[1]+40), - (chain_view_bounds[2], chain_view_bounds[1]+40), - stroke=COLORS['BLACK'], - stroke_width=2)) - - self.dwg.add(self.dwg.line((residue_view_bounds[0], residue_view_bounds[1]+40), - (residue_view_bounds[2], residue_view_bounds[1]+40), - stroke=COLORS['BLACK'], - stroke_width=2)) + self.dwg.add( + self.dwg.text( + text="Chain", + insert=(chain_view_bounds[0], chain_view_bounds[1] + view_title_font), + font_size=view_title_font, + font_family="Arial", + ) + ) + + self.dwg.add( + self.dwg.text( + text="Residue", + insert=( + residue_view_bounds[0], + residue_view_bounds[1] + view_title_font, + ), + font_size=view_title_font, + font_family="Arial", + id=f"{self.svg_id}-residue-summary", + ) + ) + + self.dwg.add( + self.dwg.line( + (chain_view_bounds[0], chain_view_bounds[1] + 40), + (chain_view_bounds[2], chain_view_bounds[1] + 40), + stroke=COLORS["BLACK"], + stroke_width=2, + ) + ) + + self.dwg.add( + self.dwg.line( + (residue_view_bounds[0], residue_view_bounds[1] + 40), + (residue_view_bounds[2], residue_view_bounds[1] + 40), + stroke=COLORS["BLACK"], + stroke_width=2, + ) + ) # Chain selector buttons for chain_index, chain_id in enumerate(self.chain_ids[:12]): - selector_color = self.swtich_colors[1] if chain_index == 0 else self.swtich_colors[0] - self.dwg.add(self.dwg.rect(insert=(chain_view_bounds[0] + 75 + 50*chain_index, chain_view_bounds[1]), - size=(button_width, button_height), - rx=5, - stroke_opacity=0, - fill_opacity=0.5, - fill=selector_color, - id=f'{self.svg_id}-chain-selector-{chain_index}')) - - self.dwg.add(self.dwg.text(text=chain_id, - insert=(chain_view_bounds[0] + 75 + button_width/2 + 50*chain_index, chain_view_bounds[1] + button_height/2), - font_size=view_title_font, - font_family='Arial', - text_anchor='middle', - alignment_baseline='central')) - - self.dwg.add(self.dwg.rect(insert=(chain_view_bounds[0] + 75 + 50*chain_index, chain_view_bounds[1]), - size=(button_width, button_height), - rx=5, - stroke_opacity=0, - fill_opacity=0, - onmouseover='setPointer();', - onmouseout='unsetPointer();', - onclick=f'setChain({chain_index});')) + selector_color = ( + self.swtich_colors[1] if chain_index == 0 else self.swtich_colors[0] + ) + self.dwg.add( + self.dwg.rect( + insert=( + chain_view_bounds[0] + 75 + 50 * chain_index, + chain_view_bounds[1], + ), + size=(button_width, button_height), + rx=5, + stroke_opacity=0, + fill_opacity=0.5, + fill=selector_color, + id=f"{self.svg_id}-chain-selector-{chain_index}", + ) + ) + + self.dwg.add( + self.dwg.text( + text=chain_id, + insert=( + chain_view_bounds[0] + 75 + button_width / 2 + 50 * chain_index, + chain_view_bounds[1] + button_height / 2, + ), + font_size=view_title_font, + font_family="Arial", + text_anchor="middle", + alignment_baseline="central", + ) + ) + + self.dwg.add( + self.dwg.rect( + insert=( + chain_view_bounds[0] + 75 + 50 * chain_index, + chain_view_bounds[1], + ), + size=(button_width, button_height), + rx=5, + stroke_opacity=0, + fill_opacity=0, + onmouseover="setPointer();", + onmouseout="unsetPointer();", + onclick=f"setChain({chain_index});", + ) + ) # Extra chains dropdown # TODO: finish this if len(self.chain_ids) > 12: chain_index = 12 selector_color = self.swtich_colors[0] - self.dwg.add(self.dwg.rect(insert=(chain_view_bounds[0] + 75 + 50*chain_index, chain_view_bounds[1]), - size=(38, 32), - rx=5, - stroke_opacity=0, - fill_opacity=0.5, - fill=selector_color, - id=f'{self.svg_id}-chain-selector-dropdown')) - - self.dwg.add(self.dwg.text(text='...', - insert=(chain_view_bounds[0] + 85 + 50*chain_index, chain_view_bounds[1]+view_title_font), - font_size=view_title_font, - font_family='Arial')) - - self.dwg.add(self.dwg.rect(insert=(chain_view_bounds[0] + 75 + 50*chain_index, chain_view_bounds[1]), - size=(38, 32), - rx=5, - stroke_opacity=0, - fill_opacity=0, - onmouseover='setPointer();', - onmouseout='unsetPointer();', - onclick=f'toggleDropdown();')) + self.dwg.add( + self.dwg.rect( + insert=( + chain_view_bounds[0] + 75 + 50 * chain_index, + chain_view_bounds[1], + ), + size=(38, 32), + rx=5, + stroke_opacity=0, + fill_opacity=0.5, + fill=selector_color, + id=f"{self.svg_id}-chain-selector-dropdown", + ) + ) + + self.dwg.add( + self.dwg.text( + text="...", + insert=( + chain_view_bounds[0] + 85 + 50 * chain_index, + chain_view_bounds[1] + view_title_font, + ), + font_size=view_title_font, + font_family="Arial", + ) + ) + + self.dwg.add( + self.dwg.rect( + insert=( + chain_view_bounds[0] + 75 + 50 * chain_index, + chain_view_bounds[1], + ), + size=(38, 32), + rx=5, + stroke_opacity=0, + fill_opacity=0, + onmouseover="setPointer();", + onmouseout="unsetPointer();", + onclick=f"toggleDropdown();", + ) + ) # Version toggle switch - self.dwg.add(self.dwg.text(text='Previous', - insert=(chain_view_bounds[2]-215, chain_view_bounds[1]+20), - font_size=16, - font_family='Arial')) - - self.dwg.add(self.dwg.text(text='Latest', - insert=(chain_view_bounds[2]-55, chain_view_bounds[1]+20), - font_size=16, - font_family='Arial')) + self.dwg.add( + self.dwg.text( + text="Previous", + insert=(chain_view_bounds[2] - 215, chain_view_bounds[1] + 20), + font_size=16, + font_family="Arial", + ) + ) + + self.dwg.add( + self.dwg.text( + text="Latest", + insert=(chain_view_bounds[2] - 55, chain_view_bounds[1] + 20), + font_size=16, + font_family="Arial", + ) + ) if self.num_models > 1: - switch_group = self.dwg.g(id=f'{self.svg_id}-switch', - onmouseover='setPointer();', - onmouseout='unsetPointer();', - onclick='toggleVersion();') + switch_group = self.dwg.g( + id=f"{self.svg_id}-switch", + onmouseover="setPointer();", + onmouseout="unsetPointer();", + onclick="toggleVersion();", + ) else: - switch_group = self.dwg.g(id=f'{self.svg_id}-switch') + switch_group = self.dwg.g(id=f"{self.svg_id}-switch") - switch_rectangle = self.dwg.rect(insert=(chain_view_bounds[2]-140, chain_view_bounds[1]), - size=(70, 30), - rx=15, - stroke_opacity=0, - fill_opacity=1, - fill=self.swtich_colors[1]) + switch_rectangle = self.dwg.rect( + insert=(chain_view_bounds[2] - 140, chain_view_bounds[1]), + size=(70, 30), + rx=15, + stroke_opacity=0, + fill_opacity=1, + fill=self.swtich_colors[1], + ) for version_id in range(2): - animation = Animate(values=None, - dur='250ms', - begin='indefinite', - fill='freeze', - attributeName='fill', - to=self.swtich_colors[version_id], - id=f'{self.svg_id}-switch-color-animation-{version_id}') + animation = Animate( + values=None, + dur="250ms", + begin="indefinite", + fill="freeze", + attributeName="fill", + to=self.swtich_colors[version_id], + id=f"{self.svg_id}-switch-color-animation-{version_id}", + ) switch_rectangle.add(animation) switch_group.add(switch_rectangle) - switch_circle = self.dwg.circle(r=10, - center=(chain_view_bounds[2]-85, chain_view_bounds[1]+15), - stroke_opacity=0, - fill_opacity=1, - fill=COLORS['WHITE']) + switch_circle = self.dwg.circle( + r=10, + center=(chain_view_bounds[2] - 85, chain_view_bounds[1] + 15), + stroke_opacity=0, + fill_opacity=1, + fill=COLORS["WHITE"], + ) for version_id in range(2): - animation = Animate(values=None, - dur='250ms', - begin='indefinite', - fill='freeze', - attributeName='cx', - to=(chain_view_bounds[2]-125, chain_view_bounds[2]-85)[version_id], - id=f'{self.svg_id}-switch-move-animation-{version_id}') + animation = Animate( + values=None, + dur="250ms", + begin="indefinite", + fill="freeze", + attributeName="cx", + to=(chain_view_bounds[2] - 125, chain_view_bounds[2] - 85)[version_id], + id=f"{self.svg_id}-switch-move-animation-{version_id}", + ) switch_circle.add(animation) switch_group.add(switch_circle) @@ -269,8 +385,10 @@ def _draw(self): width_buffer = -((viewbox_width - 1000) / 2) height_buffer = -((viewbox_height - 1000 - 50) / 2 + 50) for chain_view in self.chain_views: - chain_view.attribs['x'] = str(view_adj_x) - chain_view.attribs['viewBox'] = f'{width_buffer} {height_buffer} {viewbox_width} {viewbox_height}' + chain_view.attribs["x"] = str(view_adj_x) + chain_view.attribs[ + "viewBox" + ] = f"{width_buffer} {height_buffer} {viewbox_width} {viewbox_height}" self.dwg.add(chain_view) # *** Residue view view_mid_x = (residue_view_bounds[0] + residue_view_bounds[2]) / 2 @@ -281,6 +399,8 @@ def _draw(self): viewbox_height = int(round(1000**2 / view_space_height)) width_buffer = -((viewbox_width - 400) / 2) height_buffer = -((viewbox_height - 1000 - 50) / 2 + 50) - self.residue_view.attribs['x'] = str(view_adj_x) - self.residue_view.attribs['viewBox'] = f'{width_buffer} {height_buffer} {viewbox_width} {viewbox_height}' + self.residue_view.attribs["x"] = str(view_adj_x) + self.residue_view.attribs[ + "viewBox" + ] = f"{width_buffer} {height_buffer} {viewbox_width} {viewbox_height}" self.dwg.add(self.residue_view) diff --git a/iris_validation/metrics/__init__.py b/iris_validation/metrics/__init__.py index e241386..bfd65f7 100644 --- a/iris_validation/metrics/__init__.py +++ b/iris_validation/metrics/__init__.py @@ -4,9 +4,9 @@ import json import clipper -from iris_validation.utils import ONE_LETTER_CODES -from iris_validation.metrics.residue import MetricsResidue -from iris_validation.metrics.chain import MetricsChain +# from iris_validation.utils import ONE_LETTER_CODES +# from iris_validation.metrics.residue import MetricsResidue +# from iris_validation.metrics.chain import MetricsChain from iris_validation.metrics.model import MetricsModel from iris_validation.metrics.series import MetricsModelSeries from iris_validation.metrics.reflections import ReflectionsHandler @@ -19,15 +19,15 @@ def _get_minimol_from_path(model_path): fpdb.read_file(model_path) fpdb.import_minimol(minimol) except Exception as exception: - raise Exception('Failed to import model file') from exception + raise Exception("Failed to import model file") from exception return minimol def _get_minimol_seq_nums(minimol): - seq_nums = { } + seq_nums = {} for chain in minimol: chain_id = str(chain.id()).strip() - seq_nums[chain_id] = [ ] + seq_nums[chain_id] = [] for residue in chain: seq_num = int(residue.seqnum()) seq_nums[chain_id].append(seq_num) @@ -41,7 +41,7 @@ def _get_reflections_data(model_path, reflections_path, model_id=None, out_queue density_scores = reflections_handler.calculate_all_density_scores() reflections_data = (resolution, density_scores) if out_queue is not None: - out_queue.put(('reflections', model_id, reflections_data)) + out_queue.put(("reflections", model_id, reflections_data)) return reflections_data @@ -51,67 +51,81 @@ def _get_molprobity_data(model_path, seq_nums, model_id=None, out_queue=None): from mmtbx.command_line.molprobity import get_master_phil from mmtbx.validation.molprobity import molprobity, molprobity_flags except (ImportError, ModuleNotFoundError): - print('WARNING: Failed to import MolProbity; continuing without MolProbity analyses') + print( + "WARNING: Failed to import MolProbity; continuing without MolProbity analyses" + ) return try: cmdline = load_model_and_data( - args=[ f'pdb.file_name="{model_path}"', 'quiet=True' ], + args=[f'pdb.file_name="{model_path}"', "quiet=True"], master_phil=get_master_phil(), require_data=False, - process_pdb_file=True) + process_pdb_file=True, + ) validation = molprobity(model=cmdline.model) except Exception: - print('WARNING: Failed to run MolProbity; continuing without MolProbity analyses') + print( + "WARNING: Failed to run MolProbity; continuing without MolProbity analyses" + ) return - molprobity_data = { } - molprobity_data['model_wide'] = { } - molprobity_data['model_wide']['summary'] = { 'cbeta_deviations' : validation.cbetadev.n_outliers, - 'clashscore' : validation.clashscore(), - 'ramachandran_outliers' : validation.rama_outliers(), - 'ramachandran_favoured' : validation.rama_favored(), - 'rms_bonds' : validation.rms_bonds(), - 'rms_angles' : validation.rms_angles(), - 'rotamer_outliers' : validation.rota_outliers(), - 'molprobity_score' : validation.molprobity_score() } - - molprobity_data['model_wide']['details'] = { 'clash' : [ ], - 'c-beta' : [ ], - 'nqh_flips' : [ ], - 'omega' : [ ], - 'ramachandran' : [ ], - 'rotamer' : [ ] } - - molprobity_results = { 'clash' : validation.clashes.results, - 'c-beta' : validation.cbetadev.results, - 'nqh_flips' : validation.nqh_flips.results, - 'omega' : validation.omegalyze.results, - 'ramachandran' : validation.ramalyze.results, - 'rotamer' : validation.rotalyze.results } + molprobity_data = {} + molprobity_data["model_wide"] = {} + molprobity_data["model_wide"]["summary"] = { + "cbeta_deviations": validation.cbetadev.n_outliers, + "clashscore": validation.clashscore(), + "ramachandran_outliers": validation.rama_outliers(), + "ramachandran_favoured": validation.rama_favored(), + "rms_bonds": validation.rms_bonds(), + "rms_angles": validation.rms_angles(), + "rotamer_outliers": validation.rota_outliers(), + "molprobity_score": validation.molprobity_score(), + } + + molprobity_data["model_wide"]["details"] = { + "clash": [], + "c-beta": [], + "nqh_flips": [], + "omega": [], + "ramachandran": [], + "rotamer": [], + } + + molprobity_results = { + "clash": validation.clashes.results, + "c-beta": validation.cbetadev.results, + "nqh_flips": validation.nqh_flips.results, + "omega": validation.omegalyze.results, + "ramachandran": validation.ramalyze.results, + "rotamer": validation.rotalyze.results, + } for chain_id, chain_seq_nums in seq_nums.items(): - molprobity_data[chain_id] = { } + molprobity_data[chain_id] = {} for seq_num in chain_seq_nums: - molprobity_data[chain_id][seq_num] = { category : None for category in molprobity_results } - molprobity_data[chain_id][seq_num]['clash'] = 2 - + molprobity_data[chain_id][seq_num] = { + category: None for category in molprobity_results + } + molprobity_data[chain_id][seq_num]["clash"] = 2 for category, results in molprobity_results.items(): for result in results: - if category == 'clash': + if category == "clash": for atom in result.atoms_info: chain_id = atom.chain_id.strip() seq_num = int(atom.resseq.strip()) if molprobity_data[chain_id][seq_num][category] > 0: molprobity_data[chain_id][seq_num][category] -= 1 - details_line = [ ' '.join(a.id_str().split()) for a in result.atoms_info ] + [ result.overlap ] - molprobity_data['model_wide']['details'][category].append(details_line) + details_line = [ + " ".join(a.id_str().split()) for a in result.atoms_info + ] + [result.overlap] + molprobity_data["model_wide"]["details"][category].append(details_line) continue chain_id = result.chain_id.strip() seq_num = int(result.resseq.strip()) - if category in ('ramachandran', 'rotamer'): + if category in ("ramachandran", "rotamer"): if result.score < 0.3: molprobity_data[chain_id][seq_num][category] = 0 elif result.score < 2.0: @@ -125,52 +139,67 @@ def _get_molprobity_data(model_path, seq_nums, model_id=None, out_queue=None): molprobity_data[chain_id][seq_num][category] = 0 if result.outlier: - score = result.deviation if category == 'c-beta' else result.score - details_line = [ result.chain_id.strip(), result.resid.strip(), result.resname.strip(), score ] - molprobity_data['model_wide']['details'][category].append(details_line) + score = result.deviation if category == "c-beta" else result.score + details_line = [ + result.chain_id.strip(), + result.resid.strip(), + result.resname.strip(), + score, + ] + molprobity_data["model_wide"]["details"][category].append(details_line) if out_queue is not None: - out_queue.put(('molprobity', model_id, molprobity_data)) + out_queue.put(("molprobity", model_id, molprobity_data)) return molprobity_data -def _get_covariance_data(model_path, - sequence_path, - distpred_path, - seq_nums, - distpred_format='rosettanpz', - map_align_exe='map_align', - dssp_exe='mkdssp', - model_id=None, - out_queue=None): +def _get_covariance_data( + model_path, + sequence_path, + distpred_path, + seq_nums, + distpred_format="rosettanpz", + map_align_exe="map_align", + dssp_exe="mkdssp", + model_id=None, + out_queue=None, +): try: from Bio.PDB import PDBParser from Bio.PDB.DSSP import DSSP from conkit import applications, command_line, io, plot except (ImportError, ModuleNotFoundError): - print('WARNING: Failed to import Biopython; continuing without covariance analyses') + print( + "WARNING: Failed to import Biopython; continuing without covariance analyses" + ) return parser = PDBParser() - structure = parser.get_structure('structure', model_path)[0] - dssp = DSSP(structure, model_path, dssp=dssp_exe, acc_array='Wilke') - model = io.read(model_path, 'pdb' if model_path.endswith('.pdb') else 'mmcif').top + structure = parser.get_structure("structure", model_path)[0] + dssp = DSSP(structure, model_path, dssp=dssp_exe, acc_array="Wilke") + model = io.read(model_path, "pdb" if model_path.endswith(".pdb") else "mmcif").top prediction = io.read(distpred_path, distpred_format).top - sequence = io.read(sequence_path, 'fasta').top - figure = plot.ModelValidationFigure(model, prediction, sequence, dssp, map_align_exe=map_align_exe) + sequence = io.read(sequence_path, "fasta").top + figure = plot.ModelValidationFigure( + model, prediction, sequence, dssp, map_align_exe=map_align_exe + ) - covariance_data = { } + covariance_data = {} for chain_id, chain_seq_nums in seq_nums.items(): - covariance_data[chain_id] = { } + covariance_data[chain_id] = {} for seq_num in chain_seq_nums: # TODO: by chain - score = figure.smooth_scores[seq_num] if 0 < seq_num < len(figure.smooth_scores) else None + score = ( + figure.smooth_scores[seq_num] + if 0 < seq_num < len(figure.smooth_scores) + else None + ) alignment = 0 if seq_num in figure.alignment.keys() else 1 covariance_data[chain_id][seq_num] = (score, alignment) if out_queue is not None: - out_queue.put(('covariance', model_id, covariance_data)) + out_queue.put(("covariance", model_id, covariance_data)) return covariance_data @@ -179,101 +208,146 @@ def _get_tortoize_data(model_path, model_id=None, out_queue=None): rama_z_data = {} try: tortoize_process = subprocess.Popen( - f'tortoize {model_path}', - shell=True, - stdout=subprocess.PIPE) + f"tortoize {model_path}", shell=True, stdout=subprocess.PIPE + ) except Exception: - print('WARNING: Failed to run tortoize') + print("WARNING: Failed to run tortoize") return tortoize_output = tortoize_process.communicate()[0] tortoize_dict = json.loads(tortoize_output) residues = tortoize_dict["model"]["1"]["residues"] for res in residues: - chain_rama_z_data = rama_z_data.setdefault(res['pdb']['strandID'], {}) - chain_rama_z_data[res['pdb']['seqNum']] = res['ramachandran']['z-score'] + chain_rama_z_data = rama_z_data.setdefault(res["pdb"]["strandID"], {}) + chain_rama_z_data[res["pdb"]["seqNum"]] = res["ramachandran"]["z-score"] if out_queue is not None: - out_queue.put(('rama_z', model_id, rama_z_data)) - + out_queue.put(("rama_z", model_id, rama_z_data)) + print(rama_z_data) return rama_z_data -def metrics_model_series_from_files(model_paths, - reflections_paths=None, - sequence_paths=None, - distpred_paths=None, - run_covariance=False, - run_molprobity=False, - calculate_rama_z=True, - multiprocessing=True): + +def metrics_model_series_from_files( + model_paths, + reflections_paths=None, + sequence_paths=None, + distpred_paths=None, + run_covariance=False, + run_molprobity=False, + calculate_rama_z=False, + model_json_paths=None, + multiprocessing=True, +): + print(model_paths) try: if isinstance(model_paths, str): - model_paths = [ model_paths ] + model_paths = [model_paths] model_paths = tuple(model_paths) - if None in model_paths: + if model_paths[-1] is None: raise TypeError except TypeError as exception: - raise ValueError('Argument \'model_paths\' should be an iterable of filenames') from exception - - path_lists = [ model_paths, reflections_paths, sequence_paths, distpred_paths ] + raise ValueError( + "Argument 'model_paths' should be an iterable of filenames" + ) from exception + + path_lists = [ + model_paths, + reflections_paths, + sequence_paths, + distpred_paths, + model_json_paths, + ] + print(path_lists) for i in range(1, len(path_lists)): if path_lists[i] is None: - path_lists[i] = tuple([ None for _ in model_paths ]) - if len(path_lists[i]) != len(model_paths) or \ - path_lists[i].count(None) not in (0, len(path_lists[i])): - raise ValueError('Path arguments should be equal-length iterables of filenames') - - all_minimol_data = [ ] - all_covariance_data = [ ] - all_molprobity_data = [ ] - all_reflections_data = [ ] - all_rama_z_data = [ ] + path_lists[i] = tuple([None for _ in model_paths]) + if len(path_lists[i]) != len(model_paths) or path_lists[i].count(None) not in ( + 0, + len(path_lists[i]), + ): + raise ValueError( + "Path arguments should be equal-length iterables of filenames" + ) + + # list_metric_names = ["molprobity", "rama_z", "map_fit", "backbone_fit"] + all_minimol_data = [] + all_covariance_data = [] + all_molprobity_data = [] + all_reflections_data = [] + all_rama_z_data = [] num_queued = 0 results_queue = Queue() + check_resnum = False for model_id, file_paths in enumerate(zip(*path_lists)): - model_path, reflections_path, sequence_path, distpred_path = file_paths + ( + model_path, + reflections_path, + sequence_path, + distpred_path, + json_data_path, + ) = file_paths + if model_path is None: + continue minimol = _get_minimol_from_path(model_path) seq_nums = _get_minimol_seq_nums(minimol) covariance_data = None molprobity_data = None reflections_data = None rama_z_data = None + + if json_data_path: + check_resnum = True + with open(json_data_path, "r") as j: + json_data = json.load(j) + for metric in json_data: + if metric == "molprobity": + molprobity_data = json_data["molprobity"] + if metric == "rama_z": + rama_z_data = json_data["rama_z"] + if metric == "map_fit": + reflections_data = json_data["map_fit"] if run_covariance: if multiprocessing: - p = Process(target=_get_covariance_data, - args=(model_path, sequence_path, distpred_path, seq_nums), - kwargs={ 'model_id': model_id, - 'out_queue': results_queue }) + p = Process( + target=_get_covariance_data, + args=(model_path, sequence_path, distpred_path, seq_nums), + kwargs={"model_id": model_id, "out_queue": results_queue}, + ) p.start() num_queued += 1 else: - covariance_data = _get_covariance_data(model_path, sequence_path, distpred_path) + covariance_data = _get_covariance_data( + model_path, sequence_path, distpred_path + ) if run_molprobity: if multiprocessing: - p = Process(target=_get_molprobity_data, - args=(model_path, seq_nums), - kwargs={ 'model_id': model_id, - 'out_queue': results_queue }) + p = Process( + target=_get_molprobity_data, + args=(model_path, seq_nums), + kwargs={"model_id": model_id, "out_queue": results_queue}, + ) p.start() num_queued += 1 else: molprobity_data = _get_molprobity_data(model_path, seq_nums) if reflections_path is not None: if multiprocessing: - p = Process(target=_get_reflections_data, - args=(model_path, reflections_path), - kwargs={ 'model_id': model_id, - 'out_queue': results_queue }) + p = Process( + target=_get_reflections_data, + args=(model_path, reflections_path), + kwargs={"model_id": model_id, "out_queue": results_queue}, + ) p.start() num_queued += 1 else: reflections_data = _get_reflections_data(model_path, reflections_path) if calculate_rama_z: if multiprocessing: - p = Process(target=_get_tortoize_data, - args=(model_path,), - kwargs={ 'model_id': model_id, - 'out_queue': results_queue }) + p = Process( + target=_get_tortoize_data, + args=(model_path,), + kwargs={"model_id": model_id, "out_queue": results_queue}, + ) p.start() num_queued += 1 else: @@ -288,18 +362,25 @@ def metrics_model_series_from_files(model_paths, if multiprocessing: for _ in range(num_queued): result_type, model_id, result = results_queue.get() - if result_type == 'covariance': + if result_type == "covariance": all_covariance_data[model_id] = result - if result_type == 'molprobity': + if result_type == "molprobity": all_molprobity_data[model_id] = result - if result_type == 'reflections': + if result_type == "reflections": all_reflections_data[model_id] = result - if result_type == 'rama_z': + if result_type == "rama_z": all_rama_z_data[model_id] = result - - metrics_models = [ ] - for model_id, model_data in enumerate(zip(all_minimol_data, all_covariance_data, all_molprobity_data, all_reflections_data, all_rama_z_data)): - metrics_model = MetricsModel(*model_data) + metrics_models = [] + for model_id, model_data in enumerate( + zip( + all_minimol_data, + all_covariance_data, + all_molprobity_data, + all_reflections_data, + all_rama_z_data, + ) + ): + metrics_model = MetricsModel(*model_data, check_resnum) metrics_models.append(metrics_model) metrics_model_series = MetricsModelSeries(metrics_models) diff --git a/iris_validation/metrics/chain.py b/iris_validation/metrics/chain.py index 9ee0ab6..31c9281 100644 --- a/iris_validation/metrics/chain.py +++ b/iris_validation/metrics/chain.py @@ -1,8 +1,17 @@ from iris_validation.metrics.residue import MetricsResidue -class MetricsChain(): - def __init__(self, mmol_chain, parent_model=None, covariance_data=None, molprobity_data=None, density_scores=None, rama_z_data=None): +class MetricsChain: + def __init__( + self, + mmol_chain, + parent_model=None, + covariance_data=None, + molprobity_data=None, + density_scores=None, + rama_z_data=None, + check_resnum=False, + ): self.minimol_chain = mmol_chain self.parent_model = parent_model self.covariance_data = covariance_data @@ -11,18 +20,78 @@ def __init__(self, mmol_chain, parent_model=None, covariance_data=None, molprobi self.rama_z_data = rama_z_data self._index = -1 - self.residues = [ ] + self.residues = [] self.length = len(mmol_chain) self.chain_id = str(mmol_chain.id().trim()) for residue_index, mmol_residue in enumerate(mmol_chain): - previous_residue = mmol_chain[residue_index-1] if residue_index > 0 else None - next_residue = mmol_chain[residue_index+1] if residue_index < len(mmol_chain)-1 else None + previous_residue = ( + mmol_chain[residue_index - 1] if residue_index > 0 else None + ) + next_residue = ( + mmol_chain[residue_index + 1] + if residue_index < len(mmol_chain) - 1 + else None + ) seq_num = int(mmol_residue.seqnum()) - residue_covariance_data = None if covariance_data is None else covariance_data[seq_num] - residue_molprobity_data = None if molprobity_data is None else molprobity_data[seq_num] - residue_density_scores = None if density_scores is None else density_scores[seq_num] - residue_rama_z_score = None if rama_z_data is None else rama_z_data.get(seq_num, None) + res_id = str(mmol_residue.id()).strip() + # covariance + if covariance_data is None: + residue_covariance_data = None + else: + if check_resnum: + try: + residue_covariance_data = covariance_data[res_id] + except KeyError: + try: + residue_covariance_data = covariance_data[mmol_residue.id()] + except KeyError: + residue_covariance_data = None + else: + residue_covariance_data = covariance_data[seq_num] + # molprobity + if molprobity_data is None: + residue_molprobity_data = None + else: + if check_resnum: + try: + residue_molprobity_data = molprobity_data[res_id] + except KeyError: + try: + residue_molprobity_data = molprobity_data[mmol_residue.id()] + except KeyError: + residue_molprobity_data = None + else: + residue_molprobity_data = molprobity_data[seq_num] + # density scores + if density_scores is None: + residue_density_scores = None + else: + if check_resnum: + try: + residue_density_scores = density_scores[res_id] + except KeyError: + try: + residue_density_scores = density_scores[mmol_residue.id()] + except KeyError: + residue_density_scores = None + else: + residue_density_scores = density_scores[seq_num] + # rama_z + if rama_z_data is None: + residue_rama_z_score = None + else: + if check_resnum: + try: + residue_rama_z_score = rama_z_data[res_id] + except KeyError: + try: + residue_rama_z_score = rama_z_data[mmol_residue.id()] + except KeyError: + residue_rama_z_score = None + else: + residue_rama_z_score = rama_z_data.get(seq_num, None) + residue = MetricsResidue( mmol_residue, residue_index, @@ -32,13 +101,24 @@ def __init__(self, mmol_chain, parent_model=None, covariance_data=None, molprobi residue_covariance_data, residue_molprobity_data, residue_density_scores, - residue_rama_z_score) + residue_rama_z_score, + ) self.residues.append(residue) for residue_index, residue in enumerate(self.residues): - if (0 < residue_index < len(self.residues)-1) and \ - (self.residues[residue_index-1].is_aa and residue.is_aa and self.residues[residue_index+1].is_aa) and \ - (self.residues[residue_index-1].sequence_number+1 == residue.sequence_number == self.residues[residue_index+1].sequence_number-1): + if ( + (0 < residue_index < len(self.residues) - 1) + and ( + self.residues[residue_index - 1].is_aa + and residue.is_aa + and self.residues[residue_index + 1].is_aa + ) + and ( + self.residues[residue_index - 1].sequence_number + 1 + == residue.sequence_number + == self.residues[residue_index + 1].sequence_number - 1 + ) + ): residue.is_consecutive_aa = True else: residue.is_consecutive_aa = False @@ -47,29 +127,35 @@ def __iter__(self): return self def __next__(self): - if self._index < self.length-1: + if self._index < self.length - 1: self._index += 1 return self.residues[self._index] self._index = -1 raise StopIteration def get_residue(self, sequence_number): - return next(residue for residue in self.residues if residue.sequence_number == sequence_number) + return next( + residue + for residue in self.residues + if residue.sequence_number == sequence_number + ) def remove_residue(self, residue): if residue in self.residues: self.residues.remove(residue) self.length -= 1 else: - print('Error removing residue, no matching residue was found.') + print("Error removing residue, no matching residue was found.") def remove_non_aa_residues(self): - non_aa_residues = [ residue for residue in self.residues if not residue.is_aa ] + non_aa_residues = [residue for residue in self.residues if not residue.is_aa] for residue in non_aa_residues: self.remove_residue(residue) def b_factor_lists(self): - all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs = [ [ ] for _ in range(8) ] + all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs = [ + [] for _ in range(8) + ] for residue in self.residues: all_bfs.append(residue.avg_b_factor) if residue.is_aa: @@ -85,4 +171,13 @@ def b_factor_lists(self): ligand_bfs.append(residue.avg_b_factor) else: ion_bfs.append(residue.avg_b_factor) - return all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs + return ( + all_bfs, + aa_bfs, + mc_bfs, + sc_bfs, + non_aa_bfs, + water_bfs, + ligand_bfs, + ion_bfs, + ) diff --git a/iris_validation/metrics/model.py b/iris_validation/metrics/model.py index 530c196..22d5250 100644 --- a/iris_validation/metrics/model.py +++ b/iris_validation/metrics/model.py @@ -3,13 +3,22 @@ from iris_validation.metrics.percentiles import PercentileCalculator -class MetricsModel(): - def __init__(self, mmol_model, covariance_data=None, molprobity_data=None, reflections_data=None, rama_z_data=None): +class MetricsModel: + def __init__( + self, + mmol_model, + covariance_data=None, + molprobity_data=None, + reflections_data=None, + rama_z_data=None, + check_resnum=False, + ): self.minimol_model = mmol_model self.covariance_data = covariance_data self.molprobity_data = molprobity_data self.reflections_data = reflections_data self.rama_z_data = rama_z_data + print(rama_z_data) self._index = -1 self.minimol_chains = list(mmol_model.model()) @@ -21,14 +30,29 @@ def __init__(self, mmol_model, covariance_data=None, molprobity_data=None, refle self.percentile_calculator = PercentileCalculator(self.resolution) self.rotamer_calculator = RotamerCalculator() - self.chains = [ ] + self.chains = [] for mmol_chain in mmol_model: chain_id = str(mmol_chain.id().trim()) - chain_covariance_data = None if covariance_data is None else covariance_data[chain_id] - chain_molprobity_data = None if molprobity_data is None else molprobity_data[chain_id] - chain_density_scores = None if self.density_scores is None else self.density_scores[chain_id] + chain_covariance_data = ( + None if covariance_data is None else covariance_data[chain_id] + ) + chain_molprobity_data = ( + None if molprobity_data is None else molprobity_data[chain_id] + ) + chain_density_scores = ( + None if self.density_scores is None else self.density_scores[chain_id] + ) chain_rama_z_data = None if rama_z_data is None else rama_z_data[chain_id] - chain = MetricsChain(mmol_chain, self, chain_covariance_data, chain_molprobity_data, chain_density_scores, chain_rama_z_data) + print(chain_rama_z_data) + chain = MetricsChain( + mmol_chain, + self, + chain_covariance_data, + chain_molprobity_data, + chain_density_scores, + chain_rama_z_data, + check_resnum=check_resnum, + ) chain.remove_non_aa_residues() self.chains.append(chain) @@ -36,7 +60,7 @@ def __iter__(self): return self def __next__(self): - if self._index < len(self.chains)-1: + if self._index < len(self.chains) - 1: self._index += 1 return self.chains[self._index] self._index = -1 @@ -46,19 +70,66 @@ def get_chain(self, chain_id): return next(chain for chain in self.chains if chain.chain_id == chain_id) def remove_chain(self, chain_id): - matching_chains = [ chain for chain in self.chains if chain.chain_id == chain_id ] + matching_chains = [chain for chain in self.chains if chain.chain_id == chain_id] if len(matching_chains) == 0: - print('Error removing chain, no chains matching that ID were found.') + print("Error removing chain, no chains matching that ID were found.") else: for chain in matching_chains: self.chains.remove(chain) self.chain_count -= 1 def b_factor_lists(self): - all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs = [ ], [ ], [ ], [ ], [ ], [ ], [ ], [ ] + all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs = ( + [], + [], + [], + [], + [], + [], + [], + [], + ) for chain in self.chains: - all_bfs_c, aa_bfs_c, mc_bfs_c, sc_bfs_c, non_aa_bfs_c, water_bfs_c, ligand_bfs_c, ion_bfs_c = chain.b_factor_lists() - for model_li, chain_li in zip((all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs), - (all_bfs_c, aa_bfs_c, mc_bfs_c, sc_bfs_c, non_aa_bfs_c, water_bfs_c, ligand_bfs_c, ion_bfs_c)): + ( + all_bfs_c, + aa_bfs_c, + mc_bfs_c, + sc_bfs_c, + non_aa_bfs_c, + water_bfs_c, + ligand_bfs_c, + ion_bfs_c, + ) = chain.b_factor_lists() + for model_li, chain_li in zip( + ( + all_bfs, + aa_bfs, + mc_bfs, + sc_bfs, + non_aa_bfs, + water_bfs, + ligand_bfs, + ion_bfs, + ), + ( + all_bfs_c, + aa_bfs_c, + mc_bfs_c, + sc_bfs_c, + non_aa_bfs_c, + water_bfs_c, + ligand_bfs_c, + ion_bfs_c, + ), + ): model_li += chain_li - return all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs + return ( + all_bfs, + aa_bfs, + mc_bfs, + sc_bfs, + non_aa_bfs, + water_bfs, + ligand_bfs, + ion_bfs, + ) diff --git a/iris_validation/metrics/residue.py b/iris_validation/metrics/residue.py index 86b628d..387de7d 100644 --- a/iris_validation/metrics/residue.py +++ b/iris_validation/metrics/residue.py @@ -6,8 +6,19 @@ from iris_validation._defs import RAMACHANDRAN_THRESHOLDS -class MetricsResidue(): - def __init__(self, mmol_residue, index_in_chain=None, previous_residue=None, next_residue=None, parent_chain=None, covariance_data=None, molprobity_data=None, density_scores=None, rama_z_score=None): +class MetricsResidue: + def __init__( + self, + mmol_residue, + index_in_chain=None, + previous_residue=None, + next_residue=None, + parent_chain=None, + covariance_data=None, + molprobity_data=None, + density_scores=None, + rama_z_score=None, + ): self.minimol_residue = mmol_residue self.initialised_with_context = index_in_chain is not None self.index_in_chain = index_in_chain @@ -25,17 +36,37 @@ def __init__(self, mmol_residue, index_in_chain=None, previous_residue=None, nex self.code_type = utils.code_type(mmol_residue) self.backbone_atoms = utils.get_backbone_atoms(mmol_residue) self.backbone_atoms_are_correct = None not in self.backbone_atoms - self.backbone_geometry_is_correct = utils.check_backbone_geometry(mmol_residue) if self.backbone_atoms_are_correct else None + self.backbone_geometry_is_correct = ( + utils.check_backbone_geometry(mmol_residue) + if self.backbone_atoms_are_correct + else None + ) self.is_aa = utils.check_is_aa(mmol_residue) - self.is_water = str(mmol_residue.type()).strip() == 'HOH' + self.is_water = str(mmol_residue.type()).strip() == "HOH" self.is_consecutive_aa = None # B-factors - self.max_b_factor, self.avg_b_factor, self.std_b_factor, self.mc_b_factor, self.sc_b_factor = utils.analyse_b_factors(mmol_residue, self.is_aa, self.backbone_atoms) + ( + self.max_b_factor, + self.avg_b_factor, + self.std_b_factor, + self.mc_b_factor, + self.sc_b_factor, + ) = utils.analyse_b_factors(mmol_residue, self.is_aa, self.backbone_atoms) # Backbone torsion angles - self.phi = clipper.MMonomer.protein_ramachandran_phi(self.previous_residue, mmol_residue) if self.previous_residue else None - self.psi = clipper.MMonomer.protein_ramachandran_psi(mmol_residue, self.next_residue) if self.next_residue else None + self.phi = ( + clipper.MMonomer.protein_ramachandran_phi( + self.previous_residue, mmol_residue + ) + if self.previous_residue + else None + ) + self.psi = ( + clipper.MMonomer.protein_ramachandran_psi(mmol_residue, self.next_residue) + if self.next_residue + else None + ) if self.phi is not None and isnan(self.phi): self.phi = None if self.psi is not None and isnan(self.psi): @@ -46,16 +77,26 @@ def __init__(self, mmol_residue, index_in_chain=None, previous_residue=None, nex self.is_sidechain_complete = self.chis is not None and None not in self.chis # Ramachandran - self.ramachandran_score = utils.calculate_ramachandran_score(mmol_residue, self.code, self.phi, self.psi) + self.ramachandran_score = utils.calculate_ramachandran_score( + mmol_residue, self.code, self.phi, self.psi + ) self.ramachandran_flags = (None, None, None) if self.ramachandran_score is not None: if RAMACHANDRAN_THRESHOLDS[0] <= self.ramachandran_score: self.ramachandran_flags = (True, False, False) - elif RAMACHANDRAN_THRESHOLDS[1] <= self.ramachandran_score < RAMACHANDRAN_THRESHOLDS[0]: + elif ( + RAMACHANDRAN_THRESHOLDS[1] + <= self.ramachandran_score + < RAMACHANDRAN_THRESHOLDS[0] + ): self.ramachandran_flags = (False, True, False) elif self.ramachandran_score < RAMACHANDRAN_THRESHOLDS[1]: self.ramachandran_flags = (False, False, True) - self.ramachandran_favoured, self.ramachandran_allowed, self.ramachandran_outlier = self.ramachandran_flags + ( + self.ramachandran_favoured, + self.ramachandran_allowed, + self.ramachandran_outlier, + ) = self.ramachandran_flags # Rotamer rotamer_calculator = self.parent_chain.parent_model.rotamer_calculator @@ -70,41 +111,80 @@ def __init__(self, mmol_residue, index_in_chain=None, previous_residue=None, nex self.rotamer_flags = (False, True, False) elif rotamer_clf_id in (0, 1): self.rotamer_flags = (False, False, True) - self.rotamer_favoured, self.rotamer_allowed, self.rotamer_outlier = self.rotamer_flags + ( + self.rotamer_favoured, + self.rotamer_allowed, + self.rotamer_outlier, + ) = self.rotamer_flags # MolProbity data self.discrete_indicators = self.molprobity_data if self.molprobity_data is None: - ramachandran_indicator = 0 if self.ramachandran_outlier else \ - 1 if self.ramachandran_allowed else \ - 2 if self.ramachandran_favoured else None - rotamer_indicator = 0 if self.rotamer_outlier else \ - 1 if self.rotamer_allowed else \ - 2 if self.rotamer_favoured else None - self.discrete_indicators = { 'clash' : None, - 'c-beta' : None, - 'omega' : None, - 'ramachandran' : ramachandran_indicator, - 'rotamer' : rotamer_indicator } + ramachandran_indicator = ( + 0 + if self.ramachandran_outlier + else 1 + if self.ramachandran_allowed + else 2 + if self.ramachandran_favoured + else None + ) + rotamer_indicator = ( + 0 + if self.rotamer_outlier + else 1 + if self.rotamer_allowed + else 2 + if self.rotamer_favoured + else None + ) + self.discrete_indicators = { + "clash": None, + "c-beta": None, + "omega": None, + "ramachandran": ramachandran_indicator, + "rotamer": rotamer_indicator, + } # Covariance data self.covariance_score, self.cmo_string = None, None if self.covariance_data is not None: self.covariance_score, self.cmo_string = self.covariance_data - self.discrete_indicators['cmo'] = self.cmo_string + self.discrete_indicators["cmo"] = self.cmo_string # Density fit scores - self.fit_score, self.mainchain_fit_score, self.sidechain_fit_score = None, None, None + self.fit_score, self.mainchain_fit_score, self.sidechain_fit_score = ( + None, + None, + None, + ) if self.density_scores is not None: - self.fit_score, self.mainchain_fit_score, self.sidechain_fit_score = self.density_scores - + ( + self.fit_score, + self.mainchain_fit_score, + self.sidechain_fit_score, + ) = self.density_scores # Percentiles percentile_calculator = self.parent_chain.parent_model.percentile_calculator - self.avg_b_factor_percentile = percentile_calculator.get_percentile(0, self.avg_b_factor) - self.max_b_factor_percentile = percentile_calculator.get_percentile(1, self.max_b_factor) - self.std_b_factor_percentile = percentile_calculator.get_percentile(2, self.std_b_factor) - self.fit_score_percentile = percentile_calculator.get_percentile(3, self.fit_score) - self.mainchain_fit_score_percentile = percentile_calculator.get_percentile(4, self.mainchain_fit_score) - self.sidechain_fit_score_percentile = percentile_calculator.get_percentile(5, self.sidechain_fit_score) - self.covariance_score_percentile = percentile_calculator.get_percentile(6, self.covariance_score) + self.avg_b_factor_percentile = percentile_calculator.get_percentile( + 0, self.avg_b_factor + ) + self.max_b_factor_percentile = percentile_calculator.get_percentile( + 1, self.max_b_factor + ) + self.std_b_factor_percentile = percentile_calculator.get_percentile( + 2, self.std_b_factor + ) + self.fit_score_percentile = percentile_calculator.get_percentile( + 3, self.fit_score + ) + self.mainchain_fit_score_percentile = percentile_calculator.get_percentile( + 4, self.mainchain_fit_score + ) + self.sidechain_fit_score_percentile = percentile_calculator.get_percentile( + 5, self.sidechain_fit_score + ) + self.covariance_score_percentile = percentile_calculator.get_percentile( + 6, self.covariance_score + ) # self.rama_z_score_percentile = percentile_calculator.get_percentile(7, self.rama_z) diff --git a/iris_validation/metrics/series.py b/iris_validation/metrics/series.py index e2fddf2..878cc43 100644 --- a/iris_validation/metrics/series.py +++ b/iris_validation/metrics/series.py @@ -1,11 +1,12 @@ -import clipper +# import clipper from iris_validation import utils -from iris_validation.metrics.model import MetricsModel -from iris_validation.metrics.reflections import ReflectionsHandler +# from iris_validation.metrics.model import MetricsModel +# from iris_validation.metrics.reflections import ReflectionsHandler -class MetricsModelSeries(): + +class MetricsModelSeries: def __init__(self, metrics_models): self.metrics_models = metrics_models self.chain_sets = None @@ -18,7 +19,9 @@ def align_models(self): if len(self.metrics_models) == 0: return if len(self.metrics_models) > 2: - raise Exception('Iris currently only supports alignment for two model iterations') + raise Exception( + "Iris currently only supports alignment for two model iterations" + ) # Check for and remove chains with no amino acid residues bad_chain_ids = set() @@ -27,15 +30,20 @@ def align_models(self): if chain.length == 0: bad_chain_ids.add(chain.chain_id) if len(bad_chain_ids) > 0: - print('WARNING: at least one chain contains no amino acid residues. Ignoring chains: ' + ', '.join(sorted(bad_chain_ids))) + print( + "WARNING: at least one chain contains no amino acid residues. Ignoring chains: " + + ", ".join(sorted(bad_chain_ids)) + ) for model in self.metrics_models: for chain_id in bad_chain_ids: model.remove_chain(chain_id) - if 0 in [ model.chain_count for model in self.metrics_models ]: - raise Exception('One or more models had no valid chains') + if 0 in [model.chain_count for model in self.metrics_models]: + raise Exception("One or more models had no valid chains") # Align chains - chain_id_sets = [ set(chain.chain_id for chain in model) for model in self.metrics_models ] + chain_id_sets = [ + set(chain.chain_id for chain in model) for model in self.metrics_models + ] common_chain_ids = set.intersection(*chain_id_sets) lost_chain_ids = set() for model, chain_id_set in zip(self.metrics_models, chain_id_sets): @@ -45,22 +53,29 @@ def align_models(self): for chain_id in model_lost_chain_ids: model.remove_chain(chain_id) if len(lost_chain_ids) > 0: - print(f'WARNING: Some chains are not present or valid across all model versions ({sorted(lost_chain_ids)}). These chains will not be represented in the validation report.') + print( + f"WARNING: Some chains are not present or valid across all model versions ({sorted(lost_chain_ids)}). These chains will not be represented in the validation report." + ) # Chain sets - self.chain_sets = { } + self.chain_sets = {} for chain_id in sorted(common_chain_ids): - self.chain_sets[chain_id] = [ ] + self.chain_sets[chain_id] = [] for model in self.metrics_models: - matching_chain = [ chain for chain in model if chain.chain_id == chain_id ][0] + matching_chain = [ + chain for chain in model if chain.chain_id == chain_id + ][0] self.chain_sets[chain_id].append(matching_chain) # Align residues - self.chain_alignments = { } + self.chain_alignments = {} for chain_id, chain_set in self.chain_sets.items(): - sequences = [ utils.code_three_to_one([ residue.code for residue in chain ]) for chain in chain_set ] + sequences = [ + utils.code_three_to_one([residue.code for residue in chain]) + for chain in chain_set + ] if len(sequences) == 1: - self.chain_alignments[chain_id] = (sequences[0], ) + self.chain_alignments[chain_id] = (sequences[0],) continue alignment_pair = utils.needleman_wunsch(sequences[-2], sequences[-1]) self.chain_alignments[chain_id] = alignment_pair @@ -76,36 +91,38 @@ def get_raw_data(self): has_rama_z = self.metrics_models[0].rama_z_data is not None has_rama_classification = not has_rama_z - raw_data = [ ] + raw_data = [] for chain_id, chain_set in self.chain_sets.items(): alignment_strings = self.chain_alignments[chain_id] aligned_length = len(alignment_strings[0]) - chain_data = { 'chain_id' : chain_id, - 'num_versions' : num_versions, - 'has_covariance' : has_covariance, - 'has_molprobity' : has_molprobity, - 'has_reflections' : has_reflections, - 'has_rama_z' : has_rama_z, - 'has_rama_classification': has_rama_classification, - 'aligned_length' : aligned_length, - 'residue_seqnos' : [ ], - 'residue_codes' : [ ], - 'residue_validities' : [ ], - 'discrete_values' : [ ], - 'continuous_values' : [ ], - 'percentile_values' : [ ] } + chain_data = { + "chain_id": chain_id, + "num_versions": num_versions, + "has_covariance": has_covariance, + "has_molprobity": has_molprobity, + "has_reflections": has_reflections, + "has_rama_z": has_rama_z, + "has_rama_classification": has_rama_classification, + "aligned_length": aligned_length, + "residue_seqnos": [], + "residue_codes": [], + "residue_validities": [], + "discrete_values": [], + "continuous_values": [], + "percentile_values": [], + } for alignment_string, chain in zip(alignment_strings, chain_set): - residue_seqnos = [ ] - residue_codes = [ ] - residue_validities = [ ] - discrete_values = [ ] - continuous_values = [ ] - percentile_values = [ ] + residue_seqnos = [] + residue_codes = [] + residue_validities = [] + discrete_values = [] + continuous_values = [] + percentile_values = [] residue_id = -1 for alignment_char in alignment_string: - if alignment_char == '-': + if alignment_char == "-": residue_seqnos.append(None) residue_codes.append(None) residue_validities.append(False) @@ -120,27 +137,37 @@ def get_raw_data(self): residue_codes.append(residue.code) residue_validities.append(True) - residue_discrete_values = (residue.discrete_indicators['rotamer'], - residue.discrete_indicators['ramachandran'], - residue.discrete_indicators['clash'], - residue.discrete_indicators['cmo']) - residue_continuous_values = (residue.avg_b_factor, - residue.max_b_factor, - residue.std_b_factor, - residue.fit_score, - residue.mainchain_fit_score, - residue.sidechain_fit_score, - residue.covariance_score, - residue.rama_z) - residue_percentile_values = (residue.avg_b_factor_percentile, - residue.max_b_factor_percentile, - residue.std_b_factor_percentile, - residue.fit_score_percentile, - residue.mainchain_fit_score_percentile, - residue.sidechain_fit_score_percentile, - residue.covariance_score_percentile) - - residue_continuous_values = tuple(round(x, 3) if isinstance(x, float) else x for x in residue_continuous_values) + residue_discrete_values = ( + residue.discrete_indicators["rotamer"], + residue.discrete_indicators["ramachandran"], + residue.discrete_indicators["clash"], + residue.discrete_indicators["cmo"], + ) + residue_continuous_values = ( + residue.avg_b_factor, + residue.max_b_factor, + residue.std_b_factor, + residue.fit_score, + residue.mainchain_fit_score, + residue.sidechain_fit_score, + residue.covariance_score, + residue.rama_z, + ) + print(residue_continuous_values) + residue_percentile_values = ( + residue.avg_b_factor_percentile, + residue.max_b_factor_percentile, + residue.std_b_factor_percentile, + residue.fit_score_percentile, + residue.mainchain_fit_score_percentile, + residue.sidechain_fit_score_percentile, + residue.covariance_score_percentile, + ) + + residue_continuous_values = tuple( + round(x, 3) if isinstance(x, float) else x + for x in residue_continuous_values + ) discrete_values.append(residue_discrete_values) continuous_values.append(residue_continuous_values) percentile_values.append(residue_percentile_values) @@ -148,16 +175,20 @@ def get_raw_data(self): discrete_values = list(zip(*discrete_values)) continuous_values = list(zip(*continuous_values)) percentile_values = list(zip(*percentile_values)) - chain_data['residue_seqnos'].append(residue_seqnos) - chain_data['residue_codes'].append(residue_codes) - chain_data['residue_validities'].append(residue_validities) - chain_data['discrete_values'].append(discrete_values) - chain_data['continuous_values'].append(continuous_values) - chain_data['percentile_values'].append(percentile_values) - - chain_data['discrete_values'] = list(zip(*chain_data['discrete_values'])) - chain_data['continuous_values'] = list(zip(*chain_data['continuous_values'])) - chain_data['percentile_values'] = list(zip(*chain_data['percentile_values'])) + chain_data["residue_seqnos"].append(residue_seqnos) + chain_data["residue_codes"].append(residue_codes) + chain_data["residue_validities"].append(residue_validities) + chain_data["discrete_values"].append(discrete_values) + chain_data["continuous_values"].append(continuous_values) + chain_data["percentile_values"].append(percentile_values) + + chain_data["discrete_values"] = list(zip(*chain_data["discrete_values"])) + chain_data["continuous_values"] = list( + zip(*chain_data["continuous_values"]) + ) + chain_data["percentile_values"] = list( + zip(*chain_data["percentile_values"]) + ) raw_data.append(chain_data) return raw_data From 0a6e404654c52adea16b42dd510f4ea23f445b38 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Tue, 4 Apr 2023 11:28:07 +0100 Subject: [PATCH 03/20] add density scores --- iris_validation/__init__.py | 8 +- iris_validation/_defs.py | 406 ++++++++++++++-------------- iris_validation/graphics/chain.py | 17 +- iris_validation/graphics/panel.py | 54 +++- iris_validation/metrics/__init__.py | 3 - iris_validation/metrics/model.py | 2 - iris_validation/metrics/series.py | 1 - 7 files changed, 272 insertions(+), 219 deletions(-) diff --git a/iris_validation/__init__.py b/iris_validation/__init__.py index 8695ab9..36b3a75 100644 --- a/iris_validation/__init__.py +++ b/iris_validation/__init__.py @@ -22,6 +22,8 @@ def generate_report( multiprocessing=True, latest_model_metrics_json=None, previous_model_metrics_json=None, + discrete_metrics_to_display=None, + continuous_metrics_to_display=None, wrap_in_html=True, output_dir=None, ): @@ -44,7 +46,11 @@ def generate_report( multiprocessing, ) model_series_data = model_series.get_raw_data() - panel = Panel(model_series_data) + panel = Panel( + model_series_data, + continuous_metrics_to_display=continuous_metrics_to_display, + discrete_metrics_to_display=discrete_metrics_to_display, + ) panel_string = panel.dwg.tostring() if wrap_in_html: diff --git a/iris_validation/_defs.py b/iris_validation/_defs.py index c17c1a4..47713d8 100644 --- a/iris_validation/_defs.py +++ b/iris_validation/_defs.py @@ -1,211 +1,211 @@ -COLORS = { 'BLACK' : 'rgb(000, 000, 000)', - 'WHITE' : 'rgb(255, 255, 255)', +COLORS = { + "BLACK": "rgb(000, 000, 000)", + "WHITE": "rgb(255, 255, 255)", + "GREY": "rgb(050, 050, 050)", + "L_GREY": "rgb(150, 150, 150)", + "VL_GREY": "rgb(200, 200, 200)", + "RED": "rgb(200, 080, 080)", + "ORANGE": "rgb(250, 200, 050)", + "GREEN": "rgb(050, 200, 050)", + "BLUE": "rgb(050, 050, 200)", + "CYAN": "rgb(050, 200, 200)", + "TEAL": "rgb(000, 120, 120)", + "SLATE": "rgb(120, 160, 200)", + "MAGENTA": "rgb(200, 050, 200)", + "INDIGO": "rgb(080, 000, 120)", + "L_PINK": "rgb(255, 235, 235)", + "BAR_GREEN": "rgb(090, 237, 141)", + "BAR_ORANGE": "rgb(247, 212, 134)", + "BAR_RED": "rgb(240, 106, 111)", +} - 'GREY' : 'rgb(050, 050, 050)', - 'L_GREY' : 'rgb(150, 150, 150)', - 'VL_GREY' : 'rgb(200, 200, 200)', +CONTINUOUS_METRICS = ( + { + "id": 0, + "type": "continuous", + "long_name": "Average B-factor", + "short_name": "Avg. B", + "ring_color": COLORS["CYAN"], + "polarity": -1, + "is_covariance": False, + "is_molprobity": False, + "is_reflections": False, + "is_rama_z": False, + "is_rama_classification": False, + }, + { + "id": 1, + "type": "continuous", + "long_name": "Maximum B-factor", + "short_name": "Max. B", + "ring_color": COLORS["TEAL"], + "polarity": -1, + "is_covariance": False, + "is_molprobity": False, + "is_reflections": False, + "is_rama_z": False, + "is_rama_classification": False, + }, + { + "id": 2, + "type": "continuous", + "long_name": "Stdev B-factor", + "short_name": "Std. B", + "ring_color": COLORS["SLATE"], + "polarity": -1, + "is_covariance": False, + "is_molprobity": False, + "is_reflections": False, + "is_rama_z": False, + "is_rama_classification": False, + }, + { + "id": 3, + "type": "continuous", + "long_name": "Residue Fit", + "short_name": "Res. Fit", + "ring_color": COLORS["MAGENTA"], + "polarity": -1, + "is_covariance": False, + "is_molprobity": False, + "is_reflections": True, + "is_rama_z": False, + "is_rama_classification": False, + }, + { + "id": 4, + "type": "continuous", + "long_name": "Main Chain Fit", + "short_name": "Main Fit", + "ring_color": COLORS["BLUE"], + "polarity": -1, + "is_covariance": False, + "is_molprobity": False, + "is_reflections": True, + "is_rama_z": False, + "is_rama_classification": False, + }, + { + "id": 5, + "type": "continuous", + "long_name": "Side Chain Fit", + "short_name": "Side Fit", + "ring_color": COLORS["INDIGO"], + "polarity": -1, + "is_covariance": False, + "is_molprobity": False, + "is_reflections": True, + "is_rama_z": False, + "is_rama_classification": False, + }, + { + "id": 6, + "type": "continuous", + "long_name": "Covariance Score", + "short_name": "Cov. Score", + "ring_color": COLORS["ORANGE"], + "polarity": -1, + "is_covariance": True, + "is_molprobity": False, + "is_reflections": False, + "is_rama_z": False, + "is_rama_classification": False, + }, + { + "id": 7, + "type": "continuous", + "long_name": "Ramachandran z-score", + "short_name": "Rama Z", + "ring_color": COLORS["RED"], + "polarity": 1, + "is_covariance": False, + "is_molprobity": False, + "is_reflections": False, + "is_rama_z": True, + "is_rama_classification": False, + }, +) - 'RED' : 'rgb(200, 080, 080)', - 'ORANGE' : 'rgb(250, 200, 050)', - 'GREEN' : 'rgb(050, 200, 050)', - - 'BLUE' : 'rgb(050, 050, 200)', - 'CYAN' : 'rgb(050, 200, 200)', - 'TEAL' : 'rgb(000, 120, 120)', - 'SLATE' : 'rgb(120, 160, 200)', - 'MAGENTA' : 'rgb(200, 050, 200)', - 'INDIGO' : 'rgb(080, 000, 120)', - 'L_PINK' : 'rgb(255, 235, 235)', - - 'BAR_GREEN' : 'rgb(090, 237, 141)', - 'BAR_ORANGE' : 'rgb(247, 212, 134)', - 'BAR_RED' : 'rgb(240, 106, 111)' - } - -CONTINUOUS_METRICS = ( { 'id' : 0, - 'type' : 'continuous', - 'long_name' : 'Average B-factor', - 'short_name' : 'Avg. B', - 'ring_color' : COLORS['CYAN'], - 'polarity' : -1, - 'is_covariance' : False, - 'is_molprobity' : False, - 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False - }, - { 'id' : 1, - 'type' : 'continuous', - 'long_name' : 'Maximum B-factor', - 'short_name' : 'Max. B', - 'ring_color' : COLORS['TEAL'], - 'polarity' : -1, - 'is_covariance' : False, - 'is_molprobity' : False, - 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False - }, - { 'id' : 2, - 'type' : 'continuous', - 'long_name' : 'Stdev B-factor', - 'short_name' : 'Std. B', - 'ring_color' : COLORS['SLATE'], - 'polarity' : -1, - 'is_covariance' : False, - 'is_molprobity' : False, - 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False - }, - { 'id' : 3, - 'type' : 'continuous', - 'long_name' : 'Residue Fit', - 'short_name' : 'Res. Fit', - 'ring_color' : COLORS['MAGENTA'], - 'polarity' : -1, - 'is_covariance' : False, - 'is_molprobity' : False, - 'is_reflections': True, - 'is_rama_z': False, - 'is_rama_classification': False - }, - { 'id' : 4, - 'type' : 'continuous', - 'long_name' : 'Main Chain Fit', - 'short_name' : 'Main Fit', - 'ring_color' : COLORS['BLUE'], - 'polarity' : -1, - 'is_covariance' : False, - 'is_molprobity' : False, - 'is_reflections': True, - 'is_rama_z': False, - 'is_rama_classification': False - }, - { 'id' : 5, - 'type' : 'continuous', - 'long_name' : 'Side Chain Fit', - 'short_name' : 'Side Fit', - 'ring_color' : COLORS['INDIGO'], - 'polarity' : -1, - 'is_covariance' : False, - 'is_molprobity' : False, - 'is_reflections': True, - 'is_rama_z': False, - 'is_rama_classification': False - }, - { 'id' : 6, - 'type' : 'continuous', - 'long_name' : 'Covariance Score', - 'short_name' : 'Cov. Score', - 'ring_color' : COLORS['ORANGE'], - 'polarity' : -1, - 'is_covariance' : True, - 'is_molprobity' : False, - 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False - }, - { 'id' : 7, - 'type' : 'continuous', - 'long_name' : 'Ramachandran z-score', - 'short_name' : 'Rama Z', - 'ring_color' : COLORS['RED'], - 'polarity' : 1, - 'is_covariance' : False, - 'is_molprobity' : False, - 'is_reflections': False, - 'is_rama_z': True, - 'is_rama_classification': False - } - ) - -DISCRETE_METRICS = ( { 'id' : 0, - 'type' : 'discrete', - 'long_name' : 'Rotamer Classification', - 'short_name' : 'Rota.', - 'ring_color' : COLORS['L_GREY'], - 'seq_colors' : (COLORS['RED'], - COLORS['ORANGE'], - COLORS['GREEN']), - 'seq_labels' : ('Outlier', - 'Allowed', - 'Favoured'), - 'is_covariance' : False, - 'is_molprobity' : False, - 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False - }, - { 'id' : 1, - 'type' : 'discrete', - 'long_name' : 'Ramachandran Classification', - 'short_name' : 'Rama.', - 'ring_color' : COLORS['L_GREY'], - 'seq_colors' : (COLORS['RED'], - COLORS['ORANGE'], - COLORS['GREEN']), - 'seq_labels' : ('Outlier', - 'Allowed', - 'Favoured'), - 'is_covariance' : False, - 'is_molprobity' : False, - 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': True - }, - { 'id' : 2, - 'type' : 'discrete', - 'long_name' : 'Clash Indicator', - 'short_name' : 'Clashes', - 'ring_color' : COLORS['L_GREY'], - 'seq_colors' : (COLORS['RED'], - COLORS['ORANGE'], - COLORS['GREEN']), - 'seq_labels' : ('Multiple Clashes', - 'One Clash', - 'No Clashes'), - 'is_covariance' : False, - 'is_molprobity' : True, - 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False - }, - { 'id' : 3, - 'type' : 'discrete', - 'long_name' : 'Misalignment', - 'short_name' : 'CMO', - 'ring_color' : COLORS['L_GREY'], - 'seq_colors' : (COLORS['RED'], - COLORS['GREEN']), - 'seq_labels' : ('Misaligned', - 'Aligned'), - 'is_covariance' : True, - 'is_molprobity' : False, - 'is_reflections': False, - 'is_rama_z': False, - 'is_rama_classification': False - } - ) +DISCRETE_METRICS = ( + { + "id": 0, + "type": "discrete", + "long_name": "Rotamer Classification", + "short_name": "Rota.", + "ring_color": COLORS["L_GREY"], + "seq_colors": (COLORS["RED"], COLORS["ORANGE"], COLORS["GREEN"]), + "seq_labels": ("Outlier", "Allowed", "Favoured"), + "is_covariance": False, + "is_molprobity": False, + "is_reflections": False, + "is_rama_z": False, + "is_rama_classification": False, + }, + { + "id": 1, + "type": "discrete", + "long_name": "Ramachandran Classification", + "short_name": "Rama.", + "ring_color": COLORS["L_GREY"], + "seq_colors": (COLORS["RED"], COLORS["ORANGE"], COLORS["GREEN"]), + "seq_labels": ("Outlier", "Allowed", "Favoured"), + "is_covariance": False, + "is_molprobity": False, + "is_reflections": False, + "is_rama_z": False, + "is_rama_classification": True, + }, + { + "id": 2, + "type": "discrete", + "long_name": "Clash Indicator", + "short_name": "Clashes", + "ring_color": COLORS["L_GREY"], + "seq_colors": (COLORS["RED"], COLORS["ORANGE"], COLORS["GREEN"]), + "seq_labels": ("Multiple Clashes", "One Clash", "No Clashes"), + "is_covariance": False, + "is_molprobity": True, + "is_reflections": False, + "is_rama_z": False, + "is_rama_classification": False, + }, + { + "id": 3, + "type": "discrete", + "long_name": "Misalignment", + "short_name": "CMO", + "ring_color": COLORS["L_GREY"], + "seq_colors": (COLORS["RED"], COLORS["GREEN"]), + "seq_labels": ("Misaligned", "Aligned"), + "is_covariance": True, + "is_molprobity": False, + "is_reflections": False, + "is_rama_z": False, + "is_rama_classification": False, + }, +) CHAIN_VIEW_GAP_ANGLE = 0.35 RAMACHANDRAN_THRESHOLDS = (0.02, 0.002) -CHAIN_VIEW_RINGS = [ DISCRETE_METRICS[0], - DISCRETE_METRICS[1], - DISCRETE_METRICS[2], - DISCRETE_METRICS[3], - CONTINUOUS_METRICS[0], - CONTINUOUS_METRICS[1], - CONTINUOUS_METRICS[4], - CONTINUOUS_METRICS[5], - CONTINUOUS_METRICS[6], - CONTINUOUS_METRICS[7] ] +CHAIN_VIEW_RINGS = [ + DISCRETE_METRICS[0], + DISCRETE_METRICS[1], + DISCRETE_METRICS[2], + DISCRETE_METRICS[3], + CONTINUOUS_METRICS[0], + CONTINUOUS_METRICS[1], + CONTINUOUS_METRICS[4], + CONTINUOUS_METRICS[5], + CONTINUOUS_METRICS[6], + CONTINUOUS_METRICS[7], +] -RESIDUE_VIEW_BOXES = [ DISCRETE_METRICS[0], - DISCRETE_METRICS[1], - DISCRETE_METRICS[2], - DISCRETE_METRICS[3] ] +RESIDUE_VIEW_BOXES = [ + DISCRETE_METRICS[0], + DISCRETE_METRICS[1], + DISCRETE_METRICS[2], + DISCRETE_METRICS[3], +] -RESIDUE_VIEW_BARS = [ CONTINUOUS_METRICS[0], - CONTINUOUS_METRICS[5] ] +RESIDUE_VIEW_BARS = [CONTINUOUS_METRICS[0], CONTINUOUS_METRICS[5]] diff --git a/iris_validation/graphics/chain.py b/iris_validation/graphics/chain.py index e170fc1..0a3a401 100644 --- a/iris_validation/graphics/chain.py +++ b/iris_validation/graphics/chain.py @@ -8,16 +8,25 @@ class ChainView: - def __init__(self, data, chain_index, canvas_size=(1000, 1000), hidden=False): + def __init__( + self, + data, + chain_index, + canvas_size=(1000, 1000), + hidden=False, + ChainViewRings_inp=None, + ): self.data = data - print(data) + self.chain_view_rings = CHAIN_VIEW_RINGS + if ChainViewRings_inp: + self.chain_view_rings = ChainViewRings_inp self.chain_index = chain_index self.canvas_size = canvas_size self.hidden = hidden self.dwg = None self.cfa_cache = {} - self.num_rings = len(CHAIN_VIEW_RINGS) + self.num_rings = len(self.chain_view_rings) self.num_versions = self.data["num_versions"] self.num_segments = self.data["aligned_length"] self.center = (self.canvas_size[0] // 2, self.canvas_size[1] // 2) @@ -64,7 +73,7 @@ def _draw(self): ) # Draw data rings - for ring_id, ring_metric in enumerate(CHAIN_VIEW_RINGS): + for ring_id, ring_metric in enumerate(self.chain_view_rings): self._add_ring(ring_id, ring_metric) # Draw missing-data shade diff --git a/iris_validation/graphics/panel.py b/iris_validation/graphics/panel.py index ac27010..d0b113f 100644 --- a/iris_validation/graphics/panel.py +++ b/iris_validation/graphics/panel.py @@ -13,6 +13,8 @@ RESIDUE_VIEW_BOXES, RESIDUE_VIEW_BARS, CHAIN_VIEW_GAP_ANGLE, + DISCRETE_METRICS, + CONTINUOUS_METRICS, ) @@ -22,10 +24,21 @@ class Panel: - def __init__(self, data, canvas_size=(1500, 1000)): + def __init__( + self, + data, + canvas_size=(1500, 1000), + continuous_metrics_to_display=None, + discrete_metrics_to_display=None, + ): self.data = data self.canvas_size = canvas_size - + self.chain_view_rings = CHAIN_VIEW_RINGS + if continuous_metrics_to_display: + self.chain_view_rings = self.get_chain_view_rings( + continuous_metrics_to_display, + discrete_metrics_to_display=discrete_metrics_to_display, + ) self.dwg = None self.javascript = None self.chain_views = None @@ -42,8 +55,12 @@ def __init__(self, data, canvas_size=(1500, 1000)): # TODO: Make this nicer def _verify_chosen_metrics(self): - global CHAIN_VIEW_RINGS, RESIDUE_VIEW_BOXES, RESIDUE_VIEW_BARS - for metric_list in (CHAIN_VIEW_RINGS, RESIDUE_VIEW_BOXES, RESIDUE_VIEW_BARS): + global RESIDUE_VIEW_BOXES, RESIDUE_VIEW_BARS + for metric_list in ( + self.chain_view_rings, + RESIDUE_VIEW_BOXES, + RESIDUE_VIEW_BARS, + ): if not isinstance(metric_list, list): raise ValueError("Chosen metrics in the _defs.py file must be lists") for metric_index in reversed(range(len(metric_list))): @@ -104,7 +121,12 @@ def _generate_javascript(self): def _generate_subviews(self): self.chain_views = [] for chain_index, chain_data in enumerate(self.data): - chain_view = ChainView(chain_data, chain_index, hidden=chain_index > 0).dwg + chain_view = ChainView( + chain_data, + chain_index, + hidden=chain_index > 0, + ChainViewRings_inp=self.chain_view_rings, + ).dwg self.chain_views.append(chain_view) self.residue_view = ResidueView().dwg @@ -404,3 +426,25 @@ def _draw(self): "viewBox" ] = f"{width_buffer} {height_buffer} {viewbox_width} {viewbox_height}" self.dwg.add(self.residue_view) + + def get_chain_view_rings( + self, continuous_metrics_to_display, discrete_metrics_to_display=None + ): + chain_view = [] + # add discrete types first + if discrete_metrics_to_display: + for metric_name in discrete_metrics_to_display: + for metric_info in DISCRETE_METRICS: + if metric_info["short_name"] == metric_name: + chain_view.append(metric_info) + break + else: + for metric_info in CHAIN_VIEW_RINGS: + if metric_info["type"] == "discrete": + chain_view.append(metric_info) + for metric_name in continuous_metrics_to_display: + for metric_info in CONTINUOUS_METRICS: + if metric_info["short_name"] == metric_name: + chain_view.append(metric_info) + break + return chain_view diff --git a/iris_validation/metrics/__init__.py b/iris_validation/metrics/__init__.py index bfd65f7..1351e26 100644 --- a/iris_validation/metrics/__init__.py +++ b/iris_validation/metrics/__init__.py @@ -223,7 +223,6 @@ def _get_tortoize_data(model_path, model_id=None, out_queue=None): if out_queue is not None: out_queue.put(("rama_z", model_id, rama_z_data)) - print(rama_z_data) return rama_z_data @@ -238,7 +237,6 @@ def metrics_model_series_from_files( model_json_paths=None, multiprocessing=True, ): - print(model_paths) try: if isinstance(model_paths, str): model_paths = [model_paths] @@ -257,7 +255,6 @@ def metrics_model_series_from_files( distpred_paths, model_json_paths, ] - print(path_lists) for i in range(1, len(path_lists)): if path_lists[i] is None: path_lists[i] = tuple([None for _ in model_paths]) diff --git a/iris_validation/metrics/model.py b/iris_validation/metrics/model.py index 22d5250..1c53a4d 100644 --- a/iris_validation/metrics/model.py +++ b/iris_validation/metrics/model.py @@ -18,7 +18,6 @@ def __init__( self.molprobity_data = molprobity_data self.reflections_data = reflections_data self.rama_z_data = rama_z_data - print(rama_z_data) self._index = -1 self.minimol_chains = list(mmol_model.model()) @@ -43,7 +42,6 @@ def __init__( None if self.density_scores is None else self.density_scores[chain_id] ) chain_rama_z_data = None if rama_z_data is None else rama_z_data[chain_id] - print(chain_rama_z_data) chain = MetricsChain( mmol_chain, self, diff --git a/iris_validation/metrics/series.py b/iris_validation/metrics/series.py index 878cc43..1191fd3 100644 --- a/iris_validation/metrics/series.py +++ b/iris_validation/metrics/series.py @@ -153,7 +153,6 @@ def get_raw_data(self): residue.covariance_score, residue.rama_z, ) - print(residue_continuous_values) residue_percentile_values = ( residue.avg_b_factor_percentile, residue.max_b_factor_percentile, From 10f16d2de5cc016981ccf03728abcc38d36044cc Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Wed, 12 Apr 2023 13:05:03 +0100 Subject: [PATCH 04/20] Add options to provide percentile data or similar --- iris_validation/__init__.py | 8 + iris_validation/graphics/js/constants.js | 1 + iris_validation/graphics/js/interaction.js | 17 +- iris_validation/graphics/panel.py | 36 +- iris_validation/graphics/residue.py | 480 ++++++++++++-------- iris_validation/metrics/__init__.py | 9 +- iris_validation/metrics/chain.py | 50 ++- iris_validation/metrics/model.py | 7 + iris_validation/metrics/reflections.py | 80 ++-- iris_validation/metrics/residue.py | 51 ++- iris_validation/metrics/series.py | 2 - iris_validation/utils.py | 483 +++++++++++++++------ 12 files changed, 838 insertions(+), 386 deletions(-) diff --git a/iris_validation/__init__.py b/iris_validation/__init__.py index 36b3a75..f758131 100644 --- a/iris_validation/__init__.py +++ b/iris_validation/__init__.py @@ -22,8 +22,12 @@ def generate_report( multiprocessing=True, latest_model_metrics_json=None, previous_model_metrics_json=None, + data_with_percentiles=None, # only works with model_metrics_json files discrete_metrics_to_display=None, continuous_metrics_to_display=None, + residue_bars_to_display=None, + percentile_bar_label=None, + percentile_bar_range=None, wrap_in_html=True, output_dir=None, ): @@ -43,6 +47,7 @@ def generate_report( run_molprobity, calculate_rama_z, model_json_paths, + data_with_percentiles, multiprocessing, ) model_series_data = model_series.get_raw_data() @@ -50,6 +55,9 @@ def generate_report( model_series_data, continuous_metrics_to_display=continuous_metrics_to_display, discrete_metrics_to_display=discrete_metrics_to_display, + residue_bars_to_display=residue_bars_to_display, + percentile_bar_label=percentile_bar_label, + percentile_bar_range=percentile_bar_range, ) panel_string = panel.dwg.tostring() diff --git a/iris_validation/graphics/js/constants.js b/iris_validation/graphics/js/constants.js index be00a52..c285d33 100644 --- a/iris_validation/graphics/js/constants.js +++ b/iris_validation/graphics/js/constants.js @@ -6,3 +6,4 @@ const boxColors = {box_colors}; const boxLabels = {box_labels}; const gapDegrees = {gap_degrees}; const chainSelectorColors = {chain_selector_colors}; +const bar_y_lim = {bar_y_lim}; diff --git a/iris_validation/graphics/js/interaction.js b/iris_validation/graphics/js/interaction.js index 993b222..889545f 100644 --- a/iris_validation/graphics/js/interaction.js +++ b/iris_validation/graphics/js/interaction.js @@ -97,8 +97,9 @@ function getResidueViewData() { let y = barChartsContainer.points.getItem(pointID).y; bccPoints.push([x, y]); }; + console.log(bccPoints); barOffsetY = bccPoints[2][1]; - barMultiplierY = -(bccPoints[2][1]-bccPoints[0][1]) / 100; + barMultiplierY = -(bccPoints[2][1]-bccPoints[0][1]) / (bar_y_lim[1]-bar_y_lim[0]); // Boxplot ranges for (var versionID = 0; versionID < modelData[selectedChain]['num_versions']; ++versionID) { @@ -120,14 +121,17 @@ function getResidueViewData() { let metricMax = Math.max.apply(null, allPercentileValues); let metricMean = mean(allPercentileValues); let metricStd = standardDeviation(allPercentileValues); - let metricLow = Math.max(0, metricMean-metricStd); - let metricHigh = Math.min(100, metricMean+metricStd); + let metricLow = Math.max(bar_y_lim[0], metricMean-metricStd); + let metricHigh = Math.min(bar_y_lim[1], metricMean+metricStd); let distributionValues = [ metricMin, metricMax, metricLow, metricMean, metricHigh ]; + console.log(distributionValues, barMultiplierY); let versionLineYs = [ ]; for (var valueID = 0; valueID < 5; ++valueID) { - let lineY = parseFloat((barOffsetY + barMultiplierY * distributionValues[valueID]).toFixed(1)); + let barValueLim = Math.max(0.0, distributionValues[valueID]-bar_y_lim[0]); + let lineY = parseFloat((barOffsetY + barMultiplierY * barValueLim).toFixed(1)); versionLineYs.push(lineY); }; + console.log(versionLineYs); barLineYs[versionID].push(versionLineYs); }; }; @@ -277,7 +281,9 @@ function updateSelectedResidue() { boxplots[barID].setAttribute('opacity', 1); let barValue = modelData[selectedChain]['percentile_values'][metricID][selectedVersion][selectedResidue]; // Set main line coordinates - barY = parseFloat((barOffsetY + barMultiplierY * barValue).toFixed(1)); + let barValueLim = Math.max(0.0, barValue-bar_y_lim[0]); + barY = parseFloat((barOffsetY + barMultiplierY * barValueLim).toFixed(1)); + //console.log(barY); barMainlines[barID].setAttribute('y1', barY); barMainlines[barID].setAttribute('y2', barY); // Set bar label text and position @@ -287,6 +293,7 @@ function updateSelectedResidue() { } else { barLabels[barID].setAttribute('y', barY+25); }; + console.log(barLabels); }; // Set summary text diff --git a/iris_validation/graphics/panel.py b/iris_validation/graphics/panel.py index d0b113f..723351c 100644 --- a/iris_validation/graphics/panel.py +++ b/iris_validation/graphics/panel.py @@ -30,6 +30,9 @@ def __init__( canvas_size=(1500, 1000), continuous_metrics_to_display=None, discrete_metrics_to_display=None, + residue_bars_to_display=None, + percentile_bar_label=None, + percentile_bar_range=None, ): self.data = data self.canvas_size = canvas_size @@ -39,6 +42,17 @@ def __init__( continuous_metrics_to_display, discrete_metrics_to_display=discrete_metrics_to_display, ) + self.residue_view_bars = RESIDUE_VIEW_BARS + if residue_bars_to_display is not None: + self.residue_view_bars = self.get_residue_view_bars(residue_bars_to_display) + if percentile_bar_label: + self.percentile_bar_label = percentile_bar_label + else: + self.percentile_bar_label = None + if percentile_bar_range: + self.percentile_bar_range = percentile_bar_range + else: + self.percentile_bar_range = [0, 100] self.dwg = None self.javascript = None self.chain_views = None @@ -55,11 +69,11 @@ def __init__( # TODO: Make this nicer def _verify_chosen_metrics(self): - global RESIDUE_VIEW_BOXES, RESIDUE_VIEW_BARS + global RESIDUE_VIEW_BOXES for metric_list in ( self.chain_view_rings, RESIDUE_VIEW_BOXES, - RESIDUE_VIEW_BARS, + self.residue_view_bars, ): if not isinstance(metric_list, list): raise ValueError("Chosen metrics in the _defs.py file must be lists") @@ -93,7 +107,7 @@ def _verify_chosen_metrics(self): def _generate_javascript(self): json_data = json.dumps(self.data) num_chains = len(self.chain_ids) - bar_metric_ids = [metric["id"] for metric in RESIDUE_VIEW_BARS] + bar_metric_ids = [metric["id"] for metric in self.residue_view_bars] box_metric_ids = [metric["id"] for metric in RESIDUE_VIEW_BOXES] box_colors = json.dumps([metric["seq_colors"] for metric in RESIDUE_VIEW_BOXES]) box_labels = json.dumps([metric["seq_labels"] for metric in RESIDUE_VIEW_BOXES]) @@ -114,6 +128,7 @@ def _generate_javascript(self): box_labels=box_labels, gap_degrees=gap_degrees, chain_selector_colors=self.swtich_colors, + bar_y_lim=self.percentile_bar_range, ) self.javascript = js_constants + js_interation @@ -128,7 +143,11 @@ def _generate_subviews(self): ChainViewRings_inp=self.chain_view_rings, ).dwg self.chain_views.append(chain_view) - self.residue_view = ResidueView().dwg + self.residue_view = ResidueView( + ResidueViewBars_inp=self.residue_view_bars, + percentile_bar_label=self.percentile_bar_label, + percentile_bar_range=self.percentile_bar_range, + ).dwg def _draw(self): middle_gap = 30 @@ -448,3 +467,12 @@ def get_chain_view_rings( chain_view.append(metric_info) break return chain_view + + def get_residue_view_bars(self, residue_bars_to_display): + residue_view = [] + for metric_name in residue_bars_to_display: + for metric_info in CONTINUOUS_METRICS: + if metric_info["short_name"] == metric_name: + residue_view.append(metric_info) + break + return residue_view diff --git a/iris_validation/graphics/residue.py b/iris_validation/graphics/residue.py index b358b5e..abe283e 100644 --- a/iris_validation/graphics/residue.py +++ b/iris_validation/graphics/residue.py @@ -4,19 +4,34 @@ from iris_validation._defs import COLORS, RESIDUE_VIEW_BOXES, RESIDUE_VIEW_BARS -class ResidueView(): - def __init__(self, canvas_size=(400, 1000)): +class ResidueView: + def __init__( + self, + canvas_size=(400, 1000), + ResidueViewBars_inp=None, + percentile_bar_label=None, + percentile_bar_range=None, + ): self.canvas_size = canvas_size self.dwg = None - self.svg_id = 'iris-residue-view' - - self.box_names = [ metric['short_name'] for metric in RESIDUE_VIEW_BOXES ] - self.bar_names = [ metric['long_name'] for metric in RESIDUE_VIEW_BARS ] + self.svg_id = "iris-residue-view" + self.residue_view_bars = RESIDUE_VIEW_BARS + if ResidueViewBars_inp: + self.residue_view_bars = ResidueViewBars_inp + self.box_names = [metric["short_name"] for metric in RESIDUE_VIEW_BOXES] + self.bar_names = [metric["long_name"] for metric in self.residue_view_bars] # TODO: allow any number of bars self.bar_names = self.bar_names[:2] + self.percentile_bar_label = "Percentiles" + if percentile_bar_label: + self.percentile_bar_label = percentile_bar_label + if percentile_bar_range: + self.percentile_bar_range = percentile_bar_range + else: + self.percentile_bar_range = [0, 100] self._draw() def _draw(self): @@ -24,198 +39,309 @@ def _draw(self): left_indent = 35 # Initialise drawing - self.dwg = svgwrite.Drawing(profile='full') + self.dwg = svgwrite.Drawing(profile="full") # Set HTML attributes - self.dwg.attribs['viewBox'] = '0 0 ' + ' '.join([ str(x) for x in self.canvas_size ]) - self.dwg.attribs['id'] = self.svg_id + self.dwg.attribs["viewBox"] = "0 0 " + " ".join( + [str(x) for x in self.canvas_size] + ) + self.dwg.attribs["id"] = self.svg_id # Draw background - self.dwg.add(self.dwg.polygon(points=[ (0, 0), - (0, self.canvas_size[1]), - (self.canvas_size[0], self.canvas_size[1]), - (self.canvas_size[0], 0) ], - fill=COLORS['WHITE'], - fill_opacity=1, - stroke_opacity=0)) + self.dwg.add( + self.dwg.polygon( + points=[ + (0, 0), + (0, self.canvas_size[1]), + (self.canvas_size[0], self.canvas_size[1]), + (self.canvas_size[0], 0), + ], + fill=COLORS["WHITE"], + fill_opacity=1, + stroke_opacity=0, + ) + ) # Boxes for box_id, box_title in enumerate(self.box_names): - box_bounds = [ 0.25*self.canvas_size[0]+left_indent, - top_margin + 70*box_id, - self.canvas_size[0], - top_margin + 70*box_id + 50 ] - - self.dwg.add(self.dwg.polygon(points=[ (left_indent, box_bounds[1]), - (box_bounds[0], box_bounds[1]), - (box_bounds[0], box_bounds[3]), - (left_indent, box_bounds[3]) ], - fill=COLORS['VL_GREY'], - fill_opacity=0.5, - stroke=COLORS['BLACK'], - stroke_width=2, - stroke_opacity=1)) - self.dwg.add(self.dwg.polygon(points=[ (box_bounds[0], box_bounds[1]), - (box_bounds[2], box_bounds[1]), - (box_bounds[2], box_bounds[3]), - (box_bounds[0], box_bounds[3]) ], - fill=COLORS['VL_GREY'], - fill_opacity=0.75, - stroke=COLORS['BLACK'], - stroke_width=2, - stroke_opacity=1, - id=f'{self.svg_id}-box-{box_id}')) - self.dwg.add(self.dwg.text('', - insert=((box_bounds[0]+box_bounds[2])/2, (box_bounds[1]+box_bounds[3])/2), - font_size=20, - font_family='Arial', - font_weight='bold', - fill=COLORS['BLACK'], - fill_opacity=1, - text_anchor='middle', - alignment_baseline='central', - id=f'{self.svg_id}-box-{box_id}-text')) - self.dwg.add(self.dwg.text(box_title, - insert=(left_indent + 0.125*self.canvas_size[0], (box_bounds[1]+box_bounds[3])/2), - font_size=18, - font_family='Arial', - fill=COLORS['BLACK'], - fill_opacity=1, - text_anchor='middle', - alignment_baseline='central')) + box_bounds = [ + 0.25 * self.canvas_size[0] + left_indent, + top_margin + 70 * box_id, + self.canvas_size[0], + top_margin + 70 * box_id + 50, + ] + + self.dwg.add( + self.dwg.polygon( + points=[ + (left_indent, box_bounds[1]), + (box_bounds[0], box_bounds[1]), + (box_bounds[0], box_bounds[3]), + (left_indent, box_bounds[3]), + ], + fill=COLORS["VL_GREY"], + fill_opacity=0.5, + stroke=COLORS["BLACK"], + stroke_width=2, + stroke_opacity=1, + ) + ) + self.dwg.add( + self.dwg.polygon( + points=[ + (box_bounds[0], box_bounds[1]), + (box_bounds[2], box_bounds[1]), + (box_bounds[2], box_bounds[3]), + (box_bounds[0], box_bounds[3]), + ], + fill=COLORS["VL_GREY"], + fill_opacity=0.75, + stroke=COLORS["BLACK"], + stroke_width=2, + stroke_opacity=1, + id=f"{self.svg_id}-box-{box_id}", + ) + ) + self.dwg.add( + self.dwg.text( + "", + insert=( + (box_bounds[0] + box_bounds[2]) / 2, + (box_bounds[1] + box_bounds[3]) / 2, + ), + font_size=20, + font_family="Arial", + font_weight="bold", + fill=COLORS["BLACK"], + fill_opacity=1, + text_anchor="middle", + alignment_baseline="central", + id=f"{self.svg_id}-box-{box_id}-text", + ) + ) + self.dwg.add( + self.dwg.text( + box_title, + insert=( + left_indent + 0.125 * self.canvas_size[0], + (box_bounds[1] + box_bounds[3]) / 2, + ), + font_size=18, + font_family="Arial", + fill=COLORS["BLACK"], + fill_opacity=1, + text_anchor="middle", + alignment_baseline="central", + ) + ) # Bars bar_width = 120 - bar_charts_bounds = (left_indent, - 70*len(self.box_names)+30, - self.canvas_size[0], - self.canvas_size[1]-60) - + bar_charts_bounds = ( + left_indent, + 70 * len(self.box_names) + 30, + self.canvas_size[0], + self.canvas_size[1] - 60, + ) # Bar chart container - self.dwg.add(self.dwg.polygon(points=[ (bar_charts_bounds[0], bar_charts_bounds[1]), - (bar_charts_bounds[2], bar_charts_bounds[1]), - (bar_charts_bounds[2], bar_charts_bounds[3]), - (bar_charts_bounds[0], bar_charts_bounds[3]) ], - fill=COLORS['WHITE'], - fill_opacity=0, - stroke=COLORS['BLACK'], - stroke_width=2, - stroke_opacity=1, - id=f'{self.svg_id}-bar-charts-container')) + self.dwg.add( + self.dwg.polygon( + points=[ + (bar_charts_bounds[0], bar_charts_bounds[1]), + (bar_charts_bounds[2], bar_charts_bounds[1]), + (bar_charts_bounds[2], bar_charts_bounds[3]), + (bar_charts_bounds[0], bar_charts_bounds[3]), + ], + fill=COLORS["WHITE"], + fill_opacity=0, + stroke=COLORS["BLACK"], + stroke_width=2, + stroke_opacity=1, + id=f"{self.svg_id}-bar-charts-container", + ) + ) # Bar chart axis - for label_id in range(10+1): - height = bar_charts_bounds[1] + label_id*(bar_charts_bounds[3]-bar_charts_bounds[1])/10 - self.dwg.add(self.dwg.line((bar_charts_bounds[0]-5, height), (bar_charts_bounds[0]+5, height), - stroke=COLORS['BLACK'], - stroke_width=2, - stroke_opacity=1)) - self.dwg.add(self.dwg.text(str(100-label_id*10), - insert=(bar_charts_bounds[0]-8, height+5), - font_size=18, - font_family='Arial', - fill=COLORS['BLACK'], - fill_opacity=1, - text_anchor='end', - alignment_baseline='central')) + label_step = int( + (self.percentile_bar_range[1] - self.percentile_bar_range[0]) / 10.0 + ) + for label_id in range(10 + 1): + height = ( + bar_charts_bounds[1] + + label_id * (bar_charts_bounds[3] - bar_charts_bounds[1]) / 10 + ) + self.dwg.add( + self.dwg.line( + (bar_charts_bounds[0] - 5, height), + (bar_charts_bounds[0] + 5, height), + stroke=COLORS["BLACK"], + stroke_width=2, + stroke_opacity=1, + ) + ) + self.dwg.add( + self.dwg.text( + str(self.percentile_bar_range[1] - label_id * label_step), + insert=(bar_charts_bounds[0] - 8, height + 5), + font_size=18, + font_family="Arial", + fill=COLORS["BLACK"], + fill_opacity=1, + text_anchor="end", + alignment_baseline="central", + ) + ) # Bar chart bottom label - self.dwg.add(self.dwg.text('Percentiles', - insert=(self.canvas_size[0]/2, bar_charts_bounds[3]+50), - font_size=18, - font_family='Arial', - fill=COLORS['BLACK'], - fill_opacity=1, - text_anchor='middle', - alignment_baseline='central')) + self.dwg.add( + self.dwg.text( + self.percentile_bar_label, + insert=(self.canvas_size[0] / 2, bar_charts_bounds[3] + 50), + font_size=18, + font_family="Arial", + fill=COLORS["BLACK"], + fill_opacity=1, + text_anchor="middle", + alignment_baseline="central", + ) + ) bar_chart_width = bar_charts_bounds[2] - bar_charts_bounds[0] for bar_id, bar_name in enumerate(self.bar_names): - bar_x = bar_charts_bounds[0] + (bar_chart_width * (2*bar_id+1)/4) - + bar_x = bar_charts_bounds[0] + (bar_chart_width * (2 * bar_id + 1) / 4) # Bar label - self.dwg.add(self.dwg.text(bar_name, - insert=(bar_x, bar_charts_bounds[3]+25), - font_size=18, - font_family='Arial', - fill=COLORS['BLACK'], - fill_opacity=1, - text_anchor='middle', - alignment_baseline='central')) + self.dwg.add( + self.dwg.text( + bar_name, + insert=(bar_x, bar_charts_bounds[3] + 25), + font_size=18, + font_family="Arial", + fill=COLORS["BLACK"], + fill_opacity=1, + text_anchor="middle", + alignment_baseline="central", + ) + ) # Bar - self.dwg.add(self.dwg.polygon(points=[ (bar_x-bar_width//2, bar_charts_bounds[3]), - (bar_x-bar_width//2, bar_charts_bounds[1]), - (bar_x+bar_width//2, bar_charts_bounds[1]), - (bar_x+bar_width//2, bar_charts_bounds[3]) ], - fill=COLORS['VL_GREY'], - fill_opacity=0.5, - stroke=COLORS['BLACK'], - stroke_width=2, - stroke_opacity=1)) + self.dwg.add( + self.dwg.polygon( + points=[ + (bar_x - bar_width // 2, bar_charts_bounds[3]), + (bar_x - bar_width // 2, bar_charts_bounds[1]), + (bar_x + bar_width // 2, bar_charts_bounds[1]), + (bar_x + bar_width // 2, bar_charts_bounds[3]), + ], + fill=COLORS["VL_GREY"], + fill_opacity=0.5, + stroke=COLORS["BLACK"], + stroke_width=2, + stroke_opacity=1, + ) + ) # Box plot - box_plot_group = self.dwg.g(id=f'{self.svg_id}-boxplot-{bar_id}', opacity=0) - box_plot_group.add(self.dwg.polygon(points=[ (bar_x-bar_width//2, bar_charts_bounds[3]), - (bar_x-bar_width//2, bar_charts_bounds[1]), - (bar_x+bar_width//2, bar_charts_bounds[1]), - (bar_x+bar_width//2, bar_charts_bounds[3]) ], - fill=COLORS['WHITE'], - fill_opacity=1, - stroke=COLORS['BLACK'], - stroke_width=2, - stroke_opacity=1)) - box_plot_group.add(self.dwg.polygon(points=[ (bar_x-bar_width//2, bar_charts_bounds[1]+80), - (bar_x-bar_width//2, bar_charts_bounds[3]-80), - (bar_x+bar_width//2, bar_charts_bounds[3]-80), - (bar_x+bar_width//2, bar_charts_bounds[1]+80) ], - fill=f'url(#{self.svg_id}-gradient-{bar_id})', - fill_opacity=0.8, - stroke=COLORS['BLACK'], - stroke_width=2, - stroke_opacity=0.5, - id=f'{self.svg_id}-boxplot-{bar_id}-box')) - - gradient = LinearGradient(start=(0, 0), end=(0,1), id=f'{self.svg_id}-gradient-{bar_id}') - gradient.add_stop_color(offset='0%', color=COLORS['BAR_GREEN']) - gradient.add_stop_color(offset='50%', color=COLORS['BAR_ORANGE']) - gradient.add_stop_color(offset='100%', color=COLORS['BAR_RED']) + box_plot_group = self.dwg.g(id=f"{self.svg_id}-boxplot-{bar_id}", opacity=0) + box_plot_group.add( + self.dwg.polygon( + points=[ + (bar_x - bar_width // 2, bar_charts_bounds[3]), + (bar_x - bar_width // 2, bar_charts_bounds[1]), + (bar_x + bar_width // 2, bar_charts_bounds[1]), + (bar_x + bar_width // 2, bar_charts_bounds[3]), + ], + fill=COLORS["WHITE"], + fill_opacity=1, + stroke=COLORS["BLACK"], + stroke_width=2, + stroke_opacity=1, + ) + ) + box_plot_group.add( + self.dwg.polygon( + points=[ + (bar_x - bar_width // 2, bar_charts_bounds[1] + 80), + (bar_x - bar_width // 2, bar_charts_bounds[3] - 80), + (bar_x + bar_width // 2, bar_charts_bounds[3] - 80), + (bar_x + bar_width // 2, bar_charts_bounds[1] + 80), + ], + fill=f"url(#{self.svg_id}-gradient-{bar_id})", + fill_opacity=0.8, + stroke=COLORS["BLACK"], + stroke_width=2, + stroke_opacity=0.5, + id=f"{self.svg_id}-boxplot-{bar_id}-box", + ) + ) + + gradient = LinearGradient( + start=(0, 0), end=(0, 1), id=f"{self.svg_id}-gradient-{bar_id}" + ) + gradient.add_stop_color(offset="0%", color=COLORS["BAR_GREEN"]) + gradient.add_stop_color(offset="50%", color=COLORS["BAR_ORANGE"]) + gradient.add_stop_color(offset="100%", color=COLORS["BAR_RED"]) self.dwg.defs.add(gradient) - box_plot_group.add(self.dwg.line((bar_x-bar_width//2, bar_charts_bounds[1]+200), - (bar_x+bar_width//2, bar_charts_bounds[1]+200), - stroke=COLORS['BLACK'], - stroke_width=2, - stroke_opacity=0.5, - stroke_dasharray=2, - id=f'{self.svg_id}-boxplot-{bar_id}-line-high')) - box_plot_group.add(self.dwg.line((bar_x-bar_width//2, (bar_charts_bounds[1]+bar_charts_bounds[3])//2), - (bar_x+bar_width//2, (bar_charts_bounds[1]+bar_charts_bounds[3])//2), - stroke=COLORS['BLACK'], - stroke_width=3, - stroke_opacity=0.8, - stroke_dasharray=5, - id=f'{self.svg_id}-boxplot-{bar_id}-line-mid')) - box_plot_group.add(self.dwg.line((bar_x-bar_width//2, bar_charts_bounds[3]-200), - (bar_x+bar_width//2, bar_charts_bounds[3]-200), - stroke=COLORS['BLACK'], - stroke_width=2, - stroke_opacity=0.5, - stroke_dasharray=2, - id=f'{self.svg_id}-boxplot-{bar_id}-line-low')) - box_plot_group.add(self.dwg.line((bar_x-bar_width//2, bar_charts_bounds[3]), - (bar_x+bar_width//2, bar_charts_bounds[3]), - fill_opacity=0, - stroke=COLORS['BLACK'], - stroke_width=4, - stroke_opacity=1, - id=f'{self.svg_id}-bar-{bar_id}-mainline')) - box_plot_group.add(self.dwg.text('', - insert=(bar_x, bar_charts_bounds[3]), - font_size=20, - font_family='Arial', - font_weight='bold', - fill=COLORS['BLACK'], - fill_opacity=1, - text_anchor='middle', - alignment_baseline='central', - id=f'{self.svg_id}-bar-{bar_id}-label')) + box_plot_group.add( + self.dwg.line( + (bar_x - bar_width // 2, bar_charts_bounds[1] + 200), + (bar_x + bar_width // 2, bar_charts_bounds[1] + 200), + stroke=COLORS["BLACK"], + stroke_width=2, + stroke_opacity=0.5, + stroke_dasharray=2, + id=f"{self.svg_id}-boxplot-{bar_id}-line-high", + ) + ) + box_plot_group.add( + self.dwg.line( + ( + bar_x - bar_width // 2, + (bar_charts_bounds[1] + bar_charts_bounds[3]) // 2, + ), + ( + bar_x + bar_width // 2, + (bar_charts_bounds[1] + bar_charts_bounds[3]) // 2, + ), + stroke=COLORS["BLACK"], + stroke_width=3, + stroke_opacity=0.8, + stroke_dasharray=5, + id=f"{self.svg_id}-boxplot-{bar_id}-line-mid", + ) + ) + box_plot_group.add( + self.dwg.line( + (bar_x - bar_width // 2, bar_charts_bounds[3] - 200), + (bar_x + bar_width // 2, bar_charts_bounds[3] - 200), + stroke=COLORS["BLACK"], + stroke_width=2, + stroke_opacity=0.5, + stroke_dasharray=2, + id=f"{self.svg_id}-boxplot-{bar_id}-line-low", + ) + ) + box_plot_group.add( + self.dwg.line( + (bar_x - bar_width // 2, bar_charts_bounds[3]), + (bar_x + bar_width // 2, bar_charts_bounds[3]), + fill_opacity=0, + stroke=COLORS["BLACK"], + stroke_width=4, + stroke_opacity=1, + id=f"{self.svg_id}-bar-{bar_id}-mainline", + ) + ) + box_plot_group.add( + self.dwg.text( + "", + insert=(bar_x, bar_charts_bounds[3]), + font_size=20, + font_family="Arial", + font_weight="bold", + fill=COLORS["BLACK"], + fill_opacity=1, + text_anchor="middle", + alignment_baseline="central", + id=f"{self.svg_id}-bar-{bar_id}-label", + ) + ) self.dwg.add(box_plot_group) diff --git a/iris_validation/metrics/__init__.py b/iris_validation/metrics/__init__.py index 78c35fb..5b48068 100644 --- a/iris_validation/metrics/__init__.py +++ b/iris_validation/metrics/__init__.py @@ -236,6 +236,7 @@ def metrics_model_series_from_files( run_molprobity=False, calculate_rama_z=False, model_json_paths=None, + data_with_percentiles=None, multiprocessing=True, ): try: @@ -273,6 +274,7 @@ def metrics_model_series_from_files( all_molprobity_data = [] all_reflections_data = [] all_rama_z_data = [] + all_bfactor_data = [] # if externally supplied num_queued = 0 results_queue = Queue() check_resnum = False @@ -292,6 +294,7 @@ def metrics_model_series_from_files( molprobity_data = None reflections_data = None rama_z_data = None + bfactor_data = None if json_data_path: check_resnum = True @@ -304,6 +307,8 @@ def metrics_model_series_from_files( rama_z_data = json_data["rama_z"] if metric == "map_fit": reflections_data = json_data["map_fit"] + if metric == "b_fact": + bfactor_data = json_data["b_fact"] if run_covariance: if multiprocessing: p = Process( @@ -356,6 +361,7 @@ def metrics_model_series_from_files( all_molprobity_data.append(molprobity_data) all_reflections_data.append(reflections_data) all_rama_z_data.append(rama_z_data) + all_bfactor_data.append(bfactor_data) if multiprocessing: for _ in range(num_queued): @@ -376,9 +382,10 @@ def metrics_model_series_from_files( all_molprobity_data, all_reflections_data, all_rama_z_data, + all_bfactor_data, ) ): - metrics_model = MetricsModel(*model_data, check_resnum) + metrics_model = MetricsModel(*model_data, check_resnum, data_with_percentiles) metrics_models.append(metrics_model) metrics_model_series = MetricsModelSeries(metrics_models) diff --git a/iris_validation/metrics/chain.py b/iris_validation/metrics/chain.py index 31c9281..b6644af 100644 --- a/iris_validation/metrics/chain.py +++ b/iris_validation/metrics/chain.py @@ -10,7 +10,9 @@ def __init__( molprobity_data=None, density_scores=None, rama_z_data=None, + bfactor_data=None, check_resnum=False, + data_with_percentiles=None, ): self.minimol_chain = mmol_chain self.parent_model = parent_model @@ -23,7 +25,7 @@ def __init__( self.residues = [] self.length = len(mmol_chain) self.chain_id = str(mmol_chain.id().trim()) - + dict_ext_percentiles = {} # stores the percentiles supplied externally for residue_index, mmol_residue in enumerate(mmol_chain): previous_residue = ( mmol_chain[residue_index - 1] if residue_index > 0 else None @@ -43,10 +45,7 @@ def __init__( try: residue_covariance_data = covariance_data[res_id] except KeyError: - try: - residue_covariance_data = covariance_data[mmol_residue.id()] - except KeyError: - residue_covariance_data = None + residue_covariance_data = None else: residue_covariance_data = covariance_data[seq_num] # molprobity @@ -57,10 +56,7 @@ def __init__( try: residue_molprobity_data = molprobity_data[res_id] except KeyError: - try: - residue_molprobity_data = molprobity_data[mmol_residue.id()] - except KeyError: - residue_molprobity_data = None + residue_molprobity_data = None else: residue_molprobity_data = molprobity_data[seq_num] # density scores @@ -68,11 +64,15 @@ def __init__( residue_density_scores = None else: if check_resnum: - try: - residue_density_scores = density_scores[res_id] - except KeyError: + if data_with_percentiles and "map_fit" in data_with_percentiles: + try: + residue_density_scores = density_scores[res_id][0] + dict_ext_percentiles["map_fit"] = density_scores[res_id][-1] + except KeyError: + residue_density_scores = None + else: try: - residue_density_scores = density_scores[mmol_residue.id()] + residue_density_scores = density_scores[res_id] except KeyError: residue_density_scores = None else: @@ -85,13 +85,27 @@ def __init__( try: residue_rama_z_score = rama_z_data[res_id] except KeyError: + residue_rama_z_score = None + else: + residue_rama_z_score = rama_z_data.get(seq_num, None) + # ext b-factor + if bfactor_data is None: + residue_bfact_score = None + else: + if check_resnum: + if data_with_percentiles and "b-factor" in data_with_percentiles: try: - residue_rama_z_score = rama_z_data[mmol_residue.id()] + residue_bfact_score = bfactor_data[res_id][0] + dict_ext_percentiles["b-factor"] = bfactor_data[res_id][-1] except KeyError: - residue_rama_z_score = None + residue_bfact_score = None + else: + try: + residue_bfact_score = bfactor_data[res_id] + except KeyError: + residue_bfact_score = None else: - residue_rama_z_score = rama_z_data.get(seq_num, None) - + residue_bfact_score = bfactor_data[seq_num] residue = MetricsResidue( mmol_residue, residue_index, @@ -102,6 +116,8 @@ def __init__( residue_molprobity_data, residue_density_scores, residue_rama_z_score, + residue_bfact_score, + dict_ext_percentiles, ) self.residues.append(residue) diff --git a/iris_validation/metrics/model.py b/iris_validation/metrics/model.py index 1c53a4d..5a0952d 100644 --- a/iris_validation/metrics/model.py +++ b/iris_validation/metrics/model.py @@ -11,7 +11,9 @@ def __init__( molprobity_data=None, reflections_data=None, rama_z_data=None, + bfactor_data=None, check_resnum=False, + data_with_percentiles=None, ): self.minimol_model = mmol_model self.covariance_data = covariance_data @@ -42,6 +44,9 @@ def __init__( None if self.density_scores is None else self.density_scores[chain_id] ) chain_rama_z_data = None if rama_z_data is None else rama_z_data[chain_id] + chain_bfactor_data = ( + None if bfactor_data is None else bfactor_data[chain_id] + ) chain = MetricsChain( mmol_chain, self, @@ -49,7 +54,9 @@ def __init__( chain_molprobity_data, chain_density_scores, chain_rama_z_data, + chain_bfactor_data, check_resnum=check_resnum, + data_with_percentiles=data_with_percentiles, ) chain.remove_non_aa_residues() self.chains.append(chain) diff --git a/iris_validation/metrics/reflections.py b/iris_validation/metrics/reflections.py index 1d30cff..2ef107a 100644 --- a/iris_validation/metrics/reflections.py +++ b/iris_validation/metrics/reflections.py @@ -6,7 +6,7 @@ from iris_validation import utils -class ReflectionsHandler(): +class ReflectionsHandler: def __init__(self, f_reflections=None, xmap=None, minimol=None): self.f_reflections = f_reflections self.xmap = xmap @@ -21,7 +21,9 @@ def __init__(self, f_reflections=None, xmap=None, minimol=None): if f_reflections is None: if xmap is None: - raise ValueError('Either a reflections file path or an xmap object must be passed as an argument') + raise ValueError( + "Either a reflections file path or an xmap object must be passed as an argument" + ) try: self.grid = xmap.grid except AttributeError: @@ -29,12 +31,16 @@ def __init__(self, f_reflections=None, xmap=None, minimol=None): self.spacegroup = xmap.spacegroup self.cell = xmap.cell else: - extension = f_reflections.split('.')[-1].lower() - if extension != 'mtz': - if extension == 'cif': - raise ValueError('mmCIF format is not currently supported for reflections data.') + extension = f_reflections.split(".")[-1].lower() + if extension != "mtz": + if extension == "cif": + raise ValueError( + "mmCIF format is not currently supported for reflections data." + ) else: - raise ValueError(f'Reflections file has unrecognised extension: {extension}') + raise ValueError( + f"Reflections file has unrecognised extension: {extension}" + ) self._load_hkl_data() self._calculate_structure_factors() self._generate_xmap() @@ -45,24 +51,30 @@ def _load_hkl_data(self): mtzin.open_read(self.f_reflections) mtzin.import_hkl_info(self.hkl) - mtz_labels_and_types = [ tuple(str(line).strip().split(' ')) for line in mtzin.column_labels() ] + mtz_labels_and_types = [ + tuple(str(line).strip().split(" ")) for line in mtzin.column_labels() + ] mtz_column_labels, _ = zip(*mtz_labels_and_types) - mtz_column_label_suffixes = set([ label.split('/')[-1] for label in mtz_column_labels ]) + mtz_column_label_suffixes = set( + [label.split("/")[-1] for label in mtz_column_labels] + ) # TODO: need a better way to choose the right headers import_complete = False - for suffix_pair in ( ('F', 'SIGF'), - ('FP', 'SIGFP'), - ('FP_ALL', 'SIGFP_ALL') ): + for suffix_pair in (("F", "SIGF"), ("FP", "SIGFP"), ("FP_ALL", "SIGFP_ALL")): if len(mtz_column_label_suffixes & set(suffix_pair)) == 2: try: self.f_sigf = clipper.HKL_data_F_sigF_float(self.hkl) - mtzin.import_hkl_data(self.f_sigf, '/*/*/[' + ','.join(suffix_pair) + ']') + mtzin.import_hkl_data( + self.f_sigf, "/*/*/[" + ",".join(suffix_pair) + "]" + ) import_complete = True break except Exception as exception: - raise Exception('Failed to import HKL data from reflections file') from exception + raise Exception( + "Failed to import HKL data from reflections file" + ) from exception if not import_complete: - raise ValueError('Reflections file does not contain the required columns') + raise ValueError("Reflections file does not contain the required columns") mtzin.close_read() spacegroup = self.hkl.spacegroup() @@ -75,11 +87,15 @@ def _load_hkl_data(self): self.resolution_limit = resolution.limit() def _calculate_structure_factors(self, bulk_solvent=True): - #self.crystal = clipper.MTZcrystal() - #self.f_phi = clipper.HKL_data_F_phi_float(self.hkl, self.crystal) + # self.crystal = clipper.MTZcrystal() + # self.f_phi = clipper.HKL_data_F_phi_float(self.hkl, self.crystal) self.f_phi = clipper.HKL_data_F_phi_float(self.hkl) atoms = self.minimol.atom_list() - sf_calc = clipper.SFcalc_obs_bulk_float if bulk_solvent else clipper.SFcalc_obs_base_float + sf_calc = ( + clipper.SFcalc_obs_bulk_float + if bulk_solvent + else clipper.SFcalc_obs_base_float + ) sf_calc(self.f_phi, self.f_sigf, atoms) def _generate_xmap(self): @@ -109,13 +125,17 @@ def get_density_at_atom(self, mmol_atom): return self.get_density_at_point(xyz) def calculate_all_density_scores(self): - density_scores = { } + density_scores = {} for chain in self.minimol: chain_id = str(chain.id()).strip() - density_scores[chain_id] = { } + density_scores[chain_id] = {} for residue in chain: seq_num = int(residue.seqnum()) - all_atom_scores, mainchain_atom_scores, sidechain_atom_scores = [ ], [ ], [ ] + all_atom_scores, mainchain_atom_scores, sidechain_atom_scores = ( + [], + [], + [], + ) for atom_id, atom in enumerate(residue): is_mainchain = str(atom.name()).strip() in utils.MC_ATOM_NAMES element = str(atom.element()).strip() @@ -123,7 +143,9 @@ def calculate_all_density_scores(self): density = self.get_density_at_atom(atom) atom_score = None density_norm = density / atomic_number - atom_score = -log(norm.cdf((density_norm - self.map_mean) / self.map_std)) + atom_score = -log( + norm.cdf((density_norm - self.map_mean) / self.map_std) + ) all_atom_scores.append(atom_score) if is_mainchain: mainchain_atom_scores.append(atom_score) @@ -133,8 +155,16 @@ def calculate_all_density_scores(self): if len(all_atom_scores) > 0: all_score = sum(all_atom_scores) / len(all_atom_scores) if len(mainchain_atom_scores) > 0: - mainchain_score = sum(mainchain_atom_scores) / len(mainchain_atom_scores) + mainchain_score = sum(mainchain_atom_scores) / len( + mainchain_atom_scores + ) if len(sidechain_atom_scores) > 0: - sidechain_score = sum(sidechain_atom_scores) / len(sidechain_atom_scores) - density_scores[chain_id][seq_num] = (all_score, mainchain_score, sidechain_score) + sidechain_score = sum(sidechain_atom_scores) / len( + sidechain_atom_scores + ) + density_scores[chain_id][seq_num] = ( + all_score, + mainchain_score, + sidechain_score, + ) return density_scores diff --git a/iris_validation/metrics/residue.py b/iris_validation/metrics/residue.py index 387de7d..865f5ed 100644 --- a/iris_validation/metrics/residue.py +++ b/iris_validation/metrics/residue.py @@ -18,6 +18,8 @@ def __init__( molprobity_data=None, density_scores=None, rama_z_score=None, + bfact_score=None, + dict_ext_percentiles=None, ): self.minimol_residue = mmol_residue self.initialised_with_context = index_in_chain is not None @@ -53,8 +55,12 @@ def __init__( self.mc_b_factor, self.sc_b_factor, ) = utils.analyse_b_factors(mmol_residue, self.is_aa, self.backbone_atoms) + # override precalculated + if bfact_score: + self.avg_b_factor, self.std_b_factor = bfact_score # Backbone torsion angles + self.phi = ( clipper.MMonomer.protein_ramachandran_phi( self.previous_residue, mmol_residue @@ -166,24 +172,39 @@ def __init__( ) = self.density_scores # Percentiles percentile_calculator = self.parent_chain.parent_model.percentile_calculator - self.avg_b_factor_percentile = percentile_calculator.get_percentile( - 0, self.avg_b_factor - ) + if "b-factor" in dict_ext_percentiles: + self.avg_b_factor_percentile = dict_ext_percentiles["b-factor"][0] + else: + self.avg_b_factor_percentile = percentile_calculator.get_percentile( + 0, self.avg_b_factor + ) self.max_b_factor_percentile = percentile_calculator.get_percentile( 1, self.max_b_factor ) - self.std_b_factor_percentile = percentile_calculator.get_percentile( - 2, self.std_b_factor - ) - self.fit_score_percentile = percentile_calculator.get_percentile( - 3, self.fit_score - ) - self.mainchain_fit_score_percentile = percentile_calculator.get_percentile( - 4, self.mainchain_fit_score - ) - self.sidechain_fit_score_percentile = percentile_calculator.get_percentile( - 5, self.sidechain_fit_score - ) + if "b-factor" in dict_ext_percentiles: + self.std_b_factor_percentile = dict_ext_percentiles["b-factor"][1] + else: + self.std_b_factor_percentile = percentile_calculator.get_percentile( + 2, self.std_b_factor + ) + if "map_fit" in dict_ext_percentiles: + self.fit_score_percentile = dict_ext_percentiles["map_fit"][0] + else: + self.fit_score_percentile = percentile_calculator.get_percentile( + 3, self.fit_score + ) + if "map_fit" in dict_ext_percentiles: + self.mainchain_fit_score_percentile = dict_ext_percentiles["map_fit"][1] + else: + self.mainchain_fit_score_percentile = percentile_calculator.get_percentile( + 4, self.mainchain_fit_score + ) + if "map_fit" in dict_ext_percentiles: + self.sidechain_fit_score_percentile = dict_ext_percentiles["map_fit"][2] + else: + self.sidechain_fit_score_percentile = percentile_calculator.get_percentile( + 5, self.sidechain_fit_score + ) self.covariance_score_percentile = percentile_calculator.get_percentile( 6, self.covariance_score ) diff --git a/iris_validation/metrics/series.py b/iris_validation/metrics/series.py index 1191fd3..11c65a5 100644 --- a/iris_validation/metrics/series.py +++ b/iris_validation/metrics/series.py @@ -162,7 +162,6 @@ def get_raw_data(self): residue.sidechain_fit_score_percentile, residue.covariance_score_percentile, ) - residue_continuous_values = tuple( round(x, 3) if isinstance(x, float) else x for x in residue_continuous_values @@ -180,7 +179,6 @@ def get_raw_data(self): chain_data["discrete_values"].append(discrete_values) chain_data["continuous_values"].append(continuous_values) chain_data["percentile_values"].append(percentile_values) - chain_data["discrete_values"] = list(zip(*chain_data["discrete_values"])) chain_data["continuous_values"] = list( zip(*chain_data["continuous_values"]) diff --git a/iris_validation/utils.py b/iris_validation/utils.py index d69fea6..054e7b4 100644 --- a/iris_validation/utils.py +++ b/iris_validation/utils.py @@ -3,74 +3,224 @@ import clipper -THREE_LETTER_CODES = { 0 : [ 'ALA', 'GLY', 'VAL', 'LEU', 'ILE', 'PRO', 'PHE', 'TYR', 'TRP', 'SER', - 'THR', 'CYS', 'MET', 'ASN', 'GLN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU' ], - 1 : [ 'MSE', 'SEC' ], - 2 : [ 'UNK' ] } - -ONE_LETTER_CODES = { 'A' : 'ALA', - 'C' : 'CYS', - 'D' : 'ASP', - 'E' : 'GLU', - 'F' : 'PHE', - 'G' : 'GLY', - 'H' : 'HIS', - 'I' : 'ILE', - 'K' : 'LYS', - 'L' : 'LEU', - 'M' : 'MET', - 'N' : 'ASN', - 'P' : 'PRO', - 'Q' : 'GLN', - 'R' : 'ARG', - 'S' : 'SER', - 'T' : 'THR', - 'U' : 'SEC', - 'V' : 'VAL', - 'W' : 'TRP', - 'Y' : 'TYR', - 'X' : 'UNK' } - -CHI_ATOMS = [ { ('N', 'CA', 'CB', 'CG') : ('ARG', 'ASN', 'ASP', 'GLN', 'GLU', 'HIS', 'LEU', 'LYS', - 'MET', 'PHE', 'PRO', 'TRP', 'TYR', 'MSE'), - ('N', 'CA', 'CB', 'CG1') : ('ILE', 'VAL'), - ('N', 'CA', 'CB', 'SG') : ('CYS'), - ('N', 'CA', 'CB', 'SE') : ('SEC'), - ('N', 'CA', 'CB', 'OG') : ('SER'), - ('N', 'CA', 'CB', 'OG1') : ('THR') }, - { ('CA', 'CB', 'CG', 'CD') : ('ARG', 'GLN', 'GLU', 'LYS', 'PRO'), - ('CA', 'CB', 'CG', 'CD1') : ('LEU', 'PHE', 'TRP', 'TYR'), - ('CA', 'CB', 'CG', 'OD1') : ('ASN', 'ASP'), - ('CA', 'CB', 'CG', 'ND1') : ('HIS'), - ('CA', 'CB', 'CG1', 'CD1') : ('ILE'), - ('CA', 'CB', 'CG', 'SD') : ('MET'), - ('CA', 'CB', 'CG', 'SE') : ('MSE') }, - { ('CB', 'CG', 'CD', 'OE1') : ('GLN', 'GLU'), - ('CB', 'CG', 'CD', 'NE') : ('ARG'), - ('CB', 'CG', 'CD', 'CE') : ('LYS'), - ('CB', 'CG', 'SD', 'CE') : ('MET'), - ('CB', 'CG', 'SE', 'CE') : ('MSE') }, - { ('CG', 'CD', 'NE', 'CZ') : ('ARG'), - ('CG', 'CD', 'CE', 'NZ') : ('LYS') }, - { ('CD', 'NE', 'CZ', 'NH1') : ('ARG') } ] - -ATOMIC_NUMBERS = { 'H': 1, 'HE': 2, 'LI': 3, 'BE': 4, 'B': 5, 'C': 6, 'N': 7, 'O': 8, 'F': 9, - 'NE': 10, 'NA': 11, 'MG': 12, 'AL': 13, 'SI': 14, 'P': 15, 'S': 16, 'CL': 17, - 'AR': 18, 'K': 19, 'CA': 20, 'SC': 21, 'TI': 22, 'V': 23, 'CR': 24, 'MN': 25, - 'FE': 26, 'CO': 27, 'NI': 28, 'CU': 29, 'ZN': 30, 'GA': 31, 'GE': 32, 'AS': 33, - 'SE': 34, 'BR': 35, 'KR': 36, 'RB': 37, 'SR': 38, 'Y': 39, 'ZR': 40, 'NB': 41, - 'MO': 42, 'TC': 43, 'RU': 44, 'RH': 45, 'PD': 46, 'AG': 47, 'CD': 48, 'IN': 49, - 'SN': 50, 'SB': 51, 'TE': 52, 'I': 53, 'XE': 54, 'CS': 55, 'BA': 56, 'LA': 57, - 'CE': 58, 'PR': 59, 'ND': 60, 'PM': 61, 'SM': 62, 'EU': 63, 'GD': 64, 'TB': 65, - 'DY': 66, 'HO': 67, 'ER': 68, 'TM': 69, 'YB': 70, 'LU': 71, 'HF': 72, 'TA': 73, - 'W': 74, 'RE': 75, 'OS': 76, 'IR': 77, 'PT': 78, 'AU': 79, 'HG': 80, 'TL': 81, - 'PB': 82, 'BI': 83, 'PO': 84, 'AT': 85, 'RN': 86, 'FR': 87, 'RA': 88, 'AC': 89, - 'TH': 90, 'PA': 91, 'U': 92, 'NP': 93, 'PU': 94, 'AM': 95, 'CM': 96, 'BK': 97, - 'CF': 98, 'ES': 99, 'FM': 100, 'MD': 101, 'NO': 102, 'LR': 103, 'RF': 104, - 'DB': 105, 'SG': 106, 'BH': 107, 'HS': 108, 'MT': 109, 'DS': 110, 'RG': 111, - 'CN': 112, 'NH': 113, 'FL': 114, 'MC': 115, 'LV': 116, 'TS': 117, 'OG': 118 } - -MC_ATOM_NAMES = set([ 'N', 'CA' 'C', 'O', 'CB' ]) +THREE_LETTER_CODES = { + 0: [ + "ALA", + "GLY", + "VAL", + "LEU", + "ILE", + "PRO", + "PHE", + "TYR", + "TRP", + "SER", + "THR", + "CYS", + "MET", + "ASN", + "GLN", + "LYS", + "ARG", + "HIS", + "ASP", + "GLU", + ], + 1: ["MSE", "SEC"], + 2: ["UNK"], +} + +ONE_LETTER_CODES = { + "A": "ALA", + "C": "CYS", + "D": "ASP", + "E": "GLU", + "F": "PHE", + "G": "GLY", + "H": "HIS", + "I": "ILE", + "K": "LYS", + "L": "LEU", + "M": "MET", + "N": "ASN", + "P": "PRO", + "Q": "GLN", + "R": "ARG", + "S": "SER", + "T": "THR", + "U": "SEC", + "V": "VAL", + "W": "TRP", + "Y": "TYR", + "X": "UNK", +} + +CHI_ATOMS = [ + { + ("N", "CA", "CB", "CG"): ( + "ARG", + "ASN", + "ASP", + "GLN", + "GLU", + "HIS", + "LEU", + "LYS", + "MET", + "PHE", + "PRO", + "TRP", + "TYR", + "MSE", + ), + ("N", "CA", "CB", "CG1"): ("ILE", "VAL"), + ("N", "CA", "CB", "SG"): ("CYS"), + ("N", "CA", "CB", "SE"): ("SEC"), + ("N", "CA", "CB", "OG"): ("SER"), + ("N", "CA", "CB", "OG1"): ("THR"), + }, + { + ("CA", "CB", "CG", "CD"): ("ARG", "GLN", "GLU", "LYS", "PRO"), + ("CA", "CB", "CG", "CD1"): ("LEU", "PHE", "TRP", "TYR"), + ("CA", "CB", "CG", "OD1"): ("ASN", "ASP"), + ("CA", "CB", "CG", "ND1"): ("HIS"), + ("CA", "CB", "CG1", "CD1"): ("ILE"), + ("CA", "CB", "CG", "SD"): ("MET"), + ("CA", "CB", "CG", "SE"): ("MSE"), + }, + { + ("CB", "CG", "CD", "OE1"): ("GLN", "GLU"), + ("CB", "CG", "CD", "NE"): ("ARG"), + ("CB", "CG", "CD", "CE"): ("LYS"), + ("CB", "CG", "SD", "CE"): ("MET"), + ("CB", "CG", "SE", "CE"): ("MSE"), + }, + {("CG", "CD", "NE", "CZ"): ("ARG"), ("CG", "CD", "CE", "NZ"): ("LYS")}, + {("CD", "NE", "CZ", "NH1"): ("ARG")}, +] + +ATOMIC_NUMBERS = { + "H": 1, + "HE": 2, + "LI": 3, + "BE": 4, + "B": 5, + "C": 6, + "N": 7, + "O": 8, + "F": 9, + "NE": 10, + "NA": 11, + "MG": 12, + "AL": 13, + "SI": 14, + "P": 15, + "S": 16, + "CL": 17, + "AR": 18, + "K": 19, + "CA": 20, + "SC": 21, + "TI": 22, + "V": 23, + "CR": 24, + "MN": 25, + "FE": 26, + "CO": 27, + "NI": 28, + "CU": 29, + "ZN": 30, + "GA": 31, + "GE": 32, + "AS": 33, + "SE": 34, + "BR": 35, + "KR": 36, + "RB": 37, + "SR": 38, + "Y": 39, + "ZR": 40, + "NB": 41, + "MO": 42, + "TC": 43, + "RU": 44, + "RH": 45, + "PD": 46, + "AG": 47, + "CD": 48, + "IN": 49, + "SN": 50, + "SB": 51, + "TE": 52, + "I": 53, + "XE": 54, + "CS": 55, + "BA": 56, + "LA": 57, + "CE": 58, + "PR": 59, + "ND": 60, + "PM": 61, + "SM": 62, + "EU": 63, + "GD": 64, + "TB": 65, + "DY": 66, + "HO": 67, + "ER": 68, + "TM": 69, + "YB": 70, + "LU": 71, + "HF": 72, + "TA": 73, + "W": 74, + "RE": 75, + "OS": 76, + "IR": 77, + "PT": 78, + "AU": 79, + "HG": 80, + "TL": 81, + "PB": 82, + "BI": 83, + "PO": 84, + "AT": 85, + "RN": 86, + "FR": 87, + "RA": 88, + "AC": 89, + "TH": 90, + "PA": 91, + "U": 92, + "NP": 93, + "PU": 94, + "AM": 95, + "CM": 96, + "BK": 97, + "CF": 98, + "ES": 99, + "FM": 100, + "MD": 101, + "NO": 102, + "LR": 103, + "RF": 104, + "DB": 105, + "SG": 106, + "BH": 107, + "HS": 108, + "MT": 109, + "DS": 110, + "RG": 111, + "CN": 112, + "NH": 113, + "FL": 114, + "MC": 115, + "LV": 116, + "TS": 117, + "OG": 118, +} + +MC_ATOM_NAMES = set(["N", "CA" "C", "O", "CB"]) # General calculations @@ -86,18 +236,18 @@ def median(values): num_values = len(values) if num_values < 1: return None - i = num_values//2 + i = num_values // 2 if num_values % 2 == 1: return values[i] - return mean(values[i-1:i+1]) + return mean(values[i - 1 : i + 1]) # Matrix operations def avg_coord(*xyzs): num_args = len(xyzs) - x = sum([ xyz[0] for xyz in xyzs ]) / num_args - y = sum([ xyz[1] for xyz in xyzs ]) / num_args - z = sum([ xyz[2] for xyz in xyzs ]) / num_args + x = sum([xyz[0] for xyz in xyzs]) / num_args + y = sum([xyz[1] for xyz in xyzs]) / num_args + z = sum([xyz[2] for xyz in xyzs]) / num_args return (x, y, z) @@ -113,22 +263,24 @@ def dot_product(xyz1, xyz2): def cross_product(xyz1, xyz2): - return [ xyz1[1] * xyz2[2] - xyz1[2] * xyz2[1], - xyz1[2] * xyz2[0] - xyz1[0] * xyz2[2], - xyz1[0] * xyz2[1] - xyz1[1] * xyz2[0] ] + return [ + xyz1[1] * xyz2[2] - xyz1[2] * xyz2[1], + xyz1[2] * xyz2[0] - xyz1[0] * xyz2[2], + xyz1[0] * xyz2[1] - xyz1[1] * xyz2[0], + ] def magnitude(xyz): - return (xyz[0]**2 + xyz[1]**2 + xyz[2]**2) ** 0.5 + return (xyz[0] ** 2 + xyz[1] ** 2 + xyz[2] ** 2) ** 0.5 def unit(xyz): length = magnitude(xyz) - return [ xyz[0] / length, xyz[1] / length, xyz[2] / length ] + return [xyz[0] / length, xyz[1] / length, xyz[2] / length] def subtract(xyz1, xyz2): - return [ xyz1[0] - xyz2[0], xyz1[1] - xyz2[1], xyz1[2] - xyz2[2] ] + return [xyz1[0] - xyz2[0], xyz1[1] - xyz2[1], xyz1[2] - xyz2[2]] def distance(xyz1, xyz2): @@ -162,43 +314,43 @@ def torsion(xyz1, xyz2, xyz3, xyz4, range_positive=False): # General functions def code_three_to_one(three_letter_codes, strict=False, verbose=False): - one_letter_codes = '' + one_letter_codes = "" if isinstance(three_letter_codes, str): - three_letter_codes = [ three_letter_codes ] + three_letter_codes = [three_letter_codes] for tlc in three_letter_codes: if tlc in ONE_LETTER_CODES.values(): olc = next(k for k, v in ONE_LETTER_CODES.items() if v == tlc) one_letter_codes += olc - elif tlc == 'MSE': + elif tlc == "MSE": if strict: - print('Warning: MSE will become M') - one_letter_codes += 'M' + print("Warning: MSE will become M") + one_letter_codes += "M" else: - one_letter_codes += 'M' + one_letter_codes += "M" else: if verbose: - print('Three-letter code not recognised:', tlc) + print("Three-letter code not recognised:", tlc) if strict: if verbose: - print('Returning None') + print("Returning None") return - one_letter_codes += 'X' + one_letter_codes += "X" return one_letter_codes def code_one_to_three(one_letter_codes, strict=False, verbose=False): - three_letter_codes = [ ] + three_letter_codes = [] for olc in one_letter_codes: if olc in ONE_LETTER_CODES: three_letter_codes.append(ONE_LETTER_CODES[olc]) else: if verbose: - print('One-letter code not recognised:', olc) + print("One-letter code not recognised:", olc) if strict: if verbose: - print('Returning None') + print("Returning None") return - three_letter_codes.append('UNK') + three_letter_codes.append("UNK") return three_letter_codes @@ -206,49 +358,61 @@ def needleman_wunsch(seq1, seq2, match_award=1, mismatch_penalty=-1, gap_penalty n = len(seq1) m = len(seq2) - score = [ [ 0 for _ in range(n+1) ] for _ in range(m+1) ] + score = [[0 for _ in range(n + 1)] for _ in range(m + 1)] - for i in range(0, m+1): + for i in range(0, m + 1): score[i][0] = gap_penalty * i - for j in range(0, n+1): + for j in range(0, n + 1): score[0][j] = gap_penalty * j - for i in range(1, m+1): - for j in range(1, n+1): - match = score[i-1][j-1] + (match_award if seq1[j-1] == seq2[i-1] else gap_penalty if '-' in (seq1[j-1], seq2[i-1]) else mismatch_penalty) - delete = score[i-1][j] + gap_penalty - insert = score[i][j-1] + gap_penalty + for i in range(1, m + 1): + for j in range(1, n + 1): + match = score[i - 1][j - 1] + ( + match_award + if seq1[j - 1] == seq2[i - 1] + else gap_penalty + if "-" in (seq1[j - 1], seq2[i - 1]) + else mismatch_penalty + ) + delete = score[i - 1][j] + gap_penalty + insert = score[i][j - 1] + gap_penalty score[i][j] = max(match, delete, insert) - alignment1, alignment2 = '', '' + alignment1, alignment2 = "", "" i, j = m, n while i > 0 and j > 0: score_current = score[i][j] - score_diagonal = score[i-1][j-1] - score_up = score[i][j-1] - score_left = score[i-1][j] - - if score_current == score_diagonal + (match_award if seq1[j-1] == seq2[i-1] else gap_penalty if '-' in (seq1[j-1], seq2[i-1]) else mismatch_penalty): - alignment1 += seq1[j-1] - alignment2 += seq2[i-1] + score_diagonal = score[i - 1][j - 1] + score_up = score[i][j - 1] + score_left = score[i - 1][j] + + if score_current == score_diagonal + ( + match_award + if seq1[j - 1] == seq2[i - 1] + else gap_penalty + if "-" in (seq1[j - 1], seq2[i - 1]) + else mismatch_penalty + ): + alignment1 += seq1[j - 1] + alignment2 += seq2[i - 1] i -= 1 j -= 1 elif score_current == score_up + gap_penalty: - alignment1 += seq1[j-1] - alignment2 += '-' + alignment1 += seq1[j - 1] + alignment2 += "-" j -= 1 elif score_current == score_left + gap_penalty: - alignment1 += '-' - alignment2 += seq2[i-1] + alignment1 += "-" + alignment2 += seq2[i - 1] i -= 1 while j > 0: - alignment1 += seq1[j-1] - alignment2 += '-' + alignment1 += seq1[j - 1] + alignment2 += "-" j -= 1 while i > 0: - alignment1 += '-' - alignment2 += seq2[i-1] + alignment1 += "-" + alignment2 += seq2[i - 1] i -= 1 alignment1, alignment2 = alignment1[::-1], alignment2[::-1] @@ -258,22 +422,41 @@ def needleman_wunsch(seq1, seq2, match_award=1, mismatch_penalty=-1, gap_penalty # (MiniMol) residue functions def code_type(mmol_residue): try: - return next(category for category, group in THREE_LETTER_CODES.items() if mmol_residue.type().trim() in group) + return next( + category + for category, group in THREE_LETTER_CODES.items() + if mmol_residue.type().trim() in group + ) except StopIteration: return None def get_backbone_atoms(mmol_residue): try: - n = next(atom for atom in mmol_residue if atom.id().trim().replace(' ', '') == 'N' or atom.id().trim().replace(' ', '') == 'N:A') + n = next( + atom + for atom in mmol_residue + if atom.id().trim().replace(" ", "") == "N" + or atom.id().trim().replace(" ", "") == "N:A" + ) except StopIteration: n = None try: - ca = next(atom for atom in mmol_residue if atom.id().trim().replace(' ', '') == 'CA' or atom.id().trim().replace(' ', '') == 'CA:A') + ca = next( + atom + for atom in mmol_residue + if atom.id().trim().replace(" ", "") == "CA" + or atom.id().trim().replace(" ", "") == "CA:A" + ) except StopIteration: ca = None try: - c = next(atom for atom in mmol_residue if atom.id().trim().replace(' ', '') == 'C' or atom.id().trim().replace(' ', '') == 'C:A') + c = next( + atom + for atom in mmol_residue + if atom.id().trim().replace(" ", "") == "C" + or atom.id().trim().replace(" ", "") == "C:A" + ) except StopIteration: c = None return n, ca, c @@ -293,19 +476,26 @@ def check_backbone_geometry(mmol_residue): def calculate_chis(mmol_residue): - chis = [ ] + chis = [] for i in range(5): - chi_atoms = [ ] - has_chi = any(mmol_residue.type().trim() in residues for residues in list(CHI_ATOMS[i].values())) + chi_atoms = [] + has_chi = any( + mmol_residue.type().trim() in residues + for residues in list(CHI_ATOMS[i].values()) + ) if not has_chi: return chis - required_atom_names = next(atoms for atoms, residues in CHI_ATOMS[i].items() if mmol_residue.type().trim() in residues) - missing_atom_names = [ ] + required_atom_names = next( + atoms + for atoms, residues in CHI_ATOMS[i].items() + if mmol_residue.type().trim() in residues + ) + missing_atom_names = [] for required_atom_name in required_atom_names: found = False for atom in mmol_residue: - atom_name = atom.id().trim().replace(' ', '') - if atom_name in (required_atom_name, required_atom_name + ':A'): + atom_name = atom.id().trim().replace(" ", "") + if atom_name in (required_atom_name, required_atom_name + ":A"): chi_atoms.append(atom) found = True if not found: @@ -313,7 +503,10 @@ def calculate_chis(mmol_residue): if len(chi_atoms) < 4: chis.append(None) continue - xyzs = [ (atom.coord_orth().x(), atom.coord_orth().y(), atom.coord_orth().z()) for atom in chi_atoms ] + xyzs = [ + (atom.coord_orth().x(), atom.coord_orth().y(), atom.coord_orth().z()) + for atom in chi_atoms + ] chis.append(torsion(xyzs[0], xyzs[1], xyzs[2], xyzs[3])) return tuple(chis) @@ -324,8 +517,8 @@ def analyse_b_factors(mmol_residue, is_aa=None, backbone_atoms=None): if backbone_atoms is None: backbone_atoms = get_backbone_atoms(mmol_residue) if is_aa: - backbone_atom_ids = set([ str(atom.id()).strip() for atom in backbone_atoms ]) - residue_b_factors, mc_b_factors, sc_b_factors = [ ], [ ], [ ] + backbone_atom_ids = set([str(atom.id()).strip() for atom in backbone_atoms]) + residue_b_factors, mc_b_factors, sc_b_factors = [], [], [] for atom in mmol_residue: atom_id = str(atom.id()).strip() bf = clipper.Util_u2b(atom.u_iso()) @@ -337,7 +530,9 @@ def analyse_b_factors(mmol_residue, is_aa=None, backbone_atoms=None): sc_b_factors.append(bf) b_max = max(residue_b_factors) b_avg = mean(residue_b_factors) - b_stdev = (sum([ (x - b_avg) ** 2 for x in residue_b_factors ]) / len(residue_b_factors)) ** 0.5 + b_stdev = ( + sum([(x - b_avg) ** 2 for x in residue_b_factors]) / len(residue_b_factors) + ) ** 0.5 mc_b_avg = mean(mc_b_factors) if is_aa else None sc_b_avg = mean(sc_b_factors) if is_aa else None return b_max, b_avg, b_stdev, mc_b_avg, sc_b_avg @@ -345,9 +540,11 @@ def analyse_b_factors(mmol_residue, is_aa=None, backbone_atoms=None): def check_is_aa(mmol_residue, strict=False): allowed_types = (0,) if strict else (0, 1) - if code_type(mmol_residue) in allowed_types and \ - None not in get_backbone_atoms(mmol_residue) and \ - check_backbone_geometry(mmol_residue): + if ( + code_type(mmol_residue) in allowed_types + and None not in get_backbone_atoms(mmol_residue) + and check_backbone_geometry(mmol_residue) + ): return True return False @@ -355,17 +552,19 @@ def check_is_aa(mmol_residue, strict=False): def get_rama_calculator(mmol_residue, code=None): if code is None: code = mmol_residue.type().trim() - if code == 'GLY': + if code == "GLY": return clipper.Ramachandran(clipper.Ramachandran.Gly2) - elif code == 'PRO': + elif code == "PRO": return clipper.Ramachandran(clipper.Ramachandran.Pro2) - elif code in ('ILE', 'VAL'): + elif code in ("ILE", "VAL"): return clipper.Ramachandran(clipper.Ramachandran.IleVal2) else: return clipper.Ramachandran(clipper.Ramachandran.NoGPIVpreP2) -def get_ramachandran_allowed(mmol_residue, code=None, phi=None, psi=None, thresholds=None): +def get_ramachandran_allowed( + mmol_residue, code=None, phi=None, psi=None, thresholds=None +): if phi is None or psi is None: return None if code is None: @@ -376,7 +575,9 @@ def get_ramachandran_allowed(mmol_residue, code=None, phi=None, psi=None, thresh return rama_function.allowed(phi, psi) -def get_ramachandran_favoured(mmol_residue, code=None, phi=None, psi=None, thresholds=None): +def get_ramachandran_favoured( + mmol_residue, code=None, phi=None, psi=None, thresholds=None +): if phi is None or psi is None: return None if code is None: @@ -387,7 +588,9 @@ def get_ramachandran_favoured(mmol_residue, code=None, phi=None, psi=None, thres return rama_function.favoured(phi, psi) -def get_ramachandran_classification(mmol_residue, code=None, phi=None, psi=None, thresholds=None): +def get_ramachandran_classification( + mmol_residue, code=None, phi=None, psi=None, thresholds=None +): if phi is None or psi is None: return None if code is None: From 54566eb84d764cba9df70fadd13c67b941b31867 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Sun, 18 Jun 2023 20:46:11 +0100 Subject: [PATCH 05/20] add gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c6e5be2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +build/ +.vscode +__pycache__/ +PKG-INFO \ No newline at end of file From cdb2d3c015dafbd91d6c68e3bfd3fc72667df287 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Sun, 18 Jun 2023 20:47:27 +0100 Subject: [PATCH 06/20] include floating point bar labels --- iris_validation/graphics/residue.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/iris_validation/graphics/residue.py b/iris_validation/graphics/residue.py index abe283e..f6ae360 100644 --- a/iris_validation/graphics/residue.py +++ b/iris_validation/graphics/residue.py @@ -162,9 +162,9 @@ def _draw(self): ) # Bar chart axis - label_step = int( - (self.percentile_bar_range[1] - self.percentile_bar_range[0]) / 10.0 - ) + label_step = ( + self.percentile_bar_range[1] - self.percentile_bar_range[0] + ) / 10.0 for label_id in range(10 + 1): height = ( bar_charts_bounds[1] @@ -181,7 +181,7 @@ def _draw(self): ) self.dwg.add( self.dwg.text( - str(self.percentile_bar_range[1] - label_id * label_step), + str(round(self.percentile_bar_range[1] - label_id * label_step, 1)), insert=(bar_charts_bounds[0] - 8, height + 5), font_size=18, font_family="Arial", From 410a61142a04a5515f8307d865252a8580b8c983 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Sun, 18 Jun 2023 20:47:42 +0100 Subject: [PATCH 07/20] remove console log --- iris_validation/graphics/js/interaction.js | 5 ----- 1 file changed, 5 deletions(-) diff --git a/iris_validation/graphics/js/interaction.js b/iris_validation/graphics/js/interaction.js index 889545f..8cbeda6 100644 --- a/iris_validation/graphics/js/interaction.js +++ b/iris_validation/graphics/js/interaction.js @@ -97,10 +97,8 @@ function getResidueViewData() { let y = barChartsContainer.points.getItem(pointID).y; bccPoints.push([x, y]); }; - console.log(bccPoints); barOffsetY = bccPoints[2][1]; barMultiplierY = -(bccPoints[2][1]-bccPoints[0][1]) / (bar_y_lim[1]-bar_y_lim[0]); - // Boxplot ranges for (var versionID = 0; versionID < modelData[selectedChain]['num_versions']; ++versionID) { barLineYs.push([ ]); // Model-version holder @@ -124,7 +122,6 @@ function getResidueViewData() { let metricLow = Math.max(bar_y_lim[0], metricMean-metricStd); let metricHigh = Math.min(bar_y_lim[1], metricMean+metricStd); let distributionValues = [ metricMin, metricMax, metricLow, metricMean, metricHigh ]; - console.log(distributionValues, barMultiplierY); let versionLineYs = [ ]; for (var valueID = 0; valueID < 5; ++valueID) { let barValueLim = Math.max(0.0, distributionValues[valueID]-bar_y_lim[0]); @@ -283,7 +280,6 @@ function updateSelectedResidue() { // Set main line coordinates let barValueLim = Math.max(0.0, barValue-bar_y_lim[0]); barY = parseFloat((barOffsetY + barMultiplierY * barValueLim).toFixed(1)); - //console.log(barY); barMainlines[barID].setAttribute('y1', barY); barMainlines[barID].setAttribute('y2', barY); // Set bar label text and position @@ -293,7 +289,6 @@ function updateSelectedResidue() { } else { barLabels[barID].setAttribute('y', barY+25); }; - console.log(barLabels); }; // Set summary text From 8aa0e9464dadb420a934995728165883add74433 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Tue, 20 Jun 2023 13:26:49 +0100 Subject: [PATCH 08/20] discard linting+formatting changes --- iris_validation/__init__.py | 9 +- iris_validation/_defs.py | 406 ++++++++++---------- iris_validation/graphics/chain.py | 476 +++++++++--------------- iris_validation/graphics/panel.py | 410 ++++++++------------- iris_validation/graphics/residue.py | 417 ++++++++------------- iris_validation/metrics/__init__.py | 260 ++++++------- iris_validation/metrics/chain.py | 55 +-- iris_validation/metrics/model.py | 77 +--- iris_validation/metrics/reflections.py | 78 ++-- iris_validation/metrics/residue.py | 104 ++---- iris_validation/metrics/series.py | 158 ++++---- iris_validation/utils.py | 488 +++++++------------------ 12 files changed, 1068 insertions(+), 1870 deletions(-) diff --git a/iris_validation/__init__.py b/iris_validation/__init__.py index f758131..b1d0987 100644 --- a/iris_validation/__init__.py +++ b/iris_validation/__init__.py @@ -1,8 +1,5 @@ import os -# import subprocess -# import json - from iris_validation.graphics import Panel from iris_validation.metrics import metrics_model_series_from_files @@ -70,8 +67,6 @@ def generate_report( if not os.path.isdir(output_dir): os.mkdir(output_dir) - extension = "html" if wrap_in_html else "svg" - with open( - os.path.join(output_dir, f"report.{extension}"), "w", encoding="utf8" - ) as outfile: + extension = 'html' if wrap_in_html else 'svg' + with open(os.path.join(output_dir, f'report.{extension}'), 'w', encoding='utf8') as outfile: outfile.write(panel_string) diff --git a/iris_validation/_defs.py b/iris_validation/_defs.py index 47713d8..c17c1a4 100644 --- a/iris_validation/_defs.py +++ b/iris_validation/_defs.py @@ -1,211 +1,211 @@ -COLORS = { - "BLACK": "rgb(000, 000, 000)", - "WHITE": "rgb(255, 255, 255)", - "GREY": "rgb(050, 050, 050)", - "L_GREY": "rgb(150, 150, 150)", - "VL_GREY": "rgb(200, 200, 200)", - "RED": "rgb(200, 080, 080)", - "ORANGE": "rgb(250, 200, 050)", - "GREEN": "rgb(050, 200, 050)", - "BLUE": "rgb(050, 050, 200)", - "CYAN": "rgb(050, 200, 200)", - "TEAL": "rgb(000, 120, 120)", - "SLATE": "rgb(120, 160, 200)", - "MAGENTA": "rgb(200, 050, 200)", - "INDIGO": "rgb(080, 000, 120)", - "L_PINK": "rgb(255, 235, 235)", - "BAR_GREEN": "rgb(090, 237, 141)", - "BAR_ORANGE": "rgb(247, 212, 134)", - "BAR_RED": "rgb(240, 106, 111)", -} +COLORS = { 'BLACK' : 'rgb(000, 000, 000)', + 'WHITE' : 'rgb(255, 255, 255)', -CONTINUOUS_METRICS = ( - { - "id": 0, - "type": "continuous", - "long_name": "Average B-factor", - "short_name": "Avg. B", - "ring_color": COLORS["CYAN"], - "polarity": -1, - "is_covariance": False, - "is_molprobity": False, - "is_reflections": False, - "is_rama_z": False, - "is_rama_classification": False, - }, - { - "id": 1, - "type": "continuous", - "long_name": "Maximum B-factor", - "short_name": "Max. B", - "ring_color": COLORS["TEAL"], - "polarity": -1, - "is_covariance": False, - "is_molprobity": False, - "is_reflections": False, - "is_rama_z": False, - "is_rama_classification": False, - }, - { - "id": 2, - "type": "continuous", - "long_name": "Stdev B-factor", - "short_name": "Std. B", - "ring_color": COLORS["SLATE"], - "polarity": -1, - "is_covariance": False, - "is_molprobity": False, - "is_reflections": False, - "is_rama_z": False, - "is_rama_classification": False, - }, - { - "id": 3, - "type": "continuous", - "long_name": "Residue Fit", - "short_name": "Res. Fit", - "ring_color": COLORS["MAGENTA"], - "polarity": -1, - "is_covariance": False, - "is_molprobity": False, - "is_reflections": True, - "is_rama_z": False, - "is_rama_classification": False, - }, - { - "id": 4, - "type": "continuous", - "long_name": "Main Chain Fit", - "short_name": "Main Fit", - "ring_color": COLORS["BLUE"], - "polarity": -1, - "is_covariance": False, - "is_molprobity": False, - "is_reflections": True, - "is_rama_z": False, - "is_rama_classification": False, - }, - { - "id": 5, - "type": "continuous", - "long_name": "Side Chain Fit", - "short_name": "Side Fit", - "ring_color": COLORS["INDIGO"], - "polarity": -1, - "is_covariance": False, - "is_molprobity": False, - "is_reflections": True, - "is_rama_z": False, - "is_rama_classification": False, - }, - { - "id": 6, - "type": "continuous", - "long_name": "Covariance Score", - "short_name": "Cov. Score", - "ring_color": COLORS["ORANGE"], - "polarity": -1, - "is_covariance": True, - "is_molprobity": False, - "is_reflections": False, - "is_rama_z": False, - "is_rama_classification": False, - }, - { - "id": 7, - "type": "continuous", - "long_name": "Ramachandran z-score", - "short_name": "Rama Z", - "ring_color": COLORS["RED"], - "polarity": 1, - "is_covariance": False, - "is_molprobity": False, - "is_reflections": False, - "is_rama_z": True, - "is_rama_classification": False, - }, -) + 'GREY' : 'rgb(050, 050, 050)', + 'L_GREY' : 'rgb(150, 150, 150)', + 'VL_GREY' : 'rgb(200, 200, 200)', -DISCRETE_METRICS = ( - { - "id": 0, - "type": "discrete", - "long_name": "Rotamer Classification", - "short_name": "Rota.", - "ring_color": COLORS["L_GREY"], - "seq_colors": (COLORS["RED"], COLORS["ORANGE"], COLORS["GREEN"]), - "seq_labels": ("Outlier", "Allowed", "Favoured"), - "is_covariance": False, - "is_molprobity": False, - "is_reflections": False, - "is_rama_z": False, - "is_rama_classification": False, - }, - { - "id": 1, - "type": "discrete", - "long_name": "Ramachandran Classification", - "short_name": "Rama.", - "ring_color": COLORS["L_GREY"], - "seq_colors": (COLORS["RED"], COLORS["ORANGE"], COLORS["GREEN"]), - "seq_labels": ("Outlier", "Allowed", "Favoured"), - "is_covariance": False, - "is_molprobity": False, - "is_reflections": False, - "is_rama_z": False, - "is_rama_classification": True, - }, - { - "id": 2, - "type": "discrete", - "long_name": "Clash Indicator", - "short_name": "Clashes", - "ring_color": COLORS["L_GREY"], - "seq_colors": (COLORS["RED"], COLORS["ORANGE"], COLORS["GREEN"]), - "seq_labels": ("Multiple Clashes", "One Clash", "No Clashes"), - "is_covariance": False, - "is_molprobity": True, - "is_reflections": False, - "is_rama_z": False, - "is_rama_classification": False, - }, - { - "id": 3, - "type": "discrete", - "long_name": "Misalignment", - "short_name": "CMO", - "ring_color": COLORS["L_GREY"], - "seq_colors": (COLORS["RED"], COLORS["GREEN"]), - "seq_labels": ("Misaligned", "Aligned"), - "is_covariance": True, - "is_molprobity": False, - "is_reflections": False, - "is_rama_z": False, - "is_rama_classification": False, - }, -) + 'RED' : 'rgb(200, 080, 080)', + 'ORANGE' : 'rgb(250, 200, 050)', + 'GREEN' : 'rgb(050, 200, 050)', + + 'BLUE' : 'rgb(050, 050, 200)', + 'CYAN' : 'rgb(050, 200, 200)', + 'TEAL' : 'rgb(000, 120, 120)', + 'SLATE' : 'rgb(120, 160, 200)', + 'MAGENTA' : 'rgb(200, 050, 200)', + 'INDIGO' : 'rgb(080, 000, 120)', + 'L_PINK' : 'rgb(255, 235, 235)', + + 'BAR_GREEN' : 'rgb(090, 237, 141)', + 'BAR_ORANGE' : 'rgb(247, 212, 134)', + 'BAR_RED' : 'rgb(240, 106, 111)' + } + +CONTINUOUS_METRICS = ( { 'id' : 0, + 'type' : 'continuous', + 'long_name' : 'Average B-factor', + 'short_name' : 'Avg. B', + 'ring_color' : COLORS['CYAN'], + 'polarity' : -1, + 'is_covariance' : False, + 'is_molprobity' : False, + 'is_reflections': False, + 'is_rama_z': False, + 'is_rama_classification': False + }, + { 'id' : 1, + 'type' : 'continuous', + 'long_name' : 'Maximum B-factor', + 'short_name' : 'Max. B', + 'ring_color' : COLORS['TEAL'], + 'polarity' : -1, + 'is_covariance' : False, + 'is_molprobity' : False, + 'is_reflections': False, + 'is_rama_z': False, + 'is_rama_classification': False + }, + { 'id' : 2, + 'type' : 'continuous', + 'long_name' : 'Stdev B-factor', + 'short_name' : 'Std. B', + 'ring_color' : COLORS['SLATE'], + 'polarity' : -1, + 'is_covariance' : False, + 'is_molprobity' : False, + 'is_reflections': False, + 'is_rama_z': False, + 'is_rama_classification': False + }, + { 'id' : 3, + 'type' : 'continuous', + 'long_name' : 'Residue Fit', + 'short_name' : 'Res. Fit', + 'ring_color' : COLORS['MAGENTA'], + 'polarity' : -1, + 'is_covariance' : False, + 'is_molprobity' : False, + 'is_reflections': True, + 'is_rama_z': False, + 'is_rama_classification': False + }, + { 'id' : 4, + 'type' : 'continuous', + 'long_name' : 'Main Chain Fit', + 'short_name' : 'Main Fit', + 'ring_color' : COLORS['BLUE'], + 'polarity' : -1, + 'is_covariance' : False, + 'is_molprobity' : False, + 'is_reflections': True, + 'is_rama_z': False, + 'is_rama_classification': False + }, + { 'id' : 5, + 'type' : 'continuous', + 'long_name' : 'Side Chain Fit', + 'short_name' : 'Side Fit', + 'ring_color' : COLORS['INDIGO'], + 'polarity' : -1, + 'is_covariance' : False, + 'is_molprobity' : False, + 'is_reflections': True, + 'is_rama_z': False, + 'is_rama_classification': False + }, + { 'id' : 6, + 'type' : 'continuous', + 'long_name' : 'Covariance Score', + 'short_name' : 'Cov. Score', + 'ring_color' : COLORS['ORANGE'], + 'polarity' : -1, + 'is_covariance' : True, + 'is_molprobity' : False, + 'is_reflections': False, + 'is_rama_z': False, + 'is_rama_classification': False + }, + { 'id' : 7, + 'type' : 'continuous', + 'long_name' : 'Ramachandran z-score', + 'short_name' : 'Rama Z', + 'ring_color' : COLORS['RED'], + 'polarity' : 1, + 'is_covariance' : False, + 'is_molprobity' : False, + 'is_reflections': False, + 'is_rama_z': True, + 'is_rama_classification': False + } + ) + +DISCRETE_METRICS = ( { 'id' : 0, + 'type' : 'discrete', + 'long_name' : 'Rotamer Classification', + 'short_name' : 'Rota.', + 'ring_color' : COLORS['L_GREY'], + 'seq_colors' : (COLORS['RED'], + COLORS['ORANGE'], + COLORS['GREEN']), + 'seq_labels' : ('Outlier', + 'Allowed', + 'Favoured'), + 'is_covariance' : False, + 'is_molprobity' : False, + 'is_reflections': False, + 'is_rama_z': False, + 'is_rama_classification': False + }, + { 'id' : 1, + 'type' : 'discrete', + 'long_name' : 'Ramachandran Classification', + 'short_name' : 'Rama.', + 'ring_color' : COLORS['L_GREY'], + 'seq_colors' : (COLORS['RED'], + COLORS['ORANGE'], + COLORS['GREEN']), + 'seq_labels' : ('Outlier', + 'Allowed', + 'Favoured'), + 'is_covariance' : False, + 'is_molprobity' : False, + 'is_reflections': False, + 'is_rama_z': False, + 'is_rama_classification': True + }, + { 'id' : 2, + 'type' : 'discrete', + 'long_name' : 'Clash Indicator', + 'short_name' : 'Clashes', + 'ring_color' : COLORS['L_GREY'], + 'seq_colors' : (COLORS['RED'], + COLORS['ORANGE'], + COLORS['GREEN']), + 'seq_labels' : ('Multiple Clashes', + 'One Clash', + 'No Clashes'), + 'is_covariance' : False, + 'is_molprobity' : True, + 'is_reflections': False, + 'is_rama_z': False, + 'is_rama_classification': False + }, + { 'id' : 3, + 'type' : 'discrete', + 'long_name' : 'Misalignment', + 'short_name' : 'CMO', + 'ring_color' : COLORS['L_GREY'], + 'seq_colors' : (COLORS['RED'], + COLORS['GREEN']), + 'seq_labels' : ('Misaligned', + 'Aligned'), + 'is_covariance' : True, + 'is_molprobity' : False, + 'is_reflections': False, + 'is_rama_z': False, + 'is_rama_classification': False + } + ) CHAIN_VIEW_GAP_ANGLE = 0.35 RAMACHANDRAN_THRESHOLDS = (0.02, 0.002) -CHAIN_VIEW_RINGS = [ - DISCRETE_METRICS[0], - DISCRETE_METRICS[1], - DISCRETE_METRICS[2], - DISCRETE_METRICS[3], - CONTINUOUS_METRICS[0], - CONTINUOUS_METRICS[1], - CONTINUOUS_METRICS[4], - CONTINUOUS_METRICS[5], - CONTINUOUS_METRICS[6], - CONTINUOUS_METRICS[7], -] +CHAIN_VIEW_RINGS = [ DISCRETE_METRICS[0], + DISCRETE_METRICS[1], + DISCRETE_METRICS[2], + DISCRETE_METRICS[3], + CONTINUOUS_METRICS[0], + CONTINUOUS_METRICS[1], + CONTINUOUS_METRICS[4], + CONTINUOUS_METRICS[5], + CONTINUOUS_METRICS[6], + CONTINUOUS_METRICS[7] ] -RESIDUE_VIEW_BOXES = [ - DISCRETE_METRICS[0], - DISCRETE_METRICS[1], - DISCRETE_METRICS[2], - DISCRETE_METRICS[3], -] +RESIDUE_VIEW_BOXES = [ DISCRETE_METRICS[0], + DISCRETE_METRICS[1], + DISCRETE_METRICS[2], + DISCRETE_METRICS[3] ] -RESIDUE_VIEW_BARS = [CONTINUOUS_METRICS[0], CONTINUOUS_METRICS[5]] +RESIDUE_VIEW_BARS = [ CONTINUOUS_METRICS[0], + CONTINUOUS_METRICS[5] ] diff --git a/iris_validation/graphics/chain.py b/iris_validation/graphics/chain.py index 0a3a401..881e6ad 100644 --- a/iris_validation/graphics/chain.py +++ b/iris_validation/graphics/chain.py @@ -25,315 +25,205 @@ def __init__( self.hidden = hidden self.dwg = None - self.cfa_cache = {} - self.num_rings = len(self.chain_view_rings) - self.num_versions = self.data["num_versions"] - self.num_segments = self.data["aligned_length"] + self.cfa_cache = { } + self.num_rings = len(CHAIN_VIEW_RINGS) + self.num_versions = self.data['num_versions'] + self.num_segments = self.data['aligned_length'] self.center = (self.canvas_size[0] // 2, self.canvas_size[1] // 2) self.full_radius = round(min(self.canvas_size) / 2 - 10, 2) self.division_size = round(self.full_radius / (self.num_rings + 2), 2) self.angle_delta = (2 * pi - CHAIN_VIEW_GAP_ANGLE) / self.num_segments - self.svg_id = f"iris-chain-view-{self.chain_index}" + self.svg_id = f'iris-chain-view-{self.chain_index}' self._draw() def _coords_from_angle(self, angle, radius, gap=True): gap_angle = CHAIN_VIEW_GAP_ANGLE if gap else 0.0 - arg_string = str([self.center, angle, radius, gap_angle]) + arg_string = str([ self.center, angle, radius, gap_angle ]) if arg_string in self.cfa_cache: coords = self.cfa_cache[arg_string] else: - result_x = self.center[0] + radius * sin(angle + gap_angle / 2) - result_y = self.center[1] - radius * cos(angle + gap_angle / 2) + result_x = self.center[0] + radius * sin(angle + gap_angle/2) + result_y = self.center[1] - radius * cos(angle + gap_angle/2) coords = (round(result_x, 1), round(result_y, 1)) self.cfa_cache[arg_string] = coords return coords def _draw(self): # Initialise drawing - self.dwg = svgwrite.Drawing(profile="full") + self.dwg = svgwrite.Drawing(profile='full') # Set HTML attributes - self.dwg.attribs["viewBox"] = "0 0 " + " ".join( - [str(x) for x in self.canvas_size] - ) - self.dwg.attribs["id"] = self.svg_id + self.dwg.attribs['viewBox'] = '0 0 ' + ' '.join([ str(x) for x in self.canvas_size ]) + self.dwg.attribs['id'] = self.svg_id if self.hidden: - self.dwg.attribs["style"] = "display: none;" + self.dwg.attribs['style'] = 'display: none;' # Draw background - self.dwg.add( - self.dwg.circle( - r=self.full_radius, - center=self.center, - fill=COLORS["WHITE"], - fill_opacity=1, - stroke_opacity=0, - ) - ) + self.dwg.add(self.dwg.circle(r=self.full_radius, + center=self.center, + fill=COLORS['WHITE'], + fill_opacity=1, + stroke_opacity=0)) + # Draw data rings for ring_id, ring_metric in enumerate(self.chain_view_rings): self._add_ring(ring_id, ring_metric) # Draw missing-data shade - for version_id, residue_validities in enumerate( - self.data["residue_validities"] - ): - group_opacity = 1 if version_id == self.num_versions - 1 else 0 - shade_group = self.dwg.g( - id=f"{self.svg_id}-shade-{version_id}", opacity=group_opacity - ) + for version_id, residue_validities in enumerate(self.data['residue_validities']): + group_opacity = 1 if version_id == self.num_versions-1 else 0 + shade_group = self.dwg.g(id=f'{self.svg_id}-shade-{version_id}', opacity=group_opacity) for segment_id, residue_valid in enumerate(residue_validities): if not residue_valid: - shade_group.add( - self.dwg.polygon( - [ - self.center, - self._coords_from_angle( - self.angle_delta * segment_id, self.full_radius + 5 - ), - self._coords_from_angle( - self.angle_delta * (segment_id + 1), - self.full_radius + 5, - ), - ], - stroke_opacity=0, - fill=COLORS["L_PINK"], - fill_opacity=1, - ) - ) + shade_group.add(self.dwg.polygon([ self.center, + self._coords_from_angle(self.angle_delta * segment_id, self.full_radius+5), + self._coords_from_angle(self.angle_delta * (segment_id+1), self.full_radius+5) ], + stroke_opacity=0, + fill=COLORS['L_PINK'], + fill_opacity=1)) self.dwg.add(shade_group) # Draw outer rings - self.dwg.add( - self.dwg.circle( - r=self.full_radius - 24, - center=self.center, - fill_opacity=0, - stroke=COLORS["BLACK"], - stroke_width=1, - stroke_opacity=0.5, - ) - ) - self.dwg.add( - self.dwg.circle( - r=self.full_radius - 8, - center=self.center, - fill_opacity=0, - stroke=COLORS["BLACK"], - stroke_width=1, - stroke_opacity=0.5, - ) - ) - for i in range(self.num_segments + 1): - self.dwg.add( - self.dwg.line( - self._coords_from_angle( - self.angle_delta * i, self.full_radius - 24 - ), - self._coords_from_angle(self.angle_delta * i, self.full_radius - 8), - stroke=COLORS["BLACK"], - stroke_width=1, - stroke_opacity=0.5, - ) - ) + self.dwg.add(self.dwg.circle(r=self.full_radius-24, + center=self.center, + fill_opacity=0, + stroke=COLORS['BLACK'], + stroke_width=1, + stroke_opacity=0.5)) + self.dwg.add(self.dwg.circle(r=self.full_radius-8, + center=self.center, + fill_opacity=0, + stroke=COLORS['BLACK'], + stroke_width=1, + stroke_opacity=0.5)) + for i in range(self.num_segments+1): + self.dwg.add(self.dwg.line(self._coords_from_angle(self.angle_delta*i, self.full_radius-24), + self._coords_from_angle(self.angle_delta*i, self.full_radius-8), + stroke=COLORS['BLACK'], + stroke_width=1, + stroke_opacity=0.5)) # Draw segment selector - center_point = self.angle_delta * 0.5 - selector_points = ( - self._coords_from_angle(center_point, self.full_radius - 16), - self._coords_from_angle(center_point - 0.02, self.full_radius - 8), - self._coords_from_angle(center_point - 0.02, self.full_radius + 8), - self._coords_from_angle(center_point + 0.02, self.full_radius + 8), - self._coords_from_angle(center_point + 0.02, self.full_radius - 8), - ) - self.dwg.add( - self.dwg.polygon( - selector_points, - stroke=COLORS["BLACK"], - stroke_width=2, - stroke_opacity=1, - fill=COLORS["GREY"], - fill_opacity=0.2, - id=f"{self.svg_id}-residue-selector", - ) - ) + center_point = self.angle_delta*0.5 + selector_points = (self._coords_from_angle(center_point, self.full_radius-16), + self._coords_from_angle(center_point-0.02, self.full_radius-8), + self._coords_from_angle(center_point-0.02, self.full_radius+8), + self._coords_from_angle(center_point+0.02, self.full_radius+8), + self._coords_from_angle(center_point+0.02, self.full_radius-8)) + self.dwg.add(self.dwg.polygon(selector_points, + stroke=COLORS['BLACK'], + stroke_width=2, + stroke_opacity=1, + fill=COLORS['GREY'], + fill_opacity=0.2, + id=f'{self.svg_id}-residue-selector')) # Draw interaction segments for segment_id in range(self.num_segments): - self.dwg.add( - self.dwg.polygon( - [ - self.center, - self._coords_from_angle( - self.angle_delta * segment_id, self.full_radius + 5 - ), - self._coords_from_angle( - self.angle_delta * (segment_id + 1), self.full_radius + 5 - ), - ], - stroke=COLORS["BLACK"], - stroke_width=1, - stroke_opacity=0, - fill=COLORS["L_GREY"], - fill_opacity=0, - onmousedown=f"handleSegment(1, {segment_id});", - onmouseover=f"handleSegment(2, {segment_id});", - onmouseup=f"handleSegment(3, {segment_id});", - id=f"{self.svg_id}-interaction-segment-{segment_id}", - ) - ) - self.dwg.add( - self.dwg.circle( - r=1.5 * self.division_size, - center=self.center, - fill=COLORS["WHITE"], - fill_opacity=1, - stroke_opacity=0, - ) - ) + self.dwg.add(self.dwg.polygon([ self.center, + self._coords_from_angle(self.angle_delta * segment_id, self.full_radius+5), + self._coords_from_angle(self.angle_delta * (segment_id+1), self.full_radius+5) ], + stroke=COLORS['BLACK'], + stroke_width=1, + stroke_opacity=0, + fill=COLORS['L_GREY'], + fill_opacity=0, + onmousedown=f'handleSegment(1, {segment_id});', + onmouseover=f'handleSegment(2, {segment_id});', + onmouseup=f'handleSegment(3, {segment_id});', + id=f'{self.svg_id}-interaction-segment-{segment_id}')) + self.dwg.add(self.dwg.circle(r=1.5*self.division_size, + center=self.center, + fill=COLORS['WHITE'], + fill_opacity=1, + stroke_opacity=0)) # Draw center text - self.dwg.add( - self.dwg.text( - text="Iris", - insert=(self.center[0], self.center[1] - 24), - font_size=1.5 * 16, - font_family="Arial", - font_weight="bold", - text_anchor="middle", - alignment_baseline="central", - ) - ) - self.dwg.add( - self.dwg.text( - text="Chain " + self.data["chain_id"], - insert=(self.center[0], self.center[1] + 16), - font_size=16, - font_family="Arial", - text_anchor="middle", - alignment_baseline="central", - ) - ) - if self.data["has_molprobity"]: - self.dwg.add( - self.dwg.text( - text="MolProbity", - insert=(self.center[0], self.center[1] + 48), - font_size=16, - font_family="Arial", - text_anchor="middle", - alignment_baseline="central", - fill=COLORS["L_GREY"], - ) - ) + self.dwg.add(self.dwg.text(text='Iris', + insert=(self.center[0], self.center[1]-24), + font_size=1.5*16, + font_family='Arial', + font_weight='bold', + text_anchor='middle', + alignment_baseline='central')) + self.dwg.add(self.dwg.text(text='Chain ' + self.data['chain_id'], + insert=(self.center[0], self.center[1]+16), + font_size=16, + font_family='Arial', + text_anchor='middle', + alignment_baseline='central')) + if self.data['has_molprobity']: + self.dwg.add(self.dwg.text(text='MolProbity', + insert=(self.center[0], self.center[1]+48), + font_size=16, + font_family='Arial', + text_anchor='middle', + alignment_baseline='central', + fill=COLORS['L_GREY'])) def _add_ring(self, ring_id, metric): - datapoints = self.data[metric["type"] + "_values"][metric["id"]] + datapoints = self.data[metric['type'] + '_values'][metric['id']] # Draw axes ring_base_radius = (ring_id + 2) * self.division_size - self.dwg.add( - self.dwg.circle( - r=ring_base_radius, - center=self.center, - fill_opacity=0, - stroke=metric["ring_color"], - stroke_width=1, - stroke_opacity=1, - ) - ) - self.dwg.add( - self.dwg.polyline( - [ - self._coords_from_angle( - (CHAIN_VIEW_GAP_ANGLE / 25) * (i - (20 - 1) / 2), - ring_base_radius, - gap=False, - ) - for i in range(20) - ], - stroke=metric["ring_color"], - stroke_width=3, - stroke_opacity=1, - fill_opacity=0, - ) - ) - self.dwg.add( - self.dwg.text( - text=metric["short_name"], - insert=self._coords_from_angle(0, ring_base_radius + 12, gap=False), - font_size=16, - font_family="Arial", - text_anchor="middle", - alignment_baseline="central", - ) - ) + self.dwg.add(self.dwg.circle(r=ring_base_radius, + center=self.center, + fill_opacity=0, + stroke=metric['ring_color'], + stroke_width=1, + stroke_opacity=1)) + self.dwg.add(self.dwg.polyline([ self._coords_from_angle((CHAIN_VIEW_GAP_ANGLE/25)*(i-(20-1)/2), ring_base_radius, gap=False) for i in range(20) ], + stroke=metric['ring_color'], + stroke_width=3, + stroke_opacity=1, + fill_opacity=0)) + self.dwg.add(self.dwg.text(text=metric['short_name'], + insert=self._coords_from_angle(0, ring_base_radius+12, gap=False), + font_size=16, + font_family='Arial', + text_anchor='middle', + alignment_baseline='central')) - if metric["type"] == "discrete": + if metric['type'] == 'discrete': for version_id, version_datapoints in enumerate(datapoints): - version_ring_segments = [] + version_ring_segments = [ ] for segment_id, datapoint in enumerate(version_datapoints): segment_length = 10 - segment_color = metric["seq_colors"][-1] + segment_color = metric['seq_colors'][-1] segment_opacity = 1 - if datapoint is not None and 0 <= datapoint < len( - metric["seq_colors"] - ): - segment_color = metric["seq_colors"][datapoint] - if segment_color == metric["seq_colors"][-1]: + if datapoint is not None and 0 <= datapoint < len(metric['seq_colors']): + segment_color = metric['seq_colors'][datapoint] + if segment_color == metric['seq_colors'][-1]: segment_opacity = 0.5 - segment_points = ( - self._coords_from_angle( - self.angle_delta * (segment_id), - ring_base_radius - segment_length, - ), - self._coords_from_angle( - self.angle_delta * (segment_id), - ring_base_radius + segment_length, - ), - self._coords_from_angle( - self.angle_delta * (segment_id + 1), - ring_base_radius + segment_length, - ), - self._coords_from_angle( - self.angle_delta * (segment_id + 1), - ring_base_radius - segment_length, - ), - ) - version_ring_segments.append( - (segment_points, segment_color, segment_opacity) - ) - group_opacity = 1 if version_id == self.num_versions - 1 else 0 - segment_group = self.dwg.g( - id=f"{self.svg_id}-discrete-{version_id}-{ring_id}", - opacity=group_opacity, - ) - for ( - segment_points, - segment_color, - segment_opacity, - ) in version_ring_segments: - segment_group.add( - self.dwg.polyline( - segment_points, - stroke_width=0, - stroke_opacity=0, - fill=segment_color, - fill_opacity=segment_opacity, - ) - ) + segment_points = (self._coords_from_angle(self.angle_delta * (segment_id), + ring_base_radius - segment_length), + self._coords_from_angle(self.angle_delta * (segment_id), + ring_base_radius + segment_length), + self._coords_from_angle(self.angle_delta * (segment_id+1), + ring_base_radius + segment_length), + self._coords_from_angle(self.angle_delta * (segment_id+1), + ring_base_radius - segment_length)) + version_ring_segments.append((segment_points, segment_color, segment_opacity)) + group_opacity = 1 if version_id == self.num_versions-1 else 0 + segment_group = self.dwg.g(id=f'{self.svg_id}-discrete-{version_id}-{ring_id}', opacity=group_opacity) + for segment_points, segment_color, segment_opacity in version_ring_segments: + segment_group.add(self.dwg.polyline(segment_points, + stroke_width=0, + stroke_opacity=0, + fill=segment_color, + fill_opacity=segment_opacity)) self.dwg.add(segment_group) - elif metric["type"] == "continuous": + elif metric['type'] == 'continuous': # Get mean metric value - all_valid_values = [] + all_valid_values = [ ] for version_datapoints in datapoints: for datapoint in version_datapoints: if datapoint is None: continue - value = datapoint * metric["polarity"] + value = datapoint * metric['polarity'] all_valid_values.append(value) ring_avg = 0 if len(all_valid_values) == 0: @@ -341,44 +231,36 @@ def _add_ring(self, ring_id, metric): ring_avg = sum(all_valid_values) / len(all_valid_values) # Calculate deltas from the ring average - deltas = [] + deltas = [ ] for version_datapoints in datapoints: - version_deltas = [] + version_deltas = [ ] for datapoint in version_datapoints: delta = None if datapoint is not None: - value = datapoint * metric["polarity"] + value = datapoint * metric['polarity'] delta = value - ring_avg version_deltas.append(delta) deltas.append(version_deltas) # Calculate average negative delta in the latest dataset - latest_negative_deltas = [x for x in deltas[-1] if x is not None and x < 0] + latest_negative_deltas = [ x for x in deltas[-1] if x is not None and x < 0 ] avg_negative_delta = 0 if len(latest_negative_deltas) > 0: - avg_negative_delta = sum(latest_negative_deltas) / len( - latest_negative_deltas - ) + avg_negative_delta = sum(latest_negative_deltas) / len(latest_negative_deltas) # Subtract the average negative delta from all deltas to calculate 'magnitudes' - magnitudes = [] - all_valid_magnitudes = [] + magnitudes = [ ] + all_valid_magnitudes = [ ] for version_deltas in deltas: - version_magnitudes = [ - x - avg_negative_delta if x is not None else None - for x in version_deltas - ] - all_valid_magnitudes += [x for x in version_magnitudes if x is not None] + version_magnitudes = [ x - avg_negative_delta if x is not None else None for x in version_deltas ] + all_valid_magnitudes += [ x for x in version_magnitudes if x is not None ] magnitudes.append(version_magnitudes) - magnitude_min, magnitude_max = ( - min(all_valid_magnitudes), - max(all_valid_magnitudes), - ) + magnitude_min, magnitude_max = (min(all_valid_magnitudes), max(all_valid_magnitudes)) # Calculate plot magnitudes - plot_magnitudes = [] + plot_magnitudes = [ ] for version_magnitudes in magnitudes: - version_plot_magnitudes = [] + version_plot_magnitudes = [ ] for magnitude in version_magnitudes: if magnitude is None: version_plot_magnitudes.append(None) @@ -392,12 +274,10 @@ def _add_ring(self, ring_id, metric): plot_magnitudes.append(version_plot_magnitudes) # Calculate plot point coordinates - line_points = [] + line_points = [ ] for version_plot_magnitudes in plot_magnitudes: - version_line_points = [] - zero_point = self._coords_from_angle( - self.angle_delta * 0.5, ring_base_radius - ) + version_line_points = [ ] + zero_point = self._coords_from_angle(self.angle_delta*0.5, ring_base_radius) version_line_points.append(zero_point) for segment_id, plot_magnitude in enumerate(version_plot_magnitudes): angle = self.angle_delta * (segment_id + 0.5) @@ -409,39 +289,27 @@ def _add_ring(self, ring_id, metric): line_points.append(version_line_points) # Draw line - baseline_circle_points = [] + baseline_circle_points = [ ] baseline_point_resolution = 200 for point_id in range(baseline_point_resolution + 1): - point_angle = ( - (baseline_point_resolution - point_id) - * (2 * pi - CHAIN_VIEW_GAP_ANGLE) - / baseline_point_resolution - ) - baseline_circle_points.append( - self._coords_from_angle(point_angle, ring_base_radius) - ) + point_angle = (baseline_point_resolution - point_id) * (2*pi - CHAIN_VIEW_GAP_ANGLE) / baseline_point_resolution + baseline_circle_points.append(self._coords_from_angle(point_angle, ring_base_radius)) plot_points = line_points[-1] + baseline_circle_points - ring_line = self.dwg.polyline( - plot_points, - stroke=metric["ring_color"], - stroke_width=2, - stroke_opacity=1, - fill=metric["ring_color"], - fill_opacity=0.2, - ) + ring_line = self.dwg.polyline(plot_points, + stroke=metric['ring_color'], + stroke_width=2, + stroke_opacity=1, + fill=metric['ring_color'], + fill_opacity=0.2) for version_id, version_line_points in enumerate(line_points): plot_points = version_line_points + baseline_circle_points - points_string = " ".join( - [",".join([str(x) for x in point]) for point in plot_points] - ) - animation = Animate( - values=None, - dur="250ms", - begin="indefinite", - fill="freeze", - attributeName="points", - to=points_string, - id=f"{self.svg_id}-animation-{version_id}-{ring_id}", - ) + points_string = ' '.join([ ','.join([ str(x) for x in point ]) for point in plot_points ]) + animation = Animate(values=None, + dur='250ms', + begin='indefinite', + fill='freeze', + attributeName='points', + to=points_string, + id=f'{self.svg_id}-animation-{version_id}-{ring_id}') ring_line.add(animation) self.dwg.add(ring_line) diff --git a/iris_validation/graphics/panel.py b/iris_validation/graphics/panel.py index 723351c..4e20ccf 100644 --- a/iris_validation/graphics/panel.py +++ b/iris_validation/graphics/panel.py @@ -18,9 +18,9 @@ ) -JS_PATH = os.path.join(os.path.dirname(__file__), "js") -JS_CONSTANTS_PATH = os.path.join(JS_PATH, "constants.js") -JS_INTERACTION_PATH = os.path.join(JS_PATH, "interaction.js") +JS_PATH = os.path.join(os.path.dirname(__file__), 'js') +JS_CONSTANTS_PATH = os.path.join(JS_PATH, 'constants.js') +JS_INTERACTION_PATH = os.path.join(JS_PATH, 'interaction.js') class Panel: @@ -57,10 +57,10 @@ def __init__( self.javascript = None self.chain_views = None self.residue_view = None - self.num_models = self.data[0]["num_versions"] - self.chain_ids = [chain_data["chain_id"] for chain_data in self.data] - self.swtich_colors = [COLORS["VL_GREY"], COLORS["CYAN"]] - self.svg_id = "iris-panel" + self.num_models = self.data[0]['num_versions'] + self.chain_ids = [ chain_data['chain_id'] for chain_data in self.data ] + self.swtich_colors = [ COLORS['VL_GREY'], COLORS['CYAN'] ] + self.svg_id = 'iris-panel' self._verify_chosen_metrics() self._generate_javascript() @@ -76,47 +76,32 @@ def _verify_chosen_metrics(self): self.residue_view_bars, ): if not isinstance(metric_list, list): - raise ValueError("Chosen metrics in the _defs.py file must be lists") + raise ValueError('Chosen metrics in the _defs.py file must be lists') for metric_index in reversed(range(len(metric_list))): - if ( - metric_list[metric_index]["is_covariance"] - and not self.data[0]["has_covariance"] - ): + if (metric_list[metric_index]['is_covariance'] and not self.data[0]['has_covariance']): del metric_list[metric_index] - elif ( - metric_list[metric_index]["is_molprobity"] - and not self.data[0]["has_molprobity"] - ): + elif (metric_list[metric_index]['is_molprobity'] and not self.data[0]['has_molprobity']): del metric_list[metric_index] - elif ( - metric_list[metric_index]["is_reflections"] - and not self.data[0]["has_reflections"] - ): + elif (metric_list[metric_index]['is_reflections'] and not self.data[0]['has_reflections']): del metric_list[metric_index] - elif ( - metric_list[metric_index]["is_rama_z"] - and not self.data[0]["has_rama_z"] - ): + elif (metric_list[metric_index]['is_rama_z'] and not self.data[0]['has_rama_z']): del metric_list[metric_index] - elif ( - metric_list[metric_index]["is_rama_classification"] - and self.data[0]["has_rama_z"] - ): + elif (metric_list[metric_index]['is_rama_classification'] and self.data[0]['has_rama_z']): del metric_list[metric_index] def _generate_javascript(self): json_data = json.dumps(self.data) num_chains = len(self.chain_ids) bar_metric_ids = [metric["id"] for metric in self.residue_view_bars] - box_metric_ids = [metric["id"] for metric in RESIDUE_VIEW_BOXES] - box_colors = json.dumps([metric["seq_colors"] for metric in RESIDUE_VIEW_BOXES]) - box_labels = json.dumps([metric["seq_labels"] for metric in RESIDUE_VIEW_BOXES]) + box_metric_ids = [ metric['id'] for metric in RESIDUE_VIEW_BOXES ] + box_colors = json.dumps([ metric['seq_colors'] for metric in RESIDUE_VIEW_BOXES ]) + box_labels = json.dumps([ metric['seq_labels'] for metric in RESIDUE_VIEW_BOXES ]) gap_degrees = CHAIN_VIEW_GAP_ANGLE * 180 / math.pi - with open(JS_CONSTANTS_PATH, "r", encoding="utf8") as infile: + with open(JS_CONSTANTS_PATH, 'r', encoding='utf8') as infile: js_constants = infile.read() - with open(JS_INTERACTION_PATH, "r", encoding="utf8") as infile: + with open(JS_INTERACTION_PATH, 'r', encoding='utf8') as infile: js_interation = infile.read() js_constants = js_constants.format( @@ -134,7 +119,7 @@ def _generate_javascript(self): self.javascript = js_constants + js_interation def _generate_subviews(self): - self.chain_views = [] + self.chain_views = [ ] for chain_index, chain_data in enumerate(self.data): chain_view = ChainView( chain_data, @@ -155,260 +140,167 @@ def _draw(self): view_title_font = 24 button_width = 38 button_height = 32 - view_width, view_height = [dim - view_border for dim in self.canvas_size] - view_divider_x = round(2 / 3 * view_width, 2) - chain_view_bounds = ( - view_border, - view_border, - view_divider_x - round(middle_gap / 2, 2), - view_height, - ) - residue_view_bounds = ( - view_divider_x + round(middle_gap / 2, 2), - view_border, - view_width, - view_height, - ) + view_width, view_height = [ dim - view_border for dim in self.canvas_size ] + view_divider_x = round(2/3 * view_width, 2) + chain_view_bounds = (view_border, + view_border, + view_divider_x - round(middle_gap/2, 2), + view_height) + residue_view_bounds = (view_divider_x + round(middle_gap/2, 2), + view_border, + view_width, + view_height) # Initialise drawing - self.dwg = svgwrite.Drawing(profile="full") + self.dwg = svgwrite.Drawing(profile='full') # Disable text selection - self.dwg.attribs["style"] = "user-select: none;" + self.dwg.attribs['style'] = 'user-select: none;' # Draw background - self.dwg.add( - self.dwg.polygon( - points=[ - (0, 0), - (0, self.canvas_size[1]), - (self.canvas_size[0], self.canvas_size[1]), - (self.canvas_size[0], 0), - ], - fill=COLORS["WHITE"], - fill_opacity=1, - stroke_opacity=0, - ) - ) + self.dwg.add(self.dwg.polygon(points=[ (0, 0), + (0, self.canvas_size[1]), + (self.canvas_size[0], self.canvas_size[1]), + (self.canvas_size[0], 0) ], + fill=COLORS['WHITE'], + fill_opacity=1, + stroke_opacity=0)) # Set HTML attributes - self.dwg.attribs["viewBox"] = "0 0 " + " ".join( - [str(x) for x in self.canvas_size] - ) - self.dwg.attribs["id"] = self.svg_id + self.dwg.attribs['viewBox'] = '0 0 ' + ' '.join([ str(x) for x in self.canvas_size ]) + self.dwg.attribs['id'] = self.svg_id # Add JavaScript self.dwg.defs.add(self.dwg.script(content=self.javascript)) # View titles and divider lines - self.dwg.add( - self.dwg.text( - text="Chain", - insert=(chain_view_bounds[0], chain_view_bounds[1] + view_title_font), - font_size=view_title_font, - font_family="Arial", - ) - ) - - self.dwg.add( - self.dwg.text( - text="Residue", - insert=( - residue_view_bounds[0], - residue_view_bounds[1] + view_title_font, - ), - font_size=view_title_font, - font_family="Arial", - id=f"{self.svg_id}-residue-summary", - ) - ) - - self.dwg.add( - self.dwg.line( - (chain_view_bounds[0], chain_view_bounds[1] + 40), - (chain_view_bounds[2], chain_view_bounds[1] + 40), - stroke=COLORS["BLACK"], - stroke_width=2, - ) - ) - - self.dwg.add( - self.dwg.line( - (residue_view_bounds[0], residue_view_bounds[1] + 40), - (residue_view_bounds[2], residue_view_bounds[1] + 40), - stroke=COLORS["BLACK"], - stroke_width=2, - ) - ) + self.dwg.add(self.dwg.text(text='Chain', + insert=(chain_view_bounds[0], chain_view_bounds[1]+view_title_font), + font_size=view_title_font, + font_family='Arial')) + + self.dwg.add(self.dwg.text(text='Residue', + insert=(residue_view_bounds[0], residue_view_bounds[1]+view_title_font), + font_size=view_title_font, + font_family='Arial', + id=f'{self.svg_id}-residue-summary')) + + self.dwg.add(self.dwg.line((chain_view_bounds[0], chain_view_bounds[1]+40), + (chain_view_bounds[2], chain_view_bounds[1]+40), + stroke=COLORS['BLACK'], + stroke_width=2)) + + self.dwg.add(self.dwg.line((residue_view_bounds[0], residue_view_bounds[1]+40), + (residue_view_bounds[2], residue_view_bounds[1]+40), + stroke=COLORS['BLACK'], + stroke_width=2)) # Chain selector buttons for chain_index, chain_id in enumerate(self.chain_ids[:12]): - selector_color = ( - self.swtich_colors[1] if chain_index == 0 else self.swtich_colors[0] - ) - self.dwg.add( - self.dwg.rect( - insert=( - chain_view_bounds[0] + 75 + 50 * chain_index, - chain_view_bounds[1], - ), - size=(button_width, button_height), - rx=5, - stroke_opacity=0, - fill_opacity=0.5, - fill=selector_color, - id=f"{self.svg_id}-chain-selector-{chain_index}", - ) - ) - - self.dwg.add( - self.dwg.text( - text=chain_id, - insert=( - chain_view_bounds[0] + 75 + button_width / 2 + 50 * chain_index, - chain_view_bounds[1] + button_height / 2, - ), - font_size=view_title_font, - font_family="Arial", - text_anchor="middle", - alignment_baseline="central", - ) - ) - - self.dwg.add( - self.dwg.rect( - insert=( - chain_view_bounds[0] + 75 + 50 * chain_index, - chain_view_bounds[1], - ), - size=(button_width, button_height), - rx=5, - stroke_opacity=0, - fill_opacity=0, - onmouseover="setPointer();", - onmouseout="unsetPointer();", - onclick=f"setChain({chain_index});", - ) - ) + selector_color = self.swtich_colors[1] if chain_index == 0 else self.swtich_colors[0] + self.dwg.add(self.dwg.rect(insert=(chain_view_bounds[0] + 75 + 50*chain_index, chain_view_bounds[1]), + size=(button_width, button_height), + rx=5, + stroke_opacity=0, + fill_opacity=0.5, + fill=selector_color, + id=f'{self.svg_id}-chain-selector-{chain_index}')) + + self.dwg.add(self.dwg.text(text=chain_id, + insert=(chain_view_bounds[0] + 75 + button_width/2 + 50*chain_index, chain_view_bounds[1] + button_height/2), + font_size=view_title_font, + font_family='Arial', + text_anchor='middle', + alignment_baseline='central')) + + self.dwg.add(self.dwg.rect(insert=(chain_view_bounds[0] + 75 + 50*chain_index, chain_view_bounds[1]), + size=(button_width, button_height), + rx=5, + stroke_opacity=0, + fill_opacity=0, + onmouseover='setPointer();', + onmouseout='unsetPointer();', + onclick=f'setChain({chain_index});')) # Extra chains dropdown # TODO: finish this if len(self.chain_ids) > 12: chain_index = 12 selector_color = self.swtich_colors[0] - self.dwg.add( - self.dwg.rect( - insert=( - chain_view_bounds[0] + 75 + 50 * chain_index, - chain_view_bounds[1], - ), - size=(38, 32), - rx=5, - stroke_opacity=0, - fill_opacity=0.5, - fill=selector_color, - id=f"{self.svg_id}-chain-selector-dropdown", - ) - ) - - self.dwg.add( - self.dwg.text( - text="...", - insert=( - chain_view_bounds[0] + 85 + 50 * chain_index, - chain_view_bounds[1] + view_title_font, - ), - font_size=view_title_font, - font_family="Arial", - ) - ) - - self.dwg.add( - self.dwg.rect( - insert=( - chain_view_bounds[0] + 75 + 50 * chain_index, - chain_view_bounds[1], - ), - size=(38, 32), - rx=5, - stroke_opacity=0, - fill_opacity=0, - onmouseover="setPointer();", - onmouseout="unsetPointer();", - onclick=f"toggleDropdown();", - ) - ) + self.dwg.add(self.dwg.rect(insert=(chain_view_bounds[0] + 75 + 50*chain_index, chain_view_bounds[1]), + size=(38, 32), + rx=5, + stroke_opacity=0, + fill_opacity=0.5, + fill=selector_color, + id=f'{self.svg_id}-chain-selector-dropdown')) + + self.dwg.add(self.dwg.text(text='...', + insert=(chain_view_bounds[0] + 85 + 50*chain_index, chain_view_bounds[1]+view_title_font), + font_size=view_title_font, + font_family='Arial')) + + self.dwg.add(self.dwg.rect(insert=(chain_view_bounds[0] + 75 + 50*chain_index, chain_view_bounds[1]), + size=(38, 32), + rx=5, + stroke_opacity=0, + fill_opacity=0, + onmouseover='setPointer();', + onmouseout='unsetPointer();', + onclick=f'toggleDropdown();')) # Version toggle switch - self.dwg.add( - self.dwg.text( - text="Previous", - insert=(chain_view_bounds[2] - 215, chain_view_bounds[1] + 20), - font_size=16, - font_family="Arial", - ) - ) + self.dwg.add(self.dwg.text(text='Previous', + insert=(chain_view_bounds[2]-215, chain_view_bounds[1]+20), + font_size=16, + font_family='Arial')) - self.dwg.add( - self.dwg.text( - text="Latest", - insert=(chain_view_bounds[2] - 55, chain_view_bounds[1] + 20), - font_size=16, - font_family="Arial", - ) - ) + self.dwg.add(self.dwg.text(text='Latest', + insert=(chain_view_bounds[2]-55, chain_view_bounds[1]+20), + font_size=16, + font_family='Arial')) if self.num_models > 1: - switch_group = self.dwg.g( - id=f"{self.svg_id}-switch", - onmouseover="setPointer();", - onmouseout="unsetPointer();", - onclick="toggleVersion();", - ) + switch_group = self.dwg.g(id=f'{self.svg_id}-switch', + onmouseover='setPointer();', + onmouseout='unsetPointer();', + onclick='toggleVersion();') else: - switch_group = self.dwg.g(id=f"{self.svg_id}-switch") - - switch_rectangle = self.dwg.rect( - insert=(chain_view_bounds[2] - 140, chain_view_bounds[1]), - size=(70, 30), - rx=15, - stroke_opacity=0, - fill_opacity=1, - fill=self.swtich_colors[1], - ) + switch_group = self.dwg.g(id=f'{self.svg_id}-switch') + + switch_rectangle = self.dwg.rect(insert=(chain_view_bounds[2]-140, chain_view_bounds[1]), + size=(70, 30), + rx=15, + stroke_opacity=0, + fill_opacity=1, + fill=self.swtich_colors[1]) for version_id in range(2): - animation = Animate( - values=None, - dur="250ms", - begin="indefinite", - fill="freeze", - attributeName="fill", - to=self.swtich_colors[version_id], - id=f"{self.svg_id}-switch-color-animation-{version_id}", - ) + animation = Animate(values=None, + dur='250ms', + begin='indefinite', + fill='freeze', + attributeName='fill', + to=self.swtich_colors[version_id], + id=f'{self.svg_id}-switch-color-animation-{version_id}') switch_rectangle.add(animation) switch_group.add(switch_rectangle) - switch_circle = self.dwg.circle( - r=10, - center=(chain_view_bounds[2] - 85, chain_view_bounds[1] + 15), - stroke_opacity=0, - fill_opacity=1, - fill=COLORS["WHITE"], - ) + switch_circle = self.dwg.circle(r=10, + center=(chain_view_bounds[2]-85, chain_view_bounds[1]+15), + stroke_opacity=0, + fill_opacity=1, + fill=COLORS['WHITE']) for version_id in range(2): - animation = Animate( - values=None, - dur="250ms", - begin="indefinite", - fill="freeze", - attributeName="cx", - to=(chain_view_bounds[2] - 125, chain_view_bounds[2] - 85)[version_id], - id=f"{self.svg_id}-switch-move-animation-{version_id}", - ) + animation = Animate(values=None, + dur='250ms', + begin='indefinite', + fill='freeze', + attributeName='cx', + to=(chain_view_bounds[2]-125, chain_view_bounds[2]-85)[version_id], + id=f'{self.svg_id}-switch-move-animation-{version_id}') switch_circle.add(animation) switch_group.add(switch_circle) @@ -426,10 +318,8 @@ def _draw(self): width_buffer = -((viewbox_width - 1000) / 2) height_buffer = -((viewbox_height - 1000 - 50) / 2 + 50) for chain_view in self.chain_views: - chain_view.attribs["x"] = str(view_adj_x) - chain_view.attribs[ - "viewBox" - ] = f"{width_buffer} {height_buffer} {viewbox_width} {viewbox_height}" + chain_view.attribs['x'] = str(view_adj_x) + chain_view.attribs['viewBox'] = f'{width_buffer} {height_buffer} {viewbox_width} {viewbox_height}' self.dwg.add(chain_view) # *** Residue view view_mid_x = (residue_view_bounds[0] + residue_view_bounds[2]) / 2 @@ -440,12 +330,10 @@ def _draw(self): viewbox_height = int(round(1000**2 / view_space_height)) width_buffer = -((viewbox_width - 400) / 2) height_buffer = -((viewbox_height - 1000 - 50) / 2 + 50) - self.residue_view.attribs["x"] = str(view_adj_x) - self.residue_view.attribs[ - "viewBox" - ] = f"{width_buffer} {height_buffer} {viewbox_width} {viewbox_height}" + self.residue_view.attribs['x'] = str(view_adj_x) + self.residue_view.attribs['viewBox'] = f'{width_buffer} {height_buffer} {viewbox_width} {viewbox_height}' self.dwg.add(self.residue_view) - + def get_chain_view_rings( self, continuous_metrics_to_display, discrete_metrics_to_display=None ): diff --git a/iris_validation/graphics/residue.py b/iris_validation/graphics/residue.py index f6ae360..472f987 100644 --- a/iris_validation/graphics/residue.py +++ b/iris_validation/graphics/residue.py @@ -15,12 +15,12 @@ def __init__( self.canvas_size = canvas_size self.dwg = None - self.svg_id = "iris-residue-view" + self.svg_id = 'iris-residue-view' self.residue_view_bars = RESIDUE_VIEW_BARS if ResidueViewBars_inp: self.residue_view_bars = ResidueViewBars_inp - self.box_names = [metric["short_name"] for metric in RESIDUE_VIEW_BOXES] - self.bar_names = [metric["long_name"] for metric in self.residue_view_bars] + self.box_names = [ metric['short_name'] for metric in RESIDUE_VIEW_BOXES ] + self.bar_names = [ metric['long_name'] for metric in RESIDUE_VIEW_BARS ] # TODO: allow any number of bars self.bar_names = self.bar_names[:2] @@ -39,146 +39,96 @@ def _draw(self): left_indent = 35 # Initialise drawing - self.dwg = svgwrite.Drawing(profile="full") + self.dwg = svgwrite.Drawing(profile='full') # Set HTML attributes - self.dwg.attribs["viewBox"] = "0 0 " + " ".join( - [str(x) for x in self.canvas_size] - ) - self.dwg.attribs["id"] = self.svg_id + self.dwg.attribs['viewBox'] = '0 0 ' + ' '.join([ str(x) for x in self.canvas_size ]) + self.dwg.attribs['id'] = self.svg_id # Draw background - self.dwg.add( - self.dwg.polygon( - points=[ - (0, 0), - (0, self.canvas_size[1]), - (self.canvas_size[0], self.canvas_size[1]), - (self.canvas_size[0], 0), - ], - fill=COLORS["WHITE"], - fill_opacity=1, - stroke_opacity=0, - ) - ) + self.dwg.add(self.dwg.polygon(points=[ (0, 0), + (0, self.canvas_size[1]), + (self.canvas_size[0], self.canvas_size[1]), + (self.canvas_size[0], 0) ], + fill=COLORS['WHITE'], + fill_opacity=1, + stroke_opacity=0)) # Boxes for box_id, box_title in enumerate(self.box_names): - box_bounds = [ - 0.25 * self.canvas_size[0] + left_indent, - top_margin + 70 * box_id, - self.canvas_size[0], - top_margin + 70 * box_id + 50, - ] - - self.dwg.add( - self.dwg.polygon( - points=[ - (left_indent, box_bounds[1]), - (box_bounds[0], box_bounds[1]), - (box_bounds[0], box_bounds[3]), - (left_indent, box_bounds[3]), - ], - fill=COLORS["VL_GREY"], - fill_opacity=0.5, - stroke=COLORS["BLACK"], - stroke_width=2, - stroke_opacity=1, - ) - ) - self.dwg.add( - self.dwg.polygon( - points=[ - (box_bounds[0], box_bounds[1]), - (box_bounds[2], box_bounds[1]), - (box_bounds[2], box_bounds[3]), - (box_bounds[0], box_bounds[3]), - ], - fill=COLORS["VL_GREY"], - fill_opacity=0.75, - stroke=COLORS["BLACK"], - stroke_width=2, - stroke_opacity=1, - id=f"{self.svg_id}-box-{box_id}", - ) - ) - self.dwg.add( - self.dwg.text( - "", - insert=( - (box_bounds[0] + box_bounds[2]) / 2, - (box_bounds[1] + box_bounds[3]) / 2, - ), - font_size=20, - font_family="Arial", - font_weight="bold", - fill=COLORS["BLACK"], - fill_opacity=1, - text_anchor="middle", - alignment_baseline="central", - id=f"{self.svg_id}-box-{box_id}-text", - ) - ) - self.dwg.add( - self.dwg.text( - box_title, - insert=( - left_indent + 0.125 * self.canvas_size[0], - (box_bounds[1] + box_bounds[3]) / 2, - ), - font_size=18, - font_family="Arial", - fill=COLORS["BLACK"], - fill_opacity=1, - text_anchor="middle", - alignment_baseline="central", - ) - ) + box_bounds = [ 0.25*self.canvas_size[0]+left_indent, + top_margin + 70*box_id, + self.canvas_size[0], + top_margin + 70*box_id + 50 ] + + self.dwg.add(self.dwg.polygon(points=[ (left_indent, box_bounds[1]), + (box_bounds[0], box_bounds[1]), + (box_bounds[0], box_bounds[3]), + (left_indent, box_bounds[3]) ], + fill=COLORS['VL_GREY'], + fill_opacity=0.5, + stroke=COLORS['BLACK'], + stroke_width=2, + stroke_opacity=1)) + self.dwg.add(self.dwg.polygon(points=[ (box_bounds[0], box_bounds[1]), + (box_bounds[2], box_bounds[1]), + (box_bounds[2], box_bounds[3]), + (box_bounds[0], box_bounds[3]) ], + fill=COLORS['VL_GREY'], + fill_opacity=0.75, + stroke=COLORS['BLACK'], + stroke_width=2, + stroke_opacity=1, + id=f'{self.svg_id}-box-{box_id}')) + self.dwg.add(self.dwg.text('', + insert=((box_bounds[0]+box_bounds[2])/2, (box_bounds[1]+box_bounds[3])/2), + font_size=20, + font_family='Arial', + font_weight='bold', + fill=COLORS['BLACK'], + fill_opacity=1, + text_anchor='middle', + alignment_baseline='central', + id=f'{self.svg_id}-box-{box_id}-text')) + self.dwg.add(self.dwg.text(box_title, + insert=(left_indent + 0.125*self.canvas_size[0], (box_bounds[1]+box_bounds[3])/2), + font_size=18, + font_family='Arial', + fill=COLORS['BLACK'], + fill_opacity=1, + text_anchor='middle', + alignment_baseline='central')) # Bars bar_width = 120 - bar_charts_bounds = ( - left_indent, - 70 * len(self.box_names) + 30, - self.canvas_size[0], - self.canvas_size[1] - 60, - ) + bar_charts_bounds = (left_indent, + 70*len(self.box_names)+30, + self.canvas_size[0], + self.canvas_size[1]-60) + # Bar chart container - self.dwg.add( - self.dwg.polygon( - points=[ - (bar_charts_bounds[0], bar_charts_bounds[1]), - (bar_charts_bounds[2], bar_charts_bounds[1]), - (bar_charts_bounds[2], bar_charts_bounds[3]), - (bar_charts_bounds[0], bar_charts_bounds[3]), - ], - fill=COLORS["WHITE"], - fill_opacity=0, - stroke=COLORS["BLACK"], - stroke_width=2, - stroke_opacity=1, - id=f"{self.svg_id}-bar-charts-container", - ) - ) + self.dwg.add(self.dwg.polygon(points=[ (bar_charts_bounds[0], bar_charts_bounds[1]), + (bar_charts_bounds[2], bar_charts_bounds[1]), + (bar_charts_bounds[2], bar_charts_bounds[3]), + (bar_charts_bounds[0], bar_charts_bounds[3]) ], + fill=COLORS['WHITE'], + fill_opacity=0, + stroke=COLORS['BLACK'], + stroke_width=2, + stroke_opacity=1, + id=f'{self.svg_id}-bar-charts-container')) # Bar chart axis label_step = ( self.percentile_bar_range[1] - self.percentile_bar_range[0] ) / 10.0 - for label_id in range(10 + 1): - height = ( - bar_charts_bounds[1] - + label_id * (bar_charts_bounds[3] - bar_charts_bounds[1]) / 10 - ) - self.dwg.add( - self.dwg.line( - (bar_charts_bounds[0] - 5, height), - (bar_charts_bounds[0] + 5, height), - stroke=COLORS["BLACK"], - stroke_width=2, - stroke_opacity=1, - ) - ) + for label_id in range(10+1): + height = bar_charts_bounds[1] + label_id*(bar_charts_bounds[3]-bar_charts_bounds[1])/10 + self.dwg.add(self.dwg.line((bar_charts_bounds[0]-5, height), (bar_charts_bounds[0]+5, height), + stroke=COLORS['BLACK'], + stroke_width=2, + stroke_opacity=1)) + self.dwg.add( self.dwg.text( str(round(self.percentile_bar_range[1] - label_id * label_step, 1)), @@ -207,141 +157,92 @@ def _draw(self): bar_chart_width = bar_charts_bounds[2] - bar_charts_bounds[0] for bar_id, bar_name in enumerate(self.bar_names): - bar_x = bar_charts_bounds[0] + (bar_chart_width * (2 * bar_id + 1) / 4) + bar_x = bar_charts_bounds[0] + (bar_chart_width * (2*bar_id+1)/4) + # Bar label - self.dwg.add( - self.dwg.text( - bar_name, - insert=(bar_x, bar_charts_bounds[3] + 25), - font_size=18, - font_family="Arial", - fill=COLORS["BLACK"], - fill_opacity=1, - text_anchor="middle", - alignment_baseline="central", - ) - ) + self.dwg.add(self.dwg.text(bar_name, + insert=(bar_x, bar_charts_bounds[3]+25), + font_size=18, + font_family='Arial', + fill=COLORS['BLACK'], + fill_opacity=1, + text_anchor='middle', + alignment_baseline='central')) # Bar - self.dwg.add( - self.dwg.polygon( - points=[ - (bar_x - bar_width // 2, bar_charts_bounds[3]), - (bar_x - bar_width // 2, bar_charts_bounds[1]), - (bar_x + bar_width // 2, bar_charts_bounds[1]), - (bar_x + bar_width // 2, bar_charts_bounds[3]), - ], - fill=COLORS["VL_GREY"], - fill_opacity=0.5, - stroke=COLORS["BLACK"], - stroke_width=2, - stroke_opacity=1, - ) - ) + self.dwg.add(self.dwg.polygon(points=[ (bar_x-bar_width//2, bar_charts_bounds[3]), + (bar_x-bar_width//2, bar_charts_bounds[1]), + (bar_x+bar_width//2, bar_charts_bounds[1]), + (bar_x+bar_width//2, bar_charts_bounds[3]) ], + fill=COLORS['VL_GREY'], + fill_opacity=0.5, + stroke=COLORS['BLACK'], + stroke_width=2, + stroke_opacity=1)) # Box plot - box_plot_group = self.dwg.g(id=f"{self.svg_id}-boxplot-{bar_id}", opacity=0) - box_plot_group.add( - self.dwg.polygon( - points=[ - (bar_x - bar_width // 2, bar_charts_bounds[3]), - (bar_x - bar_width // 2, bar_charts_bounds[1]), - (bar_x + bar_width // 2, bar_charts_bounds[1]), - (bar_x + bar_width // 2, bar_charts_bounds[3]), - ], - fill=COLORS["WHITE"], - fill_opacity=1, - stroke=COLORS["BLACK"], - stroke_width=2, - stroke_opacity=1, - ) - ) - box_plot_group.add( - self.dwg.polygon( - points=[ - (bar_x - bar_width // 2, bar_charts_bounds[1] + 80), - (bar_x - bar_width // 2, bar_charts_bounds[3] - 80), - (bar_x + bar_width // 2, bar_charts_bounds[3] - 80), - (bar_x + bar_width // 2, bar_charts_bounds[1] + 80), - ], - fill=f"url(#{self.svg_id}-gradient-{bar_id})", - fill_opacity=0.8, - stroke=COLORS["BLACK"], - stroke_width=2, - stroke_opacity=0.5, - id=f"{self.svg_id}-boxplot-{bar_id}-box", - ) - ) - - gradient = LinearGradient( - start=(0, 0), end=(0, 1), id=f"{self.svg_id}-gradient-{bar_id}" - ) - gradient.add_stop_color(offset="0%", color=COLORS["BAR_GREEN"]) - gradient.add_stop_color(offset="50%", color=COLORS["BAR_ORANGE"]) - gradient.add_stop_color(offset="100%", color=COLORS["BAR_RED"]) + box_plot_group = self.dwg.g(id=f'{self.svg_id}-boxplot-{bar_id}', opacity=0) + box_plot_group.add(self.dwg.polygon(points=[ (bar_x-bar_width//2, bar_charts_bounds[3]), + (bar_x-bar_width//2, bar_charts_bounds[1]), + (bar_x+bar_width//2, bar_charts_bounds[1]), + (bar_x+bar_width//2, bar_charts_bounds[3]) ], + fill=COLORS['WHITE'], + fill_opacity=1, + stroke=COLORS['BLACK'], + stroke_width=2, + stroke_opacity=1)) + box_plot_group.add(self.dwg.polygon(points=[ (bar_x-bar_width//2, bar_charts_bounds[1]+80), + (bar_x-bar_width//2, bar_charts_bounds[3]-80), + (bar_x+bar_width//2, bar_charts_bounds[3]-80), + (bar_x+bar_width//2, bar_charts_bounds[1]+80) ], + fill=f'url(#{self.svg_id}-gradient-{bar_id})', + fill_opacity=0.8, + stroke=COLORS['BLACK'], + stroke_width=2, + stroke_opacity=0.5, + id=f'{self.svg_id}-boxplot-{bar_id}-box')) + + gradient = LinearGradient(start=(0, 0), end=(0,1), id=f'{self.svg_id}-gradient-{bar_id}') + gradient.add_stop_color(offset='0%', color=COLORS['BAR_GREEN']) + gradient.add_stop_color(offset='50%', color=COLORS['BAR_ORANGE']) + gradient.add_stop_color(offset='100%', color=COLORS['BAR_RED']) self.dwg.defs.add(gradient) - box_plot_group.add( - self.dwg.line( - (bar_x - bar_width // 2, bar_charts_bounds[1] + 200), - (bar_x + bar_width // 2, bar_charts_bounds[1] + 200), - stroke=COLORS["BLACK"], - stroke_width=2, - stroke_opacity=0.5, - stroke_dasharray=2, - id=f"{self.svg_id}-boxplot-{bar_id}-line-high", - ) - ) - box_plot_group.add( - self.dwg.line( - ( - bar_x - bar_width // 2, - (bar_charts_bounds[1] + bar_charts_bounds[3]) // 2, - ), - ( - bar_x + bar_width // 2, - (bar_charts_bounds[1] + bar_charts_bounds[3]) // 2, - ), - stroke=COLORS["BLACK"], - stroke_width=3, - stroke_opacity=0.8, - stroke_dasharray=5, - id=f"{self.svg_id}-boxplot-{bar_id}-line-mid", - ) - ) - box_plot_group.add( - self.dwg.line( - (bar_x - bar_width // 2, bar_charts_bounds[3] - 200), - (bar_x + bar_width // 2, bar_charts_bounds[3] - 200), - stroke=COLORS["BLACK"], - stroke_width=2, - stroke_opacity=0.5, - stroke_dasharray=2, - id=f"{self.svg_id}-boxplot-{bar_id}-line-low", - ) - ) - box_plot_group.add( - self.dwg.line( - (bar_x - bar_width // 2, bar_charts_bounds[3]), - (bar_x + bar_width // 2, bar_charts_bounds[3]), - fill_opacity=0, - stroke=COLORS["BLACK"], - stroke_width=4, - stroke_opacity=1, - id=f"{self.svg_id}-bar-{bar_id}-mainline", - ) - ) - box_plot_group.add( - self.dwg.text( - "", - insert=(bar_x, bar_charts_bounds[3]), - font_size=20, - font_family="Arial", - font_weight="bold", - fill=COLORS["BLACK"], - fill_opacity=1, - text_anchor="middle", - alignment_baseline="central", - id=f"{self.svg_id}-bar-{bar_id}-label", - ) - ) + box_plot_group.add(self.dwg.line((bar_x-bar_width//2, bar_charts_bounds[1]+200), + (bar_x+bar_width//2, bar_charts_bounds[1]+200), + stroke=COLORS['BLACK'], + stroke_width=2, + stroke_opacity=0.5, + stroke_dasharray=2, + id=f'{self.svg_id}-boxplot-{bar_id}-line-high')) + box_plot_group.add(self.dwg.line((bar_x-bar_width//2, (bar_charts_bounds[1]+bar_charts_bounds[3])//2), + (bar_x+bar_width//2, (bar_charts_bounds[1]+bar_charts_bounds[3])//2), + stroke=COLORS['BLACK'], + stroke_width=3, + stroke_opacity=0.8, + stroke_dasharray=5, + id=f'{self.svg_id}-boxplot-{bar_id}-line-mid')) + box_plot_group.add(self.dwg.line((bar_x-bar_width//2, bar_charts_bounds[3]-200), + (bar_x+bar_width//2, bar_charts_bounds[3]-200), + stroke=COLORS['BLACK'], + stroke_width=2, + stroke_opacity=0.5, + stroke_dasharray=2, + id=f'{self.svg_id}-boxplot-{bar_id}-line-low')) + box_plot_group.add(self.dwg.line((bar_x-bar_width//2, bar_charts_bounds[3]), + (bar_x+bar_width//2, bar_charts_bounds[3]), + fill_opacity=0, + stroke=COLORS['BLACK'], + stroke_width=4, + stroke_opacity=1, + id=f'{self.svg_id}-bar-{bar_id}-mainline')) + box_plot_group.add(self.dwg.text('', + insert=(bar_x, bar_charts_bounds[3]), + font_size=20, + font_family='Arial', + font_weight='bold', + fill=COLORS['BLACK'], + fill_opacity=1, + text_anchor='middle', + alignment_baseline='central', + id=f'{self.svg_id}-bar-{bar_id}-label')) self.dwg.add(box_plot_group) diff --git a/iris_validation/metrics/__init__.py b/iris_validation/metrics/__init__.py index 5b48068..1f4047d 100644 --- a/iris_validation/metrics/__init__.py +++ b/iris_validation/metrics/__init__.py @@ -19,15 +19,15 @@ def _get_minimol_from_path(model_path): fpdb.read_file(model_path) fpdb.import_minimol(minimol) except Exception as exception: - raise Exception("Failed to import model file") from exception + raise Exception('Failed to import model file') from exception return minimol def _get_minimol_seq_nums(minimol): - seq_nums = {} + seq_nums = { } for chain in minimol: chain_id = str(chain.id()).strip() - seq_nums[chain_id] = [] + seq_nums[chain_id] = [ ] for residue in chain: seq_num = int(residue.seqnum()) seq_nums[chain_id].append(seq_num) @@ -41,7 +41,7 @@ def _get_reflections_data(model_path, reflections_path, model_id=None, out_queue density_scores = reflections_handler.calculate_all_density_scores() reflections_data = (resolution, density_scores) if out_queue is not None: - out_queue.put(("reflections", model_id, reflections_data)) + out_queue.put(('reflections', model_id, reflections_data)) return reflections_data @@ -51,81 +51,67 @@ def _get_molprobity_data(model_path, seq_nums, model_id=None, out_queue=None): from mmtbx.command_line.molprobity import get_master_phil from mmtbx.validation.molprobity import molprobity, molprobity_flags except (ImportError, ModuleNotFoundError): - print( - "WARNING: Failed to import MolProbity; continuing without MolProbity analyses" - ) + print('WARNING: Failed to import MolProbity; continuing without MolProbity analyses') return try: cmdline = load_model_and_data( - args=[f'pdb.file_name="{model_path}"', "quiet=True"], + args=[ f'pdb.file_name="{model_path}"', 'quiet=True' ], master_phil=get_master_phil(), require_data=False, - process_pdb_file=True, - ) + process_pdb_file=True) validation = molprobity(model=cmdline.model) except Exception: - print( - "WARNING: Failed to run MolProbity; continuing without MolProbity analyses" - ) + print('WARNING: Failed to run MolProbity; continuing without MolProbity analyses') return - molprobity_data = {} - molprobity_data["model_wide"] = {} - molprobity_data["model_wide"]["summary"] = { - "cbeta_deviations": validation.cbetadev.n_outliers, - "clashscore": validation.clashscore(), - "ramachandran_outliers": validation.rama_outliers(), - "ramachandran_favoured": validation.rama_favored(), - "rms_bonds": validation.rms_bonds(), - "rms_angles": validation.rms_angles(), - "rotamer_outliers": validation.rota_outliers(), - "molprobity_score": validation.molprobity_score(), - } - - molprobity_data["model_wide"]["details"] = { - "clash": [], - "c-beta": [], - "nqh_flips": [], - "omega": [], - "ramachandran": [], - "rotamer": [], - } - - molprobity_results = { - "clash": validation.clashes.results, - "c-beta": validation.cbetadev.results, - "nqh_flips": validation.nqh_flips.results, - "omega": validation.omegalyze.results, - "ramachandran": validation.ramalyze.results, - "rotamer": validation.rotalyze.results, - } + molprobity_data = { } + molprobity_data['model_wide'] = { } + molprobity_data['model_wide']['summary'] = { 'cbeta_deviations' : validation.cbetadev.n_outliers, + 'clashscore' : validation.clashscore(), + 'ramachandran_outliers' : validation.rama_outliers(), + 'ramachandran_favoured' : validation.rama_favored(), + 'rms_bonds' : validation.rms_bonds(), + 'rms_angles' : validation.rms_angles(), + 'rotamer_outliers' : validation.rota_outliers(), + 'molprobity_score' : validation.molprobity_score() } + + molprobity_data['model_wide']['details'] = { 'clash' : [ ], + 'c-beta' : [ ], + 'nqh_flips' : [ ], + 'omega' : [ ], + 'ramachandran' : [ ], + 'rotamer' : [ ] } + + molprobity_results = { 'clash' : validation.clashes.results, + 'c-beta' : validation.cbetadev.results, + 'nqh_flips' : validation.nqh_flips.results, + 'omega' : validation.omegalyze.results, + 'ramachandran' : validation.ramalyze.results, + 'rotamer' : validation.rotalyze.results } for chain_id, chain_seq_nums in seq_nums.items(): - molprobity_data[chain_id] = {} + molprobity_data[chain_id] = { } for seq_num in chain_seq_nums: - molprobity_data[chain_id][seq_num] = { - category: None for category in molprobity_results - } - molprobity_data[chain_id][seq_num]["clash"] = 2 + molprobity_data[chain_id][seq_num] = { category : None for category in molprobity_results } + molprobity_data[chain_id][seq_num]['clash'] = 2 + for category, results in molprobity_results.items(): for result in results: - if category == "clash": + if category == 'clash': for atom in result.atoms_info: chain_id = atom.chain_id.strip() seq_num = int(atom.resseq.strip()) if molprobity_data[chain_id][seq_num][category] > 0: molprobity_data[chain_id][seq_num][category] -= 1 - details_line = [ - " ".join(a.id_str().split()) for a in result.atoms_info - ] + [result.overlap] - molprobity_data["model_wide"]["details"][category].append(details_line) + details_line = [ ' '.join(a.id_str().split()) for a in result.atoms_info ] + [ result.overlap ] + molprobity_data['model_wide']['details'][category].append(details_line) continue chain_id = result.chain_id.strip() seq_num = int(result.resseq.strip()) - if category in ("ramachandran", "rotamer"): + if category in ('ramachandran', 'rotamer'): if result.score < 0.3: molprobity_data[chain_id][seq_num][category] = 0 elif result.score < 2.0: @@ -139,67 +125,52 @@ def _get_molprobity_data(model_path, seq_nums, model_id=None, out_queue=None): molprobity_data[chain_id][seq_num][category] = 0 if result.outlier: - score = result.deviation if category == "c-beta" else result.score - details_line = [ - result.chain_id.strip(), - result.resid.strip(), - result.resname.strip(), - score, - ] - molprobity_data["model_wide"]["details"][category].append(details_line) + score = result.deviation if category == 'c-beta' else result.score + details_line = [ result.chain_id.strip(), result.resid.strip(), result.resname.strip(), score ] + molprobity_data['model_wide']['details'][category].append(details_line) if out_queue is not None: - out_queue.put(("molprobity", model_id, molprobity_data)) + out_queue.put(('molprobity', model_id, molprobity_data)) return molprobity_data -def _get_covariance_data( - model_path, - sequence_path, - distpred_path, - seq_nums, - distpred_format="rosettanpz", - map_align_exe="map_align", - dssp_exe="mkdssp", - model_id=None, - out_queue=None, -): +def _get_covariance_data(model_path, + sequence_path, + distpred_path, + seq_nums, + distpred_format='rosettanpz', + map_align_exe='map_align', + dssp_exe='mkdssp', + model_id=None, + out_queue=None): try: from Bio.PDB import PDBParser from Bio.PDB.DSSP import DSSP from conkit import applications, command_line, io, plot except (ImportError, ModuleNotFoundError): - print( - "WARNING: Failed to import Biopython; continuing without covariance analyses" - ) + print('WARNING: Failed to import Biopython; continuing without covariance analyses') return parser = PDBParser() - structure = parser.get_structure("structure", model_path)[0] - dssp = DSSP(structure, model_path, dssp=dssp_exe, acc_array="Wilke") - model = io.read(model_path, "pdb" if model_path.endswith(".pdb") else "mmcif").top + structure = parser.get_structure('structure', model_path)[0] + dssp = DSSP(structure, model_path, dssp=dssp_exe, acc_array='Wilke') + model = io.read(model_path, 'pdb' if model_path.endswith('.pdb') else 'mmcif').top prediction = io.read(distpred_path, distpred_format).top - sequence = io.read(sequence_path, "fasta").top - figure = plot.ModelValidationFigure( - model, prediction, sequence, dssp, map_align_exe=map_align_exe - ) + sequence = io.read(sequence_path, 'fasta').top + figure = plot.ModelValidationFigure(model, prediction, sequence, dssp, map_align_exe=map_align_exe) - covariance_data = {} + covariance_data = { } for chain_id, chain_seq_nums in seq_nums.items(): - covariance_data[chain_id] = {} + covariance_data[chain_id] = { } for seq_num in chain_seq_nums: # TODO: by chain - score = ( - figure.smooth_scores[seq_num] - if 0 < seq_num < len(figure.smooth_scores) - else None - ) + score = figure.smooth_scores[seq_num] if 0 < seq_num < len(figure.smooth_scores) else None alignment = 0 if seq_num in figure.alignment.keys() else 1 covariance_data[chain_id][seq_num] = (score, alignment) if out_queue is not None: - out_queue.put(("covariance", model_id, covariance_data)) + out_queue.put(('covariance', model_id, covariance_data)) return covariance_data @@ -208,22 +179,22 @@ def _get_tortoize_data(model_path, seq_nums, model_id=None, out_queue=None): rama_z_data = {chain_id: {} for chain_id in seq_nums.keys()} try: tortoize_process = subprocess.Popen( - f"tortoize {model_path}", shell=True, stdout=subprocess.PIPE - ) + f'tortoize {model_path}', + shell=True, + stdout=subprocess.PIPE) except Exception: - print("WARNING: Failed to run tortoize") + print('WARNING: Failed to run tortoize') return tortoize_output = tortoize_process.communicate()[0] tortoize_dict = json.loads(tortoize_output) residues = tortoize_dict["model"]["1"]["residues"] for res in residues: - rama_z_data[res["pdb"]["strandID"]][res["pdb"]["seqNum"]] = res["ramachandran"][ - "z-score" - ] + rama_z_data[res['pdb']['strandID']][res['pdb']['seqNum']] = res['ramachandran']['z-score'] if out_queue is not None: - out_queue.put(("rama_z", model_id, rama_z_data)) + out_queue.put(('rama_z', model_id, rama_z_data)) + return rama_z_data @@ -241,39 +212,26 @@ def metrics_model_series_from_files( ): try: if isinstance(model_paths, str): - model_paths = [model_paths] + model_paths = [ model_paths ] model_paths = tuple(model_paths) if model_paths[-1] is None: raise TypeError except TypeError as exception: - raise ValueError( - "Argument 'model_paths' should be an iterable of filenames" - ) from exception - - path_lists = [ - model_paths, - reflections_paths, - sequence_paths, - distpred_paths, - model_json_paths, - ] + raise ValueError('Argument \'model_paths\' should be an iterable of filenames') from exception + + path_lists = [ model_paths, reflections_paths, sequence_paths, distpred_paths ] for i in range(1, len(path_lists)): if path_lists[i] is None: - path_lists[i] = tuple([None for _ in model_paths]) - if len(path_lists[i]) != len(model_paths) or path_lists[i].count(None) not in ( - 0, - len(path_lists[i]), - ): - raise ValueError( - "Path arguments should be equal-length iterables of filenames" - ) - - # list_metric_names = ["molprobity", "rama_z", "map_fit", "backbone_fit"] - all_minimol_data = [] - all_covariance_data = [] - all_molprobity_data = [] - all_reflections_data = [] - all_rama_z_data = [] + path_lists[i] = tuple([ None for _ in model_paths ]) + if len(path_lists[i]) != len(model_paths) or \ + path_lists[i].count(None) not in (0, len(path_lists[i])): + raise ValueError('Path arguments should be equal-length iterables of filenames') + + all_minimol_data = [ ] + all_covariance_data = [ ] + all_molprobity_data = [ ] + all_reflections_data = [ ] + all_rama_z_data = [ ] all_bfactor_data = [] # if externally supplied num_queued = 0 results_queue = Queue() @@ -311,46 +269,40 @@ def metrics_model_series_from_files( bfactor_data = json_data["b_fact"] if run_covariance: if multiprocessing: - p = Process( - target=_get_covariance_data, - args=(model_path, sequence_path, distpred_path, seq_nums), - kwargs={"model_id": model_id, "out_queue": results_queue}, - ) + p = Process(target=_get_covariance_data, + args=(model_path, sequence_path, distpred_path, seq_nums), + kwargs={ 'model_id': model_id, + 'out_queue': results_queue }) p.start() num_queued += 1 else: - covariance_data = _get_covariance_data( - model_path, sequence_path, distpred_path - ) + covariance_data = _get_covariance_data(model_path, sequence_path, distpred_path) if run_molprobity: if multiprocessing: - p = Process( - target=_get_molprobity_data, - args=(model_path, seq_nums), - kwargs={"model_id": model_id, "out_queue": results_queue}, - ) + p = Process(target=_get_molprobity_data, + args=(model_path, seq_nums), + kwargs={ 'model_id': model_id, + 'out_queue': results_queue }) p.start() num_queued += 1 else: molprobity_data = _get_molprobity_data(model_path, seq_nums) if reflections_path is not None: if multiprocessing: - p = Process( - target=_get_reflections_data, - args=(model_path, reflections_path), - kwargs={"model_id": model_id, "out_queue": results_queue}, - ) + p = Process(target=_get_reflections_data, + args=(model_path, reflections_path), + kwargs={ 'model_id': model_id, + 'out_queue': results_queue }) p.start() num_queued += 1 else: reflections_data = _get_reflections_data(model_path, reflections_path) if calculate_rama_z: if multiprocessing: - p = Process( - target=_get_tortoize_data, - args=(model_path, seq_nums), - kwargs={"model_id": model_id, "out_queue": results_queue}, - ) + p = Process(target=_get_tortoize_data, + args=(model_path, seq_nums), + kwargs={ 'model_id': model_id, + 'out_queue': results_queue }) p.start() num_queued += 1 else: @@ -366,15 +318,15 @@ def metrics_model_series_from_files( if multiprocessing: for _ in range(num_queued): result_type, model_id, result = results_queue.get() - if result_type == "covariance": + if result_type == 'covariance': all_covariance_data[model_id] = result - if result_type == "molprobity": + if result_type == 'molprobity': all_molprobity_data[model_id] = result - if result_type == "reflections": + if result_type == 'reflections': all_reflections_data[model_id] = result - if result_type == "rama_z": + if result_type == 'rama_z': all_rama_z_data[model_id] = result - metrics_models = [] + metrics_models = [ ] for model_id, model_data in enumerate( zip( all_minimol_data, diff --git a/iris_validation/metrics/chain.py b/iris_validation/metrics/chain.py index b6644af..b596c4e 100644 --- a/iris_validation/metrics/chain.py +++ b/iris_validation/metrics/chain.py @@ -22,19 +22,13 @@ def __init__( self.rama_z_data = rama_z_data self._index = -1 - self.residues = [] + self.residues = [ ] self.length = len(mmol_chain) self.chain_id = str(mmol_chain.id().trim()) dict_ext_percentiles = {} # stores the percentiles supplied externally for residue_index, mmol_residue in enumerate(mmol_chain): - previous_residue = ( - mmol_chain[residue_index - 1] if residue_index > 0 else None - ) - next_residue = ( - mmol_chain[residue_index + 1] - if residue_index < len(mmol_chain) - 1 - else None - ) + previous_residue = mmol_chain[residue_index-1] if residue_index > 0 else None + next_residue = mmol_chain[residue_index+1] if residue_index < len(mmol_chain)-1 else None seq_num = int(mmol_residue.seqnum()) res_id = str(mmol_residue.id()).strip() # covariance @@ -122,19 +116,9 @@ def __init__( self.residues.append(residue) for residue_index, residue in enumerate(self.residues): - if ( - (0 < residue_index < len(self.residues) - 1) - and ( - self.residues[residue_index - 1].is_aa - and residue.is_aa - and self.residues[residue_index + 1].is_aa - ) - and ( - self.residues[residue_index - 1].sequence_number + 1 - == residue.sequence_number - == self.residues[residue_index + 1].sequence_number - 1 - ) - ): + if (0 < residue_index < len(self.residues)-1) and \ + (self.residues[residue_index-1].is_aa and residue.is_aa and self.residues[residue_index+1].is_aa) and \ + (self.residues[residue_index-1].sequence_number+1 == residue.sequence_number == self.residues[residue_index+1].sequence_number-1): residue.is_consecutive_aa = True else: residue.is_consecutive_aa = False @@ -143,35 +127,29 @@ def __iter__(self): return self def __next__(self): - if self._index < self.length - 1: + if self._index < self.length-1: self._index += 1 return self.residues[self._index] self._index = -1 raise StopIteration def get_residue(self, sequence_number): - return next( - residue - for residue in self.residues - if residue.sequence_number == sequence_number - ) + return next(residue for residue in self.residues if residue.sequence_number == sequence_number) def remove_residue(self, residue): if residue in self.residues: self.residues.remove(residue) self.length -= 1 else: - print("Error removing residue, no matching residue was found.") + print('Error removing residue, no matching residue was found.') def remove_non_aa_residues(self): - non_aa_residues = [residue for residue in self.residues if not residue.is_aa] + non_aa_residues = [ residue for residue in self.residues if not residue.is_aa ] for residue in non_aa_residues: self.remove_residue(residue) def b_factor_lists(self): - all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs = [ - [] for _ in range(8) - ] + all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs = [ [ ] for _ in range(8) ] for residue in self.residues: all_bfs.append(residue.avg_b_factor) if residue.is_aa: @@ -187,13 +165,4 @@ def b_factor_lists(self): ligand_bfs.append(residue.avg_b_factor) else: ion_bfs.append(residue.avg_b_factor) - return ( - all_bfs, - aa_bfs, - mc_bfs, - sc_bfs, - non_aa_bfs, - water_bfs, - ligand_bfs, - ion_bfs, - ) + return all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs diff --git a/iris_validation/metrics/model.py b/iris_validation/metrics/model.py index 5a0952d..892d9fd 100644 --- a/iris_validation/metrics/model.py +++ b/iris_validation/metrics/model.py @@ -31,18 +31,12 @@ def __init__( self.percentile_calculator = PercentileCalculator(self.resolution) self.rotamer_calculator = RotamerCalculator() - self.chains = [] + self.chains = [ ] for mmol_chain in mmol_model: chain_id = str(mmol_chain.id().trim()) - chain_covariance_data = ( - None if covariance_data is None else covariance_data[chain_id] - ) - chain_molprobity_data = ( - None if molprobity_data is None else molprobity_data[chain_id] - ) - chain_density_scores = ( - None if self.density_scores is None else self.density_scores[chain_id] - ) + chain_covariance_data = None if covariance_data is None else covariance_data[chain_id] + chain_molprobity_data = None if molprobity_data is None else molprobity_data[chain_id] + chain_density_scores = None if self.density_scores is None else self.density_scores[chain_id] chain_rama_z_data = None if rama_z_data is None else rama_z_data[chain_id] chain_bfactor_data = ( None if bfactor_data is None else bfactor_data[chain_id] @@ -65,7 +59,7 @@ def __iter__(self): return self def __next__(self): - if self._index < len(self.chains) - 1: + if self._index < len(self.chains)-1: self._index += 1 return self.chains[self._index] self._index = -1 @@ -75,66 +69,19 @@ def get_chain(self, chain_id): return next(chain for chain in self.chains if chain.chain_id == chain_id) def remove_chain(self, chain_id): - matching_chains = [chain for chain in self.chains if chain.chain_id == chain_id] + matching_chains = [ chain for chain in self.chains if chain.chain_id == chain_id ] if len(matching_chains) == 0: - print("Error removing chain, no chains matching that ID were found.") + print('Error removing chain, no chains matching that ID were found.') else: for chain in matching_chains: self.chains.remove(chain) self.chain_count -= 1 def b_factor_lists(self): - all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs = ( - [], - [], - [], - [], - [], - [], - [], - [], - ) + all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs = [ ], [ ], [ ], [ ], [ ], [ ], [ ], [ ] for chain in self.chains: - ( - all_bfs_c, - aa_bfs_c, - mc_bfs_c, - sc_bfs_c, - non_aa_bfs_c, - water_bfs_c, - ligand_bfs_c, - ion_bfs_c, - ) = chain.b_factor_lists() - for model_li, chain_li in zip( - ( - all_bfs, - aa_bfs, - mc_bfs, - sc_bfs, - non_aa_bfs, - water_bfs, - ligand_bfs, - ion_bfs, - ), - ( - all_bfs_c, - aa_bfs_c, - mc_bfs_c, - sc_bfs_c, - non_aa_bfs_c, - water_bfs_c, - ligand_bfs_c, - ion_bfs_c, - ), - ): + all_bfs_c, aa_bfs_c, mc_bfs_c, sc_bfs_c, non_aa_bfs_c, water_bfs_c, ligand_bfs_c, ion_bfs_c = chain.b_factor_lists() + for model_li, chain_li in zip((all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs), + (all_bfs_c, aa_bfs_c, mc_bfs_c, sc_bfs_c, non_aa_bfs_c, water_bfs_c, ligand_bfs_c, ion_bfs_c)): model_li += chain_li - return ( - all_bfs, - aa_bfs, - mc_bfs, - sc_bfs, - non_aa_bfs, - water_bfs, - ligand_bfs, - ion_bfs, - ) + return all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs diff --git a/iris_validation/metrics/reflections.py b/iris_validation/metrics/reflections.py index 2ef107a..9a38cf1 100644 --- a/iris_validation/metrics/reflections.py +++ b/iris_validation/metrics/reflections.py @@ -21,9 +21,7 @@ def __init__(self, f_reflections=None, xmap=None, minimol=None): if f_reflections is None: if xmap is None: - raise ValueError( - "Either a reflections file path or an xmap object must be passed as an argument" - ) + raise ValueError('Either a reflections file path or an xmap object must be passed as an argument') try: self.grid = xmap.grid except AttributeError: @@ -31,16 +29,12 @@ def __init__(self, f_reflections=None, xmap=None, minimol=None): self.spacegroup = xmap.spacegroup self.cell = xmap.cell else: - extension = f_reflections.split(".")[-1].lower() - if extension != "mtz": - if extension == "cif": - raise ValueError( - "mmCIF format is not currently supported for reflections data." - ) + extension = f_reflections.split('.')[-1].lower() + if extension != 'mtz': + if extension == 'cif': + raise ValueError('mmCIF format is not currently supported for reflections data.') else: - raise ValueError( - f"Reflections file has unrecognised extension: {extension}" - ) + raise ValueError(f'Reflections file has unrecognised extension: {extension}') self._load_hkl_data() self._calculate_structure_factors() self._generate_xmap() @@ -51,30 +45,24 @@ def _load_hkl_data(self): mtzin.open_read(self.f_reflections) mtzin.import_hkl_info(self.hkl) - mtz_labels_and_types = [ - tuple(str(line).strip().split(" ")) for line in mtzin.column_labels() - ] + mtz_labels_and_types = [ tuple(str(line).strip().split(' ')) for line in mtzin.column_labels() ] mtz_column_labels, _ = zip(*mtz_labels_and_types) - mtz_column_label_suffixes = set( - [label.split("/")[-1] for label in mtz_column_labels] - ) + mtz_column_label_suffixes = set([ label.split('/')[-1] for label in mtz_column_labels ]) # TODO: need a better way to choose the right headers import_complete = False - for suffix_pair in (("F", "SIGF"), ("FP", "SIGFP"), ("FP_ALL", "SIGFP_ALL")): + for suffix_pair in ( ('F', 'SIGF'), + ('FP', 'SIGFP'), + ('FP_ALL', 'SIGFP_ALL') ): if len(mtz_column_label_suffixes & set(suffix_pair)) == 2: try: self.f_sigf = clipper.HKL_data_F_sigF_float(self.hkl) - mtzin.import_hkl_data( - self.f_sigf, "/*/*/[" + ",".join(suffix_pair) + "]" - ) + mtzin.import_hkl_data(self.f_sigf, '/*/*/[' + ','.join(suffix_pair) + ']') import_complete = True break except Exception as exception: - raise Exception( - "Failed to import HKL data from reflections file" - ) from exception + raise Exception('Failed to import HKL data from reflections file') from exception if not import_complete: - raise ValueError("Reflections file does not contain the required columns") + raise ValueError('Reflections file does not contain the required columns') mtzin.close_read() spacegroup = self.hkl.spacegroup() @@ -87,15 +75,11 @@ def _load_hkl_data(self): self.resolution_limit = resolution.limit() def _calculate_structure_factors(self, bulk_solvent=True): - # self.crystal = clipper.MTZcrystal() - # self.f_phi = clipper.HKL_data_F_phi_float(self.hkl, self.crystal) + #self.crystal = clipper.MTZcrystal() + #self.f_phi = clipper.HKL_data_F_phi_float(self.hkl, self.crystal) self.f_phi = clipper.HKL_data_F_phi_float(self.hkl) atoms = self.minimol.atom_list() - sf_calc = ( - clipper.SFcalc_obs_bulk_float - if bulk_solvent - else clipper.SFcalc_obs_base_float - ) + sf_calc = clipper.SFcalc_obs_bulk_float if bulk_solvent else clipper.SFcalc_obs_base_float sf_calc(self.f_phi, self.f_sigf, atoms) def _generate_xmap(self): @@ -125,17 +109,13 @@ def get_density_at_atom(self, mmol_atom): return self.get_density_at_point(xyz) def calculate_all_density_scores(self): - density_scores = {} + density_scores = { } for chain in self.minimol: chain_id = str(chain.id()).strip() - density_scores[chain_id] = {} + density_scores[chain_id] = { } for residue in chain: seq_num = int(residue.seqnum()) - all_atom_scores, mainchain_atom_scores, sidechain_atom_scores = ( - [], - [], - [], - ) + all_atom_scores, mainchain_atom_scores, sidechain_atom_scores = [ ], [ ], [ ] for atom_id, atom in enumerate(residue): is_mainchain = str(atom.name()).strip() in utils.MC_ATOM_NAMES element = str(atom.element()).strip() @@ -143,9 +123,7 @@ def calculate_all_density_scores(self): density = self.get_density_at_atom(atom) atom_score = None density_norm = density / atomic_number - atom_score = -log( - norm.cdf((density_norm - self.map_mean) / self.map_std) - ) + atom_score = -log(norm.cdf((density_norm - self.map_mean) / self.map_std)) all_atom_scores.append(atom_score) if is_mainchain: mainchain_atom_scores.append(atom_score) @@ -155,16 +133,8 @@ def calculate_all_density_scores(self): if len(all_atom_scores) > 0: all_score = sum(all_atom_scores) / len(all_atom_scores) if len(mainchain_atom_scores) > 0: - mainchain_score = sum(mainchain_atom_scores) / len( - mainchain_atom_scores - ) + mainchain_score = sum(mainchain_atom_scores) / len(mainchain_atom_scores) if len(sidechain_atom_scores) > 0: - sidechain_score = sum(sidechain_atom_scores) / len( - sidechain_atom_scores - ) - density_scores[chain_id][seq_num] = ( - all_score, - mainchain_score, - sidechain_score, - ) + sidechain_score = sum(sidechain_atom_scores) / len(sidechain_atom_scores) + density_scores[chain_id][seq_num] = (all_score, mainchain_score, sidechain_score) return density_scores diff --git a/iris_validation/metrics/residue.py b/iris_validation/metrics/residue.py index 865f5ed..595cff3 100644 --- a/iris_validation/metrics/residue.py +++ b/iris_validation/metrics/residue.py @@ -38,41 +38,20 @@ def __init__( self.code_type = utils.code_type(mmol_residue) self.backbone_atoms = utils.get_backbone_atoms(mmol_residue) self.backbone_atoms_are_correct = None not in self.backbone_atoms - self.backbone_geometry_is_correct = ( - utils.check_backbone_geometry(mmol_residue) - if self.backbone_atoms_are_correct - else None - ) + self.backbone_geometry_is_correct = utils.check_backbone_geometry(mmol_residue) if self.backbone_atoms_are_correct else None self.is_aa = utils.check_is_aa(mmol_residue) - self.is_water = str(mmol_residue.type()).strip() == "HOH" + self.is_water = str(mmol_residue.type()).strip() == 'HOH' self.is_consecutive_aa = None # B-factors - ( - self.max_b_factor, - self.avg_b_factor, - self.std_b_factor, - self.mc_b_factor, - self.sc_b_factor, - ) = utils.analyse_b_factors(mmol_residue, self.is_aa, self.backbone_atoms) + self.max_b_factor, self.avg_b_factor, self.std_b_factor, self.mc_b_factor, self.sc_b_factor = utils.analyse_b_factors(mmol_residue, self.is_aa, self.backbone_atoms) # override precalculated if bfact_score: self.avg_b_factor, self.std_b_factor = bfact_score # Backbone torsion angles - - self.phi = ( - clipper.MMonomer.protein_ramachandran_phi( - self.previous_residue, mmol_residue - ) - if self.previous_residue - else None - ) - self.psi = ( - clipper.MMonomer.protein_ramachandran_psi(mmol_residue, self.next_residue) - if self.next_residue - else None - ) + self.phi = clipper.MMonomer.protein_ramachandran_phi(self.previous_residue, mmol_residue) if self.previous_residue else None + self.psi = clipper.MMonomer.protein_ramachandran_psi(mmol_residue, self.next_residue) if self.next_residue else None if self.phi is not None and isnan(self.phi): self.phi = None if self.psi is not None and isnan(self.psi): @@ -83,26 +62,16 @@ def __init__( self.is_sidechain_complete = self.chis is not None and None not in self.chis # Ramachandran - self.ramachandran_score = utils.calculate_ramachandran_score( - mmol_residue, self.code, self.phi, self.psi - ) + self.ramachandran_score = utils.calculate_ramachandran_score(mmol_residue, self.code, self.phi, self.psi) self.ramachandran_flags = (None, None, None) if self.ramachandran_score is not None: if RAMACHANDRAN_THRESHOLDS[0] <= self.ramachandran_score: self.ramachandran_flags = (True, False, False) - elif ( - RAMACHANDRAN_THRESHOLDS[1] - <= self.ramachandran_score - < RAMACHANDRAN_THRESHOLDS[0] - ): + elif RAMACHANDRAN_THRESHOLDS[1] <= self.ramachandran_score < RAMACHANDRAN_THRESHOLDS[0]: self.ramachandran_flags = (False, True, False) elif self.ramachandran_score < RAMACHANDRAN_THRESHOLDS[1]: self.ramachandran_flags = (False, False, True) - ( - self.ramachandran_favoured, - self.ramachandran_allowed, - self.ramachandran_outlier, - ) = self.ramachandran_flags + self.ramachandran_favoured, self.ramachandran_allowed, self.ramachandran_outlier = self.ramachandran_flags # Rotamer rotamer_calculator = self.parent_chain.parent_model.rotamer_calculator @@ -117,59 +86,34 @@ def __init__( self.rotamer_flags = (False, True, False) elif rotamer_clf_id in (0, 1): self.rotamer_flags = (False, False, True) - ( - self.rotamer_favoured, - self.rotamer_allowed, - self.rotamer_outlier, - ) = self.rotamer_flags + self.rotamer_favoured, self.rotamer_allowed, self.rotamer_outlier = self.rotamer_flags # MolProbity data self.discrete_indicators = self.molprobity_data if self.molprobity_data is None: - ramachandran_indicator = ( - 0 - if self.ramachandran_outlier - else 1 - if self.ramachandran_allowed - else 2 - if self.ramachandran_favoured - else None - ) - rotamer_indicator = ( - 0 - if self.rotamer_outlier - else 1 - if self.rotamer_allowed - else 2 - if self.rotamer_favoured - else None - ) - self.discrete_indicators = { - "clash": None, - "c-beta": None, - "omega": None, - "ramachandran": ramachandran_indicator, - "rotamer": rotamer_indicator, - } + ramachandran_indicator = 0 if self.ramachandran_outlier else \ + 1 if self.ramachandran_allowed else \ + 2 if self.ramachandran_favoured else None + rotamer_indicator = 0 if self.rotamer_outlier else \ + 1 if self.rotamer_allowed else \ + 2 if self.rotamer_favoured else None + self.discrete_indicators = { 'clash' : None, + 'c-beta' : None, + 'omega' : None, + 'ramachandran' : ramachandran_indicator, + 'rotamer' : rotamer_indicator } # Covariance data self.covariance_score, self.cmo_string = None, None if self.covariance_data is not None: self.covariance_score, self.cmo_string = self.covariance_data - self.discrete_indicators["cmo"] = self.cmo_string + self.discrete_indicators['cmo'] = self.cmo_string # Density fit scores - self.fit_score, self.mainchain_fit_score, self.sidechain_fit_score = ( - None, - None, - None, - ) + self.fit_score, self.mainchain_fit_score, self.sidechain_fit_score = None, None, None if self.density_scores is not None: - ( - self.fit_score, - self.mainchain_fit_score, - self.sidechain_fit_score, - ) = self.density_scores + self.fit_score, self.mainchain_fit_score, self.sidechain_fit_score = self.density_scores + # Percentiles percentile_calculator = self.parent_chain.parent_model.percentile_calculator if "b-factor" in dict_ext_percentiles: diff --git a/iris_validation/metrics/series.py b/iris_validation/metrics/series.py index 11c65a5..5483401 100644 --- a/iris_validation/metrics/series.py +++ b/iris_validation/metrics/series.py @@ -1,4 +1,3 @@ -# import clipper from iris_validation import utils @@ -19,9 +18,7 @@ def align_models(self): if len(self.metrics_models) == 0: return if len(self.metrics_models) > 2: - raise Exception( - "Iris currently only supports alignment for two model iterations" - ) + raise Exception('Iris currently only supports alignment for two model iterations') # Check for and remove chains with no amino acid residues bad_chain_ids = set() @@ -30,20 +27,15 @@ def align_models(self): if chain.length == 0: bad_chain_ids.add(chain.chain_id) if len(bad_chain_ids) > 0: - print( - "WARNING: at least one chain contains no amino acid residues. Ignoring chains: " - + ", ".join(sorted(bad_chain_ids)) - ) + print('WARNING: at least one chain contains no amino acid residues. Ignoring chains: ' + ', '.join(sorted(bad_chain_ids))) for model in self.metrics_models: for chain_id in bad_chain_ids: model.remove_chain(chain_id) - if 0 in [model.chain_count for model in self.metrics_models]: - raise Exception("One or more models had no valid chains") + if 0 in [ model.chain_count for model in self.metrics_models ]: + raise Exception('One or more models had no valid chains') # Align chains - chain_id_sets = [ - set(chain.chain_id for chain in model) for model in self.metrics_models - ] + chain_id_sets = [ set(chain.chain_id for chain in model) for model in self.metrics_models ] common_chain_ids = set.intersection(*chain_id_sets) lost_chain_ids = set() for model, chain_id_set in zip(self.metrics_models, chain_id_sets): @@ -53,29 +45,22 @@ def align_models(self): for chain_id in model_lost_chain_ids: model.remove_chain(chain_id) if len(lost_chain_ids) > 0: - print( - f"WARNING: Some chains are not present or valid across all model versions ({sorted(lost_chain_ids)}). These chains will not be represented in the validation report." - ) + print(f'WARNING: Some chains are not present or valid across all model versions ({sorted(lost_chain_ids)}). These chains will not be represented in the validation report.') # Chain sets - self.chain_sets = {} + self.chain_sets = { } for chain_id in sorted(common_chain_ids): - self.chain_sets[chain_id] = [] + self.chain_sets[chain_id] = [ ] for model in self.metrics_models: - matching_chain = [ - chain for chain in model if chain.chain_id == chain_id - ][0] + matching_chain = [ chain for chain in model if chain.chain_id == chain_id ][0] self.chain_sets[chain_id].append(matching_chain) # Align residues - self.chain_alignments = {} + self.chain_alignments = { } for chain_id, chain_set in self.chain_sets.items(): - sequences = [ - utils.code_three_to_one([residue.code for residue in chain]) - for chain in chain_set - ] + sequences = [ utils.code_three_to_one([ residue.code for residue in chain ]) for chain in chain_set ] if len(sequences) == 1: - self.chain_alignments[chain_id] = (sequences[0],) + self.chain_alignments[chain_id] = (sequences[0], ) continue alignment_pair = utils.needleman_wunsch(sequences[-2], sequences[-1]) self.chain_alignments[chain_id] = alignment_pair @@ -91,38 +76,36 @@ def get_raw_data(self): has_rama_z = self.metrics_models[0].rama_z_data is not None has_rama_classification = not has_rama_z - raw_data = [] + raw_data = [ ] for chain_id, chain_set in self.chain_sets.items(): alignment_strings = self.chain_alignments[chain_id] aligned_length = len(alignment_strings[0]) - chain_data = { - "chain_id": chain_id, - "num_versions": num_versions, - "has_covariance": has_covariance, - "has_molprobity": has_molprobity, - "has_reflections": has_reflections, - "has_rama_z": has_rama_z, - "has_rama_classification": has_rama_classification, - "aligned_length": aligned_length, - "residue_seqnos": [], - "residue_codes": [], - "residue_validities": [], - "discrete_values": [], - "continuous_values": [], - "percentile_values": [], - } + chain_data = { 'chain_id' : chain_id, + 'num_versions' : num_versions, + 'has_covariance' : has_covariance, + 'has_molprobity' : has_molprobity, + 'has_reflections' : has_reflections, + 'has_rama_z' : has_rama_z, + 'has_rama_classification': has_rama_classification, + 'aligned_length' : aligned_length, + 'residue_seqnos' : [ ], + 'residue_codes' : [ ], + 'residue_validities' : [ ], + 'discrete_values' : [ ], + 'continuous_values' : [ ], + 'percentile_values' : [ ] } for alignment_string, chain in zip(alignment_strings, chain_set): - residue_seqnos = [] - residue_codes = [] - residue_validities = [] - discrete_values = [] - continuous_values = [] - percentile_values = [] + residue_seqnos = [ ] + residue_codes = [ ] + residue_validities = [ ] + discrete_values = [ ] + continuous_values = [ ] + percentile_values = [ ] residue_id = -1 for alignment_char in alignment_string: - if alignment_char == "-": + if alignment_char == '-': residue_seqnos.append(None) residue_codes.append(None) residue_validities.append(False) @@ -137,35 +120,27 @@ def get_raw_data(self): residue_codes.append(residue.code) residue_validities.append(True) - residue_discrete_values = ( - residue.discrete_indicators["rotamer"], - residue.discrete_indicators["ramachandran"], - residue.discrete_indicators["clash"], - residue.discrete_indicators["cmo"], - ) - residue_continuous_values = ( - residue.avg_b_factor, - residue.max_b_factor, - residue.std_b_factor, - residue.fit_score, - residue.mainchain_fit_score, - residue.sidechain_fit_score, - residue.covariance_score, - residue.rama_z, - ) - residue_percentile_values = ( - residue.avg_b_factor_percentile, - residue.max_b_factor_percentile, - residue.std_b_factor_percentile, - residue.fit_score_percentile, - residue.mainchain_fit_score_percentile, - residue.sidechain_fit_score_percentile, - residue.covariance_score_percentile, - ) - residue_continuous_values = tuple( - round(x, 3) if isinstance(x, float) else x - for x in residue_continuous_values - ) + residue_discrete_values = (residue.discrete_indicators['rotamer'], + residue.discrete_indicators['ramachandran'], + residue.discrete_indicators['clash'], + residue.discrete_indicators['cmo']) + residue_continuous_values = (residue.avg_b_factor, + residue.max_b_factor, + residue.std_b_factor, + residue.fit_score, + residue.mainchain_fit_score, + residue.sidechain_fit_score, + residue.covariance_score, + residue.rama_z) + residue_percentile_values = (residue.avg_b_factor_percentile, + residue.max_b_factor_percentile, + residue.std_b_factor_percentile, + residue.fit_score_percentile, + residue.mainchain_fit_score_percentile, + residue.sidechain_fit_score_percentile, + residue.covariance_score_percentile) + + residue_continuous_values = tuple(round(x, 3) if isinstance(x, float) else x for x in residue_continuous_values) discrete_values.append(residue_discrete_values) continuous_values.append(residue_continuous_values) percentile_values.append(residue_percentile_values) @@ -173,19 +148,16 @@ def get_raw_data(self): discrete_values = list(zip(*discrete_values)) continuous_values = list(zip(*continuous_values)) percentile_values = list(zip(*percentile_values)) - chain_data["residue_seqnos"].append(residue_seqnos) - chain_data["residue_codes"].append(residue_codes) - chain_data["residue_validities"].append(residue_validities) - chain_data["discrete_values"].append(discrete_values) - chain_data["continuous_values"].append(continuous_values) - chain_data["percentile_values"].append(percentile_values) - chain_data["discrete_values"] = list(zip(*chain_data["discrete_values"])) - chain_data["continuous_values"] = list( - zip(*chain_data["continuous_values"]) - ) - chain_data["percentile_values"] = list( - zip(*chain_data["percentile_values"]) - ) + chain_data['residue_seqnos'].append(residue_seqnos) + chain_data['residue_codes'].append(residue_codes) + chain_data['residue_validities'].append(residue_validities) + chain_data['discrete_values'].append(discrete_values) + chain_data['continuous_values'].append(continuous_values) + chain_data['percentile_values'].append(percentile_values) + + chain_data['discrete_values'] = list(zip(*chain_data['discrete_values'])) + chain_data['continuous_values'] = list(zip(*chain_data['continuous_values'])) + chain_data['percentile_values'] = list(zip(*chain_data['percentile_values'])) raw_data.append(chain_data) return raw_data diff --git a/iris_validation/utils.py b/iris_validation/utils.py index 054e7b4..da3b9a1 100644 --- a/iris_validation/utils.py +++ b/iris_validation/utils.py @@ -1,226 +1,71 @@ -from math import acos, atan2, degrees - -import clipper - - -THREE_LETTER_CODES = { - 0: [ - "ALA", - "GLY", - "VAL", - "LEU", - "ILE", - "PRO", - "PHE", - "TYR", - "TRP", - "SER", - "THR", - "CYS", - "MET", - "ASN", - "GLN", - "LYS", - "ARG", - "HIS", - "ASP", - "GLU", - ], - 1: ["MSE", "SEC"], - 2: ["UNK"], -} - -ONE_LETTER_CODES = { - "A": "ALA", - "C": "CYS", - "D": "ASP", - "E": "GLU", - "F": "PHE", - "G": "GLY", - "H": "HIS", - "I": "ILE", - "K": "LYS", - "L": "LEU", - "M": "MET", - "N": "ASN", - "P": "PRO", - "Q": "GLN", - "R": "ARG", - "S": "SER", - "T": "THR", - "U": "SEC", - "V": "VAL", - "W": "TRP", - "Y": "TYR", - "X": "UNK", -} - -CHI_ATOMS = [ - { - ("N", "CA", "CB", "CG"): ( - "ARG", - "ASN", - "ASP", - "GLN", - "GLU", - "HIS", - "LEU", - "LYS", - "MET", - "PHE", - "PRO", - "TRP", - "TYR", - "MSE", - ), - ("N", "CA", "CB", "CG1"): ("ILE", "VAL"), - ("N", "CA", "CB", "SG"): ("CYS"), - ("N", "CA", "CB", "SE"): ("SEC"), - ("N", "CA", "CB", "OG"): ("SER"), - ("N", "CA", "CB", "OG1"): ("THR"), - }, - { - ("CA", "CB", "CG", "CD"): ("ARG", "GLN", "GLU", "LYS", "PRO"), - ("CA", "CB", "CG", "CD1"): ("LEU", "PHE", "TRP", "TYR"), - ("CA", "CB", "CG", "OD1"): ("ASN", "ASP"), - ("CA", "CB", "CG", "ND1"): ("HIS"), - ("CA", "CB", "CG1", "CD1"): ("ILE"), - ("CA", "CB", "CG", "SD"): ("MET"), - ("CA", "CB", "CG", "SE"): ("MSE"), - }, - { - ("CB", "CG", "CD", "OE1"): ("GLN", "GLU"), - ("CB", "CG", "CD", "NE"): ("ARG"), - ("CB", "CG", "CD", "CE"): ("LYS"), - ("CB", "CG", "SD", "CE"): ("MET"), - ("CB", "CG", "SE", "CE"): ("MSE"), - }, - {("CG", "CD", "NE", "CZ"): ("ARG"), ("CG", "CD", "CE", "NZ"): ("LYS")}, - {("CD", "NE", "CZ", "NH1"): ("ARG")}, -] - -ATOMIC_NUMBERS = { - "H": 1, - "HE": 2, - "LI": 3, - "BE": 4, - "B": 5, - "C": 6, - "N": 7, - "O": 8, - "F": 9, - "NE": 10, - "NA": 11, - "MG": 12, - "AL": 13, - "SI": 14, - "P": 15, - "S": 16, - "CL": 17, - "AR": 18, - "K": 19, - "CA": 20, - "SC": 21, - "TI": 22, - "V": 23, - "CR": 24, - "MN": 25, - "FE": 26, - "CO": 27, - "NI": 28, - "CU": 29, - "ZN": 30, - "GA": 31, - "GE": 32, - "AS": 33, - "SE": 34, - "BR": 35, - "KR": 36, - "RB": 37, - "SR": 38, - "Y": 39, - "ZR": 40, - "NB": 41, - "MO": 42, - "TC": 43, - "RU": 44, - "RH": 45, - "PD": 46, - "AG": 47, - "CD": 48, - "IN": 49, - "SN": 50, - "SB": 51, - "TE": 52, - "I": 53, - "XE": 54, - "CS": 55, - "BA": 56, - "LA": 57, - "CE": 58, - "PR": 59, - "ND": 60, - "PM": 61, - "SM": 62, - "EU": 63, - "GD": 64, - "TB": 65, - "DY": 66, - "HO": 67, - "ER": 68, - "TM": 69, - "YB": 70, - "LU": 71, - "HF": 72, - "TA": 73, - "W": 74, - "RE": 75, - "OS": 76, - "IR": 77, - "PT": 78, - "AU": 79, - "HG": 80, - "TL": 81, - "PB": 82, - "BI": 83, - "PO": 84, - "AT": 85, - "RN": 86, - "FR": 87, - "RA": 88, - "AC": 89, - "TH": 90, - "PA": 91, - "U": 92, - "NP": 93, - "PU": 94, - "AM": 95, - "CM": 96, - "BK": 97, - "CF": 98, - "ES": 99, - "FM": 100, - "MD": 101, - "NO": 102, - "LR": 103, - "RF": 104, - "DB": 105, - "SG": 106, - "BH": 107, - "HS": 108, - "MT": 109, - "DS": 110, - "RG": 111, - "CN": 112, - "NH": 113, - "FL": 114, - "MC": 115, - "LV": 116, - "TS": 117, - "OG": 118, -} - -MC_ATOM_NAMES = set(["N", "CA" "C", "O", "CB"]) +THREE_LETTER_CODES = { 0 : [ 'ALA', 'GLY', 'VAL', 'LEU', 'ILE', 'PRO', 'PHE', 'TYR', 'TRP', 'SER', + 'THR', 'CYS', 'MET', 'ASN', 'GLN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU' ], + 1 : [ 'MSE', 'SEC' ], + 2 : [ 'UNK' ] } + +ONE_LETTER_CODES = { 'A' : 'ALA', + 'C' : 'CYS', + 'D' : 'ASP', + 'E' : 'GLU', + 'F' : 'PHE', + 'G' : 'GLY', + 'H' : 'HIS', + 'I' : 'ILE', + 'K' : 'LYS', + 'L' : 'LEU', + 'M' : 'MET', + 'N' : 'ASN', + 'P' : 'PRO', + 'Q' : 'GLN', + 'R' : 'ARG', + 'S' : 'SER', + 'T' : 'THR', + 'U' : 'SEC', + 'V' : 'VAL', + 'W' : 'TRP', + 'Y' : 'TYR', + 'X' : 'UNK' } + +CHI_ATOMS = [ { ('N', 'CA', 'CB', 'CG') : ('ARG', 'ASN', 'ASP', 'GLN', 'GLU', 'HIS', 'LEU', 'LYS', + 'MET', 'PHE', 'PRO', 'TRP', 'TYR', 'MSE'), + ('N', 'CA', 'CB', 'CG1') : ('ILE', 'VAL'), + ('N', 'CA', 'CB', 'SG') : ('CYS'), + ('N', 'CA', 'CB', 'SE') : ('SEC'), + ('N', 'CA', 'CB', 'OG') : ('SER'), + ('N', 'CA', 'CB', 'OG1') : ('THR') }, + { ('CA', 'CB', 'CG', 'CD') : ('ARG', 'GLN', 'GLU', 'LYS', 'PRO'), + ('CA', 'CB', 'CG', 'CD1') : ('LEU', 'PHE', 'TRP', 'TYR'), + ('CA', 'CB', 'CG', 'OD1') : ('ASN', 'ASP'), + ('CA', 'CB', 'CG', 'ND1') : ('HIS'), + ('CA', 'CB', 'CG1', 'CD1') : ('ILE'), + ('CA', 'CB', 'CG', 'SD') : ('MET'), + ('CA', 'CB', 'CG', 'SE') : ('MSE') }, + { ('CB', 'CG', 'CD', 'OE1') : ('GLN', 'GLU'), + ('CB', 'CG', 'CD', 'NE') : ('ARG'), + ('CB', 'CG', 'CD', 'CE') : ('LYS'), + ('CB', 'CG', 'SD', 'CE') : ('MET'), + ('CB', 'CG', 'SE', 'CE') : ('MSE') }, + { ('CG', 'CD', 'NE', 'CZ') : ('ARG'), + ('CG', 'CD', 'CE', 'NZ') : ('LYS') }, + { ('CD', 'NE', 'CZ', 'NH1') : ('ARG') } ] + +ATOMIC_NUMBERS = { 'H': 1, 'HE': 2, 'LI': 3, 'BE': 4, 'B': 5, 'C': 6, 'N': 7, 'O': 8, 'F': 9, + 'NE': 10, 'NA': 11, 'MG': 12, 'AL': 13, 'SI': 14, 'P': 15, 'S': 16, 'CL': 17, + 'AR': 18, 'K': 19, 'CA': 20, 'SC': 21, 'TI': 22, 'V': 23, 'CR': 24, 'MN': 25, + 'FE': 26, 'CO': 27, 'NI': 28, 'CU': 29, 'ZN': 30, 'GA': 31, 'GE': 32, 'AS': 33, + 'SE': 34, 'BR': 35, 'KR': 36, 'RB': 37, 'SR': 38, 'Y': 39, 'ZR': 40, 'NB': 41, + 'MO': 42, 'TC': 43, 'RU': 44, 'RH': 45, 'PD': 46, 'AG': 47, 'CD': 48, 'IN': 49, + 'SN': 50, 'SB': 51, 'TE': 52, 'I': 53, 'XE': 54, 'CS': 55, 'BA': 56, 'LA': 57, + 'CE': 58, 'PR': 59, 'ND': 60, 'PM': 61, 'SM': 62, 'EU': 63, 'GD': 64, 'TB': 65, + 'DY': 66, 'HO': 67, 'ER': 68, 'TM': 69, 'YB': 70, 'LU': 71, 'HF': 72, 'TA': 73, + 'W': 74, 'RE': 75, 'OS': 76, 'IR': 77, 'PT': 78, 'AU': 79, 'HG': 80, 'TL': 81, + 'PB': 82, 'BI': 83, 'PO': 84, 'AT': 85, 'RN': 86, 'FR': 87, 'RA': 88, 'AC': 89, + 'TH': 90, 'PA': 91, 'U': 92, 'NP': 93, 'PU': 94, 'AM': 95, 'CM': 96, 'BK': 97, + 'CF': 98, 'ES': 99, 'FM': 100, 'MD': 101, 'NO': 102, 'LR': 103, 'RF': 104, + 'DB': 105, 'SG': 106, 'BH': 107, 'HS': 108, 'MT': 109, 'DS': 110, 'RG': 111, + 'CN': 112, 'NH': 113, 'FL': 114, 'MC': 115, 'LV': 116, 'TS': 117, 'OG': 118 } + +MC_ATOM_NAMES = set([ 'N', 'CA' 'C', 'O', 'CB' ]) # General calculations @@ -236,18 +81,18 @@ def median(values): num_values = len(values) if num_values < 1: return None - i = num_values // 2 + i = num_values//2 if num_values % 2 == 1: return values[i] - return mean(values[i - 1 : i + 1]) + return mean(values[i-1:i+1]) # Matrix operations def avg_coord(*xyzs): num_args = len(xyzs) - x = sum([xyz[0] for xyz in xyzs]) / num_args - y = sum([xyz[1] for xyz in xyzs]) / num_args - z = sum([xyz[2] for xyz in xyzs]) / num_args + x = sum([ xyz[0] for xyz in xyzs ]) / num_args + y = sum([ xyz[1] for xyz in xyzs ]) / num_args + z = sum([ xyz[2] for xyz in xyzs ]) / num_args return (x, y, z) @@ -263,24 +108,22 @@ def dot_product(xyz1, xyz2): def cross_product(xyz1, xyz2): - return [ - xyz1[1] * xyz2[2] - xyz1[2] * xyz2[1], - xyz1[2] * xyz2[0] - xyz1[0] * xyz2[2], - xyz1[0] * xyz2[1] - xyz1[1] * xyz2[0], - ] + return [ xyz1[1] * xyz2[2] - xyz1[2] * xyz2[1], + xyz1[2] * xyz2[0] - xyz1[0] * xyz2[2], + xyz1[0] * xyz2[1] - xyz1[1] * xyz2[0] ] def magnitude(xyz): - return (xyz[0] ** 2 + xyz[1] ** 2 + xyz[2] ** 2) ** 0.5 + return (xyz[0]**2 + xyz[1]**2 + xyz[2]**2) ** 0.5 def unit(xyz): length = magnitude(xyz) - return [xyz[0] / length, xyz[1] / length, xyz[2] / length] + return [ xyz[0] / length, xyz[1] / length, xyz[2] / length ] def subtract(xyz1, xyz2): - return [xyz1[0] - xyz2[0], xyz1[1] - xyz2[1], xyz1[2] - xyz2[2]] + return [ xyz1[0] - xyz2[0], xyz1[1] - xyz2[1], xyz1[2] - xyz2[2] ] def distance(xyz1, xyz2): @@ -314,43 +157,43 @@ def torsion(xyz1, xyz2, xyz3, xyz4, range_positive=False): # General functions def code_three_to_one(three_letter_codes, strict=False, verbose=False): - one_letter_codes = "" + one_letter_codes = '' if isinstance(three_letter_codes, str): - three_letter_codes = [three_letter_codes] + three_letter_codes = [ three_letter_codes ] for tlc in three_letter_codes: if tlc in ONE_LETTER_CODES.values(): olc = next(k for k, v in ONE_LETTER_CODES.items() if v == tlc) one_letter_codes += olc - elif tlc == "MSE": + elif tlc == 'MSE': if strict: - print("Warning: MSE will become M") - one_letter_codes += "M" + print('Warning: MSE will become M') + one_letter_codes += 'M' else: - one_letter_codes += "M" + one_letter_codes += 'M' else: if verbose: - print("Three-letter code not recognised:", tlc) + print('Three-letter code not recognised:', tlc) if strict: if verbose: - print("Returning None") + print('Returning None') return - one_letter_codes += "X" + one_letter_codes += 'X' return one_letter_codes def code_one_to_three(one_letter_codes, strict=False, verbose=False): - three_letter_codes = [] + three_letter_codes = [ ] for olc in one_letter_codes: if olc in ONE_LETTER_CODES: three_letter_codes.append(ONE_LETTER_CODES[olc]) else: if verbose: - print("One-letter code not recognised:", olc) + print('One-letter code not recognised:', olc) if strict: if verbose: - print("Returning None") + print('Returning None') return - three_letter_codes.append("UNK") + three_letter_codes.append('UNK') return three_letter_codes @@ -358,61 +201,49 @@ def needleman_wunsch(seq1, seq2, match_award=1, mismatch_penalty=-1, gap_penalty n = len(seq1) m = len(seq2) - score = [[0 for _ in range(n + 1)] for _ in range(m + 1)] + score = [ [ 0 for _ in range(n+1) ] for _ in range(m+1) ] - for i in range(0, m + 1): + for i in range(0, m+1): score[i][0] = gap_penalty * i - for j in range(0, n + 1): + for j in range(0, n+1): score[0][j] = gap_penalty * j - for i in range(1, m + 1): - for j in range(1, n + 1): - match = score[i - 1][j - 1] + ( - match_award - if seq1[j - 1] == seq2[i - 1] - else gap_penalty - if "-" in (seq1[j - 1], seq2[i - 1]) - else mismatch_penalty - ) - delete = score[i - 1][j] + gap_penalty - insert = score[i][j - 1] + gap_penalty + for i in range(1, m+1): + for j in range(1, n+1): + match = score[i-1][j-1] + (match_award if seq1[j-1] == seq2[i-1] else gap_penalty if '-' in (seq1[j-1], seq2[i-1]) else mismatch_penalty) + delete = score[i-1][j] + gap_penalty + insert = score[i][j-1] + gap_penalty score[i][j] = max(match, delete, insert) - alignment1, alignment2 = "", "" + alignment1, alignment2 = '', '' i, j = m, n while i > 0 and j > 0: score_current = score[i][j] - score_diagonal = score[i - 1][j - 1] - score_up = score[i][j - 1] - score_left = score[i - 1][j] - - if score_current == score_diagonal + ( - match_award - if seq1[j - 1] == seq2[i - 1] - else gap_penalty - if "-" in (seq1[j - 1], seq2[i - 1]) - else mismatch_penalty - ): - alignment1 += seq1[j - 1] - alignment2 += seq2[i - 1] + score_diagonal = score[i-1][j-1] + score_up = score[i][j-1] + score_left = score[i-1][j] + + if score_current == score_diagonal + (match_award if seq1[j-1] == seq2[i-1] else gap_penalty if '-' in (seq1[j-1], seq2[i-1]) else mismatch_penalty): + alignment1 += seq1[j-1] + alignment2 += seq2[i-1] i -= 1 j -= 1 elif score_current == score_up + gap_penalty: - alignment1 += seq1[j - 1] - alignment2 += "-" + alignment1 += seq1[j-1] + alignment2 += '-' j -= 1 elif score_current == score_left + gap_penalty: - alignment1 += "-" - alignment2 += seq2[i - 1] + alignment1 += '-' + alignment2 += seq2[i-1] i -= 1 while j > 0: - alignment1 += seq1[j - 1] - alignment2 += "-" + alignment1 += seq1[j-1] + alignment2 += '-' j -= 1 while i > 0: - alignment1 += "-" - alignment2 += seq2[i - 1] + alignment1 += '-' + alignment2 += seq2[i-1] i -= 1 alignment1, alignment2 = alignment1[::-1], alignment2[::-1] @@ -422,41 +253,22 @@ def needleman_wunsch(seq1, seq2, match_award=1, mismatch_penalty=-1, gap_penalty # (MiniMol) residue functions def code_type(mmol_residue): try: - return next( - category - for category, group in THREE_LETTER_CODES.items() - if mmol_residue.type().trim() in group - ) + return next(category for category, group in THREE_LETTER_CODES.items() if mmol_residue.type().trim() in group) except StopIteration: return None def get_backbone_atoms(mmol_residue): try: - n = next( - atom - for atom in mmol_residue - if atom.id().trim().replace(" ", "") == "N" - or atom.id().trim().replace(" ", "") == "N:A" - ) + n = next(atom for atom in mmol_residue if atom.id().trim().replace(' ', '') == 'N' or atom.id().trim().replace(' ', '') == 'N:A') except StopIteration: n = None try: - ca = next( - atom - for atom in mmol_residue - if atom.id().trim().replace(" ", "") == "CA" - or atom.id().trim().replace(" ", "") == "CA:A" - ) + ca = next(atom for atom in mmol_residue if atom.id().trim().replace(' ', '') == 'CA' or atom.id().trim().replace(' ', '') == 'CA:A') except StopIteration: ca = None try: - c = next( - atom - for atom in mmol_residue - if atom.id().trim().replace(" ", "") == "C" - or atom.id().trim().replace(" ", "") == "C:A" - ) + c = next(atom for atom in mmol_residue if atom.id().trim().replace(' ', '') == 'C' or atom.id().trim().replace(' ', '') == 'C:A') except StopIteration: c = None return n, ca, c @@ -476,26 +288,19 @@ def check_backbone_geometry(mmol_residue): def calculate_chis(mmol_residue): - chis = [] + chis = [ ] for i in range(5): - chi_atoms = [] - has_chi = any( - mmol_residue.type().trim() in residues - for residues in list(CHI_ATOMS[i].values()) - ) + chi_atoms = [ ] + has_chi = any(mmol_residue.type().trim() in residues for residues in list(CHI_ATOMS[i].values())) if not has_chi: return chis - required_atom_names = next( - atoms - for atoms, residues in CHI_ATOMS[i].items() - if mmol_residue.type().trim() in residues - ) - missing_atom_names = [] + required_atom_names = next(atoms for atoms, residues in CHI_ATOMS[i].items() if mmol_residue.type().trim() in residues) + missing_atom_names = [ ] for required_atom_name in required_atom_names: found = False for atom in mmol_residue: - atom_name = atom.id().trim().replace(" ", "") - if atom_name in (required_atom_name, required_atom_name + ":A"): + atom_name = atom.id().trim().replace(' ', '') + if atom_name in (required_atom_name, required_atom_name + ':A'): chi_atoms.append(atom) found = True if not found: @@ -503,10 +308,7 @@ def calculate_chis(mmol_residue): if len(chi_atoms) < 4: chis.append(None) continue - xyzs = [ - (atom.coord_orth().x(), atom.coord_orth().y(), atom.coord_orth().z()) - for atom in chi_atoms - ] + xyzs = [ (atom.coord_orth().x(), atom.coord_orth().y(), atom.coord_orth().z()) for atom in chi_atoms ] chis.append(torsion(xyzs[0], xyzs[1], xyzs[2], xyzs[3])) return tuple(chis) @@ -517,8 +319,8 @@ def analyse_b_factors(mmol_residue, is_aa=None, backbone_atoms=None): if backbone_atoms is None: backbone_atoms = get_backbone_atoms(mmol_residue) if is_aa: - backbone_atom_ids = set([str(atom.id()).strip() for atom in backbone_atoms]) - residue_b_factors, mc_b_factors, sc_b_factors = [], [], [] + backbone_atom_ids = set([ str(atom.id()).strip() for atom in backbone_atoms ]) + residue_b_factors, mc_b_factors, sc_b_factors = [ ], [ ], [ ] for atom in mmol_residue: atom_id = str(atom.id()).strip() bf = clipper.Util_u2b(atom.u_iso()) @@ -530,9 +332,7 @@ def analyse_b_factors(mmol_residue, is_aa=None, backbone_atoms=None): sc_b_factors.append(bf) b_max = max(residue_b_factors) b_avg = mean(residue_b_factors) - b_stdev = ( - sum([(x - b_avg) ** 2 for x in residue_b_factors]) / len(residue_b_factors) - ) ** 0.5 + b_stdev = (sum([ (x - b_avg) ** 2 for x in residue_b_factors ]) / len(residue_b_factors)) ** 0.5 mc_b_avg = mean(mc_b_factors) if is_aa else None sc_b_avg = mean(sc_b_factors) if is_aa else None return b_max, b_avg, b_stdev, mc_b_avg, sc_b_avg @@ -540,11 +340,9 @@ def analyse_b_factors(mmol_residue, is_aa=None, backbone_atoms=None): def check_is_aa(mmol_residue, strict=False): allowed_types = (0,) if strict else (0, 1) - if ( - code_type(mmol_residue) in allowed_types - and None not in get_backbone_atoms(mmol_residue) - and check_backbone_geometry(mmol_residue) - ): + if code_type(mmol_residue) in allowed_types and \ + None not in get_backbone_atoms(mmol_residue) and \ + check_backbone_geometry(mmol_residue): return True return False @@ -552,19 +350,17 @@ def check_is_aa(mmol_residue, strict=False): def get_rama_calculator(mmol_residue, code=None): if code is None: code = mmol_residue.type().trim() - if code == "GLY": + if code == 'GLY': return clipper.Ramachandran(clipper.Ramachandran.Gly2) - elif code == "PRO": + elif code == 'PRO': return clipper.Ramachandran(clipper.Ramachandran.Pro2) - elif code in ("ILE", "VAL"): + elif code in ('ILE', 'VAL'): return clipper.Ramachandran(clipper.Ramachandran.IleVal2) else: return clipper.Ramachandran(clipper.Ramachandran.NoGPIVpreP2) -def get_ramachandran_allowed( - mmol_residue, code=None, phi=None, psi=None, thresholds=None -): +def get_ramachandran_allowed(mmol_residue, code=None, phi=None, psi=None, thresholds=None): if phi is None or psi is None: return None if code is None: @@ -575,9 +371,7 @@ def get_ramachandran_allowed( return rama_function.allowed(phi, psi) -def get_ramachandran_favoured( - mmol_residue, code=None, phi=None, psi=None, thresholds=None -): +def get_ramachandran_favoured(mmol_residue, code=None, phi=None, psi=None, thresholds=None): if phi is None or psi is None: return None if code is None: @@ -588,9 +382,7 @@ def get_ramachandran_favoured( return rama_function.favoured(phi, psi) -def get_ramachandran_classification( - mmol_residue, code=None, phi=None, psi=None, thresholds=None -): +def get_ramachandran_classification(mmol_residue, code=None, phi=None, psi=None, thresholds=None): if phi is None or psi is None: return None if code is None: From ae5790a4af67cfbb45ff8d8765213beaec708a76 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Tue, 20 Jun 2023 13:30:03 +0100 Subject: [PATCH 09/20] fix import statements --- iris_validation/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/iris_validation/utils.py b/iris_validation/utils.py index da3b9a1..d69fea6 100644 --- a/iris_validation/utils.py +++ b/iris_validation/utils.py @@ -1,3 +1,8 @@ +from math import acos, atan2, degrees + +import clipper + + THREE_LETTER_CODES = { 0 : [ 'ALA', 'GLY', 'VAL', 'LEU', 'ILE', 'PRO', 'PHE', 'TYR', 'TRP', 'SER', 'THR', 'CYS', 'MET', 'ASN', 'GLN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU' ], 1 : [ 'MSE', 'SEC' ], From d1686bdf5ebe25e3ecec4ccced19b0556e3a1fd8 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Tue, 20 Jun 2023 15:42:47 +0100 Subject: [PATCH 10/20] bug fixes --- iris_validation/graphics/residue.py | 2 +- iris_validation/metrics/__init__.py | 7 ++++--- iris_validation/metrics/residue.py | 14 +++++++++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/iris_validation/graphics/residue.py b/iris_validation/graphics/residue.py index 472f987..6d9dd21 100644 --- a/iris_validation/graphics/residue.py +++ b/iris_validation/graphics/residue.py @@ -20,7 +20,7 @@ def __init__( if ResidueViewBars_inp: self.residue_view_bars = ResidueViewBars_inp self.box_names = [ metric['short_name'] for metric in RESIDUE_VIEW_BOXES ] - self.bar_names = [ metric['long_name'] for metric in RESIDUE_VIEW_BARS ] + self.bar_names = [ metric['long_name'] for metric in self.residue_view_bars ] # TODO: allow any number of bars self.bar_names = self.bar_names[:2] diff --git a/iris_validation/metrics/__init__.py b/iris_validation/metrics/__init__.py index a70948b..3221130 100644 --- a/iris_validation/metrics/__init__.py +++ b/iris_validation/metrics/__init__.py @@ -224,7 +224,7 @@ def metrics_model_series_from_files( except TypeError as exception: raise ValueError('Argument \'model_paths\' should be an iterable of filenames') from exception - path_lists = [ model_paths, reflections_paths, sequence_paths, distpred_paths ] + path_lists = [ model_paths, reflections_paths, sequence_paths, distpred_paths, model_json_paths ] for i in range(1, len(path_lists)): if path_lists[i] is None: path_lists[i] = tuple([ None for _ in model_paths ]) @@ -266,8 +266,9 @@ def metrics_model_series_from_files( for metric in json_data: if metric == "molprobity": molprobity_data = json_data["molprobity"] - if metric == "rama_z": - rama_z_data = json_data["rama_z"] + if metric == "tortoize": + tortoize_data = json_data["tortoize"] + calculate_tortoize = False if metric == "map_fit": reflections_data = json_data["map_fit"] if metric == "b_fact": diff --git a/iris_validation/metrics/residue.py b/iris_validation/metrics/residue.py index a9786a9..29cc92a 100644 --- a/iris_validation/metrics/residue.py +++ b/iris_validation/metrics/residue.py @@ -30,9 +30,17 @@ def __init__( self.covariance_data = covariance_data self.molprobity_data = molprobity_data self.density_scores = density_scores - self.rama_z = None if not tortoize_scores else tortoize_scores.rama_z - self.rota_z = None if not tortoize_scores else tortoize_scores.rota_z - + self.rama_z = None + self.rota_z = None + if tortoize_scores: + if hasattr(tortoize_scores,"rama_z"): + self.rama_z = tortoize_scores.rama_z + elif "rama_z" in tortoize_scores: + self.rama_z = tortoize_scores["rama_z"] + if hasattr(tortoize_scores,"rota_z"): + self.rota_z = tortoize_scores.rota_z + elif "rota_z" in tortoize_scores: + self.rota_z = tortoize_scores["rota_z"] self.atoms = list(mmol_residue) self.sequence_number = int(mmol_residue.seqnum()) self.code = mmol_residue.type().trim() From 43c9adde1156fcfc7512ea152eca9605ea3a3b8f Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Tue, 20 Jun 2023 15:53:44 +0100 Subject: [PATCH 11/20] allow empty list for discrete_metric display --- iris_validation/graphics/panel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iris_validation/graphics/panel.py b/iris_validation/graphics/panel.py index 4a6472a..5a860d5 100644 --- a/iris_validation/graphics/panel.py +++ b/iris_validation/graphics/panel.py @@ -343,7 +343,7 @@ def get_chain_view_rings( ): chain_view = [] # add discrete types first - if discrete_metrics_to_display: + if not discrete_metrics_to_display is None: for metric_name in discrete_metrics_to_display: for metric_info in DISCRETE_METRICS: if metric_info["short_name"] == metric_name: From 3aa4800346123293eefdc5aa27a8ef7b83565495 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Wed, 21 Jun 2023 11:58:45 +0100 Subject: [PATCH 12/20] fix bug with number of chain views --- iris_validation/graphics/chain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iris_validation/graphics/chain.py b/iris_validation/graphics/chain.py index 881e6ad..53bc591 100644 --- a/iris_validation/graphics/chain.py +++ b/iris_validation/graphics/chain.py @@ -26,7 +26,7 @@ def __init__( self.dwg = None self.cfa_cache = { } - self.num_rings = len(CHAIN_VIEW_RINGS) + self.num_rings = len(self.chain_view_rings) self.num_versions = self.data['num_versions'] self.num_segments = self.data['aligned_length'] self.center = (self.canvas_size[0] // 2, self.canvas_size[1] // 2) From 4f06529e3d119324c44781c02a742b197601cf98 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Fri, 21 Jul 2023 09:05:19 +0100 Subject: [PATCH 13/20] Revert "Merge remote-tracking branch 'origin/replace-rotamer-classification-with-tortoize-z' into Make-it-possible-to-pass-raw-values-#10" This reverts commit 0da2dea39306c415673aa41b121257d6b79cabdc, reversing changes made to ae5790a4af67cfbb45ff8d8765213beaec708a76. --- iris_validation/__init__.py | 4 +- iris_validation/_defs.py | 89 ++++++++--------------------- iris_validation/graphics/panel.py | 8 +-- iris_validation/metrics/__init__.py | 35 +++++------- iris_validation/metrics/chain.py | 16 +++--- iris_validation/metrics/model.py | 8 +-- iris_validation/metrics/residue.py | 15 +---- iris_validation/metrics/series.py | 9 +-- 8 files changed, 64 insertions(+), 120 deletions(-) diff --git a/iris_validation/__init__.py b/iris_validation/__init__.py index ab17bef..b1d0987 100644 --- a/iris_validation/__init__.py +++ b/iris_validation/__init__.py @@ -15,7 +15,7 @@ def generate_report( previous_distpred_path=None, run_covariance=False, run_molprobity=False, - calculate_tortoize=True, + calculate_rama_z=False, multiprocessing=True, latest_model_metrics_json=None, previous_model_metrics_json=None, @@ -42,7 +42,7 @@ def generate_report( distpred_paths, run_covariance, run_molprobity, - calculate_tortoize, + calculate_rama_z, model_json_paths, data_with_percentiles, multiprocessing, diff --git a/iris_validation/_defs.py b/iris_validation/_defs.py index 31da21a..c17c1a4 100644 --- a/iris_validation/_defs.py +++ b/iris_validation/_defs.py @@ -31,10 +31,8 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z' : False, - 'is_rama_classification': False, - 'is_rota_z' : False, - 'is_rota_classification': False + 'is_rama_z': False, + 'is_rama_classification': False }, { 'id' : 1, 'type' : 'continuous', @@ -45,10 +43,8 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z' : False, - 'is_rama_classification': False, - 'is_rota_z' : False, - 'is_rota_classification': False + 'is_rama_z': False, + 'is_rama_classification': False }, { 'id' : 2, 'type' : 'continuous', @@ -59,10 +55,8 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z' : False, - 'is_rama_classification': False, - 'is_rota_z' : False, - 'is_rota_classification': False + 'is_rama_z': False, + 'is_rama_classification': False }, { 'id' : 3, 'type' : 'continuous', @@ -73,10 +67,8 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': True, - 'is_rama_z' : False, - 'is_rama_classification': False, - 'is_rota_z' : False, - 'is_rota_classification': False + 'is_rama_z': False, + 'is_rama_classification': False }, { 'id' : 4, 'type' : 'continuous', @@ -87,10 +79,8 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': True, - 'is_rama_z' : False, - 'is_rama_classification': False, - 'is_rota_z' : False, - 'is_rota_classification': False + 'is_rama_z': False, + 'is_rama_classification': False }, { 'id' : 5, 'type' : 'continuous', @@ -101,10 +91,8 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': True, - 'is_rama_z' : False, - 'is_rama_classification': False, - 'is_rota_z' : False, - 'is_rota_classification': False + 'is_rama_z': False, + 'is_rama_classification': False }, { 'id' : 6, 'type' : 'continuous', @@ -115,10 +103,8 @@ 'is_covariance' : True, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z' : False, - 'is_rama_classification': False, - 'is_rota_z' : False, - 'is_rota_classification': False + 'is_rama_z': False, + 'is_rama_classification': False }, { 'id' : 7, 'type' : 'continuous', @@ -129,24 +115,8 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z' : True, - 'is_rama_classification': False, - 'is_rota_z' : False, - 'is_rota_classification': False - }, - { 'id' : 8, - 'type' : 'continuous', - 'long_name' : 'Rotamer z-score', - 'short_name' : 'Rota Z', - 'ring_color' : COLORS['GREY'], - 'polarity' : 1, - 'is_covariance' : False, - 'is_molprobity' : False, - 'is_reflections': False, - 'is_rama_z' : False, - 'is_rama_classification': False, - 'is_rota_z' : True, - 'is_rota_classification': False + 'is_rama_z': True, + 'is_rama_classification': False } ) @@ -164,10 +134,8 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z' : False, - 'is_rama_classification': False, - 'is_rota_z' : False, - 'is_rota_classification': True + 'is_rama_z': False, + 'is_rama_classification': False }, { 'id' : 1, 'type' : 'discrete', @@ -183,10 +151,8 @@ 'is_covariance' : False, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z' : False, - 'is_rama_classification': True, - 'is_rota_z' : False, - 'is_rota_classification': False + 'is_rama_z': False, + 'is_rama_classification': True }, { 'id' : 2, 'type' : 'discrete', @@ -202,10 +168,8 @@ 'is_covariance' : False, 'is_molprobity' : True, 'is_reflections': False, - 'is_rama_z' : False, - 'is_rama_classification': False, - 'is_rota_z' : False, - 'is_rota_classification': False + 'is_rama_z': False, + 'is_rama_classification': False }, { 'id' : 3, 'type' : 'discrete', @@ -219,10 +183,8 @@ 'is_covariance' : True, 'is_molprobity' : False, 'is_reflections': False, - 'is_rama_z' : False, - 'is_rama_classification': False, - 'is_rota_z' : False, - 'is_rota_classification': False + 'is_rama_z': False, + 'is_rama_classification': False } ) @@ -238,8 +200,7 @@ CONTINUOUS_METRICS[4], CONTINUOUS_METRICS[5], CONTINUOUS_METRICS[6], - CONTINUOUS_METRICS[7], - CONTINUOUS_METRICS[8] ] + CONTINUOUS_METRICS[7] ] RESIDUE_VIEW_BOXES = [ DISCRETE_METRICS[0], DISCRETE_METRICS[1], diff --git a/iris_validation/graphics/panel.py b/iris_validation/graphics/panel.py index 5a860d5..1786c9d 100644 --- a/iris_validation/graphics/panel.py +++ b/iris_validation/graphics/panel.py @@ -84,13 +84,9 @@ def _verify_chosen_metrics(self): del metric_list[metric_index] elif (metric_list[metric_index]['is_reflections'] and not self.data[0]['has_reflections']): del metric_list[metric_index] - elif (metric_list[metric_index]['is_rama_z'] and not self.data[0]['has_tortoize']): + elif (metric_list[metric_index]['is_rama_z'] and not self.data[0]['has_rama_z']): del metric_list[metric_index] - elif (metric_list[metric_index]['is_rama_classification'] and self.data[0]['has_tortoize']): - del metric_list[metric_index] - elif (metric_list[metric_index]['is_rota_z'] and not self.data[0]['has_tortoize']): - del metric_list[metric_index] - elif (metric_list[metric_index]['is_rota_classification'] and self.data[0]['has_tortoize']): + elif (metric_list[metric_index]['is_rama_classification'] and self.data[0]['has_rama_z']): del metric_list[metric_index] def _generate_javascript(self): diff --git a/iris_validation/metrics/__init__.py b/iris_validation/metrics/__init__.py index 3221130..a49e016 100644 --- a/iris_validation/metrics/__init__.py +++ b/iris_validation/metrics/__init__.py @@ -1,6 +1,5 @@ from multiprocessing import Process, Queue -import collections import subprocess import json import clipper @@ -176,9 +175,8 @@ def _get_covariance_data(model_path, return covariance_data -def _get_tortoize_data(model_path, model_id=None, out_queue=None): - tortoize_datum = collections.namedtuple('tortoize_datum', ['rama_z', 'rota_z']) - tortoize_data = collections.defaultdict(tortoize_datum) +def _get_tortoize_data(model_path, seq_nums, model_id=None, out_queue=None): + rama_z_data = {chain_id: {} for chain_id in seq_nums.keys()} try: tortoize_process = subprocess.Popen( f'tortoize {model_path}', @@ -192,15 +190,12 @@ def _get_tortoize_data(model_path, model_id=None, out_queue=None): tortoize_dict = json.loads(tortoize_output) residues = tortoize_dict["model"]["1"]["residues"] for res in residues: - chain_tortoize_data = tortoize_data.setdefault(res['pdb']['strandID'], {}) - chain_tortoize_data[res['pdb']['seqNum']] = tortoize_datum( - rama_z=res['ramachandran']['z-score'], - rota_z=None if ('torsion' not in res or res['torsion']['z-score'] > 3) else res['torsion']['z-score']) - if out_queue is not None: - out_queue.put(('tortoize', model_id, tortoize_data)) + rama_z_data[res['pdb']['strandID']][res['pdb']['seqNum']] = res['ramachandran']['z-score'] - return tortoize_data + if out_queue is not None: + out_queue.put(('rama_z', model_id, rama_z_data)) + return rama_z_data def metrics_model_series_from_files( @@ -210,7 +205,7 @@ def metrics_model_series_from_files( distpred_paths=None, run_covariance=False, run_molprobity=False, - calculate_tortoize=True, + calculate_rama_z=False, model_json_paths=None, data_with_percentiles=None, multiprocessing=True, @@ -236,7 +231,7 @@ def metrics_model_series_from_files( all_covariance_data = [ ] all_molprobity_data = [ ] all_reflections_data = [ ] - all_tortoize_data = [ ] + all_rama_z_data = [ ] all_bfactor_data = [] # if externally supplied num_queued = 0 results_queue = Queue() @@ -256,7 +251,7 @@ def metrics_model_series_from_files( covariance_data = None molprobity_data = None reflections_data = None - tortoize_data = None + rama_z_data = None bfactor_data = None if json_data_path: @@ -303,7 +298,7 @@ def metrics_model_series_from_files( num_queued += 1 else: reflections_data = _get_reflections_data(model_path, reflections_path) - if calculate_tortoize: + if calculate_rama_z: if multiprocessing: p = Process(target=_get_tortoize_data, args=(model_path, seq_nums), @@ -312,13 +307,13 @@ def metrics_model_series_from_files( p.start() num_queued += 1 else: - tortoize_data = _get_tortoize_data(model_path) + rama_z_data = _get_tortoize_data(model_path, seq_nums) all_minimol_data.append(minimol) all_covariance_data.append(covariance_data) all_molprobity_data.append(molprobity_data) all_reflections_data.append(reflections_data) - all_tortoize_data.append(tortoize_data) + all_rama_z_data.append(rama_z_data) all_bfactor_data.append(bfactor_data) if multiprocessing: @@ -330,8 +325,8 @@ def metrics_model_series_from_files( all_molprobity_data[model_id] = result if result_type == 'reflections': all_reflections_data[model_id] = result - if result_type == 'tortoize': - all_tortoize_data[model_id] = result + if result_type == 'rama_z': + all_rama_z_data[model_id] = result metrics_models = [ ] for model_id, model_data in enumerate( zip( @@ -339,7 +334,7 @@ def metrics_model_series_from_files( all_covariance_data, all_molprobity_data, all_reflections_data, - all_tortoize_data, + all_rama_z_data, all_bfactor_data, ) ): diff --git a/iris_validation/metrics/chain.py b/iris_validation/metrics/chain.py index 7ff9d38..b596c4e 100644 --- a/iris_validation/metrics/chain.py +++ b/iris_validation/metrics/chain.py @@ -9,7 +9,7 @@ def __init__( covariance_data=None, molprobity_data=None, density_scores=None, - tortoize_data=None, + rama_z_data=None, bfactor_data=None, check_resnum=False, data_with_percentiles=None, @@ -19,7 +19,7 @@ def __init__( self.covariance_data = covariance_data self.molprobity_data = molprobity_data self.density_scores = density_scores - self.tortoize_data = tortoize_data + self.rama_z_data = rama_z_data self._index = -1 self.residues = [ ] @@ -72,16 +72,16 @@ def __init__( else: residue_density_scores = density_scores[seq_num] # rama_z - if tortoize_data is None: - residue_tortoize_scores = None + if rama_z_data is None: + residue_rama_z_score = None else: if check_resnum: try: - residue_tortoize_scores = tortoize_data[res_id] + residue_rama_z_score = rama_z_data[res_id] except KeyError: - residue_tortoize_scores = None + residue_rama_z_score = None else: - residue_tortoize_scores = tortoize_data.get(seq_num, None) + residue_rama_z_score = rama_z_data.get(seq_num, None) # ext b-factor if bfactor_data is None: residue_bfact_score = None @@ -109,7 +109,7 @@ def __init__( residue_covariance_data, residue_molprobity_data, residue_density_scores, - residue_tortoize_scores, + residue_rama_z_score, residue_bfact_score, dict_ext_percentiles, ) diff --git a/iris_validation/metrics/model.py b/iris_validation/metrics/model.py index e65791d..892d9fd 100644 --- a/iris_validation/metrics/model.py +++ b/iris_validation/metrics/model.py @@ -10,7 +10,7 @@ def __init__( covariance_data=None, molprobity_data=None, reflections_data=None, - tortoize_data=None, + rama_z_data=None, bfactor_data=None, check_resnum=False, data_with_percentiles=None, @@ -19,7 +19,7 @@ def __init__( self.covariance_data = covariance_data self.molprobity_data = molprobity_data self.reflections_data = reflections_data - self.tortoize_data = tortoize_data + self.rama_z_data = rama_z_data self._index = -1 self.minimol_chains = list(mmol_model.model()) @@ -37,7 +37,7 @@ def __init__( chain_covariance_data = None if covariance_data is None else covariance_data[chain_id] chain_molprobity_data = None if molprobity_data is None else molprobity_data[chain_id] chain_density_scores = None if self.density_scores is None else self.density_scores[chain_id] - chain_tortoize_data = None if tortoize_data is None else tortoize_data[chain_id] + chain_rama_z_data = None if rama_z_data is None else rama_z_data[chain_id] chain_bfactor_data = ( None if bfactor_data is None else bfactor_data[chain_id] ) @@ -47,7 +47,7 @@ def __init__( chain_covariance_data, chain_molprobity_data, chain_density_scores, - chain_tortoize_data, + chain_rama_z_data, chain_bfactor_data, check_resnum=check_resnum, data_with_percentiles=data_with_percentiles, diff --git a/iris_validation/metrics/residue.py b/iris_validation/metrics/residue.py index 29cc92a..595cff3 100644 --- a/iris_validation/metrics/residue.py +++ b/iris_validation/metrics/residue.py @@ -17,7 +17,7 @@ def __init__( covariance_data=None, molprobity_data=None, density_scores=None, - tortoize_scores=None, + rama_z_score=None, bfact_score=None, dict_ext_percentiles=None, ): @@ -30,17 +30,8 @@ def __init__( self.covariance_data = covariance_data self.molprobity_data = molprobity_data self.density_scores = density_scores - self.rama_z = None - self.rota_z = None - if tortoize_scores: - if hasattr(tortoize_scores,"rama_z"): - self.rama_z = tortoize_scores.rama_z - elif "rama_z" in tortoize_scores: - self.rama_z = tortoize_scores["rama_z"] - if hasattr(tortoize_scores,"rota_z"): - self.rota_z = tortoize_scores.rota_z - elif "rota_z" in tortoize_scores: - self.rota_z = tortoize_scores["rota_z"] + self.rama_z = rama_z_score + self.atoms = list(mmol_residue) self.sequence_number = int(mmol_residue.seqnum()) self.code = mmol_residue.type().trim() diff --git a/iris_validation/metrics/series.py b/iris_validation/metrics/series.py index b71dece..5483401 100644 --- a/iris_validation/metrics/series.py +++ b/iris_validation/metrics/series.py @@ -73,7 +73,8 @@ def get_raw_data(self): has_covariance = self.metrics_models[0].covariance_data is not None has_molprobity = self.metrics_models[0].molprobity_data is not None has_reflections = self.metrics_models[0].reflections_data is not None - has_tortoize = self.metrics_models[0].tortoize_data is not None + has_rama_z = self.metrics_models[0].rama_z_data is not None + has_rama_classification = not has_rama_z raw_data = [ ] for chain_id, chain_set in self.chain_sets.items(): @@ -84,7 +85,8 @@ def get_raw_data(self): 'has_covariance' : has_covariance, 'has_molprobity' : has_molprobity, 'has_reflections' : has_reflections, - 'has_tortoize' : has_tortoize, + 'has_rama_z' : has_rama_z, + 'has_rama_classification': has_rama_classification, 'aligned_length' : aligned_length, 'residue_seqnos' : [ ], 'residue_codes' : [ ], @@ -129,8 +131,7 @@ def get_raw_data(self): residue.mainchain_fit_score, residue.sidechain_fit_score, residue.covariance_score, - residue.rama_z, - residue.rota_z) + residue.rama_z) residue_percentile_values = (residue.avg_b_factor_percentile, residue.max_b_factor_percentile, residue.std_b_factor_percentile, From 671547277a3c69772cac8ac8a1ecd1db860e083d Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Fri, 21 Jul 2023 10:11:16 +0100 Subject: [PATCH 14/20] fix rama_z scores --- iris_validation/metrics/__init__.py | 4 ++-- iris_validation/metrics/chain.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/iris_validation/metrics/__init__.py b/iris_validation/metrics/__init__.py index a49e016..c25b68e 100644 --- a/iris_validation/metrics/__init__.py +++ b/iris_validation/metrics/__init__.py @@ -262,8 +262,8 @@ def metrics_model_series_from_files( if metric == "molprobity": molprobity_data = json_data["molprobity"] if metric == "tortoize": - tortoize_data = json_data["tortoize"] - calculate_tortoize = False + rama_z_data = json_data["tortoize"] + calculate_rama_z = False if metric == "map_fit": reflections_data = json_data["map_fit"] if metric == "b_fact": diff --git a/iris_validation/metrics/chain.py b/iris_validation/metrics/chain.py index b596c4e..a563400 100644 --- a/iris_validation/metrics/chain.py +++ b/iris_validation/metrics/chain.py @@ -77,11 +77,14 @@ def __init__( else: if check_resnum: try: - residue_rama_z_score = rama_z_data[res_id] + residue_rama_z_score = rama_z_data[res_id]["rama_z"] except KeyError: residue_rama_z_score = None else: - residue_rama_z_score = rama_z_data.get(seq_num, None) + try: + residue_rama_z_score = rama_z_data[seq_num]["rama_z"] + except KeyError: + residue_rama_z_score = None # ext b-factor if bfactor_data is None: residue_bfact_score = None From adb4f05b6bd1d6fc6a442edd66a834b4376e3e1d Mon Sep 17 00:00:00 2001 From: aj26git Date: Mon, 24 Jul 2023 10:39:49 +0100 Subject: [PATCH 15/20] Update iris_validation/graphics/panel.py restructure functions to get `continuous_metrics_to_display`, `discrete_metrics_to_display` Co-authored-by: AliDariusKhan --- iris_validation/graphics/panel.py | 35 +++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/iris_validation/graphics/panel.py b/iris_validation/graphics/panel.py index 1786c9d..47d0685 100644 --- a/iris_validation/graphics/panel.py +++ b/iris_validation/graphics/panel.py @@ -334,6 +334,41 @@ def _draw(self): self.residue_view.attribs['viewBox'] = f'{width_buffer} {height_buffer} {viewbox_width} {viewbox_height}' self.dwg.add(self.residue_view) + def _add_metrics(self, metrics_to_display, metrics_source): + view = [] + for metric_name in metrics_to_display: + for metric_info in metrics_source: + if metric_info["short_name"] == metric_name: + view.append(metric_info) + break + return view + + def get_chain_view_rings( + self, + continuous_metrics_to_display, + discrete_metrics_to_display=None + ): + chain_view = [] + + # add discrete types first + if discrete_metrics_to_display: + chain_view.extend( + self._add_metrics(discrete_metrics_to_display, DISCRETE_METRICS) + ) + else: + chain_view.extend( + self._add_metrics([m["short_name"] for m in CHAIN_VIEW_RINGS if m["type"] == "discrete"], DISCRETE_METRICS) + ) + + chain_view.extend( + self._add_metrics(continuous_metrics_to_display, CONTINUOUS_METRICS) + ) + + return chain_view + + def get_residue_view_bars(self, residue_bars_to_display): + return self._add_metrics(residue_bars_to_display, CONTINUOUS_METRICS) + def get_chain_view_rings( self, continuous_metrics_to_display, discrete_metrics_to_display=None ): From 24be9520aa35257fdee9f24e9aa5b89eb1fa1191 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Mon, 24 Jul 2023 10:45:28 +0100 Subject: [PATCH 16/20] fix panels.py (duplicate functions) --- iris_validation/graphics/panel.py | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/iris_validation/graphics/panel.py b/iris_validation/graphics/panel.py index 47d0685..38aefa7 100644 --- a/iris_validation/graphics/panel.py +++ b/iris_validation/graphics/panel.py @@ -368,34 +368,3 @@ def get_chain_view_rings( def get_residue_view_bars(self, residue_bars_to_display): return self._add_metrics(residue_bars_to_display, CONTINUOUS_METRICS) - - def get_chain_view_rings( - self, continuous_metrics_to_display, discrete_metrics_to_display=None - ): - chain_view = [] - # add discrete types first - if not discrete_metrics_to_display is None: - for metric_name in discrete_metrics_to_display: - for metric_info in DISCRETE_METRICS: - if metric_info["short_name"] == metric_name: - chain_view.append(metric_info) - break - else: - for metric_info in CHAIN_VIEW_RINGS: - if metric_info["type"] == "discrete": - chain_view.append(metric_info) - for metric_name in continuous_metrics_to_display: - for metric_info in CONTINUOUS_METRICS: - if metric_info["short_name"] == metric_name: - chain_view.append(metric_info) - break - return chain_view - - def get_residue_view_bars(self, residue_bars_to_display): - residue_view = [] - for metric_name in residue_bars_to_display: - for metric_info in CONTINUOUS_METRICS: - if metric_info["short_name"] == metric_name: - residue_view.append(metric_info) - break - return residue_view From bf6014f4c2abab4741825feb6679fde95fd587b0 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Mon, 24 Jul 2023 13:47:25 +0100 Subject: [PATCH 17/20] add helper function to simplify chain data gathering --- iris_validation/metrics/chain.py | 100 +++++++++++-------------------- 1 file changed, 36 insertions(+), 64 deletions(-) diff --git a/iris_validation/metrics/chain.py b/iris_validation/metrics/chain.py index a563400..85f9791 100644 --- a/iris_validation/metrics/chain.py +++ b/iris_validation/metrics/chain.py @@ -32,77 +32,34 @@ def __init__( seq_num = int(mmol_residue.seqnum()) res_id = str(mmol_residue.id()).strip() # covariance - if covariance_data is None: - residue_covariance_data = None - else: - if check_resnum: - try: - residue_covariance_data = covariance_data[res_id] - except KeyError: - residue_covariance_data = None - else: - residue_covariance_data = covariance_data[seq_num] + residue_covariance_data = get_data_from_dict(covariance_data, + id=res_id,seq_num=seq_num,check_resnum=check_resnum) # molprobity - if molprobity_data is None: - residue_molprobity_data = None - else: - if check_resnum: - try: - residue_molprobity_data = molprobity_data[res_id] - except KeyError: - residue_molprobity_data = None - else: - residue_molprobity_data = molprobity_data[seq_num] + residue_molprobity_data = get_data_from_dict(molprobity_data, + id=res_id,seq_num=seq_num,check_resnum=check_resnum) # density scores - if density_scores is None: - residue_density_scores = None - else: - if check_resnum: - if data_with_percentiles and "map_fit" in data_with_percentiles: - try: - residue_density_scores = density_scores[res_id][0] - dict_ext_percentiles["map_fit"] = density_scores[res_id][-1] - except KeyError: - residue_density_scores = None - else: - try: - residue_density_scores = density_scores[res_id] - except KeyError: - residue_density_scores = None - else: - residue_density_scores = density_scores[seq_num] + residue_density_scores = get_data_from_dict(density_scores, + id=res_id,seq_num=seq_num,check_resnum=check_resnum, + with_percentiles=data_with_percentiles,percentile_key="map_fit", + dict_ext_percentiles=dict_ext_percentiles) # rama_z if rama_z_data is None: residue_rama_z_score = None + elif check_resnum: + try: + residue_rama_z_score = rama_z_data[res_id]["rama_z"] + except KeyError: + residue_rama_z_score = None else: - if check_resnum: - try: - residue_rama_z_score = rama_z_data[res_id]["rama_z"] - except KeyError: - residue_rama_z_score = None - else: - try: - residue_rama_z_score = rama_z_data[seq_num]["rama_z"] - except KeyError: - residue_rama_z_score = None + try: + residue_rama_z_score = rama_z_data[seq_num]["rama_z"] + except KeyError: + residue_rama_z_score = None # ext b-factor - if bfactor_data is None: - residue_bfact_score = None - else: - if check_resnum: - if data_with_percentiles and "b-factor" in data_with_percentiles: - try: - residue_bfact_score = bfactor_data[res_id][0] - dict_ext_percentiles["b-factor"] = bfactor_data[res_id][-1] - except KeyError: - residue_bfact_score = None - else: - try: - residue_bfact_score = bfactor_data[res_id] - except KeyError: - residue_bfact_score = None - else: - residue_bfact_score = bfactor_data[seq_num] + residue_bfact_score = get_data_from_dict(bfactor_data, + id=res_id,seq_num=seq_num,check_resnum=check_resnum, + with_percentiles=data_with_percentiles,percentile_key="b-factor", + dict_ext_percentiles=dict_ext_percentiles) residue = MetricsResidue( mmol_residue, residue_index, @@ -169,3 +126,18 @@ def b_factor_lists(self): else: ion_bfs.append(residue.avg_b_factor) return all_bfs, aa_bfs, mc_bfs, sc_bfs, non_aa_bfs, water_bfs, ligand_bfs, ion_bfs + +def get_data_from_dict(data_dict, id, seq_num, check_resnum, with_percentiles=None, percentile_key=None, dict_ext_percentiles=None): + if data_dict is None: + return None + if not check_resnum: + return data_dict.get(seq_num, None) + + try: + if with_percentiles and percentile_key in with_percentiles: + dict_ext_percentiles[percentile_key] = data_dict[id][-1] + return data_dict[id][0] + else: + return data_dict[id] + except KeyError: + return None \ No newline at end of file From 5ccf1d474b83d41e8633da5a08432b9e46e3ea9e Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Mon, 24 Jul 2023 14:09:48 +0100 Subject: [PATCH 18/20] reset metric calculations if provided externally --- iris_validation/metrics/__init__.py | 6 ++++-- iris_validation/metrics/chain.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/iris_validation/metrics/__init__.py b/iris_validation/metrics/__init__.py index c25b68e..a95516f 100644 --- a/iris_validation/metrics/__init__.py +++ b/iris_validation/metrics/__init__.py @@ -261,13 +261,15 @@ def metrics_model_series_from_files( for metric in json_data: if metric == "molprobity": molprobity_data = json_data["molprobity"] + run_molprobity = False if metric == "tortoize": rama_z_data = json_data["tortoize"] calculate_rama_z = False if metric == "map_fit": reflections_data = json_data["map_fit"] - if metric == "b_fact": - bfactor_data = json_data["b_fact"] + reflections_path = None + if metric == "b_factor": + bfactor_data = json_data["b_factor"] if run_covariance: if multiprocessing: p = Process(target=_get_covariance_data, diff --git a/iris_validation/metrics/chain.py b/iris_validation/metrics/chain.py index 85f9791..997ab4c 100644 --- a/iris_validation/metrics/chain.py +++ b/iris_validation/metrics/chain.py @@ -58,7 +58,7 @@ def __init__( # ext b-factor residue_bfact_score = get_data_from_dict(bfactor_data, id=res_id,seq_num=seq_num,check_resnum=check_resnum, - with_percentiles=data_with_percentiles,percentile_key="b-factor", + with_percentiles=data_with_percentiles,percentile_key="b_factor", dict_ext_percentiles=dict_ext_percentiles) residue = MetricsResidue( mmol_residue, From af888156657968d2dab63d72c55fb6f89dc227b3 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Wed, 6 Dec 2023 11:23:24 +0000 Subject: [PATCH 19/20] set calc rama_z as True by default --- iris_validation/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iris_validation/__init__.py b/iris_validation/__init__.py index b1d0987..18b689a 100644 --- a/iris_validation/__init__.py +++ b/iris_validation/__init__.py @@ -15,7 +15,7 @@ def generate_report( previous_distpred_path=None, run_covariance=False, run_molprobity=False, - calculate_rama_z=False, + calculate_rama_z=True, multiprocessing=True, latest_model_metrics_json=None, previous_model_metrics_json=None, From 3605cd6c1a91fb73cbc9e0536052801be39eaa05 Mon Sep 17 00:00:00 2001 From: Agnel Joseph Date: Wed, 6 Dec 2023 11:24:31 +0000 Subject: [PATCH 20/20] pass rama_z and rota_z as separate dictionaries --- iris_validation/metrics/__init__.py | 4 ++-- iris_validation/metrics/chain.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/iris_validation/metrics/__init__.py b/iris_validation/metrics/__init__.py index a95516f..6be127a 100644 --- a/iris_validation/metrics/__init__.py +++ b/iris_validation/metrics/__init__.py @@ -262,8 +262,8 @@ def metrics_model_series_from_files( if metric == "molprobity": molprobity_data = json_data["molprobity"] run_molprobity = False - if metric == "tortoize": - rama_z_data = json_data["tortoize"] + if metric == "rama_z": + rama_z_data = json_data["rama_z"] calculate_rama_z = False if metric == "map_fit": reflections_data = json_data["map_fit"] diff --git a/iris_validation/metrics/chain.py b/iris_validation/metrics/chain.py index 997ab4c..b07b62e 100644 --- a/iris_validation/metrics/chain.py +++ b/iris_validation/metrics/chain.py @@ -47,12 +47,12 @@ def __init__( residue_rama_z_score = None elif check_resnum: try: - residue_rama_z_score = rama_z_data[res_id]["rama_z"] + residue_rama_z_score = rama_z_data[res_id] except KeyError: residue_rama_z_score = None else: try: - residue_rama_z_score = rama_z_data[seq_num]["rama_z"] + residue_rama_z_score = rama_z_data[seq_num] except KeyError: residue_rama_z_score = None # ext b-factor