From 041520b43f972f89174ca1831896e246468a31a8 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 11 Aug 2021 03:15:39 +0300 Subject: [PATCH 1/6] Replace code for processing factor loadings --- .../correlation_matrix/show_corr_matrix.py | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index e7b16e777..309a045a1 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -38,6 +38,8 @@ from utility import corr_result_helpers from utility.redis_tools import get_redis_conn +from gn3.computations.correlation_matrix import compute_pca + Redis = get_redis_conn() THIRTY_DAYS = 60 * 60 * 24 * 30 @@ -174,7 +176,7 @@ def __init__(self, start_vars): self.pca_trait_ids = [] pca = self.calculate_pca( list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) - self.loadings_array = self.process_loadings() + # self.loadings_array = self.process_loadings() else: self.pca_works = "False" except: @@ -201,6 +203,14 @@ def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): self.scores = pca.rx('scores') self.scale = pca.rx('scale') + pca_obj,pca_scores = compute_pca(self.pca_corr_results) + + + self.loadings = pca_obj.components_ + + self.loadings_array = process_factor_loadings(self.loadings,len(self.trait_list)) + + trait_array = zScore(self.trait_data_array) trait_array_vectors = np.dot(corr_eigen_vectors, trait_array) @@ -249,6 +259,18 @@ def process_loadings(self): return loadings_array +def process_factor_loadings(factor_loadings,trait_list_num): + + target_columns = 3 if trait_list_num > 2 else 2 + + traits_loadings = list(factor_loadings.T) + + table_row_loadings = [list(trait_loading[:target_columns]) + for trait_loading in traits_loadings] + + return table_row_loadings + + def export_corr_matrix(corr_results): 
corr_matrix_filename = "corr_matrix_" + \ ''.join(random.choice(string.ascii_uppercase + string.digits) From 22f7a04fc319eb1846908eb6e8260719c7409af4 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 11 Aug 2021 03:17:58 +0300 Subject: [PATCH 2/6] delete r code --- .../correlation_matrix/show_corr_matrix.py | 35 ++----------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index 309a045a1..8bbcaaa5a 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -23,8 +23,7 @@ import random import string -import rpy2.robjects as ro -from rpy2.robjects.packages import importr + import numpy as np import scipy @@ -190,21 +189,8 @@ def __init__(self, start_vars): sample_data=self.sample_data,) def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): - base = importr('base') - stats = importr('stats') - - corr_results_to_list = ro.FloatVector( - [item for sublist in self.pca_corr_results for item in sublist]) - - m = ro.r.matrix(corr_results_to_list, nrow=len(cols)) - eigen = base.eigen(m) - pca = stats.princomp(m, cor="TRUE") - self.loadings = pca.rx('loadings') - self.scores = pca.rx('scores') - self.scale = pca.rx('scale') - pca_obj,pca_scores = compute_pca(self.pca_corr_results) - + self.scores = pca_scores self.loadings = pca_obj.components_ @@ -241,22 +227,7 @@ def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS) self.pca_trait_ids.append(trait_id) - return pca - - def process_loadings(self): - loadings_array = [] - loadings_row = [] - for i in range(len(self.trait_list)): - loadings_row = [] - if len(self.trait_list) > 2: - the_range = 3 - else: - the_range = 2 - for j in range(the_range): - position = i + len(self.trait_list) * j - 
loadings_row.append(self.loadings[0][position]) - loadings_array.append(loadings_row) - return loadings_array + return pca_obj def process_factor_loadings(factor_loadings,trait_list_num): From e56e06dd067e8422271feeb2fd709dbf9ad75b33 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 11 Aug 2021 03:20:18 +0300 Subject: [PATCH 3/6] Replace code for computing zscores --- .../correlation_matrix/show_corr_matrix.py | 25 +++---------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index 8bbcaaa5a..97af63df9 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -38,6 +38,7 @@ from utility.redis_tools import get_redis_conn from gn3.computations.correlation_matrix import compute_pca +from gn3.computations.correlation_matrix import compute_zscores Redis = get_redis_conn() THIRTY_DAYS = 60 * 60 * 24 * 30 @@ -189,6 +190,7 @@ def __init__(self, start_vars): sample_data=self.sample_data,) def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): + pca_obj,pca_scores = compute_pca(self.pca_corr_results) self.scores = pca_scores @@ -197,7 +199,7 @@ def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): self.loadings_array = process_factor_loadings(self.loadings,len(self.trait_list)) - trait_array = zScore(self.trait_data_array) + trait_array = compute_zscores(self.trait_data_array) trait_array_vectors = np.dot(corr_eigen_vectors, trait_array) pca_traits = [] @@ -281,27 +283,6 @@ def export_corr_matrix(corr_results): return corr_matrix_filename, matrix_export_path -def zScore(trait_data_array): - NN = len(trait_data_array[0]) - if NN < 10: - return trait_data_array - else: - i = 0 - for data in trait_data_array: - N = len(data) - S = reduce(lambda x, y: x + y, data, 0.) - SS = reduce(lambda x, y: x + y * y, data, 0.) 
- mean = S / N - var = SS - S * S / N - stdev = math.sqrt(var / (N - 1)) - if stdev == 0: - stdev = 1e-100 - data2 = [(x - mean) / stdev for x in data] - trait_data_array[i] = data2 - i += 1 - return trait_data_array - - def sortEigenVectors(vector): try: eigenValues = vector[0].tolist() From b5ef2b9ba4074fcb9eb1314743111b4db4f9e80e Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 11 Aug 2021 03:23:21 +0300 Subject: [PATCH 4/6] Replace code for computing and sorting eigens --- wqflask/wqflask/correlation_matrix/show_corr_matrix.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index 97af63df9..001e3cd4b 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -39,6 +39,7 @@ from gn3.computations.correlation_matrix import compute_pca from gn3.computations.correlation_matrix import compute_zscores +from gn3.computations.correlation_matrix import compute_sort_eigens Redis = get_redis_conn() THIRTY_DAYS = 60 * 60 * 24 * 30 @@ -167,9 +168,7 @@ def __init__(self, start_vars): self.pca_works = "False" try: - corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) - corr_eigen_value, corr_eigen_vectors = sortEigenVectors( - corr_result_eigen) + corr_eigen_value,corr_eigen_vectors = compute_sort_eigens(self.pca_corr_results) if self.do_PCA == True: self.pca_works = "True" From 34645b1ebb600bbc46b04817a38112408a9d51df Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 11 Aug 2021 03:24:02 +0300 Subject: [PATCH 5/6] Delete unused functions --- .../correlation_matrix/show_corr_matrix.py | 49 ++++++------------- 1 file changed, 14 insertions(+), 35 deletions(-) diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index 001e3cd4b..8e278a339 100644 --- 
a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -24,7 +24,6 @@ import string - import numpy as np import scipy @@ -44,6 +43,7 @@ Redis = get_redis_conn() THIRTY_DAYS = 60 * 60 * 24 * 30 + class CorrelationMatrix: def __init__(self, start_vars): @@ -168,14 +168,15 @@ def __init__(self, start_vars): self.pca_works = "False" try: - corr_eigen_value,corr_eigen_vectors = compute_sort_eigens(self.pca_corr_results) + corr_eigen_value, corr_eigen_vectors = compute_sort_eigens( + self.pca_corr_results) if self.do_PCA == True: self.pca_works = "True" self.pca_trait_ids = [] pca = self.calculate_pca( list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) - # self.loadings_array = self.process_loadings() + else: self.pca_works = "False" except: @@ -190,13 +191,13 @@ def __init__(self, start_vars): def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): - pca_obj,pca_scores = compute_pca(self.pca_corr_results) - self.scores = pca_scores + pca_obj, pca_scores = compute_pca(self.pca_corr_results) + self.scores = pca_scores self.loadings = pca_obj.components_ - self.loadings_array = process_factor_loadings(self.loadings,len(self.trait_list)) - + self.loadings_array = process_factor_loadings( + self.loadings, len(self.trait_list)) trait_array = compute_zscores(self.trait_data_array) trait_array_vectors = np.dot(corr_eigen_vectors, trait_array) @@ -231,7 +232,7 @@ def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): return pca_obj -def process_factor_loadings(factor_loadings,trait_list_num): +def process_factor_loadings(factor_loadings, trait_list_num): target_columns = 3 if trait_list_num > 2 else 2 @@ -255,11 +256,11 @@ def export_corr_matrix(corr_results): output_file.write("\n") output_file.write("Correlation ") for i, item in enumerate(corr_results[0]): - output_file.write("Trait" + str(i + 1) + ": " + \ + output_file.write("Trait" + str(i + 1) + ": " + 
str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") output_file.write("\n") for i, row in enumerate(corr_results): - output_file.write("Trait" + str(i + 1) + ": " + \ + output_file.write("Trait" + str(i + 1) + ": " + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") for item in row: output_file.write(str(item[1]) + "\t") @@ -269,36 +270,14 @@ def export_corr_matrix(corr_results): output_file.write("\n") output_file.write("N ") for i, item in enumerate(corr_results[0]): - output_file.write("Trait" + str(i) + ": " + \ + output_file.write("Trait" + str(i) + ": " + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") output_file.write("\n") for i, row in enumerate(corr_results): - output_file.write("Trait" + str(i) + ": " + \ + output_file.write("Trait" + str(i) + ": " + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") for item in row: output_file.write(str(item[2]) + "\t") output_file.write("\n") - return corr_matrix_filename, matrix_export_path - - -def sortEigenVectors(vector): - try: - eigenValues = vector[0].tolist() - eigenVectors = vector[1].T.tolist() - combines = [] - i = 0 - for item in eigenValues: - combines.append([eigenValues[i], eigenVectors[i]]) - i += 1 - sorted(combines, key=cmp_to_key(webqtlUtil.cmpEigenValue)) - A = [] - B = [] - for item in combines: - A.append(item[0]) - B.append(item[1]) - sum = reduce(lambda x, y: x + y, A, 0.0) - A = [x * 100.0 / sum for x in A] - return [A, B] - except: - return [] + return corr_matrix_filename, matrix_export_path \ No newline at end of file From e16bb827ec8913023942e26be3aad09a080c9af8 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 11 Aug 2021 03:26:37 +0300 Subject: [PATCH 6/6] delete unused imports --- wqflask/wqflask/correlation_matrix/show_corr_matrix.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index 
8e278a339..a7bf935c9 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -19,7 +19,6 @@ # This module is used by GeneNetwork project (www.genenetwork.org) import datetime -import math import random import string @@ -29,9 +28,7 @@ from base import data_set from base.webqtlConfig import GENERATED_TEXT_DIR -from functools import reduce -from functools import cmp_to_key -from utility import webqtlUtil + from utility import helper_functions from utility import corr_result_helpers from utility.redis_tools import get_redis_conn @@ -280,4 +277,4 @@ def export_corr_matrix(corr_results): output_file.write(str(item[2]) + "\t") output_file.write("\n") - return corr_matrix_filename, matrix_export_path \ No newline at end of file + return corr_matrix_filename, matrix_export_path