-
Notifications
You must be signed in to change notification settings - Fork 0
/
20_homolog_model_weight_correlations.Rmd
97 lines (67 loc) · 2.97 KB
/
20_homolog_model_weight_correlations.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
---
title: "homolog models - inferred weight correlations"
output: html_document
date: "2024-07-15"
---
```{r cars}
library(ggplot2)
library(data.table)
library(stringr)
library(viridis)
library(ggpubr)
library(GGally)
setwd("/path/to/your/files")
#load weights and plot correlations
pfamids<-fread("analysis_files/homolog_mochi_input_files/PFAM_IDs",header=FALSE)$V1
read_weights_and_plot<-function(family,fit_type,trait){
weights<-fread(paste("analysis_files/homolog_mochi_input_files/",paste(paste(family,fit_type,sep="_"),paste("/weights/weights",trait,sep="_"),".txt",sep=""),sep=""))
weights[,mut_aa:=substr(id,nchar(id),nchar(id))]
weights[id=="WT",mut_aa:=NA]
weights$mut_aa<-factor(weights$mut_aa,
levels=str_split("QNSTDEKRHGPCMAILVFYW","")[[1]])
features<-fread(paste("analysis_files/homolog_mochi_input_files/",paste(family,"_features_solu.txt",sep=""),sep = ""))
wt_weights<-features[SoluWeight!="",]$Folding
mut_weights<-weights[!is.na(`mean_kcal/mol`) & !(id %in% wt_weights),]
mut_weights[,family:=family]
mut_weights[,fit_type:=fit_type]
mut_weights[,trait:=trait]
return(mut_weights[,c("id","mean_kcal/mol","family","fit_type","trait")])
}
#folding (boltzmann) vs folding and solubility models
all_weights_folding<-data.frame()
all_weights_solu<-data.frame()
#Boltzmann fits
#2-state folding model
for (pfamid in pfamids){
all_weights_folding<-rbind(all_weights_folding,read_weights_and_plot(pfamid,"folding","Folding"))
}
#2-state folding model with an additional linear transform per domain
for (pfamid in pfamids){
all_weights_solu<-rbind(all_weights_solu,read_weights_and_plot(pfamid,"folding_solu","Folding"))
}
merged<-merge(all_weights_folding,all_weights_solu,by=c("id","family","trait"))
cor<-merged[,.(weight_cor_pearson=cor(`mean_kcal/mol.x`,`mean_kcal/mol.y`,use="pairwise.complete.obs"),
weight_cor_spearman=cor(`mean_kcal/mol.x`,`mean_kcal/mol.y`,use="pairwise.complete.obs",method="spearman")),by="family"]
median(cor$weight_cor_spearman)
min(cor$weight_cor_spearman)
ggplot(merged)+
geom_hex(aes(x=`mean_kcal/mol.x`,y=`mean_kcal/mol.y`))+
facet_wrap(~family,scales="free")+
xlab("model weights folding")+
ylab("model weights folding + solubility")
#folding (boltzmann) model vs linear
all_weights_linear<-data.frame()
for (pfamid in pfamids){
all_weights_linear<-rbind(all_weights_linear,read_weights_and_plot(pfamid,"folding_linear","Folding"))
}
merged<-merge(all_weights_folding,all_weights_linear,by=c("id","family","trait"))
cor<-merged[,.(weight_cor_pearson=cor(`mean_kcal/mol.x`,`mean_kcal/mol.y`,use="pairwise.complete.obs"),
weight_cor_spearman=cor(`mean_kcal/mol.x`,`mean_kcal/mol.y`,use="pairwise.complete.obs",method="spearman")),by="family"]
median(cor$weight_cor_spearman)
max(cor$weight_cor_spearman)
ggplot(merged)+
geom_hex(aes(x=`mean_kcal/mol.x`,y=`mean_kcal/mol.y`))+
facet_wrap(~family,scales="free")+
xlab("model weights folding (Boltzmann)")+
ylab("model weights linear")
```