forked from searphsea/COLORS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sequence.cpp
124 lines (119 loc) · 2.54 KB
/
sequence.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#include "sequence.h"
#include <cstdlib>
#include <fstream>
#include <string>
//
int read_msa(const char*file_path, vector<string>& msa){
ifstream fin;
fin.open(file_path);
if(fin.fail()){
printf("The path %s does not exist!\n", file_path);
exit(-1);
}
string line;
while(getline(fin, line, '\n')){
if(line.size() == 0 || line[0] == '>'){
continue;
}
string seq = line;
for(int i = 0; i < line.size(); i++){
seq[i] = aatoi(line[i]);
}
msa.push_back(seq);
}
fin.close();
}
int cal_seq_weight(const vector<string>& msa, vector<double>& weight, double seq_id){
int M = msa.size();
weight.assign(M, 1.0);
for(int i = 0; i < M; i++){
for(int j = i + 1; j < M; j++){
double sim = cal_seq_sim(msa[i], msa[j]);
if(sim >= seq_id){
weight[i] += 1.0;
weight[j] += 1.0;
}
}
}
for(int i = 0; i < M; i++){
weight[i] = 1.0 / weight[i];
}
}
double cal_seq_sim(const string& seq1, const string& seq2){
double sim = 0.0;
for(int i = 0; i < seq1.size(); i++){
sim += seq1[i] == seq2[i];
}
return sim / seq1.size();
}
unsigned char aatoi(unsigned char aa) {
char id;
switch(aa){
case '-':
id = 0;
break;
case 'A':
id = 1;
break;
case 'C':
id = 2;
break;
case 'D':
id = 3;
break;
case 'E':
id = 4;
break;
case 'F':
id = 5;
break;
case 'G':
id = 6;
break;
case 'H':
id = 7;
break;
case 'I':
id = 8;
break;
case 'K':
id = 9;
break;
case 'L':
id = 10;
break;
case 'M':
id = 11;
break;
case 'N':
id = 12;
break;
case 'P':
id = 13;
break;
case 'Q':
id = 14;
break;
case 'R':
id = 15;
break;
case 'S':
id = 16;
break;
case 'T':
id = 17;
break;
case 'V':
id = 18;
break;
case 'W':
id = 19;
break;
case 'Y':
id = 20;
break;
default:
id = 0;
}
return id;
}