-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbrute_force.py
76 lines (60 loc) · 1.8 KB
/
brute_force.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import itertools
import json
# Read the protein sequences. proteins.json is expected to hold a single JSON
# array of strings, one sequence per element.
with open('proteins.json', encoding='utf-8') as f:
    # Use the json parser instead of slicing off the brackets and splitting on
    # '", "' by hand: the manual approach leaves stray quote/bracket characters
    # in the first or last sequence whenever the file ends with a newline or
    # uses different whitespace between elements.
    S = json.load(f)

# Window length (matches the length used in the motif tuples below).
k = 9
# Number of motifs in each candidate motif set.
n = 6
# define functions
def Robustness(S, Motifs):
    """Score how well a motif set keeps covering the proteins when one motif is lost.

    For every motif in turn, that motif is left out and the proteins still
    recognized by at least one of the remaining motifs are counted.  The
    returned score is the sum of those counts over all left-out motifs.

    Args:
        S: iterable of protein sequences.
        Motifs: sequence of motifs, each in the form accepted by Recognizes.

    Returns:
        The integer robustness score (0 when S or Motifs is empty).
    """
    # hits[p][m] is 1 when motif m recognizes protein p, otherwise 0.
    hits = [
        [1 if Recognizes(protein, motif) else 0 for motif in Motifs]
        for protein in S
    ]

    score = 0
    for dropped in range(len(Motifs)):
        # Count the proteins that some motif other than `dropped` recognizes.
        score += sum(
            1
            for row in hits
            if any(flag for col, flag in enumerate(row) if col != dropped)
        )
    return score
def Recognizes(s, motif):
    """Return True if the motif matches some window of the sequence ``s``.

    Args:
        s: the sequence to scan (indexable, e.g. a string).
        motif: a ``(k, P_n2, P_c1)`` tuple giving the window length ``k``, the
            residue required at the window's second position and the residue
            required at the window's last position.

    Returns:
        True if any length-``k`` window of ``s`` has ``P_n2`` at its second
        position and ``P_c1`` at its last position, otherwise False.  A
        sequence shorter than ``k`` has no windows and yields False.
    """
    length, second_residue, last_residue = motif

    # A window shorter than two residues has no second position.  Without this
    # guard the index arithmetic below reads outside the window (k == 1) or
    # wraps around to the end of the sequence (k == 0), which could produce a
    # false match or an IndexError.
    if length < 2:
        return False

    return any(
        s[start + 1] == second_residue and s[start + length - 1] == last_residue
        for start in range(len(s) - length + 1)
    )
# One-letter codes of the 20 standard amino acids.
AminoAcids = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']

# Generate all possible motifs: the window length k paired with every choice
# of residue for the two positions that Recognizes checks.
allMotifs = [(k, first, last) for first in AminoAcids for last in AminoAcids]

# Keep the combinations lazy.  There are C(len(allMotifs), n) candidate sets,
# far too many to hold in a list, so they are consumed one at a time.
allMotifSets = itertools.combinations(allMotifs, n)

# Exhaustive search: track the best score seen so far and every motif set
# that attains it.
maxRobustness = 0
bestMotifs = []
for Motifs in allMotifSets:
    print(Motifs)
    r = Robustness(S, Motifs)
    if r > maxRobustness:
        # Strictly better: discard the previous ties.
        maxRobustness = r
        bestMotifs = [Motifs]
    elif r == maxRobustness:
        bestMotifs.append(Motifs)

# Report the maximum itself; `r` only holds the score of the last set tried
# (and is undefined when no set was evaluated).
print(f'{len(bestMotifs)} motif sets have maximum robustness {maxRobustness}')