-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcoarseGrain.py
executable file
·311 lines (244 loc) · 11.5 KB
/
coarseGrain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
#!/usr/bin/env python
# coarseGrain.py
# Selects a sub-set of carbon atoms, equally distributed across the complex
# Author: Caroline Ross: [email protected]
# August 2017
import os
import sys
import argparse
from datetime import datetime
from lib.utils import *
import math
def parseAssembly(pdb_file,atomT):
try:
f = open(pdb_file, 'r')
lines = f.readlines()
f.close()
except IOError:
print ('\n**************************************\nFILE '+pdb_file+' NOT FOUND\n**************************************\n')
sys.exit()
# get index of first atom
for i in range(len(lines)):
if lines[i].startswith("ATOM"):
index_atom = i
break
header = lines[0:index_atom]
all_atoms = lines[index_atom:]
chain_dics = {}
number_protomer_atoms = 0
c_atoms = []
c_cords = []
for atom in all_atoms:
if atom.startswith("ATOM"):
coords = []
info = atom.split()
chain = info[4]
res = info[3]
atom_type = info[2]
if atom_type == atomT or (atom_type == "CA" and res == "GLY"): #Give user the option for C-aplhas or C-betas
if number_protomer_atoms == 0:
chain1 = chain
if chain not in chain_dics:
number_protomer_atoms += 1
if chain != chain1:
if chain1 not in chain_dics:
chain_dics[chain1] = 1
else:
# Is macro molecule
chain_dics[chain1] += 1
chain1 = chain
c_atoms.append(atom)
coords.append(float(atom[30:38].strip().strip()))
coords.append(float(atom[38:46].strip().strip()))
coords.append(float(atom[46:54].strip().strip()))
c_cords.append(coords)
if atom.startswith("END") and not "ENDMDL" in atom:
if chain1 in chain_dics:
chain_dics[chain1] += 1
else:
chain_dics[chain1] = 1
number_of_protomers = chain_dics[chain1]
return (header,number_protomer_atoms,number_of_protomers,c_atoms,c_cords)
def coarseGrain(c_g,starting_atom,number_protomer_atoms, number_of_protomers, c_cords,c_atoms):
index_of_selected_atoms = []
starting_atom_i = starting_atom - 1 # Index for starting atom
protomer_c_betas = c_cords[0:number_protomer_atoms]
coords_start = protomer_c_betas[starting_atom_i]
distances_from_start = []
distance_index = {} # holds the index of the atoms in order of distance from
xstart = coords_start[0]
ystart = coords_start[1]
zstart = coords_start[2]
for i in range(len(protomer_c_betas)):
atom = protomer_c_betas[i]
if i == starting_atom_i:
continue
x = atom[0]
y = atom[1]
z = atom[2]
distance = ((xstart - x) * (xstart - x)) + ((ystart - y)
* (ystart - y)) + ((zstart - z) * (zstart - z))
distance = math.sqrt(distance)
distances_from_start.append(distance)
distance_index[distance] = i
distances_from_start.sort()
index_of_selected_atoms = []
index_of_selected_atoms.append(starting_atom_i)
if c_g == 1:
c_g_select = 0
elif c_g ==2:
c_g_select = 1
else:
c_g_select = (c_g * (c_g - 1)) - c_g
# selects atoms which are not within this distance to already selected atoms
try:
cutoff = distances_from_start[c_g_select]
atom_index = distance_index[cutoff]
index_of_selected_atoms.append(atom_index)
distribution = {}
#loops through ordered list and selects all suitable atoms
for dist in distances_from_start[c_g_select+ 1:]:
atom_index = distance_index[dist]
x = protomer_c_betas[atom_index][0]
y = protomer_c_betas[atom_index][1]
z = protomer_c_betas[atom_index][2]
too_close = False
local_distribution = []
for atom in index_of_selected_atoms:
x1 = protomer_c_betas[atom][0]
y1 = protomer_c_betas[atom][1]
z1 = protomer_c_betas[atom][2]
surrounding_distance = ((x1 - x) * (x1 - x)) + ((y1 - y) * (y1 - y)) + ((z1 - z) * (z1 - z))
surrounding_distance = math.sqrt(surrounding_distance)
local_distribution.append(surrounding_distance)
if surrounding_distance < cutoff:
too_close = True
break
if not too_close:
index_of_selected_atoms.append(atom_index)
local_distribution.sort()
distribution[atom_index] = local_distribution
# print index_of_selected_atoms
print ("------------------------------------------------------------")
print ("SUMMARY OF COARSE GRAINING PERFORMED AT LEVEL "+str(c_g))
print ("No. atoms selected per unit: " + str(len(index_of_selected_atoms)) + " from " + str(number_protomer_atoms) + " original residues")
print ("No. atoms selected per macromolecule: " + str(len(index_of_selected_atoms) * number_of_protomers) + " from " + str(number_protomer_atoms* number_of_protomers) + " original residues")
print ("------------------------------------------------------------")
index_of_selected_atoms.sort()
# write a pdb file lines of the Coarse-Grained Capsid with renumbered atoms
selected_c_beta_lines = []
# Includes all c atoms of the first pentamer and then coarse grains the rest of the surrounding capsid
count = 0
for i in range(0, number_of_protomers): # parameter
for j in index_of_selected_atoms:
index = j + i * number_protomer_atoms
a = c_atoms[index]
spaces = " " * (len(a[6:11]) - len(str(count + 1)))
a = a[0:6] + spaces + str(count + 1) + a[11:]
selected_c_beta_lines.append(a)
count += 1
ter = "TER" + " " * 3 + " " * (5 - len(str(count))) + str(count) + " " * 6 + a.split()[3] + " " + a.split()[4] + " " + " " * (3 - len(a.split()[5])) + a.split()[5] + " \n"
selected_c_beta_lines.append(ter)
return selected_c_beta_lines
except IndexError:
print ("\n**************************************\nERROR: Coarse Grain Level = "+str(c_g)+" is TOO LARGE\nNo output generated\n**************************************")
def writeCG(outfile,outdir,header,selected_c_beta_lines,c_g):
if outfile == "ComplexCG.pdb":
outfile = outfile.strip('.pdb')+str(c_g)+'.pdb'
elif '.pdb' in outfile:
outfile = outfile.strip('.pdb')+'CG'+str(c_g)+'.pdb'
elif not '.pdb' in outfile and not'.' in outfile:
outfile+='CG'+str(c_g)
outfile+='.pdb'
elif not '.pdb' in outfile and '.' in outfile:
print ('\n**************************************\nSpecified output file name is not comptable with PDB format\nDefault file created: ComplexCG_'+str(c_g)+'.pdb\n**************************************')
outfile = 'ComplexCG'+str(c_g)+'.pdb'
w = open(args.outdir + "/" + outfile, 'w')
w.writelines(header)
w.writelines(selected_c_beta_lines)
w.write("END")
w.close()
def main(args):
#Get Input Parameters
pdb_file = args.pdb
atomT = args.atomType.upper()
if atomT!='CA' and atomT!='CB':
print ("\n**************************************\nUnrecognised atom type\nInput Options:\nCA: to select alpha carbon atoms\nCB: to select beta carbon atoms\n**************************************\n")
sys.exit()
# Set level of coarsegrain
c_gList = []
try:
cgs = args.cg.split(',')
for c_g in cgs:
c_gList.append(int(c_g.strip()))
except ValueError:
print ("\n**************************************\nInput for Coarse Graining Level: "+str(args.cg)+" is INVALID\nDefault CG Level = 4 has been used\n**************************************\n")
c_gList =[]
c_gList.append(4)
starting_atom = args.startingAtom # residue number of starting atoms
if starting_atom<=0:
print ("\n**************************************\nStarting Atom: "+str(starting_atom)+" is INVALID\nDefault Starting Atom = 1 has been used\n**************************************\n")
starting_atom = 1
outfile = args.output
outdir = args.outdir
#Parse PDB File
cAssembly = parseAssembly(pdb_file,atomT)
header = cAssembly[0]
number_protomer_atoms = cAssembly[1]
number_of_protomers = cAssembly[2]
c_atoms = cAssembly[3]
c_cords = cAssembly[4]
#Perform Coarse Graining
for c_g in c_gList:
if c_g == 0 or c_g<0:
print ("\n**************************************\nERROR: Coarse Grain Level = "+str(c_g)+" is INVALID\nNo output generated\n**************************************")
continue
selected_c_beta_lines = coarseGrain(c_g,starting_atom,number_protomer_atoms, number_of_protomers, c_cords,c_atoms)
if selected_c_beta_lines:
writeCG(outfile,outdir,header,selected_c_beta_lines,c_g)
silent = False
stream = sys.stdout
def log(message):
global silent
global stream
if not silent:
print_err(message)
if __name__ == "__main__":
# parse cmd arguments
parser = argparse.ArgumentParser()
# standard arguments for logging and output
parser.add_argument("--silent", help="Turn off logging", action='store_true', default=False)
parser.add_argument("--welcome", help="Display welcome message (true/false)", default="true")
parser.add_argument("--log-file", help="Output log file (default: standard output)", default=None)
parser.add_argument("--outdir", help="Output directory", default="output")
# custom arguments
parser.add_argument("--output", help="File name for Coarse Grained PDB", default='ComplexCG.pdb')
parser.add_argument("--pdb", help="PDB input file")
parser.add_argument("--cg", help="Course grain levels: Increase level to increase amount of coarse graining:\nEnter either a single level e.g. 4 or a comma separted list for the production of multiple CG models e.g 3,4,5", default='4')
parser.add_argument("--startingAtom", help="Residue number of first carbon atom to be selected [int]", default=1, type=int)
parser.add_argument("--atomType", help="Enter CA to select alpha carbons or CB to select beta carbons", default='CA')
# parser.add_argument("--protomerAtoms", help="", default=0, type=int)
args = parser.parse_args()
if args.welcome == "true":
welcome_msg("Coarse grain", "Caroline Ross ([email protected])")
# Check if required directories exist
if not os.path.isdir(args.outdir):
os.makedirs(args.outdir)
# Check if args supplied by user
if len(sys.argv) > 1:
# set up logging
silent = args.silent
if args.log_file:
stream = open(args.log_file, 'w')
start = datetime.now()
log("Started at: %s" % str(start))
# run script
main(args)
end = datetime.now()
time_taken = format_seconds((end - start).seconds)
log("Completed at: %s" % str(end))
log("- Total time: %s" % str(time_taken))
# close logging stream
stream.close()
else:
print ('No arguments provided. Use -h to view help')