-
Notifications
You must be signed in to change notification settings - Fork 24
/
calc_center_rdkit.py
executable file
·122 lines (94 loc) · 3.66 KB
/
calc_center_rdkit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env python
__author__ = 'Pavel Polishchuk'
import rdkit
from rdkit import Chem
import argparse
import sys
def read_pdbqt(fname, sanitize, removeHs):
mols = []
with open(fname) as f:
pdb_block = f.read().split('MODEL ')
for j, block in enumerate(pdb_block[1:]):
m = Chem.MolFromPDBBlock('\n'.join([i[:66] for i in block.split('\n')]),
sanitize=sanitize,
removeHs=removeHs)
if m is None:
sys.stderr.write(f'The pose #{j+1} cannot be read from {fname}\n')
else:
mols.append(m)
return mols
def get_coord(mol, indices=None):
if indices is None:
indices = tuple(range(mol.GetNumAtoms()))
output = []
for atom_id in indices:
pos = mol.GetConformer().GetAtomPosition(atom_id)
output.append((pos.x, pos.y, pos.z))
return tuple(output)
def calc_center(coord):
# coord tuple of tuples ((x1,y1,z1), (x2,y2,z2), ...)
if len(coord) == 0:
return None
min_x, min_y, min_z = coord[0]
max_x, max_y, max_z = coord[0]
for item in coord[1:]:
if item[0] < min_x:
min_x = item[0]
if item[0] > max_x:
max_x = item[0]
if item[1] < min_y:
min_y = item[1]
if item[1] > max_y:
max_y = item[1]
if item[2] < min_z:
min_z = item[2]
if item[2] > max_z:
max_z = item[2]
output = [(max_x + min_x) / 2, (max_y + min_y) / 2, (max_z + min_z) / 2]
output = (round(i, 3) for i in output)
return output
def main_params(input_fnames, output_fname):
if output_fname is not None:
sys.stdout = open(output_fname, 'wt')
for fname in input_fnames:
if fname[-4:].lower() == '.pdb':
f = Chem.MolFromPDBFile
elif fname[-5:].lower() == '.mol2':
f = Chem.MolFromMol2File
elif fname[-6:].lower() == '.pdbqt':
f = read_pdbqt
elif fname[-4:].lower() == '.sdf':
f = Chem.SDMolSupplier
else:
continue
m = f(fname, sanitize=False, removeHs=False)
if m is not None:
if isinstance(m, list): # pdbqt
for item in m:
center = calc_center(get_coord(item))
if center is not None:
print(fname + '\t' + '\t'.join(map(str, center)))
elif isinstance(m, rdkit.Chem.rdmolfiles.SDMolSupplier): # sdf
for item in m:
center = calc_center(get_coord(item))
if center is not None:
print(fname + '\t' + item.GetProp('_Name') + '\t' + '\t'.join(map(str, center)))
else: # pdb/mol2
center = calc_center(get_coord(m))
if center is not None:
print(fname + '\t' + '\t'.join(map(str, center)))
else:
sys.stderr.write(f'Molecule cannot be read from {fname}\n')
def main():
parser = argparse.ArgumentParser(description='Calc center of all supplied coordinates in a file.')
parser.add_argument('-i', '--input', metavar='input_molecule', required=True, nargs='*',
help='input files (PDB/PDBQT/MOL2/SDF).')
parser.add_argument('-o', '--output', metavar='output.txt',
help='output text file. If omitted output will be in stdout.')
args = vars(parser.parse_args())
for o, v in args.items():
if o == "input": input_fnames = v
if o == "output": output_fname = v
main_params(input_fnames, output_fname)
if __name__ == '__main__':
main()