forked from vanderkamp/enlighten2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pdb_utils.py
145 lines (116 loc) · 4.6 KB
/
pdb_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
from itertools import groupby
from copy import deepcopy
class Pdb(object):
    """In-memory contents of a PDB file, split by record type.

    Attributes set by the constructor:
        atoms:  list of dicts, one per ATOM/HETATM record
        ter:    list of dicts, one per TER record
        conect: list of raw CONECT lines
        other:  list of every other raw line
    """

    def __init__(self, file=None, atoms=None, ter=None, conect=None,
                 other=None):
        """Build a Pdb either by parsing file or from ready-made entries.

        file is iterated line by line; when it is given, the remaining
        arguments are ignored. Otherwise atoms is required and ter, conect
        and other default to empty lists.

        Raises ValueError when neither file nor atoms is provided.
        """
        if file is None and atoms is None:
            raise ValueError('Either file or atoms must be provided')

        self.atoms = []
        self.ter = []
        self.conect = []
        self.other = []

        if file is None:
            # Every instance gets its own containers: defaults are created
            # here (not in the signature) and caller-supplied lists are
            # copied, so editing one Pdb never leaks into another.
            self.atoms = deepcopy(atoms)
            self.ter = [] if ter is None else deepcopy(ter)
            self.conect = [] if conect is None else list(conect)
            self.other = [] if other is None else list(other)
            return

        # First pass sorts the raw lines into the four lists, second pass
        # replaces the raw ATOM/HETATM/TER lines with parsed dicts.
        for line in file:
            getattr(self, pdb_line_key(line)).append(line)
        self.atoms = [parse_atom(atom_line) for atom_line in self.atoms]
        self.ter = [parse_ter(ter_line) for ter_line in self.ter]

    def residues(self):
        """dict of residue_hash: residue_atom_list"""
        # Atoms are accumulated per key instead of using itertools.groupby:
        # groupby starts a new group whenever the key changes, so atoms of
        # one residue that are not adjacent in self.atoms would overwrite
        # each other in the resulting dict.
        grouped = {}
        for atom in self.atoms:
            grouped.setdefault(residue_hash(atom), []).append(atom)
        return grouped

    def get_residues_by_name(self, residue_name):
        """Return the atom lists of all residues whose resName matches."""
        return [residue for residue in self.residues().values()
                if residue[0]['resName'] == residue_name]

    def to_file(self, file):
        """Writes atoms, TER and CONECT entries. Ignores all the rest."""
        DUMP_CALLBACK = {'ATOM': dump_atom,
                         'HETATM': dump_atom,
                         'TER': dump_ter}

        def sort_key(entry):
            # Sort atoms with TER entries by resSeq. 'TER' compares greater
            # than 'ATOM' and 'HETATM', so TER is always the last.
            # If some atoms have no index (extra Hs added by reduce) they go
            # after the "normal" ones.
            # NOTE(review): chainID is not part of the key, so entries of
            # different chains sharing a resSeq are interleaved - confirm
            # that inputs are single-chain or uniquely numbered.
            return (entry['resSeq'],
                    entry['record'],
                    entry['serial'] or 99999999)

        for entry in sorted(self.atoms + self.ter, key=sort_key):
            file.write(DUMP_CALLBACK[entry['record']](entry))
        for entry in self.conect:
            file.write(entry)

    def to_filename(self, filename):
        """Write the structure to the file at path filename (see to_file)."""
        with open(filename, 'w') as f:
            self.to_file(f)

    def copy(self):
        """Return an independent Pdb; the constructor copies every list."""
        return Pdb(atoms=self.atoms, ter=self.ter,
                   conect=self.conect, other=self.other)

    def remove_atom(self, atom):
        """Remove the first entry equal to atom; do nothing if absent."""
        try:
            self.atoms.remove(atom)
        except ValueError:
            # Deliberate best effort: removing a missing atom is a no-op.
            pass
def residue_hash(atom):
    """Build the string key of the residue that atom is part of.

    The key is the atom's chainID, resSeq and resName joined by underscores.
    """
    identifying_fields = (atom['chainID'], atom['resSeq'], atom['resName'])
    return '_'.join(map(str, identifying_fields))
def modify_atoms(atoms, key, value):
    """Set entry key to value on every atom of atoms, in place."""
    for entry in atoms:
        entry[key] = value
def find_atom(atoms, condition):
    """Return the first atom for which condition(atom) is true.

    Raises StopIteration when no atom in atoms matches.
    """
    for candidate in atoms:
        if condition(candidate):
            return candidate
    raise StopIteration
def pdb_line_key(line):
    """Return the name of the Pdb list that line should be stored in.

    The first six characters of line are looked up as the record name:
    ATOM and HETATM map to 'atoms', TER to 'ter', CONECT to 'conect' and
    anything else to 'other'.
    """
    # The lookup compares against the six-character slice line[:6], so each
    # record name is padded to that width here. Building the keys with
    # ljust keeps the padding explicit instead of relying on the number of
    # trailing spaces typed inside a string literal.
    record_width = 6
    key_dict = {record.ljust(record_width): attribute
                for record, attribute in (('ATOM', 'atoms'),
                                          ('HETATM', 'atoms'),
                                          ('TER', 'ter'),
                                          ('CONECT', 'conect'))}
    return key_dict.get(line[:record_width], 'other')
def parse_atom(atom_line):
    """
    Split an ATOM/HETATM line into a dict of its fixed-column fields.

    Column positions are based on the official PDB format from
    http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html

    Text fields are stripped of surrounding whitespace; serial and resSeq
    are converted with int, coordinates, occupancy and tempFactor with
    float. Everything from column 81 on is kept verbatim as 'extras'
    (a newline when nothing is there).
    """
    def text(start, stop):
        # Stripped content of the half-open column range [start, stop).
        return atom_line[start:stop].strip()

    # Fields are filled in column order, one statement per field.
    fields = {}
    fields['record'] = text(0, 6)
    fields['serial'] = int(text(6, 11))
    fields['name'] = text(12, 16)
    fields['altLoc'] = atom_line[16].strip()
    fields['resName'] = text(17, 20)
    fields['chainID'] = atom_line[21].strip()
    fields['resSeq'] = int(atom_line[22:26])
    fields['iCode'] = atom_line[26].strip()
    fields['x'] = float(atom_line[30:38])
    fields['y'] = float(atom_line[38:46])
    fields['z'] = float(atom_line[46:54])
    fields['occupancy'] = float(atom_line[54:60])
    fields['tempFactor'] = float(atom_line[60:66])
    fields['element'] = text(76, 78)
    fields['charge'] = text(78, 80)
    trailing = atom_line[80:]
    fields['extras'] = trailing if trailing else '\n'
    return fields
def parse_ter(ter_line):
    """
    Split a TER line into a dict of its fixed-column fields.

    Column positions are based on the official PDB format from
    http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html

    Text fields are stripped of surrounding whitespace; serial and resSeq
    are converted with int. Everything from column 28 on is kept verbatim
    as 'extras' (a newline when nothing is there).
    """
    def text(start, stop):
        # Stripped content of the half-open column range [start, stop).
        return ter_line[start:stop].strip()

    # Fields are filled in column order, one statement per field.
    fields = {}
    fields['record'] = text(0, 6)
    fields['serial'] = int(text(6, 11))
    fields['resName'] = text(17, 20)
    fields['chainID'] = ter_line[21].strip()
    fields['resSeq'] = int(ter_line[22:26])
    fields['iCode'] = ter_line[26].strip()
    trailing = ter_line[27:]
    fields['extras'] = trailing if trailing else '\n'
    return fields
def dump_atom(atom):
    """Format an atom dict as one fixed-column ATOM/HETATM line.

    atom must provide the keys record, serial, name, altLoc, resName,
    chainID, resSeq, iCode, x, y, z, occupancy, tempFactor, element,
    charge and extras. The layout mirrors the column slices read by
    parse_atom, so a written line can be parsed back.
    """
    # Names longer than two characters are right-aligned in the four name
    # columns; shorter ones start in the second name column.
    name_format = "{name:>4}" if len(atom['name']) > 2 else " {name:<3}"

    # Blank column runs are written as explicit widths so that the field
    # positions cannot silently drift: coordinates must start at column 31
    # and the element symbol at column 77.
    template = (
        "{record:6}{serial:5}"                    # columns 1-11
        + " "                                     # column 12 (blank)
        + name_format                             # columns 13-16
        + "{altLoc:1}"                            # column 17
        + "{resName:>3}"                          # columns 18-20
        + " "                                     # column 21 (blank)
        + "{chainID:1}{resSeq:4}{iCode:1}"        # columns 22-27
        + " " * 3                                 # columns 28-30 (blank)
        + "{x:8.3f}{y:8.3f}{z:8.3f}"              # columns 31-54
        + "{occupancy:6.2f}{tempFactor:6.2f}"     # columns 55-66
        + " " * 10                                # columns 67-76 (blank)
        + "{element:>2}{charge:>2}"               # columns 77-80
        + "{extras}"                              # remainder of the line
    )
    return template.format(**atom)
def dump_ter(ter):
    """Format a TER dict as one fixed-column TER line.

    ter must provide the keys record, serial, resName, chainID, resSeq,
    iCode and extras.
    """
    template = ("{record:6}{serial:5} {resName:>8} "
                "{chainID:1}{resSeq:4}{iCode:1}{extras}")
    return template.format(**ter)