From dd6a5ae07e65fd56631c9513b183543a8df338e4 Mon Sep 17 00:00:00 2001 From: peastman Date: Tue, 6 Aug 2024 13:07:00 -0700 Subject: [PATCH 1/4] Optimize sidechain rotations --- pdbfixer/pdbfixer.py | 177 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 176 insertions(+), 1 deletion(-) diff --git a/pdbfixer/pdbfixer.py b/pdbfixer/pdbfixer.py index de434eb..da45199 100644 --- a/pdbfixer/pdbfixer.py +++ b/pdbfixer/pdbfixer.py @@ -37,7 +37,7 @@ import openmm.unit as unit from openmm.app.internal.pdbstructure import PdbStructure from openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader -from openmm.app.element import hydrogen, oxygen +from openmm.app.element import hydrogen, oxygen, nitrogen, fluorine from openmm.app.forcefield import NonbondedGenerator # Support Cythonized functions in OpenMM 7.3 @@ -50,6 +50,7 @@ import numpy as np import numpy.linalg as lin +import itertools import sys import os import os.path @@ -1340,6 +1341,180 @@ def _describeVariant(self, residue, definitions): variant.append((h, parent)) return variant + def optimizeHydrogenBonds(self): + # Record the parent atom that every hydrogen is bonded to. + + parent = {} + for atom1, atom2 in self.topology.bonds(): + if atom1.element == hydrogen and atom2.element != hydrogen: + parent[atom1] = atom2 + elif atom2.element == hydrogen and atom1.element != hydrogen: + parent[atom2] = atom1 + + # Record the donors and acceptors in each residue. + + residueDonors = {} + residueAcceptors = {} + for residue in self.topology.residues(): + residueDonors[residue] = [] + residueAcceptors[residue] = [] + for atom in residue.atoms(): + if atom.element == hydrogen and parent[atom].element in (oxygen, nitrogen, fluorine): + residueDonors[residue].append((atom, parent[atom])) + elif atom.element in (oxygen, nitrogen, fluorine): + residueAcceptors[residue].append(atom) + + # Record the groups of atoms we will try to rotate. + + rotations = { + 'ASN': (('CB', 'CG'), ('OD1', 'ND2', 'HD21', 'HD22')), + 'GLN': (('CG', 'CD'), ('OE1', 'NE2', 'HE21', 'HE22')), + 'HIS': (('CB', 'CG'), ('ND1', 'CD2', 'CE1', 'NE2', 'HD1', 'HD2', 'HE1', 'HE2')) + } + residueRotations = {} + for residue in self.topology.residues(): + if residue.name in rotations: + axis, atoms = rotations[residue.name] + residueRotations[residue] = ([atom for atom in residue.atoms() if atom.name in axis], + [atom for atom in residue.atoms() if atom.name in atoms]) + + # For each residue we will be modifying, record all other residues that are close enough + # to potentially form hydrogen bonds. + + residueNeighbors = {} + positions = self.positions.value_in_unit(unit.nanometer) + for res1 in residueRotations: + residueNeighbors[res1] = [] + for res2 in self.topology.residues(): + if res1 != res2: + close = False + for atom1 in residueRotations[res1][1]: + for atom2 in res2.atoms(): + delta = positions[atom1.index]-positions[atom2.index] + if unit.norm(delta) < 0.5: + close = True + break + if close: + break + if close: + residueNeighbors[res1].append(res2) + + # Divide them into clusters that need to be analyzed together. + + clusters = [[res] for res in residueNeighbors] + converged = False + while not converged: + converged = True + for i in range(len(clusters)): + if i >= len(clusters): + break + c1 = clusters[i] + for j in range(i+1, len(clusters)): + c2 = clusters[j] + if any(res2 in residueNeighbors[res1] for res1, res2 in itertools.product(c1, c2)): + c1 += c2 + del clusters[j] + converged = False + break + + # Create the rotated version of the coordinates. + + rotatedPositions = positions[:] + for residue in residueRotations: + axis, atoms = residueRotations[residue] + center = positions[axis[1].index] + e = center-positions[axis[0].index] + e /= unit.norm(e) + for atom in atoms: + d = positions[atom.index]-center + dot = np.dot(e, d) + rotatedPositions[atom.index] = center - d + 2*dot*e + + # Define a function to decide whether three atoms form a hydrogen bond. + + def isHbond(d, h, a): + if unit.norm(d-a) > 0.35: + return False + deltaDH = h-d + deltaHA = a-h + deltaDH /= unit.norm(deltaDH) + deltaHA /= unit.norm(deltaHA) + return np.arccos(np.dot(deltaDH, deltaHA)) < 50*np.pi/180 + + # Define a function to count the number of hydrogen bonds between two residues. + + def countHbonds(res1, res2, pos1, pos2): + count = 0 + for h, d in residueDonors[res1]: + for a in residueAcceptors[res2]: + if isHbond(pos1[d.index], pos1[h.index], pos2[a.index]): + count += 1 + for h, d in residueDonors[res2]: + for a in residueAcceptors[res1]: + if isHbond(pos2[d.index], pos2[h.index], pos1[a.index]): + count += 1 + return count + + # For each variable residue, consider hydrogen bonds to other residues that are + # *not* variable. How does the number change? + + residueRotationHbondChange = defaultdict(int) + for residue in residueNeighbors: + for neighbor in residueNeighbors[residue]: + if neighbor not in residueNeighbors: + before = countHbonds(residue, neighbor, positions, positions) + after = countHbonds(residue, neighbor, rotatedPositions, positions) + residueRotationHbondChange[residue] += after-before + + # Now consider pairs of residues that are close enough to form hydrogen bonds to + # each other. + + pairRotationHbonds = {} + for residue in residueNeighbors: + for neighbor in residueNeighbors[residue]: + if neighbor.index > residue.index and neighbor in residueNeighbors: + pairRotationHbonds[(residue, neighbor)] = [ + [ + countHbonds(residue, neighbor, positions, positions), + countHbonds(residue, neighbor, positions, rotatedPositions) + ], + [ + countHbonds(residue, neighbor, rotatedPositions, positions), + countHbonds(residue, neighbor, rotatedPositions, rotatedPositions) + ] + ] + + # Optimize each cluster. + + newPositions = positions[:] + for cluster in clusters: + bestCombination = None + bestBonds = -1 + + # Loop over all possible combinations of which residues to rotate. + + for combination in itertools.product(*[[0,1]]*len(cluster)): + bonds = 0 + for rotated, residue in zip(combination, cluster): + if rotated == 1: + bonds += residueRotationHbondChange[residue] + for neighbor in residueNeighbors[residue]: + if (residue, neighbor) in pairRotationHbonds: + neighborRotated = combination[cluster.index(neighbor)] + bonds += pairRotationHbonds[(residue, neighbor)][rotated][neighborRotated] + if bonds > bestBonds: + bestCombination = combination + bestBonds = bonds + + # Set the positions to the optimal set of rotations. + + for rotated, residue in zip(bestCombination, cluster): + if rotated == 1: + for atom in residueRotations[residue][1]: + newPositions[atom.index] = rotatedPositions[atom.index] + self.positions = newPositions*unit.nanometer + + def addSolvent(self, boxSize=None, padding=None, boxVectors=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*unit.molar, boxShape='cube'): """Add a solvent box surrounding the structure. From ef6d923fc4c3eee56410fc3dbe2a57bf78e96db1 Mon Sep 17 00:00:00 2001 From: peastman Date: Wed, 7 Aug 2024 15:36:03 -0700 Subject: [PATCH 2/4] Jointly optimize rotations and HIS hydrogens --- pdbfixer/hbonds.py | 292 +++++++++++++++++++++++++++++++++++++++++++ pdbfixer/pdbfixer.py | 177 +------------------------- 2 files changed, 295 insertions(+), 174 deletions(-) create mode 100644 pdbfixer/hbonds.py diff --git a/pdbfixer/hbonds.py b/pdbfixer/hbonds.py new file mode 100644 index 0000000..ce840a1 --- /dev/null +++ b/pdbfixer/hbonds.py @@ -0,0 +1,292 @@ +import openmm as mm +import openmm.app as app +import openmm.unit as unit +from openmm.app.element import hydrogen, oxygen, nitrogen, fluorine +import numpy as np +import itertools +from collections import defaultdict +from collections.abc import Iterator +from typing import List + +class HydrogenBondOptimizer(object): + + def __init__(self, fixer): + topology, positions = self.protonateHistidines(fixer) + + # Record the parent atom that every hydrogen is bonded to. + + parent = {} + for atom1, atom2 in topology.bonds(): + if atom1.element == hydrogen and atom2.element != hydrogen: + parent[atom1] = atom2 + elif atom2.element == hydrogen and atom1.element != hydrogen: + parent[atom2] = atom1 + + # Record the donors and acceptors in each residue. + + self.residueDonors = {} + self.residueAcceptors = {} + for residue in topology.residues(): + self.residueDonors[residue] = [] + self.residueAcceptors[residue] = [] + for atom in residue.atoms(): + if atom.element == hydrogen and parent[atom].element in (oxygen, nitrogen, fluorine): + self.residueDonors[residue].append((atom, parent[atom])) + elif atom.element in (oxygen, nitrogen, fluorine): + self.residueAcceptors[residue].append(atom) + + # Record the groups of atoms we will try to rotate. + + rotations = { + 'ASN': (('CB', 'CG'), ('OD1', 'ND2', 'HD21', 'HD22')), + 'GLN': (('CG', 'CD'), ('OE1', 'NE2', 'HE21', 'HE22')), + 'HIS': (('CB', 'CG'), ('ND1', 'CD2', 'CE1', 'NE2', 'HD1', 'HD2', 'HE1', 'HE2')) + } + residueRotations = {} + for residue in topology.residues(): + if residue.name in rotations: + axis, atoms = rotations[residue.name] + residueRotations[residue] = ([atom for atom in residue.atoms() if atom.name in axis], + [atom for atom in residue.atoms() if atom.name in atoms]) + + # For each residue we will be modifying, record all other residues that are close enough + # to potentially form hydrogen bonds. + + self.residueNeighbors = {} + residueBounds = dict((residue, BoundingSphere(residue.atoms(), positions)) for residue in topology.residues()) + for res1 in residueRotations: + self.residueNeighbors[res1] = [] + bounds = BoundingSphere(residueRotations[res1][1], positions) + for res2 in topology.residues(): + if res1 != res2 and bounds.distance(residueBounds[res2]) < 0.5: + close = False + for atom1 in residueRotations[res1][1]: + for atom2 in res2.atoms(): + delta = positions[atom1.index]-positions[atom2.index] + if unit.norm(delta) < 0.5: + close = True + break + if close: + break + if close: + self.residueNeighbors[res1].append(res2) + + # Divide them into clusters that need to be analyzed together. + + clusters = [[res] for res in self.residueNeighbors] + converged = False + while not converged: + converged = True + for i in range(len(clusters)): + if i >= len(clusters): + break + c1 = clusters[i] + for j in range(i+1, len(clusters)): + c2 = clusters[j] + if any(res2 in self.residueNeighbors[res1] for res1, res2 in itertools.product(c1, c2)): + c1 += c2 + del clusters[j] + converged = False + break + + # Create the rotated version of the coordinates. + + rotatedPositions = positions[:] + for residue in residueRotations: + axis, atoms = residueRotations[residue] + center = positions[axis[1].index] + e = center-positions[axis[0].index] + e /= unit.norm(e) + for atom in atoms: + d = positions[atom.index]-center + dot = np.dot(e, d) + rotatedPositions[atom.index] = center - d + 2*dot*e + + # For each variable residue, consider hydrogen bonds to other residues that are + # *not* variable. How does the number change? + + residueRotationHbondChange = defaultdict(int) + for residue in self.residueNeighbors: + for neighbor in self.residueNeighbors[residue]: + if neighbor not in self.residueNeighbors: + before = self.countHbonds(residue, neighbor, positions, positions) + after = self.countHbonds(residue, neighbor, rotatedPositions, positions) + residueRotationHbondChange[residue] += after-before + + # Now consider pairs of residues that are close enough to form hydrogen bonds to + # each other. + + pairRotationHbonds = {} + for residue in self.residueNeighbors: + for neighbor in self.residueNeighbors[residue]: + if neighbor.index > residue.index and neighbor in self.residueNeighbors: + pairRotationHbonds[(residue, neighbor)] = [ + [ + self.countHbonds(residue, neighbor, positions, positions), + self.countHbonds(residue, neighbor, positions, rotatedPositions) + ], + [ + self.countHbonds(residue, neighbor, rotatedPositions, positions), + self.countHbonds(residue, neighbor, rotatedPositions, rotatedPositions) + ] + ] + + # Optimize each cluster. + + newPositions = positions[:] + for cluster in clusters: + bestCombination = None + bestBonds = -1 + + # Loop over all possible combinations of which residues to rotate. + + for combination in itertools.product(*[[0,1]]*len(cluster)): + bonds = 0 + for rotated, residue in zip(combination, cluster): + if rotated == 1: + bonds += residueRotationHbondChange[residue] + for neighbor in self.residueNeighbors[residue]: + if (residue, neighbor) in pairRotationHbonds: + neighborRotated = combination[cluster.index(neighbor)] + bonds += pairRotationHbonds[(residue, neighbor)][rotated][neighborRotated] + if bonds > bestBonds: + bestCombination = combination + bestBonds = bonds + + # Set the positions to the optimal set of rotations. + + for rotated, residue in zip(bestCombination, cluster): + if rotated == 1: + for atom in residueRotations[residue][1]: + newPositions[atom.index] = rotatedPositions[atom.index] + + # Create the final topology and positions. + + fixer.topology, fixer.positions = self.deleteExtraHydrogens(fixer, topology, positions) + + def protonateHistidines(self, fixer): + """Add a second hydrogen to neutral histidines so we can identify bonds formed by both of them.""" + positions = fixer.positions.value_in_unit(unit.nanometer) + newTopology = app.Topology() + newTopology.setPeriodicBoxVectors(fixer.topology.getPeriodicBoxVectors()) + newPositions = [] + newAtoms = {} + for chain in fixer.topology.chains(): + newChain = newTopology.addChain(chain.id) + for residue in chain.residues(): + newResidue = newTopology.addResidue(residue.name, newChain, residue.id, residue.insertionCode) + for atom in residue.atoms(): + newAtoms[atom] = newTopology.addAtom(atom.name, atom.element, newResidue) + newPositions.append(positions[atom.index]) + if residue.name == 'HIS': + atomsByName = dict((atom.name, atom) for atom in newResidue.atoms()) + posByName = dict((atom.name, newPositions[atom.index]) for atom in newResidue.atoms()) + if all(name in atomsByName for name in ('CG', 'ND1', 'CD2', 'CE1', 'NE2')): + # If this residue has one of the two hydrogens, add the other. + if 'HD1' in atomsByName and 'HE2' not in atomsByName: + newAtom = newTopology.addAtom('HE2', hydrogen, newResidue) + newTopology.addBond(newAtom, atomsByName['NE2']) + axis = posByName['NE2'] - 0.5*(posByName['CD2']-posByName['CE1']) + axis /= unit.norm(axis) + newPositions.append(posByName['NE2'] + axis) + elif 'HD1' not in atomsByName and 'HE2' in atomsByName: + newAtom = newTopology.addAtom('HD1', hydrogen, newResidue) + newTopology.addBond(newAtom, atomsByName['ND1']) + axis = posByName['ND1'] - 0.5*(posByName['CG']-posByName['CE1']) + axis /= unit.norm(axis) + newPositions.append(posByName['ND1'] + axis) + for atom1, atom2 in fixer.topology.bonds(): + newTopology.addBond(newAtoms[atom1], newAtoms[atom2]) + return newTopology, newPositions + + def deleteExtraHydrogens(self, fixer, topology, positions): + """We may have added an extra hydrogen to some HIS residues. Figure out which one to keep + and delete the other one.""" + toDelete = [] + for oldRes, newRes in zip(fixer.topology.residues(), topology.residues()): + if oldRes.name == 'HIS' and len(oldRes) != len(newRes): + # See whether each of the hydrogens forms a hydrogen bond. + + atomsByName = dict((atom.name, atom) for atom in newRes.atoms()) + hd1 = atomsByName['HD1'] + nd1 = atomsByName['ND1'] + he2 = atomsByName['HE2'] + ne2 = atomsByName['NE2'] + hd1Acceptor = None + he2Acceptor = None + for neighbor in self.residueNeighbors[newRes]: + for a in self.residueAcceptors[neighbor]: + if self.isHbond(positions[nd1.index], positions[hd1.index], positions[a.index]): + hd1Acceptor = neighbor + if self.isHbond(positions[ne2.index], positions[he2.index], positions[a.index]): + he2Acceptor = neighbor + if hd1Acceptor is not None and he2Acceptor is None: + # HD1 forms a hydrogen bond. Delete HE2. + toDelete.append(he2) + elif hd1Acceptor is None and he2Acceptor is not None: + # HE2 forms a hydrogen bond. Delete HD1. + toDelete.append(hd1) + elif hd1Acceptor is None and he2Acceptor is None: + # Neither one forms a hydrogen bond. Delete the one that was not present + # in the original Topology. + if any(atom.name == 'HD1' for atom in oldRes.atoms()): + toDelete.append(he2) + else: + toDelete.append(hd1) + else: + # Both form hydrogen bonds. If one is bonded to a rotatable residue, keep that one. + # That residue was optimized based on the assumption it could form a hydrogen bond + # to this one. + if hd1Acceptor.name in ('ASN', 'GLN', 'HIS'): + toDelete.append(he2) + elif he2Acceptor.name in ('ASN', 'GLN', 'HIS'): + toDelete.append(hd1) + else: + # Delete the one that was not present in the original Topology. + if any(atom.name == 'HD1' for atom in oldRes.atoms()): + toDelete.append(he2) + else: + toDelete.append(hd1) + + # Create the new Topology and positions. + + modeller = app.Modeller(topology, positions) + modeller.delete(toDelete) + return modeller.topology, modeller.positions + + def isHbond(self, d: mm.Vec3, h: mm.Vec3, a: mm.Vec3): + """Decide whether three atoms could form a hydrogen bond.""" + if unit.norm(d-a) > 0.35: + return False + deltaDH = h-d + deltaHA = a-h + deltaDH /= unit.norm(deltaDH) + deltaHA /= unit.norm(deltaHA) + return np.arccos(np.dot(deltaDH, deltaHA)) < 50*np.pi/180 + + def countHbonds(self, res1: app.Residue, res2: app.Residue, pos1: List[mm.Vec3], pos2: List[mm.Vec3]): + """Count the number of hydrogen bonds between two residues.""" + count = 0 + for h, d in self.residueDonors[res1]: + for a in self.residueAcceptors[res2]: + if self.isHbond(pos1[d.index], pos1[h.index], pos2[a.index]): + count += 1 + for h, d in self.residueDonors[res2]: + for a in self.residueAcceptors[res1]: + if self.isHbond(pos2[d.index], pos2[h.index], pos1[a.index]): + count += 1 + return count + + +class BoundingSphere(object): + """Computes a bounding sphere for a set of atoms. This is used to accelerate searches for neighboring residues.""" + + def __init__(self, atoms: Iterator[app.Atom], positions: List[mm.Vec3]): + atoms = list(atoms) + minRange = mm.Vec3(*(min((positions[atom.index][i] for atom in atoms)) for i in range(3))) + maxRange = mm.Vec3(*(max((positions[atom.index][i] for atom in atoms)) for i in range(3))) + self.center = 0.5*(minRange+maxRange) + self.radius = max(unit.norm(self.center-positions[atom.index]) for atom in atoms) + + def distance(self, other: 'BoundingSphere'): + return max(0, unit.norm(self.center-other.center) - self.radius - other.radius) diff --git a/pdbfixer/pdbfixer.py b/pdbfixer/pdbfixer.py index da45199..6ec6ee8 100644 --- a/pdbfixer/pdbfixer.py +++ b/pdbfixer/pdbfixer.py @@ -37,8 +37,9 @@ import openmm.unit as unit from openmm.app.internal.pdbstructure import PdbStructure from openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader -from openmm.app.element import hydrogen, oxygen, nitrogen, fluorine +from openmm.app.element import hydrogen, oxygen from openmm.app.forcefield import NonbondedGenerator +from .hbonds import HydrogenBondOptimizer # Support Cythonized functions in OpenMM 7.3 # and also implementations in older versions. @@ -50,7 +51,6 @@ import numpy as np import numpy.linalg as lin -import itertools import sys import os import os.path @@ -1342,178 +1342,7 @@ def _describeVariant(self, residue, definitions): return variant def optimizeHydrogenBonds(self): - # Record the parent atom that every hydrogen is bonded to. - - parent = {} - for atom1, atom2 in self.topology.bonds(): - if atom1.element == hydrogen and atom2.element != hydrogen: - parent[atom1] = atom2 - elif atom2.element == hydrogen and atom1.element != hydrogen: - parent[atom2] = atom1 - - # Record the donors and acceptors in each residue. - - residueDonors = {} - residueAcceptors = {} - for residue in self.topology.residues(): - residueDonors[residue] = [] - residueAcceptors[residue] = [] - for atom in residue.atoms(): - if atom.element == hydrogen and parent[atom].element in (oxygen, nitrogen, fluorine): - residueDonors[residue].append((atom, parent[atom])) - elif atom.element in (oxygen, nitrogen, fluorine): - residueAcceptors[residue].append(atom) - - # Record the groups of atoms we will try to rotate. - - rotations = { - 'ASN': (('CB', 'CG'), ('OD1', 'ND2', 'HD21', 'HD22')), - 'GLN': (('CG', 'CD'), ('OE1', 'NE2', 'HE21', 'HE22')), - 'HIS': (('CB', 'CG'), ('ND1', 'CD2', 'CE1', 'NE2', 'HD1', 'HD2', 'HE1', 'HE2')) - } - residueRotations = {} - for residue in self.topology.residues(): - if residue.name in rotations: - axis, atoms = rotations[residue.name] - residueRotations[residue] = ([atom for atom in residue.atoms() if atom.name in axis], - [atom for atom in residue.atoms() if atom.name in atoms]) - - # For each residue we will be modifying, record all other residues that are close enough - # to potentially form hydrogen bonds. - - residueNeighbors = {} - positions = self.positions.value_in_unit(unit.nanometer) - for res1 in residueRotations: - residueNeighbors[res1] = [] - for res2 in self.topology.residues(): - if res1 != res2: - close = False - for atom1 in residueRotations[res1][1]: - for atom2 in res2.atoms(): - delta = positions[atom1.index]-positions[atom2.index] - if unit.norm(delta) < 0.5: - close = True - break - if close: - break - if close: - residueNeighbors[res1].append(res2) - - # Divide them into clusters that need to be analyzed together. - - clusters = [[res] for res in residueNeighbors] - converged = False - while not converged: - converged = True - for i in range(len(clusters)): - if i >= len(clusters): - break - c1 = clusters[i] - for j in range(i+1, len(clusters)): - c2 = clusters[j] - if any(res2 in residueNeighbors[res1] for res1, res2 in itertools.product(c1, c2)): - c1 += c2 - del clusters[j] - converged = False - break - - # Create the rotated version of the coordinates. - - rotatedPositions = positions[:] - for residue in residueRotations: - axis, atoms = residueRotations[residue] - center = positions[axis[1].index] - e = center-positions[axis[0].index] - e /= unit.norm(e) - for atom in atoms: - d = positions[atom.index]-center - dot = np.dot(e, d) - rotatedPositions[atom.index] = center - d + 2*dot*e - - # Define a function to decide whether three atoms form a hydrogen bond. - - def isHbond(d, h, a): - if unit.norm(d-a) > 0.35: - return False - deltaDH = h-d - deltaHA = a-h - deltaDH /= unit.norm(deltaDH) - deltaHA /= unit.norm(deltaHA) - return np.arccos(np.dot(deltaDH, deltaHA)) < 50*np.pi/180 - - # Define a function to count the number of hydrogen bonds between two residues. - - def countHbonds(res1, res2, pos1, pos2): - count = 0 - for h, d in residueDonors[res1]: - for a in residueAcceptors[res2]: - if isHbond(pos1[d.index], pos1[h.index], pos2[a.index]): - count += 1 - for h, d in residueDonors[res2]: - for a in residueAcceptors[res1]: - if isHbond(pos2[d.index], pos2[h.index], pos1[a.index]): - count += 1 - return count - - # For each variable residue, consider hydrogen bonds to other residues that are - # *not* variable. How does the number change? - - residueRotationHbondChange = defaultdict(int) - for residue in residueNeighbors: - for neighbor in residueNeighbors[residue]: - if neighbor not in residueNeighbors: - before = countHbonds(residue, neighbor, positions, positions) - after = countHbonds(residue, neighbor, rotatedPositions, positions) - residueRotationHbondChange[residue] += after-before - - # Now consider pairs of residues that are close enough to form hydrogen bonds to - # each other. - - pairRotationHbonds = {} - for residue in residueNeighbors: - for neighbor in residueNeighbors[residue]: - if neighbor.index > residue.index and neighbor in residueNeighbors: - pairRotationHbonds[(residue, neighbor)] = [ - [ - countHbonds(residue, neighbor, positions, positions), - countHbonds(residue, neighbor, positions, rotatedPositions) - ], - [ - countHbonds(residue, neighbor, rotatedPositions, positions), - countHbonds(residue, neighbor, rotatedPositions, rotatedPositions) - ] - ] - - # Optimize each cluster. - - newPositions = positions[:] - for cluster in clusters: - bestCombination = None - bestBonds = -1 - - # Loop over all possible combinations of which residues to rotate. - - for combination in itertools.product(*[[0,1]]*len(cluster)): - bonds = 0 - for rotated, residue in zip(combination, cluster): - if rotated == 1: - bonds += residueRotationHbondChange[residue] - for neighbor in residueNeighbors[residue]: - if (residue, neighbor) in pairRotationHbonds: - neighborRotated = combination[cluster.index(neighbor)] - bonds += pairRotationHbonds[(residue, neighbor)][rotated][neighborRotated] - if bonds > bestBonds: - bestCombination = combination - bestBonds = bonds - - # Set the positions to the optimal set of rotations. - - for rotated, residue in zip(bestCombination, cluster): - if rotated == 1: - for atom in residueRotations[residue][1]: - newPositions[atom.index] = rotatedPositions[atom.index] - self.positions = newPositions*unit.nanometer - + optimizer = HydrogenBondOptimizer(self) def addSolvent(self, boxSize=None, padding=None, boxVectors=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*unit.molar, boxShape='cube'): """Add a solvent box surrounding the structure. From 11d2cb4d30221e85c918b89737010be5b8ae51fa Mon Sep 17 00:00:00 2001 From: peastman Date: Thu, 8 Aug 2024 12:14:42 -0700 Subject: [PATCH 3/4] Bug fixes --- pdbfixer/hbonds.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pdbfixer/hbonds.py b/pdbfixer/hbonds.py index ce840a1..acb13b1 100644 --- a/pdbfixer/hbonds.py +++ b/pdbfixer/hbonds.py @@ -9,6 +9,7 @@ from typing import List class HydrogenBondOptimizer(object): + """This class rotates sidechains to optimize the hydrogen bond network.""" def __init__(self, fixer): topology, positions = self.protonateHistidines(fixer) @@ -30,7 +31,7 @@ def __init__(self, fixer): self.residueDonors[residue] = [] self.residueAcceptors[residue] = [] for atom in residue.atoms(): - if atom.element == hydrogen and parent[atom].element in (oxygen, nitrogen, fluorine): + if atom.element == hydrogen and atom in parent and parent[atom].element in (oxygen, nitrogen, fluorine): self.residueDonors[residue].append((atom, parent[atom])) elif atom.element in (oxygen, nitrogen, fluorine): self.residueAcceptors[residue].append(atom) @@ -162,7 +163,7 @@ def __init__(self, fixer): # Create the final topology and positions. - fixer.topology, fixer.positions = self.deleteExtraHydrogens(fixer, topology, positions) + fixer.topology, fixer.positions = self.deleteExtraHydrogens(fixer, topology, newPositions) def protonateHistidines(self, fixer): """Add a second hydrogen to neutral histidines so we can identify bonds formed by both of them.""" @@ -256,13 +257,13 @@ def deleteExtraHydrogens(self, fixer, topology, positions): def isHbond(self, d: mm.Vec3, h: mm.Vec3, a: mm.Vec3): """Decide whether three atoms could form a hydrogen bond.""" - if unit.norm(d-a) > 0.35: + if unit.norm(h-a) > 0.25: return False deltaDH = h-d deltaHA = a-h deltaDH /= unit.norm(deltaDH) deltaHA /= unit.norm(deltaHA) - return np.arccos(np.dot(deltaDH, deltaHA)) < 50*np.pi/180 + return np.arccos(np.dot(deltaDH, deltaHA)) < 60*np.pi/180 def countHbonds(self, res1: app.Residue, res2: app.Residue, pos1: List[mm.Vec3], pos2: List[mm.Vec3]): """Count the number of hydrogen bonds between two residues.""" From 1f6757edebf59b12051b44066d1caa70e2de36e7 Mon Sep 17 00:00:00 2001 From: peastman Date: Thu, 8 Aug 2024 12:14:53 -0700 Subject: [PATCH 4/4] Added documentation --- Manual.html | 10 +++++++++- pdbfixer/pdbfixer.py | 11 ++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/Manual.html b/Manual.html index 41efd33..edcca99 100644 --- a/Manual.html +++ b/Manual.html @@ -4,7 +4,7 @@

PDBFixer

-
Copyright 2013-2023 by Peter Eastman and Stanford University
+
Copyright 2013-2024 by Peter Eastman and Stanford University

1. Introduction

@@ -207,6 +207,14 @@

Add Missing Hydrogens

The argument is the pH based on which to select protonation states. +

Optimize Hydrogen Bonds

+ +You can optimize the hydrogen bonding network by calling +

+fixer.optimizeHydrogenBonds() +

+This performs 180 degree rotations of HIS, ASN, and GLN sidechains, and moves hydrogens on neutral HIS residues, to maximize the total number of hydrogen bonds. +

Add Water

If you want to add a water box, call addSolvent(). This method has several optional arguments. Its full definition is diff --git a/pdbfixer/pdbfixer.py b/pdbfixer/pdbfixer.py index 6ec6ee8..1d27e28 100644 --- a/pdbfixer/pdbfixer.py +++ b/pdbfixer/pdbfixer.py @@ -1342,7 +1342,16 @@ def _describeVariant(self, residue, definitions): return variant def optimizeHydrogenBonds(self): - optimizer = HydrogenBondOptimizer(self) + """Optimize the hydrogen bond network. + + This should be called after hydrogens have been added. It considers two types of changes: + + 1. 180 degree rotations of the terminal groups in HIS, ASN, and GLN sidechains. + 2. For neutral HIS residues, exchanging the hydrogen position between ND1 and NE2. + + It considers all possible combinations of them to find the one producing the largest number of hydrogen bonds. + """ + HydrogenBondOptimizer(self) def addSolvent(self, boxSize=None, padding=None, boxVectors=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*unit.molar, boxShape='cube'): """Add a solvent box surrounding the structure.