Skip to content

Commit

Permalink
Can add hydrogens to nonstandard residues (#295)
Browse files Browse the repository at this point in the history
* Can add hydrogens to nonstandard residues

* Build against OpenMM dev version

* Fixed typo

* Updated Python versions for CI

* Added required quotes
  • Loading branch information
peastman authored Jul 10, 2024
1 parent 268f1d7 commit 5cf78a2
Show file tree
Hide file tree
Showing 5 changed files with 1,647 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: ["3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v2
Expand Down
3 changes: 2 additions & 1 deletion devtools/environment-dev.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
name: pdbfixer-dev

channels:
- conda-forge/label/openmm_dev
- conda-forge

dependencies:
- pytest
- openmm
- openmm=8.1.1dev0
- numpy
- pip
101 changes: 95 additions & 6 deletions pdbfixer/pdbfixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1058,10 +1058,8 @@ def addMissingHydrogens(self, pH=7.0, forcefield=None):
Notes
-----
No extensive electrostatic analysis is performed; only default residue pKas are used.
Examples
--------
No extensive electrostatic analysis is performed; only default residue pKas are used. The pH is only
taken into account for standard amino acids.
Examples
--------
Expand All @@ -1070,13 +1068,104 @@ def addMissingHydrogens(self, pH=7.0, forcefield=None):
>>> fixer = PDBFixer(pdbid='1VII')
>>> fixer.addMissingHydrogens(pH=8.0)
"""
extraDefinitions = self._downloadNonstandardDefinitions()
variants = [self._describeVariant(res, extraDefinitions) for res in self.topology.residues()]
modeller = app.Modeller(self.topology, self.positions)
modeller.addHydrogens(pH=pH, forcefield=forcefield)
modeller.addHydrogens(pH=pH, forcefield=forcefield, variants=variants)
self.topology = modeller.topology
self.positions = modeller.positions

def _downloadNonstandardDefinitions(self):
"""If the file contains any nonstandard residues, download their definitions and build
the information needed to add hydrogens to them.
"""
app.Modeller._loadStandardHydrogenDefinitions()
resnames = set(residue.name for residue in self.topology.residues())
definitions = {}
for name in resnames:
if name not in app.Modeller._residueHydrogens:
# Try to download the definition.

try:
file = urlopen(f'https://files.rcsb.org/ligands/download/{name}.cif')
contents = file.read().decode('utf-8')
file.close()
except:
continue

# Record the atoms and bonds.

from openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader
reader = PdbxReader(StringIO(contents))
data = []
reader.read(data)
block = data[0]
atomData = block.getObj('chem_comp_atom')
atomNameCol = atomData.getAttributeIndex('atom_id')
symbolCol = atomData.getAttributeIndex('type_symbol')
leavingCol = atomData.getAttributeIndex('pdbx_leaving_atom_flag')
atoms = [(row[atomNameCol], row[symbolCol].upper(), row[leavingCol] == 'Y') for row in atomData.getRowList()]
bondData = block.getObj('chem_comp_bond')
if bondData is None:
bonds = []
else:
atom1Col = bondData.getAttributeIndex('atom_id_1')
atom2Col = bondData.getAttributeIndex('atom_id_2')
bonds = [(row[atom1Col], row[atom2Col]) for row in bondData.getRowList()]
definitions[name] = (atoms, bonds)
return definitions

def _describeVariant(self, residue, definitions):
"""Build the variant description to pass to addHydrogens() for a residue."""
if residue.name not in definitions:
return None
atoms, bonds = definitions[residue.name]

# See if the heavy atoms are identical.

topologyHeavy = dict((atom.name, atom) for atom in residue.atoms() if atom.element is not None and atom.element != app.element.hydrogen)
definitionHeavy = dict((atom[0], atom) for atom in atoms if atom[1] != '' and atom[1] != 'H')
for name in topologyHeavy:
if name not in definitionHeavy or definitionHeavy[name][1] != topologyHeavy[name].element.symbol.upper():
# This atom isn't present in the definition
return None
for name in definitionHeavy:
if name not in topologyHeavy and not definitionHeavy[name][2]:
# This isn't a leaving atom, and it isn't present in the topology.
return None

# Build the list of hydrogens.

variant = []
definitionAtoms = dict((atom[0], atom) for atom in atoms)
topologyBonds = list(residue.bonds())
for name1, name2 in bonds:
if definitionAtoms[name1][1] == 'H':
h, parent = name1, name2
elif definitionAtoms[name2][1] == 'H':
h, parent = name2, name1
else:
continue
if definitionAtoms[h][2]:
# The hydrogen is marked as a leaving atom. Omit it if the parent is not present,
# or if the parent has an external bond.
if parent not in topologyHeavy:
continue
parentAtom = topologyHeavy[parent]
exclude = False
for atom1, atom2 in topologyBonds:
if atom1 == parentAtom and atom2.residue != residue:
exclude = True
break
if atom2 == parentAtom and atom1.residue != residue:
exclude = True
break
if exclude:
continue
variant.append((h, parent))
return variant

def addSolvent(self, boxSize=None, padding=None, boxVectors=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*unit.molar, boxShape='cube'):
"""Add a solvent box surrounding the structure.
Expand Down
Loading

0 comments on commit 5cf78a2

Please sign in to comment.