# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

"""
PDBx (mmcif) files in MDAnalysis --- :mod:`MDAnalysis.coordinates.PDBx`
=======================================================================

Reads coordinates from a PDBx_ (mmcif) format file. Positions are taken from
the ``_atom_site.Cartn_x``, ``_atom_site.Cartn_y`` and ``_atom_site.Cartn_z``
fields; the unit cell, when present, is taken from the ``_cell`` section.


.. _PDBx:
   https://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/beginner’s-guide-to-pdb-structures-and-the-pdbx-mmcif-format
"""
import gemmi
import numpy as np

from . import base


class PDBxReader(base.SingleFrameReaderBase):
    """Single-frame reader for PDBx/mmCIF files, parsed with gemmi.

    The file must contain exactly one data block (``sole_block()`` is used).
    Coordinates come from the ``_atom_site.Cartn_{x,y,z}`` columns and the
    box from ``_cell.length_{a,b,c}`` / ``_cell.angle_{alpha,beta,gamma}``.
    """
    format = ['cif', 'pdbx']
    units = {'time': None, 'length': 'Angstrom'}

    def _read_first_frame(self):
        """Parse the file and build the only :class:`Timestep`.

        Returns
        -------
        Timestep
            Frame 0, holding float32 positions and, if the file has a
            ``_cell`` section, the six unit cell dimensions.
        """
        doc = gemmi.cif.read(self.filename)
        block = doc.sole_block()

        # CIF values are stored as text; as_number() parses them and also
        # copes with a trailing standard uncertainty such as "90.00(2)".
        # Values that are not numbers (e.g. the "?" null marker) become NaN.
        as_number = gemmi.cif.as_number

        coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z'])
        # ``natoms`` is what this reader originally set and is kept as an
        # alias. NOTE(review): the rest of MDAnalysis is assumed to look for
        # ``n_atoms`` on a reader -- confirm against the base class.
        self.n_atoms = self.natoms = len(coords)

        # reshape keeps the (n, 3) shape even when no atom rows were found
        xyz = np.array(
            [[as_number(value) for value in row] for row in coords],
            dtype=np.float32,
        ).reshape(self.n_atoms, 3)

        ts = self.ts = base.Timestep.from_coordinates(xyz, **self._ts_kwargs)
        ts.frame = 0

        box = block.find('_cell.', ['length_a', 'length_b', 'length_c',
                                    'angle_alpha', 'angle_beta',
                                    'angle_gamma'])
        # Only set dimensions when a cell was actually found; otherwise the
        # Timestep keeps whatever default from_coordinates() gave it.
        if box and len(box) > 0:
            ts.dimensions = np.array(
                [as_number(value) for value in box[0]], dtype=np.float64
            )

        if self.convert_units:
            # in-place !
            self.convert_pos_from_native(self.ts._pos)
            if self.ts.dimensions is not None:
                self.convert_pos_from_native(self.ts.dimensions[:3])

        return ts
automodule:: MDAnalysis.coordinates.PDBx + :members: From f063b14e2e39f3d654795d32b7c52962d8c0623c Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 29 Sep 2023 16:37:36 +0100 Subject: [PATCH 6/7] work on PDBx coordinates docstring --- package/MDAnalysis/coordinates/PDBx.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/PDBx.py b/package/MDAnalysis/coordinates/PDBx.py index b14cbffa627..d9b4500f7bc 100644 --- a/package/MDAnalysis/coordinates/PDBx.py +++ b/package/MDAnalysis/coordinates/PDBx.py @@ -25,7 +25,8 @@ PDBx (mmcif) files in MDAnalysis --- :mod:`MDAnalysis.coordinates.PDBx` ======================================================================= -Reads coordinates from a PDBx_ (mmcif) format file. +Reads coordinates from a PDBx_ (mmcif) format file. Will populate the Universe positions from the +``_atom_site.Cartn_x`` field in the PDBx file. Will populate the unitcell dimensions from the ``_cell`` section. .. _PDBx: From 837b861fe4c20a3cd0a09a761b03887f49e05895 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 6 Oct 2023 09:06:45 +0100 Subject: [PATCH 7/7] first pass at PDBx topology parser --- package/MDAnalysis/topology/PDBxParser.py | 122 ++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 package/MDAnalysis/topology/PDBxParser.py diff --git a/package/MDAnalysis/topology/PDBxParser.py b/package/MDAnalysis/topology/PDBxParser.py new file mode 100644 index 00000000000..a586b577af5 --- /dev/null +++ b/package/MDAnalysis/topology/PDBxParser.py @@ -0,0 +1,122 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis 
# in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
PDBx topology parser
====================


See Also
--------
:mod:`MDAnalysis.coordinates.PDBx`

"""
import gemmi
import numpy as np

from .base import TopologyReaderBase, change_squash
from ..core.topology import Topology
from ..core.topologyattrs import (
    Atomnames,
    Atomids,
    AltLocs,
    Elements,
    ICodes,
    RecordTypes,
    Resids,
    Resnames,
    Segids,
)


class PDBxParser(TopologyReaderBase):
    """Read a Topology from a PDBx file

    Creates the following attributes from these "_atom_site" PDBx loop entries

    - "group_PDB" RecordType
    - "id" AtomId
    - "label_alt_id" AltLoc
    - "type_symbol" Element
    - "label_atom_id" AtomName
    - "auth_seq_id" Resid
    - "auth_comp_id" Resname
    - "pdbx_PDB_ins_code" ICode
    - "auth_asym_id" Segid

    CIF null markers (``.`` and ``?``) in text columns are stored as empty
    strings, and CIF quoting around values (e.g. ``"O5'"``) is removed.
    """
    format = ['PDBx', 'cif']

    def parse(self, **kwargs) -> Topology:
        """Parse the single data block of the file into a Topology.

        Returns
        -------
        Topology
            Atoms grouped into residues and residues grouped into segments.

        Raises
        ------
        ValueError
            If one of the columns needed to build residues and segments
            does not have one entry per atom.
        """
        doc = gemmi.cif.read(self.filename)
        block = doc.sole_block()

        def column(tag):
            # every attribute is read from the same _atom_site loop
            return block.find_loop('_atom_site.' + tag)

        def as_strings(col):
            # as_string() strips CIF quoting and maps "." / "?" to ""
            return np.array([gemmi.cif.as_string(v) for v in col],
                            dtype=object)

        def as_ints(col):
            return np.array([gemmi.cif.as_int(v) for v in col],
                            dtype=np.int32)

        ids = column('id')
        n_atoms = len(ids)

        def required(tag):
            col = column(tag)
            if len(col) != n_atoms:
                raise ValueError(
                    f"PDBx file {self.filename!r}: '_atom_site.{tag}' has "
                    f"{len(col)} entries, expected {n_atoms}"
                )
            return col

        attrs = []

        # per-atom attributes; all but the ids are optional
        if recordtypes := column('group_PDB'):
            attrs.append(RecordTypes(as_strings(recordtypes)))
        attrs.append(Atomids(as_ints(ids)))
        if altlocs := column('label_alt_id'):
            attrs.append(AltLocs(as_strings(altlocs)))
        if elements_loop := column('type_symbol'):
            attrs.append(Elements(as_strings(elements_loop)))
        if names_loop := column('label_atom_id'):
            attrs.append(Atomnames(as_strings(names_loop)))

        # sort out residues/segments
        # label_seq_id seems to not cover entire model unlike author versions
        resids = as_ints(required('auth_seq_id'))
        resnames = as_strings(required('auth_comp_id'))
        icodes = as_strings(required('pdbx_PDB_ins_code'))
        chainids = as_strings(required('auth_asym_id'))

        # A new residue starts whenever any of number, insertion code, name
        # or chain changes, so equal residue numbers that sit next to each
        # other in different chains are not merged into one residue.
        # NOTE(review): change_squash is assumed to slice and compare its
        # inputs elementwise, hence the conversion to numpy arrays above.
        residx, (resids, icodes, resnames, chainids) = change_squash(
            (resids, icodes, resnames, chainids),
            (resids, icodes, resnames, chainids),
        )
        # chainids is now per residue; squash it again to get the segments
        segidx, (chainids,) = change_squash((chainids,), (chainids,))

        attrs.extend((
            Resids(resids),
            Resnames(resnames),
            ICodes(icodes),
            Segids(chainids),
        ))

        n_residues = len(resids)
        n_segments = len(chainids)

        return Topology(
            n_atoms=n_atoms,
            n_res=n_residues,
            n_seg=n_segments,
            attrs=attrs,
            atom_resindex=residx,
            residue_segindex=segidx,
        )