From 1ade026e6af7d477291f613d9415687920b9eaaa Mon Sep 17 00:00:00 2001 From: IAlibay Date: Wed, 13 Jul 2022 14:54:24 +0100 Subject: [PATCH 1/7] Add formalcharge attribute and read/write them from PDB --- package/MDAnalysis/coordinates/PDB.py | 47 ++++++++++++++++++- package/MDAnalysis/core/groups.py | 5 ++ package/MDAnalysis/core/topologyattrs.py | 12 +++++ .../MDAnalysis/topology/ExtendedPDBParser.py | 6 +++ package/MDAnalysis/topology/PDBParser.py | 30 ++++++++++-- .../source/documentation_pages/selections.rst | 5 ++ .../MDAnalysisTests/coordinates/test_pdb.py | 34 +++++++++++++- .../core/test_atomselections.py | 15 ++++++ testsuite/MDAnalysisTests/data/charges.pdb | 39 +++++++++++++++ testsuite/MDAnalysisTests/data/elements.pdb | 4 +- testsuite/MDAnalysisTests/datafiles.py | 4 +- .../MDAnalysisTests/topology/test_pdb.py | 46 ++++++++++++++++++ 12 files changed, 236 insertions(+), 11 deletions(-) create mode 100644 testsuite/MDAnalysisTests/data/charges.pdb diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 94376ecf57a..b56099b0da6 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -552,12 +552,12 @@ class PDBWriter(base.WriterBase): "ATOM {serial:5d} {name:<4s}{altLoc:<1s}{resName:<4s}" "{chainID:1s}{resSeq:4d}{iCode:1s}" " {pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}{occupancy:6.2f}" - "{tempFactor:6.2f} {segID:<4s}{element:>2s}\n"), + "{tempFactor:6.2f} {segID:<4s}{element:>2s}{charge:2s}\n"), 'HETATM': ( "HETATM{serial:5d} {name:<4s}{altLoc:<1s}{resName:<4s}" "{chainID:1s}{resSeq:4d}{iCode:1s}" " {pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}{occupancy:6.2f}" - "{tempFactor:6.2f} {segID:<4s}{element:>2s}\n"), + "{tempFactor:6.2f} {segID:<4s}{element:>2s}{charge:2s}\n"), 'REMARK': "REMARK {0}\n", 'COMPND': "COMPND {0}\n", 'HEADER': "HEADER {0}\n", @@ -1023,6 +1023,47 @@ def _deduce_PDB_atom_name(self, atomname, resname): return '{:<4}'.format(atomname) return ' {:<3}'.format(atomname) + @staticmethod + def _format_PDB_charges(charges: np.ndarray) -> np.ndarray: + """Format formal charges to match PDB requirements. + + Formal charge entry is set to empty if charge is 0, otherwise the + charge is set to a two character ````` + entry, e.g. ``1+`` or ``2-``. + + This method also verifies that formal charges can adhere to the PDB + format (i.e. charge cannot be > 9 or < -9). + + Parameters + ---------- + charges: np.ndarray + NumPy array of integers representing the formal charges of + the atoms being written. + + Returns + ------- + np.ndarray + NumPy array of dtype object with strings representing the + formal charges of the atoms being written. + """ + if charges.dtype != int: + raise ValueError("formal charges array should be of `int` type") + + outcharges = charges.astype(object) + outcharges[outcharges == 0] = '' # empty strings for no charge case + # using np.where is more efficient than looping in sparse cases + for i in np.where(charges < 0)[0]: + if charges[i] < -9: + errmsg = "formal charge < -9 is not supported by PDB standard" + raise ValueError(errmsg) + outcharges[i] = f"{abs(charges[i])}-" + for i in np.where(charges > 0)[0]: + if charges[i] > 9: + errmsg = "formal charge > 9 is not supported by PDB standard" + raise ValueError(errmsg) + outcharges[i] = f"{charges[i]}+" + return outcharges + def _write_timestep(self, ts, multiframe=False): """Write a new timestep *ts* to file @@ -1093,6 +1134,7 @@ def get_attr(attrname, default): atomnames = get_attr('names', 'X') elements = get_attr('elements', ' ') record_types = get_attr('record_types', 'ATOM') + formal_charges = self._format_PDB_charges(get_attr('formalcharges', 0)) def validate_chainids(chainids, default): """Validate each atom's chainID @@ -1158,6 +1200,7 @@ def validate_chainids(chainids, default): vals['segID'] = segids[i][:4] vals['chainID'] = chainids[i] vals['element'] = elements[i][:2].upper() + vals['charge'] = formal_charges[i] # record_type attribute, if exists, can be ATOM or HETATM try: diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index d7245d518a1..097d4fa0b87 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -3041,6 +3041,11 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, ``S`` will be possible options but other values will not raise an error. + formalcharge *formal-charge* + select atoms based on their formal charge, e.g. + ``name O and formalcharge -1`` to select all oxygens with a + negative 1 formal charge. + **Boolean** not diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 21c3fda216e..28d06fecdf3 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -2033,6 +2033,18 @@ def total_charge(group, compound='group'): ('total_charge', total_charge)) +class FormalCharges(AtomAttr): + """Formal charge on each atom""" + attrname = 'formalcharges' + singular = 'formalcharge' + per_object = 'atom' + dtype = int + + @staticmethod + def _gen_initial_values(na, nr, ns): + return np.zeros(na) + + # TODO: update docs to property doc class Occupancies(AtomAttr): attrname = 'occupancies' diff --git a/package/MDAnalysis/topology/ExtendedPDBParser.py b/package/MDAnalysis/topology/ExtendedPDBParser.py index 256f0eebc88..2138d386923 100644 --- a/package/MDAnalysis/topology/ExtendedPDBParser.py +++ b/package/MDAnalysis/topology/ExtendedPDBParser.py @@ -74,6 +74,12 @@ class ExtendedPDBParser(PDBParser.PDBParser): - resids - resnames - segids + - elements + - bonds + - formalcharges + + Guesses the following Attributes: + - masses See Also -------- diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index 830de44bd09..b36e4695910 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -83,6 +83,7 @@ Resnums, Segids, Tempfactors, + FormalCharges, ) @@ -155,7 +156,7 @@ def hy36decode(width, s): class PDBParser(TopologyReaderBase): """Parser that obtains a list of atoms from a standard PDB file. - Creates the following Attributes: + Creates the following Attributes (if present): - names - chainids - tempfactors @@ -166,6 +167,7 @@ class PDBParser(TopologyReaderBase): - segids - elements - bonds + - formalcharges Guesses the following Attributes: - masses @@ -186,6 +188,10 @@ class PDBParser(TopologyReaderBase): are now assigned (Issue #2422). Aliased ``bfactors`` topologyattribute to ``tempfactors``. ``bfactors`` is deprecated and will be removed in 3.0 (Issue #1901) + .. versionadded:: 2.3.0 + Formal charges are now read from PDB files if present. No formalcharge + attribute is create if no formal charges are present in the PDB file. + Any formal charges not set are assumed to have a value of 0. """ format = ['PDB', 'ENT'] @@ -222,12 +228,11 @@ def _parseatoms(self): icodes = [] tempfactors = [] occupancies = [] - resids = [] resnames = [] - segids = [] elements = [] + formalcharges = [] self._wrapped_serials = False # did serials go over 100k? last_wrapped_serial = 100000 # if serials wrap, start from here @@ -260,6 +265,7 @@ def _parseatoms(self): resnames.append(line[17:21].strip()) chainids.append(line[21:22].strip()) elements.append(line[76:78].strip()) + formalcharges.append(line[78:80].strip()) # Resids are optional try: @@ -335,6 +341,24 @@ def _parseatoms(self): validated_elements.append('') attrs.append(Elements(np.array(validated_elements, dtype=object))) + if any(formalcharges): + for i, entry in enumerate(formalcharges): + if not entry == '': + if ('+' in entry) or ('-' in entry): + try: + formalcharges[i] = int(entry[::-1]) + except ValueError: + errmsg = (f"Unknown formal charge {entry} " + "encountered") + raise ValueError(errmsg) + else: + errmsg = (f"Formal charge {entry} does not have + or -" + " assignment") + raise ValueError(errmsg) + else: + formalcharges[i] = 0 + attrs.append(FormalCharges(np.array(formalcharges, dtype=np.int32))) + masses = guess_masses(atomtypes) attrs.append(Masses(masses, guessed=True)) diff --git a/package/doc/sphinx/source/documentation_pages/selections.rst b/package/doc/sphinx/source/documentation_pages/selections.rst index 986cde2527d..4f0233b7360 100644 --- a/package/doc/sphinx/source/documentation_pages/selections.rst +++ b/package/doc/sphinx/source/documentation_pages/selections.rst @@ -186,6 +186,11 @@ chiral *R | S* to select only S-chiral carbon atoms. Only ``R`` and ``S`` will be possible options but other values will not raise an error. +formalcharge *formal-charge* + select atoms based on their formal charge, e.g. + ``name O and formalcharge -1`` to select all oxygens with a + negative 1 formal charge. + Pattern matching ---------------- diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index cb5750e4c6f..30a4c298cbf 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -37,7 +37,7 @@ PDB_cm, PDB_cm_gz, PDB_cm_bz2, PDB_mc, PDB_mc_gz, PDB_mc_bz2, PDB_CRYOEM_BOX, MMTF_NOCRYST, - PDB_HOLE, mol2_molecule) + PDB_HOLE, mol2_molecule, PDB_charges,) from numpy.testing import (assert_equal, assert_array_almost_equal, assert_almost_equal) @@ -1071,7 +1071,7 @@ def test_atomname_alignment(self, writtenstuff): def test_atomtype_alignment(self, writtenstuff): result_line = ("ATOM 1 H5T GUA X 1 7.974 6.430 9.561" - " 1.00 0.00 RNAA \n") + " 1.00 0.00 RNAA \n") assert_equal(writtenstuff[9], result_line) @@ -1273,3 +1273,33 @@ def test_cryst_meaningless_select(): u = mda.Universe(PDB_CRYOEM_BOX) cur_sele = u.select_atoms('around 0.1 (resid 4 and name CA and segid A)') assert cur_sele.n_atoms == 0 + + +def test_charges_roundtrip(tmpdir): + """ + Roundtrip test for PDB formal charges reading/writing. + """ + u = mda.Universe(PDB_charges) + + outfile = os.path.join(str(tmpdir), 'newcharges.pdb') + with mda.coordinates.PDB.PDBWriter(outfile) as writer: + writer.write(u.atoms) + + u_written = mda.Universe(outfile) + + assert_equal(u.atoms.formalcharges, u_written.atoms.formalcharges) + + +def test_charges_not_int(): + # np.zeros yields a float64 dtype + arr = np.zeros(10) + with pytest.raises(ValueError, match="array should be of `int` type"): + mda.coordinates.PDB.PDBWriter._format_PDB_charges(arr) + + +@pytest.mark.parametrize('value', [99, -100]) +def test_charges_limit(value): + # test for raising error when writing charges > 9 + arr = np.array([0, 0, 0, value, 1, -1, 0], dtype=int) + with pytest.raises(ValueError, match="9 is not supported by PDB standard"): + mda.coordinates.PDB.PDBWriter._format_PDB_charges(arr) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 8f27c99d794..90ff242c573 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -46,6 +46,7 @@ PDB_HOLE, PDB_helix, PDB_elements, + PDB_charges, ) from MDAnalysisTests import make_Universe @@ -1021,6 +1022,7 @@ def universe(): 'mass :3.0', 'mass 1-', 'chirality ', + 'formalcharge 0.2', ]) def test_selection_fail(self, selstr, universe): with pytest.raises(SelectionError): @@ -1457,3 +1459,16 @@ def test_chirality_selection(sel, size): ag = u.select_atoms('chirality {}'.format(sel)) assert len(ag) == size + + +@pytest.mark.parametrize('sel,size,name', [ + ('1', 1, 'NH2'), ('-1', 1, 'OD2'), ('0', 34, 'N'), ('-1 1', 2, 'OD2'), +]) +def test_formal_charge_selection(sel, size, name): + # 2 charge points, one positive one negative + u = mda.Universe(PDB_charges) + + ag = u.select_atoms(f'formalcharge {sel}') + + assert len(ag) == size + assert ag.atoms[0].name == name diff --git a/testsuite/MDAnalysisTests/data/charges.pdb b/testsuite/MDAnalysisTests/data/charges.pdb new file mode 100644 index 00000000000..0837a45e069 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/charges.pdb @@ -0,0 +1,39 @@ +REMARK For testing reading of formal charges +REMARK This model represents an ideal PDB with valid elements and formal charges +ATOM 577 N ASP H 49 16.704 14.108 12.210 1.00 21.33 N +ATOM 578 CA ASP H 49 16.584 13.977 10.747 1.00 24.21 C +ATOM 579 C ASP H 49 17.636 13.051 10.097 1.00 24.56 C +ATOM 580 O ASP H 49 17.363 12.527 9.019 1.00 24.83 O +ATOM 581 CB ASP H 49 16.488 15.330 9.988 1.00 28.91 C +ATOM 582 CG ASP H 49 17.600 16.382 10.180 1.00 32.38 C +ATOM 583 OD1 ASP H 49 18.584 16.125 10.909 1.00 33.55 O +ATOM 584 OD2 ASP H 49 17.442 17.461 9.571 1.00 42.36 O1- +ATOM 585 H ASP H 49 16.933 15.029 12.562 1.00 21.33 H +ATOM 586 HA ASP H 49 15.631 13.477 10.577 1.00 24.21 H +ATOM 587 HB3 ASP H 49 15.557 15.811 10.288 1.00 28.91 H +ATOM 588 HB2 ASP H 49 16.375 15.140 8.919 1.00 28.91 H +ATOM 589 N ARG H 50 18.794 12.852 10.751 1.00 21.10 N +ATOM 590 CA ARG H 50 19.908 12.062 10.225 1.00 23.13 C +ATOM 591 C ARG H 50 20.342 10.898 11.121 1.00 20.79 C +ATOM 592 O ARG H 50 21.159 10.112 10.651 1.00 20.85 O +ATOM 593 CB ARG H 50 21.113 12.980 9.947 1.00 31.78 C +ATOM 594 CG ARG H 50 20.844 13.983 8.813 1.00 43.76 C +ATOM 595 CD ARG H 50 22.110 14.635 8.244 1.00 55.64 C +ATOM 596 NE ARG H 50 22.911 13.672 7.465 1.00 64.62 N +ATOM 597 CZ ARG H 50 23.874 13.946 6.570 1.00 68.53 C +ATOM 598 NH1 ARG H 50 24.232 15.199 6.258 1.00 81.88 N +ATOM 599 NH2 ARG H 50 24.498 12.931 5.973 1.00 78.51 N1+ +ATOM 600 H ARG H 50 18.947 13.322 11.633 1.00 21.10 H +ATOM 601 HA ARG H 50 19.624 11.591 9.283 1.00 23.13 H +ATOM 602 HB3 ARG H 50 21.973 12.367 9.676 1.00 31.78 H +ATOM 603 HB2 ARG H 50 21.401 13.512 10.855 1.00 31.78 H +ATOM 604 HG3 ARG H 50 20.104 14.736 9.076 1.00 43.76 H +ATOM 605 HG2 ARG H 50 20.391 13.405 8.008 1.00 43.76 H +ATOM 606 HD3 ARG H 50 22.756 14.920 9.075 1.00 55.64 H +ATOM 607 HD2 ARG H 50 21.871 15.552 7.704 1.00 55.64 H +ATOM 608 HE ARG H 50 22.696 12.699 7.632 1.00 64.62 H +ATOM 609 HH12 ARG H 50 24.953 15.372 5.572 1.00 81.88 H +ATOM 610 HH11 ARG H 50 23.769 15.979 6.701 1.00 81.88 H +ATOM 611 HH22 ARG H 50 25.238 13.086 5.302 1.00 78.51 H +ATOM 612 HH21 ARG H 50 24.240 11.975 6.186 1.00 78.51 H +END diff --git a/testsuite/MDAnalysisTests/data/elements.pdb b/testsuite/MDAnalysisTests/data/elements.pdb index 727773bfdb7..9ffae621bef 100644 --- a/testsuite/MDAnalysisTests/data/elements.pdb +++ b/testsuite/MDAnalysisTests/data/elements.pdb @@ -23,10 +23,10 @@ HETATM 20 Mg Mg A 4 03.000 03.000 03.000 1.00 00.00 Mg HETATM 21 Ca Ca A 5 00.000 00.000 03.000 1.00 00.00 CA TER 22 HETATM 1609 S DMS A 101 19.762 39.489 18.350 1.00 25.99 S -HETATM 1610 O DMS A 101 19.279 37.904 18.777 1.00 23.69 Ox +HETATM 1610 O DMS A 101 19.279 37.904 18.777 1.00 23.69 O HETATM 1611 C1 DMS A 101 21.344 39.260 17.532 1.00 24.07 C HETATM 1612 C2 DMS A 101 18.750 40.066 17.029 1.00 20.50 C -HETATM 1613 S DMS A 102 22.157 39.211 12.217 1.00 27.26 S1 +HETATM 1613 S DMS A 102 22.157 39.211 12.217 1.00 27.26 S HETATM 1614 O DMS A 102 20.622 38.811 11.702 1.00 25.51 O HETATM 1615 C1 DMS A 102 22.715 40.764 11.522 1.00 26.00 C HETATM 1616 C2 DMS A 102 22.343 39.515 13.971 1.00 25.46 C diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index d42e1fcd78c..60e7d47541a 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -211,7 +211,7 @@ "SDF_molecule", # MDL SDFile for rdkit "PDBX", # PDBxfile "PDB_elements", # PDB file with elements - "PDB_elements", # PDB file with elements + "PDB_charges", # PDB file with formal charges "SURFACE_PDB", # 111 FCC lattice topology for NSGrid bug #2345 "SURFACE_TRR", # full precision coordinates for NSGrid bug #2345 ] @@ -582,7 +582,7 @@ SDF_molecule = resource_filename(__name__, 'data/molecule.sdf') PDB_elements = resource_filename(__name__, 'data/elements.pdb') - +PDB_charges = resource_filename(__name__, 'data/charges.pdb') PDBX = resource_filename(__name__, "data/4x8u.pdbx") diff --git a/testsuite/MDAnalysisTests/topology/test_pdb.py b/testsuite/MDAnalysisTests/topology/test_pdb.py index e10ea25d743..c087272fbe9 100644 --- a/testsuite/MDAnalysisTests/topology/test_pdb.py +++ b/testsuite/MDAnalysisTests/topology/test_pdb.py @@ -39,6 +39,7 @@ PDB_sameresid_diffresname, PDB_helix, PDB_elements, + PDB_charges, ) from MDAnalysis.topology.PDBParser import PDBParser from MDAnalysis import NoDataError @@ -325,3 +326,48 @@ def test_nobonds_error(): with pytest.raises(NoDataError, match=errmsg): u.atoms.bonds + + +def test_PDB_charges(): + """The test checks whether formalcharges attribute are assigned + properly given a PDB file with a valid formal charges record. + """ + u = mda.Universe(PDB_charges) + formal_charges = np.array([0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0], dtype=int) + assert_equal(u.atoms.formalcharges, formal_charges) + + +PDB_charges_nosign = """\ +REMARK Invalid charge assignment - no sign for MG2+ +HETATM 1 CU CU A 1 03.000 00.000 00.000 1.00 00.00 CU2+ +HETATM 2 FE FE A 2 00.000 03.000 00.000 1.00 00.00 Fe2+ +HETATM 3 Mg Mg A 3 03.000 03.000 03.000 1.00 00.00 MG2 +END +""" + + +def test_PDB_charge_nosign_error(): + """Test to check if there are missing signs for a given formal charge + entry""" + errmsg = r"Formal charge 2 does not have \+ or - assignment" + with pytest.raises(ValueError, match=errmsg): + u = mda.Universe(StringIO(PDB_charges_nosign), format='PDB') + + +PDB_charges_invertsign = """\ +REMARK Invalid charge format for MG2+ +HETATM 1 CU CU A 1 03.000 00.000 00.000 1.00 00.00 CU2+ +HETATM 2 FE FE A 2 00.000 03.000 00.000 1.00 00.00 Fe2+ +HETATM 3 Mg Mg A 3 03.000 03.000 03.000 1.00 00.00 MG+2 +END +""" + + +def test_PDB_charge_badformat(): + """Test to trigger an unrecognised formatting for the formal charge + entry""" + errmsg = r"Unknown formal charge \+2 encountered" + with pytest.raises(ValueError, match=errmsg): + u = mda.Universe(StringIO(PDB_charges_invertsign), format='PDB') From 67c7bdbc796b6051774d13e315abb4794e7af4a5 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sun, 17 Jul 2022 18:12:34 +0100 Subject: [PATCH 2/7] Update changelog --- package/CHANGELOG | 2 ++ 1 file changed, 2 insertions(+) diff --git a/package/CHANGELOG b/package/CHANGELOG index 679a3157b40..397f4ace038 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -22,6 +22,8 @@ Fixes * add a 0.5 for correct midpoints in hole analysis (Issue #3715) Enhancements + * Add a new `formalcharge` attribute for storing formal charges (PR #3755) + * Add the ability to read & write formal charges from PDB files (PR #3755) * Add `norm` parameter to InterRDF, InterRDF_s to normalize as rdf, number density or do not normalize at all. (Issue #3687) * Additional logger.info output when per-frame analysis starts (PR #3710) From 205b6fe06c11ffef8b445b806720459f66e90efc Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sun, 17 Jul 2022 18:15:35 +0100 Subject: [PATCH 3/7] Deal with fixable pep8 issues --- package/MDAnalysis/coordinates/PDB.py | 2 +- package/MDAnalysis/topology/PDBParser.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index b56099b0da6..0faf2db93ce 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -1200,7 +1200,7 @@ def validate_chainids(chainids, default): vals['segID'] = segids[i][:4] vals['chainID'] = chainids[i] vals['element'] = elements[i][:2].upper() - vals['charge'] = formal_charges[i] + vals['charge'] = formal_charges[i] # record_type attribute, if exists, can be ATOM or HETATM try: diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index b36e4695910..ee83f0e6ea6 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -357,7 +357,8 @@ def _parseatoms(self): raise ValueError(errmsg) else: formalcharges[i] = 0 - attrs.append(FormalCharges(np.array(formalcharges, dtype=np.int32))) + attrs.append( + FormalCharges(np.array(formalcharges, dtype=np.int32))) masses = guess_masses(atomtypes) attrs.append(Masses(masses, guessed=True)) From 6acf47682cc66e931f7c13fa24797023f51b77ca Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sun, 17 Jul 2022 20:13:34 +0100 Subject: [PATCH 4/7] Fix 0 case (looser interpretation of PDB v3 format) --- package/MDAnalysis/topology/PDBParser.py | 11 ++++++++--- testsuite/MDAnalysisTests/topology/test_pdb.py | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index ee83f0e6ea6..f4f02f12725 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -344,7 +344,13 @@ def _parseatoms(self): if any(formalcharges): for i, entry in enumerate(formalcharges): if not entry == '': - if ('+' in entry) or ('-' in entry): + if entry == '0': + # Technically a lack of charge shouldn't be in the PDB + # but MDA has a few files that specifically have 0 + # entries, indicating that some folks interpret 0 as + # an allowed entry + formalcharges[i] = 0 + elif ('+' in entry) or ('-' in entry): try: formalcharges[i] = int(entry[::-1]) except ValueError: @@ -352,8 +358,7 @@ def _parseatoms(self): "encountered") raise ValueError(errmsg) else: - errmsg = (f"Formal charge {entry} does not have + or -" - " assignment") + errmsg = (f"Formal charge {entry} is unrecognized") raise ValueError(errmsg) else: formalcharges[i] = 0 diff --git a/testsuite/MDAnalysisTests/topology/test_pdb.py b/testsuite/MDAnalysisTests/topology/test_pdb.py index c087272fbe9..c8819f7ae3b 100644 --- a/testsuite/MDAnalysisTests/topology/test_pdb.py +++ b/testsuite/MDAnalysisTests/topology/test_pdb.py @@ -351,7 +351,7 @@ def test_PDB_charges(): def test_PDB_charge_nosign_error(): """Test to check if there are missing signs for a given formal charge entry""" - errmsg = r"Formal charge 2 does not have \+ or - assignment" + errmsg = r"Formal charge 2 is unrecognized" with pytest.raises(ValueError, match=errmsg): u = mda.Universe(StringIO(PDB_charges_nosign), format='PDB') From 456906205b21744ba4ad4244c788fcf6457ed247 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sat, 23 Jul 2022 09:35:22 +0100 Subject: [PATCH 5/7] Address review comments, switch to np.int16 --- package/MDAnalysis/coordinates/PDB.py | 2 +- package/MDAnalysis/core/topologyattrs.py | 2 +- package/MDAnalysis/topology/PDBParser.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 0faf2db93ce..473b2e52dd9 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -1046,7 +1046,7 @@ def _format_PDB_charges(charges: np.ndarray) -> np.ndarray: NumPy array of dtype object with strings representing the formal charges of the atoms being written. """ - if charges.dtype != int: + if not np.issubdtype(charges, np.integer): raise ValueError("formal charges array should be of `int` type") outcharges = charges.astype(object) diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 28d06fecdf3..f56eec3ef30 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -2038,7 +2038,7 @@ class FormalCharges(AtomAttr): attrname = 'formalcharges' singular = 'formalcharge' per_object = 'atom' - dtype = int + dtype = np.int16 @staticmethod def _gen_initial_values(na, nr, ns): diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index f4f02f12725..33f6a73a8b8 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -188,9 +188,9 @@ class PDBParser(TopologyReaderBase): are now assigned (Issue #2422). Aliased ``bfactors`` topologyattribute to ``tempfactors``. ``bfactors`` is deprecated and will be removed in 3.0 (Issue #1901) - .. versionadded:: 2.3.0 + .. versionchanged:: 2.3.0 Formal charges are now read from PDB files if present. No formalcharge - attribute is create if no formal charges are present in the PDB file. + attribute is created if no formal charges are present in the PDB file. Any formal charges not set are assumed to have a value of 0. """ format = ['PDB', 'ENT'] @@ -363,7 +363,7 @@ def _parseatoms(self): else: formalcharges[i] = 0 attrs.append( - FormalCharges(np.array(formalcharges, dtype=np.int32))) + FormalCharges(np.array(formalcharges, dtype=np.int16))) masses = guess_masses(atomtypes) attrs.append(Masses(masses, guessed=True)) From 3909c4567ea0e339510e207766ff82adabc97071 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sat, 23 Jul 2022 09:44:44 +0100 Subject: [PATCH 6/7] Switch to int --- package/MDAnalysis/core/topologyattrs.py | 2 +- package/MDAnalysis/topology/PDBParser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index f56eec3ef30..28d06fecdf3 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -2038,7 +2038,7 @@ class FormalCharges(AtomAttr): attrname = 'formalcharges' singular = 'formalcharge' per_object = 'atom' - dtype = np.int16 + dtype = int @staticmethod def _gen_initial_values(na, nr, ns): diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index 33f6a73a8b8..0b00c5d45c9 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -363,7 +363,7 @@ def _parseatoms(self): else: formalcharges[i] = 0 attrs.append( - FormalCharges(np.array(formalcharges, dtype=np.int16))) + FormalCharges(np.array(formalcharges, dtype=int))) masses = guess_masses(atomtypes) attrs.append(Masses(masses, guessed=True)) From c56a46cc759454e9113666fdc0bfd1bd41fefe22 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sat, 23 Jul 2022 10:33:54 +0100 Subject: [PATCH 7/7] pass dtype not array --- package/MDAnalysis/coordinates/PDB.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 473b2e52dd9..fae3b28a1d2 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -1046,7 +1046,7 @@ def _format_PDB_charges(charges: np.ndarray) -> np.ndarray: NumPy array of dtype object with strings representing the formal charges of the atoms being written. """ - if not np.issubdtype(charges, np.integer): + if not np.issubdtype(charges.dtype, np.integer): raise ValueError("formal charges array should be of `int` type") outcharges = charges.astype(object)