Skip to content

Commit

Permalink
MOL2 parser populates elements attribute (#3063)
Browse files Browse the repository at this point in the history
Fixes #3062

## Work done in this PR
- Adds the ability to read elements from MOL2 files when using SYBYL atom types
  • Loading branch information
RMeli authored Aug 13, 2021
1 parent aba8b7d commit f8bb8ec
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 6 deletions.
1 change: 1 addition & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ Fixes
* new `Results` class now can be pickled/unpickled. (PR #3309)

Enhancements
* MOL2 parser populates elements attribute from SYBYL atom types (Issue #3062)
* Added guessers for aromaticity and Gasteiger partial charges (Issue #2468,
PR #2926)
* Added `Results` class for storing analysis results (#3115, PR #3233)
Expand Down
35 changes: 33 additions & 2 deletions package/MDAnalysis/topology/MOL2Parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,17 @@
Atomtypes,
Bonds,
Charges,
Elements,
Masses,
Resids,
Resnums,
Resnames,
Segids,
)
from ..core.topology import Topology
from .tables import SYBYL2SYMB

import warnings


class MOL2Parser(TopologyReaderBase):
Expand All @@ -70,20 +74,29 @@ class MOL2Parser(TopologyReaderBase):
- Atomnames
- Atomtypes
- Charges
- Resids,
- Resids
- Resnames
- Bonds
- Elements
Guesses the following:
- masses
Notes
-----
Elements are obtained directly from the SYBYL atom types. If some atoms have
unknown atom types, they will be assigned an empty element record. If all
atoms have unknown atom types, the elements attribute will not be set.
.. versionchanged:: 0.9
Now subclasses TopologyReaderBase
.. versionchanged:: 0.20.0
Allows for comments at the top of the file
Ignores status bit strings
.. versionchanged:: 2.0.0
Bonds attribute is not added if no bonds are present in MOL2 file
.. versionchanged: 2.0.0
Parse elements from atom types.
"""
format = 'MOL2'

Expand Down Expand Up @@ -148,7 +161,22 @@ def parse(self, **kwargs):

n_atoms = len(ids)

masses = guessers.guess_masses(types)
validated_elements = np.empty(n_atoms, dtype="U3")
invalid_elements = set()
for i, at in enumerate(types):
if at in SYBYL2SYMB:
validated_elements[i] = SYBYL2SYMB[at]
else:
invalid_elements.add(at)
validated_elements[i] = ''

# Print single warning for all unknown elements, if any
if invalid_elements:
warnings.warn("Unknown elements found for some "
f"atoms: {invalid_elements}. "
"These have been given an empty element record.")

masses = guessers.guess_masses(validated_elements)

attrs = []
attrs.append(Atomids(np.array(ids, dtype=np.int32)))
Expand All @@ -157,6 +185,9 @@ def parse(self, **kwargs):
attrs.append(Charges(np.array(charges, dtype=np.float32)))
attrs.append(Masses(masses, guessed=True))

if not np.all(validated_elements == ''):
attrs.append(Elements(validated_elements))

resids = np.array(resids, dtype=np.int32)
resnames = np.array(resnames, dtype=object)

Expand Down
35 changes: 35 additions & 0 deletions package/MDAnalysis/topology/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,3 +394,38 @@ def kv2dict(s, convertor=str):
113: 'Nh', 114: 'Fl', 115: 'Mc', 116: 'Lv', 117: 'Ts', 118: 'Og'}

SYMB2Z = {v:k for k, v in Z2SYMB.items()}

# Conversion between SYBYL atom types and corresponding elements
# Tripos MOL2 file format:
# https://web.archive.org/web/*/http://chemyang.ccnu.edu.cn/ccb/server/AIMMS/mol2.pdf
SYBYL2SYMB = {
"H": "H", "H.spc": "H", "H.t3p": "H",
"C.3": "C", "C.2": "C", "C.1": "C", "C.ar": "C", "C.cat": "C",
"N.3": "N", "N.2": "N", "N.1": "N", "N.ar": "N",
"N.am": "N", "N.pl3": "N", "N.4": "N",
"O.3": "O", "O.2": "O", "O.co2": "O", "O.spc": "O", "O.t3p": "O",
"S.3": "S", "S.2": "S", "S.O": "S", "S.O2": "S",
"S.o": "S", "S.o2": "S", # Non-standard but often found in the wild...
"P.3": "P",
"F": "F",
"Li": "Li",
"Na": "Na",
"Mg": "Mg",
"Al": "Al",
"Si": "Si",
"K": "K",
"Ca": "Ca",
"Cr.th": "Cr",
"Cr.oh": "Cr",
"Mn": "Mn",
"Fe": "Fe",
"Co.oh": "Co",
"Cu": "Cu",
"Cl": "Cl",
"Br": "Br",
"I": "I",
"Zn": "Zn",
"Se": "Se",
"Mo": "Mo",
"Sn": "Sn",
}
4 changes: 4 additions & 0 deletions testsuite/MDAnalysisTests/converters/test_rdkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,10 @@ def test_identical_topology(self, rdmol):

def test_raise_requires_elements(self):
u = mda.Universe(mol2_molecule)

# Delete topology attribute (PR #3069)
u.del_TopologyAttr('elements')

with pytest.raises(
AttributeError,
match="`elements` attribute is required for the RDKitConverter"
Expand Down
132 changes: 128 additions & 4 deletions testsuite/MDAnalysisTests/topology/test_mol2.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,94 @@
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
import pytest
from io import StringIO

import numpy as np
from numpy.testing import assert_equal
import pytest

import MDAnalysis as mda

from MDAnalysisTests.topology.base import ParserBase
from MDAnalysisTests.datafiles import (
mol2_molecule,
mol2_molecules,
)
from numpy.testing import assert_equal


mol2_wrong_element = """\
@<TRIPOS>MOLECULE
mol2_wrong_element
3 0 0 0 0
SMALL
USER_CHARGES
@<TRIPOS>ATOM
1 N1 6.8420 9.9900 22.7430 N.am 1 Q101 -0.8960
2 S1 8.1400 9.2310 23.3330 X.o2 1 Q101 1.3220
3 N2 4.4000 9.1300 20.4710 XX.am 1 Q101 -0.3970
"""

mol2_all_wrong_elements = """\
@<TRIPOS>MOLECULE
mol2_all_wrong_elements
3 0 0 0 0
SMALL
USER_CHARGES
@<TRIPOS>ATOM
1 N1 6.8420 9.9900 22.7430 X.am 1 Q101 -0.8960
2 S1 8.1400 9.2310 23.3330 X.o2 1 Q101 1.3220
3 N2 4.4000 9.1300 20.4710 XX.am 1 Q101 -0.3970
"""


mol2_fake = """\
@<TRIPOS>MOLECULE
mol2_fake
26 0 0 0 0
SMALL
USER_CHARGES
@<TRIPOS>ATOM
1 H1 0.0000 1.0000 10.0000 H.spc 1 XXXX 5.0000
2 H2 0.0000 1.0000 10.0000 H.t3p 1 XXXX 5.0000
3 H3 0.0000 1.0000 10.0000 H.xyz 1 XXXX 5.0000
4 C1 0.0000 1.0000 10.0000 C.1 1 XXXX 5.0000
5 C2 0.0000 1.0000 10.0000 C.2 1 XXXX 5.0000
5 C3 0.0000 1.0000 10.0000 C.3 1 XXXX 5.0000
6 C4 0.0000 1.0000 10.0000 C.ar 1 XXXX 5.0000
7 C5 0.0000 1.0000 10.0000 C.cat 1 XXXX 5.0000
8 C6 0.0000 1.0000 10.0000 C.xyz 1 XXXX 5.0000
9 N1 0.0000 1.0000 10.0000 N.1 1 XXXX 5.0000
10 N2 0.0000 1.0000 10.0000 N.2 1 XXXX 5.0000
11 N3 0.0000 1.0000 10.0000 N.3 1 XXXX 5.0000
12 N4 0.0000 1.0000 10.0000 N.ar 1 XXXX 5.0000
13 O1 0.0000 1.0000 10.0000 O.2 1 XXXX 5.0000
14 O2 0.0000 1.0000 10.0000 O.3 1 XXXX 5.0000
15 O3 0.0000 1.0000 10.0000 O.co2 1 XXXX 5.0000
16 O4 0.0000 1.0000 10.0000 O.spc 1 XXXX 5.0000
16 O5 0.0000 1.0000 10.0000 O.t3p 1 XXXX 5.0000
17 S1 0.0000 1.0000 10.0000 S.3 1 XXXX 5.0000
18 S2 0.0000 1.0000 10.0000 S.2 1 XXXX 5.0000
19 S3 0.0000 1.0000 10.0000 S.O 1 XXXX 5.0000
20 S4 0.0000 1.0000 10.0000 S.O2 1 XXXX 5.0000
21 S5 0.0000 1.0000 10.0000 S.o 1 XXXX 5.0000
22 S6 0.0000 1.0000 10.0000 S.o2 1 XXXX 5.0000
23 P1 0.0000 1.0000 10.0000 P.3 1 XXXX 5.0000
24 Cr1 0.0000 1.0000 10.0000 Cr.th 1 XXXX 5.0000
25 Cr2 0.0000 1.0000 10.0000 Cr.oh 1 XXXX 5.0000
26 Co1 0.0000 1.0000 10.0000 Co.oh 1 XXXX 5.0000
"""


class TestMOL2Base(ParserBase):
parser = mda.topology.MOL2Parser.MOL2Parser
expected_attrs = [
'ids', 'names', 'types', 'charges', 'resids', 'resnames', 'bonds'
'ids', 'names', 'types', 'charges', 'resids', 'resnames', 'bonds',
'elements',
]
guessed_attrs = ['masses']
expected_n_atoms = 49
Expand All @@ -50,6 +121,7 @@ def test_attr_size(self, top):
assert len(top.charges) == top.n_atoms
assert len(top.resids) == top.n_residues
assert len(top.resnames) == top.n_residues
assert len(top.elements) == top.n_atoms

def test_bonds(self, top):
assert len(top.bonds) == 49 # bonds for 49 atoms
Expand All @@ -68,3 +140,55 @@ def test_bond_orders():
u = mda.Universe(mol2_molecule)
orders = [bond.order for bond in u.atoms.bonds]
assert_equal(orders, ref_orders)


def test_elements():
u = mda.Universe(mol2_molecule)

assert_equal(
u.atoms.elements[:5],
np.array(["N", "S", "N", "N", "O"], dtype="U3")
)


# Test for #2927
def test_elements_selection():
u = mda.Universe(mol2_molecule)
ag = u.select_atoms("element S")

assert_equal(
ag.elements,
np.array(["S", "S"], dtype="U3")
)


def test_wrong_elements_warnings():
with pytest.warns(UserWarning, match='Unknown elements found') as record:
u = mda.Universe(StringIO(mol2_wrong_element), format='MOL2')

# One warning from invalid elements, one from invalid masses
assert len(record) == 2

expected = np.array(['N', '', ''], dtype=object)
assert_equal(u.atoms.elements, expected)


def test_all_wrong_elements_warnings():
with pytest.warns(UserWarning, match='Unknown elements found'):
u = mda.Universe(StringIO(mol2_all_wrong_elements), format='MOL2')

with pytest.raises(mda.exceptions.NoDataError,
match='This Universe does not contain element '
'information'):

u.atoms.elements


def test_all_elements():
with pytest.warns(UserWarning, match='Unknown elements found'):
u = mda.Universe(StringIO(mol2_fake), format='MOL2')

expected = ["H"] * 2 + [""] + ["C"] * 5 + [""] + ["N"] * 4 + ["O"] * 5 + \
["S"] * 6 + ["P"] + ["Cr"] * 2 + ["Co"]
expected = np.array(expected, dtype=object)
assert_equal(u.atoms.elements, expected)

0 comments on commit f8bb8ec

Please sign in to comment.