Skip to content

Commit

Permalink
added match_atoms keyword to analysis.align (#2380)
Browse files Browse the repository at this point in the history
* Fixes #2285
* added match_atoms keyword to analysis.align
* added issue
* added test
* removed redundant test
* logged error
* split tests
* moved match_atoms to 0.21.0
  • Loading branch information
lilyminium authored and orbeckst committed Nov 7, 2019
1 parent 240586e commit e12958c
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 51 deletions.
1 change: 1 addition & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Enhancements
* GSD reader is now supported on Windows (Issue #1923, PR #2384)
* Empty Universe with 0 atoms is possible (Issue #2383)
* Added ITPParser to parse GROMACS itp topology files (PR #2381)
* Added match_atoms keyword to analysis.align (Issue #2285, PR #2380)

Deprecations
* analysis.hbonds.HydrogenBondAnalysis will be deprecated in 1.0
Expand Down
121 changes: 70 additions & 51 deletions package/MDAnalysis/analysis/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,9 @@ def _fit_to(mobile_coordinates, ref_coordinates, mobile_atoms,
return mobile_atoms, min_rmsd


def alignto(mobile, reference, select="all", weights=None,
subselection=None, tol_mass=0.1, strict=False):
def alignto(mobile, reference, select=None, weights=None,
subselection=None, tol_mass=0.1, strict=False,
match_atoms=True):
"""Perform a spatial superposition by minimizing the RMSD.
Spatially align the group of atoms `mobile` to `reference` by
Expand Down Expand Up @@ -410,6 +411,8 @@ def alignto(mobile, reference, select="all", weights=None,
be a *list of selection strings* to generate a
:class:`~MDAnalysis.core.groups.AtomGroup` with defined atom order as
described under :ref:`ordered-selections-label`).
match_atoms : bool (optional)
Whether to match the mobile and reference atom-by-atom. Default ``True``.
weights : {"mass", ``None``} or array_like (optional)
choose weights. With ``"mass"`` uses masses as weights; with ``None``
weigh each atom equally. If a float array of the same length as
Expand Down Expand Up @@ -453,6 +456,8 @@ def alignto(mobile, reference, select="all", weights=None,
.. _ClustalW: http://www.clustal.org/
.. _STAMP: http://www.compbio.dundee.ac.uk/manuals/stamp.4.2/
.. versionchanged:: 0.21.0
Added *match_atoms* keyword to toggle atom matching.
.. versionchanged:: 0.8
Added check that the two groups describe the same atoms including
Expand Down Expand Up @@ -482,9 +487,11 @@ def alignto(mobile, reference, select="all", weights=None,
mobile_atoms = mobile.select_atoms(*select['mobile'])
ref_atoms = reference.select_atoms(*select['reference'])


ref_atoms, mobile_atoms = get_matching_atoms(ref_atoms, mobile_atoms,
tol_mass=tol_mass,
strict=strict)
tol_mass=tol_mass,
strict=strict,
match_atoms=match_atoms)

weights = get_weights(ref_atoms, weights)

Expand Down Expand Up @@ -534,7 +541,7 @@ class AlignTraj(AnalysisBase):

def __init__(self, mobile, reference, select='all', filename=None,
prefix='rmsfit_', weights=None,
tol_mass=0.1, strict=False, force=True, in_memory=False,
tol_mass=0.1, match_atoms=True, strict=False, force=True, in_memory=False,
**kwargs):
"""Parameters
----------
Expand All @@ -558,6 +565,8 @@ def __init__(self, mobile, reference, select='all', filename=None,
selection.
tol_mass : float (optional)
Tolerance given to `get_matching_atoms` to find appropriate atoms
match_atoms : bool (optional)
Whether to match the mobile and reference atom-by-atom. Default ``True``.
strict : bool (optional)
Force `get_matching_atoms` to fail if atoms can't be found using
exact methods
Expand Down Expand Up @@ -656,7 +665,7 @@ def __init__(self, mobile, reference, select='all', filename=None,
natoms = self.mobile.n_atoms
self.ref_atoms, self.mobile_atoms = get_matching_atoms(
self.ref_atoms, self.mobile_atoms, tol_mass=tol_mass,
strict=strict)
strict=strict, match_atoms=match_atoms)

# with self.filename == None (in_memory), the NullWriter is chosen
# (which just ignores input) and so only the in_memory trajectory is
Expand Down Expand Up @@ -975,7 +984,7 @@ def resid(nseq, ipos, t=t):
return {'reference': ref_selection, 'mobile': target_selection}


def get_matching_atoms(ag1, ag2, tol_mass=0.1, strict=False):
def get_matching_atoms(ag1, ag2, tol_mass=0.1, strict=False, match_atoms=True):
"""Return two atom groups with one-to-one matched atoms.
The function takes two :class:`~MDAnalysis.core.groups.AtomGroup`
Expand Down Expand Up @@ -1013,6 +1022,11 @@ def get_matching_atoms(ag1, ag2, tol_mass=0.1, strict=False):
Will try to prepare a matching selection by dropping
residues with non-matching atoms. See :func:`get_matching_atoms`
for details.
match_atoms : bool (optional)
``True``
Will attempt to match atoms based on mass
``False``
Will not attempt to match atoms based on mass
Returns
-------
Expand Down Expand Up @@ -1044,14 +1058,21 @@ def get_matching_atoms(ag1, ag2, tol_mass=0.1, strict=False):
"""

if ag1.n_atoms != ag2.n_atoms:
if not match_atoms:
errmsg = ("Mobile and reference atom selections do not "
"contain the same number of atoms and atom "
"matching is turned off. To match atoms based "
"on residue and mass, try match_atoms=True")
logger.error(errmsg)
raise SelectionError(errmsg)
if ag1.n_residues != ag2.n_residues:
errmsg = ("Reference and trajectory atom selections do not contain "
"the same number of atoms: \n"
"atoms: N_ref={0}, N_traj={1}\n"
"and also not the same number of residues:\n"
"residues: N_ref={2}, N_traj={3}").format(
ag1.n_atoms, ag2.n_atoms,
ag1.n_residues, ag2.n_residues)
"the same number of atoms: \n"
"atoms: N_ref={0}, N_traj={1}\n"
"and also not the same number of residues:\n"
"residues: N_ref={2}, N_traj={3}").format(
ag1.n_atoms, ag2.n_atoms,
ag1.n_residues, ag2.n_residues)
logger.error(errmsg)
raise SelectionError(errmsg)
else:
Expand Down Expand Up @@ -1137,9 +1158,6 @@ def get_atoms_byres(g, match_mask=np.logical_not(mismatch_mask)):
assert _ag1.atoms.n_atoms == _ag2.atoms.n_atoms

# diagnostics
# (ugly workaround for missing boolean indexing of AtomGroup)
# note: ag[arange(len(ag))[boolean]] is ~2x faster than
# ag[where[boolean]]
mismatch_resindex = np.arange(ag1.n_residues)[mismatch_mask]
logger.warning("Removed {0} residues with non-matching numbers of atoms"
.format(mismatch_mask.sum()))
Expand All @@ -1159,40 +1177,41 @@ def get_atoms_byres(g, match_mask=np.logical_not(mismatch_mask)):
"Try to improve your selections for mobile and reference.")
logger.error(errmsg)
raise SelectionError(errmsg)

# check again because the residue matching heuristic is not very
# good and can easily be misled (e.g., when one of the selections
# had fewer atoms but the residues in mobile and reference have
# each the same number)
try:
mass_mismatches = (np.absolute(ag1.masses - ag2.masses) > tol_mass)
except ValueError:
errmsg = ("Failed to find matching atoms: len(reference) = {}, len(mobile) = {} " +
"Try to improve your selections for mobile and reference.").format(
ag1.n_atoms, ag2.n_atoms)
logger.error(errmsg)
raise_from(SelectionError(errmsg), None)

if np.any(mass_mismatches):
# Test 2 failed.
# diagnostic output:
logger.error("Atoms: reference | trajectory")
for ar, at in zip(ag1[mass_mismatches], ag2[mass_mismatches]):
logger.error(
"{0!s:>4} {1:3d} {2!s:>3} {3!s:>3} {4:6.3f} | {5!s:>4} {6:3d} {7!s:>3} {8!s:>3} {9:6.3f}".format(
ar.segid,
ar.resid,
ar.resname,
ar.name,
ar.mass,
at.segid,
at.resid,
at.resname,
at.name,
at.mass))
errmsg = ("Inconsistent selections, masses differ by more than {0}; "
"mis-matching atoms are shown above.").format(tol_mass)
logger.error(errmsg)
raise SelectionError(errmsg)

if match_atoms:
# check again because the residue matching heuristic is not very
# good and can easily be misled (e.g., when one of the selections
# had fewer atoms but the residues in mobile and reference have
# each the same number)
try:
mass_mismatches = (np.absolute(ag1.masses - ag2.masses) > tol_mass)
except ValueError:
errmsg = ("Failed to find matching atoms: len(reference) = {}, len(mobile) = {} " +
"Try to improve your selections for mobile and reference.").format(
ag1.n_atoms, ag2.n_atoms)
logger.error(errmsg)
raise_from(SelectionError(errmsg), None)

if np.any(mass_mismatches):
# Test 2 failed.
# diagnostic output:
logger.error("Atoms: reference | trajectory")
for ar, at in zip(ag1[mass_mismatches], ag2[mass_mismatches]):
logger.error(
"{0!s:>4} {1:3d} {2!s:>3} {3!s:>3} {4:6.3f} | {5!s:>4} {6:3d} {7!s:>3} {8!s:>3} {9:6.3f}".format(
ar.segid,
ar.resid,
ar.resname,
ar.name,
ar.mass,
at.segid,
at.resid,
at.resname,
at.name,
at.mass))
errmsg = ("Inconsistent selections, masses differ by more than {0}; "
"mis-matching atoms are shown above.").format(tol_mass)
logger.error(errmsg)
raise SelectionError(errmsg)

return ag1, ag2
25 changes: 25 additions & 0 deletions testsuite/MDAnalysisTests/analysis/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,31 @@ def test_nomatch_residues_raise_empty(self, universe, reference_small,
with pytest.warns(SelectionWarning):
with pytest.raises(SelectionError):
groups = align.get_matching_atoms(ref, mobile, strict=strict)

def test_toggle_atom_mismatch_default_error(self, universe, reference):
selection = ('resname ALA and name CA', 'resname ALA and name O')
with pytest.raises(SelectionError):
rmsd = align.alignto(universe, reference, select=selection)

def test_toggle_atom_mismatch_kwarg_error(self, universe, reference):
selection = ('resname ALA and name CA', 'resname ALA and name O')
with pytest.raises(SelectionError):
rmsd = align.alignto(universe, reference, select=selection, match_atoms=True)

def test_toggle_atom_nomatch(self, universe, reference):
selection = ('resname ALA and name CA', 'resname ALA and name O')
rmsd = align.alignto(universe, reference, select=selection, match_atoms=False)
assert rmsd[0] > 0.01

def test_toggle_atom_nomatch_mismatch_atoms(self, universe, reference):
# mismatching number of atoms, but same number of residues
u = universe.select_atoms('resname ALA and name CA')
u += universe.select_atoms('resname ALA and name O')[-1]
ref = reference.select_atoms('resname ALA and name CA')
with pytest.raises(SelectionError):
align.alignto(u, ref, select='all', match_atoms=False)





Expand Down

0 comments on commit e12958c

Please sign in to comment.