From 45e56e8314c278e3eb98ed7a6029b74e7435e8be Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Tue, 25 Aug 2020 14:35:33 +0100 Subject: [PATCH] Faster name selections (#2755) * modified AtomNames topologyattr to include lookup table index * cheeky little optimisation * rework atom name selection to use lookup tables * Update topologyattrs.py * fixed test supplying integer as atom name really topologyattrs need to be statically typed and protective about this * Update test_topologyattrs.py * use dict-lookup string attrs EVERYWHERERE * removed some code duplication made protein selection faster, 48ms -> 0.5ms on GRO testfile * improved nucleic/backbone selections * Added explicit tests for Resnames topologyattr tests now provide str types for resnames/icodes * use fnmatchcase to be case sensitive * Update package/MDAnalysis/core/selection.py @jbarnoud's fix * apply suggestions from code review Co-authored-by: Irfan Alibay * added test for setting multiple segids at once Co-authored-by: Oliver Beckstein Co-authored-by: Irfan Alibay --- package/CHANGELOG | 3 + package/MDAnalysis/core/selection.py | 175 ++++++++++++--- package/MDAnalysis/core/topologyattrs.py | 207 +++++++++++++++--- .../core/test_atomselections.py | 2 +- .../MDAnalysisTests/core/test_segmentgroup.py | 18 ++ .../core/test_topologyattrs.py | 37 ++-- 6 files changed, 369 insertions(+), 73 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 9df19de4a28..5410e0f6153 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -45,6 +45,7 @@ Fixes * In hydrogenbonds.hbond_analysis.HydrogenbondAnalysis an AttributeError was thrown when finding D-H pairs via the topology if `hydrogens` was an empty AtomGroup (Issue #2848) + * Fixed performance regression on select_atoms for string selections (#2751) * Fixed the DMSParser, allowing the creation of multiple segids sharing residues with identical resids (Issue #1387, PR #2872) * H5MD files are now picklable with H5PYPicklable (Issue #2890, PR #2894) @@ -79,6 +80,8 @@ Enhancements * Added new kwargs `select_remove` and `select_protein` to analysis.dihedrals.Janin analysis to give user more fine grained control over selections (PR #2899) + * Improved performance of select_atoms on strings (e.g. name, type, resname) and + 'protein' selection (#2751 PR #2755) * Added an RDKit converter that works for any input with all hydrogens explicit in the topology (Issue #2468, PR #2775) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index bdb156ff249..dc8747832ea 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -515,7 +515,7 @@ def apply(self, group): return group[mask] -class StringSelection(Selection): +class _ProtoStringSelection(Selection): """Selections based on text attributes .. versionchanged:: 1.0.0 @@ -530,11 +530,23 @@ def __init__(self, parser, tokens): @return_empty_on_apply def apply(self, group): - mask = np.zeros(len(group), dtype=bool) - for val in self.values: - values = getattr(group, self.field) - mask |= [fnmatch.fnmatch(x, val) for x in values] - return group[mask].unique + # rather than work on group.names, cheat and look at the lookup table + nmattr = getattr(group.universe._topology, self.field) + + matches = [] # list of passing indices + # iterate through set of known atom names, check which pass + for nm, ix in nmattr.namedict.items(): + if any(fnmatch.fnmatchcase(nm, val) for val in self.values): + matches.append(ix) + + # atomname indices for members of this group + nmidx = nmattr.nmidx[getattr(group, self.level)] + + return group[np.in1d(nmidx, matches)].unique + + +class StringSelection(_ProtoStringSelection): + level = 'ix' # operates on atom level attribute, i.e. '.ix' class AtomNameSelection(StringSelection): @@ -561,22 +573,27 @@ class AtomICodeSelection(StringSelection): field = 'icodes' -class ResidueNameSelection(StringSelection): +class _ResidueStringSelection(_ProtoStringSelection): + level= 'resindices' + + +class ResidueNameSelection(_ResidueStringSelection): """Select atoms based on 'resnames' attribute""" token = 'resname' field = 'resnames' -class MoleculeTypeSelection(StringSelection): +class MoleculeTypeSelection(_ResidueStringSelection): """Select atoms based on 'moltypes' attribute""" token = 'moltype' field = 'moltypes' -class SegmentNameSelection(StringSelection): +class SegmentNameSelection(_ProtoStringSelection): """Select atoms based on 'segids' attribute""" token = 'segid' field = 'segids' + level = 'segindices' class AltlocSelection(StringSelection): @@ -802,10 +819,15 @@ class ProteinSelection(Selection): See Also -------- :func:`MDAnalysis.lib.util.convert_aa_code` + + + .. versionchanged:: 2.0.0 + prot_res changed to set (from numpy array) + performance improved by ~100x on larger systems """ token = 'protein' - prot_res = np.array([ + prot_res = { # CHARMM top_all27_prot_lipid.rtf 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HSD', 'HSE', 'HSP', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', @@ -828,14 +850,20 @@ class ProteinSelection(Selection): 'CLEU', 'CILE', 'CVAL', 'CASF', 'CASN', 'CGLN', 'CARG', 'CHID', 'CHIE', 'CHIP', 'CTRP', 'CPHE', 'CTYR', 'CGLU', 'CASP', 'CLYS', 'CPRO', 'CCYS', 'CCYX', 'CMET', 'CME', 'ASF', - ]) + } def __init__(self, parser, tokens): pass def apply(self, group): - mask = np.in1d(group.resnames, self.prot_res) - return group[mask].unique + resname_attr = group.universe._topology.resnames + # which values in resname attr are in prot_res? + matches = [ix for (nm, ix) in resname_attr.namedict.items() + if nm in self.prot_res] + # index of each atom's resname + nmidx = resname_attr.nmidx[group.resindices] + # intersect atom's resname index and matches to prot_res + return group[np.in1d(nmidx, matches)].unique class NucleicSelection(Selection): @@ -850,23 +878,32 @@ class NucleicSelection(Selection): .. versionchanged:: 0.8 additional Gromacs selections + .. versionchanged:: 2.0.0 + nucl_res changed to set (from numpy array) + performance improved by ~100x on larger systems """ token = 'nucleic' - nucl_res = np.array([ + nucl_res = { 'ADE', 'URA', 'CYT', 'GUA', 'THY', 'DA', 'DC', 'DG', 'DT', 'RA', 'RU', 'RG', 'RC', 'A', 'T', 'U', 'C', 'G', 'DA5', 'DC5', 'DG5', 'DT5', 'DA3', 'DC3', 'DG3', 'DT3', 'RA5', 'RU5', 'RG5', 'RC5', 'RA3', 'RU3', 'RG3', 'RC3' - ]) + } def __init__(self, parser, tokens): pass def apply(self, group): - mask = np.in1d(group.resnames, self.nucl_res) + resnames = group.universe._topology.resnames + nmidx = resnames.nmidx[group.resindices] + + matches = [ix for (nm, ix) in resnames.namedict.items() + if nm in self.nucl_res] + mask = np.in1d(nmidx, matches) + return group[mask].unique @@ -875,14 +912,32 @@ class BackboneSelection(ProteinSelection): This excludes OT* on C-termini (which are included by, eg VMD's backbone selection). + + + .. versionchanged:: 2.0.0 + bb_atoms changed to set (from numpy array) + performance improved by ~100x on larger systems """ token = 'backbone' - bb_atoms = np.array(['N', 'CA', 'C', 'O']) + bb_atoms = {'N', 'CA', 'C', 'O'} def apply(self, group): - mask = np.in1d(group.names, self.bb_atoms) - mask &= np.in1d(group.resnames, self.prot_res) - return group[mask].unique + atomnames = group.universe._topology.names + resnames = group.universe._topology.resnames + + # filter by atom names + name_matches = [ix for (nm, ix) in atomnames.namedict.items() + if nm in self.bb_atoms] + nmidx = atomnames.nmidx[group.ix] + group = group[np.in1d(nmidx, name_matches)] + + # filter by resnames + resname_matches = [ix for (nm, ix) in resnames.namedict.items() + if nm in self.prot_res] + nmidx = resnames.nmidx[group.resindices] + group = group[np.in1d(nmidx, resname_matches)] + + return group.unique class NucleicBackboneSelection(NucleicSelection): @@ -890,14 +945,32 @@ class NucleicBackboneSelection(NucleicSelection): These atoms are only recognized if they are in a residue matched by the :class:`NucleicSelection`. + + + .. versionchanged:: 2.0.0 + bb_atoms changed to set (from numpy array) + performance improved by ~100x on larger systems """ token = 'nucleicbackbone' - bb_atoms = np.array(["P", "C5'", "C3'", "O3'", "O5'"]) + bb_atoms = {"P", "C5'", "C3'", "O3'", "O5'"} def apply(self, group): - mask = np.in1d(group.names, self.bb_atoms) - mask &= np.in1d(group.resnames, self.nucl_res) - return group[mask].unique + atomnames = group.universe._topology.names + resnames = group.universe._topology.resnames + + # filter by atom names + name_matches = [ix for (nm, ix) in atomnames.namedict.items() + if nm in self.bb_atoms] + nmidx = atomnames.nmidx[group.ix] + group = group[np.in1d(nmidx, name_matches)] + + # filter by resnames + resname_matches = [ix for (nm, ix) in resnames.namedict.items() + if nm in self.nucl_res] + nmidx = resnames.nmidx[group.resindices] + group = group[np.in1d(nmidx, resname_matches)] + + return group.unique class BaseSelection(NucleicSelection): @@ -907,29 +980,65 @@ class BaseSelection(NucleicSelection): 'N9', 'N7', 'C8', 'C5', 'C4', 'N3', 'C2', 'N1', 'C6', 'O6','N2','N6', 'O2','N4','O4','C5M' + + + .. versionchanged:: 2.0.0 + base_atoms changed to set (from numpy array) + performance improved by ~100x on larger systems """ token = 'nucleicbase' - base_atoms = np.array([ + base_atoms = { 'N9', 'N7', 'C8', 'C5', 'C4', 'N3', 'C2', 'N1', 'C6', 'O6', 'N2', 'N6', - 'O2', 'N4', 'O4', 'C5M']) + 'O2', 'N4', 'O4', 'C5M'} def apply(self, group): - mask = np.in1d(group.names, self.base_atoms) - mask &= np.in1d(group.resnames, self.nucl_res) - return group[mask].unique + atomnames = group.universe._topology.names + resnames = group.universe._topology.resnames + + # filter by atom names + name_matches = [ix for (nm, ix) in atomnames.namedict.items() + if nm in self.base_atoms] + nmidx = atomnames.nmidx[group.ix] + group = group[np.in1d(nmidx, name_matches)] + + # filter by resnames + resname_matches = [ix for (nm, ix) in resnames.namedict.items() + if nm in self.nucl_res] + nmidx = resnames.nmidx[group.resindices] + group = group[np.in1d(nmidx, resname_matches)] + + return group.unique class NucleicSugarSelection(NucleicSelection): """Contains all atoms with name C1', C2', C3', C4', O2', O4', O3'. + + + .. versionchanged:: 2.0.0 + sug_atoms changed to set (from numpy array) + performance improved by ~100x on larger systems """ token = 'nucleicsugar' - sug_atoms = np.array(["C1'", "C2'", "C3'", "C4'", "O4'"]) + sug_atoms = {"C1'", "C2'", "C3'", "C4'", "O4'"} def apply(self, group): - mask = np.in1d(group.names, self.sug_atoms) - mask &= np.in1d(group.resnames, self.nucl_res) - return group[mask].unique + atomnames = group.universe._topology.names + resnames = group.universe._topology.resnames + + # filter by atom names + name_matches = [ix for (nm, ix) in atomnames.namedict.items() + if nm in self.sug_atoms] + nmidx = atomnames.nmidx[group.ix] + group = group[np.in1d(nmidx, name_matches)] + + # filter by resnames + resname_matches = [ix for (nm, ix) in resnames.namedict.items() + if nm in self.nucl_res] + nmidx = resnames.nmidx[group.resindices] + group = group[np.in1d(nmidx, resname_matches)] + + return group.unique class PropertySelection(Selection): diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index c600ada3eb1..a591f09cb3a 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -473,8 +473,65 @@ def _gen_initial_values(na, nr, ns): return np.arange(1, na + 1) +class _AtomStringAttr(AtomAttr): + def __init__(self, vals, guessed=False): + self._guessed = guessed + + self.namedict = dict() # maps str to nmidx + name_lookup = [] # maps idx to str + # eg namedict['O'] = 5 & name_lookup[5] = 'O' + + self.nmidx = np.zeros_like(vals, dtype=int) # the lookup for each atom + # eg Atom 5 is 'C', so nmidx[5] = 7, where name_lookup[7] = 'C' + + for i, val in enumerate(vals): + try: + self.nmidx[i] = self.namedict[val] + except KeyError: + nextidx = len(self.namedict) + self.namedict[val] = nextidx + name_lookup.append(val) + + self.nmidx[i] = nextidx + + self.name_lookup = np.array(name_lookup, dtype=object) + self.values = self.name_lookup[self.nmidx] + + @staticmethod + def _gen_initial_values(na, nr, ns): + return np.array(['' for _ in range(na)], dtype=object) + + @_check_length + def set_atoms(self, ag, values): + newnames = [] + + # two possibilities, either single value given, or one per Atom + if isinstance(values, str): + try: + newidx = self.namedict[values] + except KeyError: + newidx = len(self.namedict) + self.namedict[values] = newidx + newnames.append(values) + else: + newidx = np.zeros_like(values, dtype=int) + for i, val in enumerate(values): + try: + newidx[i] = self.namedict[val] + except KeyError: + nextidx = len(self.namedict) + self.namedict[val] = nextidx + newnames.append(val) + newidx[i] = nextidx + + self.nmidx[ag.ix] = newidx # newidx either single value or same size array + if newnames: + self.name_lookup = np.concatenate([self.name_lookup, newnames]) + self.values = self.name_lookup[self.nmidx] + + # TODO: update docs to property doc -class Atomnames(AtomAttr): +class Atomnames(_AtomStringAttr): """Name for each atom. """ attrname = 'names' @@ -483,10 +540,6 @@ class Atomnames(AtomAttr): dtype = object transplants = defaultdict(list) - @staticmethod - def _gen_initial_values(na, nr, ns): - return np.array(['' for _ in range(na)], dtype=object) - def phi_selection(residue, c_name='C', n_name='N', ca_name='CA'): """Select AtomGroup corresponding to the phi protein backbone dihedral C'-N-CA-C. @@ -958,20 +1011,16 @@ def chi1_selections(residues, n_name='N', ca_name='CA', cb_name='CB', # TODO: update docs to property doc -class Atomtypes(AtomAttr): +class Atomtypes(_AtomStringAttr): """Type for each atom""" attrname = 'types' singular = 'type' per_object = 'atom' dtype = object - @staticmethod - def _gen_initial_values(na, nr, ns): - return np.array(['' for _ in range(na)], dtype=object) - # TODO: update docs to property doc -class Elements(AtomAttr): +class Elements(_AtomStringAttr): """Element for each atom""" attrname = 'elements' singular = 'element' @@ -995,7 +1044,7 @@ def _gen_initial_values(na, nr, ns): return np.zeros(na) -class RecordTypes(AtomAttr): +class RecordTypes(_AtomStringAttr): """For PDB-like formats, indicates if ATOM or HETATM Defaults to 'ATOM' @@ -1013,7 +1062,7 @@ def _gen_initial_values(na, nr, ns): return np.array(['ATOM'] * na, dtype=object) -class ChainIDs(AtomAttr): +class ChainIDs(_AtomStringAttr): """ChainID per atom Note @@ -1025,10 +1074,6 @@ class ChainIDs(AtomAttr): per_object = 'atom' dtype = object - @staticmethod - def _gen_initial_values(na, nr, ns): - return np.array(['' for _ in range(na)], dtype=object) - class Tempfactors(AtomAttr): """Tempfactor for atoms""" @@ -1574,7 +1619,7 @@ def _gen_initial_values(na, nr, ns): # TODO: update docs to property doc -class AltLocs(AtomAttr): +class AltLocs(_AtomStringAttr): """AltLocs for each atom""" attrname = 'altLocs' singular = 'altLoc' @@ -1727,8 +1772,65 @@ def _gen_initial_values(na, nr, ns): return np.arange(1, nr + 1) +class _ResidueStringAttr(ResidueAttr): + def __init__(self, vals, guessed=False): + self._guessed = guessed + + self.namedict = dict() # maps str to nmidx + name_lookup = [] # maps idx to str + # eg namedict['O'] = 5 & name_lookup[5] = 'O' + + self.nmidx = np.zeros_like(vals, dtype=int) # the lookup for each atom + # eg Atom 5 is 'C', so nmidx[5] = 7, where name_lookup[7] = 'C' + + for i, val in enumerate(vals): + try: + self.nmidx[i] = self.namedict[val] + except KeyError: + nextidx = len(self.namedict) + self.namedict[val] = nextidx + name_lookup.append(val) + + self.nmidx[i] = nextidx + + self.name_lookup = np.array(name_lookup, dtype=object) + self.values = self.name_lookup[self.nmidx] + + @staticmethod + def _gen_initial_values(na, nr, ns): + return np.array(['' for _ in range(nr)], dtype=object) + + @_check_length + def set_residues(self, rg, values): + newnames = [] + + # two possibilities, either single value given, or one per Atom + if isinstance(values, str): + try: + newidx = self.namedict[values] + except KeyError: + newidx = len(self.namedict) + self.namedict[values] = newidx + newnames.append(values) + else: + newidx = np.zeros_like(values, dtype=int) + for i, val in enumerate(values): + try: + newidx[i] = self.namedict[val] + except KeyError: + nextidx = len(self.namedict) + self.namedict[val] = nextidx + newnames.append(val) + newidx[i] = nextidx + + self.nmidx[rg.ix] = newidx # newidx either single value or same size array + if newnames: + self.name_lookup = np.concatenate([self.name_lookup, newnames]) + self.values = self.name_lookup[self.nmidx] + + # TODO: update docs to property doc -class Resnames(ResidueAttr): +class Resnames(_ResidueStringAttr): attrname = 'resnames' singular = 'resname' transplants = defaultdict(list) @@ -1847,18 +1949,14 @@ def _gen_initial_values(na, nr, ns): return np.arange(1, nr + 1) -class ICodes(ResidueAttr): +class ICodes(_ResidueStringAttr): """Insertion code for Atoms""" attrname = 'icodes' singular = 'icode' dtype = object - @staticmethod - def _gen_initial_values(na, nr, ns): - return np.array(['' for _ in range(nr)], dtype=object) - -class Moltypes(ResidueAttr): +class Moltypes(_ResidueStringAttr): """Name of the molecule type Two molecules that share a molecule type share a common template topology. @@ -1910,8 +2008,65 @@ def set_segments(self, sg, values): self.values[sg.ix] = values +class _SegmentStringAttr(SegmentAttr): + def __init__(self, vals, guessed=False): + self._guessed = guessed + + self.namedict = dict() # maps str to nmidx + name_lookup = [] # maps idx to str + # eg namedict['O'] = 5 & name_lookup[5] = 'O' + + self.nmidx = np.zeros_like(vals, dtype=int) # the lookup for each atom + # eg Atom 5 is 'C', so nmidx[5] = 7, where name_lookup[7] = 'C' + + for i, val in enumerate(vals): + try: + self.nmidx[i] = self.namedict[val] + except KeyError: + nextidx = len(self.namedict) + self.namedict[val] = nextidx + name_lookup.append(val) + + self.nmidx[i] = nextidx + + self.name_lookup = np.array(name_lookup, dtype=object) + self.values = self.name_lookup[self.nmidx] + + @staticmethod + def _gen_initial_values(na, nr, ns): + return np.array(['' for _ in range(nr)], dtype=object) + + @_check_length + def set_segments(self, sg, values): + newnames = [] + + # two possibilities, either single value given, or one per Atom + if isinstance(values, str): + try: + newidx = self.namedict[values] + except KeyError: + newidx = len(self.namedict) + self.namedict[values] = newidx + newnames.append(values) + else: + newidx = np.zeros_like(values, dtype=int) + for i, val in enumerate(values): + try: + newidx[i] = self.namedict[val] + except KeyError: + nextidx = len(self.namedict) + self.namedict[val] = nextidx + newnames.append(val) + newidx[i] = nextidx + + self.nmidx[sg.ix] = newidx # newidx either single value or same size array + if newnames: + self.name_lookup = np.concatenate([self.name_lookup, newnames]) + self.values = self.name_lookup[self.nmidx] + + # TODO: update docs to property doc -class Segids(SegmentAttr): +class Segids(_SegmentStringAttr): attrname = 'segids' singular = 'segid' transplants = defaultdict(list) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index b7d0f515b7f..4042bb84820 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -74,7 +74,7 @@ def test_protein(self, universe): sorted(universe.select_atoms('segid 4AKE').indices), "selected protein is not the same as auto-generated protein segment s4AKE") - @pytest.mark.parametrize('resname', MDAnalysis.core.selection.ProteinSelection.prot_res) + @pytest.mark.parametrize('resname', sorted(MDAnalysis.core.selection.ProteinSelection.prot_res)) def test_protein_resnames(self, resname): u = make_Universe(('resnames',)) # set half the residues' names to the resname we're testing diff --git a/testsuite/MDAnalysisTests/core/test_segmentgroup.py b/testsuite/MDAnalysisTests/core/test_segmentgroup.py index 3f0c251e543..546c5ad44cf 100644 --- a/testsuite/MDAnalysisTests/core/test_segmentgroup.py +++ b/testsuite/MDAnalysisTests/core/test_segmentgroup.py @@ -88,6 +88,24 @@ def test_set_segid_updates_(universe): err_msg="old selection was not changed in place after set_segid") +def test_set_segids_many(): + u = mda.Universe.empty(n_atoms=6, n_residues=2, n_segments=2, + atom_resindex=[0, 0, 0, 1, 1, 1], residue_segindex=[0,1]) + u.add_TopologyAttr('segids', ['A', 'B']) + + # universe with 2 segments, A and B + + u.segments.segids = ['X', 'Y'] + + assert u.segments[0].segid == 'X' + assert u.segments[1].segid == 'Y' + + assert len(u.select_atoms('segid A')) == 0 + assert len(u.select_atoms('segid B')) == 0 + assert len(u.select_atoms('segid X')) == 3 + assert len(u.select_atoms('segid Y')) == 3 + + def test_atom_order(universe): assert_equal(universe.segments.atoms.indices, sorted(universe.segments.atoms.indices)) diff --git a/testsuite/MDAnalysisTests/core/test_topologyattrs.py b/testsuite/MDAnalysisTests/core/test_topologyattrs.py index 6fa082b3b3f..270491514af 100644 --- a/testsuite/MDAnalysisTests/core/test_topologyattrs.py +++ b/testsuite/MDAnalysisTests/core/test_topologyattrs.py @@ -93,6 +93,7 @@ class TestAtomAttr(TopologyAttrMixin): """ values = np.array([7, 3, 69, 9993, 84, 194, 263, 501, 109, 5873]) + single_value = 567 attrclass = tpattrs.AtomAttr def test_set_atom_VE(self): @@ -112,8 +113,9 @@ def test_get_atoms(self, attr): def test_set_atoms_singular(self, attr): # set len 2 Group to len 1 value dg = DummyGroup([3, 7]) - attr.set_atoms(dg, 567) - assert_equal(attr.get_atoms(dg), np.array([567, 567])) + attr.set_atoms(dg, self.single_value) + assert_equal(attr.get_atoms(dg), + np.array([self.single_value, self.single_value])) def test_set_atoms_plural(self, attr): # set len 2 Group to len 2 values @@ -175,6 +177,7 @@ def test_cant_set_segment_indices(self, u): class TestAtomnames(TestAtomAttr): values = np.array(['O', 'C', 'CA', 'N', 'CB', 'CG', 'CD', 'NA', 'CL', 'OW'], dtype=np.object) + single_value = 'Ca2' attrclass = tpattrs.Atomnames @@ -206,18 +209,19 @@ class TestResidueAttr(TopologyAttrMixin): """Test residue-level TopologyAttrs. """ + single_value = 2 values = np.array([15.2, 395.6, 0.1, 9.8]) attrclass = tpattrs.ResidueAttr - def test_set_residue_VE(self): - u = make_Universe(('resnames',)) - res = u.residues[0] + def test_set_residue_VE(self, universe): + # setting e.g. resname to 2 values should fail with VE + res = universe.residues[0] with pytest.raises(ValueError): - setattr(res, 'resname', ['wrong', 'length']) + setattr(res, self.attrclass.singular, self.values[:2]) def test_get_atoms(self, attr): assert_equal(attr.get_atoms(DummyGroup([7, 3, 9])), - self.values[[3, 2, 2]]) + self.values[[3, 2, 2]]) def test_get_atom(self, universe): attr = getattr(universe.atoms[0], self.attrclass.singular) @@ -225,14 +229,14 @@ def test_get_atom(self, universe): def test_get_residues(self, attr): assert_equal(attr.get_residues(DummyGroup([1, 2, 1, 3])), - self.values[[1, 2, 1, 3]]) + self.values[[1, 2, 1, 3]]) def test_set_residues_singular(self, attr): dg = DummyGroup([3, 0, 1]) - attr.set_residues(dg, 2) + attr.set_residues(dg, self.single_value) - assert_almost_equal(attr.get_residues(dg), - np.array([2, 2, 2])) + assert_equal(attr.get_residues(dg), + np.array([self.single_value]*3, dtype=self.values.dtype)) def test_set_residues_plural(self, attr): attr.set_residues(DummyGroup([3, 0, 1]), @@ -254,10 +258,17 @@ def test_get_segments(self, attr): assert_equal(attr.get_segments(DummyGroup([0, 1, 1])), [self.values[[0, 3]], self.values[[1, 2]], self.values[[1, 2]]]) -class TestICodes(TestResidueAttr): - values = np.array(['a', 'b', '', 'd']) + +class TestResnames(TestResidueAttr): + attrclass = tpattrs.Resnames + single_value = 'xyz' + values = np.array(['a', 'b', '', 'd'], dtype=object) + + +class TestICodes(TestResnames): attrclass = tpattrs.ICodes + class TestResids(TestResidueAttr): values = np.array([10, 11, 18, 20]) attrclass = tpattrs.Resids