diff --git a/package/CHANGELOG b/package/CHANGELOG index 20cc2a4c6d9..a791a753d76 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -43,6 +43,8 @@ Enhancements analysis code (Issue #743) * Added ProgressMeter to 'transfer_to_memory' function, to show progress of loading trajectory to memory (Issue #1028 and PR #1055). + * Selecting atoms by resid now respects icodes if they were present. Ie + select_atoms('resid 163A') works! (Issue #839 PR #1066) Fixes * Removed MDAnalysis.analysis.PDBToBinaryTraj (Issue #1035) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 3afff79b8a8..912b2528235 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -995,6 +995,11 @@ def select_atoms(self, sel, *othersel, **selgroups): range consists of two numbers separated by a colon (inclusive) such as ``resid 1:5``. A residue number ("resid") is taken directly from the topology. + If icodes are present in the topology, then these will be + taken into account. Ie 'resid 163B' will only select resid + 163 with icode B while 'resid 163' will select only residue 163. + Range selections will also respect icodes, so 'resid 162-163B' + will select all residues in 162 and those in 163 up to icode B. resnum *resnum-number-range* resnum is the canonical residue number; typically it is set to the residue id in the original PDB structure. @@ -1098,7 +1103,8 @@ def select_atoms(self, sel, *othersel, **selgroups): Added *group* and *fullgroup* selections. .. versionchanged:: 0.13.0 Added *bonded* selection - + .. versionchanged:: 0.16.0 + Resid selection now takes icodes into account where present. """ atomgrp = selection.Parser.parse(sel, selgroups).apply(self) if othersel: diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 0db8db83e6d..a47bee877d4 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -616,7 +616,7 @@ class AltlocSelection(StringSelection): field = 'altLocs' -class RangeSelection(Selection): +class ResidSelection(Selection): """Select atoms based on numerical fields Allows the use of ':' and '-' to specify a range of values @@ -624,14 +624,129 @@ class RangeSelection(Selection): resid 1:10 """ + token = 'resid' + def __init__(self, parser, tokens): values = grab_not_keywords(tokens) if not values: raise ValueError("Unexpected token: '{0}'".format(tokens[0])) + # each value in uppers and lowers is a tuple of (resid, icode) uppers = [] lowers = [] + for val in values: + m1 = re.match("(\d+)(\w?)$", val) + if not m1 is None: + res = m1.groups() + lower = int(res[0]), res[1] + upper = None, None + else: + # check if in appropriate format 'lower:upper' or 'lower-upper' + # each val is one or more digits, maybe a letter + selrange = re.match("(\d+)(\w?)[:-](\d+)(\w?)", val) + if selrange is None: # re.match returns None on failure + raise ValueError("Failed to parse value: {0}".format(val)) + res = selrange.groups() + # resid and icode + lower = int(res[0]), res[1] + upper = int(res[2]), res[3] + + lowers.append(lower) + uppers.append(upper) + + self.lowers = lowers + self.uppers = uppers + + def apply(self, group): + # Grab arrays here to reduce number of calls to main topology + vals = group.resids + try: # optional attribute + icodes = group.icodes + except (AttributeError, NoDataError): + icodes = None + # if no icodes and icodes are part of selection, cause a fuss + if (any(v[1] for v in self.uppers) or + any(v[1] for v in self.lowers)): + raise ValueError("Selection specified icodes, while the " + "topology doesn't have any.") + + if not icodes is None: + mask = self._sel_with_icodes(vals, icodes) + else: + mask = self._sel_without_icodes(vals) + + return unique(group[mask]) + + def _sel_without_icodes(self, vals): + # Final mask that gets applied to group + mask = np.zeros(len(vals), dtype=np.bool) + + for (u_resid, _), (l_resid, _) in zip(self.uppers, self.lowers): + if u_resid is not None: # range selection + thismask = vals >= l_resid + thismask &= vals <= u_resid + else: # single residue selection + thismask = vals == l_resid + + mask |= thismask + + return mask + + def _sel_with_icodes(self, vals, icodes): + # Final mask that gets applied to group + mask = np.zeros(len(vals), dtype=np.bool) + + for (u_resid, u_icode), (l_resid, l_icode) in zip(self.uppers, self.lowers): + if u_resid is not None: # Selecting a range + # Special case, if l_resid == u_resid, ie 163A-163C, this simplifies to: + # all 163, and A <= icode <= C + if l_resid == u_resid: + thismask = vals == l_resid + thismask &= icodes >= l_icode + thismask &= icodes <= u_icode + # For 163A to 166B we want: + # [START] all 163 and icode >= 'A' + # [MIDDLE] all of 164 and 165, any icode + # [END] 166 and icode <= 'B' + else: + # start of range + startmask = vals == l_resid + startmask &= icodes >= l_icode + thismask = startmask + + # middle of range + mid = np.arange(l_resid + 1, u_resid) + if len(mid): # if there are any resids in the middle + mid_beg, mid_end = mid[0], mid[-1] + midmask = vals >= mid_beg + midmask &= vals <= mid_end + + thismask |= midmask + + # end of range + endmask = vals == u_resid + endmask &= icodes <= u_icode + + thismask |= endmask + else: # Selecting a single residue + thismask = vals == l_resid + thismask &= icodes == l_icode + + mask |= thismask + + return mask + + +class RangeSelection(Selection): + def __init__(self, parser, tokens): + values = grab_not_keywords(tokens) + if not values: + raise ValueError("Unexpected token: '{0}'".format(tokens[0])) + + uppers = [] # upper limit on any range + lowers = [] # lower limit on any range + for val in values: try: lower = int(val) @@ -650,12 +765,13 @@ def __init__(self, parser, tokens): self.lowers = lowers self.uppers = uppers - def _get_vals(self, group): - return getattr(group, self.field) + +class ResnumSelection(RangeSelection): + token = 'resnum' def apply(self, group): mask = np.zeros(len(group), dtype=np.bool) - vals = self._get_vals(group) + vals = group.resnums for upper, lower in zip(self.uppers, self.lowers): if upper is not None: @@ -668,23 +784,23 @@ def apply(self, group): return unique(group[mask]) -class ResidueIDSelection(RangeSelection): - token = 'resid' - field = 'resids' - +class ByNumSelection(RangeSelection): + token = 'bynum' -class ResnumSelection(RangeSelection): - token = 'resnum' - field = 'resnums' + def apply(self, group): + mask = np.zeros(len(group), dtype=np.bool) + vals = group.indices + 1 # queries are in 1 based indices + for upper, lower in zip(self.uppers, self.lowers): + if upper is not None: + thismask = vals >= lower + thismask &= vals <= upper + else: + thismask = vals == lower -class ByNumSelection(RangeSelection): - token = 'bynum' + mask |= thismask + return unique(group[mask]) - def _get_vals(self, group): - # In this case we'll use 1 indexing since that's what the - # user will be familiar with - return group.indices + 1 class ProteinSelection(Selection): diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 333469e7f6e..84ccd47d94f 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -45,6 +45,7 @@ GRO, NUCL, NUCLsel, TPR, XTC, TRZ_psf, TRZ, PDB_full, + PDB_icodes, ) from MDAnalysisTests.plugins.knownfailure import knownfailure from MDAnalysisTests import parser_not_found @@ -846,3 +847,122 @@ def test_range_selections(self): yield (self._check_sels, ref.format(typ=seltype), sel.format(typ=seltype)) + +class TestICodeSelection(object): + def setUp(self): + self.u = mda.Universe(PDB_icodes) + + def tearDown(self): + del self.u + + def test_select_icode(self): + ag = self.u.select_atoms('resid 163A') + + assert_(len(ag) == 7) + assert_array_equal(ag.ids, np.arange(7) + 1230) + + def test_select_resid_implicit_icode(self): + ag = self.u.select_atoms('resid 163') + + assert_(len(ag) == 6) + assert_array_equal(ag.ids, np.arange(6) + 1224) + + def test_select_icode_range_1(self): + # testing range within a single resid integer value + u = self.u + ag = u.select_atoms('resid 163B-163D') + + # do it manually without selection language... + ref = u.residues[u.residues.resids == 163] + ref = ref[(ref.icodes >= 'B') & (ref.icodes <= 'D')] + ref = ref.atoms + + assert_array_equal(ag.ids, ref.ids) + + assert_(len(ag) == 19) + assert_array_equal(ag.ids, np.arange(19) + 1237) + + def test_select_icode_range_2(self): + u = self.u + + ag = u.select_atoms('resid 163B-165') + + resids = u.residues.resids + + start = u.residues[resids == 163] + start = start[start.icodes >= 'B'] + + mid = u.residues[resids == 164] + + end = u.residues[resids == 165] + end = end[end.icodes == ''] + + ref = start.atoms + mid.atoms + end.atoms + + assert_array_equal(ag.ids, ref.ids) + + def test_select_icode_range_3(self): + # same as #2 but with no "middle" icodes + u = self.u + + ag = u.select_atoms('resid 163B-164') + + resids = u.residues.resids + + start = u.residues[resids == 163] + start = start[start.icodes >= 'B'] + + end = u.residues[resids == 164] + end = end[end.icodes == ''] + + ref = start.atoms + end.atoms + + assert_array_equal(ag.ids, ref.ids) + + def test_select_icode_range_4(self): + u = self.u + + ag = u.select_atoms('resid 160-163G') + + resids = u.residues.resids + + start = u.residues[resids == 160] + start = start[start.icodes >= ''] + + mid = u.residues[(resids == 161) | (resids == 162)] + + end = u.residues[resids == 163] + end = end[end.icodes <= 'G'] + + ref = start.atoms + mid.atoms + end.atoms + + assert_array_equal(ag.ids, ref.ids) + + def test_select_icode_range_5(self): + # same as #4 but with no "middle" icodes in range + u = self.u + + ag = u.select_atoms('resid 162-163G') + + resids = u.residues.resids + + start = u.residues[resids == 162] + start = start[start.icodes >= ''] + + end = u.residues[resids == 163] + end = end[end.icodes <= 'G'] + + ref = start.atoms + end.atoms + + assert_array_equal(ag.ids, ref.ids) + + def test_missing_icodes_VE(self): + # trying a selection with icodes in a Universe without raises VA + u = make_Universe(('resids',)) + + assert_raises(ValueError, u.select_atoms, 'resid 10A') + + def test_missing_icodes_range_VE(self): + u = make_Universe(('resids',)) + + assert_raises(ValueError, u.select_atoms, 'resid 10A-12') diff --git a/testsuite/MDAnalysisTests/data/1osm.pdb.gz b/testsuite/MDAnalysisTests/data/1osm.pdb.gz new file mode 100644 index 00000000000..edfb96d1530 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/1osm.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index f9578385f20..6e860c4f368 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -45,6 +45,7 @@ "PDB_conect", "PDB_conect2TER", # Conect record to a TER entry (Issue 936) "PDB_singleconect", # Conect record with one entry (Issue 937) + "PDB_icodes", # stripped down version of 1osm, has icodes! "XPDB_small", "PDB_full", # PDB 4E43 (full HEADER, TITLE, COMPND, REMARK, altloc) "ALIGN", # Various way to align atom names in PDB files @@ -176,6 +177,7 @@ PDB_conect = resource_filename(__name__, 'data/conect_parsing.pdb') PDB_conect2TER = resource_filename(__name__, 'data/CONECT2TER.pdb') PDB_singleconect = resource_filename(__name__, 'data/SINGLECONECT.pdb') +PDB_icodes = resource_filename(__name__, 'data/1osm.pdb.gz') GRO = resource_filename(__name__, 'data/adk_oplsaa.gro') GRO_velocity = resource_filename(__name__, 'data/sample_velocity_file.gro')