Skip to content

Commit

Permalink
Merge pull request MDAnalysis#1066 from MDAnalysis/issue-839-icodesel…
Browse files Browse the repository at this point in the history
…ection

Issue MDAnalysis#839 - Resid selection with icodes
  • Loading branch information
kain88-de authored Nov 11, 2016
2 parents 2dfa20a + 941b5a9 commit 9d5d7bf
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 18 deletions.
2 changes: 2 additions & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ Enhancements
analysis code (Issue #743)
* Added ProgressMeter to 'transfer_to_memory' function, to show progress
of loading trajectory to memory (Issue #1028 and PR #1055).
* Selecting atoms by resid now respects icodes if they were present. Ie
select_atoms('resid 163A') works! (Issue #839 PR #1066)

Fixes
* Removed MDAnalysis.analysis.PDBToBinaryTraj (Issue #1035)
Expand Down
8 changes: 7 additions & 1 deletion package/MDAnalysis/core/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,11 @@ def select_atoms(self, sel, *othersel, **selgroups):
range consists of two numbers separated by a colon (inclusive)
such as ``resid 1:5``. A residue number ("resid") is taken
directly from the topology.
If icodes are present in the topology, then these will be
taken into account. Ie 'resid 163B' will only select resid
163 with icode B while 'resid 163' will select only residue 163.
Range selections will also respect icodes, so 'resid 162-163B'
will select all residues in 162 and those in 163 up to icode B.
resnum *resnum-number-range*
resnum is the canonical residue number; typically it is set to
the residue id in the original PDB structure.
Expand Down Expand Up @@ -1098,7 +1103,8 @@ def select_atoms(self, sel, *othersel, **selgroups):
Added *group* and *fullgroup* selections.
.. versionchanged:: 0.13.0
Added *bonded* selection
.. versionchanged:: 0.16.0
Resid selection now takes icodes into account where present.
"""
atomgrp = selection.Parser.parse(sel, selgroups).apply(self)
if othersel:
Expand Down
150 changes: 133 additions & 17 deletions package/MDAnalysis/core/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,22 +616,137 @@ class AltlocSelection(StringSelection):
field = 'altLocs'


class RangeSelection(Selection):
class ResidSelection(Selection):
"""Select atoms based on numerical fields
Allows the use of ':' and '-' to specify a range of values
For example
resid 1:10
"""
token = 'resid'

def __init__(self, parser, tokens):
values = grab_not_keywords(tokens)
if not values:
raise ValueError("Unexpected token: '{0}'".format(tokens[0]))

# each value in uppers and lowers is a tuple of (resid, icode)
uppers = []
lowers = []

for val in values:
m1 = re.match("(\d+)(\w?)$", val)
if not m1 is None:
res = m1.groups()
lower = int(res[0]), res[1]
upper = None, None
else:
# check if in appropriate format 'lower:upper' or 'lower-upper'
# each val is one or more digits, maybe a letter
selrange = re.match("(\d+)(\w?)[:-](\d+)(\w?)", val)
if selrange is None: # re.match returns None on failure
raise ValueError("Failed to parse value: {0}".format(val))
res = selrange.groups()
# resid and icode
lower = int(res[0]), res[1]
upper = int(res[2]), res[3]

lowers.append(lower)
uppers.append(upper)

self.lowers = lowers
self.uppers = uppers

def apply(self, group):
# Grab arrays here to reduce number of calls to main topology
vals = group.resids
try: # optional attribute
icodes = group.icodes
except (AttributeError, NoDataError):
icodes = None
# if no icodes and icodes are part of selection, cause a fuss
if (any(v[1] for v in self.uppers) or
any(v[1] for v in self.lowers)):
raise ValueError("Selection specified icodes, while the "
"topology doesn't have any.")

if not icodes is None:
mask = self._sel_with_icodes(vals, icodes)
else:
mask = self._sel_without_icodes(vals)

return unique(group[mask])

def _sel_without_icodes(self, vals):
# Final mask that gets applied to group
mask = np.zeros(len(vals), dtype=np.bool)

for (u_resid, _), (l_resid, _) in zip(self.uppers, self.lowers):
if u_resid is not None: # range selection
thismask = vals >= l_resid
thismask &= vals <= u_resid
else: # single residue selection
thismask = vals == l_resid

mask |= thismask

return mask

def _sel_with_icodes(self, vals, icodes):
# Final mask that gets applied to group
mask = np.zeros(len(vals), dtype=np.bool)

for (u_resid, u_icode), (l_resid, l_icode) in zip(self.uppers, self.lowers):
if u_resid is not None: # Selecting a range
# Special case, if l_resid == u_resid, ie 163A-163C, this simplifies to:
# all 163, and A <= icode <= C
if l_resid == u_resid:
thismask = vals == l_resid
thismask &= icodes >= l_icode
thismask &= icodes <= u_icode
# For 163A to 166B we want:
# [START] all 163 and icode >= 'A'
# [MIDDLE] all of 164 and 165, any icode
# [END] 166 and icode <= 'B'
else:
# start of range
startmask = vals == l_resid
startmask &= icodes >= l_icode
thismask = startmask

# middle of range
mid = np.arange(l_resid + 1, u_resid)
if len(mid): # if there are any resids in the middle
mid_beg, mid_end = mid[0], mid[-1]
midmask = vals >= mid_beg
midmask &= vals <= mid_end

thismask |= midmask

# end of range
endmask = vals == u_resid
endmask &= icodes <= u_icode

thismask |= endmask
else: # Selecting a single residue
thismask = vals == l_resid
thismask &= icodes == l_icode

mask |= thismask

return mask


class RangeSelection(Selection):
def __init__(self, parser, tokens):
values = grab_not_keywords(tokens)
if not values:
raise ValueError("Unexpected token: '{0}'".format(tokens[0]))

uppers = [] # upper limit on any range
lowers = [] # lower limit on any range

for val in values:
try:
lower = int(val)
Expand All @@ -650,12 +765,13 @@ def __init__(self, parser, tokens):
self.lowers = lowers
self.uppers = uppers

def _get_vals(self, group):
return getattr(group, self.field)

class ResnumSelection(RangeSelection):
token = 'resnum'

def apply(self, group):
mask = np.zeros(len(group), dtype=np.bool)
vals = self._get_vals(group)
vals = group.resnums

for upper, lower in zip(self.uppers, self.lowers):
if upper is not None:
Expand All @@ -668,23 +784,23 @@ def apply(self, group):
return unique(group[mask])


class ResidueIDSelection(RangeSelection):
token = 'resid'
field = 'resids'

class ByNumSelection(RangeSelection):
token = 'bynum'

class ResnumSelection(RangeSelection):
token = 'resnum'
field = 'resnums'
def apply(self, group):
mask = np.zeros(len(group), dtype=np.bool)
vals = group.indices + 1 # queries are in 1 based indices

for upper, lower in zip(self.uppers, self.lowers):
if upper is not None:
thismask = vals >= lower
thismask &= vals <= upper
else:
thismask = vals == lower

class ByNumSelection(RangeSelection):
token = 'bynum'
mask |= thismask
return unique(group[mask])

def _get_vals(self, group):
# In this case we'll use 1 indexing since that's what the
# user will be familiar with
return group.indices + 1


class ProteinSelection(Selection):
Expand Down
120 changes: 120 additions & 0 deletions testsuite/MDAnalysisTests/core/test_atomselections.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
GRO, NUCL, NUCLsel, TPR, XTC,
TRZ_psf, TRZ,
PDB_full,
PDB_icodes,
)
from MDAnalysisTests.plugins.knownfailure import knownfailure
from MDAnalysisTests import parser_not_found
Expand Down Expand Up @@ -846,3 +847,122 @@ def test_range_selections(self):
yield (self._check_sels,
ref.format(typ=seltype),
sel.format(typ=seltype))

class TestICodeSelection(object):
def setUp(self):
self.u = mda.Universe(PDB_icodes)

def tearDown(self):
del self.u

def test_select_icode(self):
ag = self.u.select_atoms('resid 163A')

assert_(len(ag) == 7)
assert_array_equal(ag.ids, np.arange(7) + 1230)

def test_select_resid_implicit_icode(self):
ag = self.u.select_atoms('resid 163')

assert_(len(ag) == 6)
assert_array_equal(ag.ids, np.arange(6) + 1224)

def test_select_icode_range_1(self):
# testing range within a single resid integer value
u = self.u
ag = u.select_atoms('resid 163B-163D')

# do it manually without selection language...
ref = u.residues[u.residues.resids == 163]
ref = ref[(ref.icodes >= 'B') & (ref.icodes <= 'D')]
ref = ref.atoms

assert_array_equal(ag.ids, ref.ids)

assert_(len(ag) == 19)
assert_array_equal(ag.ids, np.arange(19) + 1237)

def test_select_icode_range_2(self):
u = self.u

ag = u.select_atoms('resid 163B-165')

resids = u.residues.resids

start = u.residues[resids == 163]
start = start[start.icodes >= 'B']

mid = u.residues[resids == 164]

end = u.residues[resids == 165]
end = end[end.icodes == '']

ref = start.atoms + mid.atoms + end.atoms

assert_array_equal(ag.ids, ref.ids)

def test_select_icode_range_3(self):
# same as #2 but with no "middle" icodes
u = self.u

ag = u.select_atoms('resid 163B-164')

resids = u.residues.resids

start = u.residues[resids == 163]
start = start[start.icodes >= 'B']

end = u.residues[resids == 164]
end = end[end.icodes == '']

ref = start.atoms + end.atoms

assert_array_equal(ag.ids, ref.ids)

def test_select_icode_range_4(self):
u = self.u

ag = u.select_atoms('resid 160-163G')

resids = u.residues.resids

start = u.residues[resids == 160]
start = start[start.icodes >= '']

mid = u.residues[(resids == 161) | (resids == 162)]

end = u.residues[resids == 163]
end = end[end.icodes <= 'G']

ref = start.atoms + mid.atoms + end.atoms

assert_array_equal(ag.ids, ref.ids)

def test_select_icode_range_5(self):
# same as #4 but with no "middle" icodes in range
u = self.u

ag = u.select_atoms('resid 162-163G')

resids = u.residues.resids

start = u.residues[resids == 162]
start = start[start.icodes >= '']

end = u.residues[resids == 163]
end = end[end.icodes <= 'G']

ref = start.atoms + end.atoms

assert_array_equal(ag.ids, ref.ids)

def test_missing_icodes_VE(self):
# trying a selection with icodes in a Universe without raises VA
u = make_Universe(('resids',))

assert_raises(ValueError, u.select_atoms, 'resid 10A')

def test_missing_icodes_range_VE(self):
u = make_Universe(('resids',))

assert_raises(ValueError, u.select_atoms, 'resid 10A-12')
Binary file added testsuite/MDAnalysisTests/data/1osm.pdb.gz
Binary file not shown.
2 changes: 2 additions & 0 deletions testsuite/MDAnalysisTests/datafiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
"PDB_conect",
"PDB_conect2TER", # Conect record to a TER entry (Issue 936)
"PDB_singleconect", # Conect record with one entry (Issue 937)
"PDB_icodes", # stripped down version of 1osm, has icodes!
"XPDB_small",
"PDB_full", # PDB 4E43 (full HEADER, TITLE, COMPND, REMARK, altloc)
"ALIGN", # Various way to align atom names in PDB files
Expand Down Expand Up @@ -176,6 +177,7 @@
PDB_conect = resource_filename(__name__, 'data/conect_parsing.pdb')
PDB_conect2TER = resource_filename(__name__, 'data/CONECT2TER.pdb')
PDB_singleconect = resource_filename(__name__, 'data/SINGLECONECT.pdb')
PDB_icodes = resource_filename(__name__, 'data/1osm.pdb.gz')

GRO = resource_filename(__name__, 'data/adk_oplsaa.gro')
GRO_velocity = resource_filename(__name__, 'data/sample_velocity_file.gro')
Expand Down

0 comments on commit 9d5d7bf

Please sign in to comment.