Skip to content

Commit

Permalink
HydrogenBondAnalysis: small improvements
Browse files Browse the repository at this point in the history
- generate_table(): do not build recarray but just dtype array and removed superfluous code
- added tests for generate_table()
- import itertools at module level instead multiple times in methods
  • Loading branch information
orbeckst committed Jun 16, 2015
1 parent 2da4b76 commit 4fa5ea8
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 27 deletions.
32 changes: 11 additions & 21 deletions package/MDAnalysis/analysis/hbonds/hbond_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis):
"""

from collections import defaultdict
import itertools
import numpy

from MDAnalysis import MissingDataWarning, NoDataError, SelectionError, SelectionWarning
Expand Down Expand Up @@ -942,40 +943,30 @@ def generate_table(self):
.. _recsql: http://pypi.python.org/pypi/RecSQL
"""
from itertools import izip

if self.timeseries is None:
msg = "No timeseries computed, do run() first."
warnings.warn(msg, category=MissingDataWarning)
logger.warn(msg)
return

num_records = numpy.sum([len(hframe) for hframe in self.timeseries])
# build empty output table
dtype = [
("time", float), ("donor_idx", int), ("acceptor_idx", int),
("donor_resnm", "|S4"), ("donor_resid", int), ("donor_atom", "|S4"),
("acceptor_resnm", "|S4"), ("acceptor_resid", int), ("acceptor_atom", "|S4"),
("distance", float), ("angle", float)]
self.table = numpy.recarray((num_records,), dtype=dtype)

# according to Lukas' notes below, using a recarray at this stage is ineffective
# and speedups of ~x10 could be achieved by filling a standard array
# (perhaps at the cost of less clarity... but that might just be my code ;-) -- orbeckst)
# and speedups of ~x10 can be achieved by filling a standard array, like this:
out = numpy.empty((num_records,), dtype=dtype)
cursor = 0 # current row
for t, hframe in izip(self.timesteps, self.timeseries):
if len(hframe) == 0:
continue # not really necessary, should also work without
self.table[cursor:cursor + len(hframe)].time = t
for t, hframe in itertools.izip(self.timesteps, self.timeseries):
for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe:
r = self.table[cursor]
r.donor_idx = donor_idx
r.donor_resnm, r.donor_resid, r.donor_atom = parse_residue(donor)
r.acceptor_idx = acceptor_idx
r.acceptor_resnm, r.acceptor_resid, r.acceptor_atom = parse_residue(acceptor)
r.distance = distance
r.angle = angle
out[cursor] = (t, donor_idx, acceptor_idx) + parse_residue(donor) + \
parse_residue(acceptor) + (distance, angle)
cursor += 1
assert cursor == num_records, "Internal Error: Not all HB records stored"
self.table = out.view(numpy.recarray)
logger.debug("HBond: Stored results as table with %(num_records)d entries.", vars())

def save_table(self, filename="hbond_table.pickle"):
Expand All @@ -999,7 +990,6 @@ def count_by_time(self):
:Returns: a class:`numpy.recarray`
"""
from itertools import izip, imap

if self.timeseries is None:
msg = "No timeseries computed, do run() first."
Expand All @@ -1008,7 +998,8 @@ def count_by_time(self):
return

out = numpy.empty((len(self.timesteps),), dtype=[('time', float), ('count', int)])
for cursor, time_count in enumerate(izip(self.timesteps, imap(len, self.timeseries))):
for cursor, time_count in enumerate(itertools.izip(self.timesteps,
itertools.imap(len, self.timeseries))):
out[cursor] = time_count
return out.view(numpy.recarray)

Expand Down Expand Up @@ -1079,7 +1070,6 @@ def timesteps_by_type(self):
:Returns: a class:`numpy.recarray`
"""
from itertools import izip

if self.timeseries is None:
msg = "No timeseries computed, do run() first."
Expand All @@ -1088,7 +1078,7 @@ def timesteps_by_type(self):
return

hbonds = defaultdict(list)
for (t, hframe) in izip(self.timesteps, self.timeseries):
for (t, hframe) in itertools.izip(self.timesteps, self.timeseries):
for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe:
donor_resnm, donor_resid, donor_atom = parse_residue(donor)
acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor)
Expand Down
28 changes: 22 additions & 6 deletions testsuite/MDAnalysisTests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,11 @@ def setUp(self):
'angle': 150.0,
}
# ideal helix with 1 proline:
self.num_bb_hbonds = u.atoms.numberOfResidues() - u.SYSTEM.PRO.numberOfResidues() - 4
self.values = {
'num_bb_hbonds': u.atoms.numberOfResidues() - u.SYSTEM.PRO.numberOfResidues() - 4,
'donor_resid': numpy.array([5, 6, 8, 9, 10, 11, 12, 13]),
'acceptor_resnm': numpy.array(['ALA', 'ALA', 'ALA', 'ALA', 'ALA', 'PRO', 'ALA', 'ALA']),
}

def _run(self, **kwargs):
kw = self.kwargs.copy()
Expand All @@ -219,26 +223,36 @@ def _run(self, **kwargs):

def test_helix_backbone(self):
h = self._run()
assert_equal(len(h.timeseries[0]), self.num_bb_hbonds, "wrong number of backbone hydrogen bonds")
assert_equal(len(h.timeseries[0]),
self.values['num_bb_hbonds'], "wrong number of backbone hydrogen bonds")
assert_equal(h.timesteps, [0.0])

def test_generate_table(self):
h = self._run()
h.generate_table()
assert_equal(len(h.table),
self.values['num_bb_hbonds'], "wrong number of backbone hydrogen bonds in table")
assert_(isinstance(h.table, numpy.core.records.recarray))
assert_array_equal(h.table.donor_resid, self.values['donor_resid'])
assert_array_equal(h.table.acceptor_resnm, self.values['acceptor_resnm'])

# TODO: Expand tests because the following ones are a bit superficial
# because we should really run them on a trajectory

def test_count_by_time(self):
h = self._run()
c = h.count_by_time()
assert_equal(c.tolist(), [(0.0, self.num_bb_hbonds)])
assert_equal(c.tolist(), [(0.0, self.values['num_bb_hbonds'])])

def test_count_by_type(self):
h = self._run()
c = h.count_by_type()
assert_equal(c.frequency, self.num_bb_hbonds * [1.0])
assert_equal(c.frequency, self.values['num_bb_hbonds'] * [1.0])

def test_count_by_type(self):
h = self._run()
t = h.timesteps_by_type()
assert_equal(t.time, self.num_bb_hbonds * [0.0])
assert_equal(t.time, self.values['num_bb_hbonds'] * [0.0])

def tearDown(self):
del self.universe
Expand All @@ -261,7 +275,9 @@ class TestHydrogenBondAnalysisHeavyFail(TestHydrogenBondAnalysisHeavy):
def setUp(self):
super(TestHydrogenBondAnalysisHeavyFail, self).setUp()
self.kwargs["distance"] = 3.0
self.num_bb_hbonds = 0 # no H-bonds with a D-A distance < 3.0 A (they start at 3.05 A)
self.values['num_bb_hbonds'] = 0 # no H-bonds with a D-A distance < 3.0 A (they start at 3.05 A)
self.values['donor_resid'] = numpy.array([])
self.values['acceptor_resnm'] = numpy.array([], dtype="|S3")

class TestHydrogenBondAnalysisChecking(object):
def _setUp(self):
Expand Down

0 comments on commit 4fa5ea8

Please sign in to comment.