From 4fa5ea8935fd5449ca7de861d49e02cb642a9c95 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Mon, 15 Jun 2015 22:27:13 -0700 Subject: [PATCH] HydrogenBondAnalysis: small improvements - generate_table(): do not build recarray but just dtype array and removed superfluous code - added tests for generate_table() - import itertools at module level instead multiple times in methods --- .../analysis/hbonds/hbond_analysis.py | 32 +++++++------------ testsuite/MDAnalysisTests/test_analysis.py | 28 ++++++++++++---- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index ad170302814..f8df878e6b0 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -311,6 +311,7 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): """ from collections import defaultdict +import itertools import numpy from MDAnalysis import MissingDataWarning, NoDataError, SelectionError, SelectionWarning @@ -942,8 +943,6 @@ def generate_table(self): .. _recsql: http://pypi.python.org/pypi/RecSQL """ - from itertools import izip - if self.timeseries is None: msg = "No timeseries computed, do run() first." warnings.warn(msg, category=MissingDataWarning) @@ -951,31 +950,23 @@ def generate_table(self): return num_records = numpy.sum([len(hframe) for hframe in self.timeseries]) + # build empty output table dtype = [ ("time", float), ("donor_idx", int), ("acceptor_idx", int), ("donor_resnm", "|S4"), ("donor_resid", int), ("donor_atom", "|S4"), ("acceptor_resnm", "|S4"), ("acceptor_resid", int), ("acceptor_atom", "|S4"), ("distance", float), ("angle", float)] - self.table = numpy.recarray((num_records,), dtype=dtype) - # according to Lukas' notes below, using a recarray at this stage is ineffective - # and speedups of ~x10 could be achieved by filling a standard array - # (perhaps at the cost of less clarity... but that might just be my code ;-) -- orbeckst) + # and speedups of ~x10 can be achieved by filling a standard array, like this: + out = numpy.empty((num_records,), dtype=dtype) cursor = 0 # current row - for t, hframe in izip(self.timesteps, self.timeseries): - if len(hframe) == 0: - continue # not really necessary, should also work without - self.table[cursor:cursor + len(hframe)].time = t + for t, hframe in itertools.izip(self.timesteps, self.timeseries): for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe: - r = self.table[cursor] - r.donor_idx = donor_idx - r.donor_resnm, r.donor_resid, r.donor_atom = parse_residue(donor) - r.acceptor_idx = acceptor_idx - r.acceptor_resnm, r.acceptor_resid, r.acceptor_atom = parse_residue(acceptor) - r.distance = distance - r.angle = angle + out[cursor] = (t, donor_idx, acceptor_idx) + parse_residue(donor) + \ + parse_residue(acceptor) + (distance, angle) cursor += 1 assert cursor == num_records, "Internal Error: Not all HB records stored" + self.table = out.view(numpy.recarray) logger.debug("HBond: Stored results as table with %(num_records)d entries.", vars()) def save_table(self, filename="hbond_table.pickle"): @@ -999,7 +990,6 @@ def count_by_time(self): :Returns: a class:`numpy.recarray` """ - from itertools import izip, imap if self.timeseries is None: msg = "No timeseries computed, do run() first." @@ -1008,7 +998,8 @@ def count_by_time(self): return out = numpy.empty((len(self.timesteps),), dtype=[('time', float), ('count', int)]) - for cursor, time_count in enumerate(izip(self.timesteps, imap(len, self.timeseries))): + for cursor, time_count in enumerate(itertools.izip(self.timesteps, + itertools.imap(len, self.timeseries))): out[cursor] = time_count return out.view(numpy.recarray) @@ -1079,7 +1070,6 @@ def timesteps_by_type(self): :Returns: a class:`numpy.recarray` """ - from itertools import izip if self.timeseries is None: msg = "No timeseries computed, do run() first." @@ -1088,7 +1078,7 @@ def timesteps_by_type(self): return hbonds = defaultdict(list) - for (t, hframe) in izip(self.timesteps, self.timeseries): + for (t, hframe) in itertools.izip(self.timesteps, self.timeseries): for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe: donor_resnm, donor_resid, donor_atom = parse_residue(donor) acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor) diff --git a/testsuite/MDAnalysisTests/test_analysis.py b/testsuite/MDAnalysisTests/test_analysis.py index aa69e5d9003..7d913c05f38 100644 --- a/testsuite/MDAnalysisTests/test_analysis.py +++ b/testsuite/MDAnalysisTests/test_analysis.py @@ -208,7 +208,11 @@ def setUp(self): 'angle': 150.0, } # ideal helix with 1 proline: - self.num_bb_hbonds = u.atoms.numberOfResidues() - u.SYSTEM.PRO.numberOfResidues() - 4 + self.values = { + 'num_bb_hbonds': u.atoms.numberOfResidues() - u.SYSTEM.PRO.numberOfResidues() - 4, + 'donor_resid': numpy.array([5, 6, 8, 9, 10, 11, 12, 13]), + 'acceptor_resnm': numpy.array(['ALA', 'ALA', 'ALA', 'ALA', 'ALA', 'PRO', 'ALA', 'ALA']), + } def _run(self, **kwargs): kw = self.kwargs.copy() @@ -219,26 +223,36 @@ def _run(self, **kwargs): def test_helix_backbone(self): h = self._run() - assert_equal(len(h.timeseries[0]), self.num_bb_hbonds, "wrong number of backbone hydrogen bonds") + assert_equal(len(h.timeseries[0]), + self.values['num_bb_hbonds'], "wrong number of backbone hydrogen bonds") assert_equal(h.timesteps, [0.0]) + def test_generate_table(self): + h = self._run() + h.generate_table() + assert_equal(len(h.table), + self.values['num_bb_hbonds'], "wrong number of backbone hydrogen bonds in table") + assert_(isinstance(h.table, numpy.core.records.recarray)) + assert_array_equal(h.table.donor_resid, self.values['donor_resid']) + assert_array_equal(h.table.acceptor_resnm, self.values['acceptor_resnm']) + # TODO: Expand tests because the following ones are a bit superficial # because we should really run them on a trajectory def test_count_by_time(self): h = self._run() c = h.count_by_time() - assert_equal(c.tolist(), [(0.0, self.num_bb_hbonds)]) + assert_equal(c.tolist(), [(0.0, self.values['num_bb_hbonds'])]) def test_count_by_type(self): h = self._run() c = h.count_by_type() - assert_equal(c.frequency, self.num_bb_hbonds * [1.0]) + assert_equal(c.frequency, self.values['num_bb_hbonds'] * [1.0]) def test_count_by_type(self): h = self._run() t = h.timesteps_by_type() - assert_equal(t.time, self.num_bb_hbonds * [0.0]) + assert_equal(t.time, self.values['num_bb_hbonds'] * [0.0]) def tearDown(self): del self.universe @@ -261,7 +275,9 @@ class TestHydrogenBondAnalysisHeavyFail(TestHydrogenBondAnalysisHeavy): def setUp(self): super(TestHydrogenBondAnalysisHeavyFail, self).setUp() self.kwargs["distance"] = 3.0 - self.num_bb_hbonds = 0 # no H-bonds with a D-A distance < 3.0 A (they start at 3.05 A) + self.values['num_bb_hbonds'] = 0 # no H-bonds with a D-A distance < 3.0 A (they start at 3.05 A) + self.values['donor_resid'] = numpy.array([]) + self.values['acceptor_resnm'] = numpy.array([], dtype="|S3") class TestHydrogenBondAnalysisChecking(object): def _setUp(self):