From 6e538f80e986058092f75f3fe1f4f3e2b092659d Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Mon, 19 Apr 2021 12:31:19 +0200 Subject: [PATCH] Add 'use_cell_list' parameter --- src/biotite/structure/rdf.py | 69 +++++++++++++++-------- tests/structure/test_rdf.py | 106 +++++++++++++++++++++++------------ 2 files changed, 115 insertions(+), 60 deletions(-) diff --git a/src/biotite/structure/rdf.py b/src/biotite/structure/rdf.py index af5c79ffa..439af5759 100644 --- a/src/biotite/structure/rdf.py +++ b/src/biotite/structure/rdf.py @@ -20,7 +20,7 @@ def rdf(center, atoms, selection=None, interval=(0, 10), bins=100, box=None, - periodic=False): + periodic=False, use_cell_list=None): r""" Compute the radial distribution function *g(r)* (RDF) for one or multiple given central positions based on a given system of @@ -72,6 +72,14 @@ def rdf(center, atoms, selection=None, interval=(0, 10), bins=100, box=None, *(m,3,3)* if atoms is an :class:`AtomArrayStack`, respectively. periodic : bool, optional Defines if periodic boundary conditions are taken into account. + use_cell_list : bool, optional + Whether to use a :class:`CellList` for RDF calculation or not. + A :class:`CellList` requires constant time to obtain proximate + atoms for each center, while otherwise linear time is required. + However, the creation of a :class:`CellList` has a significant + overhead if there are only a few considered centers. + By default a :class:`CellList` is not used if `center` is + smaller than 100 elements. Returns ------- @@ -189,28 +197,43 @@ def rdf(center, atoms, selection=None, interval=(0, 10), bins=100, box=None, # Make histogram of quared distances to save computation time # of sqrt calculation sq_edges = edges**2 - threshold_dist = edges[-1] - cell_size = threshold_dist - disp = [] - for i in range(atoms.stack_depth()): - # Use cell list to efficiently preselect atoms that are in range - # of the desired bin range - cell_list = CellList(atom_coord[i], cell_size, periodic, box[i]) - # 'cell_radius=1' is used in 'get_atoms_in_cells()' - # This is enough to find all atoms that are in the given - # interval (and more), since the size of each cell is as large - # as the last edge of the bins - near_atom_mask = cell_list.get_atoms_in_cells(center[i], as_mask=True) - # Calculate distances of each center to preselected atoms - # for each center - for j in range(center.shape[1]): - dist_box = box[i] if periodic else None - # Calculate squared distances - disp.append(displacement( - center[i,j], atom_coord[i, near_atom_mask[j]], box=dist_box - )) - # Make one array from multiple arrays with different length - disp = np.concatenate(disp) + + if use_cell_list is None: + if len(center) < 100: + use_cell_list = True + else: + use_cell_list = False + + if use_cell_list: + threshold_dist = edges[-1] + cell_size = threshold_dist + disp = [] + for i in range(atoms.stack_depth()): + # Use cell list to efficiently preselect atoms that are in range + # of the desired bin range + cell_list = CellList(atom_coord[i], cell_size, periodic, box[i]) + # 'cell_radius=1' is used in 'get_atoms_in_cells()' + # This is enough to find all atoms that are in the given + # interval (and more), since the size of each cell is as large + # as the last edge of the bins + near_atom_mask = cell_list.get_atoms_in_cells(center[i], as_mask=True) + # Calculate distances of each center to preselected atoms + # for each center + for j in range(center.shape[1]): + dist_box = box[i] if periodic else None + # Calculate squared distances + disp.append(displacement( + center[i,j], atom_coord[i, near_atom_mask[j]], box=dist_box + )) + # Make one array from multiple arrays with different length + disp = np.concatenate(disp) + else: + dist_box = box if periodic else None + disp = np.concatenate([ + displacement(center[:, i ,np.newaxis], atom_coord, box=dist_box) + for i in range(center.shape[1]) + ], axis=1) + sq_distances = vector_dot(disp, disp) hist, _ = np.histogram(sq_distances, bins=sq_edges) diff --git a/tests/structure/test_rdf.py b/tests/structure/test_rdf.py index a515f4a41..43c801748 100644 --- a/tests/structure/test_rdf.py +++ b/tests/structure/test_rdf.py @@ -15,7 +15,8 @@ cannot_import("mdtraj"), reason="MDTraj is not installed" ) -def test_rdf(): +@pytest.mark.parametrize("use_cell_list", [None, False, True]) +def test_rdf(use_cell_list): """ General test to reproduce oxygen RDF for a box of water""" test_file = TEST_FILE stack = load_structure(test_file) @@ -25,7 +26,7 @@ def test_rdf(): interval = np.array([0, 10]) n_bins = 100 bins, g_r = rdf(oxygen[:, 0].coord, oxygen, interval=interval, - bins=n_bins, periodic=False) + bins=n_bins, periodic=False, use_cell_list=use_cell_list) # Compare with MDTraj import mdtraj @@ -36,23 +37,28 @@ def test_rdf(): r_range=interval/10, n_bins=n_bins, periodic=False) - assert np.allclose(bins, mdt_bins*10) - assert np.allclose(g_r, mdt_g_r, rtol=0.0001) + assert bins.tolist() == pytest.approx((mdt_bins*10).tolist()) + assert g_r.tolist() == pytest.approx(mdt_g_r.tolist(), rel=1e-4) -def test_rdf_bins(): +@pytest.mark.parametrize("use_cell_list", [None, False, True]) +def test_rdf_bins(use_cell_list): """ Test if RDF produce correct bin ranges """ stack = load_structure(TEST_FILE) center = stack[:, 0] num_bins = 44 bin_range = (0, 11.7) - bins, g_r = rdf(center, stack, bins=num_bins, interval=bin_range) + bins, g_r = rdf( + center, stack, bins=num_bins, interval=bin_range, + use_cell_list=use_cell_list + ) assert(len(bins) == num_bins) assert(bins[0] > bin_range[0]) assert(bins[1] < bin_range[1]) -def test_rdf_with_selection(): +@pytest.mark.parametrize("use_cell_list", [None, False, True]) +def test_rdf_with_selection(use_cell_list): """ Test if the selection argument of rdf function works as expected """ stack = load_structure(TEST_FILE) @@ -61,17 +67,24 @@ def test_rdf_with_selection(): interval = np.array([0, 10]) n_bins = 100 sele = (stack.atom_name == 'OW') & (stack.res_id >= 3) - bins, g_r = rdf(oxygen[:, 0].coord, stack, selection=sele, - interval=interval, bins=n_bins, periodic=False) - - nosel_bins, nosel_g_r = rdf(oxygen[:, 0].coord, oxygen[:, 1:], - interval=interval, bins=n_bins, periodic=False) + bins, g_r = rdf( + oxygen[:, 0].coord, stack, selection=sele, + interval=interval, bins=n_bins, periodic=False, + use_cell_list=use_cell_list + ) + + nosel_bins, nosel_g_r = rdf( + oxygen[:, 0].coord, oxygen[:, 1:], + interval=interval, bins=n_bins, periodic=False, + use_cell_list=use_cell_list + ) assert np.allclose(bins, nosel_bins) assert np.allclose(g_r, nosel_g_r) -def test_rdf_atom_argument(): +@pytest.mark.parametrize("use_cell_list", [None, False, True]) +def test_rdf_atom_argument(use_cell_list): """ Test if the first argument allows to use AtomArrayStack """ stack = load_structure(TEST_FILE) @@ -80,16 +93,22 @@ def test_rdf_atom_argument(): interval = np.array([0, 10]) n_bins = 100 - bins, g_r = rdf(oxygen[:, 0], stack, interval=interval, - bins=n_bins, periodic=False) + bins, g_r = rdf( + oxygen[:, 0], stack, interval=interval, + bins=n_bins, periodic=False, use_cell_list=use_cell_list + ) - atom_bins, atoms_g_r = rdf(oxygen[:, 0].coord, stack, interval=interval, - bins=n_bins, periodic=False) + atom_bins, atoms_g_r = rdf( + oxygen[:, 0].coord, stack, interval=interval, + bins=n_bins, periodic=False, + use_cell_list=use_cell_list + ) assert np.allclose(g_r, atoms_g_r) -def test_rdf_multiple_center(): +@pytest.mark.parametrize("use_cell_list", [None, False, True]) +def test_rdf_multiple_center(use_cell_list): """ Test if the first argument allows to use multiple centers""" stack = load_structure(TEST_FILE) @@ -99,15 +118,21 @@ def test_rdf_multiple_center(): n_bins = 100 # averaging individual calculations - bins1, g_r1 = rdf(oxygen[:, 1].coord, oxygen[:, 2:], interval=interval, - bins=n_bins, periodic=False) - bins2, g_r2 = rdf(oxygen[:, 0].coord, oxygen[:, 2:], interval=interval, - bins=n_bins, periodic=False) + bins1, g_r1 = rdf( + oxygen[:, 1].coord, oxygen[:, 2:], interval=interval, + bins=n_bins, periodic=False, use_cell_list=use_cell_list + ) + bins2, g_r2 = rdf( + oxygen[:, 0].coord, oxygen[:, 2:], interval=interval, + bins=n_bins, periodic=False, use_cell_list=use_cell_list + ) mean = np.mean([g_r1, g_r2], axis=0) # this should give the same result as averaging for oxygen 0 and 1 - bins, g_r = rdf(oxygen[:, 0:2].coord, oxygen[:, 2:], interval=interval, - bins=n_bins, periodic=False) + bins, g_r = rdf( + oxygen[:, 0:2].coord, oxygen[:, 2:], interval=interval, + bins=n_bins, periodic=False, use_cell_list=use_cell_list + ) assert np.allclose(g_r, mean, rtol=0.0001) @@ -116,7 +141,8 @@ def test_rdf_multiple_center(): cannot_import("mdtraj"), reason="MDTraj is not installed" ) -def test_rdf_periodic(): +@pytest.mark.parametrize("use_cell_list", [None, False, True]) +def test_rdf_periodic(use_cell_list): """ Test if the periodic argument gives the correct results""" test_file = TEST_FILE stack = load_structure(test_file) @@ -125,8 +151,10 @@ def test_rdf_periodic(): oxygen = stack[:, stack.atom_name == 'OW'] interval = np.array([0, 10]) n_bins = 100 - bins, g_r = rdf(oxygen[:, 0].coord, oxygen[:, 1:], interval=interval, - bins=n_bins, periodic=True) + bins, g_r = rdf( + oxygen[:, 0].coord, oxygen[:, 1:], interval=interval, + bins=n_bins, periodic=True, use_cell_list=use_cell_list + ) # Compare with MDTraj import mdtraj @@ -137,33 +165,35 @@ def test_rdf_periodic(): r_range=interval/10, n_bins=n_bins, periodic=True) - assert np.allclose(bins, mdt_bins*10) - assert np.allclose(g_r, mdt_g_r, rtol=0.0001) + assert bins.tolist() == pytest.approx((mdt_bins*10).tolist()) + assert g_r.tolist() == pytest.approx(mdt_g_r.tolist(), rel=1e-4) -def test_rdf_box(): +@pytest.mark.parametrize("use_cell_list", [None, False, True]) +def test_rdf_box(use_cell_list): """ Test correct use of simulation boxes """ stack = load_structure(TEST_FILE) box = vectors_from_unitcell(1, 1, 1, 90, 90, 90) box = np.repeat(box[np.newaxis, :, :], len(stack), axis=0) # Use box attribute of stack - rdf(stack[:, 0], stack) + rdf(stack[:, 0], stack, use_cell_list=use_cell_list) # Use box attribute and dont fail because stack has no box stack.box = None - rdf(stack[:, 0], stack, box=box) + rdf(stack[:, 0], stack, box=box, use_cell_list=use_cell_list) # Fail if no box is present with pytest.raises(ValueError): - rdf(stack[:, 0], stack) + rdf(stack[:, 0], stack, use_cell_list=use_cell_list) # Fail if box is of wrong size with pytest.raises(ValueError): - rdf(stack[:, 0], stack, box=box[0]) + rdf(stack[:, 0], stack, box=box[0], use_cell_list=use_cell_list) -def test_rdf_normalized(): +@pytest.mark.parametrize("use_cell_list", [None, False, True]) +def test_rdf_normalized(use_cell_list): """ Assert that the RDF tail is normalized to 1""" test_file = TEST_FILE stack = load_structure(test_file) @@ -173,7 +203,9 @@ def test_rdf_normalized(): interval = np.array([0, 5]) n_bins = 100 - bins, g_r = rdf(oxygen.coord, oxygen, interval=interval, - bins=n_bins, periodic=True) + bins, g_r = rdf( + oxygen.coord, oxygen, interval=interval, + bins=n_bins, periodic=True, use_cell_list=use_cell_list + ) assert np.allclose(g_r[-10:], np.ones(10), atol=0.1)