biotite-dev · padix-key · Apr 19, 2021
diff --git a/src/biotite/structure/rdf.py b/src/biotite/structure/rdf.py
@@ -20,7 +20,7 @@
 
 
 def rdf(center, atoms, selection=None, interval=(0, 10), bins=100, box=None,
-        periodic=False):
+        periodic=False, use_cell_list=None):
     r"""
     Compute the radial distribution function *g(r)* (RDF) for one or
     multiple given central positions based on a given system of
@@ -72,6 +72,14 @@ def rdf(center, atoms, selection=None, interval=(0, 10), bins=100, box=None,
         *(m,3,3)* if atoms is an :class:`AtomArrayStack`, respectively.
     periodic : bool, optional
         Defines if periodic boundary conditions are taken into account.
+    use_cell_list : bool, optional
+        Whether to use a :class:`CellList` for RDF calculation or not.
+        A :class:`CellList` requires constant time to obtain proximate 
+        atoms for each center, while otherwise linear time is required.
+        However, the creation of a :class:`CellList` has a significant
+        overhead if there are only a few considered centers.
+        By default a :class:`CellList` is not used if `center` is
+        smaller than 100 elements.
 
     Returns
     -------
@@ -189,28 +197,43 @@ def rdf(center, atoms, selection=None, interval=(0, 10), bins=100, box=None,
     # Make histogram of quared distances to save computation time
     # of sqrt calculation
     sq_edges = edges**2
-    threshold_dist = edges[-1]
-    cell_size = threshold_dist
-    disp = []
-    for i in range(atoms.stack_depth()):
-        # Use cell list to efficiently preselect atoms that are in range
-        # of the desired bin range
-        cell_list = CellList(atom_coord[i], cell_size, periodic, box[i])
-        # 'cell_radius=1' is used in 'get_atoms_in_cells()'
-        # This is enough to find all atoms that are in the given
-        # interval (and more), since the size of each cell is as large
-        # as the last edge of the bins
-        near_atom_mask = cell_list.get_atoms_in_cells(center[i], as_mask=True)
-        # Calculate distances of each center to preselected atoms
-        # for each center
-        for j in range(center.shape[1]):
-            dist_box = box[i] if periodic else None
-            # Calculate squared distances
-            disp.append(displacement(
-                center[i,j], atom_coord[i, near_atom_mask[j]], box=dist_box
-            ))
-    # Make one array from multiple arrays with different length
-    disp = np.concatenate(disp)
+
+    if use_cell_list is None:
+        if len(center) < 100:
+            use_cell_list = True
+        else:
+            use_cell_list = False
+
+    if use_cell_list:
+        threshold_dist = edges[-1]
+        cell_size = threshold_dist
+        disp = []
+        for i in range(atoms.stack_depth()):
+            # Use cell list to efficiently preselect atoms that are in range
+            # of the desired bin range
+            cell_list = CellList(atom_coord[i], cell_size, periodic, box[i])
+            # 'cell_radius=1' is used in 'get_atoms_in_cells()'
+            # This is enough to find all atoms that are in the given
+            # interval (and more), since the size of each cell is as large
+            # as the last edge of the bins
+            near_atom_mask = cell_list.get_atoms_in_cells(center[i], as_mask=True)
+            # Calculate distances of each center to preselected atoms
+            # for each center
+            for j in range(center.shape[1]):
+                dist_box = box[i] if periodic else None
+                # Calculate squared distances
+                disp.append(displacement(
+                    center[i,j], atom_coord[i, near_atom_mask[j]], box=dist_box
+                ))
+        # Make one array from multiple arrays with different length
+        disp = np.concatenate(disp)
+    else:
+        dist_box = box if periodic else None
+        disp = np.concatenate([
+            displacement(center[:, i ,np.newaxis], atom_coord, box=dist_box)
+            for i in range(center.shape[1])
+        ], axis=1)
+
     sq_distances = vector_dot(disp, disp)
     hist, _ = np.histogram(sq_distances, bins=sq_edges)
 

diff --git a/tests/structure/test_rdf.py b/tests/structure/test_rdf.py
@@ -15,7 +15,8 @@
     cannot_import("mdtraj"),
     reason="MDTraj is not installed"
 )
-def test_rdf():
+@pytest.mark.parametrize("use_cell_list", [None, False, True])
+def test_rdf(use_cell_list):
     """ General test to reproduce oxygen RDF for a box of water"""
     test_file = TEST_FILE
     stack = load_structure(test_file)
@@ -25,7 +26,7 @@ def test_rdf():
     interval = np.array([0, 10])
     n_bins = 100
     bins, g_r = rdf(oxygen[:, 0].coord, oxygen, interval=interval,
-                    bins=n_bins, periodic=False)
+                    bins=n_bins, periodic=False, use_cell_list=use_cell_list)
 
     # Compare with MDTraj
     import mdtraj
@@ -36,23 +37,28 @@ def test_rdf():
                                            r_range=interval/10, n_bins=n_bins,
                                            periodic=False)
 
-    assert np.allclose(bins, mdt_bins*10)
-    assert np.allclose(g_r, mdt_g_r, rtol=0.0001)
+    assert bins.tolist() == pytest.approx((mdt_bins*10).tolist())
+    assert g_r.tolist() == pytest.approx(mdt_g_r.tolist(), rel=1e-4)
 
 
-def test_rdf_bins():
+@pytest.mark.parametrize("use_cell_list", [None, False, True])
+def test_rdf_bins(use_cell_list):
     """ Test if RDF produce correct bin ranges """
     stack = load_structure(TEST_FILE)
     center = stack[:, 0]
     num_bins = 44
     bin_range = (0, 11.7)
-    bins, g_r = rdf(center, stack, bins=num_bins, interval=bin_range)
+    bins, g_r = rdf(
+        center, stack, bins=num_bins, interval=bin_range,
+        use_cell_list=use_cell_list
+    )
     assert(len(bins) == num_bins)
     assert(bins[0] > bin_range[0])
     assert(bins[1] < bin_range[1])
 
 
-def test_rdf_with_selection():
+@pytest.mark.parametrize("use_cell_list", [None, False, True])
+def test_rdf_with_selection(use_cell_list):
     """ Test if the selection argument of rdf function works as expected """
     stack = load_structure(TEST_FILE)
 
@@ -61,17 +67,24 @@ def test_rdf_with_selection():
     interval = np.array([0, 10])
     n_bins = 100
     sele = (stack.atom_name == 'OW') & (stack.res_id >= 3)
-    bins, g_r = rdf(oxygen[:, 0].coord, stack, selection=sele,
-                    interval=interval, bins=n_bins, periodic=False)
-
-    nosel_bins, nosel_g_r = rdf(oxygen[:, 0].coord, oxygen[:, 1:],
-                                interval=interval, bins=n_bins, periodic=False)
+    bins, g_r = rdf(
+        oxygen[:, 0].coord, stack, selection=sele,
+        interval=interval, bins=n_bins, periodic=False,
+        use_cell_list=use_cell_list
+    )
+
+    nosel_bins, nosel_g_r = rdf(
+        oxygen[:, 0].coord, oxygen[:, 1:],
+        interval=interval, bins=n_bins, periodic=False,
+        use_cell_list=use_cell_list
+    )
 
     assert np.allclose(bins, nosel_bins)
     assert np.allclose(g_r, nosel_g_r)
 
 
-def test_rdf_atom_argument():
+@pytest.mark.parametrize("use_cell_list", [None, False, True])
+def test_rdf_atom_argument(use_cell_list):
     """ Test if the first argument allows to use AtomArrayStack """
     stack = load_structure(TEST_FILE)
 
@@ -80,16 +93,22 @@ def test_rdf_atom_argument():
     interval = np.array([0, 10])
     n_bins = 100
 
-    bins, g_r = rdf(oxygen[:, 0], stack, interval=interval,
-                    bins=n_bins, periodic=False)
+    bins, g_r = rdf(
+        oxygen[:, 0], stack, interval=interval,
+        bins=n_bins, periodic=False, use_cell_list=use_cell_list
+    )
 
-    atom_bins, atoms_g_r = rdf(oxygen[:, 0].coord, stack, interval=interval,
-                    bins=n_bins, periodic=False)
+    atom_bins, atoms_g_r = rdf(
+        oxygen[:, 0].coord, stack, interval=interval,
+        bins=n_bins, periodic=False,
+        use_cell_list=use_cell_list
+    )
 
     assert np.allclose(g_r, atoms_g_r)
 
 
-def test_rdf_multiple_center():
+@pytest.mark.parametrize("use_cell_list", [None, False, True])
+def test_rdf_multiple_center(use_cell_list):
     """ Test if the first argument allows to use multiple centers"""
     stack = load_structure(TEST_FILE)
 
@@ -99,15 +118,21 @@ def test_rdf_multiple_center():
     n_bins = 100
 
     # averaging individual calculations
-    bins1, g_r1 = rdf(oxygen[:, 1].coord, oxygen[:, 2:], interval=interval,
-                    bins=n_bins, periodic=False)
-    bins2, g_r2 = rdf(oxygen[:, 0].coord, oxygen[:, 2:], interval=interval,
-                    bins=n_bins, periodic=False)
+    bins1, g_r1 = rdf(
+        oxygen[:, 1].coord, oxygen[:, 2:], interval=interval,
+        bins=n_bins, periodic=False, use_cell_list=use_cell_list
+    )
+    bins2, g_r2 = rdf(
+        oxygen[:, 0].coord, oxygen[:, 2:], interval=interval,
+        bins=n_bins, periodic=False, use_cell_list=use_cell_list
+    )
     mean = np.mean([g_r1, g_r2], axis=0)
 
     # this should give the same result as averaging for oxygen 0 and 1
-    bins, g_r = rdf(oxygen[:, 0:2].coord, oxygen[:, 2:], interval=interval,
-                    bins=n_bins, periodic=False)
+    bins, g_r = rdf(
+        oxygen[:, 0:2].coord, oxygen[:, 2:], interval=interval,
+        bins=n_bins, periodic=False, use_cell_list=use_cell_list
+    )
 
     assert np.allclose(g_r, mean, rtol=0.0001)
 
@@ -116,7 +141,8 @@ def test_rdf_multiple_center():
     cannot_import("mdtraj"),
     reason="MDTraj is not installed"
 )
-def test_rdf_periodic():
+@pytest.mark.parametrize("use_cell_list", [None, False, True])
+def test_rdf_periodic(use_cell_list):
     """ Test if the periodic argument gives the correct results"""
     test_file = TEST_FILE
     stack = load_structure(test_file)
@@ -125,8 +151,10 @@ def test_rdf_periodic():
     oxygen = stack[:, stack.atom_name == 'OW']
     interval = np.array([0, 10])
     n_bins = 100
-    bins, g_r = rdf(oxygen[:, 0].coord, oxygen[:, 1:], interval=interval,
-                    bins=n_bins, periodic=True)
+    bins, g_r = rdf(
+        oxygen[:, 0].coord, oxygen[:, 1:], interval=interval,
+        bins=n_bins, periodic=True, use_cell_list=use_cell_list
+    )
 
     # Compare with MDTraj
     import mdtraj
@@ -137,33 +165,35 @@ def test_rdf_periodic():
                                            r_range=interval/10, n_bins=n_bins,
                                            periodic=True)
 
-    assert np.allclose(bins, mdt_bins*10)
-    assert np.allclose(g_r, mdt_g_r, rtol=0.0001)
+    assert bins.tolist() == pytest.approx((mdt_bins*10).tolist())
+    assert g_r.tolist() == pytest.approx(mdt_g_r.tolist(), rel=1e-4)
 
 
-def test_rdf_box():
+@pytest.mark.parametrize("use_cell_list", [None, False, True])
+def test_rdf_box(use_cell_list):
     """ Test correct use of simulation boxes """
     stack = load_structure(TEST_FILE)
     box = vectors_from_unitcell(1, 1, 1, 90, 90, 90)
     box = np.repeat(box[np.newaxis, :, :], len(stack), axis=0)
 
     # Use box attribute of stack
-    rdf(stack[:, 0], stack)
+    rdf(stack[:, 0], stack, use_cell_list=use_cell_list)
 
     # Use box attribute and dont fail because stack has no box
     stack.box = None
-    rdf(stack[:, 0], stack, box=box)
+    rdf(stack[:, 0], stack, box=box, use_cell_list=use_cell_list)
 
     # Fail if no box is present
     with pytest.raises(ValueError):
-        rdf(stack[:, 0], stack)
+        rdf(stack[:, 0], stack, use_cell_list=use_cell_list)
 
     # Fail if box is of wrong size
     with pytest.raises(ValueError):
-        rdf(stack[:, 0], stack, box=box[0])
+        rdf(stack[:, 0], stack, box=box[0], use_cell_list=use_cell_list)
 
 
-def test_rdf_normalized():
+@pytest.mark.parametrize("use_cell_list", [None, False, True])
+def test_rdf_normalized(use_cell_list):
     """ Assert that the RDF tail is normalized to 1"""
     test_file = TEST_FILE
     stack = load_structure(test_file)
@@ -173,7 +203,9 @@ def test_rdf_normalized():
     interval = np.array([0, 5])
     n_bins = 100
 
-    bins, g_r = rdf(oxygen.coord, oxygen, interval=interval,
-                    bins=n_bins, periodic=True)
+    bins, g_r = rdf(
+        oxygen.coord, oxygen, interval=interval,
+        bins=n_bins, periodic=True, use_cell_list=use_cell_list
+    )
     assert np.allclose(g_r[-10:], np.ones(10), atol=0.1)