-
Notifications
You must be signed in to change notification settings - Fork 664
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Introduce Capped distances to evaluate the indices of pairs within a fixed radius #1941
Changes from 8 commits
e47ed4d
cd3d29c
23e9868
dd12343
9833a69
e1dce21
8cc25fe
f02868c
61d9c6b
15e40ae
1d0ba7c
57363bb
ae6c2d9
32b2f06
6197b5a
1068578
2218686
cea2c37
7e25ea3
edc5b2d
9d3a2fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*- | ||
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*- | ||
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 | ||
# | ||
# MDAnalysis --- https://www.mdanalysis.org | ||
|
@@ -72,6 +72,7 @@ | |
from .mdamath import triclinic_vectors, triclinic_box | ||
|
||
|
||
|
||
# hack to select backend with backend=<backend> kwarg. Note that | ||
# the cython parallel code (prange) in parallel.distances is | ||
# independent from the OpenMP code | ||
|
@@ -390,6 +391,214 @@ def self_distance_array(reference, box=None, result=None, backend="serial"): | |
return distances | ||
|
||
|
||
def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, box=None, method=None): | ||
"""Calculates the pairs and distance within a specified distance | ||
|
||
If a *box* is supplied, then a minimum image convention is used | ||
to evaluate the distances. | ||
|
||
An automatic guessing of optimized method to calculate the distances is | ||
included in the function. An optional keyword for the method is also | ||
provided. Users can override the method with this functionality. | ||
Currently pkdtree and bruteforce are implemented. | ||
|
||
|
||
Parameters | ||
----------- | ||
reference : array | ||
reference coordinates array with shape ``reference.shape = (3,)`` | ||
or ``reference.shape = (len(reference), 3)`` | ||
configuration : array | ||
Configuration coordinate array with shape ``reference.shape = (3,)`` | ||
or ``reference.shape = (len(reference), 3)`` | ||
max_cutoff : float | ||
Maximum cutoff distance between the reference and configuration | ||
min_cutoff : (optional) float | ||
Minimum cutoff distance between reference and configuration [None] | ||
box : (optional) array or None | ||
The dimensions, if provided, must be provided in the same | ||
The unitcell dimesions for this system format as returned | ||
by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: | ||
``[lx,ly, lz, alpha, beta, gamma]``. Minimum image convention | ||
is applied if the box is provided [None] | ||
method : (optional) 'bruteforce' or 'pkdtree' or 'None' | ||
Keyword to override the automatic guessing of method built-in | ||
in the function [None] | ||
|
||
Returns | ||
------- | ||
pairs : array | ||
Pair of indices, one from each reference and configuration such that | ||
distance between them is within the ``max_cutoff`` and ``min_cutoff`` | ||
pairs[i,j] contains the indices i from reference coordinates, and | ||
j from configuration | ||
distances : array | ||
Distances corresponding to each pair of indices. | ||
d[k] corresponding to the pairs[i,j] gives the distance between | ||
i-th and j-th coordinate in reference and configuration respectively | ||
|
||
.. code-block:: python | ||
|
||
pairs, distances = capped_distances(reference, coordinates, max_cutoff) | ||
for indx, [a,b] in enumerate(pairs): | ||
coord1 = reference[a] | ||
coord2 = configuration[b] | ||
distance = distances[indx] | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. simple usage example here |
||
Note | ||
----- | ||
Currently only supports brute force and Periodic KDtree | ||
|
||
.. SeeAlso:: :func:'MDAnalysis.lib.distances.distance_array' | ||
.. SeeAlso:: :func:'MDAnalysis.lib.pkdtree.PeriodicKDTree' | ||
|
||
""" | ||
|
||
if box is not None: | ||
boxtype = _box_check(box) | ||
# Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]] | ||
if (boxtype == 'tri_box'): | ||
box = triclinic_vectors(box) | ||
if (boxtype == 'tri_vecs_bad'): | ||
box = triclinic_vectors(triclinic_box(box[0], box[1], box[2])) | ||
method = _determine_method(reference, configuration, | ||
max_cutoff, min_cutoff=min_cutoff, | ||
box=box, method=method) | ||
pairs, dist = method(reference, configuration, max_cutoff, | ||
min_cutoff=min_cutoff, box=box) | ||
|
||
return np.array(pairs), np.array(dist) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should be |
||
|
||
|
||
def _determine_method(reference, configuration, max_cutoff, min_cutoff=None, box=None, method=None): | ||
""" | ||
Switch between different methods based on the the optimized time. | ||
All the rules to select the method based on the input can be | ||
incorporated here. | ||
|
||
Returns | ||
------- | ||
Function object based on the rules and specified method | ||
|
||
Currently implemented methods are | ||
bruteforce : returns ``_bruteforce_capped`` | ||
PKDtree : return ``_pkdtree_capped` | ||
|
||
""" | ||
methods = {'bruteforce': _bruteforce_capped, | ||
'pkdtree':_pkdtree_capped} | ||
|
||
if method is not None: | ||
return methods[method] | ||
|
||
if len(reference) > 5000 and len(configuration) > 5000: | ||
if box is None and reference.shape[0] != 3 and configuration.shape[0] != 3: | ||
min_dim = np.array([reference.min(axis=0), | ||
configuration.min(axis=0)]) | ||
max_dim = np.array([reference.max(axis=0), | ||
configuration.max(axis=0)]) | ||
size = max_dim.max(axis=0) - min_dim.min(axis=0) | ||
elif box is not None: | ||
if box.shape[0] == 6: | ||
size = box[:3] | ||
else: | ||
size = box.max(axis=0) - box.min(axis=0) | ||
|
||
if np.any(size < 10.0*max_cutoff) and len(reference) > 100000 and len(configuration) > 100000: | ||
return methods['bruteforce'] | ||
else: | ||
return methods['pkdtree'] | ||
return methods['bruteforce'] | ||
|
||
|
||
def _bruteforce_capped(reference, configuration, max_cutoff, min_cutoff=None, box=None): | ||
""" | ||
Using naive distance calulations, returns a list | ||
containing the indices with one from each | ||
reference and configuration arrays, such that the distance between | ||
them is less than the specified cutoff distance | ||
""" | ||
pairs, distance = [], [] | ||
|
||
reference = np.asarray(reference, dtype=np.float32) | ||
configuration = np.asarray(configuration, dtype=np.float32) | ||
|
||
if reference.shape == (3, ): | ||
reference = reference[None, :] | ||
if configuration.shape == (3, ): | ||
configuration = configuration[None, :] | ||
|
||
_check_array(reference, 'reference') | ||
_check_array(configuration, 'configuration') | ||
|
||
for i, coords in enumerate(reference): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this way of iterating over one coordinate set is probably slower than There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes exactly. The |
||
dist = distance_array(coords[None, :], configuration, box=box)[0] | ||
if min_cutoff is not None: | ||
idx = np.where((dist <= max_cutoff) & (dist > min_cutoff))[0] | ||
else: | ||
idx = np.where((dist <= max_cutoff))[0] | ||
for j in idx: | ||
pairs.append((i, j)) | ||
distance.append(dist[j]) | ||
return pairs, distance | ||
|
||
|
||
def _pkdtree_capped(reference, configuration, max_cutoff, min_cutoff=None, box=None): | ||
""" Capped Distance evaluations using KDtree. | ||
|
||
Uses minimum image convention if *box* is specified | ||
|
||
Returns: | ||
-------- | ||
pairs : list | ||
List of atom indices which are within the specified cutoff distance. | ||
pairs `(i, j)` corresponds to i-th particle in reference and | ||
j-th particle in configuration | ||
distance : list | ||
Distance between two atoms corresponding to the (i, j) indices | ||
in pairs. | ||
|
||
""" | ||
from .pkdtree import PeriodicKDTree | ||
from Bio.KDTree import KDTree | ||
|
||
pairs, distances = [], [] | ||
|
||
reference = np.asarray(reference, dtype=np.float32) | ||
configuration = np.asarray(configuration, dtype=np.float32) | ||
|
||
if reference.shape == (3, ): | ||
reference = reference[None, :] | ||
if configuration.shape == (3, ): | ||
configuration = configuration[None, :] | ||
|
||
_check_array(reference, 'reference') | ||
_check_array(configuration, 'configuration') | ||
|
||
# Build The KDTree | ||
if box is not None: | ||
kdtree = PeriodicKDTree(box, bucket_size=10) | ||
else: | ||
kdtree = KDTree(dim=3, bucket_size=10) | ||
|
||
kdtree.set_coords(configuration) | ||
# Search for every query point | ||
for idx, centers in enumerate(reference): | ||
kdtree.search(centers, max_cutoff) | ||
indices = kdtree.get_indices() | ||
dist = distance_array(centers.reshape((1, 3)), | ||
configuration[indices], box=box)[0] | ||
if min_cutoff is not None: | ||
mask = np.where(dist > min_cutoff)[0] | ||
dist = dist[mask] | ||
indices = [indices[mask[i]] for i in range(len(mask))] | ||
if len(indices) != 0: | ||
for num, j in enumerate(indices): | ||
pairs.append((idx, j)) | ||
distances.append(dist[num]) | ||
return pairs, distances | ||
|
||
|
||
def transform_RtoS(inputcoords, box, backend="serial"): | ||
"""Transform an array of coordinates from real space to S space (aka lambda space) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,8 +25,12 @@ | |
import numpy as np | ||
from numpy.testing import assert_equal | ||
|
||
import itertools | ||
|
||
import MDAnalysis as mda | ||
|
||
from MDAnalysis.lib.mdamath import triclinic_vectors | ||
|
||
def test_transform_StoR_pass(): | ||
box = np.array([10, 7, 3, 45, 60, 90], dtype=np.float32) | ||
s = np.array([[0.5, -0.1, 0.5]], dtype=np.float32) | ||
|
@@ -37,9 +41,62 @@ def test_transform_StoR_pass(): | |
|
||
assert_equal(original_r, test_r) | ||
|
||
|
||
def test_transform_StoR_fail(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why removing this test? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The test is included in |
||
box = np.array([10, 7, 3, 45, 60, 90], dtype=np.float32) | ||
s = np.array([[0.5, -0.1, 0.5]]) | ||
|
||
with pytest.raises(TypeError, match='S must be of type float32'): | ||
r = mda.lib.distances.transform_StoR(s, box) | ||
|
||
|
||
def test_capped_distance_noresults(): | ||
point1 = np.array([0.1, 0.1, 0.1], dtype=np.float32) | ||
point2 = np.array([0.95, 0.1, 0.1], dtype=np.float32) | ||
|
||
pairs, distances = mda.lib.distances.capped_distance(point1, | ||
point2, | ||
max_cutoff=0.2) | ||
|
||
assert_equal(len(pairs), 0) | ||
|
||
|
||
boxes_1 = (np.array([1, 2, 3, 90, 90, 90], dtype=np.float32), # ortho | ||
np.array([1, 2, 3, 30, 45, 60], dtype=np.float32), # tri_box | ||
triclinic_vectors( # tri_vecs | ||
np.array([1, 2, 3, 90, 90, 45], dtype=np.float32)), | ||
np.array([[0.5, 0.9, 1.9], # tri_vecs_bad | ||
[2.0, 0.4, 0.1], | ||
[0.0, 0.6, 0.5]], dtype=np.float32)) | ||
|
||
query_1 = (np.array([0.1, 0.1, 0.1], dtype=np.float32), | ||
np.array([[0.1, 0.1, 0.1], | ||
[0.2, 0.1, 0.1]], dtype=np.float32)) | ||
|
||
method_1 = ('bruteforce', 'pkdtree') | ||
|
||
np.random.seed(90003) | ||
points = (np.random.uniform(low=0, high=1.0, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these two lines should be inside the test function There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and now deleted There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, didn't see it. Sorry about that. |
||
size=(100, 3))*(boxes_1[0][:3])).astype(np.float32) | ||
|
||
|
||
@pytest.mark.parametrize('box, query , method', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think if you stack the decorators, you don't need the product call, it does it itself, ie @pytest.mark.parametrize('box', ...)
@pytest.mark.parametrize('query', ...)
@pytest.mark.parametrize('method', ...)
def test_thing(box, query, method): |
||
itertools.product(boxes_1, query_1, method_1)) | ||
def test_capped_distance_checkbrute(box, query, method): | ||
max_cutoff = 0.3 | ||
# capped distance should be able to handle array of vectors | ||
# as well as single vectors. | ||
pairs, dist = mda.lib.distances.capped_distance(query, | ||
points, | ||
max_cutoff, | ||
box=box, | ||
method=method) | ||
if(query.shape[0] == 3): | ||
distances = mda.lib.distances.distance_array(query[None, :], | ||
points, box=box) | ||
else: | ||
distances = mda.lib.distances.distance_array(query, | ||
points, box=box) | ||
indices = np.where(distances < max_cutoff) | ||
|
||
assert_equal(pairs[:, 1], indices[1]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this space might even break the emacs format line thing here?