Skip to content

Commit

Permalink
Merge pull request #1941 from ayushsuhane/cappedfeature
Browse files Browse the repository at this point in the history
Introduce Capped distances to evaluate the indices of pairs within a fixed radius
  • Loading branch information
richardjgowers authored Jul 12, 2018
2 parents 28404e2 + 9d3a2fc commit 4be325c
Show file tree
Hide file tree
Showing 3 changed files with 310 additions and 1 deletion.
4 changes: 3 additions & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@ The rules for this file:

------------------------------------------------------------------------------
??/??/18 tylerjereddy, richardjgowers, palnabarun, orbeckst, kain88-de, zemanj,
VOD555, davidercruz, jbarnoud
VOD555, davidercruz, jbarnoud, ayushsuhane

* 0.18.1

Enhancements

* Added lib.distances.capped_distance function to quickly calculate all distances
up to a given maximum distance (PR #1941)
* Added a rotation coordinate transformation (PR #1937)
* Added a box centering trajectory transformation (PR #1946)
* Added a on-the-fly trajectory transformations API and a coordinate translation
Expand Down
210 changes: 210 additions & 0 deletions package/MDAnalysis/lib/distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
from .mdamath import triclinic_vectors, triclinic_box



# hack to select backend with backend=<backend> kwarg. Note that
# the cython parallel code (prange) in parallel.distances is
# independent from the OpenMP code
Expand Down Expand Up @@ -396,6 +397,215 @@ def self_distance_array(reference, box=None, result=None, backend="serial"):
return distances


def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, box=None, method=None):
"""Calculates the pairs and distance within a specified distance
If a *box* is supplied, then a minimum image convention is used
to evaluate the distances.
An automatic guessing of optimized method to calculate the distances is
included in the function. An optional keyword for the method is also
provided. Users can override the method with this functionality.
Currently pkdtree and bruteforce are implemented.
Parameters
-----------
reference : array
reference coordinates array with shape ``reference.shape = (3,)``
or ``reference.shape = (len(reference), 3)``
configuration : array
Configuration coordinate array with shape ``reference.shape = (3,)``
or ``reference.shape = (len(reference), 3)``
max_cutoff : float
Maximum cutoff distance between the reference and configuration
min_cutoff : (optional) float
Minimum cutoff distance between reference and configuration [None]
box : (optional) array or None
The dimensions, if provided, must be provided in the same
The unitcell dimesions for this system format as returned
by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:
``[lx,ly, lz, alpha, beta, gamma]``. Minimum image convention
is applied if the box is provided [None]
method : (optional) 'bruteforce' or 'pkdtree' or 'None'
Keyword to override the automatic guessing of method built-in
in the function [None]
Returns
-------
pairs : array
Pair of indices, one from each reference and configuration such that
distance between them is within the ``max_cutoff`` and ``min_cutoff``
pairs[i,j] contains the indices i from reference coordinates, and
j from configuration
distances : array
Distances corresponding to each pair of indices.
d[k] corresponding to the pairs[i,j] gives the distance between
i-th and j-th coordinate in reference and configuration respectively
.. code-block:: python
pairs, distances = capped_distances(reference, coordinates, max_cutoff)
for indx, [a,b] in enumerate(pairs):
coord1 = reference[a]
coord2 = configuration[b]
distance = distances[indx]
Note
-----
Currently only supports brute force and Periodic KDtree
.. SeeAlso:: :func:'MDAnalysis.lib.distances.distance_array'
.. SeeAlso:: :func:'MDAnalysis.lib.pkdtree.PeriodicKDTree'
"""

if box is not None:
boxtype = _box_check(box)
# Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]]
if (boxtype == 'tri_box'):
box = triclinic_vectors(box)
if (boxtype == 'tri_vecs_bad'):
box = triclinic_vectors(triclinic_box(box[0], box[1], box[2]))
method = _determine_method(reference, configuration,
max_cutoff, min_cutoff=min_cutoff,
box=box, method=method)
pairs, dist = method(reference, configuration, max_cutoff,
min_cutoff=min_cutoff, box=box)

return np.asarray(pairs), np.asarray(dist)


def _determine_method(reference, configuration, max_cutoff, min_cutoff=None, box=None, method=None):
"""
Switch between different methods based on the the optimized time.
All the rules to select the method based on the input can be
incorporated here.
Returns
-------
Function object based on the rules and specified method
Currently implemented methods are
bruteforce : returns ``_bruteforce_capped``
PKDtree : return ``_pkdtree_capped`
"""
methods = {'bruteforce': _bruteforce_capped,
'pkdtree': _pkdtree_capped}

if method is not None:
return methods[method]

if len(reference) > 5000 and len(configuration) > 5000:
if box is None and reference.shape[0] != 3 and configuration.shape[0] != 3:
min_dim = np.array([reference.min(axis=0),
configuration.min(axis=0)])
max_dim = np.array([reference.max(axis=0),
configuration.max(axis=0)])
size = max_dim.max(axis=0) - min_dim.min(axis=0)
elif box is not None:
if box.shape[0] == 6:
size = box[:3]
else:
size = box.max(axis=0) - box.min(axis=0)

if (np.any(size < 10.0*max_cutoff) and
len(reference) > 100000 and
len(configuration) > 100000):
return methods['bruteforce']
else:
return methods['pkdtree']
return methods['bruteforce']


def _bruteforce_capped(reference, configuration, max_cutoff, min_cutoff=None, box=None):
"""
Using naive distance calulations, returns a list
containing the indices with one from each
reference and configuration arrays, such that the distance between
them is less than the specified cutoff distance
"""
pairs, distance = [], []

reference = np.asarray(reference, dtype=np.float32)
configuration = np.asarray(configuration, dtype=np.float32)

if reference.shape == (3, ):
reference = reference[None, :]
if configuration.shape == (3, ):
configuration = configuration[None, :]

_check_array(reference, 'reference')
_check_array(configuration, 'configuration')

for i, coords in enumerate(reference):
dist = distance_array(coords[None, :], configuration, box=box)[0]
if min_cutoff is not None:
idx = np.where((dist <= max_cutoff) & (dist > min_cutoff))[0]
else:
idx = np.where((dist <= max_cutoff))[0]
for j in idx:
pairs.append((i, j))
distance.append(dist[j])
return pairs, distance


def _pkdtree_capped(reference, configuration, max_cutoff, min_cutoff=None, box=None):
""" Capped Distance evaluations using KDtree.
Uses minimum image convention if *box* is specified
Returns:
--------
pairs : list
List of atom indices which are within the specified cutoff distance.
pairs `(i, j)` corresponds to i-th particle in reference and
j-th particle in configuration
distance : list
Distance between two atoms corresponding to the (i, j) indices
in pairs.
"""
from .pkdtree import PeriodicKDTree
from Bio.KDTree import KDTree

pairs, distances = [], []

reference = np.asarray(reference, dtype=np.float32)
configuration = np.asarray(configuration, dtype=np.float32)

if reference.shape == (3, ):
reference = reference[None, :]
if configuration.shape == (3, ):
configuration = configuration[None, :]

_check_array(reference, 'reference')
_check_array(configuration, 'configuration')

# Build The KDTree
if box is not None:
kdtree = PeriodicKDTree(box, bucket_size=10)
else:
kdtree = KDTree(dim=3, bucket_size=10)

kdtree.set_coords(configuration)
# Search for every query point
for idx, centers in enumerate(reference):
kdtree.search(centers, max_cutoff)
indices = kdtree.get_indices()
dist = distance_array(centers.reshape((1, 3)),
configuration[indices], box=box)[0]
if min_cutoff is not None:
mask = np.where(dist > min_cutoff)[0]
dist = dist[mask]
indices = [indices[mask[i]] for i in range(len(mask))]
if len(indices) != 0:
for num, j in enumerate(indices):
pairs.append((idx, j))
distances.append(dist[num])
return pairs, distances


def transform_RtoS(inputcoords, box, backend="serial"):
"""Transform an array of coordinates from real space to S space (aka lambda space)
Expand Down
97 changes: 97 additions & 0 deletions testsuite/MDAnalysisTests/lib/test_distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,12 @@
import numpy as np
from numpy.testing import assert_equal, assert_almost_equal

import itertools

import MDAnalysis as mda

from MDAnalysis.lib.mdamath import triclinic_vectors, triclinic_box

@pytest.mark.parametrize('coord_dtype', (np.float32, np.float64))
def test_transform_StoR_pass(coord_dtype):
box = np.array([10, 7, 3, 45, 60, 90], dtype=np.float32)
Expand All @@ -39,6 +43,99 @@ def test_transform_StoR_pass(coord_dtype):
assert_equal(original_r, test_r)


def test_capped_distance_noresults():
point1 = np.array([0.1, 0.1, 0.1], dtype=np.float32)
point2 = np.array([0.95, 0.1, 0.1], dtype=np.float32)

pairs, distances = mda.lib.distances.capped_distance(point1,
point2,
max_cutoff=0.2)

assert_equal(len(pairs), 0)


npoints_1 = (1, 100)

boxes_1 = (np.array([1, 2, 3, 90, 90, 90], dtype=np.float32), # ortho
np.array([1, 2, 3, 30, 45, 60], dtype=np.float32), # tri_box
triclinic_vectors( # tri_vecs
np.array([1, 2, 3, 90, 90, 45], dtype=np.float32)),
np.array([[0.5, 0.9, 1.9], # tri_vecs_bad
[2.0, 0.4, 0.1],
[0.0, 0.6, 0.5]], dtype=np.float32),
None, # Non Periodic
)


query_1 = (np.array([0.1, 0.1, 0.1], dtype=np.float32),
np.array([[0.1, 0.1, 0.1],
[0.2, 0.1, 0.1]], dtype=np.float32))

method_1 = ('bruteforce', 'pkdtree')

min_cutoff_1 = (None, 0.1)


@pytest.mark.parametrize('npoints', npoints_1)
@pytest.mark.parametrize('box', boxes_1)
@pytest.mark.parametrize('query', query_1)
@pytest.mark.parametrize('method', method_1)
@pytest.mark.parametrize('min_cutoff', min_cutoff_1)
def test_capped_distance_checkbrute(npoints, box, query, method, min_cutoff):
np.random.seed(90003)
points = (np.random.uniform(low=0, high=1.0,
size=(npoints, 3))*(boxes_1[0][:3])).astype(np.float32)
max_cutoff = 0.3
# capped distance should be able to handle array of vectors
# as well as single vectors.
pairs, dist = mda.lib.distances.capped_distance(query,
points,
max_cutoff,
min_cutoff=min_cutoff,
box=box,
method=method)
if pairs.shape != (0, ):
found_pairs = pairs[:, 1]
else:
found_pairs = list()

if(query.shape[0] == 3):
query = query.reshape((1, 3))

distances = mda.lib.distances.distance_array(query,
points, box=box)

if min_cutoff is None:
min_cutoff = 0.
indices = np.where((distances < max_cutoff) & (distances > min_cutoff))

assert_equal(np.sort(found_pairs, axis=0), np.sort(indices[1], axis=0))


@pytest.mark.parametrize('npoints,cutoff,meth',
[(1, 0.02, '_bruteforce_capped'),
(1, 0.2, '_bruteforce_capped'),
(6000, 0.02, '_pkdtree_capped'),
(6000, 0.2, '_pkdtree_capped'),
(200000, 0.02, '_pkdtree_capped'),
(200000, 0.2, '_bruteforce_capped')])
def test_method_selection(npoints, cutoff, meth):
np.random.seed(90003)
box = np.array([1, 1, 1, 90, 90, 90], dtype=np.float32)
points = (np.random.uniform(low=0, high=1.0,
size=(npoints, 3)) * (box[:3])).astype(np.float32)
if box is not None:
boxtype = mda.lib.distances._box_check(box)
# Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]]
if (boxtype == 'tri_box'):
box = triclinic_vectors(box)
if (boxtype == 'tri_vecs_bad'):
box = triclinic_vectors(triclinic_box(box[0], box[1], box[2]))
method = mda.lib.distances._determine_method(points, points,
cutoff, box=box)
assert_equal(method.__name__, meth)


# different boxlengths to shift a coordinate
shifts = [
((0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0)), # no shifting
Expand Down

0 comments on commit 4be325c

Please sign in to comment.