Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce Capped distances to evaluate the indices of pairs within a fixed radius #1941

Merged
merged 21 commits into from
Jul 12, 2018
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 210 additions & 1 deletion package/MDAnalysis/lib/distances.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*-
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*-
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this space might even break the emacs format line thing here?

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
Expand Down Expand Up @@ -72,6 +72,7 @@
from .mdamath import triclinic_vectors, triclinic_box



# hack to select backend with backend=<backend> kwarg. Note that
# the cython parallel code (prange) in parallel.distances is
# independent from the OpenMP code
Expand Down Expand Up @@ -390,6 +391,214 @@ def self_distance_array(reference, box=None, result=None, backend="serial"):
return distances


def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, box=None, method=None):
"""Calculates the pairs and distance within a specified distance

If a *box* is supplied, then a minimum image convention is used
to evaluate the distances.

An automatic guessing of optimized method to calculate the distances is
included in the function. An optional keyword for the method is also
provided. Users can override the method with this functionality.
Currently pkdtree and bruteforce are implemented.


Parameters
-----------
reference : array
reference coordinates array with shape ``reference.shape = (3,)``
or ``reference.shape = (len(reference), 3)``
configuration : array
Configuration coordinate array with shape ``reference.shape = (3,)``
or ``reference.shape = (len(reference), 3)``
max_cutoff : float
Maximum cutoff distance between the reference and configuration
min_cutoff : (optional) float
Minimum cutoff distance between reference and configuration [None]
box : (optional) array or None
The dimensions, if provided, must be provided in the same
The unitcell dimesions for this system format as returned
by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:
``[lx,ly, lz, alpha, beta, gamma]``. Minimum image convention
is applied if the box is provided [None]
method : (optional) 'bruteforce' or 'pkdtree' or 'None'
Keyword to override the automatic guessing of method built-in
in the function [None]

Returns
-------
pairs : array
Pair of indices, one from each reference and configuration such that
distance between them is within the ``max_cutoff`` and ``min_cutoff``
pairs[i,j] contains the indices i from reference coordinates, and
j from configuration
distances : array
Distances corresponding to each pair of indices.
d[k] corresponding to the pairs[i,j] gives the distance between
i-th and j-th coordinate in reference and configuration respectively

.. code-block:: python

pairs, distances = capped_distances(reference, coordinates, max_cutoff)
for indx, [a,b] in enumerate(pairs):
coord1 = reference[a]
coord2 = configuration[b]
distance = distances[indx]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

simple usage example here

Note
-----
Currently only supports brute force and Periodic KDtree

.. SeeAlso:: :func:'MDAnalysis.lib.distances.distance_array'
.. SeeAlso:: :func:'MDAnalysis.lib.pkdtree.PeriodicKDTree'

"""

if box is not None:
boxtype = _box_check(box)
# Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]]
if (boxtype == 'tri_box'):
box = triclinic_vectors(box)
if (boxtype == 'tri_vecs_bad'):
box = triclinic_vectors(triclinic_box(box[0], box[1], box[2]))
method = _determine_method(reference, configuration,
max_cutoff, min_cutoff=min_cutoff,
box=box, method=method)
pairs, dist = method(reference, configuration, max_cutoff,
min_cutoff=min_cutoff, box=box)

return np.array(pairs), np.array(dist)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be np.asarray in case the methods already return an array



def _determine_method(reference, configuration, max_cutoff, min_cutoff=None, box=None, method=None):
"""
Switch between different methods based on the the optimized time.
All the rules to select the method based on the input can be
incorporated here.

Returns
-------
Function object based on the rules and specified method

Currently implemented methods are
bruteforce : returns ``_bruteforce_capped``
PKDtree : return ``_pkdtree_capped`

"""
methods = {'bruteforce': _bruteforce_capped,
'pkdtree':_pkdtree_capped}

if method is not None:
return methods[method]

if len(reference) > 5000 and len(configuration) > 5000:
if box is None and reference.shape[0] != 3 and configuration.shape[0] != 3:
min_dim = np.array([reference.min(axis=0),
configuration.min(axis=0)])
max_dim = np.array([reference.max(axis=0),
configuration.max(axis=0)])
size = max_dim.max(axis=0) - min_dim.min(axis=0)
elif box is not None:
if box.shape[0] == 6:
size = box[:3]
else:
size = box.max(axis=0) - box.min(axis=0)

if np.any(size < 10.0*max_cutoff) and len(reference) > 100000 and len(configuration) > 100000:
return methods['bruteforce']
else:
return methods['pkdtree']
return methods['bruteforce']


def _bruteforce_capped(reference, configuration, max_cutoff, min_cutoff=None, box=None):
"""
Using naive distance calulations, returns a list
containing the indices with one from each
reference and configuration arrays, such that the distance between
them is less than the specified cutoff distance
"""
pairs, distance = [], []

reference = np.asarray(reference, dtype=np.float32)
configuration = np.asarray(configuration, dtype=np.float32)

if reference.shape == (3, ):
reference = reference[None, :]
if configuration.shape == (3, ):
configuration = configuration[None, :]

_check_array(reference, 'reference')
_check_array(configuration, 'configuration')

for i, coords in enumerate(reference):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this way of iterating over one coordinate set is probably slower than distance_array(ref, conf). I think guess_bonds does it this way to avoid memory problems

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes exactly. The distance_array cannot go beyond 5k particles. So unless we are explicitly limiting the brute force algorithm to 5k, shouldn't the more robust way be a part of the calculation. I agree once we put in the rules, the iteration over each particle would be about half as fast as the distance_array and should be replaced.

dist = distance_array(coords[None, :], configuration, box=box)[0]
if min_cutoff is not None:
idx = np.where((dist <= max_cutoff) & (dist > min_cutoff))[0]
else:
idx = np.where((dist <= max_cutoff))[0]
for j in idx:
pairs.append((i, j))
distance.append(dist[j])
return pairs, distance


def _pkdtree_capped(reference, configuration, max_cutoff, min_cutoff=None, box=None):
""" Capped Distance evaluations using KDtree.

Uses minimum image convention if *box* is specified

Returns:
--------
pairs : list
List of atom indices which are within the specified cutoff distance.
pairs `(i, j)` corresponds to i-th particle in reference and
j-th particle in configuration
distance : list
Distance between two atoms corresponding to the (i, j) indices
in pairs.

"""
from .pkdtree import PeriodicKDTree
from Bio.KDTree import KDTree

pairs, distances = [], []

reference = np.asarray(reference, dtype=np.float32)
configuration = np.asarray(configuration, dtype=np.float32)

if reference.shape == (3, ):
reference = reference[None, :]
if configuration.shape == (3, ):
configuration = configuration[None, :]

_check_array(reference, 'reference')
_check_array(configuration, 'configuration')

# Build The KDTree
if box is not None:
kdtree = PeriodicKDTree(box, bucket_size=10)
else:
kdtree = KDTree(dim=3, bucket_size=10)

kdtree.set_coords(configuration)
# Search for every query point
for idx, centers in enumerate(reference):
kdtree.search(centers, max_cutoff)
indices = kdtree.get_indices()
dist = distance_array(centers.reshape((1, 3)),
configuration[indices], box=box)[0]
if min_cutoff is not None:
mask = np.where(dist > min_cutoff)[0]
dist = dist[mask]
indices = [indices[mask[i]] for i in range(len(mask))]
if len(indices) != 0:
for num, j in enumerate(indices):
pairs.append((idx, j))
distances.append(dist[num])
return pairs, distances


def transform_RtoS(inputcoords, box, backend="serial"):
"""Transform an array of coordinates from real space to S space (aka lambda space)

Expand Down
57 changes: 57 additions & 0 deletions testsuite/MDAnalysisTests/lib/test_distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,12 @@
import numpy as np
from numpy.testing import assert_equal

import itertools

import MDAnalysis as mda

from MDAnalysis.lib.mdamath import triclinic_vectors

def test_transform_StoR_pass():
box = np.array([10, 7, 3, 45, 60, 90], dtype=np.float32)
s = np.array([[0.5, -0.1, 0.5]], dtype=np.float32)
Expand All @@ -37,9 +41,62 @@ def test_transform_StoR_pass():

assert_equal(original_r, test_r)


def test_transform_StoR_fail():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why removing this test?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test is included in test_transform_StoR_pass using parametrize. I guess someone modified it recently which got reflected here after I performed a pull from the develop branch.

box = np.array([10, 7, 3, 45, 60, 90], dtype=np.float32)
s = np.array([[0.5, -0.1, 0.5]])

with pytest.raises(TypeError, match='S must be of type float32'):
r = mda.lib.distances.transform_StoR(s, box)


def test_capped_distance_noresults():
point1 = np.array([0.1, 0.1, 0.1], dtype=np.float32)
point2 = np.array([0.95, 0.1, 0.1], dtype=np.float32)

pairs, distances = mda.lib.distances.capped_distance(point1,
point2,
max_cutoff=0.2)

assert_equal(len(pairs), 0)


boxes_1 = (np.array([1, 2, 3, 90, 90, 90], dtype=np.float32), # ortho
np.array([1, 2, 3, 30, 45, 60], dtype=np.float32), # tri_box
triclinic_vectors( # tri_vecs
np.array([1, 2, 3, 90, 90, 45], dtype=np.float32)),
np.array([[0.5, 0.9, 1.9], # tri_vecs_bad
[2.0, 0.4, 0.1],
[0.0, 0.6, 0.5]], dtype=np.float32))

query_1 = (np.array([0.1, 0.1, 0.1], dtype=np.float32),
np.array([[0.1, 0.1, 0.1],
[0.2, 0.1, 0.1]], dtype=np.float32))

method_1 = ('bruteforce', 'pkdtree')

np.random.seed(90003)
points = (np.random.uniform(low=0, high=1.0,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these two lines should be inside the test function

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and now deleted

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, didn't see it. Sorry about that.

size=(100, 3))*(boxes_1[0][:3])).astype(np.float32)


@pytest.mark.parametrize('box, query , method',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think if you stack the decorators, you don't need the product call, it does it itself, ie

@pytest.mark.parametrize('box', ...)
@pytest.mark.parametrize('query', ...)
@pytest.mark.parametrize('method', ...)
def test_thing(box, query, method):

itertools.product(boxes_1, query_1, method_1))
def test_capped_distance_checkbrute(box, query, method):
max_cutoff = 0.3
# capped distance should be able to handle array of vectors
# as well as single vectors.
pairs, dist = mda.lib.distances.capped_distance(query,
points,
max_cutoff,
box=box,
method=method)
if(query.shape[0] == 3):
distances = mda.lib.distances.distance_array(query[None, :],
points, box=box)
else:
distances = mda.lib.distances.distance_array(query,
points, box=box)
indices = np.where(distances < max_cutoff)

assert_equal(pairs[:, 1], indices[1])