Skip to content

Commit

Permalink
Refactor LookupDistDotBuilder
Browse files Browse the repository at this point in the history
- Use composition instead of inheritance to allow boundaries to be
  defined later
- Add (questionable) helper methods for determining boundaries
  • Loading branch information
clbarnes committed Apr 15, 2021
1 parent 978a99d commit c6b19c6
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 29 deletions.
135 changes: 120 additions & 15 deletions navis/nbl/smat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,13 @@
from collections import Counter
from concurrent.futures import ProcessPoolExecutor
from typing import Iterator, Sequence, Callable, List, Iterable, Any, Tuple
import logging

import numpy as np
import pandas as pd

from ..config import logger
from ..core.neurons import Dotprops

logger = logging.getLogger(__name__)

DEFAULT_SEED = 1991

epsilon = sys.float_info.epsilon
Expand Down Expand Up @@ -276,13 +274,11 @@ def dist_dot_alpha(q: Dotprops, t: Dotprops):
return [dist, dot * np.sqrt(alpha)]


class LookupDistDotBuilder(LookupNdBuilder):
class LookupDistDotBuilder:
def __init__(
self,
dotprops,
matching_sets,
dist_boundaries,
dot_boundaries,
nonmatching=None,
use_alpha=False,
seed=DEFAULT_SEED,
Expand All @@ -295,6 +291,10 @@ def __init__(
2. The dot products of direction vectors around those points,
optionally scaled by the colinearity ``alpha``.
The boundaries for the bins for each of these two values need to be set.
Use the ``.calc_(dist|dot)_boundaries`` methods to estimate values
based on the data, or the ``.set_(dist|dot)_boundaries`` to use your own.
Parameters
----------
dotprops : dict or list of Dotprops
Expand Down Expand Up @@ -334,18 +334,123 @@ def __init__(
Non-matching pairs are drawn at random using this seed,
by default {DEFAULT_SEED}
"""
fn = dist_dot_alpha if use_alpha else dist_dot
super().__init__(
dotprops,
matching_sets,
[wrap_bounds(dist_boundaries, 0), wrap_bounds(dot_boundaries, 0, 1)],
fn,
nonmatching,
seed,
self.dotprops = dotprops
self.matching_sets = matching_sets
self._dist_boundaries = None
self._dot_boundaries = None
self.fn = dist_dot_alpha if use_alpha else dist_dot
self.nonmatching = nonmatching
self.seed = seed

@property
def dist_boundaries(self):
return self._dist_boundaries

def set_dist_boundaries(self, bounds):
"""Set the boundaries for point match distance bins.
``numpy.geomspace`` is helpful if you know approximately
what the upper bound of the lowest bin and the lower bound of the
highest bin should be.
Parameters
----------
bounds : array-like of float
Bin boundaries to set (may be expanded to cover full domain).
Should not go below 0.
Returns
-------
np.ndarray
The (possibly expanded) bin boundaries which will be used.
"""
self._dist_boundaries = wrap_bounds(bounds, 0)
return self._dist_boundaries

def calc_dist_boundaries(self, n_bins, scale=10):
"""Estimate appropriate point match distance bins from the scale of the data.
The lower bound of the highest bin will be estimated as ``1/scale`` the size
of the diagonal of the axis aligned bounding box of the smallest dotprop:
there is unlikely to be any value in a point match
at a distance the same order of magnitude as a neuron's size.
Each subsequent lower bound will be half of that bin's upper bound;
therefore the upper bound of the lowest bin will be
``highest_lower / scale ** (n_bins - 1)``.
These estimates are quite conservative and may not yield good results for your use case,
but you may want to use them as a guide for your own automatically-determined boundaries.
Parameters
----------
n_bins : int
Number of bins to create.
scale : float, optional
The multiplication from one bound to another, default 10.
Returns
-------
np.ndarray
The bin boundaries which will be used (length ``n_bins + 1``).
"""

smallest_diag = np.inf
for dp in self.dotprops:
smallest_diag = min(
smallest_diag, np.linalg.norm(np.ptp(dp.points, axis=0))
)
highest_lower = smallest_diag / scale
return self.set_dist_boundaries(
highest_lower / scale ** np.arange(n_bins - 1)[::-1]
)

@property
def dot_boundaries(self):
return self._dot_boundaries

def set_dot_boundaries(self, bounds):
"""Set the boundaries for point match tangent dot product bins.
Parameters
----------
bounds : array-like of float
Bin boundaries to set (may be expanded to cover full domain).
Should not go below 0 or above 1.
Returns
-------
np.ndarray
The (possibly expanded) bin boundaries which will be used.
"""
self._dot_boundaries = wrap_bounds(bounds, 0, 1)
return self._dot_boundaries

def calc_dot_boundaries(self, n_bins):
"""Linearly interpolate bin boundaries between 0 and 1.
Parameters
----------
n_bins : int
Number of bins to use.
Returns
-------
np.ndarray
Bin boundaries which will be used (length ``n_bins + 1``)
"""
return self.set_dot_boundaries(np.linspace(0, 1, n_bins + 1))

def build(self, threads=None) -> Lookup2d:
return Lookup2d(*self._build(threads))
builder = LookupNdBuilder(
self.dotprops,
self.matching_sets,
[self.dist_boundaries, self.dot_boundaries],
self.fn,
self.nonmatching,
self.seed
)
return Lookup2d(*builder._build(threads))


class LookupNd:
Expand Down
42 changes: 28 additions & 14 deletions tests/test_nbl/test_smat.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,27 @@ def test_lookup2d_roundtrip(strict):
assert np.allclose(b1, b2)


def prepare_lookupdistdotbuilder(neurons, alpha=False, k=5):
k = 5
def test_calc_dist_boundaries(neurons):
dotprops = [Dotprops(n.nodes[["x", "y", "z"]], k=5) for n in neurons]
builder = LookupDistDotBuilder(dotprops, [])
n_bins = 5
bounds = builder.calc_dist_boundaries(n_bins)
assert bounds.shape == (n_bins + 1,)
assert bounds[0] == -np.inf
assert bounds[-1] == np.inf


def test_calc_dot_boundaries(neurons):
dotprops = [Dotprops(n.nodes[["x", "y", "z"]], k=5) for n in neurons]
builder = LookupDistDotBuilder(dotprops, [])
n_bins = 5
bounds = builder.calc_dot_boundaries(n_bins)
assert bounds.shape == (n_bins + 1,)
assert bounds[0] == -np.inf
assert bounds[-1] == np.inf


def prepare_lookupdistdotbuilder(neurons, alpha=False, k=5, with_bounds=False):
dotprops = [Dotprops(n.nodes[["x", "y", "z"]], k) for n in neurons]
n_orig = len(dotprops)

Expand All @@ -135,29 +154,24 @@ def prepare_lookupdistdotbuilder(neurons, alpha=False, k=5):
# original neurons should all not match each other
nonmatching = list(range(n_orig))

# max distance between any 2 points in the data
# for calculating dist boundaries
max_dist = np.linalg.norm(
np.ptp(
np.concatenate([dp.points for dp in dotprops], axis=0), axis=0,
)
)

return LookupDistDotBuilder(
builder = LookupDistDotBuilder(
dotprops,
matching_sets,
np.geomspace(10, max_dist, 5)[:-1],
np.linspace(0, 1, 5),
nonmatching,
alpha,
seed=SEED + 1,
)
if with_bounds:
n_bins = 5
builder.calc_dist_boundaries(n_bins)
builder.calc_dot_boundaries(n_bins)
return builder


@pytest.mark.parametrize(["threads"], [(None,), (0,), (2,)])
@pytest.mark.parametrize(["alpha"], [(True,), (False,)])
def test_lookupdistdotbuilder_builds(neurons, threads, alpha):
builder = prepare_lookupdistdotbuilder(neurons, alpha)
builder = prepare_lookupdistdotbuilder(neurons, alpha, with_bounds=True)
lookup = builder.build(threads)
# `pytest -rP` to see output
print(lookup.to_dataframe())

0 comments on commit c6b19c6

Please sign in to comment.