diff --git a/benchmarks/bench_filter_cluster.py b/benchmarks/bench_filter_cluster.py
deleted file mode 100644
index 2d0a6fe09..000000000
--- a/benchmarks/bench_filter_cluster.py
+++ /dev/null
@@ -1,123 +0,0 @@
-import timeit
-import numpy as np
-
-from kbmod.filters.clustering_filters import *
-from kbmod.result_list import ResultList, ResultRow
-from kbmod.search import *
-
-
-def _make_data(objs, times):
- """Create a ResultList for the given objects.
-
- Parameters
- ----------
- obj : list of lists
- A list where each element specifies a Trajectory
- as [x, y, xv, yv].
-
- Returns
- -------
- ResultList
- """
- num_times = len(times)
- rs = ResultList(times, track_filtered=True)
- for x in objs:
- t = Trajectory()
- t.x = x[0]
- t.y = x[1]
- t.vx = x[2]
- t.vy = x[3]
- t.lh = 100.0
-
- row = ResultRow(t, num_times)
- rs.append_result(row)
- return rs
-
-
-def run_index_benchmark(filter, rs):
- tmr = timeit.Timer(stmt="filter.keep_indices(rs)", globals=locals())
- res_time = np.mean(tmr.repeat(repeat=10, number=20))
- return res_time
-
-
-def run_all_benchmarks():
- times = [(10.0 + 0.1 * float(i)) for i in range(20)]
- rs1 = _make_data(
- [
- [10, 11, 1, 2],
- [10, 11, 1000, -1000],
- [10, 11, 0.0, 0.0],
- [25, 24, 1.0, 1.0],
- [25, 26, 10.0, 10.0],
- [10, 12, 5, 5],
- ],
- times,
- )
-
- print("Method | Time")
- print("-" * 40)
-
- f1 = DBSCANFilter("position", 0.025, 100, 100, [0, 50], [0, 1.5], times)
-
- res_time = run_index_benchmark(f1, rs1)
- print(f"position - 2 clusters | {res_time:10.7f}")
-
- f2 = DBSCANFilter("position", 0.025, 100, 100, [0, 50], [0, 1.5], times)
-
- res_time = run_index_benchmark(f2, rs1)
- print(f"position - 4 clusters | {res_time:10.7f}")
-
- f3 = DBSCANFilter("position", 0.025, 1000, 1000, [0, 50], [0, 1.5], times)
-
- res_time = run_index_benchmark(f3, rs1)
- print(f"position - 1 cluster | {res_time:10.7f}")
-
- rs2 = _make_data(
- [
- [10, 11, 1, 2],
- [10, 11, 1000, -1000],
- [10, 11, 1.0, 2.1],
- [55, 54, 1.0, 1.0],
- [55, 56, 10.0, 10.0],
- [10, 12, 4.1, 8],
- ],
- times,
- )
-
- f4 = DBSCANFilter("all", 0.025, 100, 100, [0, 100], [0, 1.5], times)
-
- res_time = run_index_benchmark(f4, rs2)
- print(f"all - 5 clusters | {res_time:10.7f}")
-
- # Larger eps is 3 clusters.
- f5 = DBSCANFilter("all", 0.25, 100, 100, [0, 100], [0, 1.5], times)
-
- res_time = run_index_benchmark(f5, rs2)
- print(f"all - 3 clusters | {res_time:10.7f}")
-
- # Larger scale is 3 clusters.
- f6 = DBSCANFilter("all", 0.025, 100, 100, [0, 5000], [0, 1.5], times)
-
- res_time = run_index_benchmark(f6, rs2)
- print(f"all (larger) - 3 clusters | {res_time:10.7f}")
-
- rs3 = _make_data(
- [
- [10, 11, 1, 2],
- [10, 11, 2, 5],
- [10, 11, 1.01, 1.99],
- [21, 23, 1, 2],
- [21, 23, -10, -10],
- [5, 10, 6, 1],
- [5, 10, 1, 2],
- ],
- times,
- )
-
- f7 = DBSCANFilter("mid_position", 0.1, 20, 20, [0, 100], [0, 1.5], times)
- res_time = run_index_benchmark(f7, rs3)
- print(f"mid_position | {res_time:10.7f}")
-
-
-if __name__ == "__main__":
- run_all_benchmarks()
diff --git a/docs/source/user_manual/results_filtering.rst b/docs/source/user_manual/results_filtering.rst
index 5171af502..9eb08e7f7 100644
--- a/docs/source/user_manual/results_filtering.rst
+++ b/docs/source/user_manual/results_filtering.rst
@@ -54,25 +54,23 @@ In the extreme case imagine a bright object centered at (10, 15) and moving at (
But how do we tell which trajectories are "close"? If we only look at the pixel locations at a given time point (e.g., t=0), we might combine two trajectories with very different velocities that happen to pass near the same pixel at that time. Even if this is not likely for real objects, we might merge a real object with a noisy false detection.
-The `scikit-learn `_ ``DBSCAN`` algorithm performs clustering the trajectories. The algorithm can cluster the results based on a combination of position, velocity, and angle as specified by the parameter cluster_type, which can take on the values of:
+The `scikit-learn <https://scikit-learn.org>`_ ``DBSCAN`` algorithm clusters the trajectories. The algorithm can cluster the results based on a combination of position and velocity as specified by the parameter ``cluster_type``, which can take on the values of:
-* ``all`` - Use scaled x position, scaled y position, scaled velocity, and scaled angle as coordinates for clustering.
-* ``position`` - Use only the trajectory's scaled (x, y) position at the first timestep for clustering.
-* ``position_unscaled`` - Use only trajctory's (x, y) position at the first timestep for clustering.
-* ``mid_position`` - Use the (scaled) predicted position at the median time as coordinates for clustering.
-* ``mid_position_unscaled`` - Use the predicted position at the median time as coordinates for clustering.
-* ``start_end_position`` - Use the (scaled) predicted positions at the start and end times as coordinates for clustering.
-* ``start_end_position_unscaled`` - Use the predicted positions at the start and end times as coordinates for clustering.
+* ``all`` or ``pos_vel`` - Use the trajectory's (x, y) position at the first timestep and its velocity (vx, vy) for clustering.
+* ``position`` or ``start_position`` - Use only the trajectory's (x, y) position at the first timestep for clustering.
+* ``mid_position`` - Use the predicted position at the median time as coordinates for clustering.
+* ``start_end_position`` - Use the predicted positions at the start and end times as coordinates for clustering.
Most of the clustering approaches rely on predicted positions at different times. For example, midpoint-based clustering will encode each trajectory `(x0, y0, xv, yv)` as a 2-dimensional point `(x0 + tm * xv, y0 + tm * yv)` where `tm` is the median time. Thus trajectories only need to be close at time=`tm` to be merged into a single trajectory. In contrast, the start- and end-based clustering will encode the same trajectory as a 4-dimensional point `(x0, y0, x0 + te * xv, y0 + te * yv)` where `te` is the last time. Thus the points will need to be close at both time=0.0 and time=`te` to be merged into a single result.
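+For example, a minimal sketch of both encodings (plain Python with made-up numbers, not KBMOD code)::
+
+    # A single trajectory (x0, y0, xv, yv); tm is the median zeroed time
+    # and te is the last zeroed time.
+    x0, y0, xv, yv = 10.0, 11.0, 1.0, 2.0
+    tm, te = 5.0, 10.0
+
+    mid_position = (x0 + tm * xv, y0 + tm * yv)       # (15.0, 21.0)
+    start_end = (x0, y0, x0 + te * xv, y0 + te * yv)  # (10.0, 11.0, 20.0, 31.0)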
-Each of the positional based clusterings have both a scaled and unscaled version. This impacts how DBSCAN interprets distances. In the scaled version all values are divided by the width of the corresponding dimension to normalize the values. This maps points within the image to [0, 1], so an `eps` value of 0.01 might make sense. In contrast the unscaled versions do not perform normalization. The distances between two trajectories is measured in pixels. Here an `eps` value of 10 (for 10 pixels) might be better.
+The way DBSCAN computes distances between trajectories depends on the encoding used. For the positional encodings, ``position``, ``mid_position``, and ``start_end_position``, the distance is measured directly in pixels, so ``cluster_eps`` is a pixel threshold. The ``all`` encoding behaves similarly, but since it mixes positions (in pixels) and velocities (in pixels per day), the coordinates are not all in the same space; ``cluster_v_scale`` controls how heavily velocity differences count relative to positional differences.
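+As an illustration, the sketch below (scikit-learn used directly, with made-up numbers, not KBMOD code) shows ``cluster_eps`` acting as a pixel threshold under the ``position`` encoding and ``cluster_v_scale`` reweighting velocities under the ``all`` encoding::
+
+    import numpy as np
+    from sklearn.cluster import DBSCAN
+
+    # Three trajectories as (x, y, vx, vy); the first two start 5 pixels apart.
+    trjs = np.array([[10.0, 11.0, 1.0, 2.0],
+                     [13.0, 15.0, 1.1, 2.1],
+                     [40.0, 5.0, -1.0, 2.0]])
+
+    # position encoding: cluster on (x, y) only. cluster_eps=6.0 merges the
+    # first two trajectories; cluster_eps=2.0 would keep them separate.
+    print(DBSCAN(eps=6.0, min_samples=1).fit(trjs[:, :2]).labels_)  # [0 0 1]
+
+    # all/pos_vel encoding: cluster on (x, y, v_scale * vx, v_scale * vy).
+    v_scale = 1.0
+    points = np.column_stack([trjs[:, :2], v_scale * trjs[:, 2:]])
+    print(DBSCAN(eps=6.0, min_samples=1).fit(points).labels_)  # [0 0 1]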
Relevant clustering parameters include:
* ``cluster_type`` - The type of predicted values to use when determining which trajectories should be clustered together, including position and velocity (if ``do_clustering = True``). Must be one of the values listed above.
* ``do_clustering`` - Cluster the resulting trajectories to remove duplicates.
-* ``eps`` - The distance threshold used by DBSCAN.
+* ``cluster_eps`` - The distance threshold used by DBSCAN.
+* ``cluster_v_scale`` - The relative scale between velocity differences and positional differences in ``all`` clustering.
See Also
________
diff --git a/docs/source/user_manual/search_params.rst b/docs/source/user_manual/search_params.rst
index 9c27042f5..fd640aeb3 100644
--- a/docs/source/user_manual/search_params.rst
+++ b/docs/source/user_manual/search_params.rst
@@ -28,15 +28,20 @@ This document serves to provide a quick overview of the existing parameters and
|                        |                             | remove all negative values prior to    |
|                        |                             | computing the percentiles.             |
+------------------------+-----------------------------+----------------------------------------+
+| ``cluster_eps``        | 20.0                        | The threshold to use for clustering    |
+|                        |                             | similar results.                       |
++------------------------+-----------------------------+----------------------------------------+
| ``cluster_type``       | all                         | Types of predicted values to use when  |
|                        |                             | determining trajectories to cluster    |
-|                        |                             | together, including position, velocity,|
-|                        |                             | and angles (if do_clustering = True).  |
+|                        |                             | together, including position and       |
+|                        |                             | velocities (if do_clustering = True).  |
|                        |                             | Options include: ``all``, ``position``,|
-|                        |                             | ``position_unscaled``, ``mid_position``|
-|                        |                             | ``mid_position_unscaled``,             |
-|                        |                             | ``start_end_position``, or             |
-|                        |                             | ``start_end_position_unscaled``.       |
+|                        |                             | ``mid_position``, and                  |
+|                        |                             | ``start_end_position``.                |
++------------------------+-----------------------------+----------------------------------------+
+| ``cluster_v_scale``    | 1.0                         | The weight of differences in velocity  |
+|                        |                             | relative to differences in position    |
+|                        |                             | during clustering.                     |
+------------------------+-----------------------------+----------------------------------------+
| ``coadds``             | []                          | A list of additional coadds to create. |
|                        |                             | These are not used in filtering, but   |
@@ -56,10 +61,6 @@ This document serves to provide a quick overview of the existing parameters and
| ``do_stamp_filter``    | True                        | Apply post-search filtering on the     |
|                        |                             | image stamps.                          |
+------------------------+-----------------------------+----------------------------------------+
-| ``eps``                | 0.03                        | The epsilon value to use in DBSCAN     |
-|                        |                             | clustering (if ``cluster_type=DBSCAN`` |
-|                        |                             | and ``do_clustering=True``).           |
-+------------------------+-----------------------------+----------------------------------------+
| ``encode_num_bytes``   | -1                          | The number of bytes to use to encode   |
|                        |                             | ``psi`` and ``phi`` images on GPU. By  |
|                        |                             | default a ``float`` encoding is used.  |
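Taken together, the renamed clustering parameters would appear in a configuration dictionary as follows (a sketch using the new defaults; ``config_overrides`` is an illustrative name)::

    config_overrides = {
        "do_clustering": True,
        "cluster_type": "all",    # or "position", "mid_position", "start_end_position"
        "cluster_eps": 20.0,      # DBSCAN threshold, now measured in pixels
        "cluster_v_scale": 1.0,   # weight of velocity vs. positional differences
    }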
diff --git a/notebooks/region_search/discrete_piles_e2e.ipynb b/notebooks/region_search/discrete_piles_e2e.ipynb
index fdc1a0acd..29dc3f134 100644
--- a/notebooks/region_search/discrete_piles_e2e.ipynb
+++ b/notebooks/region_search/discrete_piles_e2e.ipynb
@@ -213,7 +213,7 @@
" \"peak_offset\": [3.0, 3.0],\n",
" \"chunk_size\": 1000000,\n",
" \"stamp_type\": \"cpp_median\",\n",
- " \"eps\": 0.03,\n",
+ " \"cluster_eps\": 20.0,\n",
" \"clip_negative\": True,\n",
" \"mask_num_images\": 0,\n",
" \"cluster_type\": \"position\",\n",
diff --git a/src/kbmod/configuration.py b/src/kbmod/configuration.py
index 521068fa6..7ce545589 100644
--- a/src/kbmod/configuration.py
+++ b/src/kbmod/configuration.py
@@ -24,13 +24,14 @@ def __init__(self):
"center_thresh": 0.00,
"chunk_size": 500000,
"clip_negative": False,
+ "cluster_eps": 20.0,
"cluster_type": "all",
+ "cluster_v_scale": 1.0,
"coadds": [],
"debug": False,
"do_clustering": True,
"do_mask": True,
"do_stamp_filter": True,
- "eps": 0.03,
"encode_num_bytes": -1,
"generator_config": None,
"gpu_filter": False,
diff --git a/src/kbmod/filters/clustering_filters.py b/src/kbmod/filters/clustering_filters.py
index 450c34f00..db7015889 100644
--- a/src/kbmod/filters/clustering_filters.py
+++ b/src/kbmod/filters/clustering_filters.py
@@ -11,17 +11,17 @@ class DBSCANFilter:
"""Cluster the candidates using DBSCAN and only keep a
single representative trajectory from each cluster."""
- def __init__(self, eps, **kwargs):
+ def __init__(self, cluster_eps, **kwargs):
"""Create a DBSCANFilter.
Parameters
----------
- eps : `float`
+ cluster_eps : `float`
The clustering threshold.
"""
- self.eps = eps
+ self.cluster_eps = cluster_eps
self.cluster_type = ""
- self.cluster_args = dict(eps=self.eps, min_samples=1, n_jobs=-1)
+ self.cluster_args = dict(eps=self.cluster_eps, min_samples=1, n_jobs=-1)
def get_filter_name(self):
"""Get the name of the filter.
@@ -31,7 +31,7 @@ def get_filter_name(self):
str
The filter name.
"""
- return f"DBSCAN_{self.cluster_type} eps={self.eps}"
+ return f"DBSCAN_{self.cluster_type} eps={self.cluster_eps}"
def _build_clustering_data(self, result_data):
"""Build the specific data set for this clustering approach.
@@ -82,37 +82,18 @@ def keep_indices(self, result_data):
class ClusterPredictionFilter(DBSCANFilter):
"""Cluster the candidates using their positions at specific times."""
- def __init__(self, eps, pred_times=[0.0], height=1.0, width=1.0, scaled=True, **kwargs):
+ def __init__(self, cluster_eps, pred_times=[0.0], **kwargs):
"""Create a DBSCANFilter.
Parameters
----------
- eps : `float`
+ cluster_eps : `float`
The clustering threshold.
pred_times : `list`
The times at which to predict the positions.
Default = [0.0] (starting position only)
- height : `int`
- The size of the image height (in pixels) for scaling.
- Default: 1 (no scaling)
- width : `int`
- The size of the image width (in pixels) for scaling.
- Default: 1 (no scaling)
- scaled : `bool`
- Scale the positions to [0, 1] based on ``width`` and ``height``. This impacts
- how ``eps`` is interpreted by DBSCAN. If scaling is turned on ``eps``
- approximates the percentage of each dimension between points. If scaling is
- turned off ``eps`` is a distance in pixels.
"""
- super().__init__(eps, **kwargs)
- if scaled:
- if height <= 0.0 or width <= 0:
- raise ValueError(f"Invalid scaling parameters y={height} by x={width}")
- self.height = height
- self.width = width
- else:
- self.height = 1.0
- self.width = 1.0
+ super().__init__(cluster_eps, **kwargs)
# Confirm we have at least one prediction time.
if len(pred_times) == 0:
@@ -120,7 +101,7 @@ def __init__(self, eps, pred_times=[0.0], height=1.0, width=1.0, scaled=True, **
self.times = pred_times
# Set up the clustering algorithm's name.
- self.cluster_type = f"position (scaled={scaled}) t={self.times}"
+ self.cluster_type = f"position t={self.times}"
def _build_clustering_data(self, result_data):
"""Build the specific data set for this clustering approach.
@@ -145,49 +126,29 @@ def _build_clustering_data(self, result_data):
# Compute the predicted positions at each of the prediction times.
coords = []
for t in self.times:
- coords.append((x_arr + t * vx_arr) / self.width)
- coords.append((y_arr + t * vy_arr) / self.height)
+ coords.append(x_arr + t * vx_arr)
+ coords.append(y_arr + t * vy_arr)
return np.array(coords)
-class ClusterPosAngVelFilter(DBSCANFilter):
- """Cluster the candidates using their scaled starting position, trajectory
- angles, and trajectory velocity magnitude.
- """
+class ClusterPosVelFilter(DBSCANFilter):
+ """Cluster the candidates using their starting position and velocities."""
- def __init__(self, eps, height, width, vel_lims, ang_lims, **kwargs):
+ def __init__(self, cluster_eps, cluster_v_scale=1.0, **kwargs):
"""Create a DBSCANFilter.
Parameters
----------
- eps : `float`
- The clustering threshold.
- height : `int`
- The size of the image height (in pixels) for scaling.
- width : `int`
- The size of the image width (in pixels) for scaling.
- vel_lims : `list`
- The velocity limits of the search such that v_lim[1] - v_lim[0]
- is the range of velocities searched. Used for scaling.
- ang_lims : `list`
- The angle limits of the search such that ang_lim[1] - ang_lim[0]
- is the range of velocities searched.
+ cluster_eps : `float`
+ The clustering threshold in pixels.
+ cluster_v_scale : `float`
+ The relative scaling of velocity differences compared to position
+ differences. Default: 1.0 (equal weighting).
"""
- super().__init__(eps, **kwargs)
- if height <= 0.0 or width <= 0:
- raise ValueError(f"Invalid scaling parameters y={height} by x={width}")
- if len(vel_lims) < 2:
- raise ValueError(f"Invalid velocity magnitude scaling parameters {vel_lims}")
- if len(ang_lims) < 2:
- raise ValueError(f"Invalid velocity angle scaling parameters {ang_lims}")
- self.height = height
- self.width = width
-
- self.v_scale = (vel_lims[1] - vel_lims[0]) if vel_lims[1] != vel_lims[0] else 1.0
- self.a_scale = (ang_lims[1] - ang_lims[0]) if ang_lims[1] != ang_lims[0] else 1.0
- self.min_v = vel_lims[0]
- self.min_a = ang_lims[0]
-
+ super().__init__(cluster_eps, **kwargs)
+ if cluster_v_scale < 0.0:
+ raise ValueError("cluster_v_scale cannot be negative.")
+ self.cluster_v_scale = cluster_v_scale
self.cluster_type = "all"
def _build_clustering_data(self, result_data):
@@ -207,19 +168,9 @@ def _build_clustering_data(self, result_data):
# Create arrays of each of the trajectories' values.
x_arr = np.array(result_data["x"])
y_arr = np.array(result_data["y"])
- vx_arr = np.array(result_data["vx"])
- vy_arr = np.array(result_data["vy"])
- vel_arr = np.sqrt(np.square(vx_arr) + np.square(vy_arr))
- ang_arr = np.arctan2(vy_arr, vx_arr)
-
- # Scale the values. We always do this because distances and velocities
- # are not directly comparable.
- scaled_x = x_arr / self.width
- scaled_y = y_arr / self.height
- scaled_vel = (vel_arr - self.min_v) / self.v_scale
- scaled_ang = (ang_arr - self.min_a) / self.a_scale
-
- return np.array([scaled_x, scaled_y, scaled_vel, scaled_ang])
+ vx_arr = np.array(result_data["vx"]) * self.cluster_v_scale
+ vy_arr = np.array(result_data["vy"]) * self.cluster_v_scale
+ return np.array([x_arr, y_arr, vx_arr, vy_arr])
def apply_clustering(result_data, cluster_params):
@@ -232,7 +183,7 @@ def apply_clustering(result_data, cluster_params):
the filtering.
cluster_params : dict
Contains values concerning the image and search settings including:
- cluster_type, eps, height, width, scaled, vel_lims, ang_lims, and times.
+ cluster_type, cluster_eps, times, and cluster_v_scale (optional).
Raises
------
@@ -240,7 +191,7 @@ def apply_clustering(result_data, cluster_params):
Raises a TypeError if ``result_data`` is of an unsupported type, a KeyError if a required clustering parameter is missing, and a ValueError if the ``cluster_type`` is not recognized.
"""
if "cluster_type" not in cluster_params:
- raise ValueError("Missing cluster_type parameter")
+ raise KeyError("Missing cluster_type parameter")
cluster_type = cluster_params["cluster_type"]
# Skip clustering if there is nothing to cluster.
@@ -249,36 +200,23 @@ def apply_clustering(result_data, cluster_params):
return
# Get the times used for prediction clustering.
+ if not "times" in cluster_params:
+ raise KeyError("Missing times parameter in the clustering parameters.")
all_times = np.sort(cluster_params["times"])
zeroed_times = np.array(all_times) - all_times[0]
# Do the clustering and the filtering.
if cluster_type == "all" or cluster_type == "pos_vel":
- filt = ClusterPosAngVelFilter(**cluster_params)
+ filt = ClusterPosVelFilter(**cluster_params)
elif cluster_type == "position" or cluster_type == "start_position":
cluster_params["pred_times"] = [0.0]
- cluster_params["scaled"] = True
- filt = ClusterPredictionFilter(**cluster_params)
- elif cluster_type == "position_unscaled" or cluster_type == "start_position_unscaled":
- cluster_params["pred_times"] = [0.0]
- cluster_params["scaled"] = False
filt = ClusterPredictionFilter(**cluster_params)
elif cluster_type == "mid_position":
cluster_params["pred_times"] = [np.median(zeroed_times)]
- cluster_params["scaled"] = True
- filt = ClusterPredictionFilter(**cluster_params)
- elif cluster_type == "mid_position_unscaled":
- cluster_params["pred_times"] = [np.median(zeroed_times)]
- cluster_params["scaled"] = False
filt = ClusterPredictionFilter(**cluster_params)
elif cluster_type == "start_end_position":
cluster_params["pred_times"] = [0.0, zeroed_times[-1]]
filt = ClusterPredictionFilter(**cluster_params)
- cluster_params["scaled"] = True
- elif cluster_type == "start_end_position_unscaled":
- cluster_params["pred_times"] = [0.0, zeroed_times[-1]]
- filt = ClusterPredictionFilter(**cluster_params)
- cluster_params["scaled"] = False
else:
raise ValueError(f"Unknown clustering type: {cluster_type}")
logger.info(f"Clustering {len(result_data)} results using {filt.get_filter_name()}")
diff --git a/src/kbmod/run_search.py b/src/kbmod/run_search.py
index 52c481ccd..b5bbd6111 100644
--- a/src/kbmod/run_search.py
+++ b/src/kbmod/run_search.py
@@ -25,23 +25,6 @@ class SearchRunner:
def __init__(self):
pass
- def get_angle_limits(self, config):
- """Compute the angle limits based on the configuration information.
-
- Parameters
- ----------
- config : `SearchConfiguration`
- The configuration parameters
-
- Returns
- -------
- res : `list`
- A list with the minimum and maximum angle to search (in pixel space).
- """
- ang_min = config["average_angle"] - config["ang_arr"][0]
- ang_max = config["average_angle"] + config["ang_arr"][1]
- return [ang_min, ang_max]
-
def load_and_filter_results(self, search, config):
"""This function loads results that are output by the gpu grid search.
Results are loaded in chunks and evaluated to see if the minimum
@@ -248,13 +231,10 @@ def run_search(self, config, stack, trj_generator=None):
cluster_timer = kb.DebugTimer("clustering", logger)
mjds = [stack.get_obstime(t) for t in range(stack.img_count())]
cluster_params = {
- "ang_lims": self.get_angle_limits(config),
"cluster_type": config["cluster_type"],
- "eps": config["eps"],
+ "cluster_eps": config["cluster_eps"],
+ "cluster_v_scale": config["cluster_v_scale"],
"times": np.array(mjds),
- "vel_lims": config["v_arr"],
- "width": stack.get_width(),
- "height": stack.get_height(),
}
apply_clustering(keep, cluster_params)
cluster_timer.stop()
diff --git a/tests/test_clustering_filters.py b/tests/test_clustering_filters.py
index 2a77efa29..dfdce7ae1 100644
--- a/tests/test_clustering_filters.py
+++ b/tests/test_clustering_filters.py
@@ -40,30 +40,19 @@ def test_dbscan_position_results(self):
)
# Standard-ish params collapses to 2 clusters.
- f1 = ClusterPredictionFilter(eps=0.025, pred_times=[0.0], height=100, width=100)
+ f1 = ClusterPredictionFilter(cluster_eps=5.0, pred_times=[0.0])
self.assertEqual(f1.keep_indices(rs), [0, 3])
# Small eps is 4 clusters.
- f2 = ClusterPredictionFilter(eps=0.000015, pred_times=[0.0], height=100, width=100)
+ f2 = ClusterPredictionFilter(cluster_eps=0.000015, pred_times=[0.0])
self.assertEqual(f2.keep_indices(rs), [0, 3, 4, 5])
# Large eps is 1 cluster.
- f3 = ClusterPredictionFilter(eps=0.025, pred_times=[0.0], height=1000, width=1000)
+ f3 = ClusterPredictionFilter(cluster_eps=5000.0, pred_times=[0.0])
self.assertEqual(f3.keep_indices(rs), [0])
- # If we do not scale everything at the same starting point is marked its own cluster at low eps
- # (0.025 pixels max distance).
- f4 = ClusterPredictionFilter(eps=0.025, pred_times=[0.0], height=100, width=100, scaled=False)
- self.assertEqual(f4.keep_indices(rs), [0, 3, 4, 5])
-
- # We regain the clustering power with a higher eps (3 pixels max distance).
- f5 = ClusterPredictionFilter(eps=3.0, pred_times=[0.0], height=100, width=100, scaled=False)
- self.assertEqual(f5.keep_indices(rs), [0, 3])
-
# Catch invalid parameters
- self.assertRaises(ValueError, ClusterPredictionFilter, eps=0.025, width=0)
- self.assertRaises(ValueError, ClusterPredictionFilter, eps=0.025, height=0)
- self.assertRaises(ValueError, ClusterPredictionFilter, eps=0.025, pred_times=[])
+ self.assertRaises(ValueError, ClusterPredictionFilter, cluster_eps=0.025, pred_times=[])
def test_dbscan_all_results(self):
"""Test clustering based on the median position of the trajectory."""
@@ -79,36 +68,19 @@ def test_dbscan_all_results(self):
)
# Start with 5 clusters
- f1 = ClusterPosAngVelFilter(eps=0.025, height=100, width=100, vel_lims=[0, 100], ang_lims=[0, 1.5])
+ f1 = ClusterPosVelFilter(cluster_eps=5.0)
self.assertEqual(f1.keep_indices(rs), [0, 1, 3, 4, 5])
# Larger eps is 3 clusters.
- f2 = ClusterPosAngVelFilter(eps=0.25, height=100, width=100, vel_lims=[0, 100], ang_lims=[0, 1.5])
+ f2 = ClusterPosVelFilter(cluster_eps=20.0)
self.assertEqual(f2.keep_indices(rs), [0, 1, 3])
- # Larger scale is 3 clusters.
- f3 = ClusterPosAngVelFilter(eps=0.025, height=100, width=100, vel_lims=[0, 5000], ang_lims=[0, 1.5])
- self.assertEqual(f3.keep_indices(rs), [0, 1, 3])
+ # The velocity scaling factor increases or reduces the impact of velocity differences.
+ f3 = ClusterPosVelFilter(cluster_eps=5.0, cluster_v_scale=5.0)
+ self.assertEqual(f3.keep_indices(rs), [0, 1, 3, 4, 5])
- # Catch invalid parameters
- self.assertRaises(
- ValueError,
- ClusterPosAngVelFilter,
- eps=0.025,
- height=100,
- width=100,
- vel_lims=[100],
- ang_lims=[0, 1.5],
- )
- self.assertRaises(
- ValueError,
- ClusterPosAngVelFilter,
- eps=0.025,
- height=100,
- width=100,
- vel_lims=[0, 5000],
- ang_lims=[1.5],
- )
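+ # A tiny velocity scale makes the clustering effectively position-only.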
+ f4 = ClusterPosVelFilter(cluster_eps=5.0, cluster_v_scale=1e-9)
+ self.assertEqual(f4.keep_indices(rs), [0, 3])
def test_dbscan_mid_pos(self):
rs = self._make_result_data(
@@ -124,21 +96,17 @@ def test_dbscan_mid_pos(self):
)
# Use the median time.
- f1 = ClusterPredictionFilter(eps=0.1, pred_times=[0.95], height=20, width=20)
+ f1 = ClusterPredictionFilter(cluster_eps=2.0, pred_times=[0.95])
self.assertEqual(f1.keep_indices(rs), [0, 1, 3, 6])
# Use different times.
- f2 = ClusterPredictionFilter(eps=0.1, pred_times=[10.0], height=20, width=20)
+ f2 = ClusterPredictionFilter(cluster_eps=2.0, pred_times=[10.0])
self.assertEqual(f2.keep_indices(rs), [0, 1, 3, 4, 5, 6])
# Use different times again.
- f3 = ClusterPredictionFilter(eps=0.1, pred_times=[0.001], height=20, width=20)
+ f3 = ClusterPredictionFilter(cluster_eps=2.0, pred_times=[0.001])
self.assertEqual(f3.keep_indices(rs), [0, 3, 5])
- # Try without scaling
- f4 = ClusterPredictionFilter(eps=0.1, pred_times=[10.95], height=20, width=20, scaled=False)
- self.assertEqual(f4.keep_indices(rs), [0, 1, 2, 3, 4, 5, 6])
-
def test_dbscan_start_end_pos(self):
rs = self._make_result_data(
[
@@ -156,18 +124,15 @@ def test_dbscan_start_end_pos(self):
]
)
- f1 = ClusterPredictionFilter(eps=3.0, pred_times=[10, 11.9], scaled=False)
+ f1 = ClusterPredictionFilter(cluster_eps=3.0, pred_times=[10, 11.9])
self.assertEqual(f1.keep_indices(rs), [0, 1, 4, 5, 8])
def test_clustering_results(self):
cluster_params = {
- "ang_lims": [0.0, 1.5],
"cluster_type": "all",
- "eps": 0.03,
+ "cluster_eps": 5.0,
+ "cluster_v_scale": 1.0,
"times": self.times,
- "width": 100,
- "height": 100,
- "vel_lims": [5.0, 40.0],
}
results = self._make_result_data(
@@ -182,6 +147,11 @@ def test_clustering_results(self):
apply_clustering(results, cluster_params)
self.assertEqual(len(results), 4)
+ # If we remove the weighting on velocity, then we drop to three clusters.
+ cluster_params["cluster_v_scale"] = 1e-16
+ apply_clustering(results, cluster_params)
+ self.assertEqual(len(results), 3)
+
# Try clustering with only positions.
results2 = self._make_result_data(
[
@@ -196,27 +166,10 @@ def test_clustering_results(self):
apply_clustering(results2, cluster_params)
self.assertEqual(len(results2), 3)
- # Try clustering with start and end positions
- results3 = self._make_result_data(
- [
- [10, 11, 1, 2],
- [10, 11, 10, 20],
- [40, 5, -1, 2],
- [5, 0, 1, 2],
- [5, 1, 1, 2],
- ]
- )
- cluster_params["cluster_type"] = "start_end_position"
- apply_clustering(results3, cluster_params)
- self.assertEqual(len(results3), 4)
-
# Try an invalid cluster_type.
cluster_params["cluster_type"] = "invalid"
self.assertRaises(ValueError, apply_clustering, results2, cluster_params)
- del cluster_params["cluster_type"]
- self.assertRaises(ValueError, apply_clustering, results2, cluster_params)
-
if __name__ == "__main__":
unittest.main()
diff --git a/tests/test_regression_test.py b/tests/test_regression_test.py
index e8ac76de1..521a54e8c 100644
--- a/tests/test_regression_test.py
+++ b/tests/test_regression_test.py
@@ -271,7 +271,7 @@ def perform_search(im_stack, res_filename, default_psf):
"peak_offset": [3.0, 3.0],
"chunk_size": 1000000,
"stamp_type": "cpp_median",
- "eps": 0.03,
+ "cluster_eps": 20.0,
"gpu_filter": True,
"clip_negative": True,
"x_pixel_buffer": 10,