forked from mattnedrich/MeanShift_py
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mean_shift.py
79 lines (69 loc) · 3.05 KB
/
mean_shift.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import numpy as np
import point_grouper as pg
import mean_shift_utils as ms_utils
MIN_DISTANCE = 0.000001
class MeanShift(object):
def __init__(self, kernel=ms_utils.gaussian_kernel):
if kernel == 'multivariate_gaussian':
kernel = ms_utils.multivariate_gaussian_kernel
self.kernel = kernel
def cluster(self, points, kernel_bandwidth, iteration_callback=None):
if(iteration_callback):
iteration_callback(points, 0)
shift_points = np.array(points)
max_min_dist = 1
iteration_number = 0
still_shifting = [True] * points.shape[0]
while max_min_dist > MIN_DISTANCE:
# print max_min_dist
max_min_dist = 0
iteration_number += 1
for i in range(0, len(shift_points)):
if not still_shifting[i]:
continue
p_new = shift_points[i]
p_new_start = p_new
p_new = self._shift_point(p_new, points, kernel_bandwidth)
dist = ms_utils.euclidean_dist(p_new, p_new_start)
if dist > max_min_dist:
max_min_dist = dist
if dist < MIN_DISTANCE:
still_shifting[i] = False
shift_points[i] = p_new
if iteration_callback:
iteration_callback(shift_points, iteration_number)
point_grouper = pg.PointGrouper()
group_assignments = point_grouper.group_points(shift_points.tolist())
return MeanShiftResult(points, shift_points, group_assignments)
def _shift_point(self, point, points, kernel_bandwidth):
# from http://en.wikipedia.org/wiki/Mean-shift
points = np.array(points)
# numerator
point_weights = self.kernel(point-points, kernel_bandwidth)
tiled_weights = np.tile(point_weights, [len(point), 1])
# denominator
denominator = sum(point_weights)
shifted_point = np.multiply(tiled_weights.transpose(), points).sum(axis=0) / denominator
return shifted_point
# ***************************************************************************
# ** The above vectorized code is equivalent to the unrolled version below **
# ***************************************************************************
# shift_x = float(0)
# shift_y = float(0)
# scale_factor = float(0)
# for p_temp in points:
# # numerator
# dist = ms_utils.euclidean_dist(point, p_temp)
# weight = self.kernel(dist, kernel_bandwidth)
# shift_x += p_temp[0] * weight
# shift_y += p_temp[1] * weight
# # denominator
# scale_factor += weight
# shift_x = shift_x / scale_factor
# shift_y = shift_y / scale_factor
# return [shift_x, shift_y]
class MeanShiftResult:
def __init__(self, original_points, shifted_points, cluster_ids):
self.original_points = original_points
self.shifted_points = shifted_points
self.cluster_ids = cluster_ids