-
Notifications
You must be signed in to change notification settings - Fork 5
/
kmeansPP.m
39 lines (33 loc) · 1.35 KB
/
kmeansPP.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
function [L,C] = kmeansPP(X,k)
%KMEANSPP Cluster multivariate data using the k-means++ algorithm.
%   [L,C] = kmeansPP(X,k) produces a 1-by-size(X,2) vector L with one class
%   label per column in X and a size(X,1)-by-k matrix C containing the
%   centers corresponding to each class.
%
%   Inputs:
%     X - d-by-n data matrix; every column is one observation. Real and
%         complex data are both handled (distances use the Hermitian
%         inner product).
%     k - number of clusters, a positive integer scalar.
%
%   Outputs:
%     L - 1-by-n vector of labels in 1..k; L(j) is the index of the center
%         closest to X(:,j).
%     C - d-by-k matrix of cluster centers.
%
%   Notes:
%     * Centers are seeded with k-means++ [2] (each new center is drawn
%       with probability proportional to the squared distance to the
%       nearest center chosen so far) and refined with Lloyd/MacQueen
%       iterations [1].
%     * If a run ends with fewer than k non-empty clusters, the whole
%       procedure is restarted with a fresh random seeding.
%     * If, during seeding, every point coincides with an already chosen
%       center (X has fewer distinct columns than requested centers), the
%       remaining centers are filled with X(:,1) and the function returns
%       immediately; L then uses fewer than k distinct labels.
%     * The result depends on the state of the global random stream.
%
%   Version: 07/08/11
%   Authors: Laurent Sorber ([email protected])
%
%   References:
%   [1] J. B. MacQueen, "Some Methods for Classification and Analysis of
%       MultiVariate Observations", in Proc. of the fifth Berkeley
%       Symposium on Mathematical Statistics and Probability, L. M. L. Cam
%       and J. Neyman, eds., vol. 1, UC Press, 1967, pp. 281-297.
%   [2] D. Arthur and S. Vassilvitskii, "k-means++: The Advantages of
%       Careful Seeding", Technical Report 2006-13, Stanford InfoLab, 2006.

% Validate up front: a non-integer k can never match the number of
% distinct labels, which would make the restart loop below spin forever.
if ~isnumeric(k) || ~isscalar(k) || ~isreal(k) || ~isfinite(k) || ...
        k < 1 || k ~= fix(k)
    error('kmeansPP:invalidK','k must be a positive integer scalar.');
end
n = size(X,2);
if n < 1
    error('kmeansPP:emptyData','X must contain at least one column.');
end

L = [];
while length(unique(L)) ~= k

    % Reset the previous labelling so that a restarted run always performs
    % at least one refinement sweep.
    L1 = 0;

    % --- k-means++ seeding ------------------------------------------------
    % First center: uniformly random observation.
    C = X(:,randi(n));
    L = ones(1,n);
    for i = 2:k
        % Squared distance of every point to its currently nearest center.
        % The explicit dimension keeps this a per-column operation even
        % when X has a single row.
        D = X-C(:,L);
        D = cumsum(real(dot(D,D,1)));
        % All points sit on a chosen center: no further distinct seed
        % exists, so pad the remaining centers and stop.
        if D(end) == 0, C(:,i:k) = X(:,ones(1,k-i+1)); return; end
        % Inverse-CDF sampling with probability proportional to D^2.
        C(:,i) = X(:,find(rand < D/D(end),1));
        % Nearest center: argmin ||x-c||^2 == argmax 2*Re(c'*x) - ||c||^2.
        [~,L] = max(bsxfun(@minus,2*real(C'*X),real(dot(C,C,1)).'),[],1);
    end

    % --- Lloyd iterations -------------------------------------------------
    while any(L ~= L1)
        L1 = L;
        % Move each center to the mean of its members. An empty cluster
        % yields a NaN center, which max() ignores; the outer loop then
        % detects the missing label and restarts.
        for i = 1:k, l = L==i; C(:,i) = sum(X(:,l),2)/sum(l); end
        [~,L] = max(bsxfun(@minus,2*real(C'*X),real(dot(C,C,1)).'),[],1);
    end

end