-
Notifications
You must be signed in to change notification settings - Fork 0
/
dbscan.m
117 lines (89 loc) · 3.48 KB
/
dbscan.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
function [clustLabel, varType] = dbscan(x, MinPts, Eps)
% DBSCAN computes the clustering for a dataset using the dbscan algorithm
%
% Input:
% x - data matrix (rows observations, columns variables)
% MinPts - minimum number of neighbors to define a core point
% Eps - range of the neighborhood
%
% Output:
% clustLabel - final label vector (0 is assigned to noise points)
% varType - 1: core point, -1: noise point
%
% If Eps not provided, a k-dist plot is showed and the user can select the
% value of Eps by looking at the y value corresponding to the knee of the
% plot.
%
% Reference: Ester, Martin; Kriegel, Hans-Peter; Sander, Jrg;
% Xu, Xiaowei (1996). Simoudis, Evangelos; Han, Jiawei; Fayyad, Usama M.,
% eds. A density-based algorithm for discovering clusters in large spatial
% databases with noise. Proceedings of the Second International Conference
% on Knowledge Discovery and Data Mining (KDD-96). AAAI Press. pp. 226?231.
% ISBN 1-57735-004-9. CiteSeerX: 10.1.1.71.1980
%
% Author: Paolo Inglese, 2015
nobs = size(x,1);
classified = zeros(nobs,1); % state CLASSIFIED=1/NOT-CLASSIFIED=0
Neps = zeros(nobs,1); % num neighbours in range <= Eps
varType = nan(nobs,1); % class (core=1, noise=-1);
clustLabel = nan(nobs,1); % label
numClust = 0; % current label
D = squareform(pdist(x, 'euclidean'));
if(nargin<3)
plot_kdist(x, MinPts, D);
drawnow;
Eps = input('Insert Eps (knee point):');
end
for iobs = 1:nobs
if(~classified(iobs))
[~, neighIdx] = find(D(iobs, :) <= Eps);
Neps(iobs) = length(neighIdx);
if(Neps(iobs) <= MinPts)
% noise
varType(iobs) = -1;
clustLabel(iobs) = 0;
classified(iobs) = 1;
continue;
else
% core
numClust = numClust + 1;
varType(neighIdx) = 1;
clustLabel(neighIdx) = numClust;
classified(neighIdx) = 1;
% stack
seedsIdx = neighIdx(neighIdx ~= iobs);
while(~isempty(seedsIdx))
currSeedStackIdx = 1;
currSeedIdx = seedsIdx(currSeedStackIdx);
[~, neighIdx] = find(D(currSeedIdx, :) <= Eps);
Neps(currSeedIdx) = length(neighIdx);
if(Neps(currSeedIdx)>=MinPts+1)
% push the unclassified onto seeds
seedsIdx = [seedsIdx, neighIdx(classified(neighIdx) == 0)]; %#ok<AGROW>
% and they are added to the current cluster
clustLabel(neighIdx(classified(neighIdx) == 0 | varType(neighIdx) == -1)) = numClust;
% they are not noise anymore
varType(neighIdx) = 1;
% they become classified
classified(neighIdx) = 1;
end
% pop the seed from the stack
seedsIdx(currSeedStackIdx) = [];
end
end
end
end
end
function plot_kdist(x, MinPts, D)
nobs = size(x,1);
kdist = zeros(nobs, 1);
for i = 1:nobs
dtmp = D(i, :);
dtmp(i) = [];
dtmp = sort(dtmp, 'ascend');
kdist(i) = dtmp(MinPts);
end
kdist = sort(kdist, 'descend');
figure;
plot(kdist);
end