forked from LispMechanics/cl-data-structures
-
Notifications
You must be signed in to change notification settings - Fork 7
/
external-functions.lisp
152 lines (146 loc) · 6.76 KB
/
external-functions.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
(in-package #:cl-ds.utils.cluster)
(-> partition-around-medoids (vector
cl-ds.utils:half-matrix
positive-fixnum
&key
(:select-medoids-attempts-count (or null positive-fixnum))
(:attempts non-negative-fixnum)
(:split (or null positive-fixnum))
(:merge (or null positive-fixnum)))
clustering-result)
(defun partition-around-medoids (input-data
distance-matrix
number-of-medoids
&key
(select-medoids-attempts-count 50)
(attempts 0)
split
merge)
(when (or (zerop (length input-data)))
(error "Can't cluster because there is no data"))
(let ((state (make 'pam-algorithm-state
:number-of-medoids number-of-medoids
:input-data input-data
:distance-matrix distance-matrix
:split-merge-attempts-count attempts
:select-medoids-attempts-count select-medoids-attempts-count
:split-threshold split
:merge-threshold merge)))
(cl-ds.utils:with-slots-for (state pam-algorithm-state)
(build-pam-clusters state)
(unless (null %split-merge-attempts-count)
(iterate
(scan-for-clusters-of-invalid-size state)
(while (unfinished-clusters-p state))
(repeat %split-merge-attempts-count)
(recluster-clusters-with-invalid-size state))))
(let ((silhouette (silhouette state)))
(replace-indexes-in-clusters-with-data state)
(the clustering-result
(obtain-result state
silhouette)))))
(-> clara (vector
positive-fixnum
(or symbol list)
(or symbol function)
positive-fixnum
positive-fixnum
&key
(:key (or symbol function))
(:select-medoids-attempts-count (or null positive-fixnum))
(:attempts non-negative-fixnum)
(:split (or null positive-fixnum))
(:merge (or null positive-fixnum)))
clustering-result)
(defun clara (input-data
number-of-medoids
metric-type
metric-fn
sample-size
sample-count
&key
(key #'identity)
(select-medoids-attempts-count 50)
(attempts 0)
split
merge)
(cl-progress-bar:with-progress-bar ((* 2 sample-count)
"Clustering data set of size ~a using CLARA algorithm with ~a samples of size ~a."
(length input-data)
sample-count
sample-size)
(let ((state (build-clara-clusters
input-data number-of-medoids metric-type
(ensure-function metric-fn) sample-size sample-count
:key (ensure-function key)
:select-medoids-attempts-count select-medoids-attempts-count
:attempts attempts :split split :merge merge)))
(assign-clara-data-to-medoids state)
(replace-indexes-in-clusters-with-data state)
(the clustering-result
(obtain-result state
(access-silhouette state))))))
(-> clara-variable-number-of-medoids (vector
(or symbol list)
(or symbol function)
positive-fixnum
positive-fixnum
positive-fixnum
positive-fixnum
&key
(:key (or symbol function))
(:select-medoids-attempts-count (or null positive-fixnum))
(:attempts non-negative-fixnum)
(:split (or null positive-fixnum))
(:merge (or null positive-fixnum)))
clustering-result)
(defun clara-variable-number-of-medoids (input-data
metric-type
metric-fn
sample-size
sample-count
from to
&key
(key #'identity)
(select-medoids-attempts-count 50)
(attempts 0)
split
merge)
(assert (< 0 from to))
(let ((vector (make-array (1+ (- to from))))
(metric-fn (ensure-function metric-fn))
(key (ensure-function key)))
(cl-progress-bar:with-progress-bar ((* (* 2 sample-count) (- (1+ to) from))
"Clustering data set of size ~a using CLARA algorithm searching for optimal medoids count (between ~a and ~a)."
(length input-data)
from to)
(iterate
(with progress-bar = cl-progress-bar:*progress-bar*)
(for index from 0 below (length vector))
(for i from from to to)
(nest (setf (aref vector index))
(let ((i i)))
(bt:make-thread)
(lambda ())
(let ((cl-progress-bar:*progress-bar* progress-bar)))
(build-clara-clusters
input-data i metric-type metric-fn
sample-size sample-count
:key key
:select-medoids-attempts-count select-medoids-attempts-count
:attempts attempts
:split split
:merge merge)))
(iterate
(with final = nil)
(for thread in-vector vector)
(for state = (bt:join-thread thread))
(for mean-silhouette = (~> state access-silhouette mean))
(maximize mean-silhouette into maximum)
(when (= mean-silhouette maximum)
(setf final state))
(finally (assign-clara-data-to-medoids final)
(replace-indexes-in-clusters-with-data final)
(return (the clustering-result
(obtain-result final
(access-silhouette final)))))))))