-
-
Notifications
You must be signed in to change notification settings - Fork 118
/
discovery.go
318 lines (275 loc) · 8.6 KB
/
discovery.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
// Copyright 2018-2024 Burak Sezer
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*Package discovery provides a basic memberlist integration.*/
package discovery
import (
"context"
"errors"
"fmt"
"net"
"plugin"
"sort"
"strconv"
"sync"
"time"
"github.com/buraksezer/olric/config"
"github.com/buraksezer/olric/internal/stats"
"github.com/buraksezer/olric/pkg/flog"
"github.com/buraksezer/olric/pkg/service_discovery"
"github.com/hashicorp/memberlist"
)
// eventChanCapacity is the buffer size of the channel that receives
// memberlist node events (see Start).
const eventChanCapacity = 256

var (
	// UptimeSeconds counts the seconds elapsed since the server started.
	UptimeSeconds = stats.NewInt64Counter()

	// ErrMemberNotFound is returned when the requested member is not
	// present in the member list.
	ErrMemberNotFound = errors.New("member not found")
)
// ClusterEvent describes one node-activity event observed in the memberlist.
// The node-related fields must be treated as read-only by consumers.
type ClusterEvent struct {
	// Event is the kind of memberlist node event being reported.
	Event memberlist.NodeEventType

	// NodeName, NodeAddr and NodePort identify the node the event refers to.
	NodeName string
	NodeAddr net.IP
	NodePort uint16

	// NodeMeta is the metadata supplied by the delegate for this node.
	NodeMeta []byte
}
// MemberAddr returns the node's network address in "host:port" form, built
// from NodeAddr and NodePort. IPv6 hosts are bracketed by net.JoinHostPort.
func (c *ClusterEvent) MemberAddr() string {
	return net.JoinHostPort(
		c.NodeAddr.String(),
		strconv.FormatUint(uint64(c.NodePort), 10),
	)
}
// Discovery wraps a memberlist instance and exposes helper functions
// for working with cluster membership.
type Discovery struct {
	log        *flog.Logger
	member     *Member
	memberlist *memberlist.Memberlist
	config     *config.Config

	// Delivery of Join/Leave/Update events. ClusterEvents is assigned in
	// Start from SubscribeNodeEvents.
	// NOTE(review): clusterEventsMtx presumably guards eventSubscribers - confirm.
	clusterEventsMtx sync.RWMutex
	ClusterEvents    chan *ClusterEvent
	eventSubscribers []chan *ClusterEvent

	// Service discovery plugin; stays nil unless one is loaded in Start.
	serviceDiscovery service_discovery.ServiceDiscovery

	// Flow control: wg tracks the background goroutines, ctx/cancel stop them.
	wg     sync.WaitGroup
	ctx    context.Context
	cancel context.CancelFunc
}
// New returns a Discovery instance that holds the given logger and
// configuration, the local member built from c, and a cancellable context
// used to stop background goroutines. The memberlist itself is not created
// here; that happens in Start.
func New(log *flog.Logger, c *config.Config) *Discovery {
	localMember := NewMember(c)
	ctx, cancel := context.WithCancel(context.Background())

	return &Discovery{
		log:    log,
		config: c,
		member: &localMember,
		ctx:    ctx,
		cancel: cancel,
	}
}
// loadServiceDiscoveryPlugin resolves, configures and initializes the service
// discovery implementation described by d.config.ServiceDiscovery, then
// stores it in d.serviceDiscovery.
//
// The implementation is taken from the "plugin" key when present (the value
// must implement service_discovery.ServiceDiscovery). Otherwise the "path"
// key must hold the file path of a Go plugin that exports a
// "ServiceDiscovery" symbol.
//
// An error is returned when the configuration is incomplete or has the wrong
// type, when the plugin cannot be opened or lacks the symbol, or when
// SetConfig/Initialize fail. On error d.serviceDiscovery is left untouched.
func (d *Discovery) loadServiceDiscoveryPlugin() error {
	var sd service_discovery.ServiceDiscovery

	if val, ok := d.config.ServiceDiscovery["plugin"]; ok {
		if sd, ok = val.(service_discovery.ServiceDiscovery); !ok {
			return fmt.Errorf("plugin type %T is not a ServiceDiscovery interface", val)
		}
	} else {
		rawPath, ok := d.config.ServiceDiscovery["path"]
		if !ok {
			return fmt.Errorf("plugin path could not be found")
		}
		// Use a checked assertion: a non-string path in the configuration
		// must surface as an error instead of panicking.
		pluginPath, ok := rawPath.(string)
		if !ok {
			return fmt.Errorf("plugin path must be a string, got %T", rawPath)
		}
		plug, err := plugin.Open(pluginPath)
		if err != nil {
			return fmt.Errorf("failed to open plugin: %w", err)
		}
		symDiscovery, err := plug.Lookup("ServiceDiscovery")
		if err != nil {
			return fmt.Errorf("failed to lookup serviceDiscovery symbol: %w", err)
		}
		if sd, ok = symDiscovery.(service_discovery.ServiceDiscovery); !ok {
			return fmt.Errorf("unable to assert type to serviceDiscovery")
		}
	}

	if err := sd.SetConfig(d.config.ServiceDiscovery); err != nil {
		return err
	}
	sd.SetLogger(d.config.Logger)
	if err := sd.Initialize(); err != nil {
		return err
	}

	d.serviceDiscovery = sd
	return nil
}
// increaseUptimeSeconds bumps UptimeSeconds by one every second until the
// Discovery context is cancelled. It is meant to run in its own goroutine
// and signals d.wg when it returns.
func (d *Discovery) increaseUptimeSeconds() {
	defer d.wg.Done()

	everySecond := time.NewTicker(time.Second)
	defer everySecond.Stop()

	done := d.ctx.Done()
	for {
		select {
		case <-done:
			return
		case <-everySecond.C:
			UptimeSeconds.Increase(1)
		}
	}
}
// Start brings the discovery subsystem up. In order, it:
//
//  1. loads the service discovery plugin when one is configured,
//  2. subscribes ClusterEvents to node events,
//  3. creates the memberlist, overwriting the Delegate, Logger and Events
//     fields of the configured memberlist config,
//  4. registers with service discovery when a plugin is loaded,
//  5. starts the event loop and the uptime counter goroutines.
//
// The first error encountered is returned and the remaining steps are skipped.
func (d *Discovery) Start() error {
	if d.config.ServiceDiscovery != nil {
		err := d.loadServiceDiscoveryPlugin()
		if err != nil {
			return err
		}
	}

	// The Olric package consumes ClusterEvents to maintain a consistent hash ring.
	d.ClusterEvents = d.SubscribeNodeEvents()

	delegate, err := d.newDelegate()
	if err != nil {
		return err
	}

	// Wire the memberlist configuration before creating the list.
	nodeEvents := make(chan memberlist.NodeEvent, eventChanCapacity)
	d.config.MemberlistConfig.Delegate = delegate
	d.config.MemberlistConfig.Logger = d.config.Logger
	d.config.MemberlistConfig.Events = &memberlist.ChannelEventDelegate{Ch: nodeEvents}

	ml, err := memberlist.Create(d.config.MemberlistConfig)
	if err != nil {
		return err
	}
	d.memberlist = ml

	if d.serviceDiscovery != nil {
		err = d.serviceDiscovery.Register()
		if err != nil {
			return err
		}
	}

	// Both background goroutines call d.wg.Done when they exit.
	d.wg.Add(2)
	go d.eventLoop(nodeEvents)
	go d.increaseUptimeSeconds()
	return nil
}
// Join attempts to join a cluster by contacting a set of peers and performing
// a state sync through memberlist.Join. Initially, the memberlist only
// contains our own state, so joining makes remote nodes aware of this node.
//
// When a service discovery plugin is loaded the peers come from its
// DiscoverPeers call; otherwise the statically configured peers are used.
// The result of memberlist.Join is returned unchanged.
func (d *Discovery) Join() (int, error) {
	if d.serviceDiscovery == nil {
		return d.memberlist.Join(d.config.Peers)
	}

	discovered, err := d.serviceDiscovery.DiscoverPeers()
	if err != nil {
		return 0, err
	}
	return d.memberlist.Join(discovered)
}
// Rejoin calls memberlist.Join with the given peers and returns its result
// unchanged.
func (d *Discovery) Rejoin(peers []string) (int, error) {
	joined, err := d.memberlist.Join(peers)
	return joined, err
}
// GetMembers returns the known alive nodes as Member values, sorted by
// birthdate in ascending order (oldest first).
//
// Nodes whose metadata cannot be decoded are logged and skipped. Otherwise a
// zero-valued Member (Birthdate 0) would be appended, sort to the front, and
// be picked up by callers that rely on the first element, such as
// GetCoordinator.
func (d *Discovery) GetMembers() []Member {
	var members []Member
	for _, node := range d.memberlist.Members() {
		member, err := NewMemberFromMetadata(node.Meta)
		if err != nil {
			d.log.V(3).Printf("[WARN] Failed to decode metadata of node %s: %v", node.Name, err)
			continue
		}
		members = append(members, member)
	}

	// sort members by birthdate
	sort.Slice(members, func(i, j int) bool {
		return members[i].Birthdate < members[j].Birthdate
	})
	return members
}
// NumMembers returns the member count reported by the underlying memberlist.
func (d *Discovery) NumMembers() int {
	count := d.memberlist.NumMembers()
	return count
}
// FindMemberByName looks up an alive member by its name. It returns
// ErrMemberNotFound when no member returned by GetMembers has that name.
func (d *Discovery) FindMemberByName(name string) (Member, error) {
	candidates := d.GetMembers()
	for i := range candidates {
		if candidates[i].Name != name {
			continue
		}
		return candidates[i], nil
	}
	return Member{}, ErrMemberNotFound
}
// FindMemberByID looks up an alive member by its ID. It returns
// ErrMemberNotFound when no member returned by GetMembers has that ID.
func (d *Discovery) FindMemberByID(id uint64) (Member, error) {
	candidates := d.GetMembers()
	for i := range candidates {
		if candidates[i].ID != id {
			continue
		}
		return candidates[i], nil
	}
	return Member{}, ErrMemberNotFound
}
// GetCoordinator returns the oldest node in the memberlist, i.e. the first
// element of the birthdate-sorted list from GetMembers. When the list is
// empty an error is logged and a zero-valued Member is returned.
func (d *Discovery) GetCoordinator() Member {
	if members := d.GetMembers(); len(members) > 0 {
		return members[0]
	}

	d.log.V(1).Printf("[ERROR] There is no member in memberlist")
	return Member{}
}
// IsCoordinator reports whether the local member is the coordinator, by
// comparing its ID with the ID of the member returned by GetCoordinator.
func (d *Discovery) IsCoordinator() bool {
	coordinator := d.GetCoordinator()
	return coordinator.ID == d.member.ID
}
// LocalNode returns the memberlist node that represents this process.
func (d *Discovery) LocalNode() *memberlist.Node {
	local := d.memberlist.LocalNode()
	return local
}
// Shutdown stops the discovery subsystem. It cancels the internal context,
// waits for the background goroutines to finish, broadcasts a leave message
// through memberlist.Leave (bounded by the configured LeaveTimeout),
// deregisters from and closes the service discovery plugin when one is
// loaded, and finally shuts the memberlist down.
//
// Errors from Leave, Deregister and Close are only logged; the returned error
// is the one produced by memberlist.Shutdown, or nil when no memberlist was
// created.
//
// Once the context has been cancelled, further calls return nil immediately.
func (d *Discovery) Shutdown() error {
	// A cancelled context means a previous call already ran the sequence below.
	select {
	case <-d.ctx.Done():
		return nil
	default:
	}
	d.cancel()

	// Wait inline rather than in a goroutine guarded by a timeout: that
	// approach may leak the waiting goroutine.
	d.wg.Wait()

	if d.memberlist != nil {
		// Leave will broadcast a leave message but will not shutdown the
		// background listeners, meaning the node will continue participating
		// in gossip and state updates.
		d.log.V(2).Printf("[INFO] Broadcasting a leave message")
		leaveErr := d.memberlist.Leave(d.config.LeaveTimeout)
		if leaveErr != nil {
			d.log.V(3).Printf("[WARN] memberlist.Leave returned an error: %v", leaveErr)
		}
	}

	if sd := d.serviceDiscovery; sd != nil {
		// Close runs last, after the memberlist has been shut down.
		defer func() {
			if closeErr := sd.Close(); closeErr != nil {
				d.log.V(3).Printf("[ERROR] ServiceDiscovery.Close returned an error: %v", closeErr)
			}
		}()

		if deregErr := sd.Deregister(); deregErr != nil {
			d.log.V(3).Printf("[ERROR] ServiceDiscovery.Deregister returned an error: %v", deregErr)
		}
	}

	if d.memberlist == nil {
		return nil
	}
	return d.memberlist.Shutdown()
}