-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
drainer: stop watching deleted, down, or disconnected nodes
When a node is down or disconnected, we can no longer gracefully migrate its allocations. Any evaluations we need to replace the allocations will have already been created by the heartbeater, so there's no more work for the drainer to do. Stop watching nodes in this state. Also, the blocking query for nodes set the maximum index to the highest index of a node it found, rather than the index of the nodes table. This misses updates to the index from deleting nodes. This was done as a performance optimization to avoid excessive unblocking, but because the query is over all nodes anyway there's no optimization to be had here. Remove the optimization so we can detect deleted nodes without having to wait for an update to an unrelated node. This changeset also refactors the tests of the draining node watcher so that we don't mock the node watcher's `Remove` and `Update` methods for its own tests. Instead we'll mock the node watcher's dependencies (the job watcher and deadline notifier) and now unit tests can cover the real code. This allows us to remove a bunch of TODOs in `watch_nodes.go` around testing.
- Loading branch information
Showing
5 changed files
with
352 additions
and
171 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
```release-note:bug | ||
drainer: Fixed a bug where draining nodes that become lost or disconnected were still tracked by the drainer | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,52 +1,142 @@ | ||
package drainer | ||
|
||
import ( | ||
"context" | ||
"sync" | ||
"testing" | ||
"time" | ||
|
||
"golang.org/x/time/rate" | ||
|
||
"github.com/hashicorp/nomad/helper/testlog" | ||
"github.com/hashicorp/nomad/nomad/state" | ||
"github.com/hashicorp/nomad/nomad/structs" | ||
) | ||
|
||
type MockNodeTrackerEvent struct { | ||
NodeUpdate *structs.Node | ||
NodeRemove string | ||
} | ||
// This file contains helpers for testing. The raft shims make it hard to test | ||
// the whole package behavior of the drainer. See also nomad/drainer_int_test.go | ||
// for integration tests. | ||
|
||
type MockNodeTracker struct { | ||
Nodes map[string]*structs.Node | ||
Events []*MockNodeTrackerEvent | ||
type MockJobWatcher struct { | ||
drainCh chan *DrainRequest | ||
migratedCh chan []*structs.Allocation | ||
jobs map[structs.NamespacedID]struct{} | ||
sync.Mutex | ||
} | ||
|
||
func NewMockNodeTracker() *MockNodeTracker { | ||
return &MockNodeTracker{ | ||
Nodes: make(map[string]*structs.Node), | ||
Events: make([]*MockNodeTrackerEvent, 0, 16), | ||
// RegisterJobs marks the job as being watched | ||
func (m *MockJobWatcher) RegisterJobs(jobs []structs.NamespacedID) { | ||
m.Lock() | ||
defer m.Unlock() | ||
for _, job := range jobs { | ||
m.jobs[job] = struct{}{} | ||
} | ||
} | ||
|
||
func (m *MockNodeTracker) TrackedNodes() map[string]*structs.Node { | ||
m.Lock() | ||
defer m.Unlock() | ||
return m.Nodes | ||
// Drain returns the DrainRequest channel. Tests can send on this channel to | ||
// simulate steps through the NodeDrainer watch loop. (Sending on this channel | ||
// will block anywhere else.) | ||
func (m *MockJobWatcher) Drain() <-chan *DrainRequest { | ||
return m.drainCh | ||
} | ||
|
||
func (m *MockNodeTracker) Remove(nodeID string) { | ||
m.Lock() | ||
defer m.Unlock() | ||
delete(m.Nodes, nodeID) | ||
m.Events = append(m.Events, &MockNodeTrackerEvent{NodeRemove: nodeID}) | ||
// Migrated returns the channel of migrated allocations. Tests can send on this | ||
// channel to simulate steps through the NodeDrainer watch loop. (Sending on | ||
// this channel will block anywhere else.) | ||
func (m *MockJobWatcher) Migrated() <-chan []*structs.Allocation { | ||
return m.migratedCh | ||
} | ||
|
||
// MockDeadlineNotifier is a test double for the drainer's deadline notifier.
// It only records which node IDs are being watched; it runs no timers.
type MockDeadlineNotifier struct {
	// expiredCh is the channel handed out by NextBatch. It is receive-only
	// from the mock's point of view.
	expiredCh <-chan []string
	// nodes is the set of node IDs currently being watched. Guarded by the
	// embedded mutex.
	nodes map[string]struct{}
	sync.Mutex
}
|
||
func (m *MockNodeTracker) Update(node *structs.Node) { | ||
// NextBatch returns the channel of expired nodes. Tests can send on this | ||
// channel to simulate timer events in the NodeDrainer watch loop. (Sending on | ||
// this channel will block anywhere else.) | ||
func (m *MockDeadlineNotifier) NextBatch() <-chan []string { | ||
return m.expiredCh | ||
} | ||
|
||
// Remove removes the given node from being tracked for a deadline. | ||
func (m *MockDeadlineNotifier) Remove(nodeID string) { | ||
m.Lock() | ||
defer m.Unlock() | ||
m.Nodes[node.ID] = node | ||
m.Events = append(m.Events, &MockNodeTrackerEvent{NodeUpdate: node}) | ||
delete(m.nodes, nodeID) | ||
} | ||
|
||
func (m *MockNodeTracker) events() []*MockNodeTrackerEvent { | ||
// Watch marks the node as being watched; this mock throws out the timer in lieu | ||
// of manully sending on the channel to avoid racy tests. | ||
func (m *MockDeadlineNotifier) Watch(nodeID string, _ time.Time) { | ||
m.Lock() | ||
defer m.Unlock() | ||
m.nodes[nodeID] = struct{}{} | ||
} | ||
|
||
type MockRaftApplierShim struct { | ||
state *state.StateStore | ||
} | ||
|
||
// AllocUpdateDesiredTransition mocks a write to raft as a state store update | ||
func (m *MockRaftApplierShim) AllocUpdateDesiredTransition( | ||
allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) (uint64, error) { | ||
index, _ := m.state.LatestIndex() | ||
index++ | ||
err := m.state.UpdateAllocsDesiredTransitions(structs.MsgTypeTestSetup, index, allocs, evals) | ||
return index, err | ||
} | ||
|
||
// NodesDrainComplete mocks a write to raft as a state store update | ||
func (m *MockRaftApplierShim) NodesDrainComplete( | ||
nodes []string, event *structs.NodeEvent) (uint64, error) { | ||
index, _ := m.state.LatestIndex() | ||
index++ | ||
|
||
updates := make(map[string]*structs.DrainUpdate, len(nodes)) | ||
nodeEvents := make(map[string]*structs.NodeEvent, len(nodes)) | ||
update := &structs.DrainUpdate{} | ||
for _, node := range nodes { | ||
updates[node] = update | ||
if event != nil { | ||
nodeEvents[node] = event | ||
} | ||
} | ||
now := time.Now().Unix() | ||
|
||
err := m.state.BatchUpdateNodeDrain(structs.MsgTypeTestSetup, index, now, | ||
updates, nodeEvents) | ||
|
||
return index, err | ||
} | ||
|
||
func testNodeDrainWatcher(t *testing.T) (*nodeDrainWatcher, *state.StateStore, *NodeDrainer) { | ||
t.Helper() | ||
store := state.TestStateStore(t) | ||
limiter := rate.NewLimiter(100.0, 100) | ||
logger := testlog.HCLogger(t) | ||
|
||
drainer := &NodeDrainer{ | ||
enabled: false, | ||
logger: logger, | ||
nodes: map[string]*drainingNode{}, | ||
jobWatcher: &MockJobWatcher{ | ||
drainCh: make(chan *DrainRequest), | ||
migratedCh: make(chan []*structs.Allocation), | ||
jobs: map[structs.NamespacedID]struct{}{}, | ||
}, | ||
deadlineNotifier: &MockDeadlineNotifier{ | ||
expiredCh: make(<-chan []string), | ||
nodes: map[string]struct{}{}, | ||
}, | ||
state: store, | ||
queryLimiter: limiter, | ||
raft: &MockRaftApplierShim{state: store}, | ||
batcher: allocMigrateBatcher{}, | ||
} | ||
|
||
return m.Events | ||
w := NewNodeDrainWatcher(context.Background(), limiter, store, logger, drainer) | ||
drainer.nodeWatcher = w | ||
return w, store, drainer | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.