Skip to content

Commit

Permalink
Fix and test node expiry
Browse files Browse the repository at this point in the history
This commit makes sure node expiry is propagated to
the nodes immediatly when a node expiry even occurs
and verifies this with integration tests.

In addition, it removes old logic for filtering out
expired nodes from the netmap, this is not how Tailscale
SaaS does it, and we should follow the same approach.

Signed-off-by: Kristoffer Dalby <[email protected]>
  • Loading branch information
kradalby committed Jan 3, 2024
1 parent 71e158e commit 623c6aa
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 33 deletions.
42 changes: 34 additions & 8 deletions hscontrol/db/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,16 @@ func (hsdb *HSDatabase) nodeSetExpiry(node *types.Node, expiry time.Time) error
)
}

node.Expiry = &expiry

stateSelfUpdate := types.StateUpdate{
Type: types.StateSelfUpdate,
ChangeNodes: types.Nodes{node},
}
if stateSelfUpdate.Valid() {
hsdb.notifier.NotifyByMachineKey(stateSelfUpdate, node.MachineKey)
}

stateUpdate := types.StateUpdate{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
Expand All @@ -350,7 +360,7 @@ func (hsdb *HSDatabase) nodeSetExpiry(node *types.Node, expiry time.Time) error
},
}
if stateUpdate.Valid() {
hsdb.notifier.NotifyAll(stateUpdate)
hsdb.notifier.NotifyWithIgnore(stateUpdate, node.MachineKey.String())
}

return nil
Expand Down Expand Up @@ -856,7 +866,7 @@ func (hsdb *HSDatabase) ExpireExpiredNodes(lastCheck time.Time) time.Time {
// checked everything.
started := time.Now()

expired := make([]*tailcfg.PeerChange, 0)
expiredNodes := make([]*types.Node, 0)

nodes, err := hsdb.listNodes()
if err != nil {
Expand All @@ -872,17 +882,13 @@ func (hsdb *HSDatabase) ExpireExpiredNodes(lastCheck time.Time) time.Time {
// It will notify about all nodes that has been expired.
// It should only notify about expired nodes since _last check_.
node.Expiry.After(lastCheck) {
expired = append(expired, &tailcfg.PeerChange{
NodeID: tailcfg.NodeID(node.ID),
KeyExpiry: node.Expiry,
})
expiredNodes = append(expiredNodes, &nodes[index])

now := time.Now()
// Do not use setNodeExpiry as that has a notifier hook, which
// can cause a deadlock, we are updating all changed nodes later
// and there is no point in notifiying twice.
if err := hsdb.db.Model(nodes[index]).Updates(types.Node{
Expiry: &now,
Expiry: &started,
}).Error; err != nil {
log.Error().
Err(err).
Expand All @@ -898,6 +904,15 @@ func (hsdb *HSDatabase) ExpireExpiredNodes(lastCheck time.Time) time.Time {
}
}

expired := make([]*tailcfg.PeerChange, len(expiredNodes))
for idx, node := range expiredNodes {
expired[idx] = &tailcfg.PeerChange{
NodeID: tailcfg.NodeID(node.ID),
KeyExpiry: &started,
}
}

// Inform the peers of a node with a lightweight update.
stateUpdate := types.StateUpdate{
Type: types.StatePeerChangedPatch,
ChangePatches: expired,
Expand All @@ -906,5 +921,16 @@ func (hsdb *HSDatabase) ExpireExpiredNodes(lastCheck time.Time) time.Time {
hsdb.notifier.NotifyAll(stateUpdate)
}

// Inform the node itself that it has expired.
for _, node := range expiredNodes {
stateSelfUpdate := types.StateUpdate{
Type: types.StateSelfUpdate,
ChangeNodes: types.Nodes{node},
}
if stateSelfUpdate.Valid() {
hsdb.notifier.NotifyByMachineKey(stateSelfUpdate, node.MachineKey)
}
}

return started
}
13 changes: 0 additions & 13 deletions hscontrol/mapper/mapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"github.com/juanfont/headscale/hscontrol/util"
"github.com/klauspost/compress/zstd"
"github.com/rs/zerolog/log"
"github.com/samber/lo"
"golang.org/x/exp/maps"
"tailscale.com/envknob"
"tailscale.com/smallzstd"
Expand Down Expand Up @@ -595,15 +594,6 @@ func nodeMapToList(nodes map[uint64]*types.Node) types.Nodes {
return ret
}

func filterExpiredAndNotReady(peers types.Nodes) types.Nodes {
return lo.Filter(peers, func(item *types.Node, index int) bool {
// Filter out nodes that are expired OR
// nodes that has no endpoints, this typically means they have
// registered, but are not configured.
return !item.IsExpired() || len(item.Endpoints) > 0
})
}

// appendPeerChanges mutates a tailcfg.MapResponse with all the
// necessary changes when peers have changed.
func appendPeerChanges(
Expand All @@ -629,9 +619,6 @@ func appendPeerChanges(
return err
}

// Filter out peers that have expired.
changed = filterExpiredAndNotReady(changed)

// If there are filter rules present, see if there are any nodes that cannot
// access eachother at all and remove them from the peers.
if len(rules) > 0 {
Expand Down
36 changes: 24 additions & 12 deletions integration/general_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ func TestExpireNode(t *testing.T) {

t.Logf("Node %s with node_key %s has been expired", node.GetName(), expiredNodeKey.String())

time.Sleep(30 * time.Second)
time.Sleep(2 * time.Minute)

now := time.Now()

Expand All @@ -572,21 +572,33 @@ func TestExpireNode(t *testing.T) {
if client.Hostname() != node.GetName() {
t.Logf("available peers of %s: %v", client.Hostname(), status.Peers())

// In addition to marking nodes expired, we filter them out during the map response
// this check ensures that the node is either not present, or that it is expired
// if it is in the map response.
// Ensures that the node is present, and that it is expired.
if peerStatus, ok := status.Peer[expiredNodeKey]; ok {
assertNotNil(t, peerStatus.Expired)
assert.Truef(t, peerStatus.KeyExpiry.Before(now), "node %s should have a key expire before %s, was %s", peerStatus.HostName, now.String(), peerStatus.KeyExpiry)
assert.Truef(t, peerStatus.Expired, "node %s should be expired, expired is %v", peerStatus.HostName, peerStatus.Expired)
assert.NotNil(t, peerStatus.KeyExpiry)

t.Logf("node %q should have a key expire before %s, was %s", peerStatus.HostName, now.String(), peerStatus.KeyExpiry)
if peerStatus.KeyExpiry != nil {
assert.Truef(t, peerStatus.KeyExpiry.Before(now), "node %q should have a key expire before %s, was %s", peerStatus.HostName, now.String(), peerStatus.KeyExpiry)
}

assert.Truef(t, peerStatus.Expired, "node %q should be expired, expired is %v", peerStatus.HostName, peerStatus.Expired)

_, stderr, _ := client.Execute([]string{"tailscale", "ping", node.GetName()})
if !strings.Contains(stderr, "node key has expired") {
t.Errorf("expected to be unable to ping expired host %q from %q", node.GetName(), client.Hostname())
}
} else {
t.Errorf("failed to find node %q with nodekey (%s) in mapresponse, should be present even if it is expired", node.GetName(), expiredNodeKey)
}
} else {
if status.Self.KeyExpiry != nil {
assert.Truef(t, status.Self.KeyExpiry.Before(now), "node %q should have a key expire before %s, was %s", status.Self.HostName, now.String(), status.Self.KeyExpiry)
}

// TODO(kradalby): We do not propogate expiry correctly, nodes should be aware
// of their status, and this should be sent directly to the node when its
// expired. This needs a notifier that goes directly to the node (currently we only do peers)
// so fix this in a follow up PR.
// } else {
// assert.True(t, status.Self.Expired)
// NeedsLogin means that the node has understood that it is no longer
// valid.
assert.Equal(t, "NeedsLogin", status.BackendState)
}
}
}
Expand Down

0 comments on commit 623c6aa

Please sign in to comment.