diff --git a/.changelog/20317.txt b/.changelog/20317.txt new file mode 100644 index 000000000000..76fba7afc07e --- /dev/null +++ b/.changelog/20317.txt @@ -0,0 +1,7 @@ +```release-note:bug +drain: Fixed a bug where Workload Identity tokens could not be used to drain a node +``` + +```release-note:bug +state: Fixed a bug where restarting a server could fail if the Raft logs include a drain update that used a now-expired token +``` diff --git a/nomad/fsm.go b/nomad/fsm.go index 555c671e6dc7..7555937c087f 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -465,31 +465,8 @@ func (n *nomadFSM) applyDrainUpdate(reqType structs.MessageType, buf []byte, ind panic(fmt.Errorf("failed to decode request: %v", err)) } - accessorId := "" - if req.AuthToken != "" { - token, err := n.state.ACLTokenBySecretID(nil, req.AuthToken) - if err != nil { - n.logger.Error("error looking up ACL token from drain update", "error", err) - return fmt.Errorf("error looking up ACL token: %v", err) - } - if token == nil { - node, err := n.state.NodeBySecretID(nil, req.AuthToken) - if err != nil { - n.logger.Error("error looking up node for drain update", "error", err) - return fmt.Errorf("error looking up node for drain update: %v", err) - } - if node == nil { - n.logger.Error("token did not exist during node drain update") - return fmt.Errorf("token did not exist during node drain update") - } - accessorId = node.ID - } else { - accessorId = token.AccessorID - } - } - if err := n.state.UpdateNodeDrain(reqType, index, req.NodeID, req.DrainStrategy, req.MarkEligible, req.UpdatedAt, - req.NodeEvent, req.Meta, accessorId); err != nil { + req.NodeEvent, req.Meta, req.UpdatedBy); err != nil { n.logger.Error("UpdateNodeDrain failed", "error", err) return err } diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 581a1a76930d..fa1152fe602e 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -713,7 +713,10 @@ func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, return fmt.Errorf("node event must not be set") } - // Look for the node + // The AuthenticatedIdentity is unexported so won't be written via + // Raft. Record the identity string so it can be written to LastDrain + args.UpdatedBy = args.GetIdentity().String() + snap, err := n.srv.fsm.State().Snapshot() if err != nil { return err diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index f628d6058d45..b3b88125ac2b 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -1166,10 +1166,11 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { require.Equal(NodeDrainEventDrainSet, out.Events[1].Message) require.NotNil(out.LastDrain) require.Equal(structs.DrainMetadata{ - StartedAt: out.LastDrain.UpdatedAt, - UpdatedAt: out.LastDrain.StartedAt, - Status: structs.DrainStatusDraining, - Meta: map[string]string{"message": "this node is not needed"}, + StartedAt: out.LastDrain.UpdatedAt, + UpdatedAt: out.LastDrain.StartedAt, + Status: structs.DrainStatusDraining, + Meta: map[string]string{"message": "this node is not needed"}, + AccessorID: "token:acls-disabled", }, *out.LastDrain) // before+deadline should be before the forced deadline @@ -1213,10 +1214,11 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { require.NotNil(out.LastDrain) require.False(out.LastDrain.UpdatedAt.Before(out.LastDrain.StartedAt)) require.Equal(structs.DrainMetadata{ - StartedAt: out.LastDrain.StartedAt, - UpdatedAt: out.LastDrain.UpdatedAt, - Status: structs.DrainStatusCanceled, - Meta: map[string]string{"cancelled": "yes"}, + StartedAt: out.LastDrain.StartedAt, + UpdatedAt: out.LastDrain.UpdatedAt, + Status: structs.DrainStatusCanceled, + Meta: map[string]string{"cancelled": "yes"}, + AccessorID: "token:acls-disabled", }, *out.LastDrain) // Check that calling UpdateDrain with the same DrainStrategy does not emit @@ -1281,10 +1283,11 @@ func TestClientEndpoint_UpdatedDrainAndCompleted(t *testing.T) { require.NotNil(out.LastDrain) firstDrainUpdate := out.LastDrain.UpdatedAt require.Equal(structs.DrainMetadata{ - StartedAt: firstDrainUpdate, - UpdatedAt: firstDrainUpdate, - Status: structs.DrainStatusDraining, - Meta: map[string]string{"message": "first drain"}, + StartedAt: firstDrainUpdate, + UpdatedAt: firstDrainUpdate, + Status: structs.DrainStatusDraining, + Meta: map[string]string{"message": "first drain"}, + AccessorID: "token:acls-disabled", }, *out.LastDrain) time.Sleep(1 * time.Second) @@ -1302,10 +1305,11 @@ func TestClientEndpoint_UpdatedDrainAndCompleted(t *testing.T) { secondDrainUpdate := out.LastDrain.UpdatedAt require.True(secondDrainUpdate.After(firstDrainUpdate)) require.Equal(structs.DrainMetadata{ - StartedAt: firstDrainUpdate, - UpdatedAt: secondDrainUpdate, - Status: structs.DrainStatusDraining, - Meta: map[string]string{"message": "second drain"}, + StartedAt: firstDrainUpdate, + UpdatedAt: secondDrainUpdate, + Status: structs.DrainStatusDraining, + Meta: map[string]string{"message": "second drain"}, + AccessorID: "token:acls-disabled", }, *out.LastDrain) time.Sleep(1 * time.Second) @@ -1328,10 +1332,11 @@ func TestClientEndpoint_UpdatedDrainAndCompleted(t *testing.T) { require.True(out.LastDrain.UpdatedAt.After(secondDrainUpdate)) require.Equal(structs.DrainMetadata{ - StartedAt: firstDrainUpdate, - UpdatedAt: out.LastDrain.UpdatedAt, - Status: structs.DrainStatusComplete, - Meta: map[string]string{"message": "second drain"}, + StartedAt: firstDrainUpdate, + UpdatedAt: out.LastDrain.UpdatedAt, + Status: structs.DrainStatusComplete, + Meta: map[string]string{"message": "second drain"}, + AccessorID: "token:acls-disabled", }, *out.LastDrain) } @@ -1456,7 +1461,7 @@ func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp), "RPC") out, err := state.NodeByID(nil, node.ID) require.NoError(err) - require.Equal(validToken.AccessorID, out.LastDrain.AccessorID) + require.Equal("token:"+validToken.AccessorID, out.LastDrain.AccessorID) } // Try with a invalid token @@ -1476,7 +1481,7 @@ func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp), "RPC") out, err := state.NodeByID(nil, node.ID) require.NoError(err) - require.Equal(root.AccessorID, out.LastDrain.AccessorID) + require.Equal("token:"+root.AccessorID, out.LastDrain.AccessorID) } } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index ef9bb1522130..80a4441212cf 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -637,6 +637,10 @@ type NodeUpdateDrainRequest struct { // Meta is user-provided metadata relating to the drain operation Meta map[string]string + // UpdatedBy represents the AuthenticatedIdentity of the request, so that we + // can record it in the LastDrain data without re-authenticating in the FSM. + UpdatedBy string + WriteRequest }