From a92461cdc9fd6805ebf9d8a17587d49768f4f54a Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 11 Oct 2023 10:59:31 -0400 Subject: [PATCH] auth: add server-only ACL (#18715) * auth: add server-only ACL The RPC handlers expect to see `nil` ACL objects whenever ACLs are disabled. By using `nil` as a sentinel value, we have the risk of nil pointer exceptions and improper handling of `nil` when returned from our various auth methods that can lead to privilege escalation bugs. This is the second in a series to eliminate the use of `nil` ACLs as a sentinel value for when ACLs are disabled. This patch involves creating a new "virtual" ACL object for checking permissions on server operations and a matching `AuthenticateServerOnly` method for server-only RPCs that can produce that ACL. Ref: https://github.com/hashicorp/nomad-enterprise/pull/1218 Ref: https://github.com/hashicorp/nomad/pull/18703 --- .semgrep/rpc_endpoint.yml | 18 +++++ acl/acl.go | 23 +++++++ acl/acl_test.go | 4 ++ acl/virtual.go | 16 +++++ nomad/acl.go | 4 ++ nomad/auth/auth.go | 64 ++++++++++++++++++ nomad/auth/auth_test.go | 79 ++++++++++++++++++++++ nomad/client_csi_endpoint.go | 13 ++-- nomad/deployment_endpoint.go | 15 ++--- nomad/encrypter_test.go | 7 +- nomad/eval_endpoint.go | 106 ++++++++++------------------- nomad/keyring_endpoint.go | 29 +++----- nomad/keyring_endpoint_test.go | 13 ++-- nomad/plan_endpoint.go | 15 ++--- nomad/rpc_test.go | 119 ++++++++++++++++----------------- 15 files changed, 338 insertions(+), 187 deletions(-) create mode 100644 acl/virtual.go diff --git a/.semgrep/rpc_endpoint.yml b/.semgrep/rpc_endpoint.yml index 98e5e503a27..5fc8f22ebbd 100644 --- a/.semgrep/rpc_endpoint.yml +++ b/.semgrep/rpc_endpoint.yml @@ -22,6 +22,24 @@ rules: ... ... := $A.$B.ResolveACL(...) ... + + # Pattern used by endpoints that are used only for server-to-server. The + # authentication and authorization check must be done together before + # forwarding to prevent the risk of confused deputy when RPCs are + # forwarded. + - pattern-not-inside: | + + aclObj, err := $A.srv.AuthenticateServerOnly($A.ctx, args) + ... + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied + } + + if done, err := $A.srv.forward($METHOD, ...); done { + return err + } + ... + # Pattern used by endpoints that are used by both ACLs and Clients. # These endpoints will always have a ctx passed to Authenticate - pattern-not-inside: | diff --git a/acl/acl.go b/acl/acl.go index 4bfd8408a0a..a791dc576c1 100644 --- a/acl/acl.go +++ b/acl/acl.go @@ -75,6 +75,11 @@ type ACL struct { operator string quota string plugin string + + // The attributes below detail a virtual policy that we never expose + // directly to the end user. + server string + isLeader bool } // maxPrivilege returns the policy which grants the most privilege @@ -296,6 +301,9 @@ func NewACL(management bool, policies []*Policy) (*ACL, error) { acl.variables = svTxn.Commit() acl.wildcardVariables = wsvTxn.Commit() + acl.server = PolicyDeny + acl.isLeader = false + return acl, nil } @@ -723,6 +731,11 @@ func (a *ACL) AllowNodeRead() bool { return true case a.node == PolicyRead: return true + case a.server == PolicyRead, + a.server == PolicyWrite: + return true + case a.isLeader: + return true default: return false } @@ -824,6 +837,16 @@ func (a *ACL) AllowPluginList() bool { } } +// AllowServerOp checks if server-only operations are allowed +func (a *ACL) AllowServerOp() bool { + if a == nil { + // ACL is nil only if ACLs are disabled + // TODO(tgross): return false when there are no nil ACLs + return true + } + return a.server != PolicyDeny || a.isLeader +} + // IsManagement checks if this represents a management token func (a *ACL) IsManagement() bool { return a.management diff --git a/acl/acl_test.go b/acl/acl_test.go index d6d21f0b7c1..260daa10067 100644 --- a/acl/acl_test.go +++ b/acl/acl_test.go @@ -99,6 +99,7 @@ func TestACLManagement(t *testing.T) { must.True(t, acl.AllowOperatorWrite()) must.True(t, acl.AllowQuotaRead()) must.True(t, acl.AllowQuotaWrite()) + must.True(t, acl.AllowServerOp()) } func TestACLMerge(t *testing.T) { @@ -141,6 +142,7 @@ func TestACLMerge(t *testing.T) { must.True(t, acl.AllowOperatorWrite()) must.True(t, acl.AllowQuotaRead()) must.True(t, acl.AllowQuotaWrite()) + must.False(t, acl.AllowServerOp()) // Merge read + blank p3, err := Parse("") @@ -175,6 +177,7 @@ func TestACLMerge(t *testing.T) { must.False(t, acl.AllowOperatorWrite()) must.True(t, acl.AllowQuotaRead()) must.False(t, acl.AllowQuotaWrite()) + must.False(t, acl.AllowServerOp()) // Merge read + deny p4, err := Parse(denyAll) @@ -209,6 +212,7 @@ func TestACLMerge(t *testing.T) { must.False(t, acl.AllowOperatorWrite()) must.False(t, acl.AllowQuotaRead()) must.False(t, acl.AllowQuotaWrite()) + must.False(t, acl.AllowServerOp()) } var readAll = ` diff --git a/acl/virtual.go b/acl/virtual.go new file mode 100644 index 00000000000..e9fd58813a6 --- /dev/null +++ b/acl/virtual.go @@ -0,0 +1,16 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package acl + +var ServerACL = initServerACL() + +func initServerACL() *ACL { + aclObj, err := NewACL(false, []*Policy{}) + if err != nil { + panic(err) + } + aclObj.agent = PolicyRead + aclObj.server = PolicyWrite + return aclObj +} diff --git a/nomad/acl.go b/nomad/acl.go index 40e3c3b2226..22cb7c65e69 100644 --- a/nomad/acl.go +++ b/nomad/acl.go @@ -12,6 +12,10 @@ func (srv *Server) Authenticate(ctx *RPCContext, args structs.RequestWithIdentit return srv.auth.Authenticate(ctx, args) } +func (srv *Server) AuthenticateServerOnly(ctx *RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) { + return srv.auth.AuthenticateServerOnly(ctx, args) +} + func (srv *Server) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, error) { return srv.auth.ResolveACL(args) } diff --git a/nomad/auth/auth.go b/nomad/auth/auth.go index 1055b70e929..69756839a8d 100644 --- a/nomad/auth/auth.go +++ b/nomad/auth/auth.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "net" + "strings" "time" "github.com/armon/go-metrics" @@ -204,6 +205,69 @@ func (s *Authenticator) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, return nil, nil } +// AuthenticateServerOnly returns an ACL object for use *only* with internal +// server-to-server RPCs. This should never be used for RPCs that serve HTTP +// endpoints or accept ACL tokens to avoid confused deputy attacks by making a +// request to a follower that's forwarded. +// +// The returned ACL object is always an acl.ServerACL but in the future this +// could be extended to allow servers to have jurisdiction over specific pools, +// etc. +func (s *Authenticator) AuthenticateServerOnly(ctx RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) { + + remoteIP, err := ctx.GetRemoteIP() // capture for metrics + if err != nil { + s.logger.Error("could not determine remote address", "error", err) + } + + identity := &structs.AuthenticatedIdentity{RemoteIP: remoteIP} + defer args.SetIdentity(identity) // always set the identity, even on errors + + if s.tlsEnabled && !ctx.IsStatic() { + tlsCert := ctx.Certificate() + if tlsCert == nil { + return nil, errors.New("missing certificate information") + } + + // set on the identity whether or not its valid for server RPC, so we + // can capture it for metrics + identity.TLSName = tlsCert.Subject.CommonName + + expected := "server." + s.region + ".nomad" + _, err := validateCertificateForName(tlsCert, expected) + if err != nil { + return nil, err + } + return acl.ServerACL, nil + } + + // Note: if servers had auth tokens like clients do, we would be able to + // verify them here and only return the server ACL for actual servers even + // if mTLS was disabled. Without mTLS, any request can spoof server RPCs. + // This is known and documented in the Security Model: + // https://developer.hashicorp.com/nomad/docs/concepts/security#requirements + return acl.ServerACL, nil +} + +// validateCertificateForName returns true if the certificate is valid +// for the given domain name. +func validateCertificateForName(cert *x509.Certificate, expectedName string) (bool, error) { + if cert == nil { + return false, nil + } + + validNames := []string{cert.Subject.CommonName} + validNames = append(validNames, cert.DNSNames...) + for _, valid := range validNames { + if expectedName == valid { + return true, nil + } + } + + return false, fmt.Errorf("invalid certificate, %s not in %s", + expectedName, strings.Join(validNames, ",")) +} + // ResolveACLForToken resolves an ACL from a token only. It should be used only // by Variables endpoints, which have additional implicit policies for their // claims so we can't wrap them up in ResolveACL. diff --git a/nomad/auth/auth_test.go b/nomad/auth/auth_test.go index f6c727ea131..8184c449eba 100644 --- a/nomad/auth/auth_test.go +++ b/nomad/auth/auth_test.go @@ -335,6 +335,85 @@ func TestAuthenticateDefault(t *testing.T) { } +func TestAuthenticateServerOnly(t *testing.T) { + ci.Parallel(t) + + testAuthenticator := func(t *testing.T, store *state.StateStore, + hasACLs, hasTLS bool) *Authenticator { + leaderACL := uuid.Generate() + return NewAuthenticator(&AuthenticatorConfig{ + StateFn: func() *state.StateStore { return store }, + Logger: testlog.HCLogger(t), + GetLeaderACLFn: func() string { return leaderACL }, + AclsEnabled: hasACLs, + TLSEnabled: hasTLS, + Region: "global", + Encrypter: nil, + }) + } + + testCases := []struct { + name string + testFn func(t *testing.T) + }{ + { + name: "no mTLS", + testFn: func(t *testing.T) { + ctx := newTestContext(t, noTLSCtx, "192.168.1.1") + args := &structs.GenericRequest{} + + store := testStateStore(t) + auth := testAuthenticator(t, store, true, false) + + aclObj, err := auth.AuthenticateServerOnly(ctx, args) + must.NoError(t, err) + must.NotNil(t, aclObj) + must.Eq(t, ":192.168.1.1", args.GetIdentity().String()) + must.True(t, aclObj.AllowServerOp()) + }, + }, + { + name: "with mTLS but client cert", + testFn: func(t *testing.T) { + ctx := newTestContext(t, "client.global.nomad", "192.168.1.1") + args := &structs.GenericRequest{} + + store := testStateStore(t) + auth := testAuthenticator(t, store, true, true) + + aclObj, err := auth.AuthenticateServerOnly(ctx, args) + must.EqError(t, err, + "invalid certificate, server.global.nomad not in client.global.nomad") + must.Eq(t, "client.global.nomad:192.168.1.1", args.GetIdentity().String()) + must.Nil(t, aclObj) + }, + }, + { + name: "with mTLS and server cert", + testFn: func(t *testing.T) { + ctx := newTestContext(t, "server.global.nomad", "192.168.1.1") + args := &structs.GenericRequest{} + + store := testStateStore(t) + auth := testAuthenticator(t, store, true, true) + + aclObj, err := auth.AuthenticateServerOnly(ctx, args) + must.NoError(t, err) + must.Eq(t, "server.global.nomad:192.168.1.1", args.GetIdentity().String()) + must.NotNil(t, aclObj) + must.True(t, aclObj.AllowServerOp()) + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tc.testFn(t) + }) + } + +} + func TestResolveACLToken(t *testing.T) { ci.Parallel(t) diff --git a/nomad/client_csi_endpoint.go b/nomad/client_csi_endpoint.go index 7965d9b1f6c..8f2faf943f1 100644 --- a/nomad/client_csi_endpoint.go +++ b/nomad/client_csi_endpoint.go @@ -175,12 +175,11 @@ func (a *ClientCSI) sendCSIControllerRPC(pluginID, method, fwdMethod, op string, // client requests aren't RequestWithIdentity, so we use a placeholder here // to populate the identity data for metrics identityReq := &structs.GenericRequest{} - authErr := a.srv.Authenticate(a.ctx, identityReq) + + aclObj, err := a.srv.AuthenticateServerOnly(a.ctx, identityReq) a.srv.MeasureRPCRate("client_csi", op, identityReq) - // only servers can send these client RPCs - err := validateTLSCertificateLevel(a.srv, a.ctx, tlsCertificateLevelServer) - if authErr != nil || err != nil { + if err != nil || !aclObj.AllowServerOp() { return structs.ErrPermissionDenied } @@ -250,12 +249,10 @@ func (a *ClientCSI) sendCSINodeRPC(nodeID, method, fwdMethod, op string, args an // client requests aren't RequestWithIdentity, so we use a placeholder here // to populate the identity data for metrics identityReq := &structs.GenericRequest{} - authErr := a.srv.Authenticate(a.ctx, identityReq) + aclObj, err := a.srv.AuthenticateServerOnly(a.ctx, identityReq) a.srv.MeasureRPCRate("client_csi", op, identityReq) - // only servers can send these client RPCs - err := validateTLSCertificateLevel(a.srv, a.ctx, tlsCertificateLevelServer) - if authErr != nil || err != nil { + if err != nil || !aclObj.AllowServerOp() { return structs.ErrPermissionDenied } diff --git a/nomad/deployment_endpoint.go b/nomad/deployment_endpoint.go index 46e270186a2..d8fd6497165 100644 --- a/nomad/deployment_endpoint.go +++ b/nomad/deployment_endpoint.go @@ -614,20 +614,15 @@ func (d *Deployment) Allocations(args *structs.DeploymentSpecificRequest, reply func (d *Deployment) Reap(args *structs.DeploymentDeleteRequest, reply *structs.GenericResponse) error { - authErr := d.srv.Authenticate(d.ctx, args) - - // Ensure the connection was initiated by another server if TLS is used. - err := validateTLSCertificateLevel(d.srv, d.ctx, tlsCertificateLevelServer) - if err != nil { - return err + aclObj, err := d.srv.AuthenticateServerOnly(d.ctx, args) + d.srv.MeasureRPCRate("deployment", structs.RateMetricWrite, args) + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied } + if done, err := d.srv.forward("Deployment.Reap", args, args, reply); done { return err } - d.srv.MeasureRPCRate("deployment", structs.RateMetricWrite, args) - if authErr != nil { - return structs.ErrPermissionDenied - } defer metrics.MeasureSince([]string{"nomad", "deployment", "reap"}, time.Now()) // Update via Raft diff --git a/nomad/encrypter_test.go b/nomad/encrypter_test.go index 571bf7d8236..cf8383e3a98 100644 --- a/nomad/encrypter_test.go +++ b/nomad/encrypter_test.go @@ -81,7 +81,8 @@ func TestEncrypter_Restore(t *testing.T) { listReq := &structs.KeyringListRootKeyMetaRequest{ QueryOptions: structs.QueryOptions{ - Region: "global", + Region: "global", + AuthToken: rootToken.SecretID, }, } var listResp structs.KeyringListRootKeyMetaResponse @@ -119,6 +120,7 @@ func TestEncrypter_Restore(t *testing.T) { codec = rpcClient(t, srv2) // Verify we've restored all the keys from the old keystore + listReq.AuthToken = rootToken.SecretID require.Eventually(t, func() bool { msgpackrpc.CallWithCodec(codec, "Keyring.List", listReq, &listResp) @@ -130,7 +132,8 @@ func TestEncrypter_Restore(t *testing.T) { getReq := &structs.KeyringGetRootKeyRequest{ KeyID: keyMeta.KeyID, QueryOptions: structs.QueryOptions{ - Region: "global", + Region: "global", + AuthToken: rootToken.SecretID, }, } var getResp structs.KeyringGetRootKeyResponse diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index 661de477e63..588f11cd324 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -120,20 +120,15 @@ func (e *Eval) GetEval(args *structs.EvalSpecificRequest, func (e *Eval) Dequeue(args *structs.EvalDequeueRequest, reply *structs.EvalDequeueResponse) error { - authErr := e.srv.Authenticate(e.ctx, args) - - // Ensure the connection was initiated by another server if TLS is used. - err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer) - if err != nil { - return err + aclObj, err := e.srv.AuthenticateServerOnly(e.ctx, args) + e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied } + if done, err := e.srv.forward("Eval.Dequeue", args, args, reply); done { return err } - e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) - if authErr != nil { - return structs.ErrPermissionDenied - } defer metrics.MeasureSince([]string{"nomad", "eval", "dequeue"}, time.Now()) // Ensure there is at least one scheduler @@ -234,20 +229,15 @@ func (e *Eval) getWaitIndex(namespace, job string, evalModifyIndex uint64) (uint func (e *Eval) Ack(args *structs.EvalAckRequest, reply *structs.GenericResponse) error { - authErr := e.srv.Authenticate(e.ctx, args) - - // Ensure the connection was initiated by another server if TLS is used. - err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer) - if err != nil { - return err + aclObj, err := e.srv.AuthenticateServerOnly(e.ctx, args) + e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied } + if done, err := e.srv.forward("Eval.Ack", args, args, reply); done { return err } - e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) - if authErr != nil { - return structs.ErrPermissionDenied - } defer metrics.MeasureSince([]string{"nomad", "eval", "ack"}, time.Now()) // Ack the EvalID @@ -269,20 +259,15 @@ func (e *Eval) Ack(args *structs.EvalAckRequest, func (e *Eval) Nack(args *structs.EvalAckRequest, reply *structs.GenericResponse) error { - authErr := e.srv.Authenticate(e.ctx, args) - - // Ensure the connection was initiated by another server if TLS is used. - err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer) - if err != nil { - return err + aclObj, err := e.srv.AuthenticateServerOnly(e.ctx, args) + e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied } + if done, err := e.srv.forward("Eval.Nack", args, args, reply); done { return err } - e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) - if authErr != nil { - return structs.ErrPermissionDenied - } defer metrics.MeasureSince([]string{"nomad", "eval", "nack"}, time.Now()) // Nack the EvalID @@ -296,20 +281,15 @@ func (e *Eval) Nack(args *structs.EvalAckRequest, func (e *Eval) Update(args *structs.EvalUpdateRequest, reply *structs.GenericResponse) error { - authErr := e.srv.Authenticate(e.ctx, args) - - // Ensure the connection was initiated by another server if TLS is used. - err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer) - if err != nil { - return err + aclObj, err := e.srv.AuthenticateServerOnly(e.ctx, args) + e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied } + if done, err := e.srv.forward("Eval.Update", args, args, reply); done { return err } - e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) - if authErr != nil { - return structs.ErrPermissionDenied - } defer metrics.MeasureSince([]string{"nomad", "eval", "update"}, time.Now()) // Ensure there is only a single update with token @@ -338,20 +318,15 @@ func (e *Eval) Update(args *structs.EvalUpdateRequest, func (e *Eval) Create(args *structs.EvalUpdateRequest, reply *structs.GenericResponse) error { - authErr := e.srv.Authenticate(e.ctx, args) - - // Ensure the connection was initiated by another server if TLS is used. - err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer) - if err != nil { - return err + aclObj, err := e.srv.AuthenticateServerOnly(e.ctx, args) + e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied } + if done, err := e.srv.forward("Eval.Create", args, args, reply); done { return err } - e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) - if authErr != nil { - return structs.ErrPermissionDenied - } defer metrics.MeasureSince([]string{"nomad", "eval", "create"}, time.Now()) // Ensure there is only a single update with token @@ -394,21 +369,15 @@ func (e *Eval) Create(args *structs.EvalUpdateRequest, // Reblock is used to reinsert an existing blocked evaluation into the blocked // evaluation tracker. func (e *Eval) Reblock(args *structs.EvalUpdateRequest, reply *structs.GenericResponse) error { - - authErr := e.srv.Authenticate(e.ctx, args) - - // Ensure the connection was initiated by another server if TLS is used. - err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer) - if err != nil { - return err + aclObj, err := e.srv.AuthenticateServerOnly(e.ctx, args) + e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied } + if done, err := e.srv.forward("Eval.Reblock", args, args, reply); done { return err } - e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) - if authErr != nil { - return structs.ErrPermissionDenied - } defer metrics.MeasureSince([]string{"nomad", "eval", "reblock"}, time.Now()) // Ensure there is only a single update with token @@ -449,20 +418,15 @@ func (e *Eval) Reblock(args *structs.EvalUpdateRequest, reply *structs.GenericRe func (e *Eval) Reap(args *structs.EvalReapRequest, reply *structs.GenericResponse) error { - authErr := e.srv.Authenticate(e.ctx, args) - - // Ensure the connection was initiated by another server if TLS is used. - err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer) - if err != nil { - return err + aclObj, err := e.srv.AuthenticateServerOnly(e.ctx, args) + e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied } + if done, err := e.srv.forward("Eval.Reap", args, args, reply); done { return err } - e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args) - if authErr != nil { - return structs.ErrPermissionDenied - } defer metrics.MeasureSince([]string{"nomad", "eval", "reap"}, time.Now()) // Update via Raft diff --git a/nomad/keyring_endpoint.go b/nomad/keyring_endpoint.go index 4bae3e38e21..6cf91410e05 100644 --- a/nomad/keyring_endpoint.go +++ b/nomad/keyring_endpoint.go @@ -112,16 +112,10 @@ func (k *Keyring) List(args *structs.KeyringListRootKeyMetaRequest, reply *struc defer metrics.MeasureSince([]string{"nomad", "keyring", "list"}, time.Now()) - // we need to allow both humans with management tokens and - // non-leader servers to list keys, in order to support - // replication - err := validateTLSCertificateLevel(k.srv, k.ctx, tlsCertificateLevelServer) - if err != nil { - if aclObj, err := k.srv.ResolveACL(args); err != nil { - return err - } else if aclObj != nil && !aclObj.IsManagement() { - return structs.ErrPermissionDenied - } + if aclObj, err := k.srv.ResolveACL(args); err != nil { + return err + } else if aclObj != nil && !aclObj.IsManagement() { + return structs.ErrPermissionDenied } // Setup the blocking query @@ -239,20 +233,17 @@ func (k *Keyring) validateUpdate(args *structs.KeyringUpdateRootKeyRequest) erro // key material and metadata. It is used only for replication. func (k *Keyring) Get(args *structs.KeyringGetRootKeyRequest, reply *structs.KeyringGetRootKeyResponse) error { - authErr := k.srv.Authenticate(k.ctx, args) + // TODO(tgross): this should use the replication token, not cert check + aclObj, err := k.srv.AuthenticateServerOnly(k.ctx, args) + k.srv.MeasureRPCRate("keyring", structs.RateMetricRead, args) - // ensure that only another server can make this request - err := validateTLSCertificateLevel(k.srv, k.ctx, tlsCertificateLevelServer) - if err != nil { - return err + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied } + if done, err := k.srv.forward("Keyring.Get", args, args, reply); done { return err } - k.srv.MeasureRPCRate("keyring", structs.RateMetricRead, args) - if authErr != nil { - return structs.ErrPermissionDenied - } defer metrics.MeasureSince([]string{"nomad", "keyring", "get"}, time.Now()) if args.KeyID == "" { diff --git a/nomad/keyring_endpoint_test.go b/nomad/keyring_endpoint_test.go index a2c702f6106..b758a00fc78 100644 --- a/nomad/keyring_endpoint_test.go +++ b/nomad/keyring_endpoint_test.go @@ -49,7 +49,7 @@ func TestKeyringEndpoint_CRUD(t *testing.T) { require.NoError(t, err) require.NotEqual(t, uint64(0), updateResp.Index) - // Get and List don't need a token here because they rely on mTLS role verification + // Get doesn't need a token here because it uses mTLS role verification getReq := &structs.KeyringGetRootKeyRequest{ KeyID: id, QueryOptions: structs.QueryOptions{Region: "global"}, @@ -62,7 +62,7 @@ func TestKeyringEndpoint_CRUD(t *testing.T) { require.Equal(t, structs.EncryptionAlgorithmAES256GCM, getResp.Key.Meta.Algorithm) // Make a blocking query for List and wait for an Update. Note - // that List/Get queries don't need ACL tokens in the test server + // that Get queries don't need ACL tokens in the test server // because they always pass the mTLS check var wg sync.WaitGroup @@ -76,6 +76,7 @@ func TestKeyringEndpoint_CRUD(t *testing.T) { QueryOptions: structs.QueryOptions{ Region: "global", MinQueryIndex: getResp.Index, + AuthToken: rootToken.SecretID, }, } err = msgpackrpc.CallWithCodec(codec, "Keyring.List", listReq, &listResp) @@ -117,7 +118,10 @@ func TestKeyringEndpoint_CRUD(t *testing.T) { require.Greater(t, delResp.Index, getResp.Index) listReq := &structs.KeyringListRootKeyMetaRequest{ - QueryOptions: structs.QueryOptions{Region: "global"}, + QueryOptions: structs.QueryOptions{ + Region: "global", + AuthToken: rootToken.SecretID, + }, } err = msgpackrpc.CallWithCodec(codec, "Keyring.List", listReq, &listResp) require.NoError(t, err) @@ -265,7 +269,8 @@ func TestKeyringEndpoint_Rotate(t *testing.T) { listReq := &structs.KeyringListRootKeyMetaRequest{ QueryOptions: structs.QueryOptions{ - Region: "global", + Region: "global", + AuthToken: rootToken.SecretID, }, } var listResp structs.KeyringListRootKeyMetaResponse diff --git a/nomad/plan_endpoint.go b/nomad/plan_endpoint.go index e7f0ecec7b6..465481d1ff9 100644 --- a/nomad/plan_endpoint.go +++ b/nomad/plan_endpoint.go @@ -27,20 +27,15 @@ func NewPlanEndpoint(srv *Server, ctx *RPCContext) *Plan { // Submit is used to submit a plan to the leader func (p *Plan) Submit(args *structs.PlanRequest, reply *structs.PlanResponse) error { - authErr := p.srv.Authenticate(p.ctx, args) - - // Ensure the connection was initiated by another server if TLS is used. - err := validateTLSCertificateLevel(p.srv, p.ctx, tlsCertificateLevelServer) - if err != nil { - return err + aclObj, err := p.srv.AuthenticateServerOnly(p.ctx, args) + p.srv.MeasureRPCRate("plan", structs.RateMetricWrite, args) + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied } + if done, err := p.srv.forward("Plan.Submit", args, args, reply); done { return err } - p.srv.MeasureRPCRate("plan", structs.RateMetricWrite, args) - if authErr != nil { - return structs.ErrPermissionDenied - } defer metrics.MeasureSince([]string{"nomad", "plan", "submit"}, time.Now()) if args.Plan == nil { diff --git a/nomad/rpc_test.go b/nomad/rpc_test.go index 8aed8ec45e4..4ab9717865a 100644 --- a/nomad/rpc_test.go +++ b/nomad/rpc_test.go @@ -1203,99 +1203,93 @@ func TestRPC_TLS_Enforcement_RPC(t *testing.T) { // Some endpoints can only be called server -> server // Some endpoints can only be called client -> server cases := []struct { - name string - cn string - rpcs map[string]any - canRPC bool + name string + cn string + rpcs map[string]any + expectErr string }{ // Local server. { - name: "local server/standard rpc", - cn: "server.global.nomad", - rpcs: standardRPCs, - canRPC: true, + name: "local server/standard rpc", + cn: "server.global.nomad", + rpcs: standardRPCs, }, { - name: "local server/servers only rpc", - cn: "server.global.nomad", - rpcs: localServersOnlyRPCs, - canRPC: true, + name: "local server/servers only rpc", + cn: "server.global.nomad", + rpcs: localServersOnlyRPCs, }, { - name: "local server/clients only rpc", - cn: "server.global.nomad", - rpcs: localClientsOnlyRPCs, - canRPC: true, + name: "local server/clients only rpc", + cn: "server.global.nomad", + rpcs: localClientsOnlyRPCs, }, // Local client. { - name: "local client/standard rpc", - cn: "client.global.nomad", - rpcs: standardRPCs, - canRPC: true, + name: "local client/standard rpc", + cn: "client.global.nomad", + rpcs: standardRPCs, }, { - name: "local client/servers only rpc", - cn: "client.global.nomad", - rpcs: localServersOnlyRPCs, - canRPC: false, + name: "local client/servers only rpc", + cn: "client.global.nomad", + rpcs: localServersOnlyRPCs, + expectErr: "(Permission denied|broken pipe)", }, { - name: "local client/clients only rpc", - cn: "client.global.nomad", - rpcs: localClientsOnlyRPCs, - canRPC: true, + name: "local client/clients only rpc", + cn: "client.global.nomad", + rpcs: localClientsOnlyRPCs, }, // Other region server. { - name: "other region server/standard rpc", - cn: "server.other.nomad", - rpcs: standardRPCs, - canRPC: true, + name: "other region server/standard rpc", + cn: "server.other.nomad", + rpcs: standardRPCs, }, { - name: "other region server/servers only rpc", - cn: "server.other.nomad", - rpcs: localServersOnlyRPCs, - canRPC: false, + name: "other region server/servers only rpc", + cn: "server.other.nomad", + rpcs: localServersOnlyRPCs, + expectErr: "(Permission denied|broken pipe)", }, { - name: "other region server/clients only rpc", - cn: "server.other.nomad", - rpcs: localClientsOnlyRPCs, - canRPC: false, + name: "other region server/clients only rpc", + cn: "server.other.nomad", + rpcs: localClientsOnlyRPCs, + expectErr: "(certificate|broken pipe)", }, // Other region client. { - name: "other region client/standard rpc", - cn: "client.other.nomad", - rpcs: standardRPCs, - canRPC: false, + name: "other region client/standard rpc", + cn: "client.other.nomad", + rpcs: standardRPCs, + expectErr: "(certificate|broken pipe)", }, { - name: "other region client/servers only rpc", - cn: "client.other.nomad", - rpcs: localServersOnlyRPCs, - canRPC: false, + name: "other region client/servers only rpc", + cn: "client.other.nomad", + rpcs: localServersOnlyRPCs, + expectErr: "(certificate|broken pipe)", }, { - name: "other region client/clients only rpc", - cn: "client.other.nomad", - rpcs: localClientsOnlyRPCs, - canRPC: false, + name: "other region client/clients only rpc", + cn: "client.other.nomad", + rpcs: localClientsOnlyRPCs, + expectErr: "(certificate|broken pipe)", }, // Wrong certs. { - name: "irrelevant cert", - cn: "nomad.example.com", - rpcs: standardRPCs, - canRPC: false, + name: "irrelevant cert", + cn: "nomad.example.com", + rpcs: standardRPCs, + expectErr: "(certificate|broken pipe)", }, { - name: "globs", - cn: "*.global.nomad", - rpcs: standardRPCs, - canRPC: false, + name: "globs", + cn: "*.global.nomad", + rpcs: standardRPCs, + expectErr: "(certificate|broken pipe)", }, {}, } @@ -1318,7 +1312,7 @@ func TestRPC_TLS_Enforcement_RPC(t *testing.T) { t.Run(name, func(t *testing.T) { err := tlsHelper.nomadRPC(t, srv, cfg, method, arg) - if tc.canRPC { + if tc.expectErr == "" { if err != nil { // note: lots of these RPCs will return // validation errors after connection b/c we're @@ -1332,8 +1326,7 @@ func TestRPC_TLS_Enforcement_RPC(t *testing.T) { // we immediately write on the pipe that was just // closed by the client must.Error(t, err) - must.RegexMatch(t, - regexp.MustCompile("(certificate|broken pipe)"), err.Error()) + must.RegexMatch(t, regexp.MustCompile(tc.expectErr), err.Error()) } }) }