Skip to content

Commit

Permalink
auth: add server-only ACL (#18715)
Browse files Browse the repository at this point in the history
* auth: add server-only ACL

The RPC handlers expect to see `nil` ACL objects whenever ACLs are disabled. By
using `nil` as a sentinel value, we have the risk of nil pointer exceptions and
improper handling of `nil` when returned from our various auth methods that can
lead to privilege escalation bugs. This is the second in a series to eliminate
the use of `nil` ACLs as a sentinel value for when ACLs are disabled.

This patch involves creating a new "virtual" ACL object for checking permissions
on server operations and a matching `AuthenticateServerOnly` method for
server-only RPCs that can produce that ACL.

Ref: hashicorp/nomad-enterprise#1218
Ref: #18703
  • Loading branch information
tgross authored Oct 11, 2023
1 parent 7ca619f commit a92461c
Show file tree
Hide file tree
Showing 15 changed files with 338 additions and 187 deletions.
18 changes: 18 additions & 0 deletions .semgrep/rpc_endpoint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,24 @@ rules:
...
... := $A.$B.ResolveACL(...)
...
# Pattern used by endpoints that are used only for server-to-server. The
# authentication and authorization check must be done together before
# forwarding to prevent the risk of confused deputy when RPCs are
# forwarded.
- pattern-not-inside: |
aclObj, err := $A.srv.AuthenticateServerOnly($A.ctx, args)
...
if err != nil || !aclObj.AllowServerOp() {
return structs.ErrPermissionDenied
}
if done, err := $A.srv.forward($METHOD, ...); done {
return err
}
...
# Pattern used by endpoints that are used by both ACLs and Clients.
# These endpoints will always have a ctx passed to Authenticate
- pattern-not-inside: |
Expand Down
23 changes: 23 additions & 0 deletions acl/acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ type ACL struct {
operator string
quota string
plugin string

// The attributes below detail a virtual policy that we never expose
// directly to the end user.
server string
isLeader bool
}

// maxPrivilege returns the policy which grants the most privilege
Expand Down Expand Up @@ -296,6 +301,9 @@ func NewACL(management bool, policies []*Policy) (*ACL, error) {
acl.variables = svTxn.Commit()
acl.wildcardVariables = wsvTxn.Commit()

acl.server = PolicyDeny
acl.isLeader = false

return acl, nil
}

Expand Down Expand Up @@ -723,6 +731,11 @@ func (a *ACL) AllowNodeRead() bool {
return true
case a.node == PolicyRead:
return true
case a.server == PolicyRead,
a.server == PolicyWrite:
return true
case a.isLeader:
return true
default:
return false
}
Expand Down Expand Up @@ -824,6 +837,16 @@ func (a *ACL) AllowPluginList() bool {
}
}

// AllowServerOp checks if server-only operations are allowed
func (a *ACL) AllowServerOp() bool {
if a == nil {
// ACL is nil only if ACLs are disabled
// TODO(tgross): return false when there are no nil ACLs
return true
}
return a.server != PolicyDeny || a.isLeader
}

// IsManagement checks if this represents a management token
func (a *ACL) IsManagement() bool {
return a.management
Expand Down
4 changes: 4 additions & 0 deletions acl/acl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ func TestACLManagement(t *testing.T) {
must.True(t, acl.AllowOperatorWrite())
must.True(t, acl.AllowQuotaRead())
must.True(t, acl.AllowQuotaWrite())
must.True(t, acl.AllowServerOp())
}

func TestACLMerge(t *testing.T) {
Expand Down Expand Up @@ -141,6 +142,7 @@ func TestACLMerge(t *testing.T) {
must.True(t, acl.AllowOperatorWrite())
must.True(t, acl.AllowQuotaRead())
must.True(t, acl.AllowQuotaWrite())
must.False(t, acl.AllowServerOp())

// Merge read + blank
p3, err := Parse("")
Expand Down Expand Up @@ -175,6 +177,7 @@ func TestACLMerge(t *testing.T) {
must.False(t, acl.AllowOperatorWrite())
must.True(t, acl.AllowQuotaRead())
must.False(t, acl.AllowQuotaWrite())
must.False(t, acl.AllowServerOp())

// Merge read + deny
p4, err := Parse(denyAll)
Expand Down Expand Up @@ -209,6 +212,7 @@ func TestACLMerge(t *testing.T) {
must.False(t, acl.AllowOperatorWrite())
must.False(t, acl.AllowQuotaRead())
must.False(t, acl.AllowQuotaWrite())
must.False(t, acl.AllowServerOp())
}

var readAll = `
Expand Down
16 changes: 16 additions & 0 deletions acl/virtual.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package acl

var ServerACL = initServerACL()

func initServerACL() *ACL {
aclObj, err := NewACL(false, []*Policy{})
if err != nil {
panic(err)
}
aclObj.agent = PolicyRead
aclObj.server = PolicyWrite
return aclObj
}
4 changes: 4 additions & 0 deletions nomad/acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ func (srv *Server) Authenticate(ctx *RPCContext, args structs.RequestWithIdentit
return srv.auth.Authenticate(ctx, args)
}

func (srv *Server) AuthenticateServerOnly(ctx *RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) {
return srv.auth.AuthenticateServerOnly(ctx, args)
}

func (srv *Server) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
return srv.auth.ResolveACL(args)
}
Expand Down
64 changes: 64 additions & 0 deletions nomad/auth/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"net"
"strings"
"time"

"github.com/armon/go-metrics"
Expand Down Expand Up @@ -204,6 +205,69 @@ func (s *Authenticator) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL,
return nil, nil
}

// AuthenticateServerOnly returns an ACL object for use *only* with internal
// server-to-server RPCs. This should never be used for RPCs that serve HTTP
// endpoints or accept ACL tokens to avoid confused deputy attacks by making a
// request to a follower that's forwarded.
//
// The returned ACL object is always an acl.ServerACL but in the future this
// could be extended to allow servers to have jurisdiction over specific pools,
// etc.
func (s *Authenticator) AuthenticateServerOnly(ctx RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) {

remoteIP, err := ctx.GetRemoteIP() // capture for metrics
if err != nil {
s.logger.Error("could not determine remote address", "error", err)
}

identity := &structs.AuthenticatedIdentity{RemoteIP: remoteIP}
defer args.SetIdentity(identity) // always set the identity, even on errors

if s.tlsEnabled && !ctx.IsStatic() {
tlsCert := ctx.Certificate()
if tlsCert == nil {
return nil, errors.New("missing certificate information")
}

// set on the identity whether or not its valid for server RPC, so we
// can capture it for metrics
identity.TLSName = tlsCert.Subject.CommonName

expected := "server." + s.region + ".nomad"
_, err := validateCertificateForName(tlsCert, expected)
if err != nil {
return nil, err
}
return acl.ServerACL, nil
}

// Note: if servers had auth tokens like clients do, we would be able to
// verify them here and only return the server ACL for actual servers even
// if mTLS was disabled. Without mTLS, any request can spoof server RPCs.
// This is known and documented in the Security Model:
// https://developer.hashicorp.com/nomad/docs/concepts/security#requirements
return acl.ServerACL, nil
}

// validateCertificateForName returns true if the certificate is valid
// for the given domain name.
func validateCertificateForName(cert *x509.Certificate, expectedName string) (bool, error) {
if cert == nil {
return false, nil
}

validNames := []string{cert.Subject.CommonName}
validNames = append(validNames, cert.DNSNames...)
for _, valid := range validNames {
if expectedName == valid {
return true, nil
}
}

return false, fmt.Errorf("invalid certificate, %s not in %s",
expectedName, strings.Join(validNames, ","))
}

// ResolveACLForToken resolves an ACL from a token only. It should be used only
// by Variables endpoints, which have additional implicit policies for their
// claims so we can't wrap them up in ResolveACL.
Expand Down
79 changes: 79 additions & 0 deletions nomad/auth/auth_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,85 @@ func TestAuthenticateDefault(t *testing.T) {

}

func TestAuthenticateServerOnly(t *testing.T) {
ci.Parallel(t)

testAuthenticator := func(t *testing.T, store *state.StateStore,
hasACLs, hasTLS bool) *Authenticator {
leaderACL := uuid.Generate()
return NewAuthenticator(&AuthenticatorConfig{
StateFn: func() *state.StateStore { return store },
Logger: testlog.HCLogger(t),
GetLeaderACLFn: func() string { return leaderACL },
AclsEnabled: hasACLs,
TLSEnabled: hasTLS,
Region: "global",
Encrypter: nil,
})
}

testCases := []struct {
name string
testFn func(t *testing.T)
}{
{
name: "no mTLS",
testFn: func(t *testing.T) {
ctx := newTestContext(t, noTLSCtx, "192.168.1.1")
args := &structs.GenericRequest{}

store := testStateStore(t)
auth := testAuthenticator(t, store, true, false)

aclObj, err := auth.AuthenticateServerOnly(ctx, args)
must.NoError(t, err)
must.NotNil(t, aclObj)
must.Eq(t, ":192.168.1.1", args.GetIdentity().String())
must.True(t, aclObj.AllowServerOp())
},
},
{
name: "with mTLS but client cert",
testFn: func(t *testing.T) {
ctx := newTestContext(t, "client.global.nomad", "192.168.1.1")
args := &structs.GenericRequest{}

store := testStateStore(t)
auth := testAuthenticator(t, store, true, true)

aclObj, err := auth.AuthenticateServerOnly(ctx, args)
must.EqError(t, err,
"invalid certificate, server.global.nomad not in client.global.nomad")
must.Eq(t, "client.global.nomad:192.168.1.1", args.GetIdentity().String())
must.Nil(t, aclObj)
},
},
{
name: "with mTLS and server cert",
testFn: func(t *testing.T) {
ctx := newTestContext(t, "server.global.nomad", "192.168.1.1")
args := &structs.GenericRequest{}

store := testStateStore(t)
auth := testAuthenticator(t, store, true, true)

aclObj, err := auth.AuthenticateServerOnly(ctx, args)
must.NoError(t, err)
must.Eq(t, "server.global.nomad:192.168.1.1", args.GetIdentity().String())
must.NotNil(t, aclObj)
must.True(t, aclObj.AllowServerOp())
},
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
tc.testFn(t)
})
}

}

func TestResolveACLToken(t *testing.T) {
ci.Parallel(t)

Expand Down
13 changes: 5 additions & 8 deletions nomad/client_csi_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,11 @@ func (a *ClientCSI) sendCSIControllerRPC(pluginID, method, fwdMethod, op string,
// client requests aren't RequestWithIdentity, so we use a placeholder here
// to populate the identity data for metrics
identityReq := &structs.GenericRequest{}
authErr := a.srv.Authenticate(a.ctx, identityReq)

aclObj, err := a.srv.AuthenticateServerOnly(a.ctx, identityReq)
a.srv.MeasureRPCRate("client_csi", op, identityReq)

// only servers can send these client RPCs
err := validateTLSCertificateLevel(a.srv, a.ctx, tlsCertificateLevelServer)
if authErr != nil || err != nil {
if err != nil || !aclObj.AllowServerOp() {
return structs.ErrPermissionDenied
}

Expand Down Expand Up @@ -250,12 +249,10 @@ func (a *ClientCSI) sendCSINodeRPC(nodeID, method, fwdMethod, op string, args an
// client requests aren't RequestWithIdentity, so we use a placeholder here
// to populate the identity data for metrics
identityReq := &structs.GenericRequest{}
authErr := a.srv.Authenticate(a.ctx, identityReq)
aclObj, err := a.srv.AuthenticateServerOnly(a.ctx, identityReq)
a.srv.MeasureRPCRate("client_csi", op, identityReq)

// only servers can send these client RPCs
err := validateTLSCertificateLevel(a.srv, a.ctx, tlsCertificateLevelServer)
if authErr != nil || err != nil {
if err != nil || !aclObj.AllowServerOp() {
return structs.ErrPermissionDenied
}

Expand Down
15 changes: 5 additions & 10 deletions nomad/deployment_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -614,20 +614,15 @@ func (d *Deployment) Allocations(args *structs.DeploymentSpecificRequest, reply
func (d *Deployment) Reap(args *structs.DeploymentDeleteRequest,
reply *structs.GenericResponse) error {

authErr := d.srv.Authenticate(d.ctx, args)

// Ensure the connection was initiated by another server if TLS is used.
err := validateTLSCertificateLevel(d.srv, d.ctx, tlsCertificateLevelServer)
if err != nil {
return err
aclObj, err := d.srv.AuthenticateServerOnly(d.ctx, args)
d.srv.MeasureRPCRate("deployment", structs.RateMetricWrite, args)
if err != nil || !aclObj.AllowServerOp() {
return structs.ErrPermissionDenied
}

if done, err := d.srv.forward("Deployment.Reap", args, args, reply); done {
return err
}
d.srv.MeasureRPCRate("deployment", structs.RateMetricWrite, args)
if authErr != nil {
return structs.ErrPermissionDenied
}
defer metrics.MeasureSince([]string{"nomad", "deployment", "reap"}, time.Now())

// Update via Raft
Expand Down
7 changes: 5 additions & 2 deletions nomad/encrypter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ func TestEncrypter_Restore(t *testing.T) {

listReq := &structs.KeyringListRootKeyMetaRequest{
QueryOptions: structs.QueryOptions{
Region: "global",
Region: "global",
AuthToken: rootToken.SecretID,
},
}
var listResp structs.KeyringListRootKeyMetaResponse
Expand Down Expand Up @@ -119,6 +120,7 @@ func TestEncrypter_Restore(t *testing.T) {
codec = rpcClient(t, srv2)

// Verify we've restored all the keys from the old keystore
listReq.AuthToken = rootToken.SecretID

require.Eventually(t, func() bool {
msgpackrpc.CallWithCodec(codec, "Keyring.List", listReq, &listResp)
Expand All @@ -130,7 +132,8 @@ func TestEncrypter_Restore(t *testing.T) {
getReq := &structs.KeyringGetRootKeyRequest{
KeyID: keyMeta.KeyID,
QueryOptions: structs.QueryOptions{
Region: "global",
Region: "global",
AuthToken: rootToken.SecretID,
},
}
var getResp structs.KeyringGetRootKeyResponse
Expand Down
Loading

0 comments on commit a92461c

Please sign in to comment.