Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rpc,security: allow setting the tenant ID via gRPC metadata #96153

Merged
merged 2 commits into from
Feb 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/rpc/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ go_library(
"//pkg/security",
"//pkg/security/certnames",
"//pkg/security/username",
"//pkg/settings",
"//pkg/settings/cluster",
"//pkg/util/buildutil",
"//pkg/util/contextutil",
Expand All @@ -42,6 +43,7 @@ go_library(
"//pkg/util/grpcutil",
"//pkg/util/hlc",
"//pkg/util/log",
"//pkg/util/log/logcrash",
"//pkg/util/log/severity",
"//pkg/util/metric",
"//pkg/util/netutil",
Expand Down
161 changes: 140 additions & 21 deletions pkg/rpc/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@ package rpc
import (
"context"
"crypto/x509"
"fmt"

"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/security/username"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/util/grpcutil"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logcrash"
"github.com/cockroachdb/errors"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
Expand All @@ -41,6 +44,7 @@ func authErrorf(format string, a ...interface{}) error {
// validates that client TLS certificate provided by the incoming connection
// contains a sufficiently privileged user.
type kvAuth struct {
sv *settings.Values
tenant tenantAuthorizer
}

Expand Down Expand Up @@ -68,13 +72,9 @@ func (a kvAuth) unaryInterceptor(
return nil, err
}

// Enhance the context if the peer is a tenant server.
switch ar := authnRes.(type) {
case authnSuccessPeerIsTenantServer:
ctx = contextWithClientTenant(ctx, roachpb.TenantID(ar))
default:
ctx = contextWithoutClientTenant(ctx)
}
// Enhance the context to ensure the API handler only sees a client tenant ID
// via roachpb.ClientTenantFromContext when relevant.
ctx = contextForRequest(ctx, authnRes)

// Handle authorization according to the selected authz method.
switch ar := authz.(type) {
Expand Down Expand Up @@ -103,13 +103,9 @@ func (a kvAuth) streamInterceptor(
return err
}

// Enhance the context if the peer is a tenant server.
switch ar := authnRes.(type) {
case authnSuccessPeerIsTenantServer:
ctx = contextWithClientTenant(ctx, roachpb.TenantID(ar))
default:
ctx = contextWithoutClientTenant(ctx)
}
// Enhance the context to ensure the API handler only sees a client tenant ID
// via roachpb.ClientTenantFromContext when relevant.
ctx = contextForRequest(ctx, authnRes)

// Handle authorization according to the selected authz method.
switch ar := authz.(type) {
Expand Down Expand Up @@ -188,7 +184,10 @@ func (authnSuccessPeerIsTenantServer) authnResult() {}
func (authnSuccessPeerIsPrivileged) authnResult() {}

// authenticate verifies the credentials of the client and performs
// some consistency check with the information provided.
// some consistency check with the information provided. The caller
// should discard the original context.Context and use the new one;
// the function also consumes and strips some fields from the incoming
// gRPC metadata.MD to avoid confusion if/when the RPC gets forwarded.
func (a kvAuth) authenticate(ctx context.Context) (authnResult, error) {
var ar authnResult
if clientTenantID, localRequest := grpcutil.IsLocalRequestContext(ctx); localRequest {
Expand Down Expand Up @@ -219,14 +218,26 @@ func (a kvAuth) authenticate(ctx context.Context) (authnResult, error) {
return ar, nil
}

// Deal with local requests done through the
// internalClientAdapter. There's no TLS for these calls, so the
// regular authentication code path doesn't apply. The clientTenantID
// should be the result of a call to grpcutil.IsLocalRequestContext.
// Deal with local requests done through the internalClientAdapter.
// There's no TLS for these calls, so the regular authentication code
// path doesn't apply. The clientTenantID should be the result of a
// call to grpcutil.IsLocalRequestContext.
func (a kvAuth) authenticateLocalRequest(
ctx context.Context, clientTenantID roachpb.TenantID,
) (authnResult, error) {
if !clientTenantID.IsSet() || clientTenantID.IsSystem() {
// Sanity check: verify that we do not also have gRPC network credentials
// in the context. This would indicate that metadata was improperly propagated.
maybeTid, err := tenantIDFromRPCMetadata(ctx)
if err != nil || maybeTid.IsSet() {
logcrash.ReportOrPanic(ctx, a.sv, "programming error: network credentials in internal adapter request (%v, %v)", maybeTid, err)
return nil, authErrorf("programming error")
}

if !clientTenantID.IsSet() {
return authnSuccessPeerIsPrivileged{}, nil
}

if clientTenantID.IsSystem() {
return authnSuccessPeerIsPrivileged{}, nil
}

Expand All @@ -242,6 +253,11 @@ func (a kvAuth) authenticateNetworkRequest(ctx context.Context) (authnResult, er
return nil, err
}

tenantIDFromMetadata, err := tenantIDFromRPCMetadata(ctx)
if err != nil {
return nil, authErrorf("client provided invalid tenant ID: %v", err)
}

// Did the client peer use a tenant client cert?
if security.IsTenantCertificate(clientCert) {
// If the peer is using a client tenant cert, in any case we
Expand All @@ -250,7 +266,14 @@ func (a kvAuth) authenticateNetworkRequest(ctx context.Context) (authnResult, er
if err != nil {
return nil, err
}

// If the peer is using a TenantCertificate and also
// provided a tenant ID via gRPC metadata, they must
// match.
if tenantIDFromMetadata.IsSet() && tenantIDFromMetadata != tlsID {
return nil, authErrorf(
"client wants to authenticate as tenant %v, but is using TLS cert for tenant %v",
tenantIDFromMetadata, tlsID)
}
return authnSuccessPeerIsTenantServer(tlsID), nil
}

Expand All @@ -273,6 +296,9 @@ func (a kvAuth) authenticateNetworkRequest(ctx context.Context) (authnResult, er
return nil, err
}

if tenantIDFromMetadata.IsSet() {
return authnSuccessPeerIsTenantServer(tenantIDFromMetadata), nil
}
return authnSuccessPeerIsPrivileged{}, nil
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some of the previous discussion is around it being hard to see what cases lead us to hit this case. I think part of the difficulty is tracking when wantedTenantID is set and unset. I wonder if a simple refactoring might help (warning: code written directly in browser). I still don't love it, but it is nice to be able to directly scan for return authnSuccessPeerIsPrivileged. Anyway, perhaps this is just pushing peas around the plate.

// authenticate verifies the credentials of the client and performs
// some consistency check with the information provided.
func (a kvAuth) authenticate(ctx context.Context) (authnResult, error) {
	var ar authnResult
	if clientTenantID, localRequest := grpcutil.IsLocalRequestContext(ctx); localRequest {
		var err error
		ar, err = authenticateLocalRequest(ctx, clientTenantID)
		if err != nil {
			return nil, err
		}
	} else {
		var err error
		ar, err = authenticateNeworkRequest(ctx)
		if err != nil {
			return nil, err
		}
	}

	switch res := ar.(type) {
	case authnSuccessPeerIsTenantServer:
		if res != a.tenant.tenantID {
			log.Ops.Infof(ctx, "rejected incoming request from tenant %d (misconfiguration?)", wantedTenantID)
			return nil, authErrorf("client tenant identity (%v) does not match server", wantedTenantID)
		}
	case authnSuccessPeerIsPrivileged:
	default:
		return nil, errors.AssertionFailedf("programming error: unhandled case %T", ar)
	}

	return ar, nil

}

// Deal with local requests done through the
// internalClientAdapter. There's no TLS for these calls, so the
// regular authentication code path doesn't apply. The clientTenantID
// should be the result of a call to grpcutil.IsLocalRequestContext.
func (a kvAuth) authenticateLocalRequest(ctx context.Context, clientTenantID roachpb.TenantID) (authnResult, error) {
	if clientTenantID.IsSystem() {
		return authnSuccessPeerIsPrivileged{}, nil
	}
	
	// Sanity check: verify that we do not also have gRPC network credentials
	// in the context. This would indicate that metadata was improperly propagated.
	maybeTid, err := getTenantIDFromNetworkCredentials(ctx)
	if err != nil || maybeTid.IsSet() {
		return nil, authErrorf("programming error: network credentials in internal adapter request (%v, %v)", maybeTid, err)
	}
	return authnSuccessPeerIsTenantServer(clientTenantID), nil
}

// authenticateRemoteRequest authenticates requests made over a TLS connection.
func (a kvAuth) authenticateRemoteRequest(ctx context.Context) (authnResult, error) {
	// We will need to look at the TLS cert in any case, so
	// extract it first.
	clientCert, err := getClientCert(ctx)
	if err != nil {
		return nil, err
	}
	tenantIDFromCredentials, err := getTenantIDFromNetworkCredentials(ctx)
	if err != nil {
		return nil, authErrorf("client provided invalid tenant ID: %v", err)
	}
	
	// Did the client peer use a tenant client cert?
	if security.IsTenantCertificate(clientCert) {
		// If the peer is using a client tenant cert, in any case we
		// validate the tenant ID stored in the CN for correctness.
		tlsID, err := tenantIDFromString(clientCert.Subject.CommonName, "Common Name (CN)")
		if err != nil {
			return nil, err
		}

		// If the peer is using a TenantCertificate and also
		// provided a tenant ID via gRPC metadata, they must
		// match.
		if tenantIDFromCredentials.IsSet() && tenantIDFromCredentials != tlsID {
			return nil, authErrorf(
				"client wants to authenticate as tenant %v, but is using TLS cert for tenant %v",
				tenantIDFromCredentials, tlsID)
		}
		return authnSuccessPeerIsTenantServer(tlsID), nil
	} else {
		// We are using TLS, but the peer is not using a client tenant
		// cert. In that case, we only allow RPCs if the principal is
		// 'node' or 'root' and the tenant scope in the cert matches
		// this server (either the cert has scope "global" or its scope
		// tenant ID matches our own).
		//
		// TODO(benesch): the vast majority of RPCs should be limited to just
		// NodeUser. This is not a security concern, as RootUser has access to
		// read and write all data, merely good hygiene. For example, there is
		// no reason to permit the root user to send raw Raft RPCs.
		certUserScope, err := security.GetCertificateUserScope(clientCert)
		if err != nil {
			return nil, err
		}
		if err := checkRootOrNodeInScope(certUserScope, a.tenant.tenantID); err != nil {
			return nil, err
		}
		
		if tenantIDFromCredentials.IsSet() {
			return authnSuccessPeerIsTenantServer(tlsID), nil
		} else {
			return authnSuccessPeerIsPrivileged{}, nil
		}
	}
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not bad. Doing this simplification in a separate PR: #96338

}

Expand Down Expand Up @@ -350,3 +376,96 @@ func checkRootOrNodeInScope(
"need root or node client cert to perform RPCs on this server (this is tenant %v; cert is valid for %s)",
serverTenantID, security.FormatUserScopes(certUserScope))
}

// contextForRequest sets up the context.Context for use by
// the API handler. It covers two cases:
//
// - the request is coming from a secondary tenant.
// Then it uses roachpb.ContextWithTenantClient() to
// ensure that the API handler will find the tenant ID
// with roachpb.TenantClientFromContext().
// - the request is coming from the system tenant.
// then it clears the tenant client information
// to ensure that the API handler will _not_ find
// a tenant ID with roachpb.TenantClientFromContext().
//
// This latter case is important e.g. in the following scenario:
//
// SQL (a) -(network gRPC)-> KV (b) -(internal client adapter)-> KV (c)
//
// The authn in the call from (a) to (b) has added a tenant ID in the
// Go context for the handler at (b). This context.Context "pierces"
// the stack of calls in the internal client adapter, and thus the
// tenant ID is still present when the call is received at (c).
// However, we don't want the API handler at (c) to see it any more.
// So we need to remove it.
func contextForRequest(ctx context.Context, authnRes authnResult) context.Context {
switch ar := authnRes.(type) {
case authnSuccessPeerIsTenantServer:
// The simple context key will be used in various places via
// roachpb.ClientTenantFromContext(). This also adds a logging
// tag.
ctx = contextWithClientTenant(ctx, roachpb.TenantID(ar))
default:
// The caller is not a tenant server, but it may have been in the
// process of handling an API call for a tenant server and so it
// may have a client tenant ID in its context already. To ensure
// none will be found, we need to clear it explicitly.
ctx = contextWithoutClientTenant(ctx)
}
return ctx
}

// tenantClientCred is responsible for passing the tenant ID as
// medatada header to called RPCs. This makes it possible to pass the
// tenant ID even when using a different TLS cert than the "tenant
// client cert".
type tenantClientCred struct {
md map[string]string
}

// clientTIDMetadataHeaderKey is the gRPC metadata key that indicates
// which tenant ID the client is intending to connect as (originating
// tenant identity).
//
// This is used instead of the cert CN field when connecting with a
// TLS client cert that is not marked as special "tenant client cert"
// via the "Tenants" string in the OU field.
//
// This metadata item is not meant to be used beyond authentication;
// to access the client tenant ID inside RPC handlers or other code,
// use roachpb.ClientTenantFromContext() instead.
const clientTIDMetadataHeaderKey = "client-tid"

// newTenantClientCreds constructs a credentials.PerRPCCredentials
// which injects the client tenant ID as extra gRPC metadata in each
// RPC.
func newTenantClientCreds(tid roachpb.TenantID) credentials.PerRPCCredentials {
return &tenantClientCred{
md: map[string]string{
clientTIDMetadataHeaderKey: fmt.Sprint(tid),
},
}
}

// tenantIDFromRPCMetadata checks if there is a tenant ID in
// the incoming gRPC metadata.
func tenantIDFromRPCMetadata(ctx context.Context) (roachpb.TenantID, error) {
val, ok := grpcutil.FastFirstValueFromIncomingContext(ctx, clientTIDMetadataHeaderKey)
if !ok {
return roachpb.TenantID{}, nil
}
return tenantIDFromString(val, "gRPC metadata")
}

// GetRequestMetadata implements the (grpc)
// credentials.PerRPCCredentials interface.
func (tcc *tenantClientCred) GetRequestMetadata(
ctx context.Context, uri ...string,
) (map[string]string, error) {
return tcc.md, nil
}

// RequireTransportSecurity implements the (grpc)
// credentials.PerRPCCredentials interface.
func (tcc *tenantClientCred) RequireTransportSecurity() bool { return false }
2 changes: 1 addition & 1 deletion pkg/rpc/auth_tenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ func validateSpan(tenID roachpb.TenantID, sp roachpb.Span) error {
return checkSpanBounds(rSpan, tenSpan)
}

const tenantLoggingTag = "tenant"
const tenantLoggingTag = "client-tenant"

// contextWithClientTenant inserts a tenant identifier in the context,
// identifying the tenant that's the client for an RPC. The identifier can be
Expand Down
56 changes: 49 additions & 7 deletions pkg/rpc/auth_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,13 @@ func TestAuthenticateTenant(t *testing.T) {
stid := roachpb.SystemTenantID
tenTen := roachpb.MustMakeTenantID(10)
for _, tc := range []struct {
systemID roachpb.TenantID
ous []string
commonName string
expTenID roachpb.TenantID
expErr string
tenantScope uint64
systemID roachpb.TenantID
ous []string
commonName string
expTenID roachpb.TenantID
expErr string
tenantScope uint64
clientTenantInMD string
}{
{systemID: stid, ous: correctOU, commonName: "10", expTenID: tenTen},
{systemID: stid, ous: correctOU, commonName: roachpb.MinTenantID.String(), expTenID: roachpb.MinTenantID},
Expand All @@ -127,8 +128,44 @@ func TestAuthenticateTenant(t *testing.T) {
{systemID: tenTen, ous: correctOU, commonName: "1", expErr: `invalid tenant ID 1 in Common Name \(CN\)`},
{systemID: tenTen, ous: nil, commonName: "root"},
{systemID: tenTen, ous: nil, commonName: "node"},

// Passing a client ID in metadata instead of relying only on the TLS cert.
{clientTenantInMD: "invalid", expErr: `could not parse tenant ID from gRPC metadata`},
{clientTenantInMD: "1", expErr: `invalid tenant ID 1 in gRPC metadata`},
{clientTenantInMD: "-1", expErr: `could not parse tenant ID from gRPC metadata`},

// tenant ID in MD matches that in client cert.
// Server is KV node: expect tenant authorization.
{clientTenantInMD: "10",
systemID: stid, ous: correctOU, commonName: "10", expTenID: tenTen},
// tenant ID in MD doesn't match that in client cert.
{clientTenantInMD: "10",
systemID: stid, ous: correctOU, commonName: "123",
expErr: `client wants to authenticate as tenant 10, but is using TLS cert for tenant 123`},
// tenant ID present in MD, but not in client cert. However,
// client cert is valid. Use MD tenant ID.
// Server is KV node: expect tenant authorization.
{clientTenantInMD: "10",
systemID: stid, ous: nil, commonName: "root", expTenID: tenTen},
// tenant ID present in MD, but not in client cert. However,
// client cert is valid. Use MD tenant ID.
// Server is KV node: expect tenant authorization.
{clientTenantInMD: "10",
systemID: stid, ous: nil, commonName: "node", expTenID: tenTen},
// tenant ID present in MD, but not in client cert. However,
// client cert is valid. Use MD tenant ID.
// Server is secondary tenant: do not do additional tenant authorization.
{clientTenantInMD: "10",
systemID: tenTen, ous: nil, commonName: "root", expTenID: roachpb.TenantID{}},
{clientTenantInMD: "10",
systemID: tenTen, ous: nil, commonName: "node", expTenID: roachpb.TenantID{}},
// tenant ID present in MD, but not in client cert. Use MD tenant ID.
// Server tenant ID does not match client tenant ID.
{clientTenantInMD: "123",
systemID: tenTen, ous: nil, commonName: "root",
expErr: `client tenant identity \(123\) does not match server`},
} {
t.Run(fmt.Sprintf("from %v to %v", tc.commonName, tc.systemID), func(t *testing.T) {
t.Run(fmt.Sprintf("from %v to %v (md %q)", tc.commonName, tc.systemID, tc.clientTenantInMD), func(t *testing.T) {
cert := &x509.Certificate{
Subject: pkix.Name{
CommonName: tc.commonName,
Expand All @@ -150,6 +187,11 @@ func TestAuthenticateTenant(t *testing.T) {
p := peer.Peer{AuthInfo: tlsInfo}
ctx := peer.NewContext(context.Background(), &p)

if tc.clientTenantInMD != "" {
md := metadata.MD{"client-tid": []string{tc.clientTenantInMD}}
ctx = metadata.NewIncomingContext(ctx, md)
}

tenID, err := rpc.TestingAuthenticateTenant(ctx, tc.systemID)

if tc.expErr == "" {
Expand Down
24 changes: 24 additions & 0 deletions pkg/rpc/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ func NewServerEx(

if !rpcCtx.Config.Insecure {
a := kvAuth{
sv: &rpcCtx.Settings.SV,
tenant: tenantAuthorizer{
tenantID: rpcCtx.tenID,
},
Expand Down Expand Up @@ -421,6 +422,9 @@ type Context struct {
// The loopbackDialFn fits under that common case by transporting
// the gRPC protocol over an in-memory pipe.
loopbackDialFn func(context.Context) (net.Conn, error)

// clientCreds is used to pass additional headers to called RPCs.
clientCreds credentials.PerRPCCredentials
}

// SetLoopbackDialer configures the loopback dialer function.
Expand Down Expand Up @@ -615,6 +619,10 @@ func NewContext(ctx context.Context, opts ContextOptions) *Context {
logClosingConnEvery: log.Every(time.Second),
}

if !opts.TenantID.IsSystem() {
rpcCtx.clientCreds = newTenantClientCreds(opts.TenantID)
}

if opts.Knobs.NoLoopbackDialer {
// The test has decided it doesn't need/want a loopback dialer.
// Ensure we still have a working dial function in that case.
Expand Down Expand Up @@ -801,6 +809,18 @@ func makeInternalClientAdapter(
// the outer RPC.
ctx = grpcutil.NewLocalRequestContext(ctx, tenantIDFromContext(ctx))

// Clear any leftover gRPC incoming metadata, if this call
// is originating from a RPC handler function called as
// a result of a tenant call. This is this case:
//
// tenant -(rpc)-> tenant -(rpc)-> KV
// ^ YOU ARE HERE
//
// at this point, the left side RPC has left some incoming
// metadata in the context, but we need to get rid of it
// before we let the call go through KV.
ctx = grpcutil.ClearIncomingContext(ctx)

// If this is a tenant calling to the local KV server, we make things look
// closer to a remote call from the tracing point of view.
if tenant {
Expand Down Expand Up @@ -1693,6 +1713,10 @@ func (rpcCtx *Context) dialOptsCommon(
grpc.MaxCallSendMsgSize(math.MaxInt32),
)}

if rpcCtx.clientCreds != nil {
dialOpts = append(dialOpts, grpc.WithPerRPCCredentials(rpcCtx.clientCreds))
}

// We throw this one in for good measure, but it only disables the retries
// for RPCs that were already pending (which are opt in anyway, and we don't
// opt in). It doesn't disable what gRPC calls "transparent retries" (RPC
Expand Down
Loading