Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rpc,server: use node certs for shared-process tenant RPCs #96553

Merged
merged 9 commits into from
Feb 7, 2023
2 changes: 1 addition & 1 deletion pkg/cli/clierrorplus/decorate_error.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ func MaybeDecorateError(
return connInsecureHint()
}

if wErr := (*security.Error)(nil); errors.As(err, &wErr) {
if errors.Is(err, security.ErrCertManagement) {
// Avoid errors.Wrapf here so that we have more control over the
// formatting of the message with error text.
const format = "cannot load certificates.\n" +
Expand Down
58 changes: 31 additions & 27 deletions pkg/cli/democluster/demo_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -1303,8 +1303,8 @@ func (c *transientCluster) generateCerts(ctx context.Context, certsDir string) (
nodeKeyExists && nodeCertExists &&
rootClientKeyExists && rootClientCertExists &&
demoClientKeyExists && demoClientCertExists &&
tenantSigningKeyExists && tenantSigningCertExists &&
tenantKeyExists && tenantCertExists {
(!c.demoCtx.Multitenant || (tenantSigningKeyExists && tenantSigningCertExists &&
(c.demoCtx.DisableServerController || (tenantKeyExists && tenantCertExists)))) {
// All good.
return nil
}
Expand Down Expand Up @@ -1430,33 +1430,37 @@ func (c *transientCluster) generateCerts(ctx context.Context, certsDir string) (
}
}

if !(tenantKeyExists && tenantCertExists) {
c.infoLog(ctx, "generating tenant server key/cert pair in %q", certsDir)
pair, err := security.CreateTenantPair(
certsDir,
caKeyPath,
c.demoCtx.DefaultKeySize,
c.demoCtx.DefaultCertLifetime,
2,
tlsServerNames,
)
if err != nil {
return err
}
if err := security.WriteTenantPair(certsDir, pair, true /* overwrite */); err != nil {
return err
if c.demoCtx.Multitenant {
if !(tenantSigningKeyExists && tenantSigningCertExists) {
c.infoLog(ctx, "generating tenant signing key/cert pair in %q", certsDir)
if err := security.CreateTenantSigningPair(
certsDir,
c.demoCtx.DefaultCertLifetime,
true, /* overwrite */
2,
); err != nil {
return err
}
}
}

if !(tenantSigningKeyExists && tenantSigningCertExists) {
c.infoLog(ctx, "generating tenant signing key/cert pair in %q", certsDir)
if err := security.CreateTenantSigningPair(
certsDir,
c.demoCtx.DefaultCertLifetime,
true, /* overwrite */
2,
); err != nil {
return err
if c.demoCtx.DisableServerController {
if !(tenantKeyExists && tenantCertExists) {
c.infoLog(ctx, "generating tenant server key/cert pair in %q", certsDir)
pair, err := security.CreateTenantPair(
certsDir,
caKeyPath,
c.demoCtx.DefaultKeySize,
c.demoCtx.DefaultCertLifetime,
2,
tlsServerNames,
)
if err != nil {
return err
}
if err := security.WriteTenantPair(certsDir, pair, true /* overwrite */); err != nil {
return err
}
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ eexpect $prompt

# create some cert without an IP address in there.
set db_dir "logs/db"
set certs_dir "logs/my-safe-directory"
set certs_dir "my-safe-directory"
send "mkdir -p $certs_dir\r"
eexpect $prompt

Expand Down
19 changes: 17 additions & 2 deletions pkg/cli/interactive_tests/test_error_hints.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ end_test
# Check what happens when attempting to connect securely to an
# insecure server.

send "$argv start-single-node --insecure\r"
send "$argv start-single-node --insecure --store=logs/db\r"
eexpect "initialized new cluster"

spawn /bin/bash
Expand Down Expand Up @@ -73,9 +73,24 @@ interrupt
interrupt
eexpect ":/# "

send "$argv start-single-node --listen-addr=localhost --certs-dir=$certs_dir\r"
send "$argv start-single-node --listen-addr=localhost --certs-dir=$certs_dir --store=logs/db\r"
eexpect "restarted pre-existing node"

set spawn_id $client_spawn_id
start_test "Connecting an insecure RPC client to a secure server"
send "$argv node drain 1 --insecure\r"
eexpect "ERROR"
eexpect "failed to connect to the node"
eexpect ":/# "
end_test

start_test "Connecting an insecure SQL client to a secure server"
send "$argv sql -e 'select 1' --insecure\r"
eexpect "ERROR: node is running secure mode, SSL connection required"
eexpect ":/# "
end_test


# Check what happens when attempting to connect to something
# that is not a CockroachDB server.
set spawn_id $shell_spawn_id
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/testdata/zip/testzip
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null
[node 1] requesting heap file list... received response... done
[node ?] ? heap profiles found
[node 1] requesting goroutine dump list... received response... done
[node 1] 0 goroutine dumps found
[node ?] ? goroutine dumps found
[node 1] requesting log file ...
[node 1] 0 log file ...
[node 1] requesting ranges... received response... done
Expand Down
230 changes: 183 additions & 47 deletions pkg/cli/testdata/zip/unavailable

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions pkg/cli/zip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,16 +345,22 @@ func eraseNonDeterministicZipOutput(out string) string {
out = re.ReplaceAllString(out, `dial tcp ...`)
re = regexp.MustCompile(`(?m)rpc error: .*$`)
out = re.ReplaceAllString(out, `rpc error: ...`)
re = regexp.MustCompile(`(?m)timed out after.*$`)
out = re.ReplaceAllString(out, `timed out after...`)
re = regexp.MustCompile(`(?m)failed to connect to .*$`)
out = re.ReplaceAllString(out, `failed to connect to ...`)

// The number of memory profiles previously collected is not deterministic.
re = regexp.MustCompile(`(?m)^\[node \d+\] \d+ heap profiles found$`)
out = re.ReplaceAllString(out, `[node ?] ? heap profiles found`)
re = regexp.MustCompile(`(?m)^\[node \d+\] \d+ goroutine dumps found$`)
out = re.ReplaceAllString(out, `[node ?] ? goroutine dumps found`)
re = regexp.MustCompile(`(?m)^\[node \d+\] retrieving (memprof|memstats).*$` + "\n")
out = re.ReplaceAllString(out, ``)
re = regexp.MustCompile(`(?m)^\[node \d+\] writing profile.*$` + "\n")
out = re.ReplaceAllString(out, ``)
re = regexp.MustCompile(`(?m)^\[node \d+\] writing dump.*$` + "\n")
out = re.ReplaceAllString(out, ``)

//out = strings.ReplaceAll(out, "\n\n", "\n")
return out
Expand Down
71 changes: 61 additions & 10 deletions pkg/rpc/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ func (c *Connection) Health() error {
// thing.
type Context struct {
ContextOptions
SecurityContext
*SecurityContext

breakerClock breakerClock
RemoteClocks *RemoteClockMonitor
Expand Down Expand Up @@ -499,6 +499,12 @@ type ContextOptions struct {
// utility, not a server, and thus misses server configuration, a
// cluster version, a node ID, etc.
ClientOnly bool

// UseNodeAuth is only used when ClientOnly is not set.
// When set, it indicates that this rpc.Context is running inside
// the same process as a KV layer and thus should feel empowered
// to use its node cert to perform outgoing RPC dials.
UseNodeAuth bool
}

func (c ContextOptions) validate() error {
Expand Down Expand Up @@ -604,9 +610,16 @@ func NewContext(ctx context.Context, opts ContextOptions) *Context {

masterCtx, _ := opts.Stopper.WithCancelOnQuiesce(ctx)

secCtx := NewSecurityContext(
opts.Config,
security.ClusterTLSSettings(opts.Settings),
opts.TenantID,
)
secCtx.useNodeAuth = opts.UseNodeAuth

rpcCtx := &Context{
ContextOptions: opts,
SecurityContext: MakeSecurityContext(opts.Config, security.ClusterTLSSettings(opts.Settings), opts.TenantID),
SecurityContext: secCtx,
breakerClock: breakerClock{
clock: opts.Clock,
},
Expand All @@ -618,6 +631,14 @@ func NewContext(ctx context.Context, opts ContextOptions) *Context {
logClosingConnEvery: log.Every(time.Second),
}

if !opts.TenantID.IsSet() {
panic("tenant ID not set")
}

if opts.ClientOnly && opts.Config.User.Undefined() {
panic("client username not set")
}

if !opts.TenantID.IsSystem() {
rpcCtx.clientCreds = newTenantClientCreds(opts.TenantID)
}
Expand Down Expand Up @@ -1484,7 +1505,7 @@ func (rpcCtx *Context) SetLocalInternalServer(
) {
clientTenantID := rpcCtx.TenantID
separateTracers := false
if rpcCtx.TenantID.IsSet() && !rpcCtx.TenantID.IsSystem() {
if !clientTenantID.IsSystem() {
// This is a secondary tenant server in the same process as the KV
// layer (shared-process multitenancy). In this case, the caller
// and the callee use separate tracers, so we can't mix and match
Expand Down Expand Up @@ -1589,20 +1610,50 @@ func (rpcCtx *Context) dialOptsLocal() ([]grpc.DialOption, error) {
return dialOpts, err
}

// GetClientTLSConfig decides which TLS client configuration (&
// certificates) to use to reach the remote node.
func (rpcCtx *Context) GetClientTLSConfig() (*tls.Config, error) {
if rpcCtx.config.Insecure {
return nil, nil
}

cm, err := rpcCtx.GetCertificateManager()
if err != nil {
return nil, wrapError(err)
}

switch {
case rpcCtx.ClientOnly:
// A CLI command is performing a remote RPC.
tlsCfg, err := cm.GetClientTLSConfig(rpcCtx.config.User)
return tlsCfg, wrapError(err)

case rpcCtx.UseNodeAuth || rpcCtx.tenID.IsSystem():
tlsCfg, err := cm.GetNodeClientTLSConfig()
return tlsCfg, wrapError(err)

case !rpcCtx.tenID.IsSystem():
// A SQL server running in a standalone server doesn't have access
// to the node certs, and thus must use the standalone tenant
// client cert.
tlsCfg, err := cm.GetTenantTLSConfig()
return tlsCfg, wrapError(err)

default:
// We don't currently support any other way to use the rpc context.
// go away.
return nil, errors.AssertionFailedf("programming error: rpc context not initialized correctly")
}
}

// dialOptsNetworkCredentials computes options that determines how the
// RPC client authenticates itself to the remote server.
func (rpcCtx *Context) dialOptsNetworkCredentials() ([]grpc.DialOption, error) {
var dialOpts []grpc.DialOption
if rpcCtx.Config.Insecure {
dialOpts = append(dialOpts, grpc.WithTransportCredentials(insecure.NewCredentials()))
} else {
var tlsConfig *tls.Config
var err error
if rpcCtx.tenID == roachpb.SystemTenantID {
tlsConfig, err = rpcCtx.GetClientTLSConfig()
} else {
tlsConfig, err = rpcCtx.GetTenantTLSConfig()
}
tlsConfig, err := rpcCtx.GetClientTLSConfig()
if err != nil {
return nil, err
}
Expand Down
71 changes: 14 additions & 57 deletions pkg/rpc/tls.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ type lazyCertificateManager struct {
}

func wrapError(err error) error {
if !errors.HasType(err, (*security.Error)(nil)) {
return &security.Error{
Message: "problem using security settings",
Err: err,
}
if err == nil {
return nil
}
if errors.Is(err, security.ErrCertManagement) {
err = errors.Wrap(err, "problem using security settings")
}
return err
}
Expand All @@ -66,18 +66,19 @@ type SecurityContext struct {
// httpClient uses the client TLS config. It is initialized lazily.
httpClient lazyHTTPClient
}
useNodeAuth bool
}

// MakeSecurityContext makes a SecurityContext.
// NewSecurityContext instantiates a SecurityContext.
//
// TODO(tbg): don't take a whole Config. This can be trimmed down significantly.
func MakeSecurityContext(
func NewSecurityContext(
cfg *base.Config, tlsSettings security.TLSSettings, tenID roachpb.TenantID,
) SecurityContext {
) *SecurityContext {
if tenID.ToUint64() == 0 {
panic(errors.AssertionFailedf("programming error: tenant ID not defined"))
}
return SecurityContext{
return &SecurityContext{
Locator: certnames.MakeLocator(cfg.SSLCertsDir),
TLSSettings: tlsSettings,
config: cfg,
Expand All @@ -91,7 +92,7 @@ func MakeSecurityContext(
func (ctx *SecurityContext) GetCertificateManager() (*security.CertificateManager, error) {
ctx.lazy.certificateManager.Do(func() {
var opts []security.Option
if ctx.tenID != roachpb.SystemTenantID {
if !(ctx.useNodeAuth || ctx.tenID == roachpb.SystemTenantID) {
opts = append(opts, security.ForTenant(ctx.tenID.ToUint64()))
}
ctx.lazy.certificateManager.cm, ctx.lazy.certificateManager.err =
Expand All @@ -112,7 +113,9 @@ func (ctx *SecurityContext) GetCertificateManager() (*security.CertificateManage
return ctx.lazy.certificateManager.cm, ctx.lazy.certificateManager.err
}

var errNoCertificatesFound = errors.New("no certificates found; does certs dir exist?")
var errNoCertificatesFound = errors.Mark(
errors.New("no certificates found; does certs dir exist?"),
security.ErrCertManagement)

// GetServerTLSConfig returns the server TLS config, initializing it if needed.
// If Insecure is true, return a nil config, otherwise ask the certificate
Expand All @@ -135,52 +138,6 @@ func (ctx *SecurityContext) GetServerTLSConfig() (*tls.Config, error) {
return tlsCfg, nil
}

// GetClientTLSConfig returns the client TLS config, initializing it if needed.
// If Insecure is true, return a nil config, otherwise ask the certificate
// manager for a TLS config using certs for the config.User.
// This TLSConfig might **NOT** be suitable to talk to the Admin UI, use GetUIClientTLSConfig instead.
func (ctx *SecurityContext) GetClientTLSConfig() (*tls.Config, error) {
// Early out.
if ctx.config.Insecure {
return nil, nil
}

cm, err := ctx.GetCertificateManager()
if err != nil {
return nil, wrapError(err)
}

tlsCfg, err := cm.GetClientTLSConfig(ctx.config.User)
if err != nil {
return nil, wrapError(err)
}
return tlsCfg, nil
}

// GetTenantTLSConfig returns the client TLS config for the tenant, provided
// the SecurityContext operates on behalf of a secondary tenant (i.e. not the
// system tenant).
//
// If Insecure is true, return a nil config, otherwise retrieves the client
// certificate for the configured tenant from the cert manager.
func (ctx *SecurityContext) GetTenantTLSConfig() (*tls.Config, error) {
// Early out.
if ctx.config.Insecure {
return nil, nil
}

cm, err := ctx.GetCertificateManager()
if err != nil {
return nil, wrapError(err)
}

tlsCfg, err := cm.GetTenantTLSConfig()
if err != nil {
return nil, wrapError(err)
}
return tlsCfg, nil
}

// getUIClientTLSConfig returns the client TLS config for Admin UI clients, initializing it if needed.
// If Insecure is true, return a nil config, otherwise ask the certificate
// manager for a TLS config configured to talk to the Admin UI.
Expand Down
Loading