From 17e8e3caa92d100d735bb0e296c1a74cec96476a Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Sat, 16 Sep 2023 03:35:12 -0400 Subject: [PATCH] security: clear client cert expiration cache on SIGHUP Release note (security update): The SIGHUP signal will now clear the cached expiration times for client certificates that are reported by the security.certificate.expiration.client metric. --- pkg/security/cert_expiry_cache.go | 1 + pkg/security/certificate_manager.go | 3 +++ pkg/security/certs_rotation_test.go | 42 ++++++++++++++++++++++++++--- pkg/security/certs_test.go | 8 +++--- 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/pkg/security/cert_expiry_cache.go b/pkg/security/cert_expiry_cache.go index 655bb9cdb7de..9932557169a5 100644 --- a/pkg/security/cert_expiry_cache.go +++ b/pkg/security/cert_expiry_cache.go @@ -88,6 +88,7 @@ func NewClientCertExpirationCache( }, OnEvictedEntry: func(entry *cache.Entry) { gauge := entry.Value.(*aggmetric.Gauge) + gauge.Update(0) gauge.Unlink() c.mu.acc.Shrink(ctx, int64(unsafe.Sizeof(*gauge))) }, diff --git a/pkg/security/certificate_manager.go b/pkg/security/certificate_manager.go index 5100da414b3f..4b1a8a0b8e29 100644 --- a/pkg/security/certificate_manager.go +++ b/pkg/security/certificate_manager.go @@ -170,6 +170,9 @@ func (cm *CertificateManager) RegisterSignalHandler( return case sig := <-ch: log.Ops.Infof(ctx, "received signal %q, triggering certificate reload", sig) + if cache := cm.clientCertExpirationCache; cache != nil { + cache.Clear() + } if err := cm.LoadCertificates(); err != nil { log.Ops.Warningf(ctx, "could not reload certificates: %v", err) log.StructuredEvent(ctx, &eventpb.CertsReload{Success: false, ErrorMessage: err.Error()}) diff --git a/pkg/security/certs_rotation_test.go b/pkg/security/certs_rotation_test.go index 244bc90fb9d1..2358b1147c4f 100644 --- a/pkg/security/certs_rotation_test.go +++ b/pkg/security/certs_rotation_test.go @@ -25,6 +25,7 @@ import ( "regexp" "strings" "testing" + "time" "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer" @@ -41,6 +42,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log/eventpb" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" + prometheusgo "github.com/prometheus/client_model/go" + "github.com/stretchr/testify/require" "golang.org/x/sys/unix" ) @@ -56,7 +59,7 @@ func TestRotateCerts(t *testing.T) { defer ResetTest() certsDir := t.TempDir() - if err := generateBaseCerts(certsDir); err != nil { + if err := generateBaseCerts(certsDir, 48*time.Hour); err != nil { t.Fatal(err) } @@ -103,6 +106,22 @@ func TestRotateCerts(t *testing.T) { return goDB } + checkCertExpirationMetric := func() int64 { + cm, err := s.RPCContext().SecurityContext.GetCertificateManager() + if err != nil { + t.Fatal(err) + } + ret := int64(0) + cm.Metrics().ClientExpiration.Each(nil, func(pm *prometheusgo.Metric) { + for _, l := range pm.GetLabel() { + if l.GetName() == "sql_user" && l.GetValue() == username.RootUser { + ret = int64(pm.GetGauge().GetValue()) + } + } + }) + return ret + } + // Some errors codes. const kBadAuthority = "certificate signed by unknown authority" const kBadCertificate = "tls: bad certificate" @@ -120,6 +139,9 @@ func TestRotateCerts(t *testing.T) { t.Fatalf("could not create http client: %v", err) } + // Before any client has connected, the expiration metric should be zero. + require.Zero(t, checkCertExpirationMetric()) + if err := clientTest(firstClient); err != nil { t.Fatal(err) } @@ -131,12 +153,15 @@ func TestRotateCerts(t *testing.T) { t.Fatal(err) } - // Delete certs and re-generate them. + // Now the expiration metric should be set. + require.Greater(t, checkCertExpirationMetric(), timeutil.Now().Add(40*time.Hour).Unix()) + + // Delete certs and re-generate them with a longer client cert lifetime. // New clients will fail with CA errors. if err := os.RemoveAll(certsDir); err != nil { t.Fatal(err) } - if err := generateBaseCerts(certsDir); err != nil { + if err := generateBaseCerts(certsDir, 54*time.Hour); err != nil { t.Fatal(err) } @@ -197,6 +222,9 @@ func TestRotateCerts(t *testing.T) { return nil }) + // The expiration metric should be zero after the SIGHUP. + require.Zero(t, checkCertExpirationMetric()) + // Check that the structured event was logged. // We use SucceedsSoon here because there may be a delay between // the moment SIGHUP is processed and certs are reloaded, and @@ -244,13 +272,16 @@ func TestRotateCerts(t *testing.T) { t.Fatal(err) } + // The expiration metric should be set again, but should now show a higher value. + require.Greater(t, checkCertExpirationMetric(), timeutil.Now().Add(50*time.Hour).Unix()) + // Now regenerate certs, but keep the CA cert around. // We still need to delete the key. // New clients with certs will fail with bad certificate (CA not yet loaded). if err := os.Remove(filepath.Join(certsDir, certnames.EmbeddedCAKey)); err != nil { t.Fatal(err) } - if err := generateBaseCerts(certsDir); err != nil { + if err := generateBaseCerts(certsDir, 58*time.Hour); err != nil { t.Fatal(err) } @@ -302,6 +333,9 @@ func TestRotateCerts(t *testing.T) { } return nil }) + + // The expiration metric should be updated to be larger again. + require.Greater(t, checkCertExpirationMetric(), timeutil.Now().Add(57*time.Hour).Unix()) } var cmLogRe = regexp.MustCompile(`event_log\.go`) diff --git a/pkg/security/certs_test.go b/pkg/security/certs_test.go index 3c344d5aa9cb..bead782b41c8 100644 --- a/pkg/security/certs_test.go +++ b/pkg/security/certs_test.go @@ -264,7 +264,7 @@ func TestGenerateNodeCerts(t *testing.T) { // client.root.crt: client certificate for the root user. // client-tenant.10.crt: tenant client certificate for tenant 10. // tenant-signing.10.crt: tenant signing certificate for tenant 10. -func generateBaseCerts(certsDir string) error { +func generateBaseCerts(certsDir string, clientCertLifetime time.Duration) error { { caKey := filepath.Join(certsDir, certnames.EmbeddedCAKey) @@ -286,7 +286,7 @@ func generateBaseCerts(certsDir string) error { certsDir, caKey, testKeySize, - time.Hour*48, + clientCertLifetime, true, username.RootUserName(), []roachpb.TenantID{roachpb.SystemTenantID}, @@ -329,7 +329,7 @@ func generateBaseCerts(certsDir string) error { // client.node.crt: node client cert: signed by ca-client.crt // client.root.crt: root client cert: signed by ca-client.crt func generateSplitCACerts(certsDir string) error { - if err := generateBaseCerts(certsDir); err != nil { + if err := generateBaseCerts(certsDir, 48*time.Hour); err != nil { return err } @@ -384,7 +384,7 @@ func TestUseCerts(t *testing.T) { defer ResetTest() certsDir := t.TempDir() - if err := generateBaseCerts(certsDir); err != nil { + if err := generateBaseCerts(certsDir, 48*time.Hour); err != nil { t.Fatal(err) }