From aac17a174c579eea87a75bdd8b64ef14c7dcde45 Mon Sep 17 00:00:00 2001 From: Hridoy Roy Date: Tue, 27 Oct 2020 08:24:43 -0700 Subject: [PATCH] Port: Add metrics to report mount table sizes for auth and logical [Vault 671] (#10201) * first commit * update * removed some ent features from backport * final refactor * backport patch Co-authored-by: Hridoy Roy Co-authored-by: Hridoy Roy --- helper/metricsutil/metricsutil.go | 43 +++++- vault/auth.go | 32 +++-- vault/auth_test.go | 118 +++++++++++++--- vault/core_metrics.go | 19 +++ .../metrics/core_metrics_int_test.go | 121 ++++++++++++++++- vault/mount.go | 78 +++++++++-- vault/mount_test.go | 128 +++++++++++++++--- vault/testing.go | 1 + website/pages/docs/internals/telemetry.mdx | 2 + 9 files changed, 481 insertions(+), 61 deletions(-) diff --git a/helper/metricsutil/metricsutil.go b/helper/metricsutil/metricsutil.go index 83ca85a2d8fe..31adf69551b2 100644 --- a/helper/metricsutil/metricsutil.go +++ b/helper/metricsutil/metricsutil.go @@ -6,6 +6,7 @@ import ( "fmt" "net/http" "strings" + "sync" "github.com/armon/go-metrics" "github.com/hashicorp/vault/sdk/logical" @@ -26,13 +27,34 @@ const ( PrometheusMetricFormat = "prometheus" ) +// PhysicalTableSizeName is a set of gauge metric keys for physical mount table sizes +var PhysicalTableSizeName []string = []string{"core", "mount_table", "size"} + +// LogicalTableSizeName is a set of gauge metric keys for logical mount table sizes +var LogicalTableSizeName []string = []string{"core", "mount_table", "num_entries"} + type MetricsHelper struct { inMemSink *metrics.InmemSink PrometheusEnabled bool + LoopMetrics GaugeMetrics +} + +type GaugeMetrics struct { + // Metrics is a map from keys concatenated by "." to the metric. + // It is a map because although we do not care about distinguishing + // these loop metrics during emission, we must distinguish them + // when we update a metric. 
+ Metrics sync.Map +} + +type GaugeMetric struct { + Value float32 + Labels []Label + Key []string } func NewMetricsHelper(inMem *metrics.InmemSink, enablePrometheus bool) *MetricsHelper { - return &MetricsHelper{inMem, enablePrometheus} + return &MetricsHelper{inMem, enablePrometheus, GaugeMetrics{Metrics: sync.Map{}}} } func FormatFromRequest(req *logical.Request) string { @@ -53,6 +75,25 @@ func FormatFromRequest(req *logical.Request) string { return "" } +func (m *MetricsHelper) AddGaugeLoopMetric(key []string, val float32, labels []Label) { + mapKey := m.CreateMetricsCacheKeyName(key, val, labels) + m.LoopMetrics.Metrics.Store(mapKey, + GaugeMetric{ + Key: key, + Value: val, + Labels: labels}) +} + +func (m *MetricsHelper) CreateMetricsCacheKeyName(key []string, val float32, labels []Label) string { + var keyJoin string = strings.Join(key, ".") + var labelJoinStr = "" + for _, label := range labels { + labelJoinStr = labelJoinStr + label.Name + "|" + label.Value + "||" + } + keyJoin = keyJoin + "." + labelJoinStr + return keyJoin +} + func (m *MetricsHelper) ResponseForFormat(format string) *logical.Response { switch format { case PrometheusMetricFormat: diff --git a/vault/auth.go b/vault/auth.go index dbf719198ff8..11efc118295b 100644 --- a/vault/auth.go +++ b/vault/auth.go @@ -478,8 +478,10 @@ func (c *Core) loadCredentials(ctx context.Context) error { if c.auth == nil { c.auth = c.defaultAuthTable() needPersist = true + } else { + // only record tableMetrics if we have loaded something from storge + c.tableMetrics(len(c.auth.Entries), false, true, raw.Value) } - if rawLocal != nil { localAuthTable, err := c.decodeMountTable(ctx, rawLocal.Value) if err != nil { @@ -488,6 +490,7 @@ func (c *Core) loadCredentials(ctx context.Context) error { } if localAuthTable != nil && len(localAuthTable.Entries) > 0 { c.auth.Entries = append(c.auth.Entries, localAuthTable.Entries...) 
+ c.tableMetrics(len(localAuthTable.Entries), true, true, rawLocal.Value) } } @@ -579,12 +582,12 @@ func (c *Core) persistAuth(ctx context.Context, table *MountTable, local *bool) } } - writeTable := func(mt *MountTable, path string) error { + writeTable := func(mt *MountTable, path string) ([]byte, error) { // Encode the mount table into JSON and compress it (lzw). compressedBytes, err := jsonutil.EncodeJSONAndCompress(mt, nil) if err != nil { c.logger.Error("failed to encode or compress auth mount table", "error", err) - return err + return nil, err } // Create an entry @@ -596,29 +599,40 @@ func (c *Core) persistAuth(ctx context.Context, table *MountTable, local *bool) // Write to the physical backend if err := c.barrier.Put(ctx, entry); err != nil { c.logger.Error("failed to persist auth mount table", "error", err) - return err + return nil, err } - return nil + return compressedBytes, nil } var err error + var compressedBytes []byte switch { case local == nil: // Write non-local mounts - err := writeTable(nonLocalAuth, coreAuthConfigPath) + compressedBytes, err := writeTable(nonLocalAuth, coreAuthConfigPath) if err != nil { return err } + c.tableMetrics(len(nonLocalAuth.Entries), false, true, compressedBytes) // Write local mounts - err = writeTable(localAuth, coreLocalAuthConfigPath) + compressedBytes, err = writeTable(localAuth, coreLocalAuthConfigPath) if err != nil { return err } + c.tableMetrics(len(localAuth.Entries), true, true, compressedBytes) case *local: - err = writeTable(localAuth, coreLocalAuthConfigPath) + compressedBytes, err = writeTable(localAuth, coreLocalAuthConfigPath) + if err != nil { + return err + } + c.tableMetrics(len(localAuth.Entries), true, true, compressedBytes) default: - err = writeTable(nonLocalAuth, coreAuthConfigPath) + compressedBytes, err = writeTable(nonLocalAuth, coreAuthConfigPath) + if err != nil { + return err + } + c.tableMetrics(len(nonLocalAuth.Entries), false, true, compressedBytes) } return err diff --git 
a/vault/auth_test.go b/vault/auth_test.go index 38a0a94f09d0..89fe95efdf2d 100644 --- a/vault/auth_test.go +++ b/vault/auth_test.go @@ -5,14 +5,17 @@ import ( "reflect" "strings" "testing" + "time" + metrics "github.com/armon/go-metrics" + "github.com/hashicorp/vault/helper/metricsutil" "github.com/hashicorp/vault/helper/namespace" "github.com/hashicorp/vault/sdk/helper/jsonutil" "github.com/hashicorp/vault/sdk/logical" ) func TestAuth_ReadOnlyViewDuringMount(t *testing.T) { - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) c.credentialBackends["noop"] = func(ctx context.Context, config *logical.BackendConfig) (logical.Backend, error) { err := config.StorageView.Put(ctx, &logical.StorageEntry{ Key: "bar", @@ -37,14 +40,85 @@ func TestAuth_ReadOnlyViewDuringMount(t *testing.T) { } } +func TestAuthMountMetrics(t *testing.T) { + c, _, _, _ := TestCoreUnsealedWithMetrics(t) + c.credentialBackends["noop"] = func(ctx context.Context, config *logical.BackendConfig) (logical.Backend, error) { + return &NoopBackend{ + BackendType: logical.TypeCredential, + }, nil + } + mountKeyName := "core.mount_table.num_entries.type|auth||local|false||" + mountMetrics := &c.metricsHelper.LoopMetrics.Metrics + loadMetric, ok := mountMetrics.Load(mountKeyName) + var numEntriesMetric metricsutil.GaugeMetric = loadMetric.(metricsutil.GaugeMetric) + + // 1 default nonlocal auth backend + if !ok || numEntriesMetric.Value != 1 { + t.Fatalf("Auth values should be: %+v", numEntriesMetric) + } + + me := &MountEntry{ + Table: credentialTableType, + Path: "foo", + Type: "noop", + } + err := c.enableCredential(namespace.RootContext(nil), me) + if err != nil { + t.Fatalf("err: %v", err) + } + mountMetrics = &c.metricsHelper.LoopMetrics.Metrics + loadMetric, ok = mountMetrics.Load(mountKeyName) + numEntriesMetric = loadMetric.(metricsutil.GaugeMetric) + if !ok || numEntriesMetric.Value != 2 { + t.Fatalf("mount metrics for num entries do not match true values") + } + if 
len(numEntriesMetric.Key) != 3 || + numEntriesMetric.Key[0] != "core" || + numEntriesMetric.Key[1] != "mount_table" || + numEntriesMetric.Key[2] != "num_entries" { + t.Fatalf("mount metrics for num entries have wrong key") + } + if len(numEntriesMetric.Labels) != 2 || + numEntriesMetric.Labels[0].Name != "type" || + numEntriesMetric.Labels[0].Value != "auth" || + numEntriesMetric.Labels[1].Name != "local" || + numEntriesMetric.Labels[1].Value != "false" { + t.Fatalf("mount metrics for num entries have wrong labels") + } + mountSizeKeyName := "core.mount_table.size.type|auth||local|false||" + loadMetric, ok = mountMetrics.Load(mountSizeKeyName) + sizeMetric := loadMetric.(metricsutil.GaugeMetric) + + if !ok { + t.Fatalf("mount metrics for size do not match exist") + } + if len(sizeMetric.Key) != 3 || + sizeMetric.Key[0] != "core" || + sizeMetric.Key[1] != "mount_table" || + sizeMetric.Key[2] != "size" { + t.Fatalf("mount metrics for size have wrong key") + } + if len(sizeMetric.Labels) != 2 || + sizeMetric.Labels[0].Name != "type" || + sizeMetric.Labels[0].Value != "auth" || + sizeMetric.Labels[1].Name != "local" || + sizeMetric.Labels[1].Value != "false" { + t.Fatalf("mount metrics for size have wrong labels") + } +} + func TestCore_DefaultAuthTable(t *testing.T) { - c, keys, _ := TestCoreUnsealed(t) + c, keys, _, _ := TestCoreUnsealedWithMetrics(t) verifyDefaultAuthTable(t, c.auth) // Start a second core with same physical + inmemSink := metrics.NewInmemSink(1000000*time.Hour, 2000000*time.Hour) conf := &CoreConfig{ - Physical: c.physical, - DisableMlock: true, + Physical: c.physical, + DisableMlock: true, + BuiltinRegistry: NewMockBuiltinRegistry(), + MetricSink: metricsutil.NewClusterMetricSink("test-cluster", inmemSink), + MetricsHelper: metricsutil.NewMetricsHelper(inmemSink, false), } c2, err := NewCore(conf) if err != nil { @@ -67,7 +141,7 @@ func TestCore_DefaultAuthTable(t *testing.T) { } func TestCore_EnableCredential(t *testing.T) { - c, keys, _ := 
TestCoreUnsealed(t) + c, keys, _, _ := TestCoreUnsealedWithMetrics(t) c.credentialBackends["noop"] = func(context.Context, *logical.BackendConfig) (logical.Backend, error) { return &NoopBackend{ BackendType: logical.TypeCredential, @@ -89,9 +163,13 @@ func TestCore_EnableCredential(t *testing.T) { t.Fatalf("missing mount, match: %q", match) } + inmemSink := metrics.NewInmemSink(1000000*time.Hour, 2000000*time.Hour) conf := &CoreConfig{ - Physical: c.physical, - DisableMlock: true, + Physical: c.physical, + DisableMlock: true, + BuiltinRegistry: NewMockBuiltinRegistry(), + MetricSink: metricsutil.NewClusterMetricSink("test-cluster", inmemSink), + MetricsHelper: metricsutil.NewMetricsHelper(inmemSink, false), } c2, err := NewCore(conf) if err != nil { @@ -122,7 +200,7 @@ func TestCore_EnableCredential(t *testing.T) { // entries, and that upon reading the entries from both are recombined // correctly func TestCore_EnableCredential_Local(t *testing.T) { - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) c.credentialBackends["noop"] = func(context.Context, *logical.BackendConfig) (logical.Backend, error) { return &NoopBackend{ BackendType: logical.TypeCredential, @@ -211,7 +289,7 @@ func TestCore_EnableCredential_Local(t *testing.T) { } func TestCore_EnableCredential_twice_409(t *testing.T) { - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) c.credentialBackends["noop"] = func(context.Context, *logical.BackendConfig) (logical.Backend, error) { return &NoopBackend{ BackendType: logical.TypeCredential, @@ -241,7 +319,7 @@ func TestCore_EnableCredential_twice_409(t *testing.T) { } func TestCore_EnableCredential_Token(t *testing.T) { - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) me := &MountEntry{ Table: credentialTableType, Path: "foo", @@ -254,7 +332,7 @@ func TestCore_EnableCredential_Token(t *testing.T) { } func TestCore_DisableCredential(t *testing.T) { - c, keys, _ := 
TestCoreUnsealed(t) + c, keys, _, _ := TestCoreUnsealedWithMetrics(t) c.credentialBackends["noop"] = func(context.Context, *logical.BackendConfig) (logical.Backend, error) { return &NoopBackend{ BackendType: logical.TypeCredential, @@ -286,9 +364,13 @@ func TestCore_DisableCredential(t *testing.T) { t.Fatalf("backend present") } + inmemSink := metrics.NewInmemSink(1000000*time.Hour, 2000000*time.Hour) conf := &CoreConfig{ - Physical: c.physical, - DisableMlock: true, + Physical: c.physical, + DisableMlock: true, + BuiltinRegistry: NewMockBuiltinRegistry(), + MetricSink: metricsutil.NewClusterMetricSink("test-cluster", inmemSink), + MetricsHelper: metricsutil.NewMetricsHelper(inmemSink, false), } c2, err := NewCore(conf) if err != nil { @@ -311,7 +393,7 @@ func TestCore_DisableCredential(t *testing.T) { } func TestCore_DisableCredential_Protected(t *testing.T) { - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) err := c.disableCredential(namespace.RootContext(nil), "token") if err.Error() != "token credential backend cannot be disabled" { t.Fatalf("err: %v", err) @@ -323,7 +405,7 @@ func TestCore_DisableCredential_Cleanup(t *testing.T) { Login: []string{"login"}, BackendType: logical.TypeCredential, } - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) c.credentialBackends["noop"] = func(context.Context, *logical.BackendConfig) (logical.Backend, error) { return noop, nil } @@ -394,7 +476,7 @@ func TestCore_DisableCredential_Cleanup(t *testing.T) { } func TestDefaultAuthTable(t *testing.T) { - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) table := c.defaultAuthTable() verifyDefaultAuthTable(t, table) } @@ -432,7 +514,7 @@ func TestCore_CredentialInitialize(t *testing.T) { BackendType: logical.TypeCredential, }, false} - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) c.credentialBackends["initable"] = func(context.Context, *logical.BackendConfig) 
(logical.Backend, error) { return backend, nil } @@ -457,7 +539,7 @@ func TestCore_CredentialInitialize(t *testing.T) { BackendType: logical.TypeCredential, }, false} - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) c.credentialBackends["initable"] = func(context.Context, *logical.BackendConfig) (logical.Backend, error) { return backend, nil } diff --git a/vault/core_metrics.go b/vault/core_metrics.go index 5dcd6ed176ac..122bd615445b 100644 --- a/vault/core_metrics.go +++ b/vault/core_metrics.go @@ -49,6 +49,9 @@ func (c *Core) metricsLoop(stopCh chan struct{}) { c.metricSink.SetGaugeWithLabels([]string{"core", "leader"}, 1, nil) } + // Refresh gauge metrics that are looped + c.cachedGaugeMetricsEmitter() + case <-writeTimer: if stopped := grabLockOrStop(c.stateLock.RLock, c.stateLock.RUnlock, stopCh); stopped { // Go through the loop again, this time the stop channel case @@ -425,3 +428,19 @@ func (c *Core) entityGaugeCollectorByMount(ctx context.Context) ([]metricsutil.G return values, nil } + +func (c *Core) cachedGaugeMetricsEmitter() { + if c.metricsHelper == nil { + return + } + + loopMetrics := &c.metricsHelper.LoopMetrics.Metrics + + emit := func(key interface{}, value interface{}) bool { + metricValue := value.(metricsutil.GaugeMetric) + c.metricSink.SetGaugeWithLabels(metricValue.Key, metricValue.Value, metricValue.Labels) + return true + } + + loopMetrics.Range(emit) +} diff --git a/vault/external_tests/metrics/core_metrics_int_test.go b/vault/external_tests/metrics/core_metrics_int_test.go index 0696e7408774..1635c548cc3e 100644 --- a/vault/external_tests/metrics/core_metrics_int_test.go +++ b/vault/external_tests/metrics/core_metrics_int_test.go @@ -3,16 +3,132 @@ package metrics import ( "context" "encoding/json" + "errors" "io/ioutil" "testing" "time" "github.com/armon/go-metrics" + "github.com/hashicorp/vault/api" "github.com/hashicorp/vault/helper/metricsutil" vaulthttp "github.com/hashicorp/vault/http" 
"github.com/hashicorp/vault/vault" ) +func TestMountTableMetrics(t *testing.T) { + inm := metrics.NewInmemSink(time.Minute, time.Minute*10) + clusterSink := metricsutil.NewClusterMetricSink("mycluster", inm) + clusterSink.GaugeInterval = time.Second + conf := &vault.CoreConfig{ + BuiltinRegistry: vault.NewMockBuiltinRegistry(), + MetricsHelper: metricsutil.NewMetricsHelper(inm, true), + MetricSink: clusterSink, + } + cluster := vault.NewTestCluster(t, conf, &vault.TestClusterOptions{ + KeepStandbysSealed: false, + HandlerFunc: vaulthttp.Handler, + NumCores: 2, + }) + + cluster.Start() + defer cluster.Cleanup() + + // Wait for core to become active + cores := cluster.Cores + vault.TestWaitActive(t, cores[0].Core) + + client := cores[0].Client + + // Verify that the nonlocal logical mount table has 3 entries -- cubbyhole, identity, and kv + + data, err := sysMetricsReq(client, cluster) + if err != nil { + t.Fatal(err) + } + + nonlocalLogicalMountsize, err := gaugeSearchHelper(data, 3) + if err != nil { + t.Errorf(err.Error()) + } + + // Mount new kv + if err = client.Sys().Mount("kv", &api.MountInput{ + Type: "kv", + Options: map[string]string{ + "version": "2", + }, + }); err != nil { + t.Fatal(err) + } + + data, err = sysMetricsReq(client, cluster) + if err != nil { + t.Fatal(err) + } + + nonlocalLogicalMountsizeAfterMount, err := gaugeSearchHelper(data, 4) + if err != nil { + t.Errorf(err.Error()) + } + + if nonlocalLogicalMountsizeAfterMount <= nonlocalLogicalMountsize { + t.Errorf("Mount size does not change after new mount is mounted") + } + +} + +func sysMetricsReq(client *api.Client, cluster *vault.TestCluster) (*SysMetricsJSON, error) { + r := client.NewRequest("GET", "/v1/sys/metrics") + r.Headers.Set("X-Vault-Token", cluster.RootToken) + var data SysMetricsJSON + mountAddResp, err := client.RawRequestWithContext(context.Background(), r) + if err != nil { + return nil, err + } + bodyBytes, err := ioutil.ReadAll(mountAddResp.Response.Body) + if err != nil { 
+ return nil, err + } + if mountAddResp != nil { + defer mountAddResp.Body.Close() + } + if err := json.Unmarshal(bodyBytes, &data); err != nil { + return nil, errors.New("failed to unmarshal:" + err.Error()) + } + return &data, nil +} + +func gaugeSearchHelper(data *SysMetricsJSON, expectedValue int) (int, error) { + foundFlag := false + tablesize := int(^uint(0) >> 1) + for _, gauge := range data.Gauges { + labels := gauge.Labels + if loc, ok := labels["local"]; ok && loc.(string) == "false" { + if tp, ok := labels["type"]; ok && tp.(string) == "logical" { + if gauge.Name == "core.mount_table.num_entries" { + foundFlag = true + if err := gaugeConditionCheck("eq", expectedValue, gauge.Value); err != nil { + return int(^uint(0) >> 1), err + } + } else if gauge.Name == "core.mount_table.size" { + tablesize = gauge.Value + } + } + } + } + if !foundFlag { + return int(^uint(0) >> 1), errors.New("No metrics reported for mount sizes") + } + return tablesize, nil +} + +func gaugeConditionCheck(comparator string, compareVal int, compareToVal int) error { + if comparator == "eq" && compareVal != compareToVal { + return errors.New("equality gauge check for comparison failed") + } + return nil +} + func TestLeaderReElectionMetrics(t *testing.T) { inm := metrics.NewInmemSink(time.Minute, time.Minute*10) clusterSink := metricsutil.NewClusterMetricSink("mycluster", inm) @@ -127,6 +243,7 @@ type SysMetricsJSON struct { } type GaugeJSON struct { - Name string `json:"Name"` - Value int `json:"Value"` + Name string `json:"Name"` + Value int `json:"Value"` + Labels map[string]interface{} `json:"Labels"` } diff --git a/vault/mount.go b/vault/mount.go index 7e49c2982d6c..860fcb419166 100644 --- a/vault/mount.go +++ b/vault/mount.go @@ -10,8 +10,10 @@ import ( "sync" "time" + "github.com/armon/go-metrics" uuid "github.com/hashicorp/go-uuid" "github.com/hashicorp/vault/builtin/plugin" + "github.com/hashicorp/vault/helper/metricsutil" "github.com/hashicorp/vault/helper/namespace" 
"github.com/hashicorp/vault/sdk/helper/consts" "github.com/hashicorp/vault/sdk/helper/jsonutil" @@ -123,6 +125,49 @@ type MountTable struct { Entries []*MountEntry `json:"entries"` } +// tableMetrics is responsible for setting gauge metrics for +// mount table storage sizes (in bytes) and mount table num +// entries. It does this via setGaugeWithLabels. It then +// saves these metrics in a cache for regular reporting in +// a loop, via AddGaugeLoopMetric. + +// Note that the reported storage sizes are pre-encryption +// sizes. Currently barrier uses aes-gcm for encryption, which +// preserves plaintext size, adding a constant of 30 bytes of +// padding, which is negligable and subject to change, and thus +// not accounted for. +func (c *Core) tableMetrics(entryCount int, isLocal bool, isAuth bool, compressedTable []byte) { + if c.metricsHelper == nil { + // do nothing if metrics are not initialized + return + } + typeAuthLabelMap := map[bool]metrics.Label{ + true: metrics.Label{Name: "type", Value: "auth"}, + false: metrics.Label{Name: "type", Value: "logical"}, + } + + typeLocalLabelMap := map[bool]metrics.Label{ + true: metrics.Label{Name: "local", Value: "true"}, + false: metrics.Label{Name: "local", Value: "false"}, + } + + c.metricSink.SetGaugeWithLabels(metricsutil.LogicalTableSizeName, + float32(entryCount), []metrics.Label{typeAuthLabelMap[isAuth], + typeLocalLabelMap[isLocal]}) + + c.metricsHelper.AddGaugeLoopMetric(metricsutil.LogicalTableSizeName, + float32(entryCount), []metrics.Label{typeAuthLabelMap[isAuth], + typeLocalLabelMap[isLocal]}) + + c.metricSink.SetGaugeWithLabels(metricsutil.PhysicalTableSizeName, + float32(len(compressedTable)), []metrics.Label{typeAuthLabelMap[isAuth], + typeLocalLabelMap[isLocal]}) + + c.metricsHelper.AddGaugeLoopMetric(metricsutil.PhysicalTableSizeName, + float32(len(compressedTable)), []metrics.Label{typeAuthLabelMap[isAuth], + typeLocalLabelMap[isLocal]}) +} + // shallowClone returns a copy of the mount table that // 
keeps the MountEntry locations, so as not to invalidate // other locations holding pointers. Care needs to be taken @@ -132,6 +177,7 @@ func (t *MountTable) shallowClone() *MountTable { Type: t.Type, Entries: make([]*MountEntry, len(t.Entries)), } + for i, e := range t.Entries { mt.Entries[i] = e } @@ -915,6 +961,7 @@ func (c *Core) loadMounts(ctx context.Context) error { c.logger.Error("failed to decompress and/or decode the mount table", "error", err) return err } + c.tableMetrics(len(mountTable.Entries), false, false, raw.Value) c.mounts = mountTable } @@ -932,6 +979,7 @@ func (c *Core) loadMounts(ctx context.Context) error { return err } if localMountTable != nil && len(localMountTable.Entries) > 0 { + c.tableMetrics(len(localMountTable.Entries), true, false, raw.Value) c.mounts.Entries = append(c.mounts.Entries, localMountTable.Entries...) } } @@ -1056,12 +1104,12 @@ func (c *Core) persistMounts(ctx context.Context, table *MountTable, local *bool } } - writeTable := func(mt *MountTable, path string) error { + writeTable := func(mt *MountTable, path string) ([]byte, error) { // Encode the mount table into JSON and compress it (lzw). 
compressedBytes, err := jsonutil.EncodeJSONAndCompress(mt, nil) if err != nil { c.logger.Error("failed to encode or compress mount table", "error", err) - return err + return nil, err } // Create an entry @@ -1073,34 +1121,46 @@ func (c *Core) persistMounts(ctx context.Context, table *MountTable, local *bool // Write to the physical backend if err := c.barrier.Put(ctx, entry); err != nil { c.logger.Error("failed to persist mount table", "error", err) - return err + return nil, err } - return nil + return compressedBytes, nil } var err error + var compressedBytes []byte switch { case local == nil: // Write non-local mounts - err := writeTable(nonLocalMounts, coreMountConfigPath) + compressedBytes, err := writeTable(nonLocalMounts, coreMountConfigPath) if err != nil { return err } + c.tableMetrics(len(nonLocalMounts.Entries), false, false, compressedBytes) // Write local mounts - err = writeTable(localMounts, coreLocalMountConfigPath) + compressedBytes, err = writeTable(localMounts, coreLocalMountConfigPath) if err != nil { return err } + c.tableMetrics(len(localMounts.Entries), true, false, compressedBytes) + case *local: // Write local mounts - err = writeTable(localMounts, coreLocalMountConfigPath) + compressedBytes, err = writeTable(localMounts, coreLocalMountConfigPath) + if err != nil { + return err + } + c.tableMetrics(len(localMounts.Entries), true, false, compressedBytes) default: // Write non-local mounts - err = writeTable(nonLocalMounts, coreMountConfigPath) + compressedBytes, err = writeTable(nonLocalMounts, coreMountConfigPath) + if err != nil { + return err + } + c.tableMetrics(len(nonLocalMounts.Entries), false, false, compressedBytes) } - return err + return nil } // setupMounts is invoked after we've loaded the mount table to diff --git a/vault/mount_test.go b/vault/mount_test.go index adf040ba6328..b0bf11e18dd9 100644 --- a/vault/mount_test.go +++ b/vault/mount_test.go @@ -8,8 +8,10 @@ import ( "testing" "time" + metrics 
"github.com/armon/go-metrics" "github.com/go-test/deep" "github.com/hashicorp/vault/audit" + "github.com/hashicorp/vault/helper/metricsutil" "github.com/hashicorp/vault/helper/namespace" "github.com/hashicorp/vault/sdk/helper/compressutil" "github.com/hashicorp/vault/sdk/helper/jsonutil" @@ -17,7 +19,7 @@ import ( ) func TestMount_ReadOnlyViewDuringMount(t *testing.T) { - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) c.logicalBackends["noop"] = func(ctx context.Context, config *logical.BackendConfig) (logical.Backend, error) { err := config.StorageView.Put(ctx, &logical.StorageEntry{ Key: "bar", @@ -40,14 +42,84 @@ func TestMount_ReadOnlyViewDuringMount(t *testing.T) { } } +func TestLogicalMountMetrics(t *testing.T) { + c, _, _, _ := TestCoreUnsealedWithMetrics(t) + c.logicalBackends["noop"] = func(ctx context.Context, config *logical.BackendConfig) (logical.Backend, error) { + return &NoopBackend{ + BackendType: logical.TypeLogical, + }, nil + } + mountKeyName := "core.mount_table.num_entries.type|logical||local|false||" + mountMetrics := &c.metricsHelper.LoopMetrics.Metrics + loadMetric, ok := mountMetrics.Load(mountKeyName) + var numEntriesMetric metricsutil.GaugeMetric = loadMetric.(metricsutil.GaugeMetric) + + // 3 default nonlocal logical backends + if !ok || numEntriesMetric.Value != 3 { + t.Fatalf("Auth values should be: %+v", numEntriesMetric) + } + me := &MountEntry{ + Table: mountTableType, + Path: "foo", + Type: "noop", + } + err := c.mount(namespace.RootContext(nil), me) + if err != nil { + t.Fatalf("err: %v", err) + } + mountMetrics = &c.metricsHelper.LoopMetrics.Metrics + loadMetric, ok = mountMetrics.Load(mountKeyName) + numEntriesMetric = loadMetric.(metricsutil.GaugeMetric) + if !ok || numEntriesMetric.Value != 4 { + t.Fatalf("mount metrics for num entries do not match true values") + } + if len(numEntriesMetric.Key) != 3 || + numEntriesMetric.Key[0] != "core" || + numEntriesMetric.Key[1] != "mount_table" || + 
numEntriesMetric.Key[2] != "num_entries" { + t.Fatalf("mount metrics for num entries have wrong key") + } + if len(numEntriesMetric.Labels) != 2 || + numEntriesMetric.Labels[0].Name != "type" || + numEntriesMetric.Labels[0].Value != "logical" || + numEntriesMetric.Labels[1].Name != "local" || + numEntriesMetric.Labels[1].Value != "false" { + t.Fatalf("mount metrics for num entries have wrong labels") + } + mountSizeKeyName := "core.mount_table.size.type|logical||local|false||" + loadMetric, ok = mountMetrics.Load(mountSizeKeyName) + sizeMetric := loadMetric.(metricsutil.GaugeMetric) + + if !ok { + t.Fatalf("mount metrics for size do not match exist") + } + if len(sizeMetric.Key) != 3 || + sizeMetric.Key[0] != "core" || + sizeMetric.Key[1] != "mount_table" || + sizeMetric.Key[2] != "size" { + t.Fatalf("mount metrics for size have wrong key") + } + if len(sizeMetric.Labels) != 2 || + sizeMetric.Labels[0].Name != "type" || + sizeMetric.Labels[0].Value != "logical" || + sizeMetric.Labels[1].Name != "local" || + sizeMetric.Labels[1].Value != "false" { + t.Fatalf("mount metrics for size have wrong labels") + } +} + func TestCore_DefaultMountTable(t *testing.T) { - c, keys, _ := TestCoreUnsealed(t) + c, keys, _, _ := TestCoreUnsealedWithMetrics(t) verifyDefaultTable(t, c.mounts, 4) // Start a second core with same physical + inmemSink := metrics.NewInmemSink(1000000*time.Hour, 2000000*time.Hour) conf := &CoreConfig{ - Physical: c.physical, - DisableMlock: true, + Physical: c.physical, + DisableMlock: true, + BuiltinRegistry: NewMockBuiltinRegistry(), + MetricSink: metricsutil.NewClusterMetricSink("test-cluster", inmemSink), + MetricsHelper: metricsutil.NewMetricsHelper(inmemSink, false), } c2, err := NewCore(conf) if err != nil { @@ -69,7 +141,7 @@ func TestCore_DefaultMountTable(t *testing.T) { } func TestCore_Mount(t *testing.T) { - c, keys, _ := TestCoreUnsealed(t) + c, keys, _, _ := TestCoreUnsealedWithMetrics(t) me := &MountEntry{ Table: mountTableType, Path: "foo", 
@@ -85,9 +157,13 @@ func TestCore_Mount(t *testing.T) { t.Fatalf("missing mount") } + inmemSink := metrics.NewInmemSink(1000000*time.Hour, 2000000*time.Hour) conf := &CoreConfig{ - Physical: c.physical, - DisableMlock: true, + Physical: c.physical, + DisableMlock: true, + BuiltinRegistry: NewMockBuiltinRegistry(), + MetricSink: metricsutil.NewClusterMetricSink("test-cluster", inmemSink), + MetricsHelper: metricsutil.NewMetricsHelper(inmemSink, false), } c2, err := NewCore(conf) if err != nil { @@ -113,7 +189,7 @@ func TestCore_Mount(t *testing.T) { // entries, and that upon reading the entries from both are recombined // correctly func TestCore_Mount_Local(t *testing.T) { - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) c.mounts = &MountTable{ Type: mountTableType, @@ -212,7 +288,7 @@ func TestCore_Mount_Local(t *testing.T) { } func TestCore_Unmount(t *testing.T) { - c, keys, _ := TestCoreUnsealed(t) + c, keys, _, _ := TestCoreUnsealedWithMetrics(t) err := c.unmount(namespace.RootContext(nil), "secret") if err != nil { t.Fatalf("err: %v", err) @@ -223,9 +299,13 @@ func TestCore_Unmount(t *testing.T) { t.Fatalf("backend present") } + inmemSink := metrics.NewInmemSink(1000000*time.Hour, 2000000*time.Hour) conf := &CoreConfig{ - Physical: c.physical, - DisableMlock: true, + Physical: c.physical, + DisableMlock: true, + BuiltinRegistry: NewMockBuiltinRegistry(), + MetricSink: metricsutil.NewClusterMetricSink("test-cluster", inmemSink), + MetricsHelper: metricsutil.NewMetricsHelper(inmemSink, false), } c2, err := NewCore(conf) if err != nil { @@ -254,7 +334,7 @@ func TestCore_Unmount_Cleanup(t *testing.T) { func testCore_Unmount_Cleanup(t *testing.T, causeFailure bool) { noop := &NoopBackend{} - c, _, root := TestCoreUnsealed(t) + c, _, root, _ := TestCoreUnsealedWithMetrics(t) c.logicalBackends["noop"] = func(context.Context, *logical.BackendConfig) (logical.Backend, error) { return noop, nil } @@ -362,7 +442,7 @@ func 
testCore_Unmount_Cleanup(t *testing.T, causeFailure bool) { } func TestCore_Remount(t *testing.T) { - c, keys, _ := TestCoreUnsealed(t) + c, keys, _, _ := TestCoreUnsealedWithMetrics(t) err := c.remount(namespace.RootContext(nil), "secret", "foo", true) if err != nil { t.Fatalf("err: %v", err) @@ -373,9 +453,13 @@ func TestCore_Remount(t *testing.T) { t.Fatalf("failed remount") } + inmemSink := metrics.NewInmemSink(1000000*time.Hour, 2000000*time.Hour) conf := &CoreConfig{ - Physical: c.physical, - DisableMlock: true, + Physical: c.physical, + DisableMlock: true, + BuiltinRegistry: NewMockBuiltinRegistry(), + MetricSink: metricsutil.NewClusterMetricSink("test-cluster", inmemSink), + MetricsHelper: metricsutil.NewMetricsHelper(inmemSink, false), } c2, err := NewCore(conf) if err != nil { @@ -410,7 +494,7 @@ func TestCore_Remount(t *testing.T) { func TestCore_Remount_Cleanup(t *testing.T) { noop := &NoopBackend{} - c, _, root := TestCoreUnsealed(t) + c, _, root, _ := TestCoreUnsealedWithMetrics(t) c.logicalBackends["noop"] = func(context.Context, *logical.BackendConfig) (logical.Backend, error) { return noop, nil } @@ -494,7 +578,7 @@ func TestCore_Remount_Cleanup(t *testing.T) { } func TestCore_Remount_Protected(t *testing.T) { - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) err := c.remount(namespace.RootContext(nil), "sys", "foo", true) if err.Error() != `cannot remount "sys/"` { t.Fatalf("err: %v", err) @@ -502,13 +586,13 @@ func TestCore_Remount_Protected(t *testing.T) { } func TestDefaultMountTable(t *testing.T) { - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) table := c.defaultMountTable() verifyDefaultTable(t, table, 3) } func TestCore_MountTable_UpgradeToTyped(t *testing.T) { - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) c.auditBackends["noop"] = func(ctx context.Context, config *audit.BackendConfig) (audit.Backend, error) { return &NoopAudit{ @@ -710,7 +794,7 
@@ func verifyDefaultTable(t *testing.T, table *MountTable, expected int) { } func TestSingletonMountTableFunc(t *testing.T) { - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) mounts, auth := c.singletonMountTables() @@ -743,7 +827,7 @@ func TestCore_MountInitialize(t *testing.T) { BackendType: logical.TypeLogical, }, false} - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) c.logicalBackends["initable"] = func(context.Context, *logical.BackendConfig) (logical.Backend, error) { return backend, nil } @@ -768,7 +852,7 @@ func TestCore_MountInitialize(t *testing.T) { BackendType: logical.TypeLogical, }, false} - c, _, _ := TestCoreUnsealed(t) + c, _, _, _ := TestCoreUnsealedWithMetrics(t) c.logicalBackends["initable"] = func(context.Context, *logical.BackendConfig) (logical.Backend, error) { return backend, nil } diff --git a/vault/testing.go b/vault/testing.go index b32e695cd737..8e0857d7f9b7 100644 --- a/vault/testing.go +++ b/vault/testing.go @@ -309,6 +309,7 @@ func TestCoreUnsealedWithMetrics(t testing.T) (*Core, [][]byte, string, *metrics conf := &CoreConfig{ BuiltinRegistry: NewMockBuiltinRegistry(), MetricSink: metricsutil.NewClusterMetricSink("test-cluster", inmemSink), + MetricsHelper: metricsutil.NewMetricsHelper(inmemSink, false), } core, keys, root := testCoreUnsealed(t, TestCoreWithSealAndUI(t, conf)) return core, keys, root, inmemSink diff --git a/website/pages/docs/internals/telemetry.mdx b/website/pages/docs/internals/telemetry.mdx index 124eafd283aa..60c42354590b 100644 --- a/website/pages/docs/internals/telemetry.mdx +++ b/website/pages/docs/internals/telemetry.mdx @@ -92,6 +92,8 @@ These metrics represent operational aspects of the running Vault instance. | `vault.core.leader` | Has value 1 when the vault node is leader, and 0 when node is in standby. 
| bool | gauge | | `vault.core.leadership_setup_failed` | Duration of time taken by cluster leadership setup failures which have occurred in a highly available Vault cluster. This should be monitored and alerted on for overall cluster leadership status. | ms | summary | | `vault.core.leadership_lost` | Duration of time taken by cluster leadership losses which have occurred in a highly available Vault cluster. This should be monitored and alerted on for overall cluster leadership status. | ms | summary | +| `vault.core.mount_table.num_entries` | Number of mounts in a particular mount table. This metric is labeled by table type (auth or logical) and whether or not the table is replicated (local or not) | objects | gauge | +| `vault.core.mount_table.size` | Size in bytes of a particular mount table, measured before encryption. This metric is labeled by table type (auth or logical) and whether or not the table is replicated (local or not) | bytes | gauge | | `vault.core.post_unseal` | Duration of time taken by post-unseal operations handled by Vault core | ms | summary | | `vault.core.pre_seal` | Duration of time taken by pre-seal operations | ms | summary | | `vault.core.seal-with-request` | Duration of time taken by requested seal operations | ms | summary |