diff --git a/pkg/ccl/changefeedccl/changefeed_processors.go b/pkg/ccl/changefeedccl/changefeed_processors.go index 9447043021c5..5cef0e4cca9e 100644 --- a/pkg/ccl/changefeedccl/changefeed_processors.go +++ b/pkg/ccl/changefeedccl/changefeed_processors.go @@ -817,7 +817,7 @@ func (j *jobState) checkpointCompleted(ctx context.Context, checkpointDuration t j.metrics.CheckpointHistNanos.RecordValue(checkpointDuration.Nanoseconds()) j.lastProgressUpdate = j.ts.Now() - j.checkpointDuration = time.Duration(j.metrics.CheckpointHistNanos.Snapshot().Mean()) + j.checkpointDuration = time.Duration(j.metrics.CheckpointHistNanos.Mean()) j.progressUpdatesSkipped = false } diff --git a/pkg/ccl/changefeedccl/metrics.go b/pkg/ccl/changefeedccl/metrics.go index a97908f13efc..dfad65ed45bf 100644 --- a/pkg/ccl/changefeedccl/metrics.go +++ b/pkg/ccl/changefeedccl/metrics.go @@ -284,14 +284,6 @@ func (w *wrappingCostController) getBackfillRangeCallback() func(int64) (func(), return w.inner.getBackfillRangeCallback() } -const ( - changefeedCheckpointHistMaxLatency = 30 * time.Second - changefeedBatchHistMaxLatency = 30 * time.Second - changefeedFlushHistMaxLatency = 1 * time.Minute - admitLatencyMaxValue = 1 * time.Minute - commitLatencyMaxValue = 10 * time.Minute -) - var ( metaChangefeedForwardedResolvedMessages = metric.Metadata{ Name: "changefeed.forwarded_resolved_messages", @@ -450,20 +442,15 @@ func newAggregateMetrics(histogramWindow time.Duration) *AggMetrics { a := &AggMetrics{ ErrorRetries: b.Counter(metaChangefeedErrorRetries), EmittedMessages: b.Counter(metaChangefeedEmittedMessages), - MessageSize: b.Histogram(metaMessageSize, - histogramWindow, 10<<20 /* 10MB max message size */, 1), - EmittedBytes: b.Counter(metaChangefeedEmittedBytes), - FlushedBytes: b.Counter(metaChangefeedFlushedBytes), - Flushes: b.Counter(metaChangefeedFlushes), - - BatchHistNanos: b.Histogram(metaChangefeedBatchHistNanos, - histogramWindow, changefeedBatchHistMaxLatency.Nanoseconds(), 1), - FlushHistNanos: b.Histogram(metaChangefeedFlushHistNanos, - histogramWindow, changefeedFlushHistMaxLatency.Nanoseconds(), 2), - CommitLatency: b.Histogram(metaCommitLatency, - histogramWindow, commitLatencyMaxValue.Nanoseconds(), 1), - AdmitLatency: b.Histogram(metaAdmitLatency, histogramWindow, - admitLatencyMaxValue.Nanoseconds(), 1), + MessageSize: b.Histogram(metaMessageSize, histogramWindow, metric.DataSize16MBBuckets), + EmittedBytes: b.Counter(metaChangefeedEmittedBytes), + FlushedBytes: b.Counter(metaChangefeedFlushedBytes), + Flushes: b.Counter(metaChangefeedFlushes), + + BatchHistNanos: b.Histogram(metaChangefeedBatchHistNanos, histogramWindow, metric.BatchProcessLatencyBuckets), + FlushHistNanos: b.Histogram(metaChangefeedFlushHistNanos, histogramWindow, metric.BatchProcessLatencyBuckets), + CommitLatency: b.Histogram(metaCommitLatency, histogramWindow, metric.BatchProcessLatencyBuckets), + AdmitLatency: b.Histogram(metaAdmitLatency, histogramWindow, metric.BatchProcessLatencyBuckets), BackfillCount: b.Gauge(metaChangefeedBackfillCount), BackfillPendingRanges: b.Gauge(metaChangefeedBackfillPendingRanges), RunningCount: b.Gauge(metaChangefeedRunning), @@ -566,17 +553,16 @@ func (m *Metrics) getSLIMetrics(scope string) (*sliMetrics, error) { // MakeMetrics makes the metrics for changefeed monitoring. func MakeMetrics(histogramWindow time.Duration) metric.Struct { m := &Metrics{ - AggMetrics: newAggregateMetrics(histogramWindow), - KVFeedMetrics: kvevent.MakeMetrics(histogramWindow), - SchemaFeedMetrics: schemafeed.MakeMetrics(histogramWindow), - ResolvedMessages: metric.NewCounter(metaChangefeedForwardedResolvedMessages), - Failures: metric.NewCounter(metaChangefeedFailures), - QueueTimeNanos: metric.NewCounter(metaEventQueueTime), - CheckpointHistNanos: metric.NewHistogram(metaChangefeedCheckpointHistNanos, histogramWindow, - changefeedCheckpointHistMaxLatency.Nanoseconds(), 2), - FrontierUpdates: metric.NewCounter(metaChangefeedFrontierUpdates), - ThrottleMetrics: cdcutils.MakeMetrics(histogramWindow), - ReplanCount: metric.NewCounter(metaChangefeedReplanCount), + AggMetrics: newAggregateMetrics(histogramWindow), + KVFeedMetrics: kvevent.MakeMetrics(histogramWindow), + SchemaFeedMetrics: schemafeed.MakeMetrics(histogramWindow), + ResolvedMessages: metric.NewCounter(metaChangefeedForwardedResolvedMessages), + Failures: metric.NewCounter(metaChangefeedFailures), + QueueTimeNanos: metric.NewCounter(metaEventQueueTime), + CheckpointHistNanos: metric.NewHistogram(metaChangefeedCheckpointHistNanos, histogramWindow, metric.IOLatencyBuckets), + FrontierUpdates: metric.NewCounter(metaChangefeedFrontierUpdates), + ThrottleMetrics: cdcutils.MakeMetrics(histogramWindow), + ReplanCount: metric.NewCounter(metaChangefeedReplanCount), } m.mu.resolved = make(map[int]hlc.Timestamp) diff --git a/pkg/ccl/sqlproxyccl/connector_test.go b/pkg/ccl/sqlproxyccl/connector_test.go index 0d8eb941cdef..f3f0afed3d2d 100644 --- a/pkg/ccl/sqlproxyccl/connector_test.go +++ b/pkg/ccl/sqlproxyccl/connector_test.go @@ -375,7 +375,9 @@ func TestConnector_dialTenantCluster(t *testing.T) { defer cancel() c := &connector{ - DialTenantLatency: metric.NewLatency(metaDialTenantLatency, time.Millisecond), + DialTenantLatency: metric.NewHistogram( + metaDialTenantLatency, time.Millisecond, metric.NetworkLatencyBuckets, + ), DialTenantRetries: metric.NewCounter(metaDialTenantRetries), } c.testingKnobs.lookupAddr = func(ctx context.Context) (string, error) { @@ -403,8 +405,10 @@ func TestConnector_dialTenantCluster(t *testing.T) { var reportFailureFnCount int c := &connector{ - TenantID: roachpb.MakeTenantID(42), - DialTenantLatency: metric.NewLatency(metaDialTenantLatency, time.Millisecond), + TenantID: roachpb.MakeTenantID(42), + DialTenantLatency: metric.NewHistogram( + metaDialTenantLatency, time.Millisecond, metric.NetworkLatencyBuckets, + ), DialTenantRetries: metric.NewCounter(metaDialTenantRetries), } c.DirectoryCache = &testTenantDirectoryCache{ diff --git a/pkg/ccl/sqlproxyccl/metrics.go b/pkg/ccl/sqlproxyccl/metrics.go index cf7abe31f044..31af7bc2af5c 100644 --- a/pkg/ccl/sqlproxyccl/metrics.go +++ b/pkg/ccl/sqlproxyccl/metrics.go @@ -50,16 +50,6 @@ func (metrics) MetricStruct() {} var _ metric.Struct = metrics{} -const ( - // maxExpectedTransferResponseMessageSize corresponds to maximum expected - // response message size for the SHOW TRANSFER STATE query. We choose 16MB - // here to match the defaultMaxReadBufferSize used for ingesting SQL - // statements in the SQL server (see pkg/sql/pgwire/pgwirebase/encoding.go). - // - // This will be used to tune sql.session_transfer.max_session_size. - maxExpectedTransferResponseMessageSize = 1 << 24 // 16MB -) - var ( metaCurConnCount = metric.Metadata{ Name: "proxy.sql.conns", @@ -224,16 +214,18 @@ func makeProxyMetrics() metrics { RoutingErrCount: metric.NewCounter(metaRoutingErrCount), RefusedConnCount: metric.NewCounter(metaRefusedConnCount), SuccessfulConnCount: metric.NewCounter(metaSuccessfulConnCount), - ConnectionLatency: metric.NewLatency( + ConnectionLatency: metric.NewHistogram( metaConnMigrationAttemptedCount, base.DefaultHistogramWindowInterval(), + metric.NetworkLatencyBuckets, ), AuthFailedCount: metric.NewCounter(metaAuthFailedCount), ExpiredClientConnCount: metric.NewCounter(metaExpiredClientConnCount), // Connector metrics. - DialTenantLatency: metric.NewLatency( + DialTenantLatency: metric.NewHistogram( metaDialTenantLatency, base.DefaultHistogramWindowInterval(), + metric.NetworkLatencyBuckets, ), DialTenantRetries: metric.NewCounter(metaDialTenantRetries), // Connection migration metrics. @@ -241,15 +233,15 @@ func makeProxyMetrics() metrics { ConnMigrationErrorFatalCount: metric.NewCounter(metaConnMigrationErrorFatalCount), ConnMigrationErrorRecoverableCount: metric.NewCounter(metaConnMigrationErrorRecoverableCount), ConnMigrationAttemptedCount: metric.NewCounter(metaConnMigrationAttemptedCount), - ConnMigrationAttemptedLatency: metric.NewLatency( + ConnMigrationAttemptedLatency: metric.NewHistogram( metaConnMigrationAttemptedLatency, base.DefaultHistogramWindowInterval(), + metric.NetworkLatencyBuckets, ), ConnMigrationTransferResponseMessageSize: metric.NewHistogram( metaConnMigrationTransferResponseMessageSize, base.DefaultHistogramWindowInterval(), - maxExpectedTransferResponseMessageSize, - 1, + metric.DataSize16MBBuckets, ), QueryCancelReceivedPGWire: metric.NewCounter(metaQueryCancelReceivedPGWire), QueryCancelReceivedHTTP: metric.NewCounter(metaQueryCancelReceivedHTTP), diff --git a/pkg/ccl/streamingccl/streamingest/metrics.go b/pkg/ccl/streamingccl/streamingest/metrics.go index 79b98a6667fb..aec88cc3a517 100644 --- a/pkg/ccl/streamingccl/streamingest/metrics.go +++ b/pkg/ccl/streamingccl/streamingest/metrics.go @@ -15,12 +15,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/metric" ) -const ( - streamingFlushHistMaxLatency = 1 * time.Minute - streamingAdmitLatencyMaxValue = 3 * time.Minute - streamingCommitLatencyMaxValue = 10 * time.Minute -) - var ( metaStreamingEventsIngested = metric.Metadata{ Name: "streaming.events_ingested", @@ -135,11 +129,11 @@ func MakeMetrics(histogramWindow time.Duration) metric.Struct { ResolvedEvents: metric.NewCounter(metaStreamingResolvedEventsIngested), JobProgressUpdates: metric.NewCounter(metaJobProgressUpdates), FlushHistNanos: metric.NewHistogram(metaStreamingFlushHistNanos, - histogramWindow, streamingFlushHistMaxLatency.Nanoseconds(), 1), + histogramWindow, metric.BatchProcessLatencyBuckets), CommitLatency: metric.NewHistogram(metaStreamingCommitLatency, - histogramWindow, streamingCommitLatencyMaxValue.Nanoseconds(), 1), + histogramWindow, metric.BatchProcessLatencyBuckets), AdmitLatency: metric.NewHistogram(metaStreamingAdmitLatency, - histogramWindow, streamingAdmitLatencyMaxValue.Nanoseconds(), 1), + histogramWindow, metric.BatchProcessLatencyBuckets), RunningCount: metric.NewGauge(metaStreamsRunning), EarliestDataCheckpointSpan: metric.NewGauge(metaEarliestDataCheckpointSpan), LatestDataCheckpointSpan: metric.NewGauge(metaLatestDataCheckpointSpan), diff --git a/pkg/cmd/roachtest/tests/BUILD.bazel b/pkg/cmd/roachtest/tests/BUILD.bazel index e4481e22ce1f..aa7a01985d7a 100644 --- a/pkg/cmd/roachtest/tests/BUILD.bazel +++ b/pkg/cmd/roachtest/tests/BUILD.bazel @@ -79,6 +79,7 @@ go_library( "mixed_version_schemachange.go", "multitenant.go", "multitenant_fairness.go", + "multitenant_tpch.go", "multitenant_upgrade.go", "multitenant_utils.go", "network.go", diff --git a/pkg/cmd/roachtest/tests/cancel.go b/pkg/cmd/roachtest/tests/cancel.go index 3aa9503ea580..b73d5536072a 100644 --- a/pkg/cmd/roachtest/tests/cancel.go +++ b/pkg/cmd/roachtest/tests/cancel.go @@ -48,16 +48,16 @@ func registerCancel(r registry.Registry) { m := c.NewMonitor(ctx, c.All()) m.Go(func(ctx context.Context) error { + conn := c.Conn(ctx, t.L(), 1) + defer conn.Close() + t.Status("restoring TPCH dataset for Scale Factor 1") if err := loadTPCHDataset( - ctx, t, c, 1 /* sf */, c.NewMonitor(ctx), c.All(), false, /* disableMergeQueue */ + ctx, t, c, conn, 1 /* sf */, c.NewMonitor(ctx), c.All(), false, /* disableMergeQueue */ ); err != nil { t.Fatal(err) } - conn := c.Conn(ctx, t.L(), 1) - defer conn.Close() - queryPrefix := "USE tpch; " if !useDistsql { queryPrefix += "SET distsql = off; " diff --git a/pkg/cmd/roachtest/tests/disk_stall.go b/pkg/cmd/roachtest/tests/disk_stall.go index 66c7a96abee4..5bba262bfb56 100644 --- a/pkg/cmd/roachtest/tests/disk_stall.go +++ b/pkg/cmd/roachtest/tests/disk_stall.go @@ -25,8 +25,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/timeutil" ) -// RegisterDiskStalledDetection registers the disk stall test. -func RegisterDiskStalledDetection(r registry.Registry) { +// registerDiskStalledDetection registers the disk stall test. +func registerDiskStalledDetection(r registry.Registry) { for _, affectsLogDir := range []bool{false, true} { for _, affectsDataDir := range []bool{false, true} { // Grab copies of the args because we'll pass them into a closure. diff --git a/pkg/cmd/roachtest/tests/jepsen.go b/pkg/cmd/roachtest/tests/jepsen.go index deef817fb0f7..1b68f357ce54 100644 --- a/pkg/cmd/roachtest/tests/jepsen.go +++ b/pkg/cmd/roachtest/tests/jepsen.go @@ -327,9 +327,9 @@ cd /mnt/data1/jepsen/cockroachdb && set -eo pipefail && \ } } -// RegisterJepsen registers the Jepsen test suite, which primarily checks for +// registerJepsen registers the Jepsen test suite, which primarily checks for // transaction anomalies. -func RegisterJepsen(r registry.Registry) { +func registerJepsen(r registry.Registry) { // NB: the "comments" test is not included because it requires // linearizability. tests := []string{ diff --git a/pkg/cmd/roachtest/tests/multitenant_tpch.go b/pkg/cmd/roachtest/tests/multitenant_tpch.go new file mode 100644 index 000000000000..79a770136384 --- /dev/null +++ b/pkg/cmd/roachtest/tests/multitenant_tpch.go @@ -0,0 +1,102 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package tests + +import ( + "context" + gosql "database/sql" + "fmt" + + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" + "github.com/cockroachdb/cockroach/pkg/roachprod/install" + "github.com/cockroachdb/cockroach/pkg/workload/tpch" +) + +// runMultiTenantTPCH runs TPCH queries on a cluster that is first used as a +// single-tenant deployment followed by a run of all queries in a multi-tenant +// deployment with a single SQL instance. +func runMultiTenantTPCH(ctx context.Context, t test.Test, c cluster.Cluster) { + c.Put(ctx, t.Cockroach(), "./cockroach", c.All()) + c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.Node(1)) + c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(install.SecureOption(true)), c.All()) + + setupNames := []string{"single-tenant", "multi-tenant"} + const numRunsPerQuery = 3 + perfHelper := newTpchVecPerfHelper(setupNames) + + // runTPCH runs all TPCH queries on a single setup. It first restores the + // TPCH dataset using the provided connection and then runs each TPCH query + // one at a time (using the given url as a parameter to the 'workload run' + // command). The runtimes are accumulated in the perf helper. + runTPCH := func(conn *gosql.DB, url string, setupIdx int) { + t.Status("restoring TPCH dataset for Scale Factor 1 in %s", setupNames[setupIdx]) + if err := loadTPCHDataset( + ctx, t, c, conn, 1 /* sf */, c.NewMonitor(ctx), c.All(), false, /* disableMergeQueue */ + ); err != nil { + t.Fatal(err) + } + if _, err := conn.Exec("USE tpch;"); err != nil { + t.Fatal(err) + } + createStatsFromTables(t, conn, tpchTables) + for queryNum := 1; queryNum <= tpch.NumQueries; queryNum++ { + cmd := fmt.Sprintf("./workload run tpch %s --secure "+ + "--concurrency=1 --db=tpch --max-ops=%d --queries=%d", + url, numRunsPerQuery, queryNum) + result, err := c.RunWithDetailsSingleNode(ctx, t.L(), c.Node(1), cmd) + workloadOutput := result.Stdout + result.Stderr + t.L().Printf(workloadOutput) + if err != nil { + t.Fatal(err) + } + perfHelper.parseQueryOutput(t, []byte(workloadOutput), setupIdx) + } + } + + // First, use the cluster as a single tenant deployment. It is important to + // not create the tenant yet so that the certs directory is not overwritten. + singleTenantConn := c.Conn(ctx, t.L(), 1) + runTPCH(singleTenantConn, "" /* url */, 0 /* setupIdx */) + + // Now we create a tenant and run all TPCH queries within it. + const ( + tenantID = 123 + tenantHTTPPort = 8081 + tenantSQLPort = 30258 + tenantNode = 1 + ) + _, err := singleTenantConn.Exec(`SELECT crdb_internal.create_tenant($1)`, tenantID) + if err != nil { + t.Fatal(err) + } + tenant := createTenantNode(ctx, t, c, c.All(), tenantID, tenantNode, tenantHTTPPort, tenantSQLPort) + tenant.start(ctx, t, c, "./cockroach") + multiTenantConn, err := gosql.Open("postgres", tenant.pgURL) + if err != nil { + t.Fatal(err) + } + runTPCH(multiTenantConn, "'"+tenant.secureURL()+"'", 1 /* setupIdx */) + + // Analyze the runtimes of both setups. + perfHelper.compareSetups(t, numRunsPerQuery, nil /* timesCallback */) +} + +func registerMultiTenantTPCH(r registry.Registry) { + r.Add(registry.TestSpec{ + Name: "multitenant/tpch", + Owner: registry.OwnerSQLQueries, + Cluster: r.MakeClusterSpec(1 /* nodeCount */), + Run: runMultiTenantTPCH, + }) +} diff --git a/pkg/cmd/roachtest/tests/registry.go b/pkg/cmd/roachtest/tests/registry.go index 82805d6fc501..83ee6b0c9061 100644 --- a/pkg/cmd/roachtest/tests/registry.go +++ b/pkg/cmd/roachtest/tests/registry.go @@ -14,18 +14,19 @@ import "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" // RegisterTests registers all tests to the Registry. This powers `roachtest run`. func RegisterTests(r registry.Registry) { + registerAWSDMS(r) registerAcceptance(r) registerActiveRecord(r) registerAllocator(r) registerAlterPK(r) - registerAWSDMS(r) registerAsyncpg(r) registerAutoUpgrade(r) registerBackup(r) registerBackupMixedVersion(r) registerBackupNodeShutdown(r) - registerCancel(r) registerCDC(r) + registerCDCMixedVersions(r) + registerCancel(r) registerClearRange(r) registerClockJumpTests(r) registerClockMonotonicTests(r) @@ -36,7 +37,7 @@ func RegisterTests(r registry.Registry) { registerDecommission(r) registerDecommissionBench(r) registerDiskFull(r) - RegisterDiskStalledDetection(r) + registerDiskStalledDetection(r) registerDjango(r) registerDrain(r) registerDrop(r) @@ -44,42 +45,48 @@ func RegisterTests(r registry.Registry) { registerFixtures(r) registerFlowable(r) registerFollowerReads(r) + registerGORM(r) registerGopg(r) registerGossip(r) - registerGORM(r) registerHibernate(r, hibernateOpts) registerHibernate(r, hibernateSpatialOpts) registerHotSpotSplits(r) registerImportDecommissioned(r) registerImportMixedVersion(r) + registerImportNodeShutdown(r) registerImportTPCC(r) registerImportTPCH(r) - registerImportNodeShutdown(r) registerInconsistency(r) registerIndexes(r) registerJasyncSQL(r) - RegisterJepsen(r) + registerJepsen(r) registerJobsMixedVersions(r) - registerKnex(r) registerKV(r) + registerKVBench(r) registerKVContention(r) - registerKVQuiescenceDead(r) registerKVGracefulDraining(r) + registerKVMultiStoreWithOverload(r) + registerKVQuiescenceDead(r) + registerKVRangeLookups(r) registerKVScalability(r) registerKVSplits(r) - registerKVRangeLookups(r) - registerKVMultiStoreWithOverload(r) + registerKnex(r) + registerLOQRecovery(r) registerLargeRange(r) registerLedger(r) registerLibPQ(r) registerLiquibase(r) - registerLOQRecovery(r) + registerLoadSplits(r) + registerMultiTenantFairness(r) + registerMultiTenantTPCH(r) + registerMultiTenantUpgrade(r) registerNetwork(r) + registerNodeJSPostgres(r) + registerOverload(r) registerPebbleWriteThroughput(r) registerPebbleYCSB(r) registerPgjdbc(r) registerPgx(r) - registerNodeJSPostgres(r) registerPop(r) registerPsycopg(r) registerQueue(r) @@ -87,49 +94,43 @@ func RegisterTests(r registry.Registry) { registerRebalanceLoad(r) registerReplicaGC(r) registerRestart(r) - registerRestoreNodeShutdown(r) registerRestore(r) + registerRestoreNodeShutdown(r) registerRoachmart(r) registerRoachtest(r) registerRubyPG(r) registerRustPostgres(r) + registerSQLAlchemy(r) + registerSQLSmith(r) + registerSSTableCorruption(r) registerSchemaChangeBulkIngest(r) registerSchemaChangeDuringKV(r) + registerSchemaChangeDuringTPCC1000(r) registerSchemaChangeIndexTPCC100(r) registerSchemaChangeIndexTPCC1000(r) - registerSchemaChangeDuringTPCC1000(r) registerSchemaChangeInvertedIndex(r) registerSchemaChangeMixedVersions(r) registerSchemaChangeRandomLoad(r) - registerCDCMixedVersions(r) registerScrubAllChecksTPCC(r) registerScrubIndexOnlyTPCC(r) registerSecondaryIndexesMultiVersionCluster(r) registerSecure(r) registerSequelize(r) registerSlowDrain(r) - registerSQLAlchemy(r) - registerSQLSmith(r) - registerSSTableCorruption(r) registerSyncTest(r) registerSysbench(r) registerTLP(r) registerTPCC(r) registerTPCDSVec(r) registerTPCE(r) + registerTPCHBench(r) registerTPCHConcurrency(r) registerTPCHVec(r) - registerUnoptimizedQueryOracle(r) - registerKVBench(r) registerTypeORM(r) - registerLoadSplits(r) + registerUnoptimizedQueryOracle(r) + registerValidateSystemSchemaAfterVersionUpgrade(r) registerVersion(r) registerYCSB(r) - registerTPCHBench(r) - registerOverload(r) - registerMultiTenantUpgrade(r) - registerMultiTenantFairness(r) - registerValidateSystemSchemaAfterVersionUpgrade(r) } // RegisterBenchmarks registers all benchmarks to the registry. This powers `roachtest bench`. diff --git a/pkg/cmd/roachtest/tests/tpc_utils.go b/pkg/cmd/roachtest/tests/tpc_utils.go index a78feb464c23..c48d1f6e72e4 100644 --- a/pkg/cmd/roachtest/tests/tpc_utils.go +++ b/pkg/cmd/roachtest/tests/tpc_utils.go @@ -33,14 +33,12 @@ func loadTPCHDataset( ctx context.Context, t test.Test, c cluster.Cluster, + db *gosql.DB, sf int, m cluster.Monitor, roachNodes option.NodeListOption, disableMergeQueue bool, ) error { - db := c.Conn(ctx, t.L(), roachNodes[0]) - defer db.Close() - if disableMergeQueue { if _, err := db.Exec("SET CLUSTER SETTING kv.range_merge.queue_enabled = false;"); err != nil { t.Fatal(err) diff --git a/pkg/cmd/roachtest/tests/tpch_concurrency.go b/pkg/cmd/roachtest/tests/tpch_concurrency.go index cab299499c0c..bab42ed9ef81 100644 --- a/pkg/cmd/roachtest/tests/tpch_concurrency.go +++ b/pkg/cmd/roachtest/tests/tpch_concurrency.go @@ -55,7 +55,7 @@ func registerTPCHConcurrency(r registry.Registry) { } if err := loadTPCHDataset( - ctx, t, c, 1 /* sf */, c.NewMonitor(ctx, c.Range(1, numNodes-1)), + ctx, t, c, conn, 1 /* sf */, c.NewMonitor(ctx, c.Range(1, numNodes-1)), c.Range(1, numNodes-1), true, /* disableMergeQueue */ ); err != nil { t.Fatal(err) diff --git a/pkg/cmd/roachtest/tests/tpchbench.go b/pkg/cmd/roachtest/tests/tpchbench.go index ba65f1fa755e..001169c8e4d8 100644 --- a/pkg/cmd/roachtest/tests/tpchbench.go +++ b/pkg/cmd/roachtest/tests/tpchbench.go @@ -71,9 +71,12 @@ func runTPCHBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpchBen m := c.NewMonitor(ctx, roachNodes) m.Go(func(ctx context.Context) error { + conn := c.Conn(ctx, t.L(), 1) + defer conn.Close() + t.Status("setting up dataset") err := loadTPCHDataset( - ctx, t, c, b.ScaleFactor, m, roachNodes, true, /* disableMergeQueue */ + ctx, t, c, conn, b.ScaleFactor, m, roachNodes, true, /* disableMergeQueue */ ) if err != nil { return err diff --git a/pkg/cmd/roachtest/tests/tpchvec.go b/pkg/cmd/roachtest/tests/tpchvec.go index 42d1ba089e11..b1ea26f23358 100644 --- a/pkg/cmd/roachtest/tests/tpchvec.go +++ b/pkg/cmd/roachtest/tests/tpchvec.go @@ -44,9 +44,6 @@ type tpchVecTestRunConfig struct { // numRunsPerQuery determines how many time a single query runs, set to 1 // by default. numRunsPerQuery int - // queriesToRun specifies which queries to run (in [1, tpch.NumQueries] - // range). - queriesToRun []int // clusterSetups specifies all cluster setup queries that need to be // executed before running any of the TPCH queries. First dimension // determines the number of different clusterSetups a tpchvec test is run @@ -86,7 +83,7 @@ type tpchVecTestCase interface { type tpchVecTestCaseBase struct{} func (b tpchVecTestCaseBase) getRunConfig() tpchVecTestRunConfig { - runConfig := tpchVecTestRunConfig{ + return tpchVecTestRunConfig{ numRunsPerQuery: 1, clusterSetups: [][]string{{ "RESET CLUSTER SETTING sql.distsql.temp_storage.workmem", @@ -94,10 +91,6 @@ func (b tpchVecTestCaseBase) getRunConfig() tpchVecTestRunConfig { }}, setupNames: []string{"default"}, } - for queryNum := 1; queryNum <= tpch.NumQueries; queryNum++ { - runConfig.queriesToRun = append(runConfig.queriesToRun, queryNum) - } - return runConfig } func (b tpchVecTestCaseBase) preQueryRunHook(t test.Test, conn *gosql.DB, clusterSetup []string) { @@ -112,15 +105,17 @@ func (b tpchVecTestCaseBase) postTestRunHook( } type tpchVecPerfHelper struct { + setupNames []string timeByQueryNum []map[int][]float64 } -func newTpchVecPerfHelper(numSetups int) *tpchVecPerfHelper { - timeByQueryNum := make([]map[int][]float64, numSetups) +func newTpchVecPerfHelper(setupNames []string) *tpchVecPerfHelper { + timeByQueryNum := make([]map[int][]float64, len(setupNames)) for i := range timeByQueryNum { timeByQueryNum[i] = make(map[int][]float64) } return &tpchVecPerfHelper{ + setupNames: setupNames, timeByQueryNum: timeByQueryNum, } } @@ -145,6 +140,56 @@ func (h *tpchVecPerfHelper) parseQueryOutput(t test.Test, output []byte, setupId } } +// compareSetups compares the runtimes of TPCH queries in different setups and +// logs that comparison. The expectation is that the second "ON" setup should be +// faster, and if that is not the case, then a warning message is included in +// the log. +func (h *tpchVecPerfHelper) compareSetups( + t test.Test, + numRunsPerQuery int, + timesCallback func(queryNum int, onTime, offTime float64, onTimes, offTimes []float64), +) { + t.Status("comparing the runtimes (only median values for each query are compared)") + for queryNum := 1; queryNum <= tpch.NumQueries; queryNum++ { + findMedian := func(times []float64) float64 { + sort.Float64s(times) + return times[len(times)/2] + } + onTimes := h.timeByQueryNum[tpchPerfTestOnConfigIdx][queryNum] + onName := h.setupNames[tpchPerfTestOnConfigIdx] + offTimes := h.timeByQueryNum[tpchPerfTestOffConfigIdx][queryNum] + offName := h.setupNames[tpchPerfTestOffConfigIdx] + if len(onTimes) != numRunsPerQuery { + t.Fatal(fmt.Sprintf("[q%d] unexpectedly wrong number of run times "+ + "recorded with %s config: %v", queryNum, onName, onTimes)) + } + if len(offTimes) != numRunsPerQuery { + t.Fatal(fmt.Sprintf("[q%d] unexpectedly wrong number of run times "+ + "recorded with %s config: %v", queryNum, offName, offTimes)) + } + onTime := findMedian(onTimes) + offTime := findMedian(offTimes) + if offTime < onTime { + t.L().Printf( + fmt.Sprintf("[q%d] %s was faster by %.2f%%: "+ + "%.2fs %s vs %.2fs %s --- WARNING\n"+ + "%s times: %v\t %s times: %v", + queryNum, offName, 100*(onTime-offTime)/offTime, onTime, onName, + offTime, offName, onName, onTimes, offName, offTimes)) + } else { + t.L().Printf( + fmt.Sprintf("[q%d] %s was faster by %.2f%%: "+ + "%.2fs %s vs %.2fs %s\n"+ + "%s times: %v\t %s times: %v", + queryNum, onName, 100*(offTime-onTime)/onTime, onTime, onName, + offTime, offName, onName, onTimes, offName, offTimes)) + } + if timesCallback != nil { + timesCallback(queryNum, onTime, offTime, onTimes, offTimes) + } + } +} + const ( tpchPerfTestOnConfigIdx = 1 tpchPerfTestOffConfigIdx = 0 @@ -162,7 +207,7 @@ var _ tpchVecTestCase = &tpchVecPerfTest{} func newTpchVecPerfTest(settingName string, slownessThreshold float64) *tpchVecPerfTest { return &tpchVecPerfTest{ - tpchVecPerfHelper: newTpchVecPerfHelper(2 /* numSetups */), + tpchVecPerfHelper: newTpchVecPerfHelper([]string{"OFF", "ON"}), settingName: settingName, slownessThreshold: slownessThreshold, } @@ -198,39 +243,7 @@ func (p *tpchVecPerfTest) postTestRunHook( ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, ) { runConfig := p.getRunConfig() - t.Status("comparing the runtimes (only median values for each query are compared)") - for _, queryNum := range runConfig.queriesToRun { - findMedian := func(times []float64) float64 { - sort.Float64s(times) - return times[len(times)/2] - } - onTimes := p.timeByQueryNum[tpchPerfTestOnConfigIdx][queryNum] - offTimes := p.timeByQueryNum[tpchPerfTestOffConfigIdx][queryNum] - if len(onTimes) != runConfig.numRunsPerQuery { - t.Fatal(fmt.Sprintf("[q%d] unexpectedly wrong number of run times "+ - "recorded with ON config: %v", queryNum, onTimes)) - } - if len(offTimes) != runConfig.numRunsPerQuery { - t.Fatal(fmt.Sprintf("[q%d] unexpectedly wrong number of run times "+ - "recorded with OFF config: %v", queryNum, offTimes)) - } - onTime := findMedian(onTimes) - offTime := findMedian(offTimes) - if offTime < onTime { - t.L().Printf( - fmt.Sprintf("[q%d] OFF was faster by %.2f%%: "+ - "%.2fs ON vs %.2fs OFF --- WARNING\n"+ - "ON times: %v\t OFF times: %v", - queryNum, 100*(onTime-offTime)/offTime, - onTime, offTime, onTimes, offTimes)) - } else { - t.L().Printf( - fmt.Sprintf("[q%d] ON was faster by %.2f%%: "+ - "%.2fs ON vs %.2fs OFF\n"+ - "ON times: %v\t OFF times: %v", - queryNum, 100*(offTime-onTime)/onTime, - onTime, offTime, onTimes, offTimes)) - } + p.tpchVecPerfHelper.compareSetups(t, runConfig.numRunsPerQuery, func(queryNum int, onTime, offTime float64, onTimes, offTimes []float64) { if onTime >= p.slownessThreshold*offTime { // For some reason, the ON setup executed the query a lot slower // than the OFF setup which is unexpected. In order to understand @@ -301,7 +314,7 @@ func (p *tpchVecPerfTest) postTestRunHook( "ON times: %v\nOFF times: %v", queryNum, 100*(onTime-offTime)/offTime, onTimes, offTimes)) } - } + }) } type tpchVecBenchTest struct { @@ -309,33 +322,24 @@ type tpchVecBenchTest struct { *tpchVecPerfHelper numRunsPerQuery int - queriesToRun []int clusterSetups [][]string - setupNames []string } var _ tpchVecTestCase = &tpchVecBenchTest{} -// queriesToRun can be omitted in which case all queries that are not skipped -// for the given version will be run. func newTpchVecBenchTest( - numRunsPerQuery int, queriesToRun []int, clusterSetups [][]string, setupNames []string, + numRunsPerQuery int, clusterSetups [][]string, setupNames []string, ) *tpchVecBenchTest { return &tpchVecBenchTest{ - tpchVecPerfHelper: newTpchVecPerfHelper(len(setupNames)), + tpchVecPerfHelper: newTpchVecPerfHelper(setupNames), numRunsPerQuery: numRunsPerQuery, - queriesToRun: queriesToRun, clusterSetups: clusterSetups, - setupNames: setupNames, } } func (b tpchVecBenchTest) getRunConfig() tpchVecTestRunConfig { runConfig := b.tpchVecTestCaseBase.getRunConfig() runConfig.numRunsPerQuery = b.numRunsPerQuery - if b.queriesToRun != nil { - runConfig.queriesToRun = b.queriesToRun - } defaultSetup := runConfig.clusterSetups[0] // We slice up defaultSetup to make sure that new slices are allocated in // appends below. @@ -362,7 +366,7 @@ func (b *tpchVecBenchTest) postTestRunHook( // and then all query scores are summed. So the lower the total score, the // better the config is. scores := make([]float64, len(runConfig.setupNames)) - for _, queryNum := range runConfig.queriesToRun { + for queryNum := 1; queryNum <= tpch.NumQueries; queryNum++ { // findAvgTime finds the average of times excluding best and worst as // possible outliers. It expects that len(times) >= 3. findAvgTime := func(times []float64) float64 { @@ -439,7 +443,7 @@ func baseTestRun( ) { firstNode := c.Node(1) runConfig := tc.getRunConfig() - for _, queryNum := range runConfig.queriesToRun { + for queryNum := 1; queryNum <= tpch.NumQueries; queryNum++ { for setupIdx, setup := range runConfig.clusterSetups { tc.preQueryRunHook(t, conn, setup) // Note that we use --default-vectorize flag which tells tpch @@ -528,7 +532,7 @@ func runTPCHVec( conn := c.Conn(ctx, t.L(), 1) t.Status("restoring TPCH dataset for Scale Factor 1") if err := loadTPCHDataset( - ctx, t, c, 1 /* sf */, c.NewMonitor(ctx), c.All(), true, /* disableMergeQueue */ + ctx, t, c, conn, 1 /* sf */, c.NewMonitor(ctx), c.All(), true, /* disableMergeQueue */ ); err != nil { t.Fatal(err) } @@ -614,8 +618,7 @@ func registerTPCHVec(r registry.Registry) { setupNames = append(setupNames, fmt.Sprintf("%d", batchSize)) } benchTest := newTpchVecBenchTest( - 5, /* numRunsPerQuery */ - nil, /* queriesToRun */ + 5, /* numRunsPerQuery */ clusterSetups, setupNames, ) diff --git a/pkg/kv/bulk/bulk_metrics.go b/pkg/kv/bulk/bulk_metrics.go index d85d5883e103..2d3629ce5e01 100644 --- a/pkg/kv/bulk/bulk_metrics.go +++ b/pkg/kv/bulk/bulk_metrics.go @@ -43,14 +43,10 @@ var ( } ) -// See pkg/sql/mem_metrics.go -// log10int64times1000 = log10(math.MaxInt64) * 1000, rounded up somewhat -const log10int64times1000 = 19 * 1000 - // MakeBulkMetrics instantiates the metrics holder for bulk operation monitoring. func MakeBulkMetrics(histogramWindow time.Duration) Metrics { return Metrics{ - MaxBytesHist: metric.NewHistogram(metaMemMaxBytes, histogramWindow, log10int64times1000, 3), + MaxBytesHist: metric.NewHistogram(metaMemMaxBytes, histogramWindow, metric.MemoryUsage64MBBuckets), CurBytesCount: metric.NewGauge(metaMemCurBytes), } } diff --git a/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go b/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go index 27701a8eb1e0..ed19f2f07cdf 100644 --- a/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go +++ b/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go @@ -1073,15 +1073,12 @@ func checkTxnMetrics( commits, commits1PC, aborts, restarts int64, ) { testutils.SucceedsSoon(t, func() error { - return checkTxnMetricsOnce(t, metrics, name, commits, commits1PC, aborts, restarts) + return checkTxnMetricsOnce(metrics, name, commits, commits1PC, aborts, restarts) }) } func checkTxnMetricsOnce( - t *testing.T, - metrics kvcoord.TxnMetrics, - name string, - commits, commits1PC, aborts, restarts int64, + metrics kvcoord.TxnMetrics, name string, commits, commits1PC, aborts, restarts int64, ) error { testcases := []struct { name string @@ -1091,6 +1088,7 @@ func checkTxnMetricsOnce( {"commits1PC", metrics.Commits1PC.Count(), commits1PC}, {"aborts", metrics.Aborts.Count(), aborts}, {"durations", metrics.Durations.TotalCount(), commits + aborts}, + {"restarts", metrics.Restarts.TotalCount(), restarts}, } for _, tc := range testcases { @@ -1099,22 +1097,6 @@ func checkTxnMetricsOnce( } } - // Handle restarts separately, because that's a histogram. Though the - // histogram is approximate, we're recording so few distinct values - // that we should be okay. - dist := metrics.Restarts.Snapshot().Distribution() - var actualRestarts int64 - for _, b := range dist { - if b.From == b.To { - actualRestarts += b.From * b.Count - } else { - t.Fatalf("unexpected value in histogram: %d-%d", b.From, b.To) - } - } - if a, e := actualRestarts, restarts; a != e { - return errors.Errorf("%s: actual restarts %d != expected %d", name, a, e) - } - return nil } @@ -1324,10 +1306,13 @@ func TestTxnDurations(t *testing.T) { t.Fatalf("durations %d != expected %d", a, e) } - // Metrics lose fidelity, so we can't compare incr directly. - if min, thresh := hist.Min(), (incr - 10).Nanoseconds(); min < thresh { - t.Fatalf("min %d < %d", min, thresh) + for _, b := range hist.ToPrometheusMetric().GetHistogram().GetBucket() { + thresh := incr.Nanoseconds() + if *b.UpperBound < float64(thresh) && *b.CumulativeCount != 0 { + t.Fatalf("expected no values in bucket: %f", *b.UpperBound) + } } + } // TestTxnCommitWait tests the commit-wait sleep phase of transactions under diff --git a/pkg/kv/kvclient/kvcoord/txn_metrics.go b/pkg/kv/kvclient/kvcoord/txn_metrics.go index 372361a64699..eb6313012717 100644 --- a/pkg/kv/kvclient/kvcoord/txn_metrics.go +++ b/pkg/kv/kvclient/kvcoord/txn_metrics.go @@ -274,11 +274,11 @@ func MakeTxnMetrics(histogramWindow time.Duration) TxnMetrics { RefreshFailWithCondensedSpans: metric.NewCounter(metaRefreshFailWithCondensedSpans), RefreshMemoryLimitExceeded: metric.NewCounter(metaRefreshMemoryLimitExceeded), RefreshAutoRetries: metric.NewCounter(metaRefreshAutoRetries), - Durations: metric.NewLatency(metaDurationsHistograms, histogramWindow), + Durations: metric.NewHistogram(metaDurationsHistograms, histogramWindow, metric.IOLatencyBuckets), TxnsWithCondensedIntents: metric.NewCounter(metaTxnsWithCondensedIntentSpans), TxnsWithCondensedIntentsGauge: metric.NewGauge(metaTxnsWithCondensedIntentSpansGauge), TxnsRejectedByLockSpanBudget: metric.NewCounter(metaTxnsRejectedByLockSpanBudget), - Restarts: metric.NewHistogram(metaRestartsHistogram, histogramWindow, 100, 3), + Restarts: metric.NewHistogram(metaRestartsHistogram, histogramWindow, metric.Count1KBuckets), RestartsWriteTooOld: telemetry.NewCounterWithMetric(metaRestartsWriteTooOld), RestartsWriteTooOldMulti: telemetry.NewCounterWithMetric(metaRestartsWriteTooOldMulti), RestartsSerializable: telemetry.NewCounterWithMetric(metaRestartsSerializable), diff --git a/pkg/kv/kvprober/kvprober.go b/pkg/kv/kvprober/kvprober.go index e2566a89bc30..fe4176415943 100644 --- a/pkg/kv/kvprober/kvprober.go +++ b/pkg/kv/kvprober/kvprober.go @@ -214,14 +214,18 @@ func NewProber(opts Opts) *Prober { writePlanner: newMeta2Planner(opts.DB, opts.Settings, func() time.Duration { return writeInterval.Get(&opts.Settings.SV) }), metrics: Metrics{ - ReadProbeAttempts: metric.NewCounter(metaReadProbeAttempts), - ReadProbeFailures: metric.NewCounter(metaReadProbeFailures), - ReadProbeLatency: metric.NewLatency(metaReadProbeLatency, opts.HistogramWindowInterval), + ReadProbeAttempts: metric.NewCounter(metaReadProbeAttempts), + ReadProbeFailures: metric.NewCounter(metaReadProbeFailures), + ReadProbeLatency: metric.NewHistogram( + metaReadProbeLatency, opts.HistogramWindowInterval, metric.NetworkLatencyBuckets, + ), WriteProbeAttempts: metric.NewCounter(metaWriteProbeAttempts), WriteProbeFailures: metric.NewCounter(metaWriteProbeFailures), - WriteProbeLatency: metric.NewLatency(metaWriteProbeLatency, opts.HistogramWindowInterval), - ProbePlanAttempts: metric.NewCounter(metaProbePlanAttempts), - ProbePlanFailures: metric.NewCounter(metaProbePlanFailures), + WriteProbeLatency: metric.NewHistogram( + metaWriteProbeLatency, opts.HistogramWindowInterval, metric.NetworkLatencyBuckets, + ), + ProbePlanAttempts: metric.NewCounter(metaProbePlanAttempts), + ProbePlanFailures: metric.NewCounter(metaProbePlanFailures), }, tracer: opts.Tracer, } diff --git a/pkg/kv/kvserver/liveness/liveness.go b/pkg/kv/kvserver/liveness/liveness.go index 215a45e6ad22..d2e92628c4db 100644 --- a/pkg/kv/kvserver/liveness/liveness.go +++ b/pkg/kv/kvserver/liveness/liveness.go @@ -309,7 +309,9 @@ func NewNodeLiveness(opts NodeLivenessOptions) *NodeLiveness { HeartbeatSuccesses: metric.NewCounter(metaHeartbeatSuccesses), HeartbeatFailures: telemetry.NewCounterWithMetric(metaHeartbeatFailures), EpochIncrements: telemetry.NewCounterWithMetric(metaEpochIncrements), - HeartbeatLatency: metric.NewLatency(metaHeartbeatLatency, opts.HistogramWindowInterval), + HeartbeatLatency: metric.NewHistogram( + metaHeartbeatLatency, opts.HistogramWindowInterval, metric.NetworkLatencyBuckets, + ), } nl.mu.nodes = make(map[roachpb.NodeID]Record) nl.heartbeatToken <- struct{}{} diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go index 8a47f9e2e975..501e8e2f3750 100644 --- a/pkg/kv/kvserver/metrics.go +++ b/pkg/kv/kvserver/metrics.go @@ -2245,18 +2245,27 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { // Raft processing metrics. RaftTicks: metric.NewCounter(metaRaftTicks), RaftQuotaPoolPercentUsed: metric.NewHistogram( - // NB: this results in 64 buckets (i.e. 64 timeseries in prometheus). - metaRaftQuotaPoolPercentUsed, histogramWindow, 100 /* maxVal */, 1, /* sigFigs */ + metaRaftQuotaPoolPercentUsed, histogramWindow, metric.Percent100Buckets, ), - RaftWorkingDurationNanos: metric.NewCounter(metaRaftWorkingDurationNanos), - RaftTickingDurationNanos: metric.NewCounter(metaRaftTickingDurationNanos), - RaftCommandsApplied: metric.NewCounter(metaRaftCommandsApplied), - RaftLogCommitLatency: metric.NewLatency(metaRaftLogCommitLatency, histogramWindow), - RaftCommandCommitLatency: metric.NewLatency(metaRaftCommandCommitLatency, histogramWindow), - RaftHandleReadyLatency: metric.NewLatency(metaRaftHandleReadyLatency, histogramWindow), - RaftApplyCommittedLatency: metric.NewLatency(metaRaftApplyCommittedLatency, histogramWindow), - RaftSchedulerLatency: metric.NewLatency(metaRaftSchedulerLatency, histogramWindow), - RaftTimeoutCampaign: metric.NewCounter(metaRaftTimeoutCampaign), + RaftWorkingDurationNanos: metric.NewCounter(metaRaftWorkingDurationNanos), + RaftTickingDurationNanos: metric.NewCounter(metaRaftTickingDurationNanos), + RaftCommandsApplied: metric.NewCounter(metaRaftCommandsApplied), + RaftLogCommitLatency: metric.NewHistogram( + metaRaftLogCommitLatency, histogramWindow, metric.IOLatencyBuckets, + ), + RaftCommandCommitLatency: metric.NewHistogram( + metaRaftCommandCommitLatency, histogramWindow, metric.IOLatencyBuckets, + ), + RaftHandleReadyLatency: metric.NewHistogram( + metaRaftHandleReadyLatency, histogramWindow, metric.IOLatencyBuckets, + ), + RaftApplyCommittedLatency: metric.NewHistogram( + metaRaftApplyCommittedLatency, histogramWindow, metric.IOLatencyBuckets, + ), + RaftSchedulerLatency: metric.NewHistogram( + metaRaftSchedulerLatency, histogramWindow, metric.IOLatencyBuckets, + ), + RaftTimeoutCampaign: metric.NewCounter(metaRaftTimeoutCampaign), // Raft message metrics. RaftRcvdMessages: [...]*metric.Counter{ @@ -2394,8 +2403,12 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { ReplicaCircuitBreakerCumTripped: metric.NewCounter(metaReplicaCircuitBreakerCumTripped), // Replica batch evaluation. - ReplicaReadBatchEvaluationLatency: metric.NewLatency(metaReplicaReadBatchEvaluationLatency, histogramWindow), - ReplicaWriteBatchEvaluationLatency: metric.NewLatency(metaReplicaWriteBatchEvaluationLatency, histogramWindow), + ReplicaReadBatchEvaluationLatency: metric.NewHistogram( + metaReplicaReadBatchEvaluationLatency, histogramWindow, metric.IOLatencyBuckets, + ), + ReplicaWriteBatchEvaluationLatency: metric.NewHistogram( + metaReplicaWriteBatchEvaluationLatency, histogramWindow, metric.IOLatencyBuckets, + ), } { diff --git a/pkg/kv/kvserver/txnwait/metrics.go b/pkg/kv/kvserver/txnwait/metrics.go index 1d4c6f2b0653..2e9d1d2a2055 100644 --- a/pkg/kv/kvserver/txnwait/metrics.go +++ b/pkg/kv/kvserver/txnwait/metrics.go @@ -74,8 +74,7 @@ func NewMetrics(histogramWindowInterval time.Duration) *Metrics { Unit: metric.Unit_NANOSECONDS, }, histogramWindowInterval, - time.Hour.Nanoseconds(), - 1, + metric.LongRunning60mLatencyBuckets, ), QueryWaitTime: metric.NewHistogram( @@ -86,8 +85,7 @@ func NewMetrics(histogramWindowInterval time.Duration) *Metrics { Unit: metric.Unit_NANOSECONDS, }, histogramWindowInterval, - time.Hour.Nanoseconds(), - 1, + metric.LongRunning60mLatencyBuckets, ), DeadlocksTotal: metric.NewCounter( diff --git a/pkg/rpc/clock_offset.go b/pkg/rpc/clock_offset.go index 90220dd288d3..48f290dd517c 100644 --- a/pkg/rpc/clock_offset.go +++ b/pkg/rpc/clock_offset.go @@ -122,7 +122,9 @@ func newRemoteClockMonitor( r.metrics = RemoteClockMetrics{ ClockOffsetMeanNanos: metric.NewGauge(metaClockOffsetMeanNanos), ClockOffsetStdDevNanos: metric.NewGauge(metaClockOffsetStdDevNanos), - LatencyHistogramNanos: metric.NewLatency(metaLatencyHistogramNanos, histogramWindowInterval), + LatencyHistogramNanos: metric.NewHistogram( + metaLatencyHistogramNanos, histogramWindowInterval, metric.IOLatencyBuckets, + ), } return &r } diff --git a/pkg/server/node.go b/pkg/server/node.go index b5f0cc7d7eef..b44bb6be479c 100644 --- a/pkg/server/node.go +++ b/pkg/server/node.go @@ -174,7 +174,9 @@ type nodeMetrics struct { func makeNodeMetrics(reg *metric.Registry, histogramWindow time.Duration) nodeMetrics { nm := nodeMetrics{ - Latency: metric.NewLatency(metaExecLatency, histogramWindow), + Latency: metric.NewHistogram( + metaExecLatency, histogramWindow, metric.IOLatencyBuckets, + ), Success: metric.NewCounter(metaExecSuccess), Err: metric.NewCounter(metaExecError), DiskStalls: metric.NewCounter(metaDiskStalls), diff --git a/pkg/server/status/BUILD.bazel b/pkg/server/status/BUILD.bazel index 577fdde29bcb..e6981fd55727 100644 --- a/pkg/server/status/BUILD.bazel +++ b/pkg/server/status/BUILD.bazel @@ -148,7 +148,6 @@ go_test( "//pkg/util/system", "//pkg/util/timeutil", "@com_github_kr_pretty//:pretty", - "@com_github_prometheus_client_golang//prometheus", "@com_github_shirou_gopsutil_v3//net", ], ) diff --git a/pkg/server/status/recorder.go b/pkg/server/status/recorder.go index 3d6fe5ad18ce..2aee2ee6001e 100644 --- a/pkg/server/status/recorder.go +++ b/pkg/server/status/recorder.go @@ -529,26 +529,6 @@ type registryRecorder struct { func extractValue(name string, mtr interface{}, fn func(string, float64)) error { switch mtr := mtr.(type) { case *metric.Histogram: - // TODO(mrtracy): Where should this comment go for better - // visibility? - // - // Proper support of Histograms for time series is difficult and - // likely not worth the trouble. Instead, we aggregate a windowed - // histogram at fixed quantiles. If the scraping window and the - // histogram's eviction duration are similar, this should give - // good results; if the two durations are very different, we either - // report stale results or report only the more recent data. - // - // Additionally, we can only aggregate max/min of the quantiles; - // roll-ups don't know that and so they will return mathematically - // nonsensical values, but that seems acceptable for the time - // being. - curr, _ := mtr.Windowed() - for _, pt := range recordHistogramQuantiles { - fn(name+pt.suffix, float64(curr.ValueAtQuantile(pt.quantile))) - } - fn(name+"-count", float64(curr.TotalCount())) - case *metric.HistogramV2: n := float64(mtr.TotalCountWindowed()) fn(name+"-count", n) avg := mtr.TotalSumWindowed() / n diff --git a/pkg/server/status/recorder_test.go b/pkg/server/status/recorder_test.go index 3628c975643a..c03a25734393 100644 --- a/pkg/server/status/recorder_test.go +++ b/pkg/server/status/recorder_test.go @@ -33,7 +33,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/system" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/kr/pretty" - "github.com/prometheus/client_golang/prometheus" ) // byTimeAndName is a slice of tspb.TimeSeriesData. @@ -202,9 +201,7 @@ func TestMetricsRecorder(t *testing.T) { {"testGauge", "gauge", 20}, {"testGaugeFloat64", "floatgauge", 20}, {"testCounter", "counter", 5}, - {"testHistogram", "histogram", 10}, - {"testHistogramV2", "histogramV2", 9}, - {"testLatency", "latency", 10}, + {"testHistogram", "histogram", 9}, {"testAggGauge", "agggauge", 4}, {"testAggCounter", "aggcounter", 7}, @@ -289,16 +286,7 @@ func TestMetricsRecorder(t *testing.T) { c.Inc((data.val)) addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode) case "histogram": - h := metric.NewHistogram(metric.Metadata{Name: reg.prefix + data.name}, time.Second, 1000, 2) - reg.reg.AddMetric(h) - h.RecordValue(data.val) - for _, q := range recordHistogramQuantiles { - addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode) - } - addExpected(reg.prefix, data.name+"-count", reg.source, 100, 1, reg.isNode) - case "histogramV2": - h := metric.NewHistogramV2(metric.Metadata{Name: reg.prefix + data.name}, time.Second, - prometheus.HistogramOpts{Buckets: []float64{1.0, 10.0, 100.0, 1000.0}}) + h := metric.NewHistogram(metric.Metadata{Name: reg.prefix + data.name}, time.Second, []float64{1.0, 10.0, 100.0, 1000.0}) reg.reg.AddMetric(h) h.RecordValue(data.val) for _, q := range recordHistogramQuantiles { @@ -306,16 +294,6 @@ func TestMetricsRecorder(t *testing.T) { } addExpected(reg.prefix, data.name+"-count", reg.source, 100, 1, reg.isNode) addExpected(reg.prefix, data.name+"-avg", reg.source, 100, 9, reg.isNode) - case "latency": - l := metric.NewLatency(metric.Metadata{Name: reg.prefix + data.name}, time.Hour) - reg.reg.AddMetric(l) - l.RecordValue(data.val) - // Latency is simply three histograms (at different resolution - // time scales). - for _, q := range recordHistogramQuantiles { - addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode) - } - addExpected(reg.prefix, data.name+"-count", reg.source, 100, 1, reg.isNode) default: t.Fatalf("unexpected: %+v", data) } diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index 4fae18c8509d..c89c9a099f1a 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -444,16 +444,21 @@ func makeMetrics(internal bool) Metrics { SQLOptPlanCacheHits: metric.NewCounter(getMetricMeta(MetaSQLOptPlanCacheHits, internal)), SQLOptPlanCacheMisses: metric.NewCounter(getMetricMeta(MetaSQLOptPlanCacheMisses, internal)), // TODO(mrtracy): See HistogramWindowInterval in server/config.go for the 6x factor. - DistSQLExecLatency: metric.NewLatency(getMetricMeta(MetaDistSQLExecLatency, internal), - 6*metricsSampleInterval), - SQLExecLatency: metric.NewLatency(getMetricMeta(MetaSQLExecLatency, internal), - 6*metricsSampleInterval), - DistSQLServiceLatency: metric.NewLatency(getMetricMeta(MetaDistSQLServiceLatency, internal), - 6*metricsSampleInterval), - SQLServiceLatency: metric.NewLatency(getMetricMeta(MetaSQLServiceLatency, internal), - 6*metricsSampleInterval), - SQLTxnLatency: metric.NewLatency(getMetricMeta(MetaSQLTxnLatency, internal), - 6*metricsSampleInterval), + DistSQLExecLatency: metric.NewHistogram( + getMetricMeta(MetaDistSQLExecLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets, + ), + SQLExecLatency: metric.NewHistogram( + getMetricMeta(MetaSQLExecLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets, + ), + DistSQLServiceLatency: metric.NewHistogram( + getMetricMeta(MetaDistSQLServiceLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets, + ), + SQLServiceLatency: metric.NewHistogram( + getMetricMeta(MetaSQLServiceLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets, + ), + SQLTxnLatency: metric.NewHistogram( + getMetricMeta(MetaSQLTxnLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets, + ), SQLTxnsOpen: metric.NewGauge(getMetricMeta(MetaSQLTxnsOpen, internal)), SQLActiveStatements: metric.NewGauge(getMetricMeta(MetaSQLActiveQueries, internal)), SQLContendedTxns: metric.NewCounter(getMetricMeta(MetaSQLTxnContended, internal)), @@ -480,26 +485,24 @@ func makeServerMetrics(cfg *ExecutorConfig) ServerMetrics { SQLStatsMemoryMaxBytesHist: metric.NewHistogram( MetaSQLStatsMemMaxBytes, cfg.HistogramWindowInterval, - log10int64times1000, - 3, /* sigFigs */ + metric.MemoryUsage64MBBuckets, ), SQLStatsMemoryCurBytesCount: metric.NewGauge(MetaSQLStatsMemCurBytes), ReportedSQLStatsMemoryMaxBytesHist: metric.NewHistogram( MetaReportedSQLStatsMemMaxBytes, cfg.HistogramWindowInterval, - log10int64times1000, - 3, /* sigFigs */ + metric.MemoryUsage64MBBuckets, ), ReportedSQLStatsMemoryCurBytesCount: metric.NewGauge(MetaReportedSQLStatsMemCurBytes), DiscardedStatsCount: metric.NewCounter(MetaDiscardedSQLStats), SQLStatsFlushStarted: metric.NewCounter(MetaSQLStatsFlushStarted), SQLStatsFlushFailure: metric.NewCounter(MetaSQLStatsFlushFailure), - SQLStatsFlushDuration: metric.NewLatency( - MetaSQLStatsFlushDuration, 6*metricsSampleInterval, + SQLStatsFlushDuration: metric.NewHistogram( + MetaSQLStatsFlushDuration, 6*metricsSampleInterval, metric.IOLatencyBuckets, ), SQLStatsRemovedRows: metric.NewCounter(MetaSQLStatsRemovedRows), - SQLTxnStatsCollectionOverhead: metric.NewLatency( - MetaSQLTxnStatsCollectionOverhead, 6*metricsSampleInterval, + SQLTxnStatsCollectionOverhead: metric.NewHistogram( + MetaSQLTxnStatsCollectionOverhead, 6*metricsSampleInterval, metric.IOLatencyBuckets, ), }, ContentionSubsystemMetrics: txnidcache.NewMetrics(), diff --git a/pkg/sql/execinfra/metrics.go b/pkg/sql/execinfra/metrics.go index b827c8936519..85f36259b959 100644 --- a/pkg/sql/execinfra/metrics.go +++ b/pkg/sql/execinfra/metrics.go @@ -141,10 +141,6 @@ var ( } ) -// See pkg/sql/mem_metrics.go -// log10int64times1000 = log10(math.MaxInt64) * 1000, rounded up somewhat -const log10int64times1000 = 19 * 1000 - // MakeDistSQLMetrics instantiates the metrics holder for DistSQL monitoring. func MakeDistSQLMetrics(histogramWindow time.Duration) DistSQLMetrics { return DistSQLMetrics{ @@ -155,12 +151,12 @@ func MakeDistSQLMetrics(histogramWindow time.Duration) DistSQLMetrics { FlowsTotal: metric.NewCounter(metaFlowsTotal), FlowsQueued: metric.NewGauge(metaFlowsQueued), FlowsScheduled: metric.NewCounter(metaFlowsScheduled), - QueueWaitHist: metric.NewLatency(metaQueueWaitHist, histogramWindow), - MaxBytesHist: metric.NewHistogram(metaMemMaxBytes, histogramWindow, log10int64times1000, 3), + QueueWaitHist: metric.NewHistogram(metaQueueWaitHist, histogramWindow, metric.IOLatencyBuckets), + MaxBytesHist: metric.NewHistogram(metaMemMaxBytes, histogramWindow, metric.MemoryUsage64MBBuckets), CurBytesCount: metric.NewGauge(metaMemCurBytes), VecOpenFDs: metric.NewGauge(metaVecOpenFDs), CurDiskBytesCount: metric.NewGauge(metaDiskCurBytes), - MaxDiskBytesHist: metric.NewHistogram(metaDiskMaxBytes, histogramWindow, log10int64times1000, 3), + MaxDiskBytesHist: metric.NewHistogram(metaDiskMaxBytes, histogramWindow, metric.MemoryUsage64MBBuckets), QueriesSpilled: metric.NewCounter(metaQueriesSpilled), SpilledBytesWritten: metric.NewCounter(metaSpilledBytesWritten), SpilledBytesRead: metric.NewCounter(metaSpilledBytesRead), diff --git a/pkg/sql/mem_metrics.go b/pkg/sql/mem_metrics.go index aa3ce8a6416d..db2198d6bfd8 100644 --- a/pkg/sql/mem_metrics.go +++ b/pkg/sql/mem_metrics.go @@ -41,22 +41,6 @@ func (MemoryMetrics) MetricStruct() {} var _ metric.Struct = MemoryMetrics{} -// TODO(knz): Until #10014 is addressed, the UI graphs don't have a -// log scale on the Y axis and the histograms are thus displayed using -// a manual log scale: we store the logarithm in the value in the DB -// and plot that logarithm in the UI. -// -// We could, but do not, store the full value in the DB and compute -// the log in the UI, because the current histogram implementation -// does not deal well with large maxima (#10015). -// -// Since the DB stores an integer, we scale the values by 1000 so that -// a modicum of precision is restored when exponentiating the value. -// - -// log10int64times1000 = log10(math.MaxInt64) * 1000, rounded up somewhat -const log10int64times1000 = 19 * 1000 - func makeMemMetricMetadata(name, help string) metric.Metadata { return metric.Metadata{ Name: name, @@ -73,7 +57,7 @@ func MakeBaseMemMetrics(endpoint string, histogramWindow time.Duration) BaseMemo MetaMemMaxBytes := makeMemMetricMetadata(prefix+".max", "Memory usage per sql statement for "+endpoint) MetaMemCurBytes := makeMemMetricMetadata(prefix+".current", "Current sql statement memory usage for "+endpoint) return BaseMemoryMetrics{ - MaxBytesHist: metric.NewHistogram(MetaMemMaxBytes, histogramWindow, log10int64times1000, 3), + MaxBytesHist: metric.NewHistogram(MetaMemMaxBytes, histogramWindow, metric.MemoryUsage64MBBuckets), CurBytesCount: metric.NewGauge(MetaMemCurBytes), } } @@ -88,9 +72,9 @@ func MakeMemMetrics(endpoint string, histogramWindow time.Duration) MemoryMetric MetaMemSessionCurBytes := makeMemMetricMetadata(prefix+".session.current", "Current sql session memory usage for "+endpoint) return MemoryMetrics{ BaseMemoryMetrics: base, - TxnMaxBytesHist: metric.NewHistogram(MetaMemMaxTxnBytes, histogramWindow, log10int64times1000, 3), + TxnMaxBytesHist: metric.NewHistogram(MetaMemMaxTxnBytes, histogramWindow, metric.MemoryUsage64MBBuckets), TxnCurBytesCount: metric.NewGauge(MetaMemTxnCurBytes), - SessionMaxBytesHist: metric.NewHistogram(MetaMemMaxSessionBytes, histogramWindow, log10int64times1000, 3), + SessionMaxBytesHist: metric.NewHistogram(MetaMemMaxSessionBytes, histogramWindow, metric.MemoryUsage64MBBuckets), SessionCurBytesCount: metric.NewGauge(MetaMemSessionCurBytes), } diff --git a/pkg/sql/pgwire/server.go b/pkg/sql/pgwire/server.go index 5757f49777e4..7e6993d1ca5a 100644 --- a/pkg/sql/pgwire/server.go +++ b/pkg/sql/pgwire/server.go @@ -292,11 +292,13 @@ func makeServerMetrics( sqlMemMetrics sql.MemoryMetrics, histogramWindow time.Duration, ) ServerMetrics { return ServerMetrics{ - BytesInCount: metric.NewCounter(MetaBytesIn), - BytesOutCount: metric.NewCounter(MetaBytesOut), - Conns: metric.NewGauge(MetaConns), - NewConns: metric.NewCounter(MetaNewConns), - ConnLatency: metric.NewLatency(MetaConnLatency, histogramWindow), + BytesInCount: metric.NewCounter(MetaBytesIn), + BytesOutCount: metric.NewCounter(MetaBytesOut), + Conns: metric.NewGauge(MetaConns), + NewConns: metric.NewCounter(MetaNewConns), + ConnLatency: metric.NewHistogram( + MetaConnLatency, histogramWindow, metric.IOLatencyBuckets, + ), ConnFailures: metric.NewCounter(MetaConnFailures), PGWireCancelTotalCount: metric.NewCounter(MetaPGWireCancelTotal), PGWireCancelIgnoredCount: metric.NewCounter(MetaPGWireCancelIgnored), diff --git a/pkg/sql/sqlstats/persistedsqlstats/bench_test.go b/pkg/sql/sqlstats/persistedsqlstats/bench_test.go index ebb10adde996..0a8c7cd2d030 100644 --- a/pkg/sql/sqlstats/persistedsqlstats/bench_test.go +++ b/pkg/sql/sqlstats/persistedsqlstats/bench_test.go @@ -74,7 +74,7 @@ func BenchmarkConcurrentSelect1(b *testing.B) { sqlServer.ServerMetrics. StatsMetrics. SQLTxnStatsCollectionOverhead. - Snapshot().Mean(), + Mean(), "overhead(ns/op)") }) } diff --git a/pkg/sql/ttl/ttljob/ttljob_metrics.go b/pkg/sql/ttl/ttljob/ttljob_metrics.go index ee23a7d70e26..bbfcf8614c74 100644 --- a/pkg/sql/ttl/ttljob/ttljob_metrics.go +++ b/pkg/sql/ttl/ttljob/ttljob_metrics.go @@ -96,7 +96,6 @@ func (m *RowLevelTTLAggMetrics) loadMetrics(labelMetrics bool, relation string) } func makeRowLevelTTLAggMetrics(histogramWindowInterval time.Duration) metric.Struct { - sigFigs := 2 b := aggmetric.MakeBuilder("relation") ret := &RowLevelTTLAggMetrics{ RangeTotalDuration: b.Histogram( @@ -108,8 +107,7 @@ func makeRowLevelTTLAggMetrics(histogramWindowInterval time.Duration) metric.Str MetricType: io_prometheus_client.MetricType_HISTOGRAM, }, histogramWindowInterval, - time.Hour.Nanoseconds(), - sigFigs, + metric.LongRunning60mLatencyBuckets, ), SelectDuration: b.Histogram( metric.Metadata{ @@ -120,8 +118,7 @@ func makeRowLevelTTLAggMetrics(histogramWindowInterval time.Duration) metric.Str MetricType: io_prometheus_client.MetricType_HISTOGRAM, }, histogramWindowInterval, - time.Minute.Nanoseconds(), - sigFigs, + metric.BatchProcessLatencyBuckets, ), DeleteDuration: b.Histogram( metric.Metadata{ @@ -132,8 +129,7 @@ func makeRowLevelTTLAggMetrics(histogramWindowInterval time.Duration) metric.Str MetricType: io_prometheus_client.MetricType_HISTOGRAM, }, histogramWindowInterval, - time.Minute.Nanoseconds(), - sigFigs, + metric.BatchProcessLatencyBuckets, ), RowSelections: b.Counter( metric.Metadata{ diff --git a/pkg/util/admission/work_queue.go b/pkg/util/admission/work_queue.go index c51e42167bc4..60a5e5133be8 100644 --- a/pkg/util/admission/work_queue.go +++ b/pkg/util/admission/work_queue.go @@ -1538,8 +1538,9 @@ func makeWorkQueueMetrics(name string) WorkQueueMetrics { Admitted: metric.NewCounter(addName(name, admittedMeta)), Errored: metric.NewCounter(addName(name, erroredMeta)), WaitDurationSum: metric.NewCounter(addName(name, waitDurationSumMeta)), - WaitDurations: metric.NewLatency( - addName(name, waitDurationsMeta), base.DefaultHistogramWindowInterval()), + WaitDurations: metric.NewHistogram( + addName(name, waitDurationsMeta), base.DefaultHistogramWindowInterval(), metric.IOLatencyBuckets, + ), WaitQueueLength: metric.NewGauge(addName(name, waitQueueLengthMeta)), } } diff --git a/pkg/util/metric/BUILD.bazel b/pkg/util/metric/BUILD.bazel index 2bbc8c8b10d8..fe1ad312f84d 100644 --- a/pkg/util/metric/BUILD.bazel +++ b/pkg/util/metric/BUILD.bazel @@ -8,6 +8,7 @@ go_library( srcs = [ "doc.go", "graphite_exporter.go", + "histogram_buckets.go", "metric.go", "prometheus_exporter.go", "prometheus_rule_exporter.go", @@ -25,7 +26,6 @@ go_library( "//pkg/util/syncutil", "//pkg/util/timeutil", "@com_github_cockroachdb_errors//:errors", - "@com_github_codahale_hdrhistogram//:hdrhistogram", "@com_github_gogo_protobuf//proto", "@com_github_prometheus_client_golang//prometheus", "@com_github_prometheus_client_golang//prometheus/graphite", @@ -41,6 +41,7 @@ go_test( name = "metric_test", size = "small", srcs = [ + "histogram_buckets_test.go", "metric_test.go", "prometheus_exporter_test.go", "prometheus_rule_exporter_test.go", @@ -51,6 +52,7 @@ go_test( embed = [":metric"], deps = [ "//pkg/util/log", + "@com_github_dustin_go_humanize//:go-humanize", "@com_github_kr_pretty//:pretty", "@com_github_prometheus_client_golang//prometheus", "@com_github_prometheus_client_model//go", diff --git a/pkg/util/metric/aggmetric/BUILD.bazel b/pkg/util/metric/aggmetric/BUILD.bazel index 7d6ea003d48e..4f66e44deba4 100644 --- a/pkg/util/metric/aggmetric/BUILD.bazel +++ b/pkg/util/metric/aggmetric/BUILD.bazel @@ -15,9 +15,9 @@ go_library( "//pkg/util/metric", "//pkg/util/syncutil", "@com_github_cockroachdb_errors//:errors", - "@com_github_codahale_hdrhistogram//:hdrhistogram", "@com_github_gogo_protobuf//proto", "@com_github_google_btree//:btree", + "@com_github_prometheus_client_golang//prometheus", "@com_github_prometheus_client_model//go", ], ) diff --git a/pkg/util/metric/aggmetric/agg_metric.go b/pkg/util/metric/aggmetric/agg_metric.go index d3fbb0400f65..c9afb965d64f 100644 --- a/pkg/util/metric/aggmetric/agg_metric.go +++ b/pkg/util/metric/aggmetric/agg_metric.go @@ -51,9 +51,9 @@ func (b Builder) Counter(metadata metric.Metadata) *AggCounter { // Histogram constructs a new AggHistogram with the Builder's labels. func (b Builder) Histogram( - metadata metric.Metadata, duration time.Duration, maxVal int64, sigFigs int, + metadata metric.Metadata, duration time.Duration, buckets []float64, ) *AggHistogram { - return NewHistogram(metadata, duration, maxVal, sigFigs, b.labels...) + return NewHistogram(metadata, duration, buckets, b.labels...) } type childSet struct { diff --git a/pkg/util/metric/aggmetric/agg_metric_test.go b/pkg/util/metric/aggmetric/agg_metric_test.go index 68574822cd5b..acb9b9883b97 100644 --- a/pkg/util/metric/aggmetric/agg_metric_test.go +++ b/pkg/util/metric/aggmetric/agg_metric_test.go @@ -64,7 +64,7 @@ func TestAggMetric(t *testing.T) { h := aggmetric.NewHistogram(metric.Metadata{ Name: "histo_gram", - }, base.DefaultHistogramWindowInterval(), 100, 1, "tenant_id") + }, base.DefaultHistogramWindowInterval(), metric.Count1KBuckets, "tenant_id") r.AddMetric(h) tenant2 := roachpb.MakeTenantID(2) @@ -99,18 +99,47 @@ foo_counter 6 foo_counter{tenant_id="2"} 2 foo_counter{tenant_id="3"} 4 histo_gram_bucket{le="+Inf"} 2 -histo_gram_bucket{le="10"} 1 -histo_gram_bucket{le="91"} 2 +histo_gram_bucket{le="1"} 0 +histo_gram_bucket{le="1024"} 2 +histo_gram_bucket{le="128"} 2 +histo_gram_bucket{le="16"} 1 +histo_gram_bucket{le="2"} 0 +histo_gram_bucket{le="256"} 2 +histo_gram_bucket{le="32"} 1 +histo_gram_bucket{le="4"} 0 +histo_gram_bucket{le="512"} 2 +histo_gram_bucket{le="64"} 1 +histo_gram_bucket{le="8"} 0 histo_gram_bucket{tenant_id="2",le="+Inf"} 1 -histo_gram_bucket{tenant_id="2",le="10"} 1 +histo_gram_bucket{tenant_id="2",le="1"} 0 +histo_gram_bucket{tenant_id="2",le="1024"} 1 +histo_gram_bucket{tenant_id="2",le="128"} 1 +histo_gram_bucket{tenant_id="2",le="16"} 1 +histo_gram_bucket{tenant_id="2",le="2"} 0 +histo_gram_bucket{tenant_id="2",le="256"} 1 +histo_gram_bucket{tenant_id="2",le="32"} 1 +histo_gram_bucket{tenant_id="2",le="4"} 0 +histo_gram_bucket{tenant_id="2",le="512"} 1 +histo_gram_bucket{tenant_id="2",le="64"} 1 +histo_gram_bucket{tenant_id="2",le="8"} 0 histo_gram_bucket{tenant_id="3",le="+Inf"} 1 -histo_gram_bucket{tenant_id="3",le="91"} 1 +histo_gram_bucket{tenant_id="3",le="1"} 0 +histo_gram_bucket{tenant_id="3",le="1024"} 1 +histo_gram_bucket{tenant_id="3",le="128"} 1 +histo_gram_bucket{tenant_id="3",le="16"} 0 +histo_gram_bucket{tenant_id="3",le="2"} 0 +histo_gram_bucket{tenant_id="3",le="256"} 1 +histo_gram_bucket{tenant_id="3",le="32"} 0 +histo_gram_bucket{tenant_id="3",le="4"} 0 +histo_gram_bucket{tenant_id="3",le="512"} 1 +histo_gram_bucket{tenant_id="3",le="64"} 0 +histo_gram_bucket{tenant_id="3",le="8"} 0 histo_gram_count 2 histo_gram_count{tenant_id="2"} 1 histo_gram_count{tenant_id="3"} 1 -histo_gram_sum 101 +histo_gram_sum 100 histo_gram_sum{tenant_id="2"} 10 -histo_gram_sum{tenant_id="3"} 91`, +histo_gram_sum{tenant_id="3"} 90`, writePrometheusMetrics(t)) }) @@ -127,13 +156,32 @@ baz_gauge{tenant_id="2"} 1.5 foo_counter 6 foo_counter{tenant_id="3"} 4 histo_gram_bucket{le="+Inf"} 2 -histo_gram_bucket{le="10"} 1 -histo_gram_bucket{le="91"} 2 +histo_gram_bucket{le="1"} 0 +histo_gram_bucket{le="1024"} 2 +histo_gram_bucket{le="128"} 2 +histo_gram_bucket{le="16"} 1 +histo_gram_bucket{le="2"} 0 +histo_gram_bucket{le="256"} 2 +histo_gram_bucket{le="32"} 1 +histo_gram_bucket{le="4"} 0 +histo_gram_bucket{le="512"} 2 +histo_gram_bucket{le="64"} 1 +histo_gram_bucket{le="8"} 0 histo_gram_bucket{tenant_id="2",le="+Inf"} 1 -histo_gram_bucket{tenant_id="2",le="10"} 1 +histo_gram_bucket{tenant_id="2",le="1"} 0 +histo_gram_bucket{tenant_id="2",le="1024"} 1 +histo_gram_bucket{tenant_id="2",le="128"} 1 +histo_gram_bucket{tenant_id="2",le="16"} 1 +histo_gram_bucket{tenant_id="2",le="2"} 0 +histo_gram_bucket{tenant_id="2",le="256"} 1 +histo_gram_bucket{tenant_id="2",le="32"} 1 +histo_gram_bucket{tenant_id="2",le="4"} 0 +histo_gram_bucket{tenant_id="2",le="512"} 1 +histo_gram_bucket{tenant_id="2",le="64"} 1 +histo_gram_bucket{tenant_id="2",le="8"} 0 histo_gram_count 2 histo_gram_count{tenant_id="2"} 1 -histo_gram_sum 101 +histo_gram_sum 100 histo_gram_sum{tenant_id="2"} 10`, writePrometheusMetrics(t)) }) @@ -167,15 +215,45 @@ foo_counter 6 foo_counter{tenant_id="2"} 0 foo_counter{tenant_id="3"} 4 histo_gram_bucket{le="+Inf"} 2 -histo_gram_bucket{le="10"} 1 -histo_gram_bucket{le="91"} 2 +histo_gram_bucket{le="1"} 0 +histo_gram_bucket{le="1024"} 2 +histo_gram_bucket{le="128"} 2 +histo_gram_bucket{le="16"} 1 +histo_gram_bucket{le="2"} 0 +histo_gram_bucket{le="256"} 2 +histo_gram_bucket{le="32"} 1 +histo_gram_bucket{le="4"} 0 +histo_gram_bucket{le="512"} 2 +histo_gram_bucket{le="64"} 1 +histo_gram_bucket{le="8"} 0 histo_gram_bucket{tenant_id="2",le="+Inf"} 1 -histo_gram_bucket{tenant_id="2",le="10"} 1 +histo_gram_bucket{tenant_id="2",le="1"} 0 +histo_gram_bucket{tenant_id="2",le="1024"} 1 +histo_gram_bucket{tenant_id="2",le="128"} 1 +histo_gram_bucket{tenant_id="2",le="16"} 1 +histo_gram_bucket{tenant_id="2",le="2"} 0 +histo_gram_bucket{tenant_id="2",le="256"} 1 +histo_gram_bucket{tenant_id="2",le="32"} 1 +histo_gram_bucket{tenant_id="2",le="4"} 0 +histo_gram_bucket{tenant_id="2",le="512"} 1 +histo_gram_bucket{tenant_id="2",le="64"} 1 +histo_gram_bucket{tenant_id="2",le="8"} 0 histo_gram_bucket{tenant_id="3",le="+Inf"} 0 +histo_gram_bucket{tenant_id="3",le="1"} 0 +histo_gram_bucket{tenant_id="3",le="1024"} 0 +histo_gram_bucket{tenant_id="3",le="128"} 0 +histo_gram_bucket{tenant_id="3",le="16"} 0 +histo_gram_bucket{tenant_id="3",le="2"} 0 +histo_gram_bucket{tenant_id="3",le="256"} 0 +histo_gram_bucket{tenant_id="3",le="32"} 0 +histo_gram_bucket{tenant_id="3",le="4"} 0 +histo_gram_bucket{tenant_id="3",le="512"} 0 +histo_gram_bucket{tenant_id="3",le="64"} 0 +histo_gram_bucket{tenant_id="3",le="8"} 0 histo_gram_count 2 histo_gram_count{tenant_id="2"} 1 histo_gram_count{tenant_id="3"} 0 -histo_gram_sum 101 +histo_gram_sum 100 histo_gram_sum{tenant_id="2"} 10 histo_gram_sum{tenant_id="3"} 0`, writePrometheusMetrics(t)) @@ -195,7 +273,7 @@ func TestAggMetricBuilder(t *testing.T) { g := b.Gauge(metric.Metadata{Name: "bar_gauge"}) f := b.GaugeFloat64(metric.Metadata{Name: "baz_gauge"}) h := b.Histogram(metric.Metadata{Name: "histo_gram"}, - base.DefaultHistogramWindowInterval(), 100, 1) + base.DefaultHistogramWindowInterval(), metric.Count1KBuckets) for i := 5; i < 10; i++ { tenantLabel := roachpb.MakeTenantID(uint64(i)).String() diff --git a/pkg/util/metric/aggmetric/histogram.go b/pkg/util/metric/aggmetric/histogram.go index e3b710a30c5a..f4819aecd50e 100644 --- a/pkg/util/metric/aggmetric/histogram.go +++ b/pkg/util/metric/aggmetric/histogram.go @@ -14,7 +14,7 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/util/metric" - "github.com/codahale/hdrhistogram" + "github.com/prometheus/client_golang/prometheus" io_prometheus_client "github.com/prometheus/client_model/go" ) @@ -34,14 +34,10 @@ var _ metric.PrometheusExportable = (*AggHistogram)(nil) // NewHistogram constructs a new AggHistogram. func NewHistogram( - metadata metric.Metadata, - duration time.Duration, - maxVal int64, - sigFigs int, - childLabels ...string, + metadata metric.Metadata, duration time.Duration, buckets []float64, childLabels ...string, ) *AggHistogram { create := func() *metric.Histogram { - return metric.NewHistogram(metadata, duration, maxVal, sigFigs) + return metric.NewHistogram(metadata, duration, buckets) } a := &AggHistogram{ h: *create(), @@ -86,7 +82,7 @@ func (a *AggHistogram) ToPrometheusMetric() *io_prometheus_client.Metric { // Windowed returns a copy of the current windowed histogram data and its // rotation interval. -func (a *AggHistogram) Windowed() (*hdrhistogram.Histogram, time.Duration) { +func (a *AggHistogram) Windowed() prometheus.Histogram { return a.h.Windowed() } diff --git a/pkg/util/metric/histogram_buckets.go b/pkg/util/metric/histogram_buckets.go new file mode 100644 index 000000000000..8ce3c9dce884 --- /dev/null +++ b/pkg/util/metric/histogram_buckets.go @@ -0,0 +1,178 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package metric + +// IOLatencyBuckets are prometheus histogram buckets suitable for a histogram +// that records a quantity (nanosecond-denominated) in which most measurements +// resemble those of typical disk latencies, i.e. which are in the micro- and +// millisecond range during normal operation. +var IOLatencyBuckets = []float64{ + // Generated via TestHistogramBuckets/IOLatencyBuckets. + 10000.000000, // 10µs + 26826.957953, // 26.826µs + 71968.567300, // 71.968µs + 193069.772888, // 193.069µs + 517947.467923, // 517.947µs + 1389495.494373, // 1.389495ms + 3727593.720315, // 3.727593ms + 10000000.000000, // 9.999999ms + 26826957.952797, // 26.826957ms + 71968567.300115, // 71.968567ms + 193069772.888325, // 193.069772ms + 517947467.923120, // 517.947467ms + 1389495494.373135, // 1.389495494s + 3727593720.314933, // 3.72759372s + 9999999999.999981, // 9.999999999s +} + +// NetworkLatencyBuckets are prometheus histogram buckets suitable for a histogram +// that records a quantity (nanosecond-denominated) in which most measurements +// behave like network latencies, i.e. most measurements are in the ms to sub-second +// range during normal operation. +var NetworkLatencyBuckets = []float64{ + // Generated via TestHistogramBuckets/NetworkLatencyBuckets. + 500000.000000, // 500µs + 860513.842995, // 860.513µs + 1480968.147973, // 1.480968ms + 2548787.184731, // 2.548787ms + 4386533.310619, // 4.386533ms + 7549345.273094, // 7.549345ms + 12992632.226094, // 12.992632ms + 22360679.774998, // 22.360679ms + 38483348.970335, // 38.483348ms + 66230909.027573, // 66.230909ms + 113985228.104760, // 113.985228ms + 196171733.362212, // 196.171733ms + 337616984.325077, // 337.616984ms + 581048177.284016, // 581.048177ms + 999999999.999999, // 999.999999ms +} + +// BatchProcessLatencyBuckets are prometheus histogram buckets suitable for a +// histogram that records a quantity (nanosecond-denominated) in which most +// measurements are in the seconds to minutes range during normal operation. +var BatchProcessLatencyBuckets = []float64{ + // Generated via TestHistogramBuckets/BatchProcessLatencyBuckets. + 500000000.000000, // 500ms + 789604072.059876, // 789.604072ms + 1246949181.227077, // 1.246949181s + 1969192302.297256, // 1.969192302s + 3109764521.125753, // 3.109764521s + 4910965458.056452, // 4.910965458s + 7755436646.853539, // 7.755436646s + 12247448713.915894, // 12.247448713s + 19341270753.704967, // 19.341270753s + 30543892291.876068, // 30.543892291s + 48235163460.447227, // 48.23516346s + 76173362969.685760, // 1m16.173362969s + 120293595166.717728, // 2m0.293595166s + 189968625172.725128, // 3m9.968625172s + 300000000000.000183, // 5m0s +} + +// LongRunning60mLatencyBuckets are prometheus histogram buckets suitable +// for a histogram that records a quantity (nanosecond-denominated) for +// long-running processes (multiple minutes). +var LongRunning60mLatencyBuckets = []float64{ + // Generated via TestHistogramBuckets/LongRunning60mLatencyBuckets. + 500000000.000000, // 500ms + 942961049.923126, // 942.961049ms + 1778351083.344248, // 1.778351083s + 3353831609.364442, // 3.353831609s + 6325065151.263324, // 6.325065151s + 11928580151.734879, // 11.928580151s + 22496372927.944168, // 22.496372927s + 42426406871.192848, // 42.426406871s + 80012898335.451462, // 1m20.012898335s + 150898093243.579315, // 2m30.898093243s + 284582048872.726685, // 4m44.582048872s + 536699575188.601318, // 8m56.699575188s + 1012173589826.278687, // 16m52.173589826s + 1908880541934.094238, // 31m48.880541934s + 3599999999999.998535, // 59m59.999999999s +} + +// Count1KBuckets are prometheus histogram buckets suitable for a histogram that +// records a quantity that is a count (unit-less) in which most measurements are +// in the 1 to ~1000 range during normal operation. +var Count1KBuckets = []float64{ + // Generated via TestHistogramBuckets/Count1KBuckets. + 1.000000, + 2.000000, + 4.000000, + 8.000000, + 16.000000, + 32.000000, + 64.000000, + 128.000000, + 256.000000, + 512.000000, + 1024.000000, +} + +// Percent100Buckets are prometheus histogram buckets suitable for a histogram that +// records a percent quantity [0,100] +var Percent100Buckets = []float64{ + // Generated via TestHistogramBuckets/Percent100Buckets. + 10.000000, + 20.000000, + 30.000000, + 40.000000, + 50.000000, + 60.000000, + 70.000000, + 80.000000, + 90.000000, + 100.000000, +} + +// DataSize16MBBuckets are prometheus histogram buckets suitable for a histogram that +// records a quantity that is a size (byte-denominated) in which most measurements are +// in the kB to MB range during normal operation. +var DataSize16MBBuckets = []float64{ + // Generated via TestHistogramBuckets/DataSize16MBBuckets. + 1000.000000, // 1.0 kB + 2000.000000, // 2.0 kB + 4000.000000, // 4.0 kB + 8000.000000, // 8.0 kB + 16000.000000, // 16 kB + 32000.000000, // 32 kB + 64000.000000, // 64 kB + 128000.000000, // 128 kB + 256000.000000, // 256 kB + 512000.000000, // 512 kB + 1024000.000000, // 1.0 MB + 2048000.000000, // 2.0 MB + 4096000.000000, // 4.1 MB + 8192000.000000, // 8.2 MB + 16384000.000000, // 16 MB +} + +// MemoryUsage64MBBuckets are prometheus histogram buckets suitable for a histogram that +// records memory usage (in Bytes) +var MemoryUsage64MBBuckets = []float64{ + // Generated via TestHistogramBuckets/MemoryUsage64MBBuckets. + 1.000000, // 1 B + 3.610641, // 3 B + 13.036727, // 13 B + 47.070938, // 47 B + 169.956248, // 169 B + 613.650962, // 613 B + 2215.673192, // 2.2 kB + 8000.000000, // 8.0 kB + 28885.126301, // 29 kB + 104293.815179, // 104 kB + 376567.502984, // 377 kB + 1359649.985574, // 1.4 MB + 4909207.694830, // 4.9 MB + 17725385.537954, // 18 MB + 64000000.000000, // 64 MB +} diff --git a/pkg/util/metric/histogram_buckets_test.go b/pkg/util/metric/histogram_buckets_test.go new file mode 100644 index 000000000000..7fb183d70bf6 --- /dev/null +++ b/pkg/util/metric/histogram_buckets_test.go @@ -0,0 +1,89 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package metric + +import ( + "fmt" + "strings" + "testing" + "time" + + "github.com/dustin/go-humanize" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" +) + +const LATENCY = "LATENCY" +const SIZE = "SIZE" + +// TestHistogramBuckets is used to generate additional prometheus buckets to be +// used with Histogram. Please include obs-inf in the review process of new +// buckets. +func TestHistogramBuckets(t *testing.T) { + verifyAndPrint := func(t *testing.T, exp, act []float64, histType string) { + t.Helper() + var buf strings.Builder + for idx, f := range exp { + if idx == 0 { + fmt.Fprintf(&buf, "// Generated via %s.", t.Name()) + } + switch histType { + case LATENCY: + fmt.Fprintf(&buf, "\n%f, // %s", f, time.Duration(f)) + case SIZE: + fmt.Fprintf(&buf, "\n%f, // %s", f, humanize.Bytes(uint64(f))) + default: + fmt.Fprintf(&buf, "\n%f,", f) + } + } + t.Logf("%s", &buf) + require.InDeltaSlice(t, exp, act, 1 /* delta */, "Please update the bucket boundaries for %s", t.Name()) + } + t.Run("IOLatencyBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBucketsRange(10e3, 10e9, 15) + verifyAndPrint(t, exp, IOLatencyBuckets, LATENCY) + }) + + t.Run("NetworkLatencyBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBucketsRange(500e3, 1e9, 15) + verifyAndPrint(t, exp, NetworkLatencyBuckets, LATENCY) + }) + + t.Run("BatchProcessLatencyBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBucketsRange(500e6, 300e9, 15) + verifyAndPrint(t, exp, BatchProcessLatencyBuckets, LATENCY) + }) + + t.Run("LongRunning60mLatencyBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBucketsRange(500e6, 3600e9, 15) + verifyAndPrint(t, exp, LongRunning60mLatencyBuckets, LATENCY) + }) + + t.Run("Count1KBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBuckets(1, 2, 11) + verifyAndPrint(t, exp, Count1KBuckets, "") + }) + + t.Run("Percent100Buckets", func(t *testing.T) { + exp := prometheus.LinearBuckets(10, 10, 10) + verifyAndPrint(t, exp, Percent100Buckets, "") + }) + + t.Run("DataSize16MBBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBuckets(1e3, 2, 15) + verifyAndPrint(t, exp, DataSize16MBBuckets, SIZE) + }) + + t.Run("MemoryUsage64MBBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBucketsRange(1, 64e6, 15) + verifyAndPrint(t, exp, MemoryUsage64MBBuckets, SIZE) + }) +} diff --git a/pkg/util/metric/metric.go b/pkg/util/metric/metric.go index 46981240b87b..98ffcabd469d 100644 --- a/pkg/util/metric/metric.go +++ b/pkg/util/metric/metric.go @@ -19,7 +19,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" - "github.com/codahale/hdrhistogram" "github.com/gogo/protobuf/proto" "github.com/prometheus/client_golang/prometheus" prometheusgo "github.com/prometheus/client_model/go" @@ -27,16 +26,9 @@ import ( ) const ( - // MaxLatency is the maximum value tracked in latency histograms. Higher - // values will be recorded as this value instead. - MaxLatency = 10 * time.Second - // TestSampleInterval is passed to histograms during tests which don't // want to concern themselves with supplying a "correct" interval. TestSampleInterval = time.Duration(math.MaxInt64) - - // The number of histograms to keep in rolling window. - histWrapNum = 2 ) // Iterable provides a method for synchronized access to interior objects. @@ -135,7 +127,6 @@ func (m *Metadata) AddLabel(name, value string) { var _ Iterable = &Gauge{} var _ Iterable = &GaugeFloat64{} var _ Iterable = &Counter{} -var _ Iterable = &Histogram{} var _ json.Marshaler = &Gauge{} var _ json.Marshaler = &GaugeFloat64{} @@ -145,7 +136,6 @@ var _ json.Marshaler = &Registry{} var _ PrometheusExportable = &Gauge{} var _ PrometheusExportable = &GaugeFloat64{} var _ PrometheusExportable = &Counter{} -var _ PrometheusExportable = &Histogram{} type periodic interface { nextTick() time.Time @@ -164,230 +154,23 @@ func TestingSetNow(f func() time.Time) func() { } } -func cloneHistogram(in *hdrhistogram.Histogram) *hdrhistogram.Histogram { - return hdrhistogram.Import(in.Export()) -} - func maybeTick(m periodic) { for m.nextTick().Before(now()) { m.tick() } } -// A Histogram collects observed values by keeping bucketed counts. For -// convenience, internally two sets of buckets are kept: A cumulative set (i.e. -// data is never evicted) and a windowed set (which keeps only recently -// collected samples). -// -// Top-level methods generally apply to the cumulative buckets; the windowed -// variant is exposed through the Windowed method. -type Histogram struct { - Metadata - maxVal int64 - mu struct { - syncutil.Mutex - cumulative *hdrhistogram.Histogram - *tickHelper - sliding *hdrhistogram.WindowedHistogram - } -} - -// NewHistogram initializes a given Histogram. The contained windowed histogram -// rotates every 'duration'; both the windowed and the cumulative histogram -// track nonnegative values up to 'maxVal' with 'sigFigs' decimal points of -// precision. -func NewHistogram(metadata Metadata, duration time.Duration, maxVal int64, sigFigs int) *Histogram { - h := &Histogram{ - Metadata: metadata, - maxVal: maxVal, - } - wHist := hdrhistogram.NewWindowed(histWrapNum, 0, maxVal, sigFigs) - h.mu.cumulative = hdrhistogram.New(0, maxVal, sigFigs) - h.mu.sliding = wHist - h.mu.tickHelper = &tickHelper{ - nextT: now(), - tickInterval: duration / histWrapNum, - onTick: func() { - wHist.Rotate() - }, - } - return h -} - -// NewLatency is a convenience function which returns a histogram with -// suitable defaults for latency tracking. Values are expressed in ns, -// are truncated into the interval [0, MaxLatency] and are recorded -// with one digit of precision (i.e. errors of <10ms at 100ms, <6s at 60s). -// -// The windowed portion of the Histogram retains values for approximately -// histogramWindow. -func NewLatency(metadata Metadata, histogramWindow time.Duration) *Histogram { - return NewHistogram( - metadata, histogramWindow, MaxLatency.Nanoseconds(), 1, - ) -} - -// Windowed returns a copy of the current windowed histogram data and its -// rotation interval. -func (h *Histogram) Windowed() (*hdrhistogram.Histogram, time.Duration) { - h.mu.Lock() - defer h.mu.Unlock() - maybeTick(h.mu.tickHelper) - // TODO(obs-inf): not sure we should multiply by histWrapNum here, but it - // has been the behavior for a long time. - return cloneHistogram(h.mu.sliding.Merge()), histWrapNum * h.mu.tickInterval -} - -// Snapshot returns a copy of the cumulative (i.e. all-time samples) histogram -// data. -func (h *Histogram) Snapshot() *hdrhistogram.Histogram { - h.mu.Lock() - defer h.mu.Unlock() - return cloneHistogram(h.mu.cumulative) -} - -// RecordValue adds the given value to the histogram. Recording a value in -// excess of the configured maximum value for that histogram results in -// recording the maximum value instead. -func (h *Histogram) RecordValue(v int64) { - h.mu.Lock() - defer h.mu.Unlock() - - if h.mu.sliding.Current.RecordValue(v) != nil { - _ = h.mu.sliding.Current.RecordValue(h.maxVal) - } - if h.mu.cumulative.RecordValue(v) != nil { - _ = h.mu.cumulative.RecordValue(h.maxVal) - } -} - -// TotalCount returns the (cumulative) number of samples. -func (h *Histogram) TotalCount() int64 { - h.mu.Lock() - defer h.mu.Unlock() - return h.mu.cumulative.TotalCount() -} - -// Min returns the minimum. -func (h *Histogram) Min() int64 { - h.mu.Lock() - defer h.mu.Unlock() - return h.mu.cumulative.Min() -} - -// Inspect calls the closure with the empty string and the receiver. -func (h *Histogram) Inspect(f func(interface{})) { - h.mu.Lock() - maybeTick(h.mu.tickHelper) - h.mu.Unlock() - f(h) -} - -// GetType returns the prometheus type enum for this metric. -func (h *Histogram) GetType() *prometheusgo.MetricType { - return prometheusgo.MetricType_HISTOGRAM.Enum() -} - -// ToPrometheusMetric returns a filled-in prometheus metric of the right type. -func (h *Histogram) ToPrometheusMetric() *prometheusgo.Metric { - hist := &prometheusgo.Histogram{} - - h.mu.Lock() - maybeTick(h.mu.tickHelper) - bars := h.mu.cumulative.Distribution() - hist.Bucket = make([]*prometheusgo.Bucket, 0, len(bars)) - - var cumCount uint64 - var sum float64 - for _, bar := range bars { - if bar.Count == 0 { - // No need to expose trivial buckets. - continue - } - upperBound := float64(bar.To) - sum += upperBound * float64(bar.Count) - - cumCount += uint64(bar.Count) - curCumCount := cumCount // need a new alloc thanks to bad proto code - - hist.Bucket = append(hist.Bucket, &prometheusgo.Bucket{ - CumulativeCount: &curCumCount, - UpperBound: &upperBound, - }) - } - hist.SampleCount = &cumCount - hist.SampleSum = &sum // can do better here; we approximate in the loop - h.mu.Unlock() - - return &prometheusgo.Metric{ - Histogram: hist, - } -} - -// GetMetadata returns the metric's metadata including the Prometheus -// MetricType. -func (h *Histogram) GetMetadata() Metadata { - baseMetadata := h.Metadata - baseMetadata.MetricType = prometheusgo.MetricType_HISTOGRAM - return baseMetadata -} - -// IOLatencyBuckets are prometheus histogram buckets suitable for a histogram -// that records a quantity (nanosecond-denominated) in which most measurements -// resemble those of typical disk latencies, i.e. which are in the micro- and -// millisecond range during normal operation. -var IOLatencyBuckets = []float64{ - // Generated via TestHistogramBuckets/IOLatencyBuckets. - 10000.000000, // 10µs - 26826.957953, // 26.826µs - 71968.567300, // 71.968µs - 193069.772888, // 193.069µs - 517947.467923, // 517.947µs - 1389495.494373, // 1.389495ms - 3727593.720315, // 3.727593ms - 10000000.000000, // 9.999999ms - 26826957.952797, // 26.826957ms - 71968567.300115, // 71.968567ms - 193069772.888325, // 193.069772ms - 517947467.923120, // 517.947467ms - 1389495494.373135, // 1.389495494s - 3727593720.314933, // 3.72759372s - 9999999999.999981, // 9.999999999s -} - -// NetworkLatencyBuckets are prometheus histogram buckets suitable for a histogram -// that records a quantity (nanosecond-denominated) in which most measurements -// behave like network latencies, i.e. most measurements are in the ms to sub-second -// range during normal operation. -var NetworkLatencyBuckets = []float64{ - // Generated via TestHistogramBuckets/NetworkLatencyBuckets. - 500000.000000, // 500µs - 860513.842995, // 860.513µs - 1480968.147973, // 1.480968ms - 2548787.184731, // 2.548787ms - 4386533.310619, // 4.386533ms - 7549345.273094, // 7.549345ms - 12992632.226094, // 12.992632ms - 22360679.774998, // 22.360679ms - 38483348.970335, // 38.483348ms - 66230909.027573, // 66.230909ms - 113985228.104760, // 113.985228ms - 196171733.362212, // 196.171733ms - 337616984.325077, // 337.616984ms - 581048177.284016, // 581.048177ms - 999999999.999999, // 999.999999ms, -} - -// NewHistogramV2 is a prometheus-backed histogram. Depending on the value of +// NewHistogram is a prometheus-backed histogram. Depending on the value of // opts.Buckets, this is suitable for recording any kind of quantity. Common // sensible choices are {IO,Network}LatencyBuckets. -func NewHistogramV2( - meta Metadata, windowDuration time.Duration, opts prometheus.HistogramOpts, -) *HistogramV2 { +func NewHistogram(meta Metadata, windowDuration time.Duration, buckets []float64) *Histogram { // TODO(obs-inf): prometheus supports labeled histograms but they require more // plumbing and don't fit into the PrometheusObservable interface any more. + opts := prometheus.HistogramOpts{ + Buckets: buckets, + } cum := prometheus.NewHistogram(opts) - h := &HistogramV2{ + h := &Histogram{ Metadata: meta, cum: cum, } @@ -403,16 +186,16 @@ func NewHistogramV2( return h } -var _ periodic = (*HistogramV2)(nil) -var _ PrometheusExportable = (*HistogramV2)(nil) +var _ periodic = (*Histogram)(nil) +var _ PrometheusExportable = (*Histogram)(nil) -// HistogramV2 is a prometheus-backed histogram. It collects observed values by +// Histogram is a prometheus-backed histogram. It collects observed values by // keeping bucketed counts. For convenience, internally two sets of buckets are // kept: A cumulative set (i.e. data is never evicted) and a windowed set (which // keeps only recently collected samples). // // New buckets are created using TestHistogramBuckets. -type HistogramV2 struct { +type Histogram struct { Metadata cum prometheus.Histogram @@ -436,27 +219,27 @@ type HistogramV2 struct { } } -func (h *HistogramV2) nextTick() time.Time { +func (h *Histogram) nextTick() time.Time { h.windowed.RLock() defer h.windowed.RUnlock() return h.windowed.nextTick() } -func (h *HistogramV2) tick() { +func (h *Histogram) tick() { h.windowed.Lock() defer h.windowed.Unlock() h.windowed.tick() } // Windowed returns a copy of the current windowed histogram. -func (h *HistogramV2) Windowed() prometheus.Histogram { +func (h *Histogram) Windowed() prometheus.Histogram { h.windowed.RLock() defer h.windowed.RUnlock() return h.windowed.cur } // RecordValue adds the given value to the histogram. -func (h *HistogramV2) RecordValue(n int64) { +func (h *Histogram) RecordValue(n int64) { v := float64(n) h.cum.Observe(v) @@ -466,12 +249,12 @@ func (h *HistogramV2) RecordValue(n int64) { } // GetType returns the prometheus type enum for this metric. -func (h *HistogramV2) GetType() *prometheusgo.MetricType { +func (h *Histogram) GetType() *prometheusgo.MetricType { return prometheusgo.MetricType_HISTOGRAM.Enum() } // ToPrometheusMetric returns a filled-in prometheus metric of the right type. -func (h *HistogramV2) ToPrometheusMetric() *prometheusgo.Metric { +func (h *Histogram) ToPrometheusMetric() *prometheusgo.Metric { m := &prometheusgo.Metric{} if err := h.cum.Write(m); err != nil { panic(err) @@ -480,7 +263,7 @@ func (h *HistogramV2) ToPrometheusMetric() *prometheusgo.Metric { } // ToPrometheusMetricWindowed returns a filled-in prometheus metric of the right type. -func (h *HistogramV2) ToPrometheusMetricWindowed() *prometheusgo.Metric { +func (h *Histogram) ToPrometheusMetricWindowed() *prometheusgo.Metric { h.windowed.Lock() defer h.windowed.Unlock() m := &prometheusgo.Metric{} @@ -492,12 +275,12 @@ func (h *HistogramV2) ToPrometheusMetricWindowed() *prometheusgo.Metric { // GetMetadata returns the metric's metadata including the Prometheus // MetricType. -func (h *HistogramV2) GetMetadata() Metadata { +func (h *Histogram) GetMetadata() Metadata { return h.Metadata } // Inspect calls the closure. -func (h *HistogramV2) Inspect(f func(interface{})) { +func (h *Histogram) Inspect(f func(interface{})) { h.windowed.Lock() maybeTick(&h.windowed) h.windowed.Unlock() @@ -505,25 +288,30 @@ func (h *HistogramV2) Inspect(f func(interface{})) { } // TotalCount returns the (cumulative) number of samples. -func (h *HistogramV2) TotalCount() int64 { +func (h *Histogram) TotalCount() int64 { return int64(h.ToPrometheusMetric().Histogram.GetSampleCount()) } // TotalCountWindowed returns the number of samples in the current window. -func (h *HistogramV2) TotalCountWindowed() int64 { +func (h *Histogram) TotalCountWindowed() int64 { return int64(h.ToPrometheusMetricWindowed().Histogram.GetSampleCount()) } // TotalSum returns the (cumulative) number of samples. -func (h *HistogramV2) TotalSum() float64 { +func (h *Histogram) TotalSum() float64 { return h.ToPrometheusMetric().Histogram.GetSampleSum() } // TotalSumWindowed returns the number of samples in the current window. -func (h *HistogramV2) TotalSumWindowed() float64 { +func (h *Histogram) TotalSumWindowed() float64 { return h.ToPrometheusMetricWindowed().Histogram.GetSampleSum() } +// Mean returns the (cumulative) mean of samples. +func (h *Histogram) Mean() float64 { + return h.TotalSum() / float64(h.TotalCount()) +} + // ValueAtQuantileWindowed takes a quantile value [0,100] and returns the // interpolated value at that quantile for the windowed histogram. // @@ -535,7 +323,7 @@ func (h *HistogramV2) TotalSumWindowed() float64 { // with the quantiles that include the +Inf bucket. // 2. Since the prometheus client library ensures buckets are in a strictly // increasing order at creation, we do not sort them. -func (h *HistogramV2) ValueAtQuantileWindowed(q float64) float64 { +func (h *Histogram) ValueAtQuantileWindowed(q float64) float64 { m := h.ToPrometheusMetricWindowed() buckets := m.Histogram.Bucket @@ -563,10 +351,12 @@ func (h *HistogramV2) ValueAtQuantileWindowed(q float64) float64 { if math.IsNaN(val) || math.IsInf(val, -1) { return 0 } + // should not extrapolate past the upper bound of the largest bucket if val > *buckets[len(buckets)-1].UpperBound { return *buckets[len(buckets)-1].UpperBound } + return val } diff --git a/pkg/util/metric/metric_test.go b/pkg/util/metric/metric_test.go index f65e31aea098..13f3dbe7fc91 100644 --- a/pkg/util/metric/metric_test.go +++ b/pkg/util/metric/metric_test.go @@ -13,17 +13,14 @@ package metric import ( "bytes" "encoding/json" - "fmt" "math" "reflect" - "strings" "sync" "testing" "time" _ "github.com/cockroachdb/cockroach/pkg/util/log" // for flags "github.com/kr/pretty" - "github.com/prometheus/client_golang/prometheus" prometheusgo "github.com/prometheus/client_model/go" "github.com/stretchr/testify/require" ) @@ -103,7 +100,7 @@ func setNow(d time.Duration) { } } -func TestHistogramPrometheus(t *testing.T) { +func TestHistogram(t *testing.T) { u := func(v int) *uint64 { n := uint64(v) return &n @@ -114,58 +111,15 @@ func TestHistogramPrometheus(t *testing.T) { return &n } - h := NewHistogram(Metadata{}, time.Hour, 10, 1) - h.RecordValue(1) - h.RecordValue(5) - h.RecordValue(5) - h.RecordValue(10) - h.RecordValue(15000) // counts as 10 - act := *h.ToPrometheusMetric().Histogram - - expSum := float64(1*1 + 2*5 + 2*10) - - exp := prometheusgo.Histogram{ - SampleCount: u(5), - SampleSum: &expSum, - Bucket: []*prometheusgo.Bucket{ - {CumulativeCount: u(1), UpperBound: f(1)}, - {CumulativeCount: u(3), UpperBound: f(5)}, - {CumulativeCount: u(5), UpperBound: f(10)}, - }, - } - - if !reflect.DeepEqual(act, exp) { - t.Fatalf("expected differs from actual: %s", pretty.Diff(exp, act)) - } -} - -func TestHistogramV2(t *testing.T) { - u := func(v int) *uint64 { - n := uint64(v) - return &n - } - - f := func(v int) *float64 { - n := float64(v) - return &n - } - - h := NewHistogramV2( + h := NewHistogram( Metadata{}, time.Hour, - prometheus.HistogramOpts{ - Namespace: "", - Subsystem: "", - Name: "", - Help: "", - ConstLabels: nil, - Buckets: []float64{ - 1.0, - 5.0, - 10.0, - 25.0, - 100.0, - }, + []float64{ + 1.0, + 5.0, + 10.0, + 25.0, + 100.0, }, ) @@ -211,38 +165,11 @@ func TestHistogramV2(t *testing.T) { require.Equal(t, 100.0, h.ValueAtQuantileWindowed(99.99)) } -// TestHistogramBuckets is used to generate additional prometheus buckets to be -// used with HistogramV2. Please include obs-inf in the review process of new -// buckets. -func TestHistogramBuckets(t *testing.T) { - verifyAndPrint := func(t *testing.T, exp, act []float64) { - t.Helper() - var buf strings.Builder - for idx, f := range exp { - if idx == 0 { - fmt.Fprintf(&buf, "// Generated via %s.", t.Name()) - } - fmt.Fprintf(&buf, "\n%f, // %s", f, time.Duration(f)) - } - t.Logf("%s", &buf) - require.InDeltaSlice(t, exp, act, 1 /* delta */, "Please update the bucket boundaries for %s", t.Name()) - } - t.Run("IOLatencyBuckets", func(t *testing.T) { - exp := prometheus.ExponentialBucketsRange(10e3, 10e9, 15) - verifyAndPrint(t, exp, IOLatencyBuckets) - }) - - t.Run("NetworkLatencyBuckets", func(t *testing.T) { - exp := prometheus.ExponentialBucketsRange(500e3, 1e9, 15) - verifyAndPrint(t, exp, NetworkLatencyBuckets) - }) -} - -func TestNewHistogramV2Rotate(t *testing.T) { +func TestNewHistogramRotate(t *testing.T) { defer TestingSetNow(nil)() setNow(0) - h := NewHistogramV2(emptyMetadata, 10*time.Second, prometheus.HistogramOpts{Buckets: nil}) + h := NewHistogram(emptyMetadata, 10*time.Second, nil) for i := 0; i < 4; i++ { // Windowed histogram is initially empty. h.Inspect(func(interface{}) {}) // triggers ticking @@ -265,36 +192,3 @@ func TestNewHistogramV2Rotate(t *testing.T) { // Go to beginning. } } - -func TestHistogramRotate(t *testing.T) { - defer TestingSetNow(nil)() - setNow(0) - duration := histWrapNum * time.Second - h := NewHistogram(emptyMetadata, duration, 1000+10*histWrapNum, 3) - var cur time.Duration - for i := 0; i < 3*histWrapNum; i++ { - v := int64(10 * i) - h.RecordValue(v) - cur += time.Second - setNow(cur) - cur, windowDuration := h.Windowed() - if windowDuration != duration { - t.Fatalf("window changed: is %s, should be %s", windowDuration, duration) - } - - // When i == histWrapNum-1, we expect the entry from i==0 to move out - // of the window (since we rotated for the histWrapNum'th time). - expMin := int64((1 + i - (histWrapNum - 1)) * 10) - if expMin < 0 { - expMin = 0 - } - - if min := cur.Min(); min != expMin { - t.Fatalf("%d: unexpected minimum %d, expected %d", i, min, expMin) - } - - if max, expMax := cur.Max(), v; max != expMax { - t.Fatalf("%d: unexpected maximum %d, expected %d", i, max, expMax) - } - } -} diff --git a/pkg/util/metric/registry_test.go b/pkg/util/metric/registry_test.go index 52e38069109c..5d2b2a6c4e88 100644 --- a/pkg/util/metric/registry_test.go +++ b/pkg/util/metric/registry_test.go @@ -76,7 +76,7 @@ func TestRegistry(t *testing.T) { topCounter := NewCounter(Metadata{Name: "top.counter"}) r.AddMetric(topCounter) - r.AddMetric(NewHistogram(Metadata{Name: "top.histogram"}, time.Minute, 1000, 3)) + r.AddMetric(NewHistogram(Metadata{Name: "top.histogram"}, time.Minute, Count1KBuckets)) r.AddMetric(NewGauge(Metadata{Name: "bottom.gauge"})) ms := &struct { @@ -103,7 +103,7 @@ func TestRegistry(t *testing.T) { StructGauge: NewGauge(Metadata{Name: "struct.gauge"}), StructGauge64: NewGaugeFloat64(Metadata{Name: "struct.gauge64"}), StructCounter: NewCounter(Metadata{Name: "struct.counter"}), - StructHistogram: NewHistogram(Metadata{Name: "struct.histogram"}, time.Minute, 1000, 3), + StructHistogram: NewHistogram(Metadata{Name: "struct.histogram"}, time.Minute, Count1KBuckets), NestedStructGauge: NestedStruct{ NestedStructGauge: NewGauge(Metadata{Name: "nested.struct.gauge"}), }, @@ -122,7 +122,7 @@ func TestRegistry(t *testing.T) { privateStructGauge: NewGauge(Metadata{Name: "private.struct.gauge"}), privateStructGauge64: NewGaugeFloat64(Metadata{Name: "private.struct.gauge64"}), privateStructCounter: NewCounter(Metadata{Name: "private.struct.counter"}), - privateStructHistogram: NewHistogram(Metadata{Name: "private.struct.histogram"}, time.Minute, 1000, 3), + privateStructHistogram: NewHistogram(Metadata{Name: "private.struct.histogram"}, time.Minute, Count1KBuckets), privateNestedStructGauge: NestedStruct{ NestedStructGauge: NewGauge(Metadata{Name: "private.nested.struct.gauge"}), },