From a28aa6cbb4b0de3ff92b4dcc74e319d2e2a220c2 Mon Sep 17 00:00:00 2001
From: David Hartunian <davidh@cockroachlabs.com>
Date: Thu, 26 Jan 2023 12:35:30 -0500
Subject: [PATCH 1/2] pkg/util/metric: optionally reintroduce legacy
 hdrhistogram model

Addresses https://github.com/cockroachdb/cockroach/issues/95833

This patch reintroduces the old HdrHistogram model, which can optionally
be enabled in place of the new Prometheus model. It is gated behind
an environment variable called `COCKROACH_ENABLE_HDR_HISTOGRAMS`,
giving users a means to "fall back" to the old model in the
event that the new model does not adequately serve their needs
(think of this as an "insurance policy" that protects against
this happening again with no real mitigation - ideally,
this environment variable should never have to be used).

Note: some histograms were introduced *after* the new
Prometheus histograms were added to CockroachDB. In these
cases, we set `Mode: HistogramModePrometheus` in the
`HistogramOptions` struct to ignore the value of the env
var, since there was never a time when these specific
histograms used the HdrHistogram model.

Release note (ops change): Histogram metrics can now optionally
use the legacy HdrHistogram model by setting the environment var
`COCKROACH_ENABLE_HDR_HISTOGRAMS=true` on CockroachDB nodes.
**Note that this is not recommended** unless users are having
difficulties with the newer Prometheus-backed histogram model.
Enabling can cause performance issues with timeseries databases
like Prometheus, as processing and storing the increased number
of buckets is taxing on both CPU and storage. Note that the
HdrHistogram model is slated for full deprecation in upcoming
releases.
---
 pkg/ccl/changefeedccl/metrics.go              |  96 +++++--
 pkg/ccl/sqlproxyccl/connector.go              |   2 +-
 pkg/ccl/sqlproxyccl/connector_test.go         |  27 +-
 pkg/ccl/sqlproxyccl/metrics.go                |  61 +++--
 pkg/ccl/streamingccl/streamingest/metrics.go  |  42 ++-
 pkg/kv/bulk/bulk_metrics.go                   |  14 +-
 pkg/kv/kvclient/kvcoord/txn_metrics.go        |  45 ++--
 pkg/kv/kvprober/kvprober.go                   |  22 +-
 pkg/kv/kvserver/liveness/liveness.go          |  11 +-
 pkg/kv/kvserver/logstore/logstore.go          |   2 +-
 .../kvserver/logstore/logstore_bench_test.go  |   7 +-
 pkg/kv/kvserver/metrics.go                    |  89 ++++---
 pkg/kv/kvserver/scheduler.go                  |   2 +-
 pkg/kv/kvserver/txnwait/metrics.go            |  28 +-
 pkg/rpc/clock_offset.go                       |  11 +-
 pkg/server/node.go                            |  11 +-
 pkg/server/status/recorder_test.go            |   7 +-
 pkg/sql/conn_executor.go                      |  87 +++---
 pkg/sql/execinfra/metrics.go                  |  35 ++-
 pkg/sql/executor_statement_metrics.go         |  18 +-
 pkg/sql/mem_metrics.go                        |  48 +++-
 pkg/sql/pgwire/pre_serve.go                   |  11 +-
 pkg/sql/pgwire/server.go                      |  11 +-
 .../sqlstats/persistedsqlstats/provider.go    |   2 +-
 pkg/sql/sqlstats/sslocal/sql_stats.go         |   2 +-
 pkg/sql/sqlstats/sslocal/sslocal_provider.go  |   2 +-
 pkg/sql/ttl/ttljob/ttljob_metrics.go          |  37 +--
 pkg/util/admission/work_queue.go              |  11 +-
 pkg/util/metric/BUILD.bazel                   |   4 +
 pkg/util/metric/aggmetric/BUILD.bazel         |   1 -
 pkg/util/metric/aggmetric/agg_metric.go       |   7 +-
 pkg/util/metric/aggmetric/agg_metric_test.go  |  43 ++-
 pkg/util/metric/aggmetric/histogram.go        |  27 +-
 .../testdata/add_after_destroy_hdr.txt        |  23 ++
 .../metric/aggmetric/testdata/basic_hdr.txt   |  24 ++
 .../metric/aggmetric/testdata/destroy_hdr.txt |  17 ++
 pkg/util/metric/hdrhistogram.go               | 248 ++++++++++++++++++
 pkg/util/metric/metric.go                     | 101 ++++++-
 pkg/util/metric/metric_ext_test.go            |   7 +-
 pkg/util/metric/metric_test.go                |  28 +-
 pkg/util/metric/registry_test.go              |  37 ++-
 pkg/util/mon/bytes_usage.go                   |  10 +-
 .../scheduler_latency_test.go                 |   7 +-
 43 files changed, 1017 insertions(+), 308 deletions(-)
 create mode 100644 pkg/util/metric/aggmetric/testdata/add_after_destroy_hdr.txt
 create mode 100644 pkg/util/metric/aggmetric/testdata/basic_hdr.txt
 create mode 100644 pkg/util/metric/aggmetric/testdata/destroy_hdr.txt
 create mode 100644 pkg/util/metric/hdrhistogram.go

diff --git a/pkg/ccl/changefeedccl/metrics.go b/pkg/ccl/changefeedccl/metrics.go
index 4b042273df8a..45578bc6645e 100644
--- a/pkg/ccl/changefeedccl/metrics.go
+++ b/pkg/ccl/changefeedccl/metrics.go
@@ -28,6 +28,14 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
 )
 
+const (
+	changefeedCheckpointHistMaxLatency = 30 * time.Second
+	changefeedBatchHistMaxLatency      = 30 * time.Second
+	changefeedFlushHistMaxLatency      = 1 * time.Minute
+	admitLatencyMaxValue               = 1 * time.Minute
+	commitLatencyMaxValue              = 10 * time.Minute
+)
+
 // max length for the scope name.
 const maxSLIScopeNameLen = 128
 
@@ -488,16 +496,46 @@ func newAggregateMetrics(histogramWindow time.Duration) *AggMetrics {
 		ErrorRetries:     b.Counter(metaChangefeedErrorRetries),
 		EmittedMessages:  b.Counter(metaChangefeedEmittedMessages),
 		FilteredMessages: b.Counter(metaChangefeedFilteredMessages),
-		MessageSize:      b.Histogram(metaMessageSize, histogramWindow, metric.DataSize16MBBuckets),
+		MessageSize: b.Histogram(metric.HistogramOptions{
+			Metadata: metaMessageSize,
+			Duration: histogramWindow,
+			MaxVal:   10 << 20, /* 10MB max message size */
+			SigFigs:  1,
+			Buckets:  metric.DataSize16MBBuckets,
+		}),
 		EmittedBytes:     b.Counter(metaChangefeedEmittedBytes),
 		FlushedBytes:     b.Counter(metaChangefeedFlushedBytes),
 		Flushes:          b.Counter(metaChangefeedFlushes),
 		SizeBasedFlushes: b.Counter(metaSizeBasedFlushes),
 
-		BatchHistNanos:            b.Histogram(metaChangefeedBatchHistNanos, histogramWindow, metric.BatchProcessLatencyBuckets),
-		FlushHistNanos:            b.Histogram(metaChangefeedFlushHistNanos, histogramWindow, metric.BatchProcessLatencyBuckets),
-		CommitLatency:             b.Histogram(metaCommitLatency, histogramWindow, metric.BatchProcessLatencyBuckets),
-		AdmitLatency:              b.Histogram(metaAdmitLatency, histogramWindow, metric.BatchProcessLatencyBuckets),
+		BatchHistNanos: b.Histogram(metric.HistogramOptions{
+			Metadata: metaChangefeedBatchHistNanos,
+			Duration: histogramWindow,
+			MaxVal:   changefeedBatchHistMaxLatency.Nanoseconds(),
+			SigFigs:  1,
+			Buckets:  metric.BatchProcessLatencyBuckets,
+		}),
+		FlushHistNanos: b.Histogram(metric.HistogramOptions{
+			Metadata: metaChangefeedFlushHistNanos,
+			Duration: histogramWindow,
+			MaxVal:   changefeedFlushHistMaxLatency.Nanoseconds(),
+			SigFigs:  2,
+			Buckets:  metric.BatchProcessLatencyBuckets,
+		}),
+		CommitLatency: b.Histogram(metric.HistogramOptions{
+			Metadata: metaCommitLatency,
+			Duration: histogramWindow,
+			MaxVal:   commitLatencyMaxValue.Nanoseconds(),
+			SigFigs:  1,
+			Buckets:  metric.BatchProcessLatencyBuckets,
+		}),
+		AdmitLatency: b.Histogram(metric.HistogramOptions{
+			Metadata: metaAdmitLatency,
+			Duration: histogramWindow,
+			MaxVal:   admitLatencyMaxValue.Nanoseconds(),
+			SigFigs:  1,
+			Buckets:  metric.BatchProcessLatencyBuckets,
+		}),
 		BackfillCount:             b.Gauge(metaChangefeedBackfillCount),
 		BackfillPendingRanges:     b.Gauge(metaChangefeedBackfillPendingRanges),
 		RunningCount:              b.Gauge(metaChangefeedRunning),
@@ -572,12 +610,12 @@ type Metrics struct {
 	Failures                       *metric.Counter
 	ResolvedMessages               *metric.Counter
 	QueueTimeNanos                 *metric.Counter
-	CheckpointHistNanos            *metric.Histogram
+	CheckpointHistNanos            metric.IHistogram
 	FrontierUpdates                *metric.Counter
 	ThrottleMetrics                cdcutils.Metrics
 	ReplanCount                    *metric.Counter
-	ParallelConsumerFlushNanos     *metric.Histogram
-	ParallelConsumerConsumeNanos   *metric.Histogram
+	ParallelConsumerFlushNanos     metric.IHistogram
+	ParallelConsumerConsumeNanos   metric.IHistogram
 	ParallelConsumerInFlightEvents *metric.Gauge
 
 	mu struct {
@@ -599,18 +637,36 @@ func (m *Metrics) getSLIMetrics(scope string) (*sliMetrics, error) {
 // MakeMetrics makes the metrics for changefeed monitoring.
 func MakeMetrics(histogramWindow time.Duration) metric.Struct {
 	m := &Metrics{
-		AggMetrics:                     newAggregateMetrics(histogramWindow),
-		KVFeedMetrics:                  kvevent.MakeMetrics(histogramWindow),
-		SchemaFeedMetrics:              schemafeed.MakeMetrics(histogramWindow),
-		ResolvedMessages:               metric.NewCounter(metaChangefeedForwardedResolvedMessages),
-		Failures:                       metric.NewCounter(metaChangefeedFailures),
-		QueueTimeNanos:                 metric.NewCounter(metaEventQueueTime),
-		CheckpointHistNanos:            metric.NewHistogram(metaChangefeedCheckpointHistNanos, histogramWindow, metric.IOLatencyBuckets),
-		FrontierUpdates:                metric.NewCounter(metaChangefeedFrontierUpdates),
-		ThrottleMetrics:                cdcutils.MakeMetrics(histogramWindow),
-		ReplanCount:                    metric.NewCounter(metaChangefeedReplanCount),
-		ParallelConsumerFlushNanos:     metric.NewHistogram(metaChangefeedEventConsumerFlushNanos, histogramWindow, metric.IOLatencyBuckets),
-		ParallelConsumerConsumeNanos:   metric.NewHistogram(metaChangefeedEventConsumerConsumeNanos, histogramWindow, metric.IOLatencyBuckets),
+		AggMetrics:        newAggregateMetrics(histogramWindow),
+		KVFeedMetrics:     kvevent.MakeMetrics(histogramWindow),
+		SchemaFeedMetrics: schemafeed.MakeMetrics(histogramWindow),
+		ResolvedMessages:  metric.NewCounter(metaChangefeedForwardedResolvedMessages),
+		Failures:          metric.NewCounter(metaChangefeedFailures),
+		QueueTimeNanos:    metric.NewCounter(metaEventQueueTime),
+		CheckpointHistNanos: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metaChangefeedCheckpointHistNanos,
+			Duration: histogramWindow,
+			MaxVal:   changefeedCheckpointHistMaxLatency.Nanoseconds(),
+			SigFigs:  2,
+			Buckets:  metric.IOLatencyBuckets,
+		}),
+		FrontierUpdates: metric.NewCounter(metaChangefeedFrontierUpdates),
+		ThrottleMetrics: cdcutils.MakeMetrics(histogramWindow),
+		ReplanCount:     metric.NewCounter(metaChangefeedReplanCount),
+		// The two metrics below were never implemented using the hdr histogram. Set Mode to
+		// metric.HistogramModePrometheus so that they always use the Prometheus model.
+		ParallelConsumerFlushNanos: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metaChangefeedEventConsumerFlushNanos,
+			Duration: histogramWindow,
+			Buckets:  metric.IOLatencyBuckets,
+			Mode:     metric.HistogramModePrometheus,
+		}),
+		ParallelConsumerConsumeNanos: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metaChangefeedEventConsumerConsumeNanos,
+			Duration: histogramWindow,
+			Buckets:  metric.IOLatencyBuckets,
+			Mode:     metric.HistogramModePrometheus,
+		}),
 		ParallelConsumerInFlightEvents: metric.NewGauge(metaChangefeedEventConsumerInFlightEvents),
 	}
 
diff --git a/pkg/ccl/sqlproxyccl/connector.go b/pkg/ccl/sqlproxyccl/connector.go
index 8329efecc8ae..6a891a7ab53f 100644
--- a/pkg/ccl/sqlproxyccl/connector.go
+++ b/pkg/ccl/sqlproxyccl/connector.go
@@ -77,7 +77,7 @@ type connector struct {
 
 	// DialTenantLatency tracks how long it takes to retrieve the address for
 	// a tenant and set up a tcp connection to the address.
-	DialTenantLatency *metric.Histogram
+	DialTenantLatency metric.IHistogram
 
 	// DialTenantRetries counts how often dialing a tenant is retried.
 	DialTenantRetries *metric.Counter
diff --git a/pkg/ccl/sqlproxyccl/connector_test.go b/pkg/ccl/sqlproxyccl/connector_test.go
index 8a28955b71a6..6fa21c56afe3 100644
--- a/pkg/ccl/sqlproxyccl/connector_test.go
+++ b/pkg/ccl/sqlproxyccl/connector_test.go
@@ -380,9 +380,12 @@ func TestConnector_dialTenantCluster(t *testing.T) {
 
 		c := &connector{
 			TenantID: roachpb.MustMakeTenantID(42),
-			DialTenantLatency: metric.NewHistogram(
-				metaDialTenantLatency, time.Millisecond, metric.NetworkLatencyBuckets,
-			),
+			DialTenantLatency: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePrometheus,
+				Metadata: metaDialTenantLatency,
+				Duration: time.Millisecond,
+				Buckets:  metric.NetworkLatencyBuckets,
+			}),
 			DialTenantRetries: metric.NewCounter(metaDialTenantRetries),
 		}
 		dc := &testTenantDirectoryCache{}
@@ -460,9 +463,12 @@ func TestConnector_dialTenantCluster(t *testing.T) {
 		defer cancel()
 
 		c := &connector{
-			DialTenantLatency: metric.NewHistogram(
-				metaDialTenantLatency, time.Millisecond, metric.NetworkLatencyBuckets,
-			),
+			DialTenantLatency: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePreferHdrLatency,
+				Metadata: metaDialTenantLatency,
+				Duration: time.Millisecond,
+				Buckets:  metric.NetworkLatencyBuckets,
+			}),
 			DialTenantRetries: metric.NewCounter(metaDialTenantRetries),
 		}
 		c.testingKnobs.lookupAddr = func(ctx context.Context) (string, error) {
@@ -491,9 +497,12 @@ func TestConnector_dialTenantCluster(t *testing.T) {
 		var reportFailureFnCount int
 		c := &connector{
 			TenantID: roachpb.MustMakeTenantID(42),
-			DialTenantLatency: metric.NewHistogram(
-				metaDialTenantLatency, time.Millisecond, metric.NetworkLatencyBuckets,
-			),
+			DialTenantLatency: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePreferHdrLatency,
+				Metadata: metaDialTenantLatency,
+				Duration: time.Millisecond,
+				Buckets:  metric.NetworkLatencyBuckets,
+			}),
 			DialTenantRetries: metric.NewCounter(metaDialTenantRetries),
 		}
 		c.DirectoryCache = &testTenantDirectoryCache{
diff --git a/pkg/ccl/sqlproxyccl/metrics.go b/pkg/ccl/sqlproxyccl/metrics.go
index 2fe0d8e16131..2432f9b1f668 100644
--- a/pkg/ccl/sqlproxyccl/metrics.go
+++ b/pkg/ccl/sqlproxyccl/metrics.go
@@ -23,19 +23,19 @@ type metrics struct {
 	RoutingErrCount        *metric.Counter
 	RefusedConnCount       *metric.Counter
 	SuccessfulConnCount    *metric.Counter
-	ConnectionLatency      *metric.Histogram
+	ConnectionLatency      metric.IHistogram
 	AuthFailedCount        *metric.Counter
 	ExpiredClientConnCount *metric.Counter
 
-	DialTenantLatency *metric.Histogram
+	DialTenantLatency metric.IHistogram
 	DialTenantRetries *metric.Counter
 
 	ConnMigrationSuccessCount                *metric.Counter
 	ConnMigrationErrorFatalCount             *metric.Counter
 	ConnMigrationErrorRecoverableCount       *metric.Counter
 	ConnMigrationAttemptedCount              *metric.Counter
-	ConnMigrationAttemptedLatency            *metric.Histogram
-	ConnMigrationTransferResponseMessageSize *metric.Histogram
+	ConnMigrationAttemptedLatency            metric.IHistogram
+	ConnMigrationTransferResponseMessageSize metric.IHistogram
 
 	QueryCancelReceivedPGWire *metric.Counter
 	QueryCancelReceivedHTTP   *metric.Counter
@@ -49,6 +49,16 @@ func (metrics) MetricStruct() {}
 
 var _ metric.Struct = metrics{}
 
+const (
+	// maxExpectedTransferResponseMessageSize corresponds to maximum expected
+	// response message size for the SHOW TRANSFER STATE query. We choose 16MB
+	// here to match the defaultMaxReadBufferSize used for ingesting SQL
+	// statements in the SQL server (see pkg/sql/pgwire/pgwirebase/encoding.go).
+	//
+	// This will be used to tune sql.session_transfer.max_session_size.
+	maxExpectedTransferResponseMessageSize = 1 << 24 // 16MB
+)
+
 var (
 	metaCurConnCount = metric.Metadata{
 		Name:        "proxy.sql.conns",
@@ -213,18 +223,20 @@ func makeProxyMetrics() metrics {
 		RoutingErrCount:        metric.NewCounter(metaRoutingErrCount),
 		RefusedConnCount:       metric.NewCounter(metaRefusedConnCount),
 		SuccessfulConnCount:    metric.NewCounter(metaSuccessfulConnCount),
-		ConnectionLatency: metric.NewHistogram(
-			metaConnMigrationAttemptedCount,
-			base.DefaultHistogramWindowInterval(),
-			metric.NetworkLatencyBuckets,
-		),
+		ConnectionLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaConnMigrationAttemptedCount,
+			Duration: base.DefaultHistogramWindowInterval(),
+			Buckets:  metric.NetworkLatencyBuckets,
+		}),
 		AuthFailedCount:        metric.NewCounter(metaAuthFailedCount),
 		ExpiredClientConnCount: metric.NewCounter(metaExpiredClientConnCount),
 		// Connector metrics.
-		DialTenantLatency: metric.NewHistogram(
-			metaDialTenantLatency,
-			base.DefaultHistogramWindowInterval(),
-			metric.NetworkLatencyBuckets,
+		DialTenantLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaDialTenantLatency,
+			Duration: base.DefaultHistogramWindowInterval(),
+			Buckets:  metric.NetworkLatencyBuckets},
 		),
 		DialTenantRetries: metric.NewCounter(metaDialTenantRetries),
 		// Connection migration metrics.
@@ -232,16 +244,19 @@ func makeProxyMetrics() metrics {
 		ConnMigrationErrorFatalCount:       metric.NewCounter(metaConnMigrationErrorFatalCount),
 		ConnMigrationErrorRecoverableCount: metric.NewCounter(metaConnMigrationErrorRecoverableCount),
 		ConnMigrationAttemptedCount:        metric.NewCounter(metaConnMigrationAttemptedCount),
-		ConnMigrationAttemptedLatency: metric.NewHistogram(
-			metaConnMigrationAttemptedLatency,
-			base.DefaultHistogramWindowInterval(),
-			metric.NetworkLatencyBuckets,
-		),
-		ConnMigrationTransferResponseMessageSize: metric.NewHistogram(
-			metaConnMigrationTransferResponseMessageSize,
-			base.DefaultHistogramWindowInterval(),
-			metric.DataSize16MBBuckets,
-		),
+		ConnMigrationAttemptedLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaConnMigrationAttemptedLatency,
+			Duration: base.DefaultHistogramWindowInterval(),
+			Buckets:  metric.NetworkLatencyBuckets,
+		}),
+		ConnMigrationTransferResponseMessageSize: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metaConnMigrationTransferResponseMessageSize,
+			Duration: base.DefaultHistogramWindowInterval(),
+			Buckets:  metric.DataSize16MBBuckets,
+			MaxVal:   maxExpectedTransferResponseMessageSize,
+			SigFigs:  1,
+		}),
 		QueryCancelReceivedPGWire: metric.NewCounter(metaQueryCancelReceivedPGWire),
 		QueryCancelReceivedHTTP:   metric.NewCounter(metaQueryCancelReceivedHTTP),
 		QueryCancelIgnored:        metric.NewCounter(metaQueryCancelIgnored),
diff --git a/pkg/ccl/streamingccl/streamingest/metrics.go b/pkg/ccl/streamingccl/streamingest/metrics.go
index f9f28f38a247..c2858cd5ad36 100644
--- a/pkg/ccl/streamingccl/streamingest/metrics.go
+++ b/pkg/ccl/streamingccl/streamingest/metrics.go
@@ -15,6 +15,12 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/util/metric"
 )
 
+const (
+	streamingFlushHistMaxLatency   = 1 * time.Minute
+	streamingAdmitLatencyMaxValue  = 3 * time.Minute
+	streamingCommitLatencyMaxValue = 10 * time.Minute
+)
+
 var (
 	metaReplicationEventsIngested = metric.Metadata{
 		Name:        "replication.events_ingested",
@@ -120,9 +126,9 @@ type Metrics struct {
 	Flushes                     *metric.Counter
 	JobProgressUpdates          *metric.Counter
 	ResolvedEvents              *metric.Counter
-	FlushHistNanos              *metric.Histogram
-	CommitLatency               *metric.Histogram
-	AdmitLatency                *metric.Histogram
+	FlushHistNanos              metric.IHistogram
+	CommitLatency               metric.IHistogram
+	AdmitLatency                metric.IHistogram
 	RunningCount                *metric.Gauge
 	EarliestDataCheckpointSpan  *metric.Gauge
 	LatestDataCheckpointSpan    *metric.Gauge
@@ -143,12 +149,30 @@ func MakeMetrics(histogramWindow time.Duration) metric.Struct {
 		Flushes:            metric.NewCounter(metaReplicationFlushes),
 		ResolvedEvents:     metric.NewCounter(metaReplicationResolvedEventsIngested),
 		JobProgressUpdates: metric.NewCounter(metaJobProgressUpdates),
-		FlushHistNanos: metric.NewHistogram(metaReplicationFlushHistNanos,
-			histogramWindow, metric.BatchProcessLatencyBuckets),
-		CommitLatency: metric.NewHistogram(metaReplicationCommitLatency,
-			histogramWindow, metric.BatchProcessLatencyBuckets),
-		AdmitLatency: metric.NewHistogram(metaReplicationAdmitLatency,
-			histogramWindow, metric.BatchProcessLatencyBuckets),
+		FlushHistNanos: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metaReplicationFlushHistNanos,
+			Duration: histogramWindow,
+			Buckets:  metric.BatchProcessLatencyBuckets,
+			MaxVal:   streamingFlushHistMaxLatency.Nanoseconds(),
+			SigFigs:  1,
+			Mode:     metric.HistogramModePreferHdrLatency,
+		}),
+		CommitLatency: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metaReplicationCommitLatency,
+			Duration: histogramWindow,
+			Buckets:  metric.BatchProcessLatencyBuckets,
+			MaxVal:   streamingCommitLatencyMaxValue.Nanoseconds(),
+			SigFigs:  1,
+			Mode:     metric.HistogramModePreferHdrLatency,
+		}),
+		AdmitLatency: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metaReplicationAdmitLatency,
+			Duration: histogramWindow,
+			Buckets:  metric.BatchProcessLatencyBuckets,
+			MaxVal:   streamingAdmitLatencyMaxValue.Nanoseconds(),
+			SigFigs:  1,
+			Mode:     metric.HistogramModePreferHdrLatency,
+		}),
 		RunningCount:                metric.NewGauge(metaStreamsRunning),
 		EarliestDataCheckpointSpan:  metric.NewGauge(metaEarliestDataCheckpointSpan),
 		LatestDataCheckpointSpan:    metric.NewGauge(metaLatestDataCheckpointSpan),
diff --git a/pkg/kv/bulk/bulk_metrics.go b/pkg/kv/bulk/bulk_metrics.go
index f3390d54733e..7cbbc748a20b 100644
--- a/pkg/kv/bulk/bulk_metrics.go
+++ b/pkg/kv/bulk/bulk_metrics.go
@@ -20,7 +20,7 @@ import (
 // Metrics contains pointers to the metrics for
 // monitoring bulk operations.
 type Metrics struct {
-	MaxBytesHist  *metric.Histogram
+	MaxBytesHist  metric.IHistogram
 	CurBytesCount *metric.Gauge
 }
 
@@ -44,10 +44,20 @@ var (
 	}
 )
 
+// See pkg/sql/mem_metrics.go
+// log10int64times1000 = log10(math.MaxInt64) * 1000, rounded up somewhat
+const log10int64times1000 = 19 * 1000
+
 // MakeBulkMetrics instantiates the metrics holder for bulk operation monitoring.
 func MakeBulkMetrics(histogramWindow time.Duration) Metrics {
 	return Metrics{
-		MaxBytesHist:  metric.NewHistogram(metaMemMaxBytes, histogramWindow, metric.MemoryUsage64MBBuckets),
+		MaxBytesHist: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metaMemMaxBytes,
+			Duration: histogramWindow,
+			MaxVal:   log10int64times1000,
+			SigFigs:  3,
+			Buckets:  metric.MemoryUsage64MBBuckets,
+		}),
 		CurBytesCount: metric.NewGauge(metaMemCurBytes),
 	}
 }
diff --git a/pkg/kv/kvclient/kvcoord/txn_metrics.go b/pkg/kv/kvclient/kvcoord/txn_metrics.go
index eb6313012717..fcca64aa74ae 100644
--- a/pkg/kv/kvclient/kvcoord/txn_metrics.go
+++ b/pkg/kv/kvclient/kvcoord/txn_metrics.go
@@ -31,14 +31,14 @@ type TxnMetrics struct {
 	RefreshMemoryLimitExceeded    *metric.Counter
 	RefreshAutoRetries            *metric.Counter
 
-	Durations *metric.Histogram
+	Durations metric.IHistogram
 
 	TxnsWithCondensedIntents      *metric.Counter
 	TxnsWithCondensedIntentsGauge *metric.Gauge
 	TxnsRejectedByLockSpanBudget  *metric.Counter
 
 	// Restarts is the number of times we had to restart the transaction.
-	Restarts *metric.Histogram
+	Restarts metric.IHistogram
 
 	// Counts of restart types.
 	RestartsWriteTooOld            telemetry.CounterWithMetric
@@ -264,21 +264,32 @@ var (
 // windowed portions retain data for approximately histogramWindow.
 func MakeTxnMetrics(histogramWindow time.Duration) TxnMetrics {
 	return TxnMetrics{
-		Aborts:                         metric.NewCounter(metaAbortsRates),
-		Commits:                        metric.NewCounter(metaCommitsRates),
-		Commits1PC:                     metric.NewCounter(metaCommits1PCRates),
-		ParallelCommits:                metric.NewCounter(metaParallelCommitsRates),
-		CommitWaits:                    metric.NewCounter(metaCommitWaitCount),
-		RefreshSuccess:                 metric.NewCounter(metaRefreshSuccess),
-		RefreshFail:                    metric.NewCounter(metaRefreshFail),
-		RefreshFailWithCondensedSpans:  metric.NewCounter(metaRefreshFailWithCondensedSpans),
-		RefreshMemoryLimitExceeded:     metric.NewCounter(metaRefreshMemoryLimitExceeded),
-		RefreshAutoRetries:             metric.NewCounter(metaRefreshAutoRetries),
-		Durations:                      metric.NewHistogram(metaDurationsHistograms, histogramWindow, metric.IOLatencyBuckets),
-		TxnsWithCondensedIntents:       metric.NewCounter(metaTxnsWithCondensedIntentSpans),
-		TxnsWithCondensedIntentsGauge:  metric.NewGauge(metaTxnsWithCondensedIntentSpansGauge),
-		TxnsRejectedByLockSpanBudget:   metric.NewCounter(metaTxnsRejectedByLockSpanBudget),
-		Restarts:                       metric.NewHistogram(metaRestartsHistogram, histogramWindow, metric.Count1KBuckets),
+		Aborts:                        metric.NewCounter(metaAbortsRates),
+		Commits:                       metric.NewCounter(metaCommitsRates),
+		Commits1PC:                    metric.NewCounter(metaCommits1PCRates),
+		ParallelCommits:               metric.NewCounter(metaParallelCommitsRates),
+		CommitWaits:                   metric.NewCounter(metaCommitWaitCount),
+		RefreshSuccess:                metric.NewCounter(metaRefreshSuccess),
+		RefreshFail:                   metric.NewCounter(metaRefreshFail),
+		RefreshFailWithCondensedSpans: metric.NewCounter(metaRefreshFailWithCondensedSpans),
+		RefreshMemoryLimitExceeded:    metric.NewCounter(metaRefreshMemoryLimitExceeded),
+		RefreshAutoRetries:            metric.NewCounter(metaRefreshAutoRetries),
+		Durations: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaDurationsHistograms,
+			Duration: histogramWindow,
+			Buckets:  metric.IOLatencyBuckets,
+		}),
+		TxnsWithCondensedIntents:      metric.NewCounter(metaTxnsWithCondensedIntentSpans),
+		TxnsWithCondensedIntentsGauge: metric.NewGauge(metaTxnsWithCondensedIntentSpansGauge),
+		TxnsRejectedByLockSpanBudget:  metric.NewCounter(metaTxnsRejectedByLockSpanBudget),
+		Restarts: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metaRestartsHistogram,
+			Duration: histogramWindow,
+			MaxVal:   100,
+			SigFigs:  3,
+			Buckets:  metric.Count1KBuckets,
+		}),
 		RestartsWriteTooOld:            telemetry.NewCounterWithMetric(metaRestartsWriteTooOld),
 		RestartsWriteTooOldMulti:       telemetry.NewCounterWithMetric(metaRestartsWriteTooOldMulti),
 		RestartsSerializable:           telemetry.NewCounterWithMetric(metaRestartsSerializable),
diff --git a/pkg/kv/kvprober/kvprober.go b/pkg/kv/kvprober/kvprober.go
index 2e0d84529fc9..0ec07006efe1 100644
--- a/pkg/kv/kvprober/kvprober.go
+++ b/pkg/kv/kvprober/kvprober.go
@@ -140,10 +140,10 @@ var (
 type Metrics struct {
 	ReadProbeAttempts                  *metric.Counter
 	ReadProbeFailures                  *metric.Counter
-	ReadProbeLatency                   *metric.Histogram
+	ReadProbeLatency                   metric.IHistogram
 	WriteProbeAttempts                 *metric.Counter
 	WriteProbeFailures                 *metric.Counter
-	WriteProbeLatency                  *metric.Histogram
+	WriteProbeLatency                  metric.IHistogram
 	WriteProbeQuarantineOldestDuration *metric.Gauge
 	ProbePlanAttempts                  *metric.Counter
 	ProbePlanFailures                  *metric.Counter
@@ -229,14 +229,20 @@ func NewProber(opts Opts) *Prober {
 		metrics: Metrics{
 			ReadProbeAttempts: metric.NewCounter(metaReadProbeAttempts),
 			ReadProbeFailures: metric.NewCounter(metaReadProbeFailures),
-			ReadProbeLatency: metric.NewHistogram(
-				metaReadProbeLatency, opts.HistogramWindowInterval, metric.NetworkLatencyBuckets,
-			),
+			ReadProbeLatency: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePreferHdrLatency,
+				Metadata: metaReadProbeLatency,
+				Duration: opts.HistogramWindowInterval,
+				Buckets:  metric.NetworkLatencyBuckets,
+			}),
 			WriteProbeAttempts: metric.NewCounter(metaWriteProbeAttempts),
 			WriteProbeFailures: metric.NewCounter(metaWriteProbeFailures),
-			WriteProbeLatency: metric.NewHistogram(
-				metaWriteProbeLatency, opts.HistogramWindowInterval, metric.NetworkLatencyBuckets,
-			),
+			WriteProbeLatency: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePreferHdrLatency,
+				Metadata: metaWriteProbeLatency,
+				Duration: opts.HistogramWindowInterval,
+				Buckets:  metric.NetworkLatencyBuckets,
+			}),
 			WriteProbeQuarantineOldestDuration: metric.NewFunctionalGauge(
 				metaWriteProbeQuarantineOldestDuration,
 				func() int64 { return qPool.oldestDuration().Nanoseconds() },
diff --git a/pkg/kv/kvserver/liveness/liveness.go b/pkg/kv/kvserver/liveness/liveness.go
index 3411ea1bbd78..a11f3abc4114 100644
--- a/pkg/kv/kvserver/liveness/liveness.go
+++ b/pkg/kv/kvserver/liveness/liveness.go
@@ -144,7 +144,7 @@ type Metrics struct {
 	HeartbeatSuccesses *metric.Counter
 	HeartbeatFailures  telemetry.CounterWithMetric
 	EpochIncrements    telemetry.CounterWithMetric
-	HeartbeatLatency   *metric.Histogram
+	HeartbeatLatency   metric.IHistogram
 }
 
 // IsLiveCallback is invoked when a node's IsLive state changes to true.
@@ -310,9 +310,12 @@ func NewNodeLiveness(opts NodeLivenessOptions) *NodeLiveness {
 		HeartbeatSuccesses: metric.NewCounter(metaHeartbeatSuccesses),
 		HeartbeatFailures:  telemetry.NewCounterWithMetric(metaHeartbeatFailures),
 		EpochIncrements:    telemetry.NewCounterWithMetric(metaEpochIncrements),
-		HeartbeatLatency: metric.NewHistogram(
-			metaHeartbeatLatency, opts.HistogramWindowInterval, metric.NetworkLatencyBuckets,
-		),
+		HeartbeatLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaHeartbeatLatency,
+			Duration: opts.HistogramWindowInterval,
+			Buckets:  metric.NetworkLatencyBuckets,
+		}),
 	}
 	nl.mu.nodes = make(map[roachpb.NodeID]Record)
 	nl.heartbeatToken <- struct{}{}
diff --git a/pkg/kv/kvserver/logstore/logstore.go b/pkg/kv/kvserver/logstore/logstore.go
index f1784431b0f5..e1cd72d34483 100644
--- a/pkg/kv/kvserver/logstore/logstore.go
+++ b/pkg/kv/kvserver/logstore/logstore.go
@@ -90,7 +90,7 @@ type AppendStats struct {
 
 // Metrics contains metrics specific to the log storage.
 type Metrics struct {
-	RaftLogCommitLatency *metric.Histogram
+	RaftLogCommitLatency metric.IHistogram
 }
 
 // LogStore is a stub of a separated Raft log storage.
diff --git a/pkg/kv/kvserver/logstore/logstore_bench_test.go b/pkg/kv/kvserver/logstore/logstore_bench_test.go
index 0c3f8e0473e2..f09472e27385 100644
--- a/pkg/kv/kvserver/logstore/logstore_bench_test.go
+++ b/pkg/kv/kvserver/logstore/logstore_bench_test.go
@@ -60,7 +60,12 @@ func runBenchmarkLogStore_StoreEntries(b *testing.B, bytes int64) {
 		EntryCache:  ec,
 		Settings:    cluster.MakeTestingClusterSettings(),
 		Metrics: Metrics{
-			RaftLogCommitLatency: metric.NewHistogram(metric.Metadata{}, 10*time.Second, metric.IOLatencyBuckets),
+			RaftLogCommitLatency: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePrometheus,
+				Metadata: metric.Metadata{},
+				Duration: 10 * time.Second,
+				Buckets:  metric.IOLatencyBuckets,
+			}),
 		},
 	}
 
diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go
index 38953afe5d50..5476a6c17e9a 100644
--- a/pkg/kv/kvserver/metrics.go
+++ b/pkg/kv/kvserver/metrics.go
@@ -1850,15 +1850,15 @@ type StoreMetrics struct {
 
 	// Raft processing metrics.
 	RaftTicks                 *metric.Counter
-	RaftQuotaPoolPercentUsed  *metric.Histogram
+	RaftQuotaPoolPercentUsed  metric.IHistogram
 	RaftWorkingDurationNanos  *metric.Counter
 	RaftTickingDurationNanos  *metric.Counter
 	RaftCommandsApplied       *metric.Counter
-	RaftLogCommitLatency      *metric.Histogram
-	RaftCommandCommitLatency  *metric.Histogram
-	RaftHandleReadyLatency    *metric.Histogram
-	RaftApplyCommittedLatency *metric.Histogram
-	RaftSchedulerLatency      *metric.Histogram
+	RaftLogCommitLatency      metric.IHistogram
+	RaftCommandCommitLatency  metric.IHistogram
+	RaftHandleReadyLatency    metric.IHistogram
+	RaftApplyCommittedLatency metric.IHistogram
+	RaftSchedulerLatency      metric.IHistogram
 	RaftTimeoutCampaign       *metric.Counter
 
 	// Raft message metrics.
@@ -1990,8 +1990,8 @@ type StoreMetrics struct {
 	ReplicaCircuitBreakerCumTripped *metric.Counter
 
 	// Replica batch evaluation metrics.
-	ReplicaReadBatchEvaluationLatency  *metric.Histogram
-	ReplicaWriteBatchEvaluationLatency *metric.Histogram
+	ReplicaReadBatchEvaluationLatency  metric.IHistogram
+	ReplicaWriteBatchEvaluationLatency metric.IHistogram
 
 	ReplicaReadBatchDroppedLatchesBeforeEval *metric.Counter
 	ReplicaReadBatchWithoutInterleavingIter  *metric.Counter
@@ -2377,27 +2377,46 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
 
 		// Raft processing metrics.
 		RaftTicks: metric.NewCounter(metaRaftTicks),
-		RaftQuotaPoolPercentUsed: metric.NewHistogram(
-			metaRaftQuotaPoolPercentUsed, histogramWindow, metric.Percent100Buckets,
-		),
+		RaftQuotaPoolPercentUsed: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metaRaftQuotaPoolPercentUsed,
+			Duration: histogramWindow,
+			MaxVal:   100,
+			SigFigs:  1,
+			Buckets:  metric.Percent100Buckets,
+		}),
 		RaftWorkingDurationNanos: metric.NewCounter(metaRaftWorkingDurationNanos),
 		RaftTickingDurationNanos: metric.NewCounter(metaRaftTickingDurationNanos),
 		RaftCommandsApplied:      metric.NewCounter(metaRaftCommandsApplied),
-		RaftLogCommitLatency: metric.NewHistogram(
-			metaRaftLogCommitLatency, histogramWindow, metric.IOLatencyBuckets,
-		),
-		RaftCommandCommitLatency: metric.NewHistogram(
-			metaRaftCommandCommitLatency, histogramWindow, metric.IOLatencyBuckets,
-		),
-		RaftHandleReadyLatency: metric.NewHistogram(
-			metaRaftHandleReadyLatency, histogramWindow, metric.IOLatencyBuckets,
-		),
-		RaftApplyCommittedLatency: metric.NewHistogram(
-			metaRaftApplyCommittedLatency, histogramWindow, metric.IOLatencyBuckets,
-		),
-		RaftSchedulerLatency: metric.NewHistogram(
-			metaRaftSchedulerLatency, histogramWindow, metric.IOLatencyBuckets,
-		),
+		RaftLogCommitLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaRaftLogCommitLatency,
+			Duration: histogramWindow,
+			Buckets:  metric.IOLatencyBuckets,
+		}),
+		RaftCommandCommitLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaRaftCommandCommitLatency,
+			Duration: histogramWindow,
+			Buckets:  metric.IOLatencyBuckets,
+		}),
+		RaftHandleReadyLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaRaftHandleReadyLatency,
+			Duration: histogramWindow,
+			Buckets:  metric.IOLatencyBuckets,
+		}),
+		RaftApplyCommittedLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaRaftApplyCommittedLatency,
+			Duration: histogramWindow,
+			Buckets:  metric.IOLatencyBuckets,
+		}),
+		RaftSchedulerLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaRaftSchedulerLatency,
+			Duration: histogramWindow,
+			Buckets:  metric.IOLatencyBuckets,
+		}),
 		RaftTimeoutCampaign: metric.NewCounter(metaRaftTimeoutCampaign),
 
 		// Raft message metrics.
@@ -2538,12 +2557,18 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
 		ReplicaCircuitBreakerCumTripped: metric.NewCounter(metaReplicaCircuitBreakerCumTripped),
 
 		// Replica batch evaluation.
-		ReplicaReadBatchEvaluationLatency: metric.NewHistogram(
-			metaReplicaReadBatchEvaluationLatency, histogramWindow, metric.IOLatencyBuckets,
-		),
-		ReplicaWriteBatchEvaluationLatency: metric.NewHistogram(
-			metaReplicaWriteBatchEvaluationLatency, histogramWindow, metric.IOLatencyBuckets,
-		),
+		ReplicaReadBatchEvaluationLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaReplicaReadBatchEvaluationLatency,
+			Duration: histogramWindow,
+			Buckets:  metric.IOLatencyBuckets,
+		}),
+		ReplicaWriteBatchEvaluationLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaReplicaWriteBatchEvaluationLatency,
+			Duration: histogramWindow,
+			Buckets:  metric.IOLatencyBuckets,
+		}),
 		FlushUtilization: metric.NewGaugeFloat64(metaStorageFlushUtilization),
 		FsyncLatency:     metric.NewManualWindowHistogram(metaStorageFsyncLatency, pebble.FsyncLatencyBuckets),
 
diff --git a/pkg/kv/kvserver/scheduler.go b/pkg/kv/kvserver/scheduler.go
index 85db3cefa1fb..062bb4562843 100644
--- a/pkg/kv/kvserver/scheduler.go
+++ b/pkg/kv/kvserver/scheduler.go
@@ -181,7 +181,7 @@ type raftScheduleState struct {
 type raftScheduler struct {
 	ambientContext log.AmbientContext
 	processor      raftProcessor
-	latency        *metric.Histogram
+	latency        metric.IHistogram
 	numWorkers     int
 	maxTicks       int
 
diff --git a/pkg/kv/kvserver/txnwait/metrics.go b/pkg/kv/kvserver/txnwait/metrics.go
index 2e9d1d2a2055..4610fd8e375c 100644
--- a/pkg/kv/kvserver/txnwait/metrics.go
+++ b/pkg/kv/kvserver/txnwait/metrics.go
@@ -22,8 +22,8 @@ type Metrics struct {
 	PusherWaiting  *metric.Gauge
 	QueryWaiting   *metric.Gauge
 	PusherSlow     *metric.Gauge
-	PusherWaitTime *metric.Histogram
-	QueryWaitTime  *metric.Histogram
+	PusherWaitTime metric.IHistogram
+	QueryWaitTime  metric.IHistogram
 	DeadlocksTotal *metric.Counter
 }
 
@@ -66,27 +66,31 @@ func NewMetrics(histogramWindowInterval time.Duration) *Metrics {
 			},
 		),
 
-		PusherWaitTime: metric.NewHistogram(
-			metric.Metadata{
+		PusherWaitTime: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metric.Metadata{
 				Name:        "txnwaitqueue.pusher.wait_time",
 				Help:        "Histogram of durations spent in queue by pushers",
 				Measurement: "Pusher wait time",
 				Unit:        metric.Unit_NANOSECONDS,
 			},
-			histogramWindowInterval,
-			metric.LongRunning60mLatencyBuckets,
-		),
+			MaxVal:   time.Hour.Nanoseconds(),
+			SigFigs:  1,
+			Duration: histogramWindowInterval,
+			Buckets:  metric.LongRunning60mLatencyBuckets,
+		}),
 
-		QueryWaitTime: metric.NewHistogram(
-			metric.Metadata{
+		QueryWaitTime: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metric.Metadata{
 				Name:        "txnwaitqueue.query.wait_time",
 				Help:        "Histogram of durations spent in queue by queries",
 				Measurement: "Query wait time",
 				Unit:        metric.Unit_NANOSECONDS,
 			},
-			histogramWindowInterval,
-			metric.LongRunning60mLatencyBuckets,
-		),
+			MaxVal:   time.Hour.Nanoseconds(),
+			SigFigs:  1,
+			Duration: histogramWindowInterval,
+			Buckets:  metric.LongRunning60mLatencyBuckets,
+		}),
 
 		DeadlocksTotal: metric.NewCounter(
 			metric.Metadata{
diff --git a/pkg/rpc/clock_offset.go b/pkg/rpc/clock_offset.go
index bd5877f26b6b..99daad030a72 100644
--- a/pkg/rpc/clock_offset.go
+++ b/pkg/rpc/clock_offset.go
@@ -29,7 +29,7 @@ import (
 type RemoteClockMetrics struct {
 	ClockOffsetMeanNanos   *metric.Gauge
 	ClockOffsetStdDevNanos *metric.Gauge
-	LatencyHistogramNanos  *metric.Histogram
+	LatencyHistogramNanos  metric.IHistogram
 }
 
 // avgLatencyMeasurementAge determines how to exponentially weight the
@@ -136,9 +136,12 @@ func newRemoteClockMonitor(
 	r.metrics = RemoteClockMetrics{
 		ClockOffsetMeanNanos:   metric.NewGauge(metaClockOffsetMeanNanos),
 		ClockOffsetStdDevNanos: metric.NewGauge(metaClockOffsetStdDevNanos),
-		LatencyHistogramNanos: metric.NewHistogram(
-			metaLatencyHistogramNanos, histogramWindowInterval, metric.IOLatencyBuckets,
-		),
+		LatencyHistogramNanos: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaLatencyHistogramNanos,
+			Duration: histogramWindowInterval,
+			Buckets:  metric.IOLatencyBuckets,
+		}),
 	}
 	return &r
 }
diff --git a/pkg/server/node.go b/pkg/server/node.go
index 76019595ca04..2259a512bd75 100644
--- a/pkg/server/node.go
+++ b/pkg/server/node.go
@@ -167,7 +167,7 @@ var (
 )
 
 type nodeMetrics struct {
-	Latency    *metric.Histogram
+	Latency    metric.IHistogram
 	Success    *metric.Counter
 	Err        *metric.Counter
 	DiskStalls *metric.Counter
@@ -178,9 +178,12 @@ type nodeMetrics struct {
 
 func makeNodeMetrics(reg *metric.Registry, histogramWindow time.Duration) nodeMetrics {
 	nm := nodeMetrics{
-		Latency: metric.NewHistogram(
-			metaExecLatency, histogramWindow, metric.IOLatencyBuckets,
-		),
+		Latency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaExecLatency,
+			Duration: histogramWindow,
+			Buckets:  metric.IOLatencyBuckets,
+		}),
 		Success:    metric.NewCounter(metaExecSuccess),
 		Err:        metric.NewCounter(metaExecError),
 		DiskStalls: metric.NewCounter(metaDiskStalls),
diff --git a/pkg/server/status/recorder_test.go b/pkg/server/status/recorder_test.go
index 82016bd8902c..da77954343af 100644
--- a/pkg/server/status/recorder_test.go
+++ b/pkg/server/status/recorder_test.go
@@ -385,7 +385,12 @@ func TestMetricsRecorder(t *testing.T) {
 				c.Inc((data.val))
 				addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
 			case "histogram":
-				h := metric.NewHistogram(metric.Metadata{Name: reg.prefix + data.name}, time.Second, []float64{1.0, 10.0, 100.0, 1000.0})
+				h := metric.NewHistogram(metric.HistogramOptions{
+					Metadata: metric.Metadata{Name: reg.prefix + data.name},
+					Duration: time.Second,
+					Buckets:  []float64{1.0, 10.0, 100.0, 1000.0},
+					Mode:     metric.HistogramModePrometheus,
+				})
 				reg.reg.AddMetric(h)
 				h.RecordValue(data.val)
 				for _, q := range recordHistogramQuantiles {
diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go
index b593acf1f863..ae3dffb05c35 100644
--- a/pkg/sql/conn_executor.go
+++ b/pkg/sql/conn_executor.go
@@ -452,21 +452,36 @@ func makeMetrics(internal bool) Metrics {
 			SQLOptPlanCacheHits:   metric.NewCounter(getMetricMeta(MetaSQLOptPlanCacheHits, internal)),
 			SQLOptPlanCacheMisses: metric.NewCounter(getMetricMeta(MetaSQLOptPlanCacheMisses, internal)),
 			// TODO(mrtracy): See HistogramWindowInterval in server/config.go for the 6x factor.
-			DistSQLExecLatency: metric.NewHistogram(
-				getMetricMeta(MetaDistSQLExecLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets,
-			),
-			SQLExecLatency: metric.NewHistogram(
-				getMetricMeta(MetaSQLExecLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets,
-			),
-			DistSQLServiceLatency: metric.NewHistogram(
-				getMetricMeta(MetaDistSQLServiceLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets,
-			),
-			SQLServiceLatency: metric.NewHistogram(
-				getMetricMeta(MetaSQLServiceLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets,
-			),
-			SQLTxnLatency: metric.NewHistogram(
-				getMetricMeta(MetaSQLTxnLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets,
-			),
+			DistSQLExecLatency: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePreferHdrLatency,
+				Metadata: getMetricMeta(MetaDistSQLExecLatency, internal),
+				Duration: 6 * metricsSampleInterval,
+				Buckets:  metric.IOLatencyBuckets,
+			}),
+			SQLExecLatency: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePreferHdrLatency,
+				Metadata: getMetricMeta(MetaSQLExecLatency, internal),
+				Duration: 6 * metricsSampleInterval,
+				Buckets:  metric.IOLatencyBuckets,
+			}),
+			DistSQLServiceLatency: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePreferHdrLatency,
+				Metadata: getMetricMeta(MetaDistSQLServiceLatency, internal),
+				Duration: 6 * metricsSampleInterval,
+				Buckets:  metric.IOLatencyBuckets,
+			}),
+			SQLServiceLatency: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePreferHdrLatency,
+				Metadata: getMetricMeta(MetaSQLServiceLatency, internal),
+				Duration: 6 * metricsSampleInterval,
+				Buckets:  metric.IOLatencyBuckets,
+			}),
+			SQLTxnLatency: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePreferHdrLatency,
+				Metadata: getMetricMeta(MetaSQLTxnLatency, internal),
+				Duration: 6 * metricsSampleInterval,
+				Buckets:  metric.IOLatencyBuckets,
+			}),
 			SQLTxnsOpen:         metric.NewGauge(getMetricMeta(MetaSQLTxnsOpen, internal)),
 			SQLActiveStatements: metric.NewGauge(getMetricMeta(MetaSQLActiveQueries, internal)),
 			SQLContendedTxns:    metric.NewCounter(getMetricMeta(MetaSQLTxnContended, internal)),
@@ -490,28 +505,38 @@ func makeMetrics(internal bool) Metrics {
 func makeServerMetrics(cfg *ExecutorConfig) ServerMetrics {
 	return ServerMetrics{
 		StatsMetrics: StatsMetrics{
-			SQLStatsMemoryMaxBytesHist: metric.NewHistogram(
-				MetaSQLStatsMemMaxBytes,
-				cfg.HistogramWindowInterval,
-				metric.MemoryUsage64MBBuckets,
-			),
+			SQLStatsMemoryMaxBytesHist: metric.NewHistogram(metric.HistogramOptions{
+				Metadata: MetaSQLStatsMemMaxBytes,
+				Duration: cfg.HistogramWindowInterval,
+				MaxVal:   log10int64times1000,
+				SigFigs:  3,
+				Buckets:  metric.MemoryUsage64MBBuckets,
+			}),
 			SQLStatsMemoryCurBytesCount: metric.NewGauge(MetaSQLStatsMemCurBytes),
-			ReportedSQLStatsMemoryMaxBytesHist: metric.NewHistogram(
-				MetaReportedSQLStatsMemMaxBytes,
-				cfg.HistogramWindowInterval,
-				metric.MemoryUsage64MBBuckets,
-			),
+			ReportedSQLStatsMemoryMaxBytesHist: metric.NewHistogram(metric.HistogramOptions{
+				Metadata: MetaReportedSQLStatsMemMaxBytes,
+				Duration: cfg.HistogramWindowInterval,
+				MaxVal:   log10int64times1000,
+				SigFigs:  3,
+				Buckets:  metric.MemoryUsage64MBBuckets,
+			}),
 			ReportedSQLStatsMemoryCurBytesCount: metric.NewGauge(MetaReportedSQLStatsMemCurBytes),
 			DiscardedStatsCount:                 metric.NewCounter(MetaDiscardedSQLStats),
 			SQLStatsFlushStarted:                metric.NewCounter(MetaSQLStatsFlushStarted),
 			SQLStatsFlushFailure:                metric.NewCounter(MetaSQLStatsFlushFailure),
-			SQLStatsFlushDuration: metric.NewHistogram(
-				MetaSQLStatsFlushDuration, 6*metricsSampleInterval, metric.IOLatencyBuckets,
-			),
+			SQLStatsFlushDuration: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePreferHdrLatency,
+				Metadata: MetaSQLStatsFlushDuration,
+				Duration: 6 * metricsSampleInterval,
+				Buckets:  metric.IOLatencyBuckets,
+			}),
 			SQLStatsRemovedRows: metric.NewCounter(MetaSQLStatsRemovedRows),
-			SQLTxnStatsCollectionOverhead: metric.NewHistogram(
-				MetaSQLTxnStatsCollectionOverhead, 6*metricsSampleInterval, metric.IOLatencyBuckets,
-			),
+			SQLTxnStatsCollectionOverhead: metric.NewHistogram(metric.HistogramOptions{
+				Mode:     metric.HistogramModePreferHdrLatency,
+				Metadata: MetaSQLTxnStatsCollectionOverhead,
+				Duration: 6 * metricsSampleInterval,
+				Buckets:  metric.IOLatencyBuckets,
+			}),
 		},
 		ContentionSubsystemMetrics: txnidcache.NewMetrics(),
 		InsightsMetrics:            insights.NewMetrics(),
diff --git a/pkg/sql/execinfra/metrics.go b/pkg/sql/execinfra/metrics.go
index 17ad207d88db..30123ce27ee6 100644
--- a/pkg/sql/execinfra/metrics.go
+++ b/pkg/sql/execinfra/metrics.go
@@ -24,11 +24,11 @@ type DistSQLMetrics struct {
 	ContendedQueriesCount *metric.Counter
 	FlowsActive           *metric.Gauge
 	FlowsTotal            *metric.Counter
-	MaxBytesHist          *metric.Histogram
+	MaxBytesHist          metric.IHistogram
 	CurBytesCount         *metric.Gauge
 	VecOpenFDs            *metric.Gauge
 	CurDiskBytesCount     *metric.Gauge
-	MaxDiskBytesHist      *metric.Histogram
+	MaxDiskBytesHist      metric.IHistogram
 	QueriesSpilled        *metric.Counter
 	SpilledBytesWritten   *metric.Counter
 	SpilledBytesRead      *metric.Counter
@@ -120,6 +120,10 @@ var (
 	}
 )
 
+// log10int64times1000 = log10(math.MaxInt64) * 1000, rounded up somewhat.
+// It mirrors the constant of the same name in pkg/sql/mem_metrics.go.
+const log10int64times1000 = 19 * 1000
+
 // MakeDistSQLMetrics instantiates the metrics holder for DistSQL monitoring.
 func MakeDistSQLMetrics(histogramWindow time.Duration) DistSQLMetrics {
 	return DistSQLMetrics{
@@ -128,14 +132,25 @@ func MakeDistSQLMetrics(histogramWindow time.Duration) DistSQLMetrics {
 		ContendedQueriesCount: metric.NewCounter(metaContendedQueriesCount),
 		FlowsActive:           metric.NewGauge(metaFlowsActive),
 		FlowsTotal:            metric.NewCounter(metaFlowsTotal),
-		MaxBytesHist:          metric.NewHistogram(metaMemMaxBytes, histogramWindow, metric.MemoryUsage64MBBuckets),
-		CurBytesCount:         metric.NewGauge(metaMemCurBytes),
-		VecOpenFDs:            metric.NewGauge(metaVecOpenFDs),
-		CurDiskBytesCount:     metric.NewGauge(metaDiskCurBytes),
-		MaxDiskBytesHist:      metric.NewHistogram(metaDiskMaxBytes, histogramWindow, metric.MemoryUsage64MBBuckets),
-		QueriesSpilled:        metric.NewCounter(metaQueriesSpilled),
-		SpilledBytesWritten:   metric.NewCounter(metaSpilledBytesWritten),
-		SpilledBytesRead:      metric.NewCounter(metaSpilledBytesRead),
+		MaxBytesHist: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metaMemMaxBytes,
+			Duration: histogramWindow,
+			MaxVal:   log10int64times1000,
+			SigFigs:  3,
+			Buckets:  metric.MemoryUsage64MBBuckets,
+		}),
+		CurBytesCount:     metric.NewGauge(metaMemCurBytes),
+		VecOpenFDs:        metric.NewGauge(metaVecOpenFDs),
+		CurDiskBytesCount: metric.NewGauge(metaDiskCurBytes),
+		MaxDiskBytesHist: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: metaDiskMaxBytes,
+			Duration: histogramWindow,
+			MaxVal:   log10int64times1000,
+			SigFigs:  3,
+			Buckets:  metric.MemoryUsage64MBBuckets}),
+		QueriesSpilled:      metric.NewCounter(metaQueriesSpilled),
+		SpilledBytesWritten: metric.NewCounter(metaSpilledBytesWritten),
+		SpilledBytesRead:    metric.NewCounter(metaSpilledBytesRead),
 	}
 }
 
diff --git a/pkg/sql/executor_statement_metrics.go b/pkg/sql/executor_statement_metrics.go
index e9cfcdf12c95..3eb34a83eb75 100644
--- a/pkg/sql/executor_statement_metrics.go
+++ b/pkg/sql/executor_statement_metrics.go
@@ -37,11 +37,11 @@ type EngineMetrics struct {
 	SQLOptPlanCacheHits   *metric.Counter
 	SQLOptPlanCacheMisses *metric.Counter
 
-	DistSQLExecLatency    *metric.Histogram
-	SQLExecLatency        *metric.Histogram
-	DistSQLServiceLatency *metric.Histogram
-	SQLServiceLatency     *metric.Histogram
-	SQLTxnLatency         *metric.Histogram
+	DistSQLExecLatency    metric.IHistogram
+	SQLExecLatency        metric.IHistogram
+	DistSQLServiceLatency metric.IHistogram
+	SQLServiceLatency     metric.IHistogram
+	SQLTxnLatency         metric.IHistogram
 	SQLTxnsOpen           *metric.Gauge
 	SQLActiveStatements   *metric.Gauge
 	SQLContendedTxns      *metric.Counter
@@ -70,20 +70,20 @@ func (EngineMetrics) MetricStruct() {}
 
 // StatsMetrics groups metrics related to SQL Stats collection.
 type StatsMetrics struct {
-	SQLStatsMemoryMaxBytesHist  *metric.Histogram
+	SQLStatsMemoryMaxBytesHist  metric.IHistogram
 	SQLStatsMemoryCurBytesCount *metric.Gauge
 
-	ReportedSQLStatsMemoryMaxBytesHist  *metric.Histogram
+	ReportedSQLStatsMemoryMaxBytesHist  metric.IHistogram
 	ReportedSQLStatsMemoryCurBytesCount *metric.Gauge
 
 	DiscardedStatsCount *metric.Counter
 
 	SQLStatsFlushStarted  *metric.Counter
 	SQLStatsFlushFailure  *metric.Counter
-	SQLStatsFlushDuration *metric.Histogram
+	SQLStatsFlushDuration metric.IHistogram
 	SQLStatsRemovedRows   *metric.Counter
 
-	SQLTxnStatsCollectionOverhead *metric.Histogram
+	SQLTxnStatsCollectionOverhead metric.IHistogram
 }
 
 // StatsMetrics is part of the metric.Struct interface.
diff --git a/pkg/sql/mem_metrics.go b/pkg/sql/mem_metrics.go
index db2198d6bfd8..248aff4ddc88 100644
--- a/pkg/sql/mem_metrics.go
+++ b/pkg/sql/mem_metrics.go
@@ -19,7 +19,7 @@ import (
 // BaseMemoryMetrics contains a max histogram and a current count of the
 // bytes allocated by a sql endpoint.
 type BaseMemoryMetrics struct {
-	MaxBytesHist  *metric.Histogram
+	MaxBytesHist  metric.IHistogram
 	CurBytesCount *metric.Gauge
 }
 
@@ -30,9 +30,9 @@ type BaseMemoryMetrics struct {
 // - "internal" for activities related to leases, schema changes, etc.
 type MemoryMetrics struct {
 	BaseMemoryMetrics
-	TxnMaxBytesHist      *metric.Histogram
+	TxnMaxBytesHist      metric.IHistogram
 	TxnCurBytesCount     *metric.Gauge
-	SessionMaxBytesHist  *metric.Histogram
+	SessionMaxBytesHist  metric.IHistogram
 	SessionCurBytesCount *metric.Gauge
 }
 
@@ -41,6 +41,22 @@ func (MemoryMetrics) MetricStruct() {}
 
 var _ metric.Struct = MemoryMetrics{}
 
+// TODO(knz): Until #10014 is addressed, the UI graphs don't have a
+// log scale on the Y axis and the histograms are thus displayed using
+// a manual log scale: we store the logarithm of the value in the DB
+// and plot that logarithm in the UI.
+//
+// We could, but do not, store the full value in the DB and compute
+// the log in the UI, because the current histogram implementation
+// does not deal well with large maxima (#10015).
+//
+// Since the DB stores an integer, we scale the values by 1000 so that
+// a modicum of precision is restored when exponentiating the value.
+//
+
+// log10int64times1000 = log10(math.MaxInt64) * 1000, rounded up somewhat
+const log10int64times1000 = 19 * 1000
+
 func makeMemMetricMetadata(name, help string) metric.Metadata {
 	return metric.Metadata{
 		Name:        name,
@@ -57,7 +73,13 @@ func MakeBaseMemMetrics(endpoint string, histogramWindow time.Duration) BaseMemo
 	MetaMemMaxBytes := makeMemMetricMetadata(prefix+".max", "Memory usage per sql statement for "+endpoint)
 	MetaMemCurBytes := makeMemMetricMetadata(prefix+".current", "Current sql statement memory usage for "+endpoint)
 	return BaseMemoryMetrics{
-		MaxBytesHist:  metric.NewHistogram(MetaMemMaxBytes, histogramWindow, metric.MemoryUsage64MBBuckets),
+		MaxBytesHist: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: MetaMemMaxBytes,
+			Duration: histogramWindow,
+			MaxVal:   log10int64times1000,
+			SigFigs:  3,
+			Buckets:  metric.MemoryUsage64MBBuckets,
+		}),
 		CurBytesCount: metric.NewGauge(MetaMemCurBytes),
 	}
 }
@@ -71,10 +93,20 @@ func MakeMemMetrics(endpoint string, histogramWindow time.Duration) MemoryMetric
 	MetaMemMaxSessionBytes := makeMemMetricMetadata(prefix+".session.max", "Memory usage per sql session for "+endpoint)
 	MetaMemSessionCurBytes := makeMemMetricMetadata(prefix+".session.current", "Current sql session memory usage for "+endpoint)
 	return MemoryMetrics{
-		BaseMemoryMetrics:    base,
-		TxnMaxBytesHist:      metric.NewHistogram(MetaMemMaxTxnBytes, histogramWindow, metric.MemoryUsage64MBBuckets),
-		TxnCurBytesCount:     metric.NewGauge(MetaMemTxnCurBytes),
-		SessionMaxBytesHist:  metric.NewHistogram(MetaMemMaxSessionBytes, histogramWindow, metric.MemoryUsage64MBBuckets),
+		BaseMemoryMetrics: base,
+		TxnMaxBytesHist: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: MetaMemMaxTxnBytes,
+			Duration: histogramWindow,
+			MaxVal:   log10int64times1000,
+			SigFigs:  3,
+			Buckets:  metric.MemoryUsage64MBBuckets}),
+		TxnCurBytesCount: metric.NewGauge(MetaMemTxnCurBytes),
+		SessionMaxBytesHist: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: MetaMemMaxSessionBytes,
+			Duration: histogramWindow,
+			MaxVal:   log10int64times1000,
+			SigFigs:  3,
+			Buckets:  metric.MemoryUsage64MBBuckets}),
 		SessionCurBytesCount: metric.NewGauge(MetaMemSessionCurBytes),
 	}
 
diff --git a/pkg/sql/pgwire/pre_serve.go b/pkg/sql/pgwire/pre_serve.go
index e9d2a975c29e..b87b61ad406b 100644
--- a/pkg/sql/pgwire/pre_serve.go
+++ b/pkg/sql/pgwire/pre_serve.go
@@ -183,7 +183,7 @@ type tenantIndependentMetrics struct {
 	PreServeBytesOutCount *metric.Counter
 	PreServeConnFailures  *metric.Counter
 	PreServeNewConns      *metric.Counter
-	PreServeMaxBytes      *metric.Histogram
+	PreServeMaxBytes      metric.IHistogram
 	PreServeCurBytes      *metric.Gauge
 }
 
@@ -193,8 +193,13 @@ func makeTenantIndependentMetrics(histogramWindow time.Duration) tenantIndepende
 		PreServeBytesOutCount: metric.NewCounter(MetaPreServeBytesOut),
 		PreServeNewConns:      metric.NewCounter(MetaPreServeNewConns),
 		PreServeConnFailures:  metric.NewCounter(MetaPreServeConnFailures),
-		PreServeMaxBytes:      metric.NewHistogram(MetaPreServeMaxBytes, histogramWindow, metric.MemoryUsage64MBBuckets),
-		PreServeCurBytes:      metric.NewGauge(MetaPreServeCurBytes),
+		PreServeMaxBytes: metric.NewHistogram(metric.HistogramOptions{
+			Metadata: MetaPreServeMaxBytes,
+			Duration: histogramWindow,
+			Buckets:  metric.MemoryUsage64MBBuckets,
+			Mode:     metric.HistogramModePrometheus,
+		}),
+		PreServeCurBytes: metric.NewGauge(MetaPreServeCurBytes),
 	}
 }
 
diff --git a/pkg/sql/pgwire/server.go b/pkg/sql/pgwire/server.go
index 1e07fc426b36..56d9a0b14ccc 100644
--- a/pkg/sql/pgwire/server.go
+++ b/pkg/sql/pgwire/server.go
@@ -256,7 +256,7 @@ type tenantSpecificMetrics struct {
 	BytesOutCount               *metric.Counter
 	Conns                       *metric.Gauge
 	NewConns                    *metric.Counter
-	ConnLatency                 *metric.Histogram
+	ConnLatency                 metric.IHistogram
 	ConnFailures                *metric.Counter
 	PGWireCancelTotalCount      *metric.Counter
 	PGWireCancelIgnoredCount    *metric.Counter
@@ -273,9 +273,12 @@ func makeTenantSpecificMetrics(
 		BytesOutCount: metric.NewCounter(MetaBytesOut),
 		Conns:         metric.NewGauge(MetaConns),
 		NewConns:      metric.NewCounter(MetaNewConns),
-		ConnLatency: metric.NewHistogram(
-			MetaConnLatency, histogramWindow, metric.IOLatencyBuckets,
-		),
+		ConnLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: MetaConnLatency,
+			Duration: histogramWindow,
+			Buckets:  metric.IOLatencyBuckets,
+		}),
 		ConnFailures:                metric.NewCounter(MetaConnFailures),
 		PGWireCancelTotalCount:      metric.NewCounter(MetaPGWireCancelTotal),
 		PGWireCancelIgnoredCount:    metric.NewCounter(MetaPGWireCancelIgnored),
diff --git a/pkg/sql/sqlstats/persistedsqlstats/provider.go b/pkg/sql/sqlstats/persistedsqlstats/provider.go
index f6581e68eac1..3a95761a8499 100644
--- a/pkg/sql/sqlstats/persistedsqlstats/provider.go
+++ b/pkg/sql/sqlstats/persistedsqlstats/provider.go
@@ -43,7 +43,7 @@ type Config struct {
 
 	// Metrics.
 	FlushCounter   *metric.Counter
-	FlushDuration  *metric.Histogram
+	FlushDuration  metric.IHistogram
 	FailureCounter *metric.Counter
 
 	// Testing knobs.
diff --git a/pkg/sql/sqlstats/sslocal/sql_stats.go b/pkg/sql/sqlstats/sslocal/sql_stats.go
index 90c3350961eb..92fe6dcf3904 100644
--- a/pkg/sql/sqlstats/sslocal/sql_stats.go
+++ b/pkg/sql/sqlstats/sslocal/sql_stats.go
@@ -75,7 +75,7 @@ func newSQLStats(
 	uniqueStmtFingerprintLimit *settings.IntSetting,
 	uniqueTxnFingerprintLimit *settings.IntSetting,
 	curMemBytesCount *metric.Gauge,
-	maxMemBytesHist *metric.Histogram,
+	maxMemBytesHist metric.IHistogram,
 	insightsWriter insights.WriterProvider,
 	parentMon *mon.BytesMonitor,
 	flushTarget Sink,
diff --git a/pkg/sql/sqlstats/sslocal/sslocal_provider.go b/pkg/sql/sqlstats/sslocal/sslocal_provider.go
index b508533ada09..e375c9c00e27 100644
--- a/pkg/sql/sqlstats/sslocal/sslocal_provider.go
+++ b/pkg/sql/sqlstats/sslocal/sslocal_provider.go
@@ -35,7 +35,7 @@ func New(
 	maxStmtFingerprints *settings.IntSetting,
 	maxTxnFingerprints *settings.IntSetting,
 	curMemoryBytesCount *metric.Gauge,
-	maxMemoryBytesHist *metric.Histogram,
+	maxMemoryBytesHist metric.IHistogram,
 	insightsWriter insights.WriterProvider,
 	pool *mon.BytesMonitor,
 	reportingSink Sink,
diff --git a/pkg/sql/ttl/ttljob/ttljob_metrics.go b/pkg/sql/ttl/ttljob/ttljob_metrics.go
index d9b450a8d481..ae526f574226 100644
--- a/pkg/sql/ttl/ttljob/ttljob_metrics.go
+++ b/pkg/sql/ttl/ttljob/ttljob_metrics.go
@@ -96,41 +96,48 @@ func (m *RowLevelTTLAggMetrics) loadMetrics(labelMetrics bool, relation string)
 }
 
 func makeRowLevelTTLAggMetrics(histogramWindowInterval time.Duration) metric.Struct {
+	sigFigs := 2
 	b := aggmetric.MakeBuilder("relation")
 	ret := &RowLevelTTLAggMetrics{
-		SpanTotalDuration: b.Histogram(
-			metric.Metadata{
+		SpanTotalDuration: b.Histogram(metric.HistogramOptions{
+			Metadata: metric.Metadata{
 				Name:        "jobs.row_level_ttl.span_total_duration",
 				Help:        "Duration for processing a span during row level TTL.",
 				Measurement: "nanoseconds",
 				Unit:        metric.Unit_NANOSECONDS,
 				MetricType:  io_prometheus_client.MetricType_HISTOGRAM,
 			},
-			histogramWindowInterval,
-			metric.LongRunning60mLatencyBuckets,
-		),
-		SelectDuration: b.Histogram(
-			metric.Metadata{
+			MaxVal:   time.Hour.Nanoseconds(),
+			SigFigs:  sigFigs,
+			Duration: histogramWindowInterval,
+			Buckets:  metric.LongRunning60mLatencyBuckets,
+		}),
+		SelectDuration: b.Histogram(metric.HistogramOptions{
+			Metadata: metric.Metadata{
 				Name:        "jobs.row_level_ttl.select_duration",
 				Help:        "Duration for select requests during row level TTL.",
 				Measurement: "nanoseconds",
 				Unit:        metric.Unit_NANOSECONDS,
 				MetricType:  io_prometheus_client.MetricType_HISTOGRAM,
 			},
-			histogramWindowInterval,
-			metric.BatchProcessLatencyBuckets,
-		),
-		DeleteDuration: b.Histogram(
-			metric.Metadata{
+			MaxVal:   time.Minute.Nanoseconds(),
+			SigFigs:  sigFigs,
+			Duration: histogramWindowInterval,
+			Buckets:  metric.BatchProcessLatencyBuckets,
+		}),
+		DeleteDuration: b.Histogram(metric.HistogramOptions{
+			Metadata: metric.Metadata{
 				Name:        "jobs.row_level_ttl.delete_duration",
 				Help:        "Duration for delete requests during row level TTL.",
 				Measurement: "nanoseconds",
 				Unit:        metric.Unit_NANOSECONDS,
 				MetricType:  io_prometheus_client.MetricType_HISTOGRAM,
 			},
-			histogramWindowInterval,
-			metric.BatchProcessLatencyBuckets,
-		),
+			MaxVal:   time.Minute.Nanoseconds(),
+			SigFigs:  sigFigs,
+			Duration: histogramWindowInterval,
+			Buckets:  metric.BatchProcessLatencyBuckets,
+		}),
 		RowSelections: b.Counter(
 			metric.Metadata{
 				Name:        "jobs.row_level_ttl.rows_selected",
diff --git a/pkg/util/admission/work_queue.go b/pkg/util/admission/work_queue.go
index be9b4fabc3d4..d6211bee01ab 100644
--- a/pkg/util/admission/work_queue.go
+++ b/pkg/util/admission/work_queue.go
@@ -1561,7 +1561,7 @@ type workQueueMetricsSingle struct {
 	Requested       *metric.Counter
 	Admitted        *metric.Counter
 	Errored         *metric.Counter
-	WaitDurations   *metric.Histogram
+	WaitDurations   metric.IHistogram
 	WaitQueueLength *metric.Gauge
 }
 
@@ -1622,9 +1622,12 @@ func makeWorkQueueMetricsSingle(name string) workQueueMetricsSingle {
 		Requested: metric.NewCounter(addName(name, requestedMeta)),
 		Admitted:  metric.NewCounter(addName(name, admittedMeta)),
 		Errored:   metric.NewCounter(addName(name, erroredMeta)),
-		WaitDurations: metric.NewHistogram(
-			addName(name, waitDurationsMeta), base.DefaultHistogramWindowInterval(), metric.IOLatencyBuckets,
-		),
+		WaitDurations: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: addName(name, waitDurationsMeta),
+			Duration: base.DefaultHistogramWindowInterval(),
+			Buckets:  metric.IOLatencyBuckets,
+		}),
 		WaitQueueLength: metric.NewGauge(addName(name, waitQueueLengthMeta)),
 	}
 }
diff --git a/pkg/util/metric/BUILD.bazel b/pkg/util/metric/BUILD.bazel
index 7d0473c33b40..e94462787b40 100644
--- a/pkg/util/metric/BUILD.bazel
+++ b/pkg/util/metric/BUILD.bazel
@@ -8,6 +8,7 @@ go_library(
     srcs = [
         "doc.go",
         "graphite_exporter.go",
+        "hdrhistogram.go",
         "histogram_buckets.go",
         "metric.go",
         "prometheus_exporter.go",
@@ -22,10 +23,13 @@ go_library(
     importpath = "github.com/cockroachdb/cockroach/pkg/util/metric",
     visibility = ["//visibility:public"],
     deps = [
+        "//pkg/util",
+        "//pkg/util/envutil",
         "//pkg/util/log",
         "//pkg/util/syncutil",
         "//pkg/util/timeutil",
         "@com_github_cockroachdb_errors//:errors",
+        "@com_github_codahale_hdrhistogram//:hdrhistogram",
         "@com_github_gogo_protobuf//proto",
         "@com_github_prometheus_client_golang//prometheus",
         "@com_github_prometheus_client_golang//prometheus/graphite",
diff --git a/pkg/util/metric/aggmetric/BUILD.bazel b/pkg/util/metric/aggmetric/BUILD.bazel
index daf77ba5d5ff..5984de7e5154 100644
--- a/pkg/util/metric/aggmetric/BUILD.bazel
+++ b/pkg/util/metric/aggmetric/BUILD.bazel
@@ -17,7 +17,6 @@ go_library(
         "@com_github_cockroachdb_errors//:errors",
         "@com_github_gogo_protobuf//proto",
         "@com_github_google_btree//:btree",
-        "@com_github_prometheus_client_golang//prometheus",
         "@com_github_prometheus_client_model//go",
     ],
 )
diff --git a/pkg/util/metric/aggmetric/agg_metric.go b/pkg/util/metric/aggmetric/agg_metric.go
index c9afb965d64f..ab5ad03ce5b6 100644
--- a/pkg/util/metric/aggmetric/agg_metric.go
+++ b/pkg/util/metric/aggmetric/agg_metric.go
@@ -15,7 +15,6 @@ package aggmetric
 
 import (
 	"strings"
-	"time"
 
 	"github.com/cockroachdb/cockroach/pkg/util/metric"
 	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
@@ -50,10 +49,8 @@ func (b Builder) Counter(metadata metric.Metadata) *AggCounter {
 }
 
 // Histogram constructs a new AggHistogram with the Builder's labels.
-func (b Builder) Histogram(
-	metadata metric.Metadata, duration time.Duration, buckets []float64,
-) *AggHistogram {
-	return NewHistogram(metadata, duration, buckets, b.labels...)
+func (b Builder) Histogram(opts metric.HistogramOptions) *AggHistogram {
+	return NewHistogram(opts, b.labels...)
 }
 
 type childSet struct {
diff --git a/pkg/util/metric/aggmetric/agg_metric_test.go b/pkg/util/metric/aggmetric/agg_metric_test.go
index 842a30504889..a139347d38db 100644
--- a/pkg/util/metric/aggmetric/agg_metric_test.go
+++ b/pkg/util/metric/aggmetric/agg_metric_test.go
@@ -13,6 +13,7 @@ package aggmetric_test
 import (
 	"bufio"
 	"bytes"
+	"fmt"
 	"sort"
 	"strings"
 	"testing"
@@ -63,10 +64,15 @@ func TestAggMetric(t *testing.T) {
 		Name: "baz_gauge",
 	}, "tenant_id")
 	r.AddMetric(f)
-
-	h := aggmetric.NewHistogram(metric.Metadata{
-		Name: "histo_gram",
-	}, base.DefaultHistogramWindowInterval(), metric.Count1KBuckets, "tenant_id")
+	h := aggmetric.NewHistogram(metric.HistogramOptions{
+		Metadata: metric.Metadata{
+			Name: "histo_gram",
+		},
+		Duration: base.DefaultHistogramWindowInterval(),
+		MaxVal:   100,
+		SigFigs:  1,
+		Buckets:  metric.Count1KBuckets,
+	}, "tenant_id")
 	r.AddMetric(h)
 
 	tenant2 := roachpb.MustMakeTenantID(2)
@@ -87,18 +93,28 @@ func TestAggMetric(t *testing.T) {
 		g3.Inc(3)
 		g3.Dec(1)
 		f2.Update(1.5)
+		fmt.Println(r)
 		f3.Update(2.5)
 		h2.RecordValue(10)
 		h3.RecordValue(90)
-		echotest.Require(t, writePrometheusMetrics(t), datapathutils.TestDataPath(t, "basic.txt"))
+		testFile := "basic.txt"
+		if metric.HdrEnabled() {
+			testFile = "basic_hdr.txt"
+		}
+		echotest.Require(t, writePrometheusMetrics(t), datapathutils.TestDataPath(t, testFile))
 	})
 
 	t.Run("destroy", func(t *testing.T) {
+		fmt.Println(r)
 		g3.Unlink()
 		c2.Unlink()
 		f3.Unlink()
 		h3.Unlink()
-		echotest.Require(t, writePrometheusMetrics(t), datapathutils.TestDataPath(t, "destroy.txt"))
+		testFile := "destroy.txt"
+		if metric.HdrEnabled() {
+			testFile = "destroy_hdr.txt"
+		}
+		echotest.Require(t, writePrometheusMetrics(t), datapathutils.TestDataPath(t, testFile))
 	})
 
 	t.Run("panic on already exists", func(t *testing.T) {
@@ -119,7 +135,11 @@ func TestAggMetric(t *testing.T) {
 		c2 = c.AddChild(tenant2.String())
 		f3 = f.AddChild(tenant3.String())
 		h3 = h.AddChild(tenant3.String())
-		echotest.Require(t, writePrometheusMetrics(t), datapathutils.TestDataPath(t, "add_after_destroy.txt"))
+		testFile := "add_after_destroy.txt"
+		if metric.HdrEnabled() {
+			testFile = "add_after_destroy_hdr.txt"
+		}
+		echotest.Require(t, writePrometheusMetrics(t), datapathutils.TestDataPath(t, testFile))
 	})
 
 	t.Run("panic on label length mismatch", func(t *testing.T) {
@@ -135,8 +155,13 @@ func TestAggMetricBuilder(t *testing.T) {
 	c := b.Counter(metric.Metadata{Name: "foo_counter"})
 	g := b.Gauge(metric.Metadata{Name: "bar_gauge"})
 	f := b.GaugeFloat64(metric.Metadata{Name: "baz_gauge"})
-	h := b.Histogram(metric.Metadata{Name: "histo_gram"},
-		base.DefaultHistogramWindowInterval(), metric.Count1KBuckets)
+	h := b.Histogram(metric.HistogramOptions{
+		Metadata: metric.Metadata{Name: "histo_gram"},
+		Duration: base.DefaultHistogramWindowInterval(),
+		MaxVal:   100,
+		SigFigs:  1,
+		Buckets:  metric.Count1KBuckets,
+	})
 
 	for i := 5; i < 10; i++ {
 		tenantLabel := roachpb.MustMakeTenantID(uint64(i)).String()
diff --git a/pkg/util/metric/aggmetric/histogram.go b/pkg/util/metric/aggmetric/histogram.go
index 68d30fab17a2..1c07cc1ef3d3 100644
--- a/pkg/util/metric/aggmetric/histogram.go
+++ b/pkg/util/metric/aggmetric/histogram.go
@@ -11,10 +11,7 @@
 package aggmetric
 
 import (
-	"time"
-
 	"github.com/cockroachdb/cockroach/pkg/util/metric"
-	"github.com/prometheus/client_golang/prometheus"
 	io_prometheus_client "github.com/prometheus/client_model/go"
 )
 
@@ -23,8 +20,8 @@ import (
 // children, while its children are additionally exported to prometheus via the
 // PrometheusIterable interface.
 type AggHistogram struct {
-	h      metric.Histogram
-	create func() *metric.Histogram
+	h      metric.IHistogram
+	create func() metric.IHistogram
 	childSet
 }
 
@@ -34,14 +31,12 @@ var _ metric.PrometheusExportable = (*AggHistogram)(nil)
 var _ metric.WindowedHistogram = (*AggHistogram)(nil)
 
 // NewHistogram constructs a new AggHistogram.
-func NewHistogram(
-	metadata metric.Metadata, duration time.Duration, buckets []float64, childLabels ...string,
-) *AggHistogram {
-	create := func() *metric.Histogram {
-		return metric.NewHistogram(metadata, duration, buckets)
+func NewHistogram(opts metric.HistogramOptions, childLabels ...string) *AggHistogram {
+	create := func() metric.IHistogram {
+		return metric.NewHistogram(opts)
 	}
 	a := &AggHistogram{
-		h:      *create(),
+		h:      create(),
 		create: create,
 	}
 	a.init(childLabels)
@@ -96,19 +91,13 @@ func (a *AggHistogram) ToPrometheusMetric() *io_prometheus_client.Metric {
 	return a.h.ToPrometheusMetric()
 }
 
-// Windowed returns a copy of the current windowed histogram data and its
-// rotation interval.
-func (a *AggHistogram) Windowed() prometheus.Histogram {
-	return a.h.Windowed()
-}
-
 // AddChild adds a Counter to this AggCounter. This method panics if a Counter
 // already exists for this set of labelVals.
 func (a *AggHistogram) AddChild(labelVals ...string) *Histogram {
 	child := &Histogram{
 		parent:           a,
 		labelValuesSlice: labelValuesSlice(labelVals),
-		h:                *a.create(),
+		h:                a.create(),
 	}
 	a.add(child)
 	return child
@@ -121,7 +110,7 @@ func (a *AggHistogram) AddChild(labelVals ...string) *Histogram {
 type Histogram struct {
 	parent *AggHistogram
 	labelValuesSlice
-	h metric.Histogram
+	h metric.IHistogram
 }
 
 // ToPrometheusMetric constructs a prometheus metric for this Histogram.
diff --git a/pkg/util/metric/aggmetric/testdata/add_after_destroy_hdr.txt b/pkg/util/metric/aggmetric/testdata/add_after_destroy_hdr.txt
new file mode 100644
index 000000000000..ffcbd4571a33
--- /dev/null
+++ b/pkg/util/metric/aggmetric/testdata/add_after_destroy_hdr.txt
@@ -0,0 +1,23 @@
+echo
+----
+bar_gauge 4
+bar_gauge{tenant_id="2"} 2
+bar_gauge{tenant_id="3"} 0
+baz_gauge 4
+baz_gauge{tenant_id="2"} 1.5
+baz_gauge{tenant_id="3"} 0
+foo_counter 6
+foo_counter{tenant_id="2"} 0
+foo_counter{tenant_id="3"} 4
+histo_gram_bucket{le="+Inf"} 2
+histo_gram_bucket{le="10"} 1
+histo_gram_bucket{le="91"} 2
+histo_gram_bucket{tenant_id="2",le="+Inf"} 1
+histo_gram_bucket{tenant_id="2",le="10"} 1
+histo_gram_bucket{tenant_id="3",le="+Inf"} 0
+histo_gram_count 2
+histo_gram_count{tenant_id="2"} 1
+histo_gram_count{tenant_id="3"} 0
+histo_gram_sum 101
+histo_gram_sum{tenant_id="2"} 10
+histo_gram_sum{tenant_id="3"} 0
\ No newline at end of file
diff --git a/pkg/util/metric/aggmetric/testdata/basic_hdr.txt b/pkg/util/metric/aggmetric/testdata/basic_hdr.txt
new file mode 100644
index 000000000000..a796b8ef3406
--- /dev/null
+++ b/pkg/util/metric/aggmetric/testdata/basic_hdr.txt
@@ -0,0 +1,24 @@
+echo
+----
+bar_gauge 4
+bar_gauge{tenant_id="2"} 2
+bar_gauge{tenant_id="3"} 2
+baz_gauge 4
+baz_gauge{tenant_id="2"} 1.5
+baz_gauge{tenant_id="3"} 2.5
+foo_counter 6
+foo_counter{tenant_id="2"} 2
+foo_counter{tenant_id="3"} 4
+histo_gram_bucket{le="+Inf"} 2
+histo_gram_bucket{le="10"} 1
+histo_gram_bucket{le="91"} 2
+histo_gram_bucket{tenant_id="2",le="+Inf"} 1
+histo_gram_bucket{tenant_id="2",le="10"} 1
+histo_gram_bucket{tenant_id="3",le="+Inf"} 1
+histo_gram_bucket{tenant_id="3",le="91"} 1
+histo_gram_count 2
+histo_gram_count{tenant_id="2"} 1
+histo_gram_count{tenant_id="3"} 1
+histo_gram_sum 101
+histo_gram_sum{tenant_id="2"} 10
+histo_gram_sum{tenant_id="3"} 91
\ No newline at end of file
diff --git a/pkg/util/metric/aggmetric/testdata/destroy_hdr.txt b/pkg/util/metric/aggmetric/testdata/destroy_hdr.txt
new file mode 100644
index 000000000000..dd17b7aae5bb
--- /dev/null
+++ b/pkg/util/metric/aggmetric/testdata/destroy_hdr.txt
@@ -0,0 +1,17 @@
+echo
+----
+bar_gauge 4
+bar_gauge{tenant_id="2"} 2
+baz_gauge 4
+baz_gauge{tenant_id="2"} 1.5
+foo_counter 6
+foo_counter{tenant_id="3"} 4
+histo_gram_bucket{le="+Inf"} 2
+histo_gram_bucket{le="10"} 1
+histo_gram_bucket{le="91"} 2
+histo_gram_bucket{tenant_id="2",le="+Inf"} 1
+histo_gram_bucket{tenant_id="2",le="10"} 1
+histo_gram_count 2
+histo_gram_count{tenant_id="2"} 1
+histo_gram_sum 101
+histo_gram_sum{tenant_id="2"} 10
\ No newline at end of file
diff --git a/pkg/util/metric/hdrhistogram.go b/pkg/util/metric/hdrhistogram.go
new file mode 100644
index 000000000000..e33582b9c2e3
--- /dev/null
+++ b/pkg/util/metric/hdrhistogram.go
@@ -0,0 +1,248 @@
+// Copyright 2023 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package metric
+
+import (
+	"time"
+
+	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
+	"github.com/codahale/hdrhistogram"
+	prometheusgo "github.com/prometheus/client_model/go"
+)
+
+const (
+	// HdrHistogramMaxLatency is the maximum value tracked in latency histograms. Higher
+	// values will be recorded as this value instead.
+	HdrHistogramMaxLatency = 10 * time.Second
+
+	// hdrHistogramHistWrapNum is the number of histograms kept in the rolling window.
+	hdrHistogramHistWrapNum = 2
+)
+
+// A HdrHistogram collects observed values by keeping bucketed counts. For
+// convenience, internally two sets of buckets are kept: A cumulative set (i.e.
+// data is never evicted) and a windowed set (which keeps only recently
+// collected samples).
+//
+// Top-level methods generally apply to the cumulative buckets; the windowed
+// variant is exposed through the Windowed method.
+//
+// TODO(#96357): remove HdrHistogram model entirely once the Prometheus
+// backed histogram and its bucket boundaries have been reliably proven in
+// production.
+type HdrHistogram struct {
+	Metadata
+	maxVal int64
+	mu     struct {
+		syncutil.Mutex
+		cumulative *hdrhistogram.Histogram
+		*tickHelper
+		sliding *hdrhistogram.WindowedHistogram
+	}
+}
+
+var _ IHistogram = &HdrHistogram{}
+var _ PrometheusExportable = &HdrHistogram{}
+var _ Iterable = &HdrHistogram{}
+
+// NewHdrHistogram initializes a given Histogram. The contained windowed histogram
+// rotates every 'duration'; both the windowed and the cumulative histogram
+// track nonnegative values up to 'maxVal' with 'sigFigs' decimal points of
+// precision.
+func NewHdrHistogram(
+	metadata Metadata, duration time.Duration, maxVal int64, sigFigs int,
+) *HdrHistogram {
+	h := &HdrHistogram{
+		Metadata: metadata,
+		maxVal:   maxVal,
+	}
+	wHist := hdrhistogram.NewWindowed(hdrHistogramHistWrapNum, 0, maxVal, sigFigs)
+	h.mu.cumulative = hdrhistogram.New(0, maxVal, sigFigs)
+	h.mu.sliding = wHist
+	h.mu.tickHelper = &tickHelper{
+		nextT:        now(),
+		tickInterval: duration / hdrHistogramHistWrapNum,
+		onTick: func() {
+			wHist.Rotate()
+		},
+	}
+	return h
+}
+
+// NewHdrLatency is a convenience function which returns a histogram with
+// suitable defaults for latency tracking. Values are expressed in ns,
+// are truncated into the interval [0, HdrHistogramMaxLatency] and are recorded
+// with one digit of precision (i.e. errors of <10ms at 100ms, <6s at 60s).
+//
+// The windowed portion of the Histogram retains values for approximately
+// histogramWindow.
+func NewHdrLatency(metadata Metadata, histogramWindow time.Duration) *HdrHistogram {
+	return NewHdrHistogram(
+		metadata, histogramWindow, HdrHistogramMaxLatency.Nanoseconds(), 1,
+	)
+}
+
+// RecordValue adds the given value to the histogram. Recording a value in
+// excess of the configured maximum value for that histogram results in
+// recording the maximum value instead.
+func (h *HdrHistogram) RecordValue(v int64) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	if h.mu.sliding.Current.RecordValue(v) != nil {
+		_ = h.mu.sliding.Current.RecordValue(h.maxVal)
+	}
+	if h.mu.cumulative.RecordValue(v) != nil {
+		_ = h.mu.cumulative.RecordValue(h.maxVal)
+	}
+}
+
+// TotalCount returns the (cumulative) number of samples.
+func (h *HdrHistogram) TotalCount() int64 {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	return h.mu.cumulative.TotalCount()
+}
+
+// Min returns the minimum.
+func (h *HdrHistogram) Min() int64 {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	return h.mu.cumulative.Min()
+}
+
+// Inspect calls maybeTick under the lock, then calls the closure with the receiver.
+func (h *HdrHistogram) Inspect(f func(interface{})) {
+	h.mu.Lock()
+	maybeTick(h.mu.tickHelper)
+	h.mu.Unlock()
+	f(h)
+}
+
+// GetType returns the prometheus type enum for this metric.
+func (h *HdrHistogram) GetType() *prometheusgo.MetricType {
+	return prometheusgo.MetricType_HISTOGRAM.Enum()
+}
+
+// ToPrometheusMetric returns a filled-in prometheus metric of the right type.
+func (h *HdrHistogram) ToPrometheusMetric() *prometheusgo.Metric {
+	hist := &prometheusgo.Histogram{}
+
+	h.mu.Lock()
+	maybeTick(h.mu.tickHelper)
+	bars := h.mu.cumulative.Distribution()
+	hist.Bucket = make([]*prometheusgo.Bucket, 0, len(bars))
+
+	var cumCount uint64
+	var sum float64
+	for _, bar := range bars {
+		if bar.Count == 0 {
+			// No need to expose trivial buckets.
+			continue
+		}
+		upperBound := float64(bar.To)
+		sum += upperBound * float64(bar.Count)
+
+		cumCount += uint64(bar.Count)
+		curCumCount := cumCount // need a new alloc thanks to bad proto code
+
+		hist.Bucket = append(hist.Bucket, &prometheusgo.Bucket{
+			CumulativeCount: &curCumCount,
+			UpperBound:      &upperBound,
+		})
+	}
+	hist.SampleCount = &cumCount
+	hist.SampleSum = &sum // can do better here; we approximate in the loop
+	h.mu.Unlock()
+
+	return &prometheusgo.Metric{
+		Histogram: hist,
+	}
+}
+
+// TotalCountWindowed implements the WindowedHistogram interface.
+func (h *HdrHistogram) TotalCountWindowed() int64 {
+	return int64(h.ToPrometheusMetricWindowed().Histogram.GetSampleCount())
+}
+
+// TotalSumWindowed implements the WindowedHistogram interface.
+func (h *HdrHistogram) TotalSumWindowed() float64 {
+	return h.ToPrometheusMetricWindowed().Histogram.GetSampleSum()
+}
+
+func (h *HdrHistogram) toPrometheusMetricWindowedLocked() *prometheusgo.Metric {
+	hist := &prometheusgo.Histogram{}
+
+	maybeTick(h.mu.tickHelper)
+	bars := h.mu.sliding.Current.Distribution()
+	hist.Bucket = make([]*prometheusgo.Bucket, 0, len(bars))
+
+	var cumCount uint64
+	var sum float64
+	for _, bar := range bars {
+		if bar.Count == 0 {
+			// No need to expose trivial buckets.
+			continue
+		}
+		upperBound := float64(bar.To)
+		sum += upperBound * float64(bar.Count)
+
+		cumCount += uint64(bar.Count)
+		curCumCount := cumCount // need a new alloc thanks to bad proto code
+
+		hist.Bucket = append(hist.Bucket, &prometheusgo.Bucket{
+			CumulativeCount: &curCumCount,
+			UpperBound:      &upperBound,
+		})
+	}
+	hist.SampleCount = &cumCount
+	hist.SampleSum = &sum // can do better here; we approximate in the loop
+
+	return &prometheusgo.Metric{
+		Histogram: hist,
+	}
+}
+
+// ToPrometheusMetricWindowed returns a filled-in prometheus metric of the right type.
+func (h *HdrHistogram) ToPrometheusMetricWindowed() *prometheusgo.Metric {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	return h.toPrometheusMetricWindowedLocked()
+}
+
+// GetMetadata returns the metric's metadata including the Prometheus
+// MetricType.
+func (h *HdrHistogram) GetMetadata() Metadata {
+	baseMetadata := h.Metadata
+	baseMetadata.MetricType = prometheusgo.MetricType_HISTOGRAM
+	return baseMetadata
+}
+
+func (h *HdrHistogram) ValueAtQuantileWindowed(q float64) float64 {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	return ValueAtQuantileWindowed(h.toPrometheusMetricWindowedLocked().Histogram, q)
+}
+
+func (h *HdrHistogram) Mean() float64 {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	return h.mu.cumulative.Mean()
+}
+
+// TotalSum returns the (cumulative, approximate) sum of recorded samples.
+func (h *HdrHistogram) TotalSum() float64 {
+	// ToPrometheusMetric acquires h.mu itself, so the lock must not be held
+	// here (it would self-deadlock). The sum lives on the Histogram field.
+	return h.ToPrometheusMetric().GetHistogram().GetSampleSum()
+}
diff --git a/pkg/util/metric/metric.go b/pkg/util/metric/metric.go
index a6b794422e4b..afd19cf83638 100644
--- a/pkg/util/metric/metric.go
+++ b/pkg/util/metric/metric.go
@@ -17,6 +17,8 @@ import (
 	"sync/atomic"
 	"time"
 
+	"github.com/cockroachdb/cockroach/pkg/util"
+	"github.com/cockroachdb/cockroach/pkg/util/envutil"
 	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
 	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
 	"github.com/gogo/protobuf/proto"
@@ -25,11 +27,9 @@ import (
 	"github.com/rcrowley/go-metrics"
 )
 
-const (
-	// TestSampleInterval is passed to histograms during tests which don't
-	// want to concern themselves with supplying a "correct" interval.
-	TestSampleInterval = time.Duration(math.MaxInt64)
-)
+// TestSampleInterval is passed to histograms during tests which don't
+// want to concern themselves with supplying a "correct" interval.
+const TestSampleInterval = time.Duration(math.MaxInt64)
 
 // Iterable provides a method for synchronized access to interior objects.
 type Iterable interface {
@@ -176,10 +176,81 @@ func maybeTick(m periodic) {
 	}
 }
 
+// useHdrHistogramsEnvVar can be used to switch all histograms to use the
+// legacy HDR histograms (except for those that explicitly force the use
+// of the newer Prometheus via HistogramModePrometheus). HDR Histograms
+// dynamically generate bucket boundaries, which can lead to hundreds of
+// buckets. This can cause performance issues with timeseries databases
+// like Prometheus.
+const useHdrHistogramsEnvVar = "COCKROACH_ENABLE_HDR_HISTOGRAMS"
+
+var hdrEnabled = util.ConstantWithMetamorphicTestBool(useHdrHistogramsEnvVar, envutil.EnvOrDefaultBool(useHdrHistogramsEnvVar, false))
+
+// HdrEnabled returns whether or not the HdrHistogram model is enabled
+// in the metric package. Primarily useful in tests where we want to validate
+// different outputs depending on whether or not HDR is enabled.
+func HdrEnabled() bool {
+	return hdrEnabled
+}
+
+type HistogramMode byte
+
+const (
+	// HistogramModePrometheus will force the constructed histogram to use
+	// the Prometheus histogram model, regardless of the value of
+	// useHdrHistogramsEnvVar. This option should be used for all
+	// newly defined histograms moving forward.
+	//
+	// NB: If neither this mode nor the HistogramModePreferHdrLatency mode
+	// is set, MaxVal and SigFigs must be defined to maintain backwards
+	// compatibility with the legacy HdrHistogram model.
+	HistogramModePrometheus HistogramMode = iota + 1
+	// HistogramModePreferHdrLatency will cause the returned histogram to
+	// use the HdrHistogram model and be configured with suitable defaults
+	// for latency tracking iff useHdrHistogramsEnvVar is enabled.
+	//
+	// NB: If this option is set, no MaxVal or SigFigs are required in the
+	// HistogramOptions to maintain backwards compatibility with the legacy
+	// HdrHistogram model, since suitable defaults are used for both.
+	HistogramModePreferHdrLatency
+)
+
+type HistogramOptions struct {
+	// Metadata is the metric Metadata associated with the histogram.
+	Metadata Metadata
+	// Duration is the histogram's window duration.
+	Duration time.Duration
+	// MaxVal is only relevant to the HdrHistogram, and represents the
+	// highest trackable value in the resulting histogram buckets.
+	MaxVal int64
+	// SigFigs is only relevant to the HdrHistogram, and represents
+	// the number of significant figures to be used to determine the
+	// degree of accuracy used in measurements.
+	SigFigs int
+	// Buckets are only relevant to Prometheus histograms, and represent
+	// the pre-defined histogram bucket boundaries to be used.
+	Buckets []float64
+	// Mode defines the type of histogram to be used. See individual
+	// comments on each HistogramMode value for details.
+	Mode HistogramMode
+}
+
+// NewHistogram builds an HDR- or Prometheus-backed histogram based on opt.
+func NewHistogram(opt HistogramOptions) IHistogram {
+	switch {
+	case !hdrEnabled || opt.Mode == HistogramModePrometheus:
+		return newHistogram(opt.Metadata, opt.Duration, opt.Buckets)
+	case opt.Mode == HistogramModePreferHdrLatency:
+		return NewHdrLatency(opt.Metadata, opt.Duration)
+	default:
+		return NewHdrHistogram(opt.Metadata, opt.Duration, opt.MaxVal, opt.SigFigs)
+	}
+}
+
 // NewHistogram is a prometheus-backed histogram. Depending on the value of
 // opts.Buckets, this is suitable for recording any kind of quantity. Common
 // sensible choices are {IO,Network}LatencyBuckets.
-func NewHistogram(meta Metadata, windowDuration time.Duration, buckets []float64) *Histogram {
+func newHistogram(meta Metadata, windowDuration time.Duration, buckets []float64) *Histogram {
 	// TODO(obs-inf): prometheus supports labeled histograms but they require more
 	// plumbing and don't fit into the PrometheusObservable interface any more.
 	opts := prometheus.HistogramOpts{
@@ -236,6 +307,21 @@ type Histogram struct {
 	}
 }
 
+type IHistogram interface {
+	Iterable
+	PrometheusExportable
+	WindowedHistogram
+
+	RecordValue(n int64)
+	TotalCount() int64
+	TotalSum() float64
+	TotalCountWindowed() int64
+	TotalSumWindowed() float64
+	Mean() float64
+}
+
+var _ IHistogram = &Histogram{}
+
 func (h *Histogram) nextTick() time.Time {
 	h.windowed.RLock()
 	defer h.windowed.RUnlock()
@@ -326,7 +412,8 @@ func (h *Histogram) TotalSumWindowed() float64 {
 
 // Mean returns the (cumulative) mean of samples.
 func (h *Histogram) Mean() float64 {
-	return h.TotalSum() / float64(h.TotalCount())
+	pm := h.ToPrometheusMetric()
+	return pm.Histogram.GetSampleSum() / float64(pm.Histogram.GetSampleCount())
 }
 
 // ValueAtQuantileWindowed implements the WindowedHistogram interface.
diff --git a/pkg/util/metric/metric_ext_test.go b/pkg/util/metric/metric_ext_test.go
index a06cdfc0cb63..60c14ba25bc5 100644
--- a/pkg/util/metric/metric_ext_test.go
+++ b/pkg/util/metric/metric_ext_test.go
@@ -25,7 +25,12 @@ func TestHistogramPrometheus(t *testing.T) {
 	// Regression test against https://github.com/cockroachdb/cockroach/pull/88331.
 	// The output includes buckets for which the upper bound equals the previous
 	// bucket's upper bound.
-	h := metric.NewHistogram(metric.Metadata{}, time.Second, []float64{1, 2, 3, 4, 5, 6, 10, 20, 30})
+	h := metric.NewHistogram(metric.HistogramOptions{
+		Mode:     metric.HistogramModePrometheus,
+		Metadata: metric.Metadata{},
+		Duration: time.Second,
+		Buckets:  []float64{1, 2, 3, 4, 5, 6, 10, 20, 30},
+	})
 	h.RecordValue(1)
 	h.RecordValue(5)
 	h.RecordValue(5)
diff --git a/pkg/util/metric/metric_test.go b/pkg/util/metric/metric_test.go
index 2a273bc7fa85..d8a451ec06ad 100644
--- a/pkg/util/metric/metric_test.go
+++ b/pkg/util/metric/metric_test.go
@@ -112,17 +112,18 @@ func TestHistogram(t *testing.T) {
 		return &n
 	}
 
-	h := NewHistogram(
-		Metadata{},
-		time.Hour,
-		[]float64{
+	h := NewHistogram(HistogramOptions{
+		Mode:     HistogramModePrometheus,
+		Metadata: Metadata{},
+		Duration: time.Hour,
+		Buckets: []float64{
 			1.0,
 			5.0,
 			10.0,
 			25.0,
 			100.0,
 		},
-	)
+	})
 
 	// should return 0 if no observations are made
 	require.Equal(t, 0.0, h.ValueAtQuantileWindowed(0))
@@ -236,23 +237,24 @@ func TestNewHistogramRotate(t *testing.T) {
 	defer TestingSetNow(nil)()
 	setNow(0)
 
-	h := NewHistogram(emptyMetadata, 10*time.Second, nil)
+	h := NewHistogram(HistogramOptions{
+		Mode:     HistogramModePrometheus,
+		Metadata: emptyMetadata,
+		Duration: 10 * time.Second,
+		Buckets:  nil,
+	})
 	for i := 0; i < 4; i++ {
 		// Windowed histogram is initially empty.
 		h.Inspect(func(interface{}) {}) // triggers ticking
-		var m prometheusgo.Metric
-		require.NoError(t, h.Windowed().Write(&m))
-		require.Zero(t, *m.Histogram.SampleSum)
+		require.Zero(t, h.TotalSumWindowed())
 		// But cumulative histogram has history (if i > 0).
-		require.EqualValues(t, i, *h.ToPrometheusMetric().Histogram.SampleCount)
+		require.EqualValues(t, i, h.TotalCount())
 
 		// Add a measurement and verify it's there.
 		{
 			h.RecordValue(12345)
 			f := float64(12345)
-			var m prometheusgo.Metric
-			require.NoError(t, h.Windowed().Write(&m))
-			require.Equal(t, *m.Histogram.SampleSum, f)
+			require.Equal(t, h.TotalSumWindowed(), f)
 		}
 		// Tick. This rotates the histogram.
 		setNow(time.Duration(i+1) * 10 * time.Second)
diff --git a/pkg/util/metric/registry_test.go b/pkg/util/metric/registry_test.go
index 5d2b2a6c4e88..48f4aba216bd 100644
--- a/pkg/util/metric/registry_test.go
+++ b/pkg/util/metric/registry_test.go
@@ -76,14 +76,19 @@ func TestRegistry(t *testing.T) {
 	topCounter := NewCounter(Metadata{Name: "top.counter"})
 	r.AddMetric(topCounter)
 
-	r.AddMetric(NewHistogram(Metadata{Name: "top.histogram"}, time.Minute, Count1KBuckets))
+	r.AddMetric(NewHistogram(HistogramOptions{
+		Mode:     HistogramModePrometheus,
+		Metadata: Metadata{Name: "top.histogram"},
+		Duration: time.Minute,
+		Buckets:  Count1KBuckets,
+	}))
 
 	r.AddMetric(NewGauge(Metadata{Name: "bottom.gauge"}))
 	ms := &struct {
 		StructGauge         *Gauge
 		StructGauge64       *GaugeFloat64
 		StructCounter       *Counter
-		StructHistogram     *Histogram
+		StructHistogram     IHistogram
 		NestedStructGauge   NestedStruct
 		ArrayStructCounters [4]*Counter
 		// Ensure that nil struct values in arrays are safe.
@@ -92,7 +97,7 @@ func TestRegistry(t *testing.T) {
 		privateStructGauge            *Gauge
 		privateStructGauge64          *GaugeFloat64
 		privateStructCounter          *Counter
-		privateStructHistogram        *Histogram
+		privateStructHistogram        IHistogram
 		privateNestedStructGauge      NestedStruct
 		privateArrayStructCounters    [2]*Counter
 		NotAMetric                    int
@@ -100,10 +105,15 @@ func TestRegistry(t *testing.T) {
 		ReallyNotAMetric              *Registry
 		DefinitelyNotAnArrayOfMetrics [2]int
 	}{
-		StructGauge:     NewGauge(Metadata{Name: "struct.gauge"}),
-		StructGauge64:   NewGaugeFloat64(Metadata{Name: "struct.gauge64"}),
-		StructCounter:   NewCounter(Metadata{Name: "struct.counter"}),
-		StructHistogram: NewHistogram(Metadata{Name: "struct.histogram"}, time.Minute, Count1KBuckets),
+		StructGauge:   NewGauge(Metadata{Name: "struct.gauge"}),
+		StructGauge64: NewGaugeFloat64(Metadata{Name: "struct.gauge64"}),
+		StructCounter: NewCounter(Metadata{Name: "struct.counter"}),
+		StructHistogram: NewHistogram(HistogramOptions{
+			Mode:     HistogramModePrometheus,
+			Metadata: Metadata{Name: "struct.histogram"},
+			Duration: time.Minute,
+			Buckets:  Count1KBuckets,
+		}),
 		NestedStructGauge: NestedStruct{
 			NestedStructGauge: NewGauge(Metadata{Name: "nested.struct.gauge"}),
 		},
@@ -119,10 +129,15 @@ func TestRegistry(t *testing.T) {
 				NestedStructGauge: NewGauge(Metadata{Name: "nested.struct.array.1.gauge"}),
 			},
 		},
-		privateStructGauge:     NewGauge(Metadata{Name: "private.struct.gauge"}),
-		privateStructGauge64:   NewGaugeFloat64(Metadata{Name: "private.struct.gauge64"}),
-		privateStructCounter:   NewCounter(Metadata{Name: "private.struct.counter"}),
-		privateStructHistogram: NewHistogram(Metadata{Name: "private.struct.histogram"}, time.Minute, Count1KBuckets),
+		privateStructGauge:   NewGauge(Metadata{Name: "private.struct.gauge"}),
+		privateStructGauge64: NewGaugeFloat64(Metadata{Name: "private.struct.gauge64"}),
+		privateStructCounter: NewCounter(Metadata{Name: "private.struct.counter"}),
+		privateStructHistogram: NewHistogram(HistogramOptions{
+			Mode:     HistogramModePrometheus,
+			Metadata: Metadata{Name: "private.struct.histogram"},
+			Duration: time.Minute,
+			Buckets:  Count1KBuckets,
+		}),
 		privateNestedStructGauge: NestedStruct{
 			NestedStructGauge: NewGauge(Metadata{Name: "private.nested.struct.gauge"}),
 		},
diff --git a/pkg/util/mon/bytes_usage.go b/pkg/util/mon/bytes_usage.go
index 4ea0489afa20..5ba2493730dd 100644
--- a/pkg/util/mon/bytes_usage.go
+++ b/pkg/util/mon/bytes_usage.go
@@ -195,7 +195,7 @@ type BytesMonitor struct {
 
 		// maxBytesHist is the metric object used to track the high watermark of bytes
 		// allocated by the monitor during its lifetime.
-		maxBytesHist *metric.Histogram
+		maxBytesHist metric.IHistogram
 	}
 
 	// name identifies this monitor in logging messages.
@@ -273,7 +273,7 @@ func NewMonitor(
 	name redact.RedactableString,
 	res Resource,
 	curCount *metric.Gauge,
-	maxHist *metric.Histogram,
+	maxHist metric.IHistogram,
 	increment int64,
 	noteworthy int64,
 	settings *cluster.Settings,
@@ -289,7 +289,7 @@ func NewMonitorWithLimit(
 	res Resource,
 	limit int64,
 	curCount *metric.Gauge,
-	maxHist *metric.Histogram,
+	maxHist metric.IHistogram,
 	increment int64,
 	noteworthy int64,
 	settings *cluster.Settings,
@@ -386,7 +386,7 @@ func NewUnlimitedMonitor(
 	name redact.RedactableString,
 	res Resource,
 	curCount *metric.Gauge,
-	maxHist *metric.Histogram,
+	maxHist metric.IHistogram,
 	noteworthy int64,
 	settings *cluster.Settings,
 ) *BytesMonitor {
@@ -485,7 +485,7 @@ func (mm *BytesMonitor) AllocBytes() int64 {
 }
 
 // SetMetrics sets the metric objects for the monitor.
-func (mm *BytesMonitor) SetMetrics(curCount *metric.Gauge, maxHist *metric.Histogram) {
+func (mm *BytesMonitor) SetMetrics(curCount *metric.Gauge, maxHist metric.IHistogram) {
 	mm.mu.Lock()
 	defer mm.mu.Unlock()
 	mm.mu.curBytesCount = curCount
diff --git a/pkg/util/schedulerlatency/scheduler_latency_test.go b/pkg/util/schedulerlatency/scheduler_latency_test.go
index 1ec15ed46987..2539e6f62adb 100644
--- a/pkg/util/schedulerlatency/scheduler_latency_test.go
+++ b/pkg/util/schedulerlatency/scheduler_latency_test.go
@@ -170,7 +170,12 @@ func TestComputeSchedulerPercentileAgainstPrometheus(t *testing.T) {
 		}
 
 		// Compare values against metric.Histogram (prometheus-based implementation)
-		promhist := metric.NewHistogram(metric.Metadata{}, time.Hour, hist.Buckets)
+		promhist := metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePrometheus,
+			Metadata: metric.Metadata{},
+			Duration: time.Hour,
+			Buckets:  hist.Buckets,
+		})
 		for i := 0; i < len(hist.Counts); i++ {
 			for j := 0; j < int(hist.Counts[i]); j++ {
 				// Since the scheduler buckets are non-inclusive of Upper Bound and prometheus

From 4b32a98cbdd44243297847e2291f731554f2fe1d Mon Sep 17 00:00:00 2001
From: Alex Barganier <abarganier@cockroachlabs.com>
Date: Mon, 30 Jan 2023 16:23:56 -0400
Subject: [PATCH 2/2] pkg/util/metric: increase bucket counts for Prometheus
 histograms

This patch increases the fidelity of the histogram buckets for
the new Prometheus model. This is primarily done by increasing the
bucket count of every latency bucket set from 15 to 60. Individual
bucket sets may be tuned further based on feedback from engineering
teams about their own use cases.

Release note (ops change): Prometheus histograms will now export
more buckets across the board to improve precision & fidelity of
information reported by histogram metrics, such as quantiles.
This will lead to an increase in storage requirements to process
these histogram metrics in downstream systems like Prometheus,
but should still be a marked improvement when compared to the
legacy HdrHistogram model. If users have issues with the precision
of these bucket boundaries, they can set the environment variable
`COCKROACH_ENABLE_HDR_HISTOGRAMS=true` to revert to using the
legacy HdrHistogram model instead. This is not recommended unless
necessary, as the HdrHistogram model strains systems like Prometheus
with excessive numbers of histogram buckets. Note that HdrHistograms are
slated for full deprecation in upcoming releases.
---
 pkg/util/metric/histogram_buckets.go      | 294 +++++++++++++++++-----
 pkg/util/metric/histogram_buckets_test.go |   8 +-
 2 files changed, 241 insertions(+), 61 deletions(-)

diff --git a/pkg/util/metric/histogram_buckets.go b/pkg/util/metric/histogram_buckets.go
index 8ce3c9dce884..bf9f70579afb 100644
--- a/pkg/util/metric/histogram_buckets.go
+++ b/pkg/util/metric/histogram_buckets.go
@@ -17,20 +17,65 @@ package metric
 var IOLatencyBuckets = []float64{
 	// Generated via TestHistogramBuckets/IOLatencyBuckets.
 	10000.000000,      // 10µs
-	26826.957953,      // 26.826µs
-	71968.567300,      // 71.968µs
-	193069.772888,     // 193.069µs
-	517947.467923,     // 517.947µs
-	1389495.494373,    // 1.389495ms
-	3727593.720315,    // 3.727593ms
-	10000000.000000,   // 9.999999ms
-	26826957.952797,   // 26.826957ms
-	71968567.300115,   // 71.968567ms
-	193069772.888325,  // 193.069772ms
-	517947467.923120,  // 517.947467ms
-	1389495494.373135, // 1.389495494s
-	3727593720.314933, // 3.72759372s
-	9999999999.999981, // 9.999999999s
+	12638.482029,      // 12.638µs
+	15973.122801,      // 15.973µs
+	20187.602547,      // 20.187µs
+	25514.065200,      // 25.514µs
+	32245.905453,      // 32.245µs
+	40753.929659,      // 40.753µs
+	51506.780762,      // 51.506µs
+	65096.752305,      // 65.096µs
+	82272.413417,      // 82.272µs
+	103979.841848,     // 103.979µs
+	131414.736261,     // 131.414µs
+	166088.278263,     // 166.088µs
+	209910.372011,     // 209.91µs
+	265294.846443,     // 265.294µs
+	335292.414925,     // 335.292µs
+	423758.716060,     // 423.758µs
+	535566.691771,     // 535.566µs
+	676875.000946,     // 676.875µs
+	855467.253557,     // 855.467µs
+	1081180.751077,    // 1.08118ms
+	1366448.349295,    // 1.366448ms
+	1726983.290659,    // 1.726983ms
+	2182644.728397,    // 2.182644ms
+	2758531.617629,    // 2.758531ms
+	3486365.227678,    // 3.486365ms
+	4406236.427774,    // 4.406236ms
+	5568813.990945,    // 5.568813ms
+	7038135.554932,    // 7.038135ms
+	8895134.973108,    // 8.895134ms
+	11242100.350621,   // 11.2421ms
+	14208308.325339,   // 14.208308ms
+	17957144.943716,   // 17.957144ms
+	22695105.366947,   // 22.695105ms
+	28683168.133420,   // 28.683168ms
+	36251170.499885,   // 36.25117ms
+	45815976.690545,   // 45.815976ms
+	57904439.806025,   // 57.904439ms
+	73182422.190762,   // 73.182422ms
+	92491472.772173,   // 92.491472ms
+	116895181.649858,  // 116.895181ms
+	147737765.259851,  // 147.737765ms
+	186718109.129192,  // 186.718109ms
+	235983346.678219,  // 235.983346ms
+	298247128.621688,  // 298.247128ms
+	376939097.538835,  // 376.939097ms
+	476393801.040133,  // 476.393801ms
+	602089449.333611,  // 602.089449ms
+	760949668.545986,  // 760.949668ms
+	961724871.115294,  // 961.724871ms
+	1215474250.076283, // 1.21547425s
+	1536174946.671824, // 1.536174946s
+	1941491945.743876, // 1.941491945s
+	2453751106.639811, // 2.453751106s
+	3101168926.574770, // 3.101168926s
+	3919406774.847209, // 3.919406774s
+	4953535208.959157, // 4.953535208s
+	6260516572.014802, // 6.260516572s
+	7912342618.981298, // 7.912342618s
+	9999999999.999969, // 9.999999999s
 }
 
 // NetworkLatencyBuckets are prometheus histogram buckets suitable for a histogram
@@ -39,21 +84,66 @@ var IOLatencyBuckets = []float64{
 // range during normal operation.
 var NetworkLatencyBuckets = []float64{
 	// Generated via TestHistogramBuckets/NetworkLatencyBuckets.
-	500000.000000,    // 500µs
-	860513.842995,    // 860.513µs
-	1480968.147973,   // 1.480968ms
-	2548787.184731,   // 2.548787ms
-	4386533.310619,   // 4.386533ms
-	7549345.273094,   // 7.549345ms
-	12992632.226094,  // 12.992632ms
-	22360679.774998,  // 22.360679ms
-	38483348.970335,  // 38.483348ms
-	66230909.027573,  // 66.230909ms
-	113985228.104760, // 113.985228ms
-	196171733.362212, // 196.171733ms
-	337616984.325077, // 337.616984ms
-	581048177.284016, // 581.048177ms
-	999999999.999999, // 999.999999ms
+	500000.000000,     // 500µs
+	568747.715565,     // 568.747µs
+	646947.927922,     // 646.947µs
+	735900.312190,     // 735.9µs
+	837083.242884,     // 837.083µs
+	952178.364257,     // 952.178µs
+	1083098.538963,    // 1.083098ms
+	1232019.639535,    // 1.232019ms
+	1401416.711034,    // 1.401416ms
+	1594105.105912,    // 1.594105ms
+	1813287.274717,    // 1.813287ms
+	2062605.990318,    // 2.062605ms
+	2346204.890209,    // 2.346204ms
+	2668797.343109,    // 2.668797ms
+	3035744.784401,    // 3.035744ms
+	3453145.822334,    // 3.453145ms
+	3927937.595933,    // 3.927937ms
+	4468011.069141,    // 4.468011ms
+	5082342.177389,    // 5.082342ms
+	5781141.006222,    // 5.781141ms
+	6576021.481300,    // 6.576021ms
+	7480194.389996,    // 7.480194ms
+	8508686.942589,    // 8.508686ms
+	9678592.522117,    // 9.678592ms
+	11009354.773683,   // 11.009354ms
+	12523090.754761,   // 12.52309ms
+	14244958.517175,   // 14.244958ms
+	16203575.229933,   // 16.203575ms
+	18431492.792031,   // 18.431492ms
+	20965738.839853,   // 20.965738ms
+	23848432.140611,   // 23.848432ms
+	27127482.599575,   // 27.127482ms
+	30857387.515093,   // 30.857387ms
+	35100137.315047,   // 35.100137ms
+	39926245.827925,   // 39.926245ms
+	45415922.211464,   // 45.415922ms
+	51660404.016126,   // 51.660404ms
+	58763473.538708,   // 58.763473ms
+	66843182.667648,   // 66.843182ms
+	76033814.886682,   // 76.033814ms
+	86488117.045035,   // 86.488117ms
+	98379837.985822,   // 98.379837ms
+	111906616.224248,  // 111.906616ms
+	127293264.668375,  // 127.293264ms
+	144795506.973983,  // 144.795506ms
+	164704227.631154,  // 164.704227ms
+	187350306.418342,  // 187.350306ms
+	213110117.571795,  // 213.110117ms
+	242411785.065635,  // 242.411785ms
+	275742297.964389,  // 275.742297ms
+	313655604.103963,  // 313.655604ms
+	356781816.616787,  // 356.781816ms
+	405837686.312094,  // 405.837686ms
+	461638513.960647,  // 461.638513ms
+	525111700.464186,  // 525.1117ms
+	597312160.111267,  // 597.31216ms
+	679439853.085354,  // 679.439853ms
+	772859728.612681,  // 772.859728ms
+	879124410.201811,  // 879.12441ms
+	1000000000.000001, // 1s
 }
 
 // BatchProcessLatencyBuckets are prometheus histogram buckets suitable for a
@@ -62,20 +152,65 @@ var NetworkLatencyBuckets = []float64{
 var BatchProcessLatencyBuckets = []float64{
 	// Generated via TestHistogramBuckets/BatchProcessLatencyBuckets.
 	500000000.000000,    // 500ms
-	789604072.059876,    // 789.604072ms
-	1246949181.227077,   // 1.246949181s
-	1969192302.297256,   // 1.969192302s
-	3109764521.125753,   // 3.109764521s
-	4910965458.056452,   // 4.910965458s
-	7755436646.853539,   // 7.755436646s
-	12247448713.915894,  // 12.247448713s
-	19341270753.704967,  // 19.341270753s
-	30543892291.876068,  // 30.543892291s
-	48235163460.447227,  // 48.23516346s
-	76173362969.685760,  // 1m16.173362969s
-	120293595166.717728, // 2m0.293595166s
-	189968625172.725128, // 3m9.968625172s
-	300000000000.000183, // 5m0s
+	557259285.358743,    // 557.259285ms
+	621075822.237074,    // 621.075822ms
+	692200537.706851,    // 692.200537ms
+	771470353.934916,    // 771.470353ms
+	859818036.218456,    // 859.818036ms
+	958283168.803309,    // 958.283168ms
+	1068024387.637287,   // 1.068024387s
+	1190333014.000928,   // 1.190333014s
+	1326648249.442152,   // 1.326648249s
+	1478574110.813123,   // 1.47857411s
+	1647898304.683320,   // 1.647898304s
+	1836613263.223422,   // 1.836613263s
+	2046939589.088547,   // 2.046939589s
+	2281352185.176006,   // 2.281352185s
+	2542609376.725576,   // 2.542609376s
+	2833785368.441068,   // 2.833785368s
+	3158306418.555065,   // 3.158306418s
+	3519991155.495853,   // 3.519991155s
+	3923095511.561431,   // 3.923095511s
+	4372362802.333632,   // 4.372362802s
+	4873079541.115184,   // 4.873079541s
+	5431137645.156319,   // 5.431137645s
+	6053103765.649553,   // 6.053103765s
+	6746296557.296375,   // 6.746296557s
+	7518872796.674253,   // 7.518872796s
+	8379923362.755980,   // 8.379923362s
+	9339580208.980864,   // 9.339580208s
+	10409135585.614676,  // 10.409135585s
+	11601174915.283792,  // 11.601174915s
+	12929724885.225649,  // 12.929724885s
+	14410418498.852003,  // 14.410418498s
+	16060679028.781363,  // 16.060679028s
+	17899925035.909710,  // 17.899925035s
+	19949798866.972237,  // 19.949798866s
+	22234421319.319225,  // 22.234421319s
+	24780675469.538071,  // 24.780675469s
+	27618523005.723442,  // 27.618523005s
+	30781356785.666904,  // 30.781356785s
+	34306393769.506477,  // 34.306393769s
+	38235112950.461639,  // 38.23511295s
+	42613743436.770157,  // 42.613743436s
+	47493808428.070732,  // 47.493808428s
+	52932731487.183495,  // 52.932731487s
+	58994512241.268242,  // 58.994512241s
+	65750479463.313522,  // 1m5.750479463s
+	73280130395.441635,  // 1m13.280130395s
+	81672066190.318619,  // 1m21.67206619s
+	91025034477.977859,  // 1m31.025034477s
+	101449091325.905777, // 1m41.449091325s
+	113066896265.136261, // 1m53.066896265s
+	126015155620.881943, // 2m6.01515562s
+	140446231131.326965, // 2m20.446231131s
+	156529932783.144257, // 2m36.529932783s
+	174455516959.974152, // 2m54.455516959s
+	194433913416.010529, // 3m14.433913416s
+	216700207279.419586, // 3m36.700207279s
+	241516405291.241699, // 4m1.516405291s
+	269174518830.019897, // 4m29.17451883s
+	300000000000.000854, // 5m0s
 }
 
 // LongRunning60mLatencyBuckets are prometheus histogram buckets suitable
@@ -84,20 +219,65 @@ var BatchProcessLatencyBuckets = []float64{
 var LongRunning60mLatencyBuckets = []float64{
 	// Generated via TestHistogramBuckets/LongRunning60mLatencyBuckets.
 	500000000.000000,     // 500ms
-	942961049.923126,     // 942.961049ms
-	1778351083.344248,    // 1.778351083s
-	3353831609.364442,    // 3.353831609s
-	6325065151.263324,    // 6.325065151s
-	11928580151.734879,   // 11.928580151s
-	22496372927.944168,   // 22.496372927s
-	42426406871.192848,   // 42.426406871s
-	80012898335.451462,   // 1m20.012898335s
-	150898093243.579315,  // 2m30.898093243s
-	284582048872.726685,  // 4m44.582048872s
-	536699575188.601318,  // 8m56.699575188s
-	1012173589826.278687, // 16m52.173589826s
-	1908880541934.094238, // 31m48.880541934s
-	3599999999999.998535, // 59m59.999999999s
+	581230667.894489,     // 581.230667ms
+	675658178.602148,     // 675.658178ms
+	785426508.834601,     // 785.426508ms
+	913027948.623944,     // 913.027948ms
+	1061359688.770060,    // 1.061359688s
+	1233789601.560218,    // 1.233789601s
+	1434232708.312242,    // 1.434232708s
+	1667240069.936893,    // 1.667240069s
+	1938102118.779750,    // 1.938102118s
+	2252968777.892157,    // 2.252968777s
+	2618989095.039379,    // 2.618989095s
+	3044473561.836243,    // 3.044473561s
+	3539082803.466387,    // 3.539082803s
+	4114046923.185338,    // 4.114046923s
+	4782420481.824564,    // 4.782420481s
+	5559378901.606352,    // 5.559378901s
+	6462563024.118382,    // 6.462563024s
+	7512479645.637113,    // 7.512479645s
+	8732967123.954826,    // 8.732967123s
+	10151736628.313759,   // 10.151736628s
+	11801001321.527510,   // 11.801001321s
+	13718207759.870365,   // 13.718207759s
+	15946886117.169632,   // 15.946886117s
+	18537638537.439724,   // 18.537638537s
+	21549288056.605419,   // 21.549288056s
+	25050214179.583008,   // 25.050214179s
+	29119905436.998066,   // 29.119905436s
+	33850764172.341507,   // 33.850764172s
+	39350204537.257782,   // 39.350204537s
+	45743091329.950188,   // 45.743091329s
+	53174575050.531136,   // 53.17457505s
+	61813387543.251701,   // 1m1.813387543s
+	71855673053.170151,   // 1m11.855673053s
+	83529441681.404266,   // 1m23.529441681s
+	97099746354.672745,   // 1m37.099746354s
+	112874700852.223846,  // 1m52.874700852s
+	131212475529.457443,  // 2m11.212475529s
+	152529429576.151703,  // 2m32.529429576s
+	177309564452.224213,  // 2m57.309564452s
+	206115513141.294464,  // 3m26.115513141s
+	239601314733.059875,  // 3m59.601314733s
+	278527264381.388123,  // 4m38.527264381s
+	323777175806.438293,  // 5m23.777175806s
+	376378448285.935181,  // 6m16.378448285s
+	437525393756.650940,  // 7m17.525393756s
+	508606353667.955078,  // 8m28.606353667s
+	591235221275.612671,  // 9m51.235221275s
+	687288085089.540771,  // 11m27.288085089s
+	798945825465.036499,  // 13m18.945825465s
+	928743631493.114136,  // 15m28.743631493s
+	1079628562470.991943, // 17m59.62856247s
+	1255026460885.963623, // 20m55.026460885s
+	1458919736172.010742, // 24m18.919736172s
+	1695937785319.419434, // 28m15.937785319s
+	1971462103337.413574, // 32m51.462103337s
+	2291748470102.958496, // 38m11.748470102s
+	2664068987848.231934, // 44m24.068987848s
+	3096877194248.046875, // 51m36.877194248s
+	3600000000000.007812, // 1h0m0s
 }
 
 // Count1KBuckets are prometheus histogram buckets suitable for a histogram that
diff --git a/pkg/util/metric/histogram_buckets_test.go b/pkg/util/metric/histogram_buckets_test.go
index 7fb183d70bf6..6f28454b89ff 100644
--- a/pkg/util/metric/histogram_buckets_test.go
+++ b/pkg/util/metric/histogram_buckets_test.go
@@ -48,22 +48,22 @@ func TestHistogramBuckets(t *testing.T) {
 		require.InDeltaSlice(t, exp, act, 1 /* delta */, "Please update the bucket boundaries for %s", t.Name())
 	}
 	t.Run("IOLatencyBuckets", func(t *testing.T) {
-		exp := prometheus.ExponentialBucketsRange(10e3, 10e9, 15)
+		exp := prometheus.ExponentialBucketsRange(10e3, 10e9, 60)
 		verifyAndPrint(t, exp, IOLatencyBuckets, LATENCY)
 	})
 
 	t.Run("NetworkLatencyBuckets", func(t *testing.T) {
-		exp := prometheus.ExponentialBucketsRange(500e3, 1e9, 15)
+		exp := prometheus.ExponentialBucketsRange(500e3, 1e9, 60)
 		verifyAndPrint(t, exp, NetworkLatencyBuckets, LATENCY)
 	})
 
 	t.Run("BatchProcessLatencyBuckets", func(t *testing.T) {
-		exp := prometheus.ExponentialBucketsRange(500e6, 300e9, 15)
+		exp := prometheus.ExponentialBucketsRange(500e6, 300e9, 60)
 		verifyAndPrint(t, exp, BatchProcessLatencyBuckets, LATENCY)
 	})
 
 	t.Run("LongRunning60mLatencyBuckets", func(t *testing.T) {
-		exp := prometheus.ExponentialBucketsRange(500e6, 3600e9, 15)
+		exp := prometheus.ExponentialBucketsRange(500e6, 3600e9, 60)
 		verifyAndPrint(t, exp, LongRunning60mLatencyBuckets, LATENCY)
 	})