From 4a3695fe43b40391d2e1cfefb899aa402b81c5d6 Mon Sep 17 00:00:00 2001 From: Alex Barganier Date: Wed, 5 Jul 2023 17:04:31 -0400 Subject: [PATCH 1/7] pkg/util/log: introduce LogMetrics interface Logging is a critical subsystem within CRDB, but as things are today, we have very little observability into logging itself. For starters, we have no metrics in the logging package at all! This makes it difficult to observe things within the log package. For example, if a fluent-server log sink fails to connect to FluentBit, how can we tell? We get some STDOUT message, but that's about it. It's time to get some metrics into the log package. Doing so is a bit of a tricky dance, because pretty much every package in CRDB imports the logging package, meaning you almost always get a circular dependency when trying to make use of any library within pkg/util/log. Therefore, we must inject metrics functionality into the logging package. This patch provides a means of doing so. Within the log package, we add a new interface called `LogMetrics` with functions that enable its users to modify metrics. The implementation of the interface can live elsewhere, where circular dependencies aren't such a pain. We can then inject the implementation into the log package. This patch also provides a basic implementation to be used. Future patches will plumb the implementation into the log package and actually modify the supported metrics. Release note: none --- pkg/BUILD.bazel | 3 + pkg/util/log/BUILD.bazel | 1 + pkg/util/log/clog.go | 13 +++ pkg/util/log/logmetrics/BUILD.bazel | 27 ++++++ pkg/util/log/logmetrics/metrics.go | 108 ++++++++++++++++++++++++ pkg/util/log/logmetrics/metrics_test.go | 71 ++++++++++++++++ pkg/util/log/metric.go | 43 ++++++++++ 7 files changed, 266 insertions(+) create mode 100644 pkg/util/log/logmetrics/BUILD.bazel create mode 100644 pkg/util/log/logmetrics/metrics.go create mode 100644 pkg/util/log/logmetrics/metrics_test.go create mode 100644 pkg/util/log/metric.go diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 7cc260a1b154..f565b413752f 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -648,6 +648,7 @@ ALL_TESTS = [ "//pkg/util/log/eventpb:eventpb_test", "//pkg/util/log/logconfig:logconfig_test", "//pkg/util/log/logcrash:logcrash_test", + "//pkg/util/log/logmetrics:logmetrics_test", "//pkg/util/log/testshout:testshout_test", "//pkg/util/log:log_test", "//pkg/util/metric/aggmetric:aggmetric_test", @@ -2296,6 +2297,8 @@ GO_TARGETS = [ "//pkg/util/log/logcrash:logcrash", "//pkg/util/log/logcrash:logcrash_test", "//pkg/util/log/logflags:logflags", + "//pkg/util/log/logmetrics:logmetrics", + "//pkg/util/log/logmetrics:logmetrics_test", "//pkg/util/log/logpb:logpb", "//pkg/util/log/logtestutils:logtestutils", "//pkg/util/log/logutil:logutil", diff --git a/pkg/util/log/BUILD.bazel b/pkg/util/log/BUILD.bazel index 73e3ac9a17f5..d4bde7c984ce 100644 --- a/pkg/util/log/BUILD.bazel +++ b/pkg/util/log/BUILD.bazel @@ -34,6 +34,7 @@ go_library( "log_decoder.go", "log_entry.go", "log_flush.go", + "metric.go", "redact.go", "registry.go", "sinks.go", diff --git a/pkg/util/log/clog.go b/pkg/util/log/clog.go index d0206c66cf0c..ea7c44bb474e 100644 --- a/pkg/util/log/clog.go +++ b/pkg/util/log/clog.go @@ -109,6 +109,19 @@ type loggingT struct { allSinkInfos sinkInfoRegistry allLoggers loggerRegistry + metrics LogMetrics +} + +// SetLogMetrics injects an initialized implementation of +// the LogMetrics interface into the logging package. The +// implementation must be injected to avoid a dependency +// cycle. +// +// Should be called within the init() function of the +// implementing package to avoid the possibility of a nil +// LogMetrics during server startups. +func SetLogMetrics(m LogMetrics) { + logging.metrics = m } func init() { diff --git a/pkg/util/log/logmetrics/BUILD.bazel b/pkg/util/log/logmetrics/BUILD.bazel new file mode 100644 index 000000000000..0278d9e27f79 --- /dev/null +++ b/pkg/util/log/logmetrics/BUILD.bazel @@ -0,0 +1,27 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "logmetrics", + srcs = ["metrics.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/util/log/logmetrics", + visibility = ["//visibility:public"], + deps = [ + "//pkg/util/log", + "//pkg/util/metric", + "//pkg/util/syncutil", + "@com_github_cockroachdb_errors//:errors", + ], +) + +go_test( + name = "logmetrics_test", + srcs = ["metrics_test.go"], + args = ["-test.timeout=295s"], + embed = [":logmetrics"], + deps = [ + "//pkg/util/leaktest", + "//pkg/util/log", + "//pkg/util/metric", + "@com_github_stretchr_testify//require", + ], +) diff --git a/pkg/util/log/logmetrics/metrics.go b/pkg/util/log/logmetrics/metrics.go new file mode 100644 index 000000000000..4bef93b775a8 --- /dev/null +++ b/pkg/util/log/logmetrics/metrics.go @@ -0,0 +1,108 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package logmetrics + +import ( + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/metric" + "github.com/cockroachdb/cockroach/pkg/util/syncutil" + "github.com/cockroachdb/errors" +) + +var ( + // logMetricsReg is a singleton instance of the LogMetricsRegistry. + logMetricsReg = newLogMetricsRegistry() + FluentSinkConnErrors = metric.Metadata{ + Name: string(log.FluentSinkConnectionError), + Help: "Number of connection errors experienced by fluent-server logging sinks", + Measurement: "fluent-server log sink connection errors", + Unit: metric.Unit_COUNT, + } +) + +// logMetricsStruct is a struct used to contain all metrics +// tracked by the LogMetricsRegistry. This container is necessary +// to register all the metrics with the Registry internal to the +// LogMetricsRegistry. +type logMetricsStruct struct { + FluentSinkConnErrors *metric.Counter +} + +// LogMetricsRegistry is a log.LogMetrics implementation used in the +// logging package to give it access to metrics without introducing a +// circular dependency. +// +// All metrics meant to be available to the logging package must be +// registered at the time of initialization. +// +// LogMetricsRegistry is thread-safe. +type LogMetricsRegistry struct { + mu struct { + syncutil.Mutex + // metricsStruct holds the same metrics as the below structures, but + // provides an easy way to inject them into metric.Registry's on demand + // in NewRegistry(). + metricsStruct logMetricsStruct + counters map[log.MetricName]*metric.Counter + } +} + +var _ log.LogMetrics = (*LogMetricsRegistry)(nil) + +func newLogMetricsRegistry() *LogMetricsRegistry { + registry := &LogMetricsRegistry{} + registry.registerCounters() + return registry +} + +func (l *LogMetricsRegistry) registerCounters() { + l.mu.Lock() + defer l.mu.Unlock() + l.mu.counters = make(map[log.MetricName]*metric.Counter) + // Create the metrics struct for us to add to registries as they're + // requested. + l.mu.metricsStruct = logMetricsStruct{ + FluentSinkConnErrors: metric.NewCounter(FluentSinkConnErrors), + } + // Be sure to also add the metrics to our internal store, for + // recall in functions such as IncrementCounter. + l.mu.counters[log.MetricName(FluentSinkConnErrors.Name)] = l.mu.metricsStruct.FluentSinkConnErrors +} + +// NewRegistry initializes and returns a new metric.Registry, populated with metrics +// tracked by the LogMetricsRegistry. While the metrics tracked by the logmetrics package +// are global, they may be shared by multiple servers, test servers, etc. Therefore, we +// need the means to label the metrics separately depending on the server, tenant, etc. +// serving them. For this reason, we provide the ability to track the same log metrics +// across multiple registries. +func NewRegistry() *metric.Registry { + if logMetricsReg == nil { + panic(errors.AssertionFailedf("LogMetricsRegistry was not initialized")) + } + reg := metric.NewRegistry() + logMetricsReg.mu.Lock() + defer logMetricsReg.mu.Unlock() + reg.AddMetricStruct(logMetricsReg.mu.metricsStruct) + return reg +} + +// IncrementCounter increments thegi Counter held by the given alias. If a log.MetricName +// is provided as an argument, but is not registered with the LogMetricsRegistry, this function +// panics. +func (l *LogMetricsRegistry) IncrementCounter(metric log.MetricName, amount int64) { + l.mu.Lock() + defer l.mu.Unlock() + counter, ok := l.mu.counters[metric] + if !ok { + panic(errors.AssertionFailedf("MetricName not registered in LogMetricsRegistry: %q", string(metric))) + } + counter.Inc(amount) +} diff --git a/pkg/util/log/logmetrics/metrics_test.go b/pkg/util/log/logmetrics/metrics_test.go new file mode 100644 index 000000000000..e1a7630ab95e --- /dev/null +++ b/pkg/util/log/logmetrics/metrics_test.go @@ -0,0 +1,71 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package logmetrics + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/metric" + "github.com/stretchr/testify/require" +) + +func TestIncrementCounter(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + t.Run("panics when log.MetricName not registered", func(t *testing.T) { + l := &LogMetricsRegistry{} + l.mu.counters = map[log.MetricName]*metric.Counter{} + require.PanicsWithErrorf(t, + `MetricName not registered in LogMetricsRegistry: "unregistered"`, + func() { + l.IncrementCounter("unregistered", 1) + }, "expected IncrementCounter to panic for unregistered metric") + }) + + t.Run("increments counter", func(t *testing.T) { + l := newLogMetricsRegistry() + func() { + l.mu.Lock() + defer l.mu.Unlock() + require.Zero(t, l.mu.metricsStruct.FluentSinkConnErrors.Count()) + }() + l.IncrementCounter(log.FluentSinkConnectionError, 1) + l.IncrementCounter(log.FluentSinkConnectionError, 2) + func() { + l.mu.Lock() + defer l.mu.Unlock() + require.Equal(t, int64(3), l.mu.metricsStruct.FluentSinkConnErrors.Count()) + }() + }) +} + +func TestNewRegistry(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + t.Run("panics when logMetricsReg is nil", func(t *testing.T) { + logMetricsReg = nil + require.PanicsWithErrorf(t, + "LogMetricsRegistry was not initialized", + func() { + _ = NewRegistry() + }, "expected NewRegistry() to panic with nil logMetricsReg package-level var") + }) +} + +type fakeLogMetrics struct{} + +func (*fakeLogMetrics) IncrementCounter(_ log.MetricName, _ int64) {} + +var _ log.LogMetrics = (*fakeLogMetrics)(nil) diff --git a/pkg/util/log/metric.go b/pkg/util/log/metric.go new file mode 100644 index 000000000000..3ec783c4b828 --- /dev/null +++ b/pkg/util/log/metric.go @@ -0,0 +1,43 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package log + +// LogMetrics enables the registration and recording of metrics +// within the log package. +// +// Because the log package is imported by nearly every package +// within CRDB, it's difficult to add new dependencies to the +// log package without introducing a circular dependency. +// +// The LogMetrics interface provides us with a way to still +// make use of the metrics library within the log package via +// dependency injection, allowing the implementation to live +// elsewhere (e.g. the metrics package). +type LogMetrics interface { + // IncrementCounter increments the Counter metric associated with the + // provided MetricName by the given amount, assuming the + // metric has been registered. + // + // The LogMetrics implementation must have metadata defined + // for the given MetricName within its own scope. See + // pkg/util/log/logmetrics for details. + IncrementCounter(metric MetricName, amount int64) +} + +// MetricName represents the name of a metric registered & +// used within the log package, available to use in the LogMetrics +// interface. +type MetricName string + +// FluentSinkConnectionError is the MetricName for the metric +// used to count fluent-server log sink connection errors. Please +// refer to its metric metadata for more details (hint: see usages). +const FluentSinkConnectionError MetricName = "fluent.sink.conn.errors" From c271f9062cd426dc5552fc8ca3aad0ad23103b7b Mon Sep 17 00:00:00 2001 From: Alex Barganier Date: Thu, 6 Jul 2023 14:37:20 -0400 Subject: [PATCH 2/7] pkg/util/log: plumb LogMetricRegistry into log package This patch plumbs the LogMetricRegistry into the log package as part of the initialization of the package where the implementation lives. With the log metrics plumbed into the log package and made available in the server, we can now add them to the metrics recorder used by the server, as well as begin using metrics within the log package. Release note: none --- pkg/server/status/BUILD.bazel | 1 + pkg/server/status/recorder.go | 3 +++ pkg/util/log/logmetrics/metrics.go | 12 ++++++++++++ 3 files changed, 16 insertions(+) diff --git a/pkg/server/status/BUILD.bazel b/pkg/server/status/BUILD.bazel index 63cec5244305..b5fbb8d85187 100644 --- a/pkg/server/status/BUILD.bazel +++ b/pkg/server/status/BUILD.bazel @@ -65,6 +65,7 @@ go_library( "//pkg/util/humanizeutil", "//pkg/util/log", "//pkg/util/log/eventpb", + "//pkg/util/log/logmetrics", "//pkg/util/metric", "//pkg/util/syncutil", "//pkg/util/system", diff --git a/pkg/server/status/recorder.go b/pkg/server/status/recorder.go index a6277336c801..2b72df324588 100644 --- a/pkg/server/status/recorder.go +++ b/pkg/server/status/recorder.go @@ -45,6 +45,9 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" "github.com/cockroachdb/cockroach/pkg/util/log" + // Import the logmetrics package to trigger its own init function, which inits and injects + // metrics functionality into pkg/util/log. + _ "github.com/cockroachdb/cockroach/pkg/util/log/logmetrics" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/cockroach/pkg/util/system" diff --git a/pkg/util/log/logmetrics/metrics.go b/pkg/util/log/logmetrics/metrics.go index 4bef93b775a8..afd2a77a9e18 100644 --- a/pkg/util/log/logmetrics/metrics.go +++ b/pkg/util/log/logmetrics/metrics.go @@ -28,6 +28,18 @@ var ( } ) +// Inject our singleton LogMetricsRegistry into the logging +// package. This ensures that the LogMetrics implementation within the +// log package is always defined. This should only be called once from +// a single init function. +// +// Since the primary user of the eventual metric.Registry's that come +// from LogMetricsRegistry is the MetricsRecorder, we trigger this +// init function via an import in pkg/util/log/logmetrics/metrics.go. +func init() { + log.SetLogMetrics(logMetricsReg) +} + // logMetricsStruct is a struct used to contain all metrics // tracked by the LogMetricsRegistry. This container is necessary // to register all the metrics with the Registry internal to the From d15a3625d4cda2d199512b5901ad112f36bf37dd Mon Sep 17 00:00:00 2001 From: Alex Barganier Date: Wed, 12 Jul 2023 16:34:30 -0400 Subject: [PATCH 3/7] pkg/server: plumb LogMetricRegistry to server MetricsRecorder This patch adds the necessary plumbing to inject the metrics used by the logging package into the MetricsRecorders used by servers in CockroachDB. With this plumbing complete, we're ready to begin using metrics within the log package. Release note: none --- pkg/server/BUILD.bazel | 1 + pkg/server/server.go | 8 +++++++ pkg/server/status/recorder.go | 37 +++++++++++++++++++++--------- pkg/server/status/recorder_test.go | 28 +++++++++++++++++++--- pkg/server/tenant.go | 8 +++++++ 5 files changed, 68 insertions(+), 14 deletions(-) diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index f6ac69d75461..e601f59561ff 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -305,6 +305,7 @@ go_library( "//pkg/util/log", "//pkg/util/log/eventpb", "//pkg/util/log/logcrash", + "//pkg/util/log/logmetrics", "//pkg/util/log/logpb", "//pkg/util/log/severity", "//pkg/util/metric", diff --git a/pkg/server/server.go b/pkg/server/server.go index 33d6a74abd3a..527c6f328b99 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -107,6 +107,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/goschedstats" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/log/logmetrics" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/mon" "github.com/cockroachdb/cockroach/pkg/util/netutil" @@ -1839,9 +1840,16 @@ func (s *Server) PreStart(ctx context.Context) error { }) }) + // Init a log metrics registry. + logRegistry := logmetrics.NewRegistry() + if logRegistry == nil { + panic(errors.AssertionFailedf("nil log metrics registry at server startup")) + } + // We can now add the node registry. s.recorder.AddNode( s.registry, + logRegistry, s.node.Descriptor, s.node.startedAt, s.cfg.AdvertiseAddr, diff --git a/pkg/server/status/recorder.go b/pkg/server/status/recorder.go index 2b72df324588..2cda3c054a98 100644 --- a/pkg/server/status/recorder.go +++ b/pkg/server/status/recorder.go @@ -133,8 +133,12 @@ type MetricsRecorder struct { // nodeRegistry contains, as subregistries, the multiple component-specific // registries which are recorded as "node level" metrics. nodeRegistry *metric.Registry - desc roachpb.NodeDescriptor - startedAt int64 + // logRegistry contains the global metrics registry used by the logging + // package. NB: The underlying metrics are global, but each server gets + // its own separate registry to avoid things such as colliding labels. + logRegistry *metric.Registry + desc roachpb.NodeDescriptor + startedAt int64 // storeRegistries contains a registry for each store on the node. These // are not stored as subregistries, but rather are treated as wholly @@ -193,6 +197,7 @@ func (mr *MetricsRecorder) AddTenantRegistry(tenantID roachpb.TenantID, rec *met // tenant is initialized. mr.mu.Do(func() { mr.mu.nodeRegistry.AddLabel("tenant", catconstants.SystemTenantName) + mr.mu.logRegistry.AddLabel("tenant", catconstants.SystemTenantName) }) } mr.mu.tenantRegistries[tenantID] = rec @@ -207,16 +212,17 @@ func (mr *MetricsRecorder) RemoveTenantRegistry(tenantID roachpb.TenantID) { } // AddNode adds the Registry from an initialized node, along with its descriptor -// and start time. +// and start time. It also adds the logging registry. func (mr *MetricsRecorder) AddNode( - reg *metric.Registry, + nodeReg, logReg *metric.Registry, desc roachpb.NodeDescriptor, startedAt int64, advertiseAddr, httpAddr, sqlAddr string, ) { mr.mu.Lock() defer mr.mu.Unlock() - mr.mu.nodeRegistry = reg + mr.mu.nodeRegistry = nodeReg + mr.mu.logRegistry = logReg mr.mu.desc = desc mr.mu.startedAt = startedAt @@ -233,20 +239,20 @@ func (mr *MetricsRecorder) AddNode( metadata.AddLabel(sqlAddrLabelKey, sqlAddr) nodeIDGauge := metric.NewGauge(metadata) nodeIDGauge.Update(int64(desc.NodeID)) - reg.AddMetric(nodeIDGauge) + nodeReg.AddMetric(nodeIDGauge) if !disableNodeAndTenantLabels { nodeIDInt := int(desc.NodeID) if nodeIDInt != 0 { - reg.AddLabel("node_id", strconv.Itoa(int(desc.NodeID))) - // We assume that all stores have been added to the registry - // prior to calling `AddNode`. + nodeReg.AddLabel("node_id", strconv.Itoa(int(desc.NodeID))) + logReg.AddLabel("node_id", strconv.Itoa(int(desc.NodeID))) for _, s := range mr.mu.storeRegistries { s.AddLabel("node_id", strconv.Itoa(int(desc.NodeID))) } } if mr.tenantNameContainer != nil && mr.tenantNameContainer.String() != catconstants.SystemTenantName { - reg.AddLabel("tenant", mr.tenantNameContainer) + nodeReg.AddLabel("tenant", mr.tenantNameContainer) + logReg.AddLabel("tenant", mr.tenantNameContainer) } } } @@ -279,7 +285,8 @@ func (mr *MetricsRecorder) MarshalJSON() ([]byte, error) { return []byte("{}"), nil } topLevel := map[string]interface{}{ - fmt.Sprintf("node.%d", mr.mu.desc.NodeID): mr.mu.nodeRegistry, + fmt.Sprintf("node.%d", mr.mu.desc.NodeID): mr.mu.nodeRegistry, + fmt.Sprintf("node.%d.log", mr.mu.desc.NodeID): mr.mu.logRegistry, } // Add collection of stores to top level. JSON requires that keys be strings, // so we must convert the store ID to a string. @@ -304,6 +311,7 @@ func (mr *MetricsRecorder) ScrapeIntoPrometheus(pm *metric.PrometheusExporter) { } includeChildMetrics := ChildMetricsEnabled.Get(&mr.settings.SV) pm.ScrapeRegistry(mr.mu.nodeRegistry, includeChildMetrics) + pm.ScrapeRegistry(mr.mu.logRegistry, includeChildMetrics) for _, reg := range mr.mu.storeRegistries { pm.ScrapeRegistry(reg, includeChildMetrics) } @@ -363,6 +371,9 @@ func (mr *MetricsRecorder) GetTimeSeriesData() []tspb.TimeSeriesData { timestampNanos: now.UnixNano(), } recorder.record(&data) + // Now record the log metrics. + recorder.registry = mr.mu.logRegistry + recorder.record(&data) // Record time series from node-level registries for secondary tenants. for tenantID, r := range mr.mu.tenantRegistries { @@ -422,6 +433,7 @@ func (mr *MetricsRecorder) GetMetricsMetadata() map[string]metric.Metadata { metrics := make(map[string]metric.Metadata) mr.mu.nodeRegistry.WriteMetricsMetadata(metrics) + mr.mu.logRegistry.WriteMetricsMetadata(metrics) // Get a random storeID. var sID roachpb.StoreID @@ -511,6 +523,9 @@ func (mr *MetricsRecorder) GenerateNodeStatus(ctx context.Context) *statuspb.Nod eachRecordableValue(mr.mu.nodeRegistry, func(name string, val float64) { nodeStat.Metrics[name] = val }) + eachRecordableValue(mr.mu.logRegistry, func(name string, val float64) { + nodeStat.Metrics[name] = val + }) // Generate status summaries for stores. for storeID, r := range mr.mu.storeRegistries { diff --git a/pkg/server/status/recorder_test.go b/pkg/server/status/recorder_test.go index 587b52c6a2a8..4dfdd93d9a90 100644 --- a/pkg/server/status/recorder_test.go +++ b/pkg/server/status/recorder_test.go @@ -118,7 +118,8 @@ func TestMetricsRecorderLabels(t *testing.T) { manual, st, ) - recorder.AddNode(reg1, nodeDesc, 50, "foo:26257", "foo:26258", "foo:5432") + logReg := metric.NewRegistry() + recorder.AddNode(reg1, logReg, nodeDesc, 50, "foo:26257", "foo:26258", "foo:5432") nodeDescTenant := roachpb.NodeDescriptor{ NodeID: roachpb.NodeID(7), @@ -137,7 +138,7 @@ func TestMetricsRecorderLabels(t *testing.T) { manual, stTenant, ) - recorderTenant.AddNode(regTenant, nodeDescTenant, 50, "foo:26257", "foo:26258", "foo:5432") + recorderTenant.AddNode(regTenant, logReg, nodeDescTenant, 50, "foo:26257", "foo:26258", "foo:5432") // ======================================== // Verify that the recorder exports metrics for tenants as text. @@ -151,6 +152,10 @@ func TestMetricsRecorderLabels(t *testing.T) { regTenant.AddMetric(g2) g2.Update(456) + c1 := metric.NewCounter(metric.Metadata{Name: "some_log_metric"}) + logReg.AddMetric(c1) + c1.Inc(2) + recorder.AddTenantRegistry(tenantID, regTenant) buf := bytes.NewBuffer([]byte{}) @@ -211,6 +216,16 @@ func TestMetricsRecorderLabels(t *testing.T) { }, }, }, + { + Name: "cr.node.some_log_metric", + Source: "7", + Datapoints: []tspb.TimeSeriesDatapoint{ + { + TimestampNanos: manual.Now().UnixNano(), + Value: float64(2), + }, + }, + }, // App tenant metrics { Name: "cr.node.node-id", @@ -435,7 +450,8 @@ func TestMetricsRecorder(t *testing.T) { recorder := NewMetricsRecorder(roachpb.SystemTenantID, roachpb.NewTenantNameContainer(""), nil, nil, manual, st) recorder.AddStore(store1) recorder.AddStore(store2) - recorder.AddNode(reg1, nodeDesc, 50, "foo:26257", "foo:26258", "foo:5432") + logReg := metric.NewRegistry() + recorder.AddNode(reg1, logReg, nodeDesc, 50, "foo:26257", "foo:26258", "foo:5432") // Ensure the metric system's view of time does not advance during this test // as the test expects time to not advance too far which would age the actual @@ -467,6 +483,12 @@ func TestMetricsRecorder(t *testing.T) { source: 1, isNode: true, }, + { + reg: logReg, + prefix: "log.", + source: 1, + isNode: true, + }, { reg: store1.registry, prefix: "", diff --git a/pkg/server/tenant.go b/pkg/server/tenant.go index 82bd026a9666..236d4312b978 100644 --- a/pkg/server/tenant.go +++ b/pkg/server/tenant.go @@ -66,6 +66,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/admission" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/log/logmetrics" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/netutil" "github.com/cockroachdb/cockroach/pkg/util/schedulerlatency" @@ -642,9 +643,16 @@ func (s *SQLServerWrapper) PreStart(ctx context.Context) error { }) }) + // Init a log metrics registry. + logRegistry := logmetrics.NewRegistry() + if logRegistry == nil { + panic(errors.AssertionFailedf("nil log metrics registry at server startup")) + } + // We can now add the node registry. s.recorder.AddNode( s.registry, + logRegistry, roachpb.NodeDescriptor{ NodeID: s.rpcContext.NodeID.Get(), }, From abd9c99cdc0f68f5532c3601c4e50182ee05a22c Mon Sep 17 00:00:00 2001 From: Alex Barganier Date: Tue, 11 Jul 2023 13:17:44 -0400 Subject: [PATCH 4/7] pkg/util/log: increment metric when failing to connect to fluentbit Previous patches introduced the infrastructure necessary to record metrics within the log package without introducing circular dependencies. With all that plumbing done, we can now increment the metric representing fluentbit connection errors whenever a fluent-server log sink fails to establish a connection. Release note (ops change): This patch introduces the counter metric `fluent.sink.conn.errors` to the CockroachDB tsdb, which is exposed to `/_status/vars` clients as `fluent_sink_conn_errors`. The metric is incremented whenever a `fluent-server` log sink fails to establish a connection to the log sink pointed to by the `address` for the sink in the provided log config. --- pkg/util/log/fluent_client.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/util/log/fluent_client.go b/pkg/util/log/fluent_client.go index 84c746429665..c49f6e2ab398 100644 --- a/pkg/util/log/fluent_client.go +++ b/pkg/util/log/fluent_client.go @@ -74,6 +74,7 @@ func (l *fluentSink) output(b []byte, opts sinkOutputOptions) (err error) { } if err := l.ensureConnLocked(b); err != nil { + logging.metrics.IncrementCounter(FluentSinkConnectionError, 1) return err } return l.tryWriteLocked(b) From cdb8a4338565e34119d28694080b9d8a80f10f66 Mon Sep 17 00:00:00 2001 From: rharding6373 Date: Mon, 10 Jul 2023 13:58:40 -0700 Subject: [PATCH 5/7] tests, logictest, floatcmp: refactor comparison test util functions This commit moves some float comparison test util functions from logictest into the floatcmp package. It also moves a query result comparison function from the tlp file to query_comparison_util in the tests package. This commit also marks roachtests as testonly targets. Epic: none Release note: None --- pkg/cmd/roachtest/BUILD.bazel | 3 + pkg/cmd/roachtest/tests/BUILD.bazel | 3 + .../roachtest/tests/query_comparison_util.go | 19 ++++ pkg/cmd/roachtest/tests/tlp.go | 16 ---- pkg/sql/logictest/logic.go | 92 +------------------ pkg/sql/logictest/main_test.go | 39 -------- pkg/testutils/floatcmp/BUILD.bazel | 2 + pkg/testutils/floatcmp/floatcmp.go | 91 +++++++++++++++++- pkg/testutils/floatcmp/floatcmp_test.go | 40 ++++++++ 9 files changed, 159 insertions(+), 146 deletions(-) diff --git a/pkg/cmd/roachtest/BUILD.bazel b/pkg/cmd/roachtest/BUILD.bazel index 6b41ce75c459..314b431b3a2f 100644 --- a/pkg/cmd/roachtest/BUILD.bazel +++ b/pkg/cmd/roachtest/BUILD.bazel @@ -2,6 +2,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test") go_library( name = "roachtest_lib", + testonly = 1, srcs = [ "cluster.go", "github.go", @@ -62,6 +63,7 @@ go_library( go_binary( name = "roachtest", + testonly = 1, embed = [":roachtest_lib"], visibility = ["//visibility:public"], ) @@ -69,6 +71,7 @@ go_binary( go_test( name = "roachtest_test", size = "small", + testonly = 1, srcs = [ "cluster_test.go", "github_test.go", diff --git a/pkg/cmd/roachtest/tests/BUILD.bazel b/pkg/cmd/roachtest/tests/BUILD.bazel index b09eb6c0e559..2e49242ebcd9 100644 --- a/pkg/cmd/roachtest/tests/BUILD.bazel +++ b/pkg/cmd/roachtest/tests/BUILD.bazel @@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "tests", + testonly = 1, srcs = [ "acceptance.go", "activerecord.go", @@ -224,6 +225,7 @@ go_library( "//pkg/storage", "//pkg/storage/enginepb", "//pkg/testutils", + "//pkg/testutils/floatcmp", "//pkg/testutils/jobutils", "//pkg/testutils/release", "//pkg/testutils/sqlutils", @@ -298,6 +300,7 @@ go_test( "//pkg/roachprod/logger", "//pkg/roachprod/prometheus", "//pkg/testutils/skip", + "//pkg/util/leaktest", "//pkg/util/version", "@com_github_golang_mock//gomock", "@com_github_google_go_github//github", diff --git a/pkg/cmd/roachtest/tests/query_comparison_util.go b/pkg/cmd/roachtest/tests/query_comparison_util.go index da83b13fbd49..aace13f85b25 100644 --- a/pkg/cmd/roachtest/tests/query_comparison_util.go +++ b/pkg/cmd/roachtest/tests/query_comparison_util.go @@ -18,6 +18,8 @@ import ( "math/rand" "os" "path/filepath" + "runtime" + "sort" "strings" "time" @@ -28,10 +30,12 @@ import ( "github.com/cockroachdb/cockroach/pkg/internal/sqlsmith" "github.com/cockroachdb/cockroach/pkg/internal/workloadreplay" "github.com/cockroachdb/cockroach/pkg/roachprod/install" + "github.com/cockroachdb/cockroach/pkg/testutils/floatcmp" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" + "github.com/google/go-cmp/cmp" ) const ( @@ -504,3 +508,18 @@ func (h *queryComparisonHelper) logVerboseOutput() { func (h *queryComparisonHelper) makeError(err error, msg string) error { return errors.Wrapf(err, "%s. %d statements run", msg, h.stmtNo) } + +// unsortedMatricesDiff sorts and compares rows of data. +func unsortedMatricesDiff(rowMatrix1, rowMatrix2 [][]string) string { + var rows1 []string + for _, row := range rowMatrix1 { + rows1 = append(rows1, strings.Join(row[:], ",")) + } + var rows2 []string + for _, row := range rowMatrix2 { + rows2 = append(rows2, strings.Join(row[:], ",")) + } + sort.Strings(rows1) + sort.Strings(rows2) + return cmp.Diff(rows1, rows2) +} diff --git a/pkg/cmd/roachtest/tests/tlp.go b/pkg/cmd/roachtest/tests/tlp.go index 2ee359cd1d23..45ee0a81019c 100644 --- a/pkg/cmd/roachtest/tests/tlp.go +++ b/pkg/cmd/roachtest/tests/tlp.go @@ -16,7 +16,6 @@ import ( "fmt" "os" "path/filepath" - "sort" "strings" "time" @@ -29,7 +28,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/errors" - "github.com/google/go-cmp/cmp" ) const statementTimeout = time.Minute @@ -271,20 +269,6 @@ func runTLPQuery(conn *gosql.DB, smither *sqlsmith.Smither, logStmt func(string) }) } -func unsortedMatricesDiff(rowMatrix1, rowMatrix2 [][]string) string { - var rows1 []string - for _, row := range rowMatrix1 { - rows1 = append(rows1, strings.Join(row[:], ",")) - } - var rows2 []string - for _, row := range rowMatrix2 { - rows2 = append(rows2, strings.Join(row[:], ",")) - } - sort.Strings(rows1) - sort.Strings(rows2) - return cmp.Diff(rows1, rows2) -} - func runWithTimeout(f func() error) error { done := make(chan error, 1) go func() { diff --git a/pkg/sql/logictest/logic.go b/pkg/sql/logictest/logic.go index c11faafbfb01..9d5e0c6eed69 100644 --- a/pkg/sql/logictest/logic.go +++ b/pkg/sql/logictest/logic.go @@ -18,7 +18,6 @@ import ( gosql "database/sql" "flag" "fmt" - "math" "math/rand" "net" "net/url" @@ -3676,9 +3675,9 @@ func (t *logicTest) finishExecQuery(query logicQuery, rows *gosql.Rows, err erro // ('R') coltypes are approximately equal to take into account // platform differences in floating point calculations. if runtime.GOARCH == "s390x" && (colT == 'F' || colT == 'R') { - resultMatches, err = floatsMatchApprox(expected, actual) + resultMatches, err = floatcmp.FloatsMatchApprox(expected, actual) } else if colT == 'F' { - resultMatches, err = floatsMatch(expected, actual) + resultMatches, err = floatcmp.FloatsMatch(expected, actual) } if err != nil { return errors.CombineErrors(makeError(), err) @@ -3746,93 +3745,6 @@ func (t *logicTest) finishExecQuery(query logicQuery, rows *gosql.Rows, err erro return nil } -// parseExpectedAndActualFloats converts the strings expectedString and -// actualString to float64 values. -func parseExpectedAndActualFloats(expectedString, actualString string) (float64, float64, error) { - expected, err := strconv.ParseFloat(expectedString, 64 /* bitSize */) - if err != nil { - return 0, 0, errors.Wrap(err, "when parsing expected") - } - actual, err := strconv.ParseFloat(actualString, 64 /* bitSize */) - if err != nil { - return 0, 0, errors.Wrap(err, "when parsing actual") - } - return expected, actual, nil -} - -// floatsMatchApprox returns whether two floating point represented as -// strings are equal within a tolerance. -func floatsMatchApprox(expectedString, actualString string) (bool, error) { - expected, actual, err := parseExpectedAndActualFloats(expectedString, actualString) - if err != nil { - return false, err - } - return floatcmp.EqualApprox(expected, actual, floatcmp.CloseFraction, floatcmp.CloseMargin), nil -} - -// floatsMatch returns whether two floating point numbers represented as -// strings have matching 15 significant decimal digits (this is the precision -// that Postgres supports for 'double precision' type). -func floatsMatch(expectedString, actualString string) (bool, error) { - expected, actual, err := parseExpectedAndActualFloats(expectedString, actualString) - if err != nil { - return false, err - } - // Check special values - NaN, +Inf, -Inf, 0. - if math.IsNaN(expected) || math.IsNaN(actual) { - return math.IsNaN(expected) == math.IsNaN(actual), nil - } - if math.IsInf(expected, 0 /* sign */) || math.IsInf(actual, 0 /* sign */) { - bothNegativeInf := math.IsInf(expected, -1 /* sign */) == math.IsInf(actual, -1 /* sign */) - bothPositiveInf := math.IsInf(expected, 1 /* sign */) == math.IsInf(actual, 1 /* sign */) - return bothNegativeInf || bothPositiveInf, nil - } - if expected == 0 || actual == 0 { - return expected == actual, nil - } - // Check that the numbers have the same sign. - if expected*actual < 0 { - return false, nil - } - expected = math.Abs(expected) - actual = math.Abs(actual) - // Check that 15 significant digits match. We do so by normalizing the - // numbers and then checking one digit at a time. - // - // normalize converts f to base * 10**power representation where base is in - // [1.0, 10.0) range. - normalize := func(f float64) (base float64, power int) { - for f >= 10 { - f = f / 10 - power++ - } - for f < 1 { - f *= 10 - power-- - } - return f, power - } - var expPower, actPower int - expected, expPower = normalize(expected) - actual, actPower = normalize(actual) - if expPower != actPower { - return false, nil - } - // TODO(yuzefovich): investigate why we can't always guarantee deterministic - // 15 significant digits and switch back from 14 to 15 digits comparison - // here. See #56446 for more details. - for i := 0; i < 14; i++ { - expDigit := int(expected) - actDigit := int(actual) - if expDigit != actDigit { - return false, nil - } - expected -= (expected - float64(expDigit)) * 10 - actual -= (actual - float64(actDigit)) * 10 - } - return true, nil -} - func (t *logicTest) formatValues(vals []string, valsPerLine int) []string { var buf bytes.Buffer tw := tabwriter.NewWriter(&buf, 2, 1, 2, ' ', 0) diff --git a/pkg/sql/logictest/main_test.go b/pkg/sql/logictest/main_test.go index 08b4893b5f76..ab3fbecaa74d 100644 --- a/pkg/sql/logictest/main_test.go +++ b/pkg/sql/logictest/main_test.go @@ -20,7 +20,6 @@ import ( _ "github.com/cockroachdb/cockroach/pkg/sql/tests" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" - "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/randutil" ) @@ -33,41 +32,3 @@ func TestMain(m *testing.M) { serverutils.InitTestClusterFactory(testcluster.TestClusterFactory) os.Exit(m.Run()) } - -// TestFloatsMatch is a unit test for floatsMatch() and floatsMatchApprox() -// functions. -func TestFloatsMatch(t *testing.T) { - defer leaktest.AfterTest(t)() - for _, tc := range []struct { - f1, f2 string - match bool - }{ - {f1: "NaN", f2: "+Inf", match: false}, - {f1: "+Inf", f2: "+Inf", match: true}, - {f1: "NaN", f2: "NaN", match: true}, - {f1: "+Inf", f2: "-Inf", match: false}, - {f1: "-0.0", f2: "0.0", match: true}, - {f1: "0.0", f2: "NaN", match: false}, - {f1: "123.45", f2: "12.345", match: false}, - {f1: "0.1234567890123456", f2: "0.1234567890123455", match: true}, - {f1: "0.1234567890123456", f2: "0.1234567890123457", match: true}, - {f1: "-0.1234567890123456", f2: "0.1234567890123456", match: false}, - {f1: "-0.1234567890123456", f2: "-0.1234567890123455", match: true}, - } { - match, err := floatsMatch(tc.f1, tc.f2) - if err != nil { - t.Fatal(err) - } - if match != tc.match { - t.Fatalf("floatsMatch: wrong result on %v", tc) - } - - match, err = floatsMatchApprox(tc.f1, tc.f2) - if err != nil { - t.Fatal(err) - } - if match != tc.match { - t.Fatalf("floatsMatchApprox: wrong result on %v", tc) - } - } -} diff --git a/pkg/testutils/floatcmp/BUILD.bazel b/pkg/testutils/floatcmp/BUILD.bazel index 7b54e125c2da..62d098e08804 100644 --- a/pkg/testutils/floatcmp/BUILD.bazel +++ b/pkg/testutils/floatcmp/BUILD.bazel @@ -7,6 +7,7 @@ go_library( importpath = "github.com/cockroachdb/cockroach/pkg/testutils/floatcmp", visibility = ["//visibility:public"], deps = [ + "@com_github_cockroachdb_errors//:errors", "@com_github_google_go_cmp//cmp", "@com_github_google_go_cmp//cmp/cmpopts", ], @@ -18,4 +19,5 @@ go_test( srcs = ["floatcmp_test.go"], args = ["-test.timeout=55s"], embed = [":floatcmp"], + deps = ["//pkg/util/leaktest"], ) diff --git a/pkg/testutils/floatcmp/floatcmp.go b/pkg/testutils/floatcmp/floatcmp.go index 9bfe97199304..8c7f0d9f298e 100644 --- a/pkg/testutils/floatcmp/floatcmp.go +++ b/pkg/testutils/floatcmp/floatcmp.go @@ -14,10 +14,12 @@ package floatcmp import ( "fmt" + "math" "regexp" "strconv" "strings" + "github.com/cockroachdb/errors" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" ) @@ -40,7 +42,7 @@ const ( // // CloseMargin is greater than 0 otherwise if either expected or actual were // 0 the calculated tolerance from the fraction would be 0. - CloseMargin float64 = CloseFraction * CloseFraction + CloseMargin = CloseFraction * CloseFraction ) // EqualApprox reports whether expected and actual are deeply equal with the @@ -81,6 +83,79 @@ func EqualApprox(expected interface{}, actual interface{}, fraction float64, mar return cmp.Equal(expected, actual, cmpopts.EquateApprox(fraction, margin), cmpopts.EquateNaNs()) } +// FloatsMatchApprox returns whether two floating point represented as +// strings are equal within a tolerance. +func FloatsMatchApprox(expectedString, actualString string) (bool, error) { + expected, actual, err := parseExpectedAndActualFloats(expectedString, actualString) + if err != nil { + return false, err + } + return EqualApprox(expected, actual, CloseFraction, CloseMargin), nil +} + +// FloatsMatch returns whether two floating point numbers represented as +// strings have matching 15 significant decimal digits (this is the precision +// that Postgres supports for 'double precision' type). +func FloatsMatch(expectedString, actualString string) (bool, error) { + expected, actual, err := parseExpectedAndActualFloats(expectedString, actualString) + if err != nil { + return false, err + } + // Check special values - NaN, +Inf, -Inf, 0. + if math.IsNaN(expected) || math.IsNaN(actual) { + return math.IsNaN(expected) == math.IsNaN(actual), nil + } + if math.IsInf(expected, 0 /* sign */) || math.IsInf(actual, 0 /* sign */) { + bothNegativeInf := math.IsInf(expected, -1 /* sign */) == math.IsInf(actual, -1 /* sign */) + bothPositiveInf := math.IsInf(expected, 1 /* sign */) == math.IsInf(actual, 1 /* sign */) + return bothNegativeInf || bothPositiveInf, nil + } + if expected == 0 || actual == 0 { + return expected == actual, nil + } + // Check that the numbers have the same sign. + if expected*actual < 0 { + return false, nil + } + expected = math.Abs(expected) + actual = math.Abs(actual) + // Check that 15 significant digits match. We do so by normalizing the + // numbers and then checking one digit at a time. + // + // normalize converts f to base * 10**power representation where base is in + // [1.0, 10.0) range. + normalize := func(f float64) (base float64, power int) { + for f >= 10 { + f = f / 10 + power++ + } + for f < 1 { + f *= 10 + power-- + } + return f, power + } + var expPower, actPower int + expected, expPower = normalize(expected) + actual, actPower = normalize(actual) + if expPower != actPower { + return false, nil + } + // TODO(yuzefovich): investigate why we can't always guarantee deterministic + // 15 significant digits and switch back from 14 to 15 digits comparison + // here. See #56446 for more details. + for i := 0; i < 14; i++ { + expDigit := int(expected) + actDigit := int(actual) + if expDigit != actDigit { + return false, nil + } + expected -= (expected - float64(expDigit)) * 10 + actual -= (actual - float64(actDigit)) * 10 + } + return true, nil +} + // RoundFloatsInString rounds floats in a given string to the given number of significant figures. func RoundFloatsInString(s string, significantFigures int) string { return string(regexp.MustCompile(`(\d+\.\d+)`).ReplaceAllFunc([]byte(s), func(x []byte) []byte { @@ -103,3 +178,17 @@ func ParseRoundInStringsDirective(directive string) (int, error) { } return strconv.Atoi(kv[1]) } + +// parseExpectedAndActualFloats converts the strings expectedString and +// actualString to float64 values. +func parseExpectedAndActualFloats(expectedString, actualString string) (float64, float64, error) { + expected, err := strconv.ParseFloat(expectedString, 64 /* bitSize */) + if err != nil { + return 0, 0, errors.Wrap(err, "when parsing expected") + } + actual, err := strconv.ParseFloat(actualString, 64 /* bitSize */) + if err != nil { + return 0, 0, errors.Wrap(err, "when parsing actual") + } + return expected, actual, nil +} diff --git a/pkg/testutils/floatcmp/floatcmp_test.go b/pkg/testutils/floatcmp/floatcmp_test.go index a7f8d1490e56..a1a23879e643 100644 --- a/pkg/testutils/floatcmp/floatcmp_test.go +++ b/pkg/testutils/floatcmp/floatcmp_test.go @@ -13,6 +13,8 @@ package floatcmp import ( "math" "testing" + + "github.com/cockroachdb/cockroach/pkg/util/leaktest" ) // EqualApprox takes an interface, allowing it to compare equality of both @@ -156,3 +158,41 @@ func TestEqualClose(t *testing.T) { }) } } + +// TestFloatsMatch is a unit test for floatsMatch() and floatsMatchApprox() +// functions. +func TestFloatsMatch(t *testing.T) { + defer leaktest.AfterTest(t)() + for _, tc := range []struct { + f1, f2 string + match bool + }{ + {f1: "NaN", f2: "+Inf", match: false}, + {f1: "+Inf", f2: "+Inf", match: true}, + {f1: "NaN", f2: "NaN", match: true}, + {f1: "+Inf", f2: "-Inf", match: false}, + {f1: "-0.0", f2: "0.0", match: true}, + {f1: "0.0", f2: "NaN", match: false}, + {f1: "123.45", f2: "12.345", match: false}, + {f1: "0.1234567890123456", f2: "0.1234567890123455", match: true}, + {f1: "0.1234567890123456", f2: "0.1234567890123457", match: true}, + {f1: "-0.1234567890123456", f2: "0.1234567890123456", match: false}, + {f1: "-0.1234567890123456", f2: "-0.1234567890123455", match: true}, + } { + match, err := FloatsMatch(tc.f1, tc.f2) + if err != nil { + t.Fatal(err) + } + if match != tc.match { + t.Fatalf("floatsMatch: wrong result on %v", tc) + } + + match, err = FloatsMatchApprox(tc.f1, tc.f2) + if err != nil { + t.Fatal(err) + } + if match != tc.match { + t.Fatalf("floatsMatchApprox: wrong result on %v", tc) + } + } +} From c9999ae84e3522ee49bd296277f41d35b560d39e Mon Sep 17 00:00:00 2001 From: rharding6373 Date: Mon, 10 Jul 2023 13:59:17 -0700 Subject: [PATCH 6/7] tests: support float approximation in roachtest query comparison utils Before this change unoptimized query oracle tests would compare results using simple string comparison. However, due to floating point precision limitations, it's possible for results with floating point to diverge during the course of normal computation. This results in test failures that are difficult to reproduce or determine whether they are expected behavior. This change utilizes existing floating point comparison functions used by logic tests to match float values only to a specific precision. Like the logic tests, we also have special handling for floats and decimals under the s390x architecture (see #63244). In order to avoid costly comparisons, we only check floating point precision if the naiive string comparison approach fails and there are float or decimal types in the result. Epic: None Fixes: #95665 Release note: None --- pkg/cmd/roachtest/tests/BUILD.bazel | 1 + pkg/cmd/roachtest/tests/costfuzz.go | 6 +- .../roachtest/tests/query_comparison_util.go | 98 ++++++++++++ .../tests/query_comparison_util_test.go | 140 ++++++++++++++++++ .../tests/unoptimized_query_oracle.go | 6 +- pkg/testutils/floatcmp/floatcmp.go | 18 +-- 6 files changed, 251 insertions(+), 18 deletions(-) create mode 100644 pkg/cmd/roachtest/tests/query_comparison_util_test.go diff --git a/pkg/cmd/roachtest/tests/BUILD.bazel b/pkg/cmd/roachtest/tests/BUILD.bazel index 2e49242ebcd9..58af5cf691c6 100644 --- a/pkg/cmd/roachtest/tests/BUILD.bazel +++ b/pkg/cmd/roachtest/tests/BUILD.bazel @@ -288,6 +288,7 @@ go_test( srcs = [ "blocklist_test.go", "drt_test.go", + "query_comparison_util_test.go", "tpcc_test.go", "util_load_group_test.go", ":mocks_drt", # keep diff --git a/pkg/cmd/roachtest/tests/costfuzz.go b/pkg/cmd/roachtest/tests/costfuzz.go index 7d0bbde72a9d..c5644c3b8871 100644 --- a/pkg/cmd/roachtest/tests/costfuzz.go +++ b/pkg/cmd/roachtest/tests/costfuzz.go @@ -114,7 +114,11 @@ func runCostFuzzQuery(qgen queryGenerator, rnd *rand.Rand, h queryComparisonHelp return nil } - if diff := unsortedMatricesDiff(controlRows, perturbRows); diff != "" { + diff, err := unsortedMatricesDiffWithFloatComp(controlRows, perturbRows, h.colTypes) + if err != nil { + return err + } + if diff != "" { // We have a mismatch in the perturbed vs control query outputs. h.logStatements() h.logVerboseOutput() diff --git a/pkg/cmd/roachtest/tests/query_comparison_util.go b/pkg/cmd/roachtest/tests/query_comparison_util.go index aace13f85b25..a7b6c4671b11 100644 --- a/pkg/cmd/roachtest/tests/query_comparison_util.go +++ b/pkg/cmd/roachtest/tests/query_comparison_util.go @@ -428,6 +428,7 @@ type queryComparisonHelper struct { statements []string statementsAndExplains []sqlAndOutput + colTypes []string } // runQuery runs the given query and returns the output. If the stmt doesn't @@ -452,6 +453,14 @@ func (h *queryComparisonHelper) runQuery(stmt string) ([][]string, error) { return nil, err } defer rows.Close() + cts, err := rows.ColumnTypes() + if err != nil { + return nil, err + } + h.colTypes = make([]string, len(cts)) + for i, ct := range cts { + h.colTypes[i] = ct.DatabaseTypeName() + } return sqlutils.RowsToStrMatrix(rows) } @@ -509,6 +518,95 @@ func (h *queryComparisonHelper) makeError(err error, msg string) error { return errors.Wrapf(err, "%s. %d statements run", msg, h.stmtNo) } +func joinAndSortRows(rowMatrix1, rowMatrix2 [][]string, sep string) (rows1, rows2 []string) { + for _, row := range rowMatrix1 { + rows1 = append(rows1, strings.Join(row[:], sep)) + } + for _, row := range rowMatrix2 { + rows2 = append(rows2, strings.Join(row[:], sep)) + } + sort.Strings(rows1) + sort.Strings(rows2) + return rows1, rows2 +} + +// unsortedMatricesDiffWithFloatComp sorts and compares the rows in rowMatrix1 +// to rowMatrix2 and outputs a diff or message related to the comparison. If a +// string comparison of the rows fails, and they contain floats or decimals, it +// performs an approximate comparison of the values. +func unsortedMatricesDiffWithFloatComp( + rowMatrix1, rowMatrix2 [][]string, colTypes []string, +) (string, error) { + rows1, rows2 := joinAndSortRows(rowMatrix1, rowMatrix2, ",") + result := cmp.Diff(rows1, rows2) + if result == "" { + return result, nil + } + if len(rows1) != len(rows2) || len(colTypes) != len(rowMatrix1[0]) || len(colTypes) != len(rowMatrix2[0]) { + return result, nil + } + var needApproxMatch bool + for i := range colTypes { + // On s390x, check that values for both float and decimal coltypes are + // approximately equal to take into account platform differences in floating + // point calculations. On other architectures, check float values only. + if (runtime.GOARCH == "s390x" && colTypes[i] == "DECIMAL") || + colTypes[i] == "FLOAT4" || colTypes[i] == "FLOAT8" { + needApproxMatch = true + break + } + } + if !needApproxMatch { + return result, nil + } + // Use an unlikely string as a separator so that we can make a comparison + // using sorted rows. We don't use the rows sorted above because splitting + // the rows could be ambiguous. + sep := ",unsortedMatricesDiffWithFloatComp separator," + rows1, rows2 = joinAndSortRows(rowMatrix1, rowMatrix2, sep) + for i := range rows1 { + // Split the sorted rows. + row1 := strings.Split(rows1[i], sep) + row2 := strings.Split(rows2[i], sep) + + for j := range row1 { + if runtime.GOARCH == "s390x" && colTypes[j] == "DECIMAL" { + // On s390x, check that values for both float and decimal coltypes are + // approximately equal to take into account platform differences in floating + // point calculations. On other architectures, check float values only. + match, err := floatcmp.FloatsMatchApprox(row1[j], row2[j]) + if err != nil { + return "", err + } + if !match { + return result, nil + } + } else if colTypes[j] == "FLOAT4" || colTypes[j] == "FLOAT8" { + // Check that float values are approximately equal. + var err error + var match bool + if runtime.GOARCH == "s390x" { + match, err = floatcmp.FloatsMatchApprox(row1[j], row2[j]) + } else { + match, err = floatcmp.FloatsMatch(row1[j], row2[j]) + } + if err != nil { + return "", err + } + if !match { + return result, nil + } + } else { + // Check that other columns are equal with a string comparison. + if row1[j] != row2[j] { + return result, nil + } + } + } + } + return "", nil +} + // unsortedMatricesDiff sorts and compares rows of data. func unsortedMatricesDiff(rowMatrix1, rowMatrix2 [][]string) string { var rows1 []string diff --git a/pkg/cmd/roachtest/tests/query_comparison_util_test.go b/pkg/cmd/roachtest/tests/query_comparison_util_test.go new file mode 100644 index 000000000000..ac1d5452eefe --- /dev/null +++ b/pkg/cmd/roachtest/tests/query_comparison_util_test.go @@ -0,0 +1,140 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package tests + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/util/leaktest" +) + +// TestUnsortedMatricesDiff is a unit test for the +// unsortedMatricesDiffWithFloatComp() and unsortedMatricesDiff() utility +// functions. +func TestUnsortedMatricesDiff(t *testing.T) { + defer leaktest.AfterTest(t)() + tcs := []struct { + name string + colTypes []string + t1, t2 [][]string + exactMatch bool + approxMatch bool + }{ + { + name: "float exact match", + colTypes: []string{"FLOAT8"}, + t1: [][]string{{"1.2345678901234567"}}, + t2: [][]string{{"1.2345678901234567"}}, + exactMatch: true, + }, + { + name: "float approx match", + colTypes: []string{"FLOAT8"}, + t1: [][]string{{"1.2345678901234563"}}, + t2: [][]string{{"1.2345678901234564"}}, + exactMatch: false, + approxMatch: true, + }, + { + name: "float no match", + colTypes: []string{"FLOAT8"}, + t1: [][]string{{"1.234567890123"}}, + t2: [][]string{{"1.234567890124"}}, + exactMatch: false, + approxMatch: false, + }, + { + name: "multi float approx match", + colTypes: []string{"FLOAT8", "FLOAT8"}, + t1: [][]string{{"1.2345678901234567", "1.2345678901234567"}}, + t2: [][]string{{"1.2345678901234567", "1.2345678901234568"}}, + exactMatch: false, + approxMatch: true, + }, + { + name: "string no match", + colTypes: []string{"STRING"}, + t1: [][]string{{"hello"}}, + t2: [][]string{{"world"}}, + exactMatch: false, + approxMatch: false, + }, + { + name: "mixed types match", + colTypes: []string{"STRING", "FLOAT8"}, + t1: [][]string{{"hello", "1.2345678901234567"}}, + t2: [][]string{{"hello", "1.2345678901234567"}}, + exactMatch: true, + }, + { + name: "mixed types float approx match", + colTypes: []string{"STRING", "FLOAT8"}, + t1: [][]string{{"hello", "1.23456789012345678"}}, + t2: [][]string{{"hello", "1.23456789012345679"}}, + exactMatch: false, + approxMatch: true, + }, + { + name: "mixed types no match", + colTypes: []string{"STRING", "FLOAT8"}, + t1: [][]string{{"hello", "1.2345678901234567"}}, + t2: [][]string{{"world", "1.2345678901234567"}}, + exactMatch: false, + approxMatch: false, + }, + { + name: "different col count", + colTypes: []string{"STRING"}, + t1: [][]string{{"hello", "1.2345678901234567"}}, + t2: [][]string{{"world", "1.2345678901234567"}}, + exactMatch: false, + approxMatch: false, + }, + { + name: "different row count", + colTypes: []string{"STRING", "FLOAT8"}, + t1: [][]string{{"hello", "1.2345678901234567"}, {"aloha", "2.345"}}, + t2: [][]string{{"world", "1.2345678901234567"}}, + exactMatch: false, + approxMatch: false, + }, + { + name: "multi row unsorted", + colTypes: []string{"STRING", "FLOAT8"}, + t1: [][]string{{"hello", "1.2345678901234567"}, {"world", "1.2345678901234560"}}, + t2: [][]string{{"world", "1.2345678901234560"}, {"hello", "1.2345678901234567"}}, + exactMatch: true, + }, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + match := unsortedMatricesDiff(tc.t1, tc.t2) + if tc.exactMatch && match != "" { + t.Fatalf("unsortedMatricesDiff: expected exact match, got diff: %s", match) + } else if !tc.exactMatch && match == "" { + t.Fatalf("unsortedMatricesDiff: expected no exact match, got no diff") + } + + var err error + match, err = unsortedMatricesDiffWithFloatComp(tc.t1, tc.t2, tc.colTypes) + if err != nil { + t.Fatal(err) + } + if tc.exactMatch && match != "" { + t.Fatalf("unsortedMatricesDiffWithFloatComp: expected exact match, got diff: %s", match) + } else if !tc.exactMatch && tc.approxMatch && match != "" { + t.Fatalf("unsortedMatricesDiffWithFloatComp: expected approx match, got diff: %s", match) + } else if !tc.exactMatch && !tc.approxMatch && match == "" { + t.Fatalf("unsortedMatricesDiffWithFloatComp: expected no approx match, got no diff") + } + }) + } +} diff --git a/pkg/cmd/roachtest/tests/unoptimized_query_oracle.go b/pkg/cmd/roachtest/tests/unoptimized_query_oracle.go index f19f621b3ebe..7bea9cabb011 100644 --- a/pkg/cmd/roachtest/tests/unoptimized_query_oracle.go +++ b/pkg/cmd/roachtest/tests/unoptimized_query_oracle.go @@ -171,7 +171,11 @@ func runUnoptimizedQueryOracleImpl( //nolint:returnerrcheck return nil } - if diff := unsortedMatricesDiff(unoptimizedRows, optimizedRows); diff != "" { + diff, err := unsortedMatricesDiffWithFloatComp(unoptimizedRows, optimizedRows, h.colTypes) + if err != nil { + return err + } + if diff != "" { // We have a mismatch in the unoptimized vs optimized query outputs. verboseLogging = true return h.makeError(errors.Newf( diff --git a/pkg/testutils/floatcmp/floatcmp.go b/pkg/testutils/floatcmp/floatcmp.go index 8c7f0d9f298e..6b87db4820fb 100644 --- a/pkg/testutils/floatcmp/floatcmp.go +++ b/pkg/testutils/floatcmp/floatcmp.go @@ -121,23 +121,9 @@ func FloatsMatch(expectedString, actualString string) (bool, error) { actual = math.Abs(actual) // Check that 15 significant digits match. We do so by normalizing the // numbers and then checking one digit at a time. - // - // normalize converts f to base * 10**power representation where base is in - // [1.0, 10.0) range. - normalize := func(f float64) (base float64, power int) { - for f >= 10 { - f = f / 10 - power++ - } - for f < 1 { - f *= 10 - power-- - } - return f, power - } var expPower, actPower int - expected, expPower = normalize(expected) - actual, actPower = normalize(actual) + expected, expPower = math.Frexp(expected) + actual, actPower = math.Frexp(actual) if expPower != actPower { return false, nil } From 13f1ab0a4a7af32605681a2dbb75bf199afe8ff2 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Mon, 24 Jul 2023 11:45:30 -0400 Subject: [PATCH 7/7] Revert "github-pull-request-make: temporary workaround" This reverts commit 2bd61c09de2c600291d7e30f77692b66a91289d4. Informs #106920. Release note: None --- pkg/cmd/github-pull-request-make/main.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pkg/cmd/github-pull-request-make/main.go b/pkg/cmd/github-pull-request-make/main.go index 67ba34a77be2..ce543678a304 100644 --- a/pkg/cmd/github-pull-request-make/main.go +++ b/pkg/cmd/github-pull-request-make/main.go @@ -209,13 +209,6 @@ func main() { if len(pkgs) > 0 { for name, pkg := range pkgs { - // TODO(postamar): remove this temporary workaround - // This hack was added to get #106743 over the finish line. - if strings.HasPrefix(name, "pkg/ccl/schemachangerccl") || - strings.HasPrefix(name, "pkg/sql/schemachanger") { - continue - } - // 20 minutes total seems OK, but at least 2 minutes per test. // This should be reduced. See #46941. duration := (20 * time.Minute) / time.Duration(len(pkgs))