diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 02777aef3ab2..7e8f78f7aeb9 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -655,6 +655,7 @@ ALL_TESTS = [ "//pkg/util/log/eventpb:eventpb_test", "//pkg/util/log/logconfig:logconfig_test", "//pkg/util/log/logcrash:logcrash_test", + "//pkg/util/log/logmetrics:logmetrics_test", "//pkg/util/log/testshout:testshout_test", "//pkg/util/log:log_test", "//pkg/util/metric/aggmetric:aggmetric_test", @@ -2320,6 +2321,8 @@ GO_TARGETS = [ "//pkg/util/log/logcrash:logcrash", "//pkg/util/log/logcrash:logcrash_test", "//pkg/util/log/logflags:logflags", + "//pkg/util/log/logmetrics:logmetrics", + "//pkg/util/log/logmetrics:logmetrics_test", "//pkg/util/log/logpb:logpb", "//pkg/util/log/logtestutils:logtestutils", "//pkg/util/log/logutil:logutil", diff --git a/pkg/cmd/github-pull-request-make/main.go b/pkg/cmd/github-pull-request-make/main.go index 67ba34a77be2..ce543678a304 100644 --- a/pkg/cmd/github-pull-request-make/main.go +++ b/pkg/cmd/github-pull-request-make/main.go @@ -209,13 +209,6 @@ func main() { if len(pkgs) > 0 { for name, pkg := range pkgs { - // TODO(postamar): remove this temporary workaround - // This hack was added to get #106743 over the finish line. - if strings.HasPrefix(name, "pkg/ccl/schemachangerccl") || - strings.HasPrefix(name, "pkg/sql/schemachanger") { - continue - } - // 20 minutes total seems OK, but at least 2 minutes per test. // This should be reduced. See #46941. duration := (20 * time.Minute) / time.Duration(len(pkgs)) diff --git a/pkg/cmd/roachtest/BUILD.bazel b/pkg/cmd/roachtest/BUILD.bazel index 6b41ce75c459..314b431b3a2f 100644 --- a/pkg/cmd/roachtest/BUILD.bazel +++ b/pkg/cmd/roachtest/BUILD.bazel @@ -2,6 +2,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test") go_library( name = "roachtest_lib", + testonly = 1, srcs = [ "cluster.go", "github.go", @@ -62,6 +63,7 @@ go_library( go_binary( name = "roachtest", + testonly = 1, embed = [":roachtest_lib"], visibility = ["//visibility:public"], ) @@ -69,6 +71,7 @@ go_binary( go_test( name = "roachtest_test", size = "small", + testonly = 1, srcs = [ "cluster_test.go", "github_test.go", diff --git a/pkg/cmd/roachtest/tests/BUILD.bazel b/pkg/cmd/roachtest/tests/BUILD.bazel index b09eb6c0e559..58af5cf691c6 100644 --- a/pkg/cmd/roachtest/tests/BUILD.bazel +++ b/pkg/cmd/roachtest/tests/BUILD.bazel @@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "tests", + testonly = 1, srcs = [ "acceptance.go", "activerecord.go", @@ -224,6 +225,7 @@ go_library( "//pkg/storage", "//pkg/storage/enginepb", "//pkg/testutils", + "//pkg/testutils/floatcmp", "//pkg/testutils/jobutils", "//pkg/testutils/release", "//pkg/testutils/sqlutils", @@ -286,6 +288,7 @@ go_test( srcs = [ "blocklist_test.go", "drt_test.go", + "query_comparison_util_test.go", "tpcc_test.go", "util_load_group_test.go", ":mocks_drt", # keep @@ -298,6 +301,7 @@ go_test( "//pkg/roachprod/logger", "//pkg/roachprod/prometheus", "//pkg/testutils/skip", + "//pkg/util/leaktest", "//pkg/util/version", "@com_github_golang_mock//gomock", "@com_github_google_go_github//github", diff --git a/pkg/cmd/roachtest/tests/costfuzz.go b/pkg/cmd/roachtest/tests/costfuzz.go index 7d0bbde72a9d..c5644c3b8871 100644 --- a/pkg/cmd/roachtest/tests/costfuzz.go +++ b/pkg/cmd/roachtest/tests/costfuzz.go @@ -114,7 +114,11 @@ func runCostFuzzQuery(qgen queryGenerator, rnd *rand.Rand, h queryComparisonHelp return nil } - if diff := unsortedMatricesDiff(controlRows, perturbRows); diff != "" { + diff, err := unsortedMatricesDiffWithFloatComp(controlRows, perturbRows, h.colTypes) + if err != nil { + return err + } + if diff != "" { // We have a mismatch in the perturbed vs control query outputs. h.logStatements() h.logVerboseOutput() diff --git a/pkg/cmd/roachtest/tests/query_comparison_util.go b/pkg/cmd/roachtest/tests/query_comparison_util.go index da83b13fbd49..a7b6c4671b11 100644 --- a/pkg/cmd/roachtest/tests/query_comparison_util.go +++ b/pkg/cmd/roachtest/tests/query_comparison_util.go @@ -18,6 +18,8 @@ import ( "math/rand" "os" "path/filepath" + "runtime" + "sort" "strings" "time" @@ -28,10 +30,12 @@ import ( "github.com/cockroachdb/cockroach/pkg/internal/sqlsmith" "github.com/cockroachdb/cockroach/pkg/internal/workloadreplay" "github.com/cockroachdb/cockroach/pkg/roachprod/install" + "github.com/cockroachdb/cockroach/pkg/testutils/floatcmp" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" + "github.com/google/go-cmp/cmp" ) const ( @@ -424,6 +428,7 @@ type queryComparisonHelper struct { statements []string statementsAndExplains []sqlAndOutput + colTypes []string } // runQuery runs the given query and returns the output. If the stmt doesn't @@ -448,6 +453,14 @@ func (h *queryComparisonHelper) runQuery(stmt string) ([][]string, error) { return nil, err } defer rows.Close() + cts, err := rows.ColumnTypes() + if err != nil { + return nil, err + } + h.colTypes = make([]string, len(cts)) + for i, ct := range cts { + h.colTypes[i] = ct.DatabaseTypeName() + } return sqlutils.RowsToStrMatrix(rows) } @@ -504,3 +517,107 @@ func (h *queryComparisonHelper) logVerboseOutput() { func (h *queryComparisonHelper) makeError(err error, msg string) error { return errors.Wrapf(err, "%s. %d statements run", msg, h.stmtNo) } + +func joinAndSortRows(rowMatrix1, rowMatrix2 [][]string, sep string) (rows1, rows2 []string) { + for _, row := range rowMatrix1 { + rows1 = append(rows1, strings.Join(row[:], sep)) + } + for _, row := range rowMatrix2 { + rows2 = append(rows2, strings.Join(row[:], sep)) + } + sort.Strings(rows1) + sort.Strings(rows2) + return rows1, rows2 +} + +// unsortedMatricesDiffWithFloatComp sorts and compares the rows in rowMatrix1 +// to rowMatrix2 and outputs a diff or message related to the comparison. If a +// string comparison of the rows fails, and they contain floats or decimals, it +// performs an approximate comparison of the values. +func unsortedMatricesDiffWithFloatComp( + rowMatrix1, rowMatrix2 [][]string, colTypes []string, +) (string, error) { + rows1, rows2 := joinAndSortRows(rowMatrix1, rowMatrix2, ",") + result := cmp.Diff(rows1, rows2) + if result == "" { + return result, nil + } + if len(rows1) != len(rows2) || len(colTypes) != len(rowMatrix1[0]) || len(colTypes) != len(rowMatrix2[0]) { + return result, nil + } + var needApproxMatch bool + for i := range colTypes { + // On s390x, check that values for both float and decimal coltypes are + // approximately equal to take into account platform differences in floating + // point calculations. On other architectures, check float values only. + if (runtime.GOARCH == "s390x" && colTypes[i] == "DECIMAL") || + colTypes[i] == "FLOAT4" || colTypes[i] == "FLOAT8" { + needApproxMatch = true + break + } + } + if !needApproxMatch { + return result, nil + } + // Use an unlikely string as a separator so that we can make a comparison + // using sorted rows. We don't use the rows sorted above because splitting + // the rows could be ambiguous. + sep := ",unsortedMatricesDiffWithFloatComp separator," + rows1, rows2 = joinAndSortRows(rowMatrix1, rowMatrix2, sep) + for i := range rows1 { + // Split the sorted rows. + row1 := strings.Split(rows1[i], sep) + row2 := strings.Split(rows2[i], sep) + + for j := range row1 { + if runtime.GOARCH == "s390x" && colTypes[j] == "DECIMAL" { + // On s390x, check that values for both float and decimal coltypes are + // approximately equal to take into account platform differences in floating + // point calculations. On other architectures, check float values only. + match, err := floatcmp.FloatsMatchApprox(row1[j], row2[j]) + if err != nil { + return "", err + } + if !match { + return result, nil + } + } else if colTypes[j] == "FLOAT4" || colTypes[j] == "FLOAT8" { + // Check that float values are approximately equal. + var err error + var match bool + if runtime.GOARCH == "s390x" { + match, err = floatcmp.FloatsMatchApprox(row1[j], row2[j]) + } else { + match, err = floatcmp.FloatsMatch(row1[j], row2[j]) + } + if err != nil { + return "", err + } + if !match { + return result, nil + } + } else { + // Check that other columns are equal with a string comparison. + if row1[j] != row2[j] { + return result, nil + } + } + } + } + return "", nil +} + +// unsortedMatricesDiff sorts and compares rows of data. +func unsortedMatricesDiff(rowMatrix1, rowMatrix2 [][]string) string { + var rows1 []string + for _, row := range rowMatrix1 { + rows1 = append(rows1, strings.Join(row[:], ",")) + } + var rows2 []string + for _, row := range rowMatrix2 { + rows2 = append(rows2, strings.Join(row[:], ",")) + } + sort.Strings(rows1) + sort.Strings(rows2) + return cmp.Diff(rows1, rows2) +} diff --git a/pkg/cmd/roachtest/tests/query_comparison_util_test.go b/pkg/cmd/roachtest/tests/query_comparison_util_test.go new file mode 100644 index 000000000000..ac1d5452eefe --- /dev/null +++ b/pkg/cmd/roachtest/tests/query_comparison_util_test.go @@ -0,0 +1,140 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package tests + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/util/leaktest" +) + +// TestUnsortedMatricesDiff is a unit test for the +// unsortedMatricesDiffWithFloatComp() and unsortedMatricesDiff() utility +// functions. +func TestUnsortedMatricesDiff(t *testing.T) { + defer leaktest.AfterTest(t)() + tcs := []struct { + name string + colTypes []string + t1, t2 [][]string + exactMatch bool + approxMatch bool + }{ + { + name: "float exact match", + colTypes: []string{"FLOAT8"}, + t1: [][]string{{"1.2345678901234567"}}, + t2: [][]string{{"1.2345678901234567"}}, + exactMatch: true, + }, + { + name: "float approx match", + colTypes: []string{"FLOAT8"}, + t1: [][]string{{"1.2345678901234563"}}, + t2: [][]string{{"1.2345678901234564"}}, + exactMatch: false, + approxMatch: true, + }, + { + name: "float no match", + colTypes: []string{"FLOAT8"}, + t1: [][]string{{"1.234567890123"}}, + t2: [][]string{{"1.234567890124"}}, + exactMatch: false, + approxMatch: false, + }, + { + name: "multi float approx match", + colTypes: []string{"FLOAT8", "FLOAT8"}, + t1: [][]string{{"1.2345678901234567", "1.2345678901234567"}}, + t2: [][]string{{"1.2345678901234567", "1.2345678901234568"}}, + exactMatch: false, + approxMatch: true, + }, + { + name: "string no match", + colTypes: []string{"STRING"}, + t1: [][]string{{"hello"}}, + t2: [][]string{{"world"}}, + exactMatch: false, + approxMatch: false, + }, + { + name: "mixed types match", + colTypes: []string{"STRING", "FLOAT8"}, + t1: [][]string{{"hello", "1.2345678901234567"}}, + t2: [][]string{{"hello", "1.2345678901234567"}}, + exactMatch: true, + }, + { + name: "mixed types float approx match", + colTypes: []string{"STRING", "FLOAT8"}, + t1: [][]string{{"hello", "1.23456789012345678"}}, + t2: [][]string{{"hello", "1.23456789012345679"}}, + exactMatch: false, + approxMatch: true, + }, + { + name: "mixed types no match", + colTypes: []string{"STRING", "FLOAT8"}, + t1: [][]string{{"hello", "1.2345678901234567"}}, + t2: [][]string{{"world", "1.2345678901234567"}}, + exactMatch: false, + approxMatch: false, + }, + { + name: "different col count", + colTypes: []string{"STRING"}, + t1: [][]string{{"hello", "1.2345678901234567"}}, + t2: [][]string{{"world", "1.2345678901234567"}}, + exactMatch: false, + approxMatch: false, + }, + { + name: "different row count", + colTypes: []string{"STRING", "FLOAT8"}, + t1: [][]string{{"hello", "1.2345678901234567"}, {"aloha", "2.345"}}, + t2: [][]string{{"world", "1.2345678901234567"}}, + exactMatch: false, + approxMatch: false, + }, + { + name: "multi row unsorted", + colTypes: []string{"STRING", "FLOAT8"}, + t1: [][]string{{"hello", "1.2345678901234567"}, {"world", "1.2345678901234560"}}, + t2: [][]string{{"world", "1.2345678901234560"}, {"hello", "1.2345678901234567"}}, + exactMatch: true, + }, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + match := unsortedMatricesDiff(tc.t1, tc.t2) + if tc.exactMatch && match != "" { + t.Fatalf("unsortedMatricesDiff: expected exact match, got diff: %s", match) + } else if !tc.exactMatch && match == "" { + t.Fatalf("unsortedMatricesDiff: expected no exact match, got no diff") + } + + var err error + match, err = unsortedMatricesDiffWithFloatComp(tc.t1, tc.t2, tc.colTypes) + if err != nil { + t.Fatal(err) + } + if tc.exactMatch && match != "" { + t.Fatalf("unsortedMatricesDiffWithFloatComp: expected exact match, got diff: %s", match) + } else if !tc.exactMatch && tc.approxMatch && match != "" { + t.Fatalf("unsortedMatricesDiffWithFloatComp: expected approx match, got diff: %s", match) + } else if !tc.exactMatch && !tc.approxMatch && match == "" { + t.Fatalf("unsortedMatricesDiffWithFloatComp: expected no approx match, got no diff") + } + }) + } +} diff --git a/pkg/cmd/roachtest/tests/tlp.go b/pkg/cmd/roachtest/tests/tlp.go index 2ee359cd1d23..45ee0a81019c 100644 --- a/pkg/cmd/roachtest/tests/tlp.go +++ b/pkg/cmd/roachtest/tests/tlp.go @@ -16,7 +16,6 @@ import ( "fmt" "os" "path/filepath" - "sort" "strings" "time" @@ -29,7 +28,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/errors" - "github.com/google/go-cmp/cmp" ) const statementTimeout = time.Minute @@ -271,20 +269,6 @@ func runTLPQuery(conn *gosql.DB, smither *sqlsmith.Smither, logStmt func(string) }) } -func unsortedMatricesDiff(rowMatrix1, rowMatrix2 [][]string) string { - var rows1 []string - for _, row := range rowMatrix1 { - rows1 = append(rows1, strings.Join(row[:], ",")) - } - var rows2 []string - for _, row := range rowMatrix2 { - rows2 = append(rows2, strings.Join(row[:], ",")) - } - sort.Strings(rows1) - sort.Strings(rows2) - return cmp.Diff(rows1, rows2) -} - func runWithTimeout(f func() error) error { done := make(chan error, 1) go func() { diff --git a/pkg/cmd/roachtest/tests/unoptimized_query_oracle.go b/pkg/cmd/roachtest/tests/unoptimized_query_oracle.go index f19f621b3ebe..7bea9cabb011 100644 --- a/pkg/cmd/roachtest/tests/unoptimized_query_oracle.go +++ b/pkg/cmd/roachtest/tests/unoptimized_query_oracle.go @@ -171,7 +171,11 @@ func runUnoptimizedQueryOracleImpl( //nolint:returnerrcheck return nil } - if diff := unsortedMatricesDiff(unoptimizedRows, optimizedRows); diff != "" { + diff, err := unsortedMatricesDiffWithFloatComp(unoptimizedRows, optimizedRows, h.colTypes) + if err != nil { + return err + } + if diff != "" { // We have a mismatch in the unoptimized vs optimized query outputs. verboseLogging = true return h.makeError(errors.Newf( diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index abe86994394c..fab9ff6a4c36 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -305,6 +305,7 @@ go_library( "//pkg/util/log", "//pkg/util/log/eventpb", "//pkg/util/log/logcrash", + "//pkg/util/log/logmetrics", "//pkg/util/log/logpb", "//pkg/util/log/severity", "//pkg/util/metric", diff --git a/pkg/server/server.go b/pkg/server/server.go index 0fad5d3f57b3..0edb40d8b7c3 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -110,6 +110,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/goschedstats" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/log/logmetrics" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/mon" "github.com/cockroachdb/cockroach/pkg/util/netutil" @@ -1842,9 +1843,16 @@ func (s *Server) PreStart(ctx context.Context) error { }) }) + // Init a log metrics registry. + logRegistry := logmetrics.NewRegistry() + if logRegistry == nil { + panic(errors.AssertionFailedf("nil log metrics registry at server startup")) + } + // We can now add the node registry. s.recorder.AddNode( s.registry, + logRegistry, s.node.Descriptor, s.node.startedAt, s.cfg.AdvertiseAddr, diff --git a/pkg/server/status/BUILD.bazel b/pkg/server/status/BUILD.bazel index 63cec5244305..b5fbb8d85187 100644 --- a/pkg/server/status/BUILD.bazel +++ b/pkg/server/status/BUILD.bazel @@ -65,6 +65,7 @@ go_library( "//pkg/util/humanizeutil", "//pkg/util/log", "//pkg/util/log/eventpb", + "//pkg/util/log/logmetrics", "//pkg/util/metric", "//pkg/util/syncutil", "//pkg/util/system", diff --git a/pkg/server/status/recorder.go b/pkg/server/status/recorder.go index a6277336c801..2cda3c054a98 100644 --- a/pkg/server/status/recorder.go +++ b/pkg/server/status/recorder.go @@ -45,6 +45,9 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" "github.com/cockroachdb/cockroach/pkg/util/log" + // Import the logmetrics package to trigger its own init function, which inits and injects + // metrics functionality into pkg/util/log. + _ "github.com/cockroachdb/cockroach/pkg/util/log/logmetrics" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/cockroach/pkg/util/system" @@ -130,8 +133,12 @@ type MetricsRecorder struct { // nodeRegistry contains, as subregistries, the multiple component-specific // registries which are recorded as "node level" metrics. nodeRegistry *metric.Registry - desc roachpb.NodeDescriptor - startedAt int64 + // logRegistry contains the global metrics registry used by the logging + // package. NB: The underlying metrics are global, but each server gets + // its own separate registry to avoid things such as colliding labels. + logRegistry *metric.Registry + desc roachpb.NodeDescriptor + startedAt int64 // storeRegistries contains a registry for each store on the node. These // are not stored as subregistries, but rather are treated as wholly @@ -190,6 +197,7 @@ func (mr *MetricsRecorder) AddTenantRegistry(tenantID roachpb.TenantID, rec *met // tenant is initialized. mr.mu.Do(func() { mr.mu.nodeRegistry.AddLabel("tenant", catconstants.SystemTenantName) + mr.mu.logRegistry.AddLabel("tenant", catconstants.SystemTenantName) }) } mr.mu.tenantRegistries[tenantID] = rec @@ -204,16 +212,17 @@ func (mr *MetricsRecorder) RemoveTenantRegistry(tenantID roachpb.TenantID) { } // AddNode adds the Registry from an initialized node, along with its descriptor -// and start time. +// and start time. It also adds the logging registry. func (mr *MetricsRecorder) AddNode( - reg *metric.Registry, + nodeReg, logReg *metric.Registry, desc roachpb.NodeDescriptor, startedAt int64, advertiseAddr, httpAddr, sqlAddr string, ) { mr.mu.Lock() defer mr.mu.Unlock() - mr.mu.nodeRegistry = reg + mr.mu.nodeRegistry = nodeReg + mr.mu.logRegistry = logReg mr.mu.desc = desc mr.mu.startedAt = startedAt @@ -230,20 +239,20 @@ func (mr *MetricsRecorder) AddNode( metadata.AddLabel(sqlAddrLabelKey, sqlAddr) nodeIDGauge := metric.NewGauge(metadata) nodeIDGauge.Update(int64(desc.NodeID)) - reg.AddMetric(nodeIDGauge) + nodeReg.AddMetric(nodeIDGauge) if !disableNodeAndTenantLabels { nodeIDInt := int(desc.NodeID) if nodeIDInt != 0 { - reg.AddLabel("node_id", strconv.Itoa(int(desc.NodeID))) - // We assume that all stores have been added to the registry - // prior to calling `AddNode`. + nodeReg.AddLabel("node_id", strconv.Itoa(int(desc.NodeID))) + logReg.AddLabel("node_id", strconv.Itoa(int(desc.NodeID))) for _, s := range mr.mu.storeRegistries { s.AddLabel("node_id", strconv.Itoa(int(desc.NodeID))) } } if mr.tenantNameContainer != nil && mr.tenantNameContainer.String() != catconstants.SystemTenantName { - reg.AddLabel("tenant", mr.tenantNameContainer) + nodeReg.AddLabel("tenant", mr.tenantNameContainer) + logReg.AddLabel("tenant", mr.tenantNameContainer) } } } @@ -276,7 +285,8 @@ func (mr *MetricsRecorder) MarshalJSON() ([]byte, error) { return []byte("{}"), nil } topLevel := map[string]interface{}{ - fmt.Sprintf("node.%d", mr.mu.desc.NodeID): mr.mu.nodeRegistry, + fmt.Sprintf("node.%d", mr.mu.desc.NodeID): mr.mu.nodeRegistry, + fmt.Sprintf("node.%d.log", mr.mu.desc.NodeID): mr.mu.logRegistry, } // Add collection of stores to top level. JSON requires that keys be strings, // so we must convert the store ID to a string. @@ -301,6 +311,7 @@ func (mr *MetricsRecorder) ScrapeIntoPrometheus(pm *metric.PrometheusExporter) { } includeChildMetrics := ChildMetricsEnabled.Get(&mr.settings.SV) pm.ScrapeRegistry(mr.mu.nodeRegistry, includeChildMetrics) + pm.ScrapeRegistry(mr.mu.logRegistry, includeChildMetrics) for _, reg := range mr.mu.storeRegistries { pm.ScrapeRegistry(reg, includeChildMetrics) } @@ -360,6 +371,9 @@ func (mr *MetricsRecorder) GetTimeSeriesData() []tspb.TimeSeriesData { timestampNanos: now.UnixNano(), } recorder.record(&data) + // Now record the log metrics. + recorder.registry = mr.mu.logRegistry + recorder.record(&data) // Record time series from node-level registries for secondary tenants. for tenantID, r := range mr.mu.tenantRegistries { @@ -419,6 +433,7 @@ func (mr *MetricsRecorder) GetMetricsMetadata() map[string]metric.Metadata { metrics := make(map[string]metric.Metadata) mr.mu.nodeRegistry.WriteMetricsMetadata(metrics) + mr.mu.logRegistry.WriteMetricsMetadata(metrics) // Get a random storeID. var sID roachpb.StoreID @@ -508,6 +523,9 @@ func (mr *MetricsRecorder) GenerateNodeStatus(ctx context.Context) *statuspb.Nod eachRecordableValue(mr.mu.nodeRegistry, func(name string, val float64) { nodeStat.Metrics[name] = val }) + eachRecordableValue(mr.mu.logRegistry, func(name string, val float64) { + nodeStat.Metrics[name] = val + }) // Generate status summaries for stores. for storeID, r := range mr.mu.storeRegistries { diff --git a/pkg/server/status/recorder_test.go b/pkg/server/status/recorder_test.go index 587b52c6a2a8..4dfdd93d9a90 100644 --- a/pkg/server/status/recorder_test.go +++ b/pkg/server/status/recorder_test.go @@ -118,7 +118,8 @@ func TestMetricsRecorderLabels(t *testing.T) { manual, st, ) - recorder.AddNode(reg1, nodeDesc, 50, "foo:26257", "foo:26258", "foo:5432") + logReg := metric.NewRegistry() + recorder.AddNode(reg1, logReg, nodeDesc, 50, "foo:26257", "foo:26258", "foo:5432") nodeDescTenant := roachpb.NodeDescriptor{ NodeID: roachpb.NodeID(7), @@ -137,7 +138,7 @@ func TestMetricsRecorderLabels(t *testing.T) { manual, stTenant, ) - recorderTenant.AddNode(regTenant, nodeDescTenant, 50, "foo:26257", "foo:26258", "foo:5432") + recorderTenant.AddNode(regTenant, logReg, nodeDescTenant, 50, "foo:26257", "foo:26258", "foo:5432") // ======================================== // Verify that the recorder exports metrics for tenants as text. @@ -151,6 +152,10 @@ func TestMetricsRecorderLabels(t *testing.T) { regTenant.AddMetric(g2) g2.Update(456) + c1 := metric.NewCounter(metric.Metadata{Name: "some_log_metric"}) + logReg.AddMetric(c1) + c1.Inc(2) + recorder.AddTenantRegistry(tenantID, regTenant) buf := bytes.NewBuffer([]byte{}) @@ -211,6 +216,16 @@ func TestMetricsRecorderLabels(t *testing.T) { }, }, }, + { + Name: "cr.node.some_log_metric", + Source: "7", + Datapoints: []tspb.TimeSeriesDatapoint{ + { + TimestampNanos: manual.Now().UnixNano(), + Value: float64(2), + }, + }, + }, // App tenant metrics { Name: "cr.node.node-id", @@ -435,7 +450,8 @@ func TestMetricsRecorder(t *testing.T) { recorder := NewMetricsRecorder(roachpb.SystemTenantID, roachpb.NewTenantNameContainer(""), nil, nil, manual, st) recorder.AddStore(store1) recorder.AddStore(store2) - recorder.AddNode(reg1, nodeDesc, 50, "foo:26257", "foo:26258", "foo:5432") + logReg := metric.NewRegistry() + recorder.AddNode(reg1, logReg, nodeDesc, 50, "foo:26257", "foo:26258", "foo:5432") // Ensure the metric system's view of time does not advance during this test // as the test expects time to not advance too far which would age the actual @@ -467,6 +483,12 @@ func TestMetricsRecorder(t *testing.T) { source: 1, isNode: true, }, + { + reg: logReg, + prefix: "log.", + source: 1, + isNode: true, + }, { reg: store1.registry, prefix: "", diff --git a/pkg/server/tenant.go b/pkg/server/tenant.go index 0ab276b72815..87217b3e9bc9 100644 --- a/pkg/server/tenant.go +++ b/pkg/server/tenant.go @@ -69,6 +69,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/admission" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/log/logmetrics" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/netutil" "github.com/cockroachdb/cockroach/pkg/util/schedulerlatency" @@ -639,9 +640,16 @@ func (s *SQLServerWrapper) PreStart(ctx context.Context) error { }) }) + // Init a log metrics registry. + logRegistry := logmetrics.NewRegistry() + if logRegistry == nil { + panic(errors.AssertionFailedf("nil log metrics registry at server startup")) + } + // We can now add the node registry. s.recorder.AddNode( s.registry, + logRegistry, roachpb.NodeDescriptor{ NodeID: s.rpcContext.NodeID.Get(), }, diff --git a/pkg/sql/logictest/logic.go b/pkg/sql/logictest/logic.go index c11faafbfb01..9d5e0c6eed69 100644 --- a/pkg/sql/logictest/logic.go +++ b/pkg/sql/logictest/logic.go @@ -18,7 +18,6 @@ import ( gosql "database/sql" "flag" "fmt" - "math" "math/rand" "net" "net/url" @@ -3676,9 +3675,9 @@ func (t *logicTest) finishExecQuery(query logicQuery, rows *gosql.Rows, err erro // ('R') coltypes are approximately equal to take into account // platform differences in floating point calculations. if runtime.GOARCH == "s390x" && (colT == 'F' || colT == 'R') { - resultMatches, err = floatsMatchApprox(expected, actual) + resultMatches, err = floatcmp.FloatsMatchApprox(expected, actual) } else if colT == 'F' { - resultMatches, err = floatsMatch(expected, actual) + resultMatches, err = floatcmp.FloatsMatch(expected, actual) } if err != nil { return errors.CombineErrors(makeError(), err) @@ -3746,93 +3745,6 @@ func (t *logicTest) finishExecQuery(query logicQuery, rows *gosql.Rows, err erro return nil } -// parseExpectedAndActualFloats converts the strings expectedString and -// actualString to float64 values. -func parseExpectedAndActualFloats(expectedString, actualString string) (float64, float64, error) { - expected, err := strconv.ParseFloat(expectedString, 64 /* bitSize */) - if err != nil { - return 0, 0, errors.Wrap(err, "when parsing expected") - } - actual, err := strconv.ParseFloat(actualString, 64 /* bitSize */) - if err != nil { - return 0, 0, errors.Wrap(err, "when parsing actual") - } - return expected, actual, nil -} - -// floatsMatchApprox returns whether two floating point represented as -// strings are equal within a tolerance. -func floatsMatchApprox(expectedString, actualString string) (bool, error) { - expected, actual, err := parseExpectedAndActualFloats(expectedString, actualString) - if err != nil { - return false, err - } - return floatcmp.EqualApprox(expected, actual, floatcmp.CloseFraction, floatcmp.CloseMargin), nil -} - -// floatsMatch returns whether two floating point numbers represented as -// strings have matching 15 significant decimal digits (this is the precision -// that Postgres supports for 'double precision' type). -func floatsMatch(expectedString, actualString string) (bool, error) { - expected, actual, err := parseExpectedAndActualFloats(expectedString, actualString) - if err != nil { - return false, err - } - // Check special values - NaN, +Inf, -Inf, 0. - if math.IsNaN(expected) || math.IsNaN(actual) { - return math.IsNaN(expected) == math.IsNaN(actual), nil - } - if math.IsInf(expected, 0 /* sign */) || math.IsInf(actual, 0 /* sign */) { - bothNegativeInf := math.IsInf(expected, -1 /* sign */) == math.IsInf(actual, -1 /* sign */) - bothPositiveInf := math.IsInf(expected, 1 /* sign */) == math.IsInf(actual, 1 /* sign */) - return bothNegativeInf || bothPositiveInf, nil - } - if expected == 0 || actual == 0 { - return expected == actual, nil - } - // Check that the numbers have the same sign. - if expected*actual < 0 { - return false, nil - } - expected = math.Abs(expected) - actual = math.Abs(actual) - // Check that 15 significant digits match. We do so by normalizing the - // numbers and then checking one digit at a time. - // - // normalize converts f to base * 10**power representation where base is in - // [1.0, 10.0) range. - normalize := func(f float64) (base float64, power int) { - for f >= 10 { - f = f / 10 - power++ - } - for f < 1 { - f *= 10 - power-- - } - return f, power - } - var expPower, actPower int - expected, expPower = normalize(expected) - actual, actPower = normalize(actual) - if expPower != actPower { - return false, nil - } - // TODO(yuzefovich): investigate why we can't always guarantee deterministic - // 15 significant digits and switch back from 14 to 15 digits comparison - // here. See #56446 for more details. - for i := 0; i < 14; i++ { - expDigit := int(expected) - actDigit := int(actual) - if expDigit != actDigit { - return false, nil - } - expected -= (expected - float64(expDigit)) * 10 - actual -= (actual - float64(actDigit)) * 10 - } - return true, nil -} - func (t *logicTest) formatValues(vals []string, valsPerLine int) []string { var buf bytes.Buffer tw := tabwriter.NewWriter(&buf, 2, 1, 2, ' ', 0) diff --git a/pkg/sql/logictest/main_test.go b/pkg/sql/logictest/main_test.go index 08b4893b5f76..ab3fbecaa74d 100644 --- a/pkg/sql/logictest/main_test.go +++ b/pkg/sql/logictest/main_test.go @@ -20,7 +20,6 @@ import ( _ "github.com/cockroachdb/cockroach/pkg/sql/tests" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" - "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/randutil" ) @@ -33,41 +32,3 @@ func TestMain(m *testing.M) { serverutils.InitTestClusterFactory(testcluster.TestClusterFactory) os.Exit(m.Run()) } - -// TestFloatsMatch is a unit test for floatsMatch() and floatsMatchApprox() -// functions. -func TestFloatsMatch(t *testing.T) { - defer leaktest.AfterTest(t)() - for _, tc := range []struct { - f1, f2 string - match bool - }{ - {f1: "NaN", f2: "+Inf", match: false}, - {f1: "+Inf", f2: "+Inf", match: true}, - {f1: "NaN", f2: "NaN", match: true}, - {f1: "+Inf", f2: "-Inf", match: false}, - {f1: "-0.0", f2: "0.0", match: true}, - {f1: "0.0", f2: "NaN", match: false}, - {f1: "123.45", f2: "12.345", match: false}, - {f1: "0.1234567890123456", f2: "0.1234567890123455", match: true}, - {f1: "0.1234567890123456", f2: "0.1234567890123457", match: true}, - {f1: "-0.1234567890123456", f2: "0.1234567890123456", match: false}, - {f1: "-0.1234567890123456", f2: "-0.1234567890123455", match: true}, - } { - match, err := floatsMatch(tc.f1, tc.f2) - if err != nil { - t.Fatal(err) - } - if match != tc.match { - t.Fatalf("floatsMatch: wrong result on %v", tc) - } - - match, err = floatsMatchApprox(tc.f1, tc.f2) - if err != nil { - t.Fatal(err) - } - if match != tc.match { - t.Fatalf("floatsMatchApprox: wrong result on %v", tc) - } - } -} diff --git a/pkg/testutils/floatcmp/BUILD.bazel b/pkg/testutils/floatcmp/BUILD.bazel index 7b54e125c2da..62d098e08804 100644 --- a/pkg/testutils/floatcmp/BUILD.bazel +++ b/pkg/testutils/floatcmp/BUILD.bazel @@ -7,6 +7,7 @@ go_library( importpath = "github.com/cockroachdb/cockroach/pkg/testutils/floatcmp", visibility = ["//visibility:public"], deps = [ + "@com_github_cockroachdb_errors//:errors", "@com_github_google_go_cmp//cmp", "@com_github_google_go_cmp//cmp/cmpopts", ], @@ -18,4 +19,5 @@ go_test( srcs = ["floatcmp_test.go"], args = ["-test.timeout=55s"], embed = [":floatcmp"], + deps = ["//pkg/util/leaktest"], ) diff --git a/pkg/testutils/floatcmp/floatcmp.go b/pkg/testutils/floatcmp/floatcmp.go index 9bfe97199304..6b87db4820fb 100644 --- a/pkg/testutils/floatcmp/floatcmp.go +++ b/pkg/testutils/floatcmp/floatcmp.go @@ -14,10 +14,12 @@ package floatcmp import ( "fmt" + "math" "regexp" "strconv" "strings" + "github.com/cockroachdb/errors" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" ) @@ -40,7 +42,7 @@ const ( // // CloseMargin is greater than 0 otherwise if either expected or actual were // 0 the calculated tolerance from the fraction would be 0. - CloseMargin float64 = CloseFraction * CloseFraction + CloseMargin = CloseFraction * CloseFraction ) // EqualApprox reports whether expected and actual are deeply equal with the @@ -81,6 +83,65 @@ func EqualApprox(expected interface{}, actual interface{}, fraction float64, mar return cmp.Equal(expected, actual, cmpopts.EquateApprox(fraction, margin), cmpopts.EquateNaNs()) } +// FloatsMatchApprox returns whether two floating point represented as +// strings are equal within a tolerance. +func FloatsMatchApprox(expectedString, actualString string) (bool, error) { + expected, actual, err := parseExpectedAndActualFloats(expectedString, actualString) + if err != nil { + return false, err + } + return EqualApprox(expected, actual, CloseFraction, CloseMargin), nil +} + +// FloatsMatch returns whether two floating point numbers represented as +// strings have matching 15 significant decimal digits (this is the precision +// that Postgres supports for 'double precision' type). +func FloatsMatch(expectedString, actualString string) (bool, error) { + expected, actual, err := parseExpectedAndActualFloats(expectedString, actualString) + if err != nil { + return false, err + } + // Check special values - NaN, +Inf, -Inf, 0. + if math.IsNaN(expected) || math.IsNaN(actual) { + return math.IsNaN(expected) == math.IsNaN(actual), nil + } + if math.IsInf(expected, 0 /* sign */) || math.IsInf(actual, 0 /* sign */) { + bothNegativeInf := math.IsInf(expected, -1 /* sign */) == math.IsInf(actual, -1 /* sign */) + bothPositiveInf := math.IsInf(expected, 1 /* sign */) == math.IsInf(actual, 1 /* sign */) + return bothNegativeInf || bothPositiveInf, nil + } + if expected == 0 || actual == 0 { + return expected == actual, nil + } + // Check that the numbers have the same sign. + if expected*actual < 0 { + return false, nil + } + expected = math.Abs(expected) + actual = math.Abs(actual) + // Check that 15 significant digits match. We do so by normalizing the + // numbers and then checking one digit at a time. + var expPower, actPower int + expected, expPower = math.Frexp(expected) + actual, actPower = math.Frexp(actual) + if expPower != actPower { + return false, nil + } + // TODO(yuzefovich): investigate why we can't always guarantee deterministic + // 15 significant digits and switch back from 14 to 15 digits comparison + // here. See #56446 for more details. + for i := 0; i < 14; i++ { + expDigit := int(expected) + actDigit := int(actual) + if expDigit != actDigit { + return false, nil + } + expected -= (expected - float64(expDigit)) * 10 + actual -= (actual - float64(actDigit)) * 10 + } + return true, nil +} + // RoundFloatsInString rounds floats in a given string to the given number of significant figures. func RoundFloatsInString(s string, significantFigures int) string { return string(regexp.MustCompile(`(\d+\.\d+)`).ReplaceAllFunc([]byte(s), func(x []byte) []byte { @@ -103,3 +164,17 @@ func ParseRoundInStringsDirective(directive string) (int, error) { } return strconv.Atoi(kv[1]) } + +// parseExpectedAndActualFloats converts the strings expectedString and +// actualString to float64 values. +func parseExpectedAndActualFloats(expectedString, actualString string) (float64, float64, error) { + expected, err := strconv.ParseFloat(expectedString, 64 /* bitSize */) + if err != nil { + return 0, 0, errors.Wrap(err, "when parsing expected") + } + actual, err := strconv.ParseFloat(actualString, 64 /* bitSize */) + if err != nil { + return 0, 0, errors.Wrap(err, "when parsing actual") + } + return expected, actual, nil +} diff --git a/pkg/testutils/floatcmp/floatcmp_test.go b/pkg/testutils/floatcmp/floatcmp_test.go index a7f8d1490e56..a1a23879e643 100644 --- a/pkg/testutils/floatcmp/floatcmp_test.go +++ b/pkg/testutils/floatcmp/floatcmp_test.go @@ -13,6 +13,8 @@ package floatcmp import ( "math" "testing" + + "github.com/cockroachdb/cockroach/pkg/util/leaktest" ) // EqualApprox takes an interface, allowing it to compare equality of both @@ -156,3 +158,41 @@ func TestEqualClose(t *testing.T) { }) } } + +// TestFloatsMatch is a unit test for floatsMatch() and floatsMatchApprox() +// functions. +func TestFloatsMatch(t *testing.T) { + defer leaktest.AfterTest(t)() + for _, tc := range []struct { + f1, f2 string + match bool + }{ + {f1: "NaN", f2: "+Inf", match: false}, + {f1: "+Inf", f2: "+Inf", match: true}, + {f1: "NaN", f2: "NaN", match: true}, + {f1: "+Inf", f2: "-Inf", match: false}, + {f1: "-0.0", f2: "0.0", match: true}, + {f1: "0.0", f2: "NaN", match: false}, + {f1: "123.45", f2: "12.345", match: false}, + {f1: "0.1234567890123456", f2: "0.1234567890123455", match: true}, + {f1: "0.1234567890123456", f2: "0.1234567890123457", match: true}, + {f1: "-0.1234567890123456", f2: "0.1234567890123456", match: false}, + {f1: "-0.1234567890123456", f2: "-0.1234567890123455", match: true}, + } { + match, err := FloatsMatch(tc.f1, tc.f2) + if err != nil { + t.Fatal(err) + } + if match != tc.match { + t.Fatalf("floatsMatch: wrong result on %v", tc) + } + + match, err = FloatsMatchApprox(tc.f1, tc.f2) + if err != nil { + t.Fatal(err) + } + if match != tc.match { + t.Fatalf("floatsMatchApprox: wrong result on %v", tc) + } + } +} diff --git a/pkg/util/log/BUILD.bazel b/pkg/util/log/BUILD.bazel index 73e3ac9a17f5..d4bde7c984ce 100644 --- a/pkg/util/log/BUILD.bazel +++ b/pkg/util/log/BUILD.bazel @@ -34,6 +34,7 @@ go_library( "log_decoder.go", "log_entry.go", "log_flush.go", + "metric.go", "redact.go", "registry.go", "sinks.go", diff --git a/pkg/util/log/clog.go b/pkg/util/log/clog.go index d0206c66cf0c..ea7c44bb474e 100644 --- a/pkg/util/log/clog.go +++ b/pkg/util/log/clog.go @@ -109,6 +109,19 @@ type loggingT struct { allSinkInfos sinkInfoRegistry allLoggers loggerRegistry + metrics LogMetrics +} + +// SetLogMetrics injects an initialized implementation of +// the LogMetrics interface into the logging package. The +// implementation must be injected to avoid a dependency +// cycle. +// +// Should be called within the init() function of the +// implementing package to avoid the possibility of a nil +// LogMetrics during server startups. +func SetLogMetrics(m LogMetrics) { + logging.metrics = m } func init() { diff --git a/pkg/util/log/fluent_client.go b/pkg/util/log/fluent_client.go index 84c746429665..c49f6e2ab398 100644 --- a/pkg/util/log/fluent_client.go +++ b/pkg/util/log/fluent_client.go @@ -74,6 +74,7 @@ func (l *fluentSink) output(b []byte, opts sinkOutputOptions) (err error) { } if err := l.ensureConnLocked(b); err != nil { + logging.metrics.IncrementCounter(FluentSinkConnectionError, 1) return err } return l.tryWriteLocked(b) diff --git a/pkg/util/log/logmetrics/BUILD.bazel b/pkg/util/log/logmetrics/BUILD.bazel new file mode 100644 index 000000000000..0278d9e27f79 --- /dev/null +++ b/pkg/util/log/logmetrics/BUILD.bazel @@ -0,0 +1,27 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "logmetrics", + srcs = ["metrics.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/util/log/logmetrics", + visibility = ["//visibility:public"], + deps = [ + "//pkg/util/log", + "//pkg/util/metric", + "//pkg/util/syncutil", + "@com_github_cockroachdb_errors//:errors", + ], +) + +go_test( + name = "logmetrics_test", + srcs = ["metrics_test.go"], + args = ["-test.timeout=295s"], + embed = [":logmetrics"], + deps = [ + "//pkg/util/leaktest", + "//pkg/util/log", + "//pkg/util/metric", + "@com_github_stretchr_testify//require", + ], +) diff --git a/pkg/util/log/logmetrics/metrics.go b/pkg/util/log/logmetrics/metrics.go new file mode 100644 index 000000000000..afd2a77a9e18 --- /dev/null +++ b/pkg/util/log/logmetrics/metrics.go @@ -0,0 +1,120 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package logmetrics + +import ( + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/metric" + "github.com/cockroachdb/cockroach/pkg/util/syncutil" + "github.com/cockroachdb/errors" +) + +var ( + // logMetricsReg is a singleton instance of the LogMetricsRegistry. + logMetricsReg = newLogMetricsRegistry() + FluentSinkConnErrors = metric.Metadata{ + Name: string(log.FluentSinkConnectionError), + Help: "Number of connection errors experienced by fluent-server logging sinks", + Measurement: "fluent-server log sink connection errors", + Unit: metric.Unit_COUNT, + } +) + +// Inject our singleton LogMetricsRegistry into the logging +// package. This ensures that the LogMetrics implementation within the +// log package is always defined. This should only be called once from +// a single init function. +// +// Since the primary user of the eventual metric.Registry's that come +// from LogMetricsRegistry is the MetricsRecorder, we trigger this +// init function via an import in pkg/util/log/logmetrics/metrics.go. +func init() { + log.SetLogMetrics(logMetricsReg) +} + +// logMetricsStruct is a struct used to contain all metrics +// tracked by the LogMetricsRegistry. This container is necessary +// to register all the metrics with the Registry internal to the +// LogMetricsRegistry. +type logMetricsStruct struct { + FluentSinkConnErrors *metric.Counter +} + +// LogMetricsRegistry is a log.LogMetrics implementation used in the +// logging package to give it access to metrics without introducing a +// circular dependency. +// +// All metrics meant to be available to the logging package must be +// registered at the time of initialization. +// +// LogMetricsRegistry is thread-safe. +type LogMetricsRegistry struct { + mu struct { + syncutil.Mutex + // metricsStruct holds the same metrics as the below structures, but + // provides an easy way to inject them into metric.Registry's on demand + // in NewRegistry(). + metricsStruct logMetricsStruct + counters map[log.MetricName]*metric.Counter + } +} + +var _ log.LogMetrics = (*LogMetricsRegistry)(nil) + +func newLogMetricsRegistry() *LogMetricsRegistry { + registry := &LogMetricsRegistry{} + registry.registerCounters() + return registry +} + +func (l *LogMetricsRegistry) registerCounters() { + l.mu.Lock() + defer l.mu.Unlock() + l.mu.counters = make(map[log.MetricName]*metric.Counter) + // Create the metrics struct for us to add to registries as they're + // requested. + l.mu.metricsStruct = logMetricsStruct{ + FluentSinkConnErrors: metric.NewCounter(FluentSinkConnErrors), + } + // Be sure to also add the metrics to our internal store, for + // recall in functions such as IncrementCounter. + l.mu.counters[log.MetricName(FluentSinkConnErrors.Name)] = l.mu.metricsStruct.FluentSinkConnErrors +} + +// NewRegistry initializes and returns a new metric.Registry, populated with metrics +// tracked by the LogMetricsRegistry. While the metrics tracked by the logmetrics package +// are global, they may be shared by multiple servers, test servers, etc. Therefore, we +// need the means to label the metrics separately depending on the server, tenant, etc. +// serving them. For this reason, we provide the ability to track the same log metrics +// across multiple registries. +func NewRegistry() *metric.Registry { + if logMetricsReg == nil { + panic(errors.AssertionFailedf("LogMetricsRegistry was not initialized")) + } + reg := metric.NewRegistry() + logMetricsReg.mu.Lock() + defer logMetricsReg.mu.Unlock() + reg.AddMetricStruct(logMetricsReg.mu.metricsStruct) + return reg +} + +// IncrementCounter increments thegi Counter held by the given alias. If a log.MetricName +// is provided as an argument, but is not registered with the LogMetricsRegistry, this function +// panics. +func (l *LogMetricsRegistry) IncrementCounter(metric log.MetricName, amount int64) { + l.mu.Lock() + defer l.mu.Unlock() + counter, ok := l.mu.counters[metric] + if !ok { + panic(errors.AssertionFailedf("MetricName not registered in LogMetricsRegistry: %q", string(metric))) + } + counter.Inc(amount) +} diff --git a/pkg/util/log/logmetrics/metrics_test.go b/pkg/util/log/logmetrics/metrics_test.go new file mode 100644 index 000000000000..e1a7630ab95e --- /dev/null +++ b/pkg/util/log/logmetrics/metrics_test.go @@ -0,0 +1,71 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package logmetrics + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/metric" + "github.com/stretchr/testify/require" +) + +func TestIncrementCounter(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + t.Run("panics when log.MetricName not registered", func(t *testing.T) { + l := &LogMetricsRegistry{} + l.mu.counters = map[log.MetricName]*metric.Counter{} + require.PanicsWithErrorf(t, + `MetricName not registered in LogMetricsRegistry: "unregistered"`, + func() { + l.IncrementCounter("unregistered", 1) + }, "expected IncrementCounter to panic for unregistered metric") + }) + + t.Run("increments counter", func(t *testing.T) { + l := newLogMetricsRegistry() + func() { + l.mu.Lock() + defer l.mu.Unlock() + require.Zero(t, l.mu.metricsStruct.FluentSinkConnErrors.Count()) + }() + l.IncrementCounter(log.FluentSinkConnectionError, 1) + l.IncrementCounter(log.FluentSinkConnectionError, 2) + func() { + l.mu.Lock() + defer l.mu.Unlock() + require.Equal(t, int64(3), l.mu.metricsStruct.FluentSinkConnErrors.Count()) + }() + }) +} + +func TestNewRegistry(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + t.Run("panics when logMetricsReg is nil", func(t *testing.T) { + logMetricsReg = nil + require.PanicsWithErrorf(t, + "LogMetricsRegistry was not initialized", + func() { + _ = NewRegistry() + }, "expected NewRegistry() to panic with nil logMetricsReg package-level var") + }) +} + +type fakeLogMetrics struct{} + +func (*fakeLogMetrics) IncrementCounter(_ log.MetricName, _ int64) {} + +var _ log.LogMetrics = (*fakeLogMetrics)(nil) diff --git a/pkg/util/log/metric.go b/pkg/util/log/metric.go new file mode 100644 index 000000000000..3ec783c4b828 --- /dev/null +++ b/pkg/util/log/metric.go @@ -0,0 +1,43 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package log + +// LogMetrics enables the registration and recording of metrics +// within the log package. +// +// Because the log package is imported by nearly every package +// within CRDB, it's difficult to add new dependencies to the +// log package without introducing a circular dependency. +// +// The LogMetrics interface provides us with a way to still +// make use of the metrics library within the log package via +// dependency injection, allowing the implementation to live +// elsewhere (e.g. the metrics package). +type LogMetrics interface { + // IncrementCounter increments the Counter metric associated with the + // provided MetricName by the given amount, assuming the + // metric has been registered. + // + // The LogMetrics implementation must have metadata defined + // for the given MetricName within its own scope. See + // pkg/util/log/logmetrics for details. + IncrementCounter(metric MetricName, amount int64) +} + +// MetricName represents the name of a metric registered & +// used within the log package, available to use in the LogMetrics +// interface. +type MetricName string + +// FluentSinkConnectionError is the MetricName for the metric +// used to count fluent-server log sink connection errors. Please +// refer to its metric metadata for more details (hint: see usages). +const FluentSinkConnectionError MetricName = "fluent.sink.conn.errors"