From f308c4cd576c57249a635d2ed0d4bda4ad1a1ad6 Mon Sep 17 00:00:00 2001 From: Dinesh Gurumurthy Date: Tue, 26 Mar 2024 15:34:09 -0400 Subject: [PATCH] [internal/datadog] Fix Datarace in metrics client (#31964) **Description:** Fixes the data race. ``` fatal error: concurrent map read and map write goroutine 72665 [running]: github.com/open-telemetry/opentelemetry-collector-contrib/internal/datadog.(*metricsClient).Gauge.func1({0x7b895a0?, 0xc07c930790?}, {0x940dac8, 0xc1996ce180}) github.com/open-telemetry/opentelemetry-collector-contrib/internal/datadog@v0.96.0/metrics_client.go:49 +0x85 go.opentelemetry.io/otel/sdk/metric.(*meter).float64ObservableInstrument.func1.1({0x9443f08, 0xef10b00}) go.opentelemetry.io/otel/sdk/metric@v1.24.0/meter.go:286 +0x55 go.opentelemetry.io/otel/sdk/metric.(*pipeline).produce(0xc002852bd0, {0x9443f08, 0xef10b00}, 0xc108dd46c0) go.opentelemetry.io/otel/sdk/metric@v1.24.0/pipeline.go:122 +0x168 go.opentelemetry.io/otel/sdk/metric.(*ManualReader).Collect(0xc0030d2aa0, {0x9443f08, 0xef10b00}, 0xc108dd46c0) go.opentelemetry.io/otel/sdk/metric@v1.24.0/manual_reader.go:123 +0xe2 go.opentelemetry.io/otel/exporters/prometheus.(*collector).Collect(0xc0030e4aa0, 0xc10a0083c0) go.opentelemetry.io/otel/exporters/prometheus@v0.46.0/exporter.go:158 +0x72 github.com/prometheus/client_golang/prometheus.(*Registry).Gather.func1() github.com/prometheus/client_golang@v1.19.0/prometheus/registry.go:457 +0xe5 created by github.com/prometheus/client_golang/prometheus.(*Registry).Gather in goroutine 5 ``` **Link to tracking Issue:** **Testing:** **Documentation:** --------- Co-authored-by: Yang Song --- ...rumurthy_fix-data-race-metrics-client.yaml | 22 +++++++++++ internal/datadog/metrics_client.go | 2 + internal/datadog/metrics_client_test.go | 38 +++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 .chloggen/dinesh.gurumurthy_fix-data-race-metrics-client.yaml diff --git a/.chloggen/dinesh.gurumurthy_fix-data-race-metrics-client.yaml b/.chloggen/dinesh.gurumurthy_fix-data-race-metrics-client.yaml new file mode 100644 index 000000000000..9f605af173c2 --- /dev/null +++ b/.chloggen/dinesh.gurumurthy_fix-data-race-metrics-client.yaml @@ -0,0 +1,22 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: datadog/connector +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Fix data race in datadog metrics client" +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [31964] +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: "The PR ensures mutex protects gauges map in every code path." +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/internal/datadog/metrics_client.go b/internal/datadog/metrics_client.go index f2c9dcb47449..025001ce9036 100644 --- a/internal/datadog/metrics_client.go +++ b/internal/datadog/metrics_client.go @@ -46,6 +46,8 @@ func (m *metricsClient) Gauge(name string, value float64, tags []string, _ float m.gauges[name] = value _, err := m.meter.Float64ObservableGauge(name, metric.WithFloat64Callback(func(_ context.Context, f metric.Float64Observer) error { attr := m.attributeFromTags(tags) + m.mutex.Lock() + defer m.mutex.Unlock() if v, ok := m.gauges[name]; ok { f.Observe(v, metric.WithAttributeSet(attr)) } diff --git a/internal/datadog/metrics_client_test.go b/internal/datadog/metrics_client_test.go index fe697ab625d0..3165d4f53960 100644 --- a/internal/datadog/metrics_client_test.go +++ b/internal/datadog/metrics_client_test.go @@ -4,6 +4,7 @@ package datadog import ( "context" + "sync" "testing" "time" @@ -72,6 +73,43 @@ func TestGaugeMultiple(t *testing.T) { metricdatatest.AssertEqual(t, want, got, metricdatatest.IgnoreTimestamp()) } +func TestGaugeDataRace(t *testing.T) { + reader, metricClient, _ := setupMetricClient() + var wg sync.WaitGroup + wg.Add(2) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + go func() { + defer wg.Done() + for { + select { + case <-ctx.Done(): + return + default: + err := metricClient.Gauge("test_gauge", 1, []string{"otlp:true"}, 1) + assert.NoError(t, err) + } + } + }() + + go func() { + defer wg.Done() + for { + select { + case <-ctx.Done(): + return + default: + err := reader.Collect(context.Background(), &metricdata.ResourceMetrics{}) + assert.NoError(t, err) + } + } + }() + + wg.Wait() +} + func TestCount(t *testing.T) { reader, metricClient, _ := setupMetricClient()