From a75b1ca192f0bfdb52e682e9c13232a6ffc1689a Mon Sep 17 00:00:00 2001 From: Erwann Masson Date: Fri, 20 Dec 2024 11:24:04 +0100 Subject: [PATCH] Merge attributes and support multiple configs for failover (#31761) --- .../rcclient/rcclientimpl/rcclient.go | 92 ++++++++++++------- .../rcclient/rcclientimpl/rcclient_test.go | 92 +++++++++++++++++++ 2 files changed, 149 insertions(+), 35 deletions(-) diff --git a/comp/remote-config/rcclient/rcclientimpl/rcclient.go b/comp/remote-config/rcclient/rcclientimpl/rcclient.go index 4e43c1d99b1aa..374bb41bfe459 100644 --- a/comp/remote-config/rcclient/rcclientimpl/rcclient.go +++ b/comp/remote-config/rcclient/rcclientimpl/rcclient.go @@ -168,6 +168,10 @@ func (rc rcClient) start() { } } +// mrfUpdateCallback is the callback function for the AGENT_FAILOVER configs. +// It fetches all the configs targeting the agent and applies the failover settings +// using an OR strategy. In case of nil the value is not updated, for a false it does not update if +// the setting is already set to true. func (rc rcClient) mrfUpdateCallback(updates map[string]state.RawConfig, applyStateCallback func(string, state.ApplyStatus)) { // If the updates map is empty, we should unset the failover settings if they were set via RC previously if len(updates) == 0 { @@ -188,8 +192,13 @@ func (rc rcClient) mrfUpdateCallback(updates map[string]state.RawConfig, applySt return } - applied := false + var enableLogs, enableMetrics *bool + var enableLogsCfgPth, enableMetricsCfgPth string for cfgPath, update := range updates { + if (enableLogs != nil && *enableLogs) && (enableMetrics != nil && *enableMetrics) { + break + } + mrfUpdate, err := parseMultiRegionFailoverConfig(update.Config) if err != nil { pkglog.Errorf("Multi-Region Failover update unmarshal failed: %s", err) @@ -200,42 +209,55 @@ func (rc rcClient) mrfUpdateCallback(updates map[string]state.RawConfig, applySt continue } - if mrfUpdate != nil && (mrfUpdate.FailoverMetrics != nil || mrfUpdate.FailoverLogs != nil) { - // If we've received multiple config files updating the failover settings, we should disregard all but the first update and log it, as this is unexpected - if applied { - pkglog.Warnf("Multiple Multi-Region Failover updates received, disregarding update of `multi_region_failover.failover_metrics` to %v and `multi_region_failover.failover_logs` to %v", mrfUpdate.FailoverMetrics, mrfUpdate.FailoverLogs) - applyStateCallback(cfgPath, state.ApplyStatus{ - State: state.ApplyStateError, - Error: "Multiple Multi-Region Failover updates received. Only the first was applied.", - }) - continue - } + if mrfUpdate == nil || (mrfUpdate.FailoverMetrics == nil && mrfUpdate.FailoverLogs == nil) { + continue + } - if mrfUpdate.FailoverMetrics != nil { - err = rc.applyMRFRuntimeSetting("multi_region_failover.failover_metrics", *mrfUpdate.FailoverMetrics, cfgPath, applyStateCallback) - if err != nil { - continue - } - change := "disabled" - if *mrfUpdate.FailoverMetrics { - change = "enabled" - } - pkglog.Infof("Received remote update for Multi-Region Failover configuration: %s failover for metrics", change) - } - if mrfUpdate.FailoverLogs != nil { - err = rc.applyMRFRuntimeSetting("multi_region_failover.failover_logs", *mrfUpdate.FailoverLogs, cfgPath, applyStateCallback) - if err != nil { - continue - } - change := "disabled" - if *mrfUpdate.FailoverLogs { - change = "enabled" - } - pkglog.Infof("Received remote update for Multi-Region Failover configuration: %s failover for logs", change) - } - applyStateCallback(cfgPath, state.ApplyStatus{State: state.ApplyStateAcknowledged}) - applied = true + if !(enableMetrics != nil && *enableMetrics) && mrfUpdate.FailoverMetrics != nil { + enableMetrics = mrfUpdate.FailoverMetrics + enableMetricsCfgPth = cfgPath + } + + if !(enableLogs != nil && *enableLogs) && mrfUpdate.FailoverLogs != nil { + enableLogs = mrfUpdate.FailoverLogs + enableLogsCfgPth = cfgPath + } + } + + if enableMetrics != nil { + err := rc.applyMRFRuntimeSetting("multi_region_failover.failover_metrics", *enableMetrics, enableMetricsCfgPth, applyStateCallback) + if err != nil { + pkglog.Errorf("Multi-Region Failover failed to apply new metrics settings : %s", err) + applyStateCallback(enableMetricsCfgPth, state.ApplyStatus{ + State: state.ApplyStateError, + Error: err.Error(), + }) + return + } + change := "disabled" + if *enableMetrics { + change = "enabled" + } + pkglog.Infof("Received remote update for Multi-Region Failover configuration: %s failover for metrics", change) + applyStateCallback(enableMetricsCfgPth, state.ApplyStatus{State: state.ApplyStateAcknowledged}) + } + + if enableLogs != nil { + err := rc.applyMRFRuntimeSetting("multi_region_failover.failover_logs", *enableLogs, enableLogsCfgPth, applyStateCallback) + if err != nil { + pkglog.Errorf("Multi-Region Failover failed to apply new logs settings : %s", err) + applyStateCallback(enableMetricsCfgPth, state.ApplyStatus{ + State: state.ApplyStateError, + Error: err.Error(), + }) + return + } + change := "disabled" + if *enableLogs { + change = "enabled" } + pkglog.Infof("Received remote update for Multi-Region Failover configuration: %s failover for logs", change) + applyStateCallback(enableLogsCfgPth, state.ApplyStatus{State: state.ApplyStateAcknowledged}) } } diff --git a/comp/remote-config/rcclient/rcclientimpl/rcclient_test.go b/comp/remote-config/rcclient/rcclientimpl/rcclient_test.go index f6974273d3325..71b8bca9de2f2 100644 --- a/comp/remote-config/rcclient/rcclientimpl/rcclient_test.go +++ b/comp/remote-config/rcclient/rcclientimpl/rcclient_test.go @@ -6,6 +6,7 @@ package rcclientimpl import ( + "fmt" "testing" "time" @@ -61,6 +62,39 @@ func (m *mockLogLevelRuntimeSettings) Hidden() bool { func applyEmpty(_ string, _ state.ApplyStatus) {} +type MockComponent interface { + settings.Component + + SetRuntimeSetting(setting string, value interface{}, source model.Source) error +} + +type MockComponentImplMrf struct { + settings.Component + + logs *bool + metrics *bool + traces *bool +} + +func (m *MockComponentImplMrf) SetRuntimeSetting(setting string, value interface{}, _ model.Source) error { + v, ok := value.(bool) + if !ok { + return fmt.Errorf("unexpected value type %T", value) + } + + switch setting { + case "multi_region_failover.failover_metrics": + m.metrics = &v + case "multi_region_failover.failover_logs": + m.logs = &v + case "multi_region_failover.failover_traces": + m.traces = &v + default: + return &settings.SettingNotFoundError{Name: setting} + } + return nil +} + func TestRCClientCreate(t *testing.T) { _, err := newRemoteConfigClient( fxutil.Test[dependencies]( @@ -189,3 +223,61 @@ func TestAgentConfigCallback(t *testing.T) { assert.Equal(t, "debug", cfg.Get("log_level")) assert.Equal(t, model.SourceCLI, cfg.GetSource("log_level")) } + +func TestAgentMRFConfigCallback(t *testing.T) { + pkglog.SetupLogger(pkglog.Default(), "info") + cfg := configmock.New(t) + + rc := fxutil.Test[rcclient.Component](t, + fx.Options( + Module(), + fx.Provide(func() log.Component { return logmock.New(t) }), + fx.Provide(func() config.Component { return cfg }), + sysprobeconfig.NoneModule(), + fx.Supply( + rcclient.Params{ + AgentName: "test-agent", + AgentVersion: "7.0.0", + }, + ), + fx.Supply( + settings.Params{ + Settings: map[string]settings.RuntimeSetting{ + "log_level": &mockLogLevelRuntimeSettings{logLevel: "info"}, + }, + Config: cfg, + }, + ), + settingsimpl.Module(), + ), + ) + + allInactive := state.RawConfig{Config: []byte(`{"name": "none"}`)} + noLogs := state.RawConfig{Config: []byte(`{"name": "nologs", "failover_logs": false}`)} + activeMetrics := state.RawConfig{Config: []byte(`{"name": "yesmetrics", "failover_metrics": true}`)} + + structRC := rc.(rcClient) + + ipcAddress, err := pkgconfigsetup.GetIPCAddress(cfg) + assert.NoError(t, err) + + structRC.client, _ = client.NewUnverifiedGRPCClient( + ipcAddress, pkgconfigsetup.GetIPCPort(), func() (string, error) { return security.FetchAuthToken(cfg) }, + client.WithAgent("test-agent", "9.99.9"), + client.WithProducts(state.ProductAgentConfig), + client.WithPollInterval(time.Hour), + ) + structRC.settingsComponent = &MockComponentImplMrf{} + + // Should enable metrics failover and disable logs failover + structRC.mrfUpdateCallback(map[string]state.RawConfig{ + "datadog/2/AGENT_FAILOVER/none/configname": allInactive, + "datadog/2/AGENT_FAILOVER/nologs/configname": noLogs, + "datadog/2/AGENT_FAILOVER/yesmetrics/configname": activeMetrics, + }, applyEmpty) + + cmpntSettings := structRC.settingsComponent.(*MockComponentImplMrf) + assert.True(t, *cmpntSettings.metrics) + assert.False(t, *cmpntSettings.logs) + assert.Nil(t, cmpntSettings.traces) +}