From dfe7d15288cb9ec37e3e0491cf63743d9bfdb3d2 Mon Sep 17 00:00:00 2001
From: Len Gamburg
Date: Thu, 3 Oct 2024 10:55:11 -0400
Subject: [PATCH 1/4] A few agent telemetry improvements

* Finalize and categorize the default metrics to emit, split across 4 profiles
* Eliminate Status-based telemetry
* Add CLI `agent diagnose show-telemetry` command to dump emitted telemetry
* Add host name and OS info to the agent telemetry metadata

---
 cmd/agent/subcommands/diagnose/command.go     |  14 +
 comp/core/agenttelemetry/fx/fx.go             |   8 +-
 .../agenttelemetry/impl/agenttelemetry.go     | 288 +++++-------------
 .../impl/agenttelemetry_test.go               | 190 +++++++++++-
 comp/core/agenttelemetry/impl/config.go       | 175 +++--------
 comp/core/agenttelemetry/impl/sender.go       | 172 ++++-------
 .../agent-telemetry-basic.tmpl                |  19 --
 comp/core/agenttelemetry/impl/utils.go        |  30 --
 8 files changed, 384 insertions(+), 512 deletions(-)
 delete mode 100644 comp/core/agenttelemetry/impl/status_templates/agent-telemetry-basic.tmpl

diff --git a/cmd/agent/subcommands/diagnose/command.go b/cmd/agent/subcommands/diagnose/command.go
index 46f9ccea0e730..b060a1981fe48 100644
--- a/cmd/agent/subcommands/diagnose/command.go
+++ b/cmd/agent/subcommands/diagnose/command.go
@@ -278,6 +278,20 @@ This command print the security-agent metadata payload. This payload is used by
 	showPayloadCommand.AddCommand(payloadSecurityAgentCmd)
 	diagnoseCommand.AddCommand(showPayloadCommand)
 
+	showAgentTelemetryCommand := &cobra.Command{
+		Use:   "show-telemetry",
+		Short: "Print agent telemetry payloads sent by the agent.",
+		Long:  `This command prints the agent telemetry payloads sent by the agent.`,
+		RunE: func(_ *cobra.Command, _ []string) error {
+			return fxutil.OneShot(printPayload,
+				fx.Supply(payloadName("agent-telemetry")),
+				fx.Supply(command.GetDefaultCoreBundleParams(cliParams.GlobalParams)),
+				core.Bundle(),
+			)
+		},
+	}
+	diagnoseCommand.AddCommand(showAgentTelemetryCommand)
+
 	return []*cobra.Command{diagnoseCommand}
 }
 
diff --git a/comp/core/agenttelemetry/fx/fx.go b/comp/core/agenttelemetry/fx/fx.go
index 23b9be4733686..e3cec8fc125eb 100644
--- a/comp/core/agenttelemetry/fx/fx.go
+++ b/comp/core/agenttelemetry/fx/fx.go
@@ -7,17 +7,11 @@
 package fx
 
 import (
-	agenttelemetry "github.com/DataDog/datadog-agent/comp/core/agenttelemetry/def"
 	agenttelemetryimpl "github.com/DataDog/datadog-agent/comp/core/agenttelemetry/impl"
 	"github.com/DataDog/datadog-agent/pkg/util/fxutil"
 )
 
 // Module defines the fx options for this component
 func Module() fxutil.Module {
-	return fxutil.Component(
-		fxutil.ProvideComponentConstructor(
-			agenttelemetryimpl.NewComponent,
-		),
-		fxutil.ProvideOptional[agenttelemetry.Component](),
-	)
+	return agenttelemetryimpl.Module()
 }
diff --git a/comp/core/agenttelemetry/impl/agenttelemetry.go b/comp/core/agenttelemetry/impl/agenttelemetry.go
index 2b2881ed1f72d..9d3401d66cd8b 100644
--- a/comp/core/agenttelemetry/impl/agenttelemetry.go
+++ b/comp/core/agenttelemetry/impl/agenttelemetry.go
@@ -13,37 +13,38 @@
 package agenttelemetryimpl
 
 import (
-	"bytes"
 	"context"
-	"embed"
 	"encoding/json"
-	"reflect"
+	"fmt"
+	"net/http"
 	"strconv"
-	"strings"
 
+	"go.uber.org/fx"
 	"golang.org/x/exp/maps"
 
+	api "github.com/DataDog/datadog-agent/comp/api/api/def"
 	agenttelemetry "github.com/DataDog/datadog-agent/comp/core/agenttelemetry/def"
 	"github.com/DataDog/datadog-agent/comp/core/config"
 	log "github.com/DataDog/datadog-agent/comp/core/log/def"
-	"github.com/DataDog/datadog-agent/comp/core/status"
 	"github.com/DataDog/datadog-agent/comp/core/telemetry"
-	compdef "github.com/DataDog/datadog-agent/comp/def"
+	
"github.com/DataDog/datadog-agent/pkg/util/fxutil" + httputils "github.com/DataDog/datadog-agent/pkg/util/http" + "github.com/DataDog/datadog-agent/pkg/util/scrubber" dto "github.com/prometheus/client_model/go" ) -// Embed one or more rendering templated into this binary as a resource -// to be used at runtime. - -//go:embed status_templates -var templatesFS embed.FS +// Module defines the fx options for this component. +func Module() fxutil.Module { + return fxutil.Component( + fx.Provide(newAgentTelemetryProvider), + ) +} type atel struct { - cfgComp config.Component - logComp log.Component - telComp telemetry.Component - statusComp status.Component + cfgComp config.Component + logComp log.Component + telComp telemetry.Component enabled bool sender sender @@ -54,16 +55,21 @@ type atel struct { cancel context.CancelFunc } -// Requires defines the dependencies for the agenttelemtry component -type Requires struct { - compdef.In +type provides struct { + fx.Out + + Comp agenttelemetry.Component + Endpoint api.AgentEndpointProvider +} + +type dependencies struct { + fx.In Log log.Component Config config.Component Telemetry telemetry.Component - Status status.Component - Lifecycle compdef.Lifecycle + Lc fx.Lifecycle } // Interfacing with runner. @@ -100,7 +106,6 @@ func createAtel( cfgComp config.Component, logComp log.Component, telComp telemetry.Component, - statusComp status.Component, sender sender, runner runner) *atel { // Parse Agent Telemetry Configuration configuration @@ -127,32 +132,28 @@ func createAtel( } return &atel{ - enabled: true, - cfgComp: cfgComp, - logComp: logComp, - telComp: telComp, - statusComp: statusComp, - sender: sender, - runner: runner, - atelCfg: atelCfg, + enabled: true, + cfgComp: cfgComp, + logComp: logComp, + telComp: telComp, + sender: sender, + runner: runner, + atelCfg: atelCfg, } } -// NewComponent creates a new agent telemetry component. -func NewComponent(req Requires) agenttelemetry.Component { - // Wire up the agent telemetry provider (TODO: use FX for sender, client and runner?) +func newAgentTelemetryProvider(deps dependencies) provides { a := createAtel( - req.Config, - req.Log, - req.Telemetry, - req.Status, + deps.Config, + deps.Log, + deps.Telemetry, nil, nil, ) // If agent telemetry is enabled and configured properly add the start and stop hooks if a.enabled { - req.Lifecycle.Append(compdef.Hook{ + deps.Lc.Append(fx.Hook{ OnStart: func(_ context.Context) error { return a.start() }, @@ -162,7 +163,10 @@ func NewComponent(req Requires) agenttelemetry.Component { }) } - return a + return provides{ + Comp: a, + Endpoint: api.NewAgentEndpointProvider(a.writePayload, "/metadata/agent-telemetry", "GET"), + } } func (a *atel) aggregateMetricTags(mCfg *MetricConfig, mt dto.MetricType, ms []*dto.Metric) []*dto.Metric { @@ -308,7 +312,7 @@ func (a *atel) transformMetricFamily(p *Profile, mfam *dto.MetricFamily) *agentm } } -func (a *atel) reportAgentMetrics(session *senderSession, p *Profile) { +func (a *atel) reportAgentMetrics(session *senderSession, pms []*telemetry.MetricFamily, p *Profile) { // If no metrics are configured nothing to report if len(p.metricsMap) == 0 { return @@ -316,15 +320,6 @@ func (a *atel) reportAgentMetrics(session *senderSession, p *Profile) { a.logComp.Debugf("Collect Agent Metric telemetry for profile %s", p.Name) - // Gather all prom metrircs. Currently Gather() does not allow filtering by - // matric name, so we need to gather all metrics and filter them on our own. 
- // pms, err := a.telemetry.Gather(false) - pms, err := a.telComp.Gather(false) - if err != nil { - a.logComp.Errorf("failed to get filtered telemetry metrics: %s", err) - return - } - // ... and filter them according to the profile configuration var metrics []*agentmetric for _, pm := range pms { @@ -342,198 +337,79 @@ func (a *atel) reportAgentMetrics(session *senderSession, p *Profile) { // Send the metrics if any were filtered a.logComp.Debugf("Reporting Agent Metric telemetry for profile %s", p.Name) - err = a.sender.sendAgentMetricPayloads(session, metrics) - if err != nil { - a.logComp.Errorf("failed to get filtered telemetry metrics: %s", err) - } + a.sender.sendAgentMetricPayloads(session, metrics) } -// renderAgentStatus renders (transform) input status JSON object into output status using the template -func (a *atel) renderAgentStatus(p *Profile, inputStatus map[string]interface{}, statusOutput map[string]interface{}) { - // Render template if needed - if p.Status.Template == "none" { - return - } - - templateName := "agent-telemetry-" + p.Status.Template + ".tmpl" - var b = new(bytes.Buffer) - err := status.RenderText(templatesFS, templateName, b, inputStatus) +func (a *atel) loadPayloads(profiles []*Profile) (*senderSession, error) { + // Gather all prom metrircs. Currently Gather() does not allow filtering by + // matric name, so we need to gather all metrics and filter them on our own. + // pms, err := a.telemetry.Gather(false) + pms, err := a.telComp.Gather(false) if err != nil { - a.logComp.Errorf("Failed to collect Agent Status telemetry. Error: %s", err.Error()) - return - } - if len(b.Bytes()) == 0 { - a.logComp.Debug("Agent status rendering to agent telemetry payloads return empty payload") - return + a.logComp.Errorf("failed to get filtered telemetry metrics: %v", err) + return nil, err } - // Convert byte slice to JSON object - if err := json.Unmarshal(b.Bytes(), &statusOutput); err != nil { - a.logComp.Errorf("Failed to collect Agent Status telemetry. Error: %s", err.Error()) - return - } -} - -func (a *atel) addAgentStatusExtra(p *Profile, fullStatus map[string]interface{}, statusOutput map[string]interface{}) { - for _, builder := range p.statusExtraBuilder { - // Evaluate JQ expression against the agent status JSON object - jqResult := builder.jqSource.Run(fullStatus) - jqValue, ok := jqResult.Next() - if !ok { - a.logComp.Errorf("Failed to apply JQ expression for JSON path '%s' to Agent Status payload. Error unknown", - strings.Join(builder.jpathTarget, ".")) - continue - } - - // Validate JQ expression result - if err, ok := jqValue.(error); ok { - a.logComp.Errorf("Failed to apply JQ expression for JSON path '%s' to Agent Status payload. 
Error: %s", - strings.Join(builder.jpathTarget, "."), err.Error()) - continue - } - - // Validate JQ expression result type - var attrVal interface{} - switch jqValueType := jqValue.(type) { - case int: - attrVal = jqValueType - case float64: - attrVal = jqValueType - case bool: - attrVal = jqValueType - case nil: - a.logComp.Errorf("JQ expression return 'nil' value for JSON path '%s'", strings.Join(builder.jpathTarget, ".")) - continue - case string: - a.logComp.Errorf("string value (%v) for JSON path '%s' for extra status atttribute is not currently allowed", - strings.Join(builder.jpathTarget, "."), attrVal) - continue - default: - a.logComp.Errorf("'%v' value (%v) for JSON path '%s' for extra status atttribute is not currently allowed", - reflect.TypeOf(jqValueType), reflect.ValueOf(jqValueType), strings.Join(builder.jpathTarget, ".")) - continue - } - - // Add resulting value to the agent status telemetry payload (recursively creating missing JSON objects) - curNode := statusOutput - for i, p := range builder.jpathTarget { - // last element is the attribute name - if i == len(builder.jpathTarget)-1 { - curNode[p] = attrVal - break - } - - existSubNode, ok := curNode[p] - - // if the node doesn't exist, create it - if !ok { - newSubNode := make(map[string]interface{}) - curNode[p] = newSubNode - curNode = newSubNode - } else { - existSubNodeCasted, ok := existSubNode.(map[string]interface{}) - if !ok { - a.logComp.Errorf("JSON path '%s' points to non-object element", strings.Join(builder.jpathTarget[:i], ".")) - break - } - curNode = existSubNodeCasted - } - } + session := a.sender.startSession(a.cancelCtx) + for _, p := range profiles { + a.reportAgentMetrics(session, pms, p) } + return session, nil } -// Render Agent Status JSON object (using template if needed and adding extra attributes if specified in -// the profile). The rendered JSON object is then sent to the telemetry server. The "rendering" of the -// Agent Status JSON object may appear odd and flawed. For example ... -// - Agent Status, generally speaking, is moving into direction when multitudes of its providers are -// starting self-"rendering". Accordingly, its JSON representation is not fixed and certainly -// not-public and is subject to change, which may break both template and JQ style of rendering -// of Agent Telemetry (and which is implemented in this function). It is certainly a concern but -// its potential impact is minimized since Agent Telemetry is an internal feature. It is also -// the price of flexibility and decoupling rendering from the code. -// - There are a number of inefficiencies in the current implementation massaging Agent Status JSON -// object into JSON, then to bytes and JSON again. It should not be a big issue since the operations -// will be very infrequent and resulting objects relatively small. -// - In some way, the current implementation can be thought of as an architectural shortcut since -// Agent Status, arguably is not purely telemetry data (however it can be argue that it is also can -// be think of as quintessential Agent telemetry data) and perhaps more explicit Agent telemetry -// interfaces should be implemented where its loosely coupled "providers" supply their own internal -// telemetry to be emitted. 
-func (a *atel) reportAgentStatus(session *senderSession, p *Profile) { - // If no status is configured nothing to report - if p.Status == nil { - return - } - - a.logComp.Debugf("Collect Agent Status telemetry for profile %s", p.Name) +// run runs the agent telemetry for a given profile. It is triggered by the runner +// according to the profiles schedule. +func (a *atel) run(profiles []*Profile) { + a.logComp.Info("Starting agent telemetry run") - // Current "agent-telemetry-basic.tmpl" uses only "runneStats" and "dogstatsdStats" JSON sections - // These JSON sections are populated via "collector" and "DogStatsD" status providers sections - minimumReqSections := []string{"collector", "DogStatsD"} - statusBytes, err := a.statusComp.GetStatusBySections(minimumReqSections, "json", false) + session, err := a.loadPayloads(profiles) if err != nil { - a.logComp.Errorf("failed to get agent status: %s", err) + a.logComp.Errorf("failed to load agent telemetry session: %s", err) return } - var statusJSON = make(map[string]interface{}) - err = json.Unmarshal(statusBytes, &statusJSON) + err = a.sender.flushSession(session) if err != nil { - a.logComp.Errorf("failed to unmarshall agent status: %s", err) - return - } - - // Render Agent Status JSON object (using template if needed and adding extra attributes) - var statusPayloadJSON = make(map[string]interface{}) - a.renderAgentStatus(p, statusJSON, statusPayloadJSON) - a.addAgentStatusExtra(p, statusJSON, statusPayloadJSON) - if len(statusPayloadJSON) == 0 { - a.logComp.Debug("No Agent Status telemetry collected") + a.logComp.Errorf("failed to flush agent telemetry session: %s", err) return } +} - a.logComp.Debugf("Reporting Agent Status telemetry for profile %s", p.Name) +func (a *atel) writePayload(w http.ResponseWriter, _ *http.Request) { + a.logComp.Info("Dumping agent telemetry payload") - // Send the Agent Telemetry status payload - err = a.sender.sendAgentStatusPayload(session, statusPayloadJSON) + payload, err := a.GetAsJSON() if err != nil { - a.logComp.Errorf("failed to send agent status: %s", err) + httputils.SetJSONError(w, a.logComp.Error(err.Error()), 500) return } + + w.Write(payload) } -// run runs the agent telemetry for a given profile. It is triggered by the runner -// according to the profiles schedule. -func (a *atel) run(profiles []*Profile) { - if a.sender == nil { - a.logComp.Errorf("Agent telemetry sender is not initialized") - return +func (a *atel) GetAsJSON() ([]byte, error) { + session, err := a.loadPayloads(a.atelCfg.Profiles) + if err != nil { + return nil, fmt.Errorf("unable to load agent telemetry payload: %w", err) } + payload := session.flush() - a.logComp.Info("Starting agent telemetry run") - - session := a.sender.startSession(a.cancelCtx) - - for _, p := range profiles { - a.reportAgentMetrics(session, p) - a.reportAgentStatus(session, p) + jsonPayload, err := json.MarshalIndent(payload, "", " ") + if err != nil { + return nil, fmt.Errorf("unable to marshal agent telemetry payload: %w", err) } - err := a.sender.flushSession(session) + jsonPayloadScrubbed, err := scrubber.ScrubBytes(jsonPayload) if err != nil { - a.logComp.Errorf("failed to flush agent telemetry session: %s", err) - return + return nil, fmt.Errorf("unable to scrub agent telemetry payload: %w", err) } -} -// TODO: implement when CLI tool will be implemented -func (a *atel) GetAsJSON() ([]byte, error) { - return nil, nil + return jsonPayloadScrubbed, nil } // start is called by FX when the application starts. 
func (a *atel) start() error { - a.logComp.Info("Starting agent telemetry for %d schedules and %d profiles", len(a.atelCfg.schedule), len(a.atelCfg.Profiles)) + a.logComp.Infof("Starting agent telemetry for %d schedules and %d profiles", len(a.atelCfg.schedule), len(a.atelCfg.Profiles)) a.cancelCtx, a.cancel = context.WithCancel(context.Background()) diff --git a/comp/core/agenttelemetry/impl/agenttelemetry_test.go b/comp/core/agenttelemetry/impl/agenttelemetry_test.go index a47686d674389..e7edd23d06415 100644 --- a/comp/core/agenttelemetry/impl/agenttelemetry_test.go +++ b/comp/core/agenttelemetry/impl/agenttelemetry_test.go @@ -13,6 +13,7 @@ package agenttelemetryimpl import ( "context" + "encoding/json" "fmt" "io" "net/http" @@ -28,8 +29,6 @@ import ( "github.com/DataDog/datadog-agent/comp/core/config" log "github.com/DataDog/datadog-agent/comp/core/log/def" logmock "github.com/DataDog/datadog-agent/comp/core/log/mock" - "github.com/DataDog/datadog-agent/comp/core/status" - "github.com/DataDog/datadog-agent/comp/core/status/statusimpl" "github.com/DataDog/datadog-agent/comp/core/telemetry" "github.com/DataDog/datadog-agent/comp/core/telemetry/telemetryimpl" "github.com/DataDog/datadog-agent/pkg/util/fxutil" @@ -63,12 +62,8 @@ func (s *senderMock) startSession(_ context.Context) *senderSession { func (s *senderMock) flushSession(_ *senderSession) error { return nil } -func (s *senderMock) sendAgentStatusPayload(_ *senderSession, _ map[string]interface{}) error { - return nil -} -func (s *senderMock) sendAgentMetricPayloads(_ *senderSession, metrics []*agentmetric) error { +func (s *senderMock) sendAgentMetricPayloads(_ *senderSession, metrics []*agentmetric) { s.sentMetrics = append(s.sentMetrics, metrics...) - return nil } // Runner mock (TODO: use use mock.Mock) @@ -147,10 +142,6 @@ func makeLogMock(t *testing.T) log.Component { return logmock.New(t) } -func makeStatusMock(t *testing.T) status.Component { - return fxutil.Test[status.Mock](t, fx.Options(statusimpl.MockModule())) -} - func makeSenderImpl(t *testing.T, c string) sender { o := convertYamlStrToMap(t, c) cfg := makeCfgMock(t, o) @@ -190,7 +181,7 @@ func getTestAtel(t *testing.T, } assert.NoError(t, err) - atel := createAtel(cfg, log, tel, makeStatusMock(t), sndr, runner) + atel := createAtel(cfg, log, tel, sndr, runner) if atel == nil { err = fmt.Errorf("failed to create atel") } @@ -271,10 +262,10 @@ func TestRun(t *testing.T) { a.start() - // default configuration has 1 job with 2 profiles (more configurations needs to be tested) + // default configuration has 1 job with 4 profiles (more configurations needs to be tested) // will be improved in future by providing deterministic configuration assert.Equal(t, 1, len(r.(*runnerMock).jobs)) - assert.Equal(t, 2, len(r.(*runnerMock).jobs[0].profiles)) + assert.Equal(t, 4, len(r.(*runnerMock).jobs[0].profiles)) } func TestReportMetricBasic(t *testing.T) { @@ -522,6 +513,177 @@ func TestTagAggregateTotalCounter(t *testing.T) { assert.Equal(t, float64(210), m4.Counter.GetValue()) } +func TestTwoProfilesOnTheSameScheduleGenerateSinglePayload(t *testing.T) { + var c = ` + agent_telemetry: + enabled: true + profiles: + - name: foo + metric: + metrics: + - name: bar.bar + aggregate_tags: + - tag1 + - name: bar + metric: + metrics: + - name: foo.foo + aggregate_tags: + - tag1 + ` + // setup and initiate atel + tel := makeTelMock(t) + counter1 := tel.NewCounter("bar", "bar", []string{"tag1", "tag2", "tag3"}, "") + counter1.AddWithTags(10, map[string]string{"tag1": "a1", "tag2": "b1", 
"tag3": "c1"}) + counter2 := tel.NewCounter("foo", "foo", []string{"tag1", "tag2", "tag3"}, "") + counter2.AddWithTags(20, map[string]string{"tag1": "a1", "tag2": "b1", "tag3": "c1"}) + + o := convertYamlStrToMap(t, c) + s := makeSenderImpl(t, c) + r := newRunnerMock() + a := getTestAtel(t, tel, o, s, nil, r) + + payloadJSON, err := a.GetAsJSON() + assert.NoError(t, err) + var payload map[string]interface{} + err = json.Unmarshal(payloadJSON, &payload) + assert.NoError(t, err) + + // ----------------------- + // for 2 profiles there are2 metrics, but 1 payload (test is currently payload schema dependent, improve in future) + + // Single payload whcich has sub-payloads for each metric + requestType, ok := payload["request_type"] + assert.Equal(t, "agent-metrics", requestType) + metricsPayload, ok := payload["payload"].(map[string]interface{}) + assert.True(t, ok) + metrics, ok := metricsPayload["metrics"].(map[string]interface{}) + assert.True(t, ok) + // 2 metrics + _, ok = metrics["bar.bar"] + assert.True(t, ok) + _, ok = metrics["foo.foo"] + assert.True(t, ok) +} + +func TestOneProfileWithOneMetricMultipleContextsGenerateTwoPayloads(t *testing.T) { + var c = ` + agent_telemetry: + enabled: true + profiles: + - name: foo + metric: + metrics: + - name: bar.bar + aggregate_tags: + - tag1 + ` + // setup and initiate atel + tel := makeTelMock(t) + counter1 := tel.NewCounter("bar", "bar", []string{"tag1", "tag2", "tag3"}, "") + counter1.AddWithTags(10, map[string]string{"tag1": "a1", "tag2": "b1", "tag3": "c1"}) + counter1.AddWithTags(20, map[string]string{"tag1": "a2", "tag2": "b2", "tag3": "c2"}) + + o := convertYamlStrToMap(t, c) + s := makeSenderImpl(t, c) + r := newRunnerMock() + a := getTestAtel(t, tel, o, s, nil, r) + + payloadJSON, err := a.GetAsJSON() + assert.NoError(t, err) + var payload map[string]interface{} + err = json.Unmarshal(payloadJSON, &payload) + assert.NoError(t, err) + + // ----------------------- + // for 1 profiles there are 2 metrics in 1 payload (test is currently payload schema dependent, improve in future) + + // One payloads each has the same metric (different tags) + requestType, ok := payload["request_type"] + assert.Equal(t, "message-batch", requestType) + metricPayloads, ok := payload["payload"].([]interface{}) + assert.True(t, ok) + + // --------- + // 2 metrics + // 1-st + payload1, ok := metricPayloads[0].(map[string]interface{}) + requestType1, ok := payload1["request_type"] + assert.True(t, ok) + assert.Equal(t, "agent-metrics", requestType1) + metricsPayload1, ok := payload1["payload"].(map[string]interface{}) + assert.True(t, ok) + metrics1, ok := metricsPayload1["metrics"].(map[string]interface{}) + assert.True(t, ok) + _, ok11 := metrics1["bar.bar"] + _, ok12 := metrics1["foo.foo"] + assert.True(t, (ok11 && !ok12) || (!ok11 && ok12)) + + // 2-nd + payload2, ok := metricPayloads[1].(map[string]interface{}) + requestType2, ok := payload2["request_type"] + assert.True(t, ok) + assert.Equal(t, "agent-metrics", requestType2) + metricsPayload2, ok := payload2["payload"].(map[string]interface{}) + assert.True(t, ok) + metrics2, ok := metricsPayload2["metrics"].(map[string]interface{}) + assert.True(t, ok) + _, ok21 := metrics2["bar.bar"] + _, ok22 := metrics2["foo.foo"] + assert.True(t, (ok21 && !ok22) || (!ok21 && ok22)) + +} + +func TestOneProfileWithTwoMetricGenerateSinglePayloads(t *testing.T) { + var c = ` + agent_telemetry: + enabled: true + profiles: + - name: foobar + metric: + metrics: + - name: bar.bar + aggregate_tags: + - tag1 + - name: foo.foo 
+ aggregate_tags: + - tag1 + ` + // setup and initiate atel + tel := makeTelMock(t) + counter1 := tel.NewCounter("bar", "bar", []string{"tag1", "tag2", "tag3"}, "") + counter1.AddWithTags(10, map[string]string{"tag1": "a1", "tag2": "b1", "tag3": "c1"}) + counter2 := tel.NewCounter("foo", "foo", []string{"tag1", "tag2", "tag3"}, "") + counter2.AddWithTags(20, map[string]string{"tag1": "a1", "tag2": "b1", "tag3": "c1"}) + + o := convertYamlStrToMap(t, c) + s := makeSenderImpl(t, c) + r := newRunnerMock() + a := getTestAtel(t, tel, o, s, nil, r) + + payloadJSON, err := a.GetAsJSON() + assert.NoError(t, err) + var payload map[string]interface{} + err = json.Unmarshal(payloadJSON, &payload) + assert.NoError(t, err) + + // ----------------------- + // for 2 profiles there are2 metrics, but 1 payload (test is currently payload schema dependent, improve in future) + + // Single payload whcich has sub-payloads for each metric + requestType, ok := payload["request_type"] + assert.Equal(t, "agent-metrics", requestType) + metricsPayload, ok := payload["payload"].(map[string]interface{}) + assert.True(t, ok) + metrics, ok := metricsPayload["metrics"].(map[string]interface{}) + assert.True(t, ok) + // 2 metrics + _, ok = metrics["bar.bar"] + assert.True(t, ok) + _, ok = metrics["foo.foo"] + assert.True(t, ok) +} + func TestSenderConfigNoConfig(t *testing.T) { c := ` agent_telemetry: diff --git a/comp/core/agenttelemetry/impl/config.go b/comp/core/agenttelemetry/impl/config.go index 3109a400b573c..c08ada88a7806 100644 --- a/comp/core/agenttelemetry/impl/config.go +++ b/comp/core/agenttelemetry/impl/config.go @@ -28,8 +28,8 @@ const ( // Config is the top-level config for agent telemetry type Config struct { - Enabled bool `yaml:"enabled"` - Profiles []Profile `yaml:"profiles"` + Enabled bool `yaml:"enabled"` + Profiles []*Profile `yaml:"profiles"` // compiled schedule map[Schedule][]*Profile @@ -40,14 +40,12 @@ type Profile struct { // parsed Name string `yaml:"name"` Metric *AgentMetricConfig `yaml:"metric,omitempty"` - Status *AgentStatusConfig `yaml:"status"` Schedule *Schedule `yaml:"schedule"` // compiled - statusExtraBuilder []jBuilder - metricsMap map[string]*MetricConfig - excludeZeroMetric bool - excludeTagsMap map[string]any + metricsMap map[string]*MetricConfig + excludeZeroMetric bool + excludeTagsMap map[string]any } // AgentMetricConfig specifies agent telemetry metrics payloads to be generated and emitted @@ -73,12 +71,6 @@ type MetricConfig struct { aggregateTagsMap map[string]any } -// AgentStatusConfig is a single agent telemetry status payload -type AgentStatusConfig struct { - Template string `yaml:"template"` - Extra map[string]string `yaml:"extra,omitempty"` -} - // Schedule is a schedule for agent telemetry payloads to be generated and emitted type Schedule struct { // parsed @@ -135,70 +127,13 @@ type Schedule struct { // reserved tag"). If not specified, specified, default value of `false` will be used. // It is useful only if "aggregate_tags" is also specified and will be ignored otherwise. // -// profiles[].status (optional) -// -------------------------------- -// When included, agent telemetry status payloads will be generated and emitted. -// -// profiles[].status.template (optional) -// -------------------------------------- -// Name of agent status JSON rendering template which generates agent telemetry status -// payload. Used as a suffix to -// "pkg\status\render\templates\agent-telemetry-