From 672e942c210bcf6c82954ceb7eb89b0e34a31629 Mon Sep 17 00:00:00 2001 From: Len Gamburg Date: Wed, 25 Dec 2024 11:14:10 -0500 Subject: [PATCH] Route agent crash detection through agent telemetry component --- .../agentcrashdetectimpl/agentcrashdetect.go | 54 +++-- comp/core/agenttelemetry/def/component.go | 5 + .../agenttelemetry/impl/agenttelemetry.go | 33 +++ .../impl/agenttelemetry_test.go | 193 ++++++++++++++++++ comp/core/agenttelemetry/impl/config.go | 119 ++++++++--- comp/core/agenttelemetry/impl/sender.go | 96 +++++++-- pkg/internaltelemetry/client.go | 3 + ...ent-tel-bsod-payload-791ab2c7f553abb4.yaml | 14 ++ 8 files changed, 442 insertions(+), 75 deletions(-) create mode 100644 releasenotes/notes/agent-tel-bsod-payload-791ab2c7f553abb4.yaml diff --git a/comp/checks/agentcrashdetect/agentcrashdetectimpl/agentcrashdetect.go b/comp/checks/agentcrashdetect/agentcrashdetectimpl/agentcrashdetect.go index 6098dcf98f629d..423a68d25ca8aa 100644 --- a/comp/checks/agentcrashdetect/agentcrashdetectimpl/agentcrashdetect.go +++ b/comp/checks/agentcrashdetect/agentcrashdetectimpl/agentcrashdetect.go @@ -10,6 +10,7 @@ package agentcrashdetectimpl import ( "context" + "encoding/json" "fmt" "strings" @@ -18,15 +19,13 @@ import ( yaml "gopkg.in/yaml.v2" "github.com/DataDog/datadog-agent/comp/checks/agentcrashdetect" + agenttelemetry "github.com/DataDog/datadog-agent/comp/core/agenttelemetry/def" "github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration" compsysconfig "github.com/DataDog/datadog-agent/comp/core/sysprobeconfig" - comptraceconfig "github.com/DataDog/datadog-agent/comp/trace/config" "github.com/DataDog/datadog-agent/pkg/aggregator/sender" "github.com/DataDog/datadog-agent/pkg/collector/check" core "github.com/DataDog/datadog-agent/pkg/collector/corechecks" "github.com/DataDog/datadog-agent/pkg/collector/corechecks/system/wincrashdetect/probe" - "github.com/DataDog/datadog-agent/pkg/internaltelemetry" - traceconfig 
"github.com/DataDog/datadog-agent/pkg/trace/config" "github.com/DataDog/datadog-agent/pkg/util/crashreport" "github.com/DataDog/datadog-agent/pkg/util/fxutil" "github.com/DataDog/datadog-agent/pkg/util/log" @@ -60,6 +59,13 @@ var ( baseKey = `SOFTWARE\Datadog\Datadog Agent\agent_crash_reporting` ) +// AgentBSOD for Agent Telemetry reporting +type AgentBSOD struct { + Date string `json:"date"` + Offender string `json:"offender"` + BugCheck string `json:"bugcheck"` +} + // Module defines the fx options for this component. func Module() fxutil.Module { return fxutil.Component( @@ -79,19 +85,19 @@ type AgentCrashDetect struct { instance *WinCrashConfig reporter *crashreport.WinCrashReporter crashDetectionEnabled bool - tconfig *traceconfig.AgentConfig probeconfig compsysconfig.Component + atel agenttelemetry.Component } type agentCrashComponent struct { - tconfig *traceconfig.AgentConfig } type dependencies struct { fx.In - TConfig comptraceconfig.Component - SConfig compsysconfig.Component + Config compsysconfig.Component + Atel agenttelemetry.Component + Lifecycle fx.Lifecycle } @@ -168,33 +174,37 @@ func (wcd *AgentCrashDetect) Run() error { } log.Infof("Sending crash: %v", formatText(crash)) - lts := internaltelemetry.NewClient(wcd.tconfig.NewHTTPClient(), toTelemEndpoints(wcd.tconfig.TelemetryConfig.Endpoints), "ddnpm", true) - lts.SendLog("WARN", formatText(crash)) - return nil -} -func toTelemEndpoints(endpoints []*traceconfig.Endpoint) []*internaltelemetry.Endpoint { - telemEndpoints := make([]*internaltelemetry.Endpoint, 0, len(endpoints)) - for _, e := range endpoints { - telemEndpoints = append(telemEndpoints, &internaltelemetry.Endpoint{ - Host: e.Host, - APIKey: e.APIKey, - }) + bsod := AgentBSOD{ + Date: crash.DateString, + Offender: crash.Offender, + BugCheck: crash.BugCheck, + } + var bsodPayload []byte + bsodPayload, err = json.Marshal(bsod) + if err != nil { + return err + } + + // "agentbsod" is payload type registered with the Agent Telemetry 
component + err = wcd.atel.SendEvent("agentbsod", bsodPayload) + if err != nil { + return err } - return telemEndpoints + + return nil } func newAgentCrashComponent(deps dependencies) agentcrashdetect.Component { instance := &agentCrashComponent{} - instance.tconfig = deps.TConfig.Object() deps.Lifecycle.Append(fx.Hook{ OnStart: func(_ context.Context) error { core.RegisterCheck(CheckName, optional.NewOption(func() check.Check { checkInstance := &AgentCrashDetect{ CheckBase: core.NewCheckBase(CheckName), instance: &WinCrashConfig{}, - tconfig: instance.tconfig, - probeconfig: deps.SConfig, + probeconfig: deps.Config, + atel: deps.Atel, } return checkInstance })) diff --git a/comp/core/agenttelemetry/def/component.go b/comp/core/agenttelemetry/def/component.go index 06563b2de91922..611d80eb8c8282 100644 --- a/comp/core/agenttelemetry/def/component.go +++ b/comp/core/agenttelemetry/def/component.go @@ -12,4 +12,9 @@ package agenttelemetry type Component interface { // GetAsJSON returns the payload as a JSON string. Useful to be displayed in the CLI or added to a flare. GetAsJSON() ([]byte, error) + + // Sends event payload. 
+	// eventType - must be registered in comp/core/agenttelemetry/impl/config.go
+	// eventPayload - JSON-encoded object (it is unmarshaled into a map before sending)
+	SendEvent(eventType string, eventPayload []byte) error
 }
diff --git a/comp/core/agenttelemetry/impl/agenttelemetry.go b/comp/core/agenttelemetry/impl/agenttelemetry.go
index e9362829179009..176dabd10ea475 100644
--- a/comp/core/agenttelemetry/impl/agenttelemetry.go
+++ b/comp/core/agenttelemetry/impl/agenttelemetry.go
@@ -527,6 +527,45 @@ func (a *atel) GetAsJSON() ([]byte, error) {
 	return prettyPayload.Bytes(), nil
 }
 
+// SendEvent sends an event of a registered type through the agent telemetry sender.
+// eventPayload must be a JSON-encoded object; an error is returned otherwise.
+func (a *atel) SendEvent(eventType string, eventPayload []byte) error {
+	// Check if the telemetry is enabled
+	if !a.enabled {
+		return errors.New("agent telemetry is not enabled")
+	}
+
+	// Check if the payload type is registered
+	eventInfo, ok := a.atelCfg.events[eventType]
+	if !ok {
+		a.logComp.Errorf("Payload type `%s` has to be registered to be sent", eventType)
+		return fmt.Errorf("Payload type `%s` is not registered", eventType)
+	}
+
+	// Convert payload to JSON
+	var eventPayloadJSON map[string]interface{}
+	err := json.Unmarshal(eventPayload, &eventPayloadJSON)
+	if err != nil {
+		a.logComp.Errorf("Failed to unmarshal payload: %s", err)
+		return fmt.Errorf("failed to unmarshal payload: %w", err)
+	}
+	// JSON `null` unmarshals into a nil map without an error; the sender writes into this map, so reject it
+	if eventPayloadJSON == nil {
+		return errors.New("event payload must be a JSON object")
+	}
+
+	// Send the payload
+	ss := a.sender.startSession(a.cancelCtx)
+	a.sender.sendEventPayload(ss, eventInfo, eventPayloadJSON)
+	err = a.sender.flushSession(ss)
+	if err != nil {
+		a.logComp.Errorf("failed to flush sent payload: %v", err)
+		return err
+	}
+
+	return nil
+}
+
 // start is called by FX when the application starts.
func (a *atel) start() error { a.logComp.Infof("Starting agent telemetry for %d schedules and %d profiles", len(a.atelCfg.schedule), len(a.atelCfg.Profiles)) diff --git a/comp/core/agenttelemetry/impl/agenttelemetry_test.go b/comp/core/agenttelemetry/impl/agenttelemetry_test.go index d982800caddd5c..cb2075fbc76884 100644 --- a/comp/core/agenttelemetry/impl/agenttelemetry_test.go +++ b/comp/core/agenttelemetry/impl/agenttelemetry_test.go @@ -28,6 +28,7 @@ import ( "github.com/DataDog/datadog-agent/comp/core/telemetry" "github.com/DataDog/datadog-agent/comp/core/telemetry/telemetryimpl" "github.com/DataDog/datadog-agent/pkg/util/fxutil" + "github.com/DataDog/datadog-agent/pkg/util/jsonquery" "github.com/DataDog/zstd" ) @@ -62,6 +63,8 @@ func (s *senderMock) flushSession(_ *senderSession) error { func (s *senderMock) sendAgentMetricPayloads(_ *senderSession, metrics []*agentmetric) { s.sentMetrics = append(s.sentMetrics, metrics...) } +func (s *senderMock) sendEventPayload(_ *senderSession, _ *Event, _ map[string]interface{}) { +} // Runner mock (TODO: use use mock.Mock) type runnerMock struct { @@ -1967,3 +1970,193 @@ func TestUsingPayloadCompressionInAgentTelemetrySender(t *testing.T) { nonCompressBodyLen := len(cl2.(*clientMock).body) assert.True(t, float64(nonCompressBodyLen)/float64(compressBodyLen) > 1.5) } + +func TestAgentTelemetryParseDefaultConfiguration(t *testing.T) { + c := defaultProfiles + o := convertYamlStrToMap(t, c) + cfg := makeCfgMock(t, o) + atCfg, err := parseConfig(cfg) + + require.NoError(t, err) + + assert.True(t, len(atCfg.events) > 0) + assert.True(t, len(atCfg.schedule) > 0) + assert.True(t, len(atCfg.Profiles) > len(atCfg.events)) +} + +func TestAgentTelemetryEventConfiguration(t *testing.T) { + // Use nearly full + c := ` + agent_telemetry: + enabled: true + profiles: + - name: checks + metric: + metrics: + - name: checks.execution_time + aggregate_tags: + - check_name + - name: pymem.inuse + schedule: + start_after: 123 + iterations: 0 
+ period: 456 + - name: logs-and-metrics + metric: + exclude: + zero_metric: true + metrics: + - name: dogstatsd.udp_packets_bytes + - name: dogstatsd.uds_packets_bytes + schedule: + start_after: 30 + iterations: 0 + period: 900 + - name: ondemand + events: + - name: agentbsod + request_type: agent-bsod + payload_key: agent_bsod + message: 'Agent BSOD' + - name: foobar + request_type: agent-foobar + payload_key: agent_foobar + message: 'Agent foobar' + - name: ondemand2 + events: + - name: agentbsod + request_type: agent-bsod + payload_key: agent_bsod + message: 'Agent BSOD' + - name: barfoo + request_type: agent-barfoo + payload_key: agent_barfoo + message: 'Agent barfoo' + ` + + o := convertYamlStrToMap(t, c) + cfg := makeCfgMock(t, o) + atCfg, err := parseConfig(cfg) + + require.NoError(t, err) + + // single event map keeps unique event names + assert.Len(t, atCfg.events, 3) + assert.Len(t, atCfg.schedule, 2) + assert.Len(t, atCfg.Profiles, 4) +} + +func TestAgentTelemetrySendRegisteredEvent(t *testing.T) { + // Use nearly full + var cfg = ` + agent_telemetry: + enabled: true + use_compression: false + profiles: + - name: xxx + metric: + metrics: + - name: foo.bar + - name: ondemand + events: + - name: agentbsod + request_type: agent-bsod + payload_key: agent_bsod + message: 'Agent BSOD' + - name: foobar + request_type: agent-foobar + payload_key: agent_foobar + message: 'Agent foobar' + ` + + payloadObj := struct { + Date string `json:"date"` + Offender string `json:"offender"` + BugCheck string `json:"bugcheck"` + }{ + Date: "2024-30-02 17:31:12", + Offender: "ddnpm+0x1a3", + BugCheck: "0x7A", + } + // conert to json + payload, err := json.Marshal(payloadObj) + require.NoError(t, err) + + // setup and initiate atel + o := convertYamlStrToMap(t, cfg) + cl := newClientMock() + s := makeSenderImpl(t, cl, cfg) + r := newRunnerMock() + a := getTestAtel(t, nil, o, s, cl, r) + require.True(t, a.enabled) + + a.start() + err = a.SendEvent("agentbsod", payload) + 
require.NoError(t, err) + assert.True(t, len(cl.(*clientMock).body) > 0) + + //deserialize the payload of cl.(*clientMock).body + var topPayload map[string]interface{} + err = json.Unmarshal(cl.(*clientMock).body, &topPayload) + require.NoError(t, err) + fmt.Print(string(cl.(*clientMock).body)) + + v, ok, err2 := jsonquery.RunSingleOutput(".payload.message", topPayload) + require.NoError(t, err2) + require.True(t, ok) + assert.Equal(t, "Agent BSOD", v) + + v, ok, err2 = jsonquery.RunSingleOutput(".payload.agent_bsod.offender", topPayload) + require.NoError(t, err2) + require.True(t, ok) + assert.Equal(t, "ddnpm+0x1a3", v) +} + +func TestAgentTelemetrySendNonRegisteredEvent(t *testing.T) { + // Use nearly full + var cfg = ` + agent_telemetry: + enabled: true + use_compression: false + profiles: + - name: xxx + metric: + metrics: + - name: foo.bar + - name: ondemand + events: + - name: agentbsod + request_type: agent-bsod + payload_key: agentbsod + message: 'Agent BSOD' + - name: foobar + request_type: agent-foobar + payload_key: agentfoobar + message: 'Agent foobar' + ` + + payloadObj := struct { + Date string `json:"date"` + Offender string `json:"offender"` + BugCheck string `json:"bugcheck"` + }{ + Date: "2024-30-02 17:31:12", + Offender: "ddnpm+0x1a3", + BugCheck: "0x7A", + } + // conert to json + payload, err := json.Marshal(payloadObj) + require.NoError(t, err) + + // setup and initiate atel + o := convertYamlStrToMap(t, cfg) + cl := newClientMock() + s := makeSenderImpl(t, cl, cfg) + r := newRunnerMock() + a := getTestAtel(t, nil, o, s, cl, r) + require.True(t, a.enabled) + + a.start() + err = a.SendEvent("agentbsod2", payload) + require.Error(t, err) +} diff --git a/comp/core/agenttelemetry/impl/config.go b/comp/core/agenttelemetry/impl/config.go index a6cb70ae43ce46..a5dfbcdf702eef 100644 --- a/comp/core/agenttelemetry/impl/config.go +++ b/comp/core/agenttelemetry/impl/config.go @@ -25,8 +25,9 @@ type Config struct { Enabled bool `yaml:"enabled"` Profiles 
[]*Profile `yaml:"profiles"` - // compiled + // config-wide and "compiled" fields schedule map[Schedule][]*Profile + events map[string]*Event } // Profile is a single agent telemetry profile @@ -34,7 +35,8 @@ type Profile struct { // parsed Name string `yaml:"name"` Metric *AgentMetricConfig `yaml:"metric,omitempty"` - Schedule *Schedule `yaml:"schedule"` + Schedule *Schedule `yaml:"schedule,omitempty"` + Events []*Event `yaml:"events"` // compiled metricsMap map[string]*MetricConfig @@ -73,6 +75,14 @@ type Schedule struct { Period uint `yaml:"period"` } +// Event is a payload sent by Agent Telemetry component client +type Event struct { + Name string `yaml:"name"` // required + RequestType string `yaml:"request_type"` // required + PayloadKey string `yaml:"payload_key"` // required + Message string `yaml:"message"` // required +} + // profiles[].metric.metrics (optional) // -------------------------- // When included, agent telemetry metrics payloads will be generated and emitted. @@ -148,6 +158,26 @@ type Schedule struct { // ------------------------------------- // Number of seconds to wait between telemetry collection iteration for the profile. If not // specified, default values are specified above. 
+// +// profiles[].events (optional) +// ------------------------------------- +// List of registered events an agent telemetry client can send +// +// profiles[].events[].name (required) +// ------------------------------------- +// The name of the event to find corresponding request_type, payload_key and message values +// +// profiles[].events[].request_type (required) +// ------------------------------------- +// The value is required and used in the corresponding payload to identify the event +// +// profiles[].events[].payload_key (required) +// ------------------------------------- +// The value is required and used in the corresponding payload as a root of the event payload +// +// profiles[].events[].message (required) +// ------------------------------------- +// The value is required and used in the corresponding payload // ---------------------------------------------------------------------------------- // @@ -238,6 +268,12 @@ var defaultProfiles = ` start_after: 600 iterations: 0 period: 14400 + - name: ondemand + events: + - name: agentbsod + request_type: agent-bsod + payload_key: agent_bsod + message: 'Agent BSOD' ` func compileMetricsExclude(p *Profile) error { @@ -313,37 +349,35 @@ func compileMetric(p *Profile, m *MetricConfig) error { return nil } -// Compile metric section -func compileMetrics(p *Profile) error { - // No metric section - nothing to do - if p.Metric == nil || len(p.Metric.Metrics) == 0 { - return nil - } - - if err := compileMetricsExclude(p); err != nil { - return err - } - - // Compile metrics themselves - p.metricsMap = make(map[string]*MetricConfig) - for i := 0; i < len(p.Metric.Metrics); i++ { - if err := compileMetric(p, &p.Metric.Metrics[i]); err != nil { - return err +// Validate profiles +func validateProfiles(cfg *Config) error { + for i, p := range cfg.Profiles { + if len(p.Name) == 0 { + return fmt.Errorf("profile requires 'name' attribute to be specified. 
Profile index: %d", i) } } return nil } -// Compile profile -func compileProfile(p *Profile) error { - // Profile requires name - if len(p.Name) == 0 { - return fmt.Errorf("profile requires 'name' attribute to be specified") - } +func compileMetrics(cfg *Config) error { + for _, p := range cfg.Profiles { + // No metric section - nothing to do + if p.Metric == nil || len(p.Metric.Metrics) == 0 { + continue + } - if err := compileMetrics(p); err != nil { - return err + if err := compileMetricsExclude(p); err != nil { + return err + } + + // Compile metrics themselves + p.metricsMap = make(map[string]*MetricConfig) + for i := 0; i < len(p.Metric.Metrics); i++ { + if err := compileMetric(p, &p.Metric.Metrics[i]); err != nil { + return err + } + } } return nil @@ -356,6 +390,11 @@ func compileSchedules(cfg *Config) error { for i := 0; i < len(cfg.Profiles); i++ { p := cfg.Profiles[i] + // No metric section - schedule is not needed + if p.Metric == nil || len(p.Metric.Metrics) == 0 { + continue + } + // Setup default schedule if it is not specified partially or at all if p.Schedule == nil { p.Schedule = &Schedule{ @@ -388,19 +427,37 @@ func compileSchedules(cfg *Config) error { return nil } +func compileEvents(cfg *Config) error { + cfg.events = make(map[string]*Event) + for _, p := range cfg.Profiles { + if p.Events != nil { + for _, e := range p.Events { + cfg.events[e.Name] = e + } + } + } + + return nil +} + // Compile agent telemetry config func compileConfig(cfg *Config) error { - for i := 0; i < len(cfg.Profiles); i++ { - err := compileProfile(cfg.Profiles[i]) - if err != nil { - return err - } + if err := validateProfiles(cfg); err != nil { + return err + } + + if err := compileMetrics(cfg); err != nil { + return err } if err := compileSchedules(cfg); err != nil { return err } + if err := compileEvents(cfg); err != nil { + return err + } + return nil } diff --git a/comp/core/agenttelemetry/impl/sender.go b/comp/core/agenttelemetry/impl/sender.go index 
b7b62f460b9333..7798ab3cc4b26d 100644 --- a/comp/core/agenttelemetry/impl/sender.go +++ b/comp/core/agenttelemetry/impl/sender.go @@ -37,6 +37,9 @@ const ( telemetryIntakeTrackType = "agenttelemetry" telemetryPath = "/api/v2/apmtelemetry" + metricPayloadType = "agent-metrics" + batchPayloadType = "message-batch" + httpClientResetInterval = 5 * time.Minute httpClientTimeout = 10 * time.Second ) @@ -46,7 +49,9 @@ const ( type sender interface { startSession(cancelCtx context.Context) *senderSession flushSession(ss *senderSession) error + sendAgentMetricPayloads(ss *senderSession, metrics []*agentmetric) + sendEventPayload(ss *senderSession, eventInfo *Event, eventPayload map[string]interface{}) } type client interface { @@ -112,7 +117,13 @@ type payloadInfo struct { type senderSession struct { cancelCtx context.Context payloadTemplate Payload - metricPayloads []*AgentMetricsPayload + + // metric payloads + metricPayloads []*AgentMetricsPayload + + // event payload + eventInfo *Event + eventPayload map[string]interface{} } // BatchPayloadWrapper exported so it can be turned into json @@ -142,6 +153,8 @@ type MetricPayload struct { P99 *float64 `json:"p99,omitempty"` } +// ------------------- +// Utilities func httpClientFactory(cfg config.Reader, timeout time.Duration) func() *http.Client { return func() *http.Client { return &http.Client{ @@ -331,40 +344,72 @@ func (s *senderImpl) startSession(cancelCtx context.Context) *senderSession { } } +func (ss *senderSession) payloadCount() int { + payloadCount := len(ss.metricPayloads) + if ss.eventPayload != nil { + payloadCount++ + } + return payloadCount +} + func (ss *senderSession) flush() Payload { defer func() { - // Clear the payloads + // Clear payloads when done ss.metricPayloads = nil + ss.eventInfo = nil + ss.eventPayload = nil }() // Create a payload with a single message or batch of messages payload := ss.payloadTemplate payload.EventTime = time.Now().Unix() - if len(ss.metricPayloads) == 1 { - // Single payload 
will be sent directly using the request type of the payload - mp := ss.metricPayloads[0] - payload.RequestType = "agent-metrics" - payload.Payload = payloadInfo{"agent-metrics", mp}.payload - return payload + + // Create top-level event payload if needed + var eventWrapPayload map[string]interface{} + if ss.eventPayload != nil { + eventWrapPayload = make(map[string]interface{}) + eventWrapPayload["message"] = ss.eventInfo.Message + eventWrapPayload[ss.eventInfo.PayloadKey] = ss.eventPayload } - // Batch up multiple payloads into single "batch" payload type - batch := make([]BatchPayloadWrapper, 0) - for _, mp := range ss.metricPayloads { - batch = append(batch, - BatchPayloadWrapper{ - RequestType: "agent-metrics", - Payload: payloadInfo{"agent-metrics", mp}.payload, - }) + if ss.payloadCount() == 1 { + // Either metric or event payload (single payload will be sent directly using the request type of the payload) + if len(ss.metricPayloads) == 1 { + mp := ss.metricPayloads[0] + payload.RequestType = metricPayloadType + payload.Payload = mp + } else { + payload.RequestType = ss.eventInfo.RequestType + payload.Payload = eventWrapPayload + } + } else { + // Batch up multiple payloads into single "batch" payload type + batch := make([]BatchPayloadWrapper, 0) + for _, mp := range ss.metricPayloads { + batch = append(batch, + BatchPayloadWrapper{ + RequestType: metricPayloadType, + Payload: payloadInfo{metricPayloadType, mp}.payload, + }) + } + // add event payload if present + if ss.eventPayload != nil { + batch = append(batch, + BatchPayloadWrapper{ + RequestType: ss.eventInfo.RequestType, + Payload: eventWrapPayload, + }) + } + payload.RequestType = batchPayloadType + payload.Payload = batch } - payload.RequestType = "message-batch" - payload.Payload = batch + return payload } func (s *senderImpl) flushSession(ss *senderSession) error { // There is nothing to do if there are no payloads - if len(ss.metricPayloads) == 0 { + if ss.payloadCount() == 0 { return nil } @@ 
-385,12 +430,13 @@ func (s *senderImpl) flushSession(ss *senderSession) error {
 	reqBody := reqBodyRaw
 	compressed := false
 	if s.compress {
-		reqBodyCompressed, err2 := zstd.CompressLevel(nil, reqBodyRaw, s.compressionLevel)
-		if err2 == nil {
+		// If compression fails, continue with the uncompressed body
+		reqBodyCompressed, errTemp := zstd.CompressLevel(nil, reqBodyRaw, s.compressionLevel)
+		if errTemp == nil {
 			compressed = true
 			reqBody = reqBodyCompressed
 		} else {
-			s.logComp.Errorf("Failed to compress agent telemetry payload: %v", err)
+			s.logComp.Errorf("Failed to compress agent telemetry payload: %v", errTemp)
 		}
 	}
 
@@ -455,6 +501,18 @@ func (s *senderImpl) sendAgentMetricPayloads(ss *senderSession, metrics []*agent
 	}
 }
 
+// sendEventPayload records the event description and its payload on the session
+// and adds the agent metadata to the payload under the "agent_metadata" key.
+func (s *senderImpl) sendEventPayload(ss *senderSession, eventInfo *Event, eventPayload map[string]interface{}) {
+	// Writing to a nil map panics; start from an empty map so the metadata can still be attached
+	if eventPayload == nil {
+		eventPayload = make(map[string]interface{})
+	}
+	ss.eventInfo = eventInfo
+	ss.eventPayload = eventPayload
+	ss.eventPayload["agent_metadata"] = s.metadataPayloadTemplate
+}
+
 func (s *senderImpl) addHeaders(req *http.Request, requesttype, apikey, bodylen string, compressed bool) {
 	req.Header.Add("DD-Api-Key", apikey)
 	req.Header.Add("Content-Type", "application/json")
diff --git a/pkg/internaltelemetry/client.go b/pkg/internaltelemetry/client.go
index ee7754fad190b6..6e7bcce47524b0 100644
--- a/pkg/internaltelemetry/client.go
+++ b/pkg/internaltelemetry/client.go
@@ -119,6 +119,9 @@ type httpClient interface {
 }
 
 // NewClient creates a new telemetry client
+// Used by Fleet now (pkg/fleet/telemetry/telemetry.go).
+// Agentcrashdetect (comp/checks/agentcrashdetect/agentcrashdetectimpl/agentcrashdetect.go)
+// has switched to the agenttelemetry component instead.
func NewClient(httpClient httpClient, endpoints []*Endpoint, service string, debug bool) Client { info, err := host.Info() if err != nil { diff --git a/releasenotes/notes/agent-tel-bsod-payload-791ab2c7f553abb4.yaml b/releasenotes/notes/agent-tel-bsod-payload-791ab2c7f553abb4.yaml new file mode 100644 index 00000000000000..57feecc3c29138 --- /dev/null +++ b/releasenotes/notes/agent-tel-bsod-payload-791ab2c7f553abb4.yaml @@ -0,0 +1,14 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +enhancements: + - | + In rare cases, when the Agent's Network Performance Monitor is enabled and + the Agent is identified as contributing to a `Blue Screen of Death` (BSOD) event, + Agent telemetry is used to generate a payload that includes Agent's driver code + offset for further analysis.