From 90368bd662317429ce5ec8b9aaf3b2b9c164466c Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Thu, 10 Feb 2022 11:04:34 +0100 Subject: [PATCH 01/13] render process-agent status on core agent status command --- pkg/status/render.go | 2 + pkg/status/status.go | 2 + pkg/status/status_process_agent.go | 35 +++++++++++ pkg/status/templates/process-agent.tmpl | 80 +++++++++++++++++++++++++ 4 files changed, 119 insertions(+) create mode 100644 pkg/status/status_process_agent.go create mode 100644 pkg/status/templates/process-agent.tmpl diff --git a/pkg/status/render.go b/pkg/status/render.go index 7c43925c64d98..d6588e276e80b 100644 --- a/pkg/status/render.go +++ b/pkg/status/render.go @@ -52,6 +52,7 @@ func FormatStatus(data []byte) (string, error) { endpointsInfos := stats["endpointsInfos"] inventoriesStats := stats["inventories"] systemProbeStats := stats["systemProbeStats"] + processAgentStatus := stats["processAgentStatus"] snmpTrapsStats := stats["snmpTrapsStats"] title := fmt.Sprintf("Agent (v%s)", stats["version"]) stats["title"] = title @@ -64,6 +65,7 @@ func FormatStatus(data []byte) (string, error) { if config.Datadog.GetBool("system_probe_config.enabled") { renderStatusTemplate(b, "/systemprobe.tmpl", systemProbeStats) } + renderStatusTemplate(b, "/process-agent.tmpl", processAgentStatus) renderStatusTemplate(b, "/trace-agent.tmpl", stats["apmStats"]) renderStatusTemplate(b, "/aggregator.tmpl", aggregatorStats) renderStatusTemplate(b, "/dogstatsd.tmpl", dogstatsdStats) diff --git a/pkg/status/status.go b/pkg/status/status.go index 278d785c8a144..103fd7c8f6772 100644 --- a/pkg/status/status.go +++ b/pkg/status/status.go @@ -76,6 +76,8 @@ func GetStatus() (map[string]interface{}, error) { stats["systemProbeStats"] = GetSystemProbeStats(config.Datadog.GetString("system_probe_config.sysprobe_socket")) } + stats["processAgentStatus"] = GetProcessAgentStatus() + if !config.Datadog.GetBool("no_proxy_nonexact_match") { httputils.NoProxyMapMutex.Lock() stats["TransportWarnings"] = len(httputils.NoProxyIgnoredWarningMap)+len(httputils.NoProxyUsedInFuture)+len(httputils.NoProxyChanged) > 0 diff --git a/pkg/status/status_process_agent.go b/pkg/status/status_process_agent.go new file mode 100644 index 0000000000000..63512737044aa --- /dev/null +++ b/pkg/status/status_process_agent.go @@ -0,0 +1,35 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package status + +import ( + "fmt" +) + +// GetProcessAgentStats returns the status command of the process-agent +func GetProcessAgentStatus() map[string]interface{} { + //net.SetSystemProbePath(socketPath) + //probeUtil, err := net.GetRemoteSystemProbeUtil() + + //if err != nil { + // return map[string]interface{}{ + // "Errors": fmt.Sprintf("%v", err), + // } + //} + + processAgentStatus := map[string]interface{}{ + "Errors": fmt.Sprintf("Testing process-agent status"), + } + + //systemProbeDetails, err := probeUtil.GetStats() + //if err != nil { + // return map[string]interface{}{ + // "Errors": fmt.Sprintf("issue querying stats from system probe: %v", err), + // } + //} + + return processAgentStatus +} diff --git a/pkg/status/templates/process-agent.tmpl b/pkg/status/templates/process-agent.tmpl new file mode 100644 index 0000000000000..cd94f3d92d079 --- /dev/null +++ b/pkg/status/templates/process-agent.tmpl @@ -0,0 +1,80 @@ +============= +Process Agent +============= + +{{- if .Errors }} + Status: Testing Process Agent Status on main agent + Error: {{ .Errors }} +{{- else }} + Status: Running + Uptime: {{ .uptime }} + Last Updated: {{ formatUnixTime .updated_at }} +{{- end }} +{{- if .network_tracer }} + + NPM + === + {{- if .network_tracer.Error }} + Status: Not running + Error: {{ .network_tracer.Error }} + {{- else }} + Status: Running + {{- if .network_tracer.tracer.last_check }} + Last Check: {{ formatUnixTime .network_tracer.tracer.last_check }} + {{- end }} + {{- if .network_tracer.state.clients }} + Client Count: {{ len .network_tracer.state.clients }} + {{- end }} + {{- end }} +{{- end }} +{{- if .oom_kill_probe }} + + OOM Kill + ======== + {{- if .oom_kill_probe.Error }} + Status: Not running + Error: {{ .oom_kill_probe.Error }} + {{- else }} + Status: Running + {{- if .oom_kill_probe.last_check }} + Last Check: {{ formatUnixTime .oom_kill_probe.last_check }} + {{- end }} + {{- end }} +{{- end }} +{{- if .tcp_queue_length_tracer }} + + TCP Queue Length + ================ + {{- if .tcp_queue_length_tracer.Error }} + Status: Not running + Error: {{ .tcp_queue_length_tracer.Error }} + {{- else }} + Status: Running + {{- if .tcp_queue_length_tracer.last_check }} + Last Check: {{ formatUnixTime .tcp_queue_length_tracer.last_check }} + {{- end }} + {{- end }} +{{- end }} +{{- if .security_runtime }} + + Runtime Security + ================ + {{- if .security_runtime.Error }} + Status: Not running + Error: {{ .security_runtime.Error }} + {{- else }} + Status: Running + {{- end }} +{{- end }} +{{- if .process }} + + Process + ======= + {{- if .process.Error }} + Status: Not running + Error: {{ .process.Error }} + {{- else }} + Status: Running + {{- end }} +{{- end }} + From ccd5f8bfc618c126b0feedd060a2efdbc6a52d8a Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Thu, 10 Feb 2022 19:28:50 +0100 Subject: [PATCH 02/13] refactor status command in process-agent --- cmd/process-agent/api/status.go | 10 +- cmd/process-agent/app/status.go | 215 ++++----------------- cmd/process-agent/app/status_test.go | 272 +++++++++++++-------------- pkg/process/util/status.go | 146 ++++++++++++++ 4 files changed, 311 insertions(+), 332 deletions(-) create mode 100644 pkg/process/util/status.go diff --git a/cmd/process-agent/api/status.go b/cmd/process-agent/api/status.go index 7c4585944c7c3..a49d137736227 100644 --- a/cmd/process-agent/api/status.go +++ b/cmd/process-agent/api/status.go @@ -9,18 +9,14 @@ import ( "encoding/json" "net/http" - "github.com/DataDog/datadog-agent/pkg/status" + "github.com/DataDog/datadog-agent/pkg/process/util" "github.com/DataDog/datadog-agent/pkg/util/log" ) func statusHandler(w http.ResponseWriter, _ *http.Request) { - log.Trace("Received status request from process-agent") - - agentStatus, err := status.GetStatus() - if err != nil { - _ = log.Warn("failed to get status from agent:", agentStatus) - } + log.Info("Got a request for the status. Making status.") + agentStatus := util.GetStatus() b, err := json.Marshal(agentStatus) if err != nil { _ = log.Warn("failed to serialize status response from agent:", err) diff --git a/cmd/process-agent/app/status.go b/cmd/process-agent/app/status.go index fceabe94930d6..18c3f854e4f52 100644 --- a/cmd/process-agent/app/status.go +++ b/cmd/process-agent/app/status.go @@ -11,69 +11,21 @@ import ( "io" "os" "text/template" - "time" "github.com/spf13/cobra" "github.com/DataDog/datadog-agent/cmd/process-agent/api" - "github.com/DataDog/datadog-agent/pkg/api/util" + apiutil "github.com/DataDog/datadog-agent/pkg/api/util" ddconfig "github.com/DataDog/datadog-agent/pkg/config" - "github.com/DataDog/datadog-agent/pkg/metadata/host" "github.com/DataDog/datadog-agent/pkg/process/config" + "github.com/DataDog/datadog-agent/pkg/process/util" ddstatus "github.com/DataDog/datadog-agent/pkg/status" "github.com/DataDog/datadog-agent/pkg/util/log" ) -var httpClient = util.GetClient(false) +var httpClient = apiutil.GetClient(false) const ( - statusTemplate = ` -============================== -Process Agent ({{ .Core.AgentVersion }}) -============================== - - Status date: {{ formatUnixTime .Date }} - Process Agent Start: {{ formatUnixTime .Expvars.UptimeNano }} - Pid: {{ .Expvars.Pid }} - Go Version: {{ .Core.GoVersion }} - Python Version: {{ .Core.PythonVersion }} - Build arch: {{ .Core.Arch }} - Log Level: {{ .Core.Config.LogLevel }} - Enabled Checks: {{ .Expvars.EnabledChecks }} - Allocated Memory: {{ .Expvars.MemStats.Alloc }} bytes - Hostname: {{ .Core.Metadata.Meta.Hostname }} - -================= -Process Endpoints -================= -{{- with .Expvars.Endpoints}} - {{- range $key, $value := .}} - {{$key}} - API Key{{ if gt (len $value) 1}}s{{end}} ending with: - {{- range $idx, $apikey := $value }} - - {{$apikey}} - {{- end}} - {{- end}} -{{- else }} - - No endpoints information. The agent may be misconfigured. -{{- end }} - -========= -Collector -========= - - Last collection time: {{.Expvars.LastCollectTime}} - Docker socket: {{.Expvars.DockerSocket}} - Number of processes: {{.Expvars.ProcessCount}} - Number of containers: {{.Expvars.ContainerCount}} - Process Queue length: {{.Expvars.ProcessQueueSize}} - RTProcess Queue length: {{.Expvars.RTProcessQueueSize}} - Pod Queue length: {{.Expvars.PodQueueSize}} - Process Bytes enqueued: {{.Expvars.ProcessQueueBytes}} - RTProcess Bytes enqueued: {{.Expvars.RTProcessQueueBytes}} - Pod Bytes enqueued: {{.Expvars.PodQueueBytes}} - -` notRunning = ` ============= Process Agent @@ -93,122 +45,6 @@ Error ` ) -type coreStatus struct { - AgentVersion string `json:"version"` - GoVersion string `json:"go_version"` - PythonVersion string `json:"python_version"` - Arch string `json:"build_arch"` - Config struct { - LogLevel string `json:"log_level"` - } `json:"config"` - Metadata host.Payload `json:"metadata"` -} - -type infoVersion struct { - Version string - GitCommit string - GitBranch string - BuildDate string - GoVersion string -} - -type processExpvars struct { - Pid int `json:"pid"` - Uptime int `json:"uptime"` - UptimeNano float64 `json:"uptime_nano"` - MemStats struct{ Alloc uint64 } `json:"memstats"` - Version infoVersion `json:"version"` - DockerSocket string `json:"docker_socket"` - LastCollectTime string `json:"last_collect_time"` - ProcessCount int `json:"process_count"` - ContainerCount int `json:"container_count"` - ProcessQueueSize int `json:"process_queue_size"` - RTProcessQueueSize int `json:"rtprocess_queue_size"` - PodQueueSize int `json:"pod_queue_size"` - ProcessQueueBytes int `json:"process_queue_bytes"` - RTProcessQueueBytes int `json:"rtprocess_queue_bytes"` - PodQueueBytes int `json:"pod_queue_bytes"` - ContainerID string `json:"container_id"` - ProxyURL string `json:"proxy_url"` - LogFile string `json:"log_file"` - EnabledChecks []string `json:"enabled_checks"` - Endpoints map[string][]string `json:"endpoints"` -} - -type status struct { - Date float64 - Core coreStatus // Contains the status from the core agent - Expvars processExpvars // Contains the expvars retrieved from the process agent -} - -type statusOption func(s *status) - -type connectionError struct { - error -} - -func overrideTime(t time.Time) statusOption { - return func(s *status) { - s.Date = float64(t.UnixNano()) - } -} - -func getCoreStatus() (s coreStatus, err error) { - addressPort, err := api.GetAPIAddressPort() - if err != nil { - return coreStatus{}, fmt.Errorf("config error: %s", err.Error()) - } - - statusEndpoint := fmt.Sprintf("http://%s/agent/status", addressPort) - b, err := util.DoGet(httpClient, statusEndpoint) - if err != nil { - return s, connectionError{err} - } - - err = json.Unmarshal(b, &s) - return -} - -func getExpvars() (s processExpvars, err error) { - ipcAddr, err := ddconfig.GetIPCAddress() - if err != nil { - return processExpvars{}, fmt.Errorf("config error: %s", err.Error()) - } - - port := ddconfig.Datadog.GetInt("process_config.expvar_port") - if port <= 0 { - _ = log.Warnf("Invalid process_config.expvar_port -- %d, using default port %d\n", port, ddconfig.DefaultProcessExpVarPort) - port = ddconfig.DefaultProcessExpVarPort - } - expvarEndpoint := fmt.Sprintf("http://%s:%d/debug/vars", ipcAddr, port) - b, err := util.DoGet(httpClient, expvarEndpoint) - if err != nil { - return s, connectionError{err} - } - - err = json.Unmarshal(b, &s) - return -} - -func getStatus() (status, error) { - coreStatus, err := getCoreStatus() - if err != nil { - return status{}, err - } - - processStatus, err := getExpvars() - if err != nil { - return status{}, err - } - - s := status{ - Date: float64(time.Now().UnixNano()), - Core: coreStatus, - Expvars: processStatus, - } - return s, nil -} - func writeNotRunning(w io.Writer) { _, err := fmt.Fprint(w, notRunning) if err != nil { @@ -227,32 +63,51 @@ func writeError(w io.Writer, e error) { _ = log.Error(err) } } +func fetchStatus() ([]byte, error) { + addressPort, err := api.GetAPIAddressPort() + if err != nil { + return nil, err + } + + statusEndpoint := fmt.Sprintf("http://%s/agent/status", addressPort) + body, err := apiutil.DoGet(httpClient, statusEndpoint) + if err != nil { + return nil, util.NewConnectionError(err) + } + + return body, nil +} // getAndWriteStatus calls the status server and writes it to `w` -func getAndWriteStatus(w io.Writer, options ...statusOption) { - status, err := getStatus() +func getAndWriteStatus(w io.Writer, options ...util.StatusOption) { + body, err := fetchStatus() if err != nil { switch err.(type) { - case connectionError: + case util.ConnectionError: writeNotRunning(w) default: writeError(w, err) } return } - for _, option := range options { - option(&status) - } - tpl, err := template.New("").Funcs(ddstatus.Textfmap()).Parse(statusTemplate) - if err != nil { - _ = log.Error(err) - } + // If options to override the status are provided, we need to deserialize and serialize it again + if len(options) > 0 { + var s util.Status + err = json.Unmarshal(body, &s) - err = tpl.Execute(w, status) - if err != nil { - _ = log.Error(err) + for _, option := range options { + option(&s) + } + + body, err = json.Marshal(s) + if err != nil { + writeError(w, err) + } } + + stats, err := ddstatus.FormatProcessAgentStatus(body) + w.Write([]byte(stats)) } // StatusCmd returns a cobra command that prints the current status diff --git a/cmd/process-agent/app/status_test.go b/cmd/process-agent/app/status_test.go index cd6b9dfd8b1dc..af249c253bc11 100644 --- a/cmd/process-agent/app/status_test.go +++ b/cmd/process-agent/app/status_test.go @@ -1,147 +1,129 @@ package app -import ( - "context" - "encoding/json" - "fmt" - "net" - "net/http" - "strings" - "sync" - "testing" - "text/template" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/DataDog/datadog-agent/pkg/config" - "github.com/DataDog/datadog-agent/pkg/metadata/host" - ddstatus "github.com/DataDog/datadog-agent/pkg/status" -) - -type statusServer struct { - shutdownWg *sync.WaitGroup - coreStatusServer, expvarsServer *http.Server -} - -func (s *statusServer) stop() error { - err := s.coreStatusServer.Shutdown(context.Background()) - if err != nil { - return err - } - - err = s.expvarsServer.Shutdown(context.Background()) - if err != nil { - return err - } - - s.shutdownWg.Wait() - return nil -} - -func startTestServer(t *testing.T, cfg config.Config, expectedStatus status) statusServer { - var serverWg sync.WaitGroup - serverWg.Add(2) - - statusMux := http.NewServeMux() - statusMux.HandleFunc("/agent/status", func(w http.ResponseWriter, _ *http.Request) { - b, err := json.Marshal(expectedStatus.Core) - require.NoError(t, err) - - _, err = w.Write(b) - require.NoError(t, err) - }) - statusEndpoint := fmt.Sprintf("localhost:%d", cfg.GetInt("process_config.cmd_port")) - coreStatusServer := http.Server{Addr: statusEndpoint, Handler: statusMux} - statusListener, err := net.Listen("tcp", statusEndpoint) - require.NoError(t, err) - go func() { - _ = coreStatusServer.Serve(statusListener) - serverWg.Done() - }() - - expvarMux := http.NewServeMux() - expvarMux.HandleFunc("/debug/vars", func(w http.ResponseWriter, _ *http.Request) { - b, err := json.Marshal(expectedStatus.Expvars) - require.NoError(t, err) - - _, err = w.Write(b) - require.NoError(t, err) - }) - expvarEndpoint := fmt.Sprintf("localhost:%d", cfg.GetInt("process_config.expvar_port")) - expvarsServer := http.Server{Addr: expvarEndpoint, Handler: expvarMux} - expvarsListener, err := net.Listen("tcp", expvarEndpoint) - require.NoError(t, err) - go func() { - _ = expvarsServer.Serve(expvarsListener) - serverWg.Done() - }() - - return statusServer{coreStatusServer: &coreStatusServer, expvarsServer: &expvarsServer, shutdownWg: &serverWg} -} - -func TestStatus(t *testing.T) { - testTime := time.Now() - expectedStatus := status{ - Date: float64(testTime.UnixNano()), - Core: coreStatus{ - Metadata: host.Payload{ - Meta: &host.Meta{}, - }, - }, - Expvars: processExpvars{}, - } - - // Use different ports in case the host is running a real agent - cfg := config.Mock() - cfg.Set("process_config.expvar_port", 8081) - cfg.Set("process_config.cmd_port", 8082) - server := startTestServer(t, cfg, expectedStatus) - - var statusBuilder, expectedStatusBuilder strings.Builder - - // Build what the expected status should be - tpl, err := template.New("").Funcs(ddstatus.Textfmap()).Parse(statusTemplate) - require.NoError(t, err) - err = tpl.Execute(&expectedStatusBuilder, expectedStatus) - require.NoError(t, err) - - // Build the actual status - getAndWriteStatus(&statusBuilder, overrideTime(testTime)) - - assert.Equal(t, expectedStatusBuilder.String(), statusBuilder.String()) - - err = server.stop() - require.NoError(t, err) -} - -func TestNotRunning(t *testing.T) { - // Use different ports in case the host is running a real agent - cfg := config.Mock() - cfg.Set("process_config.expvar_port", 8081) - cfg.Set("process_config.cmd_port", 8082) - - var b strings.Builder - getAndWriteStatus(&b) - - assert.Equal(t, notRunning, b.String()) -} - -// TestError tests an example error to make sure that the error template prints properly if we get something other than -// a connection error -func TestError(t *testing.T) { - cfg := config.Mock() - cfg.Set("ipc_address", "8.8.8.8") // Non-local ip address will cause error in `GetIPCAddress` - _, ipcError := config.GetIPCAddress() - - var errText, expectedErrText strings.Builder - getAndWriteStatus(&errText) - - tpl, err := template.New("").Parse(errorMessage) - require.NoError(t, err) - err = tpl.Execute(&expectedErrText, fmt.Errorf("config error: %s", ipcError)) - require.NoError(t, err) - - assert.Equal(t, expectedErrText.String(), errText.String()) -} +// TODO: mock status server here to test status cmd. Move expvar server logic to pkg/process/util + +//type statusServer struct { +// shutdownWg *sync.WaitGroup +// statusServer, expvarsServer *http.Server +//} +// +//func (s *statusServer) stop() error { +// err := s.statusServer.Shutdown(context.Background()) +// if err != nil { +// return err +// } +// +// err = s.expvarsServer.Shutdown(context.Background()) +// if err != nil { +// return err +// } +// +// s.shutdownWg.Wait() +// return nil +//} +// +//func startTestServer(t *testing.T, cfg config.Config, expectedStatus util.Status) statusServer { +// var serverWg sync.WaitGroup +// serverWg.Add(2) +// +// statusMux := http.NewServeMux() +// statusMux.HandleFunc("/agent/status", func(w http.ResponseWriter, _ *http.Request) { +// b, err := json.Marshal(expectedStatus) +// require.NoError(t, err) +// +// _, err = w.Write(b) +// require.NoError(t, err) +// }) +// statusEndpoint := fmt.Sprintf("localhost:%d", cfg.GetInt("process_config.cmd_port")) +// statusServer := http.Server{Addr: statusEndpoint, Handler: statusMux} +// statusListener, err := net.Listen("tcp", statusEndpoint) +// require.NoError(t, err) +// go func() { +// _ = statusServer.Serve(statusListener) +// serverWg.Done() +// }() +// +// expvarMux := http.NewServeMux() +// expvarMux.HandleFunc("/debug/vars", func(w http.ResponseWriter, _ *http.Request) { +// b, err := json.Marshal(expectedStatus.Expvars) +// require.NoError(t, err) +// +// _, err = w.Write(b) +// require.NoError(t, err) +// }) +// expvarEndpoint := fmt.Sprintf("localhost:%d", cfg.GetInt("process_config.expvar_port")) +// expvarsServer := http.Server{Addr: expvarEndpoint, Handler: expvarMux} +// expvarsListener, err := net.Listen("tcp", expvarEndpoint) +// require.NoError(t, err) +// go func() { +// _ = expvarsServer.Serve(expvarsListener) +// serverWg.Done() +// }() +// +// return statusServer{coreStatusServer: &coreStatusServer, expvarsServer: &expvarsServer, shutdownWg: &serverWg} +//} +// +//func TestStatus(t *testing.T) { +// testTime := time.Now() +// expectedStatus := util.Status{ +// Date: float64(testTime.UnixNano()), +// Core: util.CoreStatus{ +// Metadata: host.Payload{ +// Meta: &host.Meta{}, +// }, +// }, +// Expvars: util.ProcessExpvars{}, +// } +// +// // Use different ports in case the host is running a real agent +// cfg := config.Mock() +// cfg.Set("process_config.expvar_port", 8081) +// cfg.Set("process_config.cmd_port", 8082) +// server := startTestServer(t, cfg, expectedStatus) +// +// var statusBuilder, expectedStatusBuilder strings.Builder +// +// // Build what the expected status should be +// tpl, err := template.New("").Funcs(ddstatus.Textfmap()).Parse(statusTemplate) +// require.NoError(t, err) +// err = tpl.Execute(&expectedStatusBuilder, expectedStatus) +// require.NoError(t, err) +// +// // Build the actual status +// getAndWriteStatus(&statusBuilder, util.OverrideTime(testTime)) +// +// assert.Equal(t, expectedStatusBuilder.String(), statusBuilder.String()) +// +// err = server.stop() +// require.NoError(t, err) +//} +// +//func TestNotRunning(t *testing.T) { +// // Use different ports in case the host is running a real agent +// cfg := config.Mock() +// cfg.Set("process_config.expvar_port", 8081) +// cfg.Set("process_config.cmd_port", 8082) +// +// var b strings.Builder +// getAndWriteStatus(&b) +// +// assert.Equal(t, notRunning, b.String()) +//} +// +//// TestError tests an example error to make sure that the error template prints properly if we get something other than +//// a connection error +//func TestError(t *testing.T) { +// cfg := config.Mock() +// cfg.Set("ipc_address", "8.8.8.8") // Non-local ip address will cause error in `GetIPCAddress` +// _, ipcError := config.GetIPCAddress() +// +// var errText, expectedErrText strings.Builder +// getAndWriteStatus(&errText) +// +// tpl, err := template.New("").Parse(errorMessage) +// require.NoError(t, err) +// err = tpl.Execute(&expectedErrText, fmt.Errorf("config error: %s", ipcError)) +// require.NoError(t, err) +// +// assert.Equal(t, expectedErrText.String(), errText.String()) +//} diff --git a/pkg/process/util/status.go b/pkg/process/util/status.go new file mode 100644 index 0000000000000..8ed13f9c40e17 --- /dev/null +++ b/pkg/process/util/status.go @@ -0,0 +1,146 @@ +package util + +import ( + "context" + "encoding/json" + "fmt" + "github.com/DataDog/datadog-agent/pkg/util/log" + "runtime" + "time" + + apiutil "github.com/DataDog/datadog-agent/pkg/api/util" + ddconfig "github.com/DataDog/datadog-agent/pkg/config" + "github.com/DataDog/datadog-agent/pkg/metadata/host" + "github.com/DataDog/datadog-agent/pkg/util" + "github.com/DataDog/datadog-agent/pkg/version" +) + +var httpClient = apiutil.GetClient(false) + +type CoreStatus struct { + AgentVersion string `json:"version"` + GoVersion string `json:"go_version"` + PythonVersion string `json:"python_version"` + Arch string `json:"build_arch"` + Config ConfigStatus `json:"config"` + Metadata host.Payload `json:"metadata"` +} + +type ConfigStatus struct { + LogLevel string `json:"log_level"` +} +type InfoVersion struct { + Version string + GitCommit string + GitBranch string + BuildDate string + GoVersion string +} + +type ProcessExpvars struct { + Pid int `json:"pid"` + Uptime int `json:"uptime"` + UptimeNano float64 `json:"uptime_nano"` + MemStats struct { + Alloc uint64 `json:"alloc"` + } `json:"memstats"` + Version InfoVersion `json:"version"` + DockerSocket string `json:"docker_socket"` + LastCollectTime string `json:"last_collect_time"` + ProcessCount int `json:"process_count"` + ContainerCount int `json:"container_count"` + ProcessQueueSize int `json:"process_queue_size"` + RTProcessQueueSize int `json:"rtprocess_queue_size"` + PodQueueSize int `json:"pod_queue_size"` + ProcessQueueBytes int `json:"process_queue_bytes"` + RTProcessQueueBytes int `json:"rtprocess_queue_bytes"` + PodQueueBytes int `json:"pod_queue_bytes"` + ContainerID string `json:"container_id"` + ProxyURL string `json:"proxy_url"` + LogFile string `json:"log_file"` + EnabledChecks []string `json:"enabled_checks"` + Endpoints map[string][]string `json:"endpoints"` +} + +type Status struct { + Date float64 + Core CoreStatus // Contains the status from the core agent + Expvars ProcessExpvars // Contains the expvars retrieved from the process agent +} + +type StatusOption func(s *Status) + +type ConnectionError struct { + error +} + +func NewConnectionError(err error) ConnectionError { + return ConnectionError{err} +} + +func OverrideTime(t time.Time) StatusOption { + return func(s *Status) { + s.Date = float64(t.UnixNano()) + } +} + +func getCoreStatus() (s CoreStatus) { + hostnameData, err := util.GetHostnameData(context.TODO()) + var metadata *host.Payload + if err != nil { + log.Errorf("Error grabbing hostname for status: %v", err) + metadata = host.GetPayloadFromCache(context.TODO(), util.HostnameData{Hostname: "unknown", Provider: "unknown"}) + } else { + metadata = host.GetPayloadFromCache(context.TODO(), hostnameData) + } + + return CoreStatus{ + AgentVersion: version.AgentVersion, + GoVersion: runtime.Version(), + PythonVersion: host.GetPythonVersion(), + Arch: runtime.GOARCH, + Config: ConfigStatus{ + LogLevel: ddconfig.Datadog.GetString("log_level"), + }, + Metadata: *metadata, + } +} + +func getExpvars() (s ProcessExpvars, err error) { + ipcAddr, err := ddconfig.GetIPCAddress() + if err != nil { + return ProcessExpvars{}, fmt.Errorf("config error: %s", err.Error()) + } + + port := ddconfig.Datadog.GetInt("process_config.expvar_port") + if port <= 0 { + _ = log.Warnf("Invalid process_config.expvar_port -- %d, using default port %d\n", port, ddconfig.DefaultProcessExpVarPort) + port = ddconfig.DefaultProcessExpVarPort + } + expvarEndpoint := fmt.Sprintf("http://%s:%d/debug/vars", ipcAddr, port) + b, err := apiutil.DoGet(httpClient, expvarEndpoint) + if err != nil { + return s, ConnectionError{err} + } + + err = json.Unmarshal(b, &s) + return +} + +func GetStatus() map[string]interface{} { + stats := make(map[string]interface{}) + + coreStatus := getCoreStatus() + + processStatus, err := getExpvars() + if err != nil { + stats["error"] = fmt.Sprintf("%v", err.Error()) + return stats + } + + stats["date"] = float64(time.Now().UnixNano()) + stats["core"] = coreStatus + stats["expvars"] = processStatus + + return stats +} From 105db35920997fbf12e99ea24ca25096ca08eb29 Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Thu, 10 Feb 2022 19:30:34 +0100 Subject: [PATCH 03/13] fecth process-agent status in core agent --- pkg/status/render.go | 11 +++ pkg/status/status_process_agent.go | 43 ++++++---- pkg/status/templates/process-agent.tmpl | 106 +++++++++--------------- 3 files changed, 73 insertions(+), 87 deletions(-) diff --git a/pkg/status/render.go b/pkg/status/render.go index d6588e276e80b..c81fc71b67f66 100644 --- a/pkg/status/render.go +++ b/pkg/status/render.go @@ -139,6 +139,17 @@ func FormatSecurityAgentStatus(data []byte) (string, error) { return b.String(), nil } +// FormatProcessAgentStatus takes a json bytestring and prints out the formatted status for process-agent +func FormatProcessAgentStatus(data []byte) (string, error) { + var b = new(bytes.Buffer) + + stats := make(map[string]interface{}) + json.Unmarshal(data, &stats) //nolint:errcheck + renderStatusTemplate(b, "/process-agent.tmpl", stats) + + return b.String(), nil +} + // FormatMetadataMapCLI builds the rendering in the metadataMapper template. func FormatMetadataMapCLI(data []byte) (string, error) { var b = new(bytes.Buffer) diff --git a/pkg/status/status_process_agent.go b/pkg/status/status_process_agent.go index 63512737044aa..15deaa4d17686 100644 --- a/pkg/status/status_process_agent.go +++ b/pkg/status/status_process_agent.go @@ -6,30 +6,37 @@ package status import ( + "encoding/json" "fmt" + + "github.com/DataDog/datadog-agent/cmd/process-agent/api" + apiutil "github.com/DataDog/datadog-agent/pkg/api/util" ) -// GetProcessAgentStats returns the status command of the process-agent -func GetProcessAgentStatus() map[string]interface{} { - //net.SetSystemProbePath(socketPath) - //probeUtil, err := net.GetRemoteSystemProbeUtil() +//TODO: do not use a global var for this +var httpClient = apiutil.GetClient(false) - //if err != nil { - // return map[string]interface{}{ - // "Errors": fmt.Sprintf("%v", err), - // } - //} +// GetProcessAgentStatus returns the status command of the process-agent +func GetProcessAgentStatus() map[string]interface{} { + s := make(map[string]interface{}) + addressPort, err := api.GetAPIAddressPort() + if err != nil { + s["error"] = fmt.Sprintf("%v", err.Error()) + return s + } - processAgentStatus := map[string]interface{}{ - "Errors": fmt.Sprintf("Testing process-agent status"), + statusEndpoint := fmt.Sprintf("http://%s/agent/status", addressPort) + b, err := apiutil.DoGet(httpClient, statusEndpoint) + if err != nil { + s["error"] = fmt.Sprintf("%v", err.Error()) + return s } - //systemProbeDetails, err := probeUtil.GetStats() - //if err != nil { - // return map[string]interface{}{ - // "Errors": fmt.Sprintf("issue querying stats from system probe: %v", err), - // } - //} + err = json.Unmarshal(b, &s) + if err != nil { + s["error"] = fmt.Sprintf("%v", err.Error()) + return s + } - return processAgentStatus + return s } diff --git a/pkg/status/templates/process-agent.tmpl b/pkg/status/templates/process-agent.tmpl index cd94f3d92d079..29110f76bf908 100644 --- a/pkg/status/templates/process-agent.tmpl +++ b/pkg/status/templates/process-agent.tmpl @@ -2,79 +2,47 @@ Process Agent ============= -{{- if .Errors }} - Status: Testing Process Agent Status on main agent - Error: {{ .Errors }} +{{- if .error }} + Status: Not running or unreachable {{- else }} - Status: Running - Uptime: {{ .uptime }} - Last Updated: {{ formatUnixTime .updated_at }} -{{- end }} -{{- if .network_tracer }} + Version: {{ .core.version }} + Status date: {{ formatUnixTime .date }} + Process Agent Start: {{ formatUnixTime .expvars.uptime_nano }} + Pid: {{ .expvars.pid }} + Go Version: {{ .core.go_version }} + Build arch: {{ .core.build_arch }} + Log Level: {{ .core.config.log_level }} + Enabled Checks: {{ .expvars.enabled_checks }} + Allocated Memory: {{ .expvars.memstats.alloc }} bytes + Hostname: {{ .core.metadata.meta.hostname }} - NPM - === - {{- if .network_tracer.Error }} - Status: Not running - Error: {{ .network_tracer.Error }} + ================= + Process Endpoints + ================= + {{- with .expvars.endpoints}} + {{- range $key, $value := .}} + {{$key}} - API Key{{ if gt (len $value) 1}}s{{end}} ending with: + {{- range $idx, $apikey := $value }} + - {{$apikey}} + {{- end}} + {{- end}} {{- else }} - Status: Running - {{- if .network_tracer.tracer.last_check }} - Last Check: {{ formatUnixTime .network_tracer.tracer.last_check }} - {{- end }} - {{- if .network_tracer.state.clients }} - Client Count: {{ len .network_tracer.state.clients }} - {{- end }} - {{- end }} -{{- end }} -{{- if .oom_kill_probe }} - - OOM Kill - ======== - {{- if .oom_kill_probe.Error }} - Status: Not running - Error: {{ .oom_kill_probe.Error }} - {{- else }} - Status: Running - {{- if .oom_kill_probe.last_check }} - Last Check: {{ formatUnixTime .oom_kill_probe.last_check }} - {{- end }} - {{- end }} -{{- end }} -{{- if .tcp_queue_length_tracer }} - - TCP Queue Length - ================ - {{- if .tcp_queue_length_tracer.Error }} - Status: Not running - Error: {{ .tcp_queue_length_tracer.Error }} - {{- else }} - Status: Running - {{- if .tcp_queue_length_tracer.last_check }} - Last Check: {{ formatUnixTime .tcp_queue_length_tracer.last_check }} - {{- end }} - {{- end }} -{{- end }} -{{- if .security_runtime }} - Runtime Security - ================ - {{- if .security_runtime.Error }} - Status: Not running - Error: {{ .security_runtime.Error }} - {{- else }} - Status: Running + No endpoints information. The agent may be misconfigured. {{- end }} -{{- end }} -{{- if .process }} - Process - ======= - {{- if .process.Error }} - Status: Not running - Error: {{ .process.Error }} - {{- else }} - Status: Running - {{- end }} -{{- end }} + ========= + Collector + ========= + Last collection time: {{.expvars.last_collect_time}} + Docker socket: {{.expvars.docker_socket}} + Number of processes: {{.expvars.process_count}} + Number of containers: {{.expvars.container_count}} + Process Queue length: {{.expvars.process_queue_size}} + RTProcess Queue length: {{.expvars.rtprocess_queue_size}} + Pod Queue length: {{.expvars.pod_queue_size}} + Process Bytes enqueued: {{.expvars.process_queue_bytes}} + RTProcess Bytes enqueued: {{.expvars.rtprocess_queue_bytes}} + Pod Bytes enqueued: {{.expvars.pod_queue_bytes}} +{{ end }} From 86183fba9aebf326540fda94315ed37e5963f7d4 Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Thu, 10 Feb 2022 21:35:22 +0100 Subject: [PATCH 04/13] refactor app/status tests; add comment for exported structs; handle errors in api server; update GetStatus signature --- cmd/process-agent/api/status.go | 12 +- cmd/process-agent/app/status.go | 2 +- cmd/process-agent/app/status_test.go | 248 +++++++++++++-------------- pkg/process/util/status.go | 55 +++--- pkg/process/util/status_test.go | 57 ++++++ 5 files changed, 222 insertions(+), 152 deletions(-) create mode 100644 pkg/process/util/status_test.go diff --git a/cmd/process-agent/api/status.go b/cmd/process-agent/api/status.go index a49d137736227..2058ed0f65bfe 100644 --- a/cmd/process-agent/api/status.go +++ b/cmd/process-agent/api/status.go @@ -16,10 +16,20 @@ import ( func statusHandler(w http.ResponseWriter, _ *http.Request) { log.Info("Got a request for the status. Making status.") - agentStatus := util.GetStatus() + agentStatus, err := util.GetStatus() + if err != nil { + if err != nil { + _ = log.Warn("failed to get status from agent:", agentStatus) + body, _ := json.Marshal(map[string]string{"error": err.Error()}) + http.Error(w, string(body), http.StatusInternalServerError) + } + } + b, err := json.Marshal(agentStatus) if err != nil { _ = log.Warn("failed to serialize status response from agent:", err) + body, _ := json.Marshal(map[string]string{"error": err.Error()}) + http.Error(w, string(body), http.StatusInternalServerError) } _, err = w.Write(b) diff --git a/cmd/process-agent/app/status.go b/cmd/process-agent/app/status.go index 18c3f854e4f52..25c7c5398f52d 100644 --- a/cmd/process-agent/app/status.go +++ b/cmd/process-agent/app/status.go @@ -66,7 +66,7 @@ func writeError(w io.Writer, e error) { func fetchStatus() ([]byte, error) { addressPort, err := api.GetAPIAddressPort() if err != nil { - return nil, err + return nil, fmt.Errorf("config error: %s", err.Error()) } statusEndpoint := fmt.Sprintf("http://%s/agent/status", addressPort) diff --git a/cmd/process-agent/app/status_test.go b/cmd/process-agent/app/status_test.go index af249c253bc11..c3842138a5f73 100644 --- a/cmd/process-agent/app/status_test.go +++ b/cmd/process-agent/app/status_test.go @@ -1,129 +1,127 @@ package app -// TODO: mock status server here to test status cmd. Move expvar server logic to pkg/process/util +import ( + "context" + "encoding/json" + "fmt" + "github.com/DataDog/datadog-agent/pkg/process/util" + "net" + "net/http" + "strings" + "sync" + "testing" + "text/template" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/datadog-agent/pkg/config" + "github.com/DataDog/datadog-agent/pkg/metadata/host" + ddstatus "github.com/DataDog/datadog-agent/pkg/status" +) + +type statusServer struct { + shutdownWg *sync.WaitGroup + server *http.Server +} + +func (s *statusServer) stop() error { + err := s.server.Shutdown(context.Background()) + if err != nil { + return err + } + + s.shutdownWg.Wait() + return nil +} + +func startTestServer(t *testing.T, cfg config.Config, expectedStatus util.Status) statusServer { + var serverWg sync.WaitGroup + serverWg.Add(1) + + statusMux := http.NewServeMux() + statusMux.HandleFunc("/agent/status", func(w http.ResponseWriter, _ *http.Request) { + b, err := json.Marshal(expectedStatus) + require.NoError(t, err) + + _, err = w.Write(b) + require.NoError(t, err) + }) + + statusEndpoint := fmt.Sprintf("localhost:%d", cfg.GetInt("process_config.cmd_port")) + server := http.Server{Addr: statusEndpoint, Handler: statusMux} + statusListener, err := net.Listen("tcp", statusEndpoint) + require.NoError(t, err) + go func() { + _ = server.Serve(statusListener) + serverWg.Done() + }() + + return statusServer{server: &server, shutdownWg: &serverWg} +} + +func TestStatus(t *testing.T) { + testTime := time.Now() + expectedStatus := util.Status{ + Date: float64(testTime.UnixNano()), + Core: util.CoreStatus{ + Metadata: host.Payload{ + Meta: &host.Meta{}, + }, + }, + Expvars: util.ProcessExpvars{}, + } + + // Use different ports in case the host is running a real agent + cfg := config.Mock() + cfg.Set("process_config.cmd_port", 8082) + server := startTestServer(t, cfg, expectedStatus) + + var statusBuilder strings.Builder + + j, err := json.Marshal(expectedStatus) + require.NoError(t, err) + + // Build what the expected status should be + expectedOutput, err := ddstatus.FormatProcessAgentStatus(j) + require.NoError(t, err) + + // Build the actual status + getAndWriteStatus(&statusBuilder, util.OverrideTime(testTime)) + + assert.Equal(t, expectedOutput, statusBuilder.String()) + + err = server.stop() + require.NoError(t, err) +} + +func TestNotRunning(t *testing.T) { + // Use different ports in case the host is running a real agent + cfg := config.Mock() + cfg.Set("process_config.cmd_port", 8082) + + var b strings.Builder + getAndWriteStatus(&b) + + assert.Equal(t, notRunning, b.String()) +} -//type statusServer struct { -// shutdownWg *sync.WaitGroup -// statusServer, expvarsServer *http.Server -//} -// -//func (s *statusServer) stop() error { -// err := s.statusServer.Shutdown(context.Background()) -// if err != nil { -// return err -// } -// -// err = s.expvarsServer.Shutdown(context.Background()) -// if err != nil { -// return err -// } -// -// s.shutdownWg.Wait() -// return nil -//} -// -//func startTestServer(t *testing.T, cfg config.Config, expectedStatus util.Status) statusServer { -// var serverWg sync.WaitGroup -// serverWg.Add(2) -// -// statusMux := http.NewServeMux() -// statusMux.HandleFunc("/agent/status", func(w http.ResponseWriter, _ *http.Request) { -// b, err := json.Marshal(expectedStatus) -// require.NoError(t, err) -// -// _, err = w.Write(b) -// require.NoError(t, err) -// }) -// statusEndpoint := fmt.Sprintf("localhost:%d", cfg.GetInt("process_config.cmd_port")) -// statusServer := http.Server{Addr: statusEndpoint, Handler: statusMux} -// statusListener, err := net.Listen("tcp", statusEndpoint) -// require.NoError(t, err) -// go func() { -// _ = statusServer.Serve(statusListener) -// serverWg.Done() -// }() -// -// expvarMux := http.NewServeMux() -// expvarMux.HandleFunc("/debug/vars", func(w http.ResponseWriter, _ *http.Request) { -// b, err := json.Marshal(expectedStatus.Expvars) -// require.NoError(t, err) -// -// _, err = w.Write(b) -// require.NoError(t, err) -// }) -// expvarEndpoint := fmt.Sprintf("localhost:%d", cfg.GetInt("process_config.expvar_port")) -// expvarsServer := http.Server{Addr: expvarEndpoint, Handler: expvarMux} -// expvarsListener, err := net.Listen("tcp", expvarEndpoint) -// require.NoError(t, err) -// go func() { -// _ = expvarsServer.Serve(expvarsListener) -// serverWg.Done() -// }() -// -// return statusServer{coreStatusServer: &coreStatusServer, expvarsServer: &expvarsServer, shutdownWg: &serverWg} -//} -// -//func TestStatus(t *testing.T) { -// testTime := time.Now() -// expectedStatus := util.Status{ -// Date: float64(testTime.UnixNano()), -// Core: util.CoreStatus{ -// Metadata: host.Payload{ -// Meta: &host.Meta{}, -// }, -// }, -// Expvars: util.ProcessExpvars{}, -// } -// -// // Use different ports in case the host is running a real agent -// cfg := config.Mock() -// cfg.Set("process_config.expvar_port", 8081) -// cfg.Set("process_config.cmd_port", 8082) -// server := startTestServer(t, cfg, expectedStatus) -// -// var statusBuilder, expectedStatusBuilder strings.Builder -// -// // Build what the expected status should be -// tpl, err := template.New("").Funcs(ddstatus.Textfmap()).Parse(statusTemplate) -// require.NoError(t, err) -// err = tpl.Execute(&expectedStatusBuilder, expectedStatus) -// require.NoError(t, err) -// -// // Build the actual status -// getAndWriteStatus(&statusBuilder, util.OverrideTime(testTime)) -// -// assert.Equal(t, expectedStatusBuilder.String(), statusBuilder.String()) -// -// err = server.stop() -// require.NoError(t, err) -//} -// -//func TestNotRunning(t *testing.T) { -// // Use different ports in case the host is running a real agent -// cfg := config.Mock() -// cfg.Set("process_config.expvar_port", 8081) -// cfg.Set("process_config.cmd_port", 8082) -// -// var b strings.Builder -// getAndWriteStatus(&b) -// -// assert.Equal(t, notRunning, b.String()) -//} -// -//// TestError tests an example error to make sure that the error template prints properly if we get something other than -//// a connection error -//func TestError(t *testing.T) { -// cfg := config.Mock() -// cfg.Set("ipc_address", "8.8.8.8") // Non-local ip address will cause error in `GetIPCAddress` -// _, ipcError := config.GetIPCAddress() -// -// var errText, expectedErrText strings.Builder -// getAndWriteStatus(&errText) -// -// tpl, err := template.New("").Parse(errorMessage) -// require.NoError(t, err) -// err = tpl.Execute(&expectedErrText, fmt.Errorf("config error: %s", ipcError)) -// require.NoError(t, err) // -// assert.Equal(t, expectedErrText.String(), errText.String()) -//} +// TestError tests an example error to make sure that the error template prints properly if we get something other than +// a connection error +func TestError(t *testing.T) { + cfg := config.Mock() + cfg.Set("ipc_address", "8.8.8.8") // Non-local ip address will cause error in `GetIPCAddress` + _, ipcError := config.GetIPCAddress() + + var errText, expectedErrText strings.Builder + getAndWriteStatus(&errText) + + tpl, err := template.New("").Parse(errorMessage) + require.NoError(t, err) + err = tpl.Execute(&expectedErrText, fmt.Errorf("config error: %s", ipcError)) + require.NoError(t, err) + + assert.Equal(t, expectedErrText.String(), errText.String()) +} diff --git a/pkg/process/util/status.go b/pkg/process/util/status.go index 8ed13f9c40e17..f80803ea16bd3 100644 --- a/pkg/process/util/status.go +++ b/pkg/process/util/status.go @@ -17,18 +17,21 @@ import ( var httpClient = apiutil.GetClient(false) +// CoreStatus holds core info about the process-agent type CoreStatus struct { - AgentVersion string `json:"version"` - GoVersion string `json:"go_version"` - PythonVersion string `json:"python_version"` - Arch string `json:"build_arch"` - Config ConfigStatus `json:"config"` - Metadata host.Payload `json:"metadata"` + AgentVersion string `json:"version"` + GoVersion string `json:"go_version"` + Arch string `json:"build_arch"` + Config ConfigStatus `json:"config"` + Metadata host.Payload `json:"metadata"` } +// ConfigStatus holds config settings from process-agent type ConfigStatus struct { LogLevel string `json:"log_level"` } + +// InfoVersion holds information about process-agent version type InfoVersion struct { Version string GitCommit string @@ -37,6 +40,7 @@ type InfoVersion struct { GoVersion string } +// ProcessExpvars holds values fetched from the exp var server type ProcessExpvars struct { Pid int `json:"pid"` Uptime int `json:"uptime"` @@ -62,22 +66,27 @@ type ProcessExpvars struct { Endpoints map[string][]string `json:"endpoints"` } +// Status holds status info from process-agent type Status struct { - Date float64 - Core CoreStatus // Contains the status from the core agent - Expvars ProcessExpvars // Contains the expvars retrieved from the process agent + Date float64 `json:"date"` + Core CoreStatus `json:"core"` // Contains the status from the core agent + Expvars ProcessExpvars `json:"expvars"` // Contains the expvars retrieved from the process agent } +// StatusOption is a function that acts on a Status object type StatusOption func(s *Status) +// ConnectionError represents an error to connect to a HTTP server type ConnectionError struct { error } +// NewConnectionError returns a new ConnectionError func NewConnectionError(err error) ConnectionError { return ConnectionError{err} } +// OverrideTime overrides the Date from a Status object func OverrideTime(t time.Time) StatusOption { return func(s *Status) { s.Date = float64(t.UnixNano()) @@ -95,10 +104,9 @@ func getCoreStatus() (s CoreStatus) { } return CoreStatus{ - AgentVersion: version.AgentVersion, - GoVersion: runtime.Version(), - PythonVersion: host.GetPythonVersion(), - Arch: runtime.GOARCH, + AgentVersion: version.AgentVersion, + GoVersion: runtime.Version(), + Arch: runtime.GOARCH, Config: ConfigStatus{ LogLevel: ddconfig.Datadog.GetString("log_level"), }, @@ -127,20 +135,17 @@ func getExpvars() (s ProcessExpvars, err error) { return } -func GetStatus() map[string]interface{} { - stats := make(map[string]interface{}) - +// GetStatus returns a Status object with runtime information about process-agent +func GetStatus() (*Status, error) { coreStatus := getCoreStatus() - - processStatus, err := getExpvars() + processExpVars, err := getExpvars() if err != nil { - stats["error"] = fmt.Sprintf("%v", err.Error()) - return stats + return nil, err } - stats["date"] = float64(time.Now().UnixNano()) - stats["core"] = coreStatus - stats["expvars"] = processStatus - - return stats + return &Status{ + Date: float64(time.Now().UnixNano()), + Core: coreStatus, + Expvars: processExpVars, + }, nil } diff --git a/pkg/process/util/status_test.go b/pkg/process/util/status_test.go new file mode 100644 index 0000000000000..af74fb3026dde --- /dev/null +++ b/pkg/process/util/status_test.go @@ -0,0 +1,57 @@ +package util + +import ( + "context" + "encoding/json" + "fmt" + "github.com/DataDog/datadog-agent/pkg/config" + "net" + "net/http" + "sync" + "testing" + + "github.com/stretchr/testify/require" +) + +type statusServer struct { + shutdownWg *sync.WaitGroup + expVarServer *http.Server +} + +func (s *statusServer) stop() error { + err := s.expVarServer.Shutdown(context.Background()) + if err != nil { + return err + } + + s.shutdownWg.Wait() + return nil +} + +func startTestServer(t *testing.T, cfg config.Config, expectedExpVars ProcessExpvars) statusServer { + var serverWg sync.WaitGroup + serverWg.Add(1) + + expVarMux := http.NewServeMux() + expVarMux.HandleFunc("/debug/vars", func(w http.ResponseWriter, _ *http.Request) { + b, err := json.Marshal(expectedExpVars) + require.NoError(t, err) + + _, err = w.Write(b) + require.NoError(t, err) + }) + expVarEndpoint := fmt.Sprintf("localhost:%d", cfg.GetInt("process_config.expvar_port")) + expVarsServer := http.Server{Addr: expCarEndpoint, Handler: expVarMux} + expVarsListener, err := net.Listen("tcp", expVarEndpoint) + require.NoError(t, err) + go func() { + _ = expVarsServer.Serve(expVarsListener) + serverWg.Done() + }() + + return statusServer{expVarServer: &expVarsServer, shutdownWg: &serverWg} +} + +func TestGetStatus(t *testing.T) { + +} From 9a5c1b0cd3ac39f04c61903582db0772ba3c1841 Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Fri, 11 Feb 2022 11:14:33 +0100 Subject: [PATCH 05/13] add test for pkg/process/util/status --- pkg/process/util/status.go | 19 +++++--- pkg/process/util/status_test.go | 78 +++++++++++++++++++++++++++++- pkg/status/status_process_agent.go | 5 +- 3 files changed, 89 insertions(+), 13 deletions(-) diff --git a/pkg/process/util/status.go b/pkg/process/util/status.go index f80803ea16bd3..6f7e94e6568ba 100644 --- a/pkg/process/util/status.go +++ b/pkg/process/util/status.go @@ -40,14 +40,17 @@ type InfoVersion struct { GoVersion string } +// MemInfo holds information about memory usage from process-agent +type MemInfo struct { + Alloc uint64 `json:"alloc"` +} + // ProcessExpvars holds values fetched from the exp var server type ProcessExpvars struct { - Pid int `json:"pid"` - Uptime int `json:"uptime"` - UptimeNano float64 `json:"uptime_nano"` - MemStats struct { - Alloc uint64 `json:"alloc"` - } `json:"memstats"` + Pid int `json:"pid"` + Uptime int `json:"uptime"` + UptimeNano float64 `json:"uptime_nano"` + MemStats MemInfo `json:"memstats"` Version InfoVersion `json:"version"` DockerSocket string `json:"docker_socket"` LastCollectTime string `json:"last_collect_time"` @@ -66,7 +69,7 @@ type ProcessExpvars struct { Endpoints map[string][]string `json:"endpoints"` } -// Status holds status info from process-agent +// Status holds runtime information from process-agent type Status struct { Date float64 `json:"date"` Core CoreStatus `json:"core"` // Contains the status from the core agent @@ -76,7 +79,7 @@ type Status struct { // StatusOption is a function that acts on a Status object type StatusOption func(s *Status) -// ConnectionError represents an error to connect to a HTTP server +// ConnectionError represents an error to connect to an HTTP server type ConnectionError struct { error } diff --git a/pkg/process/util/status_test.go b/pkg/process/util/status_test.go index af74fb3026dde..5d5ca472847f2 100644 --- a/pkg/process/util/status_test.go +++ b/pkg/process/util/status_test.go @@ -4,13 +4,21 @@ import ( "context" "encoding/json" "fmt" - "github.com/DataDog/datadog-agent/pkg/config" "net" "net/http" + "runtime" "sync" "testing" + "time" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/DataDog/datadog-agent/pkg/config" + ddconfig "github.com/DataDog/datadog-agent/pkg/config" + "github.com/DataDog/datadog-agent/pkg/metadata/host" + "github.com/DataDog/datadog-agent/pkg/util" + "github.com/DataDog/datadog-agent/pkg/version" ) type statusServer struct { @@ -41,7 +49,7 @@ func startTestServer(t *testing.T, cfg config.Config, expectedExpVars ProcessExp require.NoError(t, err) }) expVarEndpoint := fmt.Sprintf("localhost:%d", cfg.GetInt("process_config.expvar_port")) - expVarsServer := http.Server{Addr: expCarEndpoint, Handler: expVarMux} + expVarsServer := http.Server{Addr: expVarEndpoint, Handler: expVarMux} expVarsListener, err := net.Listen("tcp", expVarEndpoint) require.NoError(t, err) go func() { @@ -53,5 +61,71 @@ func startTestServer(t *testing.T, cfg config.Config, expectedExpVars ProcessExp } func TestGetStatus(t *testing.T) { + testTime := time.Now() + + expectedExpVars := ProcessExpvars{ + Pid: 1, + Uptime: time.Now().Add(-time.Hour).Nanosecond(), + EnabledChecks: []string{"process", "rtprocess"}, + MemStats: MemInfo{ + Alloc: 1234, + }, + Endpoints: map[string][]string{ + "https://process.datadoghq.com": { + "fakeAPIKey", + }, + }, + LastCollectTime: "2022-02-011 10:10:00", + DockerSocket: "/var/run/docker.sock", + ProcessCount: 30, + ContainerCount: 2, + ProcessQueueSize: 1, + RTProcessQueueSize: 3, + PodQueueSize: 4, + ProcessQueueBytes: 2 * 1024, + RTProcessQueueBytes: 512, + PodQueueBytes: 4 * 1024, + } + + // Feature detection needs to run before host methods are called. During runtime, feature detection happens + // when the datadog.yaml file is loaded + cfg := ddconfig.Mock() + ddconfig.DetectFeatures() + + hostnameData, err := util.GetHostnameData(context.TODO()) + var metadata *host.Payload + if err != nil { + metadata = host.GetPayloadFromCache(context.TODO(), util.HostnameData{Hostname: "unknown", Provider: "unknown"}) + } else { + metadata = host.GetPayloadFromCache(context.TODO(), hostnameData) + } + + expectedStatus := &Status{ + Date: float64(testTime.UnixNano()), + Core: CoreStatus{ + AgentVersion: version.AgentVersion, + GoVersion: runtime.Version(), + Arch: runtime.GOARCH, + Config: ConfigStatus{ + LogLevel: ddconfig.Datadog.GetString("log_level"), + }, + Metadata: *metadata, + }, + Expvars: expectedExpVars, + } + + // Use different port in case the host is running a real agent + cfg.Set("process_config.expvar_port", 8081) + + expVarSrv := startTestServer(t, cfg, expectedExpVars) + defer func() { + err := expVarSrv.stop() + require.NoError(t, err) + }() + + stats, err := GetStatus() + require.NoError(t, err) + OverrideTime(testTime)(stats) + assert.Equal(t, expectedStatus, stats) } diff --git a/pkg/status/status_process_agent.go b/pkg/status/status_process_agent.go index 15deaa4d17686..4af35fd07d451 100644 --- a/pkg/status/status_process_agent.go +++ b/pkg/status/status_process_agent.go @@ -13,11 +13,10 @@ import ( apiutil "github.com/DataDog/datadog-agent/pkg/api/util" ) -//TODO: do not use a global var for this -var httpClient = apiutil.GetClient(false) - // GetProcessAgentStatus returns the status command of the process-agent func GetProcessAgentStatus() map[string]interface{} { + httpClient := apiutil.GetClient(false) + s := make(map[string]interface{}) addressPort, err := api.GetAPIAddressPort() if err != nil { From b9b531f88f23ddeb73143f771d8fb60c43c05c09 Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Fri, 11 Feb 2022 11:37:25 +0100 Subject: [PATCH 06/13] improce process-agent status template --- pkg/status/templates/process-agent.tmpl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/status/templates/process-agent.tmpl b/pkg/status/templates/process-agent.tmpl index 29110f76bf908..14ac0d3a684d7 100644 --- a/pkg/status/templates/process-agent.tmpl +++ b/pkg/status/templates/process-agent.tmpl @@ -1,10 +1,11 @@ ============= Process Agent ============= - {{- if .error }} + Status: Not running or unreachable {{- else }} + Version: {{ .core.version }} Status date: {{ formatUnixTime .date }} Process Agent Start: {{ formatUnixTime .expvars.uptime_nano }} @@ -34,7 +35,6 @@ Process Agent ========= Collector ========= - Last collection time: {{.expvars.last_collect_time}} Docker socket: {{.expvars.docker_socket}} Number of processes: {{.expvars.process_count}} @@ -45,4 +45,5 @@ Process Agent Process Bytes enqueued: {{.expvars.process_queue_bytes}} RTProcess Bytes enqueued: {{.expvars.rtprocess_queue_bytes}} Pod Bytes enqueued: {{.expvars.pod_queue_bytes}} -{{ end }} +{{- end }} + From 5617a603938fec98104f90a8a4666456e06283a7 Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Fri, 11 Feb 2022 11:55:12 +0100 Subject: [PATCH 07/13] some nit --- cmd/process-agent/app/status.go | 5 +++++ cmd/process-agent/app/status_test.go | 1 - pkg/process/util/status.go | 8 ++++---- pkg/process/util/status_test.go | 14 +++++++------- pkg/status/status_process_agent.go | 2 +- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/cmd/process-agent/app/status.go b/cmd/process-agent/app/status.go index 25c7c5398f52d..c7959e54edb07 100644 --- a/cmd/process-agent/app/status.go +++ b/cmd/process-agent/app/status.go @@ -95,6 +95,10 @@ func getAndWriteStatus(w io.Writer, options ...util.StatusOption) { if len(options) > 0 { var s util.Status err = json.Unmarshal(body, &s) + if err != nil { + writeError(w, err) + return + } for _, option := range options { option(&s) @@ -103,6 +107,7 @@ func getAndWriteStatus(w io.Writer, options ...util.StatusOption) { body, err = json.Marshal(s) if err != nil { writeError(w, err) + return } } diff --git a/cmd/process-agent/app/status_test.go b/cmd/process-agent/app/status_test.go index c3842138a5f73..8aafc5f324552 100644 --- a/cmd/process-agent/app/status_test.go +++ b/cmd/process-agent/app/status_test.go @@ -107,7 +107,6 @@ func TestNotRunning(t *testing.T) { assert.Equal(t, notRunning, b.String()) } -// // TestError tests an example error to make sure that the error template prints properly if we get something other than // a connection error func TestError(t *testing.T) { diff --git a/pkg/process/util/status.go b/pkg/process/util/status.go index 6f7e94e6568ba..4a3854fd8c04e 100644 --- a/pkg/process/util/status.go +++ b/pkg/process/util/status.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "github.com/DataDog/datadog-agent/pkg/util/log" "runtime" "time" @@ -12,11 +11,10 @@ import ( ddconfig "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/metadata/host" "github.com/DataDog/datadog-agent/pkg/util" + "github.com/DataDog/datadog-agent/pkg/util/log" "github.com/DataDog/datadog-agent/pkg/version" ) -var httpClient = apiutil.GetClient(false) - // CoreStatus holds core info about the process-agent type CoreStatus struct { AgentVersion string `json:"version"` @@ -72,7 +70,7 @@ type ProcessExpvars struct { // Status holds runtime information from process-agent type Status struct { Date float64 `json:"date"` - Core CoreStatus `json:"core"` // Contains the status from the core agent + Core CoreStatus `json:"core"` // Contains fields that are collected similarly to the core agent in pkg/status Expvars ProcessExpvars `json:"expvars"` // Contains the expvars retrieved from the process agent } @@ -129,6 +127,8 @@ func getExpvars() (s ProcessExpvars, err error) { port = ddconfig.DefaultProcessExpVarPort } expvarEndpoint := fmt.Sprintf("http://%s:%d/debug/vars", ipcAddr, port) + + httpClient := apiutil.GetClient(false) b, err := apiutil.DoGet(httpClient, expvarEndpoint) if err != nil { return s, ConnectionError{err} diff --git a/pkg/process/util/status_test.go b/pkg/process/util/status_test.go index 5d5ca472847f2..e86d7485e99e7 100644 --- a/pkg/process/util/status_test.go +++ b/pkg/process/util/status_test.go @@ -21,13 +21,13 @@ import ( "github.com/DataDog/datadog-agent/pkg/version" ) -type statusServer struct { - shutdownWg *sync.WaitGroup - expVarServer *http.Server +type expVarServer struct { + shutdownWg *sync.WaitGroup + server *http.Server } -func (s *statusServer) stop() error { - err := s.expVarServer.Shutdown(context.Background()) +func (s *expVarServer) stop() error { + err := s.server.Shutdown(context.Background()) if err != nil { return err } @@ -36,7 +36,7 @@ func (s *statusServer) stop() error { return nil } -func startTestServer(t *testing.T, cfg config.Config, expectedExpVars ProcessExpvars) statusServer { +func startTestServer(t *testing.T, cfg config.Config, expectedExpVars ProcessExpvars) expVarServer { var serverWg sync.WaitGroup serverWg.Add(1) @@ -57,7 +57,7 @@ func startTestServer(t *testing.T, cfg config.Config, expectedExpVars ProcessExp serverWg.Done() }() - return statusServer{expVarServer: &expVarsServer, shutdownWg: &serverWg} + return expVarServer{server: &expVarsServer, shutdownWg: &serverWg} } func TestGetStatus(t *testing.T) { diff --git a/pkg/status/status_process_agent.go b/pkg/status/status_process_agent.go index 4af35fd07d451..23e41b3ca4247 100644 --- a/pkg/status/status_process_agent.go +++ b/pkg/status/status_process_agent.go @@ -13,7 +13,7 @@ import ( apiutil "github.com/DataDog/datadog-agent/pkg/api/util" ) -// GetProcessAgentStatus returns the status command of the process-agent +// GetProcessAgentStatus fetches the process-agent status from the process-agent API server func GetProcessAgentStatus() map[string]interface{} { httpClient := apiutil.GetClient(false) From 3b3950ad406ca43e45e60eb92825dbceffedb09c Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Fri, 11 Feb 2022 12:00:00 +0100 Subject: [PATCH 08/13] add release note; fix ineffectual assigment to err --- cmd/process-agent/app/status.go | 5 +++++ ...cess-agent-add-status-to-core-agent-38d1cd109ab5bb0e.yaml | 4 ++++ 2 files changed, 9 insertions(+) create mode 100644 releasenotes/notes/process-agent-add-status-to-core-agent-38d1cd109ab5bb0e.yaml diff --git a/cmd/process-agent/app/status.go b/cmd/process-agent/app/status.go index c7959e54edb07..ae4215c91e38e 100644 --- a/cmd/process-agent/app/status.go +++ b/cmd/process-agent/app/status.go @@ -112,6 +112,11 @@ func getAndWriteStatus(w io.Writer, options ...util.StatusOption) { } stats, err := ddstatus.FormatProcessAgentStatus(body) + if err != nil { + writeError(w, err) + return + } + w.Write([]byte(stats)) } diff --git a/releasenotes/notes/process-agent-add-status-to-core-agent-38d1cd109ab5bb0e.yaml b/releasenotes/notes/process-agent-add-status-to-core-agent-38d1cd109ab5bb0e.yaml new file mode 100644 index 0000000000000..9dc3600b9a083 --- /dev/null +++ b/releasenotes/notes/process-agent-add-status-to-core-agent-38d1cd109ab5bb0e.yaml @@ -0,0 +1,4 @@ +--- +features: + - | + Add `process-agent status` output to the core Agent status command. From 94099b303bf36d2698b22dcd97265c86b21636ec Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Fri, 11 Feb 2022 12:25:56 +0100 Subject: [PATCH 09/13] check for error while writing rendered template --- cmd/process-agent/app/status.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd/process-agent/app/status.go b/cmd/process-agent/app/status.go index ae4215c91e38e..a25e2b5398cc2 100644 --- a/cmd/process-agent/app/status.go +++ b/cmd/process-agent/app/status.go @@ -117,7 +117,10 @@ func getAndWriteStatus(w io.Writer, options ...util.StatusOption) { return } - w.Write([]byte(stats)) + _, err = w.Write([]byte(stats)) + if err != nil { + _ = log.Error(err) + } } // StatusCmd returns a cobra command that prints the current status From 63387aa60e2a86d82fd8b26fac85f8f49c5807e3 Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Mon, 14 Feb 2022 17:25:02 +0100 Subject: [PATCH 10/13] add helper function to write http error; use context.Background instead of TODO --- cmd/process-agent/api/status.go | 13 ++++++++----- pkg/process/util/status.go | 6 +++--- pkg/process/util/status_test.go | 6 +++--- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/cmd/process-agent/api/status.go b/cmd/process-agent/api/status.go index 2058ed0f65bfe..305cf0df05f51 100644 --- a/cmd/process-agent/api/status.go +++ b/cmd/process-agent/api/status.go @@ -13,23 +13,26 @@ import ( "github.com/DataDog/datadog-agent/pkg/util/log" ) +func writeError(err error, code int, w http.ResponseWriter) { + body, _ := json.Marshal(map[string]string{"error": err.Error()}) + http.Error(w, string(body), code) +} + func statusHandler(w http.ResponseWriter, _ *http.Request) { log.Info("Got a request for the status. Making status.") agentStatus, err := util.GetStatus() if err != nil { if err != nil { - _ = log.Warn("failed to get status from agent:", agentStatus) - body, _ := json.Marshal(map[string]string{"error": err.Error()}) - http.Error(w, string(body), http.StatusInternalServerError) + _ = log.Warn("failed to get status from agent:", err) + writeError(err, http.StatusInternalServerError, w) } } b, err := json.Marshal(agentStatus) if err != nil { _ = log.Warn("failed to serialize status response from agent:", err) - body, _ := json.Marshal(map[string]string{"error": err.Error()}) - http.Error(w, string(body), http.StatusInternalServerError) + writeError(err, http.StatusInternalServerError, w) } _, err = w.Write(b) diff --git a/pkg/process/util/status.go b/pkg/process/util/status.go index 4a3854fd8c04e..d792bbafb3a0d 100644 --- a/pkg/process/util/status.go +++ b/pkg/process/util/status.go @@ -95,13 +95,13 @@ func OverrideTime(t time.Time) StatusOption { } func getCoreStatus() (s CoreStatus) { - hostnameData, err := util.GetHostnameData(context.TODO()) + hostnameData, err := util.GetHostnameData(context.Background()) var metadata *host.Payload if err != nil { log.Errorf("Error grabbing hostname for status: %v", err) - metadata = host.GetPayloadFromCache(context.TODO(), util.HostnameData{Hostname: "unknown", Provider: "unknown"}) + metadata = host.GetPayloadFromCache(context.Background(), util.HostnameData{Hostname: "unknown", Provider: "unknown"}) } else { - metadata = host.GetPayloadFromCache(context.TODO(), hostnameData) + metadata = host.GetPayloadFromCache(context.Background(), hostnameData) } return CoreStatus{ diff --git a/pkg/process/util/status_test.go b/pkg/process/util/status_test.go index e86d7485e99e7..ff1a325b83870 100644 --- a/pkg/process/util/status_test.go +++ b/pkg/process/util/status_test.go @@ -92,12 +92,12 @@ func TestGetStatus(t *testing.T) { cfg := ddconfig.Mock() ddconfig.DetectFeatures() - hostnameData, err := util.GetHostnameData(context.TODO()) + hostnameData, err := util.GetHostnameData(context.Background()) var metadata *host.Payload if err != nil { - metadata = host.GetPayloadFromCache(context.TODO(), util.HostnameData{Hostname: "unknown", Provider: "unknown"}) + metadata = host.GetPayloadFromCache(context.Background(), util.HostnameData{Hostname: "unknown", Provider: "unknown"}) } else { - metadata = host.GetPayloadFromCache(context.TODO(), hostnameData) + metadata = host.GetPayloadFromCache(context.Background(), hostnameData) } expectedStatus := &Status{ From 4e296b0d0f35d6c00aee0bb8b6e7e5dd3707f291 Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Mon, 14 Feb 2022 19:25:47 +0100 Subject: [PATCH 11/13] use httptest --- cmd/process-agent/api/status.go | 17 ++++++- cmd/process-agent/app/status.go | 32 +++++++----- cmd/process-agent/app/status_test.go | 73 +++++++++------------------- pkg/process/util/status.go | 21 ++------ pkg/process/util/status_test.go | 57 +++++----------------- 5 files changed, 74 insertions(+), 126 deletions(-) diff --git a/cmd/process-agent/api/status.go b/cmd/process-agent/api/status.go index 305cf0df05f51..c8982e733a35c 100644 --- a/cmd/process-agent/api/status.go +++ b/cmd/process-agent/api/status.go @@ -7,8 +7,10 @@ package api import ( "encoding/json" + "fmt" "net/http" + ddconfig "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/process/util" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -21,7 +23,20 @@ func writeError(err error, code int, w http.ResponseWriter) { func statusHandler(w http.ResponseWriter, _ *http.Request) { log.Info("Got a request for the status. Making status.") - agentStatus, err := util.GetStatus() + ipcAddr, err := ddconfig.GetIPCAddress() + if err != nil { + writeError(err, http.StatusInternalServerError, w) + _ = log.Warn("config error:", err) + } + + port := ddconfig.Datadog.GetInt("process_config.expvar_port") + if port <= 0 { + _ = log.Warnf("Invalid process_config.expvar_port -- %d, using default port %d\n", port, ddconfig.DefaultProcessExpVarPort) + port = ddconfig.DefaultProcessExpVarPort + } + expvarEndpoint := fmt.Sprintf("http://%s:%d/debug/vars", ipcAddr, port) + + agentStatus, err := util.GetStatus(expvarEndpoint) if err != nil { if err != nil { _ = log.Warn("failed to get status from agent:", err) diff --git a/cmd/process-agent/app/status.go b/cmd/process-agent/app/status.go index a25e2b5398cc2..7b87c803f63f7 100644 --- a/cmd/process-agent/app/status.go +++ b/cmd/process-agent/app/status.go @@ -63,14 +63,8 @@ func writeError(w io.Writer, e error) { _ = log.Error(err) } } -func fetchStatus() ([]byte, error) { - addressPort, err := api.GetAPIAddressPort() - if err != nil { - return nil, fmt.Errorf("config error: %s", err.Error()) - } - - statusEndpoint := fmt.Sprintf("http://%s/agent/status", addressPort) - body, err := apiutil.DoGet(httpClient, statusEndpoint) +func fetchStatus(statusURL string) ([]byte, error) { + body, err := apiutil.DoGet(httpClient, statusURL) if err != nil { return nil, util.NewConnectionError(err) } @@ -79,8 +73,8 @@ func fetchStatus() ([]byte, error) { } // getAndWriteStatus calls the status server and writes it to `w` -func getAndWriteStatus(w io.Writer, options ...util.StatusOption) { - body, err := fetchStatus() +func getAndWriteStatus(statusURL string, w io.Writer, options ...util.StatusOption) { + body, err := fetchStatus(statusURL) if err != nil { switch err.(type) { case util.ConnectionError: @@ -123,6 +117,14 @@ func getAndWriteStatus(w io.Writer, options ...util.StatusOption) { } } +func getStatusURL() (string, error) { + addressPort, err := api.GetAPIAddressPort() + if err != nil { + return "", fmt.Errorf("config error: %s", err.Error()) + } + return fmt.Sprintf("http://%s/agent/status", addressPort), nil +} + // StatusCmd returns a cobra command that prints the current status func StatusCmd() *cobra.Command { return &cobra.Command{ @@ -137,6 +139,7 @@ func runStatus(cmd *cobra.Command, _ []string) error { err := config.LoadConfigIfExists(cmd.Flag("config").Value.String()) if err != nil { writeError(os.Stdout, err) + return err } err = ddconfig.SetupLogger( @@ -150,8 +153,15 @@ func runStatus(cmd *cobra.Command, _ []string) error { ) if err != nil { writeError(os.Stdout, err) + return err + } + + statusURL, err := getStatusURL() + if err != nil { + writeError(os.Stdout, err) + return err } - getAndWriteStatus(os.Stdout) + getAndWriteStatus(statusURL, os.Stdout) return nil } diff --git a/cmd/process-agent/app/status_test.go b/cmd/process-agent/app/status_test.go index 8aafc5f324552..2d83e0087393d 100644 --- a/cmd/process-agent/app/status_test.go +++ b/cmd/process-agent/app/status_test.go @@ -1,14 +1,11 @@ package app import ( - "context" "encoding/json" "fmt" - "github.com/DataDog/datadog-agent/pkg/process/util" - "net" "net/http" + "net/http/httptest" "strings" - "sync" "testing" "text/template" "time" @@ -16,49 +13,24 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/DataDog/datadog-agent/cmd/process-agent/api" "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/metadata/host" + "github.com/DataDog/datadog-agent/pkg/process/util" ddstatus "github.com/DataDog/datadog-agent/pkg/status" ) -type statusServer struct { - shutdownWg *sync.WaitGroup - server *http.Server -} - -func (s *statusServer) stop() error { - err := s.server.Shutdown(context.Background()) - if err != nil { - return err - } - - s.shutdownWg.Wait() - return nil -} - -func startTestServer(t *testing.T, cfg config.Config, expectedStatus util.Status) statusServer { - var serverWg sync.WaitGroup - serverWg.Add(1) - - statusMux := http.NewServeMux() - statusMux.HandleFunc("/agent/status", func(w http.ResponseWriter, _ *http.Request) { - b, err := json.Marshal(expectedStatus) +func fakeStatusServer(t *testing.T, stats util.Status) *httptest.Server { + handler := func(w http.ResponseWriter, r *http.Request) { + defer r.Body.Close() + b, err := json.Marshal(stats) require.NoError(t, err) _, err = w.Write(b) require.NoError(t, err) - }) - - statusEndpoint := fmt.Sprintf("localhost:%d", cfg.GetInt("process_config.cmd_port")) - server := http.Server{Addr: statusEndpoint, Handler: statusMux} - statusListener, err := net.Listen("tcp", statusEndpoint) - require.NoError(t, err) - go func() { - _ = server.Serve(statusListener) - serverWg.Done() - }() + } - return statusServer{server: &server, shutdownWg: &serverWg} + return httptest.NewServer(http.HandlerFunc(handler)) } func TestStatus(t *testing.T) { @@ -73,27 +45,20 @@ func TestStatus(t *testing.T) { Expvars: util.ProcessExpvars{}, } - // Use different ports in case the host is running a real agent - cfg := config.Mock() - cfg.Set("process_config.cmd_port", 8082) - server := startTestServer(t, cfg, expectedStatus) - - var statusBuilder strings.Builder + server := fakeStatusServer(t, expectedStatus) + defer server.Close() + // Build what the expected status should be j, err := json.Marshal(expectedStatus) require.NoError(t, err) - - // Build what the expected status should be expectedOutput, err := ddstatus.FormatProcessAgentStatus(j) require.NoError(t, err) // Build the actual status - getAndWriteStatus(&statusBuilder, util.OverrideTime(testTime)) + var statusBuilder strings.Builder + getAndWriteStatus(server.URL, &statusBuilder, util.OverrideTime(testTime)) assert.Equal(t, expectedOutput, statusBuilder.String()) - - err = server.stop() - require.NoError(t, err) } func TestNotRunning(t *testing.T) { @@ -101,8 +66,12 @@ func TestNotRunning(t *testing.T) { cfg := config.Mock() cfg.Set("process_config.cmd_port", 8082) + addressPort, err := api.GetAPIAddressPort() + require.NoError(t, err) + statusURL := fmt.Sprintf("http://%s/agent/status", addressPort) + var b strings.Builder - getAndWriteStatus(&b) + getAndWriteStatus(statusURL, &b) assert.Equal(t, notRunning, b.String()) } @@ -115,7 +84,9 @@ func TestError(t *testing.T) { _, ipcError := config.GetIPCAddress() var errText, expectedErrText strings.Builder - getAndWriteStatus(&errText) + url, err := getStatusURL() + assert.Equal(t, "", url) + writeError(&errText, err) tpl, err := template.New("").Parse(errorMessage) require.NoError(t, err) diff --git a/pkg/process/util/status.go b/pkg/process/util/status.go index d792bbafb3a0d..3a62d145e9cce 100644 --- a/pkg/process/util/status.go +++ b/pkg/process/util/status.go @@ -3,7 +3,6 @@ package util import ( "context" "encoding/json" - "fmt" "runtime" "time" @@ -115,21 +114,9 @@ func getCoreStatus() (s CoreStatus) { } } -func getExpvars() (s ProcessExpvars, err error) { - ipcAddr, err := ddconfig.GetIPCAddress() - if err != nil { - return ProcessExpvars{}, fmt.Errorf("config error: %s", err.Error()) - } - - port := ddconfig.Datadog.GetInt("process_config.expvar_port") - if port <= 0 { - _ = log.Warnf("Invalid process_config.expvar_port -- %d, using default port %d\n", port, ddconfig.DefaultProcessExpVarPort) - port = ddconfig.DefaultProcessExpVarPort - } - expvarEndpoint := fmt.Sprintf("http://%s:%d/debug/vars", ipcAddr, port) - +func getExpvars(expVarURL string) (s ProcessExpvars, err error) { httpClient := apiutil.GetClient(false) - b, err := apiutil.DoGet(httpClient, expvarEndpoint) + b, err := apiutil.DoGet(httpClient, expVarURL) if err != nil { return s, ConnectionError{err} } @@ -139,9 +126,9 @@ func getExpvars() (s ProcessExpvars, err error) { } // GetStatus returns a Status object with runtime information about process-agent -func GetStatus() (*Status, error) { +func GetStatus(expVarURL string) (*Status, error) { coreStatus := getCoreStatus() - processExpVars, err := getExpvars() + processExpVars, err := getExpvars(expVarURL) if err != nil { return nil, err } diff --git a/pkg/process/util/status_test.go b/pkg/process/util/status_test.go index ff1a325b83870..1f425f303ef9d 100644 --- a/pkg/process/util/status_test.go +++ b/pkg/process/util/status_test.go @@ -3,61 +3,32 @@ package util import ( "context" "encoding/json" - "fmt" - "net" "net/http" + "net/http/httptest" "runtime" - "sync" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/DataDog/datadog-agent/pkg/config" ddconfig "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/metadata/host" "github.com/DataDog/datadog-agent/pkg/util" "github.com/DataDog/datadog-agent/pkg/version" ) -type expVarServer struct { - shutdownWg *sync.WaitGroup - server *http.Server -} - -func (s *expVarServer) stop() error { - err := s.server.Shutdown(context.Background()) - if err != nil { - return err - } - - s.shutdownWg.Wait() - return nil -} - -func startTestServer(t *testing.T, cfg config.Config, expectedExpVars ProcessExpvars) expVarServer { - var serverWg sync.WaitGroup - serverWg.Add(1) - - expVarMux := http.NewServeMux() - expVarMux.HandleFunc("/debug/vars", func(w http.ResponseWriter, _ *http.Request) { - b, err := json.Marshal(expectedExpVars) +func fakeExpVarServer(t *testing.T, expVars ProcessExpvars) *httptest.Server { + handler := func(w http.ResponseWriter, r *http.Request) { + defer r.Body.Close() + b, err := json.Marshal(expVars) require.NoError(t, err) _, err = w.Write(b) require.NoError(t, err) - }) - expVarEndpoint := fmt.Sprintf("localhost:%d", cfg.GetInt("process_config.expvar_port")) - expVarsServer := http.Server{Addr: expVarEndpoint, Handler: expVarMux} - expVarsListener, err := net.Listen("tcp", expVarEndpoint) - require.NoError(t, err) - go func() { - _ = expVarsServer.Serve(expVarsListener) - serverWg.Done() - }() + } - return expVarServer{server: &expVarsServer, shutdownWg: &serverWg} + return httptest.NewServer(http.HandlerFunc(handler)) } func TestGetStatus(t *testing.T) { @@ -89,7 +60,7 @@ func TestGetStatus(t *testing.T) { // Feature detection needs to run before host methods are called. During runtime, feature detection happens // when the datadog.yaml file is loaded - cfg := ddconfig.Mock() + ddconfig.Mock() ddconfig.DetectFeatures() hostnameData, err := util.GetHostnameData(context.Background()) @@ -114,16 +85,10 @@ func TestGetStatus(t *testing.T) { Expvars: expectedExpVars, } - // Use different port in case the host is running a real agent - cfg.Set("process_config.expvar_port", 8081) - - expVarSrv := startTestServer(t, cfg, expectedExpVars) - defer func() { - err := expVarSrv.stop() - require.NoError(t, err) - }() + expVarSrv := fakeExpVarServer(t, expectedExpVars) + defer expVarSrv.Close() - stats, err := GetStatus() + stats, err := GetStatus(expVarSrv.URL) require.NoError(t, err) OverrideTime(testTime)(stats) From cf9815c45461bca48168b4fc1c4eb65effdd0582 Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Mon, 14 Feb 2022 20:50:58 +0100 Subject: [PATCH 12/13] fix error handling --- cmd/process-agent/api/status.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cmd/process-agent/api/status.go b/cmd/process-agent/api/status.go index c8982e733a35c..acc2209edf1f9 100644 --- a/cmd/process-agent/api/status.go +++ b/cmd/process-agent/api/status.go @@ -23,6 +23,7 @@ func writeError(err error, code int, w http.ResponseWriter) { func statusHandler(w http.ResponseWriter, _ *http.Request) { log.Info("Got a request for the status. Making status.") + // Get expVar server address ipcAddr, err := ddconfig.GetIPCAddress() if err != nil { writeError(err, http.StatusInternalServerError, w) @@ -38,10 +39,8 @@ func statusHandler(w http.ResponseWriter, _ *http.Request) { agentStatus, err := util.GetStatus(expvarEndpoint) if err != nil { - if err != nil { - _ = log.Warn("failed to get status from agent:", err) - writeError(err, http.StatusInternalServerError, w) - } + _ = log.Warn("failed to get status from agent:", err) + writeError(err, http.StatusInternalServerError, w) } b, err := json.Marshal(agentStatus) From 03e1084566d5d1b11d4296b4af36a3c3f83be376 Mon Sep 17 00:00:00 2001 From: Moises Botarro Date: Tue, 15 Feb 2022 11:46:31 +0100 Subject: [PATCH 13/13] update API server: return if error is produced --- cmd/process-agent/api/status.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/process-agent/api/status.go b/cmd/process-agent/api/status.go index acc2209edf1f9..2f7854bc8a02f 100644 --- a/cmd/process-agent/api/status.go +++ b/cmd/process-agent/api/status.go @@ -28,6 +28,7 @@ func statusHandler(w http.ResponseWriter, _ *http.Request) { if err != nil { writeError(err, http.StatusInternalServerError, w) _ = log.Warn("config error:", err) + return } port := ddconfig.Datadog.GetInt("process_config.expvar_port") @@ -41,12 +42,14 @@ func statusHandler(w http.ResponseWriter, _ *http.Request) { if err != nil { _ = log.Warn("failed to get status from agent:", err) writeError(err, http.StatusInternalServerError, w) + return } b, err := json.Marshal(agentStatus) if err != nil { _ = log.Warn("failed to serialize status response from agent:", err) writeError(err, http.StatusInternalServerError, w) + return } _, err = w.Write(b)