Skip to content

Commit

Permalink
add common generic Get for system-probe checks (#31254)
Browse files Browse the repository at this point in the history
  • Loading branch information
brycekahle authored Nov 21, 2024
1 parent 78d29ba commit 0fdbad2
Show file tree
Hide file tree
Showing 13 changed files with 151 additions and 328 deletions.
50 changes: 50 additions & 0 deletions cmd/system-probe/api/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,16 @@
package client

import (
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"

"github.com/DataDog/datadog-agent/cmd/system-probe/config/types"
"github.com/DataDog/datadog-agent/pkg/util/funcs"
)

Expand All @@ -35,3 +41,47 @@ func get(socketPath string) *http.Client {
},
}
}

// GetCheck returns data unmarshalled from JSON to T, from the specified module at the /<module>/check endpoint.
func GetCheck[T any](client *http.Client, module types.ModuleName) (T, error) {
var data T
req, err := http.NewRequest("GET", ModuleURL(module, "/check"), nil)
if err != nil {
return data, err
}

resp, err := client.Do(req)
if err != nil {
return data, err
}
defer resp.Body.Close()

body, err := io.ReadAll(resp.Body)
if err != nil {
return data, err
}
if resp.StatusCode != http.StatusOK {
return data, fmt.Errorf("non-ok status code: url %s, status_code: %d, response: `%s`", req.URL, resp.StatusCode, string(body))
}

err = json.Unmarshal(body, &data)
return data, err
}

func constructURL(module string, endpoint string) string {
u, _ := url.Parse("http://sysprobe")
if module != "" {
u = u.JoinPath(module)
}
path, query, found := strings.Cut(endpoint, "?")
u = u.JoinPath(path)
if found {
u.RawQuery = query
}
return u.String()
}

// ModuleURL constructs a system-probe ModuleURL given the specified module and endpoint.
func ModuleURL(module types.ModuleName, endpoint string) string {
return constructURL(string(module), endpoint)
}
53 changes: 53 additions & 0 deletions cmd/system-probe/api/client/client_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024-present Datadog, Inc.

package client

import (
"context"
"net"
"net/http"
"net/http/httptest"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestConstructURL(t *testing.T) {
u := constructURL("", "/asdf?a=b")
assert.Equal(t, "http://sysprobe/asdf?a=b", u)

u = constructURL("zzzz", "/asdf?a=b")
assert.Equal(t, "http://sysprobe/zzzz/asdf?a=b", u)

u = constructURL("zzzz", "asdf")
assert.Equal(t, "http://sysprobe/zzzz/asdf", u)
}

func TestGetCheck(t *testing.T) {
type testData struct {
Str string
Num int
}

server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/test/check" {
_, _ = w.Write([]byte(`{"Str": "asdf", "Num": 42}`))
} else {
w.WriteHeader(http.StatusNotFound)
}
}))
t.Cleanup(server.Close)

client := &http.Client{Transport: &http.Transport{DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
return net.Dial("tcp", server.Listener.Addr().String())
}}}

resp, err := GetCheck[testData](client, "test")
require.NoError(t, err)
assert.Equal(t, "asdf", resp.Str)
assert.Equal(t, 42, resp.Num)
}
31 changes: 7 additions & 24 deletions pkg/collector/corechecks/ebpf/ebpf.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,27 @@
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

//go:build cgo && linux
//go:build linux

// Package ebpf contains all the ebpf-based checks.
package ebpf

import (
"fmt"
"net/http"
"strings"

"github.com/cihub/seelog"
"gopkg.in/yaml.v2"

sysprobeclient "github.com/DataDog/datadog-agent/cmd/system-probe/api/client"
sysconfig "github.com/DataDog/datadog-agent/cmd/system-probe/config"
"github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration"
"github.com/DataDog/datadog-agent/pkg/aggregator/sender"
"github.com/DataDog/datadog-agent/pkg/collector/check"
core "github.com/DataDog/datadog-agent/pkg/collector/corechecks"
ebpfcheck "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/probe/ebpfcheck/model"
pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup"
processnet "github.com/DataDog/datadog-agent/pkg/process/net"
"github.com/DataDog/datadog-agent/pkg/util/log"
"github.com/DataDog/datadog-agent/pkg/util/optional"
)
Expand All @@ -38,8 +39,8 @@ type EBPFCheckConfig struct {

// EBPFCheck grabs eBPF map/program/perf buffer metrics
type EBPFCheck struct {
config *EBPFCheckConfig
sysProbeUtil processnet.SysProbeUtil
config *EBPFCheckConfig
sysProbeClient *http.Client
core.CheckBase
}

Expand Down Expand Up @@ -68,26 +69,13 @@ func (m *EBPFCheck) Configure(senderManager sender.SenderManager, _ uint64, conf
if err := m.config.Parse(config); err != nil {
return fmt.Errorf("ebpf check config: %s", err)
}
if err := processnet.CheckPath(pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket")); err != nil {
return fmt.Errorf("sysprobe socket: %s", err)
}

m.sysProbeClient = sysprobeclient.Get(pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"))
return nil
}

// Run executes the check
func (m *EBPFCheck) Run() error {
if m.sysProbeUtil == nil {
var err error
m.sysProbeUtil, err = processnet.GetRemoteSystemProbeUtil(
pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"),
)
if err != nil {
return fmt.Errorf("sysprobe connection: %s", err)
}
}

data, err := m.sysProbeUtil.GetCheck(sysconfig.EBPFModule)
stats, err := sysprobeclient.GetCheck[ebpfcheck.EBPFStats](m.sysProbeClient, sysconfig.EBPFModule)
if err != nil {
return fmt.Errorf("get ebpf check: %s", err)
}
Expand All @@ -97,11 +85,6 @@ func (m *EBPFCheck) Run() error {
return fmt.Errorf("get metric sender: %s", err)
}

stats, ok := data.(ebpfcheck.EBPFStats)
if !ok {
return log.Errorf("ebpf check raw data has incorrect type: %T", stats)
}

totalMapMaxSize, totalMapRSS := uint64(0), uint64(0)
moduleTotalMapMaxSize, moduleTotalMapRSS := make(map[string]uint64), make(map[string]uint64)
reportBaseMap := func(mapStats ebpfcheck.EBPFMapStats) {
Expand Down
27 changes: 8 additions & 19 deletions pkg/collector/corechecks/ebpf/oomkill/oom_kill.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,19 @@
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

// FIXME: we require the `cgo` build tag because of this dep relationship:
// github.com/DataDog/datadog-agent/pkg/process/net depends on `github.com/DataDog/agent-payload/v5/process`,
// which has a hard dependency on `github.com/DataDog/zstd_0`, which requires CGO.
// Should be removed once `github.com/DataDog/agent-payload/v5/process` can be imported with CGO disabled.
//go:build cgo && linux
//go:build linux

// Package oomkill contains the OOMKill check.
package oomkill

import (
"fmt"
"net/http"
"strings"

yaml "gopkg.in/yaml.v2"

sysprobeclient "github.com/DataDog/datadog-agent/cmd/system-probe/api/client"
sysconfig "github.com/DataDog/datadog-agent/cmd/system-probe/config"
"github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration"
tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
Expand All @@ -28,7 +26,6 @@ import (
"github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/probe/oomkill/model"
pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup"
"github.com/DataDog/datadog-agent/pkg/metrics/event"
process_net "github.com/DataDog/datadog-agent/pkg/process/net"
"github.com/DataDog/datadog-agent/pkg/util/cgroups"
"github.com/DataDog/datadog-agent/pkg/util/log"
"github.com/DataDog/datadog-agent/pkg/util/optional"
Expand All @@ -47,8 +44,9 @@ type OOMKillConfig struct {
// OOMKillCheck grabs OOM Kill metrics
type OOMKillCheck struct {
core.CheckBase
instance *OOMKillConfig
tagger tagger.Component
instance *OOMKillConfig
tagger tagger.Component
sysProbeClient *http.Client
}

// Factory creates a new check factory
Expand Down Expand Up @@ -80,6 +78,7 @@ func (m *OOMKillCheck) Configure(senderManager sender.SenderManager, _ uint64, c
if err != nil {
return err
}
m.sysProbeClient = sysprobeclient.Get(pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"))

return m.instance.Parse(config)
}
Expand All @@ -90,13 +89,7 @@ func (m *OOMKillCheck) Run() error {
return nil
}

sysProbeUtil, err := process_net.GetRemoteSystemProbeUtil(
pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"))
if err != nil {
return err
}

data, err := sysProbeUtil.GetCheck(sysconfig.OOMKillProbeModule)
oomkillStats, err := sysprobeclient.GetCheck[[]model.OOMKillStats](m.sysProbeClient, sysconfig.OOMKillProbeModule)
if err != nil {
return err
}
Expand All @@ -109,10 +102,6 @@ func (m *OOMKillCheck) Run() error {

triggerType := ""
triggerTypeText := ""
oomkillStats, ok := data.([]model.OOMKillStats)
if !ok {
return log.Errorf("Raw data has incorrect type")
}
for _, line := range oomkillStats {
containerID, err := cgroups.ContainerFilter("", line.CgroupName)
if err != nil || containerID == "" {
Expand Down
33 changes: 11 additions & 22 deletions pkg/collector/corechecks/ebpf/tcpqueuelength/tcp_queue_length.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,17 @@
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

// FIXME: we require the `cgo` build tag because of this dep relationship:
// github.com/DataDog/datadog-agent/pkg/process/net depends on `github.com/DataDog/agent-payload/v5/process`,
// which has a hard dependency on `github.com/DataDog/zstd_0`, which requires CGO.
// Should be removed once `github.com/DataDog/agent-payload/v5/process` can be imported with CGO disabled.
//go:build cgo && linux
//go:build linux

//nolint:revive // TODO(PLINT) Fix revive linter
// Package tcpqueuelength contains the TCP Queue Length check
package tcpqueuelength

import (
yaml "gopkg.in/yaml.v2"
"net/http"

"gopkg.in/yaml.v2"

sysprobeclient "github.com/DataDog/datadog-agent/cmd/system-probe/api/client"
sysconfig "github.com/DataDog/datadog-agent/cmd/system-probe/config"
"github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration"
tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
Expand All @@ -24,7 +23,6 @@ import (
core "github.com/DataDog/datadog-agent/pkg/collector/corechecks"
"github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/probe/tcpqueuelength/model"
pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup"
process_net "github.com/DataDog/datadog-agent/pkg/process/net"
"github.com/DataDog/datadog-agent/pkg/util/cgroups"
"github.com/DataDog/datadog-agent/pkg/util/log"
"github.com/DataDog/datadog-agent/pkg/util/optional"
Expand All @@ -43,8 +41,9 @@ type TCPQueueLengthConfig struct {
// TCPQueueLengthCheck grabs TCP queue length metrics
type TCPQueueLengthCheck struct {
core.CheckBase
instance *TCPQueueLengthConfig
tagger tagger.Component
instance *TCPQueueLengthConfig
tagger tagger.Component
sysProbeClient *http.Client
}

// Factory creates a new check factory
Expand Down Expand Up @@ -76,6 +75,7 @@ func (t *TCPQueueLengthCheck) Configure(senderManager sender.SenderManager, _ ui
if err != nil {
return err
}
t.sysProbeClient = sysprobeclient.Get(pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"))

return t.instance.Parse(config)
}
Expand All @@ -86,13 +86,7 @@ func (t *TCPQueueLengthCheck) Run() error {
return nil
}

sysProbeUtil, err := process_net.GetRemoteSystemProbeUtil(
pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"))
if err != nil {
return err
}

data, err := sysProbeUtil.GetCheck(sysconfig.TCPQueueLengthTracerModule)
stats, err := sysprobeclient.GetCheck[model.TCPQueueLengthStats](t.sysProbeClient, sysconfig.TCPQueueLengthTracerModule)
if err != nil {
return err
}
Expand All @@ -102,11 +96,6 @@ func (t *TCPQueueLengthCheck) Run() error {
return err
}

stats, ok := data.(model.TCPQueueLengthStats)
if !ok {
return log.Errorf("Raw data has incorrect type")
}

for k, v := range stats {
containerID, err := cgroups.ContainerFilter("", k)
if err != nil || containerID == "" {
Expand Down
Loading

0 comments on commit 0fdbad2

Please sign in to comment.