Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add common generic Get for system-probe checks #31254

Merged
merged 4 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions cmd/system-probe/api/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,16 @@
package client

import (
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"

"github.com/DataDog/datadog-agent/cmd/system-probe/config/types"
"github.com/DataDog/datadog-agent/pkg/util/funcs"
)

Expand All @@ -35,3 +41,47 @@ func get(socketPath string) *http.Client {
},
}
}

// GetCheck returns data unmarshalled from JSON to T, from the specified module at the /<module>/check endpoint.
func GetCheck[T any](client *http.Client, module types.ModuleName) (T, error) {
var data T
req, err := http.NewRequest("GET", ModuleURL(module, "/check"), nil)
if err != nil {
return data, err
}

resp, err := client.Do(req)
if err != nil {
return data, err
}
defer resp.Body.Close()

body, err := io.ReadAll(resp.Body)
if err != nil {
return data, err
}
if resp.StatusCode != http.StatusOK {
return data, fmt.Errorf("non-ok status code: url %s, status_code: %d, response: `%s`", req.URL, resp.StatusCode, string(body))
}

err = json.Unmarshal(body, &data)
return data, err
}

func constructURL(module string, endpoint string) string {
brycekahle marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need that function?
isn't url.JoinPath("http://sysprobe", module, endpoint) sufficient?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

url.JoinPath doesn't support query parameters, while this function does. It isn't important for the generic checks, but there are other endpoints which use query parameters. Thus this property becomes important for some of the follow up PRs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it does support query params
https://go.dev/play/p/fKevIj3hvh6

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure we actually need unescape, I'm assuming http client knows how to handle it

u, _ := url.Parse("http://sysprobe")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since the operation is constant and it won't change if module and endpoint will be different, there is no need to preform the redundant parsing, and instead let's create a simple object that contains whatever you need

if module != "" {
u = u.JoinPath(module)
}
path, query, found := strings.Cut(endpoint, "?")
u = u.JoinPath(path)
if found {
u.RawQuery = query
}
return u.String()
}

// ModuleURL constructs a system-probe ModuleURL given the specified module and endpoint.
func ModuleURL(module types.ModuleName, endpoint string) string {
return constructURL(string(module), endpoint)
}
53 changes: 53 additions & 0 deletions cmd/system-probe/api/client/client_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024-present Datadog, Inc.

package client

import (
"context"
"net"
"net/http"
"net/http/httptest"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestConstructURL(t *testing.T) {
u := constructURL("", "/asdf?a=b")
assert.Equal(t, "http://sysprobe/asdf?a=b", u)

u = constructURL("zzzz", "/asdf?a=b")
assert.Equal(t, "http://sysprobe/zzzz/asdf?a=b", u)

u = constructURL("zzzz", "asdf")
assert.Equal(t, "http://sysprobe/zzzz/asdf", u)
}

func TestGetCheck(t *testing.T) {
type testData struct {
Str string
Num int
}

server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/test/check" {
_, _ = w.Write([]byte(`{"Str": "asdf", "Num": 42}`))
} else {
w.WriteHeader(http.StatusNotFound)
}
}))
t.Cleanup(server.Close)

client := &http.Client{Transport: &http.Transport{DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
return net.Dial("tcp", server.Listener.Addr().String())
}}}

resp, err := GetCheck[testData](client, "test")
require.NoError(t, err)
assert.Equal(t, "asdf", resp.Str)
assert.Equal(t, 42, resp.Num)
}
31 changes: 7 additions & 24 deletions pkg/collector/corechecks/ebpf/ebpf.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,27 @@
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

//go:build cgo && linux
//go:build linux

// Package ebpf contains all the ebpf-based checks.
package ebpf

import (
"fmt"
"net/http"
"strings"

"github.com/cihub/seelog"
"gopkg.in/yaml.v2"

sysprobeclient "github.com/DataDog/datadog-agent/cmd/system-probe/api/client"
sysconfig "github.com/DataDog/datadog-agent/cmd/system-probe/config"
"github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration"
"github.com/DataDog/datadog-agent/pkg/aggregator/sender"
"github.com/DataDog/datadog-agent/pkg/collector/check"
core "github.com/DataDog/datadog-agent/pkg/collector/corechecks"
ebpfcheck "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/probe/ebpfcheck/model"
pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup"
processnet "github.com/DataDog/datadog-agent/pkg/process/net"
"github.com/DataDog/datadog-agent/pkg/util/log"
"github.com/DataDog/datadog-agent/pkg/util/optional"
)
Expand All @@ -38,8 +39,8 @@ type EBPFCheckConfig struct {

// EBPFCheck grabs eBPF map/program/perf buffer metrics
type EBPFCheck struct {
config *EBPFCheckConfig
sysProbeUtil processnet.SysProbeUtil
config *EBPFCheckConfig
sysProbeClient *http.Client
core.CheckBase
}

Expand Down Expand Up @@ -68,26 +69,13 @@ func (m *EBPFCheck) Configure(senderManager sender.SenderManager, _ uint64, conf
if err := m.config.Parse(config); err != nil {
return fmt.Errorf("ebpf check config: %s", err)
}
if err := processnet.CheckPath(pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket")); err != nil {
return fmt.Errorf("sysprobe socket: %s", err)
}

m.sysProbeClient = sysprobeclient.Get(pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"))
return nil
}

// Run executes the check
func (m *EBPFCheck) Run() error {
if m.sysProbeUtil == nil {
var err error
m.sysProbeUtil, err = processnet.GetRemoteSystemProbeUtil(
pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"),
)
if err != nil {
return fmt.Errorf("sysprobe connection: %s", err)
}
}

data, err := m.sysProbeUtil.GetCheck(sysconfig.EBPFModule)
stats, err := sysprobeclient.GetCheck[ebpfcheck.EBPFStats](m.sysProbeClient, sysconfig.EBPFModule)
if err != nil {
return fmt.Errorf("get ebpf check: %s", err)
}
Expand All @@ -97,11 +85,6 @@ func (m *EBPFCheck) Run() error {
return fmt.Errorf("get metric sender: %s", err)
}

stats, ok := data.(ebpfcheck.EBPFStats)
if !ok {
return log.Errorf("ebpf check raw data has incorrect type: %T", stats)
}

totalMapMaxSize, totalMapRSS := uint64(0), uint64(0)
moduleTotalMapMaxSize, moduleTotalMapRSS := make(map[string]uint64), make(map[string]uint64)
reportBaseMap := func(mapStats ebpfcheck.EBPFMapStats) {
Expand Down
27 changes: 8 additions & 19 deletions pkg/collector/corechecks/ebpf/oomkill/oom_kill.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,19 @@
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

// FIXME: we require the `cgo` build tag because of this dep relationship:
// github.com/DataDog/datadog-agent/pkg/process/net depends on `github.com/DataDog/agent-payload/v5/process`,
// which has a hard dependency on `github.com/DataDog/zstd_0`, which requires CGO.
// Should be removed once `github.com/DataDog/agent-payload/v5/process` can be imported with CGO disabled.
//go:build cgo && linux
//go:build linux

// Package oomkill contains the OOMKill check.
package oomkill

import (
"fmt"
"net/http"
"strings"

yaml "gopkg.in/yaml.v2"

sysprobeclient "github.com/DataDog/datadog-agent/cmd/system-probe/api/client"
sysconfig "github.com/DataDog/datadog-agent/cmd/system-probe/config"
"github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration"
tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
Expand All @@ -28,7 +26,6 @@ import (
"github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/probe/oomkill/model"
pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup"
"github.com/DataDog/datadog-agent/pkg/metrics/event"
process_net "github.com/DataDog/datadog-agent/pkg/process/net"
"github.com/DataDog/datadog-agent/pkg/util/cgroups"
"github.com/DataDog/datadog-agent/pkg/util/log"
"github.com/DataDog/datadog-agent/pkg/util/optional"
Expand All @@ -47,8 +44,9 @@ type OOMKillConfig struct {
// OOMKillCheck grabs OOM Kill metrics
type OOMKillCheck struct {
core.CheckBase
instance *OOMKillConfig
tagger tagger.Component
instance *OOMKillConfig
tagger tagger.Component
sysProbeClient *http.Client
}

// Factory creates a new check factory
Expand Down Expand Up @@ -80,6 +78,7 @@ func (m *OOMKillCheck) Configure(senderManager sender.SenderManager, _ uint64, c
if err != nil {
return err
}
m.sysProbeClient = sysprobeclient.Get(pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"))

return m.instance.Parse(config)
}
Expand All @@ -90,13 +89,7 @@ func (m *OOMKillCheck) Run() error {
return nil
}

sysProbeUtil, err := process_net.GetRemoteSystemProbeUtil(
pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"))
if err != nil {
return err
}

data, err := sysProbeUtil.GetCheck(sysconfig.OOMKillProbeModule)
oomkillStats, err := sysprobeclient.GetCheck[[]model.OOMKillStats](m.sysProbeClient, sysconfig.OOMKillProbeModule)
if err != nil {
return err
}
Expand All @@ -109,10 +102,6 @@ func (m *OOMKillCheck) Run() error {

triggerType := ""
triggerTypeText := ""
oomkillStats, ok := data.([]model.OOMKillStats)
if !ok {
return log.Errorf("Raw data has incorrect type")
}
for _, line := range oomkillStats {
containerID, err := cgroups.ContainerFilter("", line.CgroupName)
if err != nil || containerID == "" {
Expand Down
33 changes: 11 additions & 22 deletions pkg/collector/corechecks/ebpf/tcpqueuelength/tcp_queue_length.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,17 @@
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

// FIXME: we require the `cgo` build tag because of this dep relationship:
// github.com/DataDog/datadog-agent/pkg/process/net depends on `github.com/DataDog/agent-payload/v5/process`,
// which has a hard dependency on `github.com/DataDog/zstd_0`, which requires CGO.
// Should be removed once `github.com/DataDog/agent-payload/v5/process` can be imported with CGO disabled.
//go:build cgo && linux
//go:build linux

//nolint:revive // TODO(PLINT) Fix revive linter
// Package tcpqueuelength contains the TCP Queue Length check
package tcpqueuelength

import (
yaml "gopkg.in/yaml.v2"
"net/http"

"gopkg.in/yaml.v2"

sysprobeclient "github.com/DataDog/datadog-agent/cmd/system-probe/api/client"
sysconfig "github.com/DataDog/datadog-agent/cmd/system-probe/config"
"github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration"
tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
Expand All @@ -24,7 +23,6 @@ import (
core "github.com/DataDog/datadog-agent/pkg/collector/corechecks"
"github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/probe/tcpqueuelength/model"
pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup"
process_net "github.com/DataDog/datadog-agent/pkg/process/net"
"github.com/DataDog/datadog-agent/pkg/util/cgroups"
"github.com/DataDog/datadog-agent/pkg/util/log"
"github.com/DataDog/datadog-agent/pkg/util/optional"
Expand All @@ -43,8 +41,9 @@ type TCPQueueLengthConfig struct {
// TCPQueueLengthCheck grabs TCP queue length metrics
type TCPQueueLengthCheck struct {
core.CheckBase
instance *TCPQueueLengthConfig
tagger tagger.Component
instance *TCPQueueLengthConfig
tagger tagger.Component
sysProbeClient *http.Client
}

// Factory creates a new check factory
Expand Down Expand Up @@ -76,6 +75,7 @@ func (t *TCPQueueLengthCheck) Configure(senderManager sender.SenderManager, _ ui
if err != nil {
return err
}
t.sysProbeClient = sysprobeclient.Get(pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"))

return t.instance.Parse(config)
}
Expand All @@ -86,13 +86,7 @@ func (t *TCPQueueLengthCheck) Run() error {
return nil
}

sysProbeUtil, err := process_net.GetRemoteSystemProbeUtil(
pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"))
if err != nil {
return err
}

data, err := sysProbeUtil.GetCheck(sysconfig.TCPQueueLengthTracerModule)
stats, err := sysprobeclient.GetCheck[model.TCPQueueLengthStats](t.sysProbeClient, sysconfig.TCPQueueLengthTracerModule)
if err != nil {
return err
}
Expand All @@ -102,11 +96,6 @@ func (t *TCPQueueLengthCheck) Run() error {
return err
}

stats, ok := data.(model.TCPQueueLengthStats)
if !ok {
return log.Errorf("Raw data has incorrect type")
}

for k, v := range stats {
containerID, err := cgroups.ContainerFilter("", k)
if err != nil || containerID == "" {
Expand Down
Loading
Loading