Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NPM-3666] Add dmesg to agent flare #32559

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions cmd/system-probe/api/debug/handlers_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,80 @@
package debug

import (
"bytes"
"context"
"errors"
"fmt"
"io"
"net/http"
"os/exec"
"regexp"
"syscall"
"time"
)

// klogRegexp matches a "<digits>" marker followed by the rest of the line.
// Submatch 1 is the digits inside the angle brackets and submatch 2 is the
// remaining text up to (not including) the newline. The pattern is not
// anchored, so it matches the first "<digits>" occurring anywhere in a line.
var klogRegexp = regexp.MustCompile(`<(\d+)>(.*)`)

// readAllDmesg returns the raw bytes read from the kernel log buffer.
//
// It makes two syscall.Klogctl calls: the first (action 10) asks for the size
// of the log buffer, the second (action 3) reads the messages into a slice of
// that size. On failure of either call it returns nil and an error wrapping
// the Klogctl error.
func readAllDmesg() ([]byte, error) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

readAllDmesg() is relocated from flare/archive_linux.go

// Action codes passed to Klogctl. The review suggestions below name the
// equivalents unix.SYSLOG_ACTION_SIZE_BUFFER and unix.SYSLOG_ACTION_READ_ALL.
const syslogActionSizeBuffer = 10
const syslogActionReadAll = 3

n, err := syscall.Klogctl(syslogActionSizeBuffer, nil)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
n, err := syscall.Klogctl(syslogActionSizeBuffer, nil)
n, err := syscall.Klogctl(unix.SYSLOG_ACTION_SIZE_BUFFER, nil)

if err != nil {
return nil, fmt.Errorf("failed to query size of log buffer [%w]", err)
}

// Allocate exactly the size reported by the first call.
b := make([]byte, n)

m, err := syscall.Klogctl(syslogActionReadAll, b)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
m, err := syscall.Klogctl(syslogActionReadAll, b)
m, err := syscall.Klogctl(unix.SYSLOG_ACTION_READ_ALL, b)

if err != nil {
return nil, fmt.Errorf("failed to read messages from log buffer [%w]", err)
}

// Trim to the count returned by the read call.
return b[:m], nil
}

// parseDmesg converts a raw kernel log buffer into text, one line at a time.
//
// Lines matching klogRegexp are replaced by the text after the "<digits>"
// marker plus a newline; lines that do not match are copied unchanged.
// It returns the accumulated text, or the text built so far together with the
// error if reading a line fails with anything other than io.EOF.
//
// NOTE(review): ReadString returns any data read before io.EOF together with
// the error, and the loop breaks without using it, so a final line that is not
// terminated by '\n' is not included in the result.
func parseDmesg(buffer []byte) (string, error) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parseDmesg() is relocated from flare/archive_linux.go

buf := bytes.NewBuffer(buffer)
var result string

for {
line, err := buf.ReadString('\n')
if err == io.EOF {
break
} else if err != nil {
return result, err
}

parts := klogRegexp.FindStringSubmatch(line)
if parts != nil {
// parts[2] excludes the newline, so it is added back here.
result += parts[2] + "\n"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we are discarding the log level? It seems like it could be helpful in finding important messages. Perhaps we should translate from the number to the text version of the level and prefix the message?

} else {
// line still carries its trailing newline.
result += line
}
}

return result, nil
}

// HandleLinuxDmesg writes linux dmesg into the HTTP response.
//
// The kernel log is fetched with readAllDmesg and then passed through
// parseDmesg. If either step fails, the handler replies with status 500 and a
// plain-text description of the failure; otherwise the parsed log is written
// as the response body.
func HandleLinuxDmesg(w http.ResponseWriter, _ *http.Request) {
	raw, readErr := readAllDmesg()
	if readErr != nil {
		w.WriteHeader(http.StatusInternalServerError)
		fmt.Fprintf(w, "failed to read dmesg: %s", readErr)
		return
	}

	parsed, parseErr := parseDmesg(raw)
	if parseErr != nil {
		w.WriteHeader(http.StatusInternalServerError)
		fmt.Fprintf(w, "failed to parse dmesg: %s", parseErr)
		return
	}

	io.WriteString(w, parsed)
}

// handleCommand runs commandName with the provided arguments and writes it to the HTTP response.
// If the command exits with a failure or doesn't exist in the PATH, it will still 200 but report the failure.
// Any other kind of error will 500.
Expand Down
6 changes: 6 additions & 0 deletions cmd/system-probe/api/debug/handlers_nolinux.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ import (
"net/http"
)

// HandleLinuxDmesg is not supported
//
// It always replies with status 500 and a fixed plain-text message; the
// request is ignored.
func HandleLinuxDmesg(w http.ResponseWriter, _ *http.Request) {
	const unsupportedMsg = "HandleLinuxDmesg is not supported on this platform"

	w.WriteHeader(http.StatusInternalServerError)
	io.WriteString(w, unsupportedMsg)
}

// HandleSelinuxSestatus is not supported
func HandleSelinuxSestatus(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(500)
Expand Down
1 change: 1 addition & 0 deletions cmd/system-probe/api/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ func StartServer(cfg *sysconfigtypes.Config, telemetry telemetry.Component, wmet

if runtime.GOOS == "linux" {
mux.HandleFunc("/debug/ebpf_btf_loader_info", ebpf.HandleBTFLoaderInfo)
mux.HandleFunc("/debug/dmesg", debug.HandleLinuxDmesg)
mux.HandleFunc("/debug/selinux_sestatus", debug.HandleSelinuxSestatus)
mux.HandleFunc("/debug/selinux_semodule_list", debug.HandleSelinuxSemoduleList)
}
Expand Down
78 changes: 17 additions & 61 deletions pkg/flare/archive_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,7 @@
package flare

import (
"bytes"
"fmt"
"io"
"path/filepath"
"regexp"
"syscall"

"github.com/DataDog/ebpf-manager/tracefs"

Expand All @@ -38,11 +33,24 @@ func addSystemProbePlatformSpecificEntries(fb flaretypes.FlareBuilder) {
_ = fb.AddFileFromFunc(filepath.Join("system-probe", "conntrack_cached.log"), getSystemProbeConntrackCached)
_ = fb.AddFileFromFunc(filepath.Join("system-probe", "conntrack_host.log"), getSystemProbeConntrackHost)
_ = fb.AddFileFromFunc(filepath.Join("system-probe", "ebpf_btf_loader.log"), getSystemProbeBTFLoaderInfo)
_ = fb.AddFileFromFunc(filepath.Join("system-probe", "dmesg.log"), getLinuxDmesg)
_ = fb.AddFileFromFunc(filepath.Join("system-probe", "selinux_sestatus.log"), getSystemProbeSelinuxSestatus)
_ = fb.AddFileFromFunc(filepath.Join("system-probe", "selinux_semodule_list.log"), getSystemProbeSelinuxSemoduleList)
}
}

// linuxKernelSymbols is the kernel-symbol collector called by
// addSecurityAgentPlatformSpecificEntries; it defaults to getLinuxKernelSymbols.
// only used in tests when running on linux
var linuxKernelSymbols = getLinuxKernelSymbols

// addSecurityAgentPlatformSpecificEntries adds the entries collected by this
// Linux-specific file to the security-agent flare: kernel symbols, PID 1 mount
// info, dmesg, kprobe events, and the tracing available events / filter
// functions.
//
// Every step is best-effort: the error returned by each collector is
// deliberately discarded so that one failing entry does not stop the others.
func addSecurityAgentPlatformSpecificEntries(fb flaretypes.FlareBuilder) {
	_ = linuxKernelSymbols(fb)
	_ = getLinuxPid1MountInfo(fb)
	_ = fb.AddFileFromFunc("dmesg", getLinuxDmesg)
	_ = getLinuxKprobeEvents(fb)
	_ = getLinuxTracingAvailableEvents(fb)
	_ = getLinuxTracingAvailableFilterFunctions(fb)
}

// getLinuxKernelSymbols copies /proc/kallsyms into the flare and returns the
// error reported by the flare builder, if any.
func getLinuxKernelSymbols(fb flaretypes.FlareBuilder) error {
	const kallsymsPath = "/proc/kallsyms"

	return fb.CopyFile(kallsymsPath)
}
Expand All @@ -59,62 +67,10 @@ func getLinuxPid1MountInfo(fb flaretypes.FlareBuilder) error {
return fb.CopyFile("/proc/1/mountinfo")
}

// klogRegexp matches a "<digits>" marker followed by the rest of the line.
// Submatch 1 is the digits inside the angle brackets and submatch 2 is the
// remaining text up to (not including) the newline. The pattern is not
// anchored, so it matches the first "<digits>" occurring anywhere in a line.
var klogRegexp = regexp.MustCompile(`<(\d+)>(.*)`)

// readAllDmesg returns the raw bytes read from the kernel log buffer.
//
// It first asks Klogctl for the size of the log buffer, allocates a slice of
// that size, and then asks Klogctl to read the messages into it. The returned
// slice is trimmed to the byte count reported by the read call. If either
// Klogctl call fails, it returns nil and an error wrapping the cause.
func readAllDmesg() ([]byte, error) {
	// Action codes passed to Klogctl.
	const (
		syslogActionReadAll    = 3
		syslogActionSizeBuffer = 10
	)

	size, sizeErr := syscall.Klogctl(syslogActionSizeBuffer, nil)
	if sizeErr != nil {
		return nil, fmt.Errorf("failed to query size of log buffer [%w]", sizeErr)
	}

	logBuf := make([]byte, size)
	read, readErr := syscall.Klogctl(syslogActionReadAll, logBuf)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read messages from log buffer [%w]", readErr)
	}

	return logBuf[:read], nil
}

// parseDmesg converts a raw kernel log buffer into text, one line at a time.
//
// Lines matching klogRegexp are replaced by the text after the "<digits>"
// marker, followed by a newline. Lines that do not match are copied unchanged.
//
// A final line that is not terminated by '\n' is processed like any other
// line instead of being dropped. If it matches klogRegexp it is emitted with a
// trailing newline; otherwise it is emitted exactly as read.
//
// It returns the accumulated text. If reading a line fails with an error other
// than io.EOF, the text built so far is returned together with that error.
func parseDmesg(buffer []byte) (string, error) {
	reader := bytes.NewBuffer(buffer)

	// Accumulate into a buffer rather than with string +=, which re-copies
	// the whole result on every line.
	var out bytes.Buffer
	out.Grow(len(buffer))

	for {
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			return out.String(), err
		}

		// ReadString returns the data read so far together with io.EOF, so
		// the line must be handled before checking for end of input.
		if line != "" {
			if parts := klogRegexp.FindStringSubmatch(line); parts != nil {
				// parts[2] excludes the newline, so it is added back here.
				out.WriteString(parts[2])
				out.WriteByte('\n')
			} else {
				out.WriteString(line)
			}
		}

		if err == io.EOF {
			break
		}
	}

	return out.String(), nil
}

func getLinuxDmesg(fb flaretypes.FlareBuilder) error {
dmesg, err := readAllDmesg()
if err != nil {
return err
}

content, err := parseDmesg(dmesg)
if err != nil {
return err
}

return fb.AddFile("dmesg", []byte(content))
func getLinuxDmesg() ([]byte, error) {
sysProbeClient := sysprobeclient.Get(getSystemProbeSocketPath())
url := sysprobeclient.DebugURL("/dmesg")
return getHTTPData(sysProbeClient, url)
}

func getLinuxTracingAvailableEvents(fb flaretypes.FlareBuilder) error {
Expand Down
23 changes: 4 additions & 19 deletions pkg/flare/archive_nolinux.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,11 @@ import (

// addSystemProbePlatformSpecificEntries has an empty body in this file
// (archive_nolinux.go): it adds nothing to the flare.
func addSystemProbePlatformSpecificEntries(_ flaretypes.FlareBuilder) {}

// getLinuxKernelSymbols is a stub in this file: it ignores its argument and
// always returns nil.
func getLinuxKernelSymbols(_ flaretypes.FlareBuilder) error {
return nil
}
// addSecurityAgentPlatformSpecificEntries has an empty body in this file: it
// adds nothing to the flare.
func addSecurityAgentPlatformSpecificEntries(_ flaretypes.FlareBuilder) {}

// getLinuxKprobeEvents is a stub in this file: it always returns nil.
func getLinuxKprobeEvents(_ flaretypes.FlareBuilder) error {
return nil
}

// getLinuxDmesg is a stub in this file: it always returns nil.
func getLinuxDmesg(_ flaretypes.FlareBuilder) error {
return nil
}

// getLinuxPid1MountInfo is a stub in this file: it always returns nil.
func getLinuxPid1MountInfo(_ flaretypes.FlareBuilder) error {
return nil
}

// getLinuxTracingAvailableEvents is a stub in this file: it always returns nil.
func getLinuxTracingAvailableEvents(_ flaretypes.FlareBuilder) error {
return nil
}
// only used in tests when running on linux
var linuxKernelSymbols = getLinuxKernelSymbols //nolint:unused

func getLinuxTracingAvailableFilterFunctions(_ flaretypes.FlareBuilder) error {
func getLinuxKernelSymbols(_ flaretypes.FlareBuilder) error { //nolint:unused
return nil
}
11 changes: 2 additions & 9 deletions pkg/flare/archive_security.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@ import (
"github.com/DataDog/datadog-agent/pkg/util/log"
)

// linuxKernelSymbols is the kernel-symbol collector called while building the
// security-agent archive; it defaults to getLinuxKernelSymbols.
// for testing purpose
var linuxKernelSymbols = getLinuxKernelSymbols

// CreateSecurityAgentArchive packages up the files
func CreateSecurityAgentArchive(local bool, logFilePath string, statusComponent status.Component) (string, error) {
fb, err := flarehelpers.NewFlareBuilder(local, flaretypes.FlareArgs{})
Expand Down Expand Up @@ -52,12 +49,8 @@ func createSecurityAgentArchive(fb flaretypes.FlareBuilder, logFilePath string,
getRuntimeFiles(fb) //nolint:errcheck
getExpVar(fb) //nolint:errcheck
fb.AddFileFromFunc("envvars.log", getEnvVars) //nolint:errcheck
linuxKernelSymbols(fb) //nolint:errcheck
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did we need to refactor this function?

getLinuxPid1MountInfo(fb) //nolint:errcheck
getLinuxDmesg(fb) //nolint:errcheck
getLinuxKprobeEvents(fb) //nolint:errcheck
getLinuxTracingAvailableEvents(fb) //nolint:errcheck
getLinuxTracingAvailableFilterFunctions(fb) //nolint:errcheck

addSecurityAgentPlatformSpecificEntries(fb)
}

func getComplianceFiles(fb flaretypes.FlareBuilder) error {
Expand Down
11 changes: 11 additions & 0 deletions releasenotes/notes/agent-flare-dmesg-d1de3cbb876c05d8.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# The sections from every release note are combined when the
# CHANGELOG.rst is rendered. So the text needs to be worded so that
# it does not depend on any information only available in another
# section. This may mean repeating some details, but each section
# must be readable independently of the other.
#
# Each section note must be formatted as reStructuredText.
---
enhancements:
- |
Added the Linux kernel's dmesg logs to the Agent flare. This information will appear in ``system-probe/dmesg.log``.
Loading