diff --git a/cmd/beyla/main.go b/cmd/beyla/main.go index c129030aa..d8fcbe98f 100644 --- a/cmd/beyla/main.go +++ b/cmd/beyla/main.go @@ -52,6 +52,15 @@ func main() { os.Exit(-1) } + if err := beyla.CheckOSCapabilities(config); err != nil { + if config.EnforceSysCaps { + slog.Error("can't start Beyla", "error", err) + os.Exit(-1) + } + + slog.Warn("Required system capabilities not present, Beyla may malfunction", "error", err) + } + if config.ProfilePort != 0 { go func() { slog.Info("starting PProf HTTP listener", "port", config.ProfilePort) diff --git a/docs/sources/configure/options.md b/docs/sources/configure/options.md index 651eb88e5..c6988da2d 100644 --- a/docs/sources/configure/options.md +++ b/docs/sources/configure/options.md @@ -188,6 +188,18 @@ formats are: | `json` | prints a compact JSON object | | `json_indent` | prints an indented JSON object | +| YAML | Environment variable | Type | Default | +| ----------------- | ------------------------ | -------- | ---------- | +| `enforce_sys_caps` | `BEYLA_ENFORCE_SYS_CAPS` | boolean | `true` | + + + +If `enforce_sys_caps` is set to `true` and the required system +capabilities are not present, Beyla aborts its startup and logs a list of the +missing capabilities. + +If `enforce_sys_caps` is set to `false`, Beyla only logs a list of the +missing capabilities and continues its startup. ## Service discovery @@ -213,7 +225,7 @@ namespace. For more details about this section, go to the [discovery services section](#discovery-services-section) of this document. 
-| YAML | Environment variable | Type | Default | +| YAML | Environment variable | Type | Default | | -------------------------- | -------------------------------- | ------- | ------- | | `skip_go_specific_tracers` | `BEYLA_SKIP_GO_SPECIFIC_TRACERS` | boolean | false | @@ -552,7 +564,7 @@ attributes: dns: false ``` -| YAML | Environment variable | Type | Default | +| YAML | Environment variable | Type | Default | | ----- | ------------------------------- | ------- | ------- | | `dns` | `BEYLA_HOSTNAME_DNS_RESOLUTION` | boolean | `true` | @@ -605,7 +617,7 @@ It is IMPORTANT to consider that enabling this feature requires a previous step providing some extra permissions to the Beyla Pod. Consult the ["Configuring Kubernetes metadata decoration section" in the "Running Beyla in Kubernetes"]({{< relref "../setup/kubernetes.md" >}}) page. -| YAML | Environment variable | Type | Default | +| YAML | Environment variable | Type | Default | | -------- | ---------------------------- | ------- | ------- | | `enable` | `BEYLA_KUBE_METADATA_ENABLE` | boolean | `false` | @@ -818,9 +830,9 @@ If this property is not provided, Beyla will guess it according to the following - Beyla will guess `http/protobuf` if the port ends in `4318` (`4318`, `14318`, `24318`, ...), as `4318` is the usual Port number for the OTEL HTTP collector. -| YAML | Environment variable | Type | Default | -| ---------------------- | --------------------------------- | ---- | ------- | -| `insecure_skip_verify` | `BEYLA_OTEL_INSECURE_SKIP_VERIFY` | bool | `false` | +| YAML | Environment variable | Type | Default | +| ---------------------- | --------------------------------- | ------- | ------- | +| `insecure_skip_verify` | `BEYLA_OTEL_INSECURE_SKIP_VERIFY` | boolean | `false` | Controls whether the OTEL client verifies the server's certificate chain and host name. 
If set to `true`, the OTEL client accepts any certificate presented by the server @@ -1018,9 +1030,9 @@ If this property is not provided, Beyla will guess it according to the following - Beyla will guess `http/protobuf` if the port ends in `4318` (`4318`, `14318`, `24318`, ...), as `4318` is the usual Port number for the OTEL HTTP collector. -| YAML | Environment variable | Type | Default | -| ---------------------- | --------------------------------- | ---- | ------- | -| `insecure_skip_verify` | `BEYLA_OTEL_INSECURE_SKIP_VERIFY` | bool | `false` | +| YAML | Environment variable | Type | Default | +| ---------------------- | --------------------------------- | ------- | ------- | +| `insecure_skip_verify` | `BEYLA_OTEL_INSECURE_SKIP_VERIFY` | boolean | `false` | Controls whether the OTEL client verifies the server's certificate chain and host name. If set to `true`, the OTEL client accepts any certificate presented by the server diff --git a/pkg/beyla/config.go b/pkg/beyla/config.go index c30fefdcd..23bf4f224 100644 --- a/pkg/beyla/config.go +++ b/pkg/beyla/config.go @@ -43,6 +43,7 @@ const ( var DefaultConfig = Config{ ChannelBufferLen: 10, LogLevel: "INFO", + EnforceSysCaps: true, EBPF: ebpfcommon.TracerConfig{ BatchLength: 100, BatchTimeout: time.Second, @@ -165,6 +166,11 @@ type Config struct { LogLevel string `yaml:"log_level" env:"BEYLA_LOG_LEVEL"` + // Check for required system capabilities and bail if they are not + // present. If set to 'false', Beyla will still print a list of missing + // capabilities, but the execution will continue + EnforceSysCaps bool `yaml:"enforce_sys_caps" env:"BEYLA_ENFORCE_SYS_CAPS"` + // From this comment, the properties below will remain undocumented, as they // are useful for development purposes. They might be helpful for customer support. 
diff --git a/pkg/beyla/config_test.go b/pkg/beyla/config_test.go index e496d17b8..ef2bb5353 100644 --- a/pkg/beyla/config_test.go +++ b/pkg/beyla/config_test.go @@ -106,6 +106,7 @@ network: ServiceName: "svc-name", ChannelBufferLen: 33, LogLevel: "INFO", + EnforceSysCaps: true, Printer: false, TracePrinter: "json", EBPF: ebpfcommon.TracerConfig{ diff --git a/pkg/beyla/os.go b/pkg/beyla/os.go index 771b7f71c..17d758c03 100644 --- a/pkg/beyla/os.go +++ b/pkg/beyla/os.go @@ -2,8 +2,12 @@ package beyla import ( "fmt" + "strings" + + "golang.org/x/sys/unix" ebpfcommon "github.com/grafana/beyla/pkg/internal/ebpf/common" + "github.com/grafana/beyla/pkg/internal/helpers" ) // Minimum required Kernel version: 5.8 @@ -21,3 +25,87 @@ func CheckOSSupport() error { } return nil } + +type osCapabilitiesError uint64 + +func (e *osCapabilitiesError) Set(c helpers.OSCapability) { + *e |= 1 << c +} + +func (e *osCapabilitiesError) Clear(c helpers.OSCapability) { + *e &= ^(1 << c) +} + +func (e osCapabilitiesError) IsSet(c helpers.OSCapability) bool { + return e&(1<<c) > 0 +} + +func (e osCapabilitiesError) Empty() bool { + return e == 0 +} + +func (e osCapabilitiesError) Error() string { + if e == 0 { + return "" + } + + var sb strings.Builder + + sb.WriteString("the following capabilities are required: ") + + sep := "" + + for i := helpers.OSCapability(0); i <= unix.CAP_LAST_CAP; i++ { + if e.IsSet(i) { + sb.WriteString(sep) + sb.WriteString(i.String()) + + sep = ", " + } + } + + return sb.String() +} + +func CheckOSCapabilities(config *Config) error { + caps, err := helpers.GetCurrentProcCapabilities() + + if err != nil { + return fmt.Errorf("unable to query OS capabilities: %w", err) + } + + var capError osCapabilitiesError + + testAndSet := func(c helpers.OSCapability) { + if !caps.Has(c) { + capError.Set(c) + } + } + + // core capabilities + testAndSet(unix.CAP_BPF) + testAndSet(unix.CAP_PERFMON) + testAndSet(unix.CAP_DAC_READ_SEARCH) + + major, minor := kernelVersion() + + // 
CAP_SYS_RESOURCE is only required on kernels < 5.11 + if (major == 5 && minor < 11) || (major < 5) { + testAndSet(unix.CAP_SYS_RESOURCE) + } + + if config.Enabled(FeatureAppO11y) { + testAndSet(unix.CAP_CHECKPOINT_RESTORE) + testAndSet(unix.CAP_SYS_PTRACE) + } + + if config.Enabled(FeatureNetO11y) { + testAndSet(unix.CAP_NET_RAW) + } + + if capError.Empty() { + return nil + } + + return capError +} diff --git a/pkg/beyla/os_test.go b/pkg/beyla/os_test.go index 048c00f67..ecee62592 100644 --- a/pkg/beyla/os_test.go +++ b/pkg/beyla/os_test.go @@ -1,10 +1,15 @@ package beyla import ( + "errors" "fmt" "testing" "github.com/stretchr/testify/assert" + "golang.org/x/sys/unix" + + "github.com/grafana/beyla/pkg/internal/helpers" + "github.com/grafana/beyla/pkg/services" ) type testCase struct { @@ -44,3 +49,109 @@ func TestCheckOSSupport_Unsupported(t *testing.T) { }) } } + +func TestOSCapabilitiesError_Empty(t *testing.T) { + var capErr osCapabilitiesError + + assert.True(t, capErr.Empty()) + assert.Equal(t, "", capErr.Error()) +} + +func TestOSCapabilitiesError_Set(t *testing.T) { + var capErr osCapabilitiesError + + for c := helpers.OSCapability(0); c <= unix.CAP_LAST_CAP; c++ { + assert.False(t, capErr.IsSet(c)) + capErr.Set(c) + assert.True(t, capErr.IsSet(c)) + capErr.Clear(c) + assert.False(t, capErr.IsSet(c)) + } +} + +func TestOSCapabilitiesError_ErrorString(t *testing.T) { + var capErr osCapabilitiesError + + assert.Equal(t, "", capErr.Error()) + + capErr.Set(unix.CAP_BPF) + + // no separator (,) + assert.Equal(t, "the following capabilities are required: CAP_BPF", capErr.Error()) + + capErr.Set(unix.CAP_NET_RAW) + + // capabilities appear in ascending order (they are just numeric + // constants) separated by a comma + assert.True(t, unix.CAP_NET_RAW < unix.CAP_BPF) + assert.Equal(t, "the following capabilities are required: CAP_NET_RAW, CAP_BPF", capErr.Error()) +} + +type capClass int + +const ( + capCore = capClass(iota + 1) + capApp + capNet +) + +type 
capTestData struct { + osCap helpers.OSCapability + class capClass + kernMaj int + kernMin int +} + +var capTests = []capTestData{ + {osCap: unix.CAP_BPF, class: capCore}, + {osCap: unix.CAP_PERFMON, class: capCore}, + {osCap: unix.CAP_DAC_READ_SEARCH, class: capCore}, + {osCap: unix.CAP_SYS_RESOURCE, class: capCore, kernMaj: 5, kernMin: 10}, + {osCap: unix.CAP_SYS_RESOURCE, class: capCore, kernMaj: 4, kernMin: 11}, + {osCap: unix.CAP_CHECKPOINT_RESTORE, class: capApp}, + {osCap: unix.CAP_SYS_PTRACE, class: capApp}, + {osCap: unix.CAP_NET_RAW, class: capNet}, +} + +func TestCheckOSCapabilities(t *testing.T) { + caps, err := helpers.GetCurrentProcCapabilities() + + assert.NoError(t, err) + + // assume this proc doesn't have any caps set (which is usually the case + // for non privileged processes) instead of turning this into a privileged + // test and manually dropping capabilities + assert.Zero(t, caps[0].Effective) + assert.Zero(t, caps[1].Effective) + + test := func(data *capTestData) { + overrideKernelVersion(testCase{data.kernMaj, data.kernMin}) + + cfg := Config{ + NetworkFlows: NetworkConfig{Enable: data.class == capNet}, + Discovery: services.DiscoveryConfig{SystemWide: data.class == capApp}, + } + + err := CheckOSCapabilities(&cfg) + + if !assert.Error(t, err) { + assert.FailNow(t, "CheckOSCapabilities() should have returned an error") + } + + var osCapErr osCapabilitiesError + + if !errors.As(err, &osCapErr) { + assert.Fail(t, "CheckOSCapabilities failed", err) + } + + assert.True(t, osCapErr.IsSet(data.osCap), + fmt.Sprintf("%s should be present in error", data.osCap.String())) + } + + for i := range capTests { + c := capTests[i] + t.Run(fmt.Sprintf("%s %d.%d", c.osCap.String(), c.kernMaj, c.kernMin), func(*testing.T) { + test(&c) + }) + } +} diff --git a/pkg/internal/discover/attacher_linux.go b/pkg/internal/discover/attacher_linux.go index 0fea98533..99025add5 100644 --- a/pkg/internal/discover/attacher_linux.go +++ 
b/pkg/internal/discover/attacher_linux.go @@ -6,6 +6,8 @@ import ( "github.com/cilium/ebpf/rlimit" "golang.org/x/sys/unix" + + "github.com/grafana/beyla/pkg/internal/helpers" ) func (ta *TraceAttacher) close() { @@ -46,6 +48,12 @@ func (ta *TraceAttacher) bpfMount(pinPath string) error { return err } if !mounted { + caps, err := helpers.GetCurrentProcCapabilities() + + if err == nil && !caps.Has(unix.CAP_SYS_ADMIN) { + return fmt.Errorf("beyla requires CAP_SYS_ADMIN in order to mount %s", pinPath) + } + return unix.Mount(pinPath, pinPath, "bpf", 0, "") } if !bpffsInstance { diff --git a/pkg/internal/ebpf/common/common.go b/pkg/internal/ebpf/common/common.go index cf2ccf370..994ab8427 100644 --- a/pkg/internal/ebpf/common/common.go +++ b/pkg/internal/ebpf/common/common.go @@ -13,8 +13,10 @@ import ( "github.com/cilium/ebpf" "github.com/cilium/ebpf/ringbuf" + "golang.org/x/sys/unix" "github.com/grafana/beyla/pkg/internal/goexec" + "github.com/grafana/beyla/pkg/internal/helpers" "github.com/grafana/beyla/pkg/internal/request" ) @@ -167,6 +169,13 @@ func SupportsContextPropagation(log *slog.Logger) bool { return true } + // bpf_probe_write_user(), used to inject the context, requires CAP_SYS_ADMIN + + if !hasCapSysAdmin() { + log.Info("trace context propagation disabled due to missing capability CAP_SYS_ADMIN") + return false + } + lockdown := KernelLockdownMode() if lockdown == KernelLockdownNone { @@ -221,6 +230,11 @@ func KernelLockdownMode() KernelLockdown { return KernelLockdownNone } +func hasCapSysAdmin() bool { + caps, err := helpers.GetCurrentProcCapabilities() + return err == nil && caps.Has(unix.CAP_SYS_ADMIN) +} + func cstr(chars []uint8) string { addrLen := bytes.IndexByte(chars, 0) if addrLen < 0 { diff --git a/pkg/internal/ebpf/grpc/grpc.go b/pkg/internal/ebpf/grpc/grpc.go index 5237b7b45..3b939827b 100644 --- a/pkg/internal/ebpf/grpc/grpc.go +++ b/pkg/internal/ebpf/grpc/grpc.go @@ -80,7 +80,8 @@ func (p *Tracer) Load() (*ebpf.CollectionSpec, error) { 
loader = loadBpf_tp_debug } } else { - p.log.Info("Kernel in lockdown mode or older than 5.17, trace info propagation in gRPC headers is disabled.") + p.log.Info("Kernel in lockdown mode, missing CAP_SYS_ADMIN" + + " or older than 5.17, trace info propagation in gRPC headers is disabled.") } return loader() } diff --git a/pkg/internal/ebpf/nethttp/nethttp.go b/pkg/internal/ebpf/nethttp/nethttp.go index ae19c3b99..f3e0895df 100644 --- a/pkg/internal/ebpf/nethttp/nethttp.go +++ b/pkg/internal/ebpf/nethttp/nethttp.go @@ -79,7 +79,8 @@ func (p *Tracer) Load() (*ebpf.CollectionSpec, error) { loader = loadBpf_tp_debug } } else { - p.log.Info("Kernel in lockdown mode, trace info propagation in HTTP headers is disabled.") + p.log.Info("Kernel in lockdown mode or missing CAP_SYS_ADMIN," + + " trace info propagation in HTTP headers is disabled.") } return loader() } diff --git a/pkg/internal/helpers/capabilities.go b/pkg/internal/helpers/capabilities.go new file mode 100644 index 000000000..a117c7070 --- /dev/null +++ b/pkg/internal/helpers/capabilities.go @@ -0,0 +1,96 @@ +package helpers + +import ( + "os" + + "golang.org/x/sys/unix" +) + +type OSCapability uint8 + +var capDesc = map[OSCapability]string{ + unix.CAP_AUDIT_CONTROL: "CAP_AUDIT_CONTROL", + unix.CAP_AUDIT_READ: "CAP_AUDIT_READ", + unix.CAP_AUDIT_WRITE: "CAP_AUDIT_WRITE", + unix.CAP_BLOCK_SUSPEND: "CAP_BLOCK_SUSPEND", + unix.CAP_BPF: "CAP_BPF", + unix.CAP_CHECKPOINT_RESTORE: "CAP_CHECKPOINT_RESTORE", + unix.CAP_CHOWN: "CAP_CHOWN", + unix.CAP_DAC_OVERRIDE: "CAP_DAC_OVERRIDE", + unix.CAP_DAC_READ_SEARCH: "CAP_DAC_READ_SEARCH", + unix.CAP_FOWNER: "CAP_FOWNER", + unix.CAP_FSETID: "CAP_FSETID", + unix.CAP_IPC_LOCK: "CAP_IPC_LOCK", + unix.CAP_IPC_OWNER: "CAP_IPC_OWNER", + unix.CAP_KILL: "CAP_KILL", + unix.CAP_LEASE: "CAP_LEASE", + unix.CAP_LINUX_IMMUTABLE: "CAP_LINUX_IMMUTABLE", + unix.CAP_MAC_ADMIN: "CAP_MAC_ADMIN", + unix.CAP_MAC_OVERRIDE: "CAP_MAC_OVERRIDE", + unix.CAP_MKNOD: "CAP_MKNOD", + unix.CAP_NET_ADMIN: 
"CAP_NET_ADMIN", + unix.CAP_NET_BIND_SERVICE: "CAP_NET_BIND_SERVICE", + unix.CAP_NET_BROADCAST: "CAP_NET_BROADCAST", + unix.CAP_NET_RAW: "CAP_NET_RAW", + unix.CAP_PERFMON: "CAP_PERFMON", + unix.CAP_SETFCAP: "CAP_SETFCAP", + unix.CAP_SETGID: "CAP_SETGID", + unix.CAP_SETPCAP: "CAP_SETPCAP", + unix.CAP_SETUID: "CAP_SETUID", + unix.CAP_SYSLOG: "CAP_SYSLOG", + unix.CAP_SYS_ADMIN: "CAP_SYS_ADMIN", + unix.CAP_SYS_BOOT: "CAP_SYS_BOOT", + unix.CAP_SYS_CHROOT: "CAP_SYS_CHROOT", + unix.CAP_SYS_MODULE: "CAP_SYS_MODULE", + unix.CAP_SYS_NICE: "CAP_SYS_NICE", + unix.CAP_SYS_PACCT: "CAP_SYS_PACCT", + unix.CAP_SYS_PTRACE: "CAP_SYS_PTRACE", + unix.CAP_SYS_RAWIO: "CAP_SYS_RAWIO", + unix.CAP_SYS_RESOURCE: "CAP_SYS_RESOURCE", + unix.CAP_SYS_TIME: "CAP_SYS_TIME", + unix.CAP_SYS_TTY_CONFIG: "CAP_SYS_TTY_CONFIG", + unix.CAP_WAKE_ALARM: "CAP_WAKE_ALARM", +} + +func (c OSCapability) String() string { + if str, ok := capDesc[c]; ok { + return str + } + + return "UNKNOWN" +} + +// From the capget(2) manpage: +// Note that 64-bit capabilities use datap[0] and datap[1], whereas 32-bit capabilities use only datap[0]. 
+type OSCapabilities [2]unix.CapUserData + +func capUserHeader() *unix.CapUserHeader { + return &unix.CapUserHeader{ + Version: unix.LINUX_CAPABILITY_VERSION_3, + Pid: int32(os.Getpid()), + } +} + +func GetCurrentProcCapabilities() (*OSCapabilities, error) { + caps := OSCapabilities{} + + err := unix.Capget(capUserHeader(), &caps[0]) + + return &caps, err +} + +func SetCurrentProcCapabilities(caps *OSCapabilities) error { + return unix.Capset(capUserHeader(), &caps[0]) +} + +func (caps *OSCapabilities) Has(c OSCapability) bool { + return ((*caps)[c>>5].Effective & (1 << (c & 31))) > 0 +} + +func (caps *OSCapabilities) Clear(c OSCapability) { + (*caps)[c>>5].Effective &= ^(1 << (c & 31)) +} + +func (caps *OSCapabilities) Set(c OSCapability) { + (*caps)[c>>5].Effective |= (1 << (c & 31)) +} diff --git a/pkg/internal/helpers/capabilities_linux_privilieged_test.go b/pkg/internal/helpers/capabilities_linux_privilieged_test.go new file mode 100644 index 000000000..1797fb782 --- /dev/null +++ b/pkg/internal/helpers/capabilities_linux_privilieged_test.go @@ -0,0 +1,71 @@ +package helpers + +import ( + "fmt" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "golang.org/x/sys/unix" +) + +const privilegedEnv = "PRIVILEGED_TESTS" + +var expectedProcCaps *OSCapabilities +var errResetCaps error + +// This needs to run in the main thread (called by TestMain() below) +// capset() can fail with EPERM when called from a different thread. From the +// manpage: +// +// EPERM The caller attempted to use capset() to modify the capabilities of +// a thread other than itself, but lacked sufficient privilege. For kernels +// supporting VFS capabilities, this is never permitted. +// For kernels lacking VFS support, the CAP_SETPCAP capability is required. 
+// +// We need to drop capabilities to correctly test TestCheckOSCapabilities() +func resetProcCapabilities() { + var err error + + expectedProcCaps, err = GetCurrentProcCapabilities() + + errRef := &err + cleanup := func() { + if *errRef != nil { + errResetCaps = fmt.Errorf("failed to reset capabilities: %w", *errRef) + } + } + + defer cleanup() + + if err != nil { + return + } + + expectedProcCaps.Clear(unix.CAP_BPF) + expectedProcCaps.Set(unix.CAP_BPF) + + err = SetCurrentProcCapabilities(expectedProcCaps) +} + +func TestGetSetCurrentProcCaps(t *testing.T) { + if os.Getenv(privilegedEnv) == "" { + t.Skipf("Set %s to run this test\n", privilegedEnv) + } + + if errResetCaps != nil { + assert.Fail(t, errResetCaps.Error()) + } + + caps, err := GetCurrentProcCapabilities() + assert.NoError(t, err) + assert.Equal(t, expectedProcCaps, caps) +} + +func TestMain(m *testing.M) { + if os.Getenv(privilegedEnv) != "" { + resetProcCapabilities() + } + + os.Exit(m.Run()) +} diff --git a/pkg/internal/helpers/capabilities_test.go b/pkg/internal/helpers/capabilities_test.go new file mode 100644 index 000000000..30af8d07e --- /dev/null +++ b/pkg/internal/helpers/capabilities_test.go @@ -0,0 +1,30 @@ +package helpers + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestOSCapabilities_SetClear(t *testing.T) { + var caps OSCapabilities + + assert.Zero(t, caps[0]) + assert.Zero(t, caps[1]) + + for k := range capDesc { + assert.False(t, caps.Has(k)) + caps.Set(k) + assert.True(t, caps.Has(k)) + caps.Clear(k) + assert.False(t, caps.Has(k)) + } +} + +func TestOSCapabilities_String(t *testing.T) { + for k, str := range capDesc { + assert.Equal(t, str, k.String()) + } + + assert.Equal(t, "UNKNOWN", OSCapability(99).String()) +} diff --git a/test/integration/docker-compose-client.yml b/test/integration/docker-compose-client.yml index 46fa4e557..1f3b02369 100644 --- a/test/integration/docker-compose-client.yml +++ b/test/integration/docker-compose-client.yml @@ 
-33,6 +33,7 @@ services: BEYLA_METRICS_INTERVAL: "10ms" BEYLA_BPF_BATCH_TIMEOUT: "10ms" BEYLA_LOG_LEVEL: "DEBUG" + BEYLA_ENFORCE_SYS_CAPS: "false" BEYLA_BPF_DEBUG: "TRUE" BEYLA_METRICS_REPORT_TARGET: "true" BEYLA_METRICS_REPORT_PEER: "true"