From 4321f8ae67a95f152aa5e83074e1e06f588811c1 Mon Sep 17 00:00:00 2001 From: xavpaice Date: Tue, 12 Oct 2021 14:31:33 +1300 Subject: [PATCH] fix: patched intel rdt to allow sudo (#9527) Co-authored-by: Joe Guo --- plugins/inputs/intel_rdt/README.md | 27 +++++++++++++++++++ plugins/inputs/intel_rdt/intel_rdt.go | 38 +++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/plugins/inputs/intel_rdt/README.md b/plugins/inputs/intel_rdt/README.md index 8a0f0a1ea6e75..cc98c13b6c0e0 100644 --- a/plugins/inputs/intel_rdt/README.md +++ b/plugins/inputs/intel_rdt/README.md @@ -24,6 +24,29 @@ Note: pqos tool needs root privileges to work properly. Metrics will be constantly reported from the following `pqos` commands within the given interval: +#### If telegraf does not run as the root user + +The `pqos` binary needs to run as root. If telegraf is running as a non-root user, you may enable sudo +to allow `pqos` to run correctly. +The `pqos` command requires root level access to run. There are two options to +overcome this if you run telegraf as a non-root user. + +It is possible to update the pqos binary with setuid using `chmod u+s +/path/to/pqos`. This approach is simple and requires no modification to the +Telegraf configuration, however pqos is not a read-only tool and there are +security implications for making such a command setuid root. + +Alternately, you may enable sudo to allow `pqos` to run correctly, as follows: + +Add the following to your sudoers file (assumes telegraf runs as a user named `telegraf`): + +``` +telegraf ALL=(ALL) NOPASSWD:/usr/sbin/pqos -r --iface-os --mon-file-type=csv --mon-interval=* +``` + +If you wish to use sudo, you must also add `use_sudo = true` to the Telegraf +configuration (see below). 
+ #### In case of cores monitoring: ``` pqos -r --iface-os --mon-file-type=csv --mon-interval=INTERVAL --mon-core=all:[CORES]\;mbt:[CORES] @@ -76,6 +99,10 @@ More about Intel RDT: https://www.intel.com/content/www/us/en/architecture-and-t ## Mandatory if cores aren't set and forbidden if cores are specified. ## e.g. ["qemu", "pmd"] # processes = ["process"] + + ## Specify if the pqos process should be called with sudo. + ## Mandatory if the telegraf process does not run as root. + # use_sudo = false ``` ### Exposed metrics diff --git a/plugins/inputs/intel_rdt/intel_rdt.go b/plugins/inputs/intel_rdt/intel_rdt.go index e0c7de526b067..486a13c98c535 100644 --- a/plugins/inputs/intel_rdt/intel_rdt.go +++ b/plugins/inputs/intel_rdt/intel_rdt.go @@ -14,6 +14,7 @@ import ( "strconv" "strings" "sync" + "syscall" "time" "github.com/google/go-cmp/cmp" @@ -46,6 +47,7 @@ type IntelRDT struct { Processes []string `toml:"processes"` SamplingInterval int32 `toml:"sampling_interval"` ShortenedMetrics bool `toml:"shortened_metrics"` + UseSudo bool `toml:"use_sudo"` Log telegraf.Logger `toml:"-"` Publisher Publisher `toml:"-"` @@ -97,6 +99,10 @@ func (r *IntelRDT) SampleConfig() string { ## Mandatory if cores aren't set and forbidden if cores are specified. ## e.g. ["qemu", "pmd"] # processes = ["process"] + + ## Specify if the pqos process should be called with sudo. + ## Mandatory if the telegraf process does not run as root. + # use_sudo = false ` } @@ -254,6 +260,12 @@ func (r *IntelRDT) readData(ctx context.Context, args []string, processesPIDsAss cmd := exec.Command(r.PqosPath, append(args)...) + if r.UseSudo { + // run pqos with `/bin/sh -c "sudo /path/to/pqos ..."` + args = []string{"-c", fmt.Sprintf("sudo %s %s", r.PqosPath, strings.Replace(strings.Join(args, " "), ";", "\\;", -1))} + cmd = exec.Command("/bin/sh", args...) 
+ } + cmdReader, err := cmd.StdoutPipe() if err != nil { r.errorChan <- err @@ -334,14 +346,30 @@ func (r *IntelRDT) processOutput(cmdReader io.ReadCloser, processesPIDsAssociati } func shutDownPqos(pqos *exec.Cmd) error { + timeout := time.Second * 2 + if pqos.Process != nil { - err := pqos.Process.Signal(os.Interrupt) - if err != nil { - err = pqos.Process.Kill() - if err != nil { - return fmt.Errorf("failed to shut down pqos: %v", err) + // try to send interrupt signal, ignore err for now + _ = pqos.Process.Signal(os.Interrupt) + + // wait and constantly check if pqos is still running + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + for { + if err := pqos.Process.Signal(syscall.Signal(0)); err == os.ErrProcessDone { + return nil + } else if ctx.Err() != nil { + break } } + + // if pqos is still running after some period, try to kill it + // this will send SIGKILL to pqos, and leave garbage in `/sys/fs/resctrl/mon_groups` + // fixed in https://github.com/intel/intel-cmt-cat/issues/197 + err := pqos.Process.Kill() + if err != nil { + return fmt.Errorf("failed to shut down pqos: %v", err) + } } return nil }