diff --git a/docs/measurements.md b/docs/measurements.md
index 0b38e966c..8f12bce39 100644
--- a/docs/measurements.md
+++ b/docs/measurements.md
@@ -17,6 +17,7 @@ Collects latencies from the different pod startup phases, these **latency metric
 This measurement sends its metrics to the index configured by *esIndex*. The metrics collected are pod latency histograms and pod latency quantiles P99, P95 and P50.
 
 Pod latency sample:
+
 ```json
 {
   "timestamp": "2020-11-15T20:28:59.598727718Z",
@@ -108,10 +109,13 @@ WARN[2020-12-15 12:37:08] P99 Ready latency (2929ms) higher than configured thre
 
 ## Pprof collection
 
-This measurement takes care of collecting golang profiling information from pods. To do so, kube-burner connects to pods with the given labels running in certain namespaces. This measurement uses an implementation similar to `kubectl exec`, and as soon as it connects to one pod it executes the command `curl ` to get the pprof data. Pprof files are collected in a regular basis given by the parameter `pprofInterval` and these files are stored in the directory configured by the parameter `pprofDirectory` which by default is `pprof`.
-It's also possible to configure a token to get pprof data from authenticated endoints such as kube-apiserver with the variable `bearerToken`.
+This measurement takes care of collecting golang profiling information from pods. To do so, kube-burner connects to the pods matching the given labels in the given namespaces, using an implementation similar to `kubectl exec`, and runs a `curl` command inside each of them to fetch the pprof data. Pprof files are collected on a regular basis, configured by the parameter `pprofInterval`, and stored in the directory configured by the parameter `pprofDirectory`, which defaults to `pprof`.
+As some components require authentication to expose their profiling information, `kube-burner` provides two different methods to address it:
+
+- bearerToken: This variable holds a valid Bearer token, which is used by cURL to fetch the pprof data. This method usually works with the kube-apiserver and kube-controller-manager components.
+- cert + key: These variables point to a local certificate and private key file, respectively. These files are copied to the remote pods and used by cURL to fetch the pprof data. This method usually works with etcd.
 
-An example of how to configure this measurement to collect pprof HEAP and CPU profiling data from kube-apiserver is shown below:
+An example of how to configure this measurement to collect pprof HEAP and CPU profiling data from kube-apiserver and etcd is shown below:
 
 ```yaml
   measurements:
@@ -130,6 +134,23 @@ An example of how to configure this measurement to collect pprof HEAP and CPU pr
         labelSelector: {app: openshift-kube-apiserver}
         bearerToken: thisIsNotAValidToken
         url: https://localhost:6443/debug/pprof/profile?timeout=30
+
+      - name: etcd-heap
+        namespace: "openshift-etcd"
+        labelSelector: {app: etcd}
+        cert: etcd-peer-cert.crt
+        key: etcd-peer-cert.key
+        url: https://localhost:2379/debug/pprof/heap
 ```
 
-**Note**: As mentioned before, this measurement requires cURL to be installed in the target pods.
+**Note**: As mentioned before, this measurement requires the `curl` command to be available in the target pods. The cert + key method also relies on the `tee` command, which is used to copy the certificate files into the pods.
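+
+For reference, the command kube-burner executes inside the target pod is roughly equivalent to one of the following (a sketch; `$TOKEN` stands for the configured *bearerToken* value, and `/tmp/pprof.crt` and `/tmp/pprof.key` are the in-pod paths where the files given by *cert* and *key* are copied):
+
+```shell
+# bearerToken method
+curl -sSLkH "Authorization: Bearer $TOKEN" https://localhost:6443/debug/pprof/heap
+
+# cert + key method
+curl -sSLk --cert /tmp/pprof.crt --key /tmp/pprof.key https://localhost:2379/debug/pprof/heap
+```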
diff --git a/pkg/config/types.go b/pkg/config/types.go
index 724fcdc41..87e46c77c 100644
--- a/pkg/config/types.go
+++ b/pkg/config/types.go
@@ -81,6 +81,10 @@ type PProftarget struct {
 	BearerToken string `yaml:"bearerToken"`
 	// URL target URL
 	URL string `yaml:"url"`
+	// Cert client certificate file
+	Cert string `yaml:"cert"`
+	// Key private key file
+	Key string `yaml:"key"`
 }
 
 // Measurement holds the measurement configuration
diff --git a/pkg/measurements/pprof.go b/pkg/measurements/pprof.go
index 2c9b68d17..982069a4a 100644
--- a/pkg/measurements/pprof.go
+++ b/pkg/measurements/pprof.go
@@ -15,8 +15,10 @@
 package measurements
 
 import (
+	"bytes"
 	"context"
 	"fmt"
+	"io"
 	"os"
 	"path"
 	"sync"
@@ -51,12 +53,14 @@ func (p *pprof) setConfig(cfg config.Measurement) {
 }
 
 func (p *pprof) start() {
+	var wg sync.WaitGroup
 	err := os.MkdirAll(p.directory, 0744)
 	if err != nil {
 		log.Fatalf("Error creating pprof directory: %s", err)
 	}
 	p.stopChannel = make(chan bool)
-	p.getPProf()
+	p.getPProf(&wg, true)
+	wg.Wait()
 	go func() {
 		defer close(p.stopChannel)
 		ticker := time.NewTicker(p.config.PProfInterval)
@@ -64,8 +68,11 @@ func (p *pprof) start() {
 		for {
 			select {
 			case <-ticker.C:
-				p.getPProf()
+				// Certificates were already copied during the first iteration
+				p.getPProf(&wg, false)
+				wg.Wait()
 			case <-p.stopChannel:
+				ticker.Stop()
 				return
 			}
 		}
@@ -81,10 +88,13 @@ func getPods(target config.PProftarget) []corev1.Pod {
 	return podList.Items
 }
 
-func (p *pprof) getPProf() {
-	var wg sync.WaitGroup
-	var command []string
+func (p *pprof) getPProf(wg *sync.WaitGroup, copyCerts bool) {
 	for _, target := range p.config.PProfTargets {
+		// The bearerToken and cert + key methods are mutually exclusive
+		if target.BearerToken != "" && target.Cert != "" {
+			log.Errorf("bearerToken and cert auth methods cannot be specified together, skipping pprof target %s", target.Name)
+			continue
+		}
 		log.Infof("Collecting %s pprof", target.Name)
 		podList := getPods(target)
 		for _, pod := range podList {
@@ -93,15 +103,35 @@ func (p *pprof) getPProf() {
 			wg.Add(1)
 			go func(target config.PProftarget, pod corev1.Pod) {
 				defer wg.Done()
+				// Keep command and stderr local to each goroutine to avoid data races
+				var command []string
+				var stderr bytes.Buffer
 				pprofFile := fmt.Sprintf("%s-%s-%d.pprof", target.Name, pod.Name, time.Now().Unix())
 				f, err := os.Create(path.Join(p.directory, pprofFile))
 				if err != nil {
 					log.Errorf("Error creating pprof file %s: %s", pprofFile, err)
 					return
 				}
 				defer f.Close()
+				// Copy the certificate and private key into the pod, only during the first iteration
+				if target.Cert != "" && target.Key != "" && copyCerts {
+					cert, privKey, err := readCerts(target.Cert, target.Key)
+					if err != nil {
+						log.Error(err)
+						return
+					}
+					defer cert.Close()
+					defer privKey.Close()
+					if err = copyCertsToPod(pod, cert, privKey); err != nil {
+						log.Error(err)
+						return
+					}
+				}
 				if target.BearerToken != "" {
 					command = []string{"curl", "-sSLkH", fmt.Sprintf("Authorization: Bearer %s", target.BearerToken), target.URL}
+				} else if target.Cert != "" && target.Key != "" {
+					// Use the certificate and key previously copied into the pod
+					command = []string{"curl", "-sSLk", "--cert", "/tmp/pprof.crt", "--key", "/tmp/pprof.key", target.URL}
 				} else {
-					command = []string{"curl", "-sSLkH", target.URL}
+					command = []string{"curl", "-sSLk", target.URL}
 				}
 				req := factory.clientSet.CoreV1().
 					RESTClient().
 					Post().
 					Resource("pods").
 					Name(pod.Name).
 					Namespace(pod.Namespace).
SubResource("exec") + log.Debugf("Collecting pprof using URL: %s", req.URL()) req.VersionedParams(&corev1.PodExecOptions{ Command: command, Container: pod.Spec.Containers[0].Name, @@ -117,6 +148,7 @@ func (p *pprof) getPProf() { Stderr: true, Stdout: true, }, scheme.ParameterCodec) + log.Debugf("Executing %s in pod %s", command, pod.Name) exec, err := remotecommand.NewSPDYExecutor(factory.restConfig, "POST", req.URL()) if err != nil { log.Errorf("Failed to execute pprof command on %s: %s", target.Name, err) @@ -124,10 +156,10 @@ func (p *pprof) getPProf() { err = exec.Stream(remotecommand.StreamOptions{ Stdin: nil, Stdout: f, - Stderr: f, + Stderr: &stderr, }) if err != nil { - log.Errorf("Failed to get results from %s: %s", target.Name, err) + log.Errorf("Failed to get pprof from %s: %s", pod.Name, stderr.String()) } }(target, pod) } @@ -139,3 +171,55 @@ func (p *pprof) stop() (int, error) { p.stopChannel <- true return 0, nil } + +func readCerts(cert, privKey string) (*os.File, *os.File, error) { + var certFd, privKeyFd *os.File + certFd, err := os.Open(cert) + if err != nil { + return certFd, privKeyFd, fmt.Errorf("Cannot read %s, skipping: %v", cert, err) + } + privKeyFd, err = os.Open(privKey) + if err != nil { + return certFd, privKeyFd, fmt.Errorf("Cannot read %s, skipping: %v", cert, err) + } + return certFd, privKeyFd, nil +} + +func copyCertsToPod(pod corev1.Pod, cert, privKey io.Reader) error { + var stderr bytes.Buffer + log.Infof("Copying certificate and private key into %s %s", pod.Name, pod.Spec.Containers[0].Name) + fMap := map[string]io.Reader{ + "/tmp/pprof.crt": cert, + "/tmp/pprof.key": privKey, + } + for dest, f := range fMap { + req := factory.clientSet.CoreV1(). + RESTClient(). + Post(). + Resource("pods"). + Name(pod.Name). + Namespace(pod.Namespace). + SubResource("exec") + req.VersionedParams(&corev1.PodExecOptions{ + Command: []string{"tee", dest}, + Container: pod.Spec.Containers[0].Name, + Stdin: true, + Stderr: true, + Stdout: false, + }, scheme.ParameterCodec) + exec, err := remotecommand.NewSPDYExecutor(factory.restConfig, "POST", req.URL()) + if err != nil { + return fmt.Errorf("Failed to establish SPDYExecutor on %s: %s", pod.Name, err) + } + err = exec.Stream(remotecommand.StreamOptions{ + Stdin: f, + Stdout: nil, + Stderr: &stderr, + }) + if err != nil { + return fmt.Errorf("Failed to copy file to %s: %s", pod.Name, stderr.Bytes()) + } + } + log.Infof("Certificate and private key copied into %s %s", pod.Name, pod.Spec.Containers[0].Name) + return nil +}