Skip to content

Commit

Permalink
Add certificate auth to pprof collection
Browse files Browse the repository at this point in the history
Signed-off-by: Raul Sevilla <[email protected]>
  • Loading branch information
rsevilla87 committed Apr 13, 2021
1 parent a4304fd commit 137bd1b
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 11 deletions.
19 changes: 15 additions & 4 deletions docs/measurements.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Collects latencies from the different pod startup phases, these **latency metric
This measurement sends its metrics to the index configured by *esIndex*. The metrics collected are pod latency histograms and pod latency quantiles P99, P95 and P50.
Pod latency sample:
```json
{
"timestamp": "2020-11-15T20:28:59.598727718Z",
Expand Down Expand Up @@ -108,10 +109,13 @@ WARN[2020-12-15 12:37:08] P99 Ready latency (2929ms) higher than configured thre
## Pprof collection
This measurement takes care of collecting golang profiling information from pods. To do so, kube-burner connects to pods with the given labels running in certain namespaces. This measurement uses an implementation similar to `kubectl exec`, and as soon as it connects to one pod it executes the command `curl <pprofURL>` to get the pprof data. Pprof files are collected in a regular basis given by the parameter `pprofInterval` and these files are stored in the directory configured by the parameter `pprofDirectory` which by default is `pprof`.
It's also possible to configure a token to get pprof data from authenticated endoints such as kube-apiserver with the variable `bearerToken`.
This measurement takes care of collecting golang profiling information from pods. To do so, kube-burner connects to pods with the given labels running in certain namespaces. This measurement uses an implementation similar to `kubectl exec`, and as soon as it connects to one pod it executes the command `curl <pprofURL>` to get the pprof data. Pprof files are collected on a regular basis, at the interval configured by the parameter `pprofInterval`, and the collected pprof files are stored in the directory configured by the parameter `pprofDirectory`, which by default is `pprof`.
As some components require authentication to get profiling information, `kube-burner` provides two different methods to address it:

- bearerToken: This variable holds a valid Bearer token, which is used by cURL to get pprof data. This method is usually valid with the kube-apiserver and kube-controller-manager components
- cert + key: These variables point to local certificate and private key files, respectively. These files are copied to the remote pods and used by cURL to get pprof data. This method is usually valid with etcd.

An example of how to configure this measurement to collect pprof HEAP and CPU profiling data from kube-apiserver is shown below:
An example of how to configure this measurement to collect pprof HEAP and CPU profiling data from kube-apiserver and etcd is shown below:

```yaml
measurements:
Expand All @@ -130,6 +134,13 @@ An example of how to configure this measurement to collect pprof HEAP and CPU pr
labelSelector: {app: openshift-kube-apiserver}
bearerToken: thisIsNotAValidToken
url: https://localhost:6443/debug/pprof/profile?timeout=30
- name: etcd-heap
namespace: "openshift-etcd"
labelSelector: {app: etcd}
    cert: etcd-peer-cert.crt
    key: etcd-peer-cert.key
url: https://localhost:2379/debug/pprof/heap
```

**Note**: As mentioned before, this measurement requires cURL to be installed in the target pods.
**Note**: As mentioned before, this measurement requires the `curl` command to be available in the target pods.
4 changes: 4 additions & 0 deletions pkg/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ type PProftarget struct {
BearerToken string `yaml:"bearerToken"`
// URL target URL
URL string `yaml:"url"`
// Cert Client certificate file
Cert string `yaml:"cert"`
// Key Private key file
Key string `yaml:"key"`
}

// Measurement holds the measurement configuration
Expand Down
98 changes: 91 additions & 7 deletions pkg/measurements/pprof.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
package measurements

import (
"bytes"
"context"
"fmt"
"io"
"os"
"path"
"sync"
Expand Down Expand Up @@ -51,21 +53,26 @@ func (p *pprof) setConfig(cfg config.Measurement) {
}

func (p *pprof) start() {
var wg sync.WaitGroup
err := os.MkdirAll(p.directory, 0744)
if err != nil {
log.Fatalf("Error creating pprof directory: %s", err)
}
p.stopChannel = make(chan bool)
p.getPProf()
p.getPProf(&wg, true)
wg.Wait()
go func() {
defer close(p.stopChannel)
ticker := time.NewTicker(p.config.PProfInterval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
p.getPProf()
// Copy certificates only in the first iteration
p.getPProf(&wg, false)
wg.Wait()
case <-p.stopChannel:
ticker.Stop()
return
}
}
Expand All @@ -81,10 +88,13 @@ func getPods(target config.PProftarget) []corev1.Pod {
return podList.Items
}

func (p *pprof) getPProf() {
var wg sync.WaitGroup
func (p *pprof) getPProf(wg *sync.WaitGroup, copyCerts bool) {
var command []string
for _, target := range p.config.PProfTargets {
if target.BearerToken != "" && target.Cert != "" {
log.Errorf("bearerToken and cert auth methods cannot be specified together, skipping pprof target")
continue
}
log.Infof("Collecting %s pprof", target.Name)
podList := getPods(target)
for _, pod := range podList {
Expand All @@ -93,15 +103,35 @@ func (p *pprof) getPProf() {
defer wg.Done()
pprofFile := fmt.Sprintf("%s-%s-%d.pprof", target.Name, pod.Name, time.Now().Unix())
f, err := os.Create(path.Join(p.directory, pprofFile))
var stderr bytes.Buffer
if err != nil {
log.Errorf("Error creating pprof file %s: %s", pprofFile, err)
return
}
defer f.Close()
if target.Cert != "" && target.Key != "" && copyCerts {
cert, privKey, err := readCerts(target.Cert, target.Key)
if err != nil {
log.Error(err)
return
}
defer cert.Close()
defer privKey.Close()
if err != nil {
log.Error(err)
return
}
if err = copyCertsToPod(pod, cert, privKey); err != nil {
log.Error(err)
return
}
}
if target.BearerToken != "" {
command = []string{"curl", "-sSLkH", fmt.Sprintf("Authorization: Bearer %s", target.BearerToken), target.URL}
} else if target.Cert != "" && target.Key != "" {
command = []string{"curl", "-sSLk", "--cert", "/tmp/pprof.crt", "--key", "/tmp/pprof.key", target.URL}
} else {
command = []string{"curl", "-sSLkH", target.URL}
command = []string{"curl", "-sSLk", target.URL}
}
req := factory.clientSet.CoreV1().
RESTClient().
Expand All @@ -110,24 +140,26 @@ func (p *pprof) getPProf() {
Name(pod.Name).
Namespace(pod.Namespace).
SubResource("exec")
log.Debugf("Collecting pprof using URL: %s", req.URL())
req.VersionedParams(&corev1.PodExecOptions{
Command: command,
Container: pod.Spec.Containers[0].Name,
Stdin: false,
Stderr: true,
Stdout: true,
}, scheme.ParameterCodec)
log.Debugf("Executing %s in pod %s", command, pod.Name)
exec, err := remotecommand.NewSPDYExecutor(factory.restConfig, "POST", req.URL())
if err != nil {
log.Errorf("Failed to execute pprof command on %s: %s", target.Name, err)
}
err = exec.Stream(remotecommand.StreamOptions{
Stdin: nil,
Stdout: f,
Stderr: f,
Stderr: &stderr,
})
if err != nil {
log.Errorf("Failed to get results from %s: %s", target.Name, err)
log.Errorf("Failed to get pprof from %s: %s", pod.Name, stderr.String())
}
}(target, pod)
}
Expand All @@ -139,3 +171,55 @@ func (p *pprof) stop() (int, error) {
p.stopChannel <- true
return 0, nil
}

func readCerts(cert, privKey string) (*os.File, *os.File, error) {
var certFd, privKeyFd *os.File
certFd, err := os.Open(cert)
if err != nil {
return certFd, privKeyFd, fmt.Errorf("Cannot read %s, skipping: %v", cert, err)
}
privKeyFd, err = os.Open(privKey)
if err != nil {
return certFd, privKeyFd, fmt.Errorf("Cannot read %s, skipping: %v", cert, err)
}
return certFd, privKeyFd, nil
}

// copyCertsToPod streams the client certificate and private key into the first
// container of the given pod, writing them to /tmp/pprof.crt and /tmp/pprof.key
// by piping each reader into a remote `tee` command over an exec session.
func copyCertsToPod(pod corev1.Pod, cert, privKey io.Reader) error {
	var errBuf bytes.Buffer
	containerName := pod.Spec.Containers[0].Name
	log.Infof("Copying certificate and private key into %s %s", pod.Name, containerName)
	transfers := []struct {
		dest   string
		source io.Reader
	}{
		{dest: "/tmp/pprof.crt", source: cert},
		{dest: "/tmp/pprof.key", source: privKey},
	}
	for _, transfer := range transfers {
		execReq := factory.clientSet.CoreV1().
			RESTClient().
			Post().
			Resource("pods").
			Name(pod.Name).
			Namespace(pod.Namespace).
			SubResource("exec")
		execReq.VersionedParams(&corev1.PodExecOptions{
			Command:   []string{"tee", transfer.dest},
			Container: containerName,
			Stdin:     true,
			Stderr:    true,
			Stdout:    false,
		}, scheme.ParameterCodec)
		executor, err := remotecommand.NewSPDYExecutor(factory.restConfig, "POST", execReq.URL())
		if err != nil {
			return fmt.Errorf("Failed to establish SPDYExecutor on %s: %s", pod.Name, err)
		}
		err = executor.Stream(remotecommand.StreamOptions{
			Stdin:  transfer.source,
			Stdout: nil,
			Stderr: &errBuf,
		})
		if err != nil {
			return fmt.Errorf("Failed to copy file to %s: %s", pod.Name, errBuf.Bytes())
		}
	}
	log.Infof("Certificate and private key copied into %s %s", pod.Name, containerName)
	return nil
}

0 comments on commit 137bd1b

Please sign in to comment.