Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CONTINT-3892] unify mutation metrics for admission controller webhooks #23770

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions pkg/clusteragent/admission/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,19 @@ const (
WebhooksControllerName = "webhooks"
)

// Mutation errors
const (
InvalidInput = "invalid_input"
InternalError = "internal_error"
ConfigInjectionError = "config_injection_error"
)

// Status tags
const (
StatusSuccess = "success"
StatusError = "error"
)

// Telemetry metrics
var (
ReconcileSuccess = telemetry.NewGaugeWithOpts("admission_webhooks", "reconcile_success",
Expand All @@ -32,10 +45,7 @@ var (
[]string{}, "Time left before the certificate expires in hours.",
telemetry.Options{NoDoubleUnderscoreSep: true})
MutationAttempts = telemetry.NewGaugeWithOpts("admission_webhooks", "mutation_attempts",
[]string{"mutation_type", "injected", "language", "auto_detected"}, "Number of pod mutation attempts by mutation type (agent config, standard tags, lib injection).",
telemetry.Options{NoDoubleUnderscoreSep: true})
MutationErrors = telemetry.NewGaugeWithOpts("admission_webhooks", "mutation_errors",
[]string{"mutation_type", "reason", "language", "auto_detected"}, "Number of mutation failures by mutation type (agent config, standard tags, lib injection).",
[]string{"mutation_type", "status", "injected", "error"}, "Number of pod mutation attempts by mutation type",
telemetry.Options{NoDoubleUnderscoreSep: true})
WebhooksReceived = telemetry.NewCounterWithOpts("admission_webhooks", "webhooks_received",
[]string{"mutation_type"}, "Number of mutation webhook requests received.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"k8s.io/client-go/dynamic"

"github.com/DataDog/datadog-agent/cmd/cluster-agent/admission"
"github.com/DataDog/datadog-agent/pkg/clusteragent/admission/metrics"
"github.com/DataDog/datadog-agent/pkg/clusteragent/admission/mutate/common"
"github.com/DataDog/datadog-agent/pkg/config"
apiCommon "github.com/DataDog/datadog-agent/pkg/util/kubernetes/apiserver/common"
Expand Down Expand Up @@ -107,36 +108,38 @@ func (w *Webhook) MutateFunc() admission.WebhookFunc {

// mutate handles mutating pod requests for the agentsidecar webhook
func (w *Webhook) mutate(request *admission.MutateRequest) ([]byte, error) {
return common.Mutate(request.Raw, request.Namespace, w.injectAgentSidecar, request.DynamicClient)
return common.Mutate(request.Raw, request.Namespace, w.Name(), w.injectAgentSidecar, request.DynamicClient)
}

func (w *Webhook) injectAgentSidecar(pod *corev1.Pod, _ string, _ dynamic.Interface) error {
func (w *Webhook) injectAgentSidecar(pod *corev1.Pod, _ string, _ dynamic.Interface) (bool, error) {
if pod == nil {
return errors.New("can't inject agent sidecar into nil pod")
return false, errors.New(metrics.InvalidInput)
}

for _, container := range pod.Spec.Containers {
if container.Name == agentSidecarContainerName {
log.Info("skipping agent sidecar injection: agent sidecar already exists")
return nil
return false, nil
}
}

agentSidecarContainer := getDefaultSidecarTemplate(w.containerRegistry)

err := applyProviderOverrides(agentSidecarContainer)
if err != nil {
return err
log.Errorf("Failed to apply provider overrides: %v", err)
return false, errors.New(metrics.InvalidInput)
}

// User-provided overrides should always be applied last in order to have highest override-priority
err = applyProfileOverrides(agentSidecarContainer)
if err != nil {
return err
log.Errorf("Failed to apply profile overrides: %v", err)
return false, errors.New(metrics.InvalidInput)
}

pod.Spec.Containers = append(pod.Spec.Containers, *agentSidecarContainer)
return nil
return true, nil
}

func getDefaultSidecarTemplate(containerRegistry string) *corev1.Container {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@ package agentsidecar

import (
"fmt"
"github.com/DataDog/datadog-agent/pkg/config"
apicommon "github.com/DataDog/datadog-agent/pkg/util/kubernetes/apiserver/common"
"reflect"
"testing"

"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"reflect"
"testing"

"github.com/DataDog/datadog-agent/pkg/config"
apicommon "github.com/DataDog/datadog-agent/pkg/util/kubernetes/apiserver/common"
)

const commonRegistry = "gcr.io/datadoghq"
Expand All @@ -28,6 +30,7 @@ func TestInjectAgentSidecar(t *testing.T) {
provider string
profilesJSON string
ExpectError bool
ExpectInjection bool
ExpectedPodAfterInjection func() *corev1.Pod
}{
{
Expand All @@ -36,6 +39,7 @@ func TestInjectAgentSidecar(t *testing.T) {
provider: "",
profilesJSON: "",
ExpectError: true,
ExpectInjection: false,
ExpectedPodAfterInjection: func() *corev1.Pod { return nil },
},
{
Expand All @@ -50,9 +54,10 @@ func TestInjectAgentSidecar(t *testing.T) {
},
},
},
provider: "",
profilesJSON: "[]",
ExpectError: false,
provider: "",
profilesJSON: "[]",
ExpectError: false,
ExpectInjection: true,
ExpectedPodAfterInjection: func() *corev1.Pod {
return &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -80,9 +85,10 @@ func TestInjectAgentSidecar(t *testing.T) {
},
},
},
provider: "",
profilesJSON: "[]",
ExpectError: false,
provider: "",
profilesJSON: "[]",
ExpectError: false,
ExpectInjection: false,
ExpectedPodAfterInjection: func() *corev1.Pod {
return &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -110,9 +116,10 @@ func TestInjectAgentSidecar(t *testing.T) {
},
},
},
provider: "",
profilesJSON: "[]",
ExpectError: false,
provider: "",
profilesJSON: "[]",
ExpectError: false,
ExpectInjection: false,
ExpectedPodAfterInjection: func() *corev1.Pod {
return &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -139,9 +146,10 @@ func TestInjectAgentSidecar(t *testing.T) {
},
},
},
provider: "fargate",
profilesJSON: "[]",
ExpectError: false,
provider: "fargate",
profilesJSON: "[]",
ExpectError: false,
ExpectInjection: true,
ExpectedPodAfterInjection: func() *corev1.Pod {
sidecar := *getDefaultSidecarTemplate(commonRegistry)
withEnvOverrides(&sidecar, corev1.EnvVar{
Expand Down Expand Up @@ -191,7 +199,8 @@ func TestInjectAgentSidecar(t *testing.T) {
}
}
}]`,
ExpectError: false,
ExpectError: false,
ExpectInjection: true,
ExpectedPodAfterInjection: func() *corev1.Pod {
sidecar := *getDefaultSidecarTemplate(commonRegistry)

Expand Down Expand Up @@ -231,14 +240,20 @@ func TestInjectAgentSidecar(t *testing.T) {

webhook := NewWebhook()

err := webhook.injectAgentSidecar(test.Pod, "", nil)
injected, err := webhook.injectAgentSidecar(test.Pod, "", nil)

if test.ExpectError {
assert.Error(tt, err, "expected non-nil error to be returned")
} else {
assert.NoError(tt, err, "expected returned error to be nil")
}

if test.ExpectInjection {
assert.True(t, injected)
} else {
assert.False(t, injected)
}

expectedPod := test.ExpectedPodAfterInjection()
if expectedPod == nil {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ func (w *Webhook) MutateFunc() admission.WebhookFunc {

// injectAutoInstrumentation injects APM libraries into pods
func (w *Webhook) injectAutoInstrumentation(request *admission.MutateRequest) ([]byte, error) {
return mutatecommon.Mutate(request.Raw, request.Namespace, w.inject, request.DynamicClient)
return mutatecommon.Mutate(request.Raw, request.Namespace, w.Name(), w.inject, request.DynamicClient)
}

func initContainerName(lang language) string {
Expand All @@ -283,34 +283,34 @@ func libImageName(registry string, lang language, tag string) string {
return fmt.Sprintf(imageFormat, registry, lang, tag)
}

func (w *Webhook) inject(pod *corev1.Pod, _ string, _ dynamic.Interface) error {
func (w *Webhook) inject(pod *corev1.Pod, _ string, _ dynamic.Interface) (bool, error) {
if pod == nil {
return errors.New("cannot inject lib into nil pod")
return false, errors.New(metrics.InvalidInput)
}
injectApmTelemetryConfig(pod)

if w.isEnabledInNamespace(pod.Namespace) {
// if Single Step Instrumentation is enabled, pods can still opt out using the label
if pod.GetLabels()[common.EnabledLabelKey] == "false" {
log.Debugf("Skipping single step instrumentation of pod %q due to label", mutatecommon.PodString(pod))
return nil
return false, nil
}
} else if !mutatecommon.ShouldMutatePod(pod) {
log.Debugf("Skipping auto instrumentation of pod %q because pod mutation is not allowed", mutatecommon.PodString(pod))
return nil
return false, nil
}
for _, lang := range supportedLanguages {
if containsInitContainer(pod, initContainerName(lang)) {
// The admission can be reinvocated for the same pod
// Fast return if we injected the library already
log.Debugf("Init container %q already exists in pod %q", initContainerName(lang), mutatecommon.PodString(pod))
return nil
return false, nil
}
}

libsToInject, autoDetected := w.extractLibInfo(pod)
if len(libsToInject) == 0 {
return nil
return false, nil
}
// Inject env variables used for Onboarding KPIs propagation
var injectionType string
Expand All @@ -324,7 +324,12 @@ func (w *Webhook) inject(pod *corev1.Pod, _ string, _ dynamic.Interface) error {
injectionType = localLibraryInstrumentationInstallType
}

return w.injectAutoInstruConfig(pod, libsToInject, autoDetected, injectionType)
if err := w.injectAutoInstruConfig(pod, libsToInject, autoDetected, injectionType); err != nil {
log.Errorf("failed to inject auto instrumentation configurations: %v", err)
return false, errors.New(metrics.ConfigInjectionError)
}

return true, nil
}

func injectApmTelemetryConfig(pod *corev1.Pod) {
Expand Down Expand Up @@ -579,7 +584,6 @@ func (w *Webhook) injectAutoInstruConfig(pod *corev1.Pod, libsToInject []libInfo
langStr := string(lib.lang)
defer func() {
metrics.LibInjectionAttempts.Inc(langStr, strconv.FormatBool(injected), strconv.FormatBool(autoDetected), injectionType)
metrics.MutationAttempts.Inc(w.Name(), strconv.FormatBool(injected), langStr, strconv.FormatBool(autoDetected))
}()

_ = mutatecommon.InjectEnv(pod, localLibraryInstrumentationInstallTypeEnvVar)
Expand Down Expand Up @@ -637,14 +641,12 @@ func (w *Webhook) injectAutoInstruConfig(pod *corev1.Pod, libsToInject []libInfo
}})
default:
metrics.LibInjectionErrors.Inc(langStr, strconv.FormatBool(autoDetected), injectionType)
metrics.MutationErrors.Inc(w.Name(), "unsupported language", langStr, strconv.FormatBool(autoDetected))
lastError = fmt.Errorf("language %q is not supported. Supported languages are %v", lib.lang, supportedLanguages)
continue
}

if err != nil {
metrics.LibInjectionErrors.Inc(langStr, strconv.FormatBool(autoDetected), injectionType)
metrics.MutationErrors.Inc(w.Name(), "requirements config error", langStr, strconv.FormatBool(autoDetected))
lastError = err
log.Errorf("Error injecting library config requirements: %s", err)
}
Expand All @@ -659,15 +661,13 @@ func (w *Webhook) injectAutoInstruConfig(pod *corev1.Pod, libsToInject []libInfo
if err != nil {
langStr := string(lang)
metrics.LibInjectionErrors.Inc(langStr, strconv.FormatBool(autoDetected), injectionType)
metrics.MutationErrors.Inc(w.Name(), "cannot inject init container", langStr, strconv.FormatBool(autoDetected))
lastError = err
log.Errorf("Cannot inject init container into pod %s: %s", mutatecommon.PodString(pod), err)
}
err = injectLibConfig(pod, lang)
if err != nil {
langStr := string(lang)
metrics.LibInjectionErrors.Inc(langStr, strconv.FormatBool(autoDetected), injectionType)
metrics.MutationErrors.Inc(w.Name(), "cannot inject lib config", langStr, strconv.FormatBool(autoDetected))
lastError = err
log.Errorf("Cannot inject library configuration into pod %s: %s", mutatecommon.PodString(pod), err)
}
Expand All @@ -676,7 +676,6 @@ func (w *Webhook) injectAutoInstruConfig(pod *corev1.Pod, libsToInject []libInfo
// try to inject all if the annotation is set
if err := injectLibConfig(pod, "all"); err != nil {
metrics.LibInjectionErrors.Inc("all", strconv.FormatBool(autoDetected), injectionType)
metrics.MutationErrors.Inc(w.Name(), "cannot inject lib config", "all", strconv.FormatBool(autoDetected))
lastError = err
log.Errorf("Cannot inject library configuration into pod %s: %s", mutatecommon.PodString(pod), err)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1877,7 +1877,7 @@ func TestInjectAutoInstrumentation(t *testing.T) {
apmInstrumentationWebhook, errInitAPMInstrumentation = NewWebhook(wmeta)
require.NoError(t, errInitAPMInstrumentation)

err := apmInstrumentationWebhook.inject(tt.pod, "", fake.NewSimpleDynamicClient(scheme.Scheme))
_, err := apmInstrumentationWebhook.inject(tt.pod, "", fake.NewSimpleDynamicClient(scheme.Scheme))
require.False(t, (err != nil) != tt.wantErr)

container := tt.pod.Spec.Containers[0]
Expand Down
12 changes: 8 additions & 4 deletions pkg/clusteragent/admission/mutate/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,33 @@ package common
import (
"encoding/json"
"fmt"
"strconv"

"github.com/wI2L/jsondiff"
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/dynamic"

admCommon "github.com/DataDog/datadog-agent/pkg/clusteragent/admission/common"
"github.com/DataDog/datadog-agent/pkg/clusteragent/admission/metrics"
"github.com/DataDog/datadog-agent/pkg/config"
"github.com/DataDog/datadog-agent/pkg/util/log"
)

// MutationFunc is a function that mutates a pod
type MutationFunc func(*corev1.Pod, string, dynamic.Interface) error
type MutationFunc func(*corev1.Pod, string, dynamic.Interface) (bool, error)

// Mutate handles mutating pods and encoding and decoding admission
// requests and responses for the public mutate functions
func Mutate(rawPod []byte, ns string, m MutationFunc, dc dynamic.Interface) ([]byte, error) {
func Mutate(rawPod []byte, ns string, mutationType string, m MutationFunc, dc dynamic.Interface) ([]byte, error) {
var pod corev1.Pod
if err := json.Unmarshal(rawPod, &pod); err != nil {
return nil, fmt.Errorf("failed to decode raw object: %v", err)
}

if err := m(&pod, ns, dc); err != nil {
return nil, err
if injected, err := m(&pod, ns, dc); err != nil {
metrics.MutationAttempts.Inc(mutationType, metrics.StatusError, strconv.FormatBool(injected), err.Error())
} else {
metrics.MutationAttempts.Inc(mutationType, metrics.StatusSuccess, strconv.FormatBool(injected), "")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what happen if we provided an empty error type. Do we still have a tag like error: or, the Inc() function will not put the error tag?

should we do instead?

Suggested change
metrics.MutationAttempts.Inc(mutationType, metrics.StatusSuccess, strconv.FormatBool(injected), "")
metrics.MutationAttempts.Inc(mutationType, metrics.StatusSuccess, strconv.FormatBool(injected))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if the metrics.MutationAttempts field should be created with NewSimpleCounterWithOpts()instead of a gauge. AnSimple` to not have a fix number of tags

}

bytes, err := json.Marshal(pod)
Expand Down
Loading
Loading