Skip to content

Commit

Permalink
[cluster-agent] Fix CWS Instrumentation metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
Gui774ume committed May 7, 2024
1 parent 46aac3c commit 7b563ef
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 28 deletions.
16 changes: 12 additions & 4 deletions pkg/clusteragent/admission/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,19 @@ var (
LibInjectionErrors = telemetry.NewCounterWithOpts("admission_webhooks", "library_injection_errors",
[]string{"language", "auto_detected", "injection_type"}, "Number of library injection failures by language and injection type",
telemetry.Options{NoDoubleUnderscoreSep: true})
CWSExecInstrumentationAttempts = telemetry.NewCounterWithOpts("admission_webhooks", "cws_exec_instrumentation_attempts",
[]string{"mode", "injected", "reason"}, "Number of exec requests instrumentation attempts by CWS Instrumentation mode",
CWSExecInstrumentationAttempts = telemetry.NewHistogramWithOpts(
"admission_webhooks",
"cws_exec_instrumentation_attempts",
[]string{"mode", "injected", "reason"},
"Distribution of exec requests instrumentation attempts by CWS Instrumentation mode",
prometheus.LinearBuckets(0, 1, 1),
telemetry.Options{NoDoubleUnderscoreSep: true})
CWSPodInstrumentationAttempts = telemetry.NewCounterWithOpts("admission_webhooks", "cws_pod_instrumentation_attempts",
[]string{"mode", "injected", "reason"}, "Number of pod requests instrumentation attempts by CWS Instrumentation mode",
CWSPodInstrumentationAttempts = telemetry.NewHistogramWithOpts(
"admission_webhooks",
"cws_pod_instrumentation_attempts",
[]string{"mode", "injected", "reason"},
"Distribution of pod requests instrumentation attempts by CWS Instrumentation mode",
prometheus.LinearBuckets(0, 1, 1),
telemetry.Options{NoDoubleUnderscoreSep: true})
RemoteConfigs = telemetry.NewGaugeWithOpts("admission_webhooks", "rc_provider_configs",
[]string{}, "Number of valid remote configurations.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -408,47 +408,47 @@ func (ci *CWSInstrumentation) injectCWSCommandInstrumentation(exec *corev1.PodEx

if exec == nil || userInfo == nil {
log.Errorf("cannot inject CWS instrumentation into nil exec options or nil userInfo")
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsNilInputReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsNilInputReason)
return false, errors.New(metrics.InvalidInput)
}
if len(exec.Command) == 0 {
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsNilCommandReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsNilCommandReason)
return false, nil
}

// ignore the copy command from this admission controller
if ci.mode == RemoteCopy {
if userInfo.Username == ci.clusterAgentServiceAccount {
log.Debugf("Ignoring exec request to %s from the cluster agent", name)
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsClusterAgentServiceAccountReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsClusterAgentServiceAccountReason)
return false, nil
}

// fall back in case the service account filter somehow didn't work
if len(exec.Command) > len(k8scp.CWSRemoteCopyCommand) && slices.Equal(exec.Command[0:len(k8scp.CWSRemoteCopyCommand)], k8scp.CWSRemoteCopyCommand) {
log.Debugf("Ignoring kubectl cp requests to %s from the cluster agent", name)
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsClusterAgentKubectlCPReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsClusterAgentKubectlCPReason)
return false, nil
}
}

// is the namespace / container targeted by the instrumentation ?
if ci.filter.IsExcluded(nil, exec.Container, "", ns) {
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsExcludedResourceReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsExcludedResourceReason)
return false, nil
}

// check if the pod has been instrumented
pod, err := apiClient.CoreV1().Pods(ns).Get(context.Background(), name, metav1.GetOptions{})
if err != nil || pod == nil {
log.Errorf("couldn't describe pod %s in namespace %s from the API server: %v", name, ns, err)
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsDescribePodErrorReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsDescribePodErrorReason)
return false, errors.New(metrics.InternalError)
}

// is the pod targeted by the instrumentation ?
if ci.filter.IsExcluded(pod.Annotations, "", "", "") {
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsExcludedByAnnotationReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsExcludedByAnnotationReason)
return false, nil
}

Expand All @@ -461,7 +461,7 @@ func (ci *CWSInstrumentation) injectCWSCommandInstrumentation(exec *corev1.PodEx
if !isPodCWSInstrumentationReady(pod.Annotations) {
// pod isn't instrumented, do not attempt to override the pod exec command
log.Debugf("Ignoring exec request into %s, pod not instrumented yet", common.PodString(pod))
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsPodNotInstrumentedReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsPodNotInstrumentedReason)
return false, nil
}
case RemoteCopy:
Expand All @@ -470,7 +470,7 @@ func (ci *CWSInstrumentation) injectCWSCommandInstrumentation(exec *corev1.PodEx
if !isPodCWSInstrumentationReady(pod.Annotations) {
// pod isn't instrumented, do not attempt to override the pod exec command
log.Debugf("Ignoring exec request into %s, pod not instrumented yet", common.PodString(pod))
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsPodNotInstrumentedReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsPodNotInstrumentedReason)
return false, nil
}
cwsInstrumentationRemotePath = filepath.Join(cwsMountPath, "cws-instrumentation")
Expand All @@ -479,7 +479,7 @@ func (ci *CWSInstrumentation) injectCWSCommandInstrumentation(exec *corev1.PodEx
if readOnly := ci.hasReadonlyRootfs(pod, exec.Container); readOnly {
// readonly rootfs containers can't be instrumented
log.Errorf("Ignoring exec request into %s, container %s has read only rootfs. Try enabling admission_controller.cws_instrumentation.remote_copy.mount_volume", common.PodString(pod), exec.Container)
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsReadonlyFilesystemReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsReadonlyFilesystemReason)
return false, errors.New(metrics.InvalidInput)
}
cwsInstrumentationRemotePath = "/cws-instrumentation"
Expand All @@ -488,43 +488,43 @@ func (ci *CWSInstrumentation) injectCWSCommandInstrumentation(exec *corev1.PodEx
arch, err := ci.resolveNodeArch(pod.Spec.NodeName, apiClient)
if err != nil {
log.Errorf("Ignoring exec request into %s: %v", common.PodString(pod), err)
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsMissingArchReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsMissingArchReason)
return false, errors.New(metrics.InternalError)
}
cwsInstrumentationLocalPath := filepath.Join(cwsInstrumentationEmbeddedPath, "cws-instrumentation."+arch)

// check if the pod is ready to be exec-ed into
if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
log.Errorf("Ignoring exec request into %s: cannot exec into a container in a completed pod; current phase is %s", common.PodString(pod), pod.Status.Phase)
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsCompletedPodReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsCompletedPodReason)
return false, errors.New(metrics.InvalidInput)
}

// check if the input container exists, or select the default one to which the user will be redirected
container, err := podcmd.FindOrDefaultContainerByName(pod, exec.Container, true, nil)
if err != nil {
log.Errorf("Ignoring exec request into %s, invalid container: %v", common.PodString(pod), err)
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsInvalidInputContainerReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsInvalidInputContainerReason)
return false, errors.New(metrics.InvalidInput)
}

// copy CWS instrumentation directly to the target container
if err := ci.injectCWSCommandInstrumentationRemoteCopy(pod, container.Name, cwsInstrumentationLocalPath, cwsInstrumentationRemotePath); err != nil {
log.Errorf("Ignoring exec request into %s, remote copy failed: %v", common.PodString(pod), err)
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsRemoteCopyFailedReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsRemoteCopyFailedReason)
return false, errors.New(metrics.InternalError)
}
default:
log.Errorf("Ignoring exec request into %s, unknown CWS Instrumentation mode %v", common.PodString(pod), ci.mode)
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsUnknownModeReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsUnknownModeReason)
return false, errors.New(metrics.InvalidInput)
}

// prepare the user session context
userSessionCtx, err := usersessions.PrepareK8SUserSessionContext(userInfo, cwsUserSessionDataMaxSize)
if err != nil {
log.Debugf("ignoring instrumentation of %s: %v", common.PodString(pod), err)
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsCredentialsSerializationErrorReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsCredentialsSerializationErrorReason)
return false, errors.New(metrics.InternalError)
}

Expand All @@ -539,7 +539,7 @@ func (ci *CWSInstrumentation) injectCWSCommandInstrumentation(exec *corev1.PodEx

if exec.Command[5] == string(userSessionCtx) {
log.Debugf("Exec request into %s is already instrumented, ignoring", common.PodString(pod))
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsAlreadyInstrumentedReason)
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsAlreadyInstrumentedReason)
return true, nil
}
}
Expand All @@ -557,7 +557,7 @@ func (ci *CWSInstrumentation) injectCWSCommandInstrumentation(exec *corev1.PodEx
}, exec.Command...)

log.Debugf("Pod exec request to %s by %s is now instrumented for CWS", common.PodString(pod), userInfo.Username)
metrics.CWSExecInstrumentationAttempts.Inc(ci.mode.String(), "true", "")
metrics.CWSExecInstrumentationAttempts.Observe(1, ci.mode.String(), "true", "")
injected = true

return injected, nil
Expand All @@ -580,19 +580,19 @@ func (ci *CWSInstrumentation) injectForPod(request *admission.MutateRequest) ([]
func (ci *CWSInstrumentation) injectCWSPodInstrumentation(pod *corev1.Pod, ns string, _ dynamic.Interface) (bool, error) {
if pod == nil {
log.Errorf("cannot inject CWS instrumentation into nil pod")
metrics.CWSPodInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsNilInputReason)
metrics.CWSPodInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsNilInputReason)
return false, errors.New(metrics.InvalidInput)
}

// is the pod targeted by the instrumentation ?
if ci.filter.IsExcluded(pod.Annotations, "", "", ns) {
metrics.CWSPodInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsExcludedResourceReason)
metrics.CWSPodInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsExcludedResourceReason)
return false, nil
}

// check if the pod has already been instrumented
if isPodCWSInstrumentationReady(pod.Annotations) {
metrics.CWSPodInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsAlreadyInstrumentedReason)
metrics.CWSPodInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsAlreadyInstrumentedReason)
// nothing to do, return
return true, nil
}
Expand All @@ -607,7 +607,7 @@ func (ci *CWSInstrumentation) injectCWSPodInstrumentation(pod *corev1.Pod, ns st
instrumented = ci.injectCWSPodInstrumentationRemoteCopy(pod)
default:
log.Errorf("Ignoring Pod %s admission request: unknown CWS Instrumentation mode %v", common.PodString(pod), ci.mode)
metrics.CWSPodInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsUnknownModeReason)
metrics.CWSPodInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsUnknownModeReason)
return false, errors.New(metrics.InvalidInput)
}

Expand All @@ -618,9 +618,9 @@ func (ci *CWSInstrumentation) injectCWSPodInstrumentation(pod *corev1.Pod, ns st
}
pod.Annotations[cwsInstrumentationPodAnotationStatus] = cwsInstrumentationPodAnotationReady
log.Debugf("Pod %s is now instrumented for CWS", common.PodString(pod))
metrics.CWSPodInstrumentationAttempts.Inc(ci.mode.String(), "true", "")
metrics.CWSPodInstrumentationAttempts.Observe(1, ci.mode.String(), "true", "")
} else {
metrics.CWSPodInstrumentationAttempts.Inc(ci.mode.String(), "false", cwsNoInstrumentationNeededReason)
metrics.CWSPodInstrumentationAttempts.Observe(1, ci.mode.String(), "false", cwsNoInstrumentationNeededReason)
}

return true, nil
Expand Down

0 comments on commit 7b563ef

Please sign in to comment.