From 6e80c052674f6d14dfb1e5a2b071810541fd3f7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Wei=C3=9Fe?= Date: Thu, 23 May 2024 17:16:04 +0200 Subject: [PATCH] coordinator: make metrics endpoint configurable --- coordinator/main.go | 16 ++++++++++++++-- docs/docs/architecture/observability.md | 11 +++++------ e2e/internal/contrasttest/contrasttest.go | 1 + e2e/openssl/openssl_test.go | 8 +++++++- internal/kuberesource/mutators.go | 23 ++++++++++++++++++++++- internal/kuberesource/parts.go | 5 +---- 6 files changed, 50 insertions(+), 14 deletions(-) diff --git a/coordinator/main.go b/coordinator/main.go index 1846a3bb6..771173549 100644 --- a/coordinator/main.go +++ b/coordinator/main.go @@ -19,6 +19,10 @@ import ( "golang.org/x/sync/errgroup" ) +const ( + metricsPortEnvVar = "CONTRAST_METRICS_PORT" +) + func main() { if err := run(); err != nil { os.Exit(1) @@ -43,6 +47,8 @@ func run() (retErr error) { return fmt.Errorf("setting up mount: %w", err) } + metricsPort := os.Getenv(metricsPortEnvVar) + caInstance, err := ca.New() if err != nil { return fmt.Errorf("creating CA: %w", err) @@ -57,7 +63,13 @@ func run() (retErr error) { eg := errgroup.Group{} eg.Go(func() error { - logger.Info("Starting prometheus /metrics endpoint") + if metricsPort == "" { + return nil + } + if metricsPort == userapi.Port || metricsPort == meshapi.Port { + return fmt.Errorf("invalid port for metrics endpoint: %s", metricsPort) + } + logger.Info("Starting prometheus /metrics endpoint on port " + metricsPort) mux := http.NewServeMux() mux.Handle("/metrics", promhttp.InstrumentMetricHandler( promRegistry, promhttp.HandlerFor( @@ -65,7 +77,7 @@ func run() (retErr error) { promhttp.HandlerOpts{Registry: promRegistry}, ), )) - if err := http.ListenAndServe(":9102", mux); err != nil { + if err := http.ListenAndServe(":"+metricsPort, mux); err != nil { return fmt.Errorf("serving Prometheus endpoint: %w", err) } return nil diff --git a/docs/docs/architecture/observability.md 
b/docs/docs/architecture/observability.md index 189caa709..fc71fbffa 100644 --- a/docs/docs/architecture/observability.md +++ b/docs/docs/architecture/observability.md @@ -1,6 +1,6 @@ # Observability -The Contrast Coordinator exposes metrics in the +The Contrast Coordinator can expose metrics in the [Prometheus](https://prometheus.io/) format. These can be monitored to quickly identify problems in the gRPC layer or attestation errors. Prometheus metrics are numerical values associated with a name and additional key/values pairs, @@ -8,11 +8,10 @@ called labels. ## Exposed metrics -The Coordinator pod has the annotation `prometheus.io/scrape` set to `true` so -it can be found by the [service discovery of -Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config). -The metrics can be accessed at the Coordinator pod at port `9102` under the -`/metrics` endpoint. +The metrics can be accessed at the Coordinator pod at the port specified in the +`CONTRAST_METRICS_PORT` environment variable under the `/metrics` endpoint. By +default, this environment variable isn't specified, hence no metrics will be +exposed. The Coordinator starts two gRPC servers, one for the user API on port `1313` and one for the mesh API on port `7777`. 
Metrics for both servers can be accessed diff --git a/e2e/internal/contrasttest/contrasttest.go b/e2e/internal/contrasttest/contrasttest.go index 58a9c4eda..fbe611c60 100644 --- a/e2e/internal/contrasttest/contrasttest.go +++ b/e2e/internal/contrasttest/contrasttest.go @@ -96,6 +96,7 @@ func (ct *ContrastTest) Init(t *testing.T, resources []any) { resources = kuberesource.PatchImages(resources, ct.ImageReplacements) resources = kuberesource.PatchNamespaces(resources, ct.Namespace) resources = kuberesource.PatchServiceMeshAdminInterface(resources, 9901) + resources = kuberesource.PatchCoordinatorMetrics(resources, 9102) resources = kuberesource.AddLogging(resources, "debug") unstructuredResources, err := kuberesource.ResourcesToUnstructured(resources) require.NoError(err) diff --git a/e2e/openssl/openssl_test.go b/e2e/openssl/openssl_test.go index 9f86d5228..36ae39414 100644 --- a/e2e/openssl/openssl_test.go +++ b/e2e/openssl/openssl_test.go @@ -12,6 +12,7 @@ import ( "encoding/json" "flag" "log" + "net" "os" "testing" "time" @@ -64,7 +65,12 @@ func TestOpenSSL(t *testing.T) { require.NoError(err) require.Len(frontendPods, 1, "pod not found: %s/%s", ct.Namespace, opensslFrontend) - _, stderr, err := ct.Kubeclient.Exec(ctx, ct.Namespace, frontendPods[0].Name, []string{"/bin/bash", "-c", "curl --fail coordinator:9102/metrics"}) + coordinatorPods, err := ct.Kubeclient.PodsFromOwner(ctx, ct.Namespace, "StatefulSet", "coordinator") + require.NoError(err) + require.NotEmpty(coordinatorPods, "pod not found: %s/%s", ct.Namespace, "coordinator") + + argv := []string{"/bin/bash", "-c", "curl --fail " + net.JoinHostPort(coordinatorPods[0].Status.PodIP, "9102") + "/metrics"} + _, stderr, err := ct.Kubeclient.Exec(ctx, ct.Namespace, frontendPods[0].Name, argv) require.NoError(err, "stderr: %q", stderr) }) diff --git a/internal/kuberesource/mutators.go b/internal/kuberesource/mutators.go index 359852f7f..cc6b2b8bf 100644 --- a/internal/kuberesource/mutators.go +++ 
b/internal/kuberesource/mutators.go @@ -12,7 +12,10 @@ import ( applycorev1 "k8s.io/client-go/applyconfigurations/core/v1" ) -const exposeServiceAnnotation = "contrast.edgeless.systems/expose-service" +const ( + exposeServiceAnnotation = "contrast.edgeless.systems/expose-service" + contrastRoleAnnotationKey = "contrast.edgeless.systems/pod-role" +) // AddInitializer adds an initializer and its shared volume to the resource. // @@ -194,6 +197,37 @@ func PatchServiceMeshAdminInterface(resources []any, port int32) []any { return resources } +// PatchCoordinatorMetrics enables Coordinator metrics on the specified port. +// +// For every StatefulSet whose pod template carries the coordinator role +// annotation, it sets CONTRAST_METRICS_PORT on the first container and adds a +// container port named "prometheus". StatefulSets lacking a pod template, pod +// metadata, pod spec or containers are skipped instead of causing a panic. +func PatchCoordinatorMetrics(resources []any, port int32) []any { + for _, resource := range resources { + r, ok := resource.(*applyappsv1.StatefulSetApplyConfiguration) + if !ok || r == nil || r.Spec == nil || r.Spec.Template == nil { + continue + } + tmpl := r.Spec.Template + if tmpl.ObjectMetaApplyConfiguration == nil || tmpl.Spec == nil || len(tmpl.Spec.Containers) == 0 { + continue + } + if tmpl.Annotations[contrastRoleAnnotationKey] != "coordinator" { + continue + } + // Only the first container is patched. + coordinator := &tmpl.Spec.Containers[0] + coordinator.WithEnv(NewEnvVar("CONTRAST_METRICS_PORT", fmt.Sprint(port))) + coordinator.WithPorts( + ContainerPort(). + WithName("prometheus"). + WithContainerPort(port), + ) + } + return resources +} + // MapPodSpec applies a function to a PodSpec in a Kubernetes resource. func MapPodSpec(resource any, f func(spec *applycorev1.PodSpecApplyConfiguration) *applycorev1.PodSpecApplyConfiguration) any { if resource == nil { diff --git a/internal/kuberesource/parts.go b/internal/kuberesource/parts.go index 789c2e7f9..8cd4d8b05 100644 --- a/internal/kuberesource/parts.go +++ b/internal/kuberesource/parts.go @@ -147,7 +147,7 @@ func Coordinator(namespace string) *CoordinatorConfig { WithWhenScaled(appsv1.DeletePersistentVolumeClaimRetentionPolicyType)). // TODO(burgerdev): this should be RETAIN for released coordinators. WithTemplate(PodTemplateSpec(). WithLabels(map[string]string{"app.kubernetes.io/name": "coordinator"}). - WithAnnotations(map[string]string{"contrast.edgeless.systems/pod-role": "coordinator", "prometheus.io/scrape": "true"}). 
+ WithAnnotations(map[string]string{"contrast.edgeless.systems/pod-role": "coordinator"}). WithSpec(PodSpec(). WithRuntimeClassName(runtimeHandler). WithContainers( @@ -170,9 +170,6 @@ func Coordinator(namespace string) *CoordinatorConfig { ContainerPort(). WithName("meshapi"). WithContainerPort(7777), - ContainerPort(). - WithName("prometheus"). - WithContainerPort(9102), ). WithReadinessProbe(Probe(). WithInitialDelaySeconds(1).