diff --git a/coordinator/main.go b/coordinator/main.go index 862edd0d8b..8ccc08b583 100644 --- a/coordinator/main.go +++ b/coordinator/main.go @@ -18,6 +18,10 @@ import ( "golang.org/x/sync/errgroup" ) +const ( + metricsPortEnvVar = "EDG_METRICS_PORT" +) + func main() { if err := run(); err != nil { os.Exit(1) @@ -38,6 +42,8 @@ func run() (retErr error) { logger.Info("Coordinator started") + metricsPort := os.Getenv(metricsPortEnvVar) + caInstance, err := ca.New() if err != nil { return fmt.Errorf("creating CA: %w", err) @@ -52,7 +58,13 @@ func run() (retErr error) { eg := errgroup.Group{} eg.Go(func() error { - logger.Info("Starting prometheus /metrics endpoint") + if metricsPort == "" { + return nil + } + if metricsPort == userapi.Port || metricsPort == meshapi.Port { + return fmt.Errorf("invalid port for metrics endpoint: %s", metricsPort) + } + logger.Info("Starting prometheus /metrics endpoint on port " + metricsPort) mux := http.NewServeMux() mux.Handle("/metrics", promhttp.InstrumentMetricHandler( promRegistry, promhttp.HandlerFor( @@ -60,7 +72,7 @@ func run() (retErr error) { promhttp.HandlerOpts{Registry: promRegistry}, ), )) - if err := http.ListenAndServe(":9102", mux); err != nil { + if err := http.ListenAndServe(":"+metricsPort, mux); err != nil { return fmt.Errorf("serving Prometheus endpoint: %w", err) } return nil diff --git a/docs/docs/architecture/observability.md b/docs/docs/architecture/observability.md index e95caa371d..ebf5cc7fdb 100644 --- a/docs/docs/architecture/observability.md +++ b/docs/docs/architecture/observability.md @@ -1,6 +1,6 @@ # Observability -The Contrast Coordinator exposes metrics in the +The Contrast Coordinator can expose metrics in the [Prometheus](https://prometheus.io/) format. These can be monitored to quickly identify problems in the gRPC layer or attestation errors. Prometheus metrics are numerical values associated with a name and additional key/values pairs, @@ -8,11 +8,10 @@ called labels. 
## Exposed metrics -The Coordinator pod has the annotation `prometheus.io/scrape` set to `true` so -it can be found by the [service discovery of -Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config). -The metrics can be accessed at the Coordinator pod at port `9102` under the -`/metrics` endpoint. +The metrics can be accessed at the Coordinator pod at the port specified in the +`EDG_METRICS_PORT` environment variable under the `/metrics` endpoint. By +default, this environment variable isn't specified, hence no metrics will be +exposed. The Coordinator starts two gRPC servers, one for the user API on port `1313` and one for the mesh API on port `7777`. Metrics for both servers can be accessed diff --git a/e2e/internal/contrasttest/contrasttest.go b/e2e/internal/contrasttest/contrasttest.go index fd17f13696..95df4c5760 100644 --- a/e2e/internal/contrasttest/contrasttest.go +++ b/e2e/internal/contrasttest/contrasttest.go @@ -96,6 +96,7 @@ func (ct *ContrastTest) Init(t *testing.T, resources []any) { resources = kuberesource.PatchImages(resources, ct.ImageReplacements) resources = kuberesource.PatchNamespaces(resources, ct.Namespace) resources = kuberesource.PatchServiceMeshAdminInterface(resources, 9901) + resources = kuberesource.PatchCoordinatorMetrics(resources, 9102) resources = kuberesource.AddLogging(resources, "debug") unstructuredResources, err := kuberesource.ResourcesToUnstructured(resources) require.NoError(err) diff --git a/e2e/openssl/openssl_test.go b/e2e/openssl/openssl_test.go index 9f86d52280..681ef7bda3 100644 --- a/e2e/openssl/openssl_test.go +++ b/e2e/openssl/openssl_test.go @@ -12,6 +12,7 @@ import ( "encoding/json" "flag" "log" + "net" "os" "testing" "time" @@ -64,7 +65,12 @@ func TestOpenSSL(t *testing.T) { require.NoError(err) require.Len(frontendPods, 1, "pod not found: %s/%s", ct.Namespace, opensslFrontend) - _, stderr, err := ct.Kubeclient.Exec(ctx, ct.Namespace, frontendPods[0].Name, 
[]string{"/bin/bash", "-c", "curl --fail coordinator:9102/metrics"}) + coordinatorPods, err := ct.Kubeclient.PodsFromDeployment(ctx, ct.Namespace, "coordinator") + require.NoError(err) + require.NotEmpty(coordinatorPods, "pod not found: %s/%s", ct.Namespace, "coordinator") + + argv := []string{"/bin/bash", "-c", "curl --fail " + net.JoinHostPort(coordinatorPods[0].Status.PodIP, "9102") + "/metrics"} + _, stderr, err := ct.Kubeclient.Exec(ctx, ct.Namespace, frontendPods[0].Name, argv) require.NoError(err, "stderr: %q", stderr) }) diff --git a/internal/kuberesource/mutators.go b/internal/kuberesource/mutators.go index 7f5af9c9e9..6fb0e7718b 100644 --- a/internal/kuberesource/mutators.go +++ b/internal/kuberesource/mutators.go @@ -194,6 +194,34 @@ func PatchServiceMeshAdminInterface(resources []any, port int32) []any { return resources } +// PatchCoordinatorMetrics enables Coordinator metrics on the specified port. +// +// For each Deployment whose pod template has the annotation +// contrast.edgeless.systems/pod-role set to "coordinator", the first container +// gets the EDG_METRICS_PORT environment variable and a "prometheus" container +// port. Deployments lacking a spec, pod template, template metadata, pod spec +// or containers are skipped. The input slice is returned. +func PatchCoordinatorMetrics(resources []any, port int32) []any { + for _, resource := range resources { + switch r := resource.(type) { + case *applyappsv1.DeploymentApplyConfiguration: + // Skip partially built Deployments instead of panicking on a nil pointer or an empty container list. + if r.Spec == nil || r.Spec.Template == nil || r.Spec.Template.ObjectMetaApplyConfiguration == nil || r.Spec.Template.Spec == nil || len(r.Spec.Template.Spec.Containers) == 0 { + continue + } + if r.Spec.Template.Annotations["contrast.edgeless.systems/pod-role"] == "coordinator" { + r.Spec.Template.Spec.Containers[0].WithEnv(NewEnvVar("EDG_METRICS_PORT", fmt.Sprint(port))) + r.Spec.Template.Spec.Containers[0].WithPorts( + ContainerPort(). + WithName("prometheus"). + WithContainerPort(port), + ) + } + } + } + return resources +} + // MapPodSpec applies a function to a PodSpec in a Kubernetes resource. func MapPodSpec(resource any, f func(spec *applycorev1.PodSpecApplyConfiguration) *applycorev1.PodSpecApplyConfiguration) any { if resource == nil { diff --git a/internal/kuberesource/parts.go b/internal/kuberesource/parts.go index 2450b8d91b..b82ab44c50 100644 --- a/internal/kuberesource/parts.go +++ b/internal/kuberesource/parts.go @@ -142,7 +142,7 @@ func Coordinator(namespace string) *CoordinatorConfig { ). WithTemplate(PodTemplateSpec(). 
WithLabels(map[string]string{"app.kubernetes.io/name": "coordinator"}). - WithAnnotations(map[string]string{"contrast.edgeless.systems/pod-role": "coordinator", "prometheus.io/scrape": "true"}). + WithAnnotations(map[string]string{"contrast.edgeless.systems/pod-role": "coordinator"}). WithSpec(PodSpec(). WithRuntimeClassName(runtimeHandler). WithContainers( @@ -156,9 +156,6 @@ func Coordinator(namespace string) *CoordinatorConfig { ContainerPort(). WithName("meshapi"). WithContainerPort(7777), - ContainerPort(). - WithName("prometheus"). - WithContainerPort(9102), ). WithReadinessProbe(Probe(). WithInitialDelaySeconds(1).