Skip to content

Commit

Permalink
coordinator: make metrics endpoint configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
davidweisse committed May 29, 2024
1 parent 8b3ee2c commit 6e80c05
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 14 deletions.
16 changes: 14 additions & 2 deletions coordinator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ import (
"golang.org/x/sync/errgroup"
)

const (
	// metricsPortEnvVar is the name of the environment variable from which the
	// Coordinator reads the port of its Prometheus /metrics endpoint. When the
	// variable is unset or empty, the metrics endpoint is not started.
	metricsPortEnvVar = "CONTRAST_METRICS_PORT"
)

func main() {
if err := run(); err != nil {
os.Exit(1)
Expand All @@ -43,6 +47,8 @@ func run() (retErr error) {
return fmt.Errorf("setting up mount: %w", err)
}

metricsPort := os.Getenv(metricsPortEnvVar)

caInstance, err := ca.New()
if err != nil {
return fmt.Errorf("creating CA: %w", err)
Expand All @@ -57,15 +63,21 @@ func run() (retErr error) {
eg := errgroup.Group{}

eg.Go(func() error {
logger.Info("Starting prometheus /metrics endpoint")
if metricsPort == "" {
return nil
}
if metricsPort == userapi.Port || metricsPort == meshapi.Port {
return fmt.Errorf("invalid port for metrics endpoint: %s", metricsPort)
}
logger.Info("Starting prometheus /metrics endpoint on port " + metricsPort)
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.InstrumentMetricHandler(
promRegistry, promhttp.HandlerFor(
promRegistry,
promhttp.HandlerOpts{Registry: promRegistry},
),
))
if err := http.ListenAndServe(":9102", mux); err != nil {
if err := http.ListenAndServe(":"+metricsPort, mux); err != nil {
return fmt.Errorf("serving Prometheus endpoint: %w", err)
}
return nil
Expand Down
11 changes: 5 additions & 6 deletions docs/docs/architecture/observability.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
# Observability

The Contrast Coordinator exposes metrics in the
The Contrast Coordinator can expose metrics in the
[Prometheus](https://prometheus.io/) format. These can be monitored to quickly
identify problems in the gRPC layer or attestation errors. Prometheus metrics
are numerical values associated with a name and additional key/value pairs,
called labels.

## Exposed metrics

The Coordinator pod has the annotation `prometheus.io/scrape` set to `true` so
it can be found by the [service discovery of
Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config).
The metrics can be accessed at the Coordinator pod at port `9102` under the
`/metrics` endpoint.
The metrics can be accessed on the Coordinator pod under the `/metrics`
endpoint, at the port specified in the `CONTRAST_METRICS_PORT` environment
variable. By default, this environment variable isn't set, so no metrics are
exposed.

The Coordinator starts two gRPC servers, one for the user API on port `1313` and
one for the mesh API on port `7777`. Metrics for both servers can be accessed
Expand Down
1 change: 1 addition & 0 deletions e2e/internal/contrasttest/contrasttest.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ func (ct *ContrastTest) Init(t *testing.T, resources []any) {
resources = kuberesource.PatchImages(resources, ct.ImageReplacements)
resources = kuberesource.PatchNamespaces(resources, ct.Namespace)
resources = kuberesource.PatchServiceMeshAdminInterface(resources, 9901)
resources = kuberesource.PatchCoordinatorMetrics(resources, 9102)
resources = kuberesource.AddLogging(resources, "debug")
unstructuredResources, err := kuberesource.ResourcesToUnstructured(resources)
require.NoError(err)
Expand Down
8 changes: 7 additions & 1 deletion e2e/openssl/openssl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"encoding/json"
"flag"
"log"
"net"
"os"
"testing"
"time"
Expand Down Expand Up @@ -64,7 +65,12 @@ func TestOpenSSL(t *testing.T) {
require.NoError(err)
require.Len(frontendPods, 1, "pod not found: %s/%s", ct.Namespace, opensslFrontend)

_, stderr, err := ct.Kubeclient.Exec(ctx, ct.Namespace, frontendPods[0].Name, []string{"/bin/bash", "-c", "curl --fail coordinator:9102/metrics"})
coordinatorPods, err := ct.Kubeclient.PodsFromOwner(ctx, ct.Namespace, "StatefulSet", "coordinator")
require.NoError(err)
require.NotEmpty(coordinatorPods, "pod not found: %s/%s", ct.Namespace, "coordinator")

argv := []string{"/bin/bash", "-c", "curl --fail " + net.JoinHostPort(coordinatorPods[0].Status.PodIP, "9102") + "/metrics"}
_, stderr, err := ct.Kubeclient.Exec(ctx, ct.Namespace, frontendPods[0].Name, argv)
require.NoError(err, "stderr: %q", stderr)
})

Expand Down
23 changes: 22 additions & 1 deletion internal/kuberesource/mutators.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ import (
applycorev1 "k8s.io/client-go/applyconfigurations/core/v1"
)

const exposeServiceAnnotation = "contrast.edgeless.systems/expose-service"
const (
	// exposeServiceAnnotation is the key of the Contrast "expose-service"
	// annotation.
	// NOTE(review): its consumers are not visible here — confirm the exact
	// semantics at the use sites.
	exposeServiceAnnotation = "contrast.edgeless.systems/expose-service"
	// contrastRoleAnnotationKey is the key of the pod template annotation that
	// names a pod's role; the Coordinator's pod template carries the value
	// "coordinator" under this key.
	contrastRoleAnnotationKey = "contrast.edgeless.systems/pod-role"
)

// AddInitializer adds an initializer and its shared volume to the resource.
//
Expand Down Expand Up @@ -194,6 +197,24 @@ func PatchServiceMeshAdminInterface(resources []any, port int32) []any {
return resources
}

// PatchCoordinatorMetrics enables Coordinator metrics on the specified port.
//
// For every StatefulSet apply configuration in resources whose pod template is
// annotated with contrastRoleAnnotationKey set to "coordinator", it adds the
// CONTRAST_METRICS_PORT environment variable and a container port named
// "prometheus" to the first container of the pod. All other resources are left
// untouched.
//
// StatefulSets that are only partially populated (no spec, no pod template, no
// template metadata, no pod spec, or no containers) are skipped instead of
// causing a nil-pointer or index-out-of-range panic.
//
// The resources slice is modified in place and returned for call chaining.
func PatchCoordinatorMetrics(resources []any, port int32) []any {
	for _, resource := range resources {
		sts, ok := resource.(*applyappsv1.StatefulSetApplyConfiguration)
		if !ok || sts == nil || sts.Spec == nil || sts.Spec.Template == nil {
			continue
		}
		tmpl := sts.Spec.Template
		// Annotations is promoted from an embedded pointer to the object
		// metadata; reading it through a nil pointer would panic.
		if tmpl.ObjectMetaApplyConfiguration == nil || tmpl.Spec == nil || len(tmpl.Spec.Containers) == 0 {
			continue
		}
		if tmpl.Annotations[contrastRoleAnnotationKey] != "coordinator" {
			continue
		}
		// Take the address of the slice element so the With* calls modify the
		// container stored in the pod spec rather than a copy.
		container := &tmpl.Spec.Containers[0]
		container.WithEnv(NewEnvVar("CONTRAST_METRICS_PORT", fmt.Sprint(port)))
		container.WithPorts(
			ContainerPort().
				WithName("prometheus").
				WithContainerPort(port),
		)
	}
	return resources
}

// MapPodSpec applies a function to a PodSpec in a Kubernetes resource.
func MapPodSpec(resource any, f func(spec *applycorev1.PodSpecApplyConfiguration) *applycorev1.PodSpecApplyConfiguration) any {
if resource == nil {
Expand Down
5 changes: 1 addition & 4 deletions internal/kuberesource/parts.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ func Coordinator(namespace string) *CoordinatorConfig {
WithWhenScaled(appsv1.DeletePersistentVolumeClaimRetentionPolicyType)). // TODO(burgerdev): this should be RETAIN for released coordinators.
WithTemplate(PodTemplateSpec().
WithLabels(map[string]string{"app.kubernetes.io/name": "coordinator"}).
WithAnnotations(map[string]string{"contrast.edgeless.systems/pod-role": "coordinator", "prometheus.io/scrape": "true"}).
WithAnnotations(map[string]string{"contrast.edgeless.systems/pod-role": "coordinator"}).
WithSpec(PodSpec().
WithRuntimeClassName(runtimeHandler).
WithContainers(
Expand All @@ -170,9 +170,6 @@ func Coordinator(namespace string) *CoordinatorConfig {
ContainerPort().
WithName("meshapi").
WithContainerPort(7777),
ContainerPort().
WithName("prometheus").
WithContainerPort(9102),
).
WithReadinessProbe(Probe().
WithInitialDelaySeconds(1).
Expand Down

0 comments on commit 6e80c05

Please sign in to comment.