From 39f54e90d93f80c926c1a89542a2c006be090c8e Mon Sep 17 00:00:00 2001 From: Kashif Khan <kashif.khan@est.tech> Date: Thu, 12 Dec 2024 10:15:48 +0200 Subject: [PATCH] Add e2e test for metrics service Signed-off-by: Kashif Khan <kashif.khan@est.tech> --- .golangci.yaml | 1 + config/base/manager.yaml | 96 +++++++++++++++-------------- config/render/capm3.yaml | 3 + main.go | 4 +- test/e2e/e2e_suite_test.go | 123 +++++++++++++++++++++++++++++++++++++ 5 files changed, 180 insertions(+), 47 deletions(-) diff --git a/.golangci.yaml b/.golangci.yaml index ec0300a8b0..87266094c9 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -120,6 +120,7 @@ issues: linters: - gci - goconst + - gosec - path: _test\.go linters: - errcheck diff --git a/config/base/manager.yaml b/config/base/manager.yaml index d4a1af4447..22f7fc975f 100644 --- a/config/base/manager.yaml +++ b/config/base/manager.yaml @@ -19,52 +19,56 @@ spec: webhook: metal3-io-v1alpha1-baremetalhost spec: containers: - - command: - - /baremetal-operator - args: - - --enable-leader-election - - --tls-min-version=TLS13 - image: quay.io/metal3-io/baremetal-operator - imagePullPolicy: Always - env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - envFrom: - - configMapRef: - name: ironic - name: manager - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - privileged: false - runAsUser: 65532 - runAsGroup: 65532 - livenessProbe: - httpGet: - path: /healthz - port: 9440 - initialDelaySeconds: 10 - periodSeconds: 10 - timeoutSeconds: 2 - successThreshold: 1 - failureThreshold: 10 - readinessProbe: - httpGet: - path: /readyz - port: 9440 - initialDelaySeconds: 10 - periodSeconds: 10 - timeoutSeconds: 2 - successThreshold: 1 - failureThreshold: 10 + - command: + - /baremetal-operator + args: + - --enable-leader-election + - --tls-min-version=TLS13 + ports: + - containerPort: 8443 + protocol: 
TCP + name: https + image: quay.io/metal3-io/baremetal-operator + imagePullPolicy: Always + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + envFrom: + - configMapRef: + name: ironic + name: manager + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + runAsUser: 65532 + runAsGroup: 65532 + livenessProbe: + httpGet: + path: /healthz + port: 9440 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 2 + successThreshold: 1 + failureThreshold: 10 + readinessProbe: + httpGet: + path: /readyz + port: 9440 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 2 + successThreshold: 1 + failureThreshold: 10 terminationGracePeriodSeconds: 10 securityContext: runAsNonRoot: true diff --git a/config/render/capm3.yaml b/config/render/capm3.yaml index 683a850280..d857a6a003 100644 --- a/config/render/capm3.yaml +++ b/config/render/capm3.yaml @@ -2598,6 +2598,9 @@ spec: - containerPort: 9443 name: webhook-server protocol: TCP + - containerPort: 8443 + name: https + protocol: TCP readinessProbe: failureThreshold: 10 httpGet: diff --git a/main.go b/main.go index 7186af8d83..69c6043ace 100644 --- a/main.go +++ b/main.go @@ -137,7 +137,7 @@ func main() { // namespace. flag.StringVar(&watchNamespace, "namespace", os.Getenv("WATCH_NAMESPACE"), "Namespace that the controller watches to reconcile host resources.") - flag.StringVar(&metricsBindAddr, "metrics-addr", "127.0.0.1:8085", + flag.StringVar(&metricsBindAddr, "metrics-addr", ":8443", "The address the metric endpoint binds to.") flag.BoolVar(&enableLeaderElection, "enable-leader-election", false, "Enable leader election for controller manager. 
"+ @@ -217,7 +217,9 @@ func main() { Scheme: scheme, Metrics: metricsserver.Options{ BindAddress: metricsBindAddr, + SecureServing: true, FilterProvider: filters.WithAuthenticationAndAuthorization, + TLSOpts: tlsOptionOverrides, }, WebhookServer: webhook.NewServer(webhook.Options{ Port: webhookPort, diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index bad02f6021..805b76145f 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -5,11 +5,15 @@ package e2e import ( "context" + "encoding/json" "flag" + "fmt" "os" + "os/exec" "path/filepath" "strings" "testing" + "time" metal3api "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1" . "github.com/onsi/ginkgo/v2" @@ -79,6 +83,11 @@ func TestE2e(t *testing.T) { RunSpecs(t, "E2e Suite") } +const namespace = "baremetal-operator-system" +const serviceAccountName = "baremetal-operator-controller-manager" +const metricsServiceName = "baremetal-operator-controller-manager-metrics-service" +const metricsRoleBindingName = "baremetal-operator-metrics-binding" + var _ = SynchronizedBeforeSuite(func() []byte { var kubeconfigPath string @@ -161,6 +170,62 @@ var _ = SynchronizedBeforeSuite(func() []byte { Expect(err).NotTo(HaveOccurred()) } + // Metrics test start + By("creating a ClusterRoleBinding for the service account to allow access to metrics") + cmd := exec.Command("kubectl", "create", "clusterrolebinding", metricsRoleBindingName, + "--clusterrole=baremetal-operator-metrics-reader", + fmt.Sprintf("--serviceaccount=%s:%s", namespace, serviceAccountName), + ) + _, err := cmd.CombinedOutput() + Expect(err).NotTo(HaveOccurred(), "Failed to create ClusterRoleBinding") + + By("validating that the metrics service is available") + cmd = exec.Command("kubectl", "get", "service", metricsServiceName, "-n", namespace) + _, err = cmd.CombinedOutput() + Expect(err).NotTo(HaveOccurred(), "Metrics service should exist") + + By("getting the service account token") + token, err := 
serviceAccountToken() + Expect(err).NotTo(HaveOccurred()) + Expect(token).NotTo(BeEmpty()) + + By("waiting for the metrics endpoint to be ready") + verifyMetricsEndpointReady := func(g Gomega) { + cmd := exec.Command("kubectl", "get", "endpoints", metricsServiceName, "-n", namespace) + output, err := cmd.CombinedOutput() + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(ContainSubstring("8443"), "Metrics endpoint is not ready") + } + Eventually(verifyMetricsEndpointReady).Should(Succeed()) + + By("creating the curl-metrics pod to access the metrics endpoint") + cmd = exec.Command("kubectl", "run", "curl-metrics", "--restart=Never", + "--namespace", namespace, + "--image=curlimages/curl:7.87.0", + "--command", + "--", "curl", "-v", "--tlsv1.3", "-k", "-H", fmt.Sprintf("Authorization:Bearer %s", token), + fmt.Sprintf("https://%s.%s.svc.cluster.local:8443/metrics", metricsServiceName, namespace)) + _, err = cmd.CombinedOutput() + Expect(err).NotTo(HaveOccurred(), "Failed to create curl-metrics pod") + + By("waiting for the curl-metrics pod to complete.") + verifyCurlUp := func(g Gomega) { + cmd := exec.Command("kubectl", "get", "pods", "curl-metrics", + "-o", "jsonpath={.status.phase}", + "-n", namespace) + output, err := cmd.CombinedOutput() + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(string(output)).To(Equal("Succeeded"), "curl pod in wrong status") + } + Eventually(verifyCurlUp, 5*time.Minute).Should(Succeed()) + + By("getting the metrics by checking curl-metrics logs") + metricsOutput := getMetricsOutput() + Expect(metricsOutput).To(ContainSubstring( + "controller_runtime_reconcile_total", + )) + // Metrics test end + return []byte(strings.Join([]string{clusterProxy.GetKubeconfigPath()}, ",")) }, func(data []byte) { // Before each parallel node @@ -179,6 +244,64 @@ var _ = SynchronizedBeforeSuite(func() []byte { clusterProxy = framework.NewClusterProxy("bmo-e2e", kubeconfigPath, scheme) }) +// serviceAccountToken returns a token for the specified 
service account in the given namespace.
+// It writes a TokenRequest manifest to a temporary file and posts it with `kubectl create --raw`
+// (retried through Eventually); the token is read from the response and the file is removed on return.
+func serviceAccountToken() (string, error) {
+	const tokenRequestRawString = `{
+		"apiVersion": "authentication.k8s.io/v1",
+		"kind": "TokenRequest"
+	}`
+
+	// Temporary file to store the token request
+	secretName := fmt.Sprintf("%s-token-request", serviceAccountName)
+	tokenRequestFile := filepath.Join("/tmp", secretName) //nolint: gocritic
+	if err := os.WriteFile(tokenRequestFile, []byte(tokenRequestRawString), os.FileMode(0o644)); err != nil {
+		return "", fmt.Errorf("writing token request to %q: %w", tokenRequestFile, err)
+	}
+	defer os.Remove(tokenRequestFile) // best-effort cleanup; also runs if Eventually fails the spec
+
+	var out string
+	verifyTokenCreation := func(g Gomega) {
+		// Execute kubectl command to create the token
+		cmd := exec.Command("kubectl", "create", "--raw", fmt.Sprintf(
+			"/api/v1/namespaces/%s/serviceaccounts/%s/token",
+			namespace,
+			serviceAccountName,
+		), "-f", tokenRequestFile)
+
+		output, err := cmd.CombinedOutput()
+		g.Expect(err).NotTo(HaveOccurred())
+
+		// Parse the JSON output to extract the token
+		var token tokenRequest
+		err = json.Unmarshal(output, &token)
+		g.Expect(err).NotTo(HaveOccurred())
+
+		out = token.Status.Token
+	}
+	Eventually(verifyTokenCreation).Should(Succeed())
+
+	return out, nil // kubectl and JSON failures are reported through Eventually above, not through this error
+}
+
+// tokenRequest is a simplified representation of the Kubernetes TokenRequest API response,
+// containing only the token field that we need to extract.
+type tokenRequest struct {
+	Status struct {
+		Token string `json:"token"`
+	} `json:"status"`
+}
+
+// getMetricsOutput retrieves and returns the logs from the curl pod used to access the metrics endpoint.
+func getMetricsOutput() string {
+	By("getting the curl-metrics logs")
+	cmd := exec.Command("kubectl", "logs", "curl-metrics", "-n", namespace)
+	metricsOutput, err := cmd.CombinedOutput()
+	Expect(err).NotTo(HaveOccurred(), "Failed to retrieve logs from curl pod")
+	return string(metricsOutput)
+}
+
 // Using a SynchronizedAfterSuite for controlling how to delete resources shared across ParallelNodes (~ginkgo threads).
 // The kubernetes cluster is shared across all the tests, so it should be deleted only after all ParallelNodes completes.
 // The artifact folder is preserved.