diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index e22a2fac..f78502ee 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -348,6 +348,47 @@ jobs: VAULT_N: "1.17.2" VAULT_N_1: "1.16.6" VAULT_N_2: "1.15.12" + oom-tests: + runs-on: ubuntu-latest + needs: + - get-product-version + - build-pre-checks + - build-docker + - versions + strategy: + fail-fast: false + matrix: + k8s-version: ${{ fromJson(needs.versions.outputs.K8S_VERSIONS) }} + steps: + - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: ${{ github.event.repository.name }}_release-default_linux_amd64_${{ needs.get-product-version.outputs.product-version }}_${{ github.sha }}.docker.tar + - name: Load docker image + shell: bash + run: | + docker load --input ${{ github.event.repository.name }}_release-default_linux_amd64_${{ needs.get-product-version.outputs.product-version }}_${{ github.sha }}.docker.tar + - name: Install kind + uses: helm/kind-action@0025e74a8c7512023d06dc019c617aa3cf561fde # v1.10.0 + with: + version: "v0.25.0" + install_only: true + - uses: azure/setup-helm@fe7b79cd5ee1e45176fcad797de68ecaf3ca4814 # v4.2.0 + id: setup-helm + with: + version: "v3.15.1" + - name: Add repo + shell: bash + run: | + helm repo add hashicorp https://helm.releases.hashicorp.com + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Setup go + uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed # v5.1.0 + with: + go-version-file: .go-version + - name: Run tests + shell: bash + run: | + make integration-test-oom KIND_K8S_VERSION="v${{ matrix.k8s-version }}" latest-vault: name: vault:${{ matrix.vault-version }} kind:${{ matrix.k8s-version }} ${{ matrix.installation-method }} enterprise=${{ matrix.vault-enterprise }} needs: @@ -432,6 +473,7 @@ jobs: - unit-tests - latest-vault - latest-k8s + - oom-tests steps: - name: cancelled if: ${{ (contains(needs.*.result, 'cancelled')) }} diff --git a/Makefile b/Makefile index 1af46aae..791c3853 100644 --- a/Makefile +++ b/Makefile @@ -345,6 +345,14 @@ integration-test-chart: INTEGRATION_TESTS=true \ go test github.com/hashicorp/vault-secrets-operator/test/chart/... $(TESTARGS) -timeout=10m +.PHONY: integration-test-oom +integration-test-oom: + IMAGE_TAG_BASE=$(IMAGE_TAG_BASE) \ + VERSION=$(VERSION) \ + INTEGRATION_TESTS=true \ + KIND_K8S_VERSION=$(KIND_K8S_VERSION) \ + go test github.com/hashicorp/vault-secrets-operator/test/oom/... $(TESTARGS) -timeout=10m + .PHONY: setup-kind setup-kind: ## create a kind cluster for running the acceptance tests locally kind get clusters | grep --silent "^$(KIND_CLUSTER_NAME)$$" || \ diff --git a/internal/testutils/install.go b/internal/testutils/install.go new file mode 100644 index 00000000..036f8efc --- /dev/null +++ b/internal/testutils/install.go @@ -0,0 +1,98 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package testutils + +import ( + "context" + "io" + "os" + "os/exec" + "os/signal" + "syscall" + "testing" + "time" +) + +var onlyOneSignalHandler = make(chan struct{}) + +var shutdownSignals = []os.Signal{os.Interrupt, syscall.SIGTERM} + +// InstallVSO installs a Vault Secrets Operator Helm release. +func InstallVSO(t *testing.T, ctx context.Context, extraArgs ...string) error { + t.Helper() + return RunHelm(t, ctx, time.Minute*5, nil, nil, append([]string{"install"}, extraArgs...)...) +} + +// UpgradeVSO upgrades a Vault Secrets Operator Helm release. 
+func UpgradeVSO(t *testing.T, ctx context.Context, extraArgs ...string) error { + t.Helper() + return RunHelm(t, ctx, time.Minute*5, nil, nil, append([]string{"upgrade"}, extraArgs...)...) +} + +// UninstallVSO uninstalls a Vault Secrets Operator Helm release. +func UninstallVSO(t *testing.T, ctx context.Context, extraArgs ...string) error { + t.Helper() + return RunHelm(t, ctx, time.Minute*3, nil, nil, append([]string{"uninstall"}, extraArgs...)...) +} + +// RunHelm runs the helm command with the given arguments. +func RunHelm(t *testing.T, ctx context.Context, timeout time.Duration, stdout, stderr io.Writer, args ...string) error { + t.Helper() + return RunCommandWithTimeout(t, ctx, timeout, stdout, stderr, "helm", args...) +} + +// RunKind runs the kind command with the given arguments. +func RunKind(t *testing.T, ctx context.Context, args ...string) error { + t.Helper() + return RunCommandWithTimeout(t, ctx, time.Minute*5, nil, nil, "kind", args...) +} + +// RunCommandWithTimeout runs a command with a timeout. If the timeout is 0, the command will run indefinitely. +func RunCommandWithTimeout(t *testing.T, ctx context.Context, timeout time.Duration, stdout, stderr io.Writer, name string, args ...string) error { + t.Helper() + var ctx_ context.Context + var cancel context.CancelFunc + if timeout > 0 { + ctx_, cancel = context.WithTimeout(ctx, timeout) + defer cancel() + } else { + ctx_ = ctx + } + + cmd := exec.CommandContext(ctx_, name, args...) + if stdout != nil { + cmd.Stdout = stdout + } else { + cmd.Stdout = os.Stdout + } + if stderr != nil { + cmd.Stderr = stderr + } else { + cmd.Stderr = os.Stderr + } + + t.Logf("Running command %q", cmd) + return cmd.Run() +} + +// SetupSignalHandler registers for SIGTERM and SIGINT. A context is returned +// which is canceled on one of these signals. If a second signal is caught, the program +// is terminated with exit code 1. +// Can only be called once. +func SetupSignalHandler() (context.Context, context.CancelFunc) { + close(onlyOneSignalHandler) // panics when called twice + + ctx, cancel := context.WithCancel(context.Background()) + + c := make(chan os.Signal, 2) + signal.Notify(c, shutdownSignals...) + go func() { + <-c + cancel() + <-c + os.Exit(1) // second signal. Exit directly. 
+ }() + + return ctx, cancel +} diff --git a/test/chart/chart_test.go b/test/chart/chart_test.go index 5f011284..0da68bb6 100644 --- a/test/chart/chart_test.go +++ b/test/chart/chart_test.go @@ -7,18 +7,15 @@ import ( "bytes" "context" "fmt" - "io" "log" "os" "os/exec" - "os/signal" "path" "path/filepath" "reflect" "runtime" "strings" "sync" - "syscall" "testing" "time" @@ -30,15 +27,14 @@ import ( ctrl "sigs.k8s.io/controller-runtime" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/hashicorp/vault-secrets-operator/internal/testutils" "github.com/hashicorp/vault-secrets-operator/utils" ) var ( - testRoot string - chartPath string - onlyOneSignalHandler = make(chan struct{}) - shutdownSignals = []os.Signal{os.Interrupt, syscall.SIGTERM} - vsoNamespace = "vault-secrets-operator-system" + testRoot string + chartPath string + vsoNamespace = "vault-secrets-operator-system" // kindClusterName is set in TestMain kindClusterName string // set in TestMain @@ -104,7 +100,7 @@ func TestMain(m *testing.M) { wg := sync.WaitGroup{} wg.Add(1) - ctx, cancel := setupSignalHandler() + ctx, cancel := testutils.SetupSignalHandler() { go func() { select { @@ -154,7 +150,7 @@ func TestChart_upgradeCRDs(t *testing.T) { b := bytes.NewBuffer([]byte{}) chart := "hashicorp/vault-secrets-operator" require.NoError(t, - runHelm(t, context.Background(), time.Second*30, b, nil, + testutils.RunHelm(t, context.Background(), time.Second*30, b, nil, "show", "crds", "--version", startChartVersion, chart, @@ -204,19 +200,19 @@ func TestChart_upgradeCRDs(t *testing.T) { releaseName := strings.Replace(strings.ToLower(t.Name()), "_", "-", -1) ctx := context.Background() t.Cleanup(func() { - assert.NoError(t, uninstallVSO(t, ctx, + assert.NoError(t, testutils.UninstallVSO(t, ctx, "--wait", "--namespace", vsoNamespace, releaseName, )) }) - require.NoError(t, runKind(t, ctx, + require.NoError(t, testutils.RunKind(t, ctx, "load", "docker-image", image, "--name", kindClusterName, )) - require.NoError(t, installVSO(t, ctx, + require.NoError(t, testutils.InstallVSO(t, ctx, "--wait", "--create-namespace", "--namespace", vsoNamespace, @@ -233,7 +229,7 @@ func TestChart_upgradeCRDs(t *testing.T) { installedCRDsMap[o.Name] = o } - require.NoError(t, upgradeVSO(t, ctx, + require.NoError(t, testutils.UpgradeVSO(t, ctx, "--wait", "--namespace", vsoNamespace, "--set", fmt.Sprintf("controller.manager.image.repository=%s", operatorImageRepo), @@ -278,75 +274,3 @@ func TestChart_upgradeCRDs(t *testing.T) { assert.Equal(t, len(updatedCRD.Status.StoredVersions), len(wantCRD.Spec.Versions), "CRD %q .status.storedVersions", wantCRD.Name) } } - -func installVSO(t *testing.T, ctx context.Context, extraArgs ...string) error { - t.Helper() - return runHelm(t, ctx, time.Minute*5, nil, nil, append([]string{"install"}, extraArgs...)...) -} - -func upgradeVSO(t *testing.T, ctx context.Context, extraArgs ...string) error { - t.Helper() - return runHelm(t, ctx, time.Minute*5, nil, nil, append([]string{"upgrade"}, extraArgs...)...) -} - -func uninstallVSO(t *testing.T, ctx context.Context, extraArgs ...string) error { - t.Helper() - return runHelm(t, ctx, time.Minute*3, nil, nil, append([]string{"uninstall"}, extraArgs...)...) -} - -func runHelm(t *testing.T, ctx context.Context, timeout time.Duration, stdout, stderr io.Writer, args ...string) error { - t.Helper() - return runCommandWithTimeout(t, ctx, timeout, stdout, stderr, "helm", args...) 
-} - -func runKind(t *testing.T, ctx context.Context, args ...string) error { - t.Helper() - return runCommandWithTimeout(t, ctx, time.Minute*5, nil, nil, "kind", args...) -} - -func runCommandWithTimeout(t *testing.T, ctx context.Context, timeout time.Duration, stdout, stderr io.Writer, name string, args ...string) error { - t.Helper() - var ctx_ context.Context - var cancel context.CancelFunc - if timeout > 0 { - ctx_, cancel = context.WithTimeout(ctx, timeout) - defer cancel() - } else { - ctx_ = ctx - } - - cmd := exec.CommandContext(ctx_, name, args...) - if stdout != nil { - cmd.Stdout = stdout - } else { - cmd.Stdout = os.Stdout - } - if stderr != nil { - cmd.Stderr = stderr - } else { - cmd.Stderr = os.Stderr - } - - t.Logf("Running command %q", cmd) - return cmd.Run() -} - -// // setupSignalHandler registers for SIGTERM and SIGINT. A context is returned -// // which is canceled on one of these signals. If a second signal is caught, the program -// // is terminated with exit code 1. -func setupSignalHandler() (context.Context, context.CancelFunc) { - close(onlyOneSignalHandler) // panics when called twice - - ctx, cancel := context.WithCancel(context.Background()) - - c := make(chan os.Signal, 2) - signal.Notify(c, shutdownSignals...) - go func() { - <-c - cancel() - <-c - os.Exit(1) // second signal. Exit directly. - }() - - return ctx, cancel -} diff --git a/test/oom/oom_test.go b/test/oom/oom_test.go new file mode 100644 index 00000000..4f3d6d1d --- /dev/null +++ b/test/oom/oom_test.go @@ -0,0 +1,263 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package oom + +import ( + "context" + "crypto/rand" + "fmt" + "log" + "os" + "os/exec" + "path" + "path/filepath" + "runtime" + "strings" + "sync" + "testing" + "time" + + "github.com/cenkalti/backoff/v4" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ctrlruntime "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/hashicorp/vault-secrets-operator/internal/testutils" +) + +var ( + testRoot string + chartPath string + + vsoNamespace = "vault-secrets-operator-system" + // kindClusterName is set in TestMain + kindClusterName string + // set in TestMain + client ctrlclient.Client + scheme = ctrlruntime.NewScheme() +) + +func init() { + _, curFilePath, _, _ := runtime.Caller(0) + testRoot = path.Dir(curFilePath) + + var err error + chartPath, err = filepath.Abs(filepath.Join(testRoot, "..", "..", "chart")) + if err != nil { + panic(err) + } + + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) +} + +func TestMain(m *testing.M) { + if os.Getenv("INTEGRATION_TESTS") == "" { + os.Exit(0) + } + + kindK8sVersion := os.Getenv("KIND_K8S_VERSION") + + kindClusterName = fmt.Sprintf("vso-oom-%d", time.Now().UnixNano()) + + var err error + var result int + + var tempDir string + tempDir, err = os.MkdirTemp(os.TempDir(), "MainTestOOM") + if err != nil { + log.Printf("ERROR: Failed to create tempdir: %s", err) + os.Exit(1) + } + + kubeConfig := filepath.Join(tempDir, ".kube-config") + os.Setenv("KUBECONFIG", kubeConfig) + cleanupFunc := func() { + if tempDir != "" { + os.RemoveAll(tempDir) + } + + cmd := exec.Command("kind", + "delete", "cluster", "--name", kindClusterName, + ) + 
cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err = cmd.Run(); err != nil { + result = 1 + log.Printf("WARN: Failed to delete the kind cluster: %s", err) + } + } + + cmd := exec.Command("kind", + "create", "cluster", + "--wait", "5m", + "--name", kindClusterName, + "--kubeconfig", kubeConfig, + ) + + if kindK8sVersion != "" { + cmd.Args = append(cmd.Args, "--image", fmt.Sprintf("kindest/node:%s", kindK8sVersion)) + } + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + wg := sync.WaitGroup{} + wg.Add(1) + ctx, cancel := testutils.SetupSignalHandler() + { + go func() { + select { + case <-ctx.Done(): + cleanupFunc() + wg.Done() + } + }() + } + + err = cmd.Run() + if err != nil { + log.Printf("ERROR: Failed to create kind cluster: %s", err) + os.Exit(1) + } + + config := ctrl.GetConfigOrDie() + client, err = ctrlclient.New(config, ctrlclient.Options{Scheme: scheme}) + if err != nil { + log.Printf("ERROR: Failed to setup k8s client: %s", err) + os.Exit(1) + } + + result = m.Run() + + cancel() + wg.Wait() + os.Exit(result) +} + +func TestOOM_Secrets(t *testing.T) { + operatorImageRepo := os.Getenv("IMAGE_TAG_BASE") + if operatorImageRepo == "" { + require.Fail(t, "IMAGE_TAG_BASE is not set") + } + operatorImageTag := os.Getenv("VERSION") + if operatorImageTag == "" { + require.Fail(t, "VERSION is not set") + } + + image := fmt.Sprintf("%s:%s", operatorImageRepo, operatorImageTag) + releaseName := strings.Replace(strings.ToLower(t.Name()), "_", "-", -1) + ctx := context.Background() + t.Cleanup(func() { + assert.NoError(t, testutils.UninstallVSO(t, ctx, + "--wait", + "--namespace", vsoNamespace, + releaseName, + )) + }) + + require.NoError(t, testutils.RunKind(t, ctx, + "load", "docker-image", image, + "--name", kindClusterName, + )) + + require.NoError(t, testutils.InstallVSO(t, ctx, + "--wait", + "--create-namespace", + "--namespace", vsoNamespace, + "--set", fmt.Sprintf("controller.manager.image.tag=%s", operatorImageTag), + releaseName, + chartPath, + )) + + var ds appsv1.DeploymentList + assert.NoError(t, client.List(ctx, &ds, + ctrlclient.InNamespace(vsoNamespace), + ctrlclient.MatchingLabels{"app.kubernetes.io/instance": releaseName}, + ), + ) + + require.Len(t, ds.Items, 1, "expected exactly one deployment") + d := ds.Items[0] + var c corev1.Container + for _, c = range d.Spec.Template.Spec.Containers { + if c.Name == "manager" { + break + } + } + + require.NotNil(t, c, "manager container not found") + + i, ok := c.Resources.Limits.Memory().AsInt64() + require.True(t, ok, "failed to get memory limit") + require.Greater(t, i, int64(0), "memory limit is 0") + + secCount := i / 1024 / 1024 + require.Greater(t, secCount, int64(0), "secret count is %d", secCount) + + // 1MiB of random data + data := make([]byte, 1024*1024) + copied, err := rand.Read(data) + require.NoError(t, err, "failed to generate random data") + require.Equal(t, len(data), copied, "failed to generate enough random data") + // create enough secrets to cause OOM (probably more than enough) + for i := int64(0); i < secCount; i++ { + sec := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("test-secret-%d", i), + Namespace: vsoNamespace, + }, + Data: map[string][]byte{ + "secret": data, + }, + } + require.NoError(t, client.Create(ctx, sec)) + } + + // expect no OOM after 30 seconds + bo := backoff.NewConstantBackOff(time.Second * 2) + maxTries := uint64(15) + var count uint64 + require.NoError(t, backoff.Retry(func() error { + count += 1 + var pods corev1.PodList + if err := client.List(ctx, &pods, 
+ ctrlclient.InNamespace(vsoNamespace), + ctrlclient.MatchingLabels{ + "app.kubernetes.io/instance": releaseName, + "control-plane": "controller-manager", + }, + ); err != nil { + return backoff.Permanent(err) + } + + if len(pods.Items) == 0 { + return fmt.Errorf("no pods found") + } + + for _, pod := range pods.Items { + for _, cstat := range pod.Status.ContainerStatuses { + if cstat.LastTerminationState.Terminated != nil { + if cstat.LastTerminationState.Terminated.Reason == "OOMKilled" { + return backoff.Permanent(fmt.Errorf("pod %s OOMKilled", pod.Name)) + } else { + return backoff.Permanent(fmt.Errorf("pod %s terminated with other reason %s", + pod.Name, cstat.LastTerminationState.Terminated.Reason)) + } + } + } + } + + if count == maxTries { + return nil + } + + return fmt.Errorf("not done yet") + }, backoff.WithMaxRetries(bo, maxTries))) +}
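
The refactor above moves the Helm/kind command helpers and the signal handler out of `test/chart/chart_test.go` into the shared `internal/testutils` package so that both the chart suite and the new OOM suite call the same code. As a minimal sketch of how a further test suite could reuse those helpers — the cluster name, release name, image tag, and chart path below are illustrative placeholders, not values from this diff; only the `testutils` function signatures are taken from `internal/testutils/install.go`:

```go
// example_reuse_test.go — hypothetical file name; a sketch only.
package example

import (
	"context"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/hashicorp/vault-secrets-operator/internal/testutils"
)

func TestExample_installUninstall(t *testing.T) {
	ctx := context.Background()

	// Placeholder values; a real suite would derive these from the
	// environment the way TestMain and TestOOM_Secrets do above.
	const (
		kindClusterName = "vso-example"
		releaseName     = "testexample-installuninstall"
		namespace       = "vault-secrets-operator-system"
		image           = "hashicorp/vault-secrets-operator:0.0.0-dev"
		chartPath       = "../../chart"
	)

	// Make the locally built operator image available inside the kind cluster.
	require.NoError(t, testutils.RunKind(t, ctx,
		"load", "docker-image", image,
		"--name", kindClusterName,
	))

	// Always uninstall the release, even if the assertions below fail.
	t.Cleanup(func() {
		assert.NoError(t, testutils.UninstallVSO(t, ctx,
			"--wait",
			"--namespace", namespace,
			releaseName,
		))
	})

	// Install the chart from the local checkout and wait for it to be ready.
	require.NoError(t, testutils.InstallVSO(t, ctx,
		"--wait",
		"--create-namespace",
		"--namespace", namespace,
		releaseName,
		chartPath,
	))
}
```

Because `internal/testutils` is an internal package, this pattern is only usable by test code that lives inside the vault-secrets-operator module itself.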