Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ROX-17987: Remove unused Operator deployments #1118

Merged
merged 8 commits into from
Jun 29, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
control-plane: controller-manager
## Name field must contain up to 63 characters
## https://www.rfc-editor.org/rfc/rfc1123
name: {{ $.Values.operator.deploymentPrefix | lower }}{{ .tag | lower }}
name: {{ .deploymentName | lower }}
namespace: stackrox-operator
spec:
replicas: 1
Expand Down
12 changes: 6 additions & 6 deletions fleetshard/pkg/central/charts/data/rhacs-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
# Declare variables to be passed into your templates.

operator:
deploymentPrefix: rhacs-operator-manager-

# Each item in images should contain `repository` and `tag` key
# example:
# images:
# - repository: quay.io/rhacs-eng/stackrox-operator
# tag: 3.74.0
# - repository: quay.io/rhacs-eng/stackrox-operator
# tag: 3.74.1
# - deploymentName: rhacs-operator-manager-4.0.0
# repository: quay.io/rhacs-eng/stackrox-operator
# tag: 4.0.0
# - deploymentName: rhacs-operator-manager-4.0.1
# repository: quay.io/rhacs-eng/stackrox-operator
# tag: 4.0.1
images: []
# TODO: add values for resource limits and requests for both proxy and manager container
# TODO: add value for kube-rbac-proxy image
52 changes: 47 additions & 5 deletions fleetshard/pkg/central/operator/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ import (
"strings"

"github.com/stackrox/acs-fleet-manager/fleetshard/pkg/central/charts"
"golang.org/x/exp/slices"
"helm.sh/helm/v3/pkg/chart"
"helm.sh/helm/v3/pkg/chartutil"
appsv1 "k8s.io/api/apps/v1"
"k8s.io/apimachinery/pkg/api/errors"
ctrlClient "sigs.k8s.io/controller-runtime/pkg/client"
)

Expand All @@ -35,12 +37,13 @@ func parseOperatorImages(images []string) ([]chartutil.Values, error) {
return nil, fmt.Errorf("failed to split image and tag from %q", img)
}
repo, tag := strs[0], strs[1]
if len(operatorDeploymentPrefix+"-"+tag) > maxOperatorDeploymentNameLength {
return nil, fmt.Errorf("%s-%s contains more than %d characters and cannot be used as a deployment name", operatorDeploymentPrefix, tag, maxOperatorDeploymentNameLength)
deploymentName := generateDeploymentName(tag)
if len(deploymentName) > maxOperatorDeploymentNameLength {
return nil, fmt.Errorf("%s contains more than %d characters and cannot be used as a deployment name", deploymentName, maxOperatorDeploymentNameLength)
}
if _, used := uniqueImages[repo+tag]; !used {
uniqueImages[repo+tag] = true
img := chartutil.Values{"repository": repo, "tag": tag}
img := chartutil.Values{"deploymentName": deploymentName, "repository": repo, "tag": tag}
operatorImages = append(operatorImages, img)
}
}
Expand All @@ -62,8 +65,7 @@ func (u *ACSOperatorManager) InstallOrUpgrade(ctx context.Context, images []stri
}
chartVals := chartutil.Values{
"operator": chartutil.Values{
"deploymentPrefix": operatorDeploymentPrefix + "-",
"images": operatorImages,
"images": operatorImages,
},
}

Expand Down Expand Up @@ -115,6 +117,46 @@ func (u *ACSOperatorManager) ListVersionsWithReplicas(ctx context.Context) (map[
return versionWithReplicas, nil
}

// RemoveUnusedOperators removes unused operator deployments from the cluster
kurlov marked this conversation as resolved.
Show resolved Hide resolved
func (u *ACSOperatorManager) RemoveUnusedOperators(ctx context.Context, desiredImages []string) error {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

curiosity: What was the reason for you to not go with GarbageCollectOperators or similar?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't want to put Garbage next to the Operator.
Seriously though, garbage collect sounds like it's about interacting with memory, having cycles/references/generations. But we just deleting deployment(s) and letting k8s do the rest for us

deployments := &appsv1.DeploymentList{}
labels := map[string]string{"app": "rhacs-operator"}
err := u.client.List(ctx, deployments,
ctrlClient.InNamespace(operatorNamespace),
ctrlClient.MatchingLabels(labels),
)
if err != nil {
return fmt.Errorf("failed list operator deployments: %w", err)
}

var unusedDeployments []string
for _, deployment := range deployments.Items {
for _, container := range deployment.Spec.Template.Spec.Containers {
if container.Name == "manager" && !slices.Contains(desiredImages, container.Image) {
unusedDeployments = append(unusedDeployments, deployment.Name)
}
}
}

for _, deploymentName := range unusedDeployments {
deployment := &appsv1.Deployment{}
err := u.client.Get(ctx, ctrlClient.ObjectKey{Namespace: operatorNamespace, Name: deploymentName}, deployment)
if err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("retrieving operator deployment %s: %w", deploymentName, err)
}
err = u.client.Delete(ctx, deployment)
if err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("deleting operator deployment %s: %w", deploymentName, err)
}
}

return nil
}

func generateDeploymentName(tag string) string {
return operatorDeploymentPrefix + "-" + tag
}

func (u *ACSOperatorManager) generateCRDTemplateUrls(tag string) []string {
stackroxWithTag := fmt.Sprintf(u.crdURL, tag)
centralCrdURL := stackroxWithTag + "platform.stackrox.io_centrals.yaml"
Expand Down
117 changes: 98 additions & 19 deletions fleetshard/pkg/central/operator/upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

"helm.sh/helm/v3/pkg/chartutil"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/stretchr/testify/assert"
Expand All @@ -14,6 +15,7 @@ import (

"github.com/stackrox/acs-fleet-manager/fleetshard/pkg/testutils"
appsv1 "k8s.io/api/apps/v1"
"k8s.io/apimachinery/pkg/api/errors"
"sigs.k8s.io/controller-runtime/pkg/client"
)

Expand All @@ -25,6 +27,8 @@ const (
operatorImage2 = "quay.io/rhacs-eng/stackrox-operator:4.0.2"
crdTag1 = "4.0.1"
crdURL = "https://raw.githubusercontent.com/stackrox/stackrox/%s/operator/bundle/manifests/"
deploymentName1 = operatorDeploymentPrefix + "-4.0.1"
deploymentName2 = operatorDeploymentPrefix + "-4.0.2"
)

var securedClusterCRD = &unstructured.Unstructured{
Expand Down Expand Up @@ -58,19 +62,9 @@ var serviceAccount = &unstructured.Unstructured{
},
}

var operatorDeployment1 = &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "rhacs-operator-manager-4.0.1",
Namespace: operatorNamespace,
},
}
var operatorDeployment1 = createOperatorDeployment(deploymentName1, operatorImage1)

var operatorDeployment2 = &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "rhacs-operator-manager-4.0.2",
Namespace: operatorNamespace,
},
}
var operatorDeployment2 = createOperatorDeployment(deploymentName2, operatorImage2)

var metricService = &unstructured.Unstructured{
Object: map[string]interface{}{
Expand All @@ -83,6 +77,23 @@ var metricService = &unstructured.Unstructured{
},
}

func createOperatorDeployment(name string, image string) *appsv1.Deployment {
return &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: operatorNamespace,
Labels: map[string]string{"app": "rhacs-operator"},
},
Spec: appsv1.DeploymentSpec{
Template: v1.PodTemplateSpec{
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "manager", Image: image}},
},
},
},
}
}

func TestOperatorUpgradeFreshInstall(t *testing.T) {
fakeClient := testutils.NewFakeClientBuilder(t).Build()
u := NewACSOperatorManager(fakeClient, crdURL)
Expand Down Expand Up @@ -161,6 +172,74 @@ func TestOperatorUpgradeDoNotInstallLongTagVersion(t *testing.T) {
assert.Len(t, deployments.Items, 0)
}

func TestRemoveUnusedEmpty(t *testing.T) {
fakeClient := testutils.NewFakeClientBuilder(t).Build()
u := NewACSOperatorManager(fakeClient, crdURL)
ctx := context.Background()

err := u.RemoveUnusedOperators(ctx, []string{})
require.NoError(t, err)
}

func TestRemoveOneUnusedOperator(t *testing.T) {
fakeClient := testutils.NewFakeClientBuilder(t, operatorDeployment1, serviceAccount).Build()
u := NewACSOperatorManager(fakeClient, crdURL)
ctx := context.Background()

err := fakeClient.Get(context.Background(), client.ObjectKey{Namespace: operatorNamespace, Name: operatorDeployment1.Name}, operatorDeployment1)
require.NoError(t, err)
err = fakeClient.Get(context.Background(), client.ObjectKey{Namespace: operatorNamespace, Name: serviceAccount.GetName()}, serviceAccount)
require.NoError(t, err)

err = u.RemoveUnusedOperators(ctx, []string{operatorImage2})
require.NoError(t, err)
// deployment is deleted but service account still persist
err = fakeClient.Get(context.Background(), client.ObjectKey{Namespace: operatorNamespace, Name: operatorDeployment1.Name}, operatorDeployment1)
require.True(t, errors.IsNotFound(err))
err = fakeClient.Get(context.Background(), client.ObjectKey{Namespace: operatorNamespace, Name: serviceAccount.GetName()}, serviceAccount)
require.NoError(t, err)
}

func TestRemoveOneUnusedOperatorFromMany(t *testing.T) {
fakeClient := testutils.NewFakeClientBuilder(t, operatorDeployment1, operatorDeployment2, serviceAccount).Build()
u := NewACSOperatorManager(fakeClient, crdURL)
ctx := context.Background()

err := u.RemoveUnusedOperators(ctx, []string{operatorImage2})
require.NoError(t, err)
err = fakeClient.Get(context.Background(), client.ObjectKey{Namespace: operatorNamespace, Name: operatorDeployment1.Name}, operatorDeployment1)
require.True(t, errors.IsNotFound(err))
err = fakeClient.Get(context.Background(), client.ObjectKey{Namespace: operatorNamespace, Name: operatorDeployment2.Name}, operatorDeployment2)
require.NoError(t, err)
err = fakeClient.Get(context.Background(), client.ObjectKey{Namespace: operatorNamespace, Name: serviceAccount.GetName()}, serviceAccount)
require.NoError(t, err)

// remove remaining
err = u.RemoveUnusedOperators(ctx, []string{})
require.NoError(t, err)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you assert the operators were installed?
Imho it is not always necessary to execute InstallOrUpgrade, you can also directly pass Deployment object to the fake client.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really like your suggestion about passing Deployment object to the fake client.
I also added small check that only deployment is delete (e.g. serviceAccount still persists)

err = fakeClient.Get(context.Background(), client.ObjectKey{Namespace: operatorNamespace, Name: operatorDeployment1.Name}, operatorDeployment1)
require.True(t, errors.IsNotFound(err))
err = fakeClient.Get(context.Background(), client.ObjectKey{Namespace: operatorNamespace, Name: operatorDeployment2.Name}, operatorDeployment2)
require.True(t, errors.IsNotFound(err))
err = fakeClient.Get(context.Background(), client.ObjectKey{Namespace: operatorNamespace, Name: serviceAccount.GetName()}, serviceAccount)
require.NoError(t, err)
}

func TestRemoveMultipleUnusedOperators(t *testing.T) {
fakeClient := testutils.NewFakeClientBuilder(t, operatorDeployment1, operatorDeployment2, serviceAccount).Build()
u := NewACSOperatorManager(fakeClient, crdURL)
ctx := context.Background()

err := u.RemoveUnusedOperators(ctx, []string{})
require.NoError(t, err)
deployments := &appsv1.DeploymentList{}
err = fakeClient.List(context.Background(), deployments)
require.NoError(t, err)
assert.Len(t, deployments.Items, 0)
err = fakeClient.Get(context.Background(), client.ObjectKey{Namespace: operatorNamespace, Name: serviceAccount.GetName()}, serviceAccount)
require.NoError(t, err)
}

func TestParseOperatorImages(t *testing.T) {
cases := map[string]struct {
images []string
Expand All @@ -170,20 +249,20 @@ func TestParseOperatorImages(t *testing.T) {
"should parse one valid operator image": {
images: []string{operatorImage1},
expected: []map[string]string{
{"repository": operatorRepository, "tag": "4.0.1"},
{"deploymentName": deploymentName1, "repository": operatorRepository, "tag": "4.0.1"},
},
},
"should parse two valid operator images": {
images: []string{operatorImage1, operatorImage2},
expected: []map[string]string{
{"repository": operatorRepository, "tag": "4.0.1"},
{"repository": operatorRepository, "tag": "4.0.2"},
{"deploymentName": deploymentName1, "repository": operatorRepository, "tag": "4.0.1"},
{"deploymentName": deploymentName2, "repository": operatorRepository, "tag": "4.0.2"},
},
},
"should ignore duplicate operator images": {
images: []string{operatorImage1, operatorImage1},
expected: []map[string]string{
{"repository": operatorRepository, "tag": "4.0.1"},
{"deploymentName": deploymentName1, "repository": operatorRepository, "tag": "4.0.1"},
},
},
"do not fail if images list is empty": {
Expand All @@ -193,8 +272,8 @@ func TestParseOperatorImages(t *testing.T) {
"should accept images from multiple repositories with the same tag": {
images: []string{"repo1:tag", "repo2:tag"},
expected: []map[string]string{
{"repository": "repo1", "tag": "tag"},
{"repository": "repo2", "tag": "tag"},
{"deploymentName": operatorDeploymentPrefix + "-tag", "repository": "repo1", "tag": "tag"},
{"deploymentName": operatorDeploymentPrefix + "-tag", "repository": "repo2", "tag": "tag"},
},
},
"fail if image does contain colon": {
Expand All @@ -220,7 +299,7 @@ func TestParseOperatorImages(t *testing.T) {
assert.NoError(t, err)
var expectedRepositoryAndTags []chartutil.Values
for _, m := range c.expected {
val := chartutil.Values{"repository": m["repository"], "tag": m["tag"]}
val := chartutil.Values{"deploymentName": m["deploymentName"], "repository": m["repository"], "tag": m["tag"]}
expectedRepositoryAndTags = append(expectedRepositoryAndTags, val)
}
assert.Equal(t, expectedRepositoryAndTags, gotImages)
Expand Down
13 changes: 8 additions & 5 deletions fleetshard/pkg/runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,23 +245,26 @@ func (r *Runtime) deleteStaleReconcilers(list *private.ManagedCentralList) {
}

func (r *Runtime) upgradeOperator(list private.ManagedCentralList) error {
var operatorImages []string
var desiredOperatorImages []string
for _, central := range list.Items {
operatorImages = append(operatorImages, central.Spec.OperatorImage)
desiredOperatorImages = append(desiredOperatorImages, central.Spec.OperatorImage)
}
ctx := context.Background()

for _, img := range operatorImages {
for _, img := range desiredOperatorImages {
glog.Infof("Installing Operator: %s", img)
}
//TODO(ROX-15080): Download CRD on operator upgrades to always install the latest CRD
crdTag := "4.0.1"
err := r.operatorManager.InstallOrUpgrade(ctx, operatorImages, crdTag)
err := r.operatorManager.InstallOrUpgrade(ctx, desiredOperatorImages, crdTag)
if err != nil {
return fmt.Errorf("ensuring initial operator installation failed: %w", err)
}

// TODO: delete unused operator versions
err = r.operatorManager.RemoveUnusedOperators(ctx, desiredOperatorImages)
if err != nil {
glog.Warningf("Failed removing unused operators: %v", err)
}
return nil
}

Expand Down