Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trigger machine pool instance refresh also on user data change (unless it's only the bootstrap token) #4245

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions exp/controllers/awsmachinepool_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP
canUpdateLaunchTemplate := func() (bool, error) {
// If there is a change: before changing the template, check if there exist an ongoing instance refresh,
// because only 1 instance refresh can be "InProgress". If template is updated when refresh cannot be started,
// that change will not trigger a refresh. Do not start an instance refresh if only userdata changed.
// that change will not trigger a refresh.
return asgsvc.CanStartASGInstanceRefresh(machinePoolScope)
}
runPostLaunchTemplateUpdateOperation := func() error {
Expand All @@ -238,15 +238,14 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP
machinePoolScope.Debug("instance refresh disabled, skipping instance refresh")
return nil
}

// After creating a new version of launch template, instance refresh is required
// to trigger a rolling replacement of all previously launched instances.
// If ONLY the userdata changed, previously launched instances continue to use the old launch
// template.
// If ONLY the the bootstrap token in userdata changed, previously launched instances continue
// using the old launch template.
//
// FIXME(dlipovetsky,sedefsavas): If the controller terminates, or the StartASGInstanceRefresh returns an error,
// this conditional will not evaluate to true the next reconcile. If any machines use an older
// Launch Template version, and the difference between the older and current versions is _more_
// than userdata, we should start an Instance Refresh.
// this conditional will not evaluate to true the next reconcile.
machinePoolScope.Info("starting instance refresh", "number of instances", machinePoolScope.MachinePool.Spec.Replicas)
return asgsvc.StartASGInstanceRefresh(machinePoolScope)
}
Expand Down
215 changes: 195 additions & 20 deletions exp/controllers/awsmachinepool_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,18 @@ import (
"context"
"flag"
"fmt"
"strings"
"testing"

"github.com/aws/aws-sdk-go/aws"
"github.com/go-logr/logr"
"github.com/golang/mock/gomock"
. "github.com/onsi/gomega"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
"k8s.io/utils/pointer"
Expand All @@ -40,6 +43,7 @@ import (
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services"
ec2Service "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/ec2"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/mock_services"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
Expand All @@ -50,17 +54,38 @@ import (
"sigs.k8s.io/cluster-api/util/patch"
)

const (
clusterApiTestUserData string = `
---
- path: /run/kubeadm/kubeadm-join-config.yaml
owner: root:root
permissions: '0640'
content: |
---
apiVersion: kubeadm.k8s.io/v1beta3
discovery:
bootstrapToken:
apiServerEndpoint: my-apiserver-123.eu-west-2.elb.amazonaws.com:6443
caCertHashes:
- sha256:45d45ge54dc2ea64cd30b133713371337133771ac2cffc43932f984f3e76babc
token: 31o123.abcd4lsoz1ruwxyz

...some other stuff that is not valid YAML, since it should not matter...
`
)

func TestAWSMachinePoolReconciler(t *testing.T) {
var (
reconciler AWSMachinePoolReconciler
cs *scope.ClusterScope
ms *scope.MachinePoolScope
mockCtrl *gomock.Controller
ec2Svc *mock_services.MockEC2Interface
asgSvc *mock_services.MockASGInterface
recorder *record.FakeRecorder
awsMachinePool *expinfrav1.AWSMachinePool
secret *corev1.Secret
reconciler AWSMachinePoolReconciler
cs *scope.ClusterScope
ms *scope.MachinePoolScope
mockCtrl *gomock.Controller
ec2Svc *mock_services.MockEC2Interface
asgSvc *mock_services.MockASGInterface
recorder *record.FakeRecorder
awsMachinePool *expinfrav1.AWSMachinePool
secret *corev1.Secret
bootstrapDataToCreate = []byte("shell-script")
)
setup := func(t *testing.T, g *WithT) {
t.Helper()
Expand All @@ -75,10 +100,12 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
}
ctx := context.TODO()

ns := string(uuid.NewUUID())

awsMachinePool = &expinfrav1.AWSMachinePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
Namespace: "default",
Namespace: ns,
},
Spec: expinfrav1.AWSMachinePoolSpec{
MinSize: int32(0),
Expand All @@ -89,16 +116,24 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
secret = &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "bootstrap-data",
Namespace: "default",
Namespace: ns,
},
Data: map[string][]byte{
"value": []byte("shell-script"),
"value": bootstrapDataToCreate,
},
}

g.Expect(testEnv.Create(ctx, &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: ns,
},
})).To(Succeed())
g.Expect(testEnv.Create(ctx, awsMachinePool)).To(Succeed())
g.Expect(testEnv.Create(ctx, secret)).To(Succeed())

cs, err = setupCluster("test-cluster")
g.Expect(err).To(BeNil())

ms, err = scope.NewMachinePoolScope(
scope.MachinePoolScopeParams{
Client: testEnv.Client,
Expand All @@ -110,7 +145,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
MachinePool: &expclusterv1.MachinePool{
ObjectMeta: metav1.ObjectMeta{
Name: "mp",
Namespace: "default",
Namespace: ns,
},
Spec: expclusterv1.MachinePoolSpec{
ClusterName: "test",
Expand All @@ -130,9 +165,6 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
)
g.Expect(err).To(BeNil())

cs, err = setupCluster("test-cluster")
g.Expect(err).To(BeNil())

mockCtrl = gomock.NewController(t)
ec2Svc = mock_services.NewMockEC2Interface(mockCtrl)
asgSvc = mock_services.NewMockASGInterface(mockCtrl)
Expand Down Expand Up @@ -170,7 +202,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
getASG := func(t *testing.T, g *WithT) {
t.Helper()

ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", expectedErr).AnyTimes()
ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, nil, expectedErr).AnyTimes()
asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, expectedErr).AnyTimes()
}
t.Run("should exit immediately on an error state", func(t *testing.T) {
Expand Down Expand Up @@ -364,7 +396,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes()
asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{}, nil).Times(1)
asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).AnyTimes()
ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil).AnyTimes()
ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, nil, nil).AnyTimes()
ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(nil, nil).AnyTimes()
ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any()).Return("", nil).AnyTimes()
ec2Svc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
Expand Down Expand Up @@ -464,7 +496,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
finalizer(t, g)

asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil)
ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil).AnyTimes()
ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, nil, nil).AnyTimes()

buf := new(bytes.Buffer)
klog.SetOutput(buf)
Expand All @@ -486,7 +518,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
Status: expinfrav1.ASGStatusDeleteInProgress,
}
asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&inProgressASG, nil)
ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil).AnyTimes()
ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, nil, nil).AnyTimes()

buf := new(bytes.Buffer)
klog.SetOutput(buf)
Expand All @@ -496,6 +528,149 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
g.Eventually(recorder.Events).Should(Receive(ContainSubstring("DeletionInProgress")))
})
})

t.Run("Reconciling the launch template", func(t *testing.T) {
// Test `ReconcileLaunchTemplate` which is called by reconciliation

t.Run("should create launch template if none exists", func(t *testing.T) {
g := NewWithT(t)
setup(t, g)
defer teardown(t, g)

ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, nil, nil)
ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(aws.String("ami-123456"), nil)
ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Eq([]byte("shell-script"))).Return("lt-456789", nil)

instanceRefreshTriggered := false
ec2Service.ReconcileLaunchTemplate(ms, func() (bool, error) { return true, nil }, func() error {
instanceRefreshTriggered = true
return nil
}, ec2Svc)
// First launch template, so no instance refresh needed
g.Expect(instanceRefreshTriggered).To(BeFalse())
})

t.Run("should not update launch template with unchanged AMI and user data", func(t *testing.T) {
g := NewWithT(t)
setup(t, g)
defer teardown(t, g)

ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(&expinfrav1.AWSLaunchTemplate{
AMI: infrav1.AMIReference{
ID: aws.String("ami-123456"),
},
}, []byte("shell-script"), nil)
ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(aws.String("ami-123456"), nil)
ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil)

ms.AWSMachinePool.Status.LaunchTemplateID = "lt-foo123"
ms.AWSMachinePool.Status.LaunchTemplateVersion = aws.String("1")

instanceRefreshTriggered := false
ec2Service.ReconcileLaunchTemplate(ms, func() (bool, error) { return true, nil }, func() error {
instanceRefreshTriggered = true
return nil
}, ec2Svc)
// No changes, so no instance refresh needed
g.Expect(instanceRefreshTriggered).To(BeFalse())
})

t.Run("should trigger instance refresh if user data change is more than only the bootstrap token", func(t *testing.T) {
g := NewWithT(t)
bootstrapDataToCreate = []byte(clusterApiTestUserData + "this has changed")
setup(t, g)
defer teardown(t, g)

ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(&expinfrav1.AWSLaunchTemplate{
AMI: infrav1.AMIReference{
ID: aws.String("ami-123456"),
},
}, []byte(clusterApiTestUserData), nil)
ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(aws.String("ami-123456"), nil)
ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil)

ms.AWSMachinePool.Status.LaunchTemplateID = "lt-foo123"
ms.AWSMachinePool.Status.LaunchTemplateVersion = aws.String("1")

// User data changed, so a new launch template version should be created
ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Eq("lt-foo123")).Return(nil)
ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Eq("lt-foo123"), gomock.Any(), gomock.Eq(aws.String("ami-123456")), gomock.Eq([]byte(clusterApiTestUserData+"this has changed"))).Return(nil)
ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Eq("lt-foo123")).Return("2", nil)

instanceRefreshTriggered := false
ec2Service.ReconcileLaunchTemplate(ms, func() (bool, error) { return true, nil }, func() error {
instanceRefreshTriggered = true
return nil
}, ec2Svc)
// User data changed (more than just the bootstrap token), so an instance refresh is desired
g.Expect(instanceRefreshTriggered).To(BeTrue())
})

t.Run("should not trigger instance refresh if only bootstrap token changed in user data", func(t *testing.T) {
g := NewWithT(t)
bootstrapDataToCreate = []byte(strings.Replace(clusterApiTestUserData, "31o123.abcd4lsoz1ruwxyz", "some-refreshed-token", 1))
setup(t, g)
defer teardown(t, g)

ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(&expinfrav1.AWSLaunchTemplate{
AMI: infrav1.AMIReference{
ID: aws.String("ami-123456"),
},
}, []byte(clusterApiTestUserData), nil)
ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(aws.String("ami-123456"), nil)
ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil)

ms.AWSMachinePool.Status.LaunchTemplateID = "lt-foo123"
ms.AWSMachinePool.Status.LaunchTemplateVersion = aws.String("1")

// User data changed, so a new launch template version should be created
ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Eq("lt-foo123")).Return(nil)
ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Eq("lt-foo123"), gomock.Any(), gomock.Eq(aws.String("ami-123456")), gomock.Eq([]byte(strings.Replace(clusterApiTestUserData, "31o123.abcd4lsoz1ruwxyz", "some-refreshed-token", 1)))).Return(nil)
ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Eq("lt-foo123")).Return("2", nil)

instanceRefreshTriggered := false
ec2Service.ReconcileLaunchTemplate(ms, func() (bool, error) { return true, nil }, func() error {
instanceRefreshTriggered = true
return nil
}, ec2Svc)
// Only the bootstrap token changed within the user data. This should not trigger an instance
// refresh because the token is refreshed regularly.
g.Expect(instanceRefreshTriggered).To(BeFalse())
})

t.Run("should not trigger instance refresh if the bootstrap token cannot be parsed in user data", func(t *testing.T) {
g := NewWithT(t)
bootstrapDataToCreate = []byte(strings.Replace(clusterApiTestUserData, "token: 31o123.abcd4lsoz1ruwxyz", "--unparseable--", 1))
setup(t, g)
defer teardown(t, g)

ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(&expinfrav1.AWSLaunchTemplate{
AMI: infrav1.AMIReference{
ID: aws.String("ami-123456"),
},
}, []byte(clusterApiTestUserData), nil)
ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(aws.String("ami-123456"), nil)
ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil)

ms.AWSMachinePool.Status.LaunchTemplateID = "lt-foo123"
ms.AWSMachinePool.Status.LaunchTemplateVersion = aws.String("1")

// User data changed, so a new launch template version should be created
ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Eq("lt-foo123")).Return(nil)
ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Eq("lt-foo123"), gomock.Any(), gomock.Eq(aws.String("ami-123456")), gomock.Eq([]byte(strings.Replace(clusterApiTestUserData, "token: 31o123.abcd4lsoz1ruwxyz", "--unparseable--", 1)))).Return(nil)
ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Eq("lt-foo123")).Return("2", nil)

instanceRefreshTriggered := false
ec2Service.ReconcileLaunchTemplate(ms, func() (bool, error) { return true, nil }, func() error {
instanceRefreshTriggered = true
return nil
}, ec2Svc)
// If we cannot parse the bootstrap token out of the user data, we should go the safe route and
// not trigger an instance refresh (since that may lead to re-creating nodes all the time, meaning
// workload application downtime).
g.Expect(instanceRefreshTriggered).To(BeFalse())
})
})
}

//TODO: This was taken from awsmachine_controller_test, i think it should be moved to elsewhere in both locations like test/helpers
Expand Down
2 changes: 1 addition & 1 deletion pkg/cloud/scope/machinepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ func (m *MachinePoolScope) getBootstrapData() ([]byte, string, error) {
key := types.NamespacedName{Namespace: m.Namespace(), Name: *m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName}

if err := m.Client.Get(context.TODO(), key, secret); err != nil {
return nil, "", errors.Wrapf(err, "failed to retrieve bootstrap data secret for AWSMachine %s/%s", m.Namespace(), m.Name())
return nil, "", errors.Wrapf(err, "failed to retrieve bootstrap data secret for AWSMachinePool %s/%s", m.Namespace(), m.Name())
}

value, ok := secret.Data["value"]
Expand Down
Loading