Skip to content

Commit

Permalink
Use CMC as default config when set (#50039)
Browse files Browse the repository at this point in the history
* autoupdate: Use CMC as default config when set

Part of: [RFD-184](#47126)

This commit implements backward compatibility when CMC is specified.
After this PR, if the user has no `autoupdate_config` resource but a
`cluster_maintenance_config` resource from RFD 109, we will use the CMC
to generate the config (update hour and update days) and craft the
`autoupdate_agent_rollout`.

* Update lib/autoupdate/rollout/client_test.go

Co-authored-by: Edoardo Spadolini <[email protected]>

* address feedback

* lint

---------

Co-authored-by: Edoardo Spadolini <[email protected]>
  • Loading branch information
hugoShaka and espadolini authored Dec 13, 2024
1 parent b0a6972 commit fe89b6d
Show file tree
Hide file tree
Showing 4 changed files with 195 additions and 11 deletions.
4 changes: 4 additions & 0 deletions lib/autoupdate/rollout/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"context"

autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
"github.com/gravitational/teleport/api/types"
)

// Client is the subset of the Teleport client RPCs the controller needs.
Expand All @@ -43,4 +44,7 @@ type Client interface {

// DeleteAutoUpdateAgentRollout deletes the AutoUpdateAgentRollout singleton resource.
DeleteAutoUpdateAgentRollout(ctx context.Context) error

// GetClusterMaintenanceConfig loads the current maintenance config singleton.
GetClusterMaintenanceConfig(ctx context.Context) (types.ClusterMaintenanceConfig, error)
}
40 changes: 40 additions & 0 deletions lib/autoupdate/rollout/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@ import (

"github.com/stretchr/testify/require"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/protoadapt"

"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
"github.com/gravitational/teleport/api/types"
apiutils "github.com/gravitational/teleport/api/utils"
)

// mockClient is a mock implementation if the Client interface for testing purposes.
Expand All @@ -39,6 +42,7 @@ type mockClient struct {
createAutoUpdateAgentRollout *createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]
updateAutoUpdateAgentRollout *createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]
deleteAutoUpdateAgentRollout *deleteHandler
getClusterMaintenanceConfig *legacyGetHandler[*types.ClusterMaintenanceConfigV1]
}

func (m mockClient) GetAutoUpdateConfig(ctx context.Context) (*autoupdate.AutoUpdateConfig, error) {
Expand All @@ -65,13 +69,18 @@ func (m mockClient) DeleteAutoUpdateAgentRollout(ctx context.Context) error {
return m.deleteAutoUpdateAgentRollout.handle(ctx)
}

func (m mockClient) GetClusterMaintenanceConfig(ctx context.Context) (types.ClusterMaintenanceConfig, error) {
return m.getClusterMaintenanceConfig.handle(ctx)
}

func (m mockClient) checkIfEmpty(t *testing.T) {
require.True(t, m.getAutoUpdateConfig.isEmpty(), "Get autoupdate_config mock not empty")
require.True(t, m.getAutoUpdateVersion.isEmpty(), "Get autoupdate_version mock not empty")
require.True(t, m.getAutoUpdateAgentRollout.isEmpty(), "Get autoupdate_agent_rollout mock not empty")
require.True(t, m.createAutoUpdateAgentRollout.isEmpty(), "Create autoupdate_agent_rollout mock not empty")
require.True(t, m.updateAutoUpdateAgentRollout.isEmpty(), "Update autoupdate_agent_rollout mock not empty")
require.True(t, m.deleteAutoUpdateAgentRollout.isEmpty(), "Delete autoupdate_agent_rollout mock not empty")
require.True(t, m.getClusterMaintenanceConfig.isEmpty(), "Get cluster_maintenance config mock not empty")
}

func newMockClient(t *testing.T, stubs mockClientStubs) *mockClient {
Expand All @@ -86,6 +95,7 @@ func newMockClient(t *testing.T, stubs mockClientStubs) *mockClient {
createAutoUpdateAgentRollout: &createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.createRolloutExpects, stubs.createRolloutAnswers},
updateAutoUpdateAgentRollout: &createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.updateRolloutExpects, stubs.updateRolloutAnswers},
deleteAutoUpdateAgentRollout: &deleteHandler{t, stubs.deleteRolloutAnswers},
getClusterMaintenanceConfig: &legacyGetHandler[*types.ClusterMaintenanceConfigV1]{t, stubs.cmcAnswers},
}
}

Expand All @@ -98,6 +108,7 @@ type mockClientStubs struct {
updateRolloutAnswers []callAnswer[*autoupdate.AutoUpdateAgentRollout]
updateRolloutExpects []require.ValueAssertionFunc
deleteRolloutAnswers []error
cmcAnswers []callAnswer[*types.ClusterMaintenanceConfigV1]
}

type callAnswer[T any] struct {
Expand Down Expand Up @@ -131,6 +142,35 @@ func (h *getHandler[T]) isEmpty() bool {
return len(h.answers) == 0
}

// legacyGetHandler is a getHandler for legacy teleport types (gogo proto-based)
// A first iteration was trying to be smart and reuse the getHandler logic
// by converting fixtures before to protoadapt.MessageV2, and converting back to
// protoadapt.MessageV1 before returning. The resulting code was hard to read and
// duplicating the logic seems more maintainable.
type legacyGetHandler[T protoadapt.MessageV1] struct {
t *testing.T
answers []callAnswer[T]
}

func (h *legacyGetHandler[T]) handle(_ context.Context) (T, error) {
if len(h.answers) == 0 {
require.Fail(h.t, "no answers left")
}

entry := h.answers[0]
h.answers = h.answers[1:]

// We need to deep copy because the reconciler might do updates in place.
// We don't want the original resource to be edited as this would mess with other tests.
result := apiutils.CloneProtoMsg(entry.result)
return result, entry.err
}

// isEmpty returns true only if all stubs were consumed
func (h *legacyGetHandler[T]) isEmpty() bool {
return len(h.answers) == 0
}

// createUpdateHandler is used in a mock client to answer create or update resource requests during tests (any request whose arity is 2).
// It first validates the input using the provided validation function, then it returns the predefined answer and error.
// If there are no stubs left it fails the test.
Expand Down
53 changes: 44 additions & 9 deletions lib/autoupdate/rollout/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"google.golang.org/protobuf/types/known/timestamppb"

"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
"github.com/gravitational/teleport/api/types"
update "github.com/gravitational/teleport/api/types/autoupdate"
"github.com/gravitational/teleport/api/utils"
)
Expand All @@ -40,8 +41,9 @@ const (
defaultStrategy = update.AgentsStrategyHaltOnError
maxConflictRetry = 3

defaultGroupName = "default"
defaultStartHour = 12
defaultGroupName = "default"
defaultCMCGroupName = defaultGroupName + "-cmc"
defaultStartHour = 12
)

var (
Expand Down Expand Up @@ -294,15 +296,15 @@ func (r *reconciler) computeStatus(
groups := status.GetGroups()
var err error
if len(groups) == 0 {
groups, err = makeGroupsStatus(configSchedules, now)
groups, err = r.makeGroupsStatus(ctx, configSchedules, now)
if err != nil {
return nil, trace.Wrap(err, "creating groups status")
}
}

err = r.progressRollout(ctx, newSpec.GetStrategy(), groups)
// Failing to progress the update is not a hard failure.
// We expected to update the status even if something went wrong to surface the failed reconciliation and potential errors to the user.
// We want to update the status even if something went wrong to surface the failed reconciliation and potential errors to the user.
if err != nil {
r.log.ErrorContext(ctx, "Errors encountered during rollout progress. Some groups might not get updated properly.",
"error", err)
Expand All @@ -328,10 +330,10 @@ func (r *reconciler) progressRollout(ctx context.Context, strategyName string, g

// makeGroupStatus creates the autoupdate_agent_rollout.status.groups based on the autoupdate_config.
// This should be called if the status groups have not been initialized or must be reset.
func makeGroupsStatus(schedules *autoupdate.AgentAutoUpdateSchedules, now time.Time) ([]*autoupdate.AutoUpdateAgentRolloutStatusGroup, error) {
func (r *reconciler) makeGroupsStatus(ctx context.Context, schedules *autoupdate.AgentAutoUpdateSchedules, now time.Time) ([]*autoupdate.AutoUpdateAgentRolloutStatusGroup, error) {
configGroups := schedules.GetRegular()
if len(configGroups) == 0 {
defaultGroup, err := defaultConfigGroup()
defaultGroup, err := r.defaultConfigGroup(ctx)
if err != nil {
return nil, trace.Wrap(err, "retrieving default group")
}
Expand All @@ -357,12 +359,45 @@ func makeGroupsStatus(schedules *autoupdate.AgentAutoUpdateSchedules, now time.T
// defaultConfigGroup returns the default group in case of missing autoupdate_config resource.
// This is a function and not a variable because we will need to add more logic there in the future
// lookup maintenance information from RFD 109's cluster_maintenance_config.
func defaultConfigGroup() (*autoupdate.AgentAutoUpdateGroup, error) {
// TODO: get group from CMC if possible
func (r *reconciler) defaultConfigGroup(ctx context.Context) (*autoupdate.AgentAutoUpdateGroup, error) {
cmc, err := r.clt.GetClusterMaintenanceConfig(ctx)
if err != nil {
if trace.IsNotFound(err) {
// There's no CMC, we return the default group.
return defaultGroup(), nil
}

// If we had an error, and it's not trace.ErrNotFound, we stop.
return nil, trace.Wrap(err, "retrieving the cluster maintenance config")
}
// We got a CMC, we generate the default from it.
upgradeWindow, ok := cmc.GetAgentUpgradeWindow()

if !ok {
// The CMC is here but does not contain upgrade window.
return defaultGroup(), nil
}

weekdays := upgradeWindow.Weekdays
// A CMC upgrade window not specifying weekdays should update every day.
if len(weekdays) == 0 {
weekdays = []string{types.Wildcard}
}

return &autoupdate.AgentAutoUpdateGroup{
Name: defaultCMCGroupName,
Days: weekdays,
StartHour: int32(upgradeWindow.UTCStartHour),
WaitDays: 0,
}, nil

}

func defaultGroup() *autoupdate.AgentAutoUpdateGroup {
return &autoupdate.AgentAutoUpdateGroup{
Name: defaultGroupName,
Days: defaultUpdateDays,
StartHour: defaultStartHour,
WaitDays: 0,
}, nil
}
}
109 changes: 107 additions & 2 deletions lib/autoupdate/rollout/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"google.golang.org/protobuf/types/known/timestamppb"

"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
"github.com/gravitational/teleport/api/types"
update "github.com/gravitational/teleport/api/types/autoupdate"
apiutils "github.com/gravitational/teleport/api/utils"
"github.com/gravitational/teleport/lib/backend"
Expand Down Expand Up @@ -576,6 +577,7 @@ func TestReconciler_Reconcile(t *testing.T) {

func Test_makeGroupsStatus(t *testing.T) {
now := time.Now()
ctx := context.Background()

tests := []struct {
name string
Expand Down Expand Up @@ -680,7 +682,15 @@ func Test_makeGroupsStatus(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := makeGroupsStatus(tt.schedules, now)
// We craft a mock client always answering there's no cmc.
// It's not the point of this test to check the cmc client usage so we don't count the number of calls here.
// CMC-specific tests happen in TestDefaultConfigGroup().
clt := newMockClient(t, mockClientStubs{cmcAnswers: []callAnswer[*types.ClusterMaintenanceConfigV1]{{
result: nil,
err: trace.NotFound("no cmc"),
}}})
r := reconciler{clt: clt}
result, err := r.makeGroupsStatus(ctx, tt.schedules, now)
require.NoError(t, err)
require.Equal(t, tt.expected, result)
})
Expand Down Expand Up @@ -734,7 +744,8 @@ func Test_reconciler_computeStatus(t *testing.T) {
},
},
}
newGroups, err := makeGroupsStatus(schedules, clock.Now())
r := reconciler{}
newGroups, err := r.makeGroupsStatus(ctx, schedules, clock.Now())
require.NoError(t, err)
newStatus := &autoupdate.AutoUpdateAgentRolloutStatus{
Groups: newGroups,
Expand Down Expand Up @@ -863,3 +874,97 @@ func Test_reconciler_computeStatus(t *testing.T) {
})
}
}

func TestDefaultConfigGroup(t *testing.T) {
ctx := context.Background()
testStartHour := 16

tests := []struct {
name string
cmcAnswer callAnswer[*types.ClusterMaintenanceConfigV1]
expectedResult *autoupdate.AgentAutoUpdateGroup
expectError require.ErrorAssertionFunc
}{
{
name: "no CMC",
cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{
nil, trace.NotFound("no cmc"),
},
expectedResult: defaultGroup(),
expectError: require.NoError,
},
{
name: "CMC with no upgrade window",
cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{
&types.ClusterMaintenanceConfigV1{
Spec: types.ClusterMaintenanceConfigSpecV1{
AgentUpgrades: nil,
},
}, nil,
},
expectedResult: defaultGroup(),
expectError: require.NoError,
},
{
name: "CMC with no weekdays",
cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{
&types.ClusterMaintenanceConfigV1{
Spec: types.ClusterMaintenanceConfigSpecV1{
AgentUpgrades: &types.AgentUpgradeWindow{
UTCStartHour: uint32(testStartHour),
Weekdays: nil,
},
},
}, nil,
},
expectedResult: &autoupdate.AgentAutoUpdateGroup{
Name: defaultCMCGroupName,
Days: []string{"*"},
StartHour: int32(testStartHour),
WaitDays: 0,
},
expectError: require.NoError,
},
{
name: "CMC with weekdays",
cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{
&types.ClusterMaintenanceConfigV1{
Spec: types.ClusterMaintenanceConfigSpecV1{
AgentUpgrades: &types.AgentUpgradeWindow{
UTCStartHour: uint32(testStartHour),
Weekdays: everyWeekdayButSunday,
},
},
}, nil,
},
expectedResult: &autoupdate.AgentAutoUpdateGroup{
Name: defaultCMCGroupName,
Days: everyWeekdayButSunday,
StartHour: int32(testStartHour),
WaitDays: 0,
},
expectError: require.NoError,
},
{
name: "unexpected error getting CMC",
cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{
nil, trace.ConnectionProblem(trace.Errorf("oh no"), "connection failed"),
},
expectedResult: nil,
expectError: require.Error,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test setup: loading fixtures.
clt := newMockClient(t, mockClientStubs{cmcAnswers: []callAnswer[*types.ClusterMaintenanceConfigV1]{tt.cmcAnswer}})
r := &reconciler{clt: clt}
// Test execution.
result, err := r.defaultConfigGroup(ctx)
tt.expectError(t, err)
require.Equal(t, tt.expectedResult, result)
// Test validation: the mock client should be empty.
clt.checkIfEmpty(t)
})
}
}

0 comments on commit fe89b6d

Please sign in to comment.