Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use CMC as default config when set #50039

Merged
merged 4 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lib/autoupdate/rollout/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"context"

autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
"github.com/gravitational/teleport/api/types"
)

// Client is the subset of the Teleport client RPCs the controller needs.
Expand All @@ -43,4 +44,7 @@ type Client interface {

// DeleteAutoUpdateAgentRollout deletes the AutoUpdateAgentRollout singleton resource.
DeleteAutoUpdateAgentRollout(ctx context.Context) error

// GetClusterMaintenanceConfig loads the current maintenance config singleton.
GetClusterMaintenanceConfig(ctx context.Context) (types.ClusterMaintenanceConfig, error)
}
40 changes: 40 additions & 0 deletions lib/autoupdate/rollout/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@ import (

"github.com/stretchr/testify/require"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/protoadapt"

"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
"github.com/gravitational/teleport/api/types"
apiutils "github.com/gravitational/teleport/api/utils"
)

// mockClient is a mock implementation if the Client interface for testing purposes.
Expand All @@ -39,6 +42,7 @@ type mockClient struct {
createAutoUpdateAgentRollout *createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]
updateAutoUpdateAgentRollout *createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]
deleteAutoUpdateAgentRollout *deleteHandler
getClusterMaintenanceConfig *legacyGetHandler[*types.ClusterMaintenanceConfigV1]
}

func (m mockClient) GetAutoUpdateConfig(ctx context.Context) (*autoupdate.AutoUpdateConfig, error) {
Expand All @@ -65,13 +69,18 @@ func (m mockClient) DeleteAutoUpdateAgentRollout(ctx context.Context) error {
return m.deleteAutoUpdateAgentRollout.handle(ctx)
}

func (m mockClient) GetClusterMaintenanceConfig(ctx context.Context) (types.ClusterMaintenanceConfig, error) {
return m.getClusterMaintenanceConfig.handle(ctx)
}

func (m mockClient) checkIfEmpty(t *testing.T) {
require.True(t, m.getAutoUpdateConfig.isEmpty(), "Get autoupdate_config mock not empty")
require.True(t, m.getAutoUpdateVersion.isEmpty(), "Get autoupdate_version mock not empty")
require.True(t, m.getAutoUpdateAgentRollout.isEmpty(), "Get autoupdate_agent_rollout mock not empty")
require.True(t, m.createAutoUpdateAgentRollout.isEmpty(), "Create autoupdate_agent_rollout mock not empty")
require.True(t, m.updateAutoUpdateAgentRollout.isEmpty(), "Update autoupdate_agent_rollout mock not empty")
require.True(t, m.deleteAutoUpdateAgentRollout.isEmpty(), "Delete autoupdate_agent_rollout mock not empty")
require.True(t, m.getClusterMaintenanceConfig.isEmpty(), "Get cluster_maintenance config mock not empty")
}

func newMockClient(t *testing.T, stubs mockClientStubs) *mockClient {
Expand All @@ -86,6 +95,7 @@ func newMockClient(t *testing.T, stubs mockClientStubs) *mockClient {
createAutoUpdateAgentRollout: &createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.createRolloutExpects, stubs.createRolloutAnswers},
updateAutoUpdateAgentRollout: &createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.updateRolloutExpects, stubs.updateRolloutAnswers},
deleteAutoUpdateAgentRollout: &deleteHandler{t, stubs.deleteRolloutAnswers},
getClusterMaintenanceConfig: &legacyGetHandler[*types.ClusterMaintenanceConfigV1]{t, stubs.cmcAnswers},
}
}

Expand All @@ -98,6 +108,7 @@ type mockClientStubs struct {
updateRolloutAnswers []callAnswer[*autoupdate.AutoUpdateAgentRollout]
updateRolloutExpects []require.ValueAssertionFunc
deleteRolloutAnswers []error
cmcAnswers []callAnswer[*types.ClusterMaintenanceConfigV1]
}

type callAnswer[T any] struct {
Expand Down Expand Up @@ -131,6 +142,35 @@ func (h *getHandler[T]) isEmpty() bool {
return len(h.answers) == 0
}

// legacyGetHandler is a getHandler for legacy teleport types (gogo proto-based)
// A first iteration was trying to be smart and reuse the getHandler logic
// by converting fixtures before to protoadapt.MessageV2, and converting back to
// protoadapt.MessageV1 before returning. The resulting code was hard to read and
// duplicating the logic seems more maintainable.
type legacyGetHandler[T protoadapt.MessageV1] struct {
t *testing.T
answers []callAnswer[T]
}

func (h *legacyGetHandler[T]) handle(_ context.Context) (T, error) {
if len(h.answers) == 0 {
require.Fail(h.t, "no answers left")
}

entry := h.answers[0]
h.answers = h.answers[1:]

// We need to deep copy because the reconciler might do updates in place.
// We don't want the original resource to be edited as this would mess with other tests.
result := apiutils.CloneProtoMsg(entry.result)
return result, entry.err
}

// isEmpty returns true only if all stubs were consumed
func (h *legacyGetHandler[T]) isEmpty() bool {
return len(h.answers) == 0
}

// createUpdateHandler is used in a mock client to answer create or update resource requests during tests (any request whose arity is 2).
// It first validates the input using the provided validation function, then it returns the predefined answer and error.
// If there are no stubs left it fails the test.
Expand Down
53 changes: 44 additions & 9 deletions lib/autoupdate/rollout/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"google.golang.org/protobuf/types/known/timestamppb"

"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
"github.com/gravitational/teleport/api/types"
update "github.com/gravitational/teleport/api/types/autoupdate"
"github.com/gravitational/teleport/api/utils"
)
Expand All @@ -40,8 +41,9 @@ const (
defaultStrategy = update.AgentsStrategyHaltOnError
maxConflictRetry = 3

defaultGroupName = "default"
defaultStartHour = 12
defaultGroupName = "default"
defaultCMCGroupName = defaultGroupName + "-cmc"
defaultStartHour = 12
)

var (
Expand Down Expand Up @@ -294,15 +296,15 @@ func (r *reconciler) computeStatus(
groups := status.GetGroups()
var err error
if len(groups) == 0 {
groups, err = makeGroupsStatus(configSchedules, now)
groups, err = r.makeGroupsStatus(ctx, configSchedules, now)
if err != nil {
return nil, trace.Wrap(err, "creating groups status")
}
}

err = r.progressRollout(ctx, newSpec.GetStrategy(), groups)
// Failing to progress the update is not a hard failure.
// We expected to update the status even if something went wrong to surface the failed reconciliation and potential errors to the user.
// We want to update the status even if something went wrong to surface the failed reconciliation and potential errors to the user.
if err != nil {
r.log.ErrorContext(ctx, "Errors encountered during rollout progress. Some groups might not get updated properly.",
"error", err)
Expand All @@ -328,10 +330,10 @@ func (r *reconciler) progressRollout(ctx context.Context, strategyName string, g

// makeGroupStatus creates the autoupdate_agent_rollout.status.groups based on the autoupdate_config.
// This should be called if the status groups have not been initialized or must be reset.
func makeGroupsStatus(schedules *autoupdate.AgentAutoUpdateSchedules, now time.Time) ([]*autoupdate.AutoUpdateAgentRolloutStatusGroup, error) {
func (r *reconciler) makeGroupsStatus(ctx context.Context, schedules *autoupdate.AgentAutoUpdateSchedules, now time.Time) ([]*autoupdate.AutoUpdateAgentRolloutStatusGroup, error) {
configGroups := schedules.GetRegular()
if len(configGroups) == 0 {
defaultGroup, err := defaultConfigGroup()
defaultGroup, err := r.defaultConfigGroup(ctx)
if err != nil {
return nil, trace.Wrap(err, "retrieving default group")
}
Expand All @@ -357,12 +359,45 @@ func makeGroupsStatus(schedules *autoupdate.AgentAutoUpdateSchedules, now time.T
// defaultConfigGroup returns the default group in case of missing autoupdate_config resource.
// This is a function and not a variable because we will need to add more logic there in the future
// lookup maintenance information from RFD 109's cluster_maintenance_config.
func defaultConfigGroup() (*autoupdate.AgentAutoUpdateGroup, error) {
// TODO: get group from CMC if possible
func (r *reconciler) defaultConfigGroup(ctx context.Context) (*autoupdate.AgentAutoUpdateGroup, error) {
cmc, err := r.clt.GetClusterMaintenanceConfig(ctx)
if err != nil {
if trace.IsNotFound(err) {
// There's no CMC, we return the default group.
return defaultGroup(), nil
}

// If we had an error, and it's not trace.ErrNotFound, we stop.
return nil, trace.Wrap(err, "retrieving the cluster maintenance config")
}
// We got a CMC, we generate the default from it.
upgradeWindow, ok := cmc.GetAgentUpgradeWindow()

if !ok {
// The CMC is here but does not contain upgrade window.
return defaultGroup(), nil
}

weekdays := upgradeWindow.Weekdays
// A CMC upgrade window not specifying weekdays should update every day.
if len(weekdays) == 0 {
weekdays = []string{types.Wildcard}
}

return &autoupdate.AgentAutoUpdateGroup{
Name: defaultCMCGroupName,
Days: weekdays,
StartHour: int32(upgradeWindow.UTCStartHour),
WaitDays: 0,
}, nil

}

func defaultGroup() *autoupdate.AgentAutoUpdateGroup {
return &autoupdate.AgentAutoUpdateGroup{
Name: defaultGroupName,
Days: defaultUpdateDays,
StartHour: defaultStartHour,
WaitDays: 0,
}, nil
}
}
109 changes: 107 additions & 2 deletions lib/autoupdate/rollout/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"google.golang.org/protobuf/types/known/timestamppb"

"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
"github.com/gravitational/teleport/api/types"
update "github.com/gravitational/teleport/api/types/autoupdate"
apiutils "github.com/gravitational/teleport/api/utils"
"github.com/gravitational/teleport/lib/backend"
Expand Down Expand Up @@ -576,6 +577,7 @@ func TestReconciler_Reconcile(t *testing.T) {

func Test_makeGroupsStatus(t *testing.T) {
now := time.Now()
ctx := context.Background()

tests := []struct {
name string
Expand Down Expand Up @@ -680,7 +682,15 @@ func Test_makeGroupsStatus(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := makeGroupsStatus(tt.schedules, now)
// We craft a mock client always answering there's no cmc.
// It's not the point of this test to check the cmc client usage so we don't count the number of calls here.
// CMC-specific tests happen in TestDefaultConfigGroup().
clt := newMockClient(t, mockClientStubs{cmcAnswers: []callAnswer[*types.ClusterMaintenanceConfigV1]{{
result: nil,
err: trace.NotFound("no cmc"),
}}})
r := reconciler{clt: clt}
result, err := r.makeGroupsStatus(ctx, tt.schedules, now)
require.NoError(t, err)
require.Equal(t, tt.expected, result)
})
Expand Down Expand Up @@ -734,7 +744,8 @@ func Test_reconciler_computeStatus(t *testing.T) {
},
},
}
newGroups, err := makeGroupsStatus(schedules, clock.Now())
r := reconciler{}
newGroups, err := r.makeGroupsStatus(ctx, schedules, clock.Now())
require.NoError(t, err)
newStatus := &autoupdate.AutoUpdateAgentRolloutStatus{
Groups: newGroups,
Expand Down Expand Up @@ -863,3 +874,97 @@ func Test_reconciler_computeStatus(t *testing.T) {
})
}
}

func TestDefaultConfigGroup(t *testing.T) {
ctx := context.Background()
testStartHour := 16

tests := []struct {
name string
cmcAnswer callAnswer[*types.ClusterMaintenanceConfigV1]
expectedResult *autoupdate.AgentAutoUpdateGroup
expectError require.ErrorAssertionFunc
}{
{
name: "no CMC",
cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{
nil, trace.NotFound("no cmc"),
},
expectedResult: defaultGroup(),
expectError: require.NoError,
},
{
name: "CMC with no upgrade window",
cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{
&types.ClusterMaintenanceConfigV1{
Spec: types.ClusterMaintenanceConfigSpecV1{
AgentUpgrades: nil,
},
}, nil,
},
expectedResult: defaultGroup(),
expectError: require.NoError,
},
{
name: "CMC with no weekdays",
cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{
&types.ClusterMaintenanceConfigV1{
Spec: types.ClusterMaintenanceConfigSpecV1{
AgentUpgrades: &types.AgentUpgradeWindow{
UTCStartHour: uint32(testStartHour),
Weekdays: nil,
},
},
}, nil,
},
expectedResult: &autoupdate.AgentAutoUpdateGroup{
Name: defaultCMCGroupName,
Days: []string{"*"},
StartHour: int32(testStartHour),
WaitDays: 0,
},
expectError: require.NoError,
},
{
name: "CMC with weekdays",
cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{
&types.ClusterMaintenanceConfigV1{
Spec: types.ClusterMaintenanceConfigSpecV1{
AgentUpgrades: &types.AgentUpgradeWindow{
UTCStartHour: uint32(testStartHour),
Weekdays: everyWeekdayButSunday,
},
},
}, nil,
},
expectedResult: &autoupdate.AgentAutoUpdateGroup{
Name: defaultCMCGroupName,
Days: everyWeekdayButSunday,
StartHour: int32(testStartHour),
WaitDays: 0,
},
expectError: require.NoError,
},
{
name: "unexpected error getting CMC",
cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{
nil, trace.ConnectionProblem(trace.Errorf("oh no"), "connection failed"),
},
expectedResult: nil,
expectError: require.Error,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test setup: loading fixtures.
clt := newMockClient(t, mockClientStubs{cmcAnswers: []callAnswer[*types.ClusterMaintenanceConfigV1]{tt.cmcAnswer}})
r := &reconciler{clt: clt}
// Test execution.
result, err := r.defaultConfigGroup(ctx)
tt.expectError(t, err)
require.Equal(t, tt.expectedResult, result)
// Test validation: the mock client should be empty.
clt.checkIfEmpty(t)
})
}
}
Loading