diff --git a/pkg/apis/deployment/v1alpha/server_group_spec.go b/pkg/apis/deployment/v1alpha/server_group_spec.go
index d8393c77f..7b21c2ae7 100644
--- a/pkg/apis/deployment/v1alpha/server_group_spec.go
+++ b/pkg/apis/deployment/v1alpha/server_group_spec.go
@@ -77,6 +77,7 @@ func (s ServerGroupSpec) Validate(group ServerGroup, used bool, mode DeploymentM
 	if used {
 		minCount := 1
 		if env == EnvironmentProduction {
+			// Set validation boundaries for production mode
 			switch group {
 			case ServerGroupSingle:
 				if mode == DeploymentModeActiveFailover {
@@ -87,6 +88,16 @@ func (s ServerGroupSpec) Validate(group ServerGroup, used bool, mode DeploymentM
 			case ServerGroupDBServers, ServerGroupCoordinators, ServerGroupSyncMasters, ServerGroupSyncWorkers:
 				minCount = 2
 			}
+		} else {
+			// Set validation boundaries for development mode
+			switch group {
+			case ServerGroupSingle:
+				if mode == DeploymentModeActiveFailover {
+					minCount = 2
+				}
+			case ServerGroupDBServers:
+				minCount = 2
+			}
 		}
 		if s.GetCount() < minCount {
 			return maskAny(errors.Wrapf(ValidationError, "Invalid count value %d. Expected >= %d", s.GetCount(), minCount))
diff --git a/pkg/deployment/cluster_scaling_integration.go b/pkg/deployment/cluster_scaling_integration.go
index ce1b69d26..962eee48d 100644
--- a/pkg/deployment/cluster_scaling_integration.go
+++ b/pkg/deployment/cluster_scaling_integration.go
@@ -33,6 +33,7 @@ import (
 	api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
 	"github.com/arangodb/kube-arangodb/pkg/util"
 	"github.com/arangodb/kube-arangodb/pkg/util/arangod"
+	"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
 )
 
 // clusterScalingIntegration is a helper to communicate with the clusters
@@ -150,15 +151,24 @@ func (ci *clusterScalingIntegration) inspectCluster(ctx context.Context, expectS
 		log.Debug().Err(err).Msg("Failed to get current deployment")
 		return maskAny(err)
 	}
+	newSpec := current.Spec.DeepCopy()
 	if coordinatorsChanged {
-		current.Spec.Coordinators.Count = util.NewInt(req.GetCoordinators())
+		newSpec.Coordinators.Count = util.NewInt(req.GetCoordinators())
 	}
 	if dbserversChanged {
-		current.Spec.DBServers.Count = util.NewInt(req.GetDBServers())
-	}
-	if err := ci.depl.updateCRSpec(current.Spec); err != nil {
-		log.Warn().Err(err).Msg("Failed to update current deployment")
-		return maskAny(err)
+		newSpec.DBServers.Count = util.NewInt(req.GetDBServers())
+	}
+	if err := newSpec.Validate(); err != nil {
+		// Log failure & create event
+		log.Warn().Err(err).Msg("Validation of updated spec has failed")
+		ci.depl.CreateEvent(k8sutil.NewErrorEvent("Validation failed", err, apiObject))
+		// Restore original spec in cluster
+		ci.SendUpdateToCluster(current.Spec)
+	} else {
+		if err := ci.depl.updateCRSpec(*newSpec); err != nil {
+			log.Warn().Err(err).Msg("Failed to update current deployment")
+			return maskAny(err)
+		}
 	}
 	return nil
 }
diff --git a/pkg/deployment/reconcile/action_cleanout_member.go b/pkg/deployment/reconcile/action_cleanout_member.go
index eedefab15..fa6572fff 100644
--- a/pkg/deployment/reconcile/action_cleanout_member.go
+++ b/pkg/deployment/reconcile/action_cleanout_member.go
@@ -131,6 +131,12 @@ func (a *actionCleanoutMember) CheckProgress(ctx context.Context) (bool, bool, e
 		}
 		if jobStatus.IsFailed() {
 			log.Warn().Str("reason", jobStatus.Reason()).Msg("Cleanout Job failed. Aborting plan")
+			// Revert cleanout state and store it; a failed store is returned to the caller instead of being dropped
+			m.Phase = api.MemberPhaseCreated
+			m.CleanoutJobID = ""
+			if err := a.actionCtx.UpdateMember(m); err != nil {
+				return false, false, maskAny(err)
+			}
 			return false, true, nil
 		}
 		return false, false, nil
diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go
index 002f96f69..4440b173d 100644
--- a/pkg/deployment/reconcile/plan_builder.go
+++ b/pkg/deployment/reconcile/plan_builder.go
@@ -359,7 +359,9 @@ func createScalePlan(log zerolog.Logger, members api.MemberStatusList, group api
 			Msg("Creating scale-up plan")
 	} else if len(members) > count {
 		// Note, we scale down 1 member at a time
-		if m, err := members.SelectMemberToRemove(); err == nil {
+		if m, err := members.SelectMemberToRemove(); err != nil {
+			log.Warn().Err(err).Str("role", group.AsRole()).Msg("Failed to select member to remove")
+		} else {
 			if group == api.ServerGroupDBServers {
 				plan = append(plan,
 					api.NewAction(api.ActionTypeCleanOutMember, group, m.ID),