Skip to content

Commit

Permalink
Improving the status message when it is stuck
Browse files Browse the repository at this point in the history
  • Loading branch information
cniackz committed Jun 20, 2024
1 parent fd7ede7 commit 8a41a66
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 24 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ examples/**/obj/
public.crt
go_build_operator_
operator.iml
.run/
59 changes: 38 additions & 21 deletions pkg/controller/main-controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,27 +89,33 @@ const (
MessageResourceExists = "Resource %q already exists and is not managed by MinIO Operator"
)

// Standard Events for Tenant

// UsersCreatedFailedReason is the reason attached to the warning event that
// is recorded on a Tenant when creating its initial users fails. The same
// value is compared against event reasons when counting earlier failures.
const UsersCreatedFailedReason = "UsersCreatedFailed"

// Standard Status messages for Tenant
//
// Each constant is a human-readable description of the step the controller
// is currently performing (or the condition blocking it) for a Tenant.
const (
StatusInitialized = "Initialized"
StatusProvisioningCIService = "Provisioning MinIO Cluster IP Service"
StatusProvisioningHLService = "Provisioning MinIO Headless Service"
StatusProvisioningStatefulSet = "Provisioning MinIO Statefulset"
StatusProvisioningConsoleService = "Provisioning Console Service"
StatusProvisioningKESStatefulSet = "Provisioning KES StatefulSet"
StatusProvisioningInitialUsers = "Provisioning initial users"
StatusProvisioningDefaultBuckets = "Provisioning default buckets"
StatusWaitingMinIOCert = "Waiting for MinIO TLS Certificate"
StatusWaitingMinIOClientCert = "Waiting for MinIO TLS Client Certificate"
StatusWaitingKESCert = "Waiting for KES TLS Certificate"
StatusUpdatingMinIOVersion = "Updating MinIO Version"
StatusUpdatingKES = "Updating KES"
StatusNotOwned = "Statefulset not controlled by operator"
StatusFailedAlreadyExists = "Another MinIO Tenant already exists in the namespace"
StatusTenantCredentialsNotSet = "Tenant credentials are not set properly"
StatusInconsistentMinIOVersions = "Different versions across MinIO Pools"
StatusRestartingMinIO = "Restarting MinIO"
StatusDecommissioningNotAllowed = "Pool Decommissioning Not Allowed"
// NOTE(review): the entries below repeat every entry above and add
// StatusProvisioningInitialUsersStuck. This looks like the before/after
// sides of a diff rendered without +/- markers rather than one declaration
// list - confirm against the actual file before relying on this block.
StatusInitialized = "Initialized"
StatusProvisioningCIService = "Provisioning MinIO Cluster IP Service"
StatusProvisioningHLService = "Provisioning MinIO Headless Service"
StatusProvisioningStatefulSet = "Provisioning MinIO Statefulset"
StatusProvisioningConsoleService = "Provisioning Console Service"
StatusProvisioningKESStatefulSet = "Provisioning KES StatefulSet"
// StatusProvisioningInitialUsers is the status createUsers sets while the
// failure counter it receives is 3 or lower.
StatusProvisioningInitialUsers = "Provisioning initial users"
// StatusProvisioningInitialUsersStuck is the status createUsers sets instead
// once the failure counter it receives is greater than 3; the message points
// the user at the Kubernetes events for details.
StatusProvisioningInitialUsersStuck = "Provisioning initial users probably stuck, check k8s events"
StatusProvisioningDefaultBuckets = "Provisioning default buckets"
StatusWaitingMinIOCert = "Waiting for MinIO TLS Certificate"
StatusWaitingMinIOClientCert = "Waiting for MinIO TLS Client Certificate"
StatusWaitingKESCert = "Waiting for KES TLS Certificate"
StatusUpdatingMinIOVersion = "Updating MinIO Version"
StatusUpdatingKES = "Updating KES"
StatusNotOwned = "Statefulset not controlled by operator"
StatusFailedAlreadyExists = "Another MinIO Tenant already exists in the namespace"
StatusTenantCredentialsNotSet = "Tenant credentials are not set properly"
StatusInconsistentMinIOVersions = "Different versions across MinIO Pools"
StatusRestartingMinIO = "Restarting MinIO"
StatusDecommissioningNotAllowed = "Pool Decommissioning Not Allowed"
)

// ErrMinIONotReady is the error returned when MinIO is not Ready
Expand Down Expand Up @@ -1316,9 +1322,20 @@ func (c *Controller) syncHandler(key string) (Result, error) {

// Ensure we are only provisioning users one time
if !tenant.Status.ProvisionedUsers && len(tenant.Spec.Users) > 0 {
if err := c.createUsers(ctx, tenant, tenantConfiguration); err != nil {
// Estimate how many times creating the initial users has already failed by
// looking at the UsersCreatedFailed events recorded in the tenant namespace.
// The result is handed to createUsers, which switches the tenant status to
// the "probably stuck" message once the count is greater than 3.
//
// Filtering happens client-side on the event reason. The TypeMeta the list
// options used to carry is dropped: it describes the options object itself
// and is not a filter on the returned events.
var counter int32
events, listErr := c.kubeClientSet.CoreV1().Events(tenant.Namespace).List(ctx, metav1.ListOptions{})
if listErr != nil {
	// Best effort: the counter only influences the status message, so a
	// failed lookup must not block user provisioning. Log it and continue
	// with a zero counter instead of dereferencing a possibly nil list.
	klog.V(2).Infof("Unable to list events in namespace %q: %v", tenant.Namespace, listErr)
} else if events != nil {
	for i := range events.Items {
		event := &events.Items[i]
		// Only failures of user creation count; unrelated events with a
		// high Count must not mark the tenant as stuck.
		if event.Reason != UsersCreatedFailedReason {
			continue
		}
		counter++
		// A single event object carries its own Count field; honour it
		// when it is larger than the number of matching objects seen so far.
		if event.Count > counter {
			counter = event.Count
		}
	}
}
if err := c.createUsers(ctx, tenant, tenantConfiguration, counter); err != nil {
klog.V(2).Infof("Unable to create MinIO users: %v", err)
c.recorder.Event(tenant, corev1.EventTypeWarning, "UsersCreatedFailed", fmt.Sprintf("Users creation failed: %s", err))
c.recorder.Event(tenant, corev1.EventTypeWarning, UsersCreatedFailedReason, fmt.Sprintf("Users creation failed: %s", err))
// retry after 5sec
return WrapResult(Result{RequeueAfter: time.Second * 5}, nil)
}
Expand Down
14 changes: 11 additions & 3 deletions pkg/controller/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ func (c *Controller) checkOpenshiftSignerCACertInOperatorNamespace(ctx context.C
return nil
}

func (c *Controller) createUsers(ctx context.Context, tenant *miniov2.Tenant, tenantConfiguration map[string][]byte) (err error) {
func (c *Controller) createUsers(ctx context.Context, tenant *miniov2.Tenant, tenantConfiguration map[string][]byte, counter int32) (err error) {
defer func() {
if err == nil {
if _, err = c.updateProvisionedUsersStatus(ctx, tenant, true); err != nil {
Expand All @@ -421,8 +421,16 @@ func (c *Controller) createUsers(ctx context.Context, tenant *miniov2.Tenant, te
return nil
}

if _, err = c.updateTenantStatus(ctx, tenant, StatusProvisioningInitialUsers, 0); err != nil {
return err
// counter is computed by the caller from the events found in the tenant's namespace.
// When it is greater than 3, report the tenant as probably stuck instead of the regular provisioning status.
if counter > 3 {
if _, err = c.updateTenantStatus(ctx, tenant, StatusProvisioningInitialUsersStuck, 0); err != nil {
return err
}
} else {
if _, err = c.updateTenantStatus(ctx, tenant, StatusProvisioningInitialUsers, 0); err != nil {
return err
}
}

// get a new admin client
Expand Down

0 comments on commit 8a41a66

Please sign in to comment.