add support for decommissioning a tenant (#1044)
* add support for decommissioning a tenant
* Fixes to pool decomissioning (#1)
Co-authored-by: Daniel Valdivia <[email protected]>
harshavardhana authored Mar 24, 2022
1 parent ae7a32a commit 8891a6c
Showing 15 changed files with 349 additions and 173 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/kubernetes-tests.yml
@@ -281,14 +281,14 @@ jobs:
with:
path: |
minio-operator
key: ${{ runner.os }}-operator-${{ hashFiles('minio-operator') }}
key: ${{ runner.os }}-operator-${{ github.run_id }}
restore-keys: |
${{ runner.os }}-go-
- uses: actions/cache@v2
with:
path: |
logsearchapi-bin
key: ${{ runner.os }}-lsa-${{ hashFiles('logsearchapi-bin') }}
key: ${{ runner.os }}-lsa-${{ github.run_id }}
restore-keys: |
${{ runner.os }}-go-
- uses: actions/setup-go@v2
@@ -328,14 +328,14 @@ jobs:
with:
path: |
minio-operator
key: ${{ runner.os }}-operator-${{ hashFiles('minio-operator') }}
key: ${{ runner.os }}-operator-${{ github.run_id }}
restore-keys: |
${{ runner.os }}-go-
- uses: actions/cache@v2
with:
path: |
logsearchapi-bin
key: ${{ runner.os }}-lsa-${{ hashFiles('logsearchapi-bin') }}
key: ${{ runner.os }}-lsa-${{ github.run_id }}
restore-keys: |
${{ runner.os }}-go-
- uses: actions/setup-go@v2
@@ -375,14 +375,14 @@ jobs:
with:
path: |
minio-operator
key: ${{ runner.os }}-operator-${{ hashFiles('minio-operator') }}
key: ${{ runner.os }}-operator-${{ github.run_id }}
restore-keys: |
${{ runner.os }}-go-
- uses: actions/cache@v2
with:
path: |
logsearchapi-bin
key: ${{ runner.os }}-lsa-${{ hashFiles('logsearchapi-bin') }}
key: ${{ runner.os }}-lsa-${{ github.run_id }}
restore-keys: |
${{ runner.os }}-go-
- uses: actions/setup-go@v2
1 change: 1 addition & 0 deletions .gitignore
@@ -10,6 +10,7 @@ kubectl-minio/kubectl-minio
*.zip
kubectl-minio/crds
logsearchapi/logsearchapi
logsearchapi-bin
*.log
.vscode
minio.yaml
6 changes: 3 additions & 3 deletions Makefile
@@ -33,11 +33,11 @@ getdeps:

verify: getdeps govet gotest lint

operator:
operator: verify
@CGO_ENABLED=0 GOOS=linux go build -trimpath --ldflags $(LDFLAGS) -o minio-operator

docker:
@docker build -t $(TAG) .
docker: operator logsearchapi
@docker build --no-cache -t $(TAG) .

build: regen-crd verify plugin logsearchapi operator docker

48 changes: 48 additions & 0 deletions docs/DECOMISSION.md
@@ -0,0 +1,48 @@
## Decommission a Pool in Operator

### Decommission using `mc`

First, pick the pool you want to decommission and start the decommission:
```
mc admin decommission start myminio/ https://minio-ss-0-{0...3}.minio-hl..svc.cluster.local/export{0...3}
```

Then monitor the status of the decommissioning pool:
```
mc admin decom status myminio/
```

More detailed documentation is available [here](https://docs.min.io/minio/baremetal/installation/decommission-pool.html#minio-decommissioning).

### Update `tenant.yaml`

Once the decommission status reports the pool as complete, you can remove the pool from your `tenant.yaml` and apply the change using `kubectl apply -f <tenant.yaml>`, as sketched below.
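
As an illustration only (the pool names and sizes here are hypothetical, not taken from this commit), removing the decommissioned pool amounts to deleting its entry from `spec.pools` before re-applying the tenant:

```yaml
# Hypothetical spec after decommissioning: the removed pool's entry
# ("ss-1" in this sketch) has been deleted; only the surviving pools remain.
spec:
  pools:
    - name: "ss-0"
      servers: 4
      volumesPerServer: 4
```

The operator then compares the remaining `spec.pools` against `status.pools` and deletes the StatefulSet of any pool that no longer appears in the spec.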

#### Caveats

The Tenant CRD does not require `spec.pools[].Name` to be non-empty; however, to safely decommission a `pool`, every `pool` must have a `spec.pools[].Name`. If `spec.pools[].Name` is empty for any `pool`, removal of that `pool` is rejected. In that case, the following changes are necessary before the `pool` can be removed.

First, find the current autogenerated `ssName` (StatefulSet name) of each pool:

```
~ kubectl get tenants -n ns-1 worker -o json | jq .status.pools[].ssName
"worker-ss-0"
"worker-ss-1"
```

In this case, the tenant name is `worker` and the pools are `ss-0` and `ss-1`. Make the following changes to your `tenant.yaml`:


```yaml
spec:
pools:
...
- name: "ss-0"
...
...
- name: "ss-1"
...
...
```

**NOTE: This applies to Helm chart users as well.**
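
For Helm chart deployments, the same requirement means giving every pool an explicit `name` in the chart values. A minimal, illustrative sketch (the nesting follows the `tenants[].pools[]` layout from `helm/tenant/values.yaml`; the pool names and sizes are hypothetical):

```yaml
# Illustrative values.yaml excerpt: give every pool an explicit name so its
# entry can be removed safely after a decommission completes.
tenants:
  - pools:
      - servers: 4
        name: ss-0
        volumesPerServer: 4
      - servers: 4
        name: ss-1
        volumesPerServer: 4
```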
2 changes: 2 additions & 0 deletions examples/kustomization/base/tenant.yaml
@@ -125,6 +125,8 @@ spec:
## For standalone mode, supply 1. For distributed mode, supply 4 or more.
## Note that the operator does not support upgrading from standalone to distributed mode.
- servers: 4
## custom name for the pool
name: pool-0
## volumesPerServer specifies the number of volumes attached per MinIO Tenant Pod / Server.
volumesPerServer: 4
## nodeSelector parameters for MinIO Pods. It specifies a map of key-value pairs. For the pod to be
2 changes: 2 additions & 0 deletions examples/kustomization/tenant-lite/tenant.yaml
@@ -10,6 +10,8 @@ spec:
## For standalone mode, supply 1. For distributed mode, supply 4 or more.
## Note that the operator does not support upgrading from standalone to distributed mode.
- servers: 4
## custom pool name
name: pool-0
## volumesPerServer specifies the number of volumes attached per MinIO Tenant Pod / Server.
volumesPerServer: 2
## This VolumeClaimTemplate is used across all the volumes provisioned for MinIO Tenant in this Pool.
2 changes: 2 additions & 0 deletions examples/kustomization/tenant-tiny/tenant.yaml
@@ -10,6 +10,8 @@ spec:
## For standalone mode, supply 1. For distributed mode, supply 4 or more.
## Note that the operator does not support upgrading from standalone to distributed mode.
- servers: 1
## custom pool name
name: pool-0
## volumesPerServer specifies the number of volumes attached per MinIO Tenant Pod / Server.
volumesPerServer: 4
## This VolumeClaimTemplate is used across all the volumes provisioned for MinIO Tenant in this
1 change: 1 addition & 0 deletions helm/tenant/templates/tenant.yaml
@@ -34,6 +34,7 @@ spec:
pools:
{{ range (dig "pools" (list) .) }}
- servers: {{ dig "servers" 4 . }}
name: {{ dig "name" "" . }}
volumesPerServer: {{ dig "volumesPerServer" 4 . }}
volumeClaimTemplate:
metadata:
2 changes: 2 additions & 0 deletions helm/tenant/values.yaml
@@ -21,6 +21,8 @@ tenants:
## For standalone mode, supply 1. For distributed mode, supply 4 or more.
## Note that the operator does not support upgrading from standalone to distributed mode.
- servers: 4
## custom name for the pool
name: pool-0
## volumesPerServer specifies the number of volumes attached per MinIO Tenant Pod / Server.
volumesPerServer: 4
## size specifies the capacity per volume
122 changes: 122 additions & 0 deletions pkg/controller/cluster/decomission.go
@@ -0,0 +1,122 @@
// Copyright (C) 2022, MinIO, Inc.
//
// This code is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License, version 3,
// as published by the Free Software Foundation.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License, version 3,
// along with this program. If not, see <http://www.gnu.org/licenses/>

package cluster

import (
"context"
"errors"
"fmt"

"github.com/minio/minio-go/v7/pkg/set"
miniov2 "github.com/minio/operator/pkg/apis/minio.min.io/v2"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog/v2"
)

// checkForPoolDecommission validates the tenant's spec against its status to detect a pool being removed
func (c *Controller) checkForPoolDecommission(ctx context.Context, key string, tenant *miniov2.Tenant, tenantConfiguration map[string][]byte) (*miniov2.Tenant, error) {
var err error
// if the number of pools in the spec is less than what we know in the status, a decommission is taking place
if len(tenant.Status.Pools) > len(tenant.Spec.Pools) {
// check for empty pool names
var noDecom bool
for _, pool := range tenant.Spec.Pools {
if pool.Name != "" {
continue
} // an empty pool.Name means decommission is not allowed.
noDecom = true
break
}
if noDecom {
klog.Warningf("%s Detected we are removing a pool but spec.Pool[].Name is empty - disallowing removal", key)
if tenant, err = c.updateTenantStatus(ctx, tenant, StatusDecommissioningNotAllowed, 0); err != nil {
return nil, err
}
return nil, errors.New("removing pool not allowed")
}
// Check for duplicate names
var noDecomCommon bool
commonNames := set.NewStringSet()
for _, pool := range tenant.Spec.Pools {
if commonNames.Contains(pool.Name) {
noDecomCommon = true
break
}
commonNames.Add(pool.Name)
}
if noDecomCommon {
klog.Warningf("%s Detected we are removing a pool but spec.Pool[].Name's are duplicated - disallowing removal", key)
return nil, errors.New("removing pool not allowed")
}

klog.Infof("%s Detected we are removing a pool", key)
// This means we are attempting to remove a "pool", perhaps after a decommission event.
var poolNamesRemoved []string
var initializedPool miniov2.Pool
for i, pstatus := range tenant.Status.Pools {
var found bool
for _, pool := range tenant.Spec.Pools {
if pstatus.SSName == tenant.PoolStatefulsetName(&pool) {
found = true
if pstatus.State == miniov2.PoolInitialized {
initializedPool = pool
}
continue
}
}
if !found {
poolNamesRemoved = append(poolNamesRemoved, pstatus.SSName)
tenant.Status.Pools = append(tenant.Status.Pools[:i], tenant.Status.Pools[i+1:]...)
}
}

var restarted bool
// Only restart if there is an initialized pool to fetch the new args.
if len(poolNamesRemoved) > 0 && initializedPool.Name != "" {
// Restart services to get new args since we are shrinking the deployment here.
if err := c.restartInitializedPool(ctx, tenant, initializedPool, tenantConfiguration); err != nil {
return nil, err
}
metaNowTime := metav1.Now()
tenant.Status.WaitingOnReady = &metaNowTime
tenant.Status.CurrentState = StatusRestartingMinIO
if tenant, err = c.updatePoolStatus(ctx, tenant); err != nil {
klog.Infof("'%s' Can't update tenant status: %v", key, err)
return nil, err
}
klog.Infof("'%s' was restarted", key)
restarted = true
}

for _, ssName := range poolNamesRemoved {
c.RegisterEvent(ctx, tenant, corev1.EventTypeNormal, "PoolRemoved", fmt.Sprintf("Tenant pool %s removed", ssName))
if err = c.kubeClientSet.AppsV1().StatefulSets(tenant.Namespace).Delete(ctx, ssName, metav1.DeleteOptions{}); err != nil {
if k8serrors.IsNotFound(err) {
continue
}
return nil, err
}
}

if restarted {
return nil, ErrMinIORestarting
}

return nil, nil
}
return tenant, err
}