Skip to content

Commit

Permalink
feat(garbage-collector): garbage-collector to remove stale NFS resouc…
Browse files Browse the repository at this point in the history
…es (#80)

* feat(garbage-collector): garbage-collector to remove stale NFS resources.

Garbage collector runs at an interval of 5 minutes and checks the PVCs (backend PVCs) created by the nfs-provisioner in the nfs-server namespace.
The garbage collector processes only the PVCs from the nfs-server namespace that have the label "openebs.io/cas-type=nfs-kernel".

To map the backend PVC to NFS PVC(created by a user to provision NFS PV), provisioner adds the following labels to the backend PVC.
- nfs.openebs.io/nfs-pvc-name: <nfs-pvc-name>
- nfs.openebs.io/nfs-pvc-namespace: <nfs-pvc-ns>
- nfs.openebs.io/nfs-pvc-uid: <nfs-pvc-uid>

If backend PVC is missing any label from the above list, then the garbage collector will not process that backend PVC.

If the relevant NFS PVC (nfs-pvc-ns/nfs-pvc-name) exists in the cluster with UID value <nfs-pvc-uid>, or if the garbage collector couldn't find
the relevant NFS PVC due to network/permission issues, then the garbage collector will not perform clean-up for that backend PVC.

If relevant NFS PVC doesn't exist for the backend PVC then the garbage collector will check for the relevant NFS PV resource to process the backend PVC.
If NFS PV resource doesn't exist then the garbage collector will remove the resources created for NFS PV, i.e Service, Deployment, backend PVC.

If NFS PV exists for the backend PVC then the garbage collector will not remove the resources for that PV.

--

This commit also modifies the NFS-Provisioner provisioning behavior. While provisioning, the provisioner will wait for 1 minute
for the backend PVC (created by the provisioner for the NFS PV) to be bound. If the backend PVC is not bound within that 1 minute period, the provisioner will
return an error "timed out waiting for PVC{ns/name} to bound".

Signed-off-by: mayank <[email protected]>
  • Loading branch information
mynktl authored Aug 9, 2021
1 parent 3de660a commit 185189b
Show file tree
Hide file tree
Showing 14 changed files with 1,104 additions and 94 deletions.
2 changes: 1 addition & 1 deletion buildscripts/check-diff.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@ TEST_NAME=$1


if [[ `git diff --shortstat | wc -l` != 0 ]]; then
echo "Some files got changed after $1";printf "\n";git diff --stat;printf "\n"; exit 1;
echo "Some files got changed after $1";printf "\n";git --no-pager diff;printf "\n"; exit 1;
fi
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ require (
github.com/prometheus/client_golang v1.0.0
github.com/spf13/cobra v1.1.1
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.4.0
k8s.io/api v0.17.3
k8s.io/apimachinery v0.17.3
k8s.io/client-go v11.0.0+incompatible
Expand Down
170 changes: 170 additions & 0 deletions provisioner/garbage_collector.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/*
Copyright 2021 The OpenEBS Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package provisioner

import (
	"fmt"
	"strings"
	"time"

	mayav1alpha1 "github.com/openebs/maya/pkg/apis/openebs.io/v1alpha1"
	errors "github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	k8serrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/klog"
)

var (
// GarbageCollectorInterval defines periodic interval to run garbage collector
GarbageCollectorInterval = 5 * time.Minute
)

// RunGarbageCollector periodically scans the given namespace for stale NFS
// resources and removes them. It performs one cleanup pass immediately on
// startup, then repeats every GarbageCollectorInterval until stopCh is closed.
//
// Parameters:
//   - client:    Kubernetes client used to list and delete resources.
//   - pvTracker: tracker used to skip PVs that are still being provisioned.
//   - ns:        namespace holding the backend PVCs (nfs-server namespace).
//   - stopCh:    closing this channel stops the collector loop.
func RunGarbageCollector(client kubernetes.Interface, pvTracker ProvisioningTracker, ns string, stopCh <-chan struct{}) {
	// time.NewTicker fires only after the first interval has elapsed, so the
	// loop body below runs the cleanup once up-front before the first tick.
	ticker := time.NewTicker(GarbageCollectorInterval)
	defer ticker.Stop()

	for {
		klog.V(4).Infof("Running garbage collector for stale NFS resources")
		err := cleanUpStalePvc(client, pvTracker, ns)
		klog.V(4).Infof("Garbage collection completed for stale NFS resources with error=%v", err)

		select {
		case <-stopCh:
			return
		case <-ticker.C:
			// fall through to the next cleanup iteration
		}
	}
}

// cleanUpStalePvc lists the backend PVCs (label openebs.io/cas-type=nfs-kernel)
// in namespace ns and, for each one whose corresponding NFS PVC and NFS PV no
// longer exist, deletes the stale NFS resources (Service, Deployment, backend
// PVC). Per-PVC failures are logged and skipped so they can be retried on the
// next garbage-collector run; only a failure to list PVCs returns an error.
func cleanUpStalePvc(client kubernetes.Interface, pvTracker ProvisioningTracker, ns string) error {
	backendPvcLabel := fmt.Sprintf("%s=%s", mayav1alpha1.CASTypeKey, "nfs-kernel")
	pvcList, err := client.CoreV1().PersistentVolumeClaims(ns).List(metav1.ListOptions{LabelSelector: backendPvcLabel})
	if err != nil {
		klog.Errorf("Failed to list PVC, err=%s", err)
		return err
	}

	for _, pvc := range pvcList.Items {
		pvcExists, err := nfsPvcExists(client, pvc)
		if err != nil {
			// failed to check NFS PVC existence,
			// will check in next retry
			klog.Errorf("Failed to check NFS PVC for backendPVC=%s/%s, err=%v", ns, pvc.Name, err)
			continue
		}

		if pvcExists {
			// NFS PVC exists for backend PVC
			continue
		}

		// Backend PVC names follow the pattern "nfs-<pv-name>"; derive the
		// NFS PV name by stripping the prefix. This replaces the earlier
		// fmt.Sscanf parsing, whose error return was silently ignored.
		if !strings.HasPrefix(pvc.Name, "nfs-") {
			continue
		}
		nfsPvName := strings.TrimPrefix(pvc.Name, "nfs-")
		if nfsPvName == "" {
			continue
		}

		if pvTracker.Inprogress(nfsPvName) {
			// provisioner is processing request for this PV
			continue
		}

		// exists renamed from pvExists to avoid shadowing the pvExists function
		exists, err := pvExists(client, nfsPvName)
		if err != nil {
			// failed to check pv existence, will check in next retry
			klog.Errorf("Failed to check NFS PV for backendPVC=%s/%s, err=%v", ns, pvc.Name, err)
			continue
		}

		if exists {
			// Relevant NFS PV exists for backend PVC
			continue
		}

		// perform cleanup for stale NFS resource for this backend PVC
		err = deleteBackendStaleResources(client, pvc.Namespace, nfsPvName)
		if err != nil {
			klog.Errorf("Failed to delete NFS resources for backendPVC=%s/%s, err=%v", ns, pvc.Name, err)
		}
	}

	return nil
}

// deleteBackendStaleResources removes the NFS server resources (Service,
// Deployment, backend PVC) that were created for the given NFS PV in the
// nfs-server namespace, by delegating to Provisioner.deleteNFSServer.
func deleteBackendStaleResources(client kubernetes.Interface, nfsServerNs, nfsPvName string) error {
	klog.Infof("Deleting stale resources for PV=%s", nfsPvName)

	// Build a minimal Provisioner carrying just the fields deleteNFSServer needs.
	provisioner := &Provisioner{
		kubeClient:      client,
		serverNamespace: nfsServerNs,
	}

	return provisioner.deleteNFSServer(&KernelNFSServerOptions{
		pvName: nfsPvName,
	})
}

// nfsPvcExists reports whether the NFS PVC referenced by the backend PVC's
// labels (name, namespace and UID) still exists in the cluster.
//
// Returns an error when the backend PVC lacks any of the required labels, or
// when the lookup fails for a reason other than NotFound (e.g. network or
// permission problems) — callers treat such errors as "skip for now".
// A PVC found under the same name/namespace but with a different UID means the
// original NFS PVC was deleted and recreated, so it counts as not existing.
func nfsPvcExists(client kubernetes.Interface, backendPvcObj corev1.PersistentVolumeClaim) (bool, error) {
	labels := backendPvcObj.Labels
	name, hasName := labels[nfsPvcNameLabelKey]
	namespace, hasNs := labels[nfsPvcNsLabelKey]
	uid, hasUID := labels[nfsPvcUIDLabelKey]

	if !hasName || !hasNs || !hasUID {
		return false, errors.New("backend PVC doesn't have sufficient information of nfs pvc")
	}

	fetched, err := client.CoreV1().PersistentVolumeClaims(namespace).Get(name, metav1.GetOptions{})
	switch {
	case k8serrors.IsNotFound(err):
		return false, nil
	case err != nil:
		// couldn't get the nfs pvc information due to network error or
		// we don't have permission to fetch pvc from user namespace
		return false, err
	}

	if uid != string(fetched.UID) {
		klog.Infof("different UID=%s actual=%s", uid, string(fetched.UID))
		// pvc is having different UID than nfs PVC, so
		// original nfs pvc is deleted
		return false, nil
	}

	return true, nil
}

// pvExists reports whether a PersistentVolume named pvName exists.
// A NotFound lookup yields (false, nil); any other lookup failure is
// returned to the caller so it can retry later.
func pvExists(client kubernetes.Interface, pvName string) (bool, error) {
	if _, err := client.CoreV1().PersistentVolumes().Get(pvName, metav1.GetOptions{}); err != nil {
		if k8serrors.IsNotFound(err) {
			return false, nil
		}
		return false, err
	}
	return true, nil
}
Loading

0 comments on commit 185189b

Please sign in to comment.