From 7ab857ba5a3e0a73d33c617e30b351561267a078 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Tue, 3 Mar 2020 10:02:58 +0530 Subject: [PATCH 1/3] Revert "Skip snapshot testing in CI" This reverts commit 90fef919d54e85208d0e0a62de967f2dadb3e567. --- e2e/rbd.go | 110 ++++++++++++---------- e2e/utils.go | 251 ++++++++++++++++++++++++++------------------------- 2 files changed, 186 insertions(+), 175 deletions(-) diff --git a/e2e/rbd.go b/e2e/rbd.go index 05a54f714d2..8bf359a2297 100644 --- a/e2e/rbd.go +++ b/e2e/rbd.go @@ -91,19 +91,31 @@ var _ = Describe("RBD", func() { deleteConfigMap(rbdDirPath) deleteResource(rbdExamplePath + "secret.yaml") deleteResource(rbdExamplePath + "storageclass.yaml") +<<<<<<< HEAD // deleteResource(rbdExamplePath + "snapshotclass.yaml") deleteVault() +======= + deleteResource(rbdExamplePath + "snapshotclass.yaml") +>>>>>>> parent of 90fef919d... Skip snapshot testing in CI }) Context("Test RBD CSI", func() { It("Test RBD CSI", func() { pvcPath := rbdExamplePath + "pvc.yaml" appPath := rbdExamplePath + "pod.yaml" +<<<<<<< HEAD rawPvcPath := rbdExamplePath + "raw-block-pvc.yaml" rawAppPath := rbdExamplePath + "raw-block-pod.yaml" // pvcClonePath := rbdExamplePath + "pvc-restore.yaml" // appClonePath := rbdExamplePath + "pod-restore.yaml" // snapshotPath := rbdExamplePath + "snapshot.yaml" +======= + // rawPvcPath := rbdExamplePath + "raw-block-pvc.yaml" + // rawAppPath := rbdExamplePath + "raw-block-pod.yaml" + pvcClonePath := rbdExamplePath + "pvc-restore.yaml" + appClonePath := rbdExamplePath + "pod-restore.yaml" + snapshotPath := rbdExamplePath + "snapshot.yaml" +>>>>>>> parent of 90fef919d... Skip snapshot testing in CI By("checking provisioner deployment is running") var err error @@ -154,56 +166,54 @@ var _ = Describe("RBD", func() { createRBDStorageClass(f.ClientSet, f, make(map[string]string)) }) - // skipping snapshot testing - - // By("create a PVC clone and Bind it to an app", func() { - // createRBDSnapshotClass(f) - // pvc, err := loadPVC(pvcPath) - // if err != nil { - // Fail(err.Error()) - // } - - // pvc.Namespace = f.UniqueName - // e2elog.Logf("The PVC template %+v", pvc) - // err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout) - // if err != nil { - // Fail(err.Error()) - // } - // // validate created backend rbd images - // images := listRBDImages(f) - // if len(images) != 1 { - // e2elog.Logf("backend image count %d expected image count %d", len(images), 1) - // Fail("validate backend image failed") - // } - // snap := getSnapshot(snapshotPath) - // snap.Namespace = f.UniqueName - // snap.Spec.Source.Name = pvc.Name - // snap.Spec.Source.Kind = "PersistentVolumeClaim" - // err = createSnapshot(&snap, deployTimeout) - // if err != nil { - // Fail(err.Error()) - // } - // pool := "replicapool" - // snapList, err := listSnapshots(f, pool, images[0]) - // if err != nil { - // Fail(err.Error()) - // } - // if len(snapList) != 1 { - // e2elog.Logf("backend snapshot not matching kube snap count,snap count = % kube snap count %d", len(snapList), 1) - // Fail("validate backend snapshot failed") - // } - - // validatePVCAndAppBinding(pvcClonePath, appClonePath, f) - - // err = deleteSnapshot(&snap, deployTimeout) - // if err != nil { - // Fail(err.Error()) - // } - // err = deletePVCAndValidatePV(f.ClientSet, pvc, deployTimeout) - // if err != nil { - // Fail(err.Error()) - // } - // }) + By("create a PVC clone and Bind it to an app", func() { + createRBDSnapshotClass(f) + pvc, err := loadPVC(pvcPath) + if err != nil { + Fail(err.Error()) + } + + pvc.Namespace = f.UniqueName + e2elog.Logf("The PVC template %+v", pvc) + err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout) + if err != nil { + Fail(err.Error()) + } + // validate created backend rbd images + images := listRBDImages(f) + if len(images) != 1 { + e2elog.Logf("backend image count %d expected image count %d", len(images), 1) + Fail("validate backend image failed") + } + snap := getSnapshot(snapshotPath) + snap.Namespace = f.UniqueName + snap.Spec.Source.Name = pvc.Name + snap.Spec.Source.Kind = "PersistentVolumeClaim" + err = createSnapshot(&snap, deployTimeout) + if err != nil { + Fail(err.Error()) + } + pool := "replicapool" + snapList, err := listSnapshots(f, pool, images[0]) + if err != nil { + Fail(err.Error()) + } + if len(snapList) != 1 { + e2elog.Logf("backend snapshot not matching kube snap count,snap count = % kube snap count %d", len(snapList), 1) + Fail("validate backend snapshot failed") + } + + validatePVCAndAppBinding(pvcClonePath, appClonePath, f) + + err = deleteSnapshot(&snap, deployTimeout) + if err != nil { + Fail(err.Error()) + } + err = deletePVCAndValidatePV(f.ClientSet, pvc, deployTimeout) + if err != nil { + Fail(err.Error()) + } + }) By("create a block type PVC and Bind it to an app", func() { validatePVCAndAppBinding(rawPvcPath, rawAppPath, f) diff --git a/e2e/utils.go b/e2e/utils.go index e2b30f91742..b8df90cc6e1 100644 --- a/e2e/utils.go +++ b/e2e/utils.go @@ -9,8 +9,8 @@ import ( "strings" "time" - // _ "github.com/kubernetes-csi/external-snapshotter/pkg/apis/volumesnapshot/v1alpha1" // nolint - // _ "github.com/kubernetes-csi/external-snapshotter/pkg/client/clientset/versioned/typed/volumesnapshot/v1alpha1" // nolint + "github.com/kubernetes-csi/external-snapshotter/pkg/apis/volumesnapshot/v1alpha1" + snapClient "github.com/kubernetes-csi/external-snapshotter/pkg/client/clientset/versioned/typed/volumesnapshot/v1alpha1" . "github.com/onsi/ginkgo" // nolint . "github.com/onsi/gomega" // nolint apps "k8s.io/api/apps/v1" @@ -23,6 +23,7 @@ import ( utilyaml "k8s.io/apimachinery/pkg/util/yaml" "k8s.io/client-go/kubernetes" clientset "k8s.io/client-go/kubernetes" + "k8s.io/klog" "k8s.io/kubernetes/pkg/client/conditions" "k8s.io/kubernetes/test/e2e/framework" e2elog "k8s.io/kubernetes/test/e2e/framework/log" @@ -38,12 +39,12 @@ const ( var poll = 2 * time.Second -// type snapInfo struct { -// ID int64 `json:"id"` -// Name string `json:"name"` -// Size int64 `json:"size"` -// Timestamp string `json:"timestamp"` -// } +type snapInfo struct { + ID int64 `json:"id"` + Name string `json:"name"` + Size int64 `json:"size"` + Timestamp string `json:"timestamp"` +} func waitForDaemonSets(name, ns string, c clientset.Interface, t int) error { timeout := time.Duration(t) * time.Minute @@ -169,21 +170,21 @@ func getStorageClass(path string) scv1.StorageClass { return sc } -// func getSnapshotClass(path string) v1alpha1.VolumeSnapshotClass { -// sc := v1alpha1.VolumeSnapshotClass{} -// sc.Kind = "VolumeSnapshotClass" -// sc.APIVersion = "snapshot.storage.k8s.io/v1alpha1" -// err := unmarshal(path, &sc) -// Expect(err).Should(BeNil()) -// return sc -// } +func getSnapshotClass(path string) v1alpha1.VolumeSnapshotClass { + sc := v1alpha1.VolumeSnapshotClass{} + sc.Kind = "VolumeSnapshotClass" + sc.APIVersion = "snapshot.storage.k8s.io/v1alpha1" + err := unmarshal(path, &sc) + Expect(err).Should(BeNil()) + return sc +} -// func getSnapshot(path string) v1alpha1.VolumeSnapshot { -// sc := v1alpha1.VolumeSnapshot{} -// err := unmarshal(path, &sc) -// Expect(err).Should(BeNil()) -// return sc -// } +func getSnapshot(path string) v1alpha1.VolumeSnapshot { + sc := v1alpha1.VolumeSnapshot{} + err := unmarshal(path, &sc) + Expect(err).Should(BeNil()) + return sc +} func createCephfsStorageClass(c kubernetes.Interface, f *framework.Framework, enablePool bool) { scPath := fmt.Sprintf("%s/%s", cephfsExamplePath, "storageclass.yaml") @@ -225,33 +226,33 @@ func createRBDStorageClass(c kubernetes.Interface, f *framework.Framework, param Expect(err).Should(BeNil()) } -// func newSnapshotClient() (*snapClient.SnapshotV1alpha1Client, error) { -// config, err := framework.LoadConfig() -// if err != nil { -// return nil, fmt.Errorf("error creating client: %v", err.Error()) -// } -// c, err := snapClient.NewForConfig(config) -// if err != nil { -// return nil, fmt.Errorf("error creating snapshot client: %v", err.Error()) -// } -// return c, err -// } -// func createRBDSnapshotClass(f *framework.Framework) { -// scPath := fmt.Sprintf("%s/%s", rbdExamplePath, "snapshotclass.yaml") -// sc := getSnapshotClass(scPath) - -// opt := metav1.ListOptions{ -// LabelSelector: "app=rook-ceph-tools", -// } -// fsID := execCommandInPod(f, "ceph fsid", rookNS, &opt) -// // remove new line present in fsID -// fsID = strings.Trim(fsID, "\n") -// sc.Parameters["clusterID"] = fsID -// sclient, err := newSnapshotClient() -// Expect(err).Should(BeNil()) -// _, err = sclient.VolumeSnapshotClasses().Create(&sc) -// Expect(err).Should(BeNil()) -// } +func newSnapshotClient() (*snapClient.SnapshotV1alpha1Client, error) { + config, err := framework.LoadConfig() + if err != nil { + return nil, fmt.Errorf("error creating client: %v", err.Error()) + } + c, err := snapClient.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("error creating snapshot client: %v", err.Error()) + } + return c, err +} +func createRBDSnapshotClass(f *framework.Framework) { + scPath := fmt.Sprintf("%s/%s", rbdExamplePath, "snapshotclass.yaml") + sc := getSnapshotClass(scPath) + + opt := metav1.ListOptions{ + LabelSelector: "app=rook-ceph-tools", + } + fsID := execCommandInPod(f, "ceph fsid", rookNS, &opt) + // remove new line present in fsID + fsID = strings.Trim(fsID, "\n") + sc.Parameters["clusterID"] = fsID + sclient, err := newSnapshotClient() + Expect(err).Should(BeNil()) + _, err = sclient.VolumeSnapshotClasses().Create(&sc) + Expect(err).Should(BeNil()) +} func deleteConfigMap(pluginPath string) { path := pluginPath + configMap @@ -758,72 +759,72 @@ func validateNormalUserPVCAccess(pvcPath string, f *framework.Framework) { } } -// func createSnapshot(snap *v1alpha1.VolumeSnapshot, t int) error { - -// sclient, err := newSnapshotClient() -// if err != nil { -// return err -// } -// _, err = sclient.VolumeSnapshots(snap.Namespace).Create(snap) -// if err != nil { -// return err -// } -// e2elog.Logf("snapshot with name %v created in %v namespace", snap.Name, snap.Namespace) - -// timeout := time.Duration(t) * time.Minute -// name := snap.Name -// start := time.Now() -// e2elog.Logf("Waiting up to %v to be in Ready state", snap) - -// return wait.PollImmediate(poll, timeout, func() (bool, error) { -// e2elog.Logf("waiting for snapshot %s (%d seconds elapsed)", snap.Name, int(time.Since(start).Seconds())) -// snaps, err := sclient.VolumeSnapshots(snap.Namespace).Get(name, metav1.GetOptions{}) -// if err != nil { -// e2elog.Logf("Error getting snapshot in namespace: '%s': %v", snap.Namespace, err) -// if testutils.IsRetryableAPIError(err) { -// return false, nil -// } -// if apierrs.IsNotFound(err) { -// return false, nil -// } -// return false, err -// } -// if snaps.Status.ReadyToUse { -// return true, nil -// } -// return false, nil -// }) -// } - -// func deleteSnapshot(snap *v1alpha1.VolumeSnapshot, t int) error { -// sclient, err := newSnapshotClient() -// if err != nil { -// return err -// } -// err = sclient.VolumeSnapshots(snap.Namespace).Delete(snap.Name, &metav1.DeleteOptions{}) -// if err != nil { -// return err -// } - -// timeout := time.Duration(t) * time.Minute -// name := snap.Name -// start := time.Now() -// e2elog.Logf("Waiting up to %v to be deleted", snap) - -// return wait.PollImmediate(poll, timeout, func() (bool, error) { -// e2elog.Logf("deleting snapshot %s (%d seconds elapsed)", name, int(time.Since(start).Seconds())) -// _, err := sclient.VolumeSnapshots(snap.Namespace).Get(name, metav1.GetOptions{}) -// if err == nil { -// return false, nil -// } - -// if !apierrs.IsNotFound(err) { -// return false, fmt.Errorf("get on deleted snapshot %v failed with error other than \"not found\": %v", name, err) -// } - -// return true, nil -// }) -// } +func createSnapshot(snap *v1alpha1.VolumeSnapshot, t int) error { + + sclient, err := newSnapshotClient() + if err != nil { + return err + } + _, err = sclient.VolumeSnapshots(snap.Namespace).Create(snap) + if err != nil { + return err + } + e2elog.Logf("snapshot with name %v created in %v namespace", snap.Name, snap.Namespace) + + timeout := time.Duration(t) * time.Minute + name := snap.Name + start := time.Now() + e2elog.Logf("Waiting up to %v to be in Ready state", snap) + + return wait.PollImmediate(poll, timeout, func() (bool, error) { + e2elog.Logf("waiting for snapshot %s (%d seconds elapsed)", snap.Name, int(time.Since(start).Seconds())) + snaps, err := sclient.VolumeSnapshots(snap.Namespace).Get(name, metav1.GetOptions{}) + if err != nil { + e2elog.Logf("Error getting snapshot in namespace: '%s': %v", snap.Namespace, err) + if testutils.IsRetryableAPIError(err) { + return false, nil + } + if apierrs.IsNotFound(err) { + return false, nil + } + return false, err + } + if snaps.Status.ReadyToUse { + return true, nil + } + return false, nil + }) +} + +func deleteSnapshot(snap *v1alpha1.VolumeSnapshot, t int) error { + sclient, err := newSnapshotClient() + if err != nil { + return err + } + err = sclient.VolumeSnapshots(snap.Namespace).Delete(snap.Name, &metav1.DeleteOptions{}) + if err != nil { + return err + } + + timeout := time.Duration(t) * time.Minute + name := snap.Name + start := time.Now() + e2elog.Logf("Waiting up to %v to be deleted", snap) + + return wait.PollImmediate(poll, timeout, func() (bool, error) { + e2elog.Logf("deleting snapshot %s (%d seconds elapsed)", name, int(time.Since(start).Seconds())) + _, err := sclient.VolumeSnapshots(snap.Namespace).Get(name, metav1.GetOptions{}) + if err == nil { + return false, nil + } + + if !apierrs.IsNotFound(err) { + return false, fmt.Errorf("get on deleted snapshot %v failed with error other than \"not found\": %v", name, err) + } + + return true, nil + }) +} func deleteBackingCephFSVolume(f *framework.Framework, pvc *v1.PersistentVolumeClaim) error { volname, _, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f) @@ -858,18 +859,18 @@ func listRBDImages(f *framework.Framework) []string { return imgInfos } -// func listSnapshots(f *framework.Framework, pool, imageName string) ([]snapInfo, error) { -// opt := metav1.ListOptions{ -// LabelSelector: "app=rook-ceph-tools", -// } -// command := fmt.Sprintf("rbd snap ls %s/%s --format=json", pool, imageName) -// stdout := execCommandInPod(f, command, rookNS, &opt) +func listSnapshots(f *framework.Framework, pool, imageName string) ([]snapInfo, error) { + opt := metav1.ListOptions{ + LabelSelector: "app=rook-ceph-tools", + } + command := fmt.Sprintf("rbd snap ls %s/%s --format=json", pool, imageName) + stdout := execCommandInPod(f, command, rookNS, &opt) -// var snapInfos []snapInfo + var snapInfos []snapInfo -// err := json.Unmarshal([]byte(stdout), &snapInfos) -// return snapInfos, err -// } + err := json.Unmarshal([]byte(stdout), &snapInfos) + return snapInfos, err +} func checkDataPersist(pvcPath, appPath string, f *framework.Framework) error { data := "checking data persist" From a027fdb3ea78b527dcfd3c1aac720ad7204c16e6 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Tue, 3 Mar 2020 10:03:50 +0530 Subject: [PATCH 2/3] update Travis CI to bionic Signed-off-by: Madhu Rajanna --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8e5519beaa0..ac67f7c31a9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,11 @@ --- # need for docker build sudo: true -dist: xenial +dist: bionic addons: apt: packages: - - realpath - ruby services: - docker From 70c3b528f21c4f2e65c87a6637282b13a9e083c4 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Tue, 3 Mar 2020 12:11:30 +0530 Subject: [PATCH 3/3] update snapshot implementation with new design ``` * create temporary RBD snapshot on source volume's RBD image * RBD clone CO source volume's RBD image's snapshot to new RBD image * remove temporary RBD snapshot on CO source volume's RBD image * serialize work on RBD snapshot image clone * test RBD snapshot image clone's depth * if depth over hard limit flatten clone * create RBD snapshot on RBD snapshot image clone * create RBD volume image clone from RBD snapshot image clone * delete temporary RBD image * delete temporary RBD snapshot ``` Signed-off-by: Madhu Rajanna --- cmd/cephcsi.go | 8 ++ docs/deploy-rbd.md | 1 + e2e/rbd.go | 58 +------- pkg/rbd/controllerserver.go | 185 +++++++++++++++---------- pkg/rbd/driver.go | 6 + pkg/rbd/errors.go | 9 ++ pkg/rbd/rbd_journal.go | 77 +++++++++-- pkg/rbd/rbd_util.go | 262 +++++++++++++++++++++--------------- pkg/rbd/snapshot.go | 146 ++++++++++++++++++++ pkg/util/util.go | 3 + 10 files changed, 504 insertions(+), 251 deletions(-) create mode 100644 pkg/rbd/snapshot.go diff --git a/cmd/cephcsi.go b/cmd/cephcsi.go index 11b30db231c..4db11677444 100644 --- a/cmd/cephcsi.go +++ b/cmd/cephcsi.go @@ -76,6 +76,7 @@ func init() { flag.BoolVar(&conf.Version, "version", false, "Print cephcsi version information") + flag.UintVar(&conf.RbdHardMaxCloneDepth, "rbdhardmaxclonedepth", 5, "Hard limit for maximum number of nested volume clones that are taken before a flatten occurs") klog.InitFlags(nil) if err := flag.Set("logtostderr", "true"); err != nil { klog.Exitf("failed to set logtostderr flag: %v", err) @@ -169,6 +170,7 @@ func main() { klog.Infof("Starting driver type: %v with name: %v", conf.Vtype, dname) switch conf.Vtype { case rbdType: + validateCloneDepthFlag(&conf) driver := rbd.NewDriver() driver.Run(&conf, cp) @@ -188,3 +190,9 @@ func main() { os.Exit(0) } + +func validateCloneDepthFlag(conf *util.Config) { + if conf.RbdHardMaxCloneDepth > 16 { + klog.Fatalln("rbdhardmaxclonedepth flag value should be less than 16") + } +} diff --git a/docs/deploy-rbd.md b/docs/deploy-rbd.md index 3e58169e83b..0d50df97cde 100644 --- a/docs/deploy-rbd.md +++ b/docs/deploy-rbd.md @@ -57,6 +57,7 @@ make image-cephcsi | `mounter` | no | if set to `rbd-nbd`, use `rbd-nbd` on nodes that have `rbd-nbd` and `nbd` kernel modules to map rbd images | | `encrypted` | no | disabled by default, use `"true"` to enable LUKS encryption on pvc and `"false"` to disable it. **Do not change for existing storageclasses** | | `encryptionKMSID` | no | required if encryption is enabled and a kms is used to store passphrases | +| `--rbdhardmaxclonedepth` | `10` | Hard limit for maximum number of nested volume clones that are taken before a flatten occurs | **NOTE:** An accompanying CSI configuration file, needs to be provided to the running pods. Refer to [Creating CSI configuration](../examples/README.md#creating-csi-configuration) diff --git a/e2e/rbd.go b/e2e/rbd.go index 8bf359a2297..01061aa9254 100644 --- a/e2e/rbd.go +++ b/e2e/rbd.go @@ -91,31 +91,19 @@ var _ = Describe("RBD", func() { deleteConfigMap(rbdDirPath) deleteResource(rbdExamplePath + "secret.yaml") deleteResource(rbdExamplePath + "storageclass.yaml") -<<<<<<< HEAD // deleteResource(rbdExamplePath + "snapshotclass.yaml") deleteVault() -======= - deleteResource(rbdExamplePath + "snapshotclass.yaml") ->>>>>>> parent of 90fef919d... Skip snapshot testing in CI }) Context("Test RBD CSI", func() { It("Test RBD CSI", func() { pvcPath := rbdExamplePath + "pvc.yaml" appPath := rbdExamplePath + "pod.yaml" -<<<<<<< HEAD rawPvcPath := rbdExamplePath + "raw-block-pvc.yaml" rawAppPath := rbdExamplePath + "raw-block-pod.yaml" - // pvcClonePath := rbdExamplePath + "pvc-restore.yaml" - // appClonePath := rbdExamplePath + "pod-restore.yaml" - // snapshotPath := rbdExamplePath + "snapshot.yaml" -======= - // rawPvcPath := rbdExamplePath + "raw-block-pvc.yaml" - // rawAppPath := rbdExamplePath + "raw-block-pod.yaml" pvcClonePath := rbdExamplePath + "pvc-restore.yaml" appClonePath := rbdExamplePath + "pod-restore.yaml" snapshotPath := rbdExamplePath + "snapshot.yaml" ->>>>>>> parent of 90fef919d... Skip snapshot testing in CI By("checking provisioner deployment is running") var err error @@ -168,51 +156,7 @@ var _ = Describe("RBD", func() { By("create a PVC clone and Bind it to an app", func() { createRBDSnapshotClass(f) - pvc, err := loadPVC(pvcPath) - if err != nil { - Fail(err.Error()) - } - - pvc.Namespace = f.UniqueName - e2elog.Logf("The PVC template %+v", pvc) - err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout) - if err != nil { - Fail(err.Error()) - } - // validate created backend rbd images - images := listRBDImages(f) - if len(images) != 1 { - e2elog.Logf("backend image count %d expected image count %d", len(images), 1) - Fail("validate backend image failed") - } - snap := getSnapshot(snapshotPath) - snap.Namespace = f.UniqueName - snap.Spec.Source.Name = pvc.Name - snap.Spec.Source.Kind = "PersistentVolumeClaim" - err = createSnapshot(&snap, deployTimeout) - if err != nil { - Fail(err.Error()) - } - pool := "replicapool" - snapList, err := listSnapshots(f, pool, images[0]) - if err != nil { - Fail(err.Error()) - } - if len(snapList) != 1 { - e2elog.Logf("backend snapshot not matching kube snap count,snap count = % kube snap count %d", len(snapList), 1) - Fail("validate backend snapshot failed") - } - - validatePVCAndAppBinding(pvcClonePath, appClonePath, f) - - err = deleteSnapshot(&snap, deployTimeout) - if err != nil { - Fail(err.Error()) - } - err = deletePVCAndValidatePV(f.ClientSet, pvc, deployTimeout) - if err != nil { - Fail(err.Error()) - } + validateCloneFromSnapshot(pvcPath, appPath, snapshotPath, pvcClonePath, appClonePath, 1, true, false, f) }) By("create a block type PVC and Bind it to an app", func() { diff --git a/pkg/rbd/controllerserver.go b/pkg/rbd/controllerserver.go index de90ed5482a..9e2cc6ce44d 100644 --- a/pkg/rbd/controllerserver.go +++ b/pkg/rbd/controllerserver.go @@ -18,7 +18,6 @@ package rbd import ( "context" - "fmt" csicommon "github.com/ceph/ceph-csi/pkg/csi-common" "github.com/ceph/ceph-csi/pkg/util" @@ -143,12 +142,21 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol } defer cs.VolumeLocks.Release(req.GetName()) - found, err := checkVolExists(ctx, rbdVol, cr) + clone := false + if req.VolumeContentSource != nil { + clone = true + } + + found, err := checkVolExists(ctx, rbdVol, clone, cr) if err != nil { if _, ok := err.(ErrVolNameConflict); ok { return nil, status.Error(codes.AlreadyExists, err.Error()) } + if _, ok := err.(ErrFlattenInProgress); ok { + return nil, status.Error(codes.Aborted, err.Error()) + } + return nil, status.Error(codes.Internal, err.Error()) } if found { @@ -176,15 +184,20 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol } defer func() { if err != nil { - errDefer := undoVolReservation(ctx, rbdVol, cr) - if errDefer != nil { - klog.Warningf(util.Log(ctx, "failed undoing reservation of volume: %s (%s)"), req.GetName(), errDefer) + if _, ok := err.(ErrFlattenInProgress); !ok { + errDefer := undoVolReservation(ctx, rbdVol, cr) + if errDefer != nil { + klog.Warningf(util.Log(ctx, "failed undoing reservation of volume: %s (%s)"), req.GetName(), errDefer) + } } } }() err = cs.createBackingImage(ctx, rbdVol, req, util.RoundOffVolSize(rbdVol.VolSize)) if err != nil { + if _, ok := err.(ErrFlattenInProgress); ok { + return nil, status.Error(codes.Aborted, err.Error()) + } return nil, err } @@ -217,7 +230,7 @@ func (cs *ControllerServer) createBackingImage(ctx context.Context, rbdVol *rbdV // if VolumeContentSource is not nil, this request is for snapshot if req.VolumeContentSource != nil { - if err = cs.checkSnapshot(ctx, req, rbdVol); err != nil { + if err = cs.createVolumeFromSnapshot(ctx, req, rbdVol); err != nil { return err } } else { @@ -239,7 +252,7 @@ func (cs *ControllerServer) createBackingImage(ctx context.Context, rbdVol *rbdV return nil } -func (cs *ControllerServer) checkSnapshot(ctx context.Context, req *csi.CreateVolumeRequest, rbdVol *rbdVolume) error { +func (cs *ControllerServer) createVolumeFromSnapshot(ctx context.Context, req *csi.CreateVolumeRequest, rbdVol *rbdVolume) error { snapshot := req.VolumeContentSource.GetSnapshot() if snapshot == nil { return status.Error(codes.InvalidArgument, "volume Snapshot cannot be empty") @@ -257,21 +270,52 @@ func (cs *ControllerServer) checkSnapshot(ctx context.Context, req *csi.CreateVo defer cr.DeleteCredentials() rbdSnap := &rbdSnapshot{} - if err = genSnapFromSnapID(ctx, rbdSnap, snapshotID, cr); err != nil { - if _, ok := err.(ErrSnapNotFound); !ok { - return status.Error(codes.Internal, err.Error()) - } + if acquired := cs.SnapshotLocks.TryAcquire(snapshotID); !acquired { + klog.Infof(util.Log(ctx, util.SnapshotOperationAlreadyExistsFmt), snapshotID) + return status.Errorf(codes.Aborted, util.VolumeOperationAlreadyExistsFmt, snapshotID) + } + defer cs.SnapshotLocks.Release(snapshotID) + + if err = genSnapFromSnapID(ctx, rbdSnap, snapshotID, cr); err != nil { if _, ok := err.(util.ErrPoolNotFound); ok { klog.Errorf(util.Log(ctx, "failed to get backend snapshot for %s: %v"), snapshotID, err) return status.Error(codes.InvalidArgument, err.Error()) } + if _, ok := err.(ErrSnapNotFound); ok { + snapVol := generateVolFromSnap(rbdSnap) + _, err = getImageInfo(ctx, snapVol.Monitors, cr, snapVol.Pool, snapVol.RbdImageName) + if err != nil { + if _, ok := err.(ErrImageNotFound); ok { + return status.Error(codes.InvalidArgument, "missing requested Snapshot ID") + } + return status.Error(codes.Internal, err.Error()) + } + // flatten rbd image + err = flattenRbdImage(ctx, snapVol, rbdHardMaxCloneDepth, cr) + if err != nil { + if _, ok := err.(ErrFlattenInProgress); ok { + return err + } + return status.Error(codes.Internal, err.Error()) + } + // create snapshot + err = createSnapshot(ctx, rbdSnap, cr) + if err != nil { + klog.Errorf(util.Log(ctx, "failed to create snapshot: %v"), err) + } - return status.Error(codes.InvalidArgument, "missing requested Snapshot ID") + } + if err != nil { + return status.Error(codes.Internal, err.Error()) + } } - err = restoreSnapshot(ctx, rbdVol, rbdSnap, cr) + err = cloneRbdImageFromSnapshot(ctx, rbdVol, rbdSnap, cr) if err != nil { + if errDel := deleteSnapshot(ctx, rbdSnap, cr); errDel != nil { + klog.Errorf(util.Log(ctx, "failed to delete rbd snapshot: %s/%s@ with error: %v"), rbdSnap.Pool, rbdSnap.RbdImageName, rbdSnap.RbdSnapName, errDel) + } return status.Error(codes.Internal, err.Error()) } klog.V(4).Infof(util.Log(ctx, "create volume %s from snapshot %s"), req.GetName(), rbdSnap.RbdSnapName) @@ -507,12 +551,24 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS // Need to check for already existing snapshot name, and if found // check for the requested source volume id and already allocated source volume id - found, err := checkSnapExists(ctx, rbdSnap, cr) + found, err := checkSnapCloneExists(ctx, rbdSnap, rbdVol, cr) if err != nil { if _, ok := err.(util.ErrSnapNameConflict); ok { return nil, status.Error(codes.AlreadyExists, err.Error()) } + if _, ok := err.(ErrFlattenInProgress); ok { + return &csi.CreateSnapshotResponse{ + Snapshot: &csi.Snapshot{ + SizeBytes: rbdSnap.SizeBytes, + SnapshotId: rbdSnap.SnapID, + SourceVolumeId: rbdSnap.SourceVolumeID, + CreationTime: rbdSnap.CreatedAt, + ReadyToUse: false, + }, + }, nil + } + return nil, status.Errorf(codes.Internal, err.Error()) } if found { @@ -527,7 +583,7 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS }, nil } - err = reserveSnap(ctx, rbdSnap, cr) + err = reserveSnap(ctx, rbdSnap, rbdVol, cr) if err != nil { return nil, status.Error(codes.Internal, err.Error()) } @@ -540,7 +596,8 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS } }() - err = cs.doSnapshot(ctx, rbdSnap, cr) + ready := false + ready, err = cs.doSnapshot(ctx, rbdSnap, rbdVol, cr) if err != nil { return nil, err } @@ -550,8 +607,8 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS SizeBytes: rbdSnap.SizeBytes, SnapshotId: rbdSnap.SnapID, SourceVolumeId: req.GetSourceVolumeId(), - CreationTime: rbdSnap.CreatedAt, - ReadyToUse: true, + CreationTime: rbdVol.CreatedAt, + ReadyToUse: ready, }, }, nil } @@ -578,49 +635,19 @@ func (cs *ControllerServer) validateSnapshotReq(ctx context.Context, req *csi.Cr return nil } -func (cs *ControllerServer) doSnapshot(ctx context.Context, rbdSnap *rbdSnapshot, cr *util.Credentials) (err error) { - err = createSnapshot(ctx, rbdSnap, cr) - // If snap creation fails, even due to snapname already used, fail, next attempt will get a new - // uuid for use as the snap name - if err != nil { - klog.Errorf(util.Log(ctx, "failed to create snapshot: %v"), err) - return status.Error(codes.Internal, err.Error()) - } - defer func() { - if err != nil { - errDefer := deleteSnapshot(ctx, rbdSnap, cr) - if errDefer != nil { - klog.Errorf(util.Log(ctx, "failed to delete snapshot: %v"), errDefer) - err = fmt.Errorf("snapshot created but failed to delete snapshot due to"+ - " other failures: %v", err) - } - err = status.Error(codes.Internal, err.Error()) - } - }() - err = protectSnapshot(ctx, rbdSnap, cr) - if err != nil { - klog.Errorf(util.Log(ctx, "failed to protect snapshot: %v"), err) - return status.Error(codes.Internal, err.Error()) - } - defer func() { - if err != nil { - errDefer := unprotectSnapshot(ctx, rbdSnap, cr) - if errDefer != nil { - klog.Errorf(util.Log(ctx, "failed to unprotect snapshot: %v"), errDefer) - err = fmt.Errorf("snapshot created but failed to unprotect snapshot due to"+ - " other failures: %v", err) - } - err = status.Error(codes.Internal, err.Error()) - } - }() +func (cs *ControllerServer) doSnapshot(ctx context.Context, rbdSnap *rbdSnapshot, rbdVol *rbdVolume, cr *util.Credentials) (bool, error) { + // generate cloned volume details from snapshot + cloneRbd := generateVolFromSnap(rbdSnap) + // add image feature for cloneRbd + cloneRbd.ImageFormat = rbdImageFormat2 + cloneRbd.ImageFeatures = "layering" - err = getSnapshotMetadata(ctx, rbdSnap, cr) + ready, err := createRBDClone(ctx, rbdVol, cloneRbd, cr) if err != nil { - klog.Errorf(util.Log(ctx, "failed to fetch snapshot metadata: %v"), err) - return status.Error(codes.Internal, err.Error()) + klog.Errorf(util.Log(ctx, "failed to create snapshot: %v"), err) + return ready, status.Error(codes.Internal, err.Error()) } - - return nil + return ready, nil } // DeleteSnapshot deletes the snapshot in backend and removes the @@ -669,6 +696,23 @@ func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteS return nil, status.Error(codes.Internal, err.Error()) } + rbdVol := generateVolFromSnap(rbdSnap) + err = updateVolWithImageInfo(ctx, rbdVol, cr) + if err != nil { + if _, ok := err.(ErrImageNotFound); !ok { + klog.Errorf(util.Log(ctx, "failed to delete rbd image: %s/%s with error: %v"), rbdVol.Pool, rbdVol.VolName, err) + return nil, status.Error(codes.Internal, err.Error()) + } + } else { + err = deleteImage(ctx, rbdVol, cr) + if err != nil { + if _, ok := err.(ErrImageNotFound); !ok { + klog.Errorf(util.Log(ctx, "failed to delete rbd image: %s/%s with error: %v"), rbdVol.Pool, rbdVol.VolName, err) + return nil, status.Error(codes.Internal, err.Error()) + } + } + } + // Consider missing snap as already deleted, and proceed to remove the omap values, // safeguarding against parallel create or delete requests against the // same name. @@ -692,22 +736,23 @@ func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteS } defer cs.SnapshotLocks.Release(rbdSnap.RequestName) - // Unprotect snapshot - err = unprotectSnapshot(ctx, rbdSnap, cr) + rbdSnap.RbdImageName = rbdSnap.RbdSnapName + // generate cloned volume details from snapshot + cloneRbd := generateVolFromSnap(rbdSnap) + // Deleting snapshot and cloned volume + klog.V(4).Infof(util.Log(ctx, "deleting Snaphot and cloned rbd volume %s"), rbdSnap.RbdSnapName) + err = cleanUpSnapshot(ctx, rbdSnap, cloneRbd, cr) if err != nil { - return nil, status.Errorf(codes.FailedPrecondition, - "failed to unprotect snapshot: %s/%s with error: %v", + return nil, status.Errorf(codes.Internal, + "failed to delete snapshot or image: %s/%s with error: %v", rbdSnap.Pool, rbdSnap.RbdSnapName, err) } - - // Deleting snapshot - klog.V(4).Infof(util.Log(ctx, "deleting Snaphot %s"), rbdSnap.RbdSnapName) - if err := deleteSnapshot(ctx, rbdSnap, cr); err != nil { - return nil, status.Errorf(codes.FailedPrecondition, - "failed to delete snapshot: %s/%s with error: %v", - rbdSnap.Pool, rbdSnap.RbdSnapName, err) + err = undoSnapReservation(ctx, rbdSnap, cr) + if err != nil { + klog.Errorf(util.Log(ctx, "failed to remove reservation for snapname (%s) with backing snap (%s) on image (%s) (%s)"), + rbdSnap.RequestName, rbdSnap.RbdSnapName, rbdSnap.RbdImageName, err) + return nil, status.Error(codes.Internal, err.Error()) } - return &csi.DeleteSnapshotResponse{}, nil } diff --git a/pkg/rbd/driver.go b/pkg/rbd/driver.go index 89118ff59f3..ddd9b9adb46 100644 --- a/pkg/rbd/driver.go +++ b/pkg/rbd/driver.go @@ -52,6 +52,9 @@ var ( // VolumeName to backing RBD images volJournal *util.CSIJournal snapJournal *util.CSIJournal + + // rbdHardMaxCloneDepth is the hard limit for maximum number of nested volume clones that are taken before a flatten occurs + rbdHardMaxCloneDepth uint ) // NewDriver returns new rbd driver @@ -101,6 +104,9 @@ func (r *Driver) Run(conf *util.Config, cachePersister util.CachePersister) { CSIInstanceID = conf.InstanceID } + // update clone limit + rbdHardMaxCloneDepth = conf.RbdHardMaxCloneDepth + // Get an instance of the volume and snapshot journal keys volJournal = util.NewCSIVolumeJournal() snapJournal = util.NewCSISnapshotJournal() diff --git a/pkg/rbd/errors.go b/pkg/rbd/errors.go index 865292e5396..7fb8db38f63 100644 --- a/pkg/rbd/errors.go +++ b/pkg/rbd/errors.go @@ -66,3 +66,12 @@ type ErrMissingStash struct { func (e ErrMissingStash) Error() string { return e.err.Error() } + +// ErrFlattenInProgress is returned when flatten is inprogess for an image +type ErrFlattenInProgress struct { + err error +} + +func (e ErrFlattenInProgress) Error() string { + return e.err.Error() +} diff --git a/pkg/rbd/rbd_journal.go b/pkg/rbd/rbd_journal.go index 809e3765979..95eaf4565d4 100644 --- a/pkg/rbd/rbd_journal.go +++ b/pkg/rbd/rbd_journal.go @@ -19,6 +19,7 @@ package rbd import ( "context" "fmt" + "strings" "github.com/ceph/ceph-csi/pkg/util" @@ -87,8 +88,8 @@ func validateRbdVol(rbdVol *rbdVolume) error { } /* -checkSnapExists, and its counterpart checkVolExists, function checks if the passed in rbdSnapshot -or rbdVolume exists on the backend. +checkSnapCloneExists, and its counterpart checkVolExists, function checks if +the passed in rbdSnapshot or rbdVolume exists on the backend. **NOTE:** These functions manipulate the rados omaps that hold information regarding volume names as requested by the CSI drivers. Hence, these need to be invoked only when the @@ -108,7 +109,7 @@ because, the order of omap creation and deletion are inverse of each other, and request name lock, and hence any stale omaps are leftovers from incomplete transactions and are hence safe to garbage collect. */ -func checkSnapExists(ctx context.Context, rbdSnap *rbdSnapshot, cr *util.Credentials) (bool, error) { +func checkSnapCloneExists(ctx context.Context, rbdSnap *rbdSnapshot, rbdVol *rbdVolume, cr *util.Credentials) (bool, error) { err := validateRbdSnap(rbdSnap) if err != nil { return false, err @@ -131,13 +132,20 @@ func checkSnapExists(ctx context.Context, rbdSnap *rbdSnapshot, cr *util.Credent return false, err } + // update rbdImageName as snapshot will be created for cloned image with + // name RbdSnapName + rbdSnap.RbdImageName = rbdSnap.RbdSnapName // Fetch on-disk image attributes - err = updateSnapWithImageInfo(ctx, rbdSnap, cr) + + vol := generateVolFromSnap(rbdSnap) + + // Fetch on-disk image attributes + err = updateVolWithImageInfo(ctx, vol, cr) if err != nil { - if _, ok := err.(ErrSnapNotFound); ok { - err = snapJournal.UndoReservation(ctx, rbdSnap.Monitors, cr, rbdSnap.Pool, - rbdSnap.RbdSnapName, rbdSnap.RequestName) - return false, err + if _, ok := err.(ErrImageNotFound); ok { + snap := rbdSnap + snap.RbdImageName = rbdVol.RbdImageName + err = undoSnapshotCloning(ctx, snap, vol, cr, false) } return false, err } @@ -149,21 +157,56 @@ func checkSnapExists(ctx context.Context, rbdSnap *rbdSnapshot, cr *util.Credent return false, err } + rbdSnap.CreatedAt = vol.CreatedAt + + err = flattenRbdImage(ctx, vol, rbdHardMaxCloneDepth, cr) + if err != nil { + if _, ok := err.(ErrFlattenInProgress); !ok { + klog.Errorf(util.Log(ctx, "failed to add flatten task"), err) + flatErr := undoSnapshotCloning(ctx, rbdSnap, vol, cr, true) + if flatErr != nil { + klog.Errorf(util.Log(ctx, "failed to undo snapshot cloning %v"), flatErr) + } + } + return false, err + } + klog.V(4).Infof(util.Log(ctx, "found existing snap (%s) with snap name (%s) for request (%s)"), rbdSnap.SnapID, rbdSnap.RbdSnapName, rbdSnap.RequestName) return true, nil } +func undoSnapshotCloning(ctx context.Context, rbdSnap *rbdSnapshot, rbdVol *rbdVolume, cr *util.Credentials, deleteVol bool) error { + err := deleteSnapshot(ctx, rbdSnap, cr) + if err != nil && !strings.Contains(err.Error(), "No such file or directory") { + + klog.Errorf(util.Log(ctx, "failed to delete snapshot: %v"), err) + return err + } + if deleteVol { + err = deleteImage(ctx, rbdVol, cr) + if err != nil { + if _, ok := err.(ErrImageNotFound); !ok { + klog.Errorf(util.Log(ctx, "failed to delete rbd image %s/%s: %v"), rbdVol.Pool, rbdVol.RbdImageName, err) + return err + } + } + } + err = snapJournal.UndoReservation(ctx, rbdSnap.Monitors, cr, rbdSnap.Pool, + rbdSnap.RbdSnapName, rbdSnap.RequestName) + return err +} + /* -Check comment on checkSnapExists, to understand how this function behaves +Check comment on checkSnapCloneExists, to understand how this function behaves **NOTE:** These functions manipulate the rados omaps that hold information regarding volume names as requested by the CSI drivers. Hence, these need to be invoked only when the respective CSI snapshot or volume name based locks are held, as otherwise racy access to these omaps may end up leaving the omaps in an inconsistent state. */ -func checkVolExists(ctx context.Context, rbdVol *rbdVolume, cr *util.Credentials) (bool, error) { +func checkVolExists(ctx context.Context, rbdVol *rbdVolume, clone bool, cr *util.Credentials) (bool, error) { err := validateRbdVol(rbdVol) if err != nil { return false, err @@ -219,6 +262,15 @@ func checkVolExists(ctx context.Context, rbdVol *rbdVolume, cr *util.Credentials return false, err } + if clone { + flatErr := flattenRbdImage(ctx, rbdVol, rbdHardMaxCloneDepth, cr) + if flatErr != nil { + klog.Errorf(util.Log(ctx, "failed to list flatten task"), flatErr) + return false, flatErr + } + + } + klog.V(4).Infof(util.Log(ctx, "found existing volume (%s) with image name (%s) for request (%s)"), rbdVol.VolID, rbdVol.RbdImageName, rbdVol.RequestName) @@ -227,14 +279,14 @@ func checkVolExists(ctx context.Context, rbdVol *rbdVolume, cr *util.Credentials // reserveSnap is a helper routine to request a rbdSnapshot name reservation and generate the // volume ID for the generated name -func reserveSnap(ctx context.Context, rbdSnap *rbdSnapshot, cr *util.Credentials) error { +func reserveSnap(ctx context.Context, rbdSnap *rbdSnapshot, rbdVol *rbdVolume, cr *util.Credentials) error { var ( snapUUID string err error ) snapUUID, rbdSnap.RbdSnapName, err = snapJournal.ReserveName(ctx, rbdSnap.Monitors, cr, rbdSnap.Pool, - rbdSnap.RequestName, rbdSnap.NamePrefix, rbdSnap.RbdImageName, "") + rbdSnap.RequestName, rbdSnap.NamePrefix, rbdVol.RbdImageName, "") if err != nil { return err } @@ -244,6 +296,7 @@ func reserveSnap(ctx context.Context, rbdSnap *rbdSnapshot, cr *util.Credentials if err != nil { return err } + rbdSnap.RbdImageName = rbdSnap.RbdSnapName klog.V(4).Infof(util.Log(ctx, "generated Volume ID (%s) and image name (%s) for request name (%s)"), rbdSnap.SnapID, rbdSnap.RbdSnapName, rbdSnap.RequestName) diff --git a/pkg/rbd/rbd_util.go b/pkg/rbd/rbd_util.go index 0bc1e76df45..d77ae325aeb 100644 --- a/pkg/rbd/rbd_util.go +++ b/pkg/rbd/rbd_util.go @@ -75,6 +75,7 @@ type rbdVolume struct { Monitors string `json:"monitors"` Pool string `json:"pool"` DataPool string + ImageID string ImageFormat string `json:"imageFormat"` ImageFeatures string `json:"imageFeatures"` AdminID string `json:"adminId"` @@ -88,6 +89,7 @@ type rbdVolume struct { DisableInUseChecks bool `json:"disableInUseChecks"` Encrypted bool KMS util.EncryptionKMS + CreatedAt *timestamp.Timestamp } // rbdSnapshot represents a CSI snapshot and its RBD snapshot specifics @@ -176,30 +178,17 @@ func rbdStatus(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) (boo return false, output, nil } -// rbdManagerTaskDelete adds a ceph manager task to delete an rbd image, thus deleting -// it asynchronously. If command is not found returns a bool set to false -func rbdManagerTaskDeleteImage(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) (bool, error) { +// rbdManagerTaskTrashRemove adds a ceph manager task to remove image from trash, thus removing it asynchronously. If command is not found returns a bool set to false +func rbdManagerTaskTrashRemove(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) (bool, error) { var output []byte - args := []string{"rbd", "task", "add", "remove", - pOpts.Pool + "/" + pOpts.RbdImageName, - "--id", cr.ID, - "--keyfile=" + cr.KeyFile, - "-m", pOpts.Monitors, - } + klog.V(4).Infof(util.Log(ctx, "rbd: task add trash remove for image (%s) with id (%s) using mon %s, pool %s"), pOpts.RbdImageName, pOpts.ImageID, pOpts.Monitors, pOpts.Pool) + args := []string{"rbd", "task", "add", "trash", "remove", pOpts.Pool + "/" + pOpts.ImageID, "--id", cr.ID, "--keyfile=" + cr.KeyFile, "-m", pOpts.Monitors} output, err := execCommand("ceph", args) if err != nil { - if strings.Contains(string(output), rbdTaskRemoveCmdInvalidString1) && - strings.Contains(string(output), rbdTaskRemoveCmdInvalidString2) { - klog.Infof(util.Log(ctx, "cluster with cluster ID (%s) does not support Ceph manager based rbd image"+ - " deletion (minimum ceph version required is v14.2.3)"), pOpts.ClusterID) - return false, err - } else if strings.HasPrefix(string(output), rbdTaskRemoveCmdAccessDeniedMessage) { - klog.Infof(util.Log(ctx, "access denied to Ceph MGR-based RBD image deletion "+ - "on cluster ID (%s)"), pOpts.ClusterID) - return false, err - } + valid := isValidCommand(ctx, string(output), pOpts) + return valid, err } return true, err @@ -208,7 +197,7 @@ func rbdManagerTaskDeleteImage(ctx context.Context, pOpts *rbdVolume, cr *util.C // deleteImage deletes a ceph image with provision and volume options. func deleteImage(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) error { var output []byte - + var id string image := pOpts.RbdImageName found, _, err := rbdStatus(ctx, pOpts, cr) if err != nil { @@ -219,13 +208,18 @@ func deleteImage(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) er return fmt.Errorf("rbd %s is still being used", image) } - klog.V(4).Infof(util.Log(ctx, "rbd: rm %s using mon %s, pool %s"), image, pOpts.Monitors, pOpts.Pool) - - // attempt to use Ceph manager based deletion support if available - rbdCephMgrSupported, err := rbdManagerTaskDeleteImage(ctx, pOpts, cr) + err = moveImageToTrash(ctx, pOpts, cr) + if err != nil { + klog.Errorf(util.Log(ctx, "failed to move image %s to trash: %v"), image, err) + return err + } + // need to get this ID and add to id + // attempt to use Ceph manager based trash remove support if available + rbdCephMgrSupported, err := rbdManagerTaskTrashRemove(ctx, pOpts, cr) if !rbdCephMgrSupported { // attempt older style deletion - args := []string{"rm", image, "--pool", pOpts.Pool, "--id", cr.ID, "-m", pOpts.Monitors, + klog.Infof(util.Log(ctx, "rbd: attempting to delete (%s) image with id (%s) from trash"), image, id) + args := []string{"trash", "rm", id, "--pool", pOpts.Pool, "--id", cr.ID, "-m", pOpts.Monitors, "--keyfile=" + cr.KeyFile} output, err = execCommand("rbd", args) } @@ -237,11 +231,26 @@ func deleteImage(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) er return err } +// moveImageToTrash moves the image to trash. +func moveImageToTrash(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) error { + image := pOpts.RbdImageName + + klog.V(4).Infof(util.Log(ctx, "rbd: trash mv %s using mon %s, pool %s"), image, pOpts.Monitors, pOpts.Pool) + + args := []string{"trash", "mv", image, "--pool", pOpts.Pool, "--id", cr.ID, "-m", pOpts.Monitors, + "--keyfile=" + cr.KeyFile} + output, err := execCommand("rbd", args) + + if err != nil { + klog.Errorf(util.Log(ctx, "failed to move rbd image: %v to trash, command output: %s"), err, string(output)) + } + return err +} + // updateSnapWithImageInfo updates provided rbdSnapshot with information from on-disk data // regarding the same func updateSnapWithImageInfo(ctx context.Context, rbdSnap *rbdSnapshot, cr *util.Credentials) error { - snapInfo, err := getSnapInfo(ctx, rbdSnap.Monitors, cr, rbdSnap.Pool, - rbdSnap.RbdImageName, rbdSnap.RbdSnapName) + snapInfo, err := getSnapInfo(ctx, rbdSnap, cr) if err != nil { return err } @@ -274,7 +283,17 @@ func updateVolWithImageInfo(ctx context.Context, rbdVol *rbdVolume, cr *util.Cre rbdVol.VolSize = imageInfo.Size rbdVol.ImageFeatures = strings.Join(imageInfo.Features, ",") + rbdVol.ImageID = imageInfo.ID + + tm, err := time.Parse(time.ANSIC, imageInfo.CreatedAt) + if err != nil { + return err + } + rbdVol.CreatedAt, err = ptypes.TimestampProto(tm) + if err != nil { + return err + } return nil } @@ -314,6 +333,9 @@ func genSnapFromSnapID(ctx context.Context, rbdSnap *rbdSnapshot, snapshotID str return err } + // update rbd image name to point to cloned image name + rbdSnap.RbdImageName = rbdSnap.RbdSnapName + err = updateSnapWithImageInfo(ctx, rbdSnap, cr) return err @@ -558,32 +580,13 @@ func hasSnapshotFeature(imageFeatures string) bool { return false } -func protectSnapshot(ctx context.Context, pOpts *rbdSnapshot, cr *util.Credentials) error { - var output []byte - - image := pOpts.RbdImageName - snapName := pOpts.RbdSnapName - - klog.V(4).Infof(util.Log(ctx, "rbd: snap protect %s using mon %s, pool %s "), image, pOpts.Monitors, pOpts.Pool) - args := []string{"snap", "protect", "--pool", pOpts.Pool, "--snap", snapName, image, "--id", - cr.ID, "-m", pOpts.Monitors, "--keyfile=" + cr.KeyFile} - - output, err := execCommand("rbd", args) - - if err != nil { - return errors.Wrapf(err, "failed to protect snapshot, command output: %s", string(output)) - } - - return nil -} - func createSnapshot(ctx context.Context, pOpts *rbdSnapshot, cr *util.Credentials) error { var output []byte image := pOpts.RbdImageName snapName := pOpts.RbdSnapName - klog.V(4).Infof(util.Log(ctx, "rbd: snap create %s using mon %s, pool %s"), image, pOpts.Monitors, pOpts.Pool) + klog.V(4).Infof(util.Log(ctx, "rbd: snap create %s/%s@%s using mon %s"), pOpts.Pool, image, snapName, pOpts.Monitors) args := []string{"snap", "create", "--pool", pOpts.Pool, "--snap", snapName, image, "--id", cr.ID, "-m", pOpts.Monitors, "--keyfile=" + cr.KeyFile} @@ -596,32 +599,13 @@ func createSnapshot(ctx context.Context, pOpts *rbdSnapshot, cr *util.Credential return nil } -func unprotectSnapshot(ctx context.Context, pOpts *rbdSnapshot, cr *util.Credentials) error { - var output []byte - - image := pOpts.RbdImageName - snapName := pOpts.RbdSnapName - - klog.V(4).Infof(util.Log(ctx, "rbd: snap unprotect %s using mon %s, pool %s"), image, pOpts.Monitors, pOpts.Pool) - args := []string{"snap", "unprotect", "--pool", pOpts.Pool, "--snap", snapName, image, "--id", - cr.ID, "-m", pOpts.Monitors, "--keyfile=" + cr.KeyFile} - - output, err := execCommand("rbd", args) - - if err != nil { - return errors.Wrapf(err, "failed to unprotect snapshot, command output: %s", string(output)) - } - - return nil -} - func deleteSnapshot(ctx context.Context, pOpts *rbdSnapshot, cr *util.Credentials) error { var output []byte image := pOpts.RbdImageName snapName := pOpts.RbdSnapName - klog.V(4).Infof(util.Log(ctx, "rbd: snap rm %s using mon %s, pool %s"), image, pOpts.Monitors, pOpts.Pool) + klog.V(4).Infof(util.Log(ctx, "rbd: snap rm %s/%s@%s using mon %s"), pOpts.Pool, image, snapName, pOpts.Monitors) args := []string{"snap", "rm", "--pool", pOpts.Pool, "--snap", snapName, image, "--id", cr.ID, "-m", pOpts.Monitors, "--keyfile=" + cr.KeyFile} @@ -639,15 +623,15 @@ func deleteSnapshot(ctx context.Context, pOpts *rbdSnapshot, cr *util.Credential return nil } -func restoreSnapshot(ctx context.Context, pVolOpts *rbdVolume, pSnapOpts *rbdSnapshot, cr *util.Credentials) error { +func cloneRbdImageFromSnapshot(ctx context.Context, pVolOpts *rbdVolume, pSnapOpts *rbdSnapshot, cr *util.Credentials) error { var output []byte image := pVolOpts.RbdImageName snapName := pSnapOpts.RbdSnapName - klog.V(4).Infof(util.Log(ctx, "rbd: clone %s using mon %s, pool %s"), image, pVolOpts.Monitors, pVolOpts.Pool) + klog.V(4).Infof(util.Log(ctx, "rbd: clone %s/%s@%s %s using mon %s"), pVolOpts.Pool, pSnapOpts.RbdImageName, snapName, image, pVolOpts.Monitors) args := []string{"clone", pSnapOpts.Pool + "/" + pSnapOpts.RbdImageName + "@" + snapName, - pVolOpts.Pool + "/" + image, "--id", cr.ID, "-m", pVolOpts.Monitors, "--keyfile=" + cr.KeyFile} + pVolOpts.Pool + "/" + image, "--rbd-default-clone-format=2", "--id", cr.ID, "-m", pVolOpts.Monitors, "--keyfile=" + cr.KeyFile, "--image-feature", pVolOpts.ImageFeatures} output, err := execCommand("rbd", args) @@ -658,39 +642,22 @@ func restoreSnapshot(ctx context.Context, pVolOpts *rbdVolume, pSnapOpts *rbdSna return nil } -// getSnapshotMetadata fetches on-disk metadata about the snapshot and populates the passed in -// rbdSnapshot structure -func getSnapshotMetadata(ctx context.Context, pSnapOpts *rbdSnapshot, cr *util.Credentials) error { - imageName := pSnapOpts.RbdImageName - snapName := pSnapOpts.RbdSnapName - - snapInfo, err := getSnapInfo(ctx, pSnapOpts.Monitors, cr, pSnapOpts.Pool, imageName, snapName) - if err != nil { - return err - } - - pSnapOpts.SizeBytes = snapInfo.Size - - tm, err := time.Parse(time.ANSIC, snapInfo.Timestamp) - if err != nil { - return err - } - - pSnapOpts.CreatedAt, err = ptypes.TimestampProto(tm) - if err != nil { - return err - } - - return nil +// parentInfo spec for parent volume info +type parentInfo struct { + Image string `json:"image"` + Pool string `json:"pool"` + Snapshot string `json:"snapshost"` } // imageInfo strongly typed JSON spec for image info type imageInfo struct { - ObjectUUID string `json:"name"` - Size int64 `json:"size"` - Format int64 `json:"format"` - Features []string `json:"features"` - CreatedAt string `json:"create_timestamp"` + ObjectUUID string `json:"name"` + ID string `json:"id"` + Size int64 `json:"size"` + Format int64 `json:"format"` + Features []string `json:"features"` + CreatedAt string `json:"create_timestamp"` + Parent parentInfo `json:"parent"` } // getImageInfo queries rbd about the given image and returns its metadata, and returns @@ -709,7 +676,7 @@ func getImageInfo(ctx context.Context, monitors string, cr *util.Credentials, po "--format="+"json", "info", poolName+"/"+imageName) if err != nil { - klog.Errorf(util.Log(ctx, "failed getting information for image (%s): (%s)"), poolName+"/"+imageName, err) + klog.Errorf(util.Log(ctx, "failed getting information for image (%s) err: (%s) and stdErr: (%s)"), poolName+"/"+imageName, err, string(stderr)) if strings.Contains(string(stderr), "rbd: error opening image "+imageName+ ": (2) No such file or directory") { return imgInfo, ErrImageNotFound{imageName, err} @@ -741,7 +708,7 @@ getSnapInfo queries rbd about the snapshots of the given image and returns its m returns ErrImageNotFound if provided image is not found, and ErrSnapNotFound if provided snap is not found in the images snapshot list */ -func getSnapInfo(ctx context.Context, monitors string, cr *util.Credentials, poolName, imageName, snapName string) (snapInfo, error) { +func getSnapInfo(ctx context.Context, rbdSnap *rbdSnapshot, cr *util.Credentials) (snapInfo, error) { // rbd --format=json snap ls [image-spec] var ( @@ -751,38 +718,36 @@ func getSnapInfo(ctx context.Context, monitors string, cr *util.Credentials, poo stdout, stderr, err := util.ExecCommand( "rbd", - "-m", monitors, + "-m", rbdSnap.Monitors, "--id", cr.ID, "--keyfile="+cr.KeyFile, "-c", util.CephConfigPath, "--format="+"json", - "snap", "ls", poolName+"/"+imageName) + "snap", "ls", rbdSnap.Pool+"/"+rbdSnap.RbdImageName) if err != nil { klog.Errorf(util.Log(ctx, "failed getting snap (%s) information from image (%s): (%s)"), - snapName, poolName+"/"+imageName, err) - if strings.Contains(string(stderr), "rbd: error opening image "+imageName+ - ": (2) No such file or directory") { - return snpInfo, ErrImageNotFound{imageName, err} + rbdSnap.RbdSnapName, rbdSnap.Pool+"/"+rbdSnap.RbdImageName, err) + if strings.Contains(string(stderr), "rbd: error opening image "+rbdSnap.RbdImageName+": (2) No such file or directory") { + return snpInfo, ErrImageNotFound{rbdSnap.RbdImageName, err} } return snpInfo, err } err = json.Unmarshal(stdout, &snaps) if err != nil { - klog.Errorf(util.Log(ctx, "failed to parse JSON output of image snap list (%s): (%s)"), - poolName+"/"+imageName, err) + klog.Errorf(util.Log(ctx, "failed to parse JSON output of image snap list (%s): (%s)"), rbdSnap.Pool+"/"+rbdSnap.RbdImageName, err) return snpInfo, fmt.Errorf("unmarshal failed: %+v. raw buffer response: %s", err, string(stdout)) } for _, snap := range snaps { - if snap.Name == snapName { + if snap.Name == rbdSnap.RbdSnapName { return snap, nil } } - return snpInfo, ErrSnapNotFound{snapName, fmt.Errorf("snap (%s) for image (%s) not found", - snapName, poolName+"/"+imageName)} + return snpInfo, ErrSnapNotFound{rbdSnap.RbdSnapName, fmt.Errorf("snap (%s) for image (%s) not found", + rbdSnap.RbdSnapName, rbdSnap.Pool+"/"+rbdSnap.RbdImageName)} } // rbdImageMetadataStash strongly typed JSON spec for stashed RBD image metadata @@ -897,3 +862,76 @@ func ensureEncryptionMetadataSet(ctx context.Context, cr *util.Credentials, rbdV return nil } + +func flattenRbdImage(ctx context.Context, rbdVol *rbdVolume, hardmaxDepth uint, cr *util.Credentials) error { + + depth, err := getCloneDepth(ctx, rbdVol.Monitors, rbdVol.Pool, rbdVol.RbdImageName, 0, cr) + if err != nil { + return err + } + + klog.Infof(util.Log(ctx, "image depth is %v and maximum configured clone depth hard limit is %v"), depth, hardmaxDepth) + + if depth >= int(hardmaxDepth) { + klog.Infof(util.Log(ctx, "maximum clone depth (%d) has been reached, flattening %v volume"), hardmaxDepth, rbdVol.RbdImageName) + ns, err := rbdManagerTaskAddFlatten(ctx, rbdVol, cr) + if ns && err != nil { + klog.Errorf(util.Log(ctx, "failed to add task flatten for %s : %v"), rbdVol.RbdImageName, err) + return err + } + if !ns { + klog.Errorf(util.Log(ctx, "rbd task add flatten not supported will flatten image once hardlimit is reached: %v"), err) + return errors.New("ceph rdd add task flatten not supported") + } + return ErrFlattenInProgress{err: fmt.Errorf("flatten is in progress for image %s", rbdVol.RbdImageName)} + } + return nil +} + +// rbdManagerTaskAddFlatten adds a ceph manager task to flatten image +// asynchronously in the background. If command is not found returns a bool set to false +func rbdManagerTaskAddFlatten(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) (bool, error) { + var output []byte + + klog.V(4).Infof(util.Log(ctx, "rbd: task add flatten for image (%s) using mon %s, pool %s"), pOpts.RbdImageName, pOpts.Monitors, pOpts.Pool) + args := []string{"rbd", "task", "add", "flatten", pOpts.Pool + "/" + pOpts.RbdImageName, "--id", cr.ID, "--keyfile=" + cr.KeyFile, "-m", pOpts.Monitors} + + output, err := execCommand("ceph", args) + if err != nil { + valid := isValidCommand(ctx, string(output), pOpts) + return valid, err + } + + return true, nil +} + +func isValidCommand(ctx context.Context, output string, pOpts *rbdVolume) bool { + if strings.Contains(output, rbdTaskRemoveCmdInvalidString1) && + strings.Contains(output, rbdTaskRemoveCmdInvalidString2) { + klog.Infof(util.Log(ctx, "cluster with cluster ID (%s) does not support Ceph manager based rbd command (minimum ceph version required is v14.2.3)"), pOpts.ClusterID) + return false + } else if strings.HasPrefix(output, rbdTaskRemoveCmdAccessDeniedMessage) { + klog.Infof(util.Log(ctx, "access denied to Ceph MGR-based RBD command on cluster ID (%s)"), pOpts.ClusterID) + return false + } + return true +} + +func getCloneDepth(ctx context.Context, monitors, poolName, imageName string, depth int, cr *util.Credentials) (int, error) { + + image, err := getImageInfo(ctx, monitors, cr, poolName, imageName) + if err != nil { + // return errors other than image not found + if _, ok := err.(ErrImageNotFound); !ok { + return 0, err + } + // mark error as nil if image not found + err = nil + } + + if image.Parent.Image != "" { + depth++ + return getCloneDepth(ctx, monitors, image.Parent.Pool, image.Parent.Image, depth, cr) + } + return depth, err +} diff --git a/pkg/rbd/snapshot.go b/pkg/rbd/snapshot.go new file mode 100644 index 00000000000..3464335312b --- /dev/null +++ b/pkg/rbd/snapshot.go @@ -0,0 +1,146 @@ +/* +Copyright 2019 The Ceph-CSI Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package rbd + +import ( + "context" + "fmt" + + "github.com/ceph/ceph-csi/pkg/util" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "k8s.io/klog" +) + +func createRBDClone(ctx context.Context, rbdVol, cloneRbdVol *rbdVolume, cr *util.Credentials) (bool, error) { + + // generate snapshot from parent volume + snap := generateSnapFromVol(rbdVol) + + // update snapshot name as cloned volume name as it will be always unique + snap.RbdSnapName = cloneRbdVol.RbdImageName + + // create snapshot + err := createSnapshot(ctx, snap, cr) + if err != nil { + klog.Errorf(util.Log(ctx, "failed to create snapshot: %v"), err) + return false, status.Error(codes.Internal, err.Error()) + + } + + cloneFailed := false + ready := true + // create clone image and delete snapshot + err = cloneRbdImageFromSnapshot(ctx, cloneRbdVol, snap, cr) + if err != nil { + if _, ok := err.(ErrFlattenInProgress); ok { + ready = false + } else { + klog.Errorf(util.Log(ctx, "failed to clone rbd image %s from snapshot %s: %v"), cloneRbdVol.RbdImageName, snap.RbdSnapName, err) + cloneFailed = true + } + } + + err = flattenRbdImage(ctx, cloneRbdVol, rbdHardMaxCloneDepth, cr) + if err != nil { + if _, ok := err.(ErrFlattenInProgress); !ok { + delErr := deleteImage(ctx, cloneRbdVol, cr) + if delErr != nil { + klog.Errorf(util.Log(ctx, "rbd: failed to delete %s/%s using mon %s: %v"), cloneRbdVol.Pool, cloneRbdVol.RbdImageName, cloneRbdVol.Monitors, delErr) + } + } + ready = false + + } + + err = deleteSnapshot(ctx, snap, cr) + if err != nil { + klog.Errorf(util.Log(ctx, "failed to delete snapshot: %v"), err) + if !cloneFailed { + err = fmt.Errorf("clone created but failed to delete snapshot due to other failures: %v", err) + } + errCleanUp := cleanUpSnapshot(ctx, snap, cloneRbdVol, cr) + if errCleanUp != nil { + klog.Errorf(util.Log(ctx, "failed to delete snapshot or image: %s/%s with error: %v"), snap.Pool, snap.RbdSnapName, errCleanUp) + } + err = status.Error(codes.Internal, err.Error()) + } + + if cloneFailed { + err = fmt.Errorf("failed to clone rbd image %s from snapshot %s: %v", cloneRbdVol.RbdImageName, snap.RbdSnapName, err) + } else { + err = updateVolWithImageInfo(ctx, cloneRbdVol, cr) + if err != nil { + klog.Errorf(util.Log(ctx, "failed to get rbd image: %s/%s details with error: %v"), cloneRbdVol.Pool, cloneRbdVol.VolName, err) + delErr := deleteImage(ctx, cloneRbdVol, cr) + if delErr != nil { + klog.Errorf(util.Log(ctx, "failed to delete rbd image: %s/%s with error: %v"), rbdVol.Pool, rbdVol.VolName, delErr) + } + } + } + return ready, err +} + +func cleanUpSnapshot(ctx context.Context, rbdSnap *rbdSnapshot, rbdVol *rbdVolume, cr *util.Credentials) error { + + _, err := getSnapInfo(ctx, rbdSnap, cr) + if err != nil { + if _, ok := err.(ErrSnapNotFound); !ok { + return err + } + } else { + err = deleteSnapshot(ctx, rbdSnap, cr) + if err != nil { + klog.Errorf(util.Log(ctx, "failed to delete snapshot: %v"), err) + return err + } + } + + vol, err := getImageInfo(ctx, rbdVol.Monitors, cr, rbdVol.Pool, rbdVol.RbdImageName) + if err != nil { + if _, ok := err.(ErrImageNotFound); !ok { + return err + } + } else { + rbdVol.ImageID = vol.ID + err = deleteImage(ctx, rbdVol, cr) + if err != nil { + klog.Errorf(util.Log(ctx, "failed to delete rbd image: %s/%s with error: %v"), rbdVol.Pool, rbdVol.VolName, err) + return err + } + } + + return nil +} + +func generateVolFromSnap(rbdSnap *rbdSnapshot) *rbdVolume { + vol := new(rbdVolume) + vol.ClusterID = rbdSnap.ClusterID + vol.Monitors = rbdSnap.Monitors + vol.Pool = rbdSnap.Pool + vol.RbdImageName = rbdSnap.RbdSnapName + return vol +} + +func generateSnapFromVol(rbdVol *rbdVolume) *rbdSnapshot { + snap := new(rbdSnapshot) + snap.ClusterID = rbdVol.ClusterID + snap.Monitors = rbdVol.Monitors + snap.Pool = rbdVol.Pool + snap.RbdImageName = rbdVol.RbdImageName + snap.RbdSnapName = rbdVol.RbdImageName + return snap +} diff --git a/pkg/util/util.go b/pkg/util/util.go index 1931c0bd5c3..f12b8fc359f 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -102,6 +102,9 @@ type Config struct { // cephfs related flags ForceKernelCephFS bool // force to use the ceph kernel client even if the kernel is < 4.17 + // rbd related flags + RbdHardMaxCloneDepth uint // rbd clone depth hard limit + } // CreatePersistanceStorage creates storage path and initializes new cache