From f83da276e6a26a0f639cc1cfd44a67c425972997 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Wed, 6 Sep 2023 09:01:04 +0200 Subject: [PATCH 1/8] rbd: use `librbd.OpenImageById()` if `rbdVol.ImageID` is set `librbd.OpenImageById()` works if the image is in the trash, so it makes it possible to get the parent of the image. Signed-off-by: Niels de Vos --- internal/rbd/rbd_util.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index 08f9f759889..f273d40ab81 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -505,7 +505,14 @@ func (ri *rbdImage) open() (*librbd.Image, error) { return nil, err } - image, err := librbd.OpenImage(ri.ioctx, ri.RbdImageName, librbd.NoSnapshot) + var image *librbd.Image + + // try to open by id, that works for images in trash too + if ri.ImageID != "" { + image, err = librbd.OpenImageById(ri.ioctx, ri.ImageID, librbd.NoSnapshot) + } else { + image, err = librbd.OpenImage(ri.ioctx, ri.RbdImageName, librbd.NoSnapshot) + } if err != nil { if errors.Is(err, librbd.ErrNotFound) { err = util.JoinErrors(ErrImageNotFound, err) From b78dcd58f6f86ffde4af1eff70b647e9878a8b6f Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Wed, 15 Nov 2023 10:04:43 +0100 Subject: [PATCH 2/8] rbd: prevent presetting the ImageID of a new volume When a new volume is not created yet, the ImageID should not be set to the ID of the snapshot. Signed-off-by: Niels de Vos --- internal/rbd/snapshot.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/internal/rbd/snapshot.go b/internal/rbd/snapshot.go index bb420475c3d..92823e40ef9 100644 --- a/internal/rbd/snapshot.go +++ b/internal/rbd/snapshot.go @@ -107,7 +107,15 @@ func generateVolFromSnap(rbdSnap *rbdSnapshot) *rbdVolume { vol.JournalPool = rbdSnap.JournalPool vol.RadosNamespace = rbdSnap.RadosNamespace vol.RbdImageName = rbdSnap.RbdSnapName - vol.ImageID = rbdSnap.ImageID + + // /!\ WARNING /!\ + // + // Do not set the ImageID to the ID of the snapshot, a new image will + // be created based on the returned rbdVolume. If the ImageID is set to + // the ID of the snapshot, accessing the new image by ID will actually + // access the snapshot! + // vol.ImageID = rbdSnap.ImageID + // copyEncryptionConfig cannot be used here because the volume and the // snapshot will have the same volumeID which cases the panic in // copyEncryptionConfig function. From e1cd7a5a9c1b37fd2455c6488fc704475f5e7595 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Fri, 17 Nov 2023 10:31:38 +0100 Subject: [PATCH 3/8] rbd: set/get correct ImageID in more places In some places the ImageID is used as the ID of the parent. That is very confusing and prone to errors. Instead, fetch the right ImageID where possible, and set ParentID for referencing to parent images. Signed-off-by: Niels de Vos --- internal/rbd/controllerserver.go | 1 + internal/rbd/rbd_util.go | 17 +++++++++++++++++ internal/rbd/snapshot.go | 2 ++ 3 files changed, 20 insertions(+) diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go index e3796b6611d..9037e1c2371 100644 --- a/internal/rbd/controllerserver.go +++ b/internal/rbd/controllerserver.go @@ -1173,6 +1173,7 @@ func (cs *ControllerServer) CreateSnapshot( // Update the metadata on snapshot not on the original image rbdVol.RbdImageName = rbdSnap.RbdSnapName + rbdVol.ImageID = vol.ImageID rbdVol.ClusterName = cs.ClusterName defer func() { diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index f273d40ab81..2dbb7d5fe67 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -114,6 +114,8 @@ type rbdImage struct { NamePrefix string // ParentName represents the parent image name of the image. ParentName string + // ParentID represents the parent image ID of the image. + ParentID string // Parent Pool is the pool that contains the parent image. ParentPool string // Cluster name @@ -711,6 +713,7 @@ func (ri *rbdImage) getCloneDepth(ctx context.Context) (uint, error) { vol.Pool = ri.Pool vol.Monitors = ri.Monitors vol.RbdImageName = ri.RbdImageName + vol.ImageID = ri.ImageID vol.RadosNamespace = ri.RadosNamespace vol.conn = ri.conn.Copy() @@ -743,6 +746,7 @@ func (ri *rbdImage) getCloneDepth(ctx context.Context) (uint, error) { depth++ } vol.RbdImageName = vol.ParentName + vol.ImageID = vol.ParentID vol.Pool = vol.ParentPool } } @@ -882,6 +886,9 @@ func (ri *rbdImage) getParentName() (string, error) { return "", err } + ri.ParentName = parentInfo.Image.ImageName + ri.ParentID = parentInfo.Image.ImageID + return parentInfo.Image.ImageName, nil } @@ -1595,6 +1602,11 @@ func (ri *rbdImage) getImageInfo() error { // TODO: can rv.VolSize not be a uint64? Or initialize it to -1? ri.VolSize = int64(imageInfo.Size) + ri.ImageID, err = image.GetId() + if err != nil { + return err + } + features, err := image.GetFeatures() if err != nil { return err @@ -1608,11 +1620,13 @@ func (ri *rbdImage) getImageInfo() error { // the parent is an error or not. if errors.Is(err, librbd.ErrNotFound) { ri.ParentName = "" + ri.ParentID = "" } else { return err } } else { ri.ParentName = parentInfo.Image.ImageName + ri.ParentID = parentInfo.Image.ImageID ri.ParentPool = parentInfo.Image.PoolName } // Get image creation time @@ -1643,6 +1657,7 @@ func (ri *rbdImage) getParent() (*rbdImage, error) { parentImage.Pool = ri.ParentPool parentImage.RadosNamespace = ri.RadosNamespace parentImage.RbdImageName = ri.ParentName + parentImage.ImageID = ri.ParentID err = parentImage.getImageInfo() if err != nil { @@ -1699,6 +1714,7 @@ type rbdImageMetadataStash struct { Pool string `json:"pool"` RadosNamespace string `json:"radosNamespace"` ImageName string `json:"image"` + ImageID string `json:"imageID"` UnmapOptions string `json:"unmapOptions"` NbdAccess bool `json:"accessType"` Encrypted bool `json:"encrypted"` @@ -1728,6 +1744,7 @@ func stashRBDImageMetadata(volOptions *rbdVolume, metaDataPath string) error { Pool: volOptions.Pool, RadosNamespace: volOptions.RadosNamespace, ImageName: volOptions.RbdImageName, + ImageID: volOptions.ImageID, Encrypted: volOptions.isBlockEncrypted(), UnmapOptions: volOptions.UnmapOptions, } diff --git a/internal/rbd/snapshot.go b/internal/rbd/snapshot.go index 92823e40ef9..85e5c08b00e 100644 --- a/internal/rbd/snapshot.go +++ b/internal/rbd/snapshot.go @@ -107,6 +107,8 @@ func generateVolFromSnap(rbdSnap *rbdSnapshot) *rbdVolume { vol.JournalPool = rbdSnap.JournalPool vol.RadosNamespace = rbdSnap.RadosNamespace vol.RbdImageName = rbdSnap.RbdSnapName + vol.ParentName = rbdSnap.ParentName + vol.ParentID = rbdSnap.ParentID // /!\ WARNING /!\ // From 9ac559cfc1f77511db6499bcc34d0f556969f27e Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Mon, 27 Nov 2023 18:03:27 +0100 Subject: [PATCH 4/8] rbd: skip flattening if an image in trash Signed-off-by: Niels de Vos --- internal/rbd/rbd_util.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index 2dbb7d5fe67..822c1b3b05c 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -841,7 +841,9 @@ func (ri *rbdImage) flattenRbdImage( _, err = ta.AddFlatten(admin.NewImageSpec(ri.Pool, ri.RadosNamespace, ri.RbdImageName)) rbdCephMgrSupported := isCephMgrSupported(ctx, ri.ClusterID, err) if rbdCephMgrSupported { - if err != nil { + // it is not possible to flatten an image that is in trash (ErrNotFound) + switch { + case err != nil && !errors.Is(err, rados.ErrNotFound): // discard flattening error if the image does not have any parent rbdFlattenNoParent := fmt.Sprintf("Image %s/%s does not have a parent", ri.Pool, ri.RbdImageName) if strings.Contains(err.Error(), rbdFlattenNoParent) { @@ -850,11 +852,11 @@ func (ri *rbdImage) flattenRbdImage( log.ErrorLog(ctx, "failed to add task flatten for %s : %v", ri, err) return err - } - if forceFlatten || depth >= hardlimit { + case forceFlatten || depth >= hardlimit: return fmt.Errorf("%w: flatten is in progress for image %s", ErrFlattenInProgress, ri.RbdImageName) + default: + log.DebugLog(ctx, "successfully added task to flatten image %q", ri) } - log.DebugLog(ctx, "successfully added task to flatten image %q", ri) } if !rbdCephMgrSupported { log.ErrorLog( From 2a9d723b899bb19777f26cc85d59fa9828d63995 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Mon, 27 Nov 2023 17:31:05 +0100 Subject: [PATCH 5/8] rbd: the DeleteVolume CSI procedure should succeed on deleted images If the RBD-image is deleted already, the DeleteVolume CSI procedure is expected to report success (as it should be idempotent). In case the returned error indicates "RBD image not found", the error is ignored and the DeleteVolume procedure continues. Signed-off-by: Niels de Vos --- internal/rbd/controllerserver.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go index 9037e1c2371..7ad5378ea8f 100644 --- a/internal/rbd/controllerserver.go +++ b/internal/rbd/controllerserver.go @@ -1017,9 +1017,10 @@ func cleanupRBDImage(ctx context.Context, } } - // Deleting rbd image + // Deleting rbd image, it isn't a failure if the image was deleted already. log.DebugLog(ctx, "deleting image %s", rbdVol.RbdImageName) - if err = rbdVol.deleteImage(ctx); err != nil { + err = rbdVol.deleteImage(ctx) + if err != nil && !errors.Is(err, librbd.ErrNotFound) { log.ErrorLog(ctx, "failed to delete rbd image: %s with error: %v", rbdVol, err) From f18c57178c28fd32bfcd28afd06a5565ae5ffed7 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Tue, 22 Aug 2023 17:08:26 +0200 Subject: [PATCH 6/8] rbd: include trashed parent images while calculating the clone depth The `getCloneDepth()` function did not account for images that are in the trash. A trashed image can only be opened by the image-id, and not by name anymore. Closes: #4013 Signed-off-by: Niels de Vos --- internal/rbd/nodeserver.go | 2 +- internal/rbd/rbd_util.go | 86 ++++++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 36 deletions(-) diff --git a/internal/rbd/nodeserver.go b/internal/rbd/nodeserver.go index 773557e0c1d..dbb07f997f3 100644 --- a/internal/rbd/nodeserver.go +++ b/internal/rbd/nodeserver.go @@ -595,7 +595,7 @@ func flattenImageBeforeMapping( if err != nil { return err } - depth, err = volOptions.getCloneDepth(ctx) + depth, err = volOptions.getCloneDepth() if err != nil { return err } diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index 822c1b3b05c..c617544f0d7 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -706,49 +706,65 @@ func (ri *rbdImage) trashRemoveImage(ctx context.Context) error { return nil } -func (ri *rbdImage) getCloneDepth(ctx context.Context) (uint, error) { - var depth uint - vol := rbdVolume{} +// getCloneDepth walks the parents of the image and returns the number of +// images in the chain. +// +// This function re-uses the ioctx of the image to open all images in the +// chain. There is no need to open new ioctx's for every image. +func (ri *rbdImage) getCloneDepth() (uint, error) { + var ( + depth uint + info *librbd.ParentInfo + ) + + image, err := ri.open() + if err != nil { + return 0, fmt.Errorf("failed to open image %s: %w", ri, err) + } + + // get the librbd.ImageSpec of the parent + info, err = image.GetParent() - vol.Pool = ri.Pool - vol.Monitors = ri.Monitors - vol.RbdImageName = ri.RbdImageName - vol.ImageID = ri.ImageID - vol.RadosNamespace = ri.RadosNamespace - vol.conn = ri.conn.Copy() + // Close this image, it is not used anymore. Using defer to close it + // and replacing the image with an other image can result in resource + // leaks according to golangci-lint. + image.Close() for { - if vol.RbdImageName == "" { - return depth, nil - } - err := vol.openIoctx() - if err != nil { - return depth, err + if errors.Is(err, librbd.ErrNotFound) { + // image does not have more parents + break + } else if err != nil { + return 0, fmt.Errorf("failed to get parent of image %s at depth %d: %w", ri, depth, err) } - err = vol.getImageInfo() - // FIXME: create and destroy the vol inside the loop. - // see https://github.com/ceph/ceph-csi/pull/1838#discussion_r598530807 - vol.ioctx.Destroy() - vol.ioctx = nil - if err != nil { - // if the parent image is moved to trash the name will be present - // in rbd image info but the image will be in trash, in that case - // return the found depth - if errors.Is(err, ErrImageNotFound) { - return depth, nil + // if there is a parent, count it to the depth + depth++ + + // open the parent image, so that the for-loop can continue + // with checking for the parent of the parent + image, err = librbd.OpenImageById(ri.ioctx, info.Image.ImageID, info.Snap.SnapName) + if errors.Is(err, librbd.ErrNotFound) { + // image does not have more parents + break + } else if err != nil { + imageSpec := info.Image.ImageID + if info.Snap.SnapName != librbd.NoSnapshot { + imageSpec += "@" + info.Snap.SnapName } - log.ErrorLog(ctx, "failed to check depth on image %s: %s", &vol, err) - return depth, err - } - if vol.ParentName != "" { - depth++ + return 0, fmt.Errorf("failed to open parent image ID %q, at depth %d: %w", imageSpec, depth, err) } - vol.RbdImageName = vol.ParentName - vol.ImageID = vol.ParentID - vol.Pool = vol.ParentPool + + info, err = image.GetParent() + // info and err checking is done in the top of the for loop + + // Using defer in a for loop seems to be problematic. Just + // always close the image. + image.Close() } + + return depth, nil } type trashSnapInfo struct { @@ -814,7 +830,7 @@ func (ri *rbdImage) flattenRbdImage( // skip clone depth check if request is for force flatten if !forceFlatten { - depth, err = ri.getCloneDepth(ctx) + depth, err = ri.getCloneDepth() if err != nil { return err } From b8a567577916be892d46015ea53c6b1c41d9a60b Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Thu, 24 Aug 2023 11:48:41 +0200 Subject: [PATCH 7/8] rbd: pass a max-depth to `getCloneDepth()` The `getCloneDepth()` function does not need to traverse the whole chain of parents when a certain max-limit is configured. The traversing can be aborted once the hard-limit is reached. This makes the procedure a little more efficient, as unnecessary traversing is prevented. Signed-off-by: Niels de Vos --- internal/rbd/nodeserver.go | 2 +- internal/rbd/rbd_util.go | 29 ++++++++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/internal/rbd/nodeserver.go b/internal/rbd/nodeserver.go index dbb07f997f3..2a0963b85b1 100644 --- a/internal/rbd/nodeserver.go +++ b/internal/rbd/nodeserver.go @@ -595,7 +595,7 @@ func flattenImageBeforeMapping( if err != nil { return err } - depth, err = volOptions.getCloneDepth() + depth, err = volOptions.getCloneDepth(rbdHardMaxCloneDepth + 1) if err != nil { return err } diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index c617544f0d7..6aa9ec9fdff 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -707,11 +707,13 @@ func (ri *rbdImage) trashRemoveImage(ctx context.Context) error { } // getCloneDepth walks the parents of the image and returns the number of -// images in the chain. +// images in the chain. The `maxDepth` argument to the function is used to +// specify a limit of the depth to check. When `maxDepth` depth is reached, no +// further traversing of the depth is required. // // This function re-uses the ioctx of the image to open all images in the // chain. There is no need to open new ioctx's for every image. -func (ri *rbdImage) getCloneDepth() (uint, error) { +func (ri *rbdImage) getCloneDepth(maxDepth uint) (uint, error) { var ( depth uint info *librbd.ParentInfo @@ -732,20 +734,33 @@ func (ri *rbdImage) getCloneDepth() (uint, error) { for { if errors.Is(err, librbd.ErrNotFound) { - // image does not have more parents + // image does not have a parent break } else if err != nil { return 0, fmt.Errorf("failed to get parent of image %s at depth %d: %w", ri, depth, err) } + // if the parent is in trash, return maxDepth to trigger + // flattening + if info.Image.Trash { + return maxDepth, nil + } + // if there is a parent, count it to the depth depth++ + // if maxDepth (usually hard-limit) is reached, further + // traversing parents is not needed, some action (flattening) + // will be triggered regardless of deeper depth + if depth == maxDepth { + break + } + // open the parent image, so that the for-loop can continue // with checking for the parent of the parent - image, err = librbd.OpenImageById(ri.ioctx, info.Image.ImageID, info.Snap.SnapName) - if errors.Is(err, librbd.ErrNotFound) { - // image does not have more parents + image, err = librbd.OpenImageByIdReadOnly(ri.ioctx, info.Image.ImageID, librbd.NoSnapshot) + if err != nil && errors.Is(err, librbd.ErrNotFound) { + // parent image does not exist, no parent after all break } else if err != nil { imageSpec := info.Image.ImageID @@ -830,7 +845,7 @@ func (ri *rbdImage) flattenRbdImage( // skip clone depth check if request is for force flatten if !forceFlatten { - depth, err = ri.getCloneDepth() + depth, err = ri.getCloneDepth(hardlimit) if err != nil { return err } From a29bea631de961a3d94cfc80a3e66876834cfa4e Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Fri, 1 Dec 2023 12:08:44 +0100 Subject: [PATCH 8/8] rbd: return CSI expected errors while flattening a snapshot or volume By returned `ABORTED` or `PRECONDITION_FAILED` in the right places, the CO will retry with the same arguments until the snapshot is `ReadyToUse`. This causes restoring a volume from a snapshot to be delayed, until the snapshot can be used. Signed-off-by: Niels de Vos --- internal/rbd/controllerserver.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go index 7ad5378ea8f..b39ad1d9925 100644 --- a/internal/rbd/controllerserver.go +++ b/internal/rbd/controllerserver.go @@ -559,7 +559,9 @@ func flattenTemporaryClonedImages(ctx context.Context, rbdVol *rbdVolume, cr *ut rbdVol.Monitors, rbdVol.RbdImageName, cr) - if err != nil { + if errors.Is(err, ErrFlattenInProgress) { + return status.Error(codes.Aborted, err.Error()) + } else if err != nil { return status.Error(codes.Internal, err.Error()) } @@ -995,7 +997,7 @@ func cleanupRBDImage(ctx context.Context, if inUse { log.ErrorLog(ctx, "rbd %s is still being used", rbdVol) - return nil, status.Errorf(codes.Internal, "rbd %s is still being used", rbdVol.RbdImageName) + return nil, status.Errorf(codes.FailedPrecondition, "rbd %s is still being used", rbdVol.RbdImageName) } // delete the temporary rbd image created as part of volume clone during @@ -1167,8 +1169,12 @@ func (cs *ControllerServer) CreateSnapshot( } }() + readyToUse := true + vol, err := cs.doSnapshotClone(ctx, rbdVol, rbdSnap, cr) - if err != nil { + if errors.Is(err, ErrFlattenInProgress) { + readyToUse = false + } else if err != nil { return nil, status.Error(codes.Internal, err.Error()) } @@ -1205,7 +1211,7 @@ func (cs *ControllerServer) CreateSnapshot( SnapshotId: vol.VolID, SourceVolumeId: req.GetSourceVolumeId(), CreationTime: vol.CreatedAt, - ReadyToUse: true, + ReadyToUse: readyToUse, }, }, nil } @@ -1236,10 +1242,12 @@ func cloneFromSnapshot( return nil, status.Error(codes.Internal, err.Error()) } + readyToUse := true + err = vol.flattenRbdImage(ctx, false, rbdHardMaxCloneDepth, rbdSoftMaxCloneDepth) if errors.Is(err, ErrFlattenInProgress) { // if flattening is in progress, return error and do not cleanup - return nil, status.Errorf(codes.Internal, err.Error()) + readyToUse = false } else if err != nil { uErr := undoSnapshotCloning(ctx, rbdVol, rbdSnap, vol, cr) if uErr != nil { @@ -1265,7 +1273,7 @@ func cloneFromSnapshot( SnapshotId: rbdSnap.VolID, SourceVolumeId: rbdSnap.SourceVolumeID, CreationTime: rbdSnap.CreatedAt, - ReadyToUse: true, + ReadyToUse: readyToUse, }, }, nil }