From 6577c11f4eaca938932f05b2e7bd8a434ecba33b Mon Sep 17 00:00:00 2001 From: Peter Schuurman Date: Wed, 3 Apr 2024 18:01:18 -0700 Subject: [PATCH] Add support for checking if a device is being used by a filesystem --- pkg/deviceutils/device-utils.go | 8 +++ pkg/deviceutils/device-utils_linux.go | 22 +++++++ pkg/deviceutils/device-utils_windows.go | 8 +++ pkg/deviceutils/fake-device-utils.go | 11 +++- pkg/gce-pd-csi-driver/node.go | 42 ++++++++++--- test/e2e/tests/single_zone_e2e_test.go | 78 ++++++++++++++++++++++++- 6 files changed, 159 insertions(+), 10 deletions(-) diff --git a/pkg/deviceutils/device-utils.go b/pkg/deviceutils/device-utils.go index 90214a3fa1..134ae0f22d 100644 --- a/pkg/deviceutils/device-utils.go +++ b/pkg/deviceutils/device-utils.go @@ -26,6 +26,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" + "k8s.io/mount-utils" pathutils "k8s.io/utils/path" "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/resizefs" ) @@ -91,6 +92,13 @@ type DeviceUtils interface { // Resize returns whether or not a device needs resizing. Resize(resizer resizefs.Resizefs, devicePath string, deviceMountPath string) (bool, error) + + // IsDeviceFilesystemInUse returns if a device path with the specified fstype + // TODO: Mounter is passed in in order to call GetDiskFormat() + // This is currently only implemented in mounter_linux, not mounter_windows. + // Refactor this interface and function call up the stack to the caller once it is + // implemented in mounter_windows. + IsDeviceFilesystemInUse(mounter *mount.SafeFormatAndMount, devicePath, devFsPath string) (bool, error) } type deviceUtils struct { diff --git a/pkg/deviceutils/device-utils_linux.go b/pkg/deviceutils/device-utils_linux.go index 3e1cda5c4f..f480359a78 100644 --- a/pkg/deviceutils/device-utils_linux.go +++ b/pkg/deviceutils/device-utils_linux.go @@ -20,9 +20,31 @@ import ( "fmt" "os" "path/filepath" + + "k8s.io/mount-utils" ) func (_ *deviceUtils) DisableDevice(devicePath string) error { deviceName := filepath.Base(devicePath) return os.WriteFile(fmt.Sprintf("/sys/block/%s/device/state", deviceName), []byte("offline"), 0644) } + +func (_ *deviceUtils) IsDeviceFilesystemInUse(mounter *mount.SafeFormatAndMount, devicePath, devFsPath string) (bool, error) { + fstype, err := mounter.GetDiskFormat(devicePath) + if err != nil { + return false, fmt.Errorf("failed to get disk format for %s (aka %s): %v", devicePath, devFsPath, err) + } + + devFsName := filepath.Base(devFsPath) + sysFsTypePath := fmt.Sprintf("/sys/fs/%s/%s", fstype, devFsName) + stat, err := os.Stat(sysFsTypePath) + if err != nil { + if os.IsNotExist(err) { + // Path doesn't exist, indicating the device is NOT in use + return false, nil + } + return false, err + } + + return stat.IsDir(), nil +} diff --git a/pkg/deviceutils/device-utils_windows.go b/pkg/deviceutils/device-utils_windows.go index 8b2055a178..b147217b51 100644 --- a/pkg/deviceutils/device-utils_windows.go +++ b/pkg/deviceutils/device-utils_windows.go @@ -16,7 +16,15 @@ limitations under the License. package deviceutils +import "k8s.io/mount-utils" + func (_ *deviceUtils) DisableDevice(devicePath string) error { // No disabling is necessary on windows. return nil } + +func (_ *deviceUtils) IsDeviceFilesystemInUse(mounter *mount.SafeFormatAndMount, devicePath, devFsPath string) (bool, error) { + // We don't support checking if a device filesystem is captured elsewhere by the system + // Return false, to skip this check. Assume the filesystem is not in use. + return false, nil +} diff --git a/pkg/deviceutils/fake-device-utils.go b/pkg/deviceutils/fake-device-utils.go index d17c43e55e..1b0c641e9f 100644 --- a/pkg/deviceutils/fake-device-utils.go +++ b/pkg/deviceutils/fake-device-utils.go @@ -14,7 +14,10 @@ limitations under the License. package deviceutils -import "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/resizefs" +import ( + "k8s.io/mount-utils" + "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/resizefs" +) type fakeDeviceUtils struct { skipResize bool @@ -50,3 +53,9 @@ func (du *fakeDeviceUtils) Resize(resizer resizefs.Resizefs, devicePath string, } return resizer.Resize(devicePath, deviceMountPath) } + +func (_ *fakeDeviceUtils) IsDeviceFilesystemInUse(mounter *mount.SafeFormatAndMount, devicePath, devFsPath string) (bool, error) { + // We don't support checking if a device filesystem is captured elsewhere by the system + // Return false, to skip this check. + return false, nil +} diff --git a/pkg/gce-pd-csi-driver/node.go b/pkg/gce-pd-csi-driver/node.go index 21988b6e8b..a33e37ad20 100644 --- a/pkg/gce-pd-csi-driver/node.go +++ b/pkg/gce-pd-csi-driver/node.go @@ -16,6 +16,7 @@ package gceGCEDriver import ( "context" + "errors" "fmt" "os" "path/filepath" @@ -392,14 +393,12 @@ func (ns *GCENodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUns return nil, status.Error(codes.Internal, fmt.Sprintf("NodeUnstageVolume failed: %v\nUnmounting arguments: %s\n", err.Error(), stagingTargetPath)) } - devicePath, err := getDevicePath(ns, volumeID, "" /* partition, which is unused */) - if err != nil { - klog.Errorf("Failed to find device path for volume %s. Device may not be detached cleanly (error is ignored and unstaging is continuing): %v", volumeID, err.Error()) - } else { - if devFsPath, err := filepath.EvalSymlinks(devicePath); err != nil { - klog.Warningf("filepath.EvalSymlinks(%q) failed when trying to disable device: %w (ignored, unstaging continues)", devicePath, err) - } else if err := ns.DeviceUtils.DisableDevice(devFsPath); err != nil { - klog.Warningf("Failed to disabled device %s (aka %s) for volume %s. Device may not be detached cleanly (ignored, unstaging continues): %w", devicePath, devFsPath, volumeID, err) + if err := ns.safelyDisableDevice(volumeID); err != nil { + var targetErr *ignoreableError + if errors.As(err, &targetErr) { + klog.Warningf("Device may not be detached cleanly for volume %s (ignored, unstaging continues): %v", volumeID, err) + } else { + return nil, status.Errorf(codes.Internal, "NodeUnstageVolume for volume %s failed: %v", volumeID, err) } } @@ -407,6 +406,33 @@ func (ns *GCENodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUns return &csi.NodeUnstageVolumeResponse{}, nil } +// A private error wrapper used to pass control flow decisions up to the caller +type ignoreableError struct{ error } + +func (ns *GCENodeServer) safelyDisableDevice(volumeID string) error { + devicePath, err := getDevicePath(ns, volumeID, "" /* partition, which is unused */) + if err != nil { + return &ignoreableError{fmt.Errorf("failed to find device path for volume %s: %v", volumeID, err.Error())} + } + + devFsPath, err := filepath.EvalSymlinks(devicePath) + if err != nil { + return &ignoreableError{fmt.Errorf("filepath.EvalSymlinks(%q) failed: %v", devicePath, err)} + } + + if inUse, err := ns.DeviceUtils.IsDeviceFilesystemInUse(ns.Mounter, devicePath, devFsPath); err != nil { + return &ignoreableError{fmt.Errorf("failed to check if device %s (aka %s) is in use: %v", devicePath, devFsPath, err)} + } else if inUse { + return fmt.Errorf("device %s (aka %s) is still in use", devicePath, devFsPath) + } + + if err := ns.DeviceUtils.DisableDevice(devFsPath); err != nil { + return &ignoreableError{fmt.Errorf("failed to disable device %s (aka %s): %v", devicePath, devFsPath, err)} + } + + return nil +} + func (ns *GCENodeServer) NodeGetCapabilities(ctx context.Context, req *csi.NodeGetCapabilitiesRequest) (*csi.NodeGetCapabilitiesResponse, error) { return &csi.NodeGetCapabilitiesResponse{ Capabilities: ns.Driver.nscap, diff --git a/test/e2e/tests/single_zone_e2e_test.go b/test/e2e/tests/single_zone_e2e_test.go index 2953ebb7a1..f8dc086366 100644 --- a/test/e2e/tests/single_zone_e2e_test.go +++ b/test/e2e/tests/single_zone_e2e_test.go @@ -1341,7 +1341,83 @@ var _ = Describe("GCE PD CSI Driver", func() { Expect(err).To(BeNil(), "no error expected when passed valid compute url") }) -}) + + It("Should block unstage if filesystem mounted", func() { + testContext := getRandomTestContext() + + p, z, _ := testContext.Instance.GetIdentity() + client := testContext.Client + instance := testContext.Instance + + // Create Disk + volName, volID := createAndValidateUniqueZonalDisk(client, p, z, standardDiskType) + + defer func() { + // Delete Disk + err := client.DeleteVolume(volID) + Expect(err).To(BeNil(), "DeleteVolume failed") + + // Validate Disk Deleted + _, err = computeService.Disks.Get(p, z, volName).Do() + Expect(gce.IsGCEError(err, "notFound")).To(BeTrue(), "Expected disk to not be found") + }() + + // Attach Disk + err := client.ControllerPublishVolumeReadWrite(volID, instance.GetNodeID(), false /* forceAttach */) + Expect(err).To(BeNil(), "ControllerPublishVolume failed with error for disk %v on node %v: %v", volID, instance.GetNodeID(), err) + + defer func() { + // Detach Disk + err = client.ControllerUnpublishVolume(volID, instance.GetNodeID()) + if err != nil { + klog.Errorf("Failed to detach disk: %v", err) + } + }() + + // Stage Disk + stageDir := filepath.Join("/tmp/", volName, "stage") + err = client.NodeStageExt4Volume(volID, stageDir) + Expect(err).To(BeNil(), "failed to stage volume: %v", err) + + // Create private bind mount + boundMountStageDir := filepath.Join("/tmp/bindmount", volName, "bindmount") + boundMountStageMkdirOutput, err := instance.SSH("mkdir", "-p", boundMountStageDir) + Expect(err).To(BeNil(), "mkdir failed on instance %v: output: %v: %v", instance.GetNodeID(), boundMountStageMkdirOutput, err) + bindMountOutput, err := instance.SSH("mount", "--rbind", "--make-private", stageDir, boundMountStageDir) + Expect(err).To(BeNil(), "Bind mount failed on instance %v: output: %v: %v", instance.GetNodeID(), bindMountOutput, err) + + privateBindMountRemoved := false + unmountAndRmPrivateBindMount := func() { + if !privateBindMountRemoved { + // Umount and delete private mount staging directory + bindUmountOutput, err := instance.SSH("umount", boundMountStageDir) + Expect(err).To(BeNil(), "Bind mount failed on instance %v: output: %v: %v", instance.GetNodeID(), bindUmountOutput, err) + err = testutils.RmAll(instance, boundMountStageDir) + Expect(err).To(BeNil(), "Failed to rm mount stage dir %s: %v", boundMountStageDir, err) + } + privateBindMountRemoved = true + } + + defer func() { + unmountAndRmPrivateBindMount() + }() + + // Unstage Disk + err = client.NodeUnstageVolume(volID, stageDir) + Expect(err).ToNot(BeNil(), "Expected failure during unstage") + Expect(err).To(MatchError(ContainSubstring(("is still in use")))) + + // Unmount private bind mount and try again + unmountAndRmPrivateBindMount() + + // Unstage Disk + err = client.NodeUnstageVolume(volID, stageDir) + Expect(err).To(BeNil(), "Failed to unstage volume: %v", err) + fp := filepath.Join("/tmp/", volName) + err = testutils.RmAll(instance, fp) + Expect(err).To(BeNil(), "Failed to rm file path %s: %v", fp, err) + }) +} func equalWithinEpsilon(a, b, epsiolon int64) bool { if a > b {