From af7314dcf55ee3923486e66689516639145bde1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9iy=C3=AC=20Zhang?= Date: Tue, 2 Jan 2024 11:42:49 -0800 Subject: [PATCH] change GetDisk error reporting to temporary in CreateVolume codepath --- pkg/gce-cloud-provider/compute/gce-compute.go | 22 +++++++++++++------ pkg/gce-pd-csi-driver/controller.go | 9 +++++--- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/pkg/gce-cloud-provider/compute/gce-compute.go b/pkg/gce-cloud-provider/compute/gce-compute.go index 56aa9fc3f4..daeeb134ce 100644 --- a/pkg/gce-cloud-provider/compute/gce-compute.go +++ b/pkg/gce-cloud-provider/compute/gce-compute.go @@ -558,9 +558,11 @@ func (cloud *CloudProvider) insertRegionalDisk( } if err != nil { if IsGCEError(err, "alreadyExists") { + // failed to GetDisk, however the Disk may already exist + // the error code should be non-Final disk, err := cloud.GetDisk(ctx, project, volKey, gceAPIVersion) if err != nil { - return err + return status.Error(codes.Unavailable, err.Error()) } err = cloud.ValidateExistingDisk(ctx, disk, params, int64(capacityRange.GetRequiredBytes()), @@ -572,16 +574,18 @@ func (cloud *CloudProvider) insertRegionalDisk( klog.Warningf("GCE PD %s already exists, reusing", volKey.Name) return nil } - return status.Error(codes.Internal, fmt.Sprintf("unknown Insert disk error: %v", err.Error())) + return fmt.Errorf("unknown Insert Regional disk error: %w", err) } klog.V(5).Infof("InsertDisk operation %s for disk %s", opName, diskToCreate.Name) err = cloud.waitForRegionalOp(ctx, project, opName, volKey.Region) + // failed to wait for Op to finish, however, the Op possibly is still running as expected + // the error code returned should be non-final if err != nil { if IsGCEError(err, "alreadyExists") { disk, err := cloud.GetDisk(ctx, project, volKey, gceAPIVersion) if err != nil { - return err + return status.Errorf(codes.Unavailable, "error when getting disk: %v", err.Error()) } err = cloud.ValidateExistingDisk(ctx, disk, params, int64(capacityRange.GetRequiredBytes()), @@ -593,7 +597,7 @@ func (cloud *CloudProvider) insertRegionalDisk( klog.Warningf("GCE PD %s already exists after wait, reusing", volKey.Name) return nil } - return fmt.Errorf("unknown Insert disk operation error: %w", err) + return status.Errorf(codes.Unavailable, "unknown error when polling the operation: %v", err.Error()) } return nil } @@ -693,9 +697,11 @@ func (cloud *CloudProvider) insertZonalDisk( if err != nil { if IsGCEError(err, "alreadyExists") { + // failed to GetDisk, however the Disk may already exist + // the error code should be non-Final disk, err := cloud.GetDisk(ctx, project, volKey, gceAPIVersion) if err != nil { - return err + return status.Error(codes.Unavailable, err.Error()) } err = cloud.ValidateExistingDisk(ctx, disk, params, int64(capacityRange.GetRequiredBytes()), @@ -714,10 +720,12 @@ func (cloud *CloudProvider) insertZonalDisk( err = cloud.waitForZonalOp(ctx, project, opName, volKey.Zone) if err != nil { + // failed to wait for Op to finish, however, the Op possibly is still running as expected + // the error code returned should be non-final if IsGCEError(err, "alreadyExists") { disk, err := cloud.GetDisk(ctx, project, volKey, gceAPIVersion) if err != nil { - return err + return status.Errorf(codes.Unavailable, "error when getting disk: %v", err.Error()) } err = cloud.ValidateExistingDisk(ctx, disk, params, int64(capacityRange.GetRequiredBytes()), @@ -729,7 +737,7 @@ func (cloud *CloudProvider) insertZonalDisk( klog.Warningf("GCE PD %s already exists after wait, reusing", volKey.Name) return nil } - return fmt.Errorf("unknown Insert disk operation error: %w", err) + return status.Errorf(codes.Unavailable, "unknown error when polling the operation: %v", err.Error()) } return nil } diff --git a/pkg/gce-pd-csi-driver/controller.go b/pkg/gce-pd-csi-driver/controller.go index a3e52467eb..31424c5384 100644 --- a/pkg/gce-pd-csi-driver/controller.go +++ b/pkg/gce-pd-csi-driver/controller.go @@ -324,7 +324,8 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre existingDisk, err := gceCS.CloudProvider.GetDisk(ctx, gceCS.CloudProvider.GetDefaultProject(), volKey, gceAPIVersion) if err != nil { if !gce.IsGCEError(err, "notFound") { - return nil, common.LoggedError("CreateVolume, failed to getDisk when validating: ", err) + // failed to GetDisk, however the Disk may already be created, the error code should be non-Final + return nil, common.LoggedError("CreateVolume, failed to getDisk when validating: ", status.Error(codes.Unavailable, err.Error())) } } if err == nil { @@ -1876,9 +1877,10 @@ func createRegionalDisk(ctx context.Context, cloudProvider gce.GCECompute, name if multiWriter { gceAPIVersion = gce.GCEAPIVersionBeta } + // failed to GetDisk, however the Disk may already be created, the error code should be non-Final disk, err := cloudProvider.GetDisk(ctx, project, meta.RegionalKey(name, region), gceAPIVersion) if err != nil { - return nil, fmt.Errorf("failed to get disk after creating regional disk: %w", err) + return nil, status.Errorf(codes.Unavailable, "failed to get disk after creating regional disk: %v", err.Error()) } return disk, nil } @@ -1898,9 +1900,10 @@ func createSingleZoneDisk(ctx context.Context, cloudProvider gce.GCECompute, nam if multiWriter { gceAPIVersion = gce.GCEAPIVersionBeta } + // failed to GetDisk, however the Disk may already be created, the error code should be non-Final disk, err := cloudProvider.GetDisk(ctx, project, meta.ZonalKey(name, diskZone), gceAPIVersion) if err != nil { - return nil, err + return nil, status.Errorf(codes.Unavailable, "failed to get disk after creating zonal disk: %v", err.Error()) } return disk, nil }