diff --git a/pkg/gce-cloud-provider/compute/gce-compute.go b/pkg/gce-cloud-provider/compute/gce-compute.go index 56aa9fc3f..5749be437 100644 --- a/pkg/gce-cloud-provider/compute/gce-compute.go +++ b/pkg/gce-cloud-provider/compute/gce-compute.go @@ -560,7 +560,9 @@ func (cloud *CloudProvider) insertRegionalDisk( if IsGCEError(err, "alreadyExists") { disk, err := cloud.GetDisk(ctx, project, volKey, gceAPIVersion) if err != nil { - return err + // failed to GetDisk, however the Disk may already exist + // the error code should be non-Final + return status.Error(codes.Unavailable, err.Error()) } err = cloud.ValidateExistingDisk(ctx, disk, params, int64(capacityRange.GetRequiredBytes()), @@ -572,16 +574,19 @@ func (cloud *CloudProvider) insertRegionalDisk( klog.Warningf("GCE PD %s already exists, reusing", volKey.Name) return nil } - return status.Error(codes.Internal, fmt.Sprintf("unknown Insert disk error: %v", err.Error())) + // if the error code is considered "final", RegionDisks.Insert might not be retried + return fmt.Errorf("unknown Insert Regional disk error: %w", err) } klog.V(5).Infof("InsertDisk operation %s for disk %s", opName, diskToCreate.Name) err = cloud.waitForRegionalOp(ctx, project, opName, volKey.Region) + // failed to wait for Op to finish, however, the Op possibly is still running as expected + // the error code returned should be non-final if err != nil { if IsGCEError(err, "alreadyExists") { disk, err := cloud.GetDisk(ctx, project, volKey, gceAPIVersion) if err != nil { - return err + return status.Errorf(codes.Unavailable, "error when getting disk: %v", err.Error()) } err = cloud.ValidateExistingDisk(ctx, disk, params, int64(capacityRange.GetRequiredBytes()), @@ -593,7 +598,7 @@ func (cloud *CloudProvider) insertRegionalDisk( klog.Warningf("GCE PD %s already exists after wait, reusing", volKey.Name) return nil } - return fmt.Errorf("unknown Insert disk operation error: %w", err) + return status.Errorf(codes.Unavailable, "unknown error when polling the operation: %v", err.Error()) } return nil } @@ -695,7 +700,9 @@ func (cloud *CloudProvider) insertZonalDisk( if IsGCEError(err, "alreadyExists") { disk, err := cloud.GetDisk(ctx, project, volKey, gceAPIVersion) if err != nil { - return err + // failed to GetDisk, however the Disk may already exist + // the error code should be non-Final + return status.Error(codes.Unavailable, err.Error()) } err = cloud.ValidateExistingDisk(ctx, disk, params, int64(capacityRange.GetRequiredBytes()), @@ -707,6 +714,7 @@ func (cloud *CloudProvider) insertZonalDisk( klog.Warningf("GCE PD %s already exists, reusing", volKey.Name) return nil } + // if the error code is considered "final", Disks.Insert might not be retried return fmt.Errorf("unknown Insert disk error: %w", err) } klog.V(5).Infof("InsertDisk operation %s for disk %s", opName, diskToCreate.Name) @@ -714,10 +722,12 @@ func (cloud *CloudProvider) insertZonalDisk( err = cloud.waitForZonalOp(ctx, project, opName, volKey.Zone) if err != nil { + // failed to wait for Op to finish, however, the Op possibly is still running as expected + // the error code returned should be non-final if IsGCEError(err, "alreadyExists") { disk, err := cloud.GetDisk(ctx, project, volKey, gceAPIVersion) if err != nil { - return err + return status.Errorf(codes.Unavailable, "error when getting disk: %v", err.Error()) } err = cloud.ValidateExistingDisk(ctx, disk, params, int64(capacityRange.GetRequiredBytes()), @@ -729,7 +739,7 @@ func (cloud *CloudProvider) insertZonalDisk( klog.Warningf("GCE PD %s already exists after wait, reusing", volKey.Name) return nil } - return fmt.Errorf("unknown Insert disk operation error: %w", err) + return status.Errorf(codes.Unavailable, "unknown error when polling the operation: %v", err.Error()) } return nil } diff --git a/pkg/gce-pd-csi-driver/controller.go b/pkg/gce-pd-csi-driver/controller.go index a3e52467e..93306c33d 100644 --- a/pkg/gce-pd-csi-driver/controller.go +++ b/pkg/gce-pd-csi-driver/controller.go @@ -324,7 +324,8 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre existingDisk, err := gceCS.CloudProvider.GetDisk(ctx, gceCS.CloudProvider.GetDefaultProject(), volKey, gceAPIVersion) if err != nil { if !gce.IsGCEError(err, "notFound") { - return nil, common.LoggedError("CreateVolume, failed to getDisk when validating: ", err) + // failed to GetDisk, however the Disk may already be created, the error code should be non-Final + return nil, common.LoggedError("CreateVolume, failed to getDisk when validating: ", status.Error(codes.Unavailable, err.Error())) } } if err == nil { @@ -339,10 +340,10 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre ready, err := isDiskReady(existingDisk) if err != nil { - return nil, common.LoggedError("CreateVolume disk "+volKey.String()+" had error checking ready status: ", err) + return nil, status.Errorf(codes.Aborted, "CreateVolume disk %q had error checking ready status: %v", volKey.String(), err.Error()) } if !ready { - return nil, status.Errorf(codes.Internal, "CreateVolume existing disk %v is not ready", volKey) + return nil, status.Errorf(codes.Aborted, "CreateVolume existing disk %v is not ready", volKey) } // If there is no validation error, immediately return success @@ -417,10 +418,10 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre // Verify the source disk is ready. ready, err := isDiskReady(diskFromSourceVolume) if err != nil { - return nil, common.LoggedError("CreateVolume disk from source volume "+sourceVolKey.String()+" had error checking ready status: ", err) + return nil, status.Errorf(codes.Aborted, "CreateVolume disk from source volume %q had error checking ready status: %v", sourceVolKey.String(), err.Error()) } if !ready { - return nil, status.Errorf(codes.Internal, "CreateVolume disk from source volume %v is not ready", sourceVolKey) + return nil, status.Errorf(codes.Aborted, "CreateVolume disk from source volume %v is not ready", sourceVolKey) } } } else { // if VolumeContentSource is nil, validate access mode is not read only @@ -1876,9 +1877,10 @@ func createRegionalDisk(ctx context.Context, cloudProvider gce.GCECompute, name if multiWriter { gceAPIVersion = gce.GCEAPIVersionBeta } + // failed to GetDisk, however the Disk may already be created, the error code should be non-Final disk, err := cloudProvider.GetDisk(ctx, project, meta.RegionalKey(name, region), gceAPIVersion) if err != nil { - return nil, fmt.Errorf("failed to get disk after creating regional disk: %w", err) + return nil, status.Errorf(codes.Unavailable, "failed to get disk after creating regional disk: %v", err.Error()) } return disk, nil } @@ -1898,9 +1900,10 @@ func createSingleZoneDisk(ctx context.Context, cloudProvider gce.GCECompute, nam if multiWriter { gceAPIVersion = gce.GCEAPIVersionBeta } + // failed to GetDisk, however the Disk may already be created, the error code should be non-Final disk, err := cloudProvider.GetDisk(ctx, project, meta.ZonalKey(name, diskZone), gceAPIVersion) if err != nil { - return nil, err + return nil, status.Errorf(codes.Unavailable, "failed to get disk after creating zonal disk: %v", err.Error()) } return disk, nil }