Simplify attach process #322

Open · wants to merge 2 commits into base: main
6 changes: 0 additions & 6 deletions internal/driver/controllerserver.go
@@ -202,12 +202,6 @@ func (cs *ControllerServer) ControllerPublishVolume(ctx context.Context, req *cs
}, nil
}

// Check if the instance can accommodate the volume attachment
if capErr := cs.checkAttachmentCapacity(ctx, instance); capErr != nil {
metrics.RecordMetrics(metrics.ControllerPublishVolumeTotal, metrics.ControllerPublishVolumeDuration, metrics.Failed, functionStartTime)
return resp, capErr
}

// Attach the volume to the specified instance
if attachErr := cs.attachVolume(ctx, volumeID, linodeID); attachErr != nil {
metrics.RecordMetrics(metrics.ControllerPublishVolumeTotal, metrics.ControllerPublishVolumeDuration, metrics.Failed, functionStartTime)
68 changes: 9 additions & 59 deletions internal/driver/controllerserver_helper.go
@@ -10,8 +10,6 @@ import (

"github.com/container-storage-interface/spec/lib/go/csi"
"github.com/linode/linodego"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

linodevolumes "github.com/linode/linode-blockstorage-csi-driver/pkg/linode-volumes"
"github.com/linode/linode-blockstorage-csi-driver/pkg/logger"
@@ -95,31 +93,6 @@ type VolumeParams struct {
Region string
}

// canAttach indicates whether or not another volume can be attached to the
// Linode with the given ID.
//
// Whether or not another volume can be attached is based on how many instance
// disks and block storage volumes are currently attached to the instance.
func (cs *ControllerServer) canAttach(ctx context.Context, instance *linodego.Instance) (canAttach bool, err error) {
log := logger.GetLogger(ctx)
log.V(4).Info("Checking if volume can be attached", "instance_id", instance.ID)

// Get the maximum number of volume attachments allowed for the instance
limit, err := cs.maxAllowedVolumeAttachments(ctx, instance)
if err != nil {
return false, err
}

// List the volumes currently attached to the instance
volumes, err := cs.client.ListInstanceVolumes(ctx, instance.ID, nil)
if err != nil {
return false, errInternal("list instance volumes: %v", err)
}

// Return true if the number of attached volumes is less than the limit
return len(volumes) < limit, nil
}

// maxAllowedVolumeAttachments calculates the maximum number of volumes that can be attached to a Linode instance,
// taking into account the instance's memory and currently attached disks.
func (cs *ControllerServer) maxAllowedVolumeAttachments(ctx context.Context, instance *linodego.Instance) (int, error) {
@@ -677,33 +650,6 @@ func (cs *ControllerServer) getInstance(ctx context.Context, linodeID int) (*lin
return instance, nil
}

Contributor:
Is there really no value in having these checks client side at all?

@guilhem (Contributor, Author) commented on Nov 26, 2024:
I tend to prefer relying on the API's internal logic and validation, with the CSI driver acting as a simple pass-through.

The problem I could see is rate limiting on the API (GET vs. POST), but the external-attacher already has a backoff on error:
https://github.com/kubernetes-csi/external-attacher?tab=readme-ov-file#csi-error-and-timeout-handling

That may still be questionable, or we may have to wait for linodego to implement rate limiting. That's why this PR was in draft state at first.

// checkAttachmentCapacity checks if the specified instance can accommodate
// additional volume attachments. It retrieves the maximum number of allowed
// attachments and compares it with the currently attached volumes. If the
// limit is exceeded, it returns an error indicating the maximum volume
// attachments allowed.
func (cs *ControllerServer) checkAttachmentCapacity(ctx context.Context, instance *linodego.Instance) error {
log := logger.GetLogger(ctx)
log.V(4).Info("Entering checkAttachmentCapacity()", "linodeID", instance.ID)
defer log.V(4).Info("Exiting checkAttachmentCapacity()")

canAttach, err := cs.canAttach(ctx, instance)
if err != nil {
return err
}
if !canAttach {
// If the instance cannot accommodate more attachments, retrieve the maximum allowed attachments.
limit, err := cs.maxAllowedVolumeAttachments(ctx, instance)
if errors.Is(err, errNilInstance) {
return errInternal("cannot calculate max volume attachments for a nil instance")
} else if err != nil {
return errMaxAttachments // Return an error indicating the maximum attachments limit has been reached.
}
return errMaxVolumeAttachments(limit) // Return an error indicating the maximum volume attachments allowed.
}
return nil // Return nil if the instance can accommodate more attachments.
}

// attachVolume attaches the specified volume to the given Linode instance.
// It logs the action and handles any errors that may occur during the
// attachment process. If the volume is already attached, it allows for a
@@ -719,13 +665,17 @@ func (cs *ControllerServer) attachVolume(ctx context.Context, volumeID, linodeID
PersistAcrossBoots: &persist,
})
if err != nil {
code := codes.Internal // Default error code is Internal.
// Check if the error indicates that the volume is already attached.
// https://github.com/container-storage-interface/spec/blob/master/spec.md#controllerpublishvolume-errors
var apiErr *linodego.Error
if errors.As(err, &apiErr) && strings.Contains(apiErr.Message, "is already attached") {
code = codes.Unavailable // Allow a retry if the volume is already attached: race condition can occur here
if errors.As(err, &apiErr) {
switch {
case strings.Contains(apiErr.Message, "is already attached"):
return errAlreadyAttached
Comment on lines +672 to +673
Contributor:
Question: do we only get this error when the volume is attached to another Linode?

I think we should pass along the specific error message from the API, which could include the Linode ID of the other node. That would be helpful when debugging issues in the future.
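
A minimal sketch of that suggestion, assuming a helper next to the other err* helpers in errors.go (errAlreadyAttachedf is hypothetical and not part of this PR); it forwards the Linode API message, which may name the instance the volume is attached to:

```go
// errAlreadyAttachedf is a hypothetical variant of errAlreadyAttached that
// keeps the Linode API message in the returned gRPC status, so operators can
// see which instance currently holds the volume.
func errAlreadyAttachedf(apiErr *linodego.Error) error {
	return status.Errorf(codes.FailedPrecondition,
		"volume is already attached: %s", apiErr.Message)
}
```

The "is already attached" case in attachVolume would then return errAlreadyAttachedf(apiErr) instead of the fixed sentinel.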

case strings.Contains(apiErr.Message, "Maximum number of block storage volumes are attached to this Linode"):
Contributor:
OK, but this seems brittle if the API changes its error text. How can we check for this error without relying on the spelling of an API error message?

Contributor (Author):
That's an excellent question. The API doesn't document its errors ([screenshot omitted]), and linodego reports the request error as-is.

But note that this logic already existed in the CSI driver before this PR:

if errors.As(err, &apiErr) && strings.Contains(apiErr.Message, "is already attached") {

Contributor:
Yeah, definitely not the ideal way to do it. Hopefully linodego can provide better error codes in the future so we don't have to build logic based on the error strings :/
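
If the message matching has to stay for now, one option is to centralize it so a future switch to structured linodego error codes touches only one function. A sketch under that assumption (classifyAttachError is hypothetical; it reuses the errInternal/errAlreadyAttached/errMaxAttachments helpers already in this package):

```go
// classifyAttachError maps known Linode API error messages onto the driver's
// sentinel errors. All of the brittle string matching lives here, so swapping
// to structured linodego error codes later only requires changing this function.
func classifyAttachError(err error) error {
	var apiErr *linodego.Error
	if !errors.As(err, &apiErr) {
		return errInternal("attach volume: %v", err)
	}
	switch {
	case strings.Contains(apiErr.Message, "is already attached"):
		return errAlreadyAttached
	case strings.Contains(apiErr.Message, "Maximum number of block storage volumes are attached to this Linode"):
		return errMaxAttachments
	default:
		return errInternal("attach volume: %v", err)
	}
}
```

attachVolume could then reduce its error branch to return classifyAttachError(err).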

return errMaxAttachments
Contributor:
Similar to my suggestion in the other comment: we should try to pass along the error message from the API here too.

}
}
return status.Errorf(code, "attach volume: %v", err)
return errInternal("attach volume: %v", err)
}
return nil // Return nil if the volume is successfully attached.
}
62 changes: 2 additions & 60 deletions internal/driver/controllerserver_helper_test.go
@@ -933,64 +933,6 @@ func TestGetAndValidateVolume(t *testing.T) {
}
}

func TestCheckAttachmentCapacity(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

mockClient := mocks.NewMockLinodeClient(ctrl)
cs := &ControllerServer{
client: mockClient,
}

testCases := []struct {
name string
instance *linodego.Instance
setupMocks func()
expectedError error
}{
{
name: "Can attach volume",
instance: &linodego.Instance{
ID: 123,
Specs: &linodego.InstanceSpec{
Memory: 4096,
},
},
setupMocks: func() {
mockClient.EXPECT().ListInstanceVolumes(gomock.Any(), 123, gomock.Any()).Return([]linodego.Volume{}, nil)
mockClient.EXPECT().ListInstanceDisks(gomock.Any(), 123, gomock.Any()).Return([]linodego.InstanceDisk{}, nil)
},
expectedError: nil,
},
{
name: "Cannot attach volume - max attachments reached",
instance: &linodego.Instance{
ID: 456,
Specs: &linodego.InstanceSpec{
Memory: 1024,
},
},
setupMocks: func() {
mockClient.EXPECT().ListInstanceDisks(gomock.Any(), 456, gomock.Any()).Return([]linodego.InstanceDisk{{ID: 1}, {ID: 2}}, nil).AnyTimes()
mockClient.EXPECT().ListInstanceVolumes(gomock.Any(), 456, gomock.Any()).Return([]linodego.Volume{{ID: 1}, {ID: 2}, {ID: 3}, {ID: 4}, {ID: 5}, {ID: 6}}, nil)
},
expectedError: errMaxVolumeAttachments(6),
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
tc.setupMocks()

err := cs.checkAttachmentCapacity(context.Background(), tc.instance)

if err != nil && !reflect.DeepEqual(tc.expectedError, err) {
t.Errorf("expected error %v, got %v", tc.expectedError, err)
}
})
}
}

func TestGetContentSourceVolume(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
@@ -1169,7 +1111,7 @@ func TestAttachVolume(t *testing.T) {
setupMocks: func() {
mockClient.EXPECT().AttachVolume(gomock.Any(), 789, gomock.Any()).Return(nil, &linodego.Error{Message: "Volume 789 is already attached"})
},
expectedError: status.Error(codes.Unavailable, "attach volume: [000] Volume 789 is already attached"),
expectedError: errAlreadyAttached,
},
{
name: "API error",
Expand All @@ -1178,7 +1120,7 @@ func TestAttachVolume(t *testing.T) {
setupMocks: func() {
mockClient.EXPECT().AttachVolume(gomock.Any(), 202, gomock.Any()).Return(nil, errors.New("API error"))
},
expectedError: status.Error(codes.Internal, "attach volume: API error"),
expectedError: errInternal("attach volume: API error"),
},
}

78 changes: 0 additions & 78 deletions internal/driver/controllerserver_test.go
@@ -215,8 +215,6 @@ func TestControllerPublishVolume(t *testing.T) {
m.EXPECT().GetVolume(gomock.Any(), gomock.Any()).Return(&linodego.Volume{ID: 1001, LinodeID: createLinodeID(1003), Size: 10, Status: linodego.VolumeActive}, nil).AnyTimes()
m.EXPECT().WaitForVolumeLinodeID(gomock.Any(), 630706045, gomock.Any(), gomock.Any()).Return(&linodego.Volume{ID: 1001, LinodeID: createLinodeID(1003), Size: 10, Status: linodego.VolumeActive}, nil)
m.EXPECT().AttachVolume(gomock.Any(), 630706045, gomock.Any()).Return(&linodego.Volume{ID: 1001, LinodeID: createLinodeID(1003), Size: 10, Status: linodego.VolumeActive}, nil)
m.EXPECT().ListInstanceVolumes(gomock.Any(), 1003, gomock.Any()).Return([]linodego.Volume{{ID: 1001, LinodeID: createLinodeID(1003), Size: 10, Status: linodego.VolumeActive}}, nil)
m.EXPECT().ListInstanceDisks(gomock.Any(), 1003, gomock.Any()).Return([]linodego.InstanceDisk{}, nil)
},
expectedError: nil,
},
@@ -679,82 +677,6 @@ func createLinodeID(i int) *int {
return &i
}

func TestControllerCanAttach(t *testing.T) {
t.Parallel()

tests := []struct {
memory uint // memory in bytes
nvols int // number of volumes already attached
ndisks int // number of attached disks
want bool // can we attach another?
fail bool // should we expect a non-nil error
}{
{
memory: 1 << 30, // 1GiB
nvols: 7, // maxed out
ndisks: 1,
},
{
memory: 16 << 30, // 16GiB
nvols: 14, // should allow one more
ndisks: 1,
want: true,
},
{
memory: 16 << 30,
nvols: 15,
ndisks: 1,
},
{
memory: 256 << 30, // 256GiB
nvols: 64, // maxed out
},
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
for _, tt := range tests {
tname := fmt.Sprintf("%dGB-%d", tt.memory>>30, tt.nvols)
t.Run(tname, func(t *testing.T) {
vols := make([]linodego.Volume, 0, tt.nvols)
for i := 0; i < tt.nvols; i++ {
vols = append(vols, linodego.Volume{ID: i})
}

disks := make([]linodego.InstanceDisk, 0, tt.ndisks)
for i := 0; i < tt.ndisks; i++ {
disks = append(disks, linodego.InstanceDisk{ID: i})
}

memMB := 8192
if tt.memory != 0 {
memMB = int(tt.memory >> 20) // convert bytes -> MB
}
instance := &linodego.Instance{
Specs: &linodego.InstanceSpec{Memory: memMB},
}

srv := ControllerServer{
client: &fakeLinodeClient{
volumes: vols,
disks: disks,
},
}

got, err := srv.canAttach(ctx, instance)
if err != nil && !tt.fail {
t.Fatal(err)
} else if err == nil && tt.fail {
t.Fatal("should have failed")
}

if got != tt.want {
t.Errorf("got=%t want=%t", got, tt.want)
}
})
}
}

func TestControllerMaxVolumeAttachments(t *testing.T) {
tests := []struct {
name string
8 changes: 4 additions & 4 deletions internal/driver/errors.go
@@ -34,6 +34,10 @@ var (
// attachments allowed for the instance, call errMaxVolumeAttachments.
errMaxAttachments = status.Error(codes.ResourceExhausted, "max number of volumes already attached to instance")

// errAlreadyAttached is used to indicate that a volume is already attached
// to a Linode instance.
errAlreadyAttached = status.Error(codes.FailedPrecondition, "volume is already attached")
Contributor (Author):
Before merging, I want to wait for an answer on this issue: kubernetes-csi/external-attacher#604
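
Depending on the answer there, a retry-friendly alternative would keep the gRPC code the pre-PR logic used for this race. A hypothetical variant of the entry in the var ( ... ) block, not what this diff does:

```go
// Hypothetical alternative: the pre-PR code returned codes.Unavailable for
// the "already attached" race so the attacher could retry; keeping that
// behaviour would look like this instead of codes.FailedPrecondition.
errAlreadyAttached = status.Error(codes.Unavailable, "volume is already attached")
```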


// errResizeDown indicates a request would result in a volume being resized
// to be smaller than it currently is.
//
Expand Down Expand Up @@ -61,10 +65,6 @@ func errRegionMismatch(gotRegion, wantRegion string) error {
return status.Errorf(codes.InvalidArgument, "source volume is in region %q, needs to be in region %q", gotRegion, wantRegion)
}

func errMaxVolumeAttachments(numAttachments int) error {
return status.Errorf(codes.ResourceExhausted, "max number of volumes (%d) already attached to instance", numAttachments)
}

func errInstanceNotFound(linodeID int) error {
return status.Errorf(codes.NotFound, "linode instance %d not found", linodeID)
}