Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow volume attach limit overwrite via command line parameter #522

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func main() {
driver.WithEndpoint(options.ServerOptions.Endpoint),
driver.WithExtraVolumeTags(options.ControllerOptions.ExtraVolumeTags),
driver.WithMode(options.DriverMode),
driver.WithVolumeAttachLimit(options.NodeOptions.VolumeAttachLimit),
)
if err != nil {
klog.Fatalln(err)
Expand Down
16 changes: 14 additions & 2 deletions cmd/options/node_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@ import (
)

// NodeOptions contains options and configuration settings for the node service.
type NodeOptions struct{}
type NodeOptions struct {
// VolumeAttachLimit specifies the value that shall be reported as "maximum number of attachable volumes"
// in CSINode objects. It is similar to https://kubernetes.io/docs/concepts/storage/storage-limits/#custom-limits
// which allowed administrators to specify custom volume limits by configuring the kube-scheduler. Also, each AWS
// machine type has different volume limits. By default, the EBS CSI driver parses the machine type name and then
// decides the volume limit. However, this is only a rough approximation and not good enough in most cases.
// Specifying the volume attach limit via command line is the alternative until a more sophisticated solution presents
// itself (dynamically discovering the maximum number of attachable volume per EC2 machine type, see also
// https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/347).
VolumeAttachLimit int64
}

func (s *NodeOptions) AddFlags(fs *flag.FlagSet) {}
func (o *NodeOptions) AddFlags(fs *flag.FlagSet) {
fs.Int64Var(&o.VolumeAttachLimit, "volume-attach-limit", -1, "Value value for the maximum number of volumes attachable for all nodes. If the flag is not specified then the value is approximated from the instance type.")
}
5 changes: 5 additions & 0 deletions cmd/options/node_options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ func TestNodeOptions(t *testing.T) {
flag string
found bool
}{
{
name: "lookup desired flag",
flag: "volume-attach-limit",
found: true,
},
{
name: "fail for non-desired flag",
flag: "some-flag",
Expand Down
17 changes: 17 additions & 0 deletions cmd/options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"flag"
"os"
"reflect"
"strconv"
"testing"

"github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/driver"
Expand All @@ -45,6 +46,9 @@ func TestGetOptions(t *testing.T) {
extraVolumeTagKey: extraVolumeTagValue,
}

VolumeAttachLimitFlagName := "volume-attach-limit"
var VolumeAttachLimit int64 = 42

args := append([]string{
"aws-ebs-csi-driver",
}, additionalArgs...)
Expand All @@ -55,6 +59,9 @@ func TestGetOptions(t *testing.T) {
if withControllerOptions {
args = append(args, "-"+extraVolumeTagsFlagName+"="+extraVolumeTagKey+"="+extraVolumeTagValue)
}
if withNodeOptions {
args = append(args, "-"+VolumeAttachLimitFlagName+"="+strconv.FormatInt(VolumeAttachLimit, 10))
}

oldArgs := os.Args
defer func() { os.Args = oldArgs }()
Expand Down Expand Up @@ -82,6 +89,16 @@ func TestGetOptions(t *testing.T) {
}
}

if withNodeOptions {
VolumeAttachLimitFlag := flagSet.Lookup(VolumeAttachLimitFlagName)
if VolumeAttachLimitFlag == nil {
t.Fatalf("expected %q flag to be added but it is not", VolumeAttachLimitFlagName)
}
if options.NodeOptions.VolumeAttachLimit != VolumeAttachLimit {
t.Fatalf("expected VolumeAttachLimit to be %d but it is %d", VolumeAttachLimit, options.NodeOptions.VolumeAttachLimit)
}
}

return options
}

Expand Down
1 change: 1 addition & 0 deletions deploy/kubernetes/base/node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ spec:
args:
- node
- --endpoint=$(CSI_ENDPOINT)
# - --volume-attach-limit=42
- --logtostderr
- --v=5
env:
Expand Down
12 changes: 12 additions & 0 deletions docs/design.md
Original file line number Diff line number Diff line change
Expand Up @@ -235,3 +235,15 @@ Example 2: `AWS_REGION=us-west-1 /bin/aws-ebs-csi-driver controller --extra-volu

- `node`: This will only start the node service of the CSI driver.\
Example: `/bin/aws-ebs-csi-driver node --endpoint=unix://...`

## Custom volume limits

For the Kubernetes in-tree volume provisioners (including the `kubernetes.io/aws-ebs` provisioner) it was possible for administrators to provide a custom volume limit overwrite (see https://kubernetes.io/docs/concepts/storage/storage-limits/#custom-limits).
This solution is not working with CSI any longer.
As part of [#347](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/347) we discuss how we can implement a sophisticated computation of the volume attach limit per node (e.g., based on the used machine types and already attached network interfaces).
However, it turns out that such optimal implementation is not easily achievable.
Each AWS machine type has different volume limits.
Today, the EBS CSI driver parses the machine type name and then decides the volume limit.
Unfortunately, this is only a rough approximation and not good enough in most cases.
In order to allow existing clusters that are leveraging/relying on this feature to migrate to CSI, the EBS CSI driver is supporting the `--volume-attach-limit` flag.
Specifying the volume attach limit via command line is the alternative until a more sophisticated solution presents itself (dynamically discovering the maximum number of attachable volume per EC2 machine type).
17 changes: 12 additions & 5 deletions pkg/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@ type Driver struct {
}

type DriverOptions struct {
endpoint string
extraVolumeTags map[string]string
mode Mode
endpoint string
extraVolumeTags map[string]string
mode Mode
volumeAttachLimit int64
}

func NewDriver(options ...func(*DriverOptions)) (*Driver, error) {
Expand All @@ -81,10 +82,10 @@ func NewDriver(options ...func(*DriverOptions)) (*Driver, error) {
case ControllerMode:
driver.controllerService = newControllerService(&driverOptions)
case NodeMode:
driver.nodeService = newNodeService()
driver.nodeService = newNodeService(&driverOptions)
case AllMode:
driver.controllerService = newControllerService(&driverOptions)
driver.nodeService = newNodeService()
driver.nodeService = newNodeService(&driverOptions)
default:
return nil, fmt.Errorf("unknown mode: %s", driverOptions.mode)
}
Expand Down Expand Up @@ -155,3 +156,9 @@ func WithMode(mode Mode) func(*DriverOptions) {
o.mode = mode
}
}

func WithVolumeAttachLimit(volumeAttachLimit int64) func(*DriverOptions) {
return func(o *DriverOptions) {
o.volumeAttachLimit = volumeAttachLimit
}
}
58 changes: 58 additions & 0 deletions pkg/driver/driver_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package driver

import (
"reflect"
"testing"
)

func TestWithEndpoint(t *testing.T) {
value := "endpoint"
options := &DriverOptions{}
WithEndpoint(value)(options)
if options.endpoint != value {
t.Fatalf("expected endpoint option got set to %q but is set to %q", value, options.endpoint)
}
}

func TestWithExtraVolumeTags(t *testing.T) {
value := map[string]string{"foo": "bar"}
options := &DriverOptions{}
WithExtraVolumeTags(value)(options)
if !reflect.DeepEqual(options.extraVolumeTags, value) {
t.Fatalf("expected extraVolumeTags option got set to %+v but is set to %+v", value, options.extraVolumeTags)
}
}

func TestWithMode(t *testing.T) {
value := Mode("mode")
options := &DriverOptions{}
WithMode(value)(options)
if options.mode != value {
t.Fatalf("expected mode option got set to %q but is set to %q", value, options.mode)
}
}

func TestWithVolumeAttachLimit(t *testing.T) {
var value int64 = 42
options := &DriverOptions{}
WithVolumeAttachLimit(value)(options)
if options.volumeAttachLimit != value {
t.Fatalf("expected volumeAttachLimit option got set to %d but is set to %d", value, options.volumeAttachLimit)
}
}
19 changes: 12 additions & 7 deletions pkg/driver/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,23 +70,25 @@ var (

// nodeService represents the node service of CSI driver
type nodeService struct {
metadata cloud.MetadataService
mounter Mounter
inFlight *internal.InFlight
metadata cloud.MetadataService
mounter Mounter
inFlight *internal.InFlight
driverOptions *DriverOptions
}

// newNodeService creates a new node service
// it panics if failed to create the service
func newNodeService() nodeService {
func newNodeService(driverOptions *DriverOptions) nodeService {
metadata, err := cloud.NewMetadata()
if err != nil {
panic(err)
}

return nodeService{
metadata: metadata,
mounter: newNodeMounter(),
inFlight: internal.NewInFlight(),
metadata: metadata,
mounter: newNodeMounter(),
inFlight: internal.NewInFlight(),
driverOptions: driverOptions,
}
}

Expand Down Expand Up @@ -514,6 +516,9 @@ func findNvmeVolume(findName string) (device string, err error) {

// getVolumesLimit returns the limit of volumes that the node supports
func (d *nodeService) getVolumesLimit() int64 {
if d.driverOptions.volumeAttachLimit >= 0 {
return d.driverOptions.volumeAttachLimit
}
ebsNitroInstanceTypeRegex := "^[cmr]5.*|t3|z1d"
instanceType := d.metadata.GetInstanceType()
if ok, _ := regexp.MatchString(ebsNitroInstanceTypeRegex, instanceType); ok {
Expand Down
67 changes: 47 additions & 20 deletions pkg/driver/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1243,43 +1243,70 @@ func TestNodeGetCapabilities(t *testing.T) {

func TestNodeGetInfo(t *testing.T) {
testCases := []struct {
name string
instanceID string
instanceType string
availabilityZone string
expMaxVolumes int64
name string
instanceID string
instanceType string
availabilityZone string
volumeAttachLimit int64
expMaxVolumes int64
}{
{
name: "success normal",
instanceID: "i-123456789abcdef01",
instanceType: "t2.medium",
availabilityZone: "us-west-2b",
expMaxVolumes: 39,
name: "success normal",
instanceID: "i-123456789abcdef01",
instanceType: "t2.medium",
availabilityZone: "us-west-2b",
volumeAttachLimit: -1,
expMaxVolumes: 39,
},
{
name: "success normal with NVMe",
instanceID: "i-123456789abcdef01",
instanceType: "m5d.large",
availabilityZone: "us-west-2b",
expMaxVolumes: 25,
name: "success normal with overwrite",
instanceID: "i-123456789abcdef01",
instanceType: "t2.medium",
availabilityZone: "us-west-2b",
volumeAttachLimit: 42,
expMaxVolumes: 42,
},
{
name: "success normal with NVMe",
instanceID: "i-123456789abcdef01",
instanceType: "m5d.large",
availabilityZone: "us-west-2b",
volumeAttachLimit: -1,
expMaxVolumes: 25,
},
{
name: "success normal with NVMe and overwrite",
instanceID: "i-123456789abcdef01",
instanceType: "m5d.large",
availabilityZone: "us-west-2b",
volumeAttachLimit: 30,
expMaxVolumes: 30,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
mockCtl := gomock.NewController(t)
defer mockCtl.Finish()

driverOptions := &DriverOptions{
volumeAttachLimit: tc.volumeAttachLimit,
}

mockMounter := mocks.NewMockMounter(mockCtl)

mockMetadata := mocks.NewMockMetadataService(mockCtl)
mockMetadata.EXPECT().GetInstanceID().Return(tc.instanceID)
mockMetadata.EXPECT().GetInstanceType().Return(tc.instanceType)
mockMetadata.EXPECT().GetAvailabilityZone().Return(tc.availabilityZone)

mockMounter := mocks.NewMockMounter(mockCtl)
if tc.volumeAttachLimit < 0 {
mockMetadata.EXPECT().GetInstanceType().Return(tc.instanceType)
}

awsDriver := &nodeService{
metadata: mockMetadata,
mounter: mockMounter,
inFlight: internal.NewInFlight(),
metadata: mockMetadata,
mounter: mockMounter,
inFlight: internal.NewInFlight(),
driverOptions: driverOptions,
}

resp, err := awsDriver.NodeGetInfo(context.TODO(), &csi.NodeGetInfoRequest{})
Expand Down
5 changes: 3 additions & 2 deletions pkg/driver/sanity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ func TestSanity(t *testing.T) {
Region: "region",
AvailabilityZone: "az",
},
mounter: newFakeMounter(),
inFlight: internal.NewInFlight(),
mounter: newFakeMounter(),
inFlight: internal.NewInFlight(),
driverOptions: &DriverOptions{},
},
}
defer func() {
Expand Down