From de3d948e71efdedbd8b8fb146303bad88eeb74eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Wed, 8 Apr 2020 17:31:53 +0200 Subject: [PATCH] dp: Expose link based resources by default (#11) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix formatting Signed-off-by: Jaime Caamaño Ruiz * DP: fix and improve documentation of OnLinkEvent Wrong break statement was preventing the subsciption to be properly terminated. Improved function doc to better explain behavior on error. Signed-off-by: Jaime Caamaño Ruiz * DP: make structs private for cleanliness Signed-off-by: Jaime Caamaño Ruiz * DP: work on main process namespace Allocate, ListAndWatch and the netlink subsciption callbacks are executed on their own goroutines. As documented here [1], go thread model may relocate goroutines among different OS threads that might be running on different namespaces. This patch makes sure that network operations always run on initial main thread namespace. Otherwise the device plugin may end up operating in different namespaces than the one intended. [1] https://github.com/containernetworking/plugins/blob/master/pkg/ns/README.md Signed-off-by: Jaime Caamaño Ruiz * DP: added explicit config tests Signed-off-by: Jaime Caamaño Ruiz * DP: expose link based resources by default When no explicit configuration is provided, the device plugin will expose default resources corresponding to links present on the node. So, for every link considered suitable as a macvtap parent, a resource to consume macvtap interfaces would be setup as macvtap.network.kubevirt.io/ Right now only physical and bond interfaces are considered suitable macvtap parents. These resources will be operating in bridge mode and capacity of 100. Signed-off-by: Jaime Caamaño Ruiz * DP: exit on Discover initial error Signed-off-by: Jaime Caamaño Ruiz --- README.md | 9 +- cluster/sync.sh | 2 +- cmd/deviceplugin/macvtap.go | 10 +- .../macvtap-deviceplugin-config-default.yaml | 6 + ...macvtap-deviceplugin-config-explicit.yaml} | 0 go.mod | 1 + pkg/deviceplugin/lister.go | 131 ++++++++-- pkg/deviceplugin/plugin.go | 57 +++-- pkg/deviceplugin/plugin_test.go | 231 +++++++++++++----- pkg/util/netlink.go | 100 +++++++- tests/e2e/macvtap_test.go | 6 +- 11 files changed, 438 insertions(+), 115 deletions(-) create mode 100644 examples/macvtap-deviceplugin-config-default.yaml rename examples/{macvtap-deviceplugin-config.yaml => macvtap-deviceplugin-config-explicit.yaml} (100%) diff --git a/README.md b/README.md index 760bd23b..2c62dbd4 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ to be made available: * `capacity` (uint, optional, default=100) the capacity of the resource In the default deployment, this configuration shall be provided through a -config map, for [example](examples/macvtap-deviceplugin-config.yaml): +config map, for [example](examples/macvtap-deviceplugin-config-explicit.yaml): ```yaml kind: ConfigMap @@ -51,6 +51,13 @@ This configuration will result in up to 50 macvtap interfaces being offered for consumption, using eth0 as the lower device, in bridge mode, and under resource name `macvtap.network.kubevirt.io/dataplane`. +A configuration consisting of an empty json array, as proposed in the default +[example](examples/macvtap-deviceplugin-config-default.yaml), causes the device +plugin to expose one resource for every physical link or bond on each node. For +example, if a node has a physical link called eth0, a resourced named +`macvtap.network.kubevirt.io/eth0` would be made available to use macvtap +interfaces with eth0 as the lower device + The macvtap CNI can be deployed using the proposed [daemon set](manifests/macvtap.yaml): diff --git a/cluster/sync.sh b/cluster/sync.sh index de9d8c1d..d9f2ef00 100755 --- a/cluster/sync.sh +++ b/cluster/sync.sh @@ -19,5 +19,5 @@ DESTINATION=$destination IMAGE_REGISTRY=registry:5000 make manifests ./cluster/kubectl.sh delete --ignore-not-found configmap macvtap-deviceplugin-config ./cluster/kubectl.sh delete --ignore-not-found ds macvtap-cni -./cluster/kubectl.sh create -f examples/macvtap-deviceplugin-config.yaml +./cluster/kubectl.sh create -f examples/macvtap-deviceplugin-config-default.yaml ./cluster/kubectl.sh create -f _out/manifests/macvtap.yaml diff --git a/cmd/deviceplugin/macvtap.go b/cmd/deviceplugin/macvtap.go index ac36d1c7..0c1e7b3c 100644 --- a/cmd/deviceplugin/macvtap.go +++ b/cmd/deviceplugin/macvtap.go @@ -7,16 +7,24 @@ import ( "github.com/golang/glog" "github.com/kubevirt/device-plugin-manager/pkg/dpm" macvtap "github.com/kubevirt/macvtap-cni/pkg/deviceplugin" + "github.com/kubevirt/macvtap-cni/pkg/util" ) func main() { flag.Parse() + // Device plugin operates with several goroutines that might be + // relocated among different OS threads with different namespaces. + // We capture the main namespace here and make sure that we do any + // network operation on that namespace. + // See + // https://github.com/containernetworking/plugins/blob/master/pkg/ns/README.md + mainNsPath := util.GetMainThreadNetNsPath() _, configDefined := os.LookupEnv(macvtap.ConfigEnvironmentVariable) if !configDefined { glog.Exitf("%s environment variable must be set", macvtap.ConfigEnvironmentVariable) } - manager := dpm.NewManager(macvtap.MacvtapLister{}) + manager := dpm.NewManager(macvtap.NewMacvtapLister(mainNsPath)) manager.Run() } diff --git a/examples/macvtap-deviceplugin-config-default.yaml b/examples/macvtap-deviceplugin-config-default.yaml new file mode 100644 index 00000000..bd51fdfa --- /dev/null +++ b/examples/macvtap-deviceplugin-config-default.yaml @@ -0,0 +1,6 @@ +kind: ConfigMap +apiVersion: v1 +metadata: + name: macvtap-deviceplugin-config +data: + DP_MACVTAP_CONF: "[]" diff --git a/examples/macvtap-deviceplugin-config.yaml b/examples/macvtap-deviceplugin-config-explicit.yaml similarity index 100% rename from examples/macvtap-deviceplugin-config.yaml rename to examples/macvtap-deviceplugin-config-explicit.yaml diff --git a/go.mod b/go.mod index ffb84632..7278a05a 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/onsi/gomega v1.8.1 github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 // indirect github.com/vishvananda/netlink v1.1.0 + github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df github.com/voxelbrain/goptions v0.0.0-20180630082107-58cddc247ea2 // indirect golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3 // indirect golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc diff --git a/pkg/deviceplugin/lister.go b/pkg/deviceplugin/lister.go index 2d84f408..23289e10 100644 --- a/pkg/deviceplugin/lister.go +++ b/pkg/deviceplugin/lister.go @@ -4,8 +4,10 @@ import ( "encoding/json" "os" + "github.com/containernetworking/plugins/pkg/ns" "github.com/golang/glog" "github.com/kubevirt/device-plugin-manager/pkg/dpm" + "github.com/kubevirt/macvtap-cni/pkg/util" ) const ( @@ -13,23 +15,32 @@ const ( ConfigEnvironmentVariable = "DP_MACVTAP_CONF" ) -type MacvtapConfig struct { +type macvtapConfig struct { Name string `json:"name"` Master string `json:"master"` Mode string `json:"mode"` Capacity int `json:"capacity"` } -type MacvtapLister struct { +type macvtapLister struct { + Config map[string]macvtapConfig + // NetNsPath is the path to the network namespace the lister operates in. + NetNsPath string } -func (ml MacvtapLister) GetResourceNamespace() string { +func NewMacvtapLister(netNsPath string) *macvtapLister { + return &macvtapLister{ + NetNsPath: netNsPath, + } +} + +func (ml macvtapLister) GetResourceNamespace() string { return resourceNamespace } -func readConfig() (map[string]MacvtapConfig, error) { - var config []MacvtapConfig - configMap := make(map[string]MacvtapConfig) +func readConfig() (map[string]macvtapConfig, error) { + var config []macvtapConfig + configMap := make(map[string]macvtapConfig) configEnv := os.Getenv(ConfigEnvironmentVariable) err := json.Unmarshal([]byte(configEnv), &config) @@ -44,13 +55,13 @@ func readConfig() (map[string]MacvtapConfig, error) { return configMap, nil } -func (ml MacvtapLister) Discover(pluginListCh chan dpm.PluginNameList) { +func discoverByConfig(pluginListCh chan dpm.PluginNameList) (map[string]macvtapConfig, error) { var plugins = make(dpm.PluginNameList, 0) config, err := readConfig() if err != nil { glog.Errorf("Error reading config: %v", err) - return + return nil, err } glog.V(3).Infof("Read configuration %+v", config) @@ -59,16 +70,100 @@ func (ml MacvtapLister) Discover(pluginListCh chan dpm.PluginNameList) { plugins = append(plugins, macvtapConfig.Name) } - pluginListCh <- plugins + if len(plugins) > 0 { + pluginListCh <- plugins + } + return config, nil +} + +func discoverByLinks(pluginListCh chan dpm.PluginNameList, netNsPath string) error { + // To know when the manager is stoping, we need to read from pluginListCh. + // We avoid reading our own updates by using a middle channel. + // We buffer up to one msg because of the initial call to sendSuitableParents. + parentListCh := make(chan []string, 1) + defer close(parentListCh) + + sendSuitableParents := func() error { + var linkNames []string + err := ns.WithNetNSPath(netNsPath, func(_ ns.NetNS) error { + var err error + linkNames, err = util.FindSuitableMacvtapParents() + return err + }) + + if err != nil { + glog.Errorf("Error while finding links: %v", err) + return err + } + + parentListCh <- linkNames + return nil + } + + // Do an initial search to catch early permanent runtime problems + err := sendSuitableParents() + if err != nil { + return err + } + + // Keep updating on changes for suitable parents. + stop := make(chan struct{}) + defer close(stop) + go util.OnSuitableMacvtapParentEvent( + netNsPath, + // Wrapper to ignore error + func() { + sendSuitableParents() + }, + stop, + func(err error) { + glog.Error(err) + }) + + // Keep forwarding updates to the manager until it closes down + for { + select { + case parentNames := <-parentListCh: + pluginListCh <- parentNames + case _, open := <-pluginListCh: + if !open { + return nil + } + } + } } -func (ml MacvtapLister) NewPlugin(name string) dpm.PluginInterface { - config, _ := readConfig() - glog.V(3).Infof("Creating device plugin with config %+v", config[name]) - return NewMacvtapDevicePlugin( - config[name].Name, - config[name].Master, - config[name].Mode, - config[name].Capacity, - ) +func (ml *macvtapLister) Discover(pluginListCh chan dpm.PluginNameList) { + config, err := discoverByConfig(pluginListCh) + if err != nil { + os.Exit(1) + } + + // Configuration is static and we don't need to do anything else + ml.Config = config + if len(config) > 0 { + return + } + + // If there was no configuration, we setup resources based on the existing + // links of the host. + err = discoverByLinks(pluginListCh, ml.NetNsPath) + if err != nil { + os.Exit(1) + } +} + +func (ml *macvtapLister) NewPlugin(name string) dpm.PluginInterface { + c, ok := ml.Config[name] + if !ok { + c = macvtapConfig{ + Name: name, + Master: name, + Mode: DefaultMode, + Capacity: DefaultCapacity, + } + } + + glog.V(3).Infof("Creating device plugin with config %+v", c) + return NewMacvtapDevicePlugin(c.Name, c.Master, c.Mode, c.Capacity, ml.NetNsPath) } diff --git a/pkg/deviceplugin/plugin.go b/pkg/deviceplugin/plugin.go index 628aee9f..bea73e58 100644 --- a/pkg/deviceplugin/plugin.go +++ b/pkg/deviceplugin/plugin.go @@ -3,6 +3,7 @@ package deviceplugin import ( "fmt" + "github.com/containernetworking/plugins/pkg/ns" "github.com/golang/glog" "golang.org/x/net/context" pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1" @@ -13,34 +14,40 @@ import ( const ( tapPath = "/dev/tap" // Interfaces will be named as [0-] - suffix = "Mvp" - defaultCapacity = 100 + suffix = "Mvp" + // DefaultCapacity is the default when no capacity is provided + DefaultCapacity = 100 + // DefaultMode is the default when no mode is provided + DefaultMode = "bridge" ) -type MacvtapDevicePlugin struct { - Name string - Master string - Mode string - Capacity int +type macvtapDevicePlugin struct { + Name string + Master string + Mode string + Capacity int + // NetNsPath is the path to the network namespace the plugin operates in. + NetNsPath string stopWatcher chan struct{} } -func NewMacvtapDevicePlugin(name string, master string, mode string, capacity int) *MacvtapDevicePlugin { - return &MacvtapDevicePlugin{ +func NewMacvtapDevicePlugin(name string, master string, mode string, capacity int, netNsPath string) *macvtapDevicePlugin { + return &macvtapDevicePlugin{ Name: name, Master: master, Mode: mode, Capacity: capacity, + NetNsPath: netNsPath, stopWatcher: make(chan struct{}), } } -func (mdp *MacvtapDevicePlugin) generateMacvtapDevices() []*pluginapi.Device { +func (mdp *macvtapDevicePlugin) generateMacvtapDevices() []*pluginapi.Device { var macvtapDevs []*pluginapi.Device var capacity = mdp.Capacity if capacity <= 0 { - capacity = defaultCapacity + capacity = DefaultCapacity } for i := 0; i < capacity; i++ { @@ -54,7 +61,7 @@ func (mdp *MacvtapDevicePlugin) generateMacvtapDevices() []*pluginapi.Device { return macvtapDevs } -func (mdp *MacvtapDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error { +func (mdp *macvtapDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error { // Initialize two arrays, one for devices offered when master exists, // and no devices if master does not exist. allocatableDevs := mdp.generateMacvtapDevices() @@ -72,11 +79,17 @@ func (mdp *MacvtapDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.Dev didMasterExist := false onMasterEvent := func() { - doesMasterExist, err := util.LinkExists(mdp.Master) + var doesMasterExist bool + err := ns.WithNetNSPath(mdp.NetNsPath, func(_ ns.NetNS) error { + var err error + doesMasterExist, err = util.LinkExists(mdp.Master) + return err + }) if err != nil { glog.Warningf("Error while checking on master %s: %v", mdp.Master, err) return } + if didMasterExist != doesMasterExist { emitResponse(doesMasterExist) didMasterExist = doesMasterExist @@ -84,10 +97,11 @@ func (mdp *MacvtapDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.Dev } // Listen for events of master interface. On any, check if master a - // interface exists. If it does, offer up to capacity mactvtap devices. Do + // interface exists. If it does, offer up to capacity macvtap devices. Do // not offer any otherwise. util.OnLinkEvent( mdp.Master, + mdp.NetNsPath, onMasterEvent, mdp.stopWatcher, func(err error) { @@ -97,7 +111,7 @@ func (mdp *MacvtapDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.Dev return nil } -func (mdp *MacvtapDevicePlugin) Allocate(ctx context.Context, r *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { +func (mdp *macvtapDevicePlugin) Allocate(ctx context.Context, r *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { var response pluginapi.AllocateResponse for _, req := range r.ContainerRequests { @@ -113,7 +127,12 @@ func (mdp *MacvtapDevicePlugin) Allocate(ctx context.Context, r *pluginapi.Alloc // no de-allocate flow to clean up. So we attempt to delete a // possibly existing existing interface before creating it to reset // its state. - index, err := util.RecreateMacvtap(name, mdp.Master, mdp.Mode) + var index int + err := ns.WithNetNSPath(mdp.NetNsPath, func(_ ns.NetNS) error { + var err error + index, err = util.RecreateMacvtap(name, mdp.Master, mdp.Mode) + return err + }) if err != nil { return nil, err } @@ -133,15 +152,15 @@ func (mdp *MacvtapDevicePlugin) Allocate(ctx context.Context, r *pluginapi.Alloc return &response, nil } -func (mdp *MacvtapDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) { +func (mdp *macvtapDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) { return nil, nil } -func (mdp *MacvtapDevicePlugin) GetDevicePluginOptions(context.Context, *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) { +func (mdp *macvtapDevicePlugin) GetDevicePluginOptions(context.Context, *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) { return nil, nil } -func (mdp *MacvtapDevicePlugin) Stop() error { +func (mdp *macvtapDevicePlugin) Stop() error { close(mdp.stopWatcher) return nil } diff --git a/pkg/deviceplugin/plugin_test.go b/pkg/deviceplugin/plugin_test.go index 2c5e5811..9ebc07cd 100644 --- a/pkg/deviceplugin/plugin_test.go +++ b/pkg/deviceplugin/plugin_test.go @@ -1,20 +1,20 @@ -package deviceplugin_test +package deviceplugin import ( "fmt" "math/rand" - "runtime" + "os" "strconv" "strings" "github.com/containernetworking/plugins/pkg/ns" "github.com/containernetworking/plugins/pkg/testutils" + "github.com/kubevirt/device-plugin-manager/pkg/dpm" "github.com/vishvananda/netlink" "golang.org/x/net/context" "google.golang.org/grpc/metadata" pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1" - . "github.com/kubevirt/macvtap-cni/pkg/deviceplugin" "github.com/kubevirt/macvtap-cni/pkg/util" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" @@ -56,18 +56,14 @@ func (s *ListAndWatchServerSendSpy) SetHeader(m metadata.MD) error { func (s *ListAndWatchServerSendSpy) SetTrailer(m metadata.MD) { } -var _ = Describe("Macvtap device plugin", func() { - var cleanup func() - var mvdp *MacvtapDevicePlugin +var _ = Describe("Macvtap", func() { var masterIfaceName string var masterIface netlink.Link - var sendSpy *ListAndWatchServerSendSpy + var testNs ns.NetNS BeforeEach(func() { - currNs, err := ns.GetCurrentNS() - Expect(err).NotTo(HaveOccurred()) - - testNs, err := testutils.NewNS() + var err error + testNs, err = testutils.NewNS() Expect(err).NotTo(HaveOccurred()) masterIfaceName = fmt.Sprintf("master%d", rand.Intn(100)) @@ -80,80 +76,187 @@ var _ = Describe("Macvtap device plugin", func() { err = netlink.LinkAdd(masterIface) Expect(err).NotTo(HaveOccurred()) + }) + + AfterEach(func() { + testNs.Do(func(ns ns.NetNS) error { + netlink.LinkDel(masterIface) + return nil + }) + }) + + Describe("plugin", func() { + var mvdp dpm.PluginInterface + var sendSpy *ListAndWatchServerSendSpy + + BeforeEach(func() { + mvdp = NewMacvtapDevicePlugin(masterIfaceName, masterIfaceName, "bridge", 0, testNs.Path()) + sendSpy = &ListAndWatchServerSendSpy{} + go func() { + err := mvdp.ListAndWatch(nil, sendSpy) + Expect(err).NotTo(HaveOccurred()) + }() + }) + + AfterEach(func() { + mvdp.(dpm.PluginInterfaceStop).Stop() + }) - mvdp = NewMacvtapDevicePlugin(masterIfaceName, masterIfaceName, "bridge", 0) + It("should allocate a new device upon request", func() { + ifaceName := masterIfaceName + "Mvp99" + req := &pluginapi.AllocateRequest{ + ContainerRequests: []*pluginapi.ContainerAllocateRequest{ + { + DevicesIDs: []string{ + ifaceName, + }, + }, + }, + } - sendSpy = &ListAndWatchServerSendSpy{} - go func() { - err := testNs.Do(func(ns ns.NetNS) error { - return mvdp.ListAndWatch(nil, sendSpy) + res, err := mvdp.Allocate(nil, req) + Expect(err).NotTo(HaveOccurred()) + + var iface netlink.Link + err = testNs.Do(func(ns ns.NetNS) error { + var err error + iface, err = netlink.LinkByName(ifaceName) + return err }) Expect(err).NotTo(HaveOccurred()) - }() + Expect(iface.Type()).To(Equal("macvtap")) - runtime.LockOSThread() - err = testNs.Set() - Expect(err).NotTo(HaveOccurred()) + dev := res.ContainerResponses[0].Devices[0] + index := iface.Attrs().Index + Expect(strings.HasSuffix(dev.ContainerPath, strconv.Itoa(index))).To(BeTrue()) + Expect(dev.HostPath).To(Equal(dev.ContainerPath)) + }) - cleanup = func() { - mvdp.Stop() - netlink.LinkDel(masterIface) - currNs.Set() - } - }) + Context("when master device does not exist", func() { + It("should not advertise devices", func() { + By("first advertising healthy devices", func() { + Eventually(func() int { + return sendSpy.calls + }).Should(Equal(1)) - AfterEach(func() { - cleanup() + Expect(sendSpy.last.Devices).To(HaveLen(100)) + }) + + By("then deleting the master device", func() { + err := testNs.Do(func(ns ns.NetNS) error { + return util.LinkDelete(masterIfaceName) + }) + Expect(err).NotTo(HaveOccurred()) + }) + + By("then no longer advertising devices", func() { + Eventually(func() int { + return sendSpy.calls + }).Should(Equal(2)) + + Expect(sendSpy.last.Devices).To(HaveLen(0)) + }) + }) + }) }) - It("should allocate a new device upon request", func() { - ifaceName := masterIfaceName + "Mvp99" - req := &pluginapi.AllocateRequest{ - ContainerRequests: []*pluginapi.ContainerAllocateRequest{ - { - DevicesIDs: []string{ - ifaceName, - }, - }, - }, - } + Describe("lister", func() { + var lister dpm.ListerInterface + var pluginListCh chan dpm.PluginNameList - res, err := mvdp.Allocate(nil, req) - Expect(err).NotTo(HaveOccurred()) + BeforeEach(func() { + pluginListCh = make(chan dpm.PluginNameList) + lister = NewMacvtapLister(testNs.Path()) + }) - iface, err := netlink.LinkByName(ifaceName) - Expect(err).NotTo(HaveOccurred()) - Expect(iface.Type()).To(Equal("macvtap")) + JustBeforeEach(func() { + go func() { + lister.Discover(pluginListCh) + }() + }) - dev := res.ContainerResponses[0].Devices[0] - index := iface.Attrs().Index - Expect(strings.HasSuffix(dev.ContainerPath, strconv.Itoa(index))).To(BeTrue()) - Expect(dev.HostPath).To(Equal(dev.ContainerPath)) - }) + AfterEach(func() { + close(pluginListCh) + }) - Context("when master device does not exist", func() { - It("should not advertise devices", func() { - By("first advertising healthy devices", func() { - Eventually(func() int { - return sendSpy.calls - }).Should(Equal(1)) + Context("WHEN provided a non empty configuration", func() { + resourceName := "dataplane" + mode := "vepa" + capacity := 30 + config := `[{"name":"%s","master":"%s","mode":"%s","capacity":%d}]` - Expect(sendSpy.last.Devices).To(HaveLen(100)) + BeforeEach(func() { + config = fmt.Sprintf(config, resourceName, masterIfaceName, mode, capacity) + os.Setenv(ConfigEnvironmentVariable, config) }) - By("then deleting the master device", func() { - err := util.LinkDelete(masterIfaceName) - Expect(err).NotTo(HaveOccurred()) + AfterEach(func() { + os.Unsetenv(ConfigEnvironmentVariable) }) - By("then no longer advertising devices", func() { - Eventually(func() int { - return sendSpy.calls - }).Should(Equal(2)) + It("SHOULD report the appropriate list of resources", func() { + Eventually(pluginListCh).Should(Receive(ConsistOf(resourceName))) + Consistently(pluginListCh).ShouldNot(Receive(Not(ConsistOf(resourceName)))) + + plugin := lister.NewPlugin(resourceName) + Expect(plugin.(*macvtapDevicePlugin).Name).To(Equal(resourceName)) + Expect(plugin.(*macvtapDevicePlugin).Master).To(Equal(masterIfaceName)) + Expect(plugin.(*macvtapDevicePlugin).Mode).To(Equal(mode)) + Expect(plugin.(*macvtapDevicePlugin).Capacity).To(Equal(capacity)) + }) + }) - Expect(sendSpy.last.Devices).To(HaveLen(0)) + Context("WHEN provided an empty configuration", func() { + BeforeEach(func() { + os.Setenv(ConfigEnvironmentVariable, "[]") }) + AfterEach(func() { + os.Unsetenv(ConfigEnvironmentVariable) + }) + + It("SHOULD update the list of available resources", func() { + const parentName = "bond0" + + By("initially reporting the appropriate list of resources", func() { + Eventually(pluginListCh).Should(Receive(BeEmpty())) + Consistently(pluginListCh).ShouldNot(Receive(Not(BeEmpty()))) + }) + + By("adding a new resource when a suitable macvtap parent appears", func() { + parent := netlink.NewLinkBond( + netlink.LinkAttrs{ + Name: parentName, + Namespace: netlink.NsFd(int(testNs.Fd())), + }, + ) + err := netlink.LinkAdd(parent) + Expect(err).NotTo(HaveOccurred()) + + Eventually(pluginListCh).Should(Receive(ConsistOf(parentName))) + Consistently(pluginListCh).ShouldNot(Receive(Not(ConsistOf(parentName)))) + + plugin := lister.NewPlugin(parentName) + Expect(plugin.(*macvtapDevicePlugin).Name).To(Equal(parentName)) + Expect(plugin.(*macvtapDevicePlugin).Master).To(Equal(parentName)) + Expect(plugin.(*macvtapDevicePlugin).Mode).To(Equal(DefaultMode)) + Expect(plugin.(*macvtapDevicePlugin).Capacity).To(Equal(DefaultCapacity)) + }) + + By("removing the resource when a suitable macvtap parent disappears", func() { + err := testNs.Do(func(ns ns.NetNS) error { + parent, err := netlink.LinkByName(parentName) + if err == nil { + err = netlink.LinkDel(parent) + } + return err + }) + Expect(err).NotTo(HaveOccurred()) + + Eventually(pluginListCh).Should(Receive(BeEmpty())) + Consistently(pluginListCh).ShouldNot(Receive(Not(BeEmpty()))) + }) + }) }) }) }) diff --git a/pkg/util/netlink.go b/pkg/util/netlink.go index 714dcd63..d3fc8ead 100644 --- a/pkg/util/netlink.go +++ b/pkg/util/netlink.go @@ -3,10 +3,12 @@ package util import ( "fmt" "net" + "os" "strings" "time" "github.com/vishvananda/netlink" + "github.com/vishvananda/netns" "github.com/containernetworking/cni/pkg/types/current" @@ -101,14 +103,80 @@ func LinkDelete(link string) error { return err } -// Listen for events on a specific interface and callback if any. The interface -// does not have to exist. Use the stop channel to stop listening. -func OnLinkEvent(name string, do func(), stop <-chan struct{}, errcb func(error)) { +func isLoopback(link netlink.Link) bool { + return link.Attrs().Flags&net.FlagLoopback != 0 +} + +func isSuitableMacvtapParent(link netlink.Link) bool { + if isLoopback(link) { + return false + } + + switch link.(type) { + case *netlink.Bond, *netlink.Device: + default: + return false + } + + return true +} + +// FindSuitableMacvtapParents lists all the links on the system and filters out +// those deemed inappropriate to be used as macvtap parents. +func FindSuitableMacvtapParents() ([]string, error) { + links, err := netlink.LinkList() + if err != nil { + return nil, err + } + + linkNames := make([]string, 0) + for _, link := range links { + if isSuitableMacvtapParent(link) { + linkNames = append(linkNames, link.Attrs().Name) + } + } + + return linkNames, nil +} + +// OnLinkEvent listens for events on a specific interface and namespace, and +// callbacks if any. See onLinkEvent for more details. +func OnLinkEvent(name string, nsPath string, do func(), stop <-chan struct{}, errcb func(error)) { + matcher := func(link netlink.Link) bool { + return name == link.Attrs().Name + } + + onLinkEvent(matcher, nsPath, do, stop, errcb) +} + +// OnSuitableMacvtapParentEvent listens for events on any suitable macvtap +// parent link on a given namespace and callbacks if any. See onLinkEvent +// for more details. +func OnSuitableMacvtapParentEvent(nsPath string, do func(), stop <-chan struct{}, errcb func(error)) { + onLinkEvent(isSuitableMacvtapParent, nsPath, do, stop, errcb) +} + +// onLinkEvent upkeeps a subscription to netlink events and callbacks for any +// that matches the predicate on the related link. +// The subscription might temporarily fail. On re-subscription, the callback is +// invoked to cover for events that might have been missed during that time. +// That means some spurious callbacks unrelated to the predicate might happen +// and the caller should account for it. For convenience, to avoid losing any +// relevant information between the time of this function call (or a previous +// time when the caller initializes state) and the time the subscription is +// effective, the callback is also invoked upon first subscription. As a +// summary, callback is invoked: +// +// * A first time, after first subscription +// * Once every re-subscription +// * On any event matching the predicate +// +func onLinkEvent(match func(netlink.Link) bool, nsPath string, do func(), stop <-chan struct{}, errcb func(error)) { done := make(chan struct{}) defer close(done) options := netlink.LinkSubscribeOptions{ - ListExisting: true, + ListExisting: false, ErrorCallback: func(err error) { errcb(fmt.Errorf("Error while listening on link events: %v", err)) }, @@ -117,13 +185,24 @@ func OnLinkEvent(name string, do func(), stop <-chan struct{}, errcb func(error) subscribed := false var netlinkCh chan netlink.LinkUpdate subscribe := func() { + ns, err := netns.GetFromPath(nsPath) + if err != nil { + errcb(fmt.Errorf("Could not open namespace: %v", err)) + return + } + defer ns.Close() + + options.Namespace = &ns netlinkCh = make(chan netlink.LinkUpdate) - err := netlink.LinkSubscribeWithOptions(netlinkCh, done, options) + err = netlink.LinkSubscribeWithOptions(netlinkCh, done, options) if err != nil { errcb(fmt.Errorf("Error while subscribing for link events: %v", err)) return } subscribed = true + + // Callback on every subscription + do() } subscribe() @@ -135,7 +214,7 @@ func OnLinkEvent(name string, do func(), stop <-chan struct{}, errcb func(error) subscribe() continue case <-stop: - break + return } } @@ -143,12 +222,12 @@ func OnLinkEvent(name string, do func(), stop <-chan struct{}, errcb func(error) select { case update, subscribed = <-netlinkCh: if subscribed { - if name == update.Link.Attrs().Name { + if match(update.Link) { do() } } case <-stop: - break + return } } } @@ -251,3 +330,8 @@ func configureArp(ifaceName string) error { return nil } + +// GetMainThreadNetNsPath returns the path of the main thread's namespace +func GetMainThreadNetNsPath() string { + return fmt.Sprintf("/proc/%d/ns/net", os.Getpid()) +} diff --git a/tests/e2e/macvtap_test.go b/tests/e2e/macvtap_test.go index 6786c82d..3e725b4e 100644 --- a/tests/e2e/macvtap_test.go +++ b/tests/e2e/macvtap_test.go @@ -81,10 +81,10 @@ var _ = Describe("macvtap-cni", func() { }) It("THEN a macvtap custom network resource is exposed", func() { - quantity := 50 + quantity := 100 expectedResourceName := v1.ResourceName(buildMacvtapResourceName(lowerDevice)) - waitForNodeResourceAvailability(1 * time.Minute, buildMacvtapResourceName(lowerDevice)) + waitForNodeResourceAvailability(1*time.Minute, buildMacvtapResourceName(lowerDevice)) nodes, _ := clientset.CoreV1().Nodes().List(metav1.ListOptions{}) for _, node := range nodes.Items { @@ -240,7 +240,7 @@ func waitForNodeResourceAvailability(timeout time.Duration, resourceName string) Expect(err).NotTo(HaveOccurred()) for _, node := range nodeList.Items { - if _, ok := node.Status.Capacity[v1.ResourceName(resourceName)]; ! ok { + if _, ok := node.Status.Capacity[v1.ResourceName(resourceName)]; !ok { return false } }