From 402fb8d9cd7d2f93a3e8d1f980829a6f6d1ae897 Mon Sep 17 00:00:00 2001
From: Tuomas Katila
Date: Tue, 20 Aug 2024 14:02:30 +0300
Subject: [PATCH 1/2] gpu: add support for CDI devices

Signed-off-by: Tuomas Katila
---
 cmd/gpu_plugin/README.md                   |  14 +
 cmd/gpu_plugin/gpu_plugin.go               |  74 ++++-
 cmd/gpu_plugin/gpu_plugin_test.go          | 292 +++++++++++++++++-
 .../gpu_plugin/base/intel-gpu-plugin.yaml  |   6 +
 pkg/controllers/gpu/controller_test.go     |  14 +
 5 files changed, 377 insertions(+), 23 deletions(-)

diff --git a/cmd/gpu_plugin/README.md b/cmd/gpu_plugin/README.md
index e706bb362..c59ca0259 100644
--- a/cmd/gpu_plugin/README.md
+++ b/cmd/gpu_plugin/README.md
@@ -16,6 +16,7 @@ Table of Contents
     * [Running GPU plugin as non-root](#running-gpu-plugin-as-non-root)
     * [Labels created by GPU plugin](#labels-created-by-gpu-plugin)
     * [SR-IOV use with the plugin](#sr-iov-use-with-the-plugin)
+    * [CDI support](#cdi-support)
     * [KMD and UMD](#kmd-and-umd)
 * [Issues with media workloads on multi-GPU setups](#issues-with-media-workloads-on-multi-gpu-setups)
     * [Workaround for QSV and VA-API](#workaround-for-qsv-and-va-api)
@@ -218,6 +219,19 @@ GPU plugin does __not__ setup SR-IOV. It has to be configured by the cluster adm
 
 GPU plugin does however support provisioning Virtual Functions (VFs) to containers for a SR-IOV enabled GPU. When the plugin detects a GPU with SR-IOV VFs configured, it will only provision the VFs and leaves the PF device on the host.
 
+### CDI support
+
+GPU plugin supports [CDI](https://github.com/container-orchestrated-devices/container-device-interface) to provide device details to the container. It does not yet provide any benefits compared to the traditional Kubernetes Device Plugin API. The CDI device specs will improve in the future with features that are not possible with the Device Plugin API.
+
+To enable CDI support, the container runtime has to support it. Support varies between runtimes and versions:
+* CRI-O supports CDI by default from v1.24.0 onwards.
+* Containerd supports CDI from 1.7.0 onwards. The 2.0.0 release will enable it by default.
+* Docker supports CDI from v25 onwards.
+
+Kubernetes CDI support is included since the 1.28 release. In 1.28 it needs to be enabled via the `DevicePluginCDIDevices` feature gate. From 1.29 onwards the feature is enabled by default.
+
+> *NOTE*: To use CDI outside of Kubernetes, for example with Docker or Podman, CDI specs can be generated with the [Intel CDI specs generator](https://github.com/intel/intel-resource-drivers-for-kubernetes/releases/tag/specs-generator-v0.1.0).
+
 ### KMD and UMD
 
 There are 3 different Kernel Mode Drivers (KMD) available: `i915 upstream`, `i915 backport` and `xe`:
diff --git a/cmd/gpu_plugin/gpu_plugin.go b/cmd/gpu_plugin/gpu_plugin.go
index 5350c98e3..f048b13d5 100644
--- a/cmd/gpu_plugin/gpu_plugin.go
+++ b/cmd/gpu_plugin/gpu_plugin.go
@@ -34,6 +34,7 @@ import (
 	"github.com/intel/intel-device-plugins-for-kubernetes/cmd/gpu_plugin/rm"
 	"github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/labeler"
 	dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
+	cdispec "tags.cncf.io/container-device-interface/specs-go"
 )
 
 const (
@@ -202,13 +203,10 @@ func packedPolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string {
 	return deviceIds
 }
 
-// Returns a slice of by-path Mounts for a cardPath&Name.
-// by-path files are searched from the given bypathDir.
-// In the by-path dir, any files that start with "pci-" will be added to mounts.
-func (dp *devicePlugin) bypathMountsForPci(cardPath, cardName, bypathDir string) []pluginapi.Mount {
+func (dp *devicePlugin) pciAddressForCard(cardPath, cardName string) (string, error) {
 	linkPath, err := os.Readlink(cardPath)
 	if err != nil {
-		return nil
+		return "", err
 	}
 
 	// Fetches the pci address for a drm card by reading the
@@ -220,9 +218,27 @@ func (dp *devicePlugin) bypathMountsForPci(cardPath, cardName, bypathDir string)
 	if !dp.pciAddressReg.MatchString(pciAddress) {
 		klog.Warningf("Invalid pci address for %s: %s", cardPath, pciAddress)
 
-		return nil
+		return "", os.ErrInvalid
 	}
 
+	return pciAddress, nil
+}
+
+func pciDeviceIDForCard(cardPath string) (string, error) {
+	idPath := filepath.Join(cardPath, "device", "device")
+
+	idBytes, err := os.ReadFile(idPath)
+	if err != nil {
+		return "", err
+	}
+
+	return strings.Split(string(idBytes), "\n")[0], nil
+}
+
+// Returns a slice of by-path Mounts for a pciAddress.
+// by-path files are searched from the given bypathDir.
+// In the by-path dir, any files that start with "pci-" will be added to mounts.
+func (dp *devicePlugin) bypathMountsForPci(pciAddress, bypathDir string) []pluginapi.Mount {
 	files, err := os.ReadDir(bypathDir)
 	if err != nil {
 		klog.Warningf("Failed to read by-path directory: %+v", err)
@@ -481,6 +497,45 @@ func (dp *devicePlugin) createDeviceSpecsFromDrmFiles(cardPath string) []plugina
 	return specs
 }
 
+func (dp *devicePlugin) createMountsAndCDIDevices(cardPath, name string, devSpecs []pluginapi.DeviceSpec) ([]pluginapi.Mount, *cdispec.Spec) {
+	mounts := []pluginapi.Mount{}
+
+	if dp.bypathFound {
+		if pciAddr, pciErr := dp.pciAddressForCard(cardPath, name); pciErr == nil {
+			mounts = dp.bypathMountsForPci(pciAddr, dp.bypathDir)
+		}
+	}
+
+	spec := &cdispec.Spec{
+		Version: dpapi.CDIVersion,
+		Kind:    dpapi.CDIVendor + "/gpu",
+		Devices: make([]cdispec.Device, 1),
+	}
+
+	spec.Devices[0].Name = name
+
+	cedits := &spec.Devices[0].ContainerEdits
+
+	for _, dspec := range devSpecs {
+		cedits.DeviceNodes = append(cedits.DeviceNodes, &cdispec.DeviceNode{
+			HostPath:    dspec.HostPath,
+			Path:        dspec.ContainerPath,
+			Permissions: dspec.Permissions,
+		})
+	}
+
+	for _, mount := range mounts {
+		cedits.Mounts = append(cedits.Mounts, &cdispec.Mount{
+			HostPath:      mount.HostPath,
+			ContainerPath: mount.ContainerPath,
+			Type:          "none",
+			Options:       []string{"bind", "ro"},
+		})
+	}
+
+	return mounts, spec
+}
+
 func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
 	files, err := os.ReadDir(dp.sysfsDir)
 	if err != nil {
@@ -509,12 +564,9 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
 			continue
 		}
 
-		mounts := []pluginapi.Mount{}
-		if dp.bypathFound {
-			mounts = dp.bypathMountsForPci(cardPath, name, dp.bypathDir)
-		}
+		mounts, cdiDevices := dp.createMountsAndCDIDevices(cardPath, name, devSpecs)
 
-		deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devSpecs, mounts, nil, nil, nil)
+		deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devSpecs, mounts, nil, nil, cdiDevices)
 
 		for i := 0; i < dp.options.sharedDevNum; i++ {
 			devID := fmt.Sprintf("%s-%d", name, i)
diff --git a/cmd/gpu_plugin/gpu_plugin_test.go b/cmd/gpu_plugin/gpu_plugin_test.go
index 230e0629f..447a87e53 100644
--- a/cmd/gpu_plugin/gpu_plugin_test.go
+++ b/cmd/gpu_plugin/gpu_plugin_test.go
@@ -29,6 +29,7 @@ import (
 
 	"github.com/intel/intel-device-plugins-for-kubernetes/cmd/gpu_plugin/rm"
 	dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
+	cdispec "tags.cncf.io/container-device-interface/specs-go"
 )
 
 func init() {
@@ -402,14 +403,14 @@
func TestScan(t *testing.T) { t.Run(tc.name, func(t *testing.T) { root, err := os.MkdirTemp("", "test_new_device_plugin") if err != nil { - t.Fatalf("can't create temporary directory: %+v", err) + t.Fatalf("Can't create temporary directory: %+v", err) } // dirs/files need to be removed for the next test defer os.RemoveAll(root) sysfs, devfs, err := createTestFiles(root, tc) if err != nil { - t.Errorf("unexpected error: %+v", err) + t.Errorf("Unexpected error: %+v", err) } plugin := newDevicePlugin(sysfs, devfs, tc.options) @@ -421,7 +422,7 @@ func TestScan(t *testing.T) { err = plugin.Scan(notifier) // Scans in GPU plugin never fail if err != nil { - t.Errorf("unexpected error: %+v", err) + t.Errorf("Unexpected error: %+v", err) } if tc.expectedI915Devs != notifier.i915Count { t.Errorf("Expected %d, discovered %d devices (i915)", @@ -464,14 +465,14 @@ func TestScanFails(t *testing.T) { t.Run(tc.name, func(t *testing.T) { root, err := os.MkdirTemp("", "test_new_device_plugin") if err != nil { - t.Fatalf("can't create temporary directory: %+v", err) + t.Fatalf("Can't create temporary directory: %+v", err) } // dirs/files need to be removed for the next test defer os.RemoveAll(root) sysfs, devfs, err := createTestFiles(root, tc) if err != nil { - t.Errorf("unexpected error: %+v", err) + t.Errorf("Unexpected error: %+v", err) } plugin := newDevicePlugin(sysfs, devfs, tc.options) @@ -484,7 +485,7 @@ func TestScanFails(t *testing.T) { err = plugin.Scan(notifier) if err == nil { - t.Error("unexpected nil error") + t.Error("Unexpected nil error") } }) } @@ -544,14 +545,14 @@ func TestScanWithRmAndTiles(t *testing.T) { t.Run(tc.name, func(t *testing.T) { root, err := os.MkdirTemp("", "test_new_device_plugin") if err != nil { - t.Fatalf("can't create temporary directory: %+v", err) + t.Fatalf("Can't create temporary directory: %+v", err) } // dirs/files need to be removed for the next test defer os.RemoveAll(root) sysfs, devfs, err := createTestFiles(root, tc) if err != nil { - t.Errorf("unexpected error: %+v", err) + t.Errorf("Unexpected error: %+v", err) } plugin := newDevicePlugin(sysfs, devfs, tc.options) @@ -565,10 +566,10 @@ func TestScanWithRmAndTiles(t *testing.T) { err = plugin.Scan(notifier) if err != nil { - t.Error("unexpected error") + t.Error("Unexpected error") } if rm.tileCount != expectedTileCounts[i] { - t.Error("unexpected tilecount for RM") + t.Error("Unexpected tilecount for RM") } }) } @@ -618,6 +619,7 @@ func TestBypath(t *testing.T) { desc string linkpath string bypathFiles []string + pciAddrOk bool mountCount int } @@ -628,36 +630,42 @@ func TestBypath(t *testing.T) { "card with two by-path files", "00.10.2/00.334.302/0.0.1.00/0000:0f:05.0/drm/" + cardName, []string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"}, + true, 2, }, { "different by-path files", "00.10.2/00.334.302/0.0.1.00/0000:ff:05.0/drm/" + cardName, []string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"}, + true, 0, }, { "invalid pci address", "00.10.2/00.334.302/0.0.1.00/000:ff:05.1/drm/" + cardName, []string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"}, + false, 0, }, { "symlink without card", "00.10.2/00.334.302/0.0.1.00/0000:0f:05.0/drm", []string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"}, + false, 0, }, { "no symlink", "", []string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"}, + false, 0, }, { "no by-path files", "00.10.2/00.334.302/0.0.1.00/0000:0f:05.0/drm/" + cardName, []string{}, + true, 0, }, } @@ -665,7 +673,7 @@ func TestBypath(t *testing.T) { for _, td := range 
tds { root, err := os.MkdirTemp("", "test_bypath_mounting") if err != nil { - t.Fatalf("can't create temporary directory: %+v", err) + t.Fatalf("Can't create temporary directory: %+v", err) } // dirs/files need to be removed for the next test defer os.RemoveAll(root) @@ -674,7 +682,17 @@ func TestBypath(t *testing.T) { drmPath, byPath := createBypathTestFiles(t, cardName, root, td.linkpath, td.bypathFiles) - mounts := plugin.bypathMountsForPci(drmPath, cardName, byPath) + pciAddr, pciErr := plugin.pciAddressForCard(drmPath, cardName) + + if pciErr != nil && td.pciAddrOk { + t.Errorf("%s: failed to retrieve pci address when it should have", td.desc) + } + + if pciErr != nil { + continue + } + + mounts := plugin.bypathMountsForPci(pciAddr, byPath) if len(mounts) != td.mountCount { t.Errorf("%s: Wrong number of mounts %d vs. %d", td.desc, len(mounts), td.mountCount) @@ -696,3 +714,253 @@ func TestBypath(t *testing.T) { } } } + +func TestPciDeviceForCard(t *testing.T) { + root, err := os.MkdirTemp("", "test_pci_device_for_card") + if err != nil { + t.Fatalf("Can't create temporary directory: %+v", err) + } + // dirs/files need to be removed for the next test + defer os.RemoveAll(root) + + sysfs := path.Join(root, "sys") + + cardPath := filepath.Join(sysfs, "class", "drm", "card0") + cardDevicePath := filepath.Join(cardPath, "device") + + if err := os.MkdirAll(cardDevicePath, 0750); err != nil { + t.Fatalf("Card device path creation failed: %+v", err) + } + + data := "0x5959" + + err = os.WriteFile(filepath.Join(cardDevicePath, "device"), []byte(data), 0o600) + if err != nil { + t.Fatalf("Device id write failed: %+v", err) + } + + id, err := pciDeviceIDForCard(cardPath) + + if err != nil { + t.Errorf("Failed to get device id for card: %+v", err) + } + + if id != data { + t.Errorf("Wrong id received %s vs %s", id, data) + } + + // Check bad device + + cardPath = filepath.Join(sysfs, "class", "drm", "card1") + cardDevicePath = filepath.Join(cardPath, "device") + + if err := os.MkdirAll(cardDevicePath, 0750); err != nil { + t.Fatalf("Card device path creation failed: %+v", err) + } + + err = os.WriteFile(filepath.Join(cardDevicePath, "devicebad"), []byte(data), 0o600) + if err != nil { + t.Fatalf("Device id write failed: %+v", err) + } + + id, err = pciDeviceIDForCard(cardPath) + + if err == nil { + t.Errorf("ID received when it shouldn't be possible: %s", id) + } +} + +type symlinkItem struct { + old string + new string +} + +func createSymlinks(t *testing.T, base string, links []symlinkItem) { + for _, link := range links { + linkOld := filepath.Join(base, link.old) + linkNew := filepath.Join(base, link.new) + + if _, err := os.Stat(linkOld); err != nil { + if err := os.MkdirAll(linkOld, 0o750); err != nil && !errors.Is(err, os.ErrExist) { + t.Fatalf("Failed to create symlink base dir: %+v", err) + } + } + + d := filepath.Dir(linkNew) + if err := os.MkdirAll(d, 0o750); err != nil { + t.Fatal("Failed to create symlink new dir", err) + } + + if err := os.Symlink(linkOld, linkNew); err != nil { + t.Fatal("Failed to create symlink from old to new", err) + } + } +} + +func createFiles(t *testing.T, base string, files map[string][]byte) { + for file, content := range files { + fp := filepath.Join(base, file) + dir := filepath.Dir(fp) + + if err := os.MkdirAll(dir, 0o750); err != nil { + t.Fatal("Failed to create dev directories", err) + } + + if err := os.WriteFile(fp, content, 0o600); err != nil { + t.Fatal("Failed to create dev file", err) + } + } +} + +func createDirs(t *testing.T, base string, 
dirs []string) { + for _, dir := range dirs { + if err := os.MkdirAll(filepath.Join(base, dir), 0o750); err != nil { + t.Fatal("Failed to create sysfs directories", err) + } + } +} + +func TestCDIDeviceInclusion(t *testing.T) { + root, err := os.MkdirTemp("", "test_cdidevice") + if err != nil { + t.Fatalf("Can't create temporary directory: %+v", err) + } + // dirs/files need to be removed for the next test + defer os.RemoveAll(root) + + sysfs := path.Join(root, "sys") + devfs := path.Join(root, "dev") + + sysfslinks := []symlinkItem{ + {"/0042:01:02.0", "/class/drm/card0"}, + {"/0042:01:05.0", "/class/drm/card1"}, + {"driver/i915", "/class/drm/card0/device/driver"}, + {"driver/xe", "/class/drm/card1/device/driver"}, + } + + devfslinks := []symlinkItem{ + {"/dri/card0", "/dri/by-path/pci-0042:01:02.0-card"}, + {"/dri/renderD128", "/dri/by-path/pci-0042:01:02.0-render"}, + {"/dri/card1", "/dri/by-path/pci-0042:01:05.0-card"}, + {"/dri/renderD129", "/dri/by-path/pci-0042:01:05.0-render"}, + } + + sysfsDirs := []string{ + "class/drm/card0/device/drm/card0", + "class/drm/card0/device/drm/renderD128", + "class/drm/card1/device/drm/card1", + "class/drm/card1/device/drm/renderD129", + } + + sysfsFiles := map[string][]byte{ + "class/drm/card0/device/device": []byte("0x9a49"), + "class/drm/card0/device/vendor": []byte("0x8086"), + "class/drm/card1/device/device": []byte("0x9a48"), + "class/drm/card1/device/vendor": []byte("0x8086"), + } + + devfsfiles := map[string][]byte{ + "/dri/card0": []byte("1"), + "/dri/renderD128": []byte("1"), + "/dri/card1": []byte("1"), + "/dri/renderD129": []byte("1"), + } + + createSymlinks(t, sysfs, sysfslinks) + createFiles(t, devfs, devfsfiles) + createFiles(t, sysfs, sysfsFiles) + createDirs(t, sysfs, sysfsDirs) + createSymlinks(t, devfs, devfslinks) + + plugin := newDevicePlugin(sysfs+"/class/drm", devfs+"/dri", cliOptions{sharedDevNum: 1}) + plugin.bypathFound = true + + tree, err := plugin.scan() + + if err != nil { + t.Error("Failed to get device id for card") + } + + refTree := dpapi.NewDeviceTree() + refTree.AddDevice("i915", "card0-0", dpapi.NewDeviceInfo("Healthy", []v1beta1.DeviceSpec{ + {ContainerPath: devfs + "/dri/card0", HostPath: devfs + "/dri/card0", Permissions: "rw"}, + {ContainerPath: devfs + "/dri/renderD128", HostPath: devfs + "/dri/renderD128", Permissions: "rw"}, + }, []v1beta1.Mount{ + {ContainerPath: devfs + "/dri/by-path/pci-0042:01:02.0-card", HostPath: devfs + "/dri/by-path/pci-0042:01:02.0-card", ReadOnly: true}, + {ContainerPath: devfs + "/dri/by-path/pci-0042:01:02.0-render", HostPath: devfs + "/dri/by-path/pci-0042:01:02.0-render", ReadOnly: true}, + }, nil, nil, &cdispec.Spec{ + Version: dpapi.CDIVersion, + Kind: dpapi.CDIVendor + "/gpu", + Devices: []cdispec.Device{ + { + Name: "card0", + ContainerEdits: cdispec.ContainerEdits{ + DeviceNodes: []*cdispec.DeviceNode{ + {Path: devfs + "/dri/card0", HostPath: devfs + "/dri/card0", Permissions: "rw"}, + {Path: devfs + "/dri/renderD128", HostPath: devfs + "/dri/renderD128", Permissions: "rw"}, + }, + Mounts: []*cdispec.Mount{ + { + HostPath: devfs + "/dri/by-path/pci-0042:01:02.0-card", + ContainerPath: devfs + "/dri/by-path/pci-0042:01:02.0-card", + Options: []string{"bind", "ro"}, + Type: "none", + }, + { + HostPath: devfs + "/dri/by-path/pci-0042:01:02.0-render", + ContainerPath: devfs + "/dri/by-path/pci-0042:01:02.0-render", + Options: []string{"bind", "ro"}, + Type: "none", + }, + }, + }, + }, + }, + })) + refTree.AddDevice("xe", "card1-0", dpapi.NewDeviceInfo("Healthy", 
[]v1beta1.DeviceSpec{ + {ContainerPath: devfs + "/dri/card1", HostPath: devfs + "/dri/card1", Permissions: "rw"}, + {ContainerPath: devfs + "/dri/renderD129", HostPath: devfs + "/dri/renderD129", Permissions: "rw"}, + }, []v1beta1.Mount{ + {ContainerPath: devfs + "/dri/by-path/pci-0042:01:05.0-card", HostPath: devfs + "/dri/by-path/pci-0042:01:05.0-card", ReadOnly: true}, + {ContainerPath: devfs + "/dri/by-path/pci-0042:01:05.0-render", HostPath: devfs + "/dri/by-path/pci-0042:01:05.0-render", ReadOnly: true}, + }, nil, nil, &cdispec.Spec{ + Version: dpapi.CDIVersion, + Kind: dpapi.CDIVendor + "/gpu", + Devices: []cdispec.Device{ + { + Name: "card1", + ContainerEdits: cdispec.ContainerEdits{ + DeviceNodes: []*cdispec.DeviceNode{ + {Path: devfs + "/dri/card1", HostPath: devfs + "/dri/card1", Permissions: "rw"}, + {Path: devfs + "/dri/renderD129", HostPath: devfs + "/dri/renderD129", Permissions: "rw"}, + }, + Mounts: []*cdispec.Mount{ + { + HostPath: devfs + "/dri/by-path/pci-0042:01:05.0-card", + ContainerPath: devfs + "/dri/by-path/pci-0042:01:05.0-card", + Options: []string{"bind", "ro"}, + Type: "none", + }, + { + HostPath: devfs + "/dri/by-path/pci-0042:01:05.0-render", + ContainerPath: devfs + "/dri/by-path/pci-0042:01:05.0-render", + Options: []string{"bind", "ro"}, + Type: "none", + }, + }, + }, + }, + }, + })) + + if !reflect.DeepEqual(tree, refTree) { + t.Error("Received device tree isn't expected\n", tree, "\n", refTree) + } + + if tree.DeviceTypeCount("i915") != 1 { + t.Error("Invalid count for device (i915)") + } + if tree.DeviceTypeCount("xe") != 1 { + t.Error("Invalid count for device (xe)") + } +} diff --git a/deployments/gpu_plugin/base/intel-gpu-plugin.yaml b/deployments/gpu_plugin/base/intel-gpu-plugin.yaml index 3092c4e45..b64b5f10a 100644 --- a/deployments/gpu_plugin/base/intel-gpu-plugin.yaml +++ b/deployments/gpu_plugin/base/intel-gpu-plugin.yaml @@ -45,6 +45,8 @@ spec: readOnly: true - name: kubeletsockets mountPath: /var/lib/kubelet/device-plugins + - name: cdipath + mountPath: /var/run/cdi volumes: - name: devfs hostPath: @@ -55,5 +57,9 @@ spec: - name: kubeletsockets hostPath: path: /var/lib/kubelet/device-plugins + - name: cdipath + hostPath: + path: /var/run/cdi + type: DirectoryOrCreate nodeSelector: kubernetes.io/arch: amd64 diff --git a/pkg/controllers/gpu/controller_test.go b/pkg/controllers/gpu/controller_test.go index f244c8c4b..9edda978f 100644 --- a/pkg/controllers/gpu/controller_test.go +++ b/pkg/controllers/gpu/controller_test.go @@ -39,6 +39,7 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet yes := true no := false + directoryOrCreate := v1.HostPathDirectoryOrCreate maxUnavailable := intstr.FromInt(1) maxSurge := intstr.FromInt(0) @@ -120,6 +121,10 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet Name: "kubeletsockets", MountPath: "/var/lib/kubelet/device-plugins", }, + { + Name: "cdipath", + MountPath: "/var/run/cdi", + }, }, }, }, @@ -149,6 +154,15 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet }, }, }, + { + Name: "cdipath", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/var/run/cdi", + Type: &directoryOrCreate, + }, + }, + }, }, }, }, From 8dd5b4aa4438f3ace4677676662079ff364473da Mon Sep 17 00:00:00 2001 From: Tuomas Katila Date: Mon, 2 Sep 2024 21:42:07 +0300 Subject: [PATCH 2/2] e2e: gpu: add tests for different deployments Signed-off-by: Tuomas Katila --- .golangci.yml | 2 + test/e2e/dlb/dlb.go | 4 +- 
test/e2e/dsa/dsa.go | 2 +- test/e2e/fpga/fpga.go | 2 +- test/e2e/gpu/gpu.go | 153 ++++++++++++++++++++++++------- test/e2e/iaa/iaa.go | 2 +- test/e2e/operator/operator.go | 2 +- test/e2e/qat/qatplugin_dpdk.go | 2 +- test/e2e/qat/qatplugin_kernel.go | 2 +- test/e2e/sgx/sgx.go | 6 +- test/e2e/utils/utils.go | 20 +++- 11 files changed, 151 insertions(+), 46 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index a0d05ef61..c094331ff 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -73,6 +73,8 @@ issues: - path: test/e2e/ linters: - wsl + - gocognit + - gocyclo - path: cmd/gpu_fakedev/ linters: - wsl diff --git a/test/e2e/dlb/dlb.go b/test/e2e/dlb/dlb.go index fa6304909..3d0666e45 100644 --- a/test/e2e/dlb/dlb.go +++ b/test/e2e/dlb/dlb.go @@ -84,7 +84,7 @@ func describe() { ginkgo.Context("When PF resources are available [Resource:pf]", func() { ginkgo.BeforeEach(func(ctx context.Context) { resource := v1.ResourceName("dlb.intel.com/pf") - if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resource, 30*time.Second); err != nil { + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resource, 30*time.Second, utils.WaitForPositiveResource); err != nil { framework.Failf("unable to wait for nodes to have positive allocatable resource %s: %v", resource, err) } }) @@ -101,7 +101,7 @@ func describe() { ginkgo.Context("When VF resources are available [Resource:vf]", func() { ginkgo.BeforeEach(func(ctx context.Context) { resource := v1.ResourceName("dlb.intel.com/vf") - if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resource, 30*time.Second); err != nil { + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resource, 30*time.Second, utils.WaitForPositiveResource); err != nil { framework.Failf("unable to wait for nodes to have positive allocatable resource %s: %v", resource, err) } }) diff --git a/test/e2e/dsa/dsa.go b/test/e2e/dsa/dsa.go index e2e871251..322783dce 100644 --- a/test/e2e/dsa/dsa.go +++ b/test/e2e/dsa/dsa.go @@ -97,7 +97,7 @@ func describe() { ginkgo.Context("When DSA resources are available [Resource:dedicated]", func() { ginkgo.BeforeEach(func(ctx context.Context) { ginkgo.By("checking if the resource is allocatable") - if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "dsa.intel.com/wq-user-dedicated", 300*time.Second); err != nil { + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "dsa.intel.com/wq-user-dedicated", 300*time.Second, utils.WaitForPositiveResource); err != nil { framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err) } }) diff --git a/test/e2e/fpga/fpga.go b/test/e2e/fpga/fpga.go index acc224017..fc0c6a96a 100644 --- a/test/e2e/fpga/fpga.go +++ b/test/e2e/fpga/fpga.go @@ -129,7 +129,7 @@ func runDevicePlugin(ctx context.Context, fmw *framework.Framework, pluginKustom ginkgo.By("checking if the resource is allocatable") - if err = utils.WaitForNodesWithResource(ctx, fmw.ClientSet, resource, 30*time.Second); err != nil { + if err = utils.WaitForNodesWithResource(ctx, fmw.ClientSet, resource, 30*time.Second, utils.WaitForPositiveResource); err != nil { framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err) } } diff --git a/test/e2e/gpu/gpu.go b/test/e2e/gpu/gpu.go index 783d556cb..e53a3dbc5 100644 --- a/test/e2e/gpu/gpu.go +++ b/test/e2e/gpu/gpu.go @@ -37,51 +37,112 @@ import ( const ( kustomizationYaml = "deployments/gpu_plugin/kustomization.yaml" + monitoringYaml = 
"deployments/gpu_plugin/overlays/monitoring_shared-dev_nfd/kustomization.yaml" + rmEnabledYaml = "deployments/gpu_plugin/overlays/fractional_resources//kustomization.yaml" + nfdRulesYaml = "deployments/nfd/overlays/node-feature-rules/kustomization.yaml" containerName = "testcontainer" tfKustomizationYaml = "deployments/gpu_tensorflow_test/kustomization.yaml" tfPodName = "training-pod" ) func init() { - ginkgo.Describe("GPU plugin [Device:gpu]", describe) + // This needs to be Ordered because only one GPU plugin can function on the node at once. + ginkgo.Describe("GPU plugin [Device:gpu]", describe, ginkgo.Ordered) +} + +func createPluginAndVerifyExistence(f *framework.Framework, ctx context.Context, kustomizationPath, baseResource string) { + ginkgo.By("deploying GPU plugin") + e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "apply", "-k", filepath.Dir(kustomizationPath)) + + ginkgo.By("waiting for GPU plugin's availability") + podList, err := e2epod.WaitForPodsWithLabelRunningReady(ctx, f.ClientSet, f.Namespace.Name, + labels.Set{"app": "intel-gpu-plugin"}.AsSelector(), 1 /* one replica */, 100*time.Second) + if err != nil { + e2edebug.DumpAllNamespaceInfo(ctx, f.ClientSet, f.Namespace.Name) + e2ekubectl.LogFailedContainers(ctx, f.ClientSet, f.Namespace.Name, framework.Logf) + framework.Failf("unable to wait for all pods to be running and ready: %v", err) + } + + ginkgo.By("checking GPU plugin's securityContext") + if err = utils.TestPodsFileSystemInfo(podList.Items); err != nil { + framework.Failf("container filesystem info checks failed: %v", err) + } + + ginkgo.By("checking if the resource is allocatable") + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, v1.ResourceName(baseResource), 30*time.Second, utils.WaitForPositiveResource); err != nil { + framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err) + } } func describe() { f := framework.NewDefaultFramework("gpuplugin") f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged - kustomizationPath, errFailedToLocateRepoFile := utils.LocateRepoFile(kustomizationYaml) + vanillaPath, errFailedToLocateRepoFile := utils.LocateRepoFile(kustomizationYaml) if errFailedToLocateRepoFile != nil { framework.Failf("unable to locate %q: %v", kustomizationYaml, errFailedToLocateRepoFile) } - ginkgo.BeforeEach(func(ctx context.Context) { - ginkgo.By("deploying GPU plugin") - e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "apply", "-k", filepath.Dir(kustomizationPath)) - - ginkgo.By("waiting for GPU plugin's availability") - podList, err := e2epod.WaitForPodsWithLabelRunningReady(ctx, f.ClientSet, f.Namespace.Name, - labels.Set{"app": "intel-gpu-plugin"}.AsSelector(), 1 /* one replica */, 100*time.Second) - if err != nil { - e2edebug.DumpAllNamespaceInfo(ctx, f.ClientSet, f.Namespace.Name) - e2ekubectl.LogFailedContainers(ctx, f.ClientSet, f.Namespace.Name, framework.Logf) - framework.Failf("unable to wait for all pods to be running and ready: %v", err) - } - - ginkgo.By("checking GPU plugin's securityContext") - if err = utils.TestPodsFileSystemInfo(podList.Items); err != nil { - framework.Failf("container filesystem info checks failed: %v", err) - } - }) + monitoringPath, errFailedToLocateRepoFile := utils.LocateRepoFile(monitoringYaml) + if errFailedToLocateRepoFile != nil { + framework.Failf("unable to locate %q: %v", monitoringYaml, errFailedToLocateRepoFile) + } + + nfdRulesPath, errFailedToLocateRepoFile := utils.LocateRepoFile(nfdRulesYaml) + if errFailedToLocateRepoFile != nil { + 
framework.Failf("unable to locate %q: %v", nfdRulesYaml, errFailedToLocateRepoFile) + } + + resourceManagerPath, errFailedToLocateRepoFile := utils.LocateRepoFile(rmEnabledYaml) + if errFailedToLocateRepoFile != nil { + framework.Failf("unable to locate %q: %v", rmEnabledYaml, errFailedToLocateRepoFile) + } + + ginkgo.Context("When GPU plugin is deployed [Resource:i915]", func() { + ginkgo.AfterEach(func(ctx context.Context) { + framework.Logf("Removing gpu-plugin manually") - ginkgo.Context("When GPU resources are available [Resource:i915]", func() { - ginkgo.BeforeEach(func(ctx context.Context) { - ginkgo.By("checking if the resource is allocatable") - if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "gpu.intel.com/i915", 30*time.Second); err != nil { - framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err) + e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "delete", "-k", filepath.Dir(vanillaPath)) + + framework.Logf("Waiting for i915 resources to go to zero") + + // Wait for resources to go to zero + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "gpu.intel.com/i915", 30*time.Second, utils.WaitForZeroResource); err != nil { + framework.Failf("unable to wait for nodes to have no resources: %v", err) } }) + ginkgo.It("checks availability of GPU resources [App:busybox]", func(ctx context.Context) { + createPluginAndVerifyExistence(f, ctx, vanillaPath, "gpu.intel.com/i915") + + podListFunc := framework.ListObjects(f.ClientSet.CoreV1().Pods(f.Namespace.Name).List, metav1.ListOptions{}) + + pods, err := podListFunc(ctx) + if err != nil { + framework.Failf("Couldn't list pods: %+v", err) + } + + if len(pods.Items) != 1 { + framework.Failf("Invalid amount of Pods listed %d", len(pods.Items)) + } + + pluginPod := pods.Items[0] + + ginkgo.By("checking if CDI path is included in volumes") + found := false + for _, v := range pluginPod.Spec.Volumes { + if v.HostPath != nil && v.HostPath.Path == "/var/run/cdi" { + framework.Logf("CDI volume found") + found = true + + break + } + } + + if !found { + framework.Fail("Couldn't find CDI volume in GPU plugin deployment") + } + ginkgo.By("submitting a pod requesting GPU resources") podSpec := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{Name: "gpuplugin-tester"}, @@ -122,7 +183,41 @@ func describe() { framework.Logf("found card and renderD from the log") }) + ginkgo.Context("When [Deployment:monitoring] deployment is applied [Resource:i915]", func() { + ginkgo.It("check if monitoring resource is available", func(ctx context.Context) { + createPluginAndVerifyExistence(f, ctx, monitoringPath, "gpu.intel.com/i915") + + ginkgo.By("checking if the monitoring resource is allocatable") + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "gpu.intel.com/i915_monitoring", 30*time.Second, utils.WaitForPositiveResource); err != nil { + framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err) + } + }) + }) + + ginkgo.Context("When [Deployment:resourceManager] deployment is applied [Resource:i915]", func() { + ginkgo.It("check if i915 resources is available", func(ctx context.Context) { + e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "apply", "-k", filepath.Dir(nfdRulesPath)) + + createPluginAndVerifyExistence(f, ctx, resourceManagerPath, "gpu.intel.com/i915") + + // To speed up extended resource detection, let's restart NFD worker + e2ekubectl.RunKubectlOrDie("node-feature-discovery", "rollout", "restart", "daemonset", "nfd-worker") + + ginkgo.By("checking if the millicores 
resource is allocatable") + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "gpu.intel.com/millicores", 30*time.Second, utils.WaitForPositiveResource); err != nil { + framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err) + } + + ginkgo.By("checking if the tiles resource is allocatable") + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "gpu.intel.com/tiles", 30*time.Second, utils.WaitForPositiveResource); err != nil { + framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err) + } + }) + }) + ginkgo.It("run a small workload on the GPU [App:tensorflow]", func(ctx context.Context) { + createPluginAndVerifyExistence(f, ctx, vanillaPath, "gpu.intel.com/i915") + kustomYaml, err := utils.LocateRepoFile(tfKustomizationYaml) if err != nil { framework.Failf("unable to locate %q: %v", tfKustomizationYaml, err) @@ -146,13 +241,9 @@ func describe() { }) ginkgo.Context("When GPU resources are available [Resource:xe]", func() { - ginkgo.BeforeEach(func(ctx context.Context) { - ginkgo.By("checking if the resource is allocatable") - if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "gpu.intel.com/xe", 30*time.Second); err != nil { - framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err) - } - }) ginkgo.It("checks availability of GPU resources [App:busybox]", func(ctx context.Context) { + createPluginAndVerifyExistence(f, ctx, vanillaPath, "gpu.intel.com/xe") + ginkgo.By("submitting a pod requesting GPU resources") podSpec := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{Name: "gpuplugin-tester"}, diff --git a/test/e2e/iaa/iaa.go b/test/e2e/iaa/iaa.go index 669fef1f7..8d575226e 100644 --- a/test/e2e/iaa/iaa.go +++ b/test/e2e/iaa/iaa.go @@ -97,7 +97,7 @@ func describe() { ginkgo.Context("When IAA resources are available [Resource:dedicated]", func() { ginkgo.BeforeEach(func(ctx context.Context) { ginkgo.By("checking if the resource is allocatable") - if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "iaa.intel.com/wq-user-dedicated", 300*time.Second); err != nil { + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "iaa.intel.com/wq-user-dedicated", 300*time.Second, utils.WaitForPositiveResource); err != nil { framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err) } }) diff --git a/test/e2e/operator/operator.go b/test/e2e/operator/operator.go index 6eb3122de..48b0803a1 100644 --- a/test/e2e/operator/operator.go +++ b/test/e2e/operator/operator.go @@ -89,7 +89,7 @@ func testPluginWithOperator(deviceName string, resourceNames []v1.ResourceName, } for _, resourceName := range resourceNames { - if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resourceName, timeout); err != nil { + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resourceName, timeout, utils.WaitForPositiveResource); err != nil { framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err) } } diff --git a/test/e2e/qat/qatplugin_dpdk.go b/test/e2e/qat/qatplugin_dpdk.go index 0852dc1e6..8d513041c 100644 --- a/test/e2e/qat/qatplugin_dpdk.go +++ b/test/e2e/qat/qatplugin_dpdk.go @@ -98,7 +98,7 @@ func describeQatDpdkPlugin() { } ginkgo.By("checking if the resource is allocatable") - if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resourceName, 30*time.Second); err != nil { + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resourceName, 30*time.Second, utils.WaitForPositiveResource); err != nil { 
framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err) } }) diff --git a/test/e2e/qat/qatplugin_kernel.go b/test/e2e/qat/qatplugin_kernel.go index 39ed28655..3a53206a5 100644 --- a/test/e2e/qat/qatplugin_kernel.go +++ b/test/e2e/qat/qatplugin_kernel.go @@ -82,7 +82,7 @@ func describeQatKernelPlugin() { ginkgo.Context("When QAT resources are available [Resource:cy1_dc0]", func() { ginkgo.BeforeEach(func(ctx context.Context) { ginkgo.By("checking if the resource is allocatable") - if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "qat.intel.com/cy1_dc0", 30*time.Second); err != nil { + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "qat.intel.com/cy1_dc0", 30*time.Second, utils.WaitForPositiveResource); err != nil { framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err) } }) diff --git a/test/e2e/sgx/sgx.go b/test/e2e/sgx/sgx.go index b8bd2cf3a..a41daf9b3 100644 --- a/test/e2e/sgx/sgx.go +++ b/test/e2e/sgx/sgx.go @@ -82,13 +82,13 @@ func describe() { ginkgo.Context("When SGX resources are available", func() { ginkgo.BeforeEach(func(ctx context.Context) { ginkgo.By("checking if the resource is allocatable") - if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "sgx.intel.com/epc", 150*time.Second); err != nil { + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "sgx.intel.com/epc", 150*time.Second, utils.WaitForPositiveResource); err != nil { framework.Failf("unable to wait for nodes to have positive allocatable epc resource: %v", err) } - if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "sgx.intel.com/enclave", 30*time.Second); err != nil { + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "sgx.intel.com/enclave", 30*time.Second, utils.WaitForPositiveResource); err != nil { framework.Failf("unable to wait for nodes to have positive allocatable enclave resource: %v", err) } - if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "sgx.intel.com/provision", 30*time.Second); err != nil { + if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "sgx.intel.com/provision", 30*time.Second, utils.WaitForPositiveResource); err != nil { framework.Failf("unable to wait for nodes to have positive allocatable provision resource: %v", err) } }) diff --git a/test/e2e/utils/utils.go b/test/e2e/utils/utils.go index c44c36ce3..d33f5af53 100644 --- a/test/e2e/utils/utils.go +++ b/test/e2e/utils/utils.go @@ -52,9 +52,20 @@ func GetPodLogs(ctx context.Context, f *framework.Framework, podName, containerN return fmt.Sprintf("log output of the container %s in the pod %s:%s", containerName, podName, log) } -// WaitForNodesWithResource waits for nodes to have positive allocatable resource. -func WaitForNodesWithResource(ctx context.Context, c clientset.Interface, res v1.ResourceName, timeout time.Duration) error { - framework.Logf("Waiting up to %s for any positive allocatable resource %q", timeout, res) +type WaitForResourceFunc func(resourceCount int) bool + +func WaitForPositiveResource(resourceCount int) bool { + return resourceCount > 0 +} + +func WaitForZeroResource(resourceCount int) bool { + return resourceCount == 0 +} + +// WaitForNodesWithResource waits for node's resources to change. +// Depending on the waitOperation, function waits for positive resource count or a zero resource count. 
+func WaitForNodesWithResource(ctx context.Context, c clientset.Interface, res v1.ResourceName, timeout time.Duration, waitForResourceFunc WaitForResourceFunc) error {
+	framework.Logf("Waiting up to %s for allocatable resource %q", timeout, res)
 
 	start := time.Now()
 
@@ -73,7 +84,8 @@ func WaitForNodesWithResource(ctx context.Context, c clientset.Interface, res v1
 			}
 		}
 		framework.Logf("Found %d of %q. Elapsed: %s", resNum, res, time.Since(start))
-		if resNum > 0 {
+
+		if waitForResourceFunc(resNum) {
 			return true, nil
 		}
 	}
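As a rough illustration of what the CDI support added in patch 1 produces, below is a sketch of the on-disk CDI spec corresponding to the `cdispec.Spec` built in `createMountsAndCDIDevices` for a single card. The `cdiVersion` value, the `intel.com` vendor prefix, and the file path are assumptions (the patch only references them via `dpapi.CDIVersion` and `dpapi.CDIVendor`); the device node and by-path values mirror the expectations in `TestCDIDeviceInclusion` and `TestBypath`.

```yaml
# Hypothetical CDI spec file, e.g. /var/run/cdi/intel.com-gpu.yaml (path and
# version are assumptions); structure follows the cdispec.Spec the plugin builds.
cdiVersion: "0.6.0"
kind: intel.com/gpu
devices:
  - name: card0
    containerEdits:
      deviceNodes:
        - path: /dev/dri/card0
          hostPath: /dev/dri/card0
          permissions: rw
        - path: /dev/dri/renderD128
          hostPath: /dev/dri/renderD128
          permissions: rw
      mounts:
        - hostPath: /dev/dri/by-path/pci-0000:0f:05.0-card
          containerPath: /dev/dri/by-path/pci-0000:0f:05.0-card
          type: none
          options: ["bind", "ro"]
        - hostPath: /dev/dri/by-path/pci-0000:0f:05.0-render
          containerPath: /dev/dri/by-path/pci-0000:0f:05.0-render
          type: none
          options: ["bind", "ro"]
```

With a CDI-aware runtime, such a device would be referred to by its CDI name (`intel.com/gpu=card0`, assuming that vendor prefix), which is also how specs generated by the Intel CDI specs generator mentioned in the README are consumed outside Kubernetes.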