diff --git a/.github/workflows/lib-e2e.yaml b/.github/workflows/lib-e2e.yaml
index 057c03687..1678243be 100644
--- a/.github/workflows/lib-e2e.yaml
+++ b/.github/workflows/lib-e2e.yaml
@@ -25,6 +25,7 @@ jobs:
- name: e2e-gpu
runner: gpu
images: intel-gpu-plugin intel-gpu-initcontainer
+ targetjob: e2e-gpu SKIP=Resource:xe
- name: e2e-iaa-spr
targetjob: e2e-iaa
runner: simics-spr
diff --git a/README.md b/README.md
index 8417a91ed..bc81fad78 100644
--- a/README.md
+++ b/README.md
@@ -229,7 +229,7 @@ The summary of resources available via plugins in this repository is given in th
* [dsa-accel-config-demo-pod.yaml](demo/dsa-accel-config-demo-pod.yaml)
* `fpga.intel.com` : custom, see [mappings](cmd/fpga_admissionwebhook/README.md#mappings)
* [intelfpga-job.yaml](demo/intelfpga-job.yaml)
- * `gpu.intel.com` : `i915`
+ * `gpu.intel.com` : `i915`, `i915_monitoring`, `xe` or `xe_monitoring`
* [intelgpu-job.yaml](demo/intelgpu-job.yaml)
* `iaa.intel.com` : `wq-user-[shared or dedicated]`
* [iaa-accel-config-demo-pod.yaml](demo/iaa-accel-config-demo-pod.yaml)
diff --git a/cmd/gpu_plugin/README.md b/cmd/gpu_plugin/README.md
index 7019de164..e706bb362 100644
--- a/cmd/gpu_plugin/README.md
+++ b/cmd/gpu_plugin/README.md
@@ -16,6 +16,7 @@ Table of Contents
* [Running GPU plugin as non-root](#running-gpu-plugin-as-non-root)
* [Labels created by GPU plugin](#labels-created-by-gpu-plugin)
* [SR-IOV use with the plugin](#sr-iov-use-with-the-plugin)
+ * [KMD and UMD](#kmd-and-umd)
* [Issues with media workloads on multi-GPU setups](#issues-with-media-workloads-on-multi-gpu-setups)
* [Workaround for QSV and VA-API](#workaround-for-qsv-and-va-api)
@@ -36,11 +37,23 @@ For example containers with Intel media driver (and components using that), can
video transcoding operations, and containers with the Intel OpenCL / oneAPI Level Zero
backend libraries can offload compute operations to GPU.
+Intel GPU plugin can register up to four node resources to the Kubernetes cluster:
+| Resource | Description |
+|:---- |:-------- |
+| gpu.intel.com/i915 | GPU instance running the legacy `i915` KMD |
+| gpu.intel.com/i915_monitoring | Monitoring resource for the legacy `i915` KMD devices |
+| gpu.intel.com/xe | GPU instance running the new `xe` KMD |
+| gpu.intel.com/xe_monitoring | Monitoring resource for the new `xe` KMD devices |
+
+While the GPU plugin's basic operations support nodes that have both KMDs (`i915` and `xe`) present, its resource management (GAS) does not: for resource management, a node must have only one of the KMDs present.
+
+For workloads on different KMDs, see [KMD and UMD](#kmd-and-umd).
+
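+A minimal sketch of consuming one of these resources; the pod and image names below are only placeholders, and `demo/intelgpu-job.yaml` in the repository root has a complete `i915` example:
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: xe-gpu-demo                 # placeholder name
+spec:
+  restartPolicy: Never
+  containers:
+  - name: workload
+    image: busybox                  # placeholder; use a GPU-enabled workload image
+    command: ["sh", "-c", "ls /dev/dri"]
+    resources:
+      limits:
+        gpu.intel.com/xe: 1         # or gpu.intel.com/i915 on nodes with the i915 KMD
+```
+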
## Modes and Configuration Options
| Flag | Argument | Default | Meaning |
|:---- |:-------- |:------- |:------- |
-| -enable-monitoring | - | disabled | Enable 'i915_monitoring' resource that provides access to all Intel GPU devices on the node |
+| -enable-monitoring | - | disabled | Enable '*_monitoring' resource that provides access to all Intel GPU devices on the node, [see use](./monitoring.md) |
| -resource-manager | - | disabled | Enable fractional resource management, [see use](./fractional.md) |
| -shared-dev-num | int | 1 | Number of containers that can share the same GPU device |
| -allocation-policy | string | none | 3 possible values: balanced, packed, none. For shared-dev-num > 1: _balanced_ mode spreads workloads among GPU devices, _packed_ mode fills one GPU fully before moving to next, and _none_ selects first available device from kubelet. Default is _none_. Allocation policy does not have an effect when resource manager is enabled. |
@@ -205,6 +218,31 @@ GPU plugin does __not__ setup SR-IOV. It has to be configured by the cluster adm
GPU plugin does however support provisioning Virtual Functions (VFs) to containers for a SR-IOV enabled GPU. When the plugin detects a GPU with SR-IOV VFs configured, it will only provision the VFs and leaves the PF device on the host.
+### KMD and UMD
+
+There are three different Kernel Mode Drivers (KMDs) available: `i915 upstream`, `i915 backport` and `xe`:
+* `i915 upstream` is the vanilla driver that ships with the upstream kernel and is included in common Linux distributions, such as Ubuntu.
+* `i915 backport` is an [out-of-tree driver](https://github.com/intel-gpu/intel-gpu-i915-backports/) for older enterprise / LTS kernel versions, adding support for new HW before the upstream kernel does. The API it provides to user space can differ from the eventual upstream version.
+* `xe` is a new KMD intended to support future GPUs. While it has [experimental support for current GPUs](https://docs.kernel.org/gpu/rfc/xe.html) (starting from Tigerlake), it will not support them officially.
+
+For optimal performance, the KMD should be paired with the matching UMD variant. When creating a workload container, select the UMD packages appropriate for the target hardware.
+
+| KMD | UMD packages | Support notes |
+|:---- |:-------- |:------- |
+| `i915 upstream` | Distro Repository | For Integrated GPUs. Newer Linux kernels will introduce support for Arc, Flex or Max series. |
+| `i915 backport` | [Intel Repository](https://dgpu-docs.intel.com/driver/installation.html#install-steps) | Best for Arc, Flex and Max series. Untested for Integrated GPUs. |
+| `xe` | Source code only | Experimental support for Arc, Flex and Max series. |
+
+> *NOTE*: Xe UMD is in active development and should be considered experimental.
+
+Creating a single workload image that supports all the different KMDs is not currently possible. The table below clarifies how each domain supports the different KMDs; a per-KMD workload sketch follows the table.
+
+| Domain | i915 upstream | i915 backport | xe | Notes |
+|:---- |:-------- |:------- |:------- |:------- |
+| Compute | Default | [NEO_ENABLE_i915_PRELIM_DETECTION](https://github.com/intel/compute-runtime/blob/3341de7a0d5fddd2ea5f505b5d2ef5c13faa0681/CMakeLists.txt#L496-L502) | [NEO_ENABLE_XE_DRM_DETECTION](https://github.com/intel/compute-runtime/blob/3341de7a0d5fddd2ea5f505b5d2ef5c13faa0681/CMakeLists.txt#L504-L510) | All three KMDs can be supported at the same time. |
+| Media | Default | [ENABLE_PRODUCTION_KMD](https://github.com/intel/media-driver/blob/a66b076e83876fbfa9c9ab633ad9c5517f8d74fd/CMakeLists.txt#L58) | [ENABLE_XE_KMD](https://github.com/intel/media-driver/blob/a66b076e83876fbfa9c9ab633ad9c5517f8d74fd/media_driver/cmake/linux/media_feature_flags_linux.cmake#L187-L190) | Xe with upstream or backport i915, not all three. |
+| Graphics | Default | Unknown | [intel-xe-kmd](https://gitlab.freedesktop.org/mesa/mesa/-/blob/e9169881dbd1f72eab65a68c2b8e7643f74489b7/meson_options.txt#L708) | i915 and xe KMDs can be supported at the same time. |
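+
+A minimal, hypothetical sketch of the per-KMD approach (names and images are placeholders): build one image per KMD variant and let each variant request the matching resource.
+
+```yaml
+# i915 variant: image built against an i915 UMD stack, scheduled via the i915 resource.
+apiVersion: v1
+kind: Pod
+metadata:
+  name: compute-workload-i915       # placeholder name
+spec:
+  restartPolicy: Never
+  containers:
+  - name: workload
+    image: example.com/compute-workload:i915   # placeholder image built with i915 UMD
+    resources:
+      limits:
+        gpu.intel.com/i915: 1
+# A parallel xe variant would use an xe-built UMD image and request gpu.intel.com/xe instead.
+```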
+
### Issues with media workloads on multi-GPU setups
OneVPL media API, 3D and compute APIs provide device discovery
diff --git a/cmd/gpu_plugin/device_props.go b/cmd/gpu_plugin/device_props.go
new file mode 100644
index 000000000..e6daf2f28
--- /dev/null
+++ b/cmd/gpu_plugin/device_props.go
@@ -0,0 +1,85 @@
+// Copyright 2024 Intel Corporation. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+ "slices"
+
+ "github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/labeler"
+ "github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/pluginutils"
+ "k8s.io/klog/v2"
+)
+
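+// DeviceProperties collects per-card properties during a sysfs scan: the driver
+// (KMD) of the current card, the set of DRM drivers seen so far, per-card tile
+// counts, and whether the current card is an SR-IOV PF with VFs configured.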
+type DeviceProperties struct {
+ currentDriver string
+ drmDrivers map[string]bool
+ tileCounts []uint64
+ isPfWithVfs bool
+}
+
+type invalidTileCountErr struct {
+ error
+}
+
+func newDeviceProperties() *DeviceProperties {
+ return &DeviceProperties{
+ drmDrivers: make(map[string]bool),
+ }
+}
+
+func (d *DeviceProperties) fetch(cardPath string) {
+ d.isPfWithVfs = pluginutils.IsSriovPFwithVFs(cardPath)
+
+ d.tileCounts = append(d.tileCounts, labeler.GetTileCount(cardPath))
+
+ driverName, err := pluginutils.ReadDeviceDriver(cardPath)
+ if err != nil {
+ klog.Warningf("card (%s) doesn't have driver, using default: %s", cardPath, deviceTypeDefault)
+
+ driverName = deviceTypeDefault
+ }
+
+ d.currentDriver = driverName
+ d.drmDrivers[d.currentDriver] = true
+}
+
+func (d *DeviceProperties) drmDriverCount() int {
+ return len(d.drmDrivers)
+}
+
+func (d *DeviceProperties) driver() string {
+ return d.currentDriver
+}
+
+func (d *DeviceProperties) monitorResource() string {
+ return d.currentDriver + monitorSuffix
+}
+
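+// maxTileCount returns the per-card tile count when it is the same for all
+// scanned cards, and an error if no cards were seen or the counts differ.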
+func (d *DeviceProperties) maxTileCount() (uint64, error) {
+ if len(d.tileCounts) == 0 {
+ return 0, invalidTileCountErr{}
+ }
+
+ minCount := slices.Min(d.tileCounts)
+ maxCount := slices.Max(d.tileCounts)
+
+ if minCount != maxCount {
+ klog.Warningf("Node's GPUs are heterogeneous (min: %d, max: %d tiles)", minCount, maxCount)
+
+ return 0, invalidTileCountErr{}
+ }
+
+ return maxCount, nil
+}
diff --git a/cmd/gpu_plugin/gpu_plugin.go b/cmd/gpu_plugin/gpu_plugin.go
index 6f1ad4018..44c504263 100644
--- a/cmd/gpu_plugin/gpu_plugin.go
+++ b/cmd/gpu_plugin/gpu_plugin.go
@@ -17,6 +17,7 @@ package main
import (
"flag"
"fmt"
+ "io/fs"
"os"
"path"
"path/filepath"
@@ -32,7 +33,6 @@ import (
"github.com/intel/intel-device-plugins-for-kubernetes/cmd/gpu_plugin/rm"
"github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/labeler"
- "github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/pluginutils"
dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
)
@@ -47,12 +47,14 @@ const (
vendorString = "0x8086"
// Device plugin settings.
- namespace = "gpu.intel.com"
- deviceType = "i915"
+ namespace = "gpu.intel.com"
+ deviceTypeI915 = "i915"
+ deviceTypeXe = "xe"
+ deviceTypeDefault = deviceTypeI915
// telemetry resource settings.
- monitorType = "i915_monitoring"
- monitorID = "all"
+ monitorSuffix = "_monitoring"
+ monitorID = "all"
// Period of device scans.
scanPeriod = 5 * time.Second
@@ -68,6 +70,10 @@ type cliOptions struct {
resourceManagement bool
}
+type rmWithMultipleDriversErr struct {
+ error
+}
+
type preferredAllocationPolicyFunc func(*pluginapi.ContainerPreferredAllocationRequest) []string
// nonePolicy is used for allocating GPU devices randomly, while trying
@@ -283,7 +289,11 @@ func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugi
if options.resourceManagement {
var err error
- dp.resMan, err = rm.NewResourceManager(monitorID, namespace+"/"+deviceType)
+ dp.resMan, err = rm.NewResourceManager(monitorID,
+ []string{
+ namespace + "/" + deviceTypeI915,
+ namespace + "/" + deviceTypeXe,
+ })
if err != nil {
klog.Errorf("Failed to create resource manager: %+v", err)
return nil
@@ -345,13 +355,20 @@ func (dp *devicePlugin) GetPreferredAllocation(rqt *pluginapi.PreferredAllocatio
func (dp *devicePlugin) Scan(notifier dpapi.Notifier) error {
defer dp.scanTicker.Stop()
- klog.V(1).Infof("GPU '%s' resource share count = %d", deviceType, dp.options.sharedDevNum)
+ klog.V(1).Infof("GPU (%s/%s) resource share count = %d", deviceTypeI915, deviceTypeXe, dp.options.sharedDevNum)
- previousCount := map[string]int{deviceType: 0, monitorType: 0}
+ previousCount := map[string]int{
+ deviceTypeI915: 0, deviceTypeXe: 0,
+ deviceTypeXe + monitorSuffix: 0,
+ deviceTypeI915 + monitorSuffix: 0}
for {
devTree, err := dp.scan()
if err != nil {
+ if errors.Is(err, rmWithMultipleDriversErr{}) {
+ return err
+ }
+
klog.Warning("Failed to scan: ", err)
}
@@ -426,81 +443,116 @@ func (dp *devicePlugin) devSpecForDrmFile(drmFile string) (devSpec pluginapi.Dev
return
}
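+// filterOutInvalidCards keeps only the cards that pass the vendor/name
+// compatibility check and expose a device/drm directory in sysfs.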
+func (dp *devicePlugin) filterOutInvalidCards(files []fs.DirEntry) []fs.DirEntry {
+ filtered := []fs.DirEntry{}
+
+ for _, f := range files {
+ if !dp.isCompatibleDevice(f.Name()) {
+ continue
+ }
+
+ _, err := os.Stat(path.Join(dp.sysfsDir, f.Name(), "device/drm"))
+ if err != nil {
+ continue
+ }
+
+ filtered = append(filtered, f)
+ }
+
+ return filtered
+}
+
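+// createDeviceSpecsFromDrmFiles builds a device spec for each DRM node
+// (e.g. cardX, renderDXXX) found under the card's device/drm directory.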
+func (dp *devicePlugin) createDeviceSpecsFromDrmFiles(cardPath string) []pluginapi.DeviceSpec {
+ specs := []pluginapi.DeviceSpec{}
+
+ drmFiles, _ := os.ReadDir(path.Join(cardPath, "device/drm"))
+
+ for _, drmFile := range drmFiles {
+ devSpec, devPath, devSpecErr := dp.devSpecForDrmFile(drmFile.Name())
+ if devSpecErr != nil {
+ continue
+ }
+
+ klog.V(4).Infof("Adding %s to GPU %s", devPath, filepath.Base(cardPath))
+
+ specs = append(specs, devSpec)
+ }
+
+ return specs
+}
+
func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
files, err := os.ReadDir(dp.sysfsDir)
if err != nil {
return nil, errors.Wrap(err, "Can't read sysfs folder")
}
- var monitor []pluginapi.DeviceSpec
+ monitor := make(map[string][]pluginapi.DeviceSpec, 0)
devTree := dpapi.NewDeviceTree()
rmDevInfos := rm.NewDeviceInfoMap()
- tileCounts := []uint64{}
+ devProps := newDeviceProperties()
- for _, f := range files {
- var nodes []pluginapi.DeviceSpec
+ for _, f := range dp.filterOutInvalidCards(files) {
+ name := f.Name()
+ cardPath := path.Join(dp.sysfsDir, name)
- if !dp.isCompatibleDevice(f.Name()) {
+ devProps.fetch(cardPath)
+
+ if devProps.isPfWithVfs {
continue
}
- cardPath := path.Join(dp.sysfsDir, f.Name())
+ devSpecs := dp.createDeviceSpecsFromDrmFiles(cardPath)
- drmFiles, err := os.ReadDir(path.Join(cardPath, "device/drm"))
- if err != nil {
- return nil, errors.Wrap(err, "Can't read device folder")
+ if len(devSpecs) == 0 {
+ continue
}
- isPFwithVFs := pluginutils.IsSriovPFwithVFs(path.Join(dp.sysfsDir, f.Name()))
- tileCounts = append(tileCounts, labeler.GetTileCount(dp.sysfsDir, f.Name()))
-
- for _, drmFile := range drmFiles {
- devSpec, devPath, devSpecErr := dp.devSpecForDrmFile(drmFile.Name())
- if devSpecErr != nil {
- continue
- }
-
- if !isPFwithVFs {
- klog.V(4).Infof("Adding %s to GPU %s", devPath, f.Name())
+ mounts := []pluginapi.Mount{}
+ if dp.bypathFound {
+ mounts = dp.bypathMountsForPci(cardPath, name, dp.bypathDir)
+ }
- nodes = append(nodes, devSpec)
- }
+ deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devSpecs, mounts, nil, nil)
- if dp.options.enableMonitoring {
- klog.V(4).Infof("Adding %s to GPU %s/%s", devPath, monitorType, monitorID)
+ for i := 0; i < dp.options.sharedDevNum; i++ {
+ devID := fmt.Sprintf("%s-%d", name, i)
+ devTree.AddDevice(devProps.driver(), devID, deviceInfo)
- monitor = append(monitor, devSpec)
- }
+ rmDevInfos[devID] = rm.NewDeviceInfo(devSpecs, mounts, nil)
}
- if len(nodes) > 0 {
- mounts := []pluginapi.Mount{}
- if dp.bypathFound {
- mounts = dp.bypathMountsForPci(cardPath, f.Name(), dp.bypathDir)
- }
-
- deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, mounts, nil, nil)
-
- for i := 0; i < dp.options.sharedDevNum; i++ {
- devID := fmt.Sprintf("%s-%d", f.Name(), i)
- // Currently only one device type (i915) is supported.
- // TODO: check model ID to differentiate device models.
- devTree.AddDevice(deviceType, devID, deviceInfo)
+ if dp.options.enableMonitoring {
+ res := devProps.monitorResource()
+ klog.V(4).Infof("For %s/%s, adding nodes: %+v", res, monitorID, devSpecs)
- rmDevInfos[devID] = rm.NewDeviceInfo(nodes, mounts, nil)
- }
+ monitor[res] = append(monitor[res], devSpecs...)
}
}
- // all Intel GPUs are under single monitoring resource
+
+ // all Intel GPUs are under single monitoring resource per KMD
if len(monitor) > 0 {
- deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, monitor, nil, nil, nil)
- devTree.AddDevice(monitorType, monitorID, deviceInfo)
+ for resourceName, devices := range monitor {
+ deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devices, nil, nil, nil)
+ devTree.AddDevice(resourceName, monitorID, deviceInfo)
+ }
}
if dp.resMan != nil {
- dp.resMan.SetDevInfos(rmDevInfos)
- dp.resMan.SetTileCountPerCard(tileCounts)
+ if devProps.drmDriverCount() <= 1 {
+ dp.resMan.SetDevInfos(rmDevInfos)
+
+ if tileCount, err := devProps.maxTileCount(); err == nil {
+ dp.resMan.SetTileCountPerCard(tileCount)
+ }
+ } else {
+ klog.Warning("Plugin with RM doesn't support multiple DRM drivers:", devProps.drmDrivers)
+
+ err := rmWithMultipleDriversErr{}
+
+ return nil, err
+ }
}
return devTree, nil
@@ -521,7 +573,7 @@ func main() {
)
flag.StringVar(&prefix, "prefix", "", "Prefix for devfs & sysfs paths")
- flag.BoolVar(&opts.enableMonitoring, "enable-monitoring", false, "whether to enable 'i915_monitoring' (= all GPUs) resource")
+ flag.BoolVar(&opts.enableMonitoring, "enable-monitoring", false, "whether to enable '*_monitoring' (= all GPUs) resource")
flag.BoolVar(&opts.resourceManagement, "resource-manager", false, "fractional GPU resource management")
flag.IntVar(&opts.sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same GPU device")
flag.StringVar(&opts.preferredAllocationPolicy, "allocation-policy", "none", "modes of allocating GPU devices: balanced, packed and none")
diff --git a/cmd/gpu_plugin/gpu_plugin_test.go b/cmd/gpu_plugin/gpu_plugin_test.go
index 0277a089f..e0ecd6b24 100644
--- a/cmd/gpu_plugin/gpu_plugin_test.go
+++ b/cmd/gpu_plugin/gpu_plugin_test.go
@@ -37,20 +37,26 @@ func init() {
// mockNotifier implements Notifier interface.
type mockNotifier struct {
- scanDone chan bool
- devCount int
- monitorCount int
+ scanDone chan bool
+ i915Count int
+ xeCount int
+ i915monitorCount int
+ xeMonitorCount int
}
// Notify stops plugin Scan.
func (n *mockNotifier) Notify(newDeviceTree dpapi.DeviceTree) {
- n.monitorCount = len(newDeviceTree[monitorType])
- n.devCount = len(newDeviceTree[deviceType])
+ n.xeCount = len(newDeviceTree[deviceTypeXe])
+ n.xeMonitorCount = len(newDeviceTree[deviceTypeXe+monitorSuffix])
+ n.i915Count = len(newDeviceTree[deviceTypeI915])
+ n.i915monitorCount = len(newDeviceTree[deviceTypeDefault+monitorSuffix])
n.scanDone <- true
}
-type mockResourceManager struct{}
+type mockResourceManager struct {
+ tileCount uint64
+}
func (m *mockResourceManager) CreateFractionalResourceResponse(*v1beta1.AllocateRequest) (*v1beta1.AllocateResponse, error) {
return &v1beta1.AllocateResponse{}, &dpapi.UseDefaultMethodError{}
@@ -61,31 +67,62 @@ func (m *mockResourceManager) GetPreferredFractionalAllocation(*v1beta1.Preferre
return &v1beta1.PreferredAllocationResponse{}, &dpapi.UseDefaultMethodError{}
}
-func (m *mockResourceManager) SetTileCountPerCard(counts []uint64) {
+func (m *mockResourceManager) SetTileCountPerCard(count uint64) {
+ m.tileCount = count
+}
+
+type TestCaseDetails struct {
+ name string
+ // test-case environment
+ sysfsdirs []string
+ sysfsfiles map[string][]byte
+ symlinkfiles map[string]string
+ devfsdirs []string
+ // how plugin should interpret it
+ options cliOptions
+ // what the result should be (i915)
+ expectedI915Devs int
+ expectedI915Monitors int
+ // what the result should be (xe)
+ expectedXeDevs int
+ expectedXeMonitors int
}
-func createTestFiles(root string, devfsdirs, sysfsdirs []string, sysfsfiles map[string][]byte) (string, string, error) {
+func createTestFiles(root string, tc TestCaseDetails) (string, string, error) {
sysfs := path.Join(root, "sys")
devfs := path.Join(root, "dev")
- for _, devfsdir := range devfsdirs {
+ for _, devfsdir := range tc.devfsdirs {
if err := os.MkdirAll(path.Join(devfs, devfsdir), 0750); err != nil {
return "", "", errors.Wrap(err, "Failed to create fake device directory")
}
}
- for _, sysfsdir := range sysfsdirs {
+ for _, sysfsdir := range tc.sysfsdirs {
if err := os.MkdirAll(path.Join(sysfs, sysfsdir), 0750); err != nil {
return "", "", errors.Wrap(err, "Failed to create fake device directory")
}
}
- for filename, body := range sysfsfiles {
+ for filename, body := range tc.sysfsfiles {
if err := os.WriteFile(path.Join(sysfs, filename), body, 0600); err != nil {
return "", "", errors.Wrap(err, "Failed to create fake vendor file")
}
}
+ for source, target := range tc.symlinkfiles {
+ driverPath := path.Join(sysfs, target)
+ symlinkPath := path.Join(sysfs, source)
+
+ if err := os.MkdirAll(driverPath, 0750); err != nil {
+ return "", "", errors.Wrap(err, "Failed to create fake driver file.")
+ }
+
+ if err := os.Symlink(driverPath, symlinkPath); err != nil {
+ return "", "", errors.Wrap(err, "Failed to create fake driver symlink file.")
+ }
+ }
+
return sysfs, devfs, nil
}
@@ -186,18 +223,7 @@ func TestAllocate(t *testing.T) {
}
func TestScan(t *testing.T) {
- tcases := []struct {
- name string
- // test-case environment
- sysfsdirs []string
- sysfsfiles map[string][]byte
- devfsdirs []string
- // how plugin should interpret it
- options cliOptions
- // what the result should be
- expectedDevs int
- expectedMonitors int
- }{
+ tcases := []TestCaseDetails{
{
name: "no sysfs mounted",
},
@@ -223,7 +249,71 @@ func TestScan(t *testing.T) {
"by-path/pci-0000:00:00.0-card",
"by-path/pci-0000:00:00.0-render",
},
- expectedDevs: 1,
+ expectedI915Devs: 1,
+ },
+ {
+ name: "one device with xe driver",
+ sysfsdirs: []string{"card0/device/drm/card0", "card0/device/drm/controlD64"},
+ sysfsfiles: map[string][]byte{
+ "card0/device/vendor": []byte("0x8086"),
+ },
+ symlinkfiles: map[string]string{
+ "card0/device/driver": "drivers/xe",
+ },
+ devfsdirs: []string{
+ "card0",
+ "by-path/pci-0000:00:00.0-card",
+ "by-path/pci-0000:00:00.0-render",
+ },
+ expectedXeDevs: 1,
+ },
+ {
+ name: "two devices with xe driver and monitoring",
+ sysfsdirs: []string{"card0/device/drm/card0", "card0/device/drm/controlD64", "card1/device/drm/card1"},
+ sysfsfiles: map[string][]byte{
+ "card0/device/vendor": []byte("0x8086"),
+ "card1/device/vendor": []byte("0x8086"),
+ },
+ symlinkfiles: map[string]string{
+ "card0/device/driver": "drivers/xe",
+ "card1/device/driver": "drivers/xe",
+ },
+ devfsdirs: []string{
+ "card0",
+ "by-path/pci-0000:00:00.0-card",
+ "by-path/pci-0000:00:00.0-render",
+ "card1",
+ "by-path/pci-0000:00:01.0-card",
+ "by-path/pci-0000:00:01.0-render",
+ },
+ options: cliOptions{enableMonitoring: true},
+ expectedXeDevs: 2,
+ expectedXeMonitors: 1,
+ },
+ {
+ name: "two devices with xe and i915 drivers",
+ sysfsdirs: []string{"card0/device/drm/card0", "card0/device/drm/controlD64", "card1/device/drm/card1"},
+ sysfsfiles: map[string][]byte{
+ "card0/device/vendor": []byte("0x8086"),
+ "card1/device/vendor": []byte("0x8086"),
+ },
+ symlinkfiles: map[string]string{
+ "card0/device/driver": "drivers/xe",
+ "card1/device/driver": "drivers/i915",
+ },
+ devfsdirs: []string{
+ "card0",
+ "by-path/pci-0000:00:00.0-card",
+ "by-path/pci-0000:00:00.0-render",
+ "card1",
+ "by-path/pci-0000:00:01.0-card",
+ "by-path/pci-0000:00:01.0-render",
+ },
+ options: cliOptions{enableMonitoring: true},
+ expectedXeDevs: 1,
+ expectedXeMonitors: 1,
+ expectedI915Devs: 1,
+ expectedI915Monitors: 1,
},
{
name: "sriov-1-pf-no-vfs + monitoring",
@@ -232,10 +322,10 @@ func TestScan(t *testing.T) {
"card0/device/vendor": []byte("0x8086"),
"card0/device/sriov_numvfs": []byte("0"),
},
- devfsdirs: []string{"card0"},
- options: cliOptions{enableMonitoring: true},
- expectedDevs: 1,
- expectedMonitors: 1,
+ devfsdirs: []string{"card0"},
+ options: cliOptions{enableMonitoring: true},
+ expectedI915Devs: 1,
+ expectedI915Monitors: 1,
},
{
name: "two sysfs records but one dev node",
@@ -247,8 +337,8 @@ func TestScan(t *testing.T) {
"card0/device/vendor": []byte("0x8086"),
"card1/device/vendor": []byte("0x8086"),
},
- devfsdirs: []string{"card0"},
- expectedDevs: 1,
+ devfsdirs: []string{"card0"},
+ expectedI915Devs: 1,
},
{
name: "sriov-1-pf-and-2-vfs",
@@ -263,8 +353,8 @@ func TestScan(t *testing.T) {
"card1/device/vendor": []byte("0x8086"),
"card2/device/vendor": []byte("0x8086"),
},
- devfsdirs: []string{"card0", "card1", "card2"},
- expectedDevs: 2,
+ devfsdirs: []string{"card0", "card1", "card2"},
+ expectedI915Devs: 2,
},
{
name: "two devices with 13 shares + monitoring",
@@ -276,10 +366,10 @@ func TestScan(t *testing.T) {
"card0/device/vendor": []byte("0x8086"),
"card1/device/vendor": []byte("0x8086"),
},
- devfsdirs: []string{"card0", "card1"},
- options: cliOptions{sharedDevNum: 13, enableMonitoring: true},
- expectedDevs: 26,
- expectedMonitors: 1,
+ devfsdirs: []string{"card0", "card1"},
+ options: cliOptions{sharedDevNum: 13, enableMonitoring: true},
+ expectedI915Devs: 26,
+ expectedI915Monitors: 1,
},
{
name: "wrong vendor",
@@ -317,7 +407,7 @@ func TestScan(t *testing.T) {
// dirs/files need to be removed for the next test
defer os.RemoveAll(root)
- sysfs, devfs, err := createTestFiles(root, tc.devfsdirs, tc.sysfsdirs, tc.sysfsfiles)
+ sysfs, devfs, err := createTestFiles(root, tc)
if err != nil {
t.Errorf("unexpected error: %+v", err)
}
@@ -328,20 +418,157 @@ func TestScan(t *testing.T) {
scanDone: plugin.scanDone,
}
- plugin.resMan = &mockResourceManager{}
-
err = plugin.Scan(notifier)
// Scans in GPU plugin never fail
if err != nil {
t.Errorf("unexpected error: %+v", err)
}
- if tc.expectedDevs != notifier.devCount {
- t.Errorf("Expected %d, discovered %d devices",
- tc.expectedDevs, notifier.devCount)
+ if tc.expectedI915Devs != notifier.i915Count {
+ t.Errorf("Expected %d, discovered %d devices (i915)",
+ tc.expectedI915Devs, notifier.i915Count)
+ }
+ if tc.expectedI915Monitors != notifier.i915monitorCount {
+ t.Errorf("Expected %d, discovered %d monitors (i915)",
+ tc.expectedI915Monitors, notifier.i915monitorCount)
+ }
+ if tc.expectedXeDevs != notifier.xeCount {
+ t.Errorf("Expected %d, discovered %d devices (XE)",
+ tc.expectedXeDevs, notifier.xeCount)
+ }
+ if tc.expectedXeMonitors != notifier.xeMonitorCount {
+ t.Errorf("Expected %d, discovered %d monitors (XE)",
+ tc.expectedXeMonitors, notifier.xeMonitorCount)
+ }
+ })
+ }
+}
+
+func TestScanFails(t *testing.T) {
+ tc := TestCaseDetails{
+ name: "xe and i915 devices with rm will fail",
+ sysfsdirs: []string{"card0/device/drm/card0", "card0/device/drm/controlD64", "card1/device/drm/card1"},
+ sysfsfiles: map[string][]byte{
+ "card0/device/vendor": []byte("0x8086"),
+ "card1/device/vendor": []byte("0x8086"),
+ },
+ symlinkfiles: map[string]string{
+ "card0/device/driver": "drivers/xe",
+ "card1/device/driver": "drivers/i915",
+ },
+ devfsdirs: []string{
+ "card0",
+ "card1",
+ },
+ }
+
+ t.Run(tc.name, func(t *testing.T) {
+ root, err := os.MkdirTemp("", "test_new_device_plugin")
+ if err != nil {
+ t.Fatalf("can't create temporary directory: %+v", err)
+ }
+ // dirs/files need to be removed for the next test
+ defer os.RemoveAll(root)
+
+ sysfs, devfs, err := createTestFiles(root, tc)
+ if err != nil {
+ t.Errorf("unexpected error: %+v", err)
+ }
+
+ plugin := newDevicePlugin(sysfs, devfs, tc.options)
+
+ plugin.resMan = &mockResourceManager{}
+
+ notifier := &mockNotifier{
+ scanDone: plugin.scanDone,
+ }
+
+ err = plugin.Scan(notifier)
+ if err == nil {
+ t.Error("unexpected nil error")
+ }
+ })
+}
+
+func TestScanWithRmAndTiles(t *testing.T) {
+ tcs := []TestCaseDetails{
+ {
+ name: "two tile xe devices with rm enabled - homogeneous",
+ sysfsdirs: []string{
+ "card0/device/drm/card0",
+ "card1/device/drm/card1",
+ "card0/device/tile0/gt0",
+ "card0/device/tile1/gt1",
+ "card1/device/tile0/gt0",
+ "card1/device/tile1/gt1",
+ },
+ sysfsfiles: map[string][]byte{
+ "card0/device/vendor": []byte("0x8086"),
+ "card1/device/vendor": []byte("0x8086"),
+ },
+ symlinkfiles: map[string]string{
+ "card0/device/driver": "drivers/xe",
+ "card1/device/driver": "drivers/xe",
+ },
+ devfsdirs: []string{
+ "card0",
+ "card1",
+ },
+ },
+ {
+ name: "2 & 1 tile xe devices with rm enabled - heterogeneous",
+ sysfsdirs: []string{
+ "card0/device/drm/card0",
+ "card1/device/drm/card1",
+ "card0/device/tile0/gt0",
+ "card0/device/tile1/gt1",
+ "card1/device/tile0/gt0",
+ },
+ sysfsfiles: map[string][]byte{
+ "card0/device/vendor": []byte("0x8086"),
+ "card1/device/vendor": []byte("0x8086"),
+ },
+ symlinkfiles: map[string]string{
+ "card0/device/driver": "drivers/xe",
+ "card1/device/driver": "drivers/xe",
+ },
+ devfsdirs: []string{
+ "card0",
+ "card1",
+ },
+ },
+ }
+
+ expectedTileCounts := []uint64{2, 0}
+
+ for i, tc := range tcs {
+ t.Run(tc.name, func(t *testing.T) {
+ root, err := os.MkdirTemp("", "test_new_device_plugin")
+ if err != nil {
+ t.Fatalf("can't create temporary directory: %+v", err)
+ }
+ // dirs/files need to be removed for the next test
+ defer os.RemoveAll(root)
+
+ sysfs, devfs, err := createTestFiles(root, tc)
+ if err != nil {
+ t.Errorf("unexpected error: %+v", err)
+ }
+
+ plugin := newDevicePlugin(sysfs, devfs, tc.options)
+
+ rm := &mockResourceManager{}
+ plugin.resMan = rm
+
+ notifier := &mockNotifier{
+ scanDone: plugin.scanDone,
+ }
+
+ err = plugin.Scan(notifier)
+ if err != nil {
+ t.Error("unexpected error")
}
- if tc.expectedMonitors != notifier.monitorCount {
- t.Errorf("Expected %d, discovered %d monitors",
- tc.expectedMonitors, notifier.monitorCount)
+ if rm.tileCount != expectedTileCounts[i] {
+ t.Error("unexpected tilecount for RM")
}
})
}
diff --git a/cmd/gpu_plugin/monitoring.md b/cmd/gpu_plugin/monitoring.md
new file mode 100644
index 000000000..3b3050aeb
--- /dev/null
+++ b/cmd/gpu_plugin/monitoring.md
@@ -0,0 +1,32 @@
+# Monitoring GPUs
+
+## i915_monitoring resource
+
+GPU plugin can be configured to register a monitoring resource for nodes that have Intel GPUs. `gpu.intel.com/i915_monitoring` (or `gpu.intel.com/xe_monitoring`) is a singular resource on a node. A container requesting it gets access to _all_ the Intel GPUs (`i915` or `xe` KMD device files) on the node. The idea behind this resource is to allow the container to _monitor_ the GPUs. A container requesting the `i915_monitoring` resource would typically export data to a metrics consumer, for example [Prometheus](https://prometheus.io/).
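+
+As a minimal sketch (pod and image names below are placeholders, not a published deployment), a monitoring container simply requests the resource in its limits and then sees every Intel GPU device file on the node:
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: gpu-metrics-exporter            # placeholder name
+spec:
+  containers:
+  - name: exporter
+    image: example.com/gpu-exporter:latest   # placeholder metrics exporter image
+    resources:
+      limits:
+        gpu.intel.com/i915_monitoring: 1     # or gpu.intel.com/xe_monitoring on xe nodes
+```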
+
+
+
+For the monitoring application, there are two options: [Intel XPU Manager](https://github.com/intel/xpumanager/) and [collectd](https://github.com/collectd/collectd/tree/collectd-6.0). Intel XPU Manager is readily available as a container image with a deployment yaml. collectd has Intel GPU support in its 6.0 branch, but no public container images are available for it.
+
+To deploy XPU Manager to a cluster, run the following kubectl command:
+```
+$ kubectl apply -k https://github.com/intel/xpumanager/deployment/kubernetes/daemonset/base
+```
+
+This will deploy an XPU Manager daemonset to run on all the nodes having the `i915_monitoring` resource.
+
+## Prometheus integration with XPU Manager
+
+For deploying Prometheus to a cluster, see [this page](https://prometheus-operator.dev/docs/user-guides/getting-started/). One can also use Prometheus' [helm chart](https://github.com/prometheus-community/helm-charts).
+
+Prometheus requires additional Kubernetes configuration so that it can fetch GPU metrics. The following steps add a Kubernetes Service and a ServiceMonitor. These components instruct Prometheus how, and from where, to retrieve the metrics.
+
+```
+$ kubectl apply -f https://raw.githubusercontent.com/intel/xpumanager/master/deployment/kubernetes/monitoring/service-intel-xpum.yaml
+$ kubectl apply -f https://raw.githubusercontent.com/intel/xpumanager/master/deployment/kubernetes/monitoring/servicemonitor-intel-xpum.yaml
+```
+
+With those components in place, one can query Intel GPU metrics from Prometheus using the `xpum_` prefix.
diff --git a/cmd/gpu_plugin/monitoring.png b/cmd/gpu_plugin/monitoring.png
new file mode 100644
index 000000000..c56fc5057
Binary files /dev/null and b/cmd/gpu_plugin/monitoring.png differ
diff --git a/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go b/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go
index 491d27fe1..4a5046da0 100644
--- a/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go
+++ b/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go
@@ -25,7 +25,6 @@ import (
"net"
"net/http"
"os"
- "slices"
"sort"
"strconv"
"strings"
@@ -105,7 +104,7 @@ type ResourceManager interface {
CreateFractionalResourceResponse(*pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error)
GetPreferredFractionalAllocation(*pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error)
SetDevInfos(DeviceInfoMap)
- SetTileCountPerCard(counts []uint64)
+ SetTileCountPerCard(count uint64)
}
type containerAssignments struct {
@@ -118,20 +117,20 @@ type podAssignmentDetails struct {
}
type resourceManager struct {
- clientset kubernetes.Interface
- deviceInfos DeviceInfoMap
- prGetClientFunc getClientFunc
- assignments map[string]podAssignmentDetails // pod name -> assignment details
- nodeName string
- hostIP string
- skipID string
- fullResourceName string
- retryTimeout time.Duration
- cleanupInterval time.Duration
- mutex sync.RWMutex // for devTree updates during scan
- cleanupMutex sync.RWMutex // for assignment details during cleanup
- useKubelet bool
- tileCountPerCard uint64
+ clientset kubernetes.Interface
+ deviceInfos DeviceInfoMap
+ prGetClientFunc getClientFunc
+ assignments map[string]podAssignmentDetails // pod name -> assignment details
+ nodeName string
+ hostIP string
+ skipID string
+ fullResourceNames []string
+ retryTimeout time.Duration
+ cleanupInterval time.Duration
+ mutex sync.RWMutex // for devTree updates during scan
+ cleanupMutex sync.RWMutex // for assignment details during cleanup
+ useKubelet bool
+ tileCountPerCard uint64
}
// NewDeviceInfo creates a new DeviceInfo.
@@ -152,7 +151,7 @@ func NewDeviceInfoMap() DeviceInfoMap {
}
// NewResourceManager creates a new resource manager.
-func NewResourceManager(skipID, fullResourceName string) (ResourceManager, error) {
+func NewResourceManager(skipID string, fullResourceNames []string) (ResourceManager, error) {
clientset, err := getClientset()
if err != nil {
@@ -160,16 +159,16 @@ func NewResourceManager(skipID, fullResourceName string) (ResourceManager, error
}
rm := resourceManager{
- nodeName: os.Getenv("NODE_NAME"),
- hostIP: os.Getenv("HOST_IP"),
- clientset: clientset,
- skipID: skipID,
- fullResourceName: fullResourceName,
- prGetClientFunc: podresources.GetV1Client,
- assignments: make(map[string]podAssignmentDetails),
- retryTimeout: 1 * time.Second,
- cleanupInterval: 20 * time.Minute,
- useKubelet: true,
+ nodeName: os.Getenv("NODE_NAME"),
+ hostIP: os.Getenv("HOST_IP"),
+ clientset: clientset,
+ skipID: skipID,
+ fullResourceNames: fullResourceNames,
+ prGetClientFunc: podresources.GetV1Client,
+ assignments: make(map[string]podAssignmentDetails),
+ retryTimeout: 1 * time.Second,
+ cleanupInterval: 20 * time.Minute,
+ useKubelet: true,
}
klog.Info("GPU device plugin resource manager enabled")
@@ -684,7 +683,7 @@ func (rm *resourceManager) getNodePendingGPUPods() (map[string]*v1.Pod, error) {
pendingPods := rm.listPodsOnNodeWithStates([]string{string(v1.PodPending)})
for podName, pod := range pendingPods {
- if numGPUUsingContainers(pod, rm.fullResourceName) == 0 {
+ if numGPUUsingContainers(pod, rm.fullResourceNames) == 0 {
delete(pendingPods, podName)
}
}
@@ -719,7 +718,7 @@ func (rm *resourceManager) findAllocationPodCandidates(pendingPods map[string]*v
for _, cont := range podRes.Containers {
for _, dev := range cont.Devices {
- if dev.ResourceName == rm.fullResourceName {
+ if sslices.Contains(rm.fullResourceNames, dev.ResourceName) {
numContainersAllocated++
break
}
@@ -729,7 +728,7 @@ func (rm *resourceManager) findAllocationPodCandidates(pendingPods map[string]*v
key := getPodResourceKey(podRes)
if pod, pending := pendingPods[key]; pending {
- allocationTargetNum := numGPUUsingContainers(pod, rm.fullResourceName)
+ allocationTargetNum := numGPUUsingContainers(pod, rm.fullResourceNames)
if numContainersAllocated < allocationTargetNum {
candidate := podCandidate{
pod: pod,
@@ -751,23 +750,10 @@ func (rm *resourceManager) SetDevInfos(deviceInfos DeviceInfoMap) {
rm.deviceInfos = deviceInfos
}
-func (rm *resourceManager) SetTileCountPerCard(counts []uint64) {
- if len(counts) == 0 {
- return
- }
-
- minCount := slices.Min(counts)
- maxCount := slices.Max(counts)
-
- if minCount != maxCount {
- klog.Warningf("Node's GPUs are heterogenous (min: %d, max: %d tiles)", minCount, maxCount)
-
- return
- }
-
+func (rm *resourceManager) SetTileCountPerCard(count uint64) {
rm.mutex.Lock()
defer rm.mutex.Unlock()
- rm.tileCountPerCard = maxCount
+ rm.tileCountPerCard = count
}
func (rm *resourceManager) createAllocateResponse(deviceIds []string, tileAffinityMask string) (*pluginapi.AllocateResponse, error) {
@@ -818,13 +804,13 @@ func (rm *resourceManager) createAllocateResponse(deviceIds []string, tileAffini
return &allocateResponse, nil
}
-func numGPUUsingContainers(pod *v1.Pod, fullResourceName string) int {
+func numGPUUsingContainers(pod *v1.Pod, fullResourceNames []string) int {
num := 0
for _, container := range pod.Spec.Containers {
for reqName, quantity := range container.Resources.Requests {
resourceName := reqName.String()
- if resourceName == fullResourceName {
+ if sslices.Contains(fullResourceNames, resourceName) {
value, _ := quantity.AsInt64()
if value > 0 {
num++
diff --git a/cmd/gpu_plugin/rm/gpu_plugin_resource_manager_test.go b/cmd/gpu_plugin/rm/gpu_plugin_resource_manager_test.go
index ae8038da3..09a5c68b2 100644
--- a/cmd/gpu_plugin/rm/gpu_plugin_resource_manager_test.go
+++ b/cmd/gpu_plugin/rm/gpu_plugin_resource_manager_test.go
@@ -107,11 +107,11 @@ func newMockResourceManager(pods []v1.Pod) ResourceManager {
prGetClientFunc: func(string, time.Duration, int) (podresourcesv1.PodResourcesListerClient, *grpc.ClientConn, error) {
return &mockPodResources{pods: pods}, client, nil
},
- skipID: "all",
- fullResourceName: "gpu.intel.com/i915",
- assignments: make(map[string]podAssignmentDetails),
- retryTimeout: 1 * time.Millisecond,
- useKubelet: false,
+ skipID: "all",
+ fullResourceNames: []string{"gpu.intel.com/i915", "gpu.intel.com/xe"},
+ assignments: make(map[string]podAssignmentDetails),
+ retryTimeout: 1 * time.Millisecond,
+ useKubelet: false,
}
deviceInfoMap := NewDeviceInfoMap()
@@ -150,7 +150,7 @@ type testCase struct {
func TestNewResourceManager(t *testing.T) {
// normal clientset is unavailable inside the unit tests
- _, err := NewResourceManager("foo", "bar")
+ _, err := NewResourceManager("foo", []string{"bar"})
if err == nil {
t.Errorf("unexpected success")
@@ -419,7 +419,7 @@ func TestCreateFractionalResourceResponse(t *testing.T) {
for _, tCase := range testCases {
rm := newMockResourceManager(tCase.pods)
- rm.SetTileCountPerCard([]uint64{1})
+ rm.SetTileCountPerCard(uint64(1))
_, perr := rm.GetPreferredFractionalAllocation(&v1beta1.PreferredAllocationRequest{
ContainerRequests: tCase.prefContainerRequests,
@@ -501,7 +501,7 @@ func TestCreateFractionalResourceResponseWithOneCardTwoTiles(t *testing.T) {
}
rm := newMockResourceManager(tCase.pods)
- rm.SetTileCountPerCard([]uint64{2})
+ rm.SetTileCountPerCard(uint64(2))
_, perr := rm.GetPreferredFractionalAllocation(&v1beta1.PreferredAllocationRequest{
ContainerRequests: tCase.prefContainerRequests,
@@ -574,7 +574,7 @@ func TestCreateFractionalResourceResponseWithTwoCardsOneTile(t *testing.T) {
}
rm := newMockResourceManager(tCase.pods)
- rm.SetTileCountPerCard([]uint64{5})
+ rm.SetTileCountPerCard(uint64(5))
_, perr := rm.GetPreferredFractionalAllocation(&v1beta1.PreferredAllocationRequest{
ContainerRequests: tCase.prefContainerRequests,
@@ -652,7 +652,7 @@ func TestCreateFractionalResourceResponseWithThreeCardsTwoTiles(t *testing.T) {
}
rm := newMockResourceManager(tCase.pods)
- rm.SetTileCountPerCard([]uint64{5})
+ rm.SetTileCountPerCard(uint64(5))
_, perr := rm.GetPreferredFractionalAllocation(&v1beta1.PreferredAllocationRequest{
ContainerRequests: tCase.prefContainerRequests,
@@ -747,7 +747,7 @@ func TestCreateFractionalResourceResponseWithMultipleContainersTileEach(t *testi
}
rm := newMockResourceManager(tCase.pods)
- rm.SetTileCountPerCard([]uint64{2})
+ rm.SetTileCountPerCard(uint64(2))
_, perr := rm.GetPreferredFractionalAllocation(&v1beta1.PreferredAllocationRequest{
ContainerRequests: properPrefContainerRequests,
diff --git a/cmd/internal/labeler/labeler.go b/cmd/internal/labeler/labeler.go
index 869bd8da4..0d2fdc19f 100644
--- a/cmd/internal/labeler/labeler.go
+++ b/cmd/internal/labeler/labeler.go
@@ -184,10 +184,16 @@ func GetMemoryAmount(sysfsDrmDir, gpuName string, numTiles uint64) uint64 {
}
// GetTileCount reads the tile count.
-func GetTileCount(sysfsDrmDir, gpuName string) (numTiles uint64) {
- filePath := filepath.Join(sysfsDrmDir, gpuName, "gt/gt*")
+func GetTileCount(cardPath string) (numTiles uint64) {
+ files := []string{}
- files, _ := filepath.Glob(filePath)
+ paths, _ := filepath.Glob(filepath.Join(cardPath, "gt/gt*")) // i915 driver
+ files = append(files, paths...)
+
+ paths, _ = filepath.Glob(filepath.Join(cardPath, "device/tile?")) // Xe driver
+ files = append(files, paths...)
+
+ klog.V(4).Info("tile files found:", files)
if len(files) == 0 {
return 1
@@ -232,6 +238,19 @@ func (lm labelMap) addNumericLabel(labelName string, valueToAdd int64) {
lm[labelName] = strconv.FormatInt(value, 10)
}
+// addSplittableString stores a long string into labels, splitting it into multiple
+// keys when needed: foobar="", foobar2="", foobar3="The end."
+func (lm labelMap) addSplittableString(labelBase, fullValue string) {
+ splitList := pluginutils.SplitAtLastAlphaNum(fullValue, labelMaxLength, labelControlChar)
+
+ lm[labelBase] = splitList[0]
+
+ for i := 1; i < len(splitList); i++ {
+ nextLabel := labelBase + strconv.FormatInt(int64(i+1), 10)
+ lm[nextLabel] = splitList[i]
+ }
+}
+
// this returns pci groups label value, groups separated by "_", gpus separated by ".".
// Example for two groups with 4 gpus: "0.1.2.3_4.5.6.7".
func (l *labeler) createPCIGroupLabel(gpuNumList []string) string {
@@ -295,7 +314,7 @@ func (l *labeler) createLabels() error {
return errors.Wrap(err, "gpu name parsing error")
}
- numTiles := GetTileCount(l.sysfsDRMDir, gpuName)
+ numTiles := GetTileCount(filepath.Join(l.sysfsDRMDir, gpuName))
tileCount += int(numTiles)
memoryAmount := GetMemoryAmount(l.sysfsDRMDir, gpuName, numTiles)
@@ -327,24 +346,13 @@ func (l *labeler) createLabels() error {
strings.Join(gpuNameList, "."), labelMaxLength, labelControlChar)[0]
// add gpu num list label(s) (example: "0.1.2", which is short form of "card0.card1.card2")
- allGPUs := strings.Join(gpuNumList, ".")
- gpuNumLists := pluginutils.SplitAtLastAlphaNum(allGPUs, labelMaxLength, labelControlChar)
-
- l.labels[labelNamespace+gpuNumListLabelName] = gpuNumLists[0]
- for i := 1; i < len(gpuNumLists); i++ {
- l.labels[labelNamespace+gpuNumListLabelName+strconv.FormatInt(int64(i+1), 10)] = gpuNumLists[i]
- }
+ l.labels.addSplittableString(labelNamespace+gpuNumListLabelName, strings.Join(gpuNumList, "."))
if len(numaMapping) > 0 {
// add numa node mapping to labels: gpu.intel.com/numa-gpu-map="0-0.1.2.3_1-4.5.6.7"
numaMappingLabel := createNumaNodeMappingLabel(numaMapping)
- numaMappingLabelList := pluginutils.SplitAtLastAlphaNum(numaMappingLabel, labelMaxLength, labelControlChar)
-
- l.labels[labelNamespace+numaMappingName] = numaMappingLabelList[0]
- for i := 1; i < len(numaMappingLabelList); i++ {
- l.labels[labelNamespace+numaMappingName+strconv.FormatInt(int64(i+1), 10)] = numaMappingLabelList[i]
- }
+ l.labels.addSplittableString(labelNamespace+numaMappingName, numaMappingLabel)
}
// all GPUs get default number of millicores (1000)
@@ -353,12 +361,7 @@ func (l *labeler) createLabels() error {
// aa pci-group label(s), (two group example: "1.2.3.4_5.6.7.8")
allPCIGroups := l.createPCIGroupLabel(gpuNumList)
if allPCIGroups != "" {
- pciGroups := pluginutils.SplitAtLastAlphaNum(allPCIGroups, labelMaxLength, labelControlChar)
-
- l.labels[labelNamespace+pciGroupLabelName] = pciGroups[0]
- for i := 1; i < len(gpuNumLists); i++ {
- l.labels[labelNamespace+pciGroupLabelName+strconv.FormatInt(int64(i+1), 10)] = pciGroups[i]
- }
+ l.labels.addSplittableString(labelNamespace+pciGroupLabelName, allPCIGroups)
}
}
diff --git a/cmd/internal/labeler/labeler_test.go b/cmd/internal/labeler/labeler_test.go
index 31186e224..e3dd50bef 100644
--- a/cmd/internal/labeler/labeler_test.go
+++ b/cmd/internal/labeler/labeler_test.go
@@ -137,60 +137,6 @@ func getTestCases() []testcase {
"gpu.intel.com/tiles": "1",
},
},
- {
- sysfsdirs: []string{
- "card0/device/drm/card0",
- },
- sysfsfiles: map[string][]byte{
- "card0/device/vendor": []byte("0x8086"),
- },
- name: "when gen:capability info is missing",
- memoryOverride: 16000000000,
- expectedRetval: nil,
- expectedLabels: labelMap{
- "gpu.intel.com/millicores": "1000",
- "gpu.intel.com/memory.max": "16000000000",
- "gpu.intel.com/cards": "card0",
- "gpu.intel.com/gpu-numbers": "0",
- "gpu.intel.com/tiles": "1",
- },
- },
- {
- sysfsdirs: []string{
- "card0/device/drm/card0",
- },
- sysfsfiles: map[string][]byte{
- "card0/device/vendor": []byte("0x8086"),
- },
- name: "gen version missing, but media & graphics versions present",
- memoryOverride: 16000000000,
- expectedRetval: nil,
- expectedLabels: labelMap{
- "gpu.intel.com/millicores": "1000",
- "gpu.intel.com/memory.max": "16000000000",
- "gpu.intel.com/cards": "card0",
- "gpu.intel.com/gpu-numbers": "0",
- "gpu.intel.com/tiles": "1",
- },
- },
- {
- sysfsdirs: []string{
- "card0/device/drm/card0",
- },
- sysfsfiles: map[string][]byte{
- "card0/device/vendor": []byte("0x8086"),
- },
- name: "only media version present",
- memoryOverride: 16000000000,
- expectedRetval: nil,
- expectedLabels: labelMap{
- "gpu.intel.com/millicores": "1000",
- "gpu.intel.com/memory.max": "16000000000",
- "gpu.intel.com/cards": "card0",
- "gpu.intel.com/gpu-numbers": "0",
- "gpu.intel.com/tiles": "1",
- },
- },
{
sysfsdirs: []string{
"card0/device/drm/card0",
@@ -562,6 +508,74 @@ func getTestCases() []testcase {
"gpu.intel.com/tiles": "1",
},
},
+ {
+ sysfsdirs: []string{
+ "card0/device/drm/card0",
+ "card0/device/tile0/gt0",
+ "card0/device/tile1/gt0",
+ "card1/device/drm/card1",
+ "card1/device/tile0/gt0",
+ "card1/device/tile1/gt0",
+ "card2/device/drm/card2",
+ "card2/device/tile0/gt0",
+ "card2/device/tile1/gt0",
+ },
+ sysfsfiles: map[string][]byte{
+ "card0/device/vendor": []byte("0x8086"),
+ "card0/lmem_total_bytes": []byte("8000"),
+ "card0/device/numa_node": []byte("1"),
+ "card1/device/vendor": []byte("0x8086"),
+ "card1/lmem_total_bytes": []byte("8000"),
+ "card1/device/numa_node": []byte("1"),
+ "card2/device/vendor": []byte("0x8086"),
+ "card2/lmem_total_bytes": []byte("8000"),
+ "card2/device/numa_node": []byte("1"),
+ },
+ name: "successful labeling with three cards and with xe driver",
+ expectedRetval: nil,
+ expectedLabels: labelMap{
+ "gpu.intel.com/millicores": "3000",
+ "gpu.intel.com/memory.max": "48000",
+ "gpu.intel.com/gpu-numbers": "0.1.2",
+ "gpu.intel.com/cards": "card0.card1.card2",
+ "gpu.intel.com/tiles": "6",
+ "gpu.intel.com/numa-gpu-map": "1-0.1.2",
+ },
+ },
+ {
+ sysfsdirs: []string{
+ "card0/device/drm/card0",
+ "card0/device/tile0/gt0",
+ "card0/device/tile0/gt1",
+ "card0/device/tile1/gt2",
+ "card0/device/tile1/gt3",
+ "card0/device/tile1/gt4",
+ "card0/device/tile1/gt5",
+ "card1/device/drm/card1",
+ "card1/device/tile0/gt0",
+ "card1/device/tile0/gt1",
+ "card1/device/tile1/gt2",
+ "card1/device/tile1/gt4",
+ },
+ sysfsfiles: map[string][]byte{
+ "card0/device/vendor": []byte("0x8086"),
+ "card0/lmem_total_bytes": []byte("8000"),
+ "card0/device/numa_node": []byte("1"),
+ "card1/device/vendor": []byte("0x8086"),
+ "card1/lmem_total_bytes": []byte("8000"),
+ "card1/device/numa_node": []byte("1"),
+ },
+ name: "successful labeling with two cards, two tiles per card and multiple gts per tile",
+ expectedRetval: nil,
+ expectedLabels: labelMap{
+ "gpu.intel.com/millicores": "2000",
+ "gpu.intel.com/memory.max": "32000",
+ "gpu.intel.com/gpu-numbers": "0.1",
+ "gpu.intel.com/cards": "card0.card1",
+ "gpu.intel.com/tiles": "4",
+ "gpu.intel.com/numa-gpu-map": "1-0.1",
+ },
+ },
}
}
diff --git a/cmd/internal/pluginutils/devicedriver.go b/cmd/internal/pluginutils/devicedriver.go
new file mode 100644
index 000000000..0c7cda3fa
--- /dev/null
+++ b/cmd/internal/pluginutils/devicedriver.go
@@ -0,0 +1,30 @@
+// Copyright 2024 Intel Corporation. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pluginutils
+
+import (
+ "os"
+ "path/filepath"
+)
+
+// Read driver for a device.
+func ReadDeviceDriver(path string) (string, error) {
+ linkpath, err := os.Readlink(filepath.Join(path, "device/driver"))
+ if err != nil {
+ return "", err
+ }
+
+ return filepath.Base(linkpath), nil
+}
diff --git a/cmd/internal/pluginutils/devicedriver_test.go b/cmd/internal/pluginutils/devicedriver_test.go
new file mode 100644
index 000000000..b72a1bdbc
--- /dev/null
+++ b/cmd/internal/pluginutils/devicedriver_test.go
@@ -0,0 +1,80 @@
+// Copyright 2024 Intel Corporation. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pluginutils
+
+import (
+ "os"
+ "path/filepath"
+ "testing"
+)
+
+func TestDeviceDriverSymlink(t *testing.T) {
+ root, err := os.MkdirTemp("", "test_devicedriver")
+ if err != nil {
+ t.Fatalf("can't create temporary directory: %+v", err)
+ }
+
+ defer os.RemoveAll(root)
+
+ err = os.Mkdir(filepath.Join(root, "i915"), 0777)
+ if err != nil {
+ t.Errorf("Failed to create required directory structure: %+v", err)
+ }
+
+ err = os.Mkdir(filepath.Join(root, "device"), 0777)
+ if err != nil {
+ t.Errorf("Failed to create required directory structure: %+v", err)
+ }
+
+ err = os.Symlink(filepath.Join(root, "i915"), filepath.Join(root, "device", "driver"))
+ if err != nil {
+ t.Errorf("Failed to create required directory structure: %+v", err)
+ }
+
+ driver, err := ReadDeviceDriver(root)
+
+ if err != nil {
+ t.Errorf("Got error when there shouldn't be any: %+v", err)
+ }
+
+ if driver != "i915" {
+ t.Errorf("Got invalid driver: %s", driver)
+ }
+}
+
+func TestDeviceDriverSymlinkError(t *testing.T) {
+ root, err := os.MkdirTemp("", "test_devicedriver")
+ if err != nil {
+ t.Fatalf("can't create temporary directory: %+v", err)
+ }
+
+ defer os.RemoveAll(root)
+
+ err = os.Mkdir(filepath.Join(root, "i915"), 0777)
+ if err != nil {
+ t.Errorf("Failed to create required directory structure: %+v", err)
+ }
+
+ err = os.MkdirAll(filepath.Join(root, "device", "driver"), 0777)
+ if err != nil {
+ t.Errorf("Failed to create required directory structure: %+v", err)
+ }
+
+ _, err = ReadDeviceDriver(root)
+
+ if err == nil {
+ t.Errorf("Got no error when there should be one")
+ }
+}
diff --git a/deployments/nfd/overlays/node-feature-rules/node-feature-rules.yaml b/deployments/nfd/overlays/node-feature-rules/node-feature-rules.yaml
index 7e32d4c2e..1ccc85ab5 100644
--- a/deployments/nfd/overlays/node-feature-rules/node-feature-rules.yaml
+++ b/deployments/nfd/overlays/node-feature-rules/node-feature-rules.yaml
@@ -57,9 +57,23 @@ spec:
matchExpressions:
vendor: {op: In, value: ["8086"]}
class: {op: In, value: ["0300", "0380"]}
- - feature: kernel.loadedmodule
- matchExpressions:
- i915: {op: Exists}
+ matchAny:
+ - matchFeatures:
+ - feature: kernel.loadedmodule
+ matchExpressions:
+ i915: {op: Exists}
+ - matchFeatures:
+ - feature: kernel.enabledmodule
+ matchExpressions:
+ i915: {op: Exists}
+ - matchFeatures:
+ - feature: kernel.loadedmodule
+ matchExpressions:
+ xe: {op: Exists}
+ - matchFeatures:
+ - feature: kernel.enabledmodule
+ matchExpressions:
+ xe: {op: Exists}
- name: "intel.iaa"
labels:
diff --git a/deployments/xpumanager_sidecar/kustom/kustom_xpumanager.yaml b/deployments/xpumanager_sidecar/kustom/kustom_xpumanager.yaml
index 69acf5898..3ce726271 100644
--- a/deployments/xpumanager_sidecar/kustom/kustom_xpumanager.yaml
+++ b/deployments/xpumanager_sidecar/kustom/kustom_xpumanager.yaml
@@ -27,8 +27,3 @@ spec:
- ALL
readOnlyRootFilesystem: true
runAsUser: 0
- - name: xpumd
- resources:
- limits:
- $patch: replace
- gpu.intel.com/i915_monitoring: 1
diff --git a/deployments/xpumanager_sidecar/kustomization.yaml b/deployments/xpumanager_sidecar/kustomization.yaml
index 728397536..a72b9631c 100644
--- a/deployments/xpumanager_sidecar/kustomization.yaml
+++ b/deployments/xpumanager_sidecar/kustomization.yaml
@@ -1,5 +1,5 @@
resources:
-- https://raw.githubusercontent.com/intel/xpumanager/V1.2.18/deployment/kubernetes/daemonset-intel-xpum.yaml
+- https://github.com/intel/xpumanager/deployment/kubernetes/daemonset/base/?ref=V1.2.29
namespace: monitoring
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
diff --git a/test/e2e/gpu/gpu.go b/test/e2e/gpu/gpu.go
index 52747673a..783d556cb 100644
--- a/test/e2e/gpu/gpu.go
+++ b/test/e2e/gpu/gpu.go
@@ -144,4 +144,57 @@ func describe() {
ginkgo.It("does nothing", func() {})
})
})
+
+ ginkgo.Context("When GPU resources are available [Resource:xe]", func() {
+ ginkgo.BeforeEach(func(ctx context.Context) {
+ ginkgo.By("checking if the resource is allocatable")
+ if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "gpu.intel.com/xe", 30*time.Second); err != nil {
+ framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err)
+ }
+ })
+ ginkgo.It("checks availability of GPU resources [App:busybox]", func(ctx context.Context) {
+ ginkgo.By("submitting a pod requesting GPU resources")
+ podSpec := &v1.Pod{
+ ObjectMeta: metav1.ObjectMeta{Name: "gpuplugin-tester"},
+ Spec: v1.PodSpec{
+ Containers: []v1.Container{
+ {
+ Args: []string{"-c", "ls /dev/dri"},
+ Name: containerName,
+ Image: imageutils.GetE2EImage(imageutils.BusyBox),
+ Command: []string{"/bin/sh"},
+ Resources: v1.ResourceRequirements{
+ Requests: v1.ResourceList{"gpu.intel.com/xe": resource.MustParse("1")},
+ Limits: v1.ResourceList{"gpu.intel.com/xe": resource.MustParse("1")},
+ },
+ },
+ },
+ RestartPolicy: v1.RestartPolicyNever,
+ },
+ }
+ pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(ctx, podSpec, metav1.CreateOptions{})
+ framework.ExpectNoError(err, "pod Create API error")
+
+ ginkgo.By("waiting the pod to finish successfully")
+ e2epod.NewPodClient(f).WaitForSuccess(ctx, pod.ObjectMeta.Name, 60*time.Second)
+
+ ginkgo.By("checking log output")
+ log, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, containerName)
+
+ if err != nil {
+ framework.Failf("unable to get log from pod: %v", err)
+ }
+
+ if !strings.Contains(log, "card") || !strings.Contains(log, "renderD") {
+ framework.Logf("log output: %s", log)
+ framework.Failf("device mounts not found from log")
+ }
+
+ framework.Logf("found card and renderD from the log")
+ })
+
+ ginkgo.When("there is no app to run [App:noapp]", func() {
+ ginkgo.It("does nothing", func() {})
+ })
+ })
}