diff --git a/README.md b/README.md index 801a28007..b017a4243 100644 --- a/README.md +++ b/README.md @@ -231,7 +231,7 @@ The summary of resources available via plugins in this repository is given in th * `gpu.intel.com` : `i915` * [intelgpu-job.yaml](demo/intelgpu-job.yaml) * `iaa.intel.com` : `wq-user-[shared or dedicated]` - * [iaa-qpl-demo-pod.yaml](demo/iaa-qpl-demo-pod.yaml) + * [iaa-accel-config-demo-pod.yaml](demo/iaa-accel-config-demo-pod.yaml) * `qat.intel.com` : `generic` or `cy`/`dc`/`asym-dc`/`sym-dc` * [crypto-perf-dpdk-pod-requesting-qat.yaml](deployments/qat_dpdk_app/base/crypto-perf-dpdk-pod-requesting-qat.yaml) * `sgx.intel.com` : `epc` diff --git a/cmd/iaa_plugin/README.md b/cmd/iaa_plugin/README.md index d2f067a1f..965ea3e36 100644 --- a/cmd/iaa_plugin/README.md +++ b/cmd/iaa_plugin/README.md @@ -76,28 +76,28 @@ node1 ## Testing and Demos -We can test the plugin is working by deploying the provided example iaa-qpl-demo test image. +We can test the plugin is working by deploying the provided example accel-config-demo test image. 1. Build a Docker image with an accel-config tests: ```bash - $ make iaa-qpl-demo + $ make accel-config-demo ... - Successfully tagged intel/iaa-qpl-demo:devel + Successfully tagged intel/accel-config-demo:devel ``` 1. Create a pod running unit tests off the local Docker image: ```bash - $ kubectl apply -f ./demo/iaa-qpl-demo-pod.yaml - pod/iaa-qpl-demo created + $ kubectl apply -f ./demo/iaa-accel-config-demo-pod.yaml + pod/iaa-accel-config-demo created ``` 1. Wait until pod is completed: ```bash - $ kubectl get pods |grep iaa-qpl-demo - iaa-qpl-demo 0/1 Completed 0 31m + $ kubectl get pods |grep iaa-accel-config-demo + iaa-accel-config-demo 0/1 Completed 0 31m If the pod did not successfully launch, possibly because it could not obtain the IAA resource, it will be stuck in the `Pending` status: @@ -105,16 +105,16 @@ We can test the plugin is working by deploying the provided example iaa-qpl-demo ```bash $ kubectl get pods NAME READY STATUS RESTARTS AGE - iaa-qpl-demo 0/1 Pending 0 7s + iaa-accel-config-demo 0/1 Pending 0 7s ``` This can be verified by checking the Events of the pod: ```bash - $ kubectl describe pod iaa-qpl-demo | grep -A3 Events: + $ kubectl describe pod iaa-accel-config-demo | grep -A3 Events: Events: Type Reason Age From Message ---- ------ ---- ---- ------- - Warning FailedScheduling 2m26s default-scheduler 0/1 nodes are available: 1 Insufficient iaa.intel.com/wq-user-dedicated, 1 Insufficient iaa.intel.com/wq-user-shared. + Warning FailedScheduling 2m26s default-scheduler 0/1 nodes are available: 1 Insufficient iaa.intel.com/wq-user-dedicated. ``` diff --git a/demo/iaa-qpl-demo-pod.yaml b/demo/iaa-qpl-demo-pod.yaml deleted file mode 100644 index 4bdc267c8..000000000 --- a/demo/iaa-qpl-demo-pod.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: iaa-qpl-demo -spec: - containers: - - name: iaa-qpl-demo - image: intel/iaa-qpl-demo:devel - imagePullPolicy: IfNotPresent - resources: - limits: - iaa.intel.com/wq-user-dedicated: 1 - iaa.intel.com/wq-user-shared: 1 - cpu: 1 - - restartPolicy: Never diff --git a/deployments/nfd/overlays/node-feature-rules/platform-labeling-rules.yaml b/deployments/nfd/overlays/node-feature-rules/platform-labeling-rules.yaml index 37562c89b..69884553a 100644 --- a/deployments/nfd/overlays/node-feature-rules/platform-labeling-rules.yaml +++ b/deployments/nfd/overlays/node-feature-rules/platform-labeling-rules.yaml @@ -16,8 +16,7 @@ spec: gpu.intel.com/tiles: {op: Exists} name: intel.gpu.fractionalresources # generic rule for older and upcoming devices - - labels: - labelsTemplate: | + - labelsTemplate: | {{ range .pci.device }}gpu.intel.com/device-id.{{ .class }}-{{ .device }}.present=true {{ end }} matchFeatures: @@ -33,8 +32,7 @@ spec: value: - "8086" name: intel.gpu.generic.deviceid - - labels: - labelsTemplate: gpu.intel.com/device-id.0300-{{ (index .pci.device 0).device }}.count={{ len .pci.device }} + - labelsTemplate: gpu.intel.com/device-id.0300-{{ (index .pci.device 0).device }}.count={{ len .pci.device }} matchFeatures: - feature: pci.device matchExpressions: @@ -47,8 +45,7 @@ spec: value: - "8086" name: intel.gpu.generic.count.300 - - labels: - labelsTemplate: gpu.intel.com/device-id.0380-{{ (index .pci.device 0).device }}.count={{ len .pci.device }} + - labelsTemplate: gpu.intel.com/device-id.0380-{{ (index .pci.device 0).device }}.count={{ len .pci.device }} matchFeatures: - feature: pci.device matchExpressions: diff --git a/pkg/controllers/dlb/controller.go b/pkg/controllers/dlb/controller.go index 8e250ba3d..5d4f01460 100644 --- a/pkg/controllers/dlb/controller.go +++ b/pkg/controllers/dlb/controller.go @@ -118,8 +118,11 @@ func (c *controller) UpdateDaemonSet(rawObj client.Object, ds *apps.DaemonSet) ( updated = true } } else { - setInitContainer(&ds.Spec.Template.Spec, dp.Spec) - updated = true + containers := ds.Spec.Template.Spec.InitContainers + if len(containers) != 1 || containers[0].Image != dp.Spec.InitImage { + setInitContainer(&ds.Spec.Template.Spec, dp.Spec) + updated = true + } } if len(dp.Spec.NodeSelector) > 0 { diff --git a/pkg/controllers/dsa/controller.go b/pkg/controllers/dsa/controller.go index 4cd504019..cc71a823f 100644 --- a/pkg/controllers/dsa/controller.go +++ b/pkg/controllers/dsa/controller.go @@ -35,8 +35,9 @@ import ( ) const ( - ownerKey = ".metadata.controller.dsa" - inicontainerName = "intel-idxd-config-initcontainer" + ownerKey = ".metadata.controller.dsa" + initcontainerName = "intel-idxd-config-initcontainer" + configVolumeName = "intel-dsa-config-volume" ) var defaultNodeSelector = deployments.DSAPluginDaemonSet().Spec.Template.Spec.NodeSelector @@ -87,7 +88,7 @@ func removeInitContainer(ds *apps.DaemonSet, dp *devicepluginv1.DsaDevicePlugin) newInitContainers := []v1.Container{} for _, container := range ds.Spec.Template.Spec.InitContainers { - if container.Name == inicontainerName { + if container.Name == initcontainerName { continue } @@ -98,7 +99,7 @@ func removeInitContainer(ds *apps.DaemonSet, dp *devicepluginv1.DsaDevicePlugin) newVolumes := []v1.Volume{} for _, volume := range ds.Spec.Template.Spec.Volumes { - if volume.Name == "intel-dsa-config-volume" || volume.Name == "sys-bus-dsa" || volume.Name == "sys-devices" || volume.Name == "scratch" { + if volume.Name == configVolumeName || volume.Name == "sys-bus-dsa" || volume.Name == "sys-devices" || volume.Name == "scratch" { continue } @@ -114,7 +115,7 @@ func addInitContainer(ds *apps.DaemonSet, dp *devicepluginv1.DsaDevicePlugin) { ds.Spec.Template.Spec.InitContainers = append(ds.Spec.Template.Spec.InitContainers, v1.Container{ Image: dp.Spec.InitImage, ImagePullPolicy: "IfNotPresent", - Name: inicontainerName, + Name: initcontainerName, Env: []v1.EnvVar{ { Name: "NODE_NAME", @@ -176,7 +177,7 @@ func addInitContainer(ds *apps.DaemonSet, dp *devicepluginv1.DsaDevicePlugin) { if dp.Spec.ProvisioningConfig != "" { ds.Spec.Template.Spec.Volumes = append(ds.Spec.Template.Spec.Volumes, v1.Volume{ - Name: "intel-dsa-config-volume", + Name: configVolumeName, VolumeSource: v1.VolumeSource{ ConfigMap: &v1.ConfigMapVolumeSource{ LocalObjectReference: v1.LocalObjectReference{Name: dp.Spec.ProvisioningConfig}}, @@ -184,9 +185,9 @@ func addInitContainer(ds *apps.DaemonSet, dp *devicepluginv1.DsaDevicePlugin) { }) for i, initcontainer := range ds.Spec.Template.Spec.InitContainers { - if initcontainer.Name == inicontainerName { + if initcontainer.Name == initcontainerName { ds.Spec.Template.Spec.InitContainers[i].VolumeMounts = append(ds.Spec.Template.Spec.InitContainers[i].VolumeMounts, v1.VolumeMount{ - Name: "intel-dsa-config-volume", + Name: configVolumeName, MountPath: "/idxd-init/conf", }) } @@ -218,16 +219,19 @@ func provisioningUpdate(ds *apps.DaemonSet, dp *devicepluginv1.DsaDevicePlugin) found := false for _, container := range ds.Spec.Template.Spec.InitContainers { - if container.Name == inicontainerName && container.Image != dp.Spec.InitImage { + if container.Name == initcontainerName { + if container.Image != dp.Spec.InitImage { + update = true + } + found = true - update = true break } } for _, volume := range ds.Spec.Template.Spec.Volumes { - if volume.Name == "intel-dsa-config-volume" && volume.ConfigMap.Name != dp.Spec.ProvisioningConfig { + if volume.Name == configVolumeName && volume.ConfigMap.Name != dp.Spec.ProvisioningConfig { update = true break } diff --git a/pkg/controllers/gpu/controller.go b/pkg/controllers/gpu/controller.go index ca408b52d..45bbcf836 100644 --- a/pkg/controllers/gpu/controller.go +++ b/pkg/controllers/gpu/controller.go @@ -242,32 +242,54 @@ func removeVolumeMount(volumeMounts []v1.VolumeMount, name string) []v1.VolumeMo return newVolumeMounts } -func (c *controller) UpdateDaemonSet(rawObj client.Object, ds *apps.DaemonSet) (updated bool) { - dp := rawObj.(*devicepluginv1.GpuDevicePlugin) - - if ds.Spec.Template.Spec.Containers[0].Image != dp.Spec.Image { - ds.Spec.Template.Spec.Containers[0].Image = dp.Spec.Image - updated = true - } +func processInitContainer(ds *apps.DaemonSet, dp *devicepluginv1.GpuDevicePlugin) bool { + initContainers := ds.Spec.Template.Spec.InitContainers if dp.Spec.InitImage == "" { - if ds.Spec.Template.Spec.InitContainers != nil { + if initContainers != nil { ds.Spec.Template.Spec.InitContainers = nil ds.Spec.Template.Spec.Volumes = removeVolume(ds.Spec.Template.Spec.Volumes, "nfd-features") - updated = true + + return true } - } else { + } else if len(initContainers) != 1 || initContainers[0].Image != dp.Spec.InitImage { setInitContainer(&ds.Spec.Template.Spec, dp.Spec.InitImage) - updated = true + + return true } + return false +} + +func processNodeSelector(ds *apps.DaemonSet, dp *devicepluginv1.GpuDevicePlugin) bool { if len(dp.Spec.NodeSelector) > 0 { if !reflect.DeepEqual(ds.Spec.Template.Spec.NodeSelector, dp.Spec.NodeSelector) { ds.Spec.Template.Spec.NodeSelector = dp.Spec.NodeSelector - updated = true + + return true } } else if !reflect.DeepEqual(ds.Spec.Template.Spec.NodeSelector, defaultNodeSelector) { ds.Spec.Template.Spec.NodeSelector = defaultNodeSelector + + return true + } + + return false +} + +func (c *controller) UpdateDaemonSet(rawObj client.Object, ds *apps.DaemonSet) (updated bool) { + dp := rawObj.(*devicepluginv1.GpuDevicePlugin) + + if ds.Spec.Template.Spec.Containers[0].Image != dp.Spec.Image { + ds.Spec.Template.Spec.Containers[0].Image = dp.Spec.Image + updated = true + } + + if processInitContainer(ds, dp) { + updated = true + } + + if processNodeSelector(ds, dp) { updated = true } diff --git a/pkg/controllers/iaa/controller.go b/pkg/controllers/iaa/controller.go index b9aa05fc4..f0dd5112d 100644 --- a/pkg/controllers/iaa/controller.go +++ b/pkg/controllers/iaa/controller.go @@ -35,8 +35,9 @@ import ( ) const ( - ownerKey = ".metadata.controller.iaa" - inicontainerName = "intel-iaa-initcontainer" + ownerKey = ".metadata.controller.iaa" + initcontainerName = "intel-iaa-initcontainer" + configVolumeName = "intel-iaa-config-volume" ) // +kubebuilder:rbac:groups=deviceplugin.intel.com,resources=iaadeviceplugins,verbs=get;list;watch;create;update;patch;delete @@ -85,7 +86,7 @@ func removeInitContainer(ds *apps.DaemonSet, dp *devicepluginv1.IaaDevicePlugin) newInitContainers := []v1.Container{} for _, container := range ds.Spec.Template.Spec.InitContainers { - if container.Name == inicontainerName { + if container.Name == initcontainerName { continue } @@ -97,7 +98,7 @@ func removeInitContainer(ds *apps.DaemonSet, dp *devicepluginv1.IaaDevicePlugin) newVolumes := []v1.Volume{} for _, volume := range ds.Spec.Template.Spec.Volumes { - if volume.Name == "intel-iaa-config-volume" || volume.Name == "sys-bus-dsa" || volume.Name == "sys-devices" || volume.Name == "scratch" { + if volume.Name == configVolumeName || volume.Name == "sys-bus-dsa" || volume.Name == "sys-devices" || volume.Name == "scratch" { continue } @@ -113,7 +114,7 @@ func addInitContainer(ds *apps.DaemonSet, dp *devicepluginv1.IaaDevicePlugin) { ds.Spec.Template.Spec.InitContainers = append(ds.Spec.Template.Spec.InitContainers, v1.Container{ Image: dp.Spec.InitImage, ImagePullPolicy: "IfNotPresent", - Name: inicontainerName, + Name: initcontainerName, Env: []v1.EnvVar{ { Name: "NODE_NAME", @@ -175,7 +176,7 @@ func addInitContainer(ds *apps.DaemonSet, dp *devicepluginv1.IaaDevicePlugin) { if dp.Spec.ProvisioningConfig != "" { ds.Spec.Template.Spec.Volumes = append(ds.Spec.Template.Spec.Volumes, v1.Volume{ - Name: "intel-iaa-config-volume", + Name: configVolumeName, VolumeSource: v1.VolumeSource{ ConfigMap: &v1.ConfigMapVolumeSource{ LocalObjectReference: v1.LocalObjectReference{Name: dp.Spec.ProvisioningConfig}}, @@ -183,9 +184,9 @@ func addInitContainer(ds *apps.DaemonSet, dp *devicepluginv1.IaaDevicePlugin) { }) for i, initcontainer := range ds.Spec.Template.Spec.InitContainers { - if initcontainer.Name == inicontainerName { + if initcontainer.Name == initcontainerName { ds.Spec.Template.Spec.InitContainers[i].VolumeMounts = append(ds.Spec.Template.Spec.InitContainers[i].VolumeMounts, v1.VolumeMount{ - Name: "intel-iaa-config-volume", + Name: configVolumeName, MountPath: "/idxd-init/conf", }) } @@ -219,16 +220,19 @@ func provisioningUpdate(ds *apps.DaemonSet, dp *devicepluginv1.IaaDevicePlugin) found := false for _, container := range ds.Spec.Template.Spec.InitContainers { - if container.Name == "intel-iaa-initcontainer" && container.Image != dp.Spec.InitImage { + if container.Name == initcontainerName { + if container.Image != dp.Spec.InitImage { + update = true + } + found = true - update = true break } } for _, volume := range ds.Spec.Template.Spec.Volumes { - if volume.Name == "intel-iaa-config-volume" && volume.ConfigMap.Name != dp.Spec.ProvisioningConfig { + if volume.Name == configVolumeName && volume.ConfigMap.Name != dp.Spec.ProvisioningConfig { update = true break diff --git a/pkg/controllers/qat/controller.go b/pkg/controllers/qat/controller.go index e1b8718f7..d1019dc62 100644 --- a/pkg/controllers/qat/controller.go +++ b/pkg/controllers/qat/controller.go @@ -111,7 +111,12 @@ func (c *controller) NewDaemonSet(rawObj client.Object) *apps.DaemonSet { func (c *controller) UpdateDaemonSet(rawObj client.Object, ds *apps.DaemonSet) (updated bool) { dp := rawObj.(*devicepluginv1.QatDevicePlugin) - if !reflect.DeepEqual(ds.ObjectMeta.Annotations, dp.ObjectMeta.Annotations) { + // Remove always incrementing annotation so it doesn't cause the next DeepEqual + // to return false every time. + dsAnnotations := ds.ObjectMeta.DeepCopy().Annotations + delete(dsAnnotations, "deprecated.daemonset.template.generation") + + if !reflect.DeepEqual(dsAnnotations, dp.ObjectMeta.Annotations) { pluginAnnotations := dp.ObjectMeta.DeepCopy().Annotations ds.ObjectMeta.Annotations = pluginAnnotations ds.Spec.Template.Annotations = pluginAnnotations @@ -131,8 +136,12 @@ func (c *controller) UpdateDaemonSet(rawObj client.Object, ds *apps.DaemonSet) ( updated = true } } else { - setInitContainer(&ds.Spec.Template.Spec, dp.Spec) - updated = true + containers := ds.Spec.Template.Spec.InitContainers + if len(containers) != 1 || containers[0].Image != dp.Spec.InitImage { + setInitContainer(&ds.Spec.Template.Spec, dp.Spec) + + updated = true + } } if len(dp.Spec.NodeSelector) > 0 { diff --git a/pkg/controllers/reconciler.go b/pkg/controllers/reconciler.go index 4a3d41c33..420241157 100644 --- a/pkg/controllers/reconciler.go +++ b/pkg/controllers/reconciler.go @@ -39,7 +39,7 @@ import ( var ( bKeeper = &bookKeeper{} - ImageMinVersion = versionutil.MustParseSemantic("0.27.0") + ImageMinVersion = versionutil.MustParseSemantic("0.28.0") ) func init() {