diff --git a/Jenkinsfile b/Jenkinsfile index 34bcaaf66..977cff63f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -142,7 +142,7 @@ pipeline { stage('make test-with-kind') { steps { dir(path: "$REPO_DIR") { - sh "make test-with-kind REG=intel/ TAG=0.26.0" + sh "make test-with-kind REG=intel/ TAG=0.26.1" } } } diff --git a/Makefile b/Makefile index 66212e8d1..ee80d8c5c 100644 --- a/Makefile +++ b/Makefile @@ -136,7 +136,7 @@ clean: ORG?=intel REG?=$(ORG)/ -TAG?=0.26.0 +TAG?=0.26.1 export TAG e2e-fpga: diff --git a/cmd/gpu_plugin/gpu_plugin.go b/cmd/gpu_plugin/gpu_plugin.go index af48bc740..d820974b2 100644 --- a/cmd/gpu_plugin/gpu_plugin.go +++ b/cmd/gpu_plugin/gpu_plugin.go @@ -19,6 +19,7 @@ import ( "fmt" "os" "path" + "path/filepath" "regexp" "sort" "strings" @@ -39,6 +40,7 @@ const ( devfsDriDirectory = "/dev/dri" gpuDeviceRE = `^card[0-9]+$` controlDeviceRE = `^controlD[0-9]+$` + pciAddressRE = "^[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\\.[0-9a-f]{1}$" vendorString = "0x8086" // Device plugin settings. @@ -145,32 +147,86 @@ func packedPolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string { return deviceIds } +// Returns a slice of by-path Mounts for a cardPath&Name. +// by-path files are searched from the given bypathDir. +// In the by-path dir, any files that start with "pci-" will be added to mounts. +func (dp *devicePlugin) bypathMountsForPci(cardPath, cardName, bypathDir string) []pluginapi.Mount { + linkPath, err := os.Readlink(cardPath) + if err != nil { + return nil + } + + // Fetches the pci address for a drm card by reading the + // symbolic link that the /sys/class/drm/cardX points to. + // ../../devices/pci0000:00/0000:00:02.0/drm/card + // -------------------------^^^^^^^^^^^^---------. + pciAddress := filepath.Base(strings.TrimSuffix(linkPath, filepath.Join("drm", cardName))) + + if !dp.pciAddressReg.MatchString(pciAddress) { + klog.Warningf("Invalid pci address for %s: %s", cardPath, pciAddress) + + return nil + } + + files, err := os.ReadDir(bypathDir) + if err != nil { + klog.Warningf("Failed to read by-path directory: %+v", err) + + return nil + } + + linkPrefix := "pci-" + pciAddress + + var mounts []pluginapi.Mount + + for _, f := range files { + if strings.HasPrefix(f.Name(), linkPrefix) { + absPath := path.Join(bypathDir, f.Name()) + + mounts = append(mounts, pluginapi.Mount{ + ContainerPath: absPath, + HostPath: absPath, + ReadOnly: true, + }) + } + } + + return mounts +} + type devicePlugin struct { gpuDeviceReg *regexp.Regexp controlDeviceReg *regexp.Regexp + pciAddressReg *regexp.Regexp scanTicker *time.Ticker scanDone chan bool resMan rm.ResourceManager - sysfsDir string - devfsDir string + sysfsDir string + devfsDir string + bypathDir string // Note: If restarting the plugin with a new policy, the allocations for existing pods remain with old policy. policy preferredAllocationPolicyFunc options cliOptions + + bypathFound bool } func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugin { dp := &devicePlugin{ sysfsDir: sysfsDir, devfsDir: devfsDir, + bypathDir: path.Join(devfsDir, "/by-path"), options: options, gpuDeviceReg: regexp.MustCompile(gpuDeviceRE), controlDeviceReg: regexp.MustCompile(controlDeviceRE), + pciAddressReg: regexp.MustCompile(pciAddressRE), scanTicker: time.NewTicker(scanPeriod), scanDone: make(chan bool, 1), // buffered as we may send to it before Scan starts receiving from it + bypathFound: true, } if options.resourceManagement { @@ -192,6 +248,12 @@ func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugi dp.policy = nonePolicy } + if _, err := os.ReadDir(dp.bypathDir); err != nil { + klog.Warningf("failed to read by-path dir: $+v", err) + + dp.bypathFound = false + } + return dp } @@ -299,7 +361,9 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { continue } - drmFiles, err := os.ReadDir(path.Join(dp.sysfsDir, f.Name(), "device/drm")) + cardPath := path.Join(dp.sysfsDir, f.Name()) + + drmFiles, err := os.ReadDir(path.Join(cardPath, "device/drm")) if err != nil { return nil, errors.Wrap(err, "Can't read device folder") } @@ -338,7 +402,12 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { } if len(nodes) > 0 { - deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil) + mounts := []pluginapi.Mount{} + if dp.bypathFound { + mounts = dp.bypathMountsForPci(cardPath, f.Name(), dp.bypathDir) + } + + deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, mounts, nil, nil) for i := 0; i < dp.options.sharedDevNum; i++ { devID := fmt.Sprintf("%s-%d", f.Name(), i) @@ -346,7 +415,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { // TODO: check model ID to differentiate device models. devTree.AddDevice(deviceType, devID, deviceInfo) - rmDevInfos[devID] = rm.NewDeviceInfo(nodes, nil, nil) + rmDevInfos[devID] = rm.NewDeviceInfo(nodes, mounts, nil) } } } diff --git a/cmd/gpu_plugin/gpu_plugin_test.go b/cmd/gpu_plugin/gpu_plugin_test.go index a4c304605..62536093d 100644 --- a/cmd/gpu_plugin/gpu_plugin_test.go +++ b/cmd/gpu_plugin/gpu_plugin_test.go @@ -18,11 +18,13 @@ import ( "flag" "os" "path" + "path/filepath" "reflect" "testing" "github.com/pkg/errors" "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" + "k8s.io/utils/strings/slices" "github.com/intel/intel-device-plugins-for-kubernetes/cmd/gpu_plugin/rm" dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin" @@ -43,6 +45,7 @@ type mockNotifier struct { func (n *mockNotifier) Notify(newDeviceTree dpapi.DeviceTree) { n.monitorCount = len(newDeviceTree[monitorType]) n.devCount = len(newDeviceTree[deviceType]) + n.scanDone <- true } @@ -190,7 +193,11 @@ func TestScan(t *testing.T) { sysfsfiles: map[string][]byte{ "card0/device/vendor": []byte("0x8086"), }, - devfsdirs: []string{"card0"}, + devfsdirs: []string{ + "card0", + "by-path/pci-0000:00:00.0-card", + "by-path/pci-0000:00:00.0-render", + }, expectedDevs: 1, }, { @@ -314,3 +321,126 @@ func TestScan(t *testing.T) { }) } } + +// Would be nice to combine these with the overall Scan unit tests. +func createBypathTestFiles(t *testing.T, card, root, linkFile string, bypathFiles []string) (string, string) { + drmPath := path.Join(root, "sys/class/drm/", card) + devPath := path.Join(root, "sys", linkFile) + byPath := path.Join(root, "by-path") + + if linkFile != "" { + if err := os.MkdirAll(filepath.Dir(devPath), os.ModePerm); err != nil { + t.Fatal("Couldn't create test dev dir", err) + } + + if err := os.MkdirAll(filepath.Dir(drmPath), os.ModePerm); err != nil { + t.Fatal("Couldn't create test drm dir", err) + } + + if err := os.WriteFile(devPath, []byte{0}, os.ModePerm); err != nil { + t.Fatal("Couldn't create card file", err) + } + + if err := os.Symlink(devPath, drmPath); err != nil { + t.Fatal("Couldn't create symlink between pci path and sysfs drm path") + } + } + + if len(bypathFiles) > 0 { + if err := os.MkdirAll(byPath, os.ModePerm); err != nil { + t.Fatal("Mkdir failed:", byPath) + } + + for _, f := range bypathFiles { + if err := os.WriteFile(path.Join(byPath, f), []byte{1}, os.ModePerm); err != nil { + t.Fatal("WriteFile failed:", path.Join(byPath, f)) + } + } + } + + return drmPath, byPath +} + +func TestBypath(t *testing.T) { + type testData struct { + desc string + linkpath string + bypathFiles []string + mountCount int + } + + const cardName string = "card0" + + tds := []testData{ + { + "card with two by-path files", + "00.10.2/00.334.302/0.0.1.00/0000:0f:05.0/drm/" + cardName, + []string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"}, + 2, + }, + { + "different by-path files", + "00.10.2/00.334.302/0.0.1.00/0000:ff:05.0/drm/" + cardName, + []string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"}, + 0, + }, + { + "invalid pci address", + "00.10.2/00.334.302/0.0.1.00/000:ff:05.1/drm/" + cardName, + []string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"}, + 0, + }, + { + "symlink without card", + "00.10.2/00.334.302/0.0.1.00/0000:0f:05.0/drm", + []string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"}, + 0, + }, + { + "no symlink", + "", + []string{"pci-0000:0f:05.0-card", "pci-0000:0f:05.0-render"}, + 0, + }, + { + "no by-path files", + "00.10.2/00.334.302/0.0.1.00/0000:0f:05.0/drm/" + cardName, + []string{}, + 0, + }, + } + + for _, td := range tds { + root, err := os.MkdirTemp("", "test_bypath_mounting") + if err != nil { + t.Fatalf("can't create temporary directory: %+v", err) + } + // dirs/files need to be removed for the next test + defer os.RemoveAll(root) + + plugin := newDevicePlugin("/", "/", cliOptions{}) + + drmPath, byPath := createBypathTestFiles(t, cardName, root, td.linkpath, td.bypathFiles) + + mounts := plugin.bypathMountsForPci(drmPath, cardName, byPath) + + if len(mounts) != td.mountCount { + t.Errorf("%s: Wrong number of mounts %d vs. %d", td.desc, len(mounts), td.mountCount) + } + + absPaths := []string{} + for _, link := range td.bypathFiles { + absPaths = append(absPaths, path.Join(byPath, link)) + } + + for _, mount := range mounts { + if !slices.Contains(absPaths, mount.ContainerPath) { + t.Errorf("%s: containerpath is incorrect: %s", td.desc, mount.ContainerPath) + } + + if !slices.Contains(absPaths, mount.HostPath) { + t.Errorf("%s: hostpath is incorrect: %s", td.desc, mount.HostPath) + } + } + } +} diff --git a/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go b/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go index 159e6de04..94a66968b 100644 --- a/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go +++ b/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go @@ -16,6 +16,14 @@ package rm import ( "context" + "crypto/rand" + "crypto/tls" + "crypto/x509" + "encoding/json" + "io" + "math/big" + "net" + "net/http" "os" "sort" "strconv" @@ -35,6 +43,7 @@ import ( pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" podresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1" "k8s.io/kubernetes/pkg/kubelet/apis/podresources" + "k8s.io/utils/strings/slices" ) const ( @@ -47,6 +56,13 @@ const ( grpcAddress = "unix:///var/lib/kubelet/pod-resources/kubelet.sock" grpcBufferSize = 4 * 1024 * 1024 grpcTimeout = 5 * time.Second + + kubeletAPITimeout = 5 * time.Second + kubeletAPIMaxRetries = 5 + kubeletHTTPSCertPath = "/var/lib/kubelet/pki/kubelet.crt" + // This is detected incorrectly as credentials + //nolint:gosec + serviceAccountTokenPath = "/var/run/secrets/kubernetes.io/serviceaccount/token" ) // Errors. @@ -100,12 +116,14 @@ type resourceManager struct { prGetClientFunc getClientFunc assignments map[string]podAssignmentDetails // pod name -> assignment details nodeName string + hostIP string skipID string fullResourceName string retryTimeout time.Duration cleanupInterval time.Duration mutex sync.RWMutex // for devTree updates during scan cleanupMutex sync.RWMutex // for assignment details during cleanup + useKubelet bool } // NewDeviceInfo creates a new DeviceInfo. @@ -135,30 +153,50 @@ func NewResourceManager(skipID, fullResourceName string) (ResourceManager, error rm := resourceManager{ nodeName: os.Getenv("NODE_NAME"), + hostIP: os.Getenv("HOST_IP"), clientset: clientset, skipID: skipID, fullResourceName: fullResourceName, prGetClientFunc: podresources.GetV1Client, assignments: make(map[string]podAssignmentDetails), retryTimeout: 1 * time.Second, - cleanupInterval: 2 * time.Minute, + cleanupInterval: 20 * time.Minute, + useKubelet: true, } klog.Info("GPU device plugin resource manager enabled") + // Try listing Pods once to detect if Kubelet API works + _, err = rm.listPodsFromKubelet() + + if err != nil { + klog.V(2).Info("Not using Kubelet API") + + rm.useKubelet = false + } else { + klog.V(2).Info("Using Kubelet API") + } + go func() { - ticker := time.NewTicker(rm.cleanupInterval) + getRandDuration := func() time.Duration { + cleanupIntervalSeconds := int(rm.cleanupInterval.Seconds()) + + n, _ := rand.Int(rand.Reader, big.NewInt(int64(cleanupIntervalSeconds))) + + return rm.cleanupInterval/2 + time.Duration(n.Int64())*time.Second + } + + ticker := time.NewTicker(getRandDuration()) for range ticker.C { klog.V(4).Info("Running cleanup") + ticker.Reset(getRandDuration()) + // Gather both running and pending pods. It might happen that // cleanup is triggered between GetPreferredAllocation and Allocate // and it would remove the assignment data for the soon-to-be allocated pod - running := rm.listPodsOnNodeWithState(string(v1.PodRunning)) - for podName, podItem := range rm.listPodsOnNodeWithState(string(v1.PodPending)) { - running[podName] = podItem - } + running := rm.listPodsOnNodeWithStates([]string{string(v1.PodRunning), string(v1.PodPending)}) func() { rm.cleanupMutex.Lock() @@ -189,20 +227,129 @@ func getPodResourceKey(res *podresourcesv1.PodResources) string { return res.Namespace + "&" + res.Name } -func (rm *resourceManager) listPodsOnNodeWithState(state string) map[string]*v1.Pod { - pods := make(map[string]*v1.Pod) - - selector, err := fields.ParseSelector("spec.nodeName=" + rm.nodeName + - ",status.phase=" + state) +func (rm *resourceManager) listPodsFromAPIServer() (*v1.PodList, error) { + selector, err := fields.ParseSelector("spec.nodeName=" + rm.nodeName) if err != nil { - return pods + return &v1.PodList{}, err } + klog.V(4).Info("Requesting pods from API server") + podList, err := rm.clientset.CoreV1().Pods(v1.NamespaceAll).List(context.Background(), metav1.ListOptions{ FieldSelector: selector.String(), }) + if err != nil { + klog.Error("pod listing failed:", err) + + if err != nil { + return &v1.PodList{}, err + } + } + + return podList, nil +} + +// +kubebuilder:rbac:groups="",resources=nodes/proxy,verbs=list;get + +func (rm *resourceManager) listPodsFromKubelet() (*v1.PodList, error) { + var podList v1.PodList + + token, err := os.ReadFile(serviceAccountTokenPath) + if err != nil { + klog.Warning("Failed to read token for kubelet API access: ", err) + + return &podList, err + } + + kubeletCert, err := os.ReadFile(kubeletHTTPSCertPath) + if err != nil { + klog.Warning("Failed to read kubelet cert: ", err) + + return &podList, err + } + + certPool := x509.NewCertPool() + certPool.AppendCertsFromPEM(kubeletCert) + + // There isn't an official documentation for the kubelet API. There is a blog post: + // https://www.deepnetwork.com/blog/2020/01/13/kubelet-api.html + // And a tool to work with the API: + // https://github.com/cyberark/kubeletctl + + kubeletURL := "https://" + rm.hostIP + ":10250/pods" + req, _ := http.NewRequestWithContext(context.Background(), "GET", kubeletURL, nil) + req.Header.Set("Authorization", "Bearer "+string(token)) + + tr := &http.Transport{ + TLSClientConfig: &tls.Config{ + MinVersion: tls.VersionTLS12, + RootCAs: certPool, + ServerName: rm.nodeName, + }, + } + client := &http.Client{ + Timeout: kubeletAPITimeout, + Transport: tr, + } + + klog.V(4).Infof("Requesting pods from kubelet (%s)", kubeletURL) + + resp, err := (*client).Do(req) + if err != nil { + klog.Warning("Failed to read pods from kubelet API: ", err) + + return &podList, err + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + klog.Warning("Failed to read http response body: ", err) + + return &podList, err + } + + resp.Body.Close() + + err = json.Unmarshal(body, &podList) + if err != nil { + klog.Warning("Failed to unmarshal PodList from response: ", err) + + return &podList, err + } + + return &podList, nil +} + +func (rm *resourceManager) listPods() (*v1.PodList, error) { + // Try to use kubelet API as long as it provides listings within retries + if rm.useKubelet { + var neterr net.Error + + for i := 0; i < kubeletAPIMaxRetries; i++ { + if podList, err := rm.listPodsFromKubelet(); err == nil { + return podList, nil + } else if errors.As(err, neterr); neterr.Timeout() { + continue + } + + // If error is non-timeout, break to stop using kubelet API + break + } + + klog.Warning("Stopping Kubelet API use due to error/timeout") + + rm.useKubelet = false + } + + return rm.listPodsFromAPIServer() +} + +func (rm *resourceManager) listPodsOnNodeWithStates(states []string) map[string]*v1.Pod { + pods := make(map[string]*v1.Pod) + + podList, err := rm.listPods() if err != nil { klog.Error("pod listing failed:", err) @@ -210,8 +357,11 @@ func (rm *resourceManager) listPodsOnNodeWithState(state string) map[string]*v1. } for i := range podList.Items { - key := getPodKey(&podList.Items[i]) - pods[key] = &podList.Items[i] + phase := string(podList.Items[i].Status.Phase) + if slices.Contains(states, phase) { + key := getPodKey(&podList.Items[i]) + pods[key] = &podList.Items[i] + } } return pods @@ -516,7 +666,7 @@ func (rm *resourceManager) findAllocationPodCandidate() (*podCandidate, error) { // getNodePendingGPUPods returns a map of pod names -> pods that are pending and use the gpu. func (rm *resourceManager) getNodePendingGPUPods() (map[string]*v1.Pod, error) { - pendingPods := rm.listPodsOnNodeWithState(string(v1.PodPending)) + pendingPods := rm.listPodsOnNodeWithStates([]string{string(v1.PodPending)}) for podName, pod := range pendingPods { if numGPUUsingContainers(pod, rm.fullResourceName) == 0 { diff --git a/cmd/gpu_plugin/rm/gpu_plugin_resource_manager_test.go b/cmd/gpu_plugin/rm/gpu_plugin_resource_manager_test.go index 5e161e3d2..cc5e6f542 100644 --- a/cmd/gpu_plugin/rm/gpu_plugin_resource_manager_test.go +++ b/cmd/gpu_plugin/rm/gpu_plugin_resource_manager_test.go @@ -105,6 +105,7 @@ func newMockResourceManager(pods []v1.Pod) ResourceManager { fullResourceName: "gpu.intel.com/i915", assignments: make(map[string]podAssignmentDetails), retryTimeout: 1 * time.Millisecond, + useKubelet: false, } deviceInfoMap := NewDeviceInfoMap() @@ -168,6 +169,9 @@ func TestGetPreferredFractionalAllocation(t *testing.T) { }, }, }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, } gpuLessTestPod := v1.Pod{ @@ -326,6 +330,9 @@ func TestCreateFractionalResourceResponse(t *testing.T) { }, }, }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, } unAnnotatedTestPod := *properTestPod.DeepCopy() unAnnotatedTestPod.ObjectMeta.Annotations = nil @@ -458,6 +465,9 @@ func TestCreateFractionalResourceResponseWithOneCardTwoTiles(t *testing.T) { }, }, }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, } properPrefContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{ @@ -521,6 +531,9 @@ func TestCreateFractionalResourceResponseWithTwoCardsOneTile(t *testing.T) { }, }, }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, } properPrefContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{ @@ -589,6 +602,9 @@ func TestCreateFractionalResourceResponseWithThreeCardsTwoTiles(t *testing.T) { }, }, }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, } properPrefContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{ @@ -664,6 +680,9 @@ func TestCreateFractionalResourceResponseWithMultipleContainersTileEach(t *testi }, }, }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, } properPrefContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{ diff --git a/demo/crypto-perf/Dockerfile b/demo/crypto-perf/Dockerfile index 1faa82c6c..0c50dd11d 100644 --- a/demo/crypto-perf/Dockerfile +++ b/demo/crypto-perf/Dockerfile @@ -6,7 +6,6 @@ WORKDIR $DIR RUN echo "deb-src http://deb.debian.org/debian unstable main" >> \ /etc/apt/sources.list.d/deb-src.list RUN apt-get update && apt-get install -y wget build-essential meson python3-pyelftools libnuma-dev python3-pip libssl-dev pkg-config dpkg-dev -RUN pip install ninja # Download & unpack DPDK tarball ARG DPDK_TARBALL=dpdk-22.11.tar.xz diff --git a/demo/dlb-dpdk-demo/Dockerfile b/demo/dlb-dpdk-demo/Dockerfile index ad57f08c4..6bedf224e 100644 --- a/demo/dlb-dpdk-demo/Dockerfile +++ b/demo/dlb-dpdk-demo/Dockerfile @@ -4,7 +4,6 @@ ARG DIR=/dpdk-build WORKDIR $DIR RUN apt-get update && apt-get install -y wget build-essential meson python3-pyelftools libnuma-dev python3-pip -RUN pip install ninja # Download & unpack DLB tarball ARG DLB_TARBALL="dlb_linux_src_release_7.7.0_2022_06_17.txz" diff --git a/demo/dlb-libdlb-demo-pf-pod.yaml b/demo/dlb-libdlb-demo-pf-pod.yaml index 4413ed643..83ce510d2 100644 --- a/demo/dlb-libdlb-demo-pf-pod.yaml +++ b/demo/dlb-libdlb-demo-pf-pod.yaml @@ -6,7 +6,7 @@ spec: restartPolicy: Never containers: - name: dlb-libdlb-demo-pf-pod - image: intel/dlb-libdlb-demo:0.26.0 + image: intel/dlb-libdlb-demo:0.26.1 imagePullPolicy: IfNotPresent resources: limits: diff --git a/demo/dlb-libdlb-demo-pod.yaml b/demo/dlb-libdlb-demo-pod.yaml index e783f1f97..34a66d33b 100644 --- a/demo/dlb-libdlb-demo-pod.yaml +++ b/demo/dlb-libdlb-demo-pod.yaml @@ -6,7 +6,7 @@ spec: restartPolicy: Never containers: - name: pf - image: intel/dlb-libdlb-demo:0.26.0 + image: intel/dlb-libdlb-demo:0.26.1 imagePullPolicy: IfNotPresent resources: limits: @@ -18,7 +18,7 @@ spec: cpu: 1 memory: 200Mi - name: vf - image: intel/dlb-libdlb-demo:0.26.0 + image: intel/dlb-libdlb-demo:0.26.1 imagePullPolicy: IfNotPresent resources: limits: diff --git a/demo/dlb-libdlb-demo-vf-pod.yaml b/demo/dlb-libdlb-demo-vf-pod.yaml index ea9db3c47..b12c956f9 100644 --- a/demo/dlb-libdlb-demo-vf-pod.yaml +++ b/demo/dlb-libdlb-demo-vf-pod.yaml @@ -6,7 +6,7 @@ spec: restartPolicy: Never containers: - name: dlb-libdlb-demo-vf-pod - image: intel/dlb-libdlb-demo:0.26.0 + image: intel/dlb-libdlb-demo:0.26.1 command: [ "sh", "-c", "/usr/local/bin/dir_traffic -n 8 -w epoll -d $(ls /dev/dlb* | sed 's/\\/dev\\/dlb//')" ] imagePullPolicy: IfNotPresent resources: diff --git a/demo/dsa-accel-config-demo-pod.yaml b/demo/dsa-accel-config-demo-pod.yaml index b64bf50db..8169f528c 100644 --- a/demo/dsa-accel-config-demo-pod.yaml +++ b/demo/dsa-accel-config-demo-pod.yaml @@ -7,7 +7,7 @@ metadata: spec: containers: - name: dsa-accel-config-demo - image: intel/accel-config-demo:0.26.0 + image: intel/accel-config-demo:0.26.1 imagePullPolicy: IfNotPresent resources: limits: diff --git a/demo/iaa-accel-config-demo-pod.yaml b/demo/iaa-accel-config-demo-pod.yaml index 685da42e1..ef978a330 100644 --- a/demo/iaa-accel-config-demo-pod.yaml +++ b/demo/iaa-accel-config-demo-pod.yaml @@ -7,7 +7,7 @@ metadata: spec: containers: - name: iaa-accel-config-demo - image: intel/accel-config-demo:0.26.0 + image: intel/accel-config-demo:0.26.1 command: [ "/bin/bash", "-c", "cd /test && /bin/bash -e ./iaa_user_test_runner.sh" ] imagePullPolicy: IfNotPresent resources: diff --git a/demo/intelfpga-job.yaml b/demo/intelfpga-job.yaml index f4e14452e..9df30a3a4 100644 --- a/demo/intelfpga-job.yaml +++ b/demo/intelfpga-job.yaml @@ -13,7 +13,7 @@ spec: restartPolicy: Never containers: - name: intelfpga-demo-job-1 - image: intel/opae-nlb-demo:0.26.0 + image: intel/opae-nlb-demo:0.26.1 imagePullPolicy: IfNotPresent securityContext: capabilities: diff --git a/demo/openssl-qat-engine-cpa-sample-pod.yaml b/demo/openssl-qat-engine-cpa-sample-pod.yaml index 4a0e7f63e..0b93870a2 100644 --- a/demo/openssl-qat-engine-cpa-sample-pod.yaml +++ b/demo/openssl-qat-engine-cpa-sample-pod.yaml @@ -7,7 +7,7 @@ metadata: spec: containers: - name: openssl-qat-engine - image: intel/openssl-qat-engine:0.26.0 + image: intel/openssl-qat-engine:0.26.1 imagePullPolicy: IfNotPresent command: [ "cpa_sample_code", "runTests=4", "signOfLife=1" ] securityContext: diff --git a/demo/test-fpga-orchestrated.yaml b/demo/test-fpga-orchestrated.yaml index 6e575e99f..2322f6ee1 100644 --- a/demo/test-fpga-orchestrated.yaml +++ b/demo/test-fpga-orchestrated.yaml @@ -5,7 +5,7 @@ metadata: spec: containers: - name: test-container - image: intel/opae-nlb-demo:0.26.0 + image: intel/opae-nlb-demo:0.26.1 imagePullPolicy: IfNotPresent securityContext: capabilities: diff --git a/demo/test-fpga-preprogrammed.yaml b/demo/test-fpga-preprogrammed.yaml index 97c852729..3fc7c4559 100644 --- a/demo/test-fpga-preprogrammed.yaml +++ b/demo/test-fpga-preprogrammed.yaml @@ -5,7 +5,7 @@ metadata: spec: containers: - name: test-container - image: intel/opae-nlb-demo:0.26.0 + image: intel/opae-nlb-demo:0.26.1 imagePullPolicy: IfNotPresent securityContext: capabilities: diff --git a/deployments/dlb_plugin/base/intel-dlb-plugin.yaml b/deployments/dlb_plugin/base/intel-dlb-plugin.yaml index 41188249b..4c74b31c6 100644 --- a/deployments/dlb_plugin/base/intel-dlb-plugin.yaml +++ b/deployments/dlb_plugin/base/intel-dlb-plugin.yaml @@ -21,7 +21,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: intel/intel-dlb-plugin:0.26.0 + image: intel/intel-dlb-plugin:0.26.1 imagePullPolicy: IfNotPresent securityContext: readOnlyRootFilesystem: true diff --git a/deployments/dlb_plugin/overlays/dlb_initcontainer/dlb_initcontainer.yaml b/deployments/dlb_plugin/overlays/dlb_initcontainer/dlb_initcontainer.yaml index d9a85c4ce..5701a5a84 100644 --- a/deployments/dlb_plugin/overlays/dlb_initcontainer/dlb_initcontainer.yaml +++ b/deployments/dlb_plugin/overlays/dlb_initcontainer/dlb_initcontainer.yaml @@ -7,7 +7,7 @@ spec: spec: initContainers: - name: intel-dlb-initcontainer - image: intel/intel-dlb-initcontainer:0.26.0 + image: intel/intel-dlb-initcontainer:0.26.1 securityContext: readOnlyRootFilesystem: true privileged: true diff --git a/deployments/dsa_plugin/base/intel-dsa-plugin.yaml b/deployments/dsa_plugin/base/intel-dsa-plugin.yaml index dda001ce1..3d9c99040 100644 --- a/deployments/dsa_plugin/base/intel-dsa-plugin.yaml +++ b/deployments/dsa_plugin/base/intel-dsa-plugin.yaml @@ -21,7 +21,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: intel/intel-dsa-plugin:0.26.0 + image: intel/intel-dsa-plugin:0.26.1 imagePullPolicy: IfNotPresent securityContext: readOnlyRootFilesystem: true diff --git a/deployments/dsa_plugin/overlays/dsa_initcontainer/dsa_initcontainer.yaml b/deployments/dsa_plugin/overlays/dsa_initcontainer/dsa_initcontainer.yaml index c28641701..94e94bbe9 100644 --- a/deployments/dsa_plugin/overlays/dsa_initcontainer/dsa_initcontainer.yaml +++ b/deployments/dsa_plugin/overlays/dsa_initcontainer/dsa_initcontainer.yaml @@ -12,7 +12,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: intel/intel-idxd-config-initcontainer:0.26.0 + image: intel/intel-idxd-config-initcontainer:0.26.1 securityContext: readOnlyRootFilesystem: true privileged: true diff --git a/deployments/fpga_admissionwebhook/manager/manager.yaml b/deployments/fpga_admissionwebhook/manager/manager.yaml index 328cb8c06..66ea0cfa7 100644 --- a/deployments/fpga_admissionwebhook/manager/manager.yaml +++ b/deployments/fpga_admissionwebhook/manager/manager.yaml @@ -16,7 +16,7 @@ spec: control-plane: controller-manager spec: containers: - - image: intel/intel-fpga-admissionwebhook:0.26.0 + - image: intel/intel-fpga-admissionwebhook:0.26.1 imagePullPolicy: IfNotPresent name: manager securityContext: diff --git a/deployments/fpga_plugin/base/intel-fpga-plugin-daemonset.yaml b/deployments/fpga_plugin/base/intel-fpga-plugin-daemonset.yaml index c2e560ea3..65a0fb272 100644 --- a/deployments/fpga_plugin/base/intel-fpga-plugin-daemonset.yaml +++ b/deployments/fpga_plugin/base/intel-fpga-plugin-daemonset.yaml @@ -16,7 +16,7 @@ spec: spec: initContainers: - name: intel-fpga-initcontainer - image: intel/intel-fpga-initcontainer:0.26.0 + image: intel/intel-fpga-initcontainer:0.26.1 imagePullPolicy: IfNotPresent securityContext: readOnlyRootFilesystem: true @@ -33,7 +33,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: intel/intel-fpga-plugin:0.26.0 + image: intel/intel-fpga-plugin:0.26.1 imagePullPolicy: IfNotPresent args: - -mode=af diff --git a/deployments/gpu_plugin/base/intel-gpu-plugin.yaml b/deployments/gpu_plugin/base/intel-gpu-plugin.yaml index c58c2f43a..06ef0578a 100644 --- a/deployments/gpu_plugin/base/intel-gpu-plugin.yaml +++ b/deployments/gpu_plugin/base/intel-gpu-plugin.yaml @@ -15,7 +15,7 @@ spec: spec: initContainers: - name: intel-gpu-initcontainer - image: intel/intel-gpu-initcontainer:0.26.0 + image: intel/intel-gpu-initcontainer:0.26.1 imagePullPolicy: IfNotPresent securityContext: seLinuxOptions: @@ -32,7 +32,11 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: intel/intel-gpu-plugin:0.26.0 + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + image: intel/intel-gpu-plugin:0.26.1 imagePullPolicy: IfNotPresent securityContext: seLinuxOptions: diff --git a/deployments/gpu_plugin/overlays/fractional_resources/add-kubelet-crt-mount.yaml b/deployments/gpu_plugin/overlays/fractional_resources/add-kubelet-crt-mount.yaml new file mode 100644 index 000000000..ff4afafa8 --- /dev/null +++ b/deployments/gpu_plugin/overlays/fractional_resources/add-kubelet-crt-mount.yaml @@ -0,0 +1,17 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: intel-gpu-plugin +spec: + template: + spec: + containers: + - name: intel-gpu-plugin + volumeMounts: + - name: kubeletcrt + mountPath: /var/lib/kubelet/pki/kubelet.crt + volumes: + - name: kubeletcrt + hostPath: + path: /var/lib/kubelet/pki/kubelet.crt + type: FileOrCreate diff --git a/deployments/gpu_plugin/overlays/fractional_resources/gpu-manager-role.yaml b/deployments/gpu_plugin/overlays/fractional_resources/gpu-manager-role.yaml index 17bca25fa..61db88233 100644 --- a/deployments/gpu_plugin/overlays/fractional_resources/gpu-manager-role.yaml +++ b/deployments/gpu_plugin/overlays/fractional_resources/gpu-manager-role.yaml @@ -4,5 +4,5 @@ metadata: name: gpu-manager-role rules: - apiGroups: [""] - resources: ["pods"] - verbs: ["list"] + resources: ["pods", "nodes/proxy"] + verbs: ["list", "get"] diff --git a/deployments/gpu_plugin/overlays/fractional_resources/kustomization.yaml b/deployments/gpu_plugin/overlays/fractional_resources/kustomization.yaml index 702053d01..85d0d920f 100644 --- a/deployments/gpu_plugin/overlays/fractional_resources/kustomization.yaml +++ b/deployments/gpu_plugin/overlays/fractional_resources/kustomization.yaml @@ -9,3 +9,4 @@ patches: - path: add-podresource-mount.yaml - path: add-args.yaml - path: add-nodeselector-intel-gpu.yaml + - path: add-kubelet-crt-mount.yaml diff --git a/deployments/iaa_plugin/base/intel-iaa-plugin.yaml b/deployments/iaa_plugin/base/intel-iaa-plugin.yaml index 2b72f4c88..ca0bfc8c6 100644 --- a/deployments/iaa_plugin/base/intel-iaa-plugin.yaml +++ b/deployments/iaa_plugin/base/intel-iaa-plugin.yaml @@ -21,7 +21,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: intel/intel-iaa-plugin:0.26.0 + image: intel/intel-iaa-plugin:0.26.1 imagePullPolicy: IfNotPresent securityContext: readOnlyRootFilesystem: true diff --git a/deployments/iaa_plugin/overlays/iaa_initcontainer/iaa_initcontainer.yaml b/deployments/iaa_plugin/overlays/iaa_initcontainer/iaa_initcontainer.yaml index a265b96eb..dfcdd1080 100644 --- a/deployments/iaa_plugin/overlays/iaa_initcontainer/iaa_initcontainer.yaml +++ b/deployments/iaa_plugin/overlays/iaa_initcontainer/iaa_initcontainer.yaml @@ -14,7 +14,7 @@ spec: fieldPath: spec.nodeName - name: DEVICE_TYPE value: "iaa" - image: intel/intel-idxd-config-initcontainer:0.26.0 + image: intel/intel-idxd-config-initcontainer:0.26.1 securityContext: readOnlyRootFilesystem: true privileged: true diff --git a/deployments/operator/default/manager_auth_proxy_patch.yaml b/deployments/operator/default/manager_auth_proxy_patch.yaml index 1dfc28da2..bfbb9e3d2 100644 --- a/deployments/operator/default/manager_auth_proxy_patch.yaml +++ b/deployments/operator/default/manager_auth_proxy_patch.yaml @@ -10,7 +10,7 @@ spec: spec: containers: - name: kube-rbac-proxy - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1 + image: gcr.io/kubebuilder/kube-rbac-proxy:v0.14.1 args: - "--secure-listen-address=0.0.0.0:8443" - "--upstream=http://127.0.0.1:8080/" diff --git a/deployments/operator/manager/manager.yaml b/deployments/operator/manager/manager.yaml index 2d741ab2b..641428874 100644 --- a/deployments/operator/manager/manager.yaml +++ b/deployments/operator/manager/manager.yaml @@ -23,7 +23,7 @@ spec: control-plane: controller-manager spec: containers: - - image: docker.io/intel/intel-deviceplugin-operator:0.26.0 + - image: docker.io/intel/intel-deviceplugin-operator:0.26.1 imagePullPolicy: IfNotPresent name: manager livenessProbe: diff --git a/deployments/operator/manifests/bases/intel-device-plugins-operator.clusterserviceversion.yaml b/deployments/operator/manifests/bases/intel-device-plugins-operator.clusterserviceversion.yaml index fcca4bb92..8730f412e 100644 --- a/deployments/operator/manifests/bases/intel-device-plugins-operator.clusterserviceversion.yaml +++ b/deployments/operator/manifests/bases/intel-device-plugins-operator.clusterserviceversion.yaml @@ -5,7 +5,7 @@ metadata: alm-examples: '[]' capabilities: Seamless Upgrades categories: Drivers and plugins - containerImage: docker.io/intel/intel-deviceplugin-operator:0.26.0 + containerImage: docker.io/intel/intel-deviceplugin-operator:0.26.1 createdAt: "2022-11-09" description: This operator is a Kubernetes custom controller whose goal is to serve the installation and lifecycle management of Intel device plugins for diff --git a/deployments/operator/rbac/gpu_manager_role.yaml b/deployments/operator/rbac/gpu_manager_role.yaml index 691a47290..8c6790913 100644 --- a/deployments/operator/rbac/gpu_manager_role.yaml +++ b/deployments/operator/rbac/gpu_manager_role.yaml @@ -5,6 +5,13 @@ metadata: creationTimestamp: null name: gpu-manager-role rules: +- apiGroups: + - "" + resources: + - nodes/proxy + verbs: + - get + - list - apiGroups: - "" resources: diff --git a/deployments/operator/rbac/role.yaml b/deployments/operator/rbac/role.yaml index 14495bc98..2c18d55d1 100644 --- a/deployments/operator/rbac/role.yaml +++ b/deployments/operator/rbac/role.yaml @@ -5,6 +5,13 @@ metadata: creationTimestamp: null name: manager-role rules: +- apiGroups: + - "" + resources: + - nodes/proxy + verbs: + - get + - list - apiGroups: - "" resources: diff --git a/deployments/operator/samples/deviceplugin_v1_dlbdeviceplugin.yaml b/deployments/operator/samples/deviceplugin_v1_dlbdeviceplugin.yaml index 5d74001de..755670271 100644 --- a/deployments/operator/samples/deviceplugin_v1_dlbdeviceplugin.yaml +++ b/deployments/operator/samples/deviceplugin_v1_dlbdeviceplugin.yaml @@ -9,8 +9,8 @@ metadata: # annotations: # container.apparmor.security.beta.kubernetes.io/intel-dlb-plugin: unconfined spec: - image: intel/intel-dlb-plugin:0.26.0 - initImage: intel/intel-dlb-initcontainer:0.26.0 + image: intel/intel-dlb-plugin:0.26.1 + initImage: intel/intel-dlb-initcontainer:0.26.1 logLevel: 4 nodeSelector: intel.feature.node.kubernetes.io/dlb: 'true' diff --git a/deployments/operator/samples/deviceplugin_v1_dsadeviceplugin.yaml b/deployments/operator/samples/deviceplugin_v1_dsadeviceplugin.yaml index 4d1efb4e6..2dfedefe9 100644 --- a/deployments/operator/samples/deviceplugin_v1_dsadeviceplugin.yaml +++ b/deployments/operator/samples/deviceplugin_v1_dsadeviceplugin.yaml @@ -3,8 +3,8 @@ kind: DsaDevicePlugin metadata: name: dsadeviceplugin-sample spec: - image: intel/intel-dsa-plugin:0.26.0 - initImage: intel/intel-idxd-config-initcontainer:0.26.0 + image: intel/intel-dsa-plugin:0.26.1 + initImage: intel/intel-idxd-config-initcontainer:0.26.1 sharedDevNum: 10 logLevel: 4 nodeSelector: diff --git a/deployments/operator/samples/deviceplugin_v1_fpgadeviceplugin.yaml b/deployments/operator/samples/deviceplugin_v1_fpgadeviceplugin.yaml index 03ccc83cd..067473986 100644 --- a/deployments/operator/samples/deviceplugin_v1_fpgadeviceplugin.yaml +++ b/deployments/operator/samples/deviceplugin_v1_fpgadeviceplugin.yaml @@ -3,8 +3,8 @@ kind: FpgaDevicePlugin metadata: name: fpgadeviceplugin-sample spec: - image: intel/intel-fpga-plugin:0.26.0 - initImage: intel/intel-fpga-initcontainer:0.26.0 + image: intel/intel-fpga-plugin:0.26.1 + initImage: intel/intel-fpga-initcontainer:0.26.1 mode: region logLevel: 4 nodeSelector: diff --git a/deployments/operator/samples/deviceplugin_v1_gpudeviceplugin.yaml b/deployments/operator/samples/deviceplugin_v1_gpudeviceplugin.yaml index e0c38649a..7199ff8a9 100644 --- a/deployments/operator/samples/deviceplugin_v1_gpudeviceplugin.yaml +++ b/deployments/operator/samples/deviceplugin_v1_gpudeviceplugin.yaml @@ -3,8 +3,8 @@ kind: GpuDevicePlugin metadata: name: gpudeviceplugin-sample spec: - image: intel/intel-gpu-plugin:0.26.0 - initImage: intel/intel-gpu-initcontainer:0.26.0 + image: intel/intel-gpu-plugin:0.26.1 + initImage: intel/intel-gpu-initcontainer:0.26.1 sharedDevNum: 10 logLevel: 4 nodeSelector: diff --git a/deployments/operator/samples/deviceplugin_v1_iaadeviceplugin.yaml b/deployments/operator/samples/deviceplugin_v1_iaadeviceplugin.yaml index 06bf370c3..3d73e13b9 100644 --- a/deployments/operator/samples/deviceplugin_v1_iaadeviceplugin.yaml +++ b/deployments/operator/samples/deviceplugin_v1_iaadeviceplugin.yaml @@ -3,8 +3,8 @@ kind: IaaDevicePlugin metadata: name: iaadeviceplugin-sample spec: - image: intel/intel-iaa-plugin:0.26.0 - initImage: intel/intel-idxd-config-initcontainer:0.26.0 + image: intel/intel-iaa-plugin:0.26.1 + initImage: intel/intel-idxd-config-initcontainer:0.26.1 sharedDevNum: 10 logLevel: 4 nodeSelector: diff --git a/deployments/operator/samples/deviceplugin_v1_qatdeviceplugin.yaml b/deployments/operator/samples/deviceplugin_v1_qatdeviceplugin.yaml index 6c5cfb993..09a48430e 100644 --- a/deployments/operator/samples/deviceplugin_v1_qatdeviceplugin.yaml +++ b/deployments/operator/samples/deviceplugin_v1_qatdeviceplugin.yaml @@ -9,8 +9,8 @@ metadata: # annotations: # container.apparmor.security.beta.kubernetes.io/intel-qat-plugin: unconfined spec: - image: intel/intel-qat-plugin:0.26.0 - initImage: intel/intel-qat-initcontainer:0.26.0 + image: intel/intel-qat-plugin:0.26.1 + initImage: intel/intel-qat-initcontainer:0.26.1 dpdkDriver: vfio-pci kernelVfDrivers: - c6xxvf diff --git a/deployments/operator/samples/deviceplugin_v1_sgxdeviceplugin.yaml b/deployments/operator/samples/deviceplugin_v1_sgxdeviceplugin.yaml index 5e00d56e1..d865e34ed 100644 --- a/deployments/operator/samples/deviceplugin_v1_sgxdeviceplugin.yaml +++ b/deployments/operator/samples/deviceplugin_v1_sgxdeviceplugin.yaml @@ -3,8 +3,8 @@ kind: SgxDevicePlugin metadata: name: sgxdeviceplugin-sample spec: - image: intel/intel-sgx-plugin:0.26.0 - initImage: intel/intel-sgx-initcontainer:0.26.0 + image: intel/intel-sgx-plugin:0.26.1 + initImage: intel/intel-sgx-initcontainer:0.26.1 enclaveLimit: 110 provisionLimit: 110 logLevel: 4 diff --git a/deployments/qat_dpdk_app/base/crypto-perf-dpdk-pod-requesting-qat.yaml b/deployments/qat_dpdk_app/base/crypto-perf-dpdk-pod-requesting-qat.yaml index 46c3d5a6d..199564a8a 100644 --- a/deployments/qat_dpdk_app/base/crypto-perf-dpdk-pod-requesting-qat.yaml +++ b/deployments/qat_dpdk_app/base/crypto-perf-dpdk-pod-requesting-qat.yaml @@ -5,7 +5,7 @@ metadata: spec: containers: - name: crypto-perf - image: intel/crypto-perf:0.26.0 + image: intel/crypto-perf:0.26.1 imagePullPolicy: IfNotPresent command: [ "/bin/bash", "-c", "--" ] args: [ "while true; do sleep 300000; done;" ] diff --git a/deployments/qat_plugin/base/intel-qat-kernel-plugin.yaml b/deployments/qat_plugin/base/intel-qat-kernel-plugin.yaml index 1c8572a81..9e5e4715e 100644 --- a/deployments/qat_plugin/base/intel-qat-kernel-plugin.yaml +++ b/deployments/qat_plugin/base/intel-qat-kernel-plugin.yaml @@ -19,7 +19,7 @@ spec: readOnlyRootFilesystem: true allowPrivilegeEscalation: false privileged: true - image: intel/intel-qat-plugin:0.26.0 + image: intel/intel-qat-plugin:0.26.1 imagePullPolicy: IfNotPresent args: ["-mode", "kernel"] volumeMounts: diff --git a/deployments/qat_plugin/base/intel-qat-plugin.yaml b/deployments/qat_plugin/base/intel-qat-plugin.yaml index 68412761d..c0645e119 100644 --- a/deployments/qat_plugin/base/intel-qat-plugin.yaml +++ b/deployments/qat_plugin/base/intel-qat-plugin.yaml @@ -21,7 +21,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: intel/intel-qat-plugin:0.26.0 + image: intel/intel-qat-plugin:0.26.1 securityContext: seLinuxOptions: type: "container_device_plugin_t" diff --git a/deployments/qat_plugin/overlays/qat_initcontainer/qat_initcontainer.yaml b/deployments/qat_plugin/overlays/qat_initcontainer/qat_initcontainer.yaml index 648656007..1cfdc375c 100644 --- a/deployments/qat_plugin/overlays/qat_initcontainer/qat_initcontainer.yaml +++ b/deployments/qat_plugin/overlays/qat_initcontainer/qat_initcontainer.yaml @@ -12,7 +12,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: intel/intel-qat-initcontainer:0.26.0 + image: intel/intel-qat-initcontainer:0.26.1 securityContext: readOnlyRootFilesystem: true privileged: true diff --git a/deployments/sgx_admissionwebhook/manager/manager.yaml b/deployments/sgx_admissionwebhook/manager/manager.yaml index f103f3435..f1864dbba 100644 --- a/deployments/sgx_admissionwebhook/manager/manager.yaml +++ b/deployments/sgx_admissionwebhook/manager/manager.yaml @@ -16,7 +16,7 @@ spec: control-plane: controller-manager spec: containers: - - image: intel/intel-sgx-admissionwebhook:0.26.0 + - image: intel/intel-sgx-admissionwebhook:0.26.1 imagePullPolicy: IfNotPresent name: manager securityContext: diff --git a/deployments/sgx_plugin/base/intel-sgx-plugin.yaml b/deployments/sgx_plugin/base/intel-sgx-plugin.yaml index 17f7cb465..79e171232 100644 --- a/deployments/sgx_plugin/base/intel-sgx-plugin.yaml +++ b/deployments/sgx_plugin/base/intel-sgx-plugin.yaml @@ -16,7 +16,7 @@ spec: automountServiceAccountToken: false containers: - name: intel-sgx-plugin - image: intel/intel-sgx-plugin:0.26.0 + image: intel/intel-sgx-plugin:0.26.1 securityContext: seLinuxOptions: type: "container_device_plugin_t" diff --git a/deployments/sgx_plugin/overlays/epc-hook-initcontainer/add-epc-nfd-initcontainer.yaml b/deployments/sgx_plugin/overlays/epc-hook-initcontainer/add-epc-nfd-initcontainer.yaml index 50e9859cc..c8512a917 100644 --- a/deployments/sgx_plugin/overlays/epc-hook-initcontainer/add-epc-nfd-initcontainer.yaml +++ b/deployments/sgx_plugin/overlays/epc-hook-initcontainer/add-epc-nfd-initcontainer.yaml @@ -7,7 +7,7 @@ spec: spec: initContainers: - name: intel-sgx-initcontainer - image: intel/intel-sgx-initcontainer:0.26.0 + image: intel/intel-sgx-initcontainer:0.26.1 imagePullPolicy: IfNotPresent securityContext: readOnlyRootFilesystem: true diff --git a/deployments/sgx_plugin/overlays/epc-nfd/add-epc-nfd-initcontainer.yaml b/deployments/sgx_plugin/overlays/epc-nfd/add-epc-nfd-initcontainer.yaml index 50e9859cc..c8512a917 100644 --- a/deployments/sgx_plugin/overlays/epc-nfd/add-epc-nfd-initcontainer.yaml +++ b/deployments/sgx_plugin/overlays/epc-nfd/add-epc-nfd-initcontainer.yaml @@ -7,7 +7,7 @@ spec: spec: initContainers: - name: intel-sgx-initcontainer - image: intel/intel-sgx-initcontainer:0.26.0 + image: intel/intel-sgx-initcontainer:0.26.1 imagePullPolicy: IfNotPresent securityContext: readOnlyRootFilesystem: true diff --git a/deployments/sgx_plugin/overlays/epc-register/init-daemonset.yaml b/deployments/sgx_plugin/overlays/epc-register/init-daemonset.yaml index c06900daf..7b2b74bd6 100644 --- a/deployments/sgx_plugin/overlays/epc-register/init-daemonset.yaml +++ b/deployments/sgx_plugin/overlays/epc-register/init-daemonset.yaml @@ -16,7 +16,7 @@ spec: serviceAccountName: sgx-plugin containers: - name: sgx-node-init - image: intel/intel-sgx-initcontainer:0.26.0 + image: intel/intel-sgx-initcontainer:0.26.1 imagePullPolicy: IfNotPresent command: - /usr/local/bin/sgx-sw/intel-sgx-epchook diff --git a/deployments/vpu_plugin/base/intel-vpu-plugin.yaml b/deployments/vpu_plugin/base/intel-vpu-plugin.yaml index e31de379a..354117f42 100644 --- a/deployments/vpu_plugin/base/intel-vpu-plugin.yaml +++ b/deployments/vpu_plugin/base/intel-vpu-plugin.yaml @@ -21,7 +21,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: intel/intel-vpu-plugin:0.26.0 + image: intel/intel-vpu-plugin:0.26.1 imagePullPolicy: IfNotPresent securityContext: readOnlyRootFilesystem: true diff --git a/deployments/xpumanager_sidecar/kustom/kustom_xpumanager.yaml b/deployments/xpumanager_sidecar/kustom/kustom_xpumanager.yaml index 041c21a6e..5737b8ff1 100644 --- a/deployments/xpumanager_sidecar/kustom/kustom_xpumanager.yaml +++ b/deployments/xpumanager_sidecar/kustom/kustom_xpumanager.yaml @@ -13,7 +13,7 @@ spec: path: "/etc/kubernetes/node-feature-discovery/features.d/" containers: - name: xelink-sidecar - image: intel/intel-xpumanager-sidecar:0.26.0 + image: intel/intel-xpumanager-sidecar:0.26.1 imagePullPolicy: Always args: - -v=2 diff --git a/pkg/apis/deviceplugin/v1/webhook_common.go b/pkg/apis/deviceplugin/v1/webhook_common.go index e0af950de..c41e70fd6 100644 --- a/pkg/apis/deviceplugin/v1/webhook_common.go +++ b/pkg/apis/deviceplugin/v1/webhook_common.go @@ -16,15 +16,23 @@ package v1 import ( "path/filepath" + "regexp" "strings" "github.com/pkg/errors" "k8s.io/apimachinery/pkg/util/version" ) +const sha256RE = "@sha256:[0-9a-f]{64}$" + // common functions for webhooks func validatePluginImage(image, expectedImageName string, expectedMinVersion *version.Version) error { + imageRe := regexp.MustCompile(expectedImageName + sha256RE) + if imageRe.MatchString(image) { + return nil + } + // Ignore registry, vendor and extract the image name with the tag parts := strings.SplitN(filepath.Base(image), ":", 2) if len(parts) != 2 { @@ -34,13 +42,14 @@ func validatePluginImage(image, expectedImageName string, expectedMinVersion *ve imageName := parts[0] versionStr := parts[1] - if imageName != expectedImageName { - return errors.Errorf("incorrect image name %q. Make sure you use '/%s:'", imageName, expectedImageName) + // If user provided faulty SHA digest, the image name may include @sha256 suffix so strip it. + if strings.TrimSuffix(imageName, "@sha256") != expectedImageName { + return errors.Errorf("incorrect image name %q. Make sure you use '/%s'", imageName, expectedImageName) } ver, err := version.ParseSemantic(versionStr) if err != nil { - return errors.Wrapf(err, "unable to parse version %q", versionStr) + return errors.Wrapf(err, "unable to parse version %q. Make sure it's either valid SHA digest or semver tag", versionStr) } if !ver.AtLeast(expectedMinVersion) { diff --git a/pkg/controllers/fpga/controller_test.go b/pkg/controllers/fpga/controller_test.go index 539d2e61c..835907d49 100644 --- a/pkg/controllers/fpga/controller_test.go +++ b/pkg/controllers/fpga/controller_test.go @@ -181,7 +181,7 @@ func TestNewDaemonSetFPGA(t *testing.T) { plugin := &devicepluginv1.FpgaDevicePlugin{ Spec: devicepluginv1.FpgaDevicePluginSpec{ - InitImage: "intel/intel-fpga-initcontainer:0.26.0", + InitImage: "intel/intel-fpga-initcontainer:0.26.1", }, } diff --git a/pkg/controllers/gpu/controller.go b/pkg/controllers/gpu/controller.go index 54705d6b4..71c3a7746 100644 --- a/pkg/controllers/gpu/controller.go +++ b/pkg/controllers/gpu/controller.go @@ -152,6 +152,8 @@ func (c *controller) NewDaemonSet(rawObj client.Object) *apps.DaemonSet { daemonSet.Spec.Template.Spec.ServiceAccountName = serviceAccountName addVolumeIfMissing(&daemonSet.Spec.Template.Spec, "podresources", "/var/lib/kubelet/pod-resources", v1.HostPathDirectory) addVolumeMountIfMissing(&daemonSet.Spec.Template.Spec, "podresources", "/var/lib/kubelet/pod-resources") + addVolumeIfMissing(&daemonSet.Spec.Template.Spec, "kubeletcrt", "/var/lib/kubelet/pki/kubelet.crt", v1.HostPathFileOrCreate) + addVolumeMountIfMissing(&daemonSet.Spec.Template.Spec, "kubeletcrt", "/var/lib/kubelet/pki/kubelet.crt") } return daemonSet diff --git a/pkg/controllers/gpu/controller_test.go b/pkg/controllers/gpu/controller_test.go index 41d9ef689..d40ab741a 100644 --- a/pkg/controllers/gpu/controller_test.go +++ b/pkg/controllers/gpu/controller_test.go @@ -74,6 +74,14 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet }, }, }, + { + Name: "HOST_IP", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + FieldPath: "status.hostIP", + }, + }, + }, }, Args: getPodArgs(devicePlugin), Image: devicePlugin.Spec.Image, @@ -145,6 +153,8 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet daemonSet.Spec.Template.Spec.ServiceAccountName = serviceAccountName addVolumeIfMissing(&daemonSet.Spec.Template.Spec, "podresources", "/var/lib/kubelet/pod-resources", v1.HostPathDirectory) addVolumeMountIfMissing(&daemonSet.Spec.Template.Spec, "podresources", "/var/lib/kubelet/pod-resources") + addVolumeIfMissing(&daemonSet.Spec.Template.Spec, "kubeletcrt", "/var/lib/kubelet/pki/kubelet.crt", v1.HostPathFileOrCreate) + addVolumeMountIfMissing(&daemonSet.Spec.Template.Spec, "kubeletcrt", "/var/lib/kubelet/pki/kubelet.crt") } return &daemonSet @@ -189,7 +199,7 @@ func TestNewDamonSetGPU(t *testing.T) { } if tc.isInitImage { - plugin.Spec.InitImage = "intel/intel-gpu-initcontainer:0.26.0" + plugin.Spec.InitImage = "intel/intel-gpu-initcontainer:0.26.1" } t.Run(tc.name, func(t *testing.T) { diff --git a/pkg/controllers/reconciler.go b/pkg/controllers/reconciler.go index 52e2f1b4e..2ab216849 100644 --- a/pkg/controllers/reconciler.go +++ b/pkg/controllers/reconciler.go @@ -37,7 +37,7 @@ import ( var ( bKeeper = &bookKeeper{} - ImageMinVersion = versionutil.MustParseSemantic("0.26.0") + ImageMinVersion = versionutil.MustParseSemantic("0.26.1") ) func init() { @@ -73,6 +73,7 @@ func GetDevicePluginCount(pluginKind string) int { // +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;list;watch;create;delete // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=get;list;watch;create;delete // +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch +// +kubebuilder:rbac:groups="",resources=nodes/proxy,verbs=get;list // +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=create // +kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,resourceNames=privileged,verbs=use // +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,resourceNames=d1c7b6d5.intel.com,verbs=get;update diff --git a/pkg/deviceplugin/server.go b/pkg/deviceplugin/server.go index 56e03cf4e..7f02b79e8 100644 --- a/pkg/deviceplugin/server.go +++ b/pkg/deviceplugin/server.go @@ -143,6 +143,9 @@ func (srv *server) Allocate(ctx context.Context, rqt *pluginapi.AllocateRequest) for _, crqt := range rqt.ContainerRequests { cresp := new(pluginapi.ContainerAllocateResponse) + cresp.Envs = map[string]string{} + cresp.Annotations = map[string]string{} + for _, id := range crqt.DevicesIDs { dev, ok := srv.devices[id] if !ok { @@ -161,14 +164,10 @@ func (srv *server) Allocate(ctx context.Context, rqt *pluginapi.AllocateRequest) cresp.Mounts = append(cresp.Mounts, &dev.mounts[i]) } - cresp.Envs = map[string]string{} - for key, value := range dev.envs { cresp.Envs[key] = value } - cresp.Annotations = map[string]string{} - for key, value := range dev.annotations { cresp.Annotations[key] = value } diff --git a/scripts/test-image-base-layer.sh b/scripts/test-image-base-layer.sh index 6e16209cf..9d6b7af0c 100755 --- a/scripts/test-image-base-layer.sh +++ b/scripts/test-image-base-layer.sh @@ -20,9 +20,13 @@ fi echo "Testing $IMG base layer" if [ -z "${BUILDER}" ] || [ "${BUILDER}" = 'docker' ] ; then + if [ -z "$(docker image ls -q gcr.io/distroless/static:latest)" ]; then + docker pull gcr.io/distroless/static:latest + fi distroless_base=$(docker inspect --format='{{index .RootFS.Layers 0}}' "gcr.io/distroless/static") || die "failed to inspect gcr.io/distroless/static" img_base=$(docker inspect --format='{{index .RootFS.Layers 0}}' "$IMG") || die "failed to inspect $IMG" elif [ "${BUILDER}" = 'buildah' ] ; then + buildah images -q gcr.io/distroless/static:latest 2>/dev/null || buildah pull gcr.io/distroless/static:latest distroless_base=$(buildah inspect --type image --format='{{index .OCIv1.RootFS.DiffIDs 0}}' "gcr.io/distroless/static") || die "failed to inspect gcr.io/distroless/static" img_base=$(buildah inspect --type image --format='{{index .OCIv1.RootFS.DiffIDs 0}}' "$IMG") || die "failed to inspect $IMG" else diff --git a/test/e2e/fpga/fpga.go b/test/e2e/fpga/fpga.go index b243f57bc..06f4c7920 100644 --- a/test/e2e/fpga/fpga.go +++ b/test/e2e/fpga/fpga.go @@ -102,7 +102,7 @@ func runTestCase(fmw *framework.Framework, pluginKustomizationPath, mappingsColl } resource = v1.ResourceName(podResource) - image := "intel/opae-nlb-demo:0.26.0" + image := "intel/opae-nlb-demo:0.26.1" ginkgo.By("submitting a pod requesting correct FPGA resources")