Skip to content

Commit

Permalink
gpu: mount by-path directory
Browse files Browse the repository at this point in the history
oneCCL requires the /dev/dri/by-path folder to be available
to create a mapping between GPUs.

Signed-off-by: Tuomas Katila <[email protected]>
  • Loading branch information
tkatila committed Apr 6, 2023
1 parent 85b6795 commit 3c44bba
Show file tree
Hide file tree
Showing 4 changed files with 223 additions and 7 deletions.
18 changes: 11 additions & 7 deletions cmd/gpu_plugin/gpu_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ import (
)

const (
sysfsDrmDirectory = "/sys/class/drm"
devfsDriDirectory = "/dev/dri"
gpuDeviceRE = `^card[0-9]+$`
controlDeviceRE = `^controlD[0-9]+$`
vendorString = "0x8086"
sysfsDrmDirectory = "/sys/class/drm"
devfsDriDirectory = "/dev/dri"
devfsBypathDirectory = "/dev/dri/by-path"
gpuDeviceRE = `^card[0-9]+$`
controlDeviceRE = `^controlD[0-9]+$`
vendorString = "0x8086"

// Device plugin settings.
namespace = "gpu.intel.com"
Expand Down Expand Up @@ -338,15 +339,18 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
}

if len(nodes) > 0 {
deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)
ueventPath := path.Join(dp.sysfsDir, f.Name(), "device/drm/uevent")
byPathMounts := pluginutils.BypathMountsFromUevent(ueventPath, devfsBypathDirectory)

deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, byPathMounts, nil, nil)

for i := 0; i < dp.options.sharedDevNum; i++ {
devID := fmt.Sprintf("%s-%d", f.Name(), i)
// Currently only one device type (i915) is supported.
// TODO: check model ID to differentiate device models.
devTree.AddDevice(deviceType, devID, deviceInfo)

rmDevInfos[devID] = rm.NewDeviceInfo(nodes, nil, nil)
rmDevInfos[devID] = rm.NewDeviceInfo(nodes, byPathMounts, nil)
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions cmd/gpu_plugin/gpu_plugin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,16 @@ func TestScan(t *testing.T) {
devfsdirs: []string{"card0"},
expectedDevs: 1,
},
{
name: "one device with by-path links",
sysfsdirs: []string{"card0/device/drm/card0", "card0/device/drm/controlD64"},
sysfsfiles: map[string][]byte{
"card0/device/vendor": []byte("0x8086"),
"card0/device/drm/uevent": []byte("PCI_SLOT_NAME=00:11.22.3"),
},
devfsdirs: []string{"card0", "by-path/pci-00:11.22.3-card"},
expectedDevs: 1,
},
{
name: "sriov-1-pf-no-vfs + monitoring",
sysfsdirs: []string{"card0/device/drm/card0", "card0/device/drm/controlD64"},
Expand Down
81 changes: 81 additions & 0 deletions cmd/internal/pluginutils/bypath.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright 2022 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pluginutils

import (
"os"
"path"
"strings"

pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
)

const (
// pciSlotPrefix is the key (including the '=' separator) that starts the
// uevent line carrying a device's PCI slot name. readPciSlotFromUevent
// returns the text that follows this prefix on the first matching line.
pciSlotPrefix = "PCI_SLOT_NAME="
)

// BypathMountsFromUevent builds read-only mounts for the by-path entries that
// belong to a single PCI device.
//
// The device's PCI slot name is taken from the uevent file at ueventPath.
// Every entry of bypathDir whose name begins with "pci-<slot>" becomes one
// Mount whose container path equals its host path.
//
// It returns nil when the slot name cannot be determined, when bypathDir
// cannot be listed, or when no entry matches.
func BypathMountsFromUevent(ueventPath, bypathDir string) []pluginapi.Mount {
	slot := readPciSlotFromUevent(ueventPath)
	if slot == "" {
		return nil
	}

	entries, err := os.ReadDir(bypathDir)
	if err != nil {
		return nil
	}

	var (
		wantedPrefix = "pci-" + slot
		result       []pluginapi.Mount
	)

	for _, entry := range entries {
		name := entry.Name()
		if !strings.HasPrefix(name, wantedPrefix) {
			continue
		}

		// The entry is exposed at the same location inside the container
		// as on the host.
		target := path.Join(bypathDir, name)
		result = append(result, pluginapi.Mount{
			ContainerPath: target,
			HostPath:      target,
			ReadOnly:      true,
		})
	}

	return result
}

// readPciSlotFromUevent returns the value of the first PCI_SLOT_NAME entry in
// the uevent file at ueventPath, with surrounding whitespace removed.
//
// It returns an empty string when the file cannot be read or when it holds no
// PCI_SLOT_NAME entry.
func readPciSlotFromUevent(ueventPath string) string {
	data, err := os.ReadFile(ueventPath)
	if err != nil {
		// Best effort: without a readable uevent file no slot is known.
		return ""
	}

	for _, line := range strings.Split(string(data), "\n") {
		// Trim before matching so that CRLF line endings or stray spaces
		// neither hide the key nor leak into the returned slot name, which
		// callers use verbatim as a file-name prefix.
		line = strings.TrimSpace(line)
		if !strings.HasPrefix(line, pciSlotPrefix) {
			continue
		}

		return strings.TrimSpace(strings.TrimPrefix(line, pciSlotPrefix))
	}

	return ""
}
121 changes: 121 additions & 0 deletions cmd/internal/pluginutils/bypath_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Copyright 2022 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pluginutils

import (
"os"
"path"
"testing"

"k8s.io/utils/strings/slices"
)

// createTestFiles builds the on-disk fixture for a by-path test under root.
//
// It always creates the directory <root>/device. When ueventData is non-empty
// it is written to <root>/device/uevent. When bypathFiles is non-empty the
// directory <root>/by-path is created and one small regular file is written
// into it for every name in bypathFiles.
//
// It returns the device directory path and the by-path directory path; the
// latter is returned even when the directory was not created. Any filesystem
// error aborts the calling test.
func createTestFiles(t *testing.T, root, ueventData string, bypathFiles []string) (string, string) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	devPath := path.Join(root, "device")
	byPath := path.Join(root, "by-path")

	if err := os.Mkdir(devPath, os.ModePerm); err != nil {
		t.Fatalf("Mkdir failed: %s: %v", devPath, err)
	}

	if len(ueventData) > 0 {
		ueventFile := path.Join(devPath, "uevent")
		if err := os.WriteFile(ueventFile, []byte(ueventData), os.ModePerm); err != nil {
			t.Fatalf("WriteFile failed: %s: %v", ueventFile, err)
		}
	}

	// Leaving the by-path directory absent is deliberate: it lets callers
	// exercise the "directory cannot be listed" path.
	if len(bypathFiles) == 0 {
		return devPath, byPath
	}

	if err := os.Mkdir(byPath, os.ModePerm); err != nil {
		t.Fatalf("Mkdir failed: %s: %v", byPath, err)
	}

	for _, f := range bypathFiles {
		linkFile := path.Join(byPath, f)
		if err := os.WriteFile(linkFile, []byte{1}, os.ModePerm); err != nil {
			t.Fatalf("WriteFile failed: %s: %v", linkFile, err)
		}
	}

	return devPath, byPath
}

func TestBypath(t *testing.T) {
type testData struct {
ueventData string
bypathFiles []string
mountCount int
}

tds := []testData{
{
"PCI_DEVICE=foobar\nPCI_SLOT_NAME=0-1-2-3-3342\n",
[]string{"pci-0-1-2-3-3342-card", "pci-0-1-2-3-3342-render"},
2,
},
{
"PCI_DEVICE=foobar\nPCI_SLOT_NAME=0-1-2-3-3342\n",
[]string{"pci-0-1-2-3-4444-card", "pci-0-1-2-3-4444-render"},
0,
},
{
"PCI_DEVICE=foobar\n",
[]string{"pci-0-1-2-3-4444-card", "pci-0-1-2-3-4444-render"},
0,
},
{
"PCI_DEVICE=foobar\nPCI_SLOT_NAME=0-1-2-3-3342\n",
[]string{},
0,
},
{
"",
[]string{"pci-0-1-2-3-3342-card", "pci-0-1-2-3-3342-render"},
0,
},
}

for _, td := range tds {
root, err := os.MkdirTemp("", "test_by_path_mounting")
if err != nil {
t.Fatalf("can't create temporary directory: %+v", err)
}
// dirs/files need to be removed for the next test
defer os.RemoveAll(root)

devPath, byPath := createTestFiles(t, root, td.ueventData, td.bypathFiles)

mounts := BypathMountsFromUevent(path.Join(devPath, "uevent"), byPath)

if len(mounts) != td.mountCount {
t.Errorf("Wrong number of mounts %d vs. %d", len(mounts), td.mountCount)
}

absPaths := []string{}
for _, link := range td.bypathFiles {
absPaths = append(absPaths, path.Join(byPath, link))
}

for _, mount := range mounts {
if !slices.Contains(absPaths, mount.ContainerPath) {
t.Errorf("containerpath is incorrect: %s", mount.ContainerPath)
}

if !slices.Contains(absPaths, mount.HostPath) {
t.Errorf("hostpath is incorrect: %s", mount.HostPath)
}
}
}
}

0 comments on commit 3c44bba

Please sign in to comment.