diff --git a/go.mod b/go.mod index 95f499bd75..d8169dba89 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.16 require ( github.com/BurntSushi/toml v1.0.0 github.com/alessio/shellescape v1.4.1 + github.com/container-orchestrated-devices/container-device-interface v0.6.0 github.com/evanphx/json-patch/v5 v5.6.0 github.com/google/safetext v0.0.0-20220905092116-b49f7bc46da2 github.com/mattn/go-isatty v0.0.14 diff --git a/go.sum b/go.sum index 863b64ef57..4473a619e8 100644 --- a/go.sum +++ b/go.sum @@ -1,41 +1,123 @@ +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v1.0.0 h1:dtDWrepsVPfW9H/4y7dDgFc2MBUSeJhlaDtK13CxFlU= github.com/BurntSushi/toml v1.0.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/alessio/shellescape v1.4.1 h1:V7yhSDDn8LP4lc4jS8pFkt0zCnzVJlG5JXy9BVKJUX0= github.com/alessio/shellescape v1.4.1/go.mod h1:PZAiSCk0LJaZkiCSkPv8qIobYglO3FPpyFjDCtHLS30= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= +github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA= +github.com/container-orchestrated-devices/container-device-interface v0.6.0 h1:aWwcz/Ep0Fd7ZuBjQGjU/jdPloM7ydhMW13h85jZNvk= +github.com/container-orchestrated-devices/container-device-interface v0.6.0/go.mod h1:OQlgtJtDrOxSQ1BWODC8OZK1tzi9W69wek+Jy17ndzo= +github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= +github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cyphar/filepath-securejoin v0.2.3/go.mod 
h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/evanphx/json-patch/v5 v5.6.0 h1:b91NhWfaz02IuVxO9faSllyAtNXHMPkC5J8sJCLunww= github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4= +github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= +github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU= +github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/safetext v0.0.0-20220905092116-b49f7bc46da2 h1:SJ+NtwL6QaZ21U+IrK7d0gGgpjGGvd2kz+FzTHVzdqI= github.com/google/safetext v0.0.0-20220905092116-b49f7bc46da2/go.mod h1:Tv1PlzqC9t8wNnpPdctvtSUOPUUg4SHeE6vR1Ir2hmg= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= 
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs= +github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= +github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/opencontainers/runc v1.1.2/go.mod h1:Tj1hFw6eFWp/o33uxGf5yF2BX5yz2Z6iptFpuvbbKqc= +github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.1.0-rc.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI= +github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= +github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM= github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= 
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/spf13/cobra v1.4.0 h1:y+wJpx64xcgO1V+RcnwW0LEHxTKRi2ZDPSBjWnrg88Q= github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB84g= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= +github.com/urfave/cli v1.19.1/go.mod 
h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= +github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= +github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= +github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= +github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= +github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys 
v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b 
h1:QRR6H1YWRnHb4Y/HeNFCTJLFVxaq6wH4YuVdsUOr75U= gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= diff --git a/pkg/apis/config/v1alpha4/types.go b/pkg/apis/config/v1alpha4/types.go index 308a6853b8..9689faf22a 100644 --- a/pkg/apis/config/v1alpha4/types.go +++ b/pkg/apis/config/v1alpha4/types.go @@ -118,6 +118,10 @@ type Node struct { // binded to a host Port ExtraPortMappings []PortMapping `yaml:"extraPortMappings,omitempty" json:"extraPortMappings,omitempty"` + // CDIDevices lists the CDI device names (for example GPUs) to expose to this node. They are set through the `devices` config key and handed to the container runtime as `--device` flags, which Docker supports from v25. + // https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#container-device-interface-cdi-support + CDIDevices []string `yaml:"devices,omitempty" json:"devices,omitempty"` + // KubeadmConfigPatches are applied to the generated kubeadm config as // merge patches. The `kind` field must match the target object, and // if `apiVersion` is specified it will only be applied to matching objects. 
diff --git a/pkg/apis/config/v1alpha4/zz_generated.deepcopy.go b/pkg/apis/config/v1alpha4/zz_generated.deepcopy.go index b210133da5..b506dd0bba 100644 --- a/pkg/apis/config/v1alpha4/zz_generated.deepcopy.go +++ b/pkg/apis/config/v1alpha4/zz_generated.deepcopy.go @@ -141,6 +141,11 @@ func (in *Node) DeepCopyInto(out *Node) { *out = make([]PortMapping, len(*in)) copy(*out, *in) } + if in.CDIDevices != nil { + in, out := &in.CDIDevices, &out.CDIDevices + *out = make([]string, len(*in)) + copy(*out, *in) + } if in.KubeadmConfigPatches != nil { in, out := &in.KubeadmConfigPatches, &out.KubeadmConfigPatches *out = make([]string, len(*in)) diff --git a/pkg/cluster/internal/providers/docker/provision.go b/pkg/cluster/internal/providers/docker/provision.go index 6c644a3651..a3da3b5b83 100644 --- a/pkg/cluster/internal/providers/docker/provision.go +++ b/pkg/cluster/internal/providers/docker/provision.go @@ -255,6 +255,20 @@ func runArgsForNode(node *config.Node, clusterIPFamily config.ClusterIPFamily, n args = append(args, "-e", "KUBECONFIG=/etc/kubernetes/admin.conf") } + // Append CDI device args (used for GPU support) + if len(node.CDIDevices) > 0 { + // Require Docker >= 25 unless the version is reported as "dev"; NOTE: the major version is compared as a string, not a number + ver := Version() + if ver != "dev" && strings.Split(ver, ".")[0] < "25" { + return nil, errors.Errorf("using devices api in kind requires Docker >= v25, but found %q", ver) + } + + // Append args for each device + for _, device := range node.CDIDevices { + args = append(args, "--device", strings.TrimSpace(device)) + } + } + // finally, specify the image to run return append(args, node.Image), nil } diff --git a/pkg/cluster/internal/providers/docker/util.go b/pkg/cluster/internal/providers/docker/util.go index 2ec86d73fc..c2d3f558da 100644 --- a/pkg/cluster/internal/providers/docker/util.go +++ b/pkg/cluster/internal/providers/docker/util.go @@ -33,6 +33,16 @@ func IsAvailable() bool { return strings.HasPrefix(lines[0], "Docker version") } +// Version gets the version of docker available on the 
system +func Version() string { + cmd := exec.Command("docker", "version", "--format", "'{{.Server.Version}}'") + lines, err := exec.OutputLines(cmd) + if err != nil || len(lines) != 1 { + return "" + } + return strings.Trim(lines[0], "'") +} + // usernsRemap checks if userns-remap is enabled in dockerd func usernsRemap() bool { cmd := exec.Command("docker", "info", "--format", "'{{json .SecurityOptions}}'") diff --git a/pkg/cluster/internal/providers/podman/provision.go b/pkg/cluster/internal/providers/podman/provision.go index c240a29293..0322302485 100644 --- a/pkg/cluster/internal/providers/podman/provision.go +++ b/pkg/cluster/internal/providers/podman/provision.go @@ -212,6 +212,13 @@ func runArgsForNode(node *config.Node, clusterIPFamily config.ClusterIPFamily, n args..., ) + // Append CDI device args (used for GPU support) + if len(node.CDIDevices) > 0 { + for _, device := range node.CDIDevices { + args = append(args, "--device", strings.TrimSpace(device)) + } + } + // convert mounts and port mappings to container run args args = append(args, generateMountBindings(node.ExtraMounts...)...) mappingArgs, err := generatePortMappings(clusterIPFamily, node.ExtraPortMappings...) 
@@ -302,7 +309,6 @@ type podmanNetworks []struct { func getSubnets(networkName string) ([]string, error) { cmd := exec.Command("podman", "network", "inspect", networkName) out, err := exec.Output(cmd) - if err != nil { return nil, errors.Wrap(err, "failed to get subnets") } diff --git a/pkg/internal/apis/config/convert_v1alpha4.go b/pkg/internal/apis/config/convert_v1alpha4.go index 2df4b75138..077325eb15 100644 --- a/pkg/internal/apis/config/convert_v1alpha4.go +++ b/pkg/internal/apis/config/convert_v1alpha4.go @@ -56,6 +56,7 @@ func convertv1alpha4Node(in *v1alpha4.Node, out *Node) { out.ExtraMounts = make([]Mount, len(in.ExtraMounts)) out.ExtraPortMappings = make([]PortMapping, len(in.ExtraPortMappings)) out.KubeadmConfigPatchesJSON6902 = make([]PatchJSON6902, len(in.KubeadmConfigPatchesJSON6902)) + out.CDIDevices = make([]string, len(in.CDIDevices)) for i := range in.ExtraMounts { convertv1alpha4Mount(&in.ExtraMounts[i], &out.ExtraMounts[i]) @@ -68,6 +69,10 @@ func convertv1alpha4Node(in *v1alpha4.Node, out *Node) { for i := range in.KubeadmConfigPatchesJSON6902 { convertv1alpha4PatchJSON6902(&in.KubeadmConfigPatchesJSON6902[i], &out.KubeadmConfigPatchesJSON6902[i]) } + + for i := range in.CDIDevices { + out.CDIDevices[i] = in.CDIDevices[i] + } } func convertv1alpha4PatchJSON6902(in *v1alpha4.PatchJSON6902, out *PatchJSON6902) { diff --git a/pkg/internal/apis/config/types.go b/pkg/internal/apis/config/types.go index fed3000798..fe90222422 100644 --- a/pkg/internal/apis/config/types.go +++ b/pkg/internal/apis/config/types.go @@ -98,6 +98,10 @@ type Node struct { // binded to a host Port ExtraPortMappings []PortMapping + // CDIDevices lists the CDI device names (for example GPUs) to expose to this node; they are handed to the container runtime as `--device` flags, which Docker supports from v25. 
+ // https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#container-device-interface-cdi-support + CDIDevices []string + // KubeadmConfigPatches are applied to the generated kubeadm config as // strategic merge patches to `kustomize build` internally // https://github.com/kubernetes/community/blob/a9cf5c8f3380bb52ebe57b1e2dbdec136d8dd484/contributors/devel/sig-api-machinery/strategic-merge-patch.md diff --git a/pkg/internal/apis/config/validate.go b/pkg/internal/apis/config/validate.go index 68185d1579..95ebd00f07 100644 --- a/pkg/internal/apis/config/validate.go +++ b/pkg/internal/apis/config/validate.go @@ -22,6 +22,7 @@ import ( "regexp" "strings" + "github.com/container-orchestrated-devices/container-device-interface/pkg/parser" "sigs.k8s.io/kind/pkg/errors" "sigs.k8s.io/kind/pkg/internal/sets" ) @@ -114,6 +115,10 @@ func (n *Node) Validate() error { errs = append(errs, errors.New("image is a required field")) } + if err := validateDevices(n.CDIDevices); err != nil { + errs = append(errs, errors.Wrapf(err, "invalid devices")) + } + // validate extra port forwards for _, mapping := range n.ExtraPortMappings { if err := validatePort(mapping.HostPort); err != nil { @@ -192,6 +197,24 @@ func validatePortMappings(portMappings []PortMapping) error { return nil } +// validateDevices checks that every entry, after trimming surrounding whitespace, is non-empty and is accepted by parser.ParseQualifiedName; it returns an error for the first entry that fails, or nil. +func validateDevices(cdiDevices []string) error { + for _, device := range cdiDevices { + device := strings.TrimSpace(device) + // validate device string is not empty + if len(device) == 0 { + return errors.Errorf("invalid device string: '%v'. Empty Strings not allowed", device) + } + + // validate device string is valid; wrap rather than re-format so the parser error stays attached as the cause + _, _, _, err := parser.ParseQualifiedName(device) + if err != nil { + return errors.Wrapf(err, "invalid device string: '%v'", device) + } + } + return nil +} + func validatePort(port int32) error { // NOTE: -1 is a special value for auto-selecting the port in the container // backend where possible as opposed to in kind itself. 
diff --git a/pkg/internal/apis/config/validate_test.go b/pkg/internal/apis/config/validate_test.go index 6d4ef54a0e..e1e750968b 100644 --- a/pkg/internal/apis/config/validate_test.go +++ b/pkg/internal/apis/config/validate_test.go @@ -18,10 +18,10 @@ package config import ( "fmt" - "sigs.k8s.io/kind/pkg/internal/assert" "testing" "sigs.k8s.io/kind/pkg/errors" + "sigs.k8s.io/kind/pkg/internal/assert" ) func TestClusterValidate(t *testing.T) { @@ -251,7 +251,7 @@ func TestClusterValidate(t *testing.T) { } for _, tc := range cases { - tc := tc //capture loop variable + tc := tc // capture loop variable t.Run(tc.Name, func(t *testing.T) { t.Parallel() err := tc.Cluster.Validate() @@ -343,6 +343,33 @@ func TestNodeValidate(t *testing.T) { }(), ExpectErrors: 1, }, + { + TestName: "Empty Devices", + Node: func() Node { + cfg := newDefaultedNode(ControlPlaneRole) + cfg.CDIDevices = []string{" ", ""} + return cfg + }(), + ExpectErrors: 1, + }, + { + TestName: "Invalid Device String", + Node: func() Node { + cfg := newDefaultedNode(ControlPlaneRole) + cfg.CDIDevices = []string{"thisdeviceisnotvalid"} + return cfg + }(), + ExpectErrors: 1, + }, + { + TestName: "Valid Devices", + Node: func() Node { + cfg := newDefaultedNode(ControlPlaneRole) + cfg.CDIDevices = []string{"vendor1.com/device=test", "nvidia.com/gpu=1", "nvidia.com/gpu=all", "vendor.com/foo=1", "foo.bar.baz/foo-bar123.B_az=all"} + return cfg + }(), + ExpectErrors: 0, + }, { TestName: "Invalid HostPort", Node: func() Node { @@ -360,7 +387,7 @@ func TestNodeValidate(t *testing.T) { } for _, tc := range cases { - tc := tc //capture loop variable + tc := tc // capture loop variable t.Run(tc.TestName, func(t *testing.T) { t.Parallel() err := tc.Node.Validate() @@ -414,7 +441,7 @@ func TestPortValidate(t *testing.T) { } for _, tc := range cases { - tc := tc //capture loop variable + tc := tc // capture loop variable t.Run(tc.TestName, func(t *testing.T) { t.Parallel() err := validatePort(tc.Port) @@ -537,7 +564,7 @@ 
func TestValidatePortMappings(t *testing.T) { } for _, tc := range cases { - tc := tc //capture loop variable + tc := tc // capture loop variable t.Run(tc.testName, func(t *testing.T) { t.Parallel() diff --git a/pkg/internal/apis/config/zz_generated.deepcopy.go b/pkg/internal/apis/config/zz_generated.deepcopy.go index 6c86691fca..1cf067ea43 100644 --- a/pkg/internal/apis/config/zz_generated.deepcopy.go +++ b/pkg/internal/apis/config/zz_generated.deepcopy.go @@ -140,6 +140,11 @@ func (in *Node) DeepCopyInto(out *Node) { *out = make([]PortMapping, len(*in)) copy(*out, *in) } + if in.CDIDevices != nil { + in, out := &in.CDIDevices, &out.CDIDevices + *out = make([]string, len(*in)) + copy(*out, *in) + } if in.KubeadmConfigPatches != nil { in, out := &in.KubeadmConfigPatches, &out.KubeadmConfigPatches *out = make([]string, len(*in)) diff --git a/site/content/docs/user/configuration.md b/site/content/docs/user/configuration.md index 64bb6b5574..aa9676f3ea 100644 --- a/site/content/docs/user/configuration.md +++ b/site/content/docs/user/configuration.md @@ -8,7 +8,7 @@ menu: toc: true description: |- This guide covers how to configure KIND cluster creation. - + We know this is currently a bit lacking and will expand it over time - PRs welcome! --- ## Getting Started @@ -281,10 +281,71 @@ nodes: image: kindest/node:v1.16.4@sha256:b91a2c2317a000f3a783489dfb755064177dbc3a0b2f4147d50f04825d016f55 {{< /codeFromInline >}} -[Reference](https://kind.sigs.k8s.io/docs/user/quick-start/#creating-a-cluster) +[Reference](https://kind.sigs.k8s.io/docs/user/quick-start/#creating-a-cluster) **Note**: Kubernetes versions are expressed as x.y.z, where x is the major version, y is the minor version, and z is the patch version, following [Semantic Versioning](https://semver.org/) terminology. 
For more information, see [Kubernetes Release Versioning.](https://github.com/kubernetes/sig-release/blob/master/release-engineering/versioning.md#kubernetes-release-versioning) +### GPU Support + +There are two ways to map GPUs into a kind cluster. The first is using the `devices` API and the second is using the `extraMounts` API. + +#### Using the Devices API + +As a prerequisite, you need the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed on the host. + +Using `devices` for GPU support requires Docker v25 or later. A [CDI specification](https://github.com/container-orchestrated-devices/container-device-interface) will need to be generated for your device. For NVIDIA GPU devices, see the notes [here.](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#container-device-interface-cdi-support) + +GPU devices can be mapped to kind node containers with the `devices` API: + +All GPUs mapped to a single control-plane: + +{{< codeFromInline lang="yaml" >}} +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane + devices: + - "nvidia.com/gpu=all" +{{< /codeFromInline >}} + +Specific GPUs mapped to specific worker nodes based on index: + +{{< codeFromInline lang="yaml" >}} +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane +- role: worker + devices: + - "nvidia.com/gpu=0" +- role: worker + devices: + - "nvidia.com/gpu=1" +{{< /codeFromInline >}} + +#### Using the Extra Mounts API + +GPUs can also be mapped using the `extraMounts` API. This method passes a list of GPUs to inject as volume mounts rather than the environment variable `NVIDIA_VISIBLE_DEVICES`. + +Steps to enable this: + +1. 
Add nvidia as your default runtime in `/etc/docker/daemon.json`. If you have the [NVIDIA Container Toolkit installed](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html), this can be done with: `sudo nvidia-ctk runtime configure --runtime=docker --set-as-default` +1. Restart docker (as necessary) +1. Set `accept-nvidia-visible-devices-as-volume-mounts = true` in `/etc/nvidia-container-runtime/config.toml` +1. Add the `extraMounts` to any kind nodes you want to have access to all GPUs in the system: + +{{< codeFromInline lang="yaml" >}} +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane + extraMounts: + - hostPath: /dev/null + containerPath: /var/run/nvidia-container-devices/all +{{< /codeFromInline >}} + +Note: this method only supports adding `all` GPUs to a single node. If you want to add specific GPUs to specific nodes, you will need to use the `devices` API. + ### Extra Mounts Extra mounts can be used to pass through storage on the host to a kind node @@ -300,10 +361,10 @@ For more information see the [Docker file sharing guide.](https://docs.docker.co ### Extra Port Mappings -Extra port mappings can be used to port forward to the kind nodes. This is a -cross-platform option to get traffic into your kind cluster. +Extra port mappings can be used to port forward to the kind nodes. This is a +cross-platform option to get traffic into your kind cluster. -If you are running Docker without the Docker Desktop Application on Linux, you can simply send traffic to the node IPs from the host without extra port mappings. +If you are running Docker without the Docker Desktop Application on Linux, you can simply send traffic to the node IPs from the host without extra port mappings. With the installation of the Docker Desktop Application, whether it is on macOs, Windows or Linux, you'll want to use these. You may also want to see the [Ingress Guide]. 
@@ -401,11 +462,11 @@ nodes: ### Kubeadm Config Patches -KIND uses [`kubeadm`](/docs/design/principles/#leverage-existing-tooling) +KIND uses [`kubeadm`](/docs/design/principles/#leverage-existing-tooling) to configure cluster nodes. Formally KIND runs `kubeadm init` on the first control-plane node, we can customize the flags by using the kubeadm -[InitConfiguration](https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-init/#config-file) +[InitConfiguration](https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-init/#config-file) ([spec](https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3#InitConfiguration)) {{< codeFromInline lang="yaml" >}} @@ -436,9 +497,9 @@ nodes: enable-admission-plugins: NodeRestriction,MutatingAdmissionWebhook,ValidatingAdmissionWebhook {{< /codeFromInline >}} -On every additional node configured in the KIND cluster, +On every additional node configured in the KIND cluster, worker or control-plane (in HA mode), -KIND runs `kubeadm join` which can be configured using the +KIND runs `kubeadm join` which can be configured using the [JoinConfiguration](https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-join/#config-file) ([spec](https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3#JoinConfiguration))