diff --git a/clr-k8s-examples/9-multi-network/Dockerfile b/clr-k8s-examples/9-multi-network/Dockerfile index 3247acaf..6cc7a3c5 100644 --- a/clr-k8s-examples/9-multi-network/Dockerfile +++ b/clr-k8s-examples/9-multi-network/Dockerfile @@ -16,10 +16,18 @@ RUN git clone -q https://github.com/intel/sriov-network-device-plugin.git /go/sr WORKDIR /go/src/github.com/intel/sriov-network-device-plugin RUN make +# Build vfioveth plugin +FROM busybox as vfioveth +RUN wget -O /bin/jq https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 +COPY cni/vfioveth /bin/vfioveth +RUN chmod +x /bin/vfioveth /bin/jq + # Final image FROM centos/systemd WORKDIR /tmp/cni/bin COPY --from=multus /go/src/github.com/intel/multus-cni/bin/multus . COPY --from=sriov-cni /go/src/github.com/intel-corp/sriov-cni/bin/sriov . +COPY --from=vfioveth /bin/vfioveth . +COPY --from=vfioveth /bin/jq . WORKDIR /usr/bin COPY --from=sriov-dp /go/src/github.com/intel/sriov-network-device-plugin/build/sriovdp . diff --git a/clr-k8s-examples/9-multi-network/README.md b/clr-k8s-examples/9-multi-network/README.md index 909e8368..aaf97c74 100644 --- a/clr-k8s-examples/9-multi-network/README.md +++ b/clr-k8s-examples/9-multi-network/README.md @@ -9,21 +9,20 @@ directories on the host with the necessary binaries and configuration files. ### Customization -The device plugin will register the SR-IOV enabled devices on the host, specified -as `rootDevices` in [sriov-conf.yaml](clr-k8s-examples/9-multi-network/sriov-conf.yaml). -Helper [systemd](clr-k8s-examples/9-multi-network/systemd/sriov.service) example config -is provided, which enables SR-IOV for the above `rootDevices` +The device plugin will register the SR-IOV enabled devices on the host, specified as +`rootDevices` in [sriov-conf.yaml](sriov-conf.yaml). 
Helper [systemd unit](systemd/sriov.service) +file is provided, which enables SR-IOV for the above `rootDevices` > NOTE: This assumes homogenous nodes in the cluster ### Install -To install and configure `multus-cni` on all nodes, along with `sriov-cni` and -`sriov-network-device-plugin` : +To install and configure `multus-cni` on all nodes, along with +`sriov-cni`, `vfioveth-cni` and `sriov-network-device-plugin` ```bash kubectl apply -f . -kubectl get nodes -o json | jq '.items[].status.allocatable' # should list "intel.com/sriov" +kubectl get nodes -o json | jq '.items[].status.allocatable' # should list "intel.com/sriov_*" ``` ## Tests @@ -34,7 +33,7 @@ To test if default connectivity is working ```bash kubectl apply -f test/pod.yaml -kubectl exec test -- ip a # should see one interface only +kubectl exec test -- ip a # should see one interface only ``` ### Bridge @@ -43,8 +42,8 @@ To test multus with second interface created by `bridge` plugin ```bash kubectl apply -f test/bridge -kubectl exec test-bridge -- ip a # should see two interfaces -ip a show mynet # bridge created if it doesnt exist already +kubectl exec test-bridge -- ip a # should see two interfaces +ip a show mynet # bridge created on host if it doesn't exist already ``` ### SR-IOV @@ -53,5 +52,9 @@ To test multus with second interface created by `sriov` plugin ```bash kubectl apply -f test/sriov -kubectl exec test-sriov -- ip a # second interface is a VF + +kubectl exec test-sriov -- ip a # second interface is a VF + +kubectl exec test-sriov-dpdk -- ip a # veth pair with details of VF +kubectl exec test-sriov-dpdk -- ls -l /dev/vfio ```
diff --git a/clr-k8s-examples/9-multi-network/cni/vfioveth b/clr-k8s-examples/9-multi-network/cni/vfioveth
new file mode 100755
index 00000000..d9b6876b
--- /dev/null
+++ b/clr-k8s-examples/9-multi-network/cni/vfioveth
@@ -0,0 +1,130 @@
+#!/bin/bash
+#
+# vfioveth - CNI plugin that represents a VF (identified by "deviceID" in the
+# network configuration) as a veth pair inside the pod's network namespace.
+# The pod-side interface gets the VF's MAC address and an address from the
+# configured IPAM plugin.
+#
+# Input:  network configuration JSON on stdin; CNI_COMMAND, CNI_CONTAINERID,
+#         CNI_NETNS, CNI_IFNAME and CNI_PATH in the environment.
+# Output: on ADD, result JSON on the caller's stdout (saved as fd 3). All
+#         other output, including the xtrace, is appended to
+#         /var/log/<script name>.log.
+
+set -o xtrace
+set -o errexit
+set -o pipefail
+set -o nounset
+
+# Keep the caller's stdout on fd 3 for the CNI result; log everything else.
+exec 3>&1
+exec &>>"/var/log/$(basename "$0").log"
+
+PATH="$CNI_PATH:$(dirname "${BASH_SOURCE[0]}"):$PATH"
+CNI_CONF=$(cat)
+readonly NETNS_DIR=/var/run/netns
+
+# Print the name of the veth peer paired with interface $1.
+get_peer_name() {
+  printf '%s\n' "$1-vdev"
+}
+
+# Print the MAC address that the parent PF reports for the VF whose PCI
+# address is $1. Returns non-zero when the PF, VF index or MAC is not found.
+get_mac_with_vfpci() {
+  local vfpci=$1
+  local pf_dev netdev link target pf_name="" idx="" mac
+
+  # physfn links a VF's PCI device to its parent PF.
+  pf_dev=$(readlink -f "/sys/bus/pci/devices/$vfpci/physfn") || return 1
+
+  # Use the first network interface exposed by the PF.
+  for netdev in "$pf_dev"/net/*; do
+    [[ -e "$netdev" ]] || continue
+    pf_name=${netdev##*/}
+    break
+  done
+
+  # The PF lists its VFs as virtfn<N> symlinks; N is the index `ip link` uses.
+  for link in "$pf_dev"/virtfn*; do
+    target=$(readlink "$link") || continue
+    if [[ "${target##*/}" == "$vfpci" ]]; then
+      idx=${link##*/virtfn}
+      break
+    fi
+  done
+
+  if [[ -z "$pf_name" || -z "$idx" ]]; then
+    echo "vfioveth: cannot find PF netdev or VF index for $vfpci" >&2
+    return 1
+  fi
+
+  mac=$(ip link show dev "$pf_name" |
+    awk -v idx="$idx" '$1=="vf" && $2==idx {print substr($4,1,17)}') || return 1
+  if [[ -z "$mac" ]]; then
+    echo "vfioveth: no MAC reported for VF $idx of $pf_name" >&2
+    return 1
+  fi
+  printf '%s\n' "$mac"
+}
+
+# Run the IPAM plugin named by .ipam.type with the network configuration on
+# its stdin (CNI_COMMAND is inherited) and print its output as compact JSON.
+ipam() {
+  local plugin res
+  plugin=$(jq -er '.ipam.type' <<<"$CNI_CONF") || return 1
+  res=$("$plugin" <<<"$CNI_CONF" | jq -c '.') || return 1
+  printf '%s\n' "$res"
+}
+
+# Create the veth pair in the pod's network namespace and configure it with
+# the VF's MAC address and the IP address given as $1.
+add_pair_ns() {
+  local ip=$1
+  local vfpci mac peer
+  vfpci=$(jq -er '.deviceID' <<<"$CNI_CONF") || return 1
+  mac=$(get_mac_with_vfpci "$vfpci") || return 1
+  peer=$(get_peer_name "$CNI_IFNAME")
+
+  # Alias the pod's namespace under the container ID for `ip netns exec`.
+  mkdir -p "$NETNS_DIR"
+  ln -sfT "$CNI_NETNS" "$NETNS_DIR/$CNI_CONTAINERID"
+
+  ip netns exec "$CNI_CONTAINERID" ip link add "$CNI_IFNAME" type veth peer name "$peer"
+  ip netns exec "$CNI_CONTAINERID" ip link set "$CNI_IFNAME" addr "$mac" up
+  ip netns exec "$CNI_CONTAINERID" ip link set "$peer" up
+  ip netns exec "$CNI_CONTAINERID" ip addr add "$ip" dev "$CNI_IFNAME"
+}
+
+# Remove the veth pair from the pod's namespace and drop the namespace alias
+# created by add_pair_ns.
+delete_pair_ns() {
+  local ns_alias="$NETNS_DIR/$CNI_CONTAINERID"
+  # Recreate the alias if it is gone but the runtime still names the namespace.
+  if [[ ! -e "$ns_alias" && -n "${CNI_NETNS:-}" ]]; then
+    mkdir -p "$NETNS_DIR"
+    ln -sfT "$CNI_NETNS" "$ns_alias"
+  fi
+  ip netns exec "$CNI_CONTAINERID" ip link del "$CNI_IFNAME"
+  rm -f -- "$ns_alias"
+}
+
+case "$CNI_COMMAND" in
+ADD)
+  res=$(ipam)
+  ip=$(jq -er '.ip4.ip' <<<"$res")
+  add_pair_ns "$ip"
+  jq -c --arg ip "$ip" '.ip4.ip = $ip' <<<'{"cniVersion":"0.2.0"}' >&3
+  ;;
+DEL)
+  # Best effort: attempt both steps even if the first one fails.
+  set +o errexit
+  ipam
+  delete_pair_ns
+  set -o errexit
+  ;;
+*)
+  echo "CNI_COMMAND=[ADD|DEL] only supported"
+  exit 1
+  ;;
+esac
diff --git a/clr-k8s-examples/9-multi-network/multus-sriov-ds.yaml b/clr-k8s-examples/9-multi-network/multus-sriov-ds.yaml index b4fdd933..b6c1a77f 100644 --- a/clr-k8s-examples/9-multi-network/multus-sriov-ds.yaml +++ b/clr-k8s-examples/9-multi-network/multus-sriov-ds.yaml @@ -96,6 +96,8 @@ data: { "name": "multus-cni-network", "type": "multus", + "logFile": "/var/log/multus.log", + "logLevel": "debug", "kubeconfig":
"/etc/cni/net.d/multus-kubeconfig", "delegates": [ $MASTER_PLUGIN_JSON @@ -132,10 +134,10 @@ spec: spec: initContainers: - name: multus - image: ngick8stesting/aio-cni:k8s-1.13 + image: krsna1729/multus-sriov:k8s-1.13 command: [ "bash", "-c" ] args: - - cp /tmp/cni/bin/{multus,sriov} /host/opt/cni/bin/; + - cp /tmp/cni/bin/{multus,sriov,vfioveth,jq} /host/opt/cni/bin/; /tmp/multus/install-multus-conf.sh; /tmp/multus/install-certs.sh; systemctl stop kubelet; @@ -160,7 +162,7 @@ spec: mountPath: /run/systemd containers: - name: sriovdp - image: ngick8stesting/aio-cni:k8s-1.13 + image: krsna1729/multus-sriov:k8s-1.13 command: [ "sh", "-c" ] args: - /usr/bin/sriovdp --logtostderr -v 10; diff --git a/clr-k8s-examples/9-multi-network/sriov-conf.yaml b/clr-k8s-examples/9-multi-network/sriov-conf.yaml index 68b853d1..01b12aa3 100644 --- a/clr-k8s-examples/9-multi-network/sriov-conf.yaml +++ b/clr-k8s-examples/9-multi-network/sriov-conf.yaml @@ -10,10 +10,16 @@ data: "resourceList": [ { - "resourceName": "sriov", - "rootDevices": ["07:00.0", "07:00.1"], + "resourceName": "sriov_netdevice", + "rootDevices": ["07:00.0"], "sriovMode": true, "deviceType": "netdevice" + }, + { + "resourceName": "sriov_vfio", + "rootDevices": ["07:00.1"], + "sriovMode": true, + "deviceType": "vfio" } ] } diff --git a/clr-k8s-examples/9-multi-network/systemd/sriov.service b/clr-k8s-examples/9-multi-network/systemd/sriov.service index 97ad684e..b775d1e5 100644 --- a/clr-k8s-examples/9-multi-network/systemd/sriov.service +++ b/clr-k8s-examples/9-multi-network/systemd/sriov.service @@ -1,9 +1,10 @@ [Unit] -Description=Create VFs on ens785f0 ens785f1 interfaces +Description=Create VFs on ens785f0 (netdev) ens785f1 (vfio) interfaces [Service] Type=oneshot -ExecStart=/usr/bin/sriov.sh ens785f0 ens785f1 +ExecStart=/usr/bin/sriov.sh ens785f0 +ExecStart=/usr/bin/sriov.sh -b ens785f1 [Install] WantedBy=default.target diff --git a/clr-k8s-examples/9-multi-network/systemd/sriov.sh 
b/clr-k8s-examples/9-multi-network/systemd/sriov.sh
index 708747d0..edc2f48e 100755
--- a/clr-k8s-examples/9-multi-network/systemd/sriov.sh
+++ b/clr-k8s-examples/9-multi-network/systemd/sriov.sh
@@ -1,14 +1,75 @@
 #!/bin/bash
-# Usage: sriov.sh ens785f0 ens785f1 ...
+#
+# Reset the given SR-IOV PFs, create every VF they support and enable the VF
+# links. With -b, additionally bind each VF to vfio-pci.
+#
+# Usage: sriov.sh [-b] ens785f0 ens785f1 ...
 
-for pf in "$@"; do
-  echo "Resetting $pf"
-  echo 0 | tee /sys/class/net/$pf/device/sriov_numvfs
+set -o errexit
+set -o pipefail
+set -o nounset
+
+OPTIND=1
+bind="false"
+
+while getopts ":b" opt; do
+  case ${opt} in
+  b)
+    bind="true"
+    ;;
+  \?)
+    # An unknown option is an error: report it on stderr and exit non-zero so
+    # that callers do not mistake it for success.
+    echo "Usage: sriov.sh [-b] ens785f0 ens785f1 ..." >&2
+    echo "-b Bind to vfio-pci" >&2
+    exit 1
+    ;;
+  esac
+done
+shift $((OPTIND - 1))
 
-  NUM_VFS=$(cat /sys/class/net/$pf/device/sriov_totalvfs)
-  echo "Enabling $NUM_VFS for $pf"
+# Reset PF $1, create as many VFs as sriov_totalvfs reports and bring the PF up.
+setup_pf() {
+  local pf=$1
+  local NUM_VFS
+  echo "Resetting PF $pf"
+  echo 0 | tee "/sys/class/net/$pf/device/sriov_numvfs"
+  # Assigned separately from 'local' so a failed read aborts under errexit.
+  NUM_VFS=$(cat "/sys/class/net/$pf/device/sriov_totalvfs")
+  echo "Enabling $NUM_VFS VFs for $pf"
   echo $NUM_VFS | tee /sys/class/net/$pf/device/sriov_numvfs
   ip link set $pf up
-  #for ((i = 0 ; i < ${NUM_VFS} ; i++ )); do ip link set $pf vf $i spoofchk off; done
-  for ((i = 0; i < ${NUM_VFS}; i++)); do ip link set dev $pf vf $i state enable; done
+  # NOTE(review): presumably lets the new VFs appear before setup_vfs runs - confirm.
+  sleep 1
+}
+
+# Enable the link state of every VF of PF $1. When -b was given, also record
+# each VF's MAC on the PF and rebind the VF from its current driver to vfio-pci.
+setup_vfs() {
+  local pf=$1
+  local pfdev num_vfs idx vflink vfpci vfdev mac
+  # Canonical sysfs directory of the PF's PCI device.
+  pfdev=$(readlink -f "/sys/class/net/$pf/device")
+  num_vfs=$(cat "/sys/class/net/$pf/device/sriov_numvfs")
+  for ((idx = 0; idx < num_vfs; idx++)); do
+    ip link set dev "$pf" vf "$idx" state enable
+    if [[ "$bind" != "true" ]]; then continue; fi
+
+    # virtfn<idx> is a symlink from the PF to the VF's PCI device directory.
+    vflink=$(readlink "$pfdev/virtfn$idx")
+    vfpci=${vflink##*/}
+    vfdev="/sys/bus/pci/devices/$vfpci"
+    # Capture and set MAC of the VF before unbinding from linux, for later use in CNI
+    mac=$(cat "$vfdev"/net/*/address)
+    ip link set dev "$pf" vf "$idx" mac "$mac"
+    # Bind VF to vfio-pci
+    echo "$vfpci" >"$vfdev/driver/unbind"
+    echo "vfio-pci" >"$vfdev/driver_override"
+    echo "$vfpci" >/sys/bus/pci/drivers/vfio-pci/bind
+  done
+}
+
+for pf in "$@"; do
+  setup_pf "$pf"
+  setup_vfs "$pf"
 done
diff --git a/clr-k8s-examples/9-multi-network/test/sriov/0-sriov-net.yaml b/clr-k8s-examples/9-multi-network/test/sriov/0-sriov-net.yaml index 0d4e8d2a..244d7add 100644 --- a/clr-k8s-examples/9-multi-network/test/sriov/0-sriov-net.yaml +++ b/clr-k8s-examples/9-multi-network/test/sriov/0-sriov-net.yaml @@ -4,7 +4,7 @@ kind: NetworkAttachmentDefinition metadata: name: sriov-net annotations: - k8s.v1.cni.cncf.io/resourceName: intel.com/sriov + k8s.v1.cni.cncf.io/resourceName: intel.com/sriov_netdevice spec: config: '{ "type": "sriov", @@ -17,4 +17,23 @@ spec: "gateway": "198.19.0.1" } }' +--- +apiVersion: "k8s.cni.cncf.io/v1" +kind: NetworkAttachmentDefinition +metadata: + name: sriov-net-dpdk + annotations: + k8s.v1.cni.cncf.io/resourceName: intel.com/sriov_vfio +spec: + config: '{ + "type": "vfioveth", + "name": "sriov-net", + "ipam": { + "type": "host-local", + "subnet": "198.19.0.0/24", + "rangeStart": "198.19.0.100", + "rangeEnd": "198.19.0.200", + "gateway": "198.19.0.1" + } +}' diff --git a/clr-k8s-examples/9-multi-network/test/sriov/1-pod-sriov.yaml b/clr-k8s-examples/9-multi-network/test/sriov/1-pod-sriov.yaml index cd8cd4bb..a4404b1b 100644 --- a/clr-k8s-examples/9-multi-network/test/sriov/1-pod-sriov.yaml +++ b/clr-k8s-examples/9-multi-network/test/sriov/1-pod-sriov.yaml @@ -1,3 +1,4 @@ +--- apiVersion: v1 kind: Pod metadata: @@ -11,4 +12,20 @@ spec: command: [ "top" ] resources: limits: - intel.com/sriov: '1' + intel.com/sriov_netdevice: '1' +--- +apiVersion: v1 +kind: Pod +metadata: + name: test-sriov-dpdk + annotations: + k8s.v1.cni.cncf.io/networks: sriov-net-dpdk +spec: + containers: + - name: busy + image: busybox + command: [ "top" ] + resources: + limits: + intel.com/sriov_vfio: '1' +