Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable sriov-network-device-plugin vfio mode #37

Merged
merged 4 commits into from
Feb 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions clr-k8s-examples/9-multi-network/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,18 @@ RUN git clone -q https://github.com/intel/sriov-network-device-plugin.git /go/sr
WORKDIR /go/src/github.com/intel/sriov-network-device-plugin
RUN make

# Build vfioveth plugin
# Stage "vfioveth": gathers the vfioveth script and a jq binary under /bin
# and marks both executable.
FROM busybox as vfioveth
# Download the jq 1.6 linux64 release binary from GitHub.
# NOTE(review): the download is not checked against a checksum.
RUN wget -O /bin/jq https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64
# The script is taken from cni/vfioveth in the build context.
COPY cni/vfioveth /bin/vfioveth
RUN chmod +x /bin/vfioveth /bin/jq

# Final image
FROM centos/systemd
WORKDIR /tmp/cni/bin
COPY --from=multus /go/src/github.com/intel/multus-cni/bin/multus .
COPY --from=sriov-cni /go/src/github.com/intel-corp/sriov-cni/bin/sriov .
COPY --from=vfioveth /bin/vfioveth .
COPY --from=vfioveth /bin/jq .
WORKDIR /usr/bin
COPY --from=sriov-dp /go/src/github.com/intel/sriov-network-device-plugin/build/sriovdp .
25 changes: 14 additions & 11 deletions clr-k8s-examples/9-multi-network/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,20 @@ directories on the host with the necessary binaries and configuration files.

### Customization

The device plugin will register the SR-IOV enabled devices on the host, specified
as `rootDevices` in [sriov-conf.yaml](clr-k8s-examples/9-multi-network/sriov-conf.yaml).
Helper [systemd](clr-k8s-examples/9-multi-network/systemd/sriov.service) example config
is provided, which enables SR-IOV for the above `rootDevices`
The device plugin will register the SR-IOV enabled devices on the host, specified as
`rootDevices` in [sriov-conf.yaml](sriov-conf.yaml). A helper [systemd unit](systemd/sriov.service)
file is provided, which enables SR-IOV for the above `rootDevices`.

> NOTE: This assumes homogeneous nodes in the cluster

### Install

To install and configure `multus-cni` on all nodes, along with `sriov-cni` and
`sriov-network-device-plugin` :
To install and configure `multus-cni` on all nodes, along with
`sriov-cni`, `vfioveth-cni` and `sriov-network-device-plugin`:

```bash
kubectl apply -f .
kubectl get nodes -o json | jq '.items[].status.allocatable' # should list "intel.com/sriov"
kubectl get nodes -o json | jq '.items[].status.allocatable' # should list "intel.com/sriov_*"
```

## Tests
Expand All @@ -34,7 +33,7 @@ To test if default connectivity is working

```bash
kubectl apply -f test/pod.yaml
kubectl exec test -- ip a # should see one interface only
kubectl exec test -- ip a # should see one interface only
```

### Bridge
Expand All @@ -43,8 +42,8 @@ To test multus with second interface created by `bridge` plugin

```bash
kubectl apply -f test/bridge
kubectl exec test-bridge -- ip a # should see two interfaces
ip a show mynet # bridge created if it doesnt exist already
kubectl exec test-bridge -- ip a # should see two interfaces
ip a show mynet # bridge created on host if it doesn't exist already
```

### SR-IOV
Expand All @@ -53,5 +52,9 @@ To test multus with second interface created by `sriov` plugin

```bash
kubectl apply -f test/sriov
kubectl exec test-sriov -- ip a # second interface is a VF

kubectl exec test-sriov -- ip a # second interface is a VF

kubectl exec test-sriov-dpdk -- ip a # veth pair with details of VF
kubectl exec test-sriov-dpdk -- ls -l /dev/vfio
```
67 changes: 67 additions & 0 deletions clr-k8s-examples/9-multi-network/cni/vfioveth
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/bash -x
# Script prologue: strict mode, logging redirection, PATH setup, and reading
# the network configuration from stdin.
# The -x in the shebang turns on command tracing.

# Abort on unhandled command failure, on a failing pipeline stage, and on
# use of an unset variable.
set -o errexit
set -o pipefail
set -o nounset

# Keep a copy of the original stdout on fd 3 before redirecting.
exec 3>&1
# From here on, stdout and stderr are appended to /var/log/<script name>.log.
exec &>>/var/log/$(basename $0).log

# Search $CNI_PATH and this script's own directory before the existing PATH.
PATH="$CNI_PATH:$(dirname "${BASH_SOURCE[0]}"):$PATH"
# Read the whole of stdin into CNI_CONF.
CNI_CONF=$(cat /dev/stdin)

# Derive the name of the veth peer from an interface name.
# Arguments: $1 - interface name
# Outputs:   "<interface name>-vdev" on stdout
get_peer_name() {
  local ifname=$1
  printf '%s\n' "${ifname}-vdev"
}

# Print the MAC address that `ip link show` on the parent PF lists for a VF.
# Arguments: $1 - PCI address of the VF, used as a directory name under
#                 /sys/devices/pci*/*/
# Outputs:   the MAC on stdout (an empty line when nothing matched)
# NOTE(review): failures of the individual lookups are not detected, because
# each result is assigned in the same statement as `local`.
get_mac_with_vfpci() {
# Target of the VF's physfn symlink with its first three characters dropped
# (presumably a leading "../" -- confirm), i.e. the PF's PCI address.
local pf=$(readlink /sys/devices/pci*/*/$1/physfn | awk '{print substr($1,4)}')
# First entry listed in the PF's net/ directory.
local pfName=$(ls /sys/devices/pci*/*/$pf/net/ | head -1)
# In the `ls -l` listing of the PF directory, find the entry whose link target
# (field 11, minus its first three characters) equals the VF address, and keep
# the entry name (field 9) from its 7th character on -- presumably stripping a
# "virtfn" prefix to leave the VF index; confirm against the sysfs layout.
local idx=$(ls -l /sys/devices/pci*/*/$pf | awk -v vf=$1 'substr($11,4)==vf {print substr($9,7)}')
# From `ip link show dev <PF>`, take the line whose first field is "vf" and
# whose second field equals idx, and print the first 17 characters of field 4.
local mac=$(ip link show dev $pfName | awk -v idx="$idx" '$1=="vf" && $2==idx {print substr($4,1,17)}')
echo $mac
}

# Run the IPAM plugin named in the network configuration and print its result.
# Globals:   CNI_CONF (read) - network configuration JSON
# Outputs:   the plugin's output, compacted by `jq -c`, on stdout
# Returns:   non-zero if reading the plugin type or running the plugin fails
ipam() {
  local plugin res
  # The configuration is always passed quoted so the JSON reaches jq and the
  # plugin verbatim, without word splitting or glob expansion of its contents.
  plugin=$(printf '%s\n' "$CNI_CONF" | jq -r '.ipam.type') || return
  res=$(printf '%s\n' "$CNI_CONF" | "$plugin" | jq -c '.') || return
  printf '%s\n' "$res"
}

# Create a veth pair inside the container's network namespace.
# The interface named $CNI_IFNAME gets the VF's MAC and the given address;
# its peer is named by get_peer_name.
# Globals:   CNI_CONF, CNI_IFNAME, CNI_NETNS, CNI_CONTAINERID (read)
# Arguments: $1 - address handed to `ip addr add`
# Returns:   non-zero if a lookup fails or no MAC is found for the VF
add_pair_ns() {
  local ip_addr=$1
  local vfpci mac peer

  vfpci=$(printf '%s\n' "$CNI_CONF" | jq -r '.deviceID') || return
  mac=$(get_mac_with_vfpci "$vfpci") || return
  peer=$(get_peer_name "$CNI_IFNAME") || return

  # Stop before creating anything if the VF's MAC could not be determined.
  if [[ -z "$mac" ]]; then
    echo "no MAC address found for VF $vfpci" >&2
    return 1
  fi

  # Expose the namespace under the name `ip netns exec` looks up.
  mkdir -p /var/run/netns/
  ln -sfT "$CNI_NETNS" "/var/run/netns/$CNI_CONTAINERID"

  ip netns exec "$CNI_CONTAINERID" ip link add "$CNI_IFNAME" type veth peer name "$peer"
  ip netns exec "$CNI_CONTAINERID" ip link set "$CNI_IFNAME" addr "$mac" up
  ip netns exec "$CNI_CONTAINERID" ip link set "$peer" up
  ip netns exec "$CNI_CONTAINERID" ip addr add "$ip_addr" dev "$CNI_IFNAME"
}

# Delete the interface $CNI_IFNAME inside the container's network namespace.
# Globals: CNI_CONTAINERID, CNI_IFNAME (read)
# NOTE(review): the /var/run/netns/$CNI_CONTAINERID symlink created by
# add_pair_ns is not removed here -- confirm whether that is intended.
delete_pair_ns() {
  ip netns exec "$CNI_CONTAINERID" ip link del "$CNI_IFNAME"
}

# Entry point: dispatch on the CNI command supplied in the environment.
#   ADD - run IPAM, create the veth pair in the container namespace, and
#         write the result (cniVersion 0.2.0 plus ip4.ip) to fd 3
#   DEL - best effort: run IPAM and delete the interface, ignoring failures
# Any other command is rejected with exit status 1.
case "$CNI_COMMAND" in
  ADD)
    res=$(ipam)
    ip=$(printf '%s\n' "$res" | jq -r '.ip4.ip')
    # jq -r prints the literal string "null" when the key is absent; stop
    # here rather than after the interfaces have been created.
    if [[ -z "$ip" || "$ip" == "null" ]]; then
      echo "IPAM result has no ip4.ip: $res"
      exit 1
    fi
    add_pair_ns "$ip"
    echo '{"cniVersion":"0.2.0"}' | jq -c --arg ip "$ip" '.ip4.ip = $ip' >&3
    ;;
  DEL)
    # Failures during teardown must not abort the script.
    set +o errexit
    ipam
    delete_pair_ns
    set -o errexit
    ;;
  *)
    echo "CNI_COMMAND=[ADD|DEL] only supported"
    exit 1
    ;;
esac
8 changes: 5 additions & 3 deletions clr-k8s-examples/9-multi-network/multus-sriov-ds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ data:
{
"name": "multus-cni-network",
"type": "multus",
"logFile": "/var/log/multus.log",
"logLevel": "debug",
"kubeconfig": "/etc/cni/net.d/multus-kubeconfig",
"delegates": [
$MASTER_PLUGIN_JSON
Expand Down Expand Up @@ -132,10 +134,10 @@ spec:
spec:
initContainers:
- name: multus
image: ngick8stesting/aio-cni:k8s-1.13
image: krsna1729/multus-sriov:k8s-1.13
command: [ "bash", "-c" ]
args:
- cp /tmp/cni/bin/{multus,sriov} /host/opt/cni/bin/;
- cp /tmp/cni/bin/{multus,sriov,vfioveth,jq} /host/opt/cni/bin/;
/tmp/multus/install-multus-conf.sh;
/tmp/multus/install-certs.sh;
systemctl stop kubelet;
Expand All @@ -160,7 +162,7 @@ spec:
mountPath: /run/systemd
containers:
- name: sriovdp
image: ngick8stesting/aio-cni:k8s-1.13
image: krsna1729/multus-sriov:k8s-1.13
command: [ "sh", "-c" ]
args:
- /usr/bin/sriovdp --logtostderr -v 10;
Expand Down
10 changes: 8 additions & 2 deletions clr-k8s-examples/9-multi-network/sriov-conf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,16 @@ data:
"resourceList":
[
{
"resourceName": "sriov",
"rootDevices": ["07:00.0", "07:00.1"],
"resourceName": "sriov_netdevice",
"rootDevices": ["07:00.0"],
"sriovMode": true,
"deviceType": "netdevice"
},
{
"resourceName": "sriov_vfio",
"rootDevices": ["07:00.1"],
"sriovMode": true,
"deviceType": "vfio"
}
]
}
5 changes: 3 additions & 2 deletions clr-k8s-examples/9-multi-network/systemd/sriov.service
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
[Unit]
Description=Create VFs on ens785f0 ens785f1 interfaces
Description=Create VFs on ens785f0 (netdev) ens785f1 (vfio) interfaces

[Service]
Type=oneshot
ExecStart=/usr/bin/sriov.sh ens785f0 ens785f1
ExecStart=/usr/bin/sriov.sh ens785f0
ExecStart=/usr/bin/sriov.sh -b ens785f1

[Install]
WantedBy=default.target
60 changes: 52 additions & 8 deletions clr-k8s-examples/9-multi-network/systemd/sriov.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,58 @@
#!/bin/bash
# Usage: sriov.sh ens785f0 ens785f1 ...

for pf in "$@"; do
echo "Resetting $pf"
echo 0 | tee /sys/class/net/$pf/device/sriov_numvfs
set -o errexit
set -o pipefail
set -o nounset

# Parse command-line options.
#   -b  set bind="true" (usage text: "Bind to vfio-pci")
# Any other option prints the usage text to stderr and exits with status 1,
# so a mistyped flag is reported as a failure to the caller.
OPTIND=1
bind="false"

while getopts ":b" opt; do
  case "${opt}" in
    b)
      bind="true"
      ;;
    \?)
      echo "Usage: sriov.sh [-b] ens785f0 ens785f1 ..." >&2
      echo "-b Bind to vfio-pci" >&2
      exit 1
      ;;
  esac
done
# Drop the parsed options; the remaining arguments are the PF names.
shift $((OPTIND - 1))

NUM_VFS=$(cat /sys/class/net/$pf/device/sriov_totalvfs)
echo "Enabling $NUM_VFS for $pf"
# Reset a PF's VFs and re-create the maximum number it supports.
# Arguments: $1 - network interface name of the PF
# Outputs:   progress messages and the values written via tee, on stdout
# Returns:   non-zero if sriov_totalvfs cannot be read
setup_pf() {
  local pf=$1
  local dev_dir="/sys/class/net/${pf}/device"
  local num_vfs i

  echo "Resetting PF $pf"
  echo 0 | tee "${dev_dir}/sriov_numvfs"

  # Assigned separately from `local` so a failed read is not masked.
  num_vfs=$(cat "${dev_dir}/sriov_totalvfs") || return
  echo "Enabling $num_vfs VFs for $pf"
  echo "$num_vfs" | tee "${dev_dir}/sriov_numvfs"

  ip link set "$pf" up
  #for ((i = 0 ; i < num_vfs ; i++ )); do ip link set $pf vf $i spoofchk off; done
  for ((i = 0; i < num_vfs; i++)); do
    ip link set dev "$pf" vf "$i" state enable
  done
  # NOTE(review): presumably gives the VFs time to appear -- confirm.
  sleep 1
}

# Enable every VF of a PF and, when binding is requested, hand each VF over
# to the vfio-pci driver.
# Globals:   bind (read) - "true" to bind VFs to vfio-pci; anything else,
#            including unset or empty, only enables the VFs
# Arguments: $1 - network interface name of the PF
# Returns:   non-zero if a sysfs lookup fails
setup_vfs() {
  local pf=$1
  local dev_dir="/sys/class/net/${pf}/device"
  local num_vfs idx vf_link vfpci vf_dir mac

  num_vfs=$(cat "${dev_dir}/sriov_numvfs") || return
  for ((idx = 0; idx < num_vfs; idx++)); do
    ip link set dev "$pf" vf "$idx" state enable
    if [[ "${bind:-false}" != "true" ]]; then continue; fi

    # The PF's virtfn<idx> symlink points at the VF's PCI device directory;
    # its last path component is the VF's PCI address.
    vf_link=$(readlink "${dev_dir}/virtfn${idx}") || return
    vfpci=${vf_link##*/}
    vf_dir="/sys/bus/pci/devices/${vfpci}"

    # Capture and set MAC of the VF before unbinding from linux, for later use in CNI
    mac=$(cat "${vf_dir}"/net/*/address) || return
    ip link set dev "$pf" vf "$idx" mac "$mac"

    # Bind VF to vfio-pci
    echo "$vfpci" >"${vf_dir}/driver/unbind"
    echo "vfio-pci" >"${vf_dir}/driver_override"
    echo "$vfpci" >/sys/bus/pci/drivers/vfio-pci/bind
  done
}

# Process every PF named on the command line: first setup_pf, then setup_vfs.
for pf in "$@"; do
  setup_pf "$pf"
  setup_vfs "$pf"
done
21 changes: 20 additions & 1 deletion clr-k8s-examples/9-multi-network/test/sriov/0-sriov-net.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ kind: NetworkAttachmentDefinition
metadata:
name: sriov-net
annotations:
k8s.v1.cni.cncf.io/resourceName: intel.com/sriov
k8s.v1.cni.cncf.io/resourceName: intel.com/sriov_netdevice
spec:
config: '{
"type": "sriov",
Expand All @@ -17,4 +17,23 @@ spec:
"gateway": "198.19.0.1"
}
}'
---
apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
name: sriov-net-dpdk
annotations:
k8s.v1.cni.cncf.io/resourceName: intel.com/sriov_vfio
spec:
config: '{
"type": "vfioveth",
"name": "sriov-net",
"ipam": {
"type": "host-local",
"subnet": "198.19.0.0/24",
"rangeStart": "198.19.0.100",
"rangeEnd": "198.19.0.200",
"gateway": "198.19.0.1"
}
}'

19 changes: 18 additions & 1 deletion clr-k8s-examples/9-multi-network/test/sriov/1-pod-sriov.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
---
apiVersion: v1
kind: Pod
metadata:
Expand All @@ -11,4 +12,20 @@ spec:
command: [ "top" ]
resources:
limits:
intel.com/sriov: '1'
intel.com/sriov_netdevice: '1'
---
apiVersion: v1
kind: Pod
metadata:
name: test-sriov-dpdk
annotations:
k8s.v1.cni.cncf.io/networks: sriov-net-dpdk
spec:
containers:
- name: busy
image: busybox
command: [ "top" ]
resources:
limits:
intel.com/sriov_vfio: '1'