From 7d240ee2cef928f41070f63a3d802630c8ccae7a Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Tue, 19 Nov 2024 16:10:22 +0000 Subject: [PATCH] Add support for Linux Network Devices Implement support for passing Linux Network Devices to the container network namespace. The network device is passed during the creation of the container, before the process is started. It implements the logic defined in the OCI runtime specification. Signed-off-by: Antonio Ojea --- features.go | 3 + libcontainer/configs/config.go | 3 + libcontainer/configs/netdevices.go | 13 ++ libcontainer/configs/validate/validator.go | 55 ++++++ .../configs/validate/validator_test.go | 171 ++++++++++++++++++ libcontainer/factory_linux.go | 6 + libcontainer/init_linux.go | 18 ++ libcontainer/network_linux.go | 86 +++++++++ libcontainer/specconv/spec_linux.go | 11 ++ libcontainer/specconv/spec_linux_test.go | 108 +++++++++++ libcontainer/state_linux.go | 3 + tests/integration/netdev.bats | 153 ++++++++++++++++ 12 files changed, 630 insertions(+) create mode 100644 libcontainer/configs/netdevices.go create mode 100644 tests/integration/netdev.bats diff --git a/features.go b/features.go index b636466bfe4..c5dff4a2d17 100644 --- a/features.go +++ b/features.go @@ -63,6 +63,9 @@ var featuresCommand = cli.Command{ Enabled: &t, }, }, + NetDevices: &features.NetDevices{ + Enabled: &t, + }, }, PotentiallyUnsafeConfigAnnotations: []string{ "bundle", diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index 22fe0f9b4c1..b27a2dc781e 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -115,6 +115,9 @@ type Config struct { // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! Devices []*devices.Device `json:"devices"` + // NetDevices are key-value pairs, keyed by network device name, moved to the container's network namespace. 
+ NetDevices map[string]*LinuxNetDevice `json:"netDevices"` + MountLabel string `json:"mount_label"` // Hostname optionally sets the container's hostname if provided diff --git a/libcontainer/configs/netdevices.go b/libcontainer/configs/netdevices.go new file mode 100644 index 00000000000..da1336a5f4e --- /dev/null +++ b/libcontainer/configs/netdevices.go @@ -0,0 +1,13 @@ +package configs + +// LinuxNetDevice represents a single network device to be added to the container's network namespace +type LinuxNetDevice struct { + // Name of the device in the container namespace + Name string `json:"name,omitempty"` + // Addresses are the IP addresses and prefixes in the container namespace in CIDR format + Addresses []string `json:"addresses,omitempty"` + // HardwareAddress represents a physical hardware address. + HardwareAddress string `json:"hardwareAddress,omitempty"` + // MTU is the Maximum Transmission Unit of the network device in the container namespace + MTU uint32 `json:"mtu,omitempty"` +} diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index 37ece0aebbd..023e9a51bdd 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -3,6 +3,8 @@ package validate import ( "errors" "fmt" + "net" + "net/netip" "os" "path/filepath" "strings" @@ -24,6 +26,7 @@ func Validate(config *configs.Config) error { cgroupsCheck, rootfs, network, + netdevices, uts, security, namespaces, @@ -70,6 +73,58 @@ func rootfs(config *configs.Config) error { return nil } +// https://elixir.bootlin.com/linux/v6.12/source/net/core/dev.c#L1066 +func devValidName(name string) bool { + if len(name) == 0 || len(name) > unix.IFNAMSIZ { + return false + } + if (name == ".") || (name == "..") { + return false + } + if strings.Contains(name, "/") || strings.Contains(name, ":") || strings.Contains(name, " ") { + return false + } + return true +} + +func netdevices(config *configs.Config) error { + if len(config.NetDevices) 
== 0 { + return nil + } + if !config.Namespaces.Contains(configs.NEWNET) { + return errors.New("unable to move network devices without a private NET namespace") + } + path := config.Namespaces.PathOf(configs.NEWNET) + if path == "" { + return errors.New("unable to move network devices without a private NET namespace") + } + if config.RootlessEUID || config.RootlessCgroups { + return errors.New("network devices are not supported for rootless containers") + } + + for name, netdev := range config.NetDevices { + if !devValidName(name) { + return fmt.Errorf("invalid network device name %q", name) + } + if netdev.Name != "" { + if !devValidName(netdev.Name) { + return fmt.Errorf("invalid network device name %q", netdev.Name) + } + } + for _, address := range netdev.Addresses { + if _, err := netip.ParsePrefix(address); err != nil { + return fmt.Errorf("invalid network IP address %q", address) + } + } + if netdev.HardwareAddress != "" { + if _, err := net.ParseMAC(netdev.HardwareAddress); err != nil { + return fmt.Errorf("invalid hardware address %q", netdev.HardwareAddress) + } + } + } + return nil +} + func network(config *configs.Config) error { if !config.Namespaces.Contains(configs.NEWNET) { if len(config.Networks) > 0 || len(config.Routes) > 0 { diff --git a/libcontainer/configs/validate/validator_test.go b/libcontainer/configs/validate/validator_test.go index b0b740a122d..575838604b2 100644 --- a/libcontainer/configs/validate/validator_test.go +++ b/libcontainer/configs/validate/validator_test.go @@ -871,3 +871,174 @@ func TestValidateIOPriority(t *testing.T) { } } } + +func TestValidateNetDevices(t *testing.T) { + testCases := []struct { + name string + isErr bool + config *configs.Config + }{ + { + name: "network device", + config: &configs.Config{ + Namespaces: configs.Namespaces( + []configs.Namespace{ + { + Type: configs.NEWNET, + Path: "/var/run/netns/blue", + }, + }, + ), + NetDevices: map[string]*configs.LinuxNetDevice{ + "eth0": {}, + }, + }, + }, + { + 
name: "network device rename", + config: &configs.Config{ + Namespaces: configs.Namespaces( + []configs.Namespace{ + { + Type: configs.NEWNET, + Path: "/var/run/netns/blue", + }, + }, + ), + NetDevices: map[string]*configs.LinuxNetDevice{ + "eth0": { + Name: "c0", + Addresses: []string{"192.168.2.34/24", "2001:db8::2/64"}, + HardwareAddress: "82:06:8c:49:7a:4a", + MTU: 1500, + }, + }, + }, + }, + { + name: "network device host network", + isErr: true, + config: &configs.Config{ + Namespaces: configs.Namespaces( + []configs.Namespace{}, + ), + NetDevices: map[string]*configs.LinuxNetDevice{ + "eth0": {}, + }, + }, + }, + { + name: "network device rootless", + isErr: true, + config: &configs.Config{ + Namespaces: configs.Namespaces( + []configs.Namespace{ + { + Type: configs.NEWNET, + Path: "/var/run/netns/blue", + }, + }, + ), + RootlessEUID: true, + NetDevices: map[string]*configs.LinuxNetDevice{ + "eth0": {}, + }, + }, + }, + { + name: "network device rootless", + isErr: true, + config: &configs.Config{ + Namespaces: configs.Namespaces( + []configs.Namespace{ + { + Type: configs.NEWNET, + Path: "/var/run/netns/blue", + }, + }, + ), + RootlessCgroups: true, + NetDevices: map[string]*configs.LinuxNetDevice{ + "eth0": {}, + }, + }, + }, + { + name: "network device bad name", + isErr: true, + config: &configs.Config{ + Namespaces: configs.Namespaces( + []configs.Namespace{ + { + Type: configs.NEWNET, + Path: "/var/run/netns/blue", + }, + }, + ), + NetDevices: map[string]*configs.LinuxNetDevice{ + "eth0": { + Name: "eth0/", + }, + }, + }, + }, + { + name: "network device wrong ip", + isErr: true, + config: &configs.Config{ + Namespaces: configs.Namespaces( + []configs.Namespace{ + { + Type: configs.NEWNET, + Path: "/var/run/netns/blue", + }, + }, + ), + NetDevices: map[string]*configs.LinuxNetDevice{ + "eth0": { + Name: "eth0", + Addresses: []string{"wrongip"}, + }, + }, + }, + }, + { + name: "network device wrong mac", + isErr: true, + config: &configs.Config{ + 
Namespaces: configs.Namespaces( + []configs.Namespace{ + { + Type: configs.NEWNET, + Path: "/var/run/netns/blue", + }, + }, + ), + NetDevices: map[string]*configs.LinuxNetDevice{ + "eth0": { + Name: "eth0", + Addresses: []string{"192.168.1.1/24"}, + HardwareAddress: "wrongmac!", + }, + }, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + config := tc.config + config.Rootfs = "/var" + + err := Validate(config) + if tc.isErr && err == nil { + t.Error("expected error, got nil") + } + + if !tc.isErr && err != nil { + t.Error(err) + } + }) + } +} diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index b13f8bf9bb3..7d712aeddca 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -90,6 +90,12 @@ func Create(root, id string, config *configs.Config) (*Container, error) { if err := os.Mkdir(stateDir, 0o711); err != nil { return nil, err } + + // move the specified devices to the container network namespace + if err := setupNetworkDevices(config); err != nil { + return nil, err + + } c := &Container{ id: id, stateDir: stateDir, diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 1eb0279d9e0..5913da3bac3 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -594,6 +594,24 @@ func fixStdioPermissions(u *user.ExecUser) error { return nil } +// setupNetworkDevices sets up and initializes network device inside the container. +func setupNetworkDevices(config *configs.Config) error { + if !config.Namespaces.Contains(configs.NEWNET) { + return nil + } + nsPath := config.Namespaces.PathOf(configs.NEWNET) + if nsPath == "" { + return nil + } + for name, netDevice := range config.NetDevices { + err := moveIntoNS(name, nsPath, *netDevice) + if err != nil { + return err + } + } + return nil +} + // setupNetwork sets up and initializes any network interface inside the container. 
func setupNetwork(config *initConfig) error { for _, config := range config.Networks { diff --git a/libcontainer/network_linux.go b/libcontainer/network_linux.go index 8915548b3bc..02f94ae8a47 100644 --- a/libcontainer/network_linux.go +++ b/libcontainer/network_linux.go @@ -3,13 +3,16 @@ package libcontainer import ( "bytes" "fmt" + "net" "os" "path/filepath" "strconv" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/types" + "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" + "github.com/vishvananda/netns" ) var strategies = map[string]networkStrategy{ @@ -98,3 +101,86 @@ func (l *loopback) attach(n *configs.Network) (err error) { func (l *loopback) detach(n *configs.Network) (err error) { return nil } + +// moveIntoNS takes the network device referenced by name in the current network namespace +// and moves it to the network namespace passed as a parameter. It also configures the +// network device inside the new network namespace with the passed parameters. 
+func moveIntoNS(name string, nsPath string, device configs.LinuxNetDevice) error { + logrus.Debugf("moving network device %s with attrs %#v to network namespace %s", name, device, nsPath) + link, err := netlink.LinkByName(name) + if err != nil { + return fmt.Errorf("link not found for interface %s on runtime namespace: %w", name, err) + } + attrs := netlink.NewLinkAttrs() + attrs.Index = link.Attrs().Index + + attrs.Name = name + if device.Name != "" { + attrs.Name = device.Name + } + + attrs.MTU = link.Attrs().MTU + if device.MTU > 0 { + attrs.MTU = int(device.MTU) + } + + attrs.HardwareAddr = link.Attrs().HardwareAddr + if device.HardwareAddress != "" { + attrs.HardwareAddr, err = net.ParseMAC(device.HardwareAddress) + if err != nil { + return err + } + } + + ns, err := netns.GetFromPath(nsPath) + if err != nil { + return fmt.Errorf("could not get network namespace from path %s : %w", nsPath, err) + } + + attrs.Namespace = netlink.NsFd(ns) + + // set the interface down before we change the address inside the network namespace + err = netlink.LinkSetDown(link) + if err != nil { + return err + } + + dev := &netlink.Device{ + LinkAttrs: attrs, + } + + err = netlink.LinkModify(dev) + if err != nil { + return fmt.Errorf("could not modify network device %s : %w", name, err) + } + + // to avoid golang problem with goroutines we create the socket in the + // namespace and use it directly + nhNs, err := netlink.NewHandleAt(ns) + if err != nil { + return err + } + + nsLink, err := nhNs.LinkByName(dev.Name) + if err != nil { + return fmt.Errorf("link not found for interface %s on namespace %s: %w", dev.Name, nsPath, err) + } + + err = nhNs.LinkSetUp(nsLink) + if err != nil { + return fmt.Errorf("failt to set up interface %s on namespace %s: %w", nsLink.Attrs().Name, nsPath, err) + } + + for _, address := range device.Addresses { + addr, err := netlink.ParseAddr(address) + if err != nil { + return err + } + + err = nhNs.AddrAdd(nsLink, addr) + if err != nil { + return err + 
} + } + return nil +} diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index e7c6faae347..ab87b3062e7 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -472,6 +472,17 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { } } + for name, netdev := range spec.Linux.NetDevices { + if config.NetDevices == nil { + config.NetDevices = make(map[string]*configs.LinuxNetDevice) + } + config.NetDevices[name] = &configs.LinuxNetDevice{ + Name: netdev.Name, + Addresses: netdev.Addresses, + HardwareAddress: netdev.HardwareAddress, + MTU: netdev.MTU, + } + } } // Set the host UID that should own the container's cgroup. diff --git a/libcontainer/specconv/spec_linux_test.go b/libcontainer/specconv/spec_linux_test.go index 8c7fb774f97..9ca627f330b 100644 --- a/libcontainer/specconv/spec_linux_test.go +++ b/libcontainer/specconv/spec_linux_test.go @@ -2,6 +2,7 @@ package specconv import ( "os" + "reflect" "strings" "testing" @@ -956,3 +957,110 @@ func TestCreateDevices(t *testing.T) { t.Errorf("device /dev/ram0 not found in config devices; got %v", conf.Devices) } } + +func TestCreateNetDevices(t *testing.T) { + testCases := []struct { + name string + netDevices map[string]specs.LinuxNetDevice + }{ + { + name: "no network devices", + }, + { + name: "one network devices", + netDevices: map[string]specs.LinuxNetDevice{ + "eth1": {}, + }, + }, + { + name: "multiple network devices", + netDevices: map[string]specs.LinuxNetDevice{ + "eth1": {}, + "eth2": {}, + }, + }, + { + name: "multiple network devices and rename", + netDevices: map[string]specs.LinuxNetDevice{ + "eth1": {}, + "eth2": { + Name: "ctr_eth2", + }, + }, + }, + { + name: "multiple network devices and addresses", + netDevices: map[string]specs.LinuxNetDevice{ + "eth1": { + Addresses: []string{"192.168.1.2/24", "fd00:1:2::9/64"}, + }, + "eth2": { + Name: "ctr_eth2", + }, + }, + }, + { + name: "multiple network devices 
and hardware address", + netDevices: map[string]specs.LinuxNetDevice{ + "eth1": { + Addresses: []string{"192.168.1.2/24", "fd00:1:2::9/64"}, + HardwareAddress: "e2:85:68:80:43:7a ", + }, + "eth2": { + Name: "ctr_eth2", + }, + }, + }, + { + name: "multiple network devices and mtu", + netDevices: map[string]specs.LinuxNetDevice{ + "eth1": { + Addresses: []string{"192.168.1.2/24", "fd00:1:2::9/64"}, + HardwareAddress: "e2:85:68:80:43:7a ", + }, + "eth2": { + Name: "ctr_eth2", + MTU: 1725, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + spec := Example() + spec.Linux.NetDevices = tc.netDevices + opts := &CreateOpts{ + CgroupName: "ContainerID", + UseSystemdCgroup: false, + Spec: spec, + } + config, err := CreateLibcontainerConfig(opts) + if err != nil { + t.Errorf("Couldn't create libcontainer config: %v", err) + } + if len(config.NetDevices) != len(opts.Spec.Linux.NetDevices) { + t.Fatalf("expected %d network devices and got %d", len(config.NetDevices), len(opts.Spec.Linux.NetDevices)) + } + for name, netdev := range config.NetDevices { + ctrNetDev, ok := config.NetDevices[name] + if !ok { + t.Fatalf("network device %s not found in the configuration", name) + } + if ctrNetDev.Name != netdev.Name { + t.Fatalf("expected %s got %s", ctrNetDev.Name, netdev.Name) + } + if !reflect.DeepEqual(ctrNetDev.Addresses, netdev.Addresses) { + t.Fatalf("expected %v got %v", ctrNetDev.Addresses, netdev.Addresses) + } + if ctrNetDev.HardwareAddress != netdev.HardwareAddress { + t.Fatalf("expected %s got %s", ctrNetDev.HardwareAddress, netdev.HardwareAddress) + } + if ctrNetDev.MTU != netdev.MTU { + t.Fatalf("expected %d got %d", ctrNetDev.MTU, netdev.MTU) + } + } + }) + } + +} diff --git a/libcontainer/state_linux.go b/libcontainer/state_linux.go index ad96f0801ea..34ca21a50b7 100644 --- a/libcontainer/state_linux.go +++ b/libcontainer/state_linux.go @@ -47,6 +47,9 @@ func destroy(c *Container) error { // Likely to fail when 
c.config.RootlessCgroups is true _ = signalAllProcesses(c.cgroupManager, unix.SIGKILL) } + // shutdown all the additional network devices to avoid network conflicts + // and let the kernel deal with the interface cleanup + if err := c.cgroupManager.Destroy(); err != nil { return fmt.Errorf("unable to remove container's cgroup: %w", err) } diff --git a/tests/integration/netdev.bats b/tests/integration/netdev.bats new file mode 100644 index 00000000000..6ec229886d1 --- /dev/null +++ b/tests/integration/netdev.bats @@ -0,0 +1,153 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + setup_busybox +} + +function teardown() { + teardown_bundle +} + +@test "move network device to container network namespace" { + # create a dummy interface to move to the container + ip link add dummy0 type dummy + ip link set up dev dummy0 + ip addr add 169.254.169.13/32 dev dummy0 + + update_config ' .linux.netDevices |= {"dummy0": {} } + | .process.args |= ["ip", "address", "show", "dev", "dummy0"]' + + # create a temporary name for the test network namespace + tmp=$(mktemp) + rm -f "$tmp" + ns_name=$(basename "$tmp") + # create network namespace + ip netns add "$ns_name" + ns_path=$(ip netns add "$ns_name" 2>&1 | sed -e 's/.*"\(.*\)".*/\1/') + # shellcheck disable=SC2012 + ns_inode=$(ls -iL "$ns_path" | awk '{ print $1 }') + + # tell runc which network namespace to use + update_config '(.. | select(.type? 
== "network")) .path |= "'"$ns_path"'"' + + runc run test_busybox + [ "$status" -eq 0 ] + + ip netns del "$ns_name" +} + +@test "move network device to container network namespace and rename" { + # create a dummy interface to move to the container + ip link add dummy1 type dummy + ip link set up dev dummy1 + ip addr add 169.254.169.14/32 dev dummy1 + + update_config ' .linux.netDevices |= { "dummy1": { "name" : "ctr_dummy1" } } + | .process.args |= ["ip", "address", "show", "dev", "ctr_dummy1"]' + + # create a temporary name for the test network namespace + tmp=$(mktemp) + rm -f "$tmp" + ns_name=$(basename "$tmp") + # create network namespace + ip netns add "$ns_name" + ns_path=$(ip netns add "$ns_name" 2>&1 | sed -e 's/.*"\(.*\)".*/\1/') + # shellcheck disable=SC2012 + ns_inode=$(ls -iL "$ns_path" | awk '{ print $1 }') + + # tell runc which network namespace to use + update_config '(.. | select(.type? == "network")) .path |= "'"$ns_path"'"' + + runc run test_busybox + [ "$status" -eq 0 ] + + ip netns del "$ns_name" +} + +@test "move network device to container network namespace and change ipv4 address" { + # create a dummy interface to move to the container + ip link add dummy1 type dummy + ip link set up dev dummy1 + ip addr add 169.254.169.14/32 dev dummy1 + + update_config ' .linux.netDevices |= { "dummy1": { "name" : "ctr_dummy1" , "addresses" : [ "10.0.0.2/24" ]} } + | .process.args |= ["ip", "address", "show", "dev", "ctr_dummy1" ]' + + # create a temporary name for the test network namespace + tmp=$(mktemp) + rm -f "$tmp" + ns_name=$(basename "$tmp") + # create network namespace + ip netns add "$ns_name" + ns_path=$(ip netns add "$ns_name" 2>&1 | sed -e 's/.*"\(.*\)".*/\1/') + # shellcheck disable=SC2012 + ns_inode=$(ls -iL "$ns_path" | awk '{ print $1 }') + + # tell runc which network namespace to use + update_config '(.. | select(.type? 
== "network")) .path |= "'"$ns_path"'"' + + runc run test_busybox + [ "$status" -eq 0 ] + [[ "$output" == *"10.0.0.2/24"* ]] + + ip netns del "$ns_name" +} + +@test "move network device to container network namespace and change ipv6 address" { + # create a dummy interface to move to the container + ip link add dummy1 type dummy + ip link set up dev dummy1 + ip addr add 169.254.169.14/32 dev dummy1 + + update_config ' .linux.netDevices |= { "dummy1": { "name" : "ctr_dummy1" , "addresses" : [ "10.0.0.2/24" , "2001:db8::2/64" ]} } + | .process.args |= ["ip", "address", "show", "dev", "ctr_dummy1" ]' + + # create a temporary name for the test network namespace + tmp=$(mktemp) + rm -f "$tmp" + ns_name=$(basename "$tmp") + # create network namespace + ip netns add "$ns_name" + ns_path=$(ip netns add "$ns_name" 2>&1 | sed -e 's/.*"\(.*\)".*/\1/') + # shellcheck disable=SC2012 + ns_inode=$(ls -iL "$ns_path" | awk '{ print $1 }') + + # tell runc which network namespace to use + update_config '(.. | select(.type? == "network")) .path |= "'"$ns_path"'"' + + runc run test_busybox + [ "$status" -eq 0 ] + [[ "$output" == *"2001:db8::2/64"* ]] + + ip netns del "$ns_name" +} + +@test "network device on root namespace fails" { + # create a dummy interface to move to the container + ip link add dummy2 type dummy + ip link set up dev dummy2 + ip addr add 169.254.169.13/32 dev dummy2 + + update_config ' .linux.netDevices |= {"dummy2": {} }' + runc run test_busybox + [ "$status" -ne 0 ] + [[ "$output" == *"unable to move network devices without a private NET namespace"* ]] + ip link del dev dummy2 +} + +@test "network device bad address fails" { + # create a dummy interface to move to the container + ip link add dummy2 type dummy + ip link set up dev dummy2 + ip addr add 169.254.169.13/32 dev dummy2 + + update_config '(.. | select(.type? 
== "network")) .path |= "'fake_net_ns'"' + update_config ' .linux.netDevices |= { "dummy2": { "name" : "ctr_dummy2" , "addresses" : [ "wrong_ip" ]} }' + + runc run test_busybox + [ "$status" -ne 0 ] + [[ "$output" == *"invalid network IP address"* ]] + ip link del dev dummy2 +}