From 2f5bbbe16b5d248610f6ef75835cab81a9cb9dda Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Sun, 28 Apr 2024 17:01:59 +0100 Subject: [PATCH] Option to avoid deleting the kernel_ll address from bridges. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If env var DOCKER_BRIDGE_PRESERVE_KERNEL_LL=1, don't assign fe80::1/64 to a bridge, and don't delete any link local address with prefix fe80::/64. Signed-off-by: Rob Murray (cherry picked from commit 57ada4b8481f8a5a138b9ed312135b148eaf6d09) Signed-off-by: Paweł Gronowski --- integration/networking/bridge_test.go | 63 ++++++++++++------- libnetwork/drivers/bridge/interface_linux.go | 31 ++++++--- libnetwork/drivers/bridge/setup_ipv6_linux.go | 4 ++ 3 files changed, 64 insertions(+), 34 deletions(-) diff --git a/integration/networking/bridge_test.go b/integration/networking/bridge_test.go index d0726af541e52..918866908964d 100644 --- a/integration/networking/bridge_test.go +++ b/integration/networking/bridge_test.go @@ -469,7 +469,6 @@ func TestDefaultBridgeAddresses(t *testing.T) { skip.If(t, testEnv.DaemonInfo.OSType == "windows") ctx := setupTest(t) - d := daemon.New(t) type testStep struct { stepName string @@ -487,13 +486,13 @@ func TestDefaultBridgeAddresses(t *testing.T) { { stepName: "Set up initial UL prefix", fixedCIDRV6: "fd1c:f1a0:5d8d:aaaa::/64", - expAddrs: []string{"fd1c:f1a0:5d8d:aaaa::1/64", "fe80::1/64"}, + expAddrs: []string{"fd1c:f1a0:5d8d:aaaa::1/64", "fe80::"}, }, { // Modify that prefix, the default bridge's address must be deleted and re-added. stepName: "Modify UL prefix - address change", fixedCIDRV6: "fd1c:f1a0:5d8d:bbbb::/64", - expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::1/64"}, + expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::"}, }, { // Modify the prefix length, the default bridge's address should not change. @@ -501,7 +500,7 @@ func TestDefaultBridgeAddresses(t *testing.T) { fixedCIDRV6: "fd1c:f1a0:5d8d:bbbb::/80", // The prefix length displayed by 'ip a' is not updated - it's informational, and // can't be changed without unnecessarily deleting and re-adding the address. - expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::1/64"}, + expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::"}, }, }, }, @@ -511,14 +510,14 @@ func TestDefaultBridgeAddresses(t *testing.T) { { stepName: "Standard LL subnet prefix", fixedCIDRV6: "fe80::/64", - expAddrs: []string{"fe80::1/64"}, + expAddrs: []string{"fe80::"}, }, { // Modify that prefix, the default bridge's address must be deleted and re-added. // The bridge must still have an address in the required (standard) LL subnet. stepName: "Nonstandard LL prefix - address change", fixedCIDRV6: "fe80:1234::/32", - expAddrs: []string{"fe80:1234::1/32", "fe80::1/64"}, + expAddrs: []string{"fe80:1234::1/32", "fe80::"}, }, { // Modify the prefix length, the addresses should not change. @@ -526,32 +525,48 @@ func TestDefaultBridgeAddresses(t *testing.T) { fixedCIDRV6: "fe80:1234::/64", // The prefix length displayed by 'ip a' is not updated - it's informational, and // can't be changed without unnecessarily deleting and re-adding the address. - expAddrs: []string{"fe80:1234::1/", "fe80::1/64"}, + expAddrs: []string{"fe80:1234::1/", "fe80::"}, }, }, }, } - for _, tc := range testcases { - t.Run(tc.name, func(t *testing.T) { + for _, preserveKernelLL := range []bool{false, true} { + var dopts []daemon.Option + if preserveKernelLL { + dopts = append(dopts, daemon.WithEnvVars("DOCKER_BRIDGE_PRESERVE_KERNEL_LL=1")) + } + d := daemon.New(t, dopts...) + c := d.NewClientT(t) + + for _, tc := range testcases { for _, step := range tc.steps { - // Check that the daemon starts - regression test for: - // https://github.com/moby/moby/issues/46829 - d.Start(t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6) - d.Stop(t) - - // Check that the expected addresses have been applied to the bridge. (Skip in - // rootless mode, because the bridge is in a different network namespace.) - if !testEnv.IsRootless() { - res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0") - assert.Equal(t, res.ExitCode, 0, step.stepName) - stdout := res.Stdout() - for _, expAddr := range step.expAddrs { - assert.Check(t, is.Contains(stdout, expAddr)) + tcName := fmt.Sprintf("kernel_ll_%v/%s/%s", preserveKernelLL, tc.name, step.stepName) + t.Run(tcName, func(t *testing.T) { + ctx := testutil.StartSpan(ctx, t) + // Check that the daemon starts - regression test for: + // https://github.com/moby/moby/issues/46829 + d.StartWithBusybox(ctx, t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6) + + // Start a container, so that the bridge is set "up" and gets a kernel_ll address. + cID := container.Run(ctx, t, c) + defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{Force: true}) + + d.Stop(t) + + // Check that the expected addresses have been applied to the bridge. (Skip in + // rootless mode, because the bridge is in a different network namespace.) + if !testEnv.IsRootless() { + res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0") + assert.Equal(t, res.ExitCode, 0, step.stepName) + stdout := res.Stdout() + for _, expAddr := range step.expAddrs { + assert.Check(t, is.Contains(stdout, expAddr)) + } } - } + }) } - }) + } } } diff --git a/libnetwork/drivers/bridge/interface_linux.go b/libnetwork/drivers/bridge/interface_linux.go index 2a9c375522ea5..a78c02cadd0cd 100644 --- a/libnetwork/drivers/bridge/interface_linux.go +++ b/libnetwork/drivers/bridge/interface_linux.go @@ -5,6 +5,7 @@ import ( "fmt" "net" "net/netip" + "os" "github.com/containerd/log" "github.com/docker/docker/errdefs" @@ -73,18 +74,20 @@ func (i *bridgeInterface) addresses(family int) ([]netlink.Addr, error) { func getRequiredIPv6Addrs(config *networkConfiguration) (requiredAddrs map[netip.Addr]netip.Prefix, err error) { requiredAddrs = make(map[netip.Addr]netip.Prefix) - // Always give the bridge 'fe80::1' - every interface is required to have an - // address in 'fe80::/64'. Linux may assign an address, but we'll replace it with - // 'fe80::1'. Then, if the configured prefix is 'fe80::/64', the IPAM pool - // assigned address will not be a second address in the LL subnet. - ra, ok := netiputil.ToPrefix(bridgeIPv6) - if !ok { - err = fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix") - return nil, err + if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") != "1" { + // Always give the bridge 'fe80::1' - every interface is required to have an + // address in 'fe80::/64'. Linux may assign an address, but we'll replace it with + // 'fe80::1'. Then, if the configured prefix is 'fe80::/64', the IPAM pool + // assigned address will not be a second address in the LL subnet. + ra, ok := netiputil.ToPrefix(bridgeIPv6) + if !ok { + err = fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix") + return nil, err + } + requiredAddrs[ra.Addr()] = ra } - requiredAddrs[ra.Addr()] = ra - ra, ok = netiputil.ToPrefix(config.AddressIPv6) + ra, ok := netiputil.ToPrefix(config.AddressIPv6) if !ok { err = fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix", config.AddressIPv6.String()) return nil, err @@ -116,6 +119,14 @@ func (i *bridgeInterface) programIPv6Addresses(config *networkConfiguration) err if !ok { return errdefs.System(fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6)) } + // Optionally, avoid deleting the kernel-assigned link local address. + // (Don't delete fe80::1 either - if it was previously assigned to the bridge, and the + // kernel_ll address was deleted, the bridge won't get a new kernel_ll address.) + if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") == "1" { + if p, _ := ea.Prefix(64); p == linkLocalPrefix { + continue + } + } // Ignore the prefix length when comparing addresses, it's informational // (RFC-5942 section 4), and removing/re-adding an address that's still valid // would disrupt traffic on live-restore. diff --git a/libnetwork/drivers/bridge/setup_ipv6_linux.go b/libnetwork/drivers/bridge/setup_ipv6_linux.go index 779306f35bd87..2cb0da732f4b0 100644 --- a/libnetwork/drivers/bridge/setup_ipv6_linux.go +++ b/libnetwork/drivers/bridge/setup_ipv6_linux.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net" + "net/netip" "os" "github.com/containerd/log" @@ -13,6 +14,9 @@ import ( // bridgeIPv6 is the default, link-local IPv6 address for the bridge (fe80::1/64) var bridgeIPv6 = &net.IPNet{IP: net.ParseIP("fe80::1"), Mask: net.CIDRMask(64, 128)} +// Standard link local prefix +var linkLocalPrefix = netip.MustParsePrefix("fe80::/64") + const ( ipv6ForwardConfPerm = 0o644 ipv6ForwardConfDefault = "/proc/sys/net/ipv6/conf/default/forwarding"