From 063df636728ca9a03f95b8c0fadc87b42155b687 Mon Sep 17 00:00:00 2001 From: marvin-tigera Date: Tue, 18 May 2021 04:47:16 -0700 Subject: [PATCH] Merge pull request #2811 from fasaxc/vxlan-checksum Disable checksum offload on the VXLAN tunnel for kernels unix.IFNAMSIZ { + return fmt.Errorf("name too long") + } + + // To access the IOCTL, we need a socket file descriptor. + socket, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0) + if err != nil { + return err + } + defer func() { + err := unix.Close(socket) + if err != nil { + // Super unlikely; normally a failure from Close means that some data couldn't be flushed + // but we didn't send any. + logrus.WithError(err).Warn("unix.Close(socket) failed") + } + }() + + // Allocate an EthtoolValue using manual memory management. This is required because we need to pass + // a struct to the Syscall that in turn points to the EthtoolValue. If we allocate the EthtooValue on the + // go stack/heap then it would not be protected from being moved by the GC while the syscall is in progress. + // (Only the directly-passed struct is protected from being moved during the syscall.) + alloc := memory.Allocator{} + defer func() { + err := alloc.Close() + if err != nil { + logrus.WithError(err).Panic("Failed to release memory to the system") + } + }() + valueUPtr, err := alloc.UnsafeCalloc(int(unsafe.Sizeof(EthtoolValue{}))) + if err != nil { + return fmt.Errorf("failed to allocate memory: %w", err) + } + defer func() { + err := alloc.UnsafeFree(valueUPtr) + if err != nil { + logrus.WithError(err).Warn("UnsafeFree() failed") + } + }() + value := (*EthtoolValue)(valueUPtr) + + // Get the current value so we only set it if it needs to change. + *value = EthtoolValue{Cmd: unix.ETHTOOL_GTXCSUM} + request := IFReqData{Data: uintptr(valueUPtr)} + copy(request.Name[:], name) + if err := ioctlEthtool(socket, &request); err != nil { + return err + } + if value.Data == 0 { // if already off, don't try to change + return nil + } + + // Set the value. + *value = EthtoolValue{Cmd: unix.ETHTOOL_STXCSUM, Data: 0 /* off */} + return ioctlEthtool(socket, &request) +} diff --git a/fv/bpf_test.go b/fv/bpf_test.go index 309cfd3f1a..84505683b4 100644 --- a/fv/bpf_test.go +++ b/fv/bpf_test.go @@ -285,6 +285,8 @@ func describeBPFTests(opts ...bpfTestOpt) bool { for i, felix := range felixes { felix.Exec("iptables-save", "-c") felix.Exec("ip", "r") + felix.Exec("ip", "link") + felix.Exec("ip", "addr") felix.Exec("ip", "route", "show", "cached") felix.Exec("calico-bpf", "ipsets", "dump") felix.Exec("calico-bpf", "routes", "dump") @@ -2172,7 +2174,6 @@ func describeBPFTests(opts ...bpfTestOpt) bool { hostW0SrcIP = ExpectWithSrcIPs(felixes[0].ExpectedIPIPTunnelAddr) hostW1SrcIP = ExpectWithSrcIPs(felixes[1].ExpectedIPIPTunnelAddr) case "wireguard": - hostW1SrcIP = ExpectWithSrcIPs(felixes[0].ExpectedWireguardTunnelAddr) hostW1SrcIP = ExpectWithSrcIPs(felixes[1].ExpectedWireguardTunnelAddr) } diff --git a/fv/vxlan_test.go b/fv/vxlan_test.go index ff7572b405..3e91524cf2 100644 --- a/fv/vxlan_test.go +++ b/fv/vxlan_test.go @@ -42,9 +42,17 @@ import ( var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ VXLAN topology before adding host IPs to IP sets", []apiconfig.DatastoreType{apiconfig.EtcdV3, apiconfig.Kubernetes}, func(getInfra infrastructure.InfraFactory) { for _, vxlanM := range []api.VXLANMode{api.VXLANModeCrossSubnet} { vxlanMode := vxlanM - for _, routeSource := range []string{"CalicoIPAM", "WorkloadIPs"} { - routeSource := routeSource - Describe(fmt.Sprintf("VXLAN mode set to %s, routeSource %s", vxlanMode, routeSource), func() { + type testConf struct { + RouteSource string + BrokenXSum bool + } + for _, testConfig := range []testConf{ + {"CalicoIPAM", true}, + {"WorkloadIPs", false}, + } { + routeSource := testConfig.RouteSource + brokenXSum := testConfig.BrokenXSum + Describe(fmt.Sprintf("VXLAN mode set to %s, routeSource %s, brokenXSum: %v", vxlanMode, routeSource, brokenXSum), func() { var ( infra infrastructure.DatastoreInfra felixes []*infrastructure.Felix @@ -60,6 +68,10 @@ var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ VXLAN topology before addin topologyOptions.VXLANMode = vxlanMode topologyOptions.IPIPEnabled = false topologyOptions.ExtraEnvVars["FELIX_ROUTESOURCE"] = routeSource + // We force the broken checksum handling on or off so that we're not dependent on kernel version + // for these tests. Since we're testing in containers anyway, checksum offload can't really be + // tested but we can verify the state with ethtool. + topologyOptions.ExtraEnvVars["FELIX_FeatureDetectOverride"] = fmt.Sprintf("ChecksumOffloadBroken=%t", brokenXSum) felixes, client = infrastructure.StartNNodeTopology(3, topologyOptions, infra) // Install a default profile that allows all ingress and egress, in the absence of any Policy. @@ -128,7 +140,27 @@ var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ VXLAN topology before addin } infra.Stop() }) - + if brokenXSum { + It("should disable checksum offload", func() { + Eventually(func() string { + out, err := felixes[0].ExecOutput("ethtool", "-k", "vxlan.calico") + if err != nil { + return fmt.Sprintf("ERROR: %v", err) + } + return out + }, "10s", "100ms").Should(ContainSubstring("tx-checksumming: off")) + }) + } else { + It("should not disable checksum offload", func() { + Eventually(func() string { + out, err := felixes[0].ExecOutput("ethtool", "-k", "vxlan.calico") + if err != nil { + return fmt.Sprintf("ERROR: %v", err) + } + return out + }, "10s", "100ms").Should(ContainSubstring("tx-checksumming: on")) + }) + } It("should use the --random-fully flag in the MASQUERADE rules", func() { for _, felix := range felixes { Eventually(func() string { diff --git a/fv/wireguard_test.go b/fv/wireguard_test.go index 2115442071..bc7954162d 100644 --- a/fv/wireguard_test.go +++ b/fv/wireguard_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Tigera, Inc. All rights reserved. +// Copyright (c) 2020-2021 Tigera, Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/go.mod b/go.mod index c7e8cd3607..eccbb2b74b 100644 --- a/go.mod +++ b/go.mod @@ -41,6 +41,7 @@ require ( k8s.io/apimachinery v0.21.0-rc.0 k8s.io/client-go v0.21.0-rc.0 k8s.io/kubernetes v1.21.0-rc.0 + modernc.org/memory v1.0.4 ) replace ( diff --git a/go.sum b/go.sum index acfd46cb27..084b4a3a12 100644 --- a/go.sum +++ b/go.sum @@ -655,6 +655,8 @@ github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4O github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/quobyte/api v0.1.8/go.mod h1:jL7lIHrmqQ7yh05OJ+eEEdHr0u/kmT1Ff9iHd+4H6VI= github.com/remyoudompheng/bigfft v0.0.0-20170806203942-52369c62f446/go.mod h1:uYEyJGbgTkfkS4+E/PavXkNJcbFIpEtjt2B0KDQ5+9M= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/robfig/cron v1.1.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= @@ -1237,6 +1239,10 @@ k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/ modernc.org/cc v1.0.0/go.mod h1:1Sk4//wdnYJiUIxnW8ddKpaOJCF37yAdqYnkxUpaYxw= modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk= modernc.org/mathutil v1.0.0/go.mod h1:wU0vUrJsVWBZ4P6e7xtFJEhFSNsfRLJ8H458uRjg03k= +modernc.org/mathutil v1.1.1 h1:FeylZSVX8S+58VsyJlkEj2bcpdytmp9MmDKZkKx8OIE= +modernc.org/mathutil v1.1.1/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/memory v1.0.4 h1:utMBrFcpnQDdNsmM6asmyH/FM9TqLPS7XF7otpJmrwM= +modernc.org/memory v1.0.4/go.mod h1:nV2OApxradM3/OVbs2/0OsP6nPfakXpi50C7dcoHXlc= modernc.org/strutil v1.0.0/go.mod h1:lstksw84oURvj9y3tn8lGvRxyRC1S2+g5uuIzNfIOBs= modernc.org/xc v1.0.0/go.mod h1:mRNCo0bvLjGhHO9WsyuKVU4q0ceiDDDoEeWDJHrNx8I= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= diff --git a/iptables/feature_detect.go b/iptables/feature_detect.go index e684f2912d..0a940e692b 100644 --- a/iptables/feature_detect.go +++ b/iptables/feature_detect.go @@ -1,4 +1,4 @@ -// Copyright (c) 2018-2020 Tigera, Inc. All rights reserved. +// Copyright (c) 2018-2021 Tigera, Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ import ( "bytes" "io" "os/exec" + "reflect" "regexp" "strconv" "strings" @@ -44,6 +45,8 @@ var ( v3Dot10Dot0 = versionparse.MustParseVersion("3.10.0") // v3Dot14Dot0 added the random-fully feature on the iptables interface. v3Dot14Dot0 = versionparse.MustParseVersion("3.14.0") + // v5Dot7Dot0 contains a fix for checksum offloading. + v5Dot7Dot0 = versionparse.MustParseVersion("5.7.0") ) type Features struct { @@ -54,12 +57,17 @@ type Features struct { // RestoreSupportsLock is true if the iptables-restore command supports taking the xtables lock and the // associated -w and -W arguments. RestoreSupportsLock bool + // ChecksumOffloadBroken is true for kernels that have broken checksum offload for packets with SNATted source + // ports. See https://github.com/projectcalico/calico/issues/3145. On such kernels we disable checksum offload + // on our VXLAN device. + ChecksumOffloadBroken bool } type FeatureDetector struct { lock sync.Mutex featureCache *Features featureOverride map[string]string + loggedOverrides bool // Path to file with kernel version GetKernelVersionReader func() (io.Reader, error) @@ -102,32 +110,40 @@ func (d *FeatureDetector) refreshFeaturesLockHeld() { // Calculate the features. features := Features{ - SNATFullyRandom: iptV.Compare(v1Dot6Dot0) >= 0 && kerV.Compare(v3Dot14Dot0) >= 0, - MASQFullyRandom: iptV.Compare(v1Dot6Dot2) >= 0 && kerV.Compare(v3Dot14Dot0) >= 0, - RestoreSupportsLock: iptV.Compare(v1Dot6Dot2) >= 0, + SNATFullyRandom: iptV.Compare(v1Dot6Dot0) >= 0 && kerV.Compare(v3Dot14Dot0) >= 0, + MASQFullyRandom: iptV.Compare(v1Dot6Dot2) >= 0 && kerV.Compare(v3Dot14Dot0) >= 0, + RestoreSupportsLock: iptV.Compare(v1Dot6Dot2) >= 0, + ChecksumOffloadBroken: kerV.Compare(v5Dot7Dot0) < 0, } - if value, ok := (d.featureOverride)["SNATFullyRandom"]; ok { - ovr, err := strconv.ParseBool(value) - if err == nil { - log.WithField("override", ovr).Info("Override feature SNATFullyRandom") - features.SNATFullyRandom = ovr + for k, v := range d.featureOverride { + ovr, err := strconv.ParseBool(v) + logCxt := log.WithFields(log.Fields{ + "flag": k, + "value": v, + }) + if err != nil { + if !d.loggedOverrides { + logCxt.Warn("Failed to parse value for feature detection override; ignoring") + } + continue } - } - if value, ok := (d.featureOverride)["MASQFullyRandom"]; ok { - ovr, err := strconv.ParseBool(value) - if err == nil { - log.WithField("override", ovr).Info("Override feature MASQFullyRandom") - features.MASQFullyRandom = ovr + field := reflect.ValueOf(&features).Elem().FieldByName(k) + if field.IsValid() { + field.SetBool(ovr) + } else { + if !d.loggedOverrides { + logCxt.Warn("Unknown feature detection flag; ignoring") + } + continue } - } - if value, ok := (d.featureOverride)["RestoreSupportsLock"]; ok { - ovr, err := strconv.ParseBool(value) - if err == nil { - log.WithField("override", ovr).Info("Override feature RestoreSupportsLock") - features.RestoreSupportsLock = ovr + + if !d.loggedOverrides { + logCxt.Info("Overriding feature detection flag") } } + // Avoid logging all the override values every time through this function. + d.loggedOverrides = true if d.featureCache == nil || *d.featureCache != features { log.WithFields(log.Fields{ diff --git a/iptables/features_detect_test.go b/iptables/features_detect_test.go index 3306c4a99e..bd74e1305c 100644 --- a/iptables/features_detect_test.go +++ b/iptables/features_detect_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2018-2019 Tigera, Inc. All rights reserved. +// Copyright (c) 2018-2021 Tigera, Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -39,72 +39,90 @@ func TestFeatureDetection(t *testing.T) { "iptables v1.6.2", "Linux version 3.14.0", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: true, - MASQFullyRandom: true, + RestoreSupportsLock: true, + SNATFullyRandom: true, + MASQFullyRandom: true, + ChecksumOffloadBroken: true, }, }, { "iptables v1.6.1", "Linux version 3.14.0", Features{ - RestoreSupportsLock: false, - SNATFullyRandom: true, - MASQFullyRandom: false, + RestoreSupportsLock: false, + SNATFullyRandom: true, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, }, { "iptables v1.5.0", "Linux version 3.14.0", Features{ - RestoreSupportsLock: false, - SNATFullyRandom: false, - MASQFullyRandom: false, + RestoreSupportsLock: false, + SNATFullyRandom: false, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, }, { "iptables v1.6.2", "Linux version 3.13.0", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: false, - MASQFullyRandom: false, + RestoreSupportsLock: true, + SNATFullyRandom: false, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, }, { "garbage", "Linux version 3.14.0", Features{ - RestoreSupportsLock: false, - SNATFullyRandom: false, - MASQFullyRandom: false, + RestoreSupportsLock: false, + SNATFullyRandom: false, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, }, { "iptables v1.6.2", "garbage", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: false, - MASQFullyRandom: false, + RestoreSupportsLock: true, + SNATFullyRandom: false, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, }, { "error", "Linux version 3.14.0", Features{ - RestoreSupportsLock: false, - SNATFullyRandom: false, - MASQFullyRandom: false, + RestoreSupportsLock: false, + SNATFullyRandom: false, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, }, { "iptables v1.6.2", "error", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: false, - MASQFullyRandom: false, + RestoreSupportsLock: true, + SNATFullyRandom: false, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, + }, + }, + { + "iptables v1.8.4", + "Linux version 5.7.0", + Features{ + RestoreSupportsLock: true, + SNATFullyRandom: true, + MASQFullyRandom: true, + ChecksumOffloadBroken: false, }, }, } { @@ -146,9 +164,10 @@ func TestFeatureDetectionOverride(t *testing.T) { "iptables v1.6.2", "Linux version 3.14.0", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: true, - MASQFullyRandom: true, + RestoreSupportsLock: true, + SNATFullyRandom: true, + MASQFullyRandom: true, + ChecksumOffloadBroken: true, }, map[string]string{}, }, @@ -156,9 +175,10 @@ func TestFeatureDetectionOverride(t *testing.T) { "iptables v1.6.1", "Linux version 3.14.0", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: true, - MASQFullyRandom: false, + RestoreSupportsLock: true, + SNATFullyRandom: true, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, map[string]string{ "RestoreSupportsLock": "true", @@ -168,9 +188,10 @@ func TestFeatureDetectionOverride(t *testing.T) { "error", "error", Features{ - RestoreSupportsLock: true, - SNATFullyRandom: true, - MASQFullyRandom: false, + RestoreSupportsLock: true, + SNATFullyRandom: true, + MASQFullyRandom: false, + ChecksumOffloadBroken: true, }, map[string]string{ "RestoreSupportsLock": "true",