Skip to content

Commit

Permalink
Merge pull request projectcalico#2811 from fasaxc/vxlan-checksum
Browse files Browse the repository at this point in the history
Disable checksum offload on the VXLAN tunnel for kernels <v5.7.
  • Loading branch information
marvin-tigera authored and fasaxc committed May 18, 2021
1 parent 65d4c75 commit 063df63
Show file tree
Hide file tree
Showing 13 changed files with 273 additions and 71 deletions.
1 change: 1 addition & 0 deletions .semaphore/create-test-vm
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ function create-vm() {
gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- sudo apt-get update -y && \
gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- sudo apt-get install -y --no-install-recommends git make iproute2 docker.io wireguard && \
gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- sudo usermod -a -G docker ubuntu && \
gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- sudo modprobe ipip && \
set +x && \
echo "$DOCKERHUB_PASSWORD" | gcloud --quiet compute ssh --zone=${zone} "ubuntu@${vm_name}" -- docker login --username "$DOCKERHUB_USERNAME" --password-stdin && \
set -x && \
Expand Down
3 changes: 3 additions & 0 deletions .semaphore/semaphore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ blocks:
- docker load -i /tmp/calico-felix.tar
- rm /tmp/calico-felix.tar
- touch bin/*
# Pre-loading the IPIP module prevents a flake where the first felix to use IPIP loads the module and
# routing in that first felix container chooses different source IPs than the tests are expecting.
- sudo modprobe ipip
jobs:
- name: FV Test matrix
commands:
Expand Down
2 changes: 1 addition & 1 deletion dataplane/linux/int_dataplane.go
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ func NewIntDataplaneDriver(config Config) *InternalDataplane {
config,
dp.loopSummarizer,
)
go vxlanManager.KeepVXLANDeviceInSync(config.VXLANMTU, 10*time.Second)
go vxlanManager.KeepVXLANDeviceInSync(config.VXLANMTU, iptablesFeatures.ChecksumOffloadBroken, 10*time.Second)
dp.RegisterManager(vxlanManager)
} else {
cleanUpVXLANDevice()
Expand Down
23 changes: 18 additions & 5 deletions dataplane/linux/vxlan_mgr.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2016-2020 Tigera, Inc. All rights reserved.
// Copyright (c) 2016-2021 Tigera, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -23,6 +23,7 @@ import (
"syscall"
"time"

"github.com/projectcalico/felix/ethtool"
"github.com/projectcalico/felix/ipsets"
"github.com/projectcalico/felix/logutils"
"github.com/projectcalico/felix/rules"
Expand Down Expand Up @@ -339,8 +340,12 @@ func (m *vxlanManager) CompleteDeferredWork() error {

// KeepVXLANDeviceInSync is a goroutine that configures the VXLAN tunnel device, then periodically
// checks that it is still correctly configured.
func (m *vxlanManager) KeepVXLANDeviceInSync(mtu int, wait time.Duration) {
logrus.WithField("mtu", mtu).Info("VXLAN tunnel device thread started.")
func (m *vxlanManager) KeepVXLANDeviceInSync(mtu int, xsumBroken bool, wait time.Duration) {
logrus.WithFields(logrus.Fields{
"mtu": mtu,
"xsumBroken": xsumBroken,
"wait": wait,
}).Info("VXLAN tunnel device thread started.")
logNextSuccess := true
for {
localVTEP := m.getLocalVTEP()
Expand All @@ -362,13 +367,14 @@ func (m *vxlanManager) KeepVXLANDeviceInSync(mtu int, wait time.Duration) {
}
}

err := m.configureVXLANDevice(mtu, localVTEP)
err := m.configureVXLANDevice(mtu, localVTEP, xsumBroken)
if err != nil {
logrus.WithError(err).Warn("Failed configure VXLAN tunnel device, retrying...")
logNextSuccess = true
time.Sleep(1 * time.Second)
continue
}

if logNextSuccess {
logrus.Info("VXLAN tunnel device configured")
logNextSuccess = false
Expand Down Expand Up @@ -400,7 +406,7 @@ func (m *vxlanManager) getParentInterface(localVTEP *proto.VXLANTunnelEndpointUp
}

// configureVXLANDevice ensures the VXLAN tunnel device is up and configured correctly.
func (m *vxlanManager) configureVXLANDevice(mtu int, localVTEP *proto.VXLANTunnelEndpointUpdate) error {
func (m *vxlanManager) configureVXLANDevice(mtu int, localVTEP *proto.VXLANTunnelEndpointUpdate, xsumBroken bool) error {
logCxt := logrus.WithFields(logrus.Fields{"device": m.vxlanDevice})
logCxt.Debug("Configuring VXLAN tunnel device")
parent, err := m.getParentInterface(localVTEP)
Expand Down Expand Up @@ -478,6 +484,13 @@ func (m *vxlanManager) configureVXLANDevice(mtu int, localVTEP *proto.VXLANTunne
return fmt.Errorf("failed to ensure address of interface: %s", err)
}

// If required, disable checksum offload.
if xsumBroken {
if err := ethtool.EthtoolTXOff(m.vxlanDevice); err != nil {
return fmt.Errorf("failed to disable checksum offload: %s", err)
}
}

// And the device is up.
if err := m.nlHandle.LinkSetUp(link); err != nil {
return fmt.Errorf("failed to set interface up: %s", err)
Expand Down
8 changes: 4 additions & 4 deletions dataplane/linux/vxlan_mgr_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2020 Tigera, Inc. All rights reserved.
// Copyright (c) 2019-2021 Tigera, Inc. All rights reserved.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -141,7 +141,7 @@ var _ = Describe("VXLANManager", func() {

manager.noEncapRouteTable = prt

err := manager.configureVXLANDevice(50, localVTEP)
err := manager.configureVXLANDevice(50, localVTEP, false)
Expect(err).NotTo(HaveOccurred())

Expect(manager.myVTEP).NotTo(BeNil())
Expand Down Expand Up @@ -178,7 +178,7 @@ var _ = Describe("VXLANManager", func() {
})

It("adds the route to the default table on next try when the parent route table is not immediately found", func() {
go manager.KeepVXLANDeviceInSync(1400, 1*time.Second)
go manager.KeepVXLANDeviceInSync(1400, false, 1*time.Second)
manager.OnUpdate(&proto.VXLANTunnelEndpointUpdate{
Node: "node2",
Mac: "00:0a:95:9d:68:16",
Expand Down Expand Up @@ -213,7 +213,7 @@ var _ = Describe("VXLANManager", func() {
localVTEP := manager.getLocalVTEP()
Expect(localVTEP).NotTo(BeNil())

err = manager.configureVXLANDevice(50, localVTEP)
err = manager.configureVXLANDevice(50, localVTEP, false)
Expect(err).NotTo(HaveOccurred())

Expect(prt.currentRoutes["eth0"]).To(HaveLen(0))
Expand Down
108 changes: 108 additions & 0 deletions ethtool/ethtool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Copyright (c) 2021 Tigera, Inc. All rights reserved.
//
// Based on the version in the Weave project, Copyright Weaveworks:
// https://github.com/weaveworks/weave/blob/c69e140e9e43d456b6fe5812a2bc61bc67953b93/net/ethtool.go
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ethtool

import (
"fmt"
"unsafe"

"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"modernc.org/memory"
)

// IFReqData represents linux/if.h 'struct ifreq'
type IFReqData struct {
Name [unix.IFNAMSIZ]byte
Data uintptr
}

// EthtoolValue represents linux/ethtool.h 'struct ethtool_value'
type EthtoolValue struct {
Cmd uint32
Data uint32
}

func ioctlEthtool(fd int, argp *IFReqData) error {
// Important that the cast to uintptr is _syntactically_ within the Syscall() invocation in order to guarantee
// safety. (See notes in the unsafe.Pointer docs.)
_, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.SIOCETHTOOL), uintptr(unsafe.Pointer(argp)))
if errno != 0 {
return errno
}
return nil
}

// EthtoolTXOff disables the TX checksum offload on the specified interface
func EthtoolTXOff(name string) error {
if len(name)+1 > unix.IFNAMSIZ {
return fmt.Errorf("name too long")
}

// To access the IOCTL, we need a socket file descriptor.
socket, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0)
if err != nil {
return err
}
defer func() {
err := unix.Close(socket)
if err != nil {
// Super unlikely; normally a failure from Close means that some data couldn't be flushed
// but we didn't send any.
logrus.WithError(err).Warn("unix.Close(socket) failed")
}
}()

// Allocate an EthtoolValue using manual memory management. This is required because we need to pass
// a struct to the Syscall that in turn points to the EthtoolValue. If we allocate the EthtooValue on the
// go stack/heap then it would not be protected from being moved by the GC while the syscall is in progress.
// (Only the directly-passed struct is protected from being moved during the syscall.)
alloc := memory.Allocator{}
defer func() {
err := alloc.Close()
if err != nil {
logrus.WithError(err).Panic("Failed to release memory to the system")
}
}()
valueUPtr, err := alloc.UnsafeCalloc(int(unsafe.Sizeof(EthtoolValue{})))
if err != nil {
return fmt.Errorf("failed to allocate memory: %w", err)
}
defer func() {
err := alloc.UnsafeFree(valueUPtr)
if err != nil {
logrus.WithError(err).Warn("UnsafeFree() failed")
}
}()
value := (*EthtoolValue)(valueUPtr)

// Get the current value so we only set it if it needs to change.
*value = EthtoolValue{Cmd: unix.ETHTOOL_GTXCSUM}
request := IFReqData{Data: uintptr(valueUPtr)}
copy(request.Name[:], name)
if err := ioctlEthtool(socket, &request); err != nil {
return err
}
if value.Data == 0 { // if already off, don't try to change
return nil
}

// Set the value.
*value = EthtoolValue{Cmd: unix.ETHTOOL_STXCSUM, Data: 0 /* off */}
return ioctlEthtool(socket, &request)
}
3 changes: 2 additions & 1 deletion fv/bpf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,8 @@ func describeBPFTests(opts ...bpfTestOpt) bool {
for i, felix := range felixes {
felix.Exec("iptables-save", "-c")
felix.Exec("ip", "r")
felix.Exec("ip", "link")
felix.Exec("ip", "addr")
felix.Exec("ip", "route", "show", "cached")
felix.Exec("calico-bpf", "ipsets", "dump")
felix.Exec("calico-bpf", "routes", "dump")
Expand Down Expand Up @@ -2172,7 +2174,6 @@ func describeBPFTests(opts ...bpfTestOpt) bool {
hostW0SrcIP = ExpectWithSrcIPs(felixes[0].ExpectedIPIPTunnelAddr)
hostW1SrcIP = ExpectWithSrcIPs(felixes[1].ExpectedIPIPTunnelAddr)
case "wireguard":
hostW1SrcIP = ExpectWithSrcIPs(felixes[0].ExpectedWireguardTunnelAddr)
hostW1SrcIP = ExpectWithSrcIPs(felixes[1].ExpectedWireguardTunnelAddr)
}

Expand Down
40 changes: 36 additions & 4 deletions fv/vxlan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,17 @@ import (
var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ VXLAN topology before adding host IPs to IP sets", []apiconfig.DatastoreType{apiconfig.EtcdV3, apiconfig.Kubernetes}, func(getInfra infrastructure.InfraFactory) {
for _, vxlanM := range []api.VXLANMode{api.VXLANModeCrossSubnet} {
vxlanMode := vxlanM
for _, routeSource := range []string{"CalicoIPAM", "WorkloadIPs"} {
routeSource := routeSource
Describe(fmt.Sprintf("VXLAN mode set to %s, routeSource %s", vxlanMode, routeSource), func() {
type testConf struct {
RouteSource string
BrokenXSum bool
}
for _, testConfig := range []testConf{
{"CalicoIPAM", true},
{"WorkloadIPs", false},
} {
routeSource := testConfig.RouteSource
brokenXSum := testConfig.BrokenXSum
Describe(fmt.Sprintf("VXLAN mode set to %s, routeSource %s, brokenXSum: %v", vxlanMode, routeSource, brokenXSum), func() {
var (
infra infrastructure.DatastoreInfra
felixes []*infrastructure.Felix
Expand All @@ -60,6 +68,10 @@ var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ VXLAN topology before addin
topologyOptions.VXLANMode = vxlanMode
topologyOptions.IPIPEnabled = false
topologyOptions.ExtraEnvVars["FELIX_ROUTESOURCE"] = routeSource
// We force the broken checksum handling on or off so that we're not dependent on kernel version
// for these tests. Since we're testing in containers anyway, checksum offload can't really be
// tested but we can verify the state with ethtool.
topologyOptions.ExtraEnvVars["FELIX_FeatureDetectOverride"] = fmt.Sprintf("ChecksumOffloadBroken=%t", brokenXSum)
felixes, client = infrastructure.StartNNodeTopology(3, topologyOptions, infra)

// Install a default profile that allows all ingress and egress, in the absence of any Policy.
Expand Down Expand Up @@ -128,7 +140,27 @@ var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ VXLAN topology before addin
}
infra.Stop()
})

if brokenXSum {
It("should disable checksum offload", func() {
Eventually(func() string {
out, err := felixes[0].ExecOutput("ethtool", "-k", "vxlan.calico")
if err != nil {
return fmt.Sprintf("ERROR: %v", err)
}
return out
}, "10s", "100ms").Should(ContainSubstring("tx-checksumming: off"))
})
} else {
It("should not disable checksum offload", func() {
Eventually(func() string {
out, err := felixes[0].ExecOutput("ethtool", "-k", "vxlan.calico")
if err != nil {
return fmt.Sprintf("ERROR: %v", err)
}
return out
}, "10s", "100ms").Should(ContainSubstring("tx-checksumming: on"))
})
}
It("should use the --random-fully flag in the MASQUERADE rules", func() {
for _, felix := range felixes {
Eventually(func() string {
Expand Down
2 changes: 1 addition & 1 deletion fv/wireguard_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020 Tigera, Inc. All rights reserved.
// Copyright (c) 2020-2021 Tigera, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ require (
k8s.io/apimachinery v0.21.0-rc.0
k8s.io/client-go v0.21.0-rc.0
k8s.io/kubernetes v1.21.0-rc.0
modernc.org/memory v1.0.4
)

replace (
Expand Down
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,8 @@ github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4O
github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
github.com/quobyte/api v0.1.8/go.mod h1:jL7lIHrmqQ7yh05OJ+eEEdHr0u/kmT1Ff9iHd+4H6VI=
github.com/remyoudompheng/bigfft v0.0.0-20170806203942-52369c62f446/go.mod h1:uYEyJGbgTkfkS4+E/PavXkNJcbFIpEtjt2B0KDQ5+9M=
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk=
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/robfig/cron v1.1.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k=
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
Expand Down Expand Up @@ -1237,6 +1239,10 @@ k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/
modernc.org/cc v1.0.0/go.mod h1:1Sk4//wdnYJiUIxnW8ddKpaOJCF37yAdqYnkxUpaYxw=
modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk=
modernc.org/mathutil v1.0.0/go.mod h1:wU0vUrJsVWBZ4P6e7xtFJEhFSNsfRLJ8H458uRjg03k=
modernc.org/mathutil v1.1.1 h1:FeylZSVX8S+58VsyJlkEj2bcpdytmp9MmDKZkKx8OIE=
modernc.org/mathutil v1.1.1/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E=
modernc.org/memory v1.0.4 h1:utMBrFcpnQDdNsmM6asmyH/FM9TqLPS7XF7otpJmrwM=
modernc.org/memory v1.0.4/go.mod h1:nV2OApxradM3/OVbs2/0OsP6nPfakXpi50C7dcoHXlc=
modernc.org/strutil v1.0.0/go.mod h1:lstksw84oURvj9y3tn8lGvRxyRC1S2+g5uuIzNfIOBs=
modernc.org/xc v1.0.0/go.mod h1:mRNCo0bvLjGhHO9WsyuKVU4q0ceiDDDoEeWDJHrNx8I=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
Expand Down
Loading

0 comments on commit 063df63

Please sign in to comment.