From 5d3d664255799fdefc76cbbbfb7af1253fe4332a Mon Sep 17 00:00:00 2001 From: Tom Denham Date: Wed, 8 Feb 2017 18:48:13 -0800 Subject: [PATCH] backend/vxlan: simplify vxlan processing --- backend/vxlan/device.go | 151 ++++++------------------- backend/vxlan/routes.go | 57 ---------- backend/vxlan/vxlan.go | 51 +++++++-- backend/vxlan/vxlan_network.go | 198 ++++++++++----------------------- dist/functional-test.sh | 6 +- 5 files changed, 136 insertions(+), 327 deletions(-) delete mode 100644 backend/vxlan/routes.go diff --git a/backend/vxlan/device.go b/backend/vxlan/device.go index 738fc2be5..8f00961e1 100644 --- a/backend/vxlan/device.go +++ b/backend/vxlan/device.go @@ -17,13 +17,10 @@ package vxlan import ( "fmt" "net" - "os" "syscall" - "time" log "github.com/golang/glog" "github.com/vishvananda/netlink" - "github.com/vishvananda/netlink/nl" "github.com/coreos/flannel/pkg/ip" ) @@ -41,17 +38,6 @@ type vxlanDevice struct { link *netlink.Vxlan } -func sysctlSet(path, value string) error { - f, err := os.Create(path) - if err != nil { - return err - } - defer f.Close() - - _, err = f.Write([]byte(value)) - return err -} - func newVXLANDevice(devAttrs *vxlanDeviceAttrs) (*vxlanDevice, error) { link := &netlink.Vxlan{ LinkAttrs: netlink.LinkAttrs{ @@ -69,12 +55,6 @@ func newVXLANDevice(devAttrs *vxlanDeviceAttrs) (*vxlanDevice, error) { if err != nil { return nil, err } - // this enables ARP requests being sent to userspace via netlink - sysctlPath := fmt.Sprintf("/proc/sys/net/ipv4/neigh/%s/app_solicit", devAttrs.name) - if err := sysctlSet(sysctlPath, "3"); err != nil { - return nil, err - } - return &vxlanDevice{ link: link, }, nil @@ -84,6 +64,7 @@ func ensureLink(vxlan *netlink.Vxlan) (*netlink.Vxlan, error) { err := netlink.LinkAdd(vxlan) if err == syscall.EEXIST { // it's ok if the device already exists as long as config is similar + log.V(1).Infof("VXLAN device already exists") existing, err := netlink.LinkByName(vxlan.Name) if err != nil { return nil, err @@ -91,6 +72,7 @@ func ensureLink(vxlan *netlink.Vxlan) (*netlink.Vxlan, error) { incompat := vxlanLinksIncompat(vxlan, existing) if incompat == "" { + log.V(1).Infof("Returning existing device") return existing.(*netlink.Vxlan), nil } @@ -123,28 +105,37 @@ func ensureLink(vxlan *netlink.Vxlan) (*netlink.Vxlan, error) { } func (dev *vxlanDevice) Configure(ipn ip.IP4Net) error { - setAddr4(dev.link, ipn.ToIPNet()) + addr := netlink.Addr{IPNet: ipn.ToIPNet()} + existingAddrs, err := netlink.AddrList(dev.link, netlink.FAMILY_V4) + if err != nil { + return err + } - if err := netlink.LinkSetUp(dev.link); err != nil { - return fmt.Errorf("failed to set interface %s to UP state: %s", dev.link.Attrs().Name, err) + // flannel will never make this happen. This situation can only be caused by a user, so get them to sort it out. + if len(existingAddrs) > 1 { + return fmt.Errorf("link has incompatible addresses. Remove additional addresses and try again. %s", dev.link) } - // explicitly add a route since there might be a route for a subnet already - // installed by Docker and then it won't get auto added - route := netlink.Route{ - LinkIndex: dev.link.Attrs().Index, - Scope: netlink.SCOPE_UNIVERSE, - Dst: ipn.Network().ToIPNet(), + // If the device has an incompatible address then delete it. This can happen if the lease changes for example. + if len(existingAddrs) == 1 && !existingAddrs[0].Equal(addr) { + if err := netlink.AddrDel(dev.link, &existingAddrs[0]); err != nil { + return fmt.Errorf("failed to remove IP address %s from %s: %s", ipn.String(), dev.link.Attrs().Name, err) + } + existingAddrs = []netlink.Addr{} } - if err := netlink.RouteAdd(&route); err != nil && err != syscall.EEXIST { - return fmt.Errorf("failed to add route (%s -> %s): %v", ipn.Network().String(), dev.link.Attrs().Name, err) + + // Actually add the desired address to the interface if needed. + if len(existingAddrs) == 0 { + if err := netlink.AddrAdd(dev.link, &addr); err != nil { + return fmt.Errorf("failed to add IP address %s to %s: %s", ipn.String(), dev.link.Attrs().Name, err) + } } - return nil -} + if err := netlink.LinkSetUp(dev.link); err != nil { + return fmt.Errorf("failed to set interface %s to UP state: %s", dev.link.Attrs().Name, err) + } -func (dev *vxlanDevice) Destroy() { - netlink.LinkDel(dev.link) + return nil } func (dev *vxlanDevice) MACAddr() net.HardwareAddr { @@ -160,14 +151,9 @@ type neighbor struct { IP ip.IP4 } -func (dev *vxlanDevice) GetL2List() ([]netlink.Neigh, error) { - log.V(4).Infof("calling GetL2List() dev.link.Index: %d ", dev.link.Index) - return netlink.NeighList(dev.link.Index, syscall.AF_BRIDGE) -} - -func (dev *vxlanDevice) AddL2(n neighbor) error { - log.V(4).Infof("calling NeighAdd: %v, %v", n.IP, n.MAC) - return netlink.NeighAdd(&netlink.Neigh{ +func (dev *vxlanDevice) AddFDB(n neighbor) error { + log.V(4).Infof("calling AddFDB: %v, %v", n.IP, n.MAC) + return netlink.NeighSet(&netlink.Neigh{ LinkIndex: dev.link.Index, State: netlink.NUD_PERMANENT, Family: syscall.AF_BRIDGE, @@ -177,8 +163,8 @@ func (dev *vxlanDevice) AddL2(n neighbor) error { }) } -func (dev *vxlanDevice) DelL2(n neighbor) error { - log.V(4).Infof("calling NeighDel: %v, %v", n.IP, n.MAC) +func (dev *vxlanDevice) DelFDB(n neighbor) error { + log.V(4).Infof("calling DelFDB: %v, %v", n.IP, n.MAC) return netlink.NeighDel(&netlink.Neigh{ LinkIndex: dev.link.Index, Family: syscall.AF_BRIDGE, @@ -188,77 +174,28 @@ func (dev *vxlanDevice) DelL2(n neighbor) error { }) } -func (dev *vxlanDevice) AddL3(n neighbor) error { - log.V(4).Infof("calling NeighSet: %v, %v", n.IP, n.MAC) +func (dev *vxlanDevice) AddARP(n neighbor) error { + log.V(4).Infof("calling AddARP: %v, %v", n.IP, n.MAC) return netlink.NeighSet(&netlink.Neigh{ LinkIndex: dev.link.Index, - State: netlink.NUD_REACHABLE, + State: netlink.NUD_PERMANENT, Type: syscall.RTN_UNICAST, IP: n.IP.ToIP(), HardwareAddr: n.MAC, }) } -func (dev *vxlanDevice) DelL3(n neighbor) error { - log.V(4).Infof("calling NeighDel: %v, %v", n.IP, n.MAC) +func (dev *vxlanDevice) DelARP(n neighbor) error { + log.V(4).Infof("calling DelARP: %v, %v", n.IP, n.MAC) return netlink.NeighDel(&netlink.Neigh{ LinkIndex: dev.link.Index, - State: netlink.NUD_REACHABLE, + State: netlink.NUD_PERMANENT, Type: syscall.RTN_UNICAST, IP: n.IP.ToIP(), HardwareAddr: n.MAC, }) } -func (dev *vxlanDevice) MonitorMisses(misses chan *netlink.Neigh) { - nlsock, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH) - if err != nil { - log.Error("Failed to subscribe to netlink RTNLGRP_NEIGH messages") - return - } - - for { - msgs, err := nlsock.Receive() - if err != nil { - log.Errorf("Failed to receive from netlink: %v ", err) - - time.Sleep(1 * time.Second) - continue - } - - for _, msg := range msgs { - dev.processNeighMsg(msg, misses) - } - } -} - -func isNeighResolving(state int) bool { - return (state & (netlink.NUD_INCOMPLETE | netlink.NUD_STALE | netlink.NUD_DELAY | netlink.NUD_PROBE)) != 0 -} - -func (dev *vxlanDevice) processNeighMsg(msg syscall.NetlinkMessage, misses chan *netlink.Neigh) { - neigh, err := netlink.NeighDeserialize(msg.Data) - if err != nil { - log.Error("Failed to deserialize netlink ndmsg: %v", err) - return - } - - if neigh.LinkIndex != dev.link.Index { - return - } - - if msg.Header.Type != syscall.RTM_GETNEIGH && msg.Header.Type != syscall.RTM_NEWNEIGH { - return - } - - if !isNeighResolving(neigh.State) { - // misses come with NUD_STALE bit set - return - } - - misses <- neigh -} - func vxlanLinksIncompat(l1, l2 netlink.Link) string { if l1.Type() != l2.Type() { return fmt.Sprintf("link type: %v vs %v", l1.Type(), l2.Type()) @@ -297,17 +234,3 @@ func vxlanLinksIncompat(l1, l2 netlink.Link) string { return "" } - -// sets IP4 addr on link -func setAddr4(link *netlink.Vxlan, ipn *net.IPNet) error { - // Ensure that the device has a /32 address so that no broadcast routes are created. - // This IP is just used as a source address for host to workload traffic (so - // the return path for the traffic has a decent address to use as the destination) - ipn.Mask = net.CIDRMask(32, 32) - addr := netlink.Addr{IPNet: ipn, Label: ""} - if err := netlink.AddrAdd(link, &addr); err != nil { - return fmt.Errorf("failed to add IP address %s to %s: %s", ipn.String(), link.Attrs().Name, err) - } - - return nil -} diff --git a/backend/vxlan/routes.go b/backend/vxlan/routes.go deleted file mode 100644 index 5d3173c74..000000000 --- a/backend/vxlan/routes.go +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2015 flannel authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vxlan - -import ( - "net" - - "github.com/coreos/flannel/pkg/ip" -) - -type route struct { - network ip.IP4Net - vtepMAC net.HardwareAddr -} - -type routes []route - -func (rts *routes) set(nw ip.IP4Net, vtepMAC net.HardwareAddr) { - for i, rt := range *rts { - if rt.network.Equal(nw) { - (*rts)[i].vtepMAC = vtepMAC - return - } - } - *rts = append(*rts, route{nw, vtepMAC}) -} - -func (rts *routes) remove(nw ip.IP4Net) { - for i, rt := range *rts { - if rt.network.Equal(nw) { - (*rts)[i] = (*rts)[len(*rts)-1] - (*rts) = (*rts)[0 : len(*rts)-1] - return - } - } -} - -func (rts routes) findByNetwork(ipAddr ip.IP4) *route { - for i, rt := range rts { - if rt.network.Contains(ipAddr) { - return &rts[i] - } - } - return nil -} diff --git a/backend/vxlan/vxlan.go b/backend/vxlan/vxlan.go index f938ca302..1d3a34a3e 100644 --- a/backend/vxlan/vxlan.go +++ b/backend/vxlan/vxlan.go @@ -14,6 +14,40 @@ package vxlan +// Some design notes and history: +// VXLAN encapsulates L2 packets (though flannel is L3 only so don't expect to be able to send L2 packets across hosts) +// The first versions of vxlan for flannel registered the flannel daemon as a handler for both "L2" and "L3" misses +// - When a container sends a packet to a new IP address on the flannel network (but on a different host) this generates +// an L2 miss (i.e. an ARP lookup) +// - The flannel daemon knows which flannel host the packet is destined for so it can supply the VTEP MAC to use. +// This is stored in the ARP table (with a timeout) to avoid constantly looking it up. +// - The packet can then be encapsulated but the host needs to know where to send it. This creates another callout from +// the kernal vxlan code to the flannel daemon to get the public IP that should be used for that VTEP (this gets called +// an L3 miss). The L2/L3 miss hooks are registered when the vxlan device is created. At the same time a device route +// is created to the whole flannel network so that non-local traffic is sent over the vxlan device. +// +// In this scheme the scaling of table entries (per host) is: +// - 1 route (for the configured network out the vxlan device) +// - One arp entry for each remote container that this host has recently contacted +// - One FDB entry for each remote host +// +// The second version of flannel vxlan removed the need for the L3MISS callout. When a new remote host is found (either +// during startup or when it's created), flannel simply adds the required entries so that no further lookup/callout is required. +// +// +// The latest version of the vxlan backend removes the need for the L2MISS too, which means that the flannel deamon is not +// listening for any netlink messages anymore. This improves reliability (no problems with timeouts if +// flannel crashes or restarts) and simplifies upgrades. +// +// How it works: +// Create the vxlan device but don't register for any L2MISS or L3MISS messages +// Then, as each remote host is discovered (either on startup or when they are added), do the following +// 1) create routing table entry for the remote subnet. It goes via the vxlan device but also specifies a next hop (of the remote flannel host). +// 2) Create a static ARP entry for the remote flannel host IP address (and the VTEP MAC) +// 3) Create an FDB entry with the VTEP MAC and the public IP of the remote flannel daemon. +// +// In this scheme the scaling of table entries is linear to the number of remote hosts - 1 route, 1 arp entry and 1 FDB entry per host + import ( "encoding/json" "fmt" @@ -99,25 +133,20 @@ func (be *VXLANBackend) RegisterNetwork(ctx context.Context, config *subnet.Conf lease, err := be.subnetMgr.AcquireLease(ctx, subnetAttrs) switch err { case nil: - case context.Canceled, context.DeadlineExceeded: return nil, err - default: return nil, fmt.Errorf("failed to acquire lease: %v", err) } - // vxlan's subnet is that of the whole overlay network (e.g. /16) - // and not that of the individual host (e.g. /24) - vxlanNet := ip.IP4Net{ - IP: lease.Subnet.IP, - PrefixLen: config.Network.PrefixLen, - } - if err = dev.Configure(vxlanNet); err != nil { - return nil, err + // Ensure that the device has a /32 address so that no broadcast routes are created. + // This IP is just used as a source address for host to workload traffic (so + // the return path for the traffic has an address on the flannel network to use as the destination) + if err := dev.Configure(ip.IP4Net{IP: lease.Subnet.IP, PrefixLen: 32}); err != nil { + return nil, fmt.Errorf("failed to configure interface %s: %s", dev.link.Attrs().Name, err) } - return newNetwork(be.subnetMgr, be.extIface, dev, vxlanNet, lease) + return newNetwork(be.subnetMgr, be.extIface, dev, ip.IP4Net{}, lease) } // So we can make it JSON (un)marshalable diff --git a/backend/vxlan/vxlan_network.go b/backend/vxlan/vxlan_network.go index 51d64bbc6..18e20748c 100644 --- a/backend/vxlan/vxlan_network.go +++ b/backend/vxlan/vxlan_network.go @@ -15,17 +15,16 @@ package vxlan import ( - "bytes" "encoding/json" - "fmt" "net" "sync" - "time" log "github.com/golang/glog" "github.com/vishvananda/netlink" "golang.org/x/net/context" + "syscall" + "github.com/coreos/flannel/backend" "github.com/coreos/flannel/pkg/ip" "github.com/coreos/flannel/subnet" @@ -35,7 +34,6 @@ type network struct { backend.SimpleNetwork extIface *backend.ExternalInterface dev *vxlanDevice - routes routes subnetMgr subnet.Manager } @@ -53,15 +51,9 @@ func newNetwork(subnetMgr subnet.Manager, extIface *backend.ExternalInterface, d } func (nw *network) Run(ctx context.Context) { - log.V(0).Info("Watching for L3 misses") - misses := make(chan *netlink.Neigh, 100) - // Unfortunately MonitorMisses does not take a cancel channel - // as there's no wait to interrupt netlink socket recv - go nw.dev.MonitorMisses(misses) - wg := sync.WaitGroup{} - log.V(0).Info("Watching for new subnet leases") + log.V(0).Info("watching for new subnet leases") events := make(chan []subnet.Event) wg.Add(1) go func() { @@ -72,26 +64,8 @@ func (nw *network) Run(ctx context.Context) { defer wg.Wait() - select { - case initialEventsBatch := <-events: - for { - err := nw.handleInitialSubnetEvents(initialEventsBatch) - if err == nil { - break - } - log.Error(err, " About to retry") - time.Sleep(time.Second) - } - - case <-ctx.Done(): - return - } - for { select { - case miss := <-misses: - nw.handleMiss(miss) - case evtBatch := <-events: nw.handleSubnetEvents(evtBatch) @@ -111,140 +85,80 @@ type vxlanLeaseAttrs struct { func (nw *network) handleSubnetEvents(batch []subnet.Event) { for _, event := range batch { + if event.Lease.Attrs.BackendType != "vxlan" { + log.Warningf("ignoring non-vxlan subnet(%s): type=%v", event.Lease.Subnet, event.Lease.Attrs.BackendType) + continue + } + + var attrs vxlanLeaseAttrs + if err := json.Unmarshal(event.Lease.Attrs.BackendData, &attrs); err != nil { + log.Error("error decoding subnet lease JSON: ", err) + continue + } + + route := netlink.Route{ + LinkIndex: nw.dev.link.Attrs().Index, + Scope: netlink.SCOPE_UNIVERSE, + Dst: event.Lease.Subnet.ToIPNet(), + Gw: event.Lease.Subnet.IP.ToIP(), + } + route.SetFlag(syscall.RTNH_F_ONLINK) + switch event.Type { case subnet.EventAdded: - log.V(1).Info("Subnet added: ", event.Lease.Subnet) + log.V(2).Infof("adding subnet: %s PublicIP: %s VtepMAC: %s", event.Lease.Subnet, event.Lease.Attrs.PublicIP, net.HardwareAddr(attrs.VtepMAC)) - if event.Lease.Attrs.BackendType != "vxlan" { - log.Warningf("Ignoring non-vxlan subnet: type=%v", event.Lease.Attrs.BackendType) + if err := nw.dev.AddARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil { + log.Error("AddARP failed: ", err) continue } - var attrs vxlanLeaseAttrs - if err := json.Unmarshal(event.Lease.Attrs.BackendData, &attrs); err != nil { - log.Error("Error decoding subnet lease JSON: ", err) - continue - } - nw.routes.set(event.Lease.Subnet, net.HardwareAddr(attrs.VtepMAC)) - nw.dev.AddL2(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)}) + if err := nw.dev.AddFDB(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil { + log.Error("AddFDB failed: ", err) - case subnet.EventRemoved: - log.V(1).Info("Subnet removed: ", event.Lease.Subnet) - - if event.Lease.Attrs.BackendType != "vxlan" { - log.Warningf("Ignoring non-vxlan subnet: type=%v", event.Lease.Attrs.BackendType) - continue - } + // Try to clean up the ARP entry then continue + if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil { + log.Error("DelARP failed: ", err) + } - var attrs vxlanLeaseAttrs - if err := json.Unmarshal(event.Lease.Attrs.BackendData, &attrs); err != nil { - log.Error("Error decoding subnet lease JSON: ", err) continue } - if len(attrs.VtepMAC) > 0 { - nw.dev.DelL2(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)}) - } - nw.routes.remove(event.Lease.Subnet) + // Set the route - the kernel would ARP for the Gw IP address if it hadn't already been set above so make sure + // this is done last. + if err := netlink.RouteReplace(&route); err != nil { + log.Errorf("failed to add route (%s -> %s): %v", route.Dst, route.Gw, err) - default: - log.Error("Internal error: unknown event type: ", int(event.Type)) - } - } -} + // Try to clean up both the ARP and FDB entries then continue + if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil { + log.Error("DelARP failed: ", err) + } -func (nw *network) handleInitialSubnetEvents(batch []subnet.Event) error { - log.V(1).Infof("Handling initial subnet events") - fdbTable, err := nw.dev.GetL2List() - if err != nil { - return fmt.Errorf("error fetching L2 table: %v", err) - } + if err := nw.dev.DelFDB(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil { + log.Error("DelFDB failed: ", err) + } - // Log the existing VTEP -> Public IP mappings - for _, fdbEntry := range fdbTable { - log.V(1).Infof("fdb already populated with: %s %s ", fdbEntry.IP, fdbEntry.HardwareAddr) - } - - // "marked" events are skipped at the end. - eventMarker := make([]bool, len(batch)) - leaseAttrsList := make([]vxlanLeaseAttrs, len(batch)) - fdbEntryMarker := make([]bool, len(fdbTable)) - - // Run through the events "marking" ones that should be skipped - for eventMarkerIndex, evt := range batch { - if evt.Lease.Attrs.BackendType != "vxlan" { - log.Warningf("Ignoring non-vxlan subnet(%s): type=%v", evt.Lease.Subnet, evt.Lease.Attrs.BackendType) - eventMarker[eventMarkerIndex] = true - continue - } - - // Parse the vxlan specific backend data - if err := json.Unmarshal(evt.Lease.Attrs.BackendData, &leaseAttrsList[eventMarkerIndex]); err != nil { - log.Error("Error decoding subnet lease JSON: ", err) - eventMarker[eventMarkerIndex] = true - continue - } - - // Check the existing VTEP->Public IP mappings. - // If there's already an entry with the right VTEP and Public IP then the event can be skipped and the FDB entry can be retained - for j, fdbEntry := range fdbTable { - if evt.Lease.Attrs.PublicIP.ToIP().Equal(fdbEntry.IP) && bytes.Equal([]byte(leaseAttrsList[eventMarkerIndex].VtepMAC), []byte(fdbEntry.HardwareAddr)) { - eventMarker[eventMarkerIndex] = true - fdbEntryMarker[j] = true - break + continue } - } - // Store off the subnet lease and VTEP - nw.routes.set(evt.Lease.Subnet, net.HardwareAddr(leaseAttrsList[eventMarkerIndex].VtepMAC)) - log.V(2).Infof("Adding subnet: %s PublicIP: %s VtepMAC: %s", evt.Lease.Subnet, evt.Lease.Attrs.PublicIP, net.HardwareAddr(leaseAttrsList[eventMarkerIndex].VtepMAC)) - } + case subnet.EventRemoved: + log.V(2).Infof("removing subnet: %s PublicIP: %s VtepMAC: %s", event.Lease.Subnet, event.Lease.Attrs.PublicIP, net.HardwareAddr(attrs.VtepMAC)) - // Loop over the existing FDB entries, deleting any that shouldn't be there - for j, marker := range fdbEntryMarker { - if !marker && fdbTable[j].IP != nil { - err := nw.dev.DelL2(neighbor{IP: ip.FromIP(fdbTable[j].IP), MAC: fdbTable[j].HardwareAddr}) - if err != nil { - log.Error("Delete L2 failed: ", err) + // Try to remove all entries - don't bail out if one of them fails. + if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil { + log.Error("DelARP failed: ", err) } - } - } - // Loop over the events (skipping marked ones), adding them to the FDB table. - for i, marker := range eventMarker { - if !marker { - err := nw.dev.AddL2(neighbor{IP: batch[i].Lease.Attrs.PublicIP, MAC: net.HardwareAddr(leaseAttrsList[i].VtepMAC)}) - if err != nil { - log.Error("Add L2 failed: ", err) + if err := nw.dev.DelFDB(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil { + log.Error("DelFDB failed: ", err) } - } - } - return nil -} -func (nw *network) handleMiss(miss *netlink.Neigh) { - switch { - case len(miss.IP) == 0 && len(miss.HardwareAddr) == 0: - log.V(2).Info("Ignoring nil miss") - - case len(miss.HardwareAddr) == 0: - nw.handleL3Miss(miss) - - default: - log.V(4).Infof("Ignoring not a miss: %v, %v", miss.HardwareAddr, miss.IP) - } -} - -func (nw *network) handleL3Miss(miss *netlink.Neigh) { - route := nw.routes.findByNetwork(ip.FromIP(miss.IP)) - if route == nil { - log.V(0).Infof("L3 miss but route for %v not found", miss.IP) - return - } + if err := netlink.RouteDel(&route); err != nil { + log.Errorf("failed to delete route (%s -> %s): %v", route.Dst, route.Gw, err) + } - if err := nw.dev.AddL3(neighbor{IP: ip.FromIP(miss.IP), MAC: route.vtepMAC}); err != nil { - log.Errorf("AddL3 failed: %v", err) - } else { - log.V(2).Infof("L3 miss: AddL3 for %s succeeded", miss.IP) + default: + log.Error("internal error: unknown event type: ", int(event.Type)) + } } } diff --git a/dist/functional-test.sh b/dist/functional-test.sh index a4d6146a7..584196836 100755 --- a/dist/functional-test.sh +++ b/dist/functional-test.sh @@ -56,14 +56,14 @@ run_test() { # rm any old flannel container that maybe running, ignore error as it might not exist docker rm -f flannel-e2e-test-flannel1 2>/dev/null - docker run --name=flannel-e2e-test-flannel1 -d --privileged $flannel_img --etcd-endpoints=$etcd_endpt + docker run --name=flannel-e2e-test-flannel1 -d --privileged $flannel_img --etcd-endpoints=$etcd_endpt -v 10 if [ $? -ne 0 ]; then exit 1 fi # rm any old flannel container that maybe running, ignore error as it might not exist docker rm -f flannel-e2e-test-flannel2 2>/dev/null - docker run --name=flannel-e2e-test-flannel2 -d --privileged $flannel_img --etcd-endpoints=$etcd_endpt + docker run --name=flannel-e2e-test-flannel2 -d --privileged $flannel_img --etcd-endpoints=$etcd_endpt -v 10 if [ $? -ne 0 ]; then exit 1 fi @@ -96,7 +96,7 @@ run_test() { docker rm -f flannel-e2e-test-flannel1-iperf 2>/dev/null docker run -d --name flannel-e2e-test-flannel1-iperf --net=container:flannel-e2e-test-flannel1 mlabbe/iperf3 docker run --rm --net=container:flannel-e2e-test-flannel2 mlabbe/iperf3 -c $ping_dest - fi + fi docker stop flannel-e2e-test-flannel1 flannel-e2e-test-flannel2 >/dev/null