diff --git a/backend/vxlan/device_windows.go b/backend/vxlan/device_windows.go new file mode 100644 index 0000000000..e6bcbaad4c --- /dev/null +++ b/backend/vxlan/device_windows.go @@ -0,0 +1,224 @@ +// Copyright 2018 flannel authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vxlan + +import ( + "encoding/json" + "fmt" + "github.com/Microsoft/hcsshim" + "github.com/buger/jsonparser" + "github.com/coreos/flannel/network/netsh" + "github.com/coreos/flannel/pkg/ip" + log "github.com/golang/glog" + "github.com/juju/errors" + "k8s.io/apimachinery/pkg/util/wait" + utilexec "k8s.io/utils/exec" + "time" +) + +type vxlanDeviceAttrs struct { + vni uint32 + name string + gbp bool + addressPrefix ip.IP4Net +} + +type vxlanDevice struct { + link *hcsshim.HNSNetwork + macPrefix string + directRouting bool +} + +func newVXLANDevice(devAttrs *vxlanDeviceAttrs) (*vxlanDevice, error) { + hnsNetwork := &hcsshim.HNSNetwork{ + Name: devAttrs.name, + Type: "Overlay", + Subnets: make([]hcsshim.Subnet, 0, 1), + } + + hnsNetwork, err := ensureNetwork(hnsNetwork, int64(devAttrs.vni), devAttrs.addressPrefix.String(), (devAttrs.addressPrefix.IP + 1).String()) + if err != nil { + return nil, err + } + + return &vxlanDevice{ + link: hnsNetwork, + }, nil +} + +func ensureNetwork(expectedNetwork *hcsshim.HNSNetwork, expectedVSID int64, expectedAddressPrefix, expectedGW string) (*hcsshim.HNSNetwork, error) { + createNetwork := true + networkName := expectedNetwork.Name + + // 1. Check if the HNSNetwork exists and has the expected settings + existingNetwork, err := hcsshim.GetHNSNetworkByName(networkName) + if err == nil { + if existingNetwork.Type == expectedNetwork.Type { + for _, existingSubnet := range existingNetwork.Subnets { + if existingSubnet.AddressPrefix == expectedAddressPrefix && existingSubnet.GatewayAddress == expectedGW { + createNetwork = false + log.Infof("Found existing HNSNetwork %s", networkName) + break + } + } + } + } + + // 2. Create a new HNSNetwork + if createNetwork { + if existingNetwork != nil { + if _, err := existingNetwork.Delete(); err != nil { + return nil, errors.Annotatef(err, "failed to delete existing HNSNetwork %s", networkName) + } + log.Infof("Deleted stale HNSNetwork %s", networkName) + } + + // Add a VxLan subnet + expectedNetwork.Subnets = append(expectedNetwork.Subnets, hcsshim.Subnet{ + AddressPrefix: expectedAddressPrefix, + GatewayAddress: expectedGW, + Policies: []json.RawMessage{ + []byte(fmt.Sprintf(`{"Type":"VSID","VSID":%d}`, expectedVSID)), + }, + }) + + // Config request params + jsonRequest, err := json.Marshal(expectedNetwork) + if err != nil { + return nil, errors.Annotatef(err, "failed to marshal %+v", expectedNetwork) + } + + log.Infof("Attempting to create HNSNetwork %s", string(jsonRequest)) + newNetwork, err := hcsshim.HNSNetworkRequest("POST", "", string(jsonRequest)) + if err != nil { + return nil, errors.Annotatef(err, "failed to create HNSNetwork %s", networkName) + } + + var waitErr, lastErr error + // Wait for the network to populate Management IP + log.Infof("Waiting to get ManagementIP from HNSNetwork %s", networkName) + waitErr = wait.Poll(500*time.Millisecond, 5*time.Second, func() (done bool, err error) { + newNetwork, lastErr = hcsshim.HNSNetworkRequest("GET", newNetwork.Id, "") + return newNetwork != nil && len(newNetwork.ManagementIP) == 0, nil + }) + if waitErr == wait.ErrWaitTimeout { + return nil, errors.Annotatef(lastErr, "timeout, failed to get management IP from HNSNetwork %s", networkName) + } + + // Wait for the interface with the management IP + netshHelper := netsh.NewHelper(utilexec.New()) + log.Infof("Waiting to get net interface for HNSNetwork %s (%s)", networkName, newNetwork.ManagementIP) + waitErr = wait.Poll(500*time.Millisecond, 5*time.Second, func() (done bool, err error) { + _, lastErr = netshHelper.GetInterfaceByIP(newNetwork.ManagementIP) + return lastErr == nil, nil + }) + if waitErr == wait.ErrWaitTimeout { + return nil, errors.Annotatef(lastErr, "timeout, failed to get net interface for HNSNetwork %s (%s)", networkName, newNetwork.ManagementIP) + } + + log.Infof("Created HNSNetwork %s", networkName) + return newNetwork, nil + } + + return existingNetwork, nil +} + +type neighbor struct { + MAC string + IP ip.IP4 + ManagementAddress string +} + +func (dev *vxlanDevice) AddEndpoint(n *neighbor) error { + endpointName := createEndpointName(n.IP) + + // 1. Check if the HNSEndpoint exists and has the expected settings + existingEndpoint, err := hcsshim.GetHNSEndpointByName(endpointName) + if err == nil && existingEndpoint.VirtualNetwork == dev.link.Id { + // Check policies if there is PA type + targetType := "PA" + for _, policy := range existingEndpoint.Policies { + policyType, _ := jsonparser.GetUnsafeString(policy, "Type") + if policyType == targetType { + actualPaIP, _ := jsonparser.GetUnsafeString(policy, targetType) + if actualPaIP == n.ManagementAddress { + log.Infof("Found existing remote HNSEndpoint %s", endpointName) + return nil + } + } + } + } + + // 2. Create a new HNSNetwork + if existingEndpoint != nil { + if _, err := existingEndpoint.Delete(); err != nil { + return errors.Annotatef(err, "failed to delete existing remote HNSEndpoint %s", endpointName) + } + log.V(4).Infof("Deleted stale HNSEndpoint %s", endpointName) + } + + newEndpoint := &hcsshim.HNSEndpoint{ + Name: endpointName, + IPAddress: n.IP.ToIP(), + MacAddress: n.MAC, + VirtualNetwork: dev.link.Id, + IsRemoteEndpoint: true, + Policies: []json.RawMessage{ + []byte(fmt.Sprintf(`{"Type":"PA","PA":"%s"}`, n.ManagementAddress)), + }, + } + if _, err := newEndpoint.Create(); err != nil { + return errors.Annotatef(err, "failed to create remote HNSEndpoint %s", endpointName) + } + log.V(4).Infof("Created HNSEndpoint %s", endpointName) + + return nil +} + +func (dev *vxlanDevice) DelEndpoint(n *neighbor) error { + endpointName := createEndpointName(n.IP) + + existingEndpoint, err := hcsshim.GetHNSEndpointByName(endpointName) + if err == nil && existingEndpoint.VirtualNetwork == dev.link.Id { + // Check policies if there is PA type + targetType := "PA" + for _, policy := range existingEndpoint.Policies { + policyType, _ := jsonparser.GetUnsafeString(policy, "Type") + if policyType == targetType { + actualPaIP, _ := jsonparser.GetUnsafeString(policy, targetType) + if actualPaIP == n.ManagementAddress { + // Found it and delete + if _, err := existingEndpoint.Delete(); err != nil { + return errors.Annotatef(err, "failed to delete remote HNSEndpoint %s", endpointName) + } + + log.V(4).Infof("Deleted HNSEndpoint %s", endpointName) + break + } + } + } + } + + return nil +} + +func (dev *vxlanDevice) ConjureMac(targetIP ip.IP4) string { + a, b, c, d := targetIP.Octets() + return fmt.Sprintf("%v-%02x-%02x-%02x-%02x", dev.macPrefix, a, b, c, d) +} + +func createEndpointName(targetIP ip.IP4) string { + return "remote_" + targetIP.String() +} diff --git a/backend/vxlan/vxlan_network_windows.go b/backend/vxlan/vxlan_network_windows.go new file mode 100644 index 0000000000..74c4248a81 --- /dev/null +++ b/backend/vxlan/vxlan_network_windows.go @@ -0,0 +1,125 @@ +// Copyright 2015 flannel authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vxlan + +import ( + log "github.com/golang/glog" + "golang.org/x/net/context" + "sync" + + "github.com/coreos/flannel/backend" + "github.com/coreos/flannel/subnet" + + "github.com/coreos/flannel/pkg/ip" + "strings" +) + +type network struct { + backend.SimpleNetwork + dev *vxlanDevice + subnetMgr subnet.Manager +} + +const ( + encapOverhead = 50 +) + +func newNetwork(subnetMgr subnet.Manager, extIface *backend.ExternalInterface, dev *vxlanDevice, _ ip.IP4Net, lease *subnet.Lease) (*network, error) { + nw := &network{ + SimpleNetwork: backend.SimpleNetwork{ + SubnetLease: lease, + ExtIface: extIface, + }, + subnetMgr: subnetMgr, + dev: dev, + } + + return nw, nil +} + +func (nw *network) Run(ctx context.Context) { + wg := sync.WaitGroup{} + + log.V(0).Info("Watching for new subnet leases") + events := make(chan []subnet.Event) + wg.Add(1) + go func() { + subnet.WatchLeases(ctx, nw.subnetMgr, nw.SubnetLease, events) + log.V(1).Info("WatchLeases exited") + wg.Done() + }() + + defer wg.Wait() + + for { + select { + case evtBatch := <-events: + nw.handleSubnetEvents(evtBatch) + + case <-ctx.Done(): + return + } + } +} + +func (nw *network) MTU() int { + return nw.ExtIface.Iface.MTU - encapOverhead +} + +func (nw *network) handleSubnetEvents(batch []subnet.Event) { + for _, event := range batch { + leaseSubnet := event.Lease.Subnet + leaseAttrs := event.Lease.Attrs + if !strings.EqualFold(leaseAttrs.BackendType, "vxlan") { + log.Warningf("ignoring non-vxlan subnet(%v): type=%v", leaseSubnet, leaseAttrs.BackendType) + continue + } + + publicIP := leaseAttrs.PublicIP.String() + remoteIP := leaseSubnet.IP + 2 + lastIP := leaseSubnet.Next().IP - 1 + + switch event.Type { + case subnet.EventAdded: + for ; remoteIP < lastIP; remoteIP++ { + n := &neighbor{ + IP: remoteIP, + MAC: nw.dev.ConjureMac(remoteIP), + ManagementAddress: publicIP, + } + + log.V(2).Infof("adding subnet: %v publicIP: %s vtepMAC: %s", leaseSubnet, n.ManagementAddress, n.MAC) + if err := nw.dev.AddEndpoint(n); err != nil { + log.Error(err) + } + } + case subnet.EventRemoved: + for ; remoteIP < lastIP; remoteIP++ { + n := &neighbor{ + IP: remoteIP, + MAC: nw.dev.ConjureMac(remoteIP), + ManagementAddress: publicIP, + } + + log.V(2).Infof("removing subnet: %v publicIP: %s vtepMAC: %s", leaseSubnet, n.ManagementAddress, n.MAC) + if err := nw.dev.DelEndpoint(n); err != nil { + log.Error(err) + } + } + default: + log.Error("internal error: unknown event type: ", int(event.Type)) + } + } +} diff --git a/backend/vxlan/vxlan_windows.go b/backend/vxlan/vxlan_windows.go index 0c8633fbf8..835afc9f31 100644 --- a/backend/vxlan/vxlan_windows.go +++ b/backend/vxlan/vxlan_windows.go @@ -11,14 +11,132 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// +build windows package vxlan +// Some design notes: +// VXLAN encapsulates L2 packets (though flannel is L3 only so don't expect to be able to send L2 packets across hosts) +// Windows overlay decap works at L2 and so it needs the correct destination MAC for the remote host to work. +// Windows does not expose an L3Miss interface so for now all possible remote IP/MAC pairs have to be configured upfront. +// +// In this scheme the scaling of table entries (per host) is: +// - 1 network entry for the overlay network +// - 1 endpoint per local container +// - N remote endpoints remote node (total endpoints = import ( + "encoding/json" + "errors" + "fmt" + "sync" + log "github.com/golang/glog" + + "golang.org/x/net/context" + + "github.com/coreos/flannel/backend" + "github.com/coreos/flannel/pkg/ip" + "github.com/coreos/flannel/subnet" + "net" ) func init() { - log.Infof("vxlan is not supported on this platform") + backend.Register("vxlan", New) +} + +const ( + defaultVNI = 4096 + vxlanPort = 4789 +) + +type VXLANBackend struct { + subnetMgr subnet.Manager + extIface *backend.ExternalInterface +} + +func New(sm subnet.Manager, extIface *backend.ExternalInterface) (backend.Backend, error) { + backend := &VXLANBackend{ + subnetMgr: sm, + extIface: extIface, + } + + return backend, nil +} + +func newSubnetAttrs(publicIP net.IP) (*subnet.LeaseAttrs, error) { + return &subnet.LeaseAttrs{ + PublicIP: ip.FromIP(publicIP), + BackendType: "vxlan", + }, nil +} + +func (be *VXLANBackend) RegisterNetwork(ctx context.Context, wg sync.WaitGroup, config *subnet.Config) (backend.Network, error) { + // 1. Parse configuration + cfg := struct { + Name string + MacPrefix string + VNI int + Port int + GBP bool + DirectRouting bool + }{ + VNI: defaultVNI, + Port: vxlanPort, + MacPrefix: "0E-2A", + } + + if len(config.Backend) > 0 { + if err := json.Unmarshal(config.Backend, &cfg); err != nil { + return nil, fmt.Errorf("error decoding VXLAN backend config: %v", err) + } + } + + // 2. Verify configuration + if cfg.VNI < defaultVNI { + return nil, fmt.Errorf("invalid VXLAN backend config. VNI [%v] must be greater than or equal to %v on Windows", cfg.VNI, defaultVNI) + } + if cfg.Port != vxlanPort { + return nil, fmt.Errorf("invalid VXLAN backend config. Port [%v] is not supported on Windows. Omit the setting to default to port %v", cfg.Port, vxlanPort) + } + if cfg.DirectRouting { + return nil, errors.New("invalid VXLAN backend config. DirectRouting is not supported on Windows") + } + if cfg.GBP { + return nil, errors.New("invalid VXLAN backend config. GBP is not supported on Windows") + } + if len(cfg.MacPrefix) == 0 || len(cfg.MacPrefix) != 5 || cfg.MacPrefix[2] != '-' { + return nil, fmt.Errorf("invalid VXLAN backend config.MacPrefix [%v] is invalid, prefix must be of the format xx-xx e.g. 0E-2A", cfg.MacPrefix) + } + if len(cfg.Name) == 0 { + cfg.Name = fmt.Sprintf("flannel.%v", cfg.VNI) + } + log.Infof("VXLAN config: Name=%s MacPrefix=%s VNI=%d Port=%d GBP=%v DirectRouting=%v", cfg.Name, cfg.MacPrefix, cfg.VNI, cfg.Port, cfg.GBP, cfg.DirectRouting) + + devAttrs := vxlanDeviceAttrs{ + vni: uint32(cfg.VNI), + name: cfg.Name, + addressPrefix: config.Network, + } + + dev, err := newVXLANDevice(&devAttrs) + if err != nil { + return nil, err + } + dev.directRouting = cfg.DirectRouting + dev.macPrefix = cfg.MacPrefix + + subnetAttrs, err := newSubnetAttrs(be.extIface.ExtAddr) + if err != nil { + return nil, err + } + + lease, err := be.subnetMgr.AcquireLease(ctx, subnetAttrs) + switch err { + case nil: + case context.Canceled, context.DeadlineExceeded: + return nil, err + default: + return nil, fmt.Errorf("failed to acquire lease: %v", err) + } + + return newNetwork(be.subnetMgr, be.extIface, dev, ip.IP4Net{}, lease) }