diff --git a/Documentation/backends.md b/Documentation/backends.md index 544a4e5e95..20261a82c9 100644 --- a/Documentation/backends.md +++ b/Documentation/backends.md @@ -19,10 +19,11 @@ Use in-kernel VXLAN to encapsulate the packets. Type and options: * `Type` (string): `vxlan` -* `VNI` (number): VXLAN Identifier (VNI) to be used. Defaults to 1. -* `Port` (number): UDP port to use for sending encapsulated packets. Defaults to kernel default, currently 8472. -* `GBP` (Boolean): Enable [VXLAN Group Based Policy](https://github.com/torvalds/linux/commit/3511494ce2f3d3b77544c79b87511a4ddb61dc89). Defaults to `false`. -* `DirectRouting` (Boolean): Enable direct routes (like `host-gw`) when the hosts are on the same subnet. VXLAN will only be used to encapsulate packets to hosts on different subnets. Defaults to `false`. +* `VNI` (number): VXLAN Identifier (VNI) to be used. On Linux, defaults to 1. On Windows should be greater than or equal to 4096. +* `Port` (number): UDP port to use for sending encapsulated packets. On Linux, defaults to kernel default, currently 8472, but on Windows, must be 4789. +* `GBP` (Boolean): Enable [VXLAN Group Based Policy](https://github.com/torvalds/linux/commit/3511494ce2f3d3b77544c79b87511a4ddb61dc89). Defaults to `false`. GBP is not supported on Windows +* `DirectRouting` (Boolean): Enable direct routes (like `host-gw`) when the hosts are on the same subnet. VXLAN will only be used to encapsulate packets to hosts on different subnets. Defaults to `false`. DirectRouting is not supported on Windows. +* `MacPrefix` (String): Only use on Windows, set to the MAC prefix. Defaults to `0E-2A`. ### host-gw diff --git a/backend/hostgw/hostgw_windows.go b/backend/hostgw/hostgw_windows.go index f14d12b448..3ed790717d 100644 --- a/backend/hostgw/hostgw_windows.go +++ b/backend/hostgw/hostgw_windows.go @@ -1,6 +1,4 @@ -// +build windows - -// Copyright 2015 flannel authors +// Copyright 2018 flannel authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,14 +11,243 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// +build windows package hostgw import ( + "fmt" + "strconv" + "sync" + "time" + + "github.com/Microsoft/hcsshim" + "github.com/coreos/flannel/backend" + "github.com/rakelkar/gonetsh/netroute" + "github.com/rakelkar/gonetsh/netsh" + "github.com/coreos/flannel/pkg/ip" + "github.com/coreos/flannel/subnet" log "github.com/golang/glog" + "github.com/juju/errors" + "golang.org/x/net/context" + "k8s.io/apimachinery/pkg/util/json" + "k8s.io/apimachinery/pkg/util/wait" + utilexec "k8s.io/utils/exec" ) func init() { - log.Infof("hostgw is not supported on this platform") + backend.Register("host-gw", New) +} + +type HostgwBackend struct { + sm subnet.Manager + extIface *backend.ExternalInterface +} + +func New(sm subnet.Manager, extIface *backend.ExternalInterface) (backend.Backend, error) { + if !extIface.ExtAddr.Equal(extIface.IfaceAddr) { + return nil, fmt.Errorf("your PublicIP differs from interface IP, meaning that probably you're on a NAT, which is not supported by host-gw backend") + } + + be := &HostgwBackend{ + sm: sm, + extIface: extIface, + } + + return be, nil +} + +func (be *HostgwBackend) RegisterNetwork(ctx context.Context, wg sync.WaitGroup, config *subnet.Config) (backend.Network, error) { + // 1. Parse configuration + cfg := struct { + Name string + DNSServerList string + }{} + if len(config.Backend) > 0 { + if err := json.Unmarshal(config.Backend, &cfg); err != nil { + return nil, errors.Annotate(err, "error decoding windows host-gw backend config") + } + } + if len(cfg.Name) == 0 { + cfg.Name = "cbr0" + } + log.Infof("HOST-GW config: %+v", cfg) + + n := &backend.RouteNetwork{ + SimpleNetwork: backend.SimpleNetwork{ + ExtIface: be.extIface, + }, + SM: be.sm, + BackendType: "host-gw", + Mtu: be.extIface.Iface.MTU, + LinkIndex: be.extIface.Iface.Index, + } + n.GetRoute = func(lease *subnet.Lease) *netroute.Route { + return &netroute.Route{ + DestinationSubnet: lease.Subnet.ToIPNet(), + GatewayAddress: lease.Attrs.PublicIP.ToIP(), + LinkIndex: n.LinkIndex, + } + } + + // 2. Acquire the lease form subnet manager + attrs := subnet.LeaseAttrs{ + PublicIP: ip.FromIP(be.extIface.ExtAddr), + BackendType: "host-gw", + } + + l, err := be.sm.AcquireLease(ctx, &attrs) + switch err { + case nil: + n.SubnetLease = l + + case context.Canceled, context.DeadlineExceeded: + return nil, err + + default: + return nil, errors.Annotate(err, "failed to acquire lease") + } + + // 3. Check if the network exists and has the expected settings + netshHelper := netsh.New(utilexec.New()) + createNewNetwork := true + expectedSubnet := n.SubnetLease.Subnet + expectedAddressPrefix := expectedSubnet.String() + expectedGatewayAddress := (expectedSubnet.IP + 1).String() + expectedPodGatewayAddress := expectedSubnet.IP + 2 + networkName := cfg.Name + var waitErr, lastErr error + + existingNetwork, err := hcsshim.GetHNSNetworkByName(networkName) + if err == nil { + for _, subnet := range existingNetwork.Subnets { + if subnet.AddressPrefix == expectedAddressPrefix && subnet.GatewayAddress == expectedGatewayAddress { + createNewNetwork = false + log.Infof("Found existing HNSNetwork %s", networkName) + break + } + } + } + + // 4. Create a new HNSNetwork + expectedNetwork := existingNetwork + if createNewNetwork { + if existingNetwork != nil { + if _, err := existingNetwork.Delete(); err != nil { + return nil, errors.Annotatef(err, "failed to delete existing HNSNetwork %s", networkName) + } + log.Infof("Deleted stale HNSNetwork %s", networkName) + } + + expectedNetwork = &hcsshim.HNSNetwork{ + Name: networkName, + Type: "L2Bridge", + DNSServerList: cfg.DNSServerList, + Subnets: []hcsshim.Subnet{ + { + AddressPrefix: expectedAddressPrefix, + GatewayAddress: expectedGatewayAddress, + }, + }, + } + jsonRequest, err := json.Marshal(expectedNetwork) + if err != nil { + return nil, errors.Annotatef(err, "failed to marshal %+v", expectedNetwork) + } + + log.Infof("Attempting to create HNSNetwork %s", string(jsonRequest)) + newNetwork, err := hcsshim.HNSNetworkRequest("POST", "", string(jsonRequest)) + if err != nil { + return nil, errors.Annotatef(err, "failed to create HNSNetwork %s", networkName) + } + + // Wait for the network to populate Management IP + log.Infof("Waiting to get ManagementIP from HNSNetwork %s", networkName) + waitErr = wait.Poll(500*time.Millisecond, 5*time.Second, func() (done bool, err error) { + newNetwork, lastErr = hcsshim.HNSNetworkRequest("GET", newNetwork.Id, "") + return newNetwork != nil && len(newNetwork.ManagementIP) == 0, nil + }) + if waitErr == wait.ErrWaitTimeout { + return nil, errors.Annotatef(lastErr, "timeout, failed to get management IP from HNSNetwork %s", networkName) + } + + // Wait for the interface with the management IP + log.Infof("Waiting to get net interface for HNSNetwork %s (%s)", networkName, newNetwork.ManagementIP) + waitErr = wait.Poll(500*time.Millisecond, 5*time.Second, func() (done bool, err error) { + _, lastErr = netshHelper.GetInterfaceByIP(newNetwork.ManagementIP) + return lastErr == nil, nil + }) + if waitErr == wait.ErrWaitTimeout { + return nil, errors.Annotatef(lastErr, "timeout, failed to get net interface for HNSNetwork %s (%s)", networkName, newNetwork.ManagementIP) + } + + log.Infof("Created HNSNetwork %s", networkName) + expectedNetwork = newNetwork + } + + // 5. Ensure a 1.2 endpoint on this network in the host compartment + createNewBridgeEndpoint := true + bridgeEndpointName := networkName + "_ep" + existingBridgeEndpoint, err := hcsshim.GetHNSEndpointByName(bridgeEndpointName) + if err == nil && existingBridgeEndpoint.IPAddress.String() == expectedPodGatewayAddress.String() { + log.Infof("Found existing bridge HNSEndpoint %s", bridgeEndpointName) + createNewBridgeEndpoint = false + } + + // 6. Create a bridge HNSEndpoint + expectedBridgeEndpoint := existingBridgeEndpoint + if createNewBridgeEndpoint { + if existingBridgeEndpoint != nil { + if _, err = existingBridgeEndpoint.Delete(); err != nil { + return nil, errors.Annotatef(err, "failed to delete existing bridge HNSEndpoint %s", bridgeEndpointName) + } + log.Infof("Deleted stale bridge HNSEndpoint %s", bridgeEndpointName) + } + + expectedBridgeEndpoint = &hcsshim.HNSEndpoint{ + Name: bridgeEndpointName, + IPAddress: expectedPodGatewayAddress.ToIP(), + VirtualNetwork: expectedNetwork.Id, + } + + log.Infof("Attempting to create bridge HNSEndpoint %+v", expectedBridgeEndpoint) + if existingBridgeEndpoint, err = expectedBridgeEndpoint.Create(); err != nil { + return nil, errors.Annotatef(err, "failed to create bridge HNSEndpoint %s", bridgeEndpointName) + } + + log.Infof("Created bridge HNSEndpoint %s", bridgeEndpointName) + } + + // Wait for the bridgeEndpoint to attach to the host + log.Infof("Waiting to attach bridge endpoint %s to host", bridgeEndpointName) + waitErr = wait.Poll(500*time.Millisecond, 5*time.Second, func() (done bool, err error) { + lastErr = expectedBridgeEndpoint.HostAttach(1) + return lastErr == nil, nil + }) + if waitErr == wait.ErrWaitTimeout { + return nil, errors.Annotatef(lastErr, "failed to hot attach bridge HNSEndpoint %s to host compartment", bridgeEndpointName) + } + log.Infof("Attached bridge endpoint %s to host successfully", bridgeEndpointName) + + // 7. Enable forwarding on the host interface and endpoint + for _, interfaceIpAddress := range []string{expectedNetwork.ManagementIP, existingBridgeEndpoint.IPAddress.String()} { + netInterface, err := netshHelper.GetInterfaceByIP(interfaceIpAddress) + if err != nil { + return nil, errors.Annotatef(err, "failed to find interface for IP Address %s", interfaceIpAddress) + } + log.Infof("Found %+v interface with IP %s", netInterface, interfaceIpAddress) + + // When a new hns network is created, the interface is modified, esp the name, index + if expectedNetwork.ManagementIP == netInterface.IpAddress { + n.LinkIndex = netInterface.Idx + n.Name = netInterface.Name + } + + interfaceIdx := strconv.Itoa(netInterface.Idx) + if err := netshHelper.EnableForwarding(interfaceIdx); err != nil { + return nil, errors.Annotatef(err, "failed to enable forwarding on %s index %s", netInterface.Name, interfaceIdx) + } + log.Infof("Enabled forwarding on %s index %s", netInterface.Name, interfaceIdx) + } + + return n, nil } diff --git a/backend/route_network_windows.go b/backend/route_network_windows.go new file mode 100644 index 0000000000..17e1300c6c --- /dev/null +++ b/backend/route_network_windows.go @@ -0,0 +1,199 @@ +// Copyright 2018 flannel authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package backend + +import ( + "sync" + "time" + + "github.com/rakelkar/gonetsh/netroute" + log "github.com/golang/glog" + "golang.org/x/net/context" + + "github.com/coreos/flannel/subnet" + "strings" +) + +const ( + routeCheckRetries = 10 +) + +type RouteNetwork struct { + SimpleNetwork + Name string + BackendType string + SM subnet.Manager + GetRoute func(lease *subnet.Lease) *netroute.Route + Mtu int + LinkIndex int + routes []netroute.Route +} + +func (n *RouteNetwork) MTU() int { + return n.Mtu +} + +func (n *RouteNetwork) Run(ctx context.Context) { + wg := sync.WaitGroup{} + + log.Info("Watching for new subnet leases") + evts := make(chan []subnet.Event) + wg.Add(1) + go func() { + subnet.WatchLeases(ctx, n.SM, n.SubnetLease, evts) + wg.Done() + }() + + n.routes = make([]netroute.Route, 0, 10) + wg.Add(1) + go func() { + n.routeCheck(ctx) + wg.Done() + }() + + defer wg.Wait() + + for { + select { + case evtBatch := <-evts: + n.handleSubnetEvents(evtBatch) + + case <-ctx.Done(): + return + } + } +} + +func (n *RouteNetwork) handleSubnetEvents(batch []subnet.Event) { + netrouteHelper := netroute.New() + + for _, evt := range batch { + leaseSubnet := evt.Lease.Subnet + leaseAttrs := evt.Lease.Attrs + if !strings.EqualFold(leaseAttrs.BackendType, n.BackendType) { + log.Warningf("Ignoring non-%v subnet(%v): type=%v", n.BackendType, leaseSubnet, leaseAttrs.BackendType) + continue + } + + expectedRoute := n.GetRoute(&evt.Lease) + + switch evt.Type { + case subnet.EventAdded: + log.Infof("Subnet added: %v via %v", leaseSubnet, leaseAttrs.PublicIP) + + existingRoutes, _ := netrouteHelper.GetNetRoutes(expectedRoute.LinkIndex, expectedRoute.DestinationSubnet) + if len(existingRoutes) > 0 { + existingRoute := existingRoutes[0] + if existingRoute.Equal(*expectedRoute) { + continue + } + + log.Warningf("Replacing existing route %v via %v with %v via %v", leaseSubnet, existingRoute.GatewayAddress, leaseSubnet, leaseAttrs.PublicIP) + err := netrouteHelper.RemoveNetRoute(existingRoute.LinkIndex, existingRoute.DestinationSubnet, existingRoute.GatewayAddress) + if err != nil { + log.Errorf("Error removing route: %v", err) + continue + } + } + + err := netrouteHelper.NewNetRoute(expectedRoute.LinkIndex, expectedRoute.DestinationSubnet, expectedRoute.GatewayAddress) + if err != nil { + log.Errorf("Error creating route: %v", err) + continue + } + + n.addToRouteList(expectedRoute) + + case subnet.EventRemoved: + log.Infof("Subnet removed: %v", leaseSubnet) + + existingRoutes, _ := netrouteHelper.GetNetRoutes(expectedRoute.LinkIndex, expectedRoute.DestinationSubnet) + if len(existingRoutes) > 0 { + existingRoute := existingRoutes[0] + if existingRoute.Equal(*expectedRoute) { + log.Infof("Removing existing route %v via %v", leaseSubnet, existingRoute.GatewayAddress) + + err := netrouteHelper.RemoveNetRoute(existingRoute.LinkIndex, existingRoute.DestinationSubnet, existingRoute.GatewayAddress) + if err != nil { + log.Warningf("Error removing route: %v", err) + } + } + } + + n.removeFromRouteList(expectedRoute) + + default: + log.Error("Internal error: unknown event type: ", int(evt.Type)) + } + } +} + +func (n *RouteNetwork) addToRouteList(newRoute *netroute.Route) { + for _, route := range n.routes { + if route.Equal(*newRoute) { + return + } + } + + n.routes = append(n.routes, *newRoute) +} + +func (n *RouteNetwork) removeFromRouteList(oldRoute *netroute.Route) { + for index, route := range n.routes { + if route.Equal(*oldRoute) { + n.routes = append(n.routes[:index], n.routes[index+1:]...) + return + } + } +} + +func (n *RouteNetwork) routeCheck(ctx context.Context) { + for { + select { + case <-ctx.Done(): + return + case <-time.After(routeCheckRetries * time.Second): + n.checkSubnetExistInRoutes() + } + } +} + +func (n *RouteNetwork) checkSubnetExistInRoutes() { + netrouteHelper := netroute.New() + + existingRoutes, err := netrouteHelper.GetNetRoutesAll() + if err != nil { + log.Errorf("Error enumerating routes: %v", err) + return + } + for _, expectedRoute := range n.routes { + exist := false + for _, existingRoute := range existingRoutes { + if expectedRoute.Equal(existingRoute) { + exist = true + break + } + } + + if !exist { + err := netrouteHelper.NewNetRoute(expectedRoute.LinkIndex, expectedRoute.DestinationSubnet, expectedRoute.GatewayAddress) + if err != nil { + log.Warningf("Error recovering route to %v via %v on %v (%v).", expectedRoute.DestinationSubnet, expectedRoute.GatewayAddress, expectedRoute.LinkIndex, err) + continue + } + log.Infof("Recovered route to %v via %v on %v.", expectedRoute.DestinationSubnet, expectedRoute.GatewayAddress, expectedRoute.LinkIndex) + } + } +} diff --git a/backend/vxlan/device_windows.go b/backend/vxlan/device_windows.go new file mode 100644 index 0000000000..77e85d2e27 --- /dev/null +++ b/backend/vxlan/device_windows.go @@ -0,0 +1,224 @@ +// Copyright 2018 flannel authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vxlan + +import ( + "encoding/json" + "fmt" + "github.com/Microsoft/hcsshim" + "github.com/buger/jsonparser" + "github.com/rakelkar/gonetsh/netsh" + "github.com/coreos/flannel/pkg/ip" + log "github.com/golang/glog" + "github.com/juju/errors" + "k8s.io/apimachinery/pkg/util/wait" + utilexec "k8s.io/utils/exec" + "time" +) + +type vxlanDeviceAttrs struct { + vni uint32 + name string + gbp bool + addressPrefix ip.IP4Net +} + +type vxlanDevice struct { + link *hcsshim.HNSNetwork + macPrefix string + directRouting bool +} + +func newVXLANDevice(devAttrs *vxlanDeviceAttrs) (*vxlanDevice, error) { + hnsNetwork := &hcsshim.HNSNetwork{ + Name: devAttrs.name, + Type: "Overlay", + Subnets: make([]hcsshim.Subnet, 0, 1), + } + + hnsNetwork, err := ensureNetwork(hnsNetwork, int64(devAttrs.vni), devAttrs.addressPrefix.String(), (devAttrs.addressPrefix.IP + 1).String()) + if err != nil { + return nil, err + } + + return &vxlanDevice{ + link: hnsNetwork, + }, nil +} + +func ensureNetwork(expectedNetwork *hcsshim.HNSNetwork, expectedVSID int64, expectedAddressPrefix, expectedGW string) (*hcsshim.HNSNetwork, error) { + createNetwork := true + networkName := expectedNetwork.Name + + // 1. Check if the HNSNetwork exists and has the expected settings + existingNetwork, err := hcsshim.GetHNSNetworkByName(networkName) + if err == nil { + if existingNetwork.Type == expectedNetwork.Type { + for _, existingSubnet := range existingNetwork.Subnets { + if existingSubnet.AddressPrefix == expectedAddressPrefix && existingSubnet.GatewayAddress == expectedGW { + createNetwork = false + log.Infof("Found existing HNSNetwork %s", networkName) + break + } + } + } + } + + // 2. Create a new HNSNetwork + if createNetwork { + if existingNetwork != nil { + if _, err := existingNetwork.Delete(); err != nil { + return nil, errors.Annotatef(err, "failed to delete existing HNSNetwork %s", networkName) + } + log.Infof("Deleted stale HNSNetwork %s", networkName) + } + + // Add a VxLan subnet + expectedNetwork.Subnets = append(expectedNetwork.Subnets, hcsshim.Subnet{ + AddressPrefix: expectedAddressPrefix, + GatewayAddress: expectedGW, + Policies: []json.RawMessage{ + []byte(fmt.Sprintf(`{"Type":"VSID","VSID":%d}`, expectedVSID)), + }, + }) + + // Config request params + jsonRequest, err := json.Marshal(expectedNetwork) + if err != nil { + return nil, errors.Annotatef(err, "failed to marshal %+v", expectedNetwork) + } + + log.Infof("Attempting to create HNSNetwork %s", string(jsonRequest)) + newNetwork, err := hcsshim.HNSNetworkRequest("POST", "", string(jsonRequest)) + if err != nil { + return nil, errors.Annotatef(err, "failed to create HNSNetwork %s", networkName) + } + + var waitErr, lastErr error + // Wait for the network to populate Management IP + log.Infof("Waiting to get ManagementIP from HNSNetwork %s", networkName) + waitErr = wait.Poll(500*time.Millisecond, 5*time.Second, func() (done bool, err error) { + newNetwork, lastErr = hcsshim.HNSNetworkRequest("GET", newNetwork.Id, "") + return newNetwork != nil && len(newNetwork.ManagementIP) == 0, nil + }) + if waitErr == wait.ErrWaitTimeout { + return nil, errors.Annotatef(lastErr, "timeout, failed to get management IP from HNSNetwork %s", networkName) + } + + // Wait for the interface with the management IP + netshHelper := netsh.New(utilexec.New()) + log.Infof("Waiting to get net interface for HNSNetwork %s (%s)", networkName, newNetwork.ManagementIP) + waitErr = wait.Poll(500*time.Millisecond, 5*time.Second, func() (done bool, err error) { + _, lastErr = netshHelper.GetInterfaceByIP(newNetwork.ManagementIP) + return lastErr == nil, nil + }) + if waitErr == wait.ErrWaitTimeout { + return nil, errors.Annotatef(lastErr, "timeout, failed to get net interface for HNSNetwork %s (%s)", networkName, newNetwork.ManagementIP) + } + + log.Infof("Created HNSNetwork %s", networkName) + return newNetwork, nil + } + + return existingNetwork, nil +} + +type neighbor struct { + MAC string + IP ip.IP4 + ManagementAddress string +} + +func (dev *vxlanDevice) AddEndpoint(n *neighbor) error { + endpointName := createEndpointName(n.IP) + + // 1. Check if the HNSEndpoint exists and has the expected settings + existingEndpoint, err := hcsshim.GetHNSEndpointByName(endpointName) + if err == nil && existingEndpoint.VirtualNetwork == dev.link.Id { + // Check policies if there is PA type + targetType := "PA" + for _, policy := range existingEndpoint.Policies { + policyType, _ := jsonparser.GetUnsafeString(policy, "Type") + if policyType == targetType { + actualPaIP, _ := jsonparser.GetUnsafeString(policy, targetType) + if actualPaIP == n.ManagementAddress { + log.Infof("Found existing remote HNSEndpoint %s", endpointName) + return nil + } + } + } + } + + // 2. Create a new HNSNetwork + if existingEndpoint != nil { + if _, err := existingEndpoint.Delete(); err != nil { + return errors.Annotatef(err, "failed to delete existing remote HNSEndpoint %s", endpointName) + } + log.V(4).Infof("Deleted stale HNSEndpoint %s", endpointName) + } + + newEndpoint := &hcsshim.HNSEndpoint{ + Name: endpointName, + IPAddress: n.IP.ToIP(), + MacAddress: n.MAC, + VirtualNetwork: dev.link.Id, + IsRemoteEndpoint: true, + Policies: []json.RawMessage{ + []byte(fmt.Sprintf(`{"Type":"PA","PA":"%s"}`, n.ManagementAddress)), + }, + } + if _, err := newEndpoint.Create(); err != nil { + return errors.Annotatef(err, "failed to create remote HNSEndpoint %s", endpointName) + } + log.V(4).Infof("Created HNSEndpoint %s", endpointName) + + return nil +} + +func (dev *vxlanDevice) DelEndpoint(n *neighbor) error { + endpointName := createEndpointName(n.IP) + + existingEndpoint, err := hcsshim.GetHNSEndpointByName(endpointName) + if err == nil && existingEndpoint.VirtualNetwork == dev.link.Id { + // Check policies if there is PA type + targetType := "PA" + for _, policy := range existingEndpoint.Policies { + policyType, _ := jsonparser.GetUnsafeString(policy, "Type") + if policyType == targetType { + actualPaIP, _ := jsonparser.GetUnsafeString(policy, targetType) + if actualPaIP == n.ManagementAddress { + // Found it and delete + if _, err := existingEndpoint.Delete(); err != nil { + return errors.Annotatef(err, "failed to delete remote HNSEndpoint %s", endpointName) + } + + log.V(4).Infof("Deleted HNSEndpoint %s", endpointName) + break + } + } + } + } + + return nil +} + +func (dev *vxlanDevice) ConjureMac(targetIP ip.IP4) string { + a, b, c, d := targetIP.Octets() + return fmt.Sprintf("%v-%02x-%02x-%02x-%02x", dev.macPrefix, a, b, c, d) +} + +func createEndpointName(targetIP ip.IP4) string { + return "remote_" + targetIP.String() +} diff --git a/backend/vxlan/vxlan_network_windows.go b/backend/vxlan/vxlan_network_windows.go new file mode 100644 index 0000000000..74c4248a81 --- /dev/null +++ b/backend/vxlan/vxlan_network_windows.go @@ -0,0 +1,125 @@ +// Copyright 2015 flannel authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vxlan + +import ( + log "github.com/golang/glog" + "golang.org/x/net/context" + "sync" + + "github.com/coreos/flannel/backend" + "github.com/coreos/flannel/subnet" + + "github.com/coreos/flannel/pkg/ip" + "strings" +) + +type network struct { + backend.SimpleNetwork + dev *vxlanDevice + subnetMgr subnet.Manager +} + +const ( + encapOverhead = 50 +) + +func newNetwork(subnetMgr subnet.Manager, extIface *backend.ExternalInterface, dev *vxlanDevice, _ ip.IP4Net, lease *subnet.Lease) (*network, error) { + nw := &network{ + SimpleNetwork: backend.SimpleNetwork{ + SubnetLease: lease, + ExtIface: extIface, + }, + subnetMgr: subnetMgr, + dev: dev, + } + + return nw, nil +} + +func (nw *network) Run(ctx context.Context) { + wg := sync.WaitGroup{} + + log.V(0).Info("Watching for new subnet leases") + events := make(chan []subnet.Event) + wg.Add(1) + go func() { + subnet.WatchLeases(ctx, nw.subnetMgr, nw.SubnetLease, events) + log.V(1).Info("WatchLeases exited") + wg.Done() + }() + + defer wg.Wait() + + for { + select { + case evtBatch := <-events: + nw.handleSubnetEvents(evtBatch) + + case <-ctx.Done(): + return + } + } +} + +func (nw *network) MTU() int { + return nw.ExtIface.Iface.MTU - encapOverhead +} + +func (nw *network) handleSubnetEvents(batch []subnet.Event) { + for _, event := range batch { + leaseSubnet := event.Lease.Subnet + leaseAttrs := event.Lease.Attrs + if !strings.EqualFold(leaseAttrs.BackendType, "vxlan") { + log.Warningf("ignoring non-vxlan subnet(%v): type=%v", leaseSubnet, leaseAttrs.BackendType) + continue + } + + publicIP := leaseAttrs.PublicIP.String() + remoteIP := leaseSubnet.IP + 2 + lastIP := leaseSubnet.Next().IP - 1 + + switch event.Type { + case subnet.EventAdded: + for ; remoteIP < lastIP; remoteIP++ { + n := &neighbor{ + IP: remoteIP, + MAC: nw.dev.ConjureMac(remoteIP), + ManagementAddress: publicIP, + } + + log.V(2).Infof("adding subnet: %v publicIP: %s vtepMAC: %s", leaseSubnet, n.ManagementAddress, n.MAC) + if err := nw.dev.AddEndpoint(n); err != nil { + log.Error(err) + } + } + case subnet.EventRemoved: + for ; remoteIP < lastIP; remoteIP++ { + n := &neighbor{ + IP: remoteIP, + MAC: nw.dev.ConjureMac(remoteIP), + ManagementAddress: publicIP, + } + + log.V(2).Infof("removing subnet: %v publicIP: %s vtepMAC: %s", leaseSubnet, n.ManagementAddress, n.MAC) + if err := nw.dev.DelEndpoint(n); err != nil { + log.Error(err) + } + } + default: + log.Error("internal error: unknown event type: ", int(event.Type)) + } + } +} diff --git a/backend/vxlan/vxlan_windows.go b/backend/vxlan/vxlan_windows.go index 0c8633fbf8..835afc9f31 100644 --- a/backend/vxlan/vxlan_windows.go +++ b/backend/vxlan/vxlan_windows.go @@ -11,14 +11,132 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// +build windows package vxlan +// Some design notes: +// VXLAN encapsulates L2 packets (though flannel is L3 only so don't expect to be able to send L2 packets across hosts) +// Windows overlay decap works at L2 and so it needs the correct destination MAC for the remote host to work. +// Windows does not expose an L3Miss interface so for now all possible remote IP/MAC pairs have to be configured upfront. +// +// In this scheme the scaling of table entries (per host) is: +// - 1 network entry for the overlay network +// - 1 endpoint per local container +// - N remote endpoints remote node (total endpoints = import ( + "encoding/json" + "errors" + "fmt" + "sync" + log "github.com/golang/glog" + + "golang.org/x/net/context" + + "github.com/coreos/flannel/backend" + "github.com/coreos/flannel/pkg/ip" + "github.com/coreos/flannel/subnet" + "net" ) func init() { - log.Infof("vxlan is not supported on this platform") + backend.Register("vxlan", New) +} + +const ( + defaultVNI = 4096 + vxlanPort = 4789 +) + +type VXLANBackend struct { + subnetMgr subnet.Manager + extIface *backend.ExternalInterface +} + +func New(sm subnet.Manager, extIface *backend.ExternalInterface) (backend.Backend, error) { + backend := &VXLANBackend{ + subnetMgr: sm, + extIface: extIface, + } + + return backend, nil +} + +func newSubnetAttrs(publicIP net.IP) (*subnet.LeaseAttrs, error) { + return &subnet.LeaseAttrs{ + PublicIP: ip.FromIP(publicIP), + BackendType: "vxlan", + }, nil +} + +func (be *VXLANBackend) RegisterNetwork(ctx context.Context, wg sync.WaitGroup, config *subnet.Config) (backend.Network, error) { + // 1. Parse configuration + cfg := struct { + Name string + MacPrefix string + VNI int + Port int + GBP bool + DirectRouting bool + }{ + VNI: defaultVNI, + Port: vxlanPort, + MacPrefix: "0E-2A", + } + + if len(config.Backend) > 0 { + if err := json.Unmarshal(config.Backend, &cfg); err != nil { + return nil, fmt.Errorf("error decoding VXLAN backend config: %v", err) + } + } + + // 2. Verify configuration + if cfg.VNI < defaultVNI { + return nil, fmt.Errorf("invalid VXLAN backend config. VNI [%v] must be greater than or equal to %v on Windows", cfg.VNI, defaultVNI) + } + if cfg.Port != vxlanPort { + return nil, fmt.Errorf("invalid VXLAN backend config. Port [%v] is not supported on Windows. Omit the setting to default to port %v", cfg.Port, vxlanPort) + } + if cfg.DirectRouting { + return nil, errors.New("invalid VXLAN backend config. DirectRouting is not supported on Windows") + } + if cfg.GBP { + return nil, errors.New("invalid VXLAN backend config. GBP is not supported on Windows") + } + if len(cfg.MacPrefix) == 0 || len(cfg.MacPrefix) != 5 || cfg.MacPrefix[2] != '-' { + return nil, fmt.Errorf("invalid VXLAN backend config.MacPrefix [%v] is invalid, prefix must be of the format xx-xx e.g. 0E-2A", cfg.MacPrefix) + } + if len(cfg.Name) == 0 { + cfg.Name = fmt.Sprintf("flannel.%v", cfg.VNI) + } + log.Infof("VXLAN config: Name=%s MacPrefix=%s VNI=%d Port=%d GBP=%v DirectRouting=%v", cfg.Name, cfg.MacPrefix, cfg.VNI, cfg.Port, cfg.GBP, cfg.DirectRouting) + + devAttrs := vxlanDeviceAttrs{ + vni: uint32(cfg.VNI), + name: cfg.Name, + addressPrefix: config.Network, + } + + dev, err := newVXLANDevice(&devAttrs) + if err != nil { + return nil, err + } + dev.directRouting = cfg.DirectRouting + dev.macPrefix = cfg.MacPrefix + + subnetAttrs, err := newSubnetAttrs(be.extIface.ExtAddr) + if err != nil { + return nil, err + } + + lease, err := be.subnetMgr.AcquireLease(ctx, subnetAttrs) + switch err { + case nil: + case context.Canceled, context.DeadlineExceeded: + return nil, err + default: + return nil, fmt.Errorf("failed to acquire lease: %v", err) + } + + return newNetwork(be.subnetMgr, be.extIface, dev, ip.IP4Net{}, lease) }