diff --git a/build/yamls/antrea-aks.yml b/build/yamls/antrea-aks.yml index c2f0194a666..66ff0e30862 100644 --- a/build/yamls/antrea-aks.yml +++ b/build/yamls/antrea-aks.yml @@ -3810,6 +3810,14 @@ data: # If there are multiple IP addresses configured on the interface, the first one is used. # The interface configured with Node IP is used if this parameter is not set. #transportInterface: + + # A string array of values which specifies the host IPv4/IPv6 addresses for NodePort. Values can be valid IP blocks. + # (e.g. 1.2.3.0/24, 1.2.3.4/32). An empty string slice is meant to select all host IPv4/IPv6 addresses. + #nodePortAddresses: [] + + # Whether or not to enable full Service support in AntreaProxy in antrea-agent. If this option is true, then without + # KubeProxy, NodePort/LoadBalancer are supported, and ClusterIP can be accessed from host. + #antreaProxyFull: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -3892,7 +3900,7 @@ metadata: annotations: {} labels: app: antrea - name: antrea-config-gdgb98gmfd + name: antrea-config-mb96584t95 namespace: kube-system --- apiVersion: v1 @@ -3963,7 +3971,7 @@ spec: fieldRef: fieldPath: spec.serviceAccountName - name: ANTREA_CONFIG_MAP_NAME - value: antrea-config-gdgb98gmfd + value: antrea-config-mb96584t95 image: projects.registry.vmware.com/antrea/antrea-ubuntu:latest imagePullPolicy: IfNotPresent livenessProbe: @@ -4014,7 +4022,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-gdgb98gmfd + name: antrea-config-mb96584t95 name: antrea-config - name: antrea-controller-tls secret: @@ -4295,7 +4303,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-gdgb98gmfd + name: antrea-config-mb96584t95 name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea-eks.yml b/build/yamls/antrea-eks.yml index 53788c248b2..d4001e372e8 100644 --- a/build/yamls/antrea-eks.yml +++ b/build/yamls/antrea-eks.yml @@ -3810,6 +3810,14 @@ data: # If there are multiple IP addresses configured on the interface, the first one is used. # The interface configured with Node IP is used if this parameter is not set. #transportInterface: + + # A string array of values which specifies the host IPv4/IPv6 addresses for NodePort. Values can be valid IP blocks. + # (e.g. 1.2.3.0/24, 1.2.3.4/32). An empty string slice is meant to select all host IPv4/IPv6 addresses. + #nodePortAddresses: [] + + # Whether or not to enable full Service support in AntreaProxy in antrea-agent. If this option is true, then without + # KubeProxy, NodePort/LoadBalancer are supported, and ClusterIP can be accessed from host. 
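The two commented-out options above correspond to the AgentConfig fields added in cmd/antrea-agent/config.go further down in this diff. A rough sketch of how they deserialize, assuming the gopkg.in/yaml.v2 package (the YAML library Antrea actually uses may differ):

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v2" // assumed YAML package; illustration only
)

// Mirrors the two fields added to AgentConfig in cmd/antrea-agent/config.go.
type proxyOptions struct {
	NodePortAddresses []string `yaml:"nodePortAddresses,omitempty"`
	AntreaProxyFull   bool     `yaml:"antreaProxyFull,omitempty"`
}

func main() {
	conf := []byte("nodePortAddresses: [\"1.2.3.0/24\", \"1.2.3.4/32\"]\nantreaProxyFull: true\n")
	var opts proxyOptions
	if err := yaml.Unmarshal(conf, &opts); err != nil {
		panic(err)
	}
	// An empty nodePortAddresses slice selects all host IPv4/IPv6 addresses.
	fmt.Printf("%+v\n", opts)
}
```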
+ #antreaProxyFull: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -3892,7 +3900,7 @@ metadata: annotations: {} labels: app: antrea - name: antrea-config-gdgb98gmfd + name: antrea-config-mb96584t95 namespace: kube-system --- apiVersion: v1 @@ -3963,7 +3971,7 @@ spec: fieldRef: fieldPath: spec.serviceAccountName - name: ANTREA_CONFIG_MAP_NAME - value: antrea-config-gdgb98gmfd + value: antrea-config-mb96584t95 image: projects.registry.vmware.com/antrea/antrea-ubuntu:latest imagePullPolicy: IfNotPresent livenessProbe: @@ -4014,7 +4022,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-gdgb98gmfd + name: antrea-config-mb96584t95 name: antrea-config - name: antrea-controller-tls secret: @@ -4297,7 +4305,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-gdgb98gmfd + name: antrea-config-mb96584t95 name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea-gke.yml b/build/yamls/antrea-gke.yml index eccc1194642..36643d13df9 100644 --- a/build/yamls/antrea-gke.yml +++ b/build/yamls/antrea-gke.yml @@ -3810,6 +3810,14 @@ data: # If there are multiple IP addresses configured on the interface, the first one is used. # The interface configured with Node IP is used if this parameter is not set. #transportInterface: + + # A string array of values which specifies the host IPv4/IPv6 addresses for NodePort. Values can be valid IP blocks. + # (e.g. 1.2.3.0/24, 1.2.3.4/32). An empty string slice is meant to select all host IPv4/IPv6 addresses. + #nodePortAddresses: [] + + # Whether or not to enable full Service support in AntreaProxy in antrea-agent. If this option is true, then without + # KubeProxy, NodePort/LoadBalancer are supported, and ClusterIP can be accessed from host. + #antreaProxyFull: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -3892,7 +3900,7 @@ metadata: annotations: {} labels: app: antrea - name: antrea-config-dc9bfdb7f6 + name: antrea-config-b2bft59f77 namespace: kube-system --- apiVersion: v1 @@ -3963,7 +3971,7 @@ spec: fieldRef: fieldPath: spec.serviceAccountName - name: ANTREA_CONFIG_MAP_NAME - value: antrea-config-dc9bfdb7f6 + value: antrea-config-b2bft59f77 image: projects.registry.vmware.com/antrea/antrea-ubuntu:latest imagePullPolicy: IfNotPresent livenessProbe: @@ -4014,7 +4022,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-dc9bfdb7f6 + name: antrea-config-b2bft59f77 name: antrea-config - name: antrea-controller-tls secret: @@ -4298,7 +4306,7 @@ spec: path: /home/kubernetes/bin name: host-cni-bin - configMap: - name: antrea-config-dc9bfdb7f6 + name: antrea-config-b2bft59f77 name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea-ipsec.yml b/build/yamls/antrea-ipsec.yml index 8677b97d21e..3969c6b98a0 100644 --- a/build/yamls/antrea-ipsec.yml +++ b/build/yamls/antrea-ipsec.yml @@ -3815,6 +3815,14 @@ data: # If there are multiple IP addresses configured on the interface, the first one is used. # The interface configured with Node IP is used if this parameter is not set. #transportInterface: + + # A string array of values which specifies the host IPv4/IPv6 addresses for NodePort. Values can be valid IP blocks. + # (e.g. 1.2.3.0/24, 1.2.3.4/32). An empty string slice is meant to select all host IPv4/IPv6 addresses. + #nodePortAddresses: [] + + # Whether or not to enable full Service support in AntreaProxy in antrea-agent. 
If this option is true, then without + # KubeProxy, NodePort/LoadBalancer are supported, and ClusterIP can be accessed from host. + #antreaProxyFull: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -3897,7 +3905,7 @@ metadata: annotations: {} labels: app: antrea - name: antrea-config-cccf8b2ggf + name: antrea-config-8c848986g8 namespace: kube-system --- apiVersion: v1 @@ -3977,7 +3985,7 @@ spec: fieldRef: fieldPath: spec.serviceAccountName - name: ANTREA_CONFIG_MAP_NAME - value: antrea-config-cccf8b2ggf + value: antrea-config-8c848986g8 image: projects.registry.vmware.com/antrea/antrea-ubuntu:latest imagePullPolicy: IfNotPresent livenessProbe: @@ -4028,7 +4036,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-cccf8b2ggf + name: antrea-config-8c848986g8 name: antrea-config - name: antrea-controller-tls secret: @@ -4344,7 +4352,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-cccf8b2ggf + name: antrea-config-8c848986g8 name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea.yml b/build/yamls/antrea.yml index 64a1b3db73d..f5c45e2190e 100644 --- a/build/yamls/antrea.yml +++ b/build/yamls/antrea.yml @@ -3815,6 +3815,14 @@ data: # If there are multiple IP addresses configured on the interface, the first one is used. # The interface configured with Node IP is used if this parameter is not set. #transportInterface: + + # A string array of values which specifies the host IPv4/IPv6 addresses for NodePort. Values can be valid IP blocks. + # (e.g. 1.2.3.0/24, 1.2.3.4/32). An empty string slice is meant to select all host IPv4/IPv6 addresses. + #nodePortAddresses: [] + + # Whether or not to enable full Service support in AntreaProxy in antrea-agent. If this option is true, then without + # KubeProxy, NodePort/LoadBalancer are supported, and ClusterIP can be accessed from host. + #antreaProxyFull: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -3897,7 +3905,7 @@ metadata: annotations: {} labels: app: antrea - name: antrea-config-mdd77fmc97 + name: antrea-config-c88c4k7kt5 namespace: kube-system --- apiVersion: v1 @@ -3968,7 +3976,7 @@ spec: fieldRef: fieldPath: spec.serviceAccountName - name: ANTREA_CONFIG_MAP_NAME - value: antrea-config-mdd77fmc97 + value: antrea-config-c88c4k7kt5 image: projects.registry.vmware.com/antrea/antrea-ubuntu:latest imagePullPolicy: IfNotPresent livenessProbe: @@ -4019,7 +4027,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-mdd77fmc97 + name: antrea-config-c88c4k7kt5 name: antrea-config - name: antrea-controller-tls secret: @@ -4300,7 +4308,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-mdd77fmc97 + name: antrea-config-c88c4k7kt5 name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/base/conf/antrea-agent.conf b/build/yamls/base/conf/antrea-agent.conf index 40d0065d2c7..3789918459e 100644 --- a/build/yamls/base/conf/antrea-agent.conf +++ b/build/yamls/base/conf/antrea-agent.conf @@ -152,3 +152,11 @@ featureGates: # If there are multiple IP addresses configured on the interface, the first one is used. # The interface configured with Node IP is used if this parameter is not set. #transportInterface: + +# A string array of values which specifies the host IPv4/IPv6 addresses for NodePort. Values can be valid IP blocks. +# (e.g. 1.2.3.0/24, 1.2.3.4/32). An empty string slice is meant to select all host IPv4/IPv6 addresses. 
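Each nodePortAddresses entry has to parse as a CIDR block, which validateAntreaProxyConfig in cmd/antrea-agent/options.go (below) enforces. A standalone sketch of that check:

```go
package main

import (
	"fmt"
	"net"
)

// Mirrors the per-entry check in validateAntreaProxyConfig: every value must
// be a CIDR block, so a single address has to be written as a /32 (or /128).
func validateNodePortAddresses(addrs []string) error {
	for _, a := range addrs {
		if _, _, err := net.ParseCIDR(a); err != nil {
			return fmt.Errorf("invalid NodePort IP address `%s`: %w", a, err)
		}
	}
	return nil
}

func main() {
	fmt.Println(validateNodePortAddresses([]string{"1.2.3.0/24", "1.2.3.4/32"})) // <nil>
	fmt.Println(validateNodePortAddresses([]string{"1.2.3.4"}))                  // error: missing prefix length
}
```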
+#nodePortAddresses: [] + +# Whether or not to enable full Service support in AntreaProxy in antrea-agent. If this option is true, then without +# KubeProxy, NodePort/LoadBalancer are supported, and ClusterIP can be accessed from host. +#antreaProxyFull: false diff --git a/cmd/antrea-agent/agent.go b/cmd/antrea-agent/agent.go index 1007b33985e..9b6c51201ad 100644 --- a/cmd/antrea-agent/agent.go +++ b/cmd/antrea-agent/agent.go @@ -104,7 +104,8 @@ func run(o *Options) error { features.DefaultFeatureGate.Enabled(features.AntreaProxy), features.DefaultFeatureGate.Enabled(features.AntreaPolicy), features.DefaultFeatureGate.Enabled(features.Egress), - features.DefaultFeatureGate.Enabled(features.FlowExporter)) + features.DefaultFeatureGate.Enabled(features.FlowExporter), + o.config.AntreaProxyFull) _, serviceCIDRNet, _ := net.ParseCIDR(o.config.ServiceCIDR) var serviceCIDRNetv6 *net.IPNet @@ -121,7 +122,7 @@ func run(o *Options) error { TransportIface: o.config.TransportInterface, } - routeClient, err := route.NewClient(serviceCIDRNet, networkConfig, o.config.NoSNAT) + routeClient, err := route.NewClient(serviceCIDRNet, networkConfig, o.config.NoSNAT, o.config.AntreaProxyFull) if err != nil { return fmt.Errorf("error creating route client: %v", err) } @@ -172,13 +173,28 @@ func run(o *Options) error { if features.DefaultFeatureGate.Enabled(features.AntreaProxy) { v4Enabled := config.IsIPv4Enabled(nodeConfig, networkConfig.TrafficEncapMode) v6Enabled := config.IsIPv6Enabled(nodeConfig, networkConfig.TrafficEncapMode) + proxyFull := o.config.AntreaProxyFull + var nodePortIPv4Map, nodePortIPv6Map map[int][]net.IP + if proxyFull { + nodePortIPv4Map, nodePortIPv6Map, err = getAvailableNodePortIPs(o.config.NodePortAddresses, o.config.HostGateway) + if err != nil { + return fmt.Errorf("getting available NodePort IP addresses failed: %v", err) + } + if v4Enabled && len(nodePortIPv4Map) == 0 { + return fmt.Errorf("no qualified NodePort IPv4 address was found") + } + if v6Enabled && len(nodePortIPv6Map) == 0 { + return fmt.Errorf("no qualified NodePort IPv6 address was found") + } + } + switch { case v4Enabled && v6Enabled: - proxier = proxy.NewDualStackProxier(nodeConfig.Name, informerFactory, ofClient) + proxier = proxy.NewDualStackProxier(nodeConfig.Name, informerFactory, ofClient, routeClient, nodePortIPv4Map, nodePortIPv6Map, proxyFull) case v4Enabled: - proxier = proxy.NewProxier(nodeConfig.Name, informerFactory, ofClient, false) + proxier = proxy.NewProxier(nodeConfig.Name, informerFactory, ofClient, false, routeClient, nodePortIPv4Map, proxyFull) case v6Enabled: - proxier = proxy.NewProxier(nodeConfig.Name, informerFactory, ofClient, true) + proxier = proxy.NewProxier(nodeConfig.Name, informerFactory, ofClient, true, routeClient, nodePortIPv6Map, proxyFull) default: return fmt.Errorf("at least one of IPv4 or IPv6 should be enabled") } diff --git a/cmd/antrea-agent/config.go b/cmd/antrea-agent/config.go index 3d0f5f9c4ce..bc70c566188 100644 --- a/cmd/antrea-agent/config.go +++ b/cmd/antrea-agent/config.go @@ -152,4 +152,10 @@ type AgentConfig struct { // If there are multiple IP addresses configured on the interface, the first one is used. // The interface configured with Node IP is used if this parameter is not set. TransportInterface string `yaml:"transportInterface,omitempty"` + // A string array of values which specifies the host IPv4/IPv6 addresses for NodePort. Values can be valid IP blocks. + // (e.g. 1.2.3.0/24, 1.2.3.4/32). 
An empty string slice is meant to select all host IPv4/IPv6 addresses. + NodePortAddresses []string `yaml:"nodePortAddresses,omitempty"` + // Whether or not to enable full Service support in AntreaProxy in antrea-agent. If this option is true, then without + // KubeProxy, NodePort/LoadBalancer are supported, and ClusterIP can be accessed from host. + AntreaProxyFull bool `yaml:"antreaProxyFull,omitempty"` } diff --git a/cmd/antrea-agent/options.go b/cmd/antrea-agent/options.go index bb8a9ed3c62..8ee5f033ff9 100644 --- a/cmd/antrea-agent/options.go +++ b/cmd/antrea-agent/options.go @@ -146,6 +146,9 @@ func (o *Options) validate(args []string) error { // (but SNAT can be done by the primary CNI). o.config.NoSNAT = true } + if err := o.validateAntreaProxyConfig(); err != nil { + return fmt.Errorf("proxy config is invalid: %w", err) + } if err := o.validateFlowExporterConfig(); err != nil { return fmt.Errorf("failed to validate flow exporter config: %v", err) } @@ -219,6 +222,17 @@ func (o *Options) setDefaults() { } } +func (o *Options) validateAntreaProxyConfig() error { + if o.config.AntreaProxyFull { + for _, nodePortIP := range o.config.NodePortAddresses { + if _, _, err := net.ParseCIDR(nodePortIP); err != nil { + return fmt.Errorf("invalid NodePort IP address `%s`: %w", nodePortIP, err) + } + } + } + return nil +} + func (o *Options) validateFlowExporterConfig() error { if features.DefaultFeatureGate.Enabled(features.FlowExporter) { host, port, proto, err := flowexport.ParseFlowCollectorAddr(o.config.FlowCollectorAddr, defaultFlowCollectorPort, defaultFlowCollectorTransport) diff --git a/cmd/antrea-agent/util.go b/cmd/antrea-agent/util.go new file mode 100644 index 00000000000..af10b3d079f --- /dev/null +++ b/cmd/antrea-agent/util.go @@ -0,0 +1,64 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "net" + + "antrea.io/antrea/pkg/agent/util" +) + +func getAvailableNodePortIPs(nodePortIPsFromConfig []string, gateway string) (map[int][]net.IP, map[int][]net.IP, error) { + // Get all IP addresses of Node + nodeIPv4Map, nodeIPv6Map, err := util.GetAllNodeIPs() + if err != nil { + return nil, nil, err + } + // IP address of Antrea gateway should not be NodePort IP as it cannot be accessed from outside the Cluster. + gatewayIfIndex := util.GetIndexByName(gateway) + delete(nodeIPv4Map, gatewayIfIndex) + delete(nodeIPv6Map, gatewayIfIndex) + + // If option `NodePortAddresses` is not set, then all Node IP addresses will be used as NodePort IP address. 
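util.GetAllNodeIPs is not part of this diff; the following stdlib-only sketch shows one plausible shape of what it gathers (interface index to addresses), simplified to a single map instead of the separate IPv4/IPv6 maps the real helper returns:

```go
package main

import (
	"fmt"
	"net"
)

// Collect per-interface IPs keyed by interface index. Hypothetical stand-in
// for util.GetAllNodeIPs, which additionally splits IPv4 from IPv6.
func allNodeIPs() (map[int][]net.IP, error) {
	ifaces, err := net.Interfaces()
	if err != nil {
		return nil, err
	}
	m := make(map[int][]net.IP)
	for _, iface := range ifaces {
		addrs, err := iface.Addrs()
		if err != nil {
			continue // skip interfaces we cannot query
		}
		for _, addr := range addrs {
			if ipNet, ok := addr.(*net.IPNet); ok {
				m[iface.Index] = append(m[iface.Index], ipNet.IP)
			}
		}
	}
	return m, nil
}

func main() {
	m, _ := allNodeIPs()
	fmt.Printf("%d interfaces carry addresses\n", len(m))
}
```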
+ if len(nodePortIPsFromConfig) == 0 { + return nodeIPv4Map, nodeIPv6Map, nil + } + + var nodePortIPNets []*net.IPNet + for _, nodePortIP := range nodePortIPsFromConfig { + _, ipNet, _ := net.ParseCIDR(nodePortIP) + nodePortIPNets = append(nodePortIPNets, ipNet) + } + + nodePortIPv4Map, nodePortIPv6Map := make(map[int][]net.IP), make(map[int][]net.IP) + for _, nodePortIPNet := range nodePortIPNets { + for index, ips := range nodeIPv4Map { + for i := range ips { + if nodePortIPNet.Contains(ips[i]) { + nodePortIPv4Map[index] = append(nodePortIPv4Map[index], ips[i]) + } + } + } + for index, ips := range nodeIPv6Map { + for i := range ips { + if nodePortIPNet.Contains(ips[i]) { + nodePortIPv6Map[index] = append(nodePortIPv6Map[index], ips[i]) + } + } + } + } + + return nodePortIPv4Map, nodePortIPv6Map, nil +} diff --git a/hack/generate-manifest.sh b/hack/generate-manifest.sh index 1ecdf687406..516c96f470d 100755 --- a/hack/generate-manifest.sh +++ b/hack/generate-manifest.sh @@ -29,6 +29,7 @@ Generate a YAML manifest for Antrea using Kustomize and print it to stdout. --ipsec Generate a manifest with IPSec encryption of tunnel traffic enabled --all-features Generate a manifest with all alpha features enabled --no-proxy Generate a manifest with Antrea proxy disabled + --proxy-full Generate a manifest with Antrea proxy full support enabled --no-legacy-crd Generate a manifest without legacy CRD mirroring support enabled --endpointslice Generate a manifest with EndpointSlice support enabled --no-np Generate a manifest with Antrea-native policies disabled @@ -69,6 +70,7 @@ KIND=false IPSEC=false ALLFEATURES=false PROXY=true +PROXY_FULL=false LEGACY_CRD=true ENDPOINTSLICE=false NP=true @@ -118,6 +120,11 @@ case $key in PROXY=false shift ;; + --proxy-full) + PROXY=true + PROXY_FULL=true + shift + ;; --no-legacy-crd) LEGACY_CRD=false shift @@ -183,7 +190,13 @@ esac done if [ "$PROXY" == false ] && [ "$ENDPOINTSLICE" == true ]; then - echoerr "--endpointslice requires AntreaProxy and therefore cannot be used with --no-proxy" + echoerr "--endpointslice requires AntreaProxy, so it cannot be used with --no-proxy" print_help exit 1 fi + +if [ "$PROXY" == false ] && [ "$PROXY_FULL" == true ]; then + echoerr "--proxy-full requires AntreaProxy, so it cannot be used with --no-proxy" + print_help + exit 1 +fi @@ -267,6 +280,7 @@ fi if $ALLFEATURES; then sed -i.bak -E "s/^[[:space:]]*#[[:space:]]*AntreaPolicy[[:space:]]*:[[:space:]]*[a-z]+[[:space:]]*$/ AntreaPolicy: true/" antrea-agent.conf + sed -i.bak -E "s/^#antreaProxyFull[[:space:]]*:[[:space:]]*[a-z]+[[:space:]]*$/antreaProxyFull: true/" antrea-agent.conf sed -i.bak -E "s/^[[:space:]]*#[[:space:]]*FlowExporter[[:space:]]*:[[:space:]]*[a-z]+[[:space:]]*$/ FlowExporter: true/" antrea-agent.conf sed -i.bak -E "s/^[[:space:]]*#[[:space:]]*NetworkPolicyStats[[:space:]]*:[[:space:]]*[a-z]+[[:space:]]*$/ NetworkPolicyStats: true/" antrea-agent.conf sed -i.bak -E "s/^[[:space:]]*#[[:space:]]*EndpointSlice[[:space:]]*:[[:space:]]*[a-z]+[[:space:]]*$/ EndpointSlice: true/" antrea-agent.conf @@ -276,6 +290,10 @@ if ! $PROXY; then sed -i.bak -E "s/^[[:space:]]*#[[:space:]]*AntreaProxy[[:space:]]*:[[:space:]]*[a-z]+[[:space:]]*$/ AntreaProxy: false/" antrea-agent.conf fi +if $PROXY_FULL; then + sed -i.bak -E "s/^#antreaProxyFull[[:space:]]*:[[:space:]]*[a-z]+[[:space:]]*$/antreaProxyFull: true/" antrea-agent.conf +fi + if ! 
$LEGACY_CRD; then sed -i.bak -E "s/^#legacyCRDMirroring[[:space:]]*:[[:space:]]*[a-z]+[[:space:]]*$/legacyCRDMirroring: false/" antrea-controller.conf fi diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index fe5733d6565..fe089dcb627 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -349,7 +349,7 @@ func (i *Initializer) initOpenFlowPipeline() error { // Set up flow entries to enable Service connectivity. The agent proxy handles // ClusterIP Services while the upstream kube-proxy is leveraged to handle // any other kinds of Services. - if err := i.ofClient.InstallClusterServiceFlows(); err != nil { + if err := i.ofClient.InstallDefaultServiceFlows(); err != nil { klog.Errorf("Failed to setup default OpenFlow entries for ClusterIP Services: %v", err) return err } diff --git a/pkg/agent/config/node_config.go b/pkg/agent/config/node_config.go index 750b4ba2ee3..e06063725de 100644 --- a/pkg/agent/config/node_config.go +++ b/pkg/agent/config/node_config.go @@ -43,6 +43,16 @@ const ( IPv6ExtraOverhead = 20 ) +var ( + // ServiceGWHairpinIPv4/ServiceGWHairpinIPv6 is used to perform SNAT on Service hairpin packet. The hairpin packet comes + // from Antrea gateway and will be output through Antrea gateway. They are also used as the gateway IP address of + // host Service routing entry. + ServiceGWHairpinIPv4 = net.ParseIP("169.254.169.253") + ServiceGWHairpinIPv6 = net.ParseIP("fc01::aabb:ccdd:eeff") + + DummyNodePortSvcIP = net.ParseIP("0.0.0.0") +) + type GatewayConfig struct { // Name is the name of host gateway, e.g. antrea-gw0. Name string diff --git a/pkg/agent/openflow/client.go b/pkg/agent/openflow/client.go index c00bec3be23..8bb309df06c 100644 --- a/pkg/agent/openflow/client.go +++ b/pkg/agent/openflow/client.go @@ -20,6 +20,7 @@ import ( "net" "antrea.io/libOpenflow/protocol" + v1 "k8s.io/api/core/v1" "k8s.io/klog/v2" "antrea.io/antrea/pkg/agent/config" @@ -49,9 +50,9 @@ type Client interface { // the Cluster Service CIDR as a parameter. InstallClusterServiceCIDRFlows(serviceNets []*net.IPNet) error - // InstallClusterServiceFlows sets up the appropriate flows so that traffic can reach + // InstallDefaultServiceFlows sets up the appropriate flows so that traffic can reach // the different Services running in the Cluster. This method needs to be invoked once. - InstallClusterServiceFlows() error + InstallDefaultServiceFlows() error // InstallDefaultTunnelFlows sets up the classification flow for the default (flow based) tunnel. InstallDefaultTunnelFlows() error @@ -119,6 +120,19 @@ type Client interface { // UninstallLoadBalancerServiceFromOutsideFlows removes flows installed by InstallLoadBalancerServiceFromOutsideFlows. UninstallLoadBalancerServiceFromOutsideFlows(svcIP net.IP, svcPort uint16, protocol binding.Protocol) error + // InstallInitNodePortClassifierFlows installs the first clause flow of conjunction which is used to classify the first packet of + // Service NodePort, with every NodePort IP address as destination IP address. + InstallInitNodePortClassifierFlows(nodePortIPMap map[int][]net.IP, isIPv6 bool) error + + // InstallServiceClassifierFlow installs flows to classify the first packet of Service. For NodePort/LoadBalancer + // whose externalTrafficPolicy is Cluster, or NodePort/LoadBalancer whose externalTrafficPolicy is Local and client + // is from localhost, the flow will set a register to indicate that the packet requires SNAT. 
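For reference, the ServiceGWHairpin addresses introduced in node_config.go above are virtual: the IPv4 one is taken from the 169.254.0.0/16 link-local range and the IPv6 one from the fc00::/7 unique-local range, so neither can collide with real cluster traffic. A quick check:

```go
package main

import (
	"fmt"
	"net"
)

func main() {
	v4 := net.ParseIP("169.254.169.253")      // ServiceGWHairpinIPv4
	v6 := net.ParseIP("fc01::aabb:ccdd:eeff") // ServiceGWHairpinIPv6
	fmt.Println(v4.IsLinkLocalUnicast())      // true: inside 169.254.0.0/16
	_, ula, _ := net.ParseCIDR("fc00::/7")    // unique-local IPv6 block
	fmt.Println(ula.Contains(v6))             // true
}
```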
The flow will also + // generate a learned flow to rewrite the destination MAC of response packet whose request packet is from remote + // client. + InstallServiceClassifierFlow(svcType v1.ServiceType, svcIP net.IP, svcPort uint16, protocol binding.Protocol, nodeLocalExternal bool) error + // UninstallServiceClassifierFlow removes flows installed by InstallServiceClassifierFlow. + UninstallServiceClassifierFlow(svcIP net.IP, svcPort uint16, protocol binding.Protocol) error + // GetFlowTableStatus should return an array of flow table status, all existing flow tables should be included in the list. GetFlowTableStatus() []binding.TableStatus @@ -523,6 +537,10 @@ func generateServicePortFlowCacheKey(svcIP net.IP, svcPort uint16, protocol bind return fmt.Sprintf("S%s%s%x", svcIP, protocol, svcPort) } +func generateServiceClassifierFlowCacheKey(svcIP net.IP, svcPort uint16, protocol binding.Protocol) string { + return fmt.Sprintf("S%s%s%x/C", svcIP, protocol, svcPort) +} + func (c *client) InstallEndpointFlows(protocol binding.Protocol, endpoints []proxy.Endpoint) error { c.replayMutex.RLock() defer c.replayMutex.RUnlock() @@ -536,8 +554,18 @@ func (c *client) InstallEndpointFlows(protocol binding.Protocol, endpoints []pro portVal := portToUint16(endpointPort) cacheKey := generateEndpointFlowCacheKey(endpoint.IP(), endpointPort, protocol) flows = append(flows, c.endpointDNATFlow(endpointIP, portVal, protocol)) + if endpoint.GetIsLocal() { - flows = append(flows, c.hairpinSNATFlow(endpointIP)) + // If Endpoint network is host network, don't add flow to hairpinSNATFlow table. + var hostNetwork bool + ipProtocol := getIPProtocol(endpointIP) + if ipProtocol == binding.ProtocolIP && !c.nodeConfig.PodIPv4CIDR.Contains(endpointIP) || + ipProtocol == binding.ProtocolIPv6 && !c.nodeConfig.PodIPv6CIDR.Contains(endpointIP) { + hostNetwork = true + } + if !hostNetwork { + flows = append(flows, c.hairpinSNATFlow(endpointIP)) + } } keyToFlows[cacheKey] = flows } @@ -557,11 +585,36 @@ func (c *client) UninstallEndpointFlows(protocol binding.Protocol, endpoint prox return c.deleteFlows(c.serviceFlowCache, cacheKey) } +func (c *client) InstallInitNodePortClassifierFlows(nodePortIPMap map[int][]net.IP, isIPv6 bool) error { + flows := c.initServiceClassifierFlows(nodePortIPMap, isIPv6) + if err := c.ofEntryOperations.AddAll(flows); err != nil { + return err + } + c.defaultServiceFlows = append(c.defaultServiceFlows, flows...) 
+ return nil +} + +func (c *client) InstallServiceClassifierFlow(svcType v1.ServiceType, svcIP net.IP, svcPort uint16, protocol binding.Protocol, nodeLocalExternal bool) error { + c.replayMutex.RLock() + defer c.replayMutex.RUnlock() + + flows := c.serviceClassifierFlow(svcType, svcIP, svcPort, protocol, nodeLocalExternal) + cacheKey := generateServiceClassifierFlowCacheKey(svcIP, svcPort, protocol) + return c.addFlows(c.serviceFlowCache, cacheKey, flows) +} + +func (c *client) UninstallServiceClassifierFlow(svcIP net.IP, svcPort uint16, protocol binding.Protocol) error { + c.replayMutex.RLock() + defer c.replayMutex.RUnlock() + cacheKey := generateServiceClassifierFlowCacheKey(svcIP, svcPort, protocol) + return c.deleteFlows(c.serviceFlowCache, cacheKey) +} + func (c *client) InstallServiceFlows(groupID binding.GroupIDType, svcIP net.IP, svcPort uint16, protocol binding.Protocol, affinityTimeout uint16) error { c.replayMutex.RLock() defer c.replayMutex.RUnlock() var flows []binding.Flow - flows = append(flows, c.serviceLBFlow(groupID, svcIP, svcPort, protocol, affinityTimeout != 0)) + flows = append(flows, c.serviceLBFlows(groupID, svcIP, svcPort, protocol, affinityTimeout != 0)...) if affinityTimeout != 0 { flows = append(flows, c.serviceLearnFlow(groupID, svcIP, svcPort, protocol, affinityTimeout)) } @@ -587,7 +640,7 @@ func (c *client) GetServiceFlowKeys(svcIP net.IP, svcPort uint16, protocol bindi return flowKeys } -func (c *client) InstallClusterServiceFlows() error { +func (c *client) InstallDefaultServiceFlows() error { flows := []binding.Flow{ c.serviceNeedLBFlow(), c.sessionAffinityReselectFlow(), @@ -596,15 +649,24 @@ func (c *client) InstallClusterServiceFlows() error { if c.IsIPv4Enabled() { flows = append(flows, c.serviceHairpinResponseDNATFlow(binding.ProtocolIP)) flows = append(flows, c.serviceLBBypassFlows(binding.ProtocolIP)...) + flows = append(flows, c.l3FwdServiceDefaultFlowsViaGW(binding.ProtocolIP, cookie.Service)...) + if c.enableProxyFull { + flows = append(flows, c.serviceHairpinRegSetFlows(binding.ProtocolIP)) + flows = append(flows, c.arpResponderFlow(config.ServiceGWHairpinIPv4.To4(), cookie.Service)) + } } if c.IsIPv6Enabled() { flows = append(flows, c.serviceHairpinResponseDNATFlow(binding.ProtocolIPv6)) flows = append(flows, c.serviceLBBypassFlows(binding.ProtocolIPv6)...) + flows = append(flows, c.l3FwdServiceDefaultFlowsViaGW(binding.ProtocolIPv6, cookie.Service)...) + if c.enableProxyFull { + flows = append(flows, c.serviceHairpinRegSetFlows(binding.ProtocolIPv6)) + } } if err := c.ofEntryOperations.AddAll(flows); err != nil { return err } - c.defaultServiceFlows = flows + c.defaultServiceFlows = append(c.defaultServiceFlows, flows...) 
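The "/C" suffix produced by generateServiceClassifierFlowCacheKey keeps classifier flows in the same serviceFlowCache as the Service's other flows while letting them be uninstalled independently. A sketch of the key scheme:

```go
package main

import (
	"fmt"
	"net"
)

// Same scheme as generateServicePortFlowCacheKey, plus the "/C" classifier variant.
func serviceKey(svcIP net.IP, svcPort uint16, protocol string) string {
	return fmt.Sprintf("S%s%s%x", svcIP, protocol, svcPort)
}

func main() {
	ip := net.ParseIP("10.96.0.10")
	base := serviceKey(ip, 53, "udp")
	fmt.Println(base)        // S10.96.0.10udp35
	fmt.Println(base + "/C") // key for the classifier flows of the same Service
}
```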
return nil } diff --git a/pkg/agent/openflow/client_test.go b/pkg/agent/openflow/client_test.go index 7f3af062e6a..c05ccafb451 100644 --- a/pkg/agent/openflow/client_test.go +++ b/pkg/agent/openflow/client_test.go @@ -100,7 +100,7 @@ func TestIdempotentFlowInstallation(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() m := oftest.NewMockOFEntryOperations(ctrl) - ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, false, false, false) + ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, false, false, false, false) client := ofClient.(*client) client.cookieAllocator = cookie.NewAllocator(0) client.ofEntryOperations = m @@ -128,7 +128,7 @@ func TestIdempotentFlowInstallation(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() m := oftest.NewMockOFEntryOperations(ctrl) - ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, false, false, false) + ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, false, false, false, false) client := ofClient.(*client) client.cookieAllocator = cookie.NewAllocator(0) client.ofEntryOperations = m @@ -169,7 +169,7 @@ func TestFlowInstallationFailed(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() m := oftest.NewMockOFEntryOperations(ctrl) - ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, false, false, false) + ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, false, false, false, false) client := ofClient.(*client) client.cookieAllocator = cookie.NewAllocator(0) client.ofEntryOperations = m @@ -203,7 +203,7 @@ func TestConcurrentFlowInstallation(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() m := oftest.NewMockOFEntryOperations(ctrl) - ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, false, false, false) + ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, false, false, false, false) client := ofClient.(*client) client.cookieAllocator = cookie.NewAllocator(0) client.ofEntryOperations = m @@ -393,7 +393,7 @@ func Test_client_SendTraceflowPacket(t *testing.T) { } func prepareTraceflowFlow(ctrl *gomock.Controller) *client { - ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, true, false, false) + ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, true, false, false, false) c := ofClient.(*client) c.cookieAllocator = cookie.NewAllocator(0) c.nodeConfig = nodeConfig @@ -411,7 +411,7 @@ func prepareTraceflowFlow(ctrl *gomock.Controller) *client { } func prepareSendTraceflowPacket(ctrl *gomock.Controller, success bool) *client { - ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, true, false, false) + ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, true, false, false, false) c := ofClient.(*client) c.nodeConfig = nodeConfig m := ovsoftest.NewMockBridge(ctrl) @@ -499,7 +499,7 @@ func Test_client_setBasePacketOutBuilder(t *testing.T) { } func prepareSetBasePacketOutBuilder(ctrl *gomock.Controller, success bool) *client { - ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, true, false, false) + ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, true, true, false, false, false) c := ofClient.(*client) m := 
ovsoftest.NewMockBridge(ctrl) c.bridge = m diff --git a/pkg/agent/openflow/network_policy_test.go b/pkg/agent/openflow/network_policy_test.go index f67e8be0403..b58cb0f948f 100644 --- a/pkg/agent/openflow/network_policy_test.go +++ b/pkg/agent/openflow/network_policy_test.go @@ -506,7 +506,7 @@ func TestBatchInstallPolicyRuleFlows(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockOperations := oftest.NewMockOFEntryOperations(ctrl) - ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, false, true, false, false) + ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, false, true, false, false, false) c = ofClient.(*client) c.cookieAllocator = cookie.NewAllocator(0) c.ofEntryOperations = mockOperations @@ -573,7 +573,7 @@ func BenchmarkBatchInstallPolicyRuleFlows(b *testing.B) { ctrl := gomock.NewController(b) defer ctrl.Finish() mockOperations := oftest.NewMockOFEntryOperations(ctrl) - ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, false, true, false, false) + ofClient := NewClient(bridgeName, bridgeMgmtAddr, ovsconfig.OVSDatapathSystem, false, true, false, false, false) c = ofClient.(*client) c.cookieAllocator = cookie.NewAllocator(0) c.ofEntryOperations = mockOperations diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index 9028f263a4f..29a4f2a6ddc 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -26,6 +26,7 @@ import ( "antrea.io/libOpenflow/protocol" "antrea.io/ofnet/ofctrl" + v1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" @@ -47,9 +48,11 @@ const ( spoofGuardTable binding.TableIDType = 10 arpResponderTable binding.TableIDType = 20 ipv6Table binding.TableIDType = 21 - serviceHairpinTable binding.TableIDType = 29 + serviceHairpinTable binding.TableIDType = 23 + serviceConntrackTable binding.TableIDType = 24 // serviceConntrackTable use a new ct_zone to transform SNATed connections. conntrackTable binding.TableIDType = 30 conntrackStateTable binding.TableIDType = 31 + serviceClassifierTable binding.TableIDType = 35 sessionAffinityTable binding.TableIDType = 40 dnatTable binding.TableIDType = 40 serviceLBTable binding.TableIDType = 41 @@ -69,7 +72,8 @@ const ( IngressDefaultTable binding.TableIDType = 100 IngressMetricTable binding.TableIDType = 101 conntrackCommitTable binding.TableIDType = 105 - hairpinSNATTable binding.TableIDType = 106 + serviceConntrackCommitTable binding.TableIDType = 106 + hairpinSNATTable binding.TableIDType = 108 L2ForwardingOutTable binding.TableIDType = 110 // Flow priority level @@ -92,6 +96,14 @@ const ( ipv6MulticastAddr = "FF00::/8" // IPv6 link-local prefix ipv6LinkLocalAddr = "FE80::/10" + + // The conjunction IDs which are used to classify the first packet of NodePort/LoadBalance at table serviceClassifierTable. + // - For NodePort/LoadBalancer whose externalTrafficPolicy is Cluster, SNAT is required. + // - For NodePort/LoadBalancer whose externalTrafficPolicy is Local, SNAT is not required. 
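A two-clause conjunction behaves as a logical AND across two independently installed match sets. A toy Go model of how serviceClassifierTable combines "destination IP is a NodePort IP" (clause 1/2) with "destination port belongs to a NodePort Service" (clause 2/2); this is a sketch of the semantics, not the OVS implementation:

```go
package main

import (
	"fmt"
	"net"
)

type conjMatch struct {
	nodePortIPs []net.IP        // clause 1/2: NodePort destination IPs
	svcPorts    map[uint16]bool // clause 2/2: NodePort Service ports
}

func (c *conjMatch) matches(dstIP net.IP, dstPort uint16) bool {
	clause1 := false
	for _, ip := range c.nodePortIPs {
		if ip.Equal(dstIP) {
			clause1 = true
			break
		}
	}
	return clause1 && c.svcPorts[dstPort] // the conjunction action fires only if both clauses hit
}

func main() {
	c := &conjMatch{
		nodePortIPs: []net.IP{net.ParseIP("192.168.77.100")},
		svcPorts:    map[uint16]bool{30001: true},
	}
	fmt.Println(c.matches(net.ParseIP("192.168.77.100"), 30001)) // true
	fmt.Println(c.matches(net.ParseIP("192.168.77.100"), 443))   // false
}
```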
+ clusterConjIDIPv4 = uint32(41) + localConjIDIPv4 = uint32(42) + clusterConjDIPv6 = uint32(61) + localConjIDIPv6 = uint32(62) ) type ofAction int32 @@ -134,8 +146,10 @@ var ( {arpResponderTable, "ARPResponder"}, {ipv6Table, "IPv6"}, {serviceHairpinTable, "ServiceHairpin"}, + {serviceConntrackTable, "serviceConntrack"}, {conntrackTable, "ConntrackZone"}, {conntrackStateTable, "ConntrackState"}, + {serviceClassifierTable, "serviceClassifier"}, {dnatTable, "DNAT(SessionAffinity)"}, {sessionAffinityTable, "SessionAffinity"}, {serviceLBTable, "ServiceLB"}, @@ -153,6 +167,7 @@ var ( {IngressDefaultTable, "IngressDefaultRule"}, {IngressMetricTable, "IngressMetric"}, {conntrackCommitTable, "ConntrackCommit"}, + {serviceConntrackCommitTable, "serviceConntrackCommit"}, {hairpinSNATTable, "HairpinSNATTable"}, {L2ForwardingOutTable, "Output"}, } @@ -232,9 +247,12 @@ const ( endpointIPReg regType = 3 // Use reg3 to store endpoint IP endpointPortReg regType = 4 // Use reg4[0..15] to store endpoint port serviceLearnReg = endpointPortReg // Use reg4[16..18] to store endpoint selection states. + isNodePortReg = endpointPortReg // Use reg4[19] to store the status of whether Service is NodePort. + serviceSNATReg = endpointPortReg // Use reg4[20] to store the status of whether Service traffic from gateway requires SNAT. EgressReg regType = 5 IngressReg regType = 6 - TraceflowReg regType = 9 // Use reg9[28..31] to store traceflow dataplaneTag. + + TraceflowReg regType = 9 // Use reg9[28..31] to store traceflow dataplaneTag. // CNPDenyConjIDReg reuses reg3 which will also be used for storing endpoint IP to store the rule ID. Since // the service selection will finish when a packet hitting NetworkPolicy related rules, there is no conflict. CNPDenyConjIDReg regType = 3 @@ -246,9 +264,15 @@ const ( // marksRegServiceNeedLearn indicates a packet has done service selection and // the selection result needs to be cached. marksRegServiceNeedLearn uint32 = 0b011 + // marksRegServiceNeedSNAT indicates that the packet requires SNAT. + marksRegServiceNeedSNAT uint32 = 0b1 + // marksServiceIsNodePort indicates that the Service is NodePort. + marksServiceIsNodePort uint32 = 0b1 - CtZone = 0xfff0 - CtZoneV6 = 0xffe6 + CtZone = 0xfff0 + CtZoneV6 = 0xffe6 + ServiceCtZone = 0xfff1 + ServiceCtZoneV6 = 0xffe7 portFoundMark = 0b1 hairpinMark = 0b1 @@ -337,6 +361,20 @@ var ( // Endpoint, still needs to select an Endpoint, or if an Endpoint has already // been selected and the selection decision needs to be learned. serviceLearnRegRange = binding.Range{16, 18} + // serviceSNATMarkRange takes a 1-bit range of register serviceSNATReg to + // mark whether the first packet of Service requires SNAT. + // Below cases need serviceGWHairpinIPv4/serviceGWHairpinIPv6 to perform SNAT, as + // the packet comes from Antrea gateway, and will be output through Antrea gateway. + // 1. ClusterIP client is from host, Endpoint is on host network. + // 2. NodePort/LoadBalancer client is from host, ExternalTrafficPolicy is Cluster/Local, + // Endpoint is on host network. + // 3. NodePort/LoadBalancer client is from remote, ExternalTrafficPolicy is Cluster, + // Endpoint is on host network. + // When the Endpoint is not on host network, Antrea gateway IP is used to perform SNAT. + serviceSNATMarkRange = binding.Range{20, 20} + // isNodePortRegRange takes a 1-bit range of register isNodePortReg to mark whether the + // Service is NodePort. 
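With these additions reg4 carries four values at once. A sketch of the bit arithmetic implied by the ranges above (bits 0..15 endpoint port, 16..18 selection state, bit 19 NodePort mark, bit 20 SNAT mark):

```go
package main

import "fmt"

// getBits extracts reg[lo..hi] inclusive; setBits writes val into that range.
func getBits(reg uint32, lo, hi uint) uint32 {
	width := hi - lo + 1
	return (reg >> lo) & (1<<width - 1)
}

func setBits(reg uint32, lo, hi uint, val uint32) uint32 {
	width := hi - lo + 1
	mask := uint32(1<<width-1) << lo
	return (reg &^ mask) | ((val << lo) & mask)
}

func main() {
	var reg4 uint32
	reg4 = setBits(reg4, 0, 15, 30001)  // endpoint port
	reg4 = setBits(reg4, 16, 18, 0b011) // marksRegServiceNeedLearn
	reg4 = setBits(reg4, 19, 19, 0b1)   // marksServiceIsNodePort
	reg4 = setBits(reg4, 20, 20, 0b1)   // marksRegServiceNeedSNAT
	fmt.Printf("port=%d learn=%03b nodePort=%b snat=%b\n",
		getBits(reg4, 0, 15), getBits(reg4, 16, 18),
		getBits(reg4, 19, 19), getBits(reg4, 20, 20))
}
```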
+ isNodePortRegRange = binding.Range{19, 19} // metricIngressRuleIDRange takes 0..31 range of ct_label to store the ingress rule ID. metricIngressRuleIDRange = binding.Range{0, 31} // metricEgressRuleIDRange takes 32..63 range of ct_label to store the egress rule ID. @@ -386,6 +424,7 @@ type client struct { enableAntreaPolicy bool enableDenyTracking bool enableEgress bool + enableProxyFull bool roundInfo types.RoundInfo cookieAllocator cookie.Allocator bridge binding.Bridge @@ -394,7 +433,7 @@ pipeline map[binding.TableIDType]binding.Table // Flow caches for corresponding deletions. nodeFlowCache, podFlowCache, serviceFlowCache, snatFlowCache, tfFlowCache *flowCategoryCache - // "fixed" flows installed by the agent after initialization and which do not change during + // the lifetime of the client. gatewayFlows, defaultServiceFlows, defaultTunnelFlows, hostNetworkingFlows []binding.Flow // ofEntryOperations is a wrapper interface for OpenFlow entry Add / Modify / Delete operations. It @@ -551,11 +590,15 @@ func (c *client) defaultFlows() (flows []binding.Flow) { // tunnelClassifierFlow generates the flow to mark traffic comes from the tunnelOFPort. func (c *client) tunnelClassifierFlow(tunnelOFPort uint32, category cookie.Category) binding.Flow { + nextTable := conntrackTable + if c.enableProxyFull { + nextTable = serviceConntrackTable + } return c.pipeline[ClassifierTable].BuildFlow(priorityNormal). MatchInPort(tunnelOFPort). Action().LoadRegRange(int(marksReg), markTrafficFromTunnel, binding.Range{0, 15}). Action().LoadRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). - Action().GotoTable(conntrackTable). + Action().GotoTable(nextTable). Cookie(c.cookieAllocator.Request(category).Raw()). Done() } @@ -598,37 +641,143 @@ func (c *client) connectionTrackFlows(category cookie.Category) []binding.Flow { connectionTrackCommitTable := c.pipeline[conntrackCommitTable] flows := c.conntrackBasicFlows(category) if c.enableProxy { - flows = append(flows, - // Replace the default flow with multiple resubmits actions. - connectionTrackStateTable.BuildFlow(priorityMiss). + // Replace the default flow with multiple resubmits actions. + if c.enableProxyFull { + flows = append(flows, connectionTrackStateTable.BuildFlow(priorityMiss). Cookie(c.cookieAllocator.Request(category).Raw()). + Action().ResubmitToTable(serviceClassifierTable). Action().ResubmitToTable(sessionAffinityTable). Action().ResubmitToTable(serviceLBTable). - Done(), - // Enable NAT. - connectionTrackTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). - Action().CT(false, connectionTrackTable.GetNext(), CtZone).NAT().CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - connectionTrackTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIPv6). - Action().CT(false, connectionTrackTable.GetNext(), CtZoneV6).NAT().CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - connectionTrackCommitTable.BuildFlow(priorityLow).MatchProtocol(binding.ProtocolIP). - MatchCTStateTrk(true). 
- MatchCTMark(ServiceCTMark, nil). - MatchRegRange(int(serviceLearnReg), marksRegServiceSelected, serviceLearnRegRange). + Done()) + } else { + flows = append(flows, connectionTrackStateTable.BuildFlow(priorityMiss). Cookie(c.cookieAllocator.Request(category).Raw()). - Action().GotoTable(connectionTrackCommitTable.GetNext()). - Done(), - ) + Action().ResubmitToTable(sessionAffinityTable). + Action().ResubmitToTable(serviceLBTable). + Done()) + } + + for _, proto := range c.ipProtocols { + gatewayIP := c.nodeConfig.GatewayConfig.IPv4 + serviceGWHairpinIP := config.ServiceGWHairpinIPv4.To4() + serviceCtZone := ServiceCtZone + ctZone := CtZone + if proto == binding.ProtocolIPv6 { + gatewayIP = c.nodeConfig.GatewayConfig.IPv6 + serviceGWHairpinIP = config.ServiceGWHairpinIPv6.To16() + serviceCtZone = ServiceCtZoneV6 + ctZone = CtZoneV6 + } + flows = append(flows, + // This flow is used to maintain DNAT conntrack for Service traffic. + connectionTrackTable.BuildFlow(priorityNormal).MatchProtocol(proto). + Action().CT(false, connectionTrackTable.GetNext(), ctZone).NAT().CTDone(). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + connectionTrackCommitTable.BuildFlow(priorityLow).MatchProtocol(proto). + MatchCTStateTrk(true). + MatchCTMark(ServiceCTMark, nil). + MatchRegRange(int(serviceLearnReg), marksRegServiceSelected, serviceLearnRegRange). + Cookie(c.cookieAllocator.Request(category).Raw()). + Action().GotoTable(connectionTrackCommitTable.GetNext()). + Done(), + ) + + if c.enableProxyFull { + serviceConnectionTrackTable := c.pipeline[serviceConntrackTable] + serviceConnectionTrackCommitTable := c.pipeline[serviceConntrackCommitTable] + flows = append(flows, + // This flow is used to match the Service traffic from Antrea gateway. The Service traffic from gateway + // should enter table serviceConntrackCommitTable, otherwise it will be matched by other flows in + // table connectionTrackCommit. + connectionTrackCommitTable.BuildFlow(priorityHigh).MatchProtocol(proto). + MatchCTMark(ServiceCTMark, nil). + MatchRegRange(int(marksReg), markTrafficFromGateway, binding.Range{0, 15}). + Action().GotoTable(serviceConntrackCommitTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + // This flow is used to maintain SNAT conntrack for Service traffic. + serviceConnectionTrackTable.BuildFlow(priorityNormal).MatchProtocol(proto). + Action().CT(false, serviceConnectionTrackTable.GetNext(), serviceCtZone).NAT().CTDone(). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + // This flow is used to match the first packet of NodePort/LoadBalancer traffic whose Endpoint is on the + // host network. As the packet comes from Antrea gateway and will also leave through Antrea gateway, + // a virtual hairpin IP is used to perform SNAT for the packet, rather than Antrea gateway's IP. + // Note that this flow changes the behavior of NodePort/LoadBalancer Services whose externalTrafficPolicy + // is Local when the Endpoint is on the host network: by definition, externalTrafficPolicy Local should + // retain the client source IP, but SNAT rewrites it here. This is acceptable, because a host-network Pod + // is already reachable directly on its Node's IP, so exposing it through a NodePort adds little, and with + // externalTrafficPolicy Local there is only a single such Endpoint anyway. + serviceConnectionTrackCommitTable.BuildFlow(priorityHigh).MatchProtocol(proto). 
+ MatchRegRange(int(PortCacheReg), config.HostGatewayOFPort, ofPortRegRange). + Cookie(c.cookieAllocator.Request(category).Raw()). + MatchCTStateNew(true). + MatchCTStateTrk(true). + MatchCTStateDNAT(true). + Action().CT(true, serviceConnectionTrackCommitTable.GetNext(), serviceCtZone). + SNAT(&binding.IPRange{StartIP: serviceGWHairpinIP, EndIP: serviceGWHairpinIP}, nil). + CTDone(). + Done(), + // This flow is used to match the first packet of NodePort/LoadBalancer whose output port is not + // Antrea gateway, and externalTrafficPolicy is Cluster. This packet requires SNAT. + // Antrea gateway IP is used to perform SNAT for the packet. + serviceConnectionTrackCommitTable.BuildFlow(priorityNormal).MatchProtocol(proto). + MatchRegRange(int(serviceSNATReg), marksRegServiceNeedSNAT, serviceSNATMarkRange). + Cookie(c.cookieAllocator.Request(category).Raw()). + MatchCTStateNew(true). + MatchCTStateTrk(true). + MatchCTStateDNAT(true). + Action().CT(true, serviceConnectionTrackCommitTable.GetNext(), serviceCtZone). + SNAT(&binding.IPRange{StartIP: gatewayIP, EndIP: gatewayIP}, nil). + CTDone(). + Done(), + // This flow is used to match the later request packets of Service traffic whose first request packet has been committed + // and SNATed. For example: + /* + * 192.168.77.1 is the IP address of client. + * 192.168.77.100 is the IP address of k8s node. + * 30001 is a NodePort port. + * 10.10.0.1 is the IP address of Antrea gateway. + * 10.10.0.3 is the Endpoint of NodePort Service. + + * pkt 1 (request) + * client 192.168.77.1:12345->192.168.77.100:30001 + * ct zone SNAT 65521 192.168.77.1:12345->192.168.77.100:30001 + * ct zone DNAT 65520 192.168.77.1:12345->192.168.77.100:30001 + * ct commit DNAT zone 65520 192.168.77.1:12345->192.168.77.100:30001 => 192.168.77.1:12345->10.10.0.3:80 + * ct commit SNAT zone 65521 192.168.77.1:12345->10.10.0.3:80 => 10.10.0.1:12345->10.10.0.3:80 + * output + * pkt 2 (response) + * pod 10.10.0.3:80->10.10.0.1:12345 + * ct zone SNAT 65521 10.10.0.3:80->10.10.0.1:12345 => 10.10.0.3:80->192.168.77.1:12345 + * ct zone DNAT 65520 10.10.0.3:80->192.168.77.1:12345 => 192.168.77.100:30001->192.168.77.1:12345 + * output + * pkt 3 (request) + * client 192.168.77.1:12345->192.168.77.100:30001 + * ct zone SNAT 65521 192.168.77.1:12345->192.168.77.100:30001 + * ct zone DNAT 65520 192.168.77.1:12345->10.10.0.3:80 + * ct zone SNAT 65521 192.168.77.1:12345->10.10.0.3:80 => 10.10.0.1:12345->10.10.0.3:80 + * output + * pkt ... + + The source IP address of pkt 3 cannot be transformed through zone 65521 as there is no conntrack entry for + 192.168.77.1:12345<->192.168.77.100:30001, so the source IP is still 192.168.77.1. + Before output, pkt 3 needs SNAT, but its connection has already been committed. This flow performs SNAT for pkt 3. + */ + serviceConnectionTrackCommitTable.BuildFlow(priorityNormal).MatchProtocol(proto). + Cookie(c.cookieAllocator.Request(category).Raw()). + MatchCTStateNew(false). + MatchCTStateTrk(true). + MatchCTStateDNAT(true). + Action().CT(false, serviceConnectionTrackCommitTable.GetNext(), serviceCtZone). + NAT(). + CTDone(). + Done(), + ) + } + } } else { flows = append(flows, c.kubeProxyFlows(category)...) } @@ -1228,6 +1377,40 @@ func (c *client) l3FwdFlowToRemoteViaGW( Done() } +// l3FwdServiceDefaultFlowsViaGW generates the default L3 forwarding flows that let Service traffic pass through Antrea gateway. 
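The packet walk in the comment above chains two conntrack zones for the request direction. A toy model with the zone numbers and addresses from that walk, reducing NAT to plain tuple rewrites:

```go
package main

import "fmt"

type tuple struct {
	srcIP, dstIP     string
	srcPort, dstPort int
}

// Zone 65520 (CtZone): DNAT to the selected Endpoint.
func dnat(p tuple, epIP string, epPort int) tuple {
	p.dstIP, p.dstPort = epIP, epPort
	return p
}

// Zone 65521 (ServiceCtZone): SNAT to the Antrea gateway IP.
func snat(p tuple, gwIP string) tuple {
	p.srcIP = gwIP
	return p
}

func main() {
	pkt := tuple{"192.168.77.1", "192.168.77.100", 12345, 30001} // client -> NodePort
	pkt = dnat(pkt, "10.10.0.3", 80)
	pkt = snat(pkt, "10.10.0.1")
	fmt.Printf("%+v\n", pkt) // {srcIP:10.10.0.1 dstIP:10.10.0.3 srcPort:12345 dstPort:80}
}
```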
+func (c *client) l3FwdServiceDefaultFlowsViaGW(ipProto binding.Protocol, category cookie.Category) []binding.Flow { + gatewayMAC := c.nodeConfig.GatewayConfig.MAC + + flows := []binding.Flow{ + /* This flow is used to match the packets of Service traffic: + - NodePort/LoadBalancer request packets which pass through Antrea gateway and the Service Endpoint is on host network. + - ClusterIP request packets which are from Antrea gateway and the Service Endpoint is on host network. + The matched packets should leave through Antrea gateway, however, they also enter through Antrea gateway. This + is hairpin traffic. + */ + c.pipeline[l3ForwardingTable].BuildFlow(priorityLow).MatchProtocol(ipProto). + MatchCTMark(ServiceCTMark, nil). + MatchCTStateRpl(false). + MatchCTStateTrk(true). + MatchRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). + Action().SetDstMAC(gatewayMAC). + Action().ResubmitToTable(l3DecTTLTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + // This flow is used to match the response packets of NodePort/LoadBalancer traffic. The destination MAC address + // and output port will be set on serviceResponseProcessTable. + c.pipeline[l3ForwardingTable].BuildFlow(priorityLow).MatchProtocol(ipProto). + MatchCTMark(ServiceCTMark, nil). + MatchCTStateRpl(true). + MatchCTStateTrk(true). + MatchRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). + Action().ResubmitToTable(l3DecTTLTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + } + return flows +} + // arpResponderFlow generates the ARP responder flow entry that replies request comes from local gateway for peer // gateway MAC. func (c *client) arpResponderFlow(peerGatewayIP net.IP, category cookie.Category) binding.Flow { @@ -1308,18 +1491,31 @@ func getIPProtocol(ip net.IP) binding.Protocol { // IP of the hairpin packet to the source IP. func (c *client) serviceHairpinResponseDNATFlow(ipProtocol binding.Protocol) binding.Flow { hpIP := hairpinIP - from := "NXM_OF_IP_SRC" - to := "NXM_OF_IP_DST" + from := binding.NxmFieldSrcIPv4 + to := binding.NxmFieldDstIPv4 if ipProtocol == binding.ProtocolIPv6 { hpIP = hairpinIPv6 - from = "NXM_NX_IPV6_SRC" - to = "NXM_NX_IPV6_DST" + from = binding.NxmFieldSrcIPv6 + to = binding.NxmFieldDstIPv6 } - return c.pipeline[serviceHairpinTable].BuildFlow(priorityNormal).MatchProtocol(ipProtocol). + hairpinTable := c.pipeline[serviceHairpinTable] + return hairpinTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). MatchDstIP(hpIP). Action().Move(from, to). Action().LoadRegRange(int(marksReg), hairpinMark, hairpinMarkRange). - Action().GotoTable(conntrackTable). + Action().GotoTable(hairpinTable.GetNext()). + Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). + Done() +} + +func (c *client) serviceHairpinRegSetFlows(ipProtocol binding.Protocol) binding.Flow { + // If the packet is from Antrea gateway, and its output is also Antrea gateway, set hairpin register, otherwise + // the packet will be dropped. + return c.pipeline[hairpinSNATTable].BuildFlow(priorityNormal).MatchProtocol(ipProtocol). + MatchRegRange(int(marksReg), markTrafficFromGateway, binding.Range{0, 15}). + MatchRegRange(int(PortCacheReg), config.HostGatewayOFPort, ofPortRegRange). + Action().LoadRegRange(int(marksReg), hairpinMark, hairpinMarkRange). + Action().GotoTable(L2ForwardingOutTable). Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). 
Done() } @@ -1890,6 +2086,114 @@ func (c *client) snatRuleFlow(ofPort uint32, snatIP net.IP, snatMark uint32, loc } } +func (c *client) initServiceClassifierFlows(nodePortIPMap map[int][]net.IP, isIPv6 bool) []binding.Flow { + clusterConjID := clusterConjIDIPv4 + localConjID := localConjIDIPv4 + ipProtocol := binding.ProtocolIP + serviceGWHairpinIP := config.ServiceGWHairpinIPv4 + if isIPv6 { + clusterConjID = clusterConjDIPv6 + localConjID = localConjIDIPv6 + ipProtocol = binding.ProtocolIPv6 + serviceGWHairpinIP = config.ServiceGWHairpinIPv6 + } + + var flows []binding.Flow + for _, ips := range nodePortIPMap { + for _, ip := range ips { + flows = append(flows, + // This flow is used to match the first packet's destination IP address: + // 1. NodePort Service whose externalTrafficPolicy is Cluster, and client is from remote/localhost. + // 2. NodePort Service whose externalTrafficPolicy is Local, and client is from remote. + c.pipeline[serviceClassifierTable].BuildFlow(priorityNormal). + Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). + MatchProtocol(ipProtocol). + MatchDstIP(ip). + Action().Conjunction(clusterConjID, 1, 2). + Action().Conjunction(localConjID, 1, 2). + Done(), + ) + } + } + flows = append(flows, + c.pipeline[serviceClassifierTable].BuildFlow(priorityNormal). + Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). + MatchProtocol(ipProtocol). + MatchDstIP(serviceGWHairpinIP). + Action().Conjunction(clusterConjID, 1, 2). + Action().Conjunction(localConjID, 1, 2). + Done(), + ) + + flows = append(flows, + // This flow is used to perform actions for the first packet of NodePort Service whose externalTrafficPolicy is + // Cluster, and client is from remote/localhost. + c.pipeline[serviceClassifierTable].BuildFlow(priorityNormal). + MatchProtocol(ipProtocol). + MatchConjID(clusterConjID). + Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). + Action().LoadRegRange(int(serviceSNATReg), marksRegServiceNeedSNAT, serviceSNATMarkRange). + Action().LoadRegRange(int(isNodePortReg), marksServiceIsNodePort, isNodePortRegRange). + Done(), + // This flow is used to perform actions for the first packet of NodePort Service whose externalTrafficPolicy is + // Local, and client is from remote. + c.pipeline[serviceClassifierTable].BuildFlow(priorityNormal). + MatchProtocol(ipProtocol). + MatchConjID(localConjID). + Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). + Action().LoadRegRange(int(isNodePortReg), marksServiceIsNodePort, isNodePortRegRange). + Done(), + ) + return flows +} + +func (c *client) serviceClassifierFlow(svcType v1.ServiceType, svcIP net.IP, svcPort uint16, protocol binding.Protocol, nodeLocalExternal bool) []binding.Flow { + var flows []binding.Flow + isIPv6 := false + if protocol == binding.ProtocolTCPv6 || protocol == binding.ProtocolUDPv6 || protocol == binding.ProtocolSCTPv6 { + isIPv6 = true + } + + if svcType == v1.ServiceTypeNodePort { + clusterConjID := clusterConjIDIPv4 + localConjID := localConjIDIPv4 + if isIPv6 { + clusterConjID = clusterConjDIPv6 + localConjID = localConjIDIPv6 + } + + if nodeLocalExternal { + flows = append(flows, + c.pipeline[serviceClassifierTable].BuildFlow(priorityHigh). + Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). + MatchProtocol(protocol). + MatchDstPort(svcPort, nil). + Action().Conjunction(localConjID, 2, 2). + Done(), + ) + } else { + flows = append(flows, + c.pipeline[serviceClassifierTable].BuildFlow(priorityNormal). + Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). 
+ MatchProtocol(protocol). + MatchDstPort(svcPort, nil). + Action().Conjunction(clusterConjID, 2, 2). + Done()) + } + } else { + if !nodeLocalExternal { + flows = append(flows, c.pipeline[serviceClassifierTable].BuildFlow(priorityNormal). + MatchProtocol(protocol). + MatchDstIP(svcIP). + MatchDstPort(svcPort, nil). + Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). + Action().LoadRegRange(int(serviceSNATReg), marksRegServiceNeedSNAT, serviceSNATMarkRange). + Done()) + } + } + return flows +} + // loadBalancerServiceFromOutsideFlow generates the flow to forward LoadBalancer service traffic from outside node // to gateway. kube-proxy will then handle the traffic. // This flow is for Windows Node only. @@ -1909,12 +2213,22 @@ func (c *client) loadBalancerServiceFromOutsideFlow(svcIP net.IP, svcPort uint16 func (c *client) serviceLearnFlow(groupID binding.GroupIDType, svcIP net.IP, svcPort uint16, protocol binding.Protocol, affinityTimeout uint16) binding.Flow { // Using unique cookie ID here to avoid learned flow cascade deletion. cookieID := c.cookieAllocator.RequestWithObjectID(cookie.Service, uint32(groupID)).Raw() - learnFlowBuilder := c.pipeline[serviceLBTable].BuildFlow(priorityLow). - MatchRegRange(int(serviceLearnReg), marksRegServiceNeedLearn, serviceLearnRegRange). - MatchDstIP(svcIP). - MatchProtocol(protocol). - MatchDstPort(svcPort, nil). - Cookie(cookieID) + var learnFlowBuilder binding.FlowBuilder + if svcIP.Equal(config.DummyNodePortSvcIP) { + learnFlowBuilder = c.pipeline[serviceLBTable].BuildFlow(priorityLow). + MatchRegRange(int(serviceLearnReg), marksRegServiceNeedLearn, serviceLearnRegRange). + MatchDstIP(svcIP). + MatchProtocol(protocol). + MatchDstPort(svcPort, nil). + Cookie(cookieID) + } else { + learnFlowBuilder = c.pipeline[serviceLBTable].BuildFlow(priorityLow). + MatchRegRange(int(isNodePortReg), marksServiceIsNodePort, isNodePortRegRange). + MatchRegRange(int(serviceLearnReg), marksRegServiceNeedLearn, serviceLearnRegRange). + MatchProtocol(protocol). + Cookie(cookieID) + } + // affinityTimeout is used as the OpenFlow "hard timeout": learned flow will be removed from // OVS after that time regarding of whether traffic is still hitting the flow. This is the // desired behavior based on the K8s spec. Note that existing connections will keep going to @@ -1968,9 +2282,9 @@ func (c *client) serviceLearnFlow(groupID binding.GroupIDType, svcIP net.IP, svc return nil } -// serviceLBFlow generates the flow which uses the specific group to do Endpoint +// serviceLBFlows generates the flow which uses the specific group to do Endpoint // selection. -func (c *client) serviceLBFlow(groupID binding.GroupIDType, svcIP net.IP, svcPort uint16, protocol binding.Protocol, withSessionAffinity bool) binding.Flow { +func (c *client) serviceLBFlows(groupID binding.GroupIDType, svcIP net.IP, svcPort uint16, protocol binding.Protocol, withSessionAffinity bool) []binding.Flow { var lbResultMark uint32 if withSessionAffinity { lbResultMark = marksRegServiceNeedLearn @@ -1978,16 +2292,35 @@ func (c *client) serviceLBFlow(groupID binding.GroupIDType, svcIP net.IP, svcPor lbResultMark = marksRegServiceSelected } - return c.pipeline[serviceLBTable].BuildFlow(priorityNormal). - MatchProtocol(protocol). - MatchDstPort(svcPort, nil). - MatchDstIP(svcIP). - MatchRegRange(int(serviceLearnReg), marksRegServiceNeedLB, serviceLearnRegRange). - Action().LoadRegRange(int(serviceLearnReg), lbResultMark, serviceLearnRegRange). 
- Action().LoadRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). - Action().Group(groupID). - Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). - Done() + var flows []binding.Flow + // This flow is used to match the first packet of a non-NodePort Service. + if !svcIP.Equal(config.DummyNodePortSvcIP) { + flows = append(flows, c.pipeline[serviceLBTable].BuildFlow(priorityNormal). + MatchProtocol(protocol). + MatchDstPort(svcPort, nil). + MatchDstIP(svcIP). + MatchRegRange(int(serviceLearnReg), marksRegServiceNeedLB, serviceLearnRegRange). + Action().LoadRegRange(int(serviceLearnReg), lbResultMark, serviceLearnRegRange). + Action().LoadRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). + Action().Group(groupID). + Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). + Done(), + ) + } else { + unionVal := (marksServiceIsNodePort << serviceLearnRegRange.Length()) + marksRegServiceNeedLB + flows = append(flows, + c.pipeline[serviceLBTable].BuildFlow(priorityNormal). + MatchProtocol(protocol). + MatchDstPort(svcPort, nil). + MatchRegRange(int(serviceLearnReg), unionVal, binding.Range{16, 19}). + Action().LoadRegRange(int(serviceLearnReg), lbResultMark, serviceLearnRegRange). + Action().LoadRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). + Action().Group(groupID). + Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). + Done(), + ) + } + return flows } // endpointDNATFlow generates the flow which transforms the Service Cluster IP @@ -2162,7 +2495,14 @@ func (c *client) generatePipeline() { if c.enableProxy { c.pipeline[spoofGuardTable] = bridge.CreateTable(spoofGuardTable, serviceHairpinTable, binding.TableMissActionDrop) c.pipeline[ipv6Table] = bridge.CreateTable(ipv6Table, serviceHairpinTable, binding.TableMissActionNext) - c.pipeline[serviceHairpinTable] = bridge.CreateTable(serviceHairpinTable, conntrackTable, binding.TableMissActionNext) + if c.enableProxyFull { + c.pipeline[serviceHairpinTable] = bridge.CreateTable(serviceHairpinTable, serviceConntrackTable, binding.TableMissActionNext) + c.pipeline[serviceConntrackTable] = bridge.CreateTable(serviceConntrackTable, conntrackTable, binding.TableMissActionNext) + c.pipeline[serviceClassifierTable] = bridge.CreateTable(serviceClassifierTable, binding.LastTableID, binding.TableMissActionNone) + c.pipeline[serviceConntrackCommitTable] = bridge.CreateTable(serviceConntrackCommitTable, hairpinSNATTable, binding.TableMissActionNext) + } else { + c.pipeline[serviceHairpinTable] = bridge.CreateTable(serviceHairpinTable, conntrackTable, binding.TableMissActionNext) + } c.pipeline[conntrackStateTable] = bridge.CreateTable(conntrackStateTable, endpointDNATTable, binding.TableMissActionNext) c.pipeline[sessionAffinityTable] = bridge.CreateTable(sessionAffinityTable, binding.LastTableID, binding.TableMissActionNone) c.pipeline[serviceLBTable] = bridge.CreateTable(serviceLBTable, endpointDNATTable, binding.TableMissActionNext) @@ -2190,7 +2530,14 @@ } // NewClient is the constructor of the Client interface.
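An aside on the NodePort branch of `serviceLBFlows` above, before the constructor changes that follow: the match `MatchRegRange(int(serviceLearnReg), unionVal, binding.Range{16, 19})` folds two adjacent register fields, the `isNodePortReg` mark and the `serviceLearnReg` state, into a single 4-bit match. A minimal sketch of the arithmetic; the concrete bit offsets and mark values are assumptions chosen to be consistent with `Range{16, 19}`, not values confirmed by this diff:

```go
package main

import "fmt"

// Illustrative values only: we assume serviceLearnRegRange covers register
// bits 16..18 (so Length() == 3), with the NodePort mark sitting directly
// above it at bit 19, which is what makes the combined match range
// binding.Range{16, 19} in serviceLBFlows consistent.
const (
	serviceLearnRegRangeOffset = 16
	serviceLearnRegRangeLength = 3     // assumed Length() of serviceLearnRegRange
	marksRegServiceNeedLB      = 0b001 // assumed value of the "needs LB" state
	marksServiceIsNodePort     = 0b1   // assumed value of the NodePort mark
)

func main() {
	// Mirrors: unionVal := (marksServiceIsNodePort << serviceLearnRegRange.Length()) + marksRegServiceNeedLB
	unionVal := (marksServiceIsNodePort << serviceLearnRegRangeLength) + marksRegServiceNeedLB
	// Prints 0b1001: bit 3 is the NodePort mark, bits 0..2 the "needs LB"
	// state, matched as one field over register bits 16..19.
	fmt.Printf("unionVal = %#b\n", unionVal)
	fmt.Printf("matched over register bits %d..%d\n",
		serviceLearnRegRangeOffset, serviceLearnRegRangeOffset+serviceLearnRegRangeLength)
}
```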
-func NewClient(bridgeName, mgmtAddr string, ovsDatapathType ovsconfig.OVSDatapathType, enableProxy, enableAntreaPolicy, enableEgress bool, enableDenyTracking bool) Client { +func NewClient(bridgeName string, + mgmtAddr string, + ovsDatapathType ovsconfig.OVSDatapathType, + enableProxy bool, + enableAntreaPolicy bool, + enableEgress bool, + enableDenyTracking bool, + enableProxyFull bool) Client { bridge := binding.NewOFBridge(bridgeName, mgmtAddr) policyCache := cache.NewIndexer( policyConjKeyFunc, @@ -2202,6 +2549,7 @@ func NewClient(bridgeName, mgmtAddr string, ovsDatapathType ovsconfig.OVSDatapat enableAntreaPolicy: enableAntreaPolicy, enableDenyTracking: enableDenyTracking, enableEgress: enableEgress, + enableProxyFull: enableProxyFull, nodeFlowCache: newFlowCategoryCache(), podFlowCache: newFlowCategoryCache(), serviceFlowCache: newFlowCategoryCache(), diff --git a/pkg/agent/openflow/testing/mock_openflow.go b/pkg/agent/openflow/testing/mock_openflow.go index 917fca88551..f554015919a 100644 --- a/pkg/agent/openflow/testing/mock_openflow.go +++ b/pkg/agent/openflow/testing/mock_openflow.go @@ -25,6 +25,7 @@ import ( openflow "antrea.io/antrea/pkg/ovs/openflow" proxy "antrea.io/antrea/third_party/proxy" gomock "github.com/golang/mock/gomock" + v1 "k8s.io/api/core/v1" net "net" reflect "reflect" ) @@ -264,18 +265,18 @@ func (mr *MockClientMockRecorder) InstallClusterServiceCIDRFlows(arg0 interface{ return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallClusterServiceCIDRFlows", reflect.TypeOf((*MockClient)(nil).InstallClusterServiceCIDRFlows), arg0) } -// InstallClusterServiceFlows mocks base method -func (m *MockClient) InstallClusterServiceFlows() error { +// InstallDefaultServiceFlows mocks base method +func (m *MockClient) InstallDefaultServiceFlows() error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "InstallClusterServiceFlows") + ret := m.ctrl.Call(m, "InstallDefaultServiceFlows") ret0, _ := ret[0].(error) return ret0 } -// InstallClusterServiceFlows indicates an expected call of InstallClusterServiceFlows -func (mr *MockClientMockRecorder) InstallClusterServiceFlows() *gomock.Call { +// InstallDefaultServiceFlows indicates an expected call of InstallDefaultServiceFlows +func (mr *MockClientMockRecorder) InstallDefaultServiceFlows() *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallClusterServiceFlows", reflect.TypeOf((*MockClient)(nil).InstallClusterServiceFlows)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallDefaultServiceFlows", reflect.TypeOf((*MockClient)(nil).InstallDefaultServiceFlows)) } // InstallDefaultTunnelFlows mocks base method @@ -334,6 +335,20 @@ func (mr *MockClientMockRecorder) InstallGatewayFlows() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallGatewayFlows", reflect.TypeOf((*MockClient)(nil).InstallGatewayFlows)) } +// InstallInitNodePortClassifierFlows mocks base method +func (m *MockClient) InstallInitNodePortClassifierFlows(arg0 map[int][]net.IP, arg1 bool) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "InstallInitNodePortClassifierFlows", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// InstallInitNodePortClassifierFlows indicates an expected call of InstallInitNodePortClassifierFlows +func (mr *MockClientMockRecorder) InstallInitNodePortClassifierFlows(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallInitNodePortClassifierFlows", 
reflect.TypeOf((*MockClient)(nil).InstallInitNodePortClassifierFlows), arg0, arg1) +} + // InstallLoadBalancerServiceFromOutsideFlows mocks base method func (m *MockClient) InstallLoadBalancerServiceFromOutsideFlows(arg0 net.IP, arg1 uint16, arg2 openflow.Protocol) error { m.ctrl.T.Helper() @@ -418,6 +433,20 @@ func (mr *MockClientMockRecorder) InstallSNATMarkFlows(arg0, arg1 interface{}) * return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallSNATMarkFlows", reflect.TypeOf((*MockClient)(nil).InstallSNATMarkFlows), arg0, arg1) } +// InstallServiceClassifierFlow mocks base method +func (m *MockClient) InstallServiceClassifierFlow(arg0 v1.ServiceType, arg1 net.IP, arg2 uint16, arg3 openflow.Protocol, arg4 bool) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "InstallServiceClassifierFlow", arg0, arg1, arg2, arg3, arg4) + ret0, _ := ret[0].(error) + return ret0 +} + +// InstallServiceClassifierFlow indicates an expected call of InstallServiceClassifierFlow +func (mr *MockClientMockRecorder) InstallServiceClassifierFlow(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallServiceClassifierFlow", reflect.TypeOf((*MockClient)(nil).InstallServiceClassifierFlow), arg0, arg1, arg2, arg3, arg4) +} + // InstallServiceFlows mocks base method func (m *MockClient) InstallServiceFlows(arg0 openflow.GroupIDType, arg1 net.IP, arg2 uint16, arg3 openflow.Protocol, arg4 uint16) error { m.ctrl.T.Helper() @@ -721,6 +750,20 @@ func (mr *MockClientMockRecorder) UninstallSNATMarkFlows(arg0 interface{}) *gomo return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UninstallSNATMarkFlows", reflect.TypeOf((*MockClient)(nil).UninstallSNATMarkFlows), arg0) } +// UninstallServiceClassifierFlow mocks base method +func (m *MockClient) UninstallServiceClassifierFlow(arg0 net.IP, arg1 uint16, arg2 openflow.Protocol) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "UninstallServiceClassifierFlow", arg0, arg1, arg2) + ret0, _ := ret[0].(error) + return ret0 +} + +// UninstallServiceClassifierFlow indicates an expected call of UninstallServiceClassifierFlow +func (mr *MockClientMockRecorder) UninstallServiceClassifierFlow(arg0, arg1, arg2 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UninstallServiceClassifierFlow", reflect.TypeOf((*MockClient)(nil).UninstallServiceClassifierFlow), arg0, arg1, arg2) +} + // UninstallServiceFlows mocks base method func (m *MockClient) UninstallServiceFlows(arg0 net.IP, arg1 uint16, arg2 openflow.Protocol) error { m.ctrl.T.Helper() diff --git a/pkg/agent/proxy/proxier.go b/pkg/agent/proxy/proxier.go index 71fe7c1f306..655fb123574 100644 --- a/pkg/agent/proxy/proxier.go +++ b/pkg/agent/proxy/proxier.go @@ -32,9 +32,11 @@ import ( "k8s.io/klog/v2" utilnet "k8s.io/utils/net" + agentconfig "antrea.io/antrea/pkg/agent/config" "antrea.io/antrea/pkg/agent/openflow" "antrea.io/antrea/pkg/agent/proxy/metrics" "antrea.io/antrea/pkg/agent/proxy/types" + "antrea.io/antrea/pkg/agent/route" "antrea.io/antrea/pkg/features" binding "antrea.io/antrea/pkg/ovs/openflow" k8sproxy "antrea.io/antrea/third_party/proxy" @@ -99,11 +101,15 @@ type proxier struct { // oversizeServiceSet records the Services that have more than 800 Endpoints. 
oversizeServiceSet sets.String - runner *k8sproxy.BoundedFrequencyRunner - stopChan <-chan struct{} - ofClient openflow.Client - isIPv6 bool - enableEndpointSlice bool + runner *k8sproxy.BoundedFrequencyRunner + stopChan <-chan struct{} + ofClient openflow.Client + routeClient route.Interface + nodePortIPMap map[int][]net.IP + hostGateWay string + isIPv6 bool + proxyFullEnabled bool + endpointSliceEnabled bool } func endpointKey(endpoint k8sproxy.Endpoint, protocol binding.Protocol) string { @@ -129,25 +135,43 @@ func (p *proxier) removeStaleServices() { p.oversizeServiceSet.Delete(svcPortName.String()) } if err := p.ofClient.UninstallServiceFlows(svcInfo.ClusterIP(), uint16(svcInfo.Port()), svcInfo.OFProtocol); err != nil { - klog.Errorf("Failed to remove flows of Service %v: %v", svcPortName, err) + klog.ErrorS(err, "Failed to remove flows of Service", "Service", svcPortName) continue } - for _, ingress := range svcInfo.LoadBalancerIPStrings() { - if ingress != "" { - if err := p.uninstallLoadBalancerServiceFlows(net.ParseIP(ingress), uint16(svcInfo.Port()), svcInfo.OFProtocol); err != nil { - klog.Errorf("Error when removing Service flows: %v", err) + groupID, _ := p.groupCounter.Get(svcPortName, false) + if err := p.ofClient.UninstallServiceGroup(groupID); err != nil { + klog.ErrorS(err, "Failed to remove group of Service", "Service", svcPortName) + continue + } + + if p.proxyFullEnabled { + // Remove Service group whose Endpoints are local. + if svcInfo.NodeLocalExternal() { + groupIDLocal, _ := p.groupCounter.Get(svcPortName, true) + if err := p.ofClient.UninstallServiceGroup(groupIDLocal); err != nil { + klog.ErrorS(err, "Failed to remove group of Service", "Service", svcPortName) + continue + } + } + // Remove NodePort flows and configurations. + if svcInfo.NodePort() > 0 { + if err := p.uninstallNodePortService(uint16(svcInfo.NodePort()), svcInfo.OFProtocol); err != nil { + klog.ErrorS(err, "Failed to remove flows and configurations of Service", "Service", svcPortName) + continue + } + } + // Remove LoadBalancer flows and configurations.
+ if len(svcInfo.LoadBalancerIPStrings()) > 0 { + if err := p.uninstallLoadBalancerService(svcInfo.LoadBalancerIPStrings(), uint16(svcInfo.Port()), svcInfo.OFProtocol); err != nil { + klog.ErrorS(err, "Failed to remove flows and configurations of Service", "Service", svcPortName) continue } } } - groupID, _ := p.groupCounter.Get(svcPortName) - if err := p.ofClient.UninstallServiceGroup(groupID); err != nil { - klog.Errorf("Failed to remove flows of Service %v: %v", svcPortName, err) - continue - } + delete(p.serviceInstalledMap, svcPortName) p.deleteServiceByIP(svcInfo.String()) - p.groupCounter.Recycle(svcPortName) + p.groupCounter.Recycle(svcPortName, false) } } @@ -217,7 +241,9 @@ func (p *proxier) removeStaleEndpoints() { func serviceIdentityChanged(svcInfo, pSvcInfo *types.ServiceInfo) bool { return svcInfo.ClusterIP().String() != pSvcInfo.ClusterIP().String() || svcInfo.Port() != pSvcInfo.Port() || - svcInfo.OFProtocol != pSvcInfo.OFProtocol + svcInfo.OFProtocol != pSvcInfo.OFProtocol || + svcInfo.NodePort() != pSvcInfo.NodePort() || + svcInfo.NodeLocalExternal() != pSvcInfo.NodeLocalExternal() } // smallSliceDifference builds a slice which includes all the strings from s1 @@ -241,10 +267,77 @@ func smallSliceDifference(s1, s2 []string) []string { return diff } +func (p *proxier) installNodePortService(groupID binding.GroupIDType, svcPort uint16, protocol binding.Protocol, affinityTimeout uint16, nodeLocalExternal bool) error { + if err := p.ofClient.InstallServiceFlows(groupID, agentconfig.DummyNodePortSvcIP, svcPort, protocol, affinityTimeout); err != nil { + return fmt.Errorf("failed to install Service NodePort load balancing flows: %w", err) + } + if err := p.ofClient.InstallServiceClassifierFlow(corev1.ServiceTypeNodePort, agentconfig.DummyNodePortSvcIP, svcPort, protocol, nodeLocalExternal); err != nil { + return fmt.Errorf("failed to install Service NodePort classifying flows: %w", err) + } + if err := p.routeClient.AddNodePort(p.nodePortIPMap, svcPort, protocol, p.isIPv6); err != nil { + return fmt.Errorf("failed to install Service NodePort traffic redirecting flows: %w", err) + } + return nil +} + +func (p *proxier) uninstallNodePortService(svcPort uint16, protocol binding.Protocol) error { + if err := p.ofClient.UninstallServiceFlows(agentconfig.DummyNodePortSvcIP, svcPort, protocol); err != nil { + return fmt.Errorf("failed to remove Service NodePort load balancing flows: %w", err) + } + if err := p.ofClient.UninstallServiceClassifierFlow(agentconfig.DummyNodePortSvcIP, svcPort, protocol); err != nil { + return fmt.Errorf("failed to remove Service NodePort classifying flows: %w", err) + } + if err := p.routeClient.DeleteNodePort(p.nodePortIPMap, svcPort, protocol, p.isIPv6); err != nil { + return fmt.Errorf("failed to remove Service NodePort traffic redirecting flows: %w", err) + } + return nil +} + +func (p *proxier) installLoadBalancerService(groupID binding.GroupIDType, loadBalancerIPStrings []string, + svcPort uint16, protocol binding.Protocol, affinityTimeout uint16, nodeLocalExternal bool) error { + for _, ingress := range loadBalancerIPStrings { + if ingress != "" { + if err := p.ofClient.InstallServiceFlows(groupID, net.ParseIP(ingress), svcPort, protocol, affinityTimeout); err != nil { + return fmt.Errorf("failed to install Service LoadBalancer load balancing flows: %w", err) + } + if err := p.ofClient.InstallLoadBalancerServiceFromOutsideFlows(net.ParseIP(ingress), svcPort, protocol); err != nil { + return fmt.Errorf("failed to install Service 
LoadBalancer flows: %w", err) + } + if err := p.ofClient.InstallServiceClassifierFlow(corev1.ServiceTypeLoadBalancer, net.ParseIP(ingress), svcPort, protocol, nodeLocalExternal); err != nil { + return fmt.Errorf("failed to install Service LoadBalancer classifying flows: %w", err) + } + } + } + if err := p.routeClient.AddLoadBalancer(loadBalancerIPStrings, p.isIPv6); err != nil { + return fmt.Errorf("failed to install Service LoadBalancer traffic redirecting flows: %w", err) + } + return nil +} + +func (p *proxier) uninstallLoadBalancerService(loadBalancerIPStrings []string, svcPort uint16, protocol binding.Protocol) error { + for _, ingress := range loadBalancerIPStrings { + if ingress != "" { + if err := p.ofClient.UninstallServiceFlows(net.ParseIP(ingress), svcPort, protocol); err != nil { + return fmt.Errorf("failed to remove Service LoadBalancer load balancing flows: %w", err) + } + if err := p.ofClient.UninstallLoadBalancerServiceFromOutsideFlows(net.ParseIP(ingress), svcPort, protocol); err != nil { + return fmt.Errorf("failed to remove Service LoadBalancer flows: %w", err) + } + if err := p.ofClient.UninstallServiceClassifierFlow(net.ParseIP(ingress), svcPort, protocol); err != nil { + return fmt.Errorf("failed to remove Service LoadBalancer classifying flows: %w", err) + } + } + } + if err := p.routeClient.DeleteLoadBalancer(loadBalancerIPStrings, p.isIPv6); err != nil { + return fmt.Errorf("failed to remove Service LoadBalancer traffic redirecting flows: %w", err) + } + return nil +} + func (p *proxier) installServices() { for svcPortName, svcPort := range p.serviceMap { svcInfo := svcPort.(*types.ServiceInfo) - groupID, _ := p.groupCounter.Get(svcPortName) + groupID, _ := p.groupCounter.Get(svcPortName, false) endpointsInstalled, ok := p.endpointsInstalledMap[svcPortName] if !ok { endpointsInstalled = map[string]k8sproxy.Endpoint{} @@ -334,14 +427,31 @@ func (p *proxier) installServices() { if needUpdateEndpoints { err := p.ofClient.InstallEndpointFlows(svcInfo.OFProtocol, endpointUpdateList) if err != nil { - klog.Errorf("Error when installing Endpoints flows: %v", err) + klog.ErrorS(err, "Error when installing Endpoints flows") continue } err = p.ofClient.InstallServiceGroup(groupID, svcInfo.StickyMaxAgeSeconds() != 0, endpointUpdateList) if err != nil { - klog.Errorf("Error when installing Endpoints groups: %v", err) + klog.ErrorS(err, "Error when installing Endpoints groups") continue } + + // Install another group when Service externalTrafficPolicy is Local. + if p.proxyFullEnabled && svcInfo.NodeLocalExternal() { + groupIDLocal, _ := p.groupCounter.Get(svcPortName, true) + var localEndpointList []k8sproxy.Endpoint + for _, ed := range endpointUpdateList { + if !ed.GetIsLocal() { + continue + } + localEndpointList = append(localEndpointList, ed) + } + if err = p.ofClient.InstallServiceGroup(groupIDLocal, svcInfo.StickyMaxAgeSeconds() != 0, localEndpointList); err != nil { + klog.ErrorS(err, "Error when installing Group for Service whose externalTrafficPolicy is Local") + continue + } + } + for _, e := range endpointUpdateList { // If the Endpoint is newly installed, add a reference. if _, ok := endpointsInstalled[e.String()]; !ok { @@ -355,40 +465,75 @@ func (p *proxier) installServices() { if needUpdateService { // Delete previous flow. if needRemoval { + // If previous Service should be removed, remove ClusterIP flows of previous Service. 
if err := p.ofClient.UninstallServiceFlows(pSvcInfo.ClusterIP(), uint16(pSvcInfo.Port()), pSvcInfo.OFProtocol); err != nil { - klog.Errorf("Failed to remove flows of Service %v: %v", svcPortName, err) + klog.ErrorS(err, "Failed to remove flows of Service", "Service", svcPortName) continue } + + if p.proxyFullEnabled { + // If previous Service which has NodePort should be removed, remove NodePort flows and configurations of previous Service. + if pSvcInfo.NodePort() > 0 { + if err := p.uninstallNodePortService(uint16(pSvcInfo.NodePort()), pSvcInfo.OFProtocol); err != nil { + klog.ErrorS(err, "Failed to remove flows and configurations of Service", "Service", svcPortName) + continue + } + } + } } + + // Install ClusterIP flows of current Service. if err := p.ofClient.InstallServiceFlows(groupID, svcInfo.ClusterIP(), uint16(svcInfo.Port()), svcInfo.OFProtocol, uint16(svcInfo.StickyMaxAgeSeconds())); err != nil { klog.Errorf("Error when installing Service flows: %v", err) continue } - // Install OpenFlow entries for the ingress IPs of LoadBalancer Service. - // The LoadBalancer Service should be accessible from Pod, Node and - // external host. - var toDelete, toAdd []string - if needRemoval { - toDelete = pSvcInfo.LoadBalancerIPStrings() - toAdd = svcInfo.LoadBalancerIPStrings() - } else { - toDelete = deletedLoadBalancerIPs - toAdd = addedLoadBalancerIPs - } - for _, ingress := range toDelete { - if ingress != "" { - // It is safe to access pSvcInfo here. If this is a new Service, - // then toDelete will be an empty slice. - if err := p.uninstallLoadBalancerServiceFlows(net.ParseIP(ingress), uint16(pSvcInfo.Port()), pSvcInfo.OFProtocol); err != nil { - klog.Errorf("Error when removing LoadBalancer Service flows: %v", err) + + if p.proxyFullEnabled { + // Install ClusterIP route on Node so that ClusterIP can be accessed on Node. Every time a new ClusterIP + // is created, the routing target IP block will be recalculated and expanded so that it can route the newly + // created ClusterIP. Deleting a ClusterIP will not shrink the target routing IP block. The Service CIDR + // can eventually be inferred after enough ClusterIPs have been created. + if err := p.routeClient.AddClusterIPRoute(svcInfo.ClusterIP(), p.isIPv6); err != nil { + klog.ErrorS(err, "Failed to install ClusterIP route of Service", "Service", svcPortName) + } + + // If externalTrafficPolicy of the Service is Local, Service NodePort or LoadBalancer should use the Service + // group whose Endpoints are local. + nGroupID := groupID + if svcInfo.NodeLocalExternal() { + nGroupID, _ = p.groupCounter.Get(svcPortName, true) + } + + // If previous Service is nil or NodePort flows and configurations of previous Service have been removed, + // install NodePort flows and configurations for current Service. + if svcInfo.NodePort() > 0 && (pSvcInfo == nil || needRemoval) { + if err := p.installNodePortService(nGroupID, uint16(svcInfo.NodePort()), svcInfo.OFProtocol, uint16(svcInfo.StickyMaxAgeSeconds()), svcInfo.NodeLocalExternal()); err != nil { + klog.ErrorS(err, "Failed to install NodePort flows and configurations of Service", "Service", svcPortName) continue } } - } - for _, ingress := range toAdd { - if ingress != "" { - if err := p.installLoadBalancerServiceFlows(groupID, net.ParseIP(ingress), uint16(svcInfo.Port()), svcInfo.OFProtocol, uint16(svcInfo.StickyMaxAgeSeconds())); err != nil { - klog.Errorf("Error when installing LoadBalancer Service flows: %v", err) + + // Service LoadBalancer flows can be partially updated.
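The "partially updated" comment above means only the ingress IPs that changed get touched. The `deletedLoadBalancerIPs`/`addedLoadBalancerIPs` slices used below are computed earlier in `installServices` (not shown in this diff); given the `smallSliceDifference` helper above, the computation is presumably along these lines:

```go
// Presumed computation of the LoadBalancer ingress deltas using the
// smallSliceDifference helper shown earlier in this file: only ingress IPs
// that actually changed need their flows and host configuration touched.
var prevIPs []string
if pSvcInfo != nil {
	prevIPs = pSvcInfo.LoadBalancerIPStrings()
}
curIPs := svcInfo.LoadBalancerIPStrings()
deletedLoadBalancerIPs := smallSliceDifference(prevIPs, curIPs) // present before, gone now
addedLoadBalancerIPs := smallSliceDifference(curIPs, prevIPs)   // new in this update
```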
+ var toDelete, toAdd []string + if needRemoval { + toDelete = pSvcInfo.LoadBalancerIPStrings() + toAdd = svcInfo.LoadBalancerIPStrings() + } else { + toDelete = deletedLoadBalancerIPs + toAdd = addedLoadBalancerIPs + } + // Remove LoadBalancer flows and configurations. + if len(toDelete) > 0 { + if err := p.uninstallLoadBalancerService(toDelete, uint16(pSvcInfo.Port()), pSvcInfo.OFProtocol); err != nil { + klog.ErrorS(err, "Failed to remove flows and configurations of Service", "Service", svcPortName) + continue + } + } + + // Install LoadBalancer flows and configurations. + if len(toAdd) > 0 { + if err := p.installLoadBalancerService(nGroupID, toAdd, uint16(svcInfo.Port()), svcInfo.OFProtocol, uint16(svcInfo.StickyMaxAgeSeconds()), svcInfo.NodeLocalExternal()); err != nil { + klog.ErrorS(err, "Failed to install LoadBalancer flows and configurations of Service", "Service", svcPortName) continue } } @@ -556,8 +701,16 @@ func (p *proxier) deleteServiceByIP(serviceStr string) { func (p *proxier) Run(stopCh <-chan struct{}) { p.once.Do(func() { + if p.proxyFullEnabled { + if err := p.routeClient.InitServiceProxyConfig(p.isIPv6); err != nil { + panic(err) + } + if err := p.ofClient.InstallInitNodePortClassifierFlows(p.nodePortIPMap, p.isIPv6); err != nil { + panic(err) + } + } go p.serviceConfig.Run(stopCh) - if p.enableEndpointSlice { + if p.endpointSliceEnabled { go p.endpointSliceConfig.Run(stopCh) } else { go p.endpointsConfig.Run(stopCh) @@ -605,7 +758,7 @@ func (p *proxier) GetServiceFlowKeys(serviceName, namespace string) ([]string, [ svcFlows := p.ofClient.GetServiceFlowKeys(svcInfo.ClusterIP(), uint16(svcInfo.Port()), svcInfo.OFProtocol, epList) flows = append(flows, svcFlows...) - groupID, _ := p.groupCounter.Get(svcPortName) + groupID, _ := p.groupCounter.Get(svcPortName, false) groups = append(groups, groupID) } @@ -616,7 +769,10 @@ func NewProxier( hostname string, informerFactory informers.SharedInformerFactory, ofClient openflow.Client, - isIPv6 bool) *proxier { + isIPv6 bool, + routeClient route.Interface, + nodePortIPMap map[int][]net.IP, + proxyFullEnabled bool) *proxier { recorder := record.NewBroadcaster().NewRecorder( runtime.NewScheme(), corev1.EventSource{Component: componentName, Host: hostname}, @@ -624,18 +780,16 @@ func NewProxier( metrics.Register() klog.V(2).Infof("Creating proxier with IPv6 enabled=%t", isIPv6) - enableEndpointSlice := features.DefaultFeatureGate.Enabled(features.EndpointSlice) - + endpointSliceEnabled := features.DefaultFeatureGate.Enabled(features.EndpointSlice) ipFamily := corev1.IPv4Protocol if isIPv6 { ipFamily = corev1.IPv6Protocol } p := &proxier{ - enableEndpointSlice: enableEndpointSlice, endpointsConfig: config.NewEndpointsConfig(informerFactory.Core().V1().Endpoints(), resyncPeriod), serviceConfig: config.NewServiceConfig(informerFactory.Core().V1().Services(), resyncPeriod), - endpointsChanges: newEndpointsChangesTracker(hostname, enableEndpointSlice, isIPv6), + endpointsChanges: newEndpointsChangesTracker(hostname, endpointSliceEnabled, isIPv6), serviceChanges: newServiceChangesTracker(recorder, ipFamily), serviceMap: k8sproxy.ServiceMap{}, serviceInstalledMap: k8sproxy.ServiceMap{}, @@ -646,12 +800,17 @@ func NewProxier( oversizeServiceSet: sets.NewString(), groupCounter: types.NewGroupCounter(isIPv6), ofClient: ofClient, + routeClient: routeClient, + nodePortIPMap: nodePortIPMap, isIPv6: isIPv6, + proxyFullEnabled: proxyFullEnabled, + endpointSliceEnabled: endpointSliceEnabled, } + p.serviceConfig.RegisterEventHandler(p) 
p.endpointsConfig.RegisterEventHandler(p) p.runner = k8sproxy.NewBoundedFrequencyRunner(componentName, p.syncProxyRules, time.Second, 30*time.Second, 2) - if enableEndpointSlice { + if endpointSliceEnabled { p.endpointSliceConfig = config.NewEndpointSliceConfig(informerFactory.Discovery().V1beta1().EndpointSlices(), resyncPeriod) p.endpointSliceConfig.RegisterEventHandler(p) } else { @@ -692,13 +851,19 @@ func (p *metaProxierWrapper) GetServiceByIP(serviceStr string) (k8sproxy.Service } func NewDualStackProxier( - hostname string, informerFactory informers.SharedInformerFactory, ofClient openflow.Client) *metaProxierWrapper { + hostname string, + informerFactory informers.SharedInformerFactory, + ofClient openflow.Client, + routeClient route.Interface, + nodePortIPMap map[int][]net.IP, + nodePortIPv6Map map[int][]net.IP, + proxyFullEnabled bool) *metaProxierWrapper { - // Create an ipv4 instance of the single-stack proxier - ipv4Proxier := NewProxier(hostname, informerFactory, ofClient, false) + // Create an ipv4 instance of the single-stack proxier. + ipv4Proxier := NewProxier(hostname, informerFactory, ofClient, false, routeClient, nodePortIPMap, proxyFullEnabled) - // Create an ipv6 instance of the single-stack proxier - ipv6Proxier := NewProxier(hostname, informerFactory, ofClient, true) + // Create an ipv6 instance of the single-stack proxier. + ipv6Proxier := NewProxier(hostname, informerFactory, ofClient, true, routeClient, nodePortIPv6Map, proxyFullEnabled) // Create a meta-proxier that dispatch calls between the two // single-stack proxier instances. diff --git a/pkg/agent/proxy/proxier_others.go b/pkg/agent/proxy/proxier_others.go deleted file mode 100644 index 28240caa188..00000000000 --- a/pkg/agent/proxy/proxier_others.go +++ /dev/null @@ -1,39 +0,0 @@ -// +build !windows -// Copyright 2020 Antrea Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proxy - -import ( - "net" - - binding "antrea.io/antrea/pkg/ovs/openflow" -) - -// installLoadBalancerServiceFlows install OpenFlow entries for LoadBalancer Service. -// The rules for traffic from local Pod to LoadBalancer Service are same with rules for Cluster Service. -// For the LoadBalancer Service traffic from outside, kube-proxy will handle it. 
-func (p *proxier) installLoadBalancerServiceFlows(groupID binding.GroupIDType, svcIP net.IP, svcPort uint16, protocol binding.Protocol, affinityTimeout uint16) error { - if err := p.ofClient.InstallServiceFlows(groupID, svcIP, svcPort, protocol, affinityTimeout); err != nil { - return err - } - return nil -} - -func (p *proxier) uninstallLoadBalancerServiceFlows(svcIP net.IP, svcPort uint16, protocol binding.Protocol) error { - if err := p.ofClient.UninstallServiceFlows(svcIP, svcPort, protocol); err != nil { - return err - } - return nil -} diff --git a/pkg/agent/proxy/proxier_test.go b/pkg/agent/proxy/proxier_test.go index 2af7b21d968..b10ab0efb29 100644 --- a/pkg/agent/proxy/proxier_test.go +++ b/pkg/agent/proxy/proxier_test.go @@ -30,14 +30,33 @@ import ( "k8s.io/client-go/tools/record" "k8s.io/component-base/metrics/testutil" + agentconfig "antrea.io/antrea/pkg/agent/config" "antrea.io/antrea/pkg/agent/openflow" ofmock "antrea.io/antrea/pkg/agent/openflow/testing" "antrea.io/antrea/pkg/agent/proxy/metrics" "antrea.io/antrea/pkg/agent/proxy/types" + "antrea.io/antrea/pkg/agent/route" + routemock "antrea.io/antrea/pkg/agent/route/testing" binding "antrea.io/antrea/pkg/ovs/openflow" k8sproxy "antrea.io/antrea/third_party/proxy" ) +var ( + svcIPv4 = net.ParseIP("10.20.30.41") + svcIPv6 = net.ParseIP("2001::10:20:30:41") + ep1IPv4 = net.ParseIP("10.180.0.1") + ep1IPv6 = net.ParseIP("2001::10:180:0:1") + ep2IPv4 = net.ParseIP("10.180.0.2") + ep2IPv6 = net.ParseIP("2001::10:180:0:2") + loadBalancerIPv4 = net.ParseIP("169.254.169.1") + loadBalancerIPv6 = net.ParseIP("fec0::169:254:169:1") + svcNodePortIPv4 = net.ParseIP("192.168.77.100") + svcNodePortIPv6 = net.ParseIP("2001::192:168:77:100") + + nodePortIPv4Map = map[int][]net.IP{2: {svcNodePortIPv4}} + nodePortIPv6Map = map[int][]net.IP{2: {svcNodePortIPv6}} +) + func makeNamespaceName(namespace, name string) apimachinerytypes.NamespacedName { return apimachinerytypes.NamespacedName{Namespace: namespace, Name: name} } @@ -81,7 +100,7 @@ func makeTestEndpoints(namespace, name string, eptFunc func(*corev1.Endpoints)) return ept } -func NewFakeProxier(ofClient openflow.Client, isIPv6 bool) *proxier { +func NewFakeProxier(routeClient route.Interface, ofClient openflow.Client, nodePortIPMap map[int][]net.IP, isIPv6, proxyFullEnabled bool) *proxier { hostname := "localhost" eventBroadcaster := record.NewBroadcaster() recorder := eventBroadcaster.NewRecorder( @@ -104,8 +123,11 @@ func NewFakeProxier(ofClient openflow.Client, isIPv6 bool) *proxier { endpointsMap: types.EndpointsMap{}, groupCounter: types.NewGroupCounter(isIPv6), ofClient: ofClient, + routeClient: routeClient, serviceStringMap: map[string]k8sproxy.ServicePortName{}, isIPv6: isIPv6, + nodePortIPMap: nodePortIPMap, + proxyFullEnabled: proxyFullEnabled, } p.runner = k8sproxy.NewBoundedFrequencyRunner(componentName, p.syncProxyRules, time.Second, 30*time.Second, 2) return p @@ -115,7 +137,8 @@ func testClusterIP(t *testing.T, svcIP net.IP, epIP net.IP, isIPv6 bool) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockOFClient := ofmock.NewMockClient(ctrl) - fp := NewFakeProxier(mockOFClient, isIPv6) + mockRouteClient := routemock.NewMockInterface(ctrl) + fp := NewFakeProxier(mockRouteClient, mockOFClient, nil, isIPv6, true) svcPort := 80 svcPortName := k8sproxy.ServicePortName{ @@ -149,7 +172,7 @@ func testClusterIP(t *testing.T, svcIP net.IP, epIP net.IP, isIPv6 bool) { }), ) - groupID, _ := fp.groupCounter.Get(svcPortName) + groupID, _ := fp.groupCounter.Get(svcPortName, false) 
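The `false` argument in the `groupCounter.Get` call above is the key change this test tracks: a Service can now own two OpenFlow groups, one load balancing over all Endpoints and one restricted to local Endpoints. A short usage sketch of the two-key contract, matching the groupcounter.go hunk later in this diff:

```go
package main

import (
	"fmt"

	"antrea.io/antrea/pkg/agent/proxy/types"
	k8sproxy "antrea.io/antrea/third_party/proxy"
	apimachinerytypes "k8s.io/apimachinery/pkg/types"
)

func main() {
	spn := k8sproxy.ServicePortName{
		NamespacedName: apimachinerytypes.NamespacedName{Namespace: "ns1", Name: "svc1"},
		Port:           "80",
	}
	counter := types.NewGroupCounter(false) // IPv4 group ID space

	allID, _ := counter.Get(spn, false)  // group over all Endpoints
	localID, _ := counter.Get(spn, true) // distinct group over local Endpoints only
	fmt.Println(allID != localID)        // true: the bool is part of the map key

	counter.Recycle(spn, true) // frees only the local-Endpoints group ID for reuse
}
```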
mockOFClient.EXPECT().InstallServiceGroup(groupID, false, gomock.Any()).Times(1) bindingProtocol := binding.ProtocolTCP if isIPv6 { @@ -157,45 +180,68 @@ func testClusterIP(t *testing.T, svcIP net.IP, epIP net.IP, isIPv6 bool) { } mockOFClient.EXPECT().InstallEndpointFlows(bindingProtocol, gomock.Any()).Times(1) mockOFClient.EXPECT().InstallServiceFlows(groupID, svcIP, uint16(svcPort), bindingProtocol, uint16(0)).Times(1) + mockRouteClient.EXPECT().AddClusterIPRoute(svcIP, isIPv6).Times(1) fp.syncProxyRules() } -func TestLoadbalancer(t *testing.T) { +func testLoadBalancer(t *testing.T, nodePortIPMap map[int][]net.IP, svcIP, ep1IP, ep2IP, loadBalancerIP net.IP, isIPv6, nodeLocalExternal bool) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockOFClient := ofmock.NewMockClient(ctrl) - fp := NewFakeProxier(mockOFClient, false) + mockRouteClient := routemock.NewMockInterface(ctrl) + fp := NewFakeProxier(mockRouteClient, mockOFClient, nodePortIPMap, isIPv6, true) - svcIPv4 := net.ParseIP("10.20.30.41") svcPort := 80 - loadBalancerIPv4 := net.ParseIP("169.254.0.1") + svcNodePort := 30008 svcPortName := k8sproxy.ServicePortName{ NamespacedName: makeNamespaceName("ns1", "svc1"), Port: "80", Protocol: corev1.ProtocolTCP, } + externalTrafficPolicy := corev1.ServiceExternalTrafficPolicyTypeCluster + if nodeLocalExternal { + externalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeLocal + } + makeServiceMap(fp, makeTestService(svcPortName.Namespace, svcPortName.Name, func(svc *corev1.Service) { - svc.Spec.ClusterIP = svcIPv4.String() - svc.Spec.LoadBalancerIP = loadBalancerIPv4.String() + svc.Spec.ClusterIP = svcIP.String() + svc.Spec.LoadBalancerIP = loadBalancerIP.String() svc.Spec.Type = corev1.ServiceTypeLoadBalancer - ingress := []corev1.LoadBalancerIngress{{IP: loadBalancerIPv4.String()}} + ingress := []corev1.LoadBalancerIngress{{IP: loadBalancerIP.String()}} svc.Status.LoadBalancer.Ingress = ingress svc.Spec.Ports = []corev1.ServicePort{{ + NodePort: int32(svcNodePort), Name: svcPortName.Port, Port: int32(svcPort), Protocol: corev1.ProtocolTCP, }} + svc.Spec.ExternalTrafficPolicy = externalTrafficPolicy }), ) - epIP := net.ParseIP("10.180.0.1") - makeEndpointsMap(fp, - makeTestEndpoints(svcPortName.Namespace, svcPortName.Name, func(ept *corev1.Endpoints) { + var eps []*corev1.Endpoints + epFunc := func(ept *corev1.Endpoints) { + ept.Subsets = []corev1.EndpointSubset{{ + Addresses: []corev1.EndpointAddress{{ + IP: ep1IP.String(), + Hostname: "localhost", + }}, + Ports: []corev1.EndpointPort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + Protocol: corev1.ProtocolTCP, + }}, + }} + } + eps = append(eps, makeTestEndpoints(svcPortName.Namespace, svcPortName.Name, epFunc)) + if nodeLocalExternal { + epFunc = func(ept *corev1.Endpoints) { ept.Subsets = []corev1.EndpointSubset{{ Addresses: []corev1.EndpointAddress{{ - IP: epIP.String(), + IP: ep2IP.String(), + Hostname: "remote", }}, Ports: []corev1.EndpointPort{{ Name: svcPortName.Port, @@ -203,33 +249,169 @@ func TestLoadbalancer(t *testing.T) { Protocol: corev1.ProtocolTCP, }}, }} + } + eps = append(eps, makeTestEndpoints(svcPortName.Namespace, svcPortName.Name, epFunc)) + } + makeEndpointsMap(fp, eps...) 
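A note on the `Hostname` fields set in the Endpoints above: `NewFakeProxier` passes hostname "localhost", and the local/remote split is what drives the extra local-group expectations that follow. How an Endpoint comes to report `GetIsLocal() == true` is decided in the endpoints change tracker, which is not part of this diff; a hedged sketch of the presumed rule, assuming the corev1 import already present in this test file:

```go
// Presumed locality rule applied by the endpoints change tracker: an address
// counts as local when its host matches the proxier's own hostname. The real
// implementation may compare NodeName instead of Hostname.
func isLocalEndpoint(addr corev1.EndpointAddress, proxierHostname string) bool {
	return addr.Hostname == proxierHostname
}
```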
+ + groupID, _ := fp.groupCounter.Get(svcPortName, false) + bindingProtocol := binding.ProtocolTCP + if isIPv6 { + bindingProtocol = binding.ProtocolTCPv6 + } + mockOFClient.EXPECT().InstallServiceGroup(groupID, false, gomock.Any()).Times(1) + mockOFClient.EXPECT().InstallEndpointFlows(bindingProtocol, gomock.Any()).Times(1) + mockOFClient.EXPECT().InstallServiceFlows(groupID, svcIP, uint16(svcPort), bindingProtocol, uint16(0)).Times(1) + if nodeLocalExternal { + groupID, _ = fp.groupCounter.Get(svcPortName, true) + mockOFClient.EXPECT().InstallServiceGroup(groupID, false, gomock.Any()).Times(1) + } + mockOFClient.EXPECT().InstallServiceFlows(groupID, loadBalancerIP, uint16(svcPort), bindingProtocol, uint16(0)).Times(1) + mockOFClient.EXPECT().InstallServiceFlows(groupID, gomock.Any(), uint16(svcNodePort), bindingProtocol, uint16(0)).Times(1) + mockOFClient.EXPECT().InstallLoadBalancerServiceFromOutsideFlows(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes() + mockOFClient.EXPECT().InstallServiceClassifierFlow(corev1.ServiceTypeLoadBalancer, loadBalancerIP, uint16(svcPort), bindingProtocol, nodeLocalExternal).Times(1) + mockOFClient.EXPECT().InstallServiceClassifierFlow(corev1.ServiceTypeNodePort, agentconfig.DummyNodePortSvcIP, uint16(svcNodePort), bindingProtocol, nodeLocalExternal).Times(1) + mockRouteClient.EXPECT().AddClusterIPRoute(svcIP, isIPv6).Times(1) + mockRouteClient.EXPECT().AddLoadBalancer([]string{loadBalancerIP.String()}, isIPv6).Times(1) + mockRouteClient.EXPECT().AddNodePort(nodePortIPMap, uint16(svcNodePort), bindingProtocol, isIPv6).Times(1) + + fp.syncProxyRules() +} + +func testNodePort(t *testing.T, nodePortIPMap map[int][]net.IP, svcIP, ep1IP, ep2IP net.IP, isIPv6, nodeLocalExternal bool) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockOFClient := ofmock.NewMockClient(ctrl) + mockRouteClient := routemock.NewMockInterface(ctrl) + fp := NewFakeProxier(mockRouteClient, mockOFClient, nodePortIPMap, isIPv6, true) + + svcPort := 80 + svcNodePort := 31000 + svcPortName := k8sproxy.ServicePortName{ + NamespacedName: makeNamespaceName("ns1", "svc1"), + Port: "80", + Protocol: corev1.ProtocolTCP, + } + externalTrafficPolicy := corev1.ServiceExternalTrafficPolicyTypeCluster + if nodeLocalExternal { + externalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeLocal + } + + makeServiceMap(fp, + makeTestService(svcPortName.Namespace, svcPortName.Name, func(svc *corev1.Service) { + svc.Spec.ClusterIP = svcIP.String() + svc.Spec.Type = corev1.ServiceTypeNodePort + svc.Spec.Ports = []corev1.ServicePort{{ + NodePort: int32(svcNodePort), + Name: svcPortName.Port, + Port: int32(svcPort), + Protocol: corev1.ProtocolTCP, + }} + svc.Spec.ExternalTrafficPolicy = externalTrafficPolicy }), ) - groupID, _ := fp.groupCounter.Get(svcPortName) + var eps []*corev1.Endpoints + epFunc := func(ept *corev1.Endpoints) { + ept.Subsets = []corev1.EndpointSubset{{ + Addresses: []corev1.EndpointAddress{{ + IP: ep1IP.String(), + Hostname: "localhost", + }}, + Ports: []corev1.EndpointPort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + Protocol: corev1.ProtocolTCP, + }}, + }} + } + eps = append(eps, makeTestEndpoints(svcPortName.Namespace, svcPortName.Name, epFunc)) + if nodeLocalExternal { + epFunc = func(ept *corev1.Endpoints) { + ept.Subsets = []corev1.EndpointSubset{{ + Addresses: []corev1.EndpointAddress{{ + IP: ep2IP.String(), + Hostname: "remote", + }}, + Ports: []corev1.EndpointPort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + Protocol: corev1.ProtocolTCP, + }}, 
+ }} + } + eps = append(eps, makeTestEndpoints(svcPortName.Namespace, svcPortName.Name, epFunc)) + } + makeEndpointsMap(fp, eps...) + + groupID, _ := fp.groupCounter.Get(svcPortName, false) + bindingProtocol := binding.ProtocolTCP + if isIPv6 { + bindingProtocol = binding.ProtocolTCPv6 + } mockOFClient.EXPECT().InstallServiceGroup(groupID, false, gomock.Any()).Times(1) - mockOFClient.EXPECT().InstallEndpointFlows(binding.ProtocolTCP, gomock.Any()).Times(1) - mockOFClient.EXPECT().InstallServiceFlows(groupID, svcIPv4, uint16(svcPort), binding.ProtocolTCP, uint16(0)).Times(1) - mockOFClient.EXPECT().InstallServiceFlows(groupID, loadBalancerIPv4, uint16(svcPort), binding.ProtocolTCP, uint16(0)).Times(1) - mockOFClient.EXPECT().InstallLoadBalancerServiceFromOutsideFlows(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes() + mockOFClient.EXPECT().InstallEndpointFlows(bindingProtocol, gomock.Any()).Times(1) + mockOFClient.EXPECT().InstallServiceFlows(groupID, svcIP, uint16(svcPort), bindingProtocol, uint16(0)).Times(1) + if nodeLocalExternal { + groupID, _ = fp.groupCounter.Get(svcPortName, true) + mockOFClient.EXPECT().InstallServiceGroup(groupID, false, gomock.Any()).Times(1) + } + + mockOFClient.EXPECT().InstallServiceFlows(groupID, gomock.Any(), uint16(svcNodePort), bindingProtocol, uint16(0)).Times(1) + mockOFClient.EXPECT().InstallServiceClassifierFlow(corev1.ServiceTypeNodePort, agentconfig.DummyNodePortSvcIP, uint16(svcNodePort), bindingProtocol, nodeLocalExternal).Times(1) + mockRouteClient.EXPECT().AddClusterIPRoute(svcIP, isIPv6).Times(1) + mockRouteClient.EXPECT().AddNodePort(gomock.Any(), uint16(svcNodePort), bindingProtocol, isIPv6).Times(1) fp.syncProxyRules() } +func TestLoadBalancerIPv4(t *testing.T) { + testLoadBalancer(t, nodePortIPv4Map, svcIPv4, ep1IPv4, nil, loadBalancerIPv4, false, false) +} + +func TestLoadBalancerIPv4ExternalLocal(t *testing.T) { + testLoadBalancer(t, nodePortIPv4Map, svcIPv4, ep1IPv4, ep2IPv4, loadBalancerIPv4, false, true) +} + +func TestLoadBalancerIPv6(t *testing.T) { + testLoadBalancer(t, nodePortIPv6Map, svcIPv6, ep1IPv6, nil, loadBalancerIPv6, true, false) +} + +func TestLoadBalancerIPv6ExternalLocal(t *testing.T) { + testLoadBalancer(t, nodePortIPv6Map, svcIPv6, ep1IPv6, ep2IPv6, loadBalancerIPv6, true, true) +} + +func TestNodePortIPv4(t *testing.T) { + testNodePort(t, nodePortIPv4Map, svcIPv4, ep1IPv4, nil, false, false) +} + +func TestNodePortIPv4ExternalLocal(t *testing.T) { + testNodePort(t, nodePortIPv4Map, svcIPv4, ep1IPv4, ep2IPv4, false, true) +} + +func TestNodePortIPv6(t *testing.T) { + testNodePort(t, nodePortIPv6Map, svcIPv6, ep1IPv6, nil, true, false) +} + +func TestNodePortIPv6ExternalLocal(t *testing.T) { + testNodePort(t, nodePortIPv6Map, svcIPv6, ep1IPv6, ep2IPv6, true, true) +} + func TestClusterIPv4(t *testing.T) { - testClusterIP(t, net.ParseIP("10.20.30.41"), net.ParseIP("10.180.0.1"), false) + testClusterIP(t, svcIPv4, ep1IPv4, false) } func TestClusterIPv6(t *testing.T) { - testClusterIP(t, net.ParseIP("10:20::41"), net.ParseIP("10:180::1"), true) + testClusterIP(t, svcIPv6, ep1IPv6, true) } func TestDualStackService(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockOFClient := ofmock.NewMockClient(ctrl) - fpv4 := NewFakeProxier(mockOFClient, false) - fpv6 := NewFakeProxier(mockOFClient, true) + mockRouteClient := routemock.NewMockInterface(ctrl) + fpv4 := NewFakeProxier(mockRouteClient, mockOFClient, nil, false, false) + fpv6 := NewFakeProxier(mockRouteClient, mockOFClient, nil, true, false) 
metaProxier := k8sproxy.NewMetaProxier(fpv4, fpv6) svcPort := 80 @@ -284,8 +466,8 @@ func TestDualStackService(t *testing.T) { metaProxier.OnEndpointsUpdate(nil, epv6) metaProxier.OnEndpointsSynced() - groupIDv4, _ := fpv4.groupCounter.Get(svcPortName) - groupIDv6, _ := fpv6.groupCounter.Get(svcPortName) + groupIDv4, _ := fpv4.groupCounter.Get(svcPortName, false) + groupIDv6, _ := fpv6.groupCounter.Get(svcPortName, false) mockOFClient.EXPECT().InstallServiceGroup(groupIDv4, false, gomock.Any()).Times(1) mockOFClient.EXPECT().InstallEndpointFlows(binding.ProtocolTCP, gomock.Any()).Times(1) @@ -303,7 +485,8 @@ func testClusterIPRemoval(t *testing.T, svcIP net.IP, epIP net.IP, isIPv6 bool) ctrl := gomock.NewController(t) defer ctrl.Finish() mockOFClient := ofmock.NewMockClient(ctrl) - fp := NewFakeProxier(mockOFClient, isIPv6) + mockRouteClient := routemock.NewMockInterface(ctrl) + fp := NewFakeProxier(mockRouteClient, mockOFClient, nil, isIPv6, true) svcPort := 80 svcPortName := k8sproxy.ServicePortName{ @@ -340,10 +523,11 @@ func testClusterIPRemoval(t *testing.T, svcIP net.IP, epIP net.IP, isIPv6 bool) } ep := makeTestEndpoints(svcPortName.Namespace, svcPortName.Name, epFunc) makeEndpointsMap(fp, ep) - groupID, _ := fp.groupCounter.Get(svcPortName) + groupID, _ := fp.groupCounter.Get(svcPortName, false) mockOFClient.EXPECT().InstallServiceGroup(groupID, false, gomock.Any()).Times(1) mockOFClient.EXPECT().InstallEndpointFlows(bindingProtocol, gomock.Any()).Times(1) mockOFClient.EXPECT().InstallServiceFlows(groupID, svcIP, uint16(svcPort), bindingProtocol, uint16(0)).Times(1) + mockRouteClient.EXPECT().AddClusterIPRoute(svcIP, isIPv6).Times(1) mockOFClient.EXPECT().UninstallServiceFlows(svcIP, uint16(svcPort), bindingProtocol).Times(1) mockOFClient.EXPECT().UninstallEndpointFlows(bindingProtocol, gomock.Any()).Times(1) mockOFClient.EXPECT().UninstallServiceGroup(groupID).Times(1) @@ -367,7 +551,8 @@ func testClusterIPNoEndpoint(t *testing.T, svcIP net.IP, isIPv6 bool) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockOFClient := ofmock.NewMockClient(ctrl) - fp := NewFakeProxier(mockOFClient, isIPv6) + mockRouteClient := routemock.NewMockInterface(ctrl) + fp := NewFakeProxier(mockRouteClient, mockOFClient, nil, isIPv6, false) svcPort := 80 svcNodePort := 3001 @@ -404,7 +589,8 @@ func testClusterIPRemoveSamePortEndpoint(t *testing.T, svcIP net.IP, epIP net.IP ctrl := gomock.NewController(t) defer ctrl.Finish() mockOFClient := ofmock.NewMockClient(ctrl) - fp := NewFakeProxier(mockOFClient, isIPv6) + mockRouteClient := routemock.NewMockInterface(ctrl) + fp := NewFakeProxier(mockRouteClient, mockOFClient, nil, isIPv6, false) svcPort := 80 svcPortName := k8sproxy.ServicePortName{ @@ -468,8 +654,8 @@ func testClusterIPRemoveSamePortEndpoint(t *testing.T, svcIP net.IP, epIP net.IP } makeEndpointsMap(fp, ep, epUDP) - groupID, _ := fp.groupCounter.Get(svcPortName) - groupIDUDP, _ := fp.groupCounter.Get(svcPortNameUDP) + groupID, _ := fp.groupCounter.Get(svcPortName, false) + groupIDUDP, _ := fp.groupCounter.Get(svcPortNameUDP, false) mockOFClient.EXPECT().InstallServiceGroup(groupID, false, gomock.Any()).Times(1) mockOFClient.EXPECT().InstallServiceGroup(groupIDUDP, false, gomock.Any()).Times(2) mockOFClient.EXPECT().InstallEndpointFlows(protocolTCP, gomock.Any()).Times(1) @@ -495,7 +681,8 @@ func testClusterIPRemoveEndpoints(t *testing.T, svcIP net.IP, epIP net.IP, isIPv ctrl := gomock.NewController(t) defer ctrl.Finish() mockOFClient := ofmock.NewMockClient(ctrl) - fp := 
NewFakeProxier(mockOFClient, isIPv6) + mockRouteClient := routemock.NewMockInterface(ctrl) + fp := NewFakeProxier(mockRouteClient, mockOFClient, nil, isIPv6, false) svcPort := 80 svcPortName := k8sproxy.ServicePortName{ @@ -531,7 +718,7 @@ func testClusterIPRemoveEndpoints(t *testing.T, svcIP net.IP, epIP net.IP, isIPv bindingProtocol = binding.ProtocolTCPv6 } makeEndpointsMap(fp, ep) - groupID, _ := fp.groupCounter.Get(svcPortName) + groupID, _ := fp.groupCounter.Get(svcPortName, false) mockOFClient.EXPECT().InstallServiceGroup(groupID, false, gomock.Any()).Times(2) mockOFClient.EXPECT().InstallEndpointFlows(bindingProtocol, gomock.Any()).Times(2) mockOFClient.EXPECT().InstallServiceFlows(groupID, svcIP, uint16(svcPort), bindingProtocol, uint16(0)).Times(1) @@ -554,7 +741,8 @@ func testSessionAffinityNoEndpoint(t *testing.T, svcExternalIPs net.IP, svcIP ne ctrl := gomock.NewController(t) defer ctrl.Finish() mockOFClient := ofmock.NewMockClient(ctrl) - fp := NewFakeProxier(mockOFClient, isIPv6) + mockRouteClient := routemock.NewMockInterface(ctrl) + fp := NewFakeProxier(mockRouteClient, mockOFClient, nil, isIPv6, false) svcPort := 80 svcNodePort := 3001 @@ -567,7 +755,7 @@ func testSessionAffinityNoEndpoint(t *testing.T, svcExternalIPs net.IP, svcIP ne makeServiceMap(fp, makeTestService(svcPortName.Namespace, svcPortName.Name, func(svc *corev1.Service) { - svc.Spec.Type = "NodePort" + svc.Spec.Type = corev1.ServiceTypeNodePort svc.Spec.ClusterIP = svcIP.String() svc.Spec.ExternalIPs = []string{svcExternalIPs.String()} svc.Spec.SessionAffinity = corev1.ServiceAffinityClientIP @@ -602,7 +790,7 @@ func testSessionAffinityNoEndpoint(t *testing.T, svcExternalIPs net.IP, svcIP ne if isIPv6 { bindingProtocol = binding.ProtocolTCPv6 } - groupID, _ := fp.groupCounter.Get(svcPortName) + groupID, _ := fp.groupCounter.Get(svcPortName, false) mockOFClient.EXPECT().InstallServiceGroup(groupID, true, gomock.Any()).Times(1) mockOFClient.EXPECT().InstallEndpointFlows(bindingProtocol, gomock.Any()).Times(1) mockOFClient.EXPECT().InstallServiceFlows(groupID, svcIP, uint16(svcPort), bindingProtocol, uint16(corev1.DefaultClientIPServiceAffinitySeconds)).Times(1) @@ -622,7 +810,7 @@ func testSessionAffinity(t *testing.T, svcExternalIPs net.IP, svcIP net.IP, isIP ctrl := gomock.NewController(t) defer ctrl.Finish() mockOFClient := ofmock.NewMockClient(ctrl) - fp := NewFakeProxier(mockOFClient, isIPv6) + fp := NewFakeProxier(nil, mockOFClient, nil, isIPv6, false) svcPort := 80 svcNodePort := 3001 @@ -635,7 +823,7 @@ func testSessionAffinity(t *testing.T, svcExternalIPs net.IP, svcIP net.IP, isIP makeServiceMap(fp, makeTestService(svcPortName.Namespace, svcPortName.Name, func(svc *corev1.Service) { - svc.Spec.Type = "NodePort" + svc.Spec.Type = corev1.ServiceTypeNodePort svc.Spec.ClusterIP = svcIP.String() svc.Spec.ExternalIPs = []string{svcExternalIPs.String()} svc.Spec.SessionAffinity = corev1.ServiceAffinityClientIP @@ -669,7 +857,8 @@ func testPortChange(t *testing.T, svcIP net.IP, epIP net.IP, isIPv6 bool) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockOFClient := ofmock.NewMockClient(ctrl) - fp := NewFakeProxier(mockOFClient, isIPv6) + mockRouteClient := routemock.NewMockInterface(ctrl) + fp := NewFakeProxier(mockRouteClient, mockOFClient, nil, isIPv6, false) svcPort1 := 80 svcPort2 := 8080 @@ -708,7 +897,7 @@ func testPortChange(t *testing.T, svcIP net.IP, epIP net.IP, isIPv6 bool) { } ep := makeTestEndpoints(svcPortName.Namespace, svcPortName.Name, epFunc) makeEndpointsMap(fp, ep) - groupID, _ := 
fp.groupCounter.Get(svcPortName) + groupID, _ := fp.groupCounter.Get(svcPortName, false) mockOFClient.EXPECT().InstallServiceGroup(groupID, false, gomock.Any()).Times(1) mockOFClient.EXPECT().InstallEndpointFlows(bindingProtocol, gomock.Any()).Times(1) mockOFClient.EXPECT().InstallServiceFlows(groupID, svcIP, uint16(svcPort1), bindingProtocol, uint16(0)) @@ -742,7 +931,8 @@ func TestServicesWithSameEndpoints(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockOFClient := ofmock.NewMockClient(ctrl) - fp := NewFakeProxier(mockOFClient, false) + mockRouteClient := routemock.NewMockInterface(ctrl) + fp := NewFakeProxier(mockRouteClient, mockOFClient, nil, false, false) epIP := net.ParseIP("10.50.60.71") svcIP1 := net.ParseIP("10.180.30.41") svcIP2 := net.ParseIP("10.180.30.42") @@ -793,8 +983,8 @@ func TestServicesWithSameEndpoints(t *testing.T) { ep1 := epMapFactory(svcPortName1, epIP.String()) ep2 := epMapFactory(svcPortName2, epIP.String()) - groupID1, _ := fp.groupCounter.Get(svcPortName1) - groupID2, _ := fp.groupCounter.Get(svcPortName2) + groupID1, _ := fp.groupCounter.Get(svcPortName1, false) + groupID2, _ := fp.groupCounter.Get(svcPortName2, false) mockOFClient.EXPECT().InstallServiceGroup(groupID1, false, gomock.Any()).Times(1) mockOFClient.EXPECT().InstallServiceGroup(groupID2, false, gomock.Any()).Times(1) bindingProtocol := binding.ProtocolTCP diff --git a/pkg/agent/proxy/proxier_windows.go b/pkg/agent/proxy/proxier_windows.go deleted file mode 100644 index 395a99e43fa..00000000000 --- a/pkg/agent/proxy/proxier_windows.go +++ /dev/null @@ -1,46 +0,0 @@ -// +build windows -// Copyright 2020 Antrea Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proxy - -import ( - "net" - - binding "antrea.io/antrea/pkg/ovs/openflow" -) - -// installLoadBalancerServiceFlows installs OpenFlow entries for LoadBalancer Service. -// The rules for traffic from local Pod to LoadBalancer Service are the same with rules for Cluster Service. -// For the LoadBalancer Service traffic from outside, specific rules are install to forward the packets -// to the host network to let kube-proxy handle the traffic. 
-func (p *proxier) installLoadBalancerServiceFlows(groupID binding.GroupIDType, svcIP net.IP, svcPort uint16, protocol binding.Protocol, affinityTimeout uint16) error { - if err := p.ofClient.InstallServiceFlows(groupID, svcIP, svcPort, protocol, affinityTimeout); err != nil { - return err - } - if err := p.ofClient.InstallLoadBalancerServiceFromOutsideFlows(svcIP, svcPort, protocol); err != nil { - return err - } - return nil -} - -func (p *proxier) uninstallLoadBalancerServiceFlows(svcIP net.IP, svcPort uint16, protocol binding.Protocol) error { - if err := p.ofClient.UninstallServiceFlows(svcIP, svcPort, protocol); err != nil { - return err - } - if err := p.ofClient.UninstallLoadBalancerServiceFromOutsideFlows(svcIP, svcPort, protocol); err != nil { - return err - } - return nil -} diff --git a/pkg/agent/proxy/types/groupcounter.go b/pkg/agent/proxy/types/groupcounter.go index 5cfd5c91172..d4c537924d8 100644 --- a/pkg/agent/proxy/types/groupcounter.go +++ b/pkg/agent/proxy/types/groupcounter.go @@ -15,6 +15,7 @@ package types import ( + "fmt" "sync" binding "antrea.io/antrea/pkg/ovs/openflow" @@ -27,10 +28,10 @@ type GroupCounter interface { // If the group ID of the service has been generated, then return the // prior one. The bool return value indicates whether the groupID is newly // generated. - Get(svcPortName k8sproxy.ServicePortName) (binding.GroupIDType, bool) + Get(svcPortName k8sproxy.ServicePortName, isEndpointsLocal bool) (binding.GroupIDType, bool) // Recycle removes a Service Group ID mapping. The recycled groupID can be // reused. - Recycle(svcPortName k8sproxy.ServicePortName) bool + Recycle(svcPortName k8sproxy.ServicePortName, isEndpointsLocal bool) bool } type groupCounter struct { @@ -38,7 +39,7 @@ type groupCounter struct { groupIDCounter binding.GroupIDType recycled []binding.GroupIDType - groupMap map[k8sproxy.ServicePortName]binding.GroupIDType + groupMap map[string]binding.GroupIDType } func NewGroupCounter(isIPv6 bool) *groupCounter { @@ -46,33 +47,42 @@ func NewGroupCounter(isIPv6 bool) *groupCounter { if isIPv6 { groupIDCounter = 0x10000000 } - return &groupCounter{groupMap: map[k8sproxy.ServicePortName]binding.GroupIDType{}, groupIDCounter: groupIDCounter} + return &groupCounter{groupMap: map[string]binding.GroupIDType{}, groupIDCounter: groupIDCounter} } -func (c *groupCounter) Get(svcPortName k8sproxy.ServicePortName) (binding.GroupIDType, bool) { +func keyString(svcPortName k8sproxy.ServicePortName, isEndpointsLocal bool) string { + key := svcPortName.String() + if isEndpointsLocal { + key = fmt.Sprintf("%s/local", key) + } + return key +} + +func (c *groupCounter) Get(svcPortName k8sproxy.ServicePortName, isEndpointsLocal bool) (binding.GroupIDType, bool) { c.mu.Lock() defer c.mu.Unlock() - - if id, ok := c.groupMap[svcPortName]; ok { + key := keyString(svcPortName, isEndpointsLocal) + if id, ok := c.groupMap[key]; ok { return id, false } else if len(c.recycled) != 0 { id = c.recycled[len(c.recycled)-1] c.recycled = c.recycled[:len(c.recycled)-1] - c.groupMap[svcPortName] = id + c.groupMap[key] = id return id, true } else { c.groupIDCounter += 1 - c.groupMap[svcPortName] = c.groupIDCounter + c.groupMap[key] = c.groupIDCounter return c.groupIDCounter, true } } -func (c *groupCounter) Recycle(svcPortName k8sproxy.ServicePortName) bool { +func (c *groupCounter) Recycle(svcPortName k8sproxy.ServicePortName, isEndpointsLocal bool) bool { c.mu.Lock() defer c.mu.Unlock() - if id, ok := c.groupMap[svcPortName]; ok { - delete(c.groupMap, svcPortName) + key := 
keyString(svcPortName, isEndpointsLocal) + if id, ok := c.groupMap[key]; ok { + delete(c.groupMap, key) c.recycled = append(c.recycled, id) return true } diff --git a/pkg/agent/route/interfaces.go b/pkg/agent/route/interfaces.go index 7053b1c6d00..fcfb85c6755 100644 --- a/pkg/agent/route/interfaces.go +++ b/pkg/agent/route/interfaces.go @@ -18,6 +18,7 @@ import ( "net" "antrea.io/antrea/pkg/agent/config" + binding "antrea.io/antrea/pkg/ovs/openflow" ) // Interface is the interface for routing container packets in host network. @@ -51,6 +52,24 @@ type Interface interface { // DeleteSNATRule should delete rule to SNAT outgoing traffic with the mark. DeleteSNATRule(mark uint32) error + // InitServiceProxyConfig adds basic configuration for supporting Service. + InitServiceProxyConfig(isIPv6 bool) error + + // AddNodePort adds configurations when a NodePort Service is created. + AddNodePort(nodePortIPMap map[int][]net.IP, port uint16, protocol binding.Protocol, isIPv6 bool) error + + // DeleteNodePort deletes related configurations when a NodePort Service is deleted. + DeleteNodePort(nodePortIPMap map[int][]net.IP, port uint16, protocol binding.Protocol, isIPv6 bool) error + + // AddClusterIPRoute adds route on K8s node for Service ClusterIP. + AddClusterIPRoute(svcIP net.IP, isIPv6 bool) error + + // AddLoadBalancer adds configurations when a LoadBalancer Service is created. + AddLoadBalancer(externalIPs []string, isIPv6 bool) error + + // DeleteLoadBalancer deletes related configurations when a LoadBalancer Service is deleted. + DeleteLoadBalancer(externalIPs []string, isIPv6 bool) error + // Run starts the sync loop. Run(stopCh <-chan struct{}) } diff --git a/pkg/agent/route/route_linux.go b/pkg/agent/route/route_linux.go index 06cfca5b32d..c80aa1cc147 100644 --- a/pkg/agent/route/route_linux.go +++ b/pkg/agent/route/route_linux.go @@ -35,6 +35,7 @@ import ( "antrea.io/antrea/pkg/agent/util" "antrea.io/antrea/pkg/agent/util/ipset" "antrea.io/antrea/pkg/agent/util/iptables" + binding "antrea.io/antrea/pkg/ovs/openflow" "antrea.io/antrea/pkg/ovs/ovsconfig" "antrea.io/antrea/pkg/util/env" ) @@ -49,12 +50,24 @@ const ( // antreaPodIP6Set contains all IPv6 Pod CIDRs of this cluster. antreaPodIP6Set = "ANTREA-POD-IP6" + // Antrea proxy NodePort IP + antreaNodePortIPSet = "ANTREA-NODEPORT-IP" + antreaNodePortIP6Set = "ANTREA-NODEPORT-IP6" + // Antrea managed iptables chains. antreaForwardChain = "ANTREA-FORWARD" antreaPreRoutingChain = "ANTREA-PREROUTING" antreaPostRoutingChain = "ANTREA-POSTROUTING" antreaOutputChain = "ANTREA-OUTPUT" antreaMangleChain = "ANTREA-MANGLE" + + clusterIPv4FromNodeRouteKey = "ClusterIPv4FromNodeRoute" + clusterIPv6FromNodeRouteKey = "ClusterIPv6FromNodeRoute" + + defaultRouteTable = 0 + + ipv4AddrLength = 32 + ipv6AddrLength = 128 ) // Client implements Interface. @@ -83,17 +96,25 @@ type Client struct { // markToSNATIP caches marks to SNAT IPs. It's used in Egress feature. markToSNATIP sync.Map // iptablesInitialized is used to notify when iptables initialization is done. - iptablesInitialized chan struct{} + iptablesInitialized chan struct{} + defaultRouteInterfaceMap map[int]int + proxyFull bool } // NewClient returns a route client. // TODO: remove param serviceCIDR after kube-proxy is replaced by Antrea Proxy. This param is not used in this file; // leaving it here is to be compatible with the implementation on Windows. 
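Before the Linux `NewClient` below, a sketch of how the proxier drives the new route.Interface methods declared above when full AntreaProxy support is enabled, pieced together from the proxier.Run and install/uninstall hunks earlier in this diff. The keying of `nodePortIPMap` by interface index is an assumption (suggested by `defaultRouteInterfaceMap` in the Linux client), not something this diff states:

```go
import (
	"net"

	"antrea.io/antrea/pkg/agent/openflow"
	"antrea.io/antrea/pkg/agent/route"
)

// Hedged sketch of the call sequence; not a verbatim excerpt from the agent.
func startServiceProxy(routeClient route.Interface, ofClient openflow.Client,
	nodePortIPMap map[int][]net.IP, isIPv6 bool) error {
	// One-time setup, performed in proxier.Run:
	if err := routeClient.InitServiceProxyConfig(isIPv6); err != nil {
		return err
	}
	if err := ofClient.InstallInitNodePortClassifierFlows(nodePortIPMap, isIPv6); err != nil {
		return err
	}
	// Then per Service, as events arrive:
	//   routeClient.AddClusterIPRoute(clusterIP, isIPv6)             // every Service
	//   routeClient.AddNodePort(nodePortIPMap, port, proto, isIPv6)  // NodePort Services
	//   routeClient.AddLoadBalancer(ingressIPs, isIPv6)              // LoadBalancer Services
	// with the matching DeleteNodePort/DeleteLoadBalancer calls on removal.
	return nil
}
```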
-func NewClient(serviceCIDR *net.IPNet, networkConfig *config.NetworkConfig, noSNAT bool) (*Client, error) { +func NewClient(serviceCIDR *net.IPNet, networkConfig *config.NetworkConfig, noSNAT, proxyFull bool) (*Client, error) { + defaultRouteMap, err := util.GetDefaultRouteInterfaces() + if err != nil { + return nil, err + } return &Client{ - serviceCIDR: serviceCIDR, - networkConfig: networkConfig, - noSNAT: noSNAT, + serviceCIDR: serviceCIDR, + networkConfig: networkConfig, + noSNAT: noSNAT, + defaultRouteInterfaceMap: defaultRouteMap, + proxyFull: proxyFull, }, nil } @@ -219,6 +240,17 @@ func (c *Client) syncIPSet() error { } } } + + // If full AntreaProxy support is enabled, create the NodePort ipsets. + if c.proxyFull { + if err := ipset.CreateIPSet(antreaNodePortIPSet, ipset.HashIPPort, false); err != nil { + return err + } + if err := ipset.CreateIPSet(antreaNodePortIP6Set, ipset.HashIPPort, true); err != nil { + return err + } + } + return nil } @@ -229,6 +261,14 @@ func getIPSetName(ip net.IP) string { return antreaPodIPSet } +func getNodePortIPSetName(isIPv6 bool) string { + if isIPv6 { + return antreaNodePortIP6Set + } else { + return antreaNodePortIPSet + } +} + // writeEKSMangleRule writes an additional iptables mangle rule to the // iptablesData buffer, which is required to ensure that the reverse path for // NodePort Service traffic is correct on EKS. @@ -269,7 +309,9 @@ func (c *Client) syncIPTables() error { {iptables.RawTable, iptables.PreRoutingChain, antreaPreRoutingChain, "Antrea: jump to Antrea prerouting rules"}, {iptables.RawTable, iptables.OutputChain, antreaOutputChain, "Antrea: jump to Antrea output rules"}, {iptables.FilterTable, iptables.ForwardChain, antreaForwardChain, "Antrea: jump to Antrea forwarding rules"}, + {iptables.NATTable, iptables.PreRoutingChain, antreaPreRoutingChain, "Antrea: jump to Antrea prerouting rules"}, {iptables.NATTable, iptables.PostRoutingChain, antreaPostRoutingChain, "Antrea: jump to Antrea postrouting rules"}, + {iptables.NATTable, iptables.OutputChain, antreaOutputChain, "Antrea: jump to Antrea output rules"}, {iptables.MangleTable, iptables.PreRoutingChain, antreaMangleChain, "Antrea: jump to Antrea mangle rules"}, // TODO: unify the chain naming style {iptables.MangleTable, iptables.OutputChain, antreaOutputChain, "Antrea: jump to Antrea output rules"}, } @@ -297,7 +339,7 @@ func (c *Client) syncIPTables() error { }) // Use iptables-restore to configure IPv4 settings. if v4Enabled { - iptablesData := c.restoreIptablesData(c.nodeConfig.PodIPv4CIDR, antreaPodIPSet, snatMarkToIPv4) + iptablesData := c.restoreIptablesData(c.nodeConfig.PodIPv4CIDR, antreaPodIPSet, antreaNodePortIPSet, config.ServiceGWHairpinIPv4, snatMarkToIPv4) // Setting --noflush to keep the previous contents (i.e. non antrea managed chains) of the tables. if err := c.ipt.Restore(iptablesData.Bytes(), false, false); err != nil { return err } } // Use ip6tables-restore to configure IPv6 settings. if v6Enabled { - iptablesData := c.restoreIptablesData(c.nodeConfig.PodIPv6CIDR, antreaPodIP6Set, snatMarkToIPv6) + iptablesData := c.restoreIptablesData(c.nodeConfig.PodIPv6CIDR, antreaPodIP6Set, antreaNodePortIP6Set, config.ServiceGWHairpinIPv6, snatMarkToIPv6) // Setting --noflush to keep the previous contents (i.e. non antrea managed chains) of the tables.
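+ // For reference, the NodePort ipset created in syncIPSet is of type hash:ip,port, so each member pairs a host + // address with a protocol:port, and the nat rules written by restoreIptablesData below DNAT matching packets to + // the hairpin gateway IP. Roughly, assuming Node IP 192.168.77.100 and NodePort 30080/TCP (illustrative values): + // ipset member: 192.168.77.100,tcp:30080 + // nat rule: -A ANTREA-PREROUTING -m set --match-set ANTREA-NODEPORT-IP dst,dst -j DNAT --to-destination <ServiceGWHairpinIPv4>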
if err := c.ipt.Restore(iptablesData.Bytes(), false, true); err != nil { return err @@ -315,7 +357,7 @@ return nil } -func (c *Client) restoreIptablesData(podCIDR *net.IPNet, podIPSet string, snatMarkToIP map[uint32]net.IP) *bytes.Buffer { +func (c *Client) restoreIptablesData(podCIDR *net.IPNet, podIPSet, nodePortIPSet string, serviceGWHairpinIP net.IP, snatMarkToIP map[uint32]net.IP) *bytes.Buffer { // Create required rules in the antrea chains. // Use iptables-restore as it flushes the involved chains and creates the desired rules // with a single call, instead of string matching to clean up stale rules. @@ -395,6 +437,24 @@ func (c *Client) restoreIptablesData(podCIDR *net.IPNet, podIPSet string, snatMa writeLine(iptablesData, "COMMIT") writeLine(iptablesData, "*nat") + if c.proxyFull { + writeLine(iptablesData, iptables.MakeChainLine(antreaPreRoutingChain)) + writeLine(iptablesData, []string{ + "-A", antreaPreRoutingChain, + "-m", "comment", "--comment", `"Antrea: match NodePort packets"`, + "-m", "set", "--match-set", nodePortIPSet, "dst,dst", + "-j", iptables.DNATTarget, + "--to-destination", serviceGWHairpinIP.String(), + }...) + writeLine(iptablesData, iptables.MakeChainLine(antreaOutputChain)) + writeLine(iptablesData, []string{ + "-A", antreaOutputChain, + "-m", "comment", "--comment", `"Antrea: match NodePort packets"`, + "-m", "set", "--match-set", nodePortIPSet, "dst,dst", + "-j", iptables.DNATTarget, + "--to-destination", serviceGWHairpinIP.String(), + }...) + } writeLine(iptablesData, iptables.MakeChainLine(antreaPostRoutingChain)) // Egress rules must be inserted before the default masquerade rule. for snatMark, snatIP := range snatMarkToIP { @@ -431,6 +491,19 @@ func (c *Client) restoreIptablesData(podCIDR *net.IPNet, podIPSet string, snatMa "-j", iptables.MasqueradeTarget, "--random-fully", }...) + // If full AntreaProxy support is enabled, the source IP of Service packets whose Endpoint is on the host network + // is SNAT'd to ServiceGWHairpinIPv4/ServiceGWHairpinIPv6 within the OVS pipeline. When that host Endpoint is not + // on the current K8s Node, the packets must be SNAT'd once more by iptables; otherwise the reply packets would not + // be routed back to the current Node, since their destination IP would still be ServiceGWHairpinIPv4/ServiceGWHairpinIPv6. + if c.proxyFull { + writeLine(iptablesData, []string{ + "-A", antreaPostRoutingChain, + "-m", "comment", "--comment", `"Antrea: masquerade Service host network Endpoint traffic"`, + "-s", serviceGWHairpinIP.String(), + "-j", iptables.MasqueradeTarget, + }...) + } + writeLine(iptablesData, "COMMIT") return iptablesData } @@ -476,7 +549,7 @@ func (c *Client) Reconcile(podCIDRs []string) error { } } - // Remove any unknown routes on antrea-gw0. + // Remove any unknown routes on Antrea gateway. routes, err := c.listIPRoutesOnGW() if err != nil { return fmt.Errorf("error listing ip routes: %v", err) } @@ -489,13 +562,19 @@ if desiredPodCIDRs.Has(route.Dst.String()) { continue } + // Don't delete the virtual Service IP route which is added by AntreaProxy.
+ if route.Gw.To4() != nil && route.Gw.Equal(config.ServiceGWHairpinIPv4) || + route.Gw.To16() != nil && route.Gw.Equal(config.ServiceGWHairpinIPv6) || + route.Dst.IP.To16() != nil && route.Dst.IP.Equal(config.ServiceGWHairpinIPv6) { + continue + } klog.Infof("Deleting unknown route %v", route) if err := netlink.RouteDel(&route); err != nil && err != unix.ESRCH { return err } } - // Remove any unknown IPv6 neighbors on antrea-gw0. + // Remove any unknown IPv6 neighbors on Antrea gateway. desiredGWs := getIPv6Gateways(podCIDRs) // Return immediately if there is no IPv6 gateway address configured on the Nodes. if desiredGWs.Len() == 0 { @@ -510,6 +589,10 @@ if desiredGWs.Has(neighIP) { continue } + // Don't delete the virtual Service IP neighbor which is added by AntreaProxy. + if actualNeigh.IP.Equal(config.ServiceGWHairpinIPv6) { + continue + } klog.V(4).Infof("Deleting orphaned IPv6 neighbor %v", actualNeigh) if err := netlink.NeighDel(actualNeigh); err != nil { return err @@ -518,7 +601,7 @@ return nil } -// listIPRoutes returns list of routes on antrea-gw0. +// listIPRoutesOnGW returns the list of routes on the Antrea gateway. func (c *Client) listIPRoutesOnGW() ([]netlink.Route, error) { filter := &netlink.Route{ LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex} @@ -773,3 +856,321 @@ func (c *Client) DeleteSNATRule(mark uint32) error { snatIP := value.(net.IP) return c.ipt.DeleteRule(iptables.NATTable, antreaPostRoutingChain, c.snatRuleSpec(snatIP, mark)) } + +// cleanStaleGatewayRoutes cleans up stale ClusterIP/LoadBalancer routing entries on the host. It may also delete Node +// routes; this is acceptable because the function only runs once, when AntreaProxy starts, and the Node route +// controller will restore the deleted Node routes. +func (c *Client) cleanStaleGatewayRoutes() error { + routes, err := c.listIPRoutesOnGW() + if err != nil { + return fmt.Errorf("error listing ip routes: %v", err) + } + for i := range routes { + route := routes[i] + // Don't delete the route of the local Pod CIDR. The Node route controller cannot restore this routing entry. + if route.Dst.Contains(c.nodeConfig.GatewayConfig.IPv4) || route.Dst.Contains(c.nodeConfig.GatewayConfig.IPv6) { + continue + } + klog.Infof("Deleting unknown route %v", route) + if err = netlink.RouteDel(&route); err != nil && err != unix.ESRCH { + return err + } + } + return nil +} + +// addServiceGWHairpinIPRoute adds the routing entry that routes ServiceGWHairpinIPv4/ServiceGWHairpinIPv6 traffic +// back to the Antrea gateway on the host.
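+// Because the hairpin IP is not configured on any interface, the route below must be installed as an on-link route, +// and for IPv6 a permanent neighbor entry is needed as well. Roughly equivalent to (assuming the gateway device is +// antrea-gw0; illustrative only): +// ip route replace <ServiceGWHairpinIPv4>/32 via <ServiceGWHairpinIPv4> dev antrea-gw0 onlink +// ip -6 neigh replace <ServiceGWHairpinIPv6> lladdr <globalVMAC> dev antrea-gw0 nud permanent +// ip -6 route replace <ServiceGWHairpinIPv6>/128 dev antrea-gw0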
+func (c *Client) addServiceGWHairpinIPRoute(isIPv6 bool) error { + linkIndex := c.nodeConfig.GatewayConfig.LinkIndex + var svcIP *net.IP + var route *netlink.Route + var neigh *netlink.Neigh + var err error + if !isIPv6 { + svcIP = &config.ServiceGWHairpinIPv4 + route, err = generateOnlinkRoute(svcIP, ipv4AddrLength, svcIP, linkIndex, defaultRouteTable) + if err != nil { + return fmt.Errorf("failed to generate route for virtual Service IP %s: %w", svcIP.String(), err) + } + } else { + svcIP = &config.ServiceGWHairpinIPv6 + route, neigh, err = generateIPv6RouteAndNeigh(svcIP, linkIndex) + if err != nil { + return fmt.Errorf("failed to generate route and neighbor for virtual Service IP %s: %w", svcIP.String(), err) + } + } + + if isIPv6 { + if err = netlink.NeighSet(neigh); err != nil { + return fmt.Errorf("failed to add virtual Service IP neighbor %v to gw %s: %v", neigh, c.nodeConfig.GatewayConfig.Name, err) + } + c.nodeNeighbors.Store(svcIP.String(), neigh) + } + + if err = netlink.RouteReplace(route); err != nil { + return fmt.Errorf("failed to install route for Service IP %s: %w", svcIP.String(), err) + } + klog.Infof("Adding Service hairpin IP route %v", route) + c.nodeRoutes.Store(svcIP.String(), []*netlink.Route{route}) + + return nil +} + +// InitServiceProxyConfig adds the basic host configurations required to support Services. +func (c *Client) InitServiceProxyConfig(isIPv6 bool) error { + if err := c.cleanStaleGatewayRoutes(); err != nil { + return err + } + if err := c.addServiceGWHairpinIPRoute(isIPv6); err != nil { + return err + } + return nil +} + +// AddNodePort is used to add IP,protocol:port entries to the target ipset when a NodePort Service is added. An entry +// is added for every NodePort IP. +func (c *Client) AddNodePort(nodePortIPMap map[int][]net.IP, port uint16, protocol binding.Protocol, isIPv6 bool) error { + ipSetName := getNodePortIPSetName(isIPv6) + transProtocol := getTransProtocolStr(protocol) + + for _, addrs := range nodePortIPMap { + for i := range addrs { + ipSetEntry := fmt.Sprintf("%s,%s:%d", addrs[i], transProtocol, port) + if err := ipset.AddEntry(ipSetName, ipSetEntry); err != nil { + return err + } + } + } + + return nil +} + +// DeleteNodePort is used to delete the related ipset entries when a NodePort Service is deleted. +func (c *Client) DeleteNodePort(nodePortIPMap map[int][]net.IP, port uint16, protocol binding.Protocol, isIPv6 bool) error { + ipSetName := getNodePortIPSetName(isIPv6) + transProtocol := getTransProtocolStr(protocol) + + for _, addrs := range nodePortIPMap { + for i := range addrs { + ipSetEntry := fmt.Sprintf("%s,%s:%d", addrs[i], transProtocol, port) + if err := ipset.DelEntry(ipSetName, ipSetEntry); err != nil { + return err + } + } + } + + return nil +} + +// AddClusterIPRoute is used to add or update a routing entry which is used to route ClusterIP traffic to Antrea gateway. +func (c *Client) AddClusterIPRoute(svcIP net.IP, isIPv6 bool) error { + routeKey := clusterIPv4FromNodeRouteKey + if isIPv6 { + routeKey = clusterIPv6FromNodeRouteKey + } + + routeVal, exist := c.nodeRoutes.Load(routeKey) + if exist { + curRoute := routeVal.([]*netlink.Route)[0] + // If the route exists, check whether the route can cover the ClusterIP. + if !curRoute.Dst.Contains(svcIP) { + // If not, generate a new destination ipNet. + newDst, err := util.ExtendCIDRWithIP(curRoute.Dst, svcIP) + if err != nil { + return fmt.Errorf("failed to extend destination route CIDR: %v", err) + } + + // Generate a new route with the new destination ipNet.
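+ // To make the widening concrete (illustrative addresses, not taken from this patch): if the current route + // is 10.96.0.1/32 and a new ClusterIP 10.96.0.10 appears, the common prefix of the two addresses is 28 bits, + // so ExtendCIDRWithIP returns 10.96.0.0/28 and the single replacement route below covers both ClusterIPs.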
+ networkMaskPrefix, _ := newDst.Mask.Size() + newRoute, err := generateOnlinkRoute(&newDst.IP, networkMaskPrefix, &curRoute.Gw, curRoute.LinkIndex, defaultRouteTable) + if err != nil { + return fmt.Errorf("failed to generate new route for %s", svcIP.String()) + } + // Install the new route first. + if err = netlink.RouteReplace(newRoute); err != nil { + return fmt.Errorf("failed to install new route: %w", err) + } + // Remove the old route. + if err = netlink.RouteDel(curRoute); err != nil { + return fmt.Errorf("failed to uninstall old route: %w", err) + } + + klog.V(4).Infof("Created route %s to route ClusterIP %v to Antrea gateway", newRoute.Dst.String(), svcIP) + c.nodeRoutes.Store(routeKey, []*netlink.Route{newRoute}) + } else { + klog.V(4).Infof("Current route can route ClusterIP %v to Antrea gateway", svcIP) + } + } else { + // The route doesn't exist, create one. + var mask int + var gw *net.IP + if isIPv6 { + mask = ipv6AddrLength + gw = &config.ServiceGWHairpinIPv6 + } else { + mask = ipv4AddrLength + gw = &config.ServiceGWHairpinIPv4 + } + + linkIndex := c.nodeConfig.GatewayConfig.LinkIndex + route, err := generateOnlinkRoute(&svcIP, mask, gw, linkIndex, defaultRouteTable) + if err != nil { + return fmt.Errorf("failed to generate new route for %s", svcIP.String()) + } + if err = netlink.RouteReplace(route); err != nil { + return fmt.Errorf("failed to install new ClusterIP route: %w", err) + } + + c.nodeRoutes.Store(routeKey, []*netlink.Route{route}) + } + + return nil +} + +// addLoadBalancerIngressIPRoute adds the routing entry that routes a LoadBalancer ingress IP to the Antrea gateway +// on the host. +func (c *Client) addLoadBalancerIngressIPRoute(svcIP *net.IP, isIPv6 bool) error { + linkIndex := c.nodeConfig.GatewayConfig.LinkIndex + var gw *net.IP + var mask int + if !isIPv6 { + gw = &config.ServiceGWHairpinIPv4 + mask = ipv4AddrLength + } else { + gw = &config.ServiceGWHairpinIPv6 + mask = ipv6AddrLength + } + + route, err := generateOnlinkRoute(svcIP, mask, gw, linkIndex, defaultRouteTable) + if err != nil { + return fmt.Errorf("failed to generate routing entry for LoadBalancer ingress IP %s: %w", svcIP.String(), err) + } + if err = netlink.RouteReplace(route); err != nil { + return fmt.Errorf("failed to install routing entry for LoadBalancer ingress IP %s: %w", svcIP.String(), err) + } + klog.V(4).Infof("Adding LoadBalancer ingress IP route %v", route) + c.nodeRoutes.Store(svcIP.String(), []*netlink.Route{route}) + + return nil +} + +// deleteLoadBalancerIngressIPRoute deletes the routing entry that routes a LoadBalancer ingress IP to the Antrea +// gateway on the host. +func (c *Client) deleteLoadBalancerIngressIPRoute(svcIP *net.IP, isIPv6 bool) error { + linkIndex := c.nodeConfig.GatewayConfig.LinkIndex + var gw *net.IP + var mask int + if !isIPv6 { + gw = &config.ServiceGWHairpinIPv4 + mask = ipv4AddrLength + } else { + gw = &config.ServiceGWHairpinIPv6 + mask = ipv6AddrLength + } + + route, err := generateOnlinkRoute(svcIP, mask, gw, linkIndex, defaultRouteTable) + if err != nil { + return fmt.Errorf("failed to generate routing entry for LoadBalancer ingress IP %s: %w", svcIP.String(), err) + } + if err = netlink.RouteDel(route); err != nil { + return fmt.Errorf("failed to delete routing entry for LoadBalancer ingress IP %s: %w", svcIP.String(), err) + } + klog.V(4).Infof("Deleting LoadBalancer ingress IP route %v", route) + c.nodeRoutes.Delete(svcIP.String()) + + return nil +} + +// AddLoadBalancer is used to add routing entries when a LoadBalancer Service is added.
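+// For example (illustrative): with ingress IP 169.254.169.1 and the IPv4 hairpin gateway, the route installed by +// addLoadBalancerIngressIPRoute is roughly "ip route replace 169.254.169.1/32 via <ServiceGWHairpinIPv4> dev antrea-gw0 onlink".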
+func (c *Client) AddLoadBalancer(externalIPs []string, isIPv6 bool) error { + var svcIPs []net.IP + for _, svcIPStr := range externalIPs { + svcIPs = append(svcIPs, net.ParseIP(svcIPStr)) + } + + for i := range svcIPs { + if err := c.addLoadBalancerIngressIPRoute(&svcIPs[i], isIPv6); err != nil { + return err + } + } + + return nil +} + +// DeleteLoadBalancer is used to delete the related routing entries when a LoadBalancer Service is deleted. +func (c *Client) DeleteLoadBalancer(externalIPs []string, isIPv6 bool) error { + var svcIPs []net.IP + for _, svcIPStr := range externalIPs { + svcIPs = append(svcIPs, net.ParseIP(svcIPStr)) + } + + for i := range svcIPs { + if err := c.deleteLoadBalancerIngressIPRoute(&svcIPs[i], isIPv6); err != nil { + return err + } + } + + return nil +} + +// getTransProtocolStr returns the transport protocol string ("tcp", "udp" or "sctp") for the given OpenFlow protocol. +func getTransProtocolStr(protocol binding.Protocol) string { + if protocol == binding.ProtocolTCP || protocol == binding.ProtocolTCPv6 { + return "tcp" + } else if protocol == binding.ProtocolUDP || protocol == binding.ProtocolUDPv6 { + return "udp" + } else if protocol == binding.ProtocolSCTP || protocol == binding.ProtocolSCTPv6 { + return "sctp" + } + return "" +} + +// generateOnlinkRoute generates an on-link route to ip/mask via gw on the given link and routing table. +func generateOnlinkRoute(ip *net.IP, mask int, gw *net.IP, linkIndex int, table int) (*netlink.Route, error) { + addrBits := ipv4AddrLength + if ip.To4() != nil { + if gw.To4() == nil { + return nil, fmt.Errorf("gateway %s is not a valid IPv4 address", gw.String()) + } + if mask > ipv4AddrLength { + return nil, fmt.Errorf("network mask should be less than or equal to 32 as %s is an IPv4 address", ip.String()) + } + } else { + if mask > ipv6AddrLength { + return nil, fmt.Errorf("network mask should be less than or equal to 128 as %s is an IPv6 address", ip.String()) + } + addrBits = ipv6AddrLength + } + + route := &netlink.Route{ + Dst: &net.IPNet{ + IP: *ip, + Mask: net.CIDRMask(mask, addrBits), + }, + Gw: *gw, + Flags: int(netlink.FLAG_ONLINK), + LinkIndex: linkIndex, + Table: table, + } + return route, nil +} + +// generateIPv6RouteAndNeigh generates the /128 route and the permanent neighbor entry for the given IPv6 address. +func generateIPv6RouteAndNeigh(ip *net.IP, linkIndex int) (*netlink.Route, *netlink.Neigh, error) { + if ip.To4() != nil || ip.To16() == nil { + return nil, nil, fmt.Errorf("%s is not an IPv6 address", ip.String()) + } + route := &netlink.Route{ + Dst: &net.IPNet{ + IP: *ip, + Mask: net.CIDRMask(ipv6AddrLength, ipv6AddrLength), + }, + LinkIndex: linkIndex, + } + neigh := &netlink.Neigh{ + LinkIndex: linkIndex, + Family: netlink.FAMILY_V6, + State: netlink.NUD_PERMANENT, + IP: *ip, + HardwareAddr: globalVMAC, + } + return route, neigh, nil +} diff --git a/pkg/agent/route/route_windows.go b/pkg/agent/route/route_windows.go index 605e51eadf3..4b9283eda43 100644 --- a/pkg/agent/route/route_windows.go +++ b/pkg/agent/route/route_windows.go @@ -28,6 +28,7 @@ import ( "antrea.io/antrea/pkg/agent/config" "antrea.io/antrea/pkg/agent/util" "antrea.io/antrea/pkg/agent/util/winfirewall" + binding "antrea.io/antrea/pkg/ovs/openflow" ) const ( @@ -43,17 +44,19 @@ type Client struct { fwClient *winfirewall.Client bridgeInfIndex int noSNAT bool + proxyFull bool } // NewClient returns a route client. // Todo: remove param serviceCIDR after kube-proxy is replaced by Antrea Proxy completely.
-func NewClient(serviceCIDR *net.IPNet, networkConfig *config.NetworkConfig, noSNAT bool) (*Client, error) { +func NewClient(serviceCIDR *net.IPNet, networkConfig *config.NetworkConfig, noSNAT, proxyFull bool) (*Client, error) { return &Client{ networkConfig: networkConfig, serviceCIDR: serviceCIDR, hostRoutes: &sync.Map{}, fwClient: winfirewall.NewClient(), noSNAT: noSNAT, + proxyFull: proxyFull, }, nil } @@ -245,3 +248,27 @@ func (c *Client) AddSNATRule(snatIP net.IP, mark uint32) error { func (c *Client) DeleteSNATRule(mark uint32) error { return nil } + +func (c *Client) InitServiceProxyConfig(isIPv6 bool) error { + return nil +} + +func (c *Client) AddNodePort(nodePortIPMap map[int][]net.IP, port uint16, protocol binding.Protocol, isIPv6 bool) error { + return nil +} + +func (c *Client) DeleteNodePort(nodePortIPMap map[int][]net.IP, port uint16, protocol binding.Protocol, isIPv6 bool) error { + return nil +} + +func (c *Client) AddClusterIPRoute(svcIP net.IP, isIPv6 bool) error { + return nil +} + +func (c *Client) AddLoadBalancer(externalIPs []string, isIPv6 bool) error { + return nil +} + +func (c *Client) DeleteLoadBalancer(externalIPs []string, isIPv6 bool) error { + return nil +} diff --git a/pkg/agent/route/route_windows_test.go b/pkg/agent/route/route_windows_test.go index 5c855656d37..e8253eccd18 100644 --- a/pkg/agent/route/route_windows_test.go +++ b/pkg/agent/route/route_windows_test.go @@ -50,7 +50,7 @@ func TestRouteOperation(t *testing.T) { gwIP2 := net.ParseIP("192.168.3.1") _, destCIDR2, _ := net.ParseCIDR(dest2) - client, err := NewClient(serviceCIDR, &config.NetworkConfig{}, true) + client, err := NewClient(serviceCIDR, &config.NetworkConfig{}, true, false) require.Nil(t, err) nodeConfig := &config.NodeConfig{ OVSBridge: "Loopback Pseudo-Interface 1", diff --git a/pkg/agent/route/testing/mock_route.go b/pkg/agent/route/testing/mock_route.go index e7287d94e28..d23dc0bfd82 100644 --- a/pkg/agent/route/testing/mock_route.go +++ b/pkg/agent/route/testing/mock_route.go @@ -21,6 +21,7 @@ package testing import ( config "antrea.io/antrea/pkg/agent/config" + openflow "antrea.io/antrea/pkg/ovs/openflow" gomock "github.com/golang/mock/gomock" net "net" reflect "reflect" @@ -49,6 +50,48 @@ func (m *MockInterface) EXPECT() *MockInterfaceMockRecorder { return m.recorder } +// AddClusterIPRoute mocks base method +func (m *MockInterface) AddClusterIPRoute(arg0 net.IP, arg1 bool) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AddClusterIPRoute", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// AddClusterIPRoute indicates an expected call of AddClusterIPRoute +func (mr *MockInterfaceMockRecorder) AddClusterIPRoute(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddClusterIPRoute", reflect.TypeOf((*MockInterface)(nil).AddClusterIPRoute), arg0, arg1) +} + +// AddLoadBalancer mocks base method +func (m *MockInterface) AddLoadBalancer(arg0 []string, arg1 bool) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AddLoadBalancer", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// AddLoadBalancer indicates an expected call of AddLoadBalancer +func (mr *MockInterfaceMockRecorder) AddLoadBalancer(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddLoadBalancer", reflect.TypeOf((*MockInterface)(nil).AddLoadBalancer), arg0, arg1) +} + +// AddNodePort mocks base method +func (m *MockInterface) AddNodePort(arg0 
map[int][]net.IP, arg1 uint16, arg2 openflow.Protocol, arg3 bool) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AddNodePort", arg0, arg1, arg2, arg3) + ret0, _ := ret[0].(error) + return ret0 +} + +// AddNodePort indicates an expected call of AddNodePort +func (mr *MockInterfaceMockRecorder) AddNodePort(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddNodePort", reflect.TypeOf((*MockInterface)(nil).AddNodePort), arg0, arg1, arg2, arg3) +} + // AddRoutes mocks base method func (m *MockInterface) AddRoutes(arg0 *net.IPNet, arg1 string, arg2, arg3 net.IP) error { m.ctrl.T.Helper() @@ -77,6 +120,34 @@ func (mr *MockInterfaceMockRecorder) AddSNATRule(arg0, arg1 interface{}) *gomock return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddSNATRule", reflect.TypeOf((*MockInterface)(nil).AddSNATRule), arg0, arg1) } +// DeleteLoadBalancer mocks base method +func (m *MockInterface) DeleteLoadBalancer(arg0 []string, arg1 bool) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteLoadBalancer", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteLoadBalancer indicates an expected call of DeleteLoadBalancer +func (mr *MockInterfaceMockRecorder) DeleteLoadBalancer(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteLoadBalancer", reflect.TypeOf((*MockInterface)(nil).DeleteLoadBalancer), arg0, arg1) +} + +// DeleteNodePort mocks base method +func (m *MockInterface) DeleteNodePort(arg0 map[int][]net.IP, arg1 uint16, arg2 openflow.Protocol, arg3 bool) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteNodePort", arg0, arg1, arg2, arg3) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteNodePort indicates an expected call of DeleteNodePort +func (mr *MockInterfaceMockRecorder) DeleteNodePort(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteNodePort", reflect.TypeOf((*MockInterface)(nil).DeleteNodePort), arg0, arg1, arg2, arg3) +} + // DeleteRoutes mocks base method func (m *MockInterface) DeleteRoutes(arg0 *net.IPNet) error { m.ctrl.T.Helper() @@ -105,6 +176,20 @@ func (mr *MockInterfaceMockRecorder) DeleteSNATRule(arg0 interface{}) *gomock.Ca return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteSNATRule", reflect.TypeOf((*MockInterface)(nil).DeleteSNATRule), arg0) } +// InitServiceProxyConfig mocks base method +func (m *MockInterface) InitServiceProxyConfig(arg0 bool) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "InitServiceProxyConfig", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// InitServiceProxyConfig indicates an expected call of InitServiceProxyConfig +func (mr *MockInterfaceMockRecorder) InitServiceProxyConfig(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InitServiceProxyConfig", reflect.TypeOf((*MockInterface)(nil).InitServiceProxyConfig), arg0) +} + // Initialize mocks base method func (m *MockInterface) Initialize(arg0 *config.NodeConfig, arg1 func()) error { m.ctrl.T.Helper() diff --git a/pkg/agent/util/ipset/ipset.go b/pkg/agent/util/ipset/ipset.go index 4bfd933f43a..0f13104a111 100644 --- a/pkg/agent/util/ipset/ipset.go +++ b/pkg/agent/util/ipset/ipset.go @@ -26,8 +26,9 @@ type SetType string const ( // The hash:net set type uses a hash to store different sized IP network addresses. 
// The lookup time grows linearly with the number of the different prefix values added to the set. - HashNet SetType = "hash:net" - HashIP SetType = "hash:ip" + HashNet SetType = "hash:net" + HashIP SetType = "hash:ip" + HashIPPort SetType = "hash:ip,port" ) // memberPattern is used to match the members part of ipset list result. diff --git a/pkg/agent/util/iptables/iptables.go b/pkg/agent/util/iptables/iptables.go index 2c6a3fb74b4..4d4e1934623 100644 --- a/pkg/agent/util/iptables/iptables.go +++ b/pkg/agent/util/iptables/iptables.go @@ -40,6 +40,7 @@ const ( ConnTrackTarget = "CT" NoTrackTarget = "NOTRACK" SNATTarget = "SNAT" + DNATTarget = "DNAT" PreRoutingChain = "PREROUTING" ForwardChain = "FORWARD" diff --git a/pkg/agent/util/net.go b/pkg/agent/util/net.go index 9fcd7fabc1e..6a490261f96 100644 --- a/pkg/agent/util/net.go +++ b/pkg/agent/util/net.go @@ -169,3 +169,102 @@ func GetIPWithFamily(ips []net.IP, addrFamily uint8) (net.IP, error) { return nil, errors.New("no IP found with IPv4 AddressFamily") } } + +// GetAllNodeIPs gets all Node IP addresses (excluding IPv6 link-local addresses). +func GetAllNodeIPs() (map[int][]net.IP, map[int][]net.IP, error) { + nodeIPv4Map := make(map[int][]net.IP) + nodeIPv6Map := make(map[int][]net.IP) + _, ipv6LinkLocalNet, _ := net.ParseCIDR("fe80::/64") + + // Get all interfaces. + interfaces, err := net.Interfaces() + if err != nil { + return nil, nil, err + } + for _, itf := range interfaces { + // Get all IPs of every interface. + ifIndex := itf.Index + addrs, err := itf.Addrs() + if err != nil { + return nil, nil, err + } + for _, addr := range addrs { + ip, _, _ := net.ParseCIDR(addr.String()) + if ipv6LinkLocalNet.Contains(ip) { + continue // Skip IPv6 link-local addresses. + } + + if ip.To4() != nil { + nodeIPv4Map[ifIndex] = append(nodeIPv4Map[ifIndex], ip) + } else { + nodeIPv6Map[ifIndex] = append(nodeIPv6Map[ifIndex], ip) + } + } + } + return nodeIPv4Map, nodeIPv6Map, nil +} + +// ExtendCIDRWithIP extends an IPNet so that the resulting CIDR covers the given IP as well. +func ExtendCIDRWithIP(ipNet *net.IPNet, ip net.IP) (*net.IPNet, error) { + cpl := commonPrefixLen(ipNet.IP, ip) + if cpl == 0 { + return nil, fmt.Errorf("invalid common prefix length") + } + _, newIPNet, err := net.ParseCIDR(fmt.Sprintf("%s/%d", ipNet.IP.String(), cpl)) + if err != nil { + return nil, err + } + return newIPNet, nil +} + +/* +This is copied from net/addrselect.go, as the function is not exported from the standard library's net package. +Modification: +- The argument type IP is replaced with net.IP. +*/ +func commonPrefixLen(a, b net.IP) (cpl int) { + if a4 := a.To4(); a4 != nil { + a = a4 + } + if b4 := b.To4(); b4 != nil { + b = b4 + } + if len(a) != len(b) { + return 0 + } + // If IPv6, only up to the prefix (first 64 bits) + if len(a) > 8 { + a = a[:8] + b = b[:8] + } + for len(a) > 0 { + if a[0] == b[0] { + cpl += 8 + a = a[1:] + b = b[1:] + continue + } + bits := 8 + ab, bb := a[0], b[0] + for { + ab >>= 1 + bb >>= 1 + bits-- + if ab == bb { + cpl += bits + return + } + } + } + return +} + +// GetNameByIndex returns the name of the network interface with the given index, or an empty string if it cannot be found. +func GetNameByIndex(ifIndex int) string { + dev, err := net.InterfaceByIndex(ifIndex) + if err != nil { + return "" + } + return dev.Name +} + +// GetIndexByName returns the index of the network interface with the given name, or 0 if it cannot be found. +func GetIndexByName(name string) int { + dev, err := net.InterfaceByName(name) + if err != nil { + return 0 + } + return dev.Index +} diff --git a/pkg/agent/util/net_linux.go b/pkg/agent/util/net_linux.go index 54f5d736149..854a8cd763a 100644 --- a/pkg/agent/util/net_linux.go +++ b/pkg/agent/util/net_linux.go @@ -221,3 +221,50 @@ func ListenLocalSocket(address string) (net.Listener, error) { func DialLocalSocket(address string) (net.Conn, error) { return dialUnix(address) } + +// GetDefaultRouteInterfaces returns the output interface indexes of the default IPv4 and IPv6 routes with the highest +// priority (i.e. the lowest metric value). +func GetDefaultRouteInterfaces() (map[int]int, error) { + defaultRouteInterfaces := make(map[int]int) + + var defaultRouteIPv4, defaultRouteIPv6 *netlink.Route + routesIPv4, err := netlink.RouteList(nil, netlink.FAMILY_V4) + if err != nil { + return nil, err + } + for i := range routesIPv4 { + if routesIPv4[i].Dst == nil && (defaultRouteIPv4 == nil || routesIPv4[i].Priority < defaultRouteIPv4.Priority) { + defaultRouteIPv4 = &routesIPv4[i] + defaultRouteInterfaces[netlink.FAMILY_V4] = routesIPv4[i].LinkIndex + } + } + if defaultRouteIPv4 != nil { + klog.Infof("Chosen IPv4 default route output interface index is %v", defaultRouteIPv4.LinkIndex) + } + + routesIPv6, err := netlink.RouteList(nil, netlink.FAMILY_V6) + if err != nil { + return nil, err + } + for i := range routesIPv6 { + if routesIPv6[i].Dst == nil && (defaultRouteIPv6 == nil || routesIPv6[i].Priority < defaultRouteIPv6.Priority) { + defaultRouteIPv6 = &routesIPv6[i] + if routesIPv6[i].LinkIndex == 0 { + // If there are multiple default output interfaces, choose an output interface.
+ for j := range routesIPv6[i].MultiPath { + if routesIPv6[i].MultiPath[j].LinkIndex != 0 { + defaultRouteInterfaces[netlink.FAMILY_V6] = routesIPv6[i].MultiPath[j].LinkIndex + break + } + } + } else { + defaultRouteInterfaces[netlink.FAMILY_V6] = routesIPv6[i].LinkIndex + } + } + } + if defaultRouteIPv6 != nil { + klog.Infof("Chosen IPv6 default route output interface index is %v", defaultRouteIPv6.LinkIndex) + } + + return defaultRouteInterfaces, nil +} diff --git a/pkg/ovs/openflow/interfaces.go b/pkg/ovs/openflow/interfaces.go index b6abef24603..4537328294f 100644 --- a/pkg/ovs/openflow/interfaces.go +++ b/pkg/ovs/openflow/interfaces.go @@ -71,6 +71,10 @@ const ( NxmFieldIPToS = "NXM_OF_IP_TOS" NxmFieldXXReg = "NXM_NX_XXREG" NxmFieldPktMark = "NXM_NX_PKT_MARK" + NxmFieldSrcIPv4 = "NXM_OF_IP_SRC" + NxmFieldDstIPv4 = "NXM_OF_IP_DST" + NxmFieldSrcIPv6 = "NXM_NX_IPV6_SRC" + NxmFieldDstIPv6 = "NXM_NX_IPV6_DST" ) const ( @@ -275,21 +279,34 @@ type LearnAction interface { DeleteLearned() LearnAction MatchEthernetProtocolIP(isIPv6 bool) LearnAction MatchTransportDst(protocol Protocol) LearnAction + MatchTransportDstAsSrc(protocol Protocol) LearnAction + MatchNetworkSrcAsDst(isIPv6 bool) LearnAction MatchLearnedTCPDstPort() LearnAction MatchLearnedUDPDstPort() LearnAction MatchLearnedSCTPDstPort() LearnAction MatchLearnedTCPv6DstPort() LearnAction MatchLearnedUDPv6DstPort() LearnAction MatchLearnedSCTPv6DstPort() LearnAction + MatchLearnedTCPDstPortAsSrcPort() LearnAction + MatchLearnedUDPDstPortAsSrcPort() LearnAction + MatchLearnedSCTPDstPortAsSrcPort() LearnAction + MatchLearnedTCPv6DstPortAsSrcPort() LearnAction + MatchLearnedUDPv6DstPortAsSrcPort() LearnAction + MatchLearnedSCTPv6DstPortAsSrcPort() LearnAction MatchLearnedSrcIP() LearnAction MatchLearnedDstIP() LearnAction MatchLearnedSrcIPv6() LearnAction MatchLearnedDstIPv6() LearnAction + MatchLearnedSrcIPAsDstIP() LearnAction + MatchLearnedDstIPAsSrcIP() LearnAction + MatchLearnedSrcIPv6AsDstIPv6() LearnAction + MatchLearnedDstIPv6AsSrcIPv6() LearnAction MatchReg(regID int, data uint32, rng Range) LearnAction LoadReg(regID int, data uint32, rng Range) LearnAction LoadRegToReg(fromRegID, toRegID int, fromRng, toRng Range) LearnAction LoadXXRegToXXReg(fromRegID, toRegID int, fromRng, toRng Range) LearnAction SetDstMAC(mac net.HardwareAddr) LearnAction + SetLearnedSrcMACAsDstMAC() LearnAction Done() FlowBuilder } diff --git a/pkg/ovs/openflow/ofctrl_action.go b/pkg/ovs/openflow/ofctrl_action.go index 4308188db6b..b9f65bb61c8 100644 --- a/pkg/ovs/openflow/ofctrl_action.go +++ b/pkg/ovs/openflow/ofctrl_action.go @@ -409,6 +409,47 @@ func (a *ofLearnAction) MatchTransportDst(protocol Protocol) LearnAction { return a } +// MatchTransportDstAsSrc specifies that the transport layer source field +// {tcp|udp|sctp}_src in the learned flow must match the destination field {tcp|udp|sctp}_dst +// of the packet currently being processed. It accepts the TCP, UDP and SCTP protocols +// (IPv4 or IPv6 variants); for any other protocol it does nothing.
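+// In ovs-ofctl terms, for ProtocolTCP the generated learn action carries matches roughly like (illustrative, not +// verbatim switch output): eth_type=0x800, nw_proto=6, OXM_OF_TCP_SRC[]=OXM_OF_TCP_DST[].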
+func (a *ofLearnAction) MatchTransportDstAsSrc(protocol Protocol) LearnAction { + var ipProtoValue int + isIPv6 := false + switch protocol { + case ProtocolTCP: + ipProtoValue = ofctrl.IP_PROTO_TCP + case ProtocolUDP: + ipProtoValue = ofctrl.IP_PROTO_UDP + case ProtocolSCTP: + ipProtoValue = ofctrl.IP_PROTO_SCTP + case ProtocolTCPv6: + ipProtoValue = ofctrl.IP_PROTO_TCP + isIPv6 = true + case ProtocolUDPv6: + ipProtoValue = ofctrl.IP_PROTO_UDP + isIPv6 = true + case ProtocolSCTPv6: + ipProtoValue = ofctrl.IP_PROTO_SCTP + isIPv6 = true + default: + // Return directly if the protocol is not acceptable. + return a + } + + a.MatchEthernetProtocolIP(isIPv6) + ipTypeVal := make([]byte, 2) + ipTypeVal[1] = byte(ipProtoValue) + a.nxLearn.AddMatch(&ofctrl.LearnField{Name: "NXM_OF_IP_PROTO"}, 1*8, nil, ipTypeVal) + // OXM_OF fields support TCP, UDP and SCTP, but NXM_OF fields only support TCP and UDP, so "OXM_OF_" is used here + // to generate the field name. + trimProtocol := strings.ReplaceAll(string(protocol), "v6", "") + learnFieldName := fmt.Sprintf("OXM_OF_%s_SRC", strings.ToUpper(trimProtocol)) + fromFieldName := fmt.Sprintf("OXM_OF_%s_DST", strings.ToUpper(trimProtocol)) + a.nxLearn.AddMatch(&ofctrl.LearnField{Name: learnFieldName}, 2*8, &ofctrl.LearnField{Name: fromFieldName}, nil) + return a +} + // MatchLearnedTCPDstPort specifies that the tcp_dst field in the learned flow // must match the tcp_dst of the packet currently being processed. func (a *ofLearnAction) MatchLearnedTCPDstPort() LearnAction { @@ -445,6 +486,42 @@ func (a *ofLearnAction) MatchLearnedSCTPv6DstPort() LearnAction { return a.MatchTransportDst(ProtocolSCTPv6) } +// MatchLearnedTCPDstPortAsSrcPort specifies that the tcp_src field in the learned flow +// must match the tcp_dst of the packet currently being processed. +func (a *ofLearnAction) MatchLearnedTCPDstPortAsSrcPort() LearnAction { + return a.MatchTransportDstAsSrc(ProtocolTCP) +} + +// MatchLearnedTCPv6DstPortAsSrcPort specifies that the tcp_src field in the learned flow +// must match the tcp_dst of the packet currently being processed. +func (a *ofLearnAction) MatchLearnedTCPv6DstPortAsSrcPort() LearnAction { + return a.MatchTransportDstAsSrc(ProtocolTCPv6) +} + +// MatchLearnedUDPDstPortAsSrcPort specifies that the udp_src field in the learned flow +// must match the udp_dst of the packet currently being processed. +func (a *ofLearnAction) MatchLearnedUDPDstPortAsSrcPort() LearnAction { + return a.MatchTransportDstAsSrc(ProtocolUDP) +} + +// MatchLearnedUDPv6DstPortAsSrcPort specifies that the udp_src field in the learned flow +// must match the udp_dst of the packet currently being processed. +func (a *ofLearnAction) MatchLearnedUDPv6DstPortAsSrcPort() LearnAction { + return a.MatchTransportDstAsSrc(ProtocolUDPv6) +} + +// MatchLearnedSCTPDstPortAsSrcPort specifies that the sctp_src field in the learned flow +// must match the sctp_dst of the packet currently being processed. +func (a *ofLearnAction) MatchLearnedSCTPDstPortAsSrcPort() LearnAction { + return a.MatchTransportDstAsSrc(ProtocolSCTP) +} + +// MatchLearnedSCTPv6DstPortAsSrcPort specifies that the sctp_src field in the learned flow +// must match the sctp_dst of the packet currently being processed. +func (a *ofLearnAction) MatchLearnedSCTPv6DstPortAsSrcPort() LearnAction { + return a.MatchTransportDstAsSrc(ProtocolSCTPv6) +} + // MatchLearnedSrcIP makes the learned flow to match the nw_src of current IP packet.
func (a *ofLearnAction) MatchLearnedSrcIP() LearnAction { a.nxLearn.AddMatch(&ofctrl.LearnField{Name: "NXM_OF_IP_SRC"}, 4*8, &ofctrl.LearnField{Name: "NXM_OF_IP_SRC"}, nil) @@ -469,6 +546,45 @@ func (a *ofLearnAction) MatchLearnedDstIPv6() LearnAction { return a } +// MatchLearnedSrcIPAsDstIP makes the learned flow to match the nw_dst of current IP packet's nw_src. +func (a *ofLearnAction) MatchLearnedSrcIPAsDstIP() LearnAction { + a.nxLearn.AddMatch(&ofctrl.LearnField{Name: "NXM_OF_IP_DST"}, 4*8, &ofctrl.LearnField{Name: "NXM_OF_IP_SRC"}, nil) + return a +} + +// MatchLearnedDstIPAsSrcIP makes the learned flow to match the nw_src of current IP packet's nw_dst. +func (a *ofLearnAction) MatchLearnedDstIPAsSrcIP() LearnAction { + a.nxLearn.AddMatch(&ofctrl.LearnField{Name: "NXM_OF_IP_SRC"}, 4*8, &ofctrl.LearnField{Name: "NXM_OF_IP_DST"}, nil) + return a +} + +// MatchLearnedSrcIPv6AsDstIPv6 makes the learned flow to match the ipv6_dst of current IPv6 packet's ipv6_src. +func (a *ofLearnAction) MatchLearnedSrcIPv6AsDstIPv6() LearnAction { + a.nxLearn.AddMatch(&ofctrl.LearnField{Name: "NXM_NX_IPV6_DST"}, 16*8, &ofctrl.LearnField{Name: "NXM_NX_IPV6_SRC"}, nil) + return a +} + +// MatchLearnedDstIPv6AsSrcIPv6 makes the learned flow to match the ipv6_src of current IPv6 packet's ipv6_dst. +func (a *ofLearnAction) MatchLearnedDstIPv6AsSrcIPv6() LearnAction { + a.nxLearn.AddMatch(&ofctrl.LearnField{Name: "NXM_NX_IPV6_SRC"}, 16*8, &ofctrl.LearnField{Name: "NXM_NX_IPV6_DST"}, nil) + return a +} + +// MatchNetworkSrcAsDst makes the learned flow to match the nw_dst/ipv6_dst of current packet's nw_src/ipv6_src. +func (a *ofLearnAction) MatchNetworkSrcAsDst(isIPv6 bool) LearnAction { + learnBits := uint16(32) + from := "NXM_OF_IP_SRC" + to := "NXM_OF_IP_DST" + if isIPv6 { + learnBits = 128 + from = "NXM_NX_IPV6_SRC" + to = "NXM_NX_IPV6_DST" + } + a.MatchEthernetProtocolIP(isIPv6) + a.nxLearn.AddMatch(&ofctrl.LearnField{Name: to}, learnBits, &ofctrl.LearnField{Name: from}, nil) + return a +} + // MatchReg makes the learned flow to match the data in the reg of specific range.
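+// Taken together, these helpers let a learned flow match the reverse direction of the packet being processed. A +// hypothetical chain, assuming la is a LearnAction obtained from a flow builder (table/timeout setup omitted): +// la.MatchEthernetProtocolIP(false). +// MatchLearnedTCPDstPortAsSrcPort(). +// MatchLearnedSrcIPAsDstIP(). +// SetLearnedSrcMACAsDstMAC(). +// Done()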
func (a *ofLearnAction) MatchReg(regID int, data uint32, rng Range) LearnAction { toField := &ofctrl.LearnField{Name: fmt.Sprintf("NXM_NX_REG%d", regID), Start: uint16(rng[0])} @@ -531,6 +647,13 @@ func (a *ofLearnAction) SetDstMAC(mac net.HardwareAddr) LearnAction { return a } +func (a *ofLearnAction) SetLearnedSrcMACAsDstMAC() LearnAction { + toField := &ofctrl.LearnField{Name: "NXM_OF_ETH_DST"} + fromField := &ofctrl.LearnField{Name: "NXM_OF_ETH_SRC"} + a.nxLearn.AddLoadAction(toField, 48, fromField, nil) + return a +} + func (a *ofLearnAction) Done() FlowBuilder { a.flowBuilder.ApplyAction(a.nxLearn) return a.flowBuilder diff --git a/test/e2e/bandwidth_test.go b/test/e2e/bandwidth_test.go index 0c82e1776af..470997440ab 100644 --- a/test/e2e/bandwidth_test.go +++ b/test/e2e/bandwidth_test.go @@ -91,7 +91,7 @@ func testBenchmarkBandwidthIntraNode(t *testing.T, data *TestData) { } func benchmarkBandwidthService(t *testing.T, endpointNode, clientNode string, data *TestData) { - svc, err := data.createService("perftest-b", iperfPort, iperfPort, map[string]string{"antrea-e2e": "perftest-b"}, false, v1.ServiceTypeClusterIP, nil) + svc, err := data.createService("perftest-b", iperfPort, iperfPort, map[string]string{"antrea-e2e": "perftest-b"}, false, false, v1.ServiceTypeClusterIP, nil) if err != nil { t.Fatalf("Error when creating perftest service: %v", err) } diff --git a/test/e2e/basic_test.go b/test/e2e/basic_test.go index 57497ec6bab..4828a22f5ce 100644 --- a/test/e2e/basic_test.go +++ b/test/e2e/basic_test.go @@ -26,12 +26,14 @@ import ( "github.com/google/uuid" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "k8s.io/apimachinery/pkg/util/wait" "antrea.io/antrea/pkg/agent/apiserver/handlers/podinterface" "antrea.io/antrea/pkg/agent/config" "antrea.io/antrea/pkg/agent/openflow/cookie" "antrea.io/antrea/pkg/clusteridentity" + "antrea.io/antrea/pkg/features" ) // TestBasic is the top-level test which contains some subtests for @@ -384,7 +386,11 @@ func testReconcileGatewayRoutesOnStartup(t *testing.T, data *TestData, isIPv6 bo continue } route := Route{} - if _, route.peerPodCIDR, err = net.ParseCIDR(matches[1]); err != nil { + m1 := matches[1] + if !strings.Contains(m1, "/") { + if isIPv6 { + m1 = m1 + "/128" + } else { + m1 = m1 + "/32" + } + } + if _, route.peerPodCIDR, err = net.ParseCIDR(m1); err != nil { return nil, fmt.Errorf("%s is not a valid net CIDR", matches[1]) } if route.peerPodGW = net.ParseIP(matches[2]); route.peerPodGW == nil { @@ -402,6 +408,21 @@ } else if encapMode == config.TrafficEncapModeHybrid { expectedRtNumMin = 1 } + agentFeatures, err := GetAgentFeatures() + require.NoError(t, err) + + if agentFeatures.Enabled(features.AntreaProxy) { + isProxyFull, err := data.IsProxyFull() + if err != nil { + t.Fatalf("Error getting option antreaProxyFull value: %v", err) + } + if isProxyFull { + if !isIPv6 { + expectedRtNumMin += 2 + } + expectedRtNumMax += 2 + } + } t.Logf("Retrieving gateway routes on Node '%s'", nodeName) var routes []Route diff --git a/test/e2e/flowaggregator_test.go b/test/e2e/flowaggregator_test.go index f96db55a82d..732122da873 100644 --- a/test/e2e/flowaggregator_test.go +++ b/test/e2e/flowaggregator_test.go @@ -999,12 +999,12 @@ func createPerftestServices(data *TestData, isIPv6 bool) (svcB *corev1.Service, svcIPFamily = corev1.IPv6Protocol } - svcB, err = data.createService("perftest-b", iperfPort, iperfPort, map[string]string{"antrea-e2e": "perftest-b"}, false, corev1.ServiceTypeClusterIP, 
&svcIPFamily) + svcB, err = data.createService("perftest-b", iperfPort, iperfPort, map[string]string{"antrea-e2e": "perftest-b"}, false, false, corev1.ServiceTypeClusterIP, &svcIPFamily) if err != nil { return nil, nil, fmt.Errorf("Error when creating perftest-b Service: %v", err) } - svcC, err = data.createService("perftest-c", iperfPort, iperfPort, map[string]string{"antrea-e2e": "perftest-c"}, false, corev1.ServiceTypeClusterIP, &svcIPFamily) + svcC, err = data.createService("perftest-c", iperfPort, iperfPort, map[string]string{"antrea-e2e": "perftest-c"}, false, false, corev1.ServiceTypeClusterIP, &svcIPFamily) if err != nil { return nil, nil, fmt.Errorf("Error when creating perftest-c Service: %v", err) } diff --git a/test/e2e/framework.go b/test/e2e/framework.go index dcaaf3a91d4..dac31c877e6 100644 --- a/test/e2e/framework.go +++ b/test/e2e/framework.go @@ -105,6 +105,7 @@ const ( nginxImage = "projects.registry.vmware.com/antrea/nginx" perftoolImage = "projects.registry.vmware.com/antrea/perftool" ipfixCollectorImage = "projects.registry.vmware.com/antrea/ipfix-collector:v0.5.4" + echoServerImage = "k8s.gcr.io/echoserver:1.10" ipfixCollectorPort = "4739" nginxLBService = "nginx-loadbalancer" @@ -979,6 +980,30 @@ func (data *TestData) createNginxPodOnNode(name string, ns string, nodeName stri }, false, nil) } +// createEchoServerPodOnNode creates a Pod in the test namespace with a single echoserver container. The +// Pod will be scheduled on the specified Node (if nodeName is not empty). +func (data *TestData) createEchoServerPodOnNode(name string, nodeName string, hostNetwork bool) error { + return data.createPodOnNode(name, testNamespace, nodeName, echoServerImage, []string{}, nil, nil, []corev1.ContainerPort{ + { + Name: "http", + ContainerPort: 8080, + Protocol: corev1.ProtocolTCP, + }, + }, hostNetwork, nil) +} + +// createNginxPodOnNodeV2 creates a Pod in the test namespace with a single nginx container. The +// Pod will be scheduled on the specified Node (if nodeName is not empty). +func (data *TestData) createNginxPodOnNodeV2(name string, nodeName string, hostNetwork bool) error { + return data.createPodOnNode(name, testNamespace, nodeName, nginxImage, []string{}, nil, nil, []corev1.ContainerPort{ + { + Name: "http", + ContainerPort: 80, + Protocol: corev1.ProtocolTCP, + }, + }, hostNetwork, nil) +} + // createServerPod creates a Pod that can listen to specified port and have named port set. func (data *TestData) createServerPod(name string, ns string, portName string, portNum int32, setHostPort bool, hostNetwork bool) error { // See https://github.com/kubernetes/kubernetes/blob/master/test/images/agnhost/porter/porter.go#L17 for the image's detail. @@ -1325,14 +1350,14 @@ func validatePodIP(podNetworkCIDR string, ip net.IP) (bool, error) { } // createService creates a service with port and targetPort.
-func (data *TestData) createService(serviceName string, port, targetPort int32, selector map[string]string, affinity bool, +func (data *TestData) createService(serviceName string, port, targetPort int32, selector map[string]string, affinity, nodeLocalExternal bool, serviceType corev1.ServiceType, ipFamily *corev1.IPFamily) (*corev1.Service, error) { annotation := make(map[string]string) - return data.createServiceWithAnnotations(serviceName, port, targetPort, selector, affinity, serviceType, ipFamily, annotation) + return data.createServiceWithAnnotations(serviceName, port, targetPort, selector, affinity, nodeLocalExternal, serviceType, ipFamily, annotation) } // createServiceWithAnnotations creates a Service with the given annotations. -func (data *TestData) createServiceWithAnnotations(serviceName string, port, targetPort int32, selector map[string]string, affinity bool, +func (data *TestData) createServiceWithAnnotations(serviceName string, port, targetPort int32, selector map[string]string, affinity, nodeLocalExternal bool, serviceType corev1.ServiceType, ipFamily *corev1.IPFamily, annotations map[string]string) (*corev1.Service, error) { affinityType := corev1.ServiceAffinityNone var ipFamilies []corev1.IPFamily @@ -1363,12 +1388,15 @@ func (data *TestData) createServiceWithAnnotations(serviceName string, port, tar IPFamilies: ipFamilies, }, } + if (serviceType == corev1.ServiceTypeNodePort || serviceType == corev1.ServiceTypeLoadBalancer) && nodeLocalExternal { + service.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeLocal + } return data.clientset.CoreV1().Services(testNamespace).Create(context.TODO(), &service, metav1.CreateOptions{}) } // createNginxClusterIPServiceWithAnnotations creates a nginx Service with the given annotations. func (data *TestData) createNginxClusterIPServiceWithAnnotations(affinity bool, ipFamily *corev1.IPFamily, annotation map[string]string) (*corev1.Service, error) { - return data.createServiceWithAnnotations("nginx", 80, 80, map[string]string{"app": "nginx"}, affinity, corev1.ServiceTypeClusterIP, ipFamily, annotation) + return data.createServiceWithAnnotations("nginx", 80, 80, map[string]string{"app": "nginx"}, affinity, false, corev1.ServiceTypeClusterIP, ipFamily, annotation) } // createNginxClusterIPService creates a nginx Service with the given name. @@ -1376,11 +1404,40 @@ func (data *TestData) createNginxClusterIPService(name string, affinity bool, ip if name == "" { name = "nginx" } - return data.createService(name, 80, 80, map[string]string{"app": "nginx"}, affinity, corev1.ServiceTypeClusterIP, ipFamily) + return data.createService(name, 80, 80, map[string]string{"app": "nginx"}, affinity, false, corev1.ServiceTypeClusterIP, ipFamily) +} + +// createEchoServerNodePortService creates a NodePort echoserver Service with the given name. +func (data *TestData) createEchoServerNodePortService(serviceName string, affinity, nodeLocalExternal bool, ipFamily *corev1.IPFamily) (*corev1.Service, error) { + return data.createService(serviceName, 8080, 8080, map[string]string{"app": "echoserver"}, affinity, nodeLocalExternal, corev1.ServiceTypeNodePort, ipFamily) +} + +// createEchoServerLoadBalancerService creates a LoadBalancer echoserver Service with the given name.
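+// A typical use in a test would be (hypothetical snippet, mirroring TestProxyLoadBalancerService later in this patch): +// ipFamily := corev1.IPv4Protocol +// svc, err := data.createEchoServerLoadBalancerService("echoserver-lb", false, false, []string{"169.254.169.1"}, &ipFamily)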
+func (data *TestData) createEchoServerLoadBalancerService(serviceName string, affinity, nodeLocalExternal bool, ingressIPs []string, ipFamily *corev1.IPFamily) (*corev1.Service, error) { + svc, err := data.createService(serviceName, 8080, 8080, map[string]string{"app": "echoserver"}, affinity, nodeLocalExternal, corev1.ServiceTypeLoadBalancer, ipFamily) + if err != nil { + return svc, err + } + ingress := make([]corev1.LoadBalancerIngress, len(ingressIPs)) + for idx, ingressIP := range ingressIPs { + ingress[idx].IP = ingressIP + } + updatedSvc := svc.DeepCopy() + updatedSvc.Status.LoadBalancer.Ingress = ingress + patchData, err := json.Marshal(updatedSvc) + if err != nil { + return svc, err + } + return data.clientset.CoreV1().Services(svc.Namespace).Patch(context.TODO(), svc.Name, types.MergePatchType, patchData, metav1.PatchOptions{}, "status") +} + +// createEchoServerClusterIPService creates a ClusterIP echoserver Service with the given name. +func (data *TestData) createEchoServerClusterIPService(serviceName string, affinity bool, ipFamily *corev1.IPFamily) (*corev1.Service, error) { + return data.createService(serviceName, 8080, 8080, map[string]string{"app": "echoserver"}, affinity, false, corev1.ServiceTypeClusterIP, ipFamily) } func (data *TestData) createNginxLoadBalancerService(affinity bool, ingressIPs []string, ipFamily *corev1.IPFamily) (*corev1.Service, error) { - svc, err := data.createService(nginxLBService, 80, 80, map[string]string{"app": "nginx"}, affinity, corev1.ServiceTypeLoadBalancer, ipFamily) + svc, err := data.createService(nginxLBService, 80, 80, map[string]string{"app": "nginx"}, affinity, false, corev1.ServiceTypeLoadBalancer, ipFamily) if err != nil { return svc, err } @@ -1633,6 +1690,20 @@ func (data *TestData) GetEncapMode() (config.TrafficEncapModeType, error) { return config.TrafficEncapModeEncap, nil } +func (data *TestData) IsProxyFull() (bool, error) { + configMap, err := data.GetAntreaConfigMap(antreaNamespace) + if err != nil { + return false, fmt.Errorf("failed to get Antrea ConfigMap: %v", err) + } + for _, antreaConfig := range configMap.Data { + searchStr := "antreaProxyFull: true" + if strings.Contains(strings.ToLower(antreaConfig), strings.ToLower(searchStr)) { + return true, nil + } + } + return false, nil +} + func getFeatures(confName string) (featuregate.FeatureGate, error) { featureGate := features.DefaultMutableFeatureGate.DeepCopy() var cfg interface{} diff --git a/test/e2e/networkpolicy_test.go b/test/e2e/networkpolicy_test.go index c465d8b3337..b49806ee64f 100644 --- a/test/e2e/networkpolicy_test.go +++ b/test/e2e/networkpolicy_test.go @@ -349,7 +349,7 @@ func testDefaultDenyIngressPolicy(t *testing.T, data *TestData) { _, serverIPs, cleanupFunc := createAndWaitForPod(t, data, data.createNginxPodOnNode, "test-server-", serverNode, testNamespace) defer cleanupFunc() - service, err := data.createService("nginx", serverPort, serverPort, map[string]string{"app": "nginx"}, false, corev1.ServiceTypeNodePort, nil) + service, err := data.createService("nginx", serverPort, serverPort, map[string]string{"app": "nginx"}, false, false, corev1.ServiceTypeNodePort, nil) if err != nil { t.Fatalf("Error when creating nginx NodePort service: %v", err) } diff --git a/test/e2e/nodeportlocal_test.go b/test/e2e/nodeportlocal_test.go index bffc84d6a44..00c5549a74b 100644 --- a/test/e2e/nodeportlocal_test.go +++ b/test/e2e/nodeportlocal_test.go @@ -295,8 +295,8 @@ func NPLTestPodAddMultiPort(t *testing.T) { selector := make(map[string]string)
selector["app"] = "agnhost" ipFamily := corev1.IPv4Protocol - testData.createServiceWithAnnotations("agnhost1", 80, 80, selector, false, corev1.ServiceTypeClusterIP, &ipFamily, annotation) - testData.createServiceWithAnnotations("agnhost2", 80, 8080, selector, false, corev1.ServiceTypeClusterIP, &ipFamily, annotation) + testData.createServiceWithAnnotations("agnhost1", 80, 80, selector, false, false, corev1.ServiceTypeClusterIP, &ipFamily, annotation) + testData.createServiceWithAnnotations("agnhost2", 80, 8080, selector, false, false, corev1.ServiceTypeClusterIP, &ipFamily, annotation) targetPorts := sets.NewInt(80, 8080) podcmd := "porter" diff --git a/test/e2e/proxy_test.go b/test/e2e/proxy_test.go index e8e180ad413..511f86dbe30 100644 --- a/test/e2e/proxy_test.go +++ b/test/e2e/proxy_test.go @@ -15,6 +15,7 @@ package e2e import ( + "context" "encoding/hex" "fmt" "net" @@ -76,9 +77,400 @@ func testProxyServiceSessionAffinityCase(t *testing.T, data *TestData) { } } +func skipIfProxyFullDisabled(t *testing.T, data *TestData) { + isProxyFull, err := data.IsProxyFull() + if err != nil { + t.Fatalf("Error getting option antreaProxyFull value") + } + if !isProxyFull { + t.Skipf("Skipping test because option antreaProxyFull is not enabled") + } +} + +func skipIfKubeProxyEnabledOnLinux(t *testing.T, data *TestData, nodeName string) { + pods, err := data.clientset.CoreV1().Pods(antreaNamespace).List(context.TODO(), metav1.ListOptions{}) + if err != nil { + t.Fatalf("Error fetching pods: %v", err) + } + for _, pod := range pods.Items { + if strings.Contains(pod.Name, "kube-proxy") && pod.Spec.NodeName == nodeName { + t.Skipf("Skipping test because kube-proxy is running") + } + } +} + +func TestProxyLoadBalancerService(t *testing.T) { + data, err := setupTest(t) + if err != nil { + t.Fatalf("Error when setting up test: %v", err) + } + defer teardownTest(t, data) + + skipIfProxyDisabled(t) + skipIfProxyFullDisabled(t, data) + skipIfHasWindowsNodes(t) + skipIfNumNodesLessThan(t, 2) + + testPodNameCp := "echoserver-cp" + testPodNameWk := "echoserver-wk" + clientPodCp := "busybox-cp" + clientPodWk := "busybox-wk" + clusterIngressIP := []string{"169.254.169.1"} + localIngressIP := []string{"169.254.169.2"} + port := "8080" + + clientCpIP, clientWkIP := createTestClientPods(t, data, clientPodCp, clientPodWk) + createTestEchoServerPods(t, data, testPodNameCp, testPodNameWk, false) + createLoadBalancerServices(t, data, clusterIngressIP, localIngressIP) + clusterUrl := net.JoinHostPort(clusterIngressIP[0], port) + localUrl := net.JoinHostPort(localIngressIP[0], port) + + t.Run("Pod CIDR Endpoints", func(t *testing.T) { + loadBalancerTestCases(t, data, clusterUrl, localUrl, clientPodCp, clientPodWk, clientCpIP, clientWkIP, testPodNameCp, testPodNameWk) + }) + + testPodHostNetworkNameCp := "echoserver-cp-h" + testPodHostNetworkNameWk := "echoserver-wk-h" + nodeNameCp := controlPlaneNodeName() + nodeNameWk := workerNodeName(1) + deleteTestEchoServerPods(t, data, testPodNameCp, testPodNameWk) + createTestEchoServerPods(t, data, testPodHostNetworkNameCp, testPodHostNetworkNameWk, true) + t.Run("Host Network Endpoints", func(t *testing.T) { + loadBalancerTestCases(t, data, clusterUrl, localUrl, clientPodCp, clientPodWk, clientCpIP, clientWkIP, nodeNameCp, nodeNameWk) + }) +} + +func loadBalancerTestCases(t *testing.T, data *TestData, clusterUrl, localUrl, clientCp, clientWk, clientCpIP, clientWkIP, + testPodHostnameCp, testPodHostnameWk string) { + t.Run("Case=ExternalTrafficPolicy:Cluster Client:Local", func(t 
*testing.T) { + testLoadBalancerClusterFromLocal(t, data, clusterUrl) + }) + t.Run("Case=ExternalTrafficPolicy:Cluster Client:Pod", func(t *testing.T) { + testLoadBalancerClusterFromPod(t, data, clusterUrl, clientCp, clientWk) + }) + t.Run("Case=ExternalTrafficPolicy:Local Client:Local", func(t *testing.T) { + testLoadBalancerLocalFromLocal(t, data, localUrl, testPodHostnameCp, testPodHostnameWk) + }) + t.Run("Case=ExternalTrafficPolicy:Local Client:Pod", func(t *testing.T) { + testLoadBalancerLocalFromPod(t, data, localUrl, clientCp, clientWk, clientCpIP, clientWkIP, testPodHostnameCp, testPodHostnameWk) + }) +} + +func createLoadBalancerServices(t *testing.T, data *TestData, ingressIPCluster, ingressIPLocal []string) { + ipProtocol := corev1.IPv4Protocol + _, err := data.createEchoServerLoadBalancerService("echoserver-cluster", true, false, ingressIPCluster, &ipProtocol) + require.NoError(t, err) + _, err = data.createEchoServerLoadBalancerService("echoserver-local", true, true, ingressIPLocal, &ipProtocol) + require.NoError(t, err) +} + +func testLoadBalancerClusterFromLocal(t *testing.T, data *TestData, url string) { + errMsg := "A LoadBalancer Service whose externalTrafficPolicy is Cluster should be reachable from the local Node" + + nodeCp := controlPlaneNodeName() + skipIfKubeProxyEnabledOnLinux(t, data, nodeCp) + _, _, _, err := RunCommandOnNode(nodeCp, strings.Join([]string{"wget", "-O", "-", url, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + + nodeWk := workerNodeName(1) + skipIfKubeProxyEnabledOnLinux(t, data, nodeWk) + _, _, _, err = RunCommandOnNode(nodeWk, strings.Join([]string{"wget", "-O", "-", url, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) +} + +func testLoadBalancerClusterFromPod(t *testing.T, data *TestData, url, clientCp, clientWk string) { + errMsg := "A LoadBalancer Service whose externalTrafficPolicy is Cluster should be reachable from a Pod" + _, _, err := data.runCommandFromPod(testNamespace, clientCp, busyboxContainerName, []string{"wget", "-O", "-", url, "-T", "1"}) + require.NoError(t, err, errMsg) + _, _, err = data.runCommandFromPod(testNamespace, clientWk, busyboxContainerName, []string{"wget", "-O", "-", url, "-T", "1"}) + require.NoError(t, err, errMsg) +} + +func testLoadBalancerLocalFromLocal(t *testing.T, data *TestData, url, nodeHostnameCp, nodeHostnameWk string) { + errMsg := "A LoadBalancer Service whose externalTrafficPolicy is Local should be reachable from the local Node" + + nodeCp := controlPlaneNodeName() + skipIfKubeProxyEnabledOnLinux(t, data, nodeCp) + _, output, _, err := RunCommandOnNode(nodeCp, strings.Join([]string{"wget", "-O", "-", url, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + require.Contains(t, output, fmt.Sprintf("Hostname: %s", nodeHostnameCp), fmt.Sprintf("hostname should be %s", nodeHostnameCp)) + + nodeWk := workerNodeName(1) + skipIfKubeProxyEnabledOnLinux(t, data, nodeWk) + _, output, _, err = RunCommandOnNode(nodeWk, strings.Join([]string{"wget", "-O", "-", url, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + require.Contains(t, output, fmt.Sprintf("Hostname: %s", nodeHostnameWk), fmt.Sprintf("hostname should be %s", nodeHostnameWk)) +} + +func testLoadBalancerLocalFromPod(t *testing.T, data *TestData, url, clientCp, clientWk, clientCpIP, clientWkIP, testPodHostnameCp, testPodHostnameWk string) { + errMsg := "A LoadBalancer Service whose externalTrafficPolicy is Local should be reachable from a Pod" + output, _, err := data.runCommandFromPod(testNamespace, clientCp, 
+func testLoadBalancerLocalFromPod(t *testing.T, data *TestData, url, clientCp, clientWk, clientCpIP, clientWkIP, testPodHostnameCp, testPodHostnameWk string) { + errMsg := "Server LoadBalancer whose externalTrafficPolicy is Local should be able to be connected from pod" + output, _, err := data.runCommandFromPod(testNamespace, clientCp, busyboxContainerName, []string{"wget", "-O", "-", url, "-T", "1"}) + require.NoError(t, err, errMsg) + require.Contains(t, output, fmt.Sprintf("Hostname: %s", testPodHostnameCp), fmt.Sprintf("hostname should be %s", testPodHostnameCp)) + require.Contains(t, output, fmt.Sprintf("client_address=%s", clientCpIP), fmt.Sprintf("client IP should be %s", clientCpIP)) + + output, _, err = data.runCommandFromPod(testNamespace, clientWk, busyboxContainerName, []string{"wget", "-O", "-", url, "-T", "1"}) + require.NoError(t, err, errMsg) + require.Contains(t, output, fmt.Sprintf("Hostname: %s", testPodHostnameWk), fmt.Sprintf("hostname should be %s", testPodHostnameWk)) + require.Contains(t, output, fmt.Sprintf("client_address=%s", clientWkIP), fmt.Sprintf("client IP should be %s", clientWkIP)) +} + +func TestProxyNodePortService(t *testing.T) { + data, err := setupTest(t) + if err != nil { + t.Fatalf("Error when setting up test: %v", err) + } + defer teardownTest(t, data) + + skipIfProxyDisabled(t) + skipIfProxyFullDisabled(t, data) + skipIfHasWindowsNodes(t) + skipIfNumNodesLessThan(t, 2) + + testPodNameCp := "echoserver-cp" + testPodNameWk := "echoserver-wk" + clientPodCp := "busybox-cp" + clientPodWk := "busybox-wk" + + podClientCpIP, podClientWkIP := createTestClientPods(t, data, clientPodCp, clientPodWk) + createTestEchoServerPods(t, data, testPodNameCp, testPodNameWk, false) + portCluster, portLocal := createNodePortServices(t, data) + nodeCpIP := controlPlaneNodeIP() + nodeWkIP := workerNodeIP(1) + clusterUrlCp := net.JoinHostPort(nodeCpIP, portCluster) + clusterUrlWk := net.JoinHostPort(nodeWkIP, portCluster) + clusterUrlLo := net.JoinHostPort("127.0.0.1", portCluster) + localUrlCp := net.JoinHostPort(nodeCpIP, portLocal) + localUrlWk := net.JoinHostPort(nodeWkIP, portLocal) + localUrlLo := net.JoinHostPort("127.0.0.1", portLocal) + + t.Run("Pod CIDR Endpoints", func(t *testing.T) { + nodePortTestCases(t, data, clusterUrlCp, clusterUrlWk, clusterUrlLo, localUrlCp, localUrlWk, localUrlLo, + nodeCpIP, nodeWkIP, podClientCpIP, podClientWkIP, testPodNameCp, testPodNameWk, clientPodCp, clientPodWk, false) + }) + + testPodHostNetworkNameCp := "echoserver-cp-h" + testPodHostNetworkNameWk := "echoserver-wk-h" + nodeNameCp := controlPlaneNodeName() + nodeNameWk := workerNodeName(1) + deleteTestEchoServerPods(t, data, testPodNameCp, testPodNameWk) + createTestEchoServerPods(t, data, testPodHostNetworkNameCp, testPodHostNetworkNameWk, true) + t.Run("Host Network Endpoints", func(t *testing.T) { + nodePortTestCases(t, data, clusterUrlCp, clusterUrlWk, clusterUrlLo, localUrlCp, localUrlWk, localUrlLo, + nodeCpIP, nodeWkIP, podClientCpIP, podClientWkIP, nodeNameCp, nodeNameWk, clientPodCp, clientPodWk, true) + }) +} + +func nodePortTestCases(t *testing.T, data *TestData, clusterUrlCp, clusterUrlWk, clusterUrlLo, localUrlCp, localUrlWk, localUrlLo string, + nodeCpIP, nodeWkIP, podClientCpIP, podClientWkIP, testPodHostnameCp, testPodHostnameWk, clientPodCp, clientPodWk string, hostNetwork bool) { + t.Run("Case=ExternalTrafficPolicy:Cluster Client:Remote", func(t *testing.T) { + testNodePortClusterFromRemote(t, clusterUrlCp, clusterUrlWk) + }) + t.Run("Case=ExternalTrafficPolicy:Cluster Client:Local", func(t *testing.T) { + testNodePortClusterFromLocal(t, data, clusterUrlCp, clusterUrlWk, clusterUrlLo) + }) + t.Run("Case=ExternalTrafficPolicy:Cluster Client:Pod", func(t *testing.T) { + testNodePortClusterFromPod(t, data, clusterUrlCp, clusterUrlWk, clientPodCp, clientPodWk) + })
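+ // externalTrafficPolicy:Local cases: the connection must be served by the Endpoint local to the accessed Node, which is verified through the hostname and client_address echoed by the server.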
t.Run("Case=ExternalTrafficPolicy:Local Client:Remote", func(t *testing.T) { + if hostNetwork { + t.Skipf("Skip this test if Endpoint is on host network") + } + testNodePortLocalFromRemote(t, localUrlCp, localUrlWk, nodeCpIP, nodeWkIP, testPodHostnameCp, testPodHostnameWk) + }) + t.Run("Case=ExternalTrafficPolicy:Local Client:Local", func(t *testing.T) { + testNodePortLocalFromLocal(t, data, localUrlCp, localUrlWk, localUrlLo, nodeCpIP, nodeWkIP, testPodHostnameCp, testPodHostnameWk) + }) + t.Run("Case=ExternalTrafficPolicy:Local Client:Pod", func(t *testing.T) { + testNodePortLocalFromPod(t, data, localUrlCp, localUrlWk, clientPodCp, clientPodWk, podClientCpIP, podClientWkIP, testPodHostnameCp, testPodHostnameWk) + }) +} + +func createTestEchoServerPods(t *testing.T, data *TestData, echoServerCp, echoServerWk string, hostNetwork bool) { + // Create test echoserver pod on each node. + if echoServerCp != "" { + require.NoError(t, data.createEchoServerPodOnNode(echoServerCp, nodeName(0), hostNetwork)) + _, err := data.podWaitForIPs(defaultTimeout, echoServerCp, testNamespace) + require.NoError(t, err) + require.NoError(t, data.podWaitForRunning(defaultTimeout, echoServerCp, testNamespace)) + } + if echoServerWk != "" { + require.NoError(t, data.createEchoServerPodOnNode(echoServerWk, nodeName(1), hostNetwork)) + _, err := data.podWaitForIPs(defaultTimeout, echoServerWk, testNamespace) + require.NoError(t, err) + require.NoError(t, data.podWaitForRunning(defaultTimeout, echoServerWk, testNamespace)) + } +} + +func createTestClientPods(t *testing.T, data *TestData, clientCp, clientWk string) (string, string) { + // Create a busybox Pod on each node which is used as test client. + require.NoError(t, data.createBusyboxPodOnNode(clientCp, testNamespace, nodeName(0))) + require.NoError(t, data.podWaitForRunning(defaultTimeout, clientCp, testNamespace)) + busyboxCpPod, err := data.podWaitFor(defaultTimeout, clientCp, testNamespace, func(pod *corev1.Pod) (bool, error) { + return pod.Status.Phase == corev1.PodRunning, nil + }) + require.NoError(t, err) + require.NotNil(t, busyboxCpPod.Status) + require.NoError(t, data.createBusyboxPodOnNode(clientWk, testNamespace, nodeName(1))) + require.NoError(t, data.podWaitForRunning(defaultTimeout, clientWk, testNamespace)) + busyboxWkPod, err := data.podWaitFor(defaultTimeout, clientWk, testNamespace, func(pod *corev1.Pod) (bool, error) { + return pod.Status.Phase == corev1.PodRunning, nil + }) + require.NoError(t, err) + require.NotNil(t, busyboxWkPod.Status) + return busyboxCpPod.Status.PodIP, busyboxWkPod.Status.PodIP +} + +func deleteTestEchoServerPods(t *testing.T, data *TestData, echoServerCp, echoServerWk string) { + if echoServerCp != "" { + require.NoError(t, data.deletePod(testNamespace, echoServerCp)) + } + if echoServerWk != "" { + require.NoError(t, data.deletePod(testNamespace, echoServerWk)) + } +} + +func createNodePortServices(t *testing.T, data *TestData) (string, string) { + ipProctol := corev1.IPv4Protocol + nodePortCluster, err := data.createEchoServerNodePortService("echoserver-cluster", false, false, &ipProctol) + require.NoError(t, err) + nodePortLocal, err := data.createEchoServerNodePortService("echoserver-local", false, true, &ipProctol) + require.NoError(t, err) + var portCluster, portLocal string + for _, port := range nodePortCluster.Spec.Ports { + if port.NodePort != 0 { + portCluster = fmt.Sprint(port.NodePort) + break + } + } + for _, port := range nodePortLocal.Spec.Ports { + if port.NodePort != 0 { + portLocal = 
fmt.Sprint(port.NodePort) + break + } + } + return portCluster, portLocal +} + +func testNodePortClusterFromRemote(t *testing.T, urlCp, urlWk string) { + errMsg := "Server NodePort whose externalTrafficPolicy is Cluster should be able to be connected from remote" + _, _, _, err := RunCommandOnNode(controlPlaneNodeName(), strings.Join([]string{"wget", "-O", "-", urlWk, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + _, _, _, err = RunCommandOnNode(workerNodeName(1), strings.Join([]string{"wget", "-O", "-", urlCp, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) +} + +func testNodePortClusterFromLocal(t *testing.T, data *TestData, urlCp, urlWk, urlLo string) { + errMsg := "Server NodePort whose externalTrafficPolicy is Cluster should be able to be connected from localhost" + + nodeCp := controlPlaneNodeName() + skipIfKubeProxyEnabledOnLinux(t, data, nodeCp) + _, _, _, err := RunCommandOnNode(nodeCp, strings.Join([]string{"wget", "-O", "-", urlCp, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + _, _, _, err = RunCommandOnNode(nodeCp, strings.Join([]string{"wget", "-O", "-", urlLo, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + + nodeWk := workerNodeName(1) + skipIfKubeProxyEnabledOnLinux(t, data, nodeWk) + _, _, _, err = RunCommandOnNode(nodeWk, strings.Join([]string{"wget", "-O", "-", urlWk, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + _, _, _, err = RunCommandOnNode(nodeWk, strings.Join([]string{"wget", "-O", "-", urlLo, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) +} + +func testNodePortClusterFromPod(t *testing.T, data *TestData, urlCp, urlWk, clientCp, clientWk string) { + errMsg := "Server NodePort whose externalTrafficPolicy is Cluster should be able to be connected from pod" + _, _, err := data.runCommandFromPod(testNamespace, clientCp, busyboxContainerName, []string{"wget", "-O", "-", urlCp, "-T", "1"}) + require.NoError(t, err, errMsg) + _, _, err = data.runCommandFromPod(testNamespace, clientCp, busyboxContainerName, []string{"wget", "-O", "-", urlWk, "-T", "1"}) + require.NoError(t, err, errMsg) + _, _, err = data.runCommandFromPod(testNamespace, clientWk, busyboxContainerName, []string{"wget", "-O", "-", urlCp, "-T", "1"}) + require.NoError(t, err, errMsg) + _, _, err = data.runCommandFromPod(testNamespace, clientWk, busyboxContainerName, []string{"wget", "-O", "-", urlWk, "-T", "1"}) + require.NoError(t, err, errMsg) +} + +func testNodePortLocalFromRemote(t *testing.T, urlCp, urlWk, nodeIPCp, nodeIPWk, nodeHostnameCp, nodeHostnameWk string) { + errMsg := "Server NodePort whose externalTrafficPolicy is Local should be able to be connected from remote" + _, output, _, err := RunCommandOnNode(controlPlaneNodeName(), strings.Join([]string{"wget", "-O", "-", urlWk, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + require.Contains(t, output, fmt.Sprintf("Hostname: %s", nodeHostnameWk), fmt.Sprintf("hostname should be %s", nodeHostnameWk)) + require.Contains(t, output, fmt.Sprintf("client_address=%s", nodeIPCp), fmt.Sprintf("client IP should be %s", nodeIPCp)) + _, output, _, err = RunCommandOnNode(workerNodeName(1), strings.Join([]string{"wget", "-O", "-", urlCp, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + require.Contains(t, output, fmt.Sprintf("Hostname: %s", nodeHostnameCp), fmt.Sprintf("hostname should be %s", nodeHostnameCp)) + require.Contains(t, output, fmt.Sprintf("client_address=%s", nodeIPWk), fmt.Sprintf("client IP should be %s", nodeIPWk)) +} + +func testNodePortLocalFromLocal(t *testing.T, data *TestData, urlCp, urlWk, 
urlLo, nodeIPCp, nodeIPWk, nodeHostnameCp, nodeHostnameWk string) { + errMsg := "Server NodePort whose externalTrafficPolicy is Local should be able to be connected from local" + + nodeCp := controlPlaneNodeName() + skipIfKubeProxyEnabledOnLinux(t, data, nodeCp) + _, output, _, err := RunCommandOnNode(nodeCp, strings.Join([]string{"wget", "-O", "-", urlCp, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + require.Contains(t, output, fmt.Sprintf("Hostname: %s", nodeHostnameCp), fmt.Sprintf("hostname should be %s", nodeHostnameCp)) + require.NotContains(t, output, fmt.Sprintf("client_address=%s", nodeIPCp), fmt.Sprintf("client IP should not be %s", nodeIPCp)) + _, output, _, err = RunCommandOnNode(nodeCp, strings.Join([]string{"wget", "-O", "-", urlLo, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + require.Contains(t, output, fmt.Sprintf("Hostname: %s", nodeHostnameCp), fmt.Sprintf("hostname should be %s", nodeHostnameCp)) + require.NotContains(t, output, fmt.Sprintf("client_address=%s", nodeIPCp), fmt.Sprintf("client IP should not be %s", nodeIPCp)) + + nodeWk := nodeName(1) + skipIfKubeProxyEnabledOnLinux(t, data, nodeWk) + _, output, _, err = RunCommandOnNode(nodeWk, strings.Join([]string{"wget", "-O", "-", urlWk, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + require.Contains(t, output, fmt.Sprintf("Hostname: %s", nodeHostnameWk), fmt.Sprintf("hostname should be %s", nodeHostnameWk)) + require.NotContains(t, output, fmt.Sprintf("client_address=%s", nodeIPWk), fmt.Sprintf("client IP should not be %s", nodeIPWk)) + _, output, _, err = RunCommandOnNode(nodeWk, strings.Join([]string{"wget", "-O", "-", urlLo, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) + require.Contains(t, output, fmt.Sprintf("Hostname: %s", nodeHostnameWk), fmt.Sprintf("hostname should be %s", nodeHostnameWk)) + require.NotContains(t, output, fmt.Sprintf("client_address=%s", nodeIPWk), fmt.Sprintf("client IP should not be %s", nodeIPWk)) +} + +func testNodePortLocalFromPod(t *testing.T, data *TestData, urlCp, urlWk, clientCp, clientWk, clientIPCp, clientIPWk, nodeHostnameCp, nodeHostnameWk string) { + errMsg := "Server NodePort whose externalTrafficPolicy is Local should be able to be connected from pod " + output, _, err := data.runCommandFromPod(testNamespace, clientCp, busyboxContainerName, []string{"wget", "-O", "-", urlCp, "-T", "1"}) + require.NoError(t, err, errMsg) + require.Contains(t, output, fmt.Sprintf("Hostname: %s", nodeHostnameCp), fmt.Sprintf("hostname should be %s", nodeHostnameCp)) + require.Contains(t, output, fmt.Sprintf("client_address=%s", clientIPCp), fmt.Sprintf("client IP should be %s", clientIPCp)) + + output, _, err = data.runCommandFromPod(testNamespace, clientWk, busyboxContainerName, []string{"wget", "-O", "-", urlWk, "-T", "1"}) + require.NoError(t, err, errMsg) + require.Contains(t, output, fmt.Sprintf("Hostname: %s", nodeHostnameWk), fmt.Sprintf("hostname should be %s", nodeHostnameWk)) + require.Contains(t, output, fmt.Sprintf("client_address=%s", clientIPWk), fmt.Sprintf("client IP should be %s", clientIPWk)) +} + +func TestProxyServiceSessionAffinity(t *testing.T) { + skipIfProviderIs(t, "kind", "#881 Does not work in Kind, needs to be investigated.") + skipIfHasWindowsNodes(t) + + data, err := setupTest(t) + if err != nil { + t.Fatalf("Error when setting up test: %v", err) + } + defer teardownTest(t, data) + + skipIfProxyDisabled(t) + + if len(clusterInfo.podV4NetworkCIDR) != 0 { + ipFamily := corev1.IPv4Protocol + testProxyServiceSessionAffinity(&ipFamily, 
[]string{"169.254.169.1", "169.254.169.2"}, data, t) + } + if len(clusterInfo.podV6NetworkCIDR) != 0 { + ipFamily := corev1.IPv6Protocol + testProxyServiceSessionAffinity(&ipFamily, []string{"fd75::aabb:ccdd:ef00", "fd75::aabb:ccdd:ef01"}, data, t) + } +} + func testProxyServiceSessionAffinity(ipFamily *corev1.IPFamily, ingressIPs []string, data *TestData, t *testing.T) { nodeName := nodeName(1) nginx := randName("nginx-") + isProxyFull, err := data.IsProxyFull() + if err != nil { + t.Fatalf("Error getting option antreaProxyFull value") + } + require.NoError(t, data.createNginxPodOnNode(nginx, testNamespace, nodeName)) nginxIP, err := data.podWaitForIPs(defaultTimeout, nginx, testNamespace) defer data.deletePodAndWait(defaultTimeout, nginx, testNamespace) @@ -112,19 +504,22 @@ func testProxyServiceSessionAffinity(ipFamily *corev1.IPFamily, ingressIPs []str if *ipFamily == corev1.IPv4Protocol { require.Contains(t, table40Output, fmt.Sprintf("nw_dst=%s,tp_dst=80", svc.Spec.ClusterIP)) require.Contains(t, table40Output, fmt.Sprintf("load:0x%s->NXM_NX_REG3[]", strings.TrimLeft(hex.EncodeToString(nginxIP.ipv4.To4()), "0"))) - for _, ingressIP := range ingressIPs { - require.Contains(t, table40Output, fmt.Sprintf("nw_dst=%s,tp_dst=80", ingressIP)) + if isProxyFull { + for _, ingressIP := range ingressIPs { + require.Contains(t, table40Output, fmt.Sprintf("nw_dst=%s,tp_dst=80", ingressIP)) + } } } else { require.Contains(t, table40Output, fmt.Sprintf("ipv6_dst=%s,tp_dst=80", svc.Spec.ClusterIP)) require.Contains(t, table40Output, fmt.Sprintf("load:0x%s->NXM_NX_XXREG3[0..63]", strings.TrimLeft(hex.EncodeToString([]byte(*nginxIP.ipv6)[8:16]), "0"))) require.Contains(t, table40Output, fmt.Sprintf("load:0x%s->NXM_NX_XXREG3[64..127]", strings.TrimLeft(hex.EncodeToString([]byte(*nginxIP.ipv6)[0:8]), "0"))) - for _, ingressIP := range ingressIPs { - require.Contains(t, table40Output, fmt.Sprintf("ipv6_dst=%s,tp_dst=80", ingressIP)) + if isProxyFull { + for _, ingressIP := range ingressIPs { + require.Contains(t, table40Output, fmt.Sprintf("ipv6_dst=%s,tp_dst=80", ingressIP)) + } } } } - func testProxyHairpinCase(t *testing.T, data *TestData) { if len(clusterInfo.podV4NetworkCIDR) != 0 { ipFamily := corev1.IPv4Protocol @@ -136,6 +531,27 @@ func testProxyHairpinCase(t *testing.T, data *TestData) { } } +func TestProxyHairpin(t *testing.T) { + skipIfHasWindowsNodes(t) + + data, err := setupTest(t) + if err != nil { + t.Fatalf("Error when setting up test: %v", err) + } + defer teardownTest(t, data) + + skipIfProxyDisabled(t) + + if len(clusterInfo.podV4NetworkCIDR) != 0 { + ipFamily := corev1.IPv4Protocol + testProxyHairpin(&ipFamily, data, t) + } + if len(clusterInfo.podV6NetworkCIDR) != 0 { + ipFamily := corev1.IPv6Protocol + testProxyHairpin(&ipFamily, data, t) + } +} + func testProxyHairpin(ipFamily *corev1.IPFamily, data *TestData, t *testing.T) { busybox := randName("busybox-") nodeName := nodeName(1) @@ -143,7 +559,7 @@ func testProxyHairpin(ipFamily *corev1.IPFamily, data *TestData, t *testing.T) { defer data.deletePodAndWait(defaultTimeout, busybox, testNamespace) require.NoError(t, err) require.NoError(t, data.podWaitForRunning(defaultTimeout, busybox, testNamespace)) - svc, err := data.createService(busybox, 80, 80, map[string]string{"antrea-e2e": busybox}, false, corev1.ServiceTypeClusterIP, ipFamily) + svc, err := data.createService(busybox, 80, 80, map[string]string{"antrea-e2e": busybox}, false, false, corev1.ServiceTypeClusterIP, ipFamily) defer data.deleteServiceAndWait(defaultTimeout, 
+func TestProxyHairpin(t *testing.T) { + skipIfHasWindowsNodes(t) + + data, err := setupTest(t) + if err != nil { + t.Fatalf("Error when setting up test: %v", err) + } + defer teardownTest(t, data) + + skipIfProxyDisabled(t) + + if len(clusterInfo.podV4NetworkCIDR) != 0 { + ipFamily := corev1.IPv4Protocol + testProxyHairpin(&ipFamily, data, t) + } + if len(clusterInfo.podV6NetworkCIDR) != 0 { + ipFamily := corev1.IPv6Protocol + testProxyHairpin(&ipFamily, data, t) + } +} + func testProxyHairpin(ipFamily *corev1.IPFamily, data *TestData, t *testing.T) { busybox := randName("busybox-") nodeName := nodeName(1) @@ -143,7 +559,7 @@ func testProxyHairpin(ipFamily *corev1.IPFamily, data *TestData, t *testing.T) { defer data.deletePodAndWait(defaultTimeout, busybox, testNamespace) require.NoError(t, err) require.NoError(t, data.podWaitForRunning(defaultTimeout, busybox, testNamespace)) - svc, err := data.createService(busybox, 80, 80, map[string]string{"antrea-e2e": busybox}, false, corev1.ServiceTypeClusterIP, ipFamily) + svc, err := data.createService(busybox, 80, 80, map[string]string{"antrea-e2e": busybox}, false, false, corev1.ServiceTypeClusterIP, ipFamily) defer data.deleteServiceAndWait(defaultTimeout, busybox) require.NoError(t, err) @@ -165,6 +581,27 @@ func testProxyEndpointLifeCycleCase(t *testing.T, data *TestData) { } } +func TestProxyEndpointLifeCycle(t *testing.T) { + skipIfHasWindowsNodes(t) + + data, err := setupTest(t) + if err != nil { + t.Fatalf("Error when setting up test: %v", err) + } + defer teardownTest(t, data) + + skipIfProxyDisabled(t) + + if len(clusterInfo.podV4NetworkCIDR) != 0 { + ipFamily := corev1.IPv4Protocol + testProxyEndpointLifeCycle(&ipFamily, data, t) + } + if len(clusterInfo.podV6NetworkCIDR) != 0 { + ipFamily := corev1.IPv6Protocol + testProxyEndpointLifeCycle(&ipFamily, data, t) + } +} + func testProxyEndpointLifeCycle(ipFamily *corev1.IPFamily, data *TestData, t *testing.T) { nodeName := nodeName(1) nginx := randName("nginx-") @@ -190,19 +627,41 @@ func testProxyEndpointLifeCycle(ipFamily *corev1.IPFamily, data *TestData, t *te keywords := make(map[int]string) keywords[42] = fmt.Sprintf("nat(dst=%s)", net.JoinHostPort(nginxIP, "80")) // endpointNATTable + var groupKeywords []string + if *ipFamily == corev1.IPv6Protocol { + groupKeywords = append(groupKeywords, fmt.Sprintf("set_field:0x%s->xxreg3", strings.TrimPrefix(hex.EncodeToString(*nginxIPs.ipv6), "0"))) + } else { + groupKeywords = append(groupKeywords, fmt.Sprintf("0x%s->NXM_NX_REG3[]", strings.TrimPrefix(hex.EncodeToString(nginxIPs.ipv4.To4()), "0"))) + } + for tableID, keyword := range keywords { tableOutput, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=%d", tableID)}) require.NoError(t, err) require.Contains(t, tableOutput, keyword) } + groupOutput, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-groups", defaultBridgeName}) + require.NoError(t, err) + for _, k := range groupKeywords { + require.Contains(t, groupOutput, k) + } + require.NoError(t, data.deletePodAndWait(defaultTimeout, nginx, testNamespace))
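+ // After the backend Pod is deleted, its Endpoint flows and group buckets should be removed from the OVS pipeline.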
+ // Wait for one second to make sure the pipeline is updated. + time.Sleep(time.Second) + for tableID, keyword := range keywords { tableOutput, _, err := data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-flows", defaultBridgeName, fmt.Sprintf("table=%d", tableID)}) require.NoError(t, err) require.NotContains(t, tableOutput, keyword) } + + groupOutput, _, err = data.runCommandFromPod(metav1.NamespaceSystem, agentName, "antrea-agent", []string{"ovs-ofctl", "dump-groups", defaultBridgeName}) + require.NoError(t, err) + for _, k := range groupKeywords { + require.NotContains(t, groupOutput, k) + } } func testProxyServiceLifeCycleCase(t *testing.T, data *TestData) { @@ -216,9 +675,35 @@ func testProxyServiceLifeCycleCase(t *testing.T, data *TestData) { } } +func TestProxyServiceLifeCycle(t *testing.T) { + skipIfHasWindowsNodes(t) + + data, err := setupTest(t) + if err != nil { + t.Fatalf("Error when setting up test: %v", err) + } + defer teardownTest(t, data) + + skipIfProxyDisabled(t) + + if len(clusterInfo.podV4NetworkCIDR) != 0 { + ipFamily := corev1.IPv4Protocol + testProxyServiceLifeCycle(&ipFamily, []string{"169.254.169.1", "169.254.169.2"}, data, t) + } + if len(clusterInfo.podV6NetworkCIDR) != 0 { + ipFamily := corev1.IPv6Protocol + testProxyServiceLifeCycle(&ipFamily, []string{"fd75::aabb:ccdd:ef00", "fd75::aabb:ccdd:ef01"}, data, t) + } +} + func testProxyServiceLifeCycle(ipFamily *corev1.IPFamily, ingressIPs []string, data *TestData, t *testing.T) { nodeName := nodeName(1) nginx := randName("nginx-") + isProxyFull, err := data.IsProxyFull() + if err != nil { + t.Fatalf("Error getting option antreaProxyFull value: %v", err) + } + require.NoError(t, data.createNginxPodOnNode(nginx, testNamespace, nodeName)) defer data.deletePodAndWait(defaultTimeout, nginx, testNamespace) nginxIPs, err := data.podWaitForIPs(defaultTimeout, nginx, testNamespace) @@ -241,16 +726,20 @@ func testProxyServiceLifeCycle(ipFamily *corev1.IPFamily, ingressIPs []string, d // Hold on to make sure that the Service is realized. time.Sleep(3 * time.Second) - svcLBflows := make([]string, len(ingressIPs)+1) + var svcLBflows []string if *ipFamily == corev1.IPv6Protocol { - svcLBflows[0] = fmt.Sprintf("ipv6_dst=%s,tp_dst=80", svc.Spec.ClusterIP) - for idx, ingressIP := range ingressIPs { - svcLBflows[idx+1] = fmt.Sprintf("ipv6_dst=%s,tp_dst=80", ingressIP) + svcLBflows = append(svcLBflows, fmt.Sprintf("ipv6_dst=%s,tp_dst=80", svc.Spec.ClusterIP)) + if isProxyFull { + for _, ingressIP := range ingressIPs { + svcLBflows = append(svcLBflows, fmt.Sprintf("ipv6_dst=%s,tp_dst=80", ingressIP)) + } } } else { - svcLBflows[0] = fmt.Sprintf("nw_dst=%s,tp_dst=80", svc.Spec.ClusterIP) - for idx, ingressIP := range ingressIPs { - svcLBflows[idx+1] = fmt.Sprintf("nw_dst=%s,tp_dst=80", ingressIP) + svcLBflows = append(svcLBflows, fmt.Sprintf("nw_dst=%s,tp_dst=80", svc.Spec.ClusterIP)) + if isProxyFull { + for _, ingressIP := range ingressIPs { + svcLBflows = append(svcLBflows, fmt.Sprintf("nw_dst=%s,tp_dst=80", ingressIP)) + } } } diff --git a/test/e2e/service_test.go b/test/e2e/service_test.go index 3d698c4ab11..ce7cb6f6c12 100644 --- a/test/e2e/service_test.go +++ b/test/e2e/service_test.go @@ -17,93 +17,121 @@ package e2e import ( "fmt" "net" - "strconv" "strings" "testing" + "time" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" ) -// TestClusterIP tests traffic from Nodes and Pods to ClusterIP Service. 
-func TestClusterIP(t *testing.T) { - // TODO: Support for dual-stack and IPv6-only clusters - skipIfIPv6Cluster(t) - +func TestIPv4ClusterIP(t *testing.T) { data, err := setupTest(t) if err != nil { t.Fatalf("Error when setting up test: %v", err) } defer teardownTest(t, data) - svcName := "nginx" - serverPodNode := nodeName(0) - svc, cleanup := data.createClusterIPServiceAndBackendPods(t, svcName, serverPodNode) - defer cleanup() - t.Logf("%s Service is ready", svcName) + skipIfNumNodesLessThan(t, 2) + skipIfNotIPv4Cluster(t) - testFromNode := func(node string) { - // Retry is needed for rules to be installed by kube-proxy/antrea-proxy. - cmd := fmt.Sprintf("curl --connect-timeout 1 --retry 5 --retry-connrefused %s:80", svc.Spec.ClusterIP) - rc, stdout, stderr, err := RunCommandOnNode(node, cmd) - if rc != 0 || err != nil { - t.Errorf("Error when running command '%s' on Node '%s', rc: %d, stdout: %s, stderr: %s, error: %v", - cmd, node, rc, stdout, stderr, err) - } - } + clientPodCp := "busybox-cp" + clientPodWk := "busybox-wk" + createTestClientPods(t, data, clientPodCp, clientPodWk) + testClusterIPHelper(t, data, false, clientPodCp, clientPodWk) +} - testFromPod := func(podName, nodeName string, hostNetwork bool) { - require.NoError(t, data.createPodOnNode(podName, testNamespace, nodeName, busyboxImage, []string{"sleep", strconv.Itoa(3600)}, nil, nil, nil, hostNetwork, nil)) - defer data.deletePodAndWait(defaultTimeout, podName, testNamespace) - require.NoError(t, data.podWaitForRunning(defaultTimeout, podName, testNamespace)) - err := data.runNetcatCommandFromTestPod(podName, testNamespace, svc.Spec.ClusterIP, 80) - require.NoError(t, err, "Pod %s should be able to connect %s, but was not able to connect", podName, net.JoinHostPort(svc.Spec.ClusterIP, fmt.Sprint(80))) +func TestIPv6ClusterIP(t *testing.T) { + data, err := setupTest(t) + if err != nil { + t.Fatalf("Error when setting up test: %v", err) } + defer teardownTest(t, data) + + skipIfNumNodesLessThan(t, 2) + skipIfNotIPv6Cluster(t) - t.Run("ClusterIP", func(t *testing.T) { - t.Run("Same Linux Node can access the Service", func(t *testing.T) { - t.Parallel() - testFromPod("hostnetwork-client-on-same-node", serverPodNode, true) - }) - t.Run("Different Linux Node can access the Service", func(t *testing.T) { - t.Parallel() - skipIfNumNodesLessThan(t, 2) - testFromPod("hostnetwork-client-on-different-node", nodeName(1), true) - }) - t.Run("Windows host can access the Service", func(t *testing.T) { - t.Parallel() - skipIfNoWindowsNodes(t) - idx := clusterInfo.windowsNodes[0] - winNode := clusterInfo.nodes[idx].name - testFromNode(winNode) - }) - t.Run("Linux Pod on same Node can access the Service", func(t *testing.T) { - t.Parallel() - testFromPod("client-on-same-node", serverPodNode, false) - }) - t.Run("Linux Pod on different Node can access the Service", func(t *testing.T) { - t.Parallel() - skipIfNumNodesLessThan(t, 2) - testFromPod("client-on-different-node", nodeName(1), false) - }) + clientPodCp := "busybox-cp" + clientPodWk := "busybox-wk" + createTestClientPods(t, data, clientPodCp, clientPodWk) + testClusterIPHelper(t, data, true, clientPodCp, clientPodWk) +} + +func testClusterIPHelper(t *testing.T, data *TestData, isIPv6 bool, clientPodCp, clientPodWk string) { + testPodName := fmt.Sprintf("nginx-%v", isIPv6) + createTestNginxPod(t, data, testPodName, false) + clusterIP := createClusterIPService(t, data, isIPv6) + time.Sleep(2 * time.Second) + url := "http://" + net.JoinHostPort(clusterIP, "80") + t.Run("Pod CIDR 
Endpoints", func(t *testing.T) { + testClusterIPCases(t, data, url, testPodName, clientPodCp, clientPodWk) }) + deleteTestNginxPod(t, data, testPodName) + + testPodHostNetworkName := fmt.Sprintf("echoserver-cp-h-%v", isIPv6) + createTestNginxPod(t, data, testPodHostNetworkName, true) + time.Sleep(2 * time.Second) + t.Run("Host Network Endpoints", func(t *testing.T) { + testClusterIPCases(t, data, url, nodeName(0), clientPodCp, clientPodWk) + }) + deleteTestNginxPod(t, data, testPodHostNetworkName) } -func (data *TestData) createClusterIPServiceAndBackendPods(t *testing.T, name string, node string) (*corev1.Service, func()) { - ipv4Protocol := corev1.IPv4Protocol - require.NoError(t, data.createNginxPodOnNode(name, testNamespace, node)) - _, err := data.podWaitForIPs(defaultTimeout, name, testNamespace) - require.NoError(t, err) - require.NoError(t, data.podWaitForRunning(defaultTimeout, name, testNamespace)) - svc, err := data.createNginxClusterIPService(name, false, &ipv4Protocol) - require.NoError(t, err) +func testClusterIPCases(t *testing.T, data *TestData, url, hostname, clientPodCp, clientPodWk string) { + t.Run("Host on different Node can access the Service", func(t *testing.T) { + t.Parallel() + skipIfKubeProxyEnabledOnLinux(t, data, nodeName(1)) + skipIfProxyFullDisabled(t, data) + testClusterIPFromNode(t, url, nodeName(1)) + }) + t.Run("Host on the same Node can access the Service", func(t *testing.T) { + t.Parallel() + skipIfKubeProxyEnabledOnLinux(t, data, nodeName(0)) + skipIfProxyFullDisabled(t, data) + testClusterIPFromNode(t, url, nodeName(0)) + }) + t.Run("Pod on same Node can access the Service", func(t *testing.T) { + t.Parallel() + testClusterIPFromPod(t, data, url, clientPodCp) + }) + t.Run("Pod on different Node can access the Service", func(t *testing.T) { + t.Parallel() + testClusterIPFromPod(t, data, url, clientPodWk) + }) +} - cleanup := func() { - data.deletePodAndWait(defaultTimeout, name, testNamespace) - data.deleteServiceAndWait(defaultTimeout, name) +func testClusterIPFromPod(t *testing.T, data *TestData, url, podName string) { + errMsg := "Server ClusterIP should be able to be connected from pod" + _, _, err := data.runCommandFromPod(testNamespace, podName, busyboxContainerName, []string{"wget", "-O", "-", url, "-T", "1"}) + require.NoError(t, err, errMsg) +} + +func testClusterIPFromNode(t *testing.T, url, nodeName string) { + errMsg := "Server ClusterIP should be able to be connected from node on the same k8s node" + _, _, _, err := RunCommandOnNode(nodeName, strings.Join([]string{"wget", "-O", "-", url, "-T", "1"}, " ")) + require.NoError(t, err, errMsg) +} + +func createClusterIPService(t *testing.T, data *TestData, isIPv6 bool) string { + ipProctol := corev1.IPv4Protocol + if isIPv6 { + ipProctol = corev1.IPv6Protocol } + clusterIP, err := data.createNginxClusterIPService(fmt.Sprintf("echoserver-%v", isIPv6), false, &ipProctol) + require.NoError(t, err) + return clusterIP.Spec.ClusterIP +} - return svc, cleanup +func createTestNginxPod(t *testing.T, data *TestData, testPodName string, hostNetwork bool) { + require.NoError(t, data.createNginxPodOnNodeV2(testPodName, nodeName(0), false)) + _, err := data.podWaitForIPs(defaultTimeout, testPodName, testNamespace) + require.NoError(t, err) + require.NoError(t, data.podWaitForRunning(defaultTimeout, testPodName, testNamespace)) +} + +func deleteTestNginxPod(t *testing.T, data *TestData, testPodName string) { + err := data.deletePod(testNamespace, testPodName) + require.NoError(t, err) } // 
TestNodePortWindows tests NodePort Service on Windows Node. It is a temporary test to replace upstream Kubernetes one: @@ -160,7 +188,7 @@ func (data *TestData) createAgnhostServiceAndBackendPods(t *testing.T, name stri _, err := data.podWaitForIPs(defaultTimeout, name, testNamespace) require.NoError(t, err) require.NoError(t, data.podWaitForRunning(defaultTimeout, name, testNamespace)) - svc, err := data.createService(name, 80, 80, map[string]string{"app": "agnhost"}, false, svcType, &ipv4Protocol) + svc, err := data.createService(name, 80, 80, map[string]string{"app": "agnhost"}, false, false, svcType, &ipv4Protocol) require.NoError(t, err) cleanup := func() { diff --git a/test/integration/agent/openflow_test.go b/test/integration/agent/openflow_test.go index 8adc762d2d6..f3c04b3a284 100644 --- a/test/integration/agent/openflow_test.go +++ b/test/integration/agent/openflow_test.go @@ -110,7 +110,7 @@ func TestConnectivityFlows(t *testing.T) { antrearuntime.WindowsOS = runtime.GOOS } - c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, true, false, true, false) + c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, true, false, true, false, false) err := ofTestUtils.PrepareOVSBridge(br) require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge: %v", err)) defer func() { @@ -137,7 +137,7 @@ func TestConnectivityFlows(t *testing.T) { } func TestReplayFlowsConnectivityFlows(t *testing.T) { - c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, true, false, false, false) + c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, true, false, false, false, false) err := ofTestUtils.PrepareOVSBridge(br) require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge: %v", err)) @@ -164,7 +164,7 @@ func TestReplayFlowsConnectivityFlows(t *testing.T) { } func TestReplayFlowsNetworkPolicyFlows(t *testing.T) { - c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, true, false, false, false) + c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, true, false, false, false, false) err := ofTestUtils.PrepareOVSBridge(br) require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge: %v", err)) @@ -275,7 +275,7 @@ func testInstallTunnelFlows(t *testing.T, config *testConfig) { } func testInstallServiceFlows(t *testing.T, config *testConfig) { - err := c.InstallClusterServiceFlows() + err := c.InstallDefaultServiceFlows() if err != nil { t.Fatalf("Failed to install Openflow entries to skip service CIDR from egress table: %v", err) } @@ -343,11 +343,11 @@ func TestNetworkPolicyFlows(t *testing.T) { // Initialize ovs metrics (Prometheus) to test them metrics.InitializeOVSMetrics() - c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, true, false, false, false) + c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, true, false, false, false, false) err := ofTestUtils.PrepareOVSBridge(br) require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge %s", br)) - _, err = c.Initialize(roundInfo, &config1.NodeConfig{PodIPv4CIDR: podIPv4CIDR, PodIPv6CIDR: podIPv6CIDR}, config1.TrafficEncapModeEncap) + _, err = c.Initialize(roundInfo, &config1.NodeConfig{PodIPv4CIDR: podIPv4CIDR, PodIPv6CIDR: podIPv6CIDR, GatewayConfig: gwConfig}, config1.TrafficEncapModeEncap) require.Nil(t, err, "Failed to initialize OFClient") defer func() { @@ -453,7 +453,7 @@ func TestIPv6ConnectivityFlows(t *testing.T) { // Initialize ovs metrics (Prometheus) to test them 
metrics.InitializeOVSMetrics() - c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, true, false, true, false) + c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, true, false, true, false, false) err := ofTestUtils.PrepareOVSBridge(br) require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge: %v", err)) @@ -485,11 +485,11 @@ type svcConfig struct { } func TestProxyServiceFlows(t *testing.T) { - c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, true, false, false, false) + c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, true, false, false, false, false) err := ofTestUtils.PrepareOVSBridge(br) require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge %s", br)) - _, err = c.Initialize(roundInfo, &config1.NodeConfig{}, config1.TrafficEncapModeEncap) + _, err = c.Initialize(roundInfo, &config1.NodeConfig{PodIPv4CIDR: podIPv4CIDR, PodIPv6CIDR: podIPv6CIDR, GatewayConfig: gwConfig}, config1.TrafficEncapModeEncap) require.Nil(t, err, "Failed to initialize OFClient") defer func() { @@ -501,7 +501,7 @@ func TestProxyServiceFlows(t *testing.T) { endpoints := []k8sproxy.Endpoint{ k8stypes.NewEndpointInfo(&k8sproxy.BaseEndpointInfo{ - Endpoint: net.JoinHostPort("10.20.0.11", "8081"), + Endpoint: net.JoinHostPort("192.168.1.2", "8081"), IsLocal: true, }), k8stypes.NewEndpointInfo(&k8sproxy.BaseEndpointInfo{ @@ -617,7 +617,7 @@ func expectedProxyServiceGroupAndFlows(gid uint32, svc svcConfig, endpointList [ }, }} epDNATFlows := expectTableFlows{tableID: 42, flows: []*ofTestUtils.ExpectFlow{}} - hairpinFlows := expectTableFlows{tableID: 106, flows: []*ofTestUtils.ExpectFlow{}} + hairpinFlows := expectTableFlows{tableID: 108, flows: []*ofTestUtils.ExpectFlow{}} groupBuckets = make([]string, 0) for _, ep := range endpointList { epIP := ipToHexString(net.ParseIP(ep.IP())) @@ -973,7 +973,7 @@ func preparePodFlows(podIPs []net.IP, podMAC net.HardwareAddr, podOFPort uint32, }, }, }) - nextTableForSpoofguard = 29 + nextTableForSpoofguard = 23 } else { ipProto = "ipv6" nwSrcField = "ipv6_src" @@ -1043,7 +1043,7 @@ func prepareGatewayFlows(gwIPs []net.IP, gwMAC net.HardwareAddr, vMAC net.Hardwa }, { MatchStr: fmt.Sprintf("priority=200,ip,in_port=%d", config1.HostGatewayOFPort), - ActStr: "goto_table:29", + ActStr: "goto_table:23", }, }, }) @@ -1162,19 +1162,25 @@ func prepareDefaultFlows(config *testConfig) []expectTableFlows { } table105Flows := expectTableFlows{ tableID: 105, - flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:106"}}, + flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:108"}}, } table72Flows := expectTableFlows{ tableID: 72, flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:80"}}, } + table30Flows := expectTableFlows{ + tableID: 30, + } if config.enableIPv4 { + table30Flows.flows = append(table30Flows.flows, + &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ip", ActStr: "ct(table=31,zone=65520,nat)"}, + ) table31Flows.flows = append(table31Flows.flows, &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+inv+trk,ip", ActStr: "drop"}, ) table105Flows.flows = append(table105Flows.flows, - &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ct_state=+new+trk,ip,reg0=0x1/0xffff", ActStr: "ct(commit,table=106,zone=65520,exec(load:0x20->NXM_NX_CT_MARK[])"}, - &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+new+trk,ip", ActStr: "ct(commit,table=106,zone=65520)"}, + &ofTestUtils.ExpectFlow{MatchStr: 
"priority=200,ct_state=+new+trk,ip,reg0=0x1/0xffff", ActStr: "ct(commit,table=108,zone=65520,exec(load:0x20->NXM_NX_CT_MARK[])"}, + &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+new+trk,ip", ActStr: "ct(commit,table=108,zone=65520)"}, ) table72Flows.flows = append(table72Flows.flows, &ofTestUtils.ExpectFlow{MatchStr: "priority=210,ip,reg0=0x1/0xffff", ActStr: "goto_table:80"}, @@ -1182,12 +1188,15 @@ func prepareDefaultFlows(config *testConfig) []expectTableFlows { ) } if config.enableIPv6 { + table30Flows.flows = append(table30Flows.flows, + &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ipv6", ActStr: "ct(table=31,zone=65510,nat)"}, + ) table31Flows.flows = append(table31Flows.flows, &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+inv+trk,ipv6", ActStr: "drop"}, ) table105Flows.flows = append(table105Flows.flows, - &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ct_state=+new+trk,ipv6,reg0=0x1/0xffff", ActStr: "ct(commit,table=106,zone=65510,exec(load:0x20->NXM_NX_CT_MARK[])"}, - &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+new+trk,ipv6", ActStr: "ct(commit,table=106,zone=65510)"}, + &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ct_state=+new+trk,ipv6,reg0=0x1/0xffff", ActStr: "ct(commit,table=108,zone=65510,exec(load:0x20->NXM_NX_CT_MARK[])"}, + &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+new+trk,ipv6", ActStr: "ct(commit,table=108,zone=65510)"}, ) table72Flows.flows = append(table72Flows.flows, &ofTestUtils.ExpectFlow{MatchStr: "priority=210,ipv6,reg0=0x1/0xffff", ActStr: "goto_table:80"}, @@ -1211,12 +1220,6 @@ func prepareDefaultFlows(config *testConfig) []expectTableFlows { {MatchStr: "priority=0", ActStr: "drop"}, }, }, - { - uint8(30), - []*ofTestUtils.ExpectFlow{ - {MatchStr: "priority=200,ip", ActStr: "ct(table=31,zone=65520,nat)"}, - }, - }, { uint8(42), []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:50"}}, @@ -1359,7 +1362,7 @@ func prepareSNATFlows(snatIP net.IP, mark, podOFPort, podOFPortRemote uint32, vM } func TestSNATFlows(t *testing.T) { - c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, false, false, true, false) + c = ofClient.NewClient(br, bridgeMgmtAddr, ovsconfig.OVSDatapathNetdev, false, false, true, false, false) err := ofTestUtils.PrepareOVSBridge(br) require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge %s", br)) diff --git a/test/integration/agent/route_test.go b/test/integration/agent/route_test.go index 7bd1ef8a39e..877c82d88dd 100644 --- a/test/integration/agent/route_test.go +++ b/test/integration/agent/route_test.go @@ -138,7 +138,7 @@ func TestInitialize(t *testing.T) { for _, tc := range tcs { t.Logf("Running Initialize test with mode %s node config %s", tc.networkConfig.TrafficEncapMode, nodeConfig) - routeClient, err := route.NewClient(serviceCIDR, tc.networkConfig, tc.noSNAT) + routeClient, err := route.NewClient(serviceCIDR, tc.networkConfig, tc.noSNAT, false) assert.NoError(t, err) var xtablesReleasedTime, initializedTime time.Time @@ -244,7 +244,7 @@ func TestIpTablesSync(t *testing.T) { gwLink := createDummyGW(t) defer netlink.LinkDel(gwLink) - routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: config.TrafficEncapModeEncap}, false) + routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: config.TrafficEncapModeEncap}, false, false) assert.Nil(t, err) inited := make(chan struct{}) @@ -295,7 +295,7 @@ func TestAddAndDeleteSNATRule(t *testing.T) { gwLink := createDummyGW(t) 
defer netlink.LinkDel(gwLink) - routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: config.TrafficEncapModeEncap}, false) + routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: config.TrafficEncapModeEncap}, false, false) assert.Nil(t, err) inited := make(chan struct{}) @@ -349,7 +349,7 @@ func TestAddAndDeleteRoutes(t *testing.T) { for _, tc := range tcs { t.Logf("Running test with mode %s peer cidr %s peer ip %s node config %s", tc.mode, tc.peerCIDR, tc.peerIP, nodeConfig) - routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: tc.mode}, false) + routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: tc.mode}, false, false) assert.NoError(t, err) err = routeClient.Initialize(nodeConfig, func() {}) assert.NoError(t, err) @@ -412,7 +412,7 @@ func TestSyncRoutes(t *testing.T) { for _, tc := range tcs { t.Logf("Running test with mode %s peer cidr %s peer ip %s node config %s", tc.mode, tc.peerCIDR, tc.peerIP, nodeConfig) - routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: tc.mode}, false) + routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: tc.mode}, false, false) assert.NoError(t, err) err = routeClient.Initialize(nodeConfig, func() {}) assert.NoError(t, err) @@ -502,7 +502,7 @@ func TestReconcile(t *testing.T) { for _, tc := range tcs { t.Logf("Running test with mode %s added routes %v desired routes %v", tc.mode, tc.addedRoutes, tc.desiredPeerCIDRs) - routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: tc.mode}, false) + routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: tc.mode}, false, false) assert.NoError(t, err) err = routeClient.Initialize(nodeConfig, func() {}) assert.NoError(t, err) @@ -540,7 +540,7 @@ func TestRouteTablePolicyOnly(t *testing.T) { gwLink := createDummyGW(t) defer netlink.LinkDel(gwLink) - routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: config.TrafficEncapModeNetworkPolicyOnly}, false) + routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: config.TrafficEncapModeNetworkPolicyOnly}, false, false) assert.NoError(t, err) err = routeClient.Initialize(nodeConfig, func() {}) assert.NoError(t, err) @@ -599,7 +599,7 @@ func TestIPv6RoutesAndNeighbors(t *testing.T) { gwLink := createDummyGW(t) defer netlink.LinkDel(gwLink) - routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: config.TrafficEncapModeEncap}, false) + routeClient, err := route.NewClient(serviceCIDR, &config.NetworkConfig{TrafficEncapMode: config.TrafficEncapModeEncap}, false, false) assert.Nil(t, err) _, ipv6Subnet, _ := net.ParseCIDR("fd74:ca9b:172:19::/64") gwIPv6 := net.ParseIP("fd74:ca9b:172:19::1")