From 6ac68e8891bfe4ca6feb7984cf9f36694cc8dc98 Mon Sep 17 00:00:00 2001
From: Quan Tian
Date: Mon, 4 Dec 2023 18:52:34 +0800
Subject: [PATCH] Support Egress using IPs from a separate subnet

By default, it's assumed that the IPs allocated from the pool are in the
same subnet as the Node IPs. In some cases, users want to use IPs in
different subnets as Egress IPs. Additionally, users may want to use
VLAN tagging to segment the Egress traffic and the Node traffic.

This commit implements the requirements by introducing an optional
field, `subnetInfo`, to the ExternalIPPool resource. The `subnetInfo`
field contains the subnet attributes of the IPs in this pool. When using
a different subnet:

* `gateway` and `prefixLength` must be set. Antrea will route Egress
  traffic to the specified gateway when the destination is not in the
  same subnet as the Egress IP, otherwise route it to the destination
  directly.
* Optionally, you can specify `vlan` if the underlying network is
  expecting it. Once set, Antrea will tag Egress traffic leaving the
  Egress Node with the specified VLAN ID. Correspondingly, it's expected
  that reply traffic towards these Egress IPs is also tagged with the
  specified VLAN ID when arriving at the Egress Node.

The implementation involves VLAN sub-interfaces and policy routing.

* For a given subnet with a VLAN ID, a separate VLAN sub-interface will
  be created to hold the Egress IPs allocated from it. Egress traffic
  and its reply traffic will be sent over and received from the VLAN
  sub-interface for proper tagging and untagging.
* For a given subnet, a separate route table will be created, routing
  the selected Egress traffic to the specified gateway, or directly to
  its neighbor when the destination is in the same subnet.
* For multiple Egress IPs allocated from the same subnet, a separate IP
  rule will be created for each Egress IP, matching its pkt mark and
  looking up the shared route table.

The feature is gated by the alpha "EgressSeparateSubnet" feature gate.
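For illustration only (not part of the patch), the resulting configuration on the
Egress Node is roughly equivalent to the following, using assumed example values
(Egress IP 10.10.0.2 from a 10.10.0.0/24 pool, gateway 10.10.0.1, VLAN 10, packet
mark 1, route table 101, transport interface eth0 — none of these names or numbers
are mandated by the patch):

    ip link add link eth0 name eth0.10 type vlan id 10          # VLAN sub-interface holding the Egress IP
    ip addr add 10.10.0.2/24 dev eth0.10                        # Egress IP assigned to the sub-interface
    ip route add 10.10.0.0/24 dev eth0.10 table 101             # reach neighbors in the Egress subnet directly
    ip route add default via 10.10.0.1 dev eth0.10 table 101    # other destinations go via the subnet's gateway
    ip rule add fwmark 1 table 101                               # one rule per Egress IP's packet mark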
Signed-off-by: Quan Tian --- .github/workflows/kind.yml | 1 + build/charts/antrea/conf/antrea-agent.conf | 3 + build/charts/antrea/crds/externalippool.yaml | 19 + .../webhooks/validating/crdvalidator.yaml | 4 +- build/yamls/antrea-aks.yml | 30 +- build/yamls/antrea-crds.yml | 19 + build/yamls/antrea-eks.yml | 30 +- build/yamls/antrea-gke.yml | 30 +- build/yamls/antrea-ipsec.yml | 30 +- build/yamls/antrea.yml | 30 +- ci/kind/kind-setup.sh | 86 ++- ci/kind/test-e2e-kind.sh | 31 +- cmd/antrea-agent/agent.go | 3 +- docs/egress.md | 47 ++ docs/feature-gates.md | 6 + .../controller/egress/egress_controller.go | 210 +++++++- .../egress/egress_controller_test.go | 419 ++++++++++++++- .../serviceexternalip/controller.go | 2 +- .../serviceexternalip/controller_test.go | 8 +- pkg/agent/ipassigner/ip_assigner.go | 12 +- pkg/agent/ipassigner/ip_assigner_linux.go | 503 +++++++++++++----- .../ipassigner/testing/mock_ipassigner.go | 31 +- pkg/agent/openflow/client_test.go | 6 +- pkg/agent/openflow/pipeline.go | 10 +- pkg/agent/route/interfaces.go | 15 + pkg/agent/route/route_linux.go | 127 +++++ pkg/agent/route/route_linux_test.go | 152 +++++- pkg/agent/route/route_windows.go | 20 + pkg/agent/route/testing/mock_route.go | 70 +++ pkg/agent/types/{marks.go => net.go} | 8 + pkg/agent/util/net_linux.go | 5 + pkg/agent/util/netlink/netlink_linux.go | 6 + .../netlink/testing/mock_netlink_linux.go | 43 ++ pkg/apis/crd/v1beta1/types.go | 13 + pkg/apis/crd/v1beta1/util.go | 15 + pkg/apis/crd/v1beta1/zz_generated.deepcopy.go | 21 + .../handlers/featuregates/handler_test.go | 1 + pkg/apiserver/openapi/zz_generated.openapi.go | 46 +- pkg/controller/externalippool/validate.go | 53 +- .../externalippool/validate_test.go | 101 +++- pkg/features/antrea_features.go | 7 + test/e2e/egress_test.go | 169 +++++- test/e2e/framework.go | 48 ++ test/e2e/main_test.go | 6 + test/e2e/service_externalip_test.go | 12 +- .../agent/ip_assigner_linux_test.go | 50 +- test/integration/agent/openflow_test.go | 8 +- 47 files changed, 2303 insertions(+), 263 deletions(-) rename pkg/agent/types/{marks.go => net.go} (80%) diff --git a/.github/workflows/kind.yml b/.github/workflows/kind.yml index 83e00fd3707..23a532b7d81 100644 --- a/.github/workflows/kind.yml +++ b/.github/workflows/kind.yml @@ -237,6 +237,7 @@ jobs: --feature-gates AllAlpha=true,AllBeta=true \ --proxy-all \ --node-ipam \ + --extra-vlan \ --multicast - name: Tar coverage files run: tar -czf test-e2e-encap-all-features-enabled-coverage.tar.gz test-e2e-encap-all-features-enabled-coverage diff --git a/build/charts/antrea/conf/antrea-agent.conf b/build/charts/antrea/conf/antrea-agent.conf index e93fe46c4fb..62179c88da4 100644 --- a/build/charts/antrea/conf/antrea-agent.conf +++ b/build/charts/antrea/conf/antrea-agent.conf @@ -79,6 +79,9 @@ featureGates: # Enable Egress traffic shaping. {{- include "featureGate" (dict "featureGates" .Values.featureGates "name" "EgressTrafficShaping" "default" false) }} +# Allow users to allocate Egress IPs from a separate subnet different from the default Node subnet. +{{- include "featureGate" (dict "featureGates" .Values.featureGates "name" "EgressSeparateSubnet" "default" false) }} + # Name of the OpenVSwitch bridge antrea-agent will create and use. # Make sure it doesn't conflict with your existing OpenVSwitch bridges. 
ovsBridge: {{ .Values.ovs.bridgeName | quote }} diff --git a/build/charts/antrea/crds/externalippool.yaml b/build/charts/antrea/crds/externalippool.yaml index 7c6fbe96db1..ac17a519c1b 100644 --- a/build/charts/antrea/crds/externalippool.yaml +++ b/build/charts/antrea/crds/externalippool.yaml @@ -133,6 +133,25 @@ spec: oneOf: - format: ipv4 - format: ipv6 + subnetInfo: + type: object + required: + - gateway + - prefixLength + properties: + gateway: + type: string + oneOf: + - format: ipv4 + - format: ipv6 + prefixLength: + type: integer + minimum: 1 + maximum: 127 + vlan: + type: integer + minimum: 0 + maximum: 4094 nodeSelector: type: object properties: diff --git a/build/charts/antrea/templates/webhooks/validating/crdvalidator.yaml b/build/charts/antrea/templates/webhooks/validating/crdvalidator.yaml index 8e9d4c85a4d..0dfe1f8acd3 100644 --- a/build/charts/antrea/templates/webhooks/validating/crdvalidator.yaml +++ b/build/charts/antrea/templates/webhooks/validating/crdvalidator.yaml @@ -116,9 +116,9 @@ webhooks: namespace: {{ .Release.Namespace }} path: "/validate/externalippool" rules: - - operations: ["UPDATE"] + - operations: ["CREATE", "UPDATE"] apiGroups: ["crd.antrea.io"] - apiVersions: ["v1alpha2"] + apiVersions: ["v1alpha2", "v1beta1"] resources: ["externalippools"] scope: "Cluster" admissionReviewVersions: ["v1", "v1beta1"] diff --git a/build/yamls/antrea-aks.yml b/build/yamls/antrea-aks.yml index 494c88f78c4..68ebd7cf998 100644 --- a/build/yamls/antrea-aks.yml +++ b/build/yamls/antrea-aks.yml @@ -2654,6 +2654,25 @@ spec: oneOf: - format: ipv4 - format: ipv6 + subnetInfo: + type: object + required: + - gateway + - prefixLength + properties: + gateway: + type: string + oneOf: + - format: ipv4 + - format: ipv6 + prefixLength: + type: integer + minimum: 1 + maximum: 127 + vlan: + type: integer + minimum: 0 + maximum: 4094 nodeSelector: type: object properties: @@ -5603,6 +5622,9 @@ data: # Enable Egress traffic shaping. # EgressTrafficShaping: false + # Allow users to allocate Egress IPs from a separate subnet different from the default Node subnet. + # EgressSeparateSubnet: false + # Name of the OpenVSwitch bridge antrea-agent will create and use. # Make sure it doesn't conflict with your existing OpenVSwitch bridges. 
ovsBridge: "br-int" @@ -6903,7 +6925,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: e59e0431902646d46cba490279184fea2bdd3c8b486b5a7b1d3ece9a91614634 + checksum/config: 34f7e1dc5a957bd3106fae1e0b94a6a50336420be33a9378e02e795364c007e6 labels: app: antrea component: antrea-agent @@ -7141,7 +7163,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: e59e0431902646d46cba490279184fea2bdd3c8b486b5a7b1d3ece9a91614634 + checksum/config: 34f7e1dc5a957bd3106fae1e0b94a6a50336420be33a9378e02e795364c007e6 labels: app: antrea component: antrea-controller @@ -7456,9 +7478,9 @@ webhooks: namespace: kube-system path: "/validate/externalippool" rules: - - operations: ["UPDATE"] + - operations: ["CREATE", "UPDATE"] apiGroups: ["crd.antrea.io"] - apiVersions: ["v1alpha2"] + apiVersions: ["v1alpha2", "v1beta1"] resources: ["externalippools"] scope: "Cluster" admissionReviewVersions: ["v1", "v1beta1"] diff --git a/build/yamls/antrea-crds.yml b/build/yamls/antrea-crds.yml index a215b73db1e..06db6b42ff8 100644 --- a/build/yamls/antrea-crds.yml +++ b/build/yamls/antrea-crds.yml @@ -2641,6 +2641,25 @@ spec: oneOf: - format: ipv4 - format: ipv6 + subnetInfo: + type: object + required: + - gateway + - prefixLength + properties: + gateway: + type: string + oneOf: + - format: ipv4 + - format: ipv6 + prefixLength: + type: integer + minimum: 1 + maximum: 127 + vlan: + type: integer + minimum: 0 + maximum: 4094 nodeSelector: type: object properties: diff --git a/build/yamls/antrea-eks.yml b/build/yamls/antrea-eks.yml index bf31133d0f5..46140308832 100644 --- a/build/yamls/antrea-eks.yml +++ b/build/yamls/antrea-eks.yml @@ -2654,6 +2654,25 @@ spec: oneOf: - format: ipv4 - format: ipv6 + subnetInfo: + type: object + required: + - gateway + - prefixLength + properties: + gateway: + type: string + oneOf: + - format: ipv4 + - format: ipv6 + prefixLength: + type: integer + minimum: 1 + maximum: 127 + vlan: + type: integer + minimum: 0 + maximum: 4094 nodeSelector: type: object properties: @@ -5603,6 +5622,9 @@ data: # Enable Egress traffic shaping. # EgressTrafficShaping: false + # Allow users to allocate Egress IPs from a separate subnet different from the default Node subnet. + # EgressSeparateSubnet: false + # Name of the OpenVSwitch bridge antrea-agent will create and use. # Make sure it doesn't conflict with your existing OpenVSwitch bridges. 
ovsBridge: "br-int" @@ -6903,7 +6925,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: e59e0431902646d46cba490279184fea2bdd3c8b486b5a7b1d3ece9a91614634 + checksum/config: 34f7e1dc5a957bd3106fae1e0b94a6a50336420be33a9378e02e795364c007e6 labels: app: antrea component: antrea-agent @@ -7142,7 +7164,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: e59e0431902646d46cba490279184fea2bdd3c8b486b5a7b1d3ece9a91614634 + checksum/config: 34f7e1dc5a957bd3106fae1e0b94a6a50336420be33a9378e02e795364c007e6 labels: app: antrea component: antrea-controller @@ -7457,9 +7479,9 @@ webhooks: namespace: kube-system path: "/validate/externalippool" rules: - - operations: ["UPDATE"] + - operations: ["CREATE", "UPDATE"] apiGroups: ["crd.antrea.io"] - apiVersions: ["v1alpha2"] + apiVersions: ["v1alpha2", "v1beta1"] resources: ["externalippools"] scope: "Cluster" admissionReviewVersions: ["v1", "v1beta1"] diff --git a/build/yamls/antrea-gke.yml b/build/yamls/antrea-gke.yml index 8ef0f76ffe2..dcecd067278 100644 --- a/build/yamls/antrea-gke.yml +++ b/build/yamls/antrea-gke.yml @@ -2654,6 +2654,25 @@ spec: oneOf: - format: ipv4 - format: ipv6 + subnetInfo: + type: object + required: + - gateway + - prefixLength + properties: + gateway: + type: string + oneOf: + - format: ipv4 + - format: ipv6 + prefixLength: + type: integer + minimum: 1 + maximum: 127 + vlan: + type: integer + minimum: 0 + maximum: 4094 nodeSelector: type: object properties: @@ -5603,6 +5622,9 @@ data: # Enable Egress traffic shaping. # EgressTrafficShaping: false + # Allow users to allocate Egress IPs from a separate subnet different from the default Node subnet. + # EgressSeparateSubnet: false + # Name of the OpenVSwitch bridge antrea-agent will create and use. # Make sure it doesn't conflict with your existing OpenVSwitch bridges. 
ovsBridge: "br-int" @@ -6903,7 +6925,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 3b1758664de8044af1aa7454c64bd1a4911750e562e1ae9375c9c16a335a469d + checksum/config: 791d3c52ac84fd8a6db2a6fee50ee2dce7e04f49c722bf8e2967764f39e6866e labels: app: antrea component: antrea-agent @@ -7139,7 +7161,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 3b1758664de8044af1aa7454c64bd1a4911750e562e1ae9375c9c16a335a469d + checksum/config: 791d3c52ac84fd8a6db2a6fee50ee2dce7e04f49c722bf8e2967764f39e6866e labels: app: antrea component: antrea-controller @@ -7454,9 +7476,9 @@ webhooks: namespace: kube-system path: "/validate/externalippool" rules: - - operations: ["UPDATE"] + - operations: ["CREATE", "UPDATE"] apiGroups: ["crd.antrea.io"] - apiVersions: ["v1alpha2"] + apiVersions: ["v1alpha2", "v1beta1"] resources: ["externalippools"] scope: "Cluster" admissionReviewVersions: ["v1", "v1beta1"] diff --git a/build/yamls/antrea-ipsec.yml b/build/yamls/antrea-ipsec.yml index 8c644ea62ad..f8fa8ba9ced 100644 --- a/build/yamls/antrea-ipsec.yml +++ b/build/yamls/antrea-ipsec.yml @@ -2654,6 +2654,25 @@ spec: oneOf: - format: ipv4 - format: ipv6 + subnetInfo: + type: object + required: + - gateway + - prefixLength + properties: + gateway: + type: string + oneOf: + - format: ipv4 + - format: ipv6 + prefixLength: + type: integer + minimum: 1 + maximum: 127 + vlan: + type: integer + minimum: 0 + maximum: 4094 nodeSelector: type: object properties: @@ -5616,6 +5635,9 @@ data: # Enable Egress traffic shaping. # EgressTrafficShaping: false + # Allow users to allocate Egress IPs from a separate subnet different from the default Node subnet. + # EgressSeparateSubnet: false + # Name of the OpenVSwitch bridge antrea-agent will create and use. # Make sure it doesn't conflict with your existing OpenVSwitch bridges. 
ovsBridge: "br-int" @@ -6916,7 +6938,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: a34de3efa658ac40c9bde28e08832dd897259fdcf639beab9d4e47531d7da948 + checksum/config: 81a9d6f9ca6306467ca8a241709027336626dfaad557d3cbb5cfa7028b0af9a5 checksum/ipsec-secret: d0eb9c52d0cd4311b6d252a951126bf9bea27ec05590bed8a394f0f792dcb2a4 labels: app: antrea @@ -7198,7 +7220,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: a34de3efa658ac40c9bde28e08832dd897259fdcf639beab9d4e47531d7da948 + checksum/config: 81a9d6f9ca6306467ca8a241709027336626dfaad557d3cbb5cfa7028b0af9a5 labels: app: antrea component: antrea-controller @@ -7513,9 +7535,9 @@ webhooks: namespace: kube-system path: "/validate/externalippool" rules: - - operations: ["UPDATE"] + - operations: ["CREATE", "UPDATE"] apiGroups: ["crd.antrea.io"] - apiVersions: ["v1alpha2"] + apiVersions: ["v1alpha2", "v1beta1"] resources: ["externalippools"] scope: "Cluster" admissionReviewVersions: ["v1", "v1beta1"] diff --git a/build/yamls/antrea.yml b/build/yamls/antrea.yml index 4c297d211a1..4748085ddf4 100644 --- a/build/yamls/antrea.yml +++ b/build/yamls/antrea.yml @@ -2654,6 +2654,25 @@ spec: oneOf: - format: ipv4 - format: ipv6 + subnetInfo: + type: object + required: + - gateway + - prefixLength + properties: + gateway: + type: string + oneOf: + - format: ipv4 + - format: ipv6 + prefixLength: + type: integer + minimum: 1 + maximum: 127 + vlan: + type: integer + minimum: 0 + maximum: 4094 nodeSelector: type: object properties: @@ -5603,6 +5622,9 @@ data: # Enable Egress traffic shaping. # EgressTrafficShaping: false + # Allow users to allocate Egress IPs from a separate subnet different from the default Node subnet. + # EgressSeparateSubnet: false + # Name of the OpenVSwitch bridge antrea-agent will create and use. # Make sure it doesn't conflict with your existing OpenVSwitch bridges. 
ovsBridge: "br-int" @@ -6903,7 +6925,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: aa947bf5c403412b9c8cfcbcc335659992f19bd428886e80f43bafa052bac1e6 + checksum/config: b23abc9cc4f7acfafca9e864b89eadf4543e3f5f0c6b62b472b269e34b1c7347 labels: app: antrea component: antrea-agent @@ -7139,7 +7161,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: aa947bf5c403412b9c8cfcbcc335659992f19bd428886e80f43bafa052bac1e6 + checksum/config: b23abc9cc4f7acfafca9e864b89eadf4543e3f5f0c6b62b472b269e34b1c7347 labels: app: antrea component: antrea-controller @@ -7454,9 +7476,9 @@ webhooks: namespace: kube-system path: "/validate/externalippool" rules: - - operations: ["UPDATE"] + - operations: ["CREATE", "UPDATE"] apiGroups: ["crd.antrea.io"] - apiVersions: ["v1alpha2"] + apiVersions: ["v1alpha2", "v1beta1"] resources: ["externalippools"] scope: "Cluster" admissionReviewVersions: ["v1", "v1beta1"] diff --git a/ci/kind/kind-setup.sh b/ci/kind/kind-setup.sh index 11296ce1fa9..05c2341b129 100755 --- a/ci/kind/kind-setup.sh +++ b/ci/kind/kind-setup.sh @@ -30,12 +30,15 @@ IP_FAMILY="ipv4" NUM_WORKERS=2 SUBNETS="" EXTRA_NETWORKS="" +VLAN_SUBNETS="" +VLAN_ID="" ENCAP_MODE="" PROXY=true KUBE_PROXY_MODE="iptables" PROMETHEUS=false K8S_VERSION="" KUBE_NODE_IPAM=true +DEPLOY_EXTERNAL_SERVER=false positional_args=() options=() @@ -66,11 +69,17 @@ where: --subnets: a subnet creates a separate Docker bridge network (named 'antrea-') with the assigned subnet. A worker Node will be connected to one of those network. Default is empty: all worker Nodes connected to the default Docker bridge network created by kind. + --vlan-subnets: specifies the subnets of the VLAN to which all Nodes will be connected, in addition to the primary network. + The IP expression of the subnet will be used as the gateway IP. For example, '--vlan-subnets 10.100.100.1/24' means + 10.100.100.1/24 will be assigned to the VLAN sub-interface of the network. + --vlan-id: specifies the ID of the VLAN to which all Nodes will be connected, in addition to the primary network. Note, + '--vlan-subnets' and '--vlan-id' must be specified together. --extra-networks: an extra network creates a separate Docker bridge network (named 'antrea-') with the assigned subnet. All worker Nodes will be connected to all the extra networks, in addition to the default Docker bridge network. Note, '--extra-networks' and '--subnets' cannot be specified together. --ip-family: specifies the ip-family for the kind cluster, default is $IP_FAMILY. --k8s-version: specifies the Kubernetes version of the kind cluster, kind's default K8s version will be used if empty. + --deploy-external-server: deploy a container running as an external server for the cluster. --all: delete all kind clusters --until: delete kind clusters that have been created before the specified duration. 
" @@ -97,6 +106,10 @@ function add_option { options+=("$option $action") } +function docker_run_with_host_net { + docker run --rm --net=host --privileged antrea/toolbox:latest "$@" +} + function configure_networks { echo "Configuring networks" networks=$(docker network ls -f name=antrea --format '{{.Name}}') @@ -109,9 +122,9 @@ function configure_networks { # Inject allow all iptables to preempt docker bridge isolation rules if [[ ! -z $SUBNETS ]]; then set +e - docker run --net=host --privileged antrea/toolbox:latest iptables -C DOCKER-USER -j ACCEPT > /dev/null 2>&1 + docker_run_with_host_net iptables -C DOCKER-USER -j ACCEPT > /dev/null 2>&1 if [[ $? -ne 0 ]]; then - docker run --net=host --privileged antrea/toolbox:latest iptables -I DOCKER-USER -j ACCEPT + docker_run_with_host_net iptables -I DOCKER-USER -j ACCEPT fi set -e fi @@ -229,6 +242,45 @@ function configure_extra_networks { done } +function configure_vlan_subnets { + if [[ -z $VLAN_SUBNETS || -z $VLAN_ID ]]; then + return + fi + echo "Configuring VLAN subnets" + + bridge_id=$(docker network inspect kind -f {{.ID}}) + bridge_interface="br-${bridge_id:0:12}" + vlan_interface="br-${bridge_id:0:7}.$VLAN_ID" + + docker_run_with_host_net ip link add link $bridge_interface name $vlan_interface type vlan id $VLAN_ID + docker_run_with_host_net ip link set $vlan_interface up + IFS=',' read -r -a vlan_subnets <<< "$VLAN_SUBNETS" + for s in "${vlan_subnets[@]}" ; do + echo "configuring extra IP $s to vlan interface $vlan_interface" + docker_run_with_host_net ip addr add dev $vlan_interface $s + done + docker_run_with_host_net iptables -t filter -A FORWARD -i $bridge_interface -o $vlan_interface -j ACCEPT + docker_run_with_host_net iptables -t filter -A FORWARD -o $bridge_interface -i $vlan_interface -j ACCEPT +} + +function delete_vlan_subnets { + echo "Deleting VLAN subnets" + + bridge_id=$(docker network inspect kind -f {{.ID}}) + bridge_interface="br-${bridge_id:0:12}" + vlan_interface_prefix="br-${bridge_id:0:7}." 
+ + found_vlan_interfaces=$(ip -br link show type vlan | cut -d " " -f 1) + for interface in $found_vlan_interfaces ; do + if [[ $interface =~ ${vlan_interface_prefix}[0-9]+@${bridge_interface} ]]; then + interface_name=${interface%@*} + docker_run_with_host_net ip link del $interface_name + docker_run_with_host_net iptables -t filter -D FORWARD -i $bridge_interface -o $interface_name -j ACCEPT || true + docker_run_with_host_net iptables -t filter -D FORWARD -o $bridge_interface -i $interface_name -j ACCEPT || true + fi + done +} + function delete_networks { networks=$(docker network ls -f name=antrea --format '{{.Name}}') networks="$(echo $networks)" @@ -355,6 +407,8 @@ EOF configure_networks configure_extra_networks + configure_vlan_subnets + setup_external_server load_images if [[ $ANTREA_CNI == true ]]; then @@ -385,6 +439,8 @@ function destroy { kind delete cluster --name $CLUSTER_NAME fi delete_networks + delete_vlan_subnets + destroy_external_server } function printUnixTimestamp { @@ -396,6 +452,17 @@ function printUnixTimestamp { fi } +function setup_external_server { + if [[ $DEPLOY_EXTERNAL_SERVER == true ]]; then + docker run -d --name external-server --network kind -it --rm registry.k8s.io/e2e-test-images/agnhost:2.29 netexec &> /dev/null + fi +} + +function destroy_external_server { + echo "Deleting external server" + docker rm -f external-server &> /dev/null || true +} + function clean_kind { echo "=== Cleaning up stale kind clusters ===" read -a all_kind_clusters <<< $(kind get clusters) @@ -482,6 +549,16 @@ while [[ $# -gt 0 ]] EXTRA_NETWORKS="$2" shift 2 ;; + --vlan-subnets) + add_option "--vlan-subnets" "create" + VLAN_SUBNETS="$2" + shift 2 + ;; + --vlan-id) + add_option "--vlan-id" "create" + VLAN_ID="$2" + shift 2 + ;; --images) add_option "--image" "create" IMAGES="$2" @@ -502,6 +579,11 @@ while [[ $# -gt 0 ]] K8S_VERSION="$2" shift 2 ;; + --deploy-external-server) + add_option "--deploy-external-server" "create" + DEPLOY_EXTERNAL_SERVER=true + shift + ;; --all) add_option "--all" "destroy" CLUSTER_NAME="*" diff --git a/ci/kind/test-e2e-kind.sh b/ci/kind/test-e2e-kind.sh index a1a560d1778..6d484cf4c4e 100755 --- a/ci/kind/test-e2e-kind.sh +++ b/ci/kind/test-e2e-kind.sh @@ -33,6 +33,8 @@ _usage="Usage: $0 [--encap-mode ] [--ip-family ] [--coverage] --multicast Enables Multicast. --flow-visibility Only run flow visibility related e2e tests. --extra-network Creates an extra network that worker Nodes will connect to. Cannot be specified with the hybrid mode. + --extra-vlan Creates an subnet-based VLAN that worker Nodes will connect to. + --deploy-external-server Deploy a container running as an external server for the cluster. --skip A comma-separated list of keywords, with which tests should be skipped. --coverage Enables measure Antrea code coverage when run e2e tests on kind. --setup-only Only perform setting up the cluster and run test. 
@@ -72,6 +74,8 @@ node_ipam=false multicast=false flow_visibility=false extra_network=false +extra_vlan=false +deploy_external_server=false coverage=false skiplist="" setup_only=false @@ -123,6 +127,14 @@ case $key in extra_network=true shift ;; + --extra-vlan) + extra_vlan=true + shift + ;; + --deploy-external-server) + deploy_external_server=true + shift + ;; --coverage) coverage=true shift @@ -237,6 +249,18 @@ fi printf -v COMMON_IMAGES "%s " "${COMMON_IMAGES_LIST[@]}" +vlan_args="" +if $extra_vlan; then + vlan_args="$vlan_args --vlan-id 10" + if [[ "$ipfamily" == "v4" ]]; then + vlan_args="$vlan_args --vlan-subnets 172.100.10.1/24" + elif [[ "$ipfamily" == "v6" ]]; then + vlan_args="$vlan_args --vlan-subnets fd00:172:100:10::1/96" + elif [[ "$ipfamily" == "dual" ]]; then + vlan_args="$vlan_args --vlan-subnets 172.100.10.1/24,fd00:172:100:10::1/96" + fi +fi + function setup_cluster { args=$1 @@ -257,6 +281,8 @@ function setup_cluster { if $extra_network && [[ "$mode" != "hybrid" ]]; then args="$args --extra-networks \"20.20.30.0/24\"" fi + # Deploy an external server which could be used when testing Pod-to-External traffic. + args="$args --deploy-external-server $vlan_args" echo "creating test bed with args $args" eval "timeout 600 $TESTBED_CMD create kind $args" @@ -310,7 +336,10 @@ function run_test { if [ -n "$run" ]; then RUN_OPT="-run $run" fi - go test -v -timeout=$timeout $RUN_OPT antrea.io/antrea/test/e2e $flow_visibility_args -provider=kind --logs-export-dir=$ANTREA_LOG_DIR --skip-cases=$skiplist $coverage_args + + EXTRA_ARGS="$vlan_args --external-server-ips $(docker inspect external-server -f '{{.NetworkSettings.Networks.kind.IPAddress}},{{.NetworkSettings.Networks.kind.GlobalIPv6Address}}')" + + go test -v -timeout=$timeout $RUN_OPT antrea.io/antrea/test/e2e $flow_visibility_args -provider=kind --logs-export-dir=$ANTREA_LOG_DIR --skip-cases=$skiplist $coverage_args $EXTRA_ARGS } if [[ "$mode" == "" ]] || [[ "$mode" == "encap" ]]; then diff --git a/cmd/antrea-agent/agent.go b/cmd/antrea-agent/agent.go index 6c30874d141..fba691bdd76 100644 --- a/cmd/antrea-agent/agent.go +++ b/cmd/antrea-agent/agent.go @@ -517,8 +517,9 @@ func run(o *Options) error { if o.enableEgress { egressController, err = egress.NewEgressController( ofClient, k8sClient, antreaClientProvider, crdClient, ifaceStore, routeClient, nodeConfig.Name, nodeConfig.NodeTransportInterfaceName, - memberlistCluster, egressInformer, nodeInformer, podUpdateChannel, serviceCIDRProvider, o.config.Egress.MaxEgressIPsPerNode, + memberlistCluster, egressInformer, externalIPPoolInformer, nodeInformer, podUpdateChannel, serviceCIDRProvider, o.config.Egress.MaxEgressIPsPerNode, features.DefaultFeatureGate.Enabled(features.EgressTrafficShaping), + features.DefaultFeatureGate.Enabled(features.EgressSeparateSubnet), ) if err != nil { return fmt.Errorf("error creating new Egress controller: %v", err) diff --git a/docs/egress.md b/docs/egress.md index 86ef3c85732..e1f717be99e 100644 --- a/docs/egress.md +++ b/docs/egress.md @@ -12,6 +12,7 @@ - [Bandwidth](#bandwidth) - [The ExternalIPPool resource](#the-externalippool-resource) - [IPRanges](#ipranges) + - [SubnetInfo](#subnetinfo) - [NodeSelector](#nodeselector) - [Usage examples](#usage-examples) - [Configuring High-Availability Egress](#configuring-high-availability-egress) @@ -198,6 +199,52 @@ The `ipRanges` field contains a list of IP ranges representing the available IPs of this IP pool. 
Each IP range may consist of a `cidr` or a pair of `start` and `end` IPs (which are themselves included in the range). +### SubnetInfo + +By default, it's assumed that the IPs allocated from the pool are in the same +subnet as the Node IPs. Starting with Antrea v1.15, IPs can be allocated from a +subnet different from the Node IPs. + +The optional `subnetInfo` field contains the subnet attributes of the IPs in +this pool. When using a different subnet: + +* `gateway` and `prefixLength` must be set. Antrea will route Egress traffic to +the specified gateway when the destination is not in the same subnet of the +Egress IP, otherwise route it to the destination directly. + +* Optionally, you can specify `vlan` if the underlying network is expecting it. +Once set, Antrea will tag Egress traffic leaving the Egress Node with the +specified VLAN ID. Correspondingly, it's expected that reply traffic towards +these Egress IPs is also tagged with the specified VLAN ID when arriving at the +Egress Node. + +An example of ExternalIPPool using a different subnet is as below: + +```yaml +apiVersion: crd.antrea.io/v1beta1 +kind: ExternalIPPool +metadata: + name: prod-external-ip-pool +spec: + ipRanges: + - start: 10.10.0.2 + end: 10.10.0.10 + subnetInfo: + gateway: 10.10.0.1 + prefixLength: 24 + vlan: 10 + nodeSelector: + matchLabels: + network-role: egress-gateway +``` + +**Note**: Specifying different subnets is currently in alpha version. To use +this feature, users should enable the `EgressSeparateSubnet` feature gate. +Currently, the maximum number of different subnets that can be supported in a +cluster is 20, which should be sufficient for most cases. If you need to have +more subnets, please raise an issue with your use case, and we will consider +revising the limit based on that. + ### NodeSelector The `nodeSelector` field specifies which Nodes the IPs in this pool can be diff --git a/docs/feature-gates.md b/docs/feature-gates.md index bec2c838fc1..03405a6ea7f 100644 --- a/docs/feature-gates.md +++ b/docs/feature-gates.md @@ -56,6 +56,7 @@ edit the Agent configuration in the | `L7NetworkPolicy` | Agent + Controller | `false` | Alpha | v1.10 | N/A | N/A | Yes | | | `AdminNetworkPolicy` | Controller | `false` | Alpha | v1.13 | N/A | N/A | Yes | | | `EgressTrafficShaping` | Agent | `false` | Alpha | v1.14 | N/A | N/A | Yes | OVS meters should be supported | +| `EgressSeparateSubnet` | Agent | `false` | Alpha | v1.15 | N/A | N/A | No | | ## Description and Requirements of Features @@ -413,3 +414,8 @@ bandwidth for all egress traffic belonging to an Egress. Refer to this [document This feature leverages OVS meters to do the actual rate-limiting, therefore this feature requires OVS meters to be supported in the datapath. + +### EgressSeparateSubnet + +`EgressSeparateSubnet` allows users to allocate Egress IPs from a separate subnet different from the default Node +subnet. Refer to this [document](egress.md#subnetinfo) for more information. diff --git a/pkg/agent/controller/egress/egress_controller.go b/pkg/agent/controller/egress/egress_controller.go index 3e51d459e4a..ec1c5bd7cca 100644 --- a/pkg/agent/controller/egress/egress_controller.go +++ b/pkg/agent/controller/egress/egress_controller.go @@ -74,12 +74,15 @@ const ( // maxEgressMark is the maximum mark of Egress IPs can be configured on a Node. 
maxEgressMark = 255 - egressIPIndex = "egressIP" + egressIPIndex = "egressIP" + externalIPPoolIndex = "externalIPPool" // egressDummyDevice is the dummy device that holds the Egress IPs configured to the system by antrea-agent. egressDummyDevice = "antrea-egress0" ) +var maxSubnetsPerNodes = types.MaxEgressRouteTable - types.MinEgressRouteTable + 1 + var emptyWatch = watch.NewEmptyWatch() var newIPAssigner = ipassigner.NewIPAssigner @@ -128,6 +131,16 @@ type egressIPState struct { flowsInstalled bool // Whether its iptables rule has been installed. ruleInstalled bool + // The subnet the Egress IP is associated with. + subnetInfo *crdv1b1.SubnetInfo +} + +// egressRouteTable stores the route table ID created for a subnet and the marks that are referencing it. +type egressRouteTable struct { + // The route table ID. + tableID uint32 + // The marks referencing the table. Once it's empty, the route table should be deleted. + marks sets.Set[uint32] } // egressBinding keeps the Egresses applying to a Pod. @@ -149,11 +162,14 @@ type EgressController struct { egressListerSynced cache.InformerSynced queue workqueue.RateLimitingInterface + externalIPPoolLister crdlisters.ExternalIPPoolLister + externalIPPoolListerSynced cache.InformerSynced + // Use an interface for IP detector to enable testing. localIPDetector ipassigner.LocalIPDetector ifaceStore interfacestore.InterfaceStore nodeName string - idAllocator *idAllocator + markAllocator *idAllocator egressGroups map[string]sets.Set[string] egressGroupsMutex sync.RWMutex @@ -183,6 +199,12 @@ type EgressController struct { eventBroadcaster record.EventBroadcaster record record.EventRecorder + // Whether it should read SubnetInfo of the ExternalIPPool or not. + supportSeparateSubnet bool + // Used to allocate route table ID. + tableAllocator *idAllocator + // Each subnet has its own route table. + egressRouteTables map[crdv1b1.SubnetInfo]*egressRouteTable } func NewEgressController( @@ -196,11 +218,13 @@ func NewEgressController( nodeTransportInterface string, cluster memberlist.Interface, egressInformer crdinformers.EgressInformer, + externalIPPoolInformer crdinformers.ExternalIPPoolInformer, nodeInformers coreinformers.NodeInformer, podUpdateSubscriber channel.Subscriber, serviceCIDRInterface servicecidr.Interface, maxEgressIPsPerNode int, trafficShapingEnabled bool, + supportSeparateSubnet bool, ) (*EgressController, error) { if trafficShapingEnabled && !openflow.OVSMetersAreSupported() { klog.Info("EgressTrafficShaping feature gate is enabled, but it is ignored because OVS meters are not supported.") @@ -229,7 +253,7 @@ func NewEgressController( egressIPStates: map[string]*egressIPState{}, egressBindings: map[string]*egressBinding{}, localIPDetector: ipassigner.NewLocalIPDetector(), - idAllocator: newIDAllocator(minEgressMark, maxEgressMark), + markAllocator: newIDAllocator(minEgressMark, maxEgressMark), cluster: cluster, serviceCIDRInterface: serviceCIDRInterface, // One buffer is enough as we just use it to ensure the target handler is executed once. 
@@ -240,6 +264,21 @@ func NewEgressController( eventBroadcaster: eventBroadcaster, record: recorder, + + externalIPPoolLister: externalIPPoolInformer.Lister(), + externalIPPoolListerSynced: externalIPPoolInformer.Informer().HasSynced, + supportSeparateSubnet: supportSeparateSubnet, + } + if supportSeparateSubnet { + c.egressRouteTables = map[crdv1b1.SubnetInfo]*egressRouteTable{} + c.tableAllocator = newIDAllocator(types.MinEgressRouteTable, types.MaxEgressRouteTable) + externalIPPoolInformer.Informer().AddEventHandlerWithResyncPeriod( + cache.ResourceEventHandlerFuncs{ + AddFunc: c.addExternalIPPool, + UpdateFunc: c.updateExternalIPPool, + }, + resyncPeriod, + ) } ipAssigner, err := newIPAssigner(nodeTransportInterface, egressDummyDevice) if err != nil { @@ -268,6 +307,22 @@ func NewEgressController( } return egressIPs, nil }, + externalIPPoolIndex: func(obj interface{}) ([]string, error) { + egress, ok := obj.(*crdv1b1.Egress) + if !ok { + return nil, fmt.Errorf("obj is not Egress: %+v", obj) + } + var pools []string + if egress.Spec.ExternalIPPool != "" { + pools = append(pools, egress.Spec.ExternalIPPool) + } + for _, pool := range egress.Spec.ExternalIPPools { + if pool != "" { + pools = append(pools, pool) + } + } + return pools, nil + }, }) c.egressInformer.AddEventHandlerWithResyncPeriod( cache.ResourceEventHandlerFuncs{ @@ -384,6 +439,34 @@ func (c *EgressController) deleteEgress(obj interface{}) { klog.V(2).InfoS("Processed Egress DELETE event", "egress", klog.KObj(egress)) } +func (c *EgressController) addExternalIPPool(obj interface{}) { + pool := obj.(*crdv1b1.ExternalIPPool) + if pool.Spec.SubnetInfo == nil { + return + } + c.onExternalIPPoolUpdated(pool.Name) + klog.V(2).InfoS("Processed ExternalIPPool ADD event", "externalIPPool", klog.KObj(pool)) +} + +func (c *EgressController) updateExternalIPPool(old, cur interface{}) { + oldPool := old.(*crdv1b1.ExternalIPPool) + curPool := cur.(*crdv1b1.ExternalIPPool) + // We only care about SubnetInfo here. 
+ if crdv1b1.CompareSubnetInfo(oldPool.Spec.SubnetInfo, curPool.Spec.SubnetInfo, false) { + return + } + c.onExternalIPPoolUpdated(curPool.Name) + klog.V(2).InfoS("Processed ExternalIPPool UPDATE event", "externalIPPool", klog.KObj(curPool)) +} + +func (c *EgressController) onExternalIPPoolUpdated(pool string) { + egresses, _ := c.egressInformer.GetIndexer().ByIndex(externalIPPoolIndex, pool) + for _, obj := range egresses { + egress := obj.(*crdv1b1.Egress) + c.queue.Add(egress.Name) + } +} + func (c *EgressController) onLocalIPUpdate(ip string, added bool) { egresses, _ := c.egressInformer.GetIndexer().ByIndex(egressIPIndex, ip) if len(egresses) == 0 { @@ -417,12 +500,15 @@ func (c *EgressController) Run(stopCh <-chan struct{}) { go c.localIPDetector.Run(stopCh) go c.egressIPScheduler.Run(stopCh) go c.ipAssigner.Run(stopCh) - if !cache.WaitForNamedCacheSync(controllerName, stopCh, c.egressListerSynced, c.localIPDetector.HasSynced, c.egressIPScheduler.HasScheduled) { + if !cache.WaitForNamedCacheSync(controllerName, stopCh, c.egressListerSynced, c.externalIPPoolListerSynced, c.localIPDetector.HasSynced, c.egressIPScheduler.HasScheduled) { return } if err := c.replaceEgressIPs(); err != nil { - klog.ErrorS(err, "failed to replace Egress IPs") + klog.ErrorS(err, "Failed to replace Egress IPs") + } + if err := c.routeClient.RestoreEgressRoutesAndRules(types.MinEgressRouteTable, types.MaxEgressRouteTable); err != nil { + klog.ErrorS(err, "Failed to restore Egress routes and rules") } go wait.NonSlidingUntil(c.watchEgressGroup, 5*time.Second, stopCh) @@ -439,11 +525,16 @@ func (c *EgressController) Run(stopCh <-chan struct{}) { // on this node. The unassigned IPs are from Egresses that were either deleted from the Kubernetes API or migrated // to other Nodes when the agent on this Node was not running. func (c *EgressController) replaceEgressIPs() error { - desiredLocalEgressIPs := sets.New[string]() + desiredLocalEgressIPs := map[string]*crdv1b1.SubnetInfo{} egresses, _ := c.egressLister.List(labels.Everything()) for _, egress := range egresses { if isEgressSchedulable(egress) && egress.Status.EgressNode == c.nodeName && egress.Status.EgressIP != "" { - desiredLocalEgressIPs.Insert(egress.Status.EgressIP) + pool, err := c.externalIPPoolLister.Get(egress.Spec.ExternalIPPool) + // Ignore the Egress if the ExternalIPPool doesn't exist. + if err != nil { + continue + } + desiredLocalEgressIPs[egress.Status.EgressIP] = pool.Spec.SubnetInfo // Record the Egress's state as we assign their IPs to this Node in the following call. It makes sure these // Egress IPs will be unassigned when the Egresses are deleted. c.newEgressState(egress.Name, egress.Status.EgressIP) @@ -489,12 +580,88 @@ func (c *EgressController) processNextWorkItem() bool { return true } +// installPolicyRoute ensures Egress traffic with the given mark access external network via the subnet's gateway, and +// tagged with the subnet's VLAN ID if present. +func (c *EgressController) installPolicyRoute(ipState *egressIPState, subnetInfo *crdv1b1.SubnetInfo) error { + if !c.supportSeparateSubnet { + return nil + } + if crdv1b1.CompareSubnetInfo(ipState.subnetInfo, subnetInfo, false) { + return nil + } + // Deletes stale policy route first. + if err := c.uninstallPolicyRoute(ipState); err != nil { + return err + } + // If the subnetInfo is nil, policy routing is not needed. The Egress IP should just use the main route table. + if subnetInfo == nil { + return nil + } + // Get or create a route table for this subnet. 
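+	// Illustrative note: each distinct subnet gets one route table shared by all Egress IPs
+	// allocated from it, and each Egress IP's packet mark gets its own IP rule pointing at
+	// that table. With assumed example values (tableID 101, mark 1, gateway 10.10.0.1,
+	// prefixLength 24), the kernel state built below is roughly equivalent to:
+	//   ip route add 10.10.0.0/24 dev <egress interface> table 101
+	//   ip route add default via 10.10.0.1 dev <egress interface> table 101
+	//   ip rule add fwmark 1 table 101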
+ rt, exists := c.egressRouteTables[*subnetInfo] + if !exists { + tableID, err := c.tableAllocator.allocate() + if err != nil { + return fmt.Errorf("error allocating table for subnet %v due to exceeding max allowed subnets %d: %w", subnetInfo, maxSubnetsPerNodes, err) + } + // Get the index of the network interface to which IPs in the subnet are assigned. + // The network interface will be used as the device via which the Egress traffic leaves. + devID, ok := c.ipAssigner.GetInterfaceID(subnetInfo) + // This should never happen. + if !ok { + return fmt.Errorf("interface for subnet %v not found", subnetInfo) + } + if err := c.routeClient.AddEgressRoutes(tableID, devID, net.ParseIP(subnetInfo.Gateway), int(subnetInfo.PrefixLength)); err != nil { + return fmt.Errorf("error creating route table for subnet %v: %w", subnetInfo, err) + } + rt = &egressRouteTable{tableID: tableID, marks: sets.New[uint32]()} + c.egressRouteTables[*subnetInfo] = rt + } + // Add an IP rule to make the marked Egress traffic look up the table. + if err := c.routeClient.AddEgressRule(rt.tableID, ipState.mark); err != nil { + return fmt.Errorf("error adding ip rule for mark %v: %w", ipState.mark, err) + } + // Track the route table's usage. + rt.marks.Insert(ipState.mark) + // Track the current subnet of the Egress IP. + ipState.subnetInfo = subnetInfo + return nil +} + +// uninstallPolicyRoute deletes the policy route of the Egress IP. +func (c *EgressController) uninstallPolicyRoute(ipState *egressIPState) error { + if !c.supportSeparateSubnet { + return nil + } + if ipState.subnetInfo == nil { + return nil + } + rt, exists := c.egressRouteTables[*ipState.subnetInfo] + if !exists { + return nil + } + if err := c.routeClient.DeleteEgressRule(rt.tableID, ipState.mark); err != nil { + return fmt.Errorf("error deleting ip rule for mark %v: %w", ipState.mark, err) + } + rt.marks.Delete(ipState.mark) + // Delete the route table If it is not used by any Egress. + if rt.marks.Len() == 0 { + if err := c.routeClient.DeleteEgressRoutes(rt.tableID); err != nil { + return fmt.Errorf("error deleting route table for subnet %v: %w", ipState.subnetInfo, err) + } + c.tableAllocator.release(rt.tableID) + delete(c.egressRouteTables, *ipState.subnetInfo) + } + ipState.subnetInfo = nil + return nil +} + // realizeEgressIP realizes an Egress IP. Multiple Egresses can share the same Egress IP. // If it's called the first time for a local Egress IP, it allocates a locally-unique mark for the IP and installs flows // and iptables rule for this IP and the mark. // If the Egress IP is changed from local to non local, it uninstalls flows and iptables rule and releases the mark. // The method returns the mark on success. Non local Egresses use 0 as the mark. -func (c *EgressController) realizeEgressIP(egressName, egressIP string) (uint32, error) { +func (c *EgressController) realizeEgressIP(egressName, egressIP string, subnetInfo *crdv1b1.SubnetInfo) (uint32, error) { isLocalIP := c.localIPDetector.IsLocalIP(egressIP) c.egressIPStatesMutex.Lock() @@ -516,7 +683,7 @@ func (c *EgressController) realizeEgressIP(egressName, egressIP string) (uint32, if isLocalIP { // Ensure the Egress IP has a mark allocated when it's a local IP. 
if ipState.mark == 0 { - ipState.mark, err = c.idAllocator.allocate() + ipState.mark, err = c.markAllocator.allocate() if err != nil { return 0, fmt.Errorf("error allocating mark for IP %s: %v", egressIP, err) } @@ -534,8 +701,14 @@ func (c *EgressController) realizeEgressIP(egressName, egressIP string) (uint32, } ipState.ruleInstalled = true } + if err := c.installPolicyRoute(ipState, subnetInfo); err != nil { + return 0, fmt.Errorf("error installing policy route for IP %s: %v", ipState.egressIP, err) + } } else { // Ensure datapath is uninstalled properly. + if err := c.uninstallPolicyRoute(ipState); err != nil { + return 0, fmt.Errorf("error uninstalling policy routing for IP %s: %v", ipState.egressIP, err) + } if ipState.ruleInstalled { if err := c.routeClient.DeleteSNATRule(ipState.mark); err != nil { return 0, fmt.Errorf("error uninstalling SNAT rule for IP %s: %v", ipState.egressIP, err) @@ -549,7 +722,7 @@ func (c *EgressController) realizeEgressIP(egressName, egressIP string) (uint32, ipState.flowsInstalled = false } if ipState.mark != 0 { - err := c.idAllocator.release(ipState.mark) + err := c.markAllocator.release(ipState.mark) if err != nil { return 0, fmt.Errorf("error releasing mark for IP %s: %v", egressIP, err) } @@ -633,6 +806,9 @@ func (c *EgressController) unrealizeEgressIP(egressName, egressIP string) error return nil } if ipState.mark != 0 { + if err := c.uninstallPolicyRoute(ipState); err != nil { + return err + } if ipState.ruleInstalled { if err := c.routeClient.DeleteSNATRule(ipState.mark); err != nil { return err @@ -645,7 +821,7 @@ func (c *EgressController) unrealizeEgressIP(egressName, egressIP string) error } ipState.flowsInstalled = false } - c.idAllocator.release(ipState.mark) + c.markAllocator.release(ipState.mark) } delete(c.egressIPStates, egressIP) return nil @@ -870,11 +1046,19 @@ func (c *EgressController) syncEgress(egressName string) error { eState = c.newEgressState(egressName, desiredEgressIP) } + var subnetInfo *crdv1b1.SubnetInfo if desiredNode == c.nodeName { + if c.supportSeparateSubnet && egress.Spec.ExternalIPPool != "" { + if pool, err := c.externalIPPoolLister.Get(egress.Spec.ExternalIPPool); err != nil { + return err + } else { + subnetInfo = pool.Spec.SubnetInfo + } + } // Ensure the Egress IP is assigned to the system. Force advertising the IP if it was previously assigned to // another Node in the Egress API. This could force refreshing other peers' neighbor cache when the Egress IP is // obtained by this Node and another Node at the same time in some situations, e.g. split brain. - assigned, err := c.ipAssigner.AssignIP(desiredEgressIP, egress.Status.EgressNode != c.nodeName) + assigned, err := c.ipAssigner.AssignIP(desiredEgressIP, subnetInfo, egress.Status.EgressNode != c.nodeName) if err != nil { return err } @@ -893,7 +1077,7 @@ func (c *EgressController) syncEgress(egressName string) error { } // Realize the latest EgressIP and get the desired mark. 
- mark, err := c.realizeEgressIP(egressName, desiredEgressIP) + mark, err := c.realizeEgressIP(egressName, desiredEgressIP, subnetInfo) if err != nil { return err } diff --git a/pkg/agent/controller/egress/egress_controller_test.go b/pkg/agent/controller/egress/egress_controller_test.go index 256358f0a2d..101edfe3ae8 100644 --- a/pkg/agent/controller/egress/egress_controller_test.go +++ b/pkg/agent/controller/egress/egress_controller_test.go @@ -59,8 +59,11 @@ const ( fakeLocalEgressIP1 = "1.1.1.1" fakeLocalEgressIP2 = "1.1.1.2" fakeRemoteEgressIP1 = "1.1.1.3" + fakeGatewayIP = "1.1.0.1" + fakeGatewayIP2 = "1.1.0.2" fakeNode = "node1" fakeNode2 = "node2" + fakeExternalIPPool = "external-ip-pool" ) var ( @@ -167,6 +170,7 @@ func newFakeController(t *testing.T, initObjects []runtime.Object) *fakeControll crdClient := fakeversioned.NewSimpleClientset(initObjects...) crdInformerFactory := crdinformers.NewSharedInformerFactory(crdClient, 0) egressInformer := crdInformerFactory.Crd().V1beta1().Egresses() + externalIPPoolInformer := crdInformerFactory.Crd().V1beta1().ExternalIPPools() k8sClient := fake.NewSimpleClientset() informerFactory := informers.NewSharedInformerFactory(k8sClient, 0) nodeInformer := informerFactory.Core().V1().Nodes() @@ -191,11 +195,13 @@ func newFakeController(t *testing.T, initObjects []runtime.Object) *fakeControll "eth0", mockCluster, egressInformer, + externalIPPoolInformer, nodeInformer, podUpdateChannel, mockServiceCIDRProvider, 255, true, + true, ) egressController.localIPDetector = localIPDetector return &fakeController{ @@ -214,15 +220,18 @@ func newFakeController(t *testing.T, initObjects []runtime.Object) *fakeControll func TestSyncEgress(t *testing.T) { tests := []struct { - name string - maxEgressIPsPerNode int - existingEgress *crdv1b1.Egress - newEgress *crdv1b1.Egress - existingEgressGroup *cpv1b2.EgressGroup - newEgressGroup *cpv1b2.EgressGroup - newLocalIPs sets.Set[string] - expectedEgresses []*crdv1b1.Egress - expectedCalls func(mockOFClient *openflowtest.MockClient, mockRouteClient *routetest.MockInterface, mockIPAssigner *ipassignertest.MockIPAssigner) + name string + supportSeparateSubnet bool + maxEgressIPsPerNode int + existingExternalIPPool *crdv1b1.ExternalIPPool + existingEgress *crdv1b1.Egress + newExternalIPPool *crdv1b1.ExternalIPPool + newEgress *crdv1b1.Egress + existingEgressGroup *cpv1b2.EgressGroup + newEgressGroup *cpv1b2.EgressGroup + newLocalIPs sets.Set[string] + expectedEgresses []*crdv1b1.Egress + expectedCalls func(mockOFClient *openflowtest.MockClient, mockRouteClient *routetest.MockInterface, mockIPAssigner *ipassignertest.MockIPAssigner) }{ { name: "Local IP becomes non local", @@ -585,7 +594,7 @@ func TestSyncEgress(t *testing.T) { }, newEgress: &crdv1b1.Egress{ ObjectMeta: metav1.ObjectMeta{Name: "egressB", UID: "uidB"}, - Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP2, ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP2, ExternalIPPool: fakeExternalIPPool}, }, existingEgressGroup: &cpv1b2.EgressGroup{ ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, @@ -608,7 +617,7 @@ func TestSyncEgress(t *testing.T) { }, { ObjectMeta: metav1.ObjectMeta{Name: "egressB", UID: "uidB"}, - Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP2, ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP2, ExternalIPPool: fakeExternalIPPool}, Status: crdv1b1.EgressStatus{EgressIP: fakeLocalEgressIP2, EgressNode: fakeNode, Conditions: 
[]crdv1b1.EgressCondition{ {Type: crdv1b1.IPAssigned, Status: v1.ConditionTrue, Reason: "Assigned", Message: "EgressIP is successfully assigned to EgressNode"}, }}, @@ -620,9 +629,9 @@ func TestSyncEgress(t *testing.T) { mockOFClient.EXPECT().InstallPodSNATFlows(uint32(1), net.ParseIP(fakeLocalEgressIP1), uint32(1)) mockOFClient.EXPECT().InstallPodSNATFlows(uint32(2), net.ParseIP(fakeLocalEgressIP1), uint32(1)) mockRouteClient.EXPECT().AddSNATRule(net.ParseIP(fakeLocalEgressIP1), uint32(1)) - mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP2, true) + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP2, nil, true) // forceAdvertise depends on how fast the Egress status update is reflected in the informer cache, which doesn't really matter. - mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP2, gomock.Any()) + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP2, nil, gomock.Any()) mockOFClient.EXPECT().InstallSNATMarkFlows(net.ParseIP(fakeLocalEgressIP2), uint32(2)) mockOFClient.EXPECT().InstallPodSNATFlows(uint32(3), net.ParseIP(fakeLocalEgressIP2), uint32(2)) mockRouteClient.EXPECT().AddSNATRule(net.ParseIP(fakeLocalEgressIP2), uint32(2)) @@ -633,11 +642,11 @@ func TestSyncEgress(t *testing.T) { maxEgressIPsPerNode: 1, existingEgress: &crdv1b1.Egress{ ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, - Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, }, newEgress: &crdv1b1.Egress{ ObjectMeta: metav1.ObjectMeta{Name: "egressB", UID: "uidB"}, - Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: fakeExternalIPPool}, }, existingEgressGroup: &cpv1b2.EgressGroup{ ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, @@ -655,21 +664,21 @@ func TestSyncEgress(t *testing.T) { expectedEgresses: []*crdv1b1.Egress{ { ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, - Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, Status: crdv1b1.EgressStatus{EgressIP: fakeLocalEgressIP1, EgressNode: fakeNode, Conditions: []crdv1b1.EgressCondition{ {Type: crdv1b1.IPAssigned, Status: v1.ConditionTrue, Reason: "Assigned", Message: "EgressIP is successfully assigned to EgressNode"}, }}, }, { ObjectMeta: metav1.ObjectMeta{Name: "egressB", UID: "uidB"}, - Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: fakeExternalIPPool}, Status: crdv1b1.EgressStatus{Conditions: []crdv1b1.EgressCondition{ {Type: crdv1b1.IPAssigned, Status: v1.ConditionFalse, Reason: "AssignmentError", Message: "Failed to assign the IP to EgressNode: no Node available"}, }}, }, }, expectedCalls: func(mockOFClient *openflowtest.MockClient, mockRouteClient *routetest.MockInterface, mockIPAssigner *ipassignertest.MockIPAssigner) { - mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP1, true) + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP1, nil, true) mockOFClient.EXPECT().InstallSNATMarkFlows(net.ParseIP(fakeLocalEgressIP1), uint32(1)) mockOFClient.EXPECT().InstallPodSNATFlows(uint32(1), net.ParseIP(fakeLocalEgressIP1), uint32(1)) mockOFClient.EXPECT().InstallPodSNATFlows(uint32(2), net.ParseIP(fakeLocalEgressIP1), 
uint32(1)) @@ -681,11 +690,11 @@ func TestSyncEgress(t *testing.T) { maxEgressIPsPerNode: 1, existingEgress: &crdv1b1.Egress{ ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, - Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, }, newEgress: &crdv1b1.Egress{ ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, - Spec: crdv1b1.EgressSpec{ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{ExternalIPPool: fakeExternalIPPool}, Status: crdv1b1.EgressStatus{EgressIP: fakeLocalEgressIP1, EgressNode: fakeNode}, }, existingEgressGroup: &cpv1b2.EgressGroup{ @@ -704,12 +713,12 @@ func TestSyncEgress(t *testing.T) { expectedEgresses: []*crdv1b1.Egress{ { ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, - Spec: crdv1b1.EgressSpec{ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{ExternalIPPool: fakeExternalIPPool}, Status: crdv1b1.EgressStatus{}, }, }, expectedCalls: func(mockOFClient *openflowtest.MockClient, mockRouteClient *routetest.MockInterface, mockIPAssigner *ipassignertest.MockIPAssigner) { - mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP1, true) + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP1, nil, true) mockOFClient.EXPECT().InstallSNATMarkFlows(net.ParseIP(fakeLocalEgressIP1), uint32(1)) mockOFClient.EXPECT().InstallPodSNATFlows(uint32(1), net.ParseIP(fakeLocalEgressIP1), uint32(1)) mockOFClient.EXPECT().InstallPodSNATFlows(uint32(2), net.ParseIP(fakeLocalEgressIP1), uint32(1)) @@ -837,10 +846,241 @@ func TestSyncEgress(t *testing.T) { mockOFClient.EXPECT().InstallEgressQoS(uint32(1), uint32(10000), uint32(20000)) }, }, + { + name: "Add SubnetInfo to ExternalIPPool", + supportSeparateSubnet: true, + existingExternalIPPool: &crdv1b1.ExternalIPPool{ + ObjectMeta: metav1.ObjectMeta{Name: fakeExternalIPPool, UID: "pool-uid"}, + Spec: crdv1b1.ExternalIPPoolSpec{ + IPRanges: []crdv1b1.IPRange{{Start: fakeLocalEgressIP1, End: fakeRemoteEgressIP1}}, + }, + }, + existingEgress: &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, + }, + newExternalIPPool: &crdv1b1.ExternalIPPool{ + ObjectMeta: metav1.ObjectMeta{Name: fakeExternalIPPool, UID: "pool-uid"}, + Spec: crdv1b1.ExternalIPPoolSpec{ + IPRanges: []crdv1b1.IPRange{{Start: fakeLocalEgressIP1, End: fakeRemoteEgressIP1}}, + SubnetInfo: &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}, + }, + }, + newEgress: &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, + }, + existingEgressGroup: &cpv1b2.EgressGroup{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + GroupMembers: []cpv1b2.GroupMember{ + {Pod: &cpv1b2.PodReference{Name: "pod1", Namespace: "ns1"}}, + }, + }, + expectedEgresses: []*crdv1b1.Egress{ + { + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, + Status: crdv1b1.EgressStatus{EgressIP: fakeLocalEgressIP1, EgressNode: fakeNode, Conditions: []crdv1b1.EgressCondition{ + {Type: crdv1b1.IPAssigned, Status: v1.ConditionTrue, Reason: "Assigned", Message: "EgressIP is successfully assigned to EgressNode"}, + }}, + }, + }, + expectedCalls: func(mockOFClient 
*openflowtest.MockClient, mockRouteClient *routetest.MockInterface, mockIPAssigner *ipassignertest.MockIPAssigner) { + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP1, nil, true) + mockOFClient.EXPECT().InstallSNATMarkFlows(net.ParseIP(fakeLocalEgressIP1), uint32(1)) + mockOFClient.EXPECT().InstallPodSNATFlows(uint32(1), net.ParseIP(fakeLocalEgressIP1), uint32(1)) + mockRouteClient.EXPECT().AddSNATRule(net.ParseIP(fakeLocalEgressIP1), uint32(1)) + + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP1, &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}, true) + mockIPAssigner.EXPECT().GetInterfaceID(&crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}).Return(20, true) + mockRouteClient.EXPECT().AddEgressRoutes(uint32(101), 20, net.ParseIP(fakeGatewayIP), 16) + mockRouteClient.EXPECT().AddEgressRule(uint32(101), uint32(1)) + + // forceAdvertise depends on how fast the Egress status update is reflected in the informer cache, which doesn't really matter. + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP1, &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}, gomock.Any()) + }, + }, + { + name: "Update SubnetInfo of ExternalIPPool", + supportSeparateSubnet: true, + existingExternalIPPool: &crdv1b1.ExternalIPPool{ + ObjectMeta: metav1.ObjectMeta{Name: fakeExternalIPPool, UID: "pool-uid"}, + Spec: crdv1b1.ExternalIPPoolSpec{ + IPRanges: []crdv1b1.IPRange{{Start: fakeLocalEgressIP1, End: fakeRemoteEgressIP1}}, + SubnetInfo: &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}, + }, + }, + existingEgress: &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, + }, + newExternalIPPool: &crdv1b1.ExternalIPPool{ + ObjectMeta: metav1.ObjectMeta{Name: fakeExternalIPPool, UID: "pool-uid"}, + Spec: crdv1b1.ExternalIPPoolSpec{ + IPRanges: []crdv1b1.IPRange{{Start: fakeLocalEgressIP1, End: fakeRemoteEgressIP1}}, + SubnetInfo: &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP2, PrefixLength: 16}, + }, + }, + newEgress: &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, + }, + existingEgressGroup: &cpv1b2.EgressGroup{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + GroupMembers: []cpv1b2.GroupMember{ + {Pod: &cpv1b2.PodReference{Name: "pod1", Namespace: "ns1"}}, + }, + }, + expectedEgresses: []*crdv1b1.Egress{ + { + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, + Status: crdv1b1.EgressStatus{EgressIP: fakeLocalEgressIP1, EgressNode: fakeNode, Conditions: []crdv1b1.EgressCondition{ + {Type: crdv1b1.IPAssigned, Status: v1.ConditionTrue, Reason: "Assigned", Message: "EgressIP is successfully assigned to EgressNode"}, + }}, + }, + }, + expectedCalls: func(mockOFClient *openflowtest.MockClient, mockRouteClient *routetest.MockInterface, mockIPAssigner *ipassignertest.MockIPAssigner) { + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP1, &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}, true) + mockOFClient.EXPECT().InstallSNATMarkFlows(net.ParseIP(fakeLocalEgressIP1), uint32(1)) + mockOFClient.EXPECT().InstallPodSNATFlows(uint32(1), net.ParseIP(fakeLocalEgressIP1), uint32(1)) + mockRouteClient.EXPECT().AddSNATRule(net.ParseIP(fakeLocalEgressIP1), 
uint32(1)) + mockIPAssigner.EXPECT().GetInterfaceID(&crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}).Return(20, true) + mockRouteClient.EXPECT().AddEgressRoutes(uint32(101), 20, net.ParseIP(fakeGatewayIP), 16) + mockRouteClient.EXPECT().AddEgressRule(uint32(101), uint32(1)) + + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP1, &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP2, PrefixLength: 16}, true) + mockRouteClient.EXPECT().DeleteEgressRule(uint32(101), uint32(1)) + mockRouteClient.EXPECT().DeleteEgressRoutes(uint32(101)) + mockIPAssigner.EXPECT().GetInterfaceID(&crdv1b1.SubnetInfo{Gateway: fakeGatewayIP2, PrefixLength: 16}).Return(30, true) + mockRouteClient.EXPECT().AddEgressRoutes(uint32(101), 30, net.ParseIP(fakeGatewayIP2), 16) + mockRouteClient.EXPECT().AddEgressRule(uint32(101), uint32(1)) + + // forceAdvertise depends on how fast the Egress status update is reflected in the informer cache, which doesn't really matter. + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP1, &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP2, PrefixLength: 16}, gomock.Any()) + }, + }, + { + name: "Add Egress having same SubnetInfo", + supportSeparateSubnet: true, + existingExternalIPPool: &crdv1b1.ExternalIPPool{ + ObjectMeta: metav1.ObjectMeta{Name: fakeExternalIPPool, UID: "pool-uid"}, + Spec: crdv1b1.ExternalIPPoolSpec{ + IPRanges: []crdv1b1.IPRange{{Start: fakeLocalEgressIP1, End: fakeRemoteEgressIP1}}, + SubnetInfo: &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}, + }, + }, + existingEgress: &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, + }, + newEgress: &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressB", UID: "uidB"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP2, ExternalIPPool: fakeExternalIPPool}, + }, + existingEgressGroup: &cpv1b2.EgressGroup{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + GroupMembers: []cpv1b2.GroupMember{ + {Pod: &cpv1b2.PodReference{Name: "pod1", Namespace: "ns1"}}, + }, + }, + newEgressGroup: &cpv1b2.EgressGroup{ + ObjectMeta: metav1.ObjectMeta{Name: "egressB", UID: "uidB"}, + GroupMembers: []cpv1b2.GroupMember{ + {Pod: &cpv1b2.PodReference{Name: "pod2", Namespace: "ns2"}}, + }, + }, + expectedEgresses: []*crdv1b1.Egress{ + { + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, + Status: crdv1b1.EgressStatus{EgressIP: fakeLocalEgressIP1, EgressNode: fakeNode, Conditions: []crdv1b1.EgressCondition{ + {Type: crdv1b1.IPAssigned, Status: v1.ConditionTrue, Reason: "Assigned", Message: "EgressIP is successfully assigned to EgressNode"}, + }}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "egressB", UID: "uidB"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP2, ExternalIPPool: fakeExternalIPPool}, + Status: crdv1b1.EgressStatus{EgressIP: fakeLocalEgressIP2, EgressNode: fakeNode, Conditions: []crdv1b1.EgressCondition{ + {Type: crdv1b1.IPAssigned, Status: v1.ConditionTrue, Reason: "Assigned", Message: "EgressIP is successfully assigned to EgressNode"}, + }}, + }, + }, + expectedCalls: func(mockOFClient *openflowtest.MockClient, mockRouteClient *routetest.MockInterface, mockIPAssigner *ipassignertest.MockIPAssigner) { + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP1, &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}, true) + 
mockOFClient.EXPECT().InstallSNATMarkFlows(net.ParseIP(fakeLocalEgressIP1), uint32(1)) + mockOFClient.EXPECT().InstallPodSNATFlows(uint32(1), net.ParseIP(fakeLocalEgressIP1), uint32(1)) + mockRouteClient.EXPECT().AddSNATRule(net.ParseIP(fakeLocalEgressIP1), uint32(1)) + mockIPAssigner.EXPECT().GetInterfaceID(&crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}).Return(20, true) + mockRouteClient.EXPECT().AddEgressRoutes(uint32(101), 20, net.ParseIP(fakeGatewayIP), 16) + mockRouteClient.EXPECT().AddEgressRule(uint32(101), uint32(1)) + + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP2, &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}, true) + mockOFClient.EXPECT().InstallSNATMarkFlows(net.ParseIP(fakeLocalEgressIP2), uint32(2)) + mockOFClient.EXPECT().InstallPodSNATFlows(uint32(2), net.ParseIP(fakeLocalEgressIP2), uint32(2)) + mockRouteClient.EXPECT().AddSNATRule(net.ParseIP(fakeLocalEgressIP2), uint32(2)) + mockRouteClient.EXPECT().AddEgressRule(uint32(101), uint32(2)) + + // forceAdvertise depends on how fast the Egress status update is reflected in the informer cache, which doesn't really matter. + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP2, &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}, gomock.Any()) + }, + }, + { + name: "Remove Egress IP with SubnetInfo ", + supportSeparateSubnet: true, + existingEgress: &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, + }, + existingExternalIPPool: &crdv1b1.ExternalIPPool{ + ObjectMeta: metav1.ObjectMeta{Name: fakeExternalIPPool, UID: "pool-uid"}, + Spec: crdv1b1.ExternalIPPoolSpec{ + IPRanges: []crdv1b1.IPRange{{Start: fakeLocalEgressIP1, End: fakeRemoteEgressIP1}}, + SubnetInfo: &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}, + }, + }, + newEgress: &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{ExternalIPPool: fakeExternalIPPool}, + }, + existingEgressGroup: &cpv1b2.EgressGroup{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + GroupMembers: []cpv1b2.GroupMember{ + {Pod: &cpv1b2.PodReference{Name: "pod1", Namespace: "ns1"}}, + }, + }, + expectedEgresses: []*crdv1b1.Egress{ + { + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{ExternalIPPool: fakeExternalIPPool}, + }, + }, + expectedCalls: func(mockOFClient *openflowtest.MockClient, mockRouteClient *routetest.MockInterface, mockIPAssigner *ipassignertest.MockIPAssigner) { + mockIPAssigner.EXPECT().AssignIP(fakeLocalEgressIP1, &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}, true) + mockOFClient.EXPECT().InstallSNATMarkFlows(net.ParseIP(fakeLocalEgressIP1), uint32(1)) + mockOFClient.EXPECT().InstallPodSNATFlows(uint32(1), net.ParseIP(fakeLocalEgressIP1), uint32(1)) + mockRouteClient.EXPECT().AddSNATRule(net.ParseIP(fakeLocalEgressIP1), uint32(1)) + mockIPAssigner.EXPECT().GetInterfaceID(&crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 10}).Return(20, true) + mockRouteClient.EXPECT().AddEgressRoutes(uint32(101), 20, net.ParseIP(fakeGatewayIP), 16) + mockRouteClient.EXPECT().AddEgressRule(uint32(101), uint32(1)) + + mockIPAssigner.EXPECT().UnassignIP(fakeLocalEgressIP1) + mockRouteClient.EXPECT().DeleteEgressRule(uint32(101), uint32(1)) + mockRouteClient.EXPECT().DeleteEgressRoutes(uint32(101)) + 
mockOFClient.EXPECT().UninstallSNATMarkFlows(uint32(1)) + mockOFClient.EXPECT().UninstallPodSNATFlows(uint32(1)) + mockRouteClient.EXPECT().DeleteSNATRule(uint32(1)) + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c := newFakeController(t, []runtime.Object{tt.existingEgress}) + initObjects := []runtime.Object{tt.existingEgress} + if tt.existingExternalIPPool != nil { + initObjects = append(initObjects, tt.existingExternalIPPool) + } + c := newFakeController(t, initObjects) + c.supportSeparateSubnet = tt.supportSeparateSubnet c.trafficShapingEnabled = true if tt.maxEgressIPsPerNode > 0 { c.egressIPScheduler.maxEgressIPsPerNode = tt.maxEgressIPsPerNode @@ -864,12 +1104,27 @@ func TestSyncEgress(t *testing.T) { } else { c.crdClient.CrdV1beta1().Egresses().Create(context.TODO(), tt.newEgress, metav1.CreateOptions{}) } + if tt.newExternalIPPool != nil { + if tt.existingExternalIPPool != nil && tt.existingExternalIPPool.Name == tt.newExternalIPPool.Name { + c.crdClient.CrdV1beta1().ExternalIPPools().Update(context.TODO(), tt.newExternalIPPool, metav1.UpdateOptions{}) + } else { + c.crdClient.CrdV1beta1().ExternalIPPools().Create(context.TODO(), tt.newExternalIPPool, metav1.CreateOptions{}) + } + } - c.addEgressGroup(tt.newEgressGroup) + if tt.newEgressGroup != nil { + c.addEgressGroup(tt.newEgressGroup) + } if tt.newLocalIPs != nil { c.localIPDetector = &fakeLocalIPDetector{localIPs: tt.newLocalIPs} } assert.NoError(t, wait.Poll(time.Millisecond*100, time.Second, func() (done bool, err error) { + if tt.newExternalIPPool != nil { + pool, _ := c.externalIPPoolLister.Get(tt.newExternalIPPool.Name) + if !reflect.DeepEqual(pool, tt.newExternalIPPool) { + return false, nil + } + } egress, _ := c.egressLister.Get(tt.newEgress.Name) return reflect.DeepEqual(egress, tt.newEgress), nil })) @@ -938,6 +1193,64 @@ func TestPodUpdateShouldSyncEgress(t *testing.T) { c.queue.Done(item) } +func TestExternalIPPoolUpdateShouldSyncEgress(t *testing.T) { + egress1 := &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, + } + egress2 := &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressB", UID: "uidB"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP2, ExternalIPPool: fakeExternalIPPool}, + } + egress3 := &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressC", UID: "uidC"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: "another-pool"}, + } + c := newFakeController(t, []runtime.Object{egress1, egress2, egress3}) + stopCh := make(chan struct{}) + defer close(stopCh) + go c.podUpdateChannel.Run(stopCh) + c.crdInformerFactory.Start(stopCh) + c.informerFactory.Start(stopCh) + c.crdInformerFactory.WaitForCacheSync(stopCh) + c.informerFactory.WaitForCacheSync(stopCh) + + assertItemsInQueue := func(items ...string) { + require.NoError(t, wait.Poll(10*time.Millisecond, time.Second, func() (done bool, err error) { + return c.queue.Len() == len(items), nil + })) + expectedItems := sets.New[string](items...) + for i := 0; i < len(items); i++ { + item, _ := c.queue.Get() + c.queue.Done(item) + expectedItems.Delete(item.(string)) + } + assert.Empty(t, expectedItems) + } + + assertItemsInQueue(egress1.Name, egress2.Name, egress3.Name) + + // Creating the pool with subnetInfo should trigger Egress sync. 
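+	// Only Egresses referencing this pool (egress1 and egress2) should be re-queued; egress3 references another pool.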
+ externalIPPool := &crdv1b1.ExternalIPPool{ + ObjectMeta: metav1.ObjectMeta{Name: fakeExternalIPPool, UID: "pool-uidA"}, + Spec: crdv1b1.ExternalIPPoolSpec{SubnetInfo: &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 2}}, + } + c.crdClient.CrdV1beta1().ExternalIPPools().Create(context.TODO(), externalIPPool, metav1.CreateOptions{}) + assertItemsInQueue(egress1.Name, egress2.Name) + + // Updating the pool's subnetInfo should trigger Egress sync. + updateExternalIPPool := externalIPPool.DeepCopy() + updateExternalIPPool.Spec.SubnetInfo.VLAN = 10 + c.crdClient.CrdV1beta1().ExternalIPPools().Update(context.TODO(), updateExternalIPPool, metav1.UpdateOptions{}) + assertItemsInQueue(egress1.Name, egress2.Name) + + // Updating the pool's annotation should not trigger Egress sync. + updateExternalIPPool = updateExternalIPPool.DeepCopy() + updateExternalIPPool.Annotations = map[string]string{"foo": "bar"} + c.crdClient.CrdV1beta1().ExternalIPPools().Update(context.TODO(), updateExternalIPPool, metav1.UpdateOptions{}) + assertItemsInQueue() +} + func TestSyncOverlappingEgress(t *testing.T) { egress1 := &crdv1b1.Egress{ ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, @@ -1163,7 +1476,7 @@ func TestUpdateEgressStatus(t *testing.T) { name: "updating HA Egress with local IP succeeds immediately", egress: &crdv1b1.Egress{ ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA", ResourceVersion: "fake-ResourceVersion"}, - Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, Status: crdv1b1.EgressStatus{ Conditions: []crdv1b1.EgressCondition{ {Type: crdv1b1.IPAllocated, Status: v1.ConditionTrue, Reason: "Allocated", Message: "EgressIP is successfully allocated"}, @@ -1185,7 +1498,7 @@ func TestUpdateEgressStatus(t *testing.T) { name: "updating HA Egress with remote IP does nothing", egress: &crdv1b1.Egress{ ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA", ResourceVersion: "fake-ResourceVersion"}, - Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: fakeExternalIPPool}, Status: crdv1b1.EgressStatus{ Conditions: []crdv1b1.EgressCondition{ {Type: crdv1b1.IPAllocated, Status: v1.ConditionTrue, Reason: "Allocated", Message: "EgressIP is successfully allocated"}, @@ -1203,7 +1516,7 @@ func TestUpdateEgressStatus(t *testing.T) { name: "updating HA Egress with schedule error succeeds immediately", egress: &crdv1b1.Egress{ ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA", ResourceVersion: "fake-ResourceVersion"}, - Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: fakeExternalIPPool}, Status: crdv1b1.EgressStatus{ Conditions: []crdv1b1.EgressCondition{ {Type: crdv1b1.IPAllocated, Status: v1.ConditionTrue, Reason: "Allocated", Message: "EgressIP is successfully allocated"}, @@ -1224,7 +1537,7 @@ func TestUpdateEgressStatus(t *testing.T) { name: "updating HA Egress with schedule error succeeds after one update conflict failure", egress: &crdv1b1.Egress{ ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA", ResourceVersion: "fake-ResourceVersion"}, - Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, 
ExternalIPPool: fakeExternalIPPool}, Status: crdv1b1.EgressStatus{ Conditions: []crdv1b1.EgressCondition{ {Type: crdv1b1.IPAllocated, Status: v1.ConditionTrue, Reason: "Allocated", Message: "EgressIP is successfully allocated"}, @@ -1248,7 +1561,7 @@ func TestUpdateEgressStatus(t *testing.T) { name: "updating HA Egress with schedule error does nothing when the Node is not selected to update", egress: &crdv1b1.Egress{ ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA", ResourceVersion: "fake-ResourceVersion"}, - Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: "external-ip-pool"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: fakeExternalIPPool}, Status: crdv1b1.EgressStatus{ Conditions: []crdv1b1.EgressCondition{ {Type: crdv1b1.IPAllocated, Status: v1.ConditionTrue, Reason: "Allocated", Message: "EgressIP is successfully allocated"}, @@ -1604,3 +1917,51 @@ func TestCompareEgressStatus(t *testing.T) { }) } } + +func TestEgressControllerReplaceEgressIPs(t *testing.T) { + c := newFakeController(t, []runtime.Object{ + &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP1, ExternalIPPool: fakeExternalIPPool}, + Status: crdv1b1.EgressStatus{EgressNode: fakeNode, EgressIP: fakeLocalEgressIP1}, + }, + &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressB", UID: "uidB"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeLocalEgressIP2, ExternalIPPool: fakeExternalIPPool}, + Status: crdv1b1.EgressStatus{EgressNode: fakeNode, EgressIP: fakeLocalEgressIP2}, + }, + // Should not be included. + &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressC", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: fakeRemoteEgressIP1, ExternalIPPool: fakeExternalIPPool}, + Status: crdv1b1.EgressStatus{EgressNode: fakeNode2, EgressIP: fakeRemoteEgressIP1}, + }, + &crdv1b1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressD", UID: "uidA"}, + Spec: crdv1b1.EgressSpec{EgressIP: "1.2.3.4", ExternalIPPool: "other-pool"}, + Status: crdv1b1.EgressStatus{EgressNode: fakeNode, EgressIP: "1.2.3.4"}, + }, + &crdv1b1.ExternalIPPool{ + ObjectMeta: metav1.ObjectMeta{Name: fakeExternalIPPool, UID: "pool-uidA"}, + Spec: crdv1b1.ExternalIPPoolSpec{SubnetInfo: &crdv1b1.SubnetInfo{Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 2}}, + }, + &crdv1b1.ExternalIPPool{ + ObjectMeta: metav1.ObjectMeta{Name: "other-pool", UID: "pool-uidB"}, + Spec: crdv1b1.ExternalIPPoolSpec{}, + }, + }) + stopCh := make(chan struct{}) + defer close(stopCh) + go c.podUpdateChannel.Run(stopCh) + c.crdInformerFactory.Start(stopCh) + c.informerFactory.Start(stopCh) + c.crdInformerFactory.WaitForCacheSync(stopCh) + c.informerFactory.WaitForCacheSync(stopCh) + + c.mockIPAssigner.EXPECT().InitIPs(map[string]*crdv1b1.SubnetInfo{ + fakeLocalEgressIP1: {Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 2}, + fakeLocalEgressIP2: {Gateway: fakeGatewayIP, PrefixLength: 16, VLAN: 2}, + "1.2.3.4": nil, + }) + c.replaceEgressIPs() +} diff --git a/pkg/agent/controller/serviceexternalip/controller.go b/pkg/agent/controller/serviceexternalip/controller.go index 55d1c3cde58..290558da961 100644 --- a/pkg/agent/controller/serviceexternalip/controller.go +++ b/pkg/agent/controller/serviceexternalip/controller.go @@ -393,7 +393,7 @@ func (c *ServiceExternalIPController) assignIP(ip string, service apimachineryty c.assignedIPsMutex.Lock() defer c.assignedIPsMutex.Unlock() if _, ok := c.assignedIPs[ip]; !ok { - if _, err := 
c.ipAssigner.AssignIP(ip, true); err != nil { + if _, err := c.ipAssigner.AssignIP(ip, nil, true); err != nil { return err } c.assignedIPs[ip] = sets.New[string](service.String()) diff --git a/pkg/agent/controller/serviceexternalip/controller_test.go b/pkg/agent/controller/serviceexternalip/controller_test.go index 92ee2375e76..9c5a32a1246 100644 --- a/pkg/agent/controller/serviceexternalip/controller_test.go +++ b/pkg/agent/controller/serviceexternalip/controller_test.go @@ -228,7 +228,7 @@ func TestCreateService(t *testing.T) { serviceToCreate: servicePolicyCluster, healthyNodes: []string{fakeNode1, fakeNode2}, expectedCalls: func(mockIPAssigner *ipassignertest.MockIPAssigner) { - mockIPAssigner.EXPECT().AssignIP(fakeServiceExternalIP1, true) + mockIPAssigner.EXPECT().AssignIP(fakeServiceExternalIP1, nil, true) }, expectedExternalIPStates: map[apimachinerytypes.NamespacedName]externalIPState{ keyFor(servicePolicyCluster): { @@ -269,7 +269,7 @@ func TestCreateService(t *testing.T) { serviceToCreate: servicePolicyLocal, healthyNodes: []string{fakeNode1, fakeNode2}, expectedCalls: func(mockIPAssigner *ipassignertest.MockIPAssigner) { - mockIPAssigner.EXPECT().AssignIP(fakeServiceExternalIP1, true) + mockIPAssigner.EXPECT().AssignIP(fakeServiceExternalIP1, nil, true) }, expectedExternalIPStates: map[apimachinerytypes.NamespacedName]externalIPState{ keyFor(servicePolicyLocal): { @@ -454,7 +454,7 @@ func TestUpdateService(t *testing.T) { healthyNodes: []string{fakeNode1, fakeNode2}, expectedCalls: func(mockIPAssigner *ipassignertest.MockIPAssigner) { mockIPAssigner.EXPECT().UnassignIP(fakeServiceExternalIP1) - mockIPAssigner.EXPECT().AssignIP(fakeServiceExternalIP2, true) + mockIPAssigner.EXPECT().AssignIP(fakeServiceExternalIP2, nil, true) }, expectError: false, }, @@ -472,7 +472,7 @@ func TestUpdateService(t *testing.T) { }, healthyNodes: []string{fakeNode1, fakeNode2}, expectedCalls: func(mockIPAssigner *ipassignertest.MockIPAssigner) { - mockIPAssigner.EXPECT().AssignIP(fakeServiceExternalIP2, true) + mockIPAssigner.EXPECT().AssignIP(fakeServiceExternalIP2, nil, true) }, expectError: false, }, diff --git a/pkg/agent/ipassigner/ip_assigner.go b/pkg/agent/ipassigner/ip_assigner.go index 13918d798ae..6fcde6781e2 100644 --- a/pkg/agent/ipassigner/ip_assigner.go +++ b/pkg/agent/ipassigner/ip_assigner.go @@ -14,7 +14,9 @@ package ipassigner -import "k8s.io/apimachinery/pkg/util/sets" +import ( + "antrea.io/antrea/pkg/apis/crd/v1beta1" +) // IPAssigner provides methods to assign or unassign IP. type IPAssigner interface { @@ -22,15 +24,17 @@ type IPAssigner interface { // It returns true only in the case when there is no error and the IP provided // was not assigned to the interface before the operation, in all other cases it // returns false. - AssignIP(ip string, forceAdvertise bool) (bool, error) + AssignIP(ip string, subnetInfo *v1beta1.SubnetInfo, forceAdvertise bool) (bool, error) // UnassignIP ensures the provided IP is not assigned to the system. // It returns true only in the case when there is no error and the IP provided // was assigned to the interface before the operation. UnassignIP(ip string) (bool, error) // AssignedIPs return the IPs that are assigned to the system by this IPAssigner. - AssignedIPs() sets.Set[string] + AssignedIPs() map[string]*v1beta1.SubnetInfo // InitIPs ensures the IPs that are assigned to the system match the given IPs. 
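+	// The map is keyed by IP; a nil SubnetInfo value means the IP belongs to the default Node subnet.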
- InitIPs(sets.Set[string]) error + InitIPs(map[string]*v1beta1.SubnetInfo) error + // GetInterfaceID returns the index of the network interface to which IPs in the provided subnet are assigned. + GetInterfaceID(subnetInfo *v1beta1.SubnetInfo) (int, bool) // Run starts the IP assigner. Run(<-chan struct{}) } diff --git a/pkg/agent/ipassigner/ip_assigner_linux.go b/pkg/agent/ipassigner/ip_assigner_linux.go index 49e5e13f5c4..4192ccbfadd 100644 --- a/pkg/agent/ipassigner/ip_assigner_linux.go +++ b/pkg/agent/ipassigner/ip_assigner_linux.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "net" + "strings" "sync" "github.com/vishvananda/netlink" @@ -31,25 +32,188 @@ import ( "antrea.io/antrea/pkg/agent/util/arping" "antrea.io/antrea/pkg/agent/util/ndp" "antrea.io/antrea/pkg/agent/util/sysctl" + crdv1b1 "antrea.io/antrea/pkg/apis/crd/v1beta1" ) -// ipAssigner creates a dummy device and assigns IPs to it. +// VLAN interfaces created by antrea-agent will be named with the prefix. +// For example, when VLAN ID is 10, the name will be antrea-ext.10. +// It can be used to determine whether it's safe to delete an interface when it's no longer used. +const vlanInterfacePrefix = "antrea-ext." + +// assignee is the unit that IPs are assigned to. All IPs from the same subnet share an assignee. +type assignee struct { + // logicalInterface is the interface IPs should be logically assigned to. It's also used for IP advertisement. + // The field must not be nil. + logicalInterface *net.Interface + // link is used for IP link management and IP address add/del operation. The field can be nil if IPs don't need to + // assigned to an interface physically. + link netlink.Link + // arpResponder is used for ARP responder for IPv4 address. The field should be nil if the interface can respond to + // ARP queries itself. + arpResponder responder.Responder + // ndpResponder is used for NDP responder for IPv6 address. The field should be nil if the interface can respond to + // NDP queries itself. + ndpResponder responder.Responder + // ips tracks IPs that have been assigned to this assignee. + ips sets.Set[string] +} + +// deletable returns whether this assignee can be safely deleted. +func (as *assignee) deletable() bool { + if as.ips.Len() > 0 { + return false + } + // It never has a real link. + if as.link == nil { + return false + } + // Do not delete non VLAN interfaces. + if _, ok := as.link.(*netlink.Vlan); !ok { + return false + } + // Do not delete VLAN interfaces not created by antrea-agent. + if !strings.HasPrefix(as.link.Attrs().Name, vlanInterfacePrefix) { + return false + } + return true +} + +func (as *assignee) destroy() error { + if err := netlink.LinkDel(as.link); err != nil { + return fmt.Errorf("error deleting interface %v", as.link) + } + return nil +} + +func (as *assignee) assign(ip net.IP, subnetInfo *crdv1b1.SubnetInfo) error { + // If there is a real link, add the IP to its address list. 
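+	// getIPNet applies the subnet's prefix length, falling back to /32 (IPv4) or /128 (IPv6) when subnetInfo is nil.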
+ if as.link != nil { + addr := getIPNet(ip, subnetInfo) + if err := netlink.AddrAdd(as.link, &netlink.Addr{IPNet: addr}); err != nil { + if !errors.Is(err, unix.EEXIST) { + return fmt.Errorf("failed to add IP %v to interface %s: %v", addr, as.link.Attrs().Name, err) + } else { + klog.InfoS("IP was already assigned to interface", "ip", ip, "interface", as.link.Attrs().Name) + } + } else { + klog.InfoS("Assigned IP to interface", "ip", ip, "interface", as.link.Attrs().Name) + } + } + + if utilnet.IsIPv4(ip) && as.arpResponder != nil { + if err := as.arpResponder.AddIP(ip); err != nil { + return fmt.Errorf("failed to assign IP %v to ARP responder: %v", ip, err) + } + } + if utilnet.IsIPv6(ip) && as.ndpResponder != nil { + if err := as.ndpResponder.AddIP(ip); err != nil { + return fmt.Errorf("failed to assign IP %v to NDP responder: %v", ip, err) + } + } + // Always advertise the IP when the IP is newly assigned to this Node. + as.advertise(ip) + as.ips.Insert(ip.String()) + return nil +} + +func (as *assignee) advertise(ip net.IP) { + if utilnet.IsIPv4(ip) { + klog.V(2).InfoS("Sending gratuitous ARP", "ip", ip) + if err := arping.GratuitousARPOverIface(ip, as.logicalInterface); err != nil { + klog.ErrorS(err, "Failed to send gratuitous ARP", "ip", ip) + } + } else { + klog.V(2).InfoS("Sending neighbor advertisement", "ip", ip) + if err := ndp.NeighborAdvertisement(ip, as.logicalInterface); err != nil { + klog.ErrorS(err, "Failed to send neighbor advertisement", "ip", ip) + } + } +} + +func (as *assignee) unassign(ip net.IP, subnetInfo *crdv1b1.SubnetInfo) error { + // If there is a real link, delete the IP from its address list. + if as.link != nil { + addr := getIPNet(ip, subnetInfo) + if err := netlink.AddrDel(as.link, &netlink.Addr{IPNet: addr}); err != nil { + if !errors.Is(err, unix.EADDRNOTAVAIL) { + return fmt.Errorf("failed to delete IP %v from interface %s: %v", ip, as.link.Attrs().Name, err) + } else { + klog.InfoS("IP does not exist on interface", "ip", ip, "interface", as.link.Attrs().Name) + } + } + klog.InfoS("Deleted IP from interface", "ip", ip, "interface", as.link.Attrs().Name) + } + + if utilnet.IsIPv4(ip) && as.arpResponder != nil { + if err := as.arpResponder.RemoveIP(ip); err != nil { + return fmt.Errorf("failed to remove IP %v from ARP responder: %v", ip, err) + } + } + if utilnet.IsIPv6(ip) && as.ndpResponder != nil { + if err := as.ndpResponder.RemoveIP(ip); err != nil { + return fmt.Errorf("failed to remove IP %v from NDP responder: %v", ip, err) + } + } + as.ips.Delete(ip.String()) + return nil +} + +func (as *assignee) getVLANID() (int, bool) { + if as.link == nil { + return 0, false + } + vlan, ok := as.link.(*netlink.Vlan) + if !ok { + return 0, false + } + return vlan.VlanId, true +} + +func (as *assignee) loadIPAddresses() (map[string]*crdv1b1.SubnetInfo, error) { + assignedIPs := map[string]*crdv1b1.SubnetInfo{} + addresses, err := netlink.AddrList(as.link, netlink.FAMILY_ALL) + if err != nil { + return nil, err + } + vlanID, isVLAN := as.getVLANID() + for _, address := range addresses { + // Only include global unicast addresses, otherwise addresses like link local ones may be mistakenly deleted. + if address.IP.IsGlobalUnicast() { + // subnetInfo should be nil for the dummy interface. 
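+			// For VLAN sub-interfaces, only the prefix length and VLAN ID can be recovered from the system; the gateway is left unset.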
+ var subnetInfo *crdv1b1.SubnetInfo + if isVLAN { + prefixLength, _ := address.Mask.Size() + subnetInfo = &crdv1b1.SubnetInfo{ + PrefixLength: int32(prefixLength), + VLAN: int32(vlanID), + } + } + assignedIPs[address.IP.String()] = subnetInfo + as.ips.Insert(address.IP.String()) + } + } + return assignedIPs, nil +} + +// ipAssigner creates dummy/vlan devices and assigns IPs to them. // It's supposed to be used in the cases that external IPs should be configured on the system so that they can be used -// for SNAT (egress scenario) or DNAT (ingress scenario). A dummy device is used because the IPs just need to be present -// in any device to be functional, and using dummy device avoids touching system managed devices and is easy to know IPs -// that are assigned by antrea-agent. +// for SNAT (egress scenario) or DNAT (ingress scenario). +// By default, a dummy device is used because the IPs just need to be present in any device to be functional, and using +// dummy device avoids touching system managed devices and is easy to know IPs that are assigned by antrea-agent. +// If an IP is associated with a VLAN ID, it will be assigned to a vlan device which is a sub-interface of the external +// device for proper VLAN tagging and untagging. type ipAssigner struct { - // externalInterface is the device that GARP (IPv4) and Unsolicited NA (IPv6) will be sent from. + // externalInterface is the device that GARP (IPv4) and Unsolicited NA (IPv6) will eventually be sent from. externalInterface *net.Interface - // dummyDevice is the device that IPs will be assigned to. - dummyDevice netlink.Link - // assignIPs caches the IPs that are assigned to the dummy device. + // defaultAssignee is the assignee that IPs without VLAN tag will be assigned to. + defaultAssignee *assignee + // vlanAssignees contains the vlan-based assignees that IPs with VLAN tag will be assigned to, keyed by VLAN ID. + vlanAssignees map[int32]*assignee + // assignIPs caches the IPs that have been assigned. // TODO: Add a goroutine to ensure that the cache is in sync with the IPs assigned to the dummy device in case the // IPs are removed by users accidentally. - assignedIPs sets.Set[string] - mutex sync.RWMutex - arpResponder responder.Responder - ndpResponder responder.Responder + assignedIPs map[string]*crdv1b1.SubnetInfo + mutex sync.RWMutex } // NewIPAssigner returns an *ipAssigner. 
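For context, the mock expectations in TestSyncEgress above encode the call sequence the agent is expected to follow for an Egress IP allocated from a pool with subnetInfo: assign the IP, resolve the interface holding that subnet, then program the per-subnet route table and the per-IP rule keyed by the SNAT packet mark. The following Go sketch is illustrative only (it is not the controller code in this patch; the function name, the table ID, and the mark are placeholders), using the IPAssigner and route.Interface methods introduced here:

    package example

    import (
        "fmt"
        "net"

        "antrea.io/antrea/pkg/agent/ipassigner"
        "antrea.io/antrea/pkg/agent/route"
        crdv1b1 "antrea.io/antrea/pkg/apis/crd/v1beta1"
    )

    // programEgressIP sketches the per-Egress-IP programming implied by the unit tests:
    // assign the IP, and, when the pool carries subnetInfo, add the route table and IP rule.
    func programEgressIP(assigner ipassigner.IPAssigner, routes route.Interface,
        egressIP string, subnetInfo *crdv1b1.SubnetInfo, tableID, mark uint32) error {
        // Assign the IP to the dummy device (subnetInfo == nil) or to a VLAN sub-interface.
        if _, err := assigner.AssignIP(egressIP, subnetInfo, false); err != nil {
            return err
        }
        if subnetInfo == nil {
            // Egress IPs from the default Node subnet need no extra routing.
            return nil
        }
        // Find the interface that now holds IPs of this subnet.
        ifIndex, ok := assigner.GetInterfaceID(subnetInfo)
        if !ok {
            return fmt.Errorf("no interface found for subnet %v", subnetInfo)
        }
        // Per-subnet table: "<subnet> dev <vlan-if>" plus "default via <gateway> dev <vlan-if>".
        if err := routes.AddEgressRoutes(tableID, ifIndex, net.ParseIP(subnetInfo.Gateway), int(subnetInfo.PrefixLength)); err != nil {
            return err
        }
        // Per-IP rule: traffic carrying this Egress IP's packet mark looks up the table above.
        return routes.AddEgressRule(tableID, mark)
    }

In the unit tests above, tableID is 101 and the marks are the SNAT packet marks (1, 2, ...) already associated with the Egress IPs.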
@@ -60,7 +224,12 @@ func NewIPAssigner(nodeTransportInterface string, dummyDeviceName string) (IPAss } a := &ipAssigner{ externalInterface: externalInterface, - assignedIPs: sets.New[string](), + assignedIPs: map[string]*crdv1b1.SubnetInfo{}, + defaultAssignee: &assignee{ + logicalInterface: externalInterface, + ips: sets.New[string](), + }, + vlanAssignees: map[int32]*assignee{}, } if ipv4 != nil { // For the Egress scenario, the external IPs should always be present on the dummy @@ -73,30 +242,49 @@ func NewIPAssigner(nodeTransportInterface string, dummyDeviceName string) (IPAss return nil, err } if dummyDeviceName == "" || arpIgnore > 0 { - arpResponder, err := responder.NewARPResponder(externalInterface) + a.defaultAssignee.arpResponder, err = responder.NewARPResponder(externalInterface) if err != nil { return nil, fmt.Errorf("failed to create ARP responder for link %s: %v", externalInterface.Name, err) } - a.arpResponder = arpResponder } } if ipv6 != nil { - ndpResponder, err := responder.NewNDPResponder(externalInterface) + a.defaultAssignee.ndpResponder, err = responder.NewNDPResponder(externalInterface) if err != nil { return nil, fmt.Errorf("failed to create NDP responder for link %s: %v", externalInterface.Name, err) } - a.ndpResponder = ndpResponder } if dummyDeviceName != "" { - dummyDevice, err := ensureDummyDevice(dummyDeviceName) + a.defaultAssignee.link, err = ensureDummyDevice(dummyDeviceName) if err != nil { return nil, fmt.Errorf("error when ensuring dummy device exists: %v", err) } - a.dummyDevice = dummyDevice + } + vlans, err := getVLANInterfaces(externalInterface.Index) + if err != nil { + return nil, fmt.Errorf("error when getting vlan devices: %w", err) + } + for _, vlan := range vlans { + a.addVLANAssignee(vlan, int32(vlan.VlanId)) } return a, nil } +// getVLANInterfaces returns all VLAN sub-interfaces of the given parent interface. +func getVLANInterfaces(parentIndex int) ([]*netlink.Vlan, error) { + links, err := netlink.LinkList() + if err != nil { + return nil, err + } + var vlans []*netlink.Vlan + for _, link := range links { + if vlan, ok := link.(*netlink.Vlan); ok && vlan.ParentIndex == parentIndex { + vlans = append(vlans, vlan) + } + } + return vlans, nil +} + // getARPIgnoreForInterface gets the max value of conf/{all,interface}/arp_ignore form sysctl. func getARPIgnoreForInterface(iface string) (int, error) { arpIgnoreAll, err := sysctl.GetSysctlNet("ipv4/conf/all/arp_ignore") @@ -129,21 +317,33 @@ func ensureDummyDevice(deviceName string) (netlink.Link, error) { return dummy, nil } -// loadIPAddresses gets the IP addresses on the dummy device and caches them in memory. -func (a *ipAssigner) loadIPAddresses() (sets.Set[string], error) { - addresses, err := netlink.AddrList(a.dummyDevice, netlink.FAMILY_ALL) +// loadIPAddresses gets the IP addresses on the default device and the vlan devices. +func (a *ipAssigner) loadIPAddresses() error { + // Load IPs assigned to the default interface. + var err error + a.assignedIPs, err = a.defaultAssignee.loadIPAddresses() if err != nil { - return nil, err + return err } - newAssignIPs := sets.New[string]() - for _, address := range addresses { - newAssignIPs.Insert(address.IP.String()) + // Load IPs assigned to the vlan interfaces. 
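+	// vlanAssignees were discovered from the external interface's existing VLAN sub-interfaces when the assigner was created.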
+ for _, vlanAssignee := range a.vlanAssignees { + newAssignedIPs, err := vlanAssignee.loadIPAddresses() + if err != nil { + return err + } + for k, v := range newAssignedIPs { + a.assignedIPs[k] = v + } } - return newAssignIPs, nil + return nil } -// AssignIP ensures the provided IP is assigned to the dummy device and the ARP/NDP responders. -func (a *ipAssigner) AssignIP(ip string, forceAdvertise bool) (bool, error) { +// AssignIP ensures the provided IP is assigned to the system and advertised to its neighbors. +// - If subnetInfo is nil or the vlan is 0, the IP will be assigned to the default interface, and its advertisement +// will be sent through the external interface. +// - Otherwise, the IP will be assigned to a corresponding vlan sub-interface of the external interface, and its +// advertisement will be sent through the vlan sub-interface (though via the external interface eventually). +func (a *ipAssigner) AssignIP(ip string, subnetInfo *crdv1b1.SubnetInfo, forceAdvertise bool) (bool, error) { parsedIP := net.ParseIP(ip) if parsedIP == nil { return false, fmt.Errorf("invalid IP %s", ip) @@ -151,58 +351,34 @@ func (a *ipAssigner) AssignIP(ip string, forceAdvertise bool) (bool, error) { a.mutex.Lock() defer a.mutex.Unlock() - if a.assignedIPs.Has(ip) { - klog.V(2).InfoS("The IP is already assigned", "ip", ip) - if forceAdvertise { - a.advertise(parsedIP) - } - return false, nil + as, err := a.getAssignee(subnetInfo, true) + if err != nil { + return false, err } - if a.dummyDevice != nil { - addr := util.NewIPNet(parsedIP) - if err := netlink.AddrAdd(a.dummyDevice, &netlink.Addr{IPNet: addr}); err != nil { - if !errors.Is(err, unix.EEXIST) { - return false, fmt.Errorf("failed to add IP %v to interface %s: %v", ip, a.dummyDevice.Attrs().Name, err) - } else { - klog.InfoS("IP was already assigned to interface", "ip", parsedIP, "interface", a.dummyDevice.Attrs().Name) + oldSubnetInfo, exists := a.assignedIPs[ip] + if exists { + // ipAssigner doesn't care about the gateway. + if crdv1b1.CompareSubnetInfo(subnetInfo, oldSubnetInfo, true) { + klog.V(2).InfoS("The IP is already assigned", "ip", ip) + if forceAdvertise { + as.advertise(parsedIP) } - } else { - klog.InfoS("Assigned IP to interface", "ip", parsedIP, "interface", a.dummyDevice.Attrs().Name) + return false, nil } - } - - if utilnet.IsIPv4(parsedIP) && a.arpResponder != nil { - if err := a.arpResponder.AddIP(parsedIP); err != nil { - return false, fmt.Errorf("failed to assign IP %v to ARP responder: %v", ip, err) - } - } - if utilnet.IsIPv6(parsedIP) && a.ndpResponder != nil { - if err := a.ndpResponder.AddIP(parsedIP); err != nil { - return false, fmt.Errorf("failed to assign IP %v to NDP responder: %v", ip, err) + if err := a.unassign(parsedIP, oldSubnetInfo); err != nil { + return false, err } } - // Always advertise the IP when the IP is newly assigned to this Node. 
- a.advertise(parsedIP) - a.assignedIPs.Insert(ip) - return true, nil -} -func (a *ipAssigner) advertise(ip net.IP) { - if utilnet.IsIPv4(ip) { - klog.V(2).InfoS("Sending gratuitous ARP", "ip", ip) - if err := arping.GratuitousARPOverIface(ip, a.externalInterface); err != nil { - klog.ErrorS(err, "Failed to send gratuitous ARP", "ip", ip) - } - } else { - klog.V(2).InfoS("Sending neighbor advertisement", "ip", ip) - if err := ndp.NeighborAdvertisement(ip, a.externalInterface); err != nil { - klog.ErrorS(err, "Failed to send neighbor advertisement", "ip", ip) - } + if err := as.assign(parsedIP, subnetInfo); err != nil { + return false, err } + a.assignedIPs[ip] = subnetInfo + return true, nil } -// UnassignIP ensures the provided IP is not assigned to the dummy device. +// UnassignIP ensures the provided IP is not assigned to the dummy/vlan device. func (a *ipAssigner) UnassignIP(ip string) (bool, error) { parsedIP := net.ParseIP(ip) if parsedIP == nil { @@ -211,99 +387,156 @@ func (a *ipAssigner) UnassignIP(ip string) (bool, error) { a.mutex.Lock() defer a.mutex.Unlock() - if !a.assignedIPs.Has(ip) { + subnetInfo, exists := a.assignedIPs[ip] + if !exists { klog.V(2).InfoS("The IP is not assigned", "ip", ip) return false, nil } - - if a.dummyDevice != nil { - addr := util.NewIPNet(parsedIP) - if err := netlink.AddrDel(a.dummyDevice, &netlink.Addr{IPNet: addr}); err != nil { - if !errors.Is(err, unix.EADDRNOTAVAIL) { - return false, fmt.Errorf("failed to delete IP %v from interface %s: %v", ip, a.dummyDevice.Attrs().Name, err) - } else { - klog.InfoS("IP does not exist on interface", "ip", parsedIP, "interface", a.dummyDevice.Attrs().Name) - } - } - klog.InfoS("Deleted IP from interface", "ip", ip, "interface", a.dummyDevice.Attrs().Name) + if err := a.unassign(parsedIP, subnetInfo); err != nil { + return false, err } + return true, nil +} - if utilnet.IsIPv4(parsedIP) && a.arpResponder != nil { - if err := a.arpResponder.RemoveIP(parsedIP); err != nil { - return false, fmt.Errorf("failed to remove IP %v from ARP responder: %v", ip, err) - } +func (a *ipAssigner) unassign(ip net.IP, subnetInfo *crdv1b1.SubnetInfo) error { + as, _ := a.getAssignee(subnetInfo, false) + // The assignee doesn't exist, meaning the IP has been unassigned previously. + if as == nil { + return nil } - if utilnet.IsIPv6(parsedIP) && a.ndpResponder != nil { - if err := a.ndpResponder.RemoveIP(parsedIP); err != nil { - return false, fmt.Errorf("failed to remove IP %v from NDP responder: %v", ip, err) + if err := as.unassign(ip, subnetInfo); err != nil { + return err + } + if as.deletable() { + klog.InfoS("Deleting VLAN sub-interface", "interface", as.logicalInterface.Name, "vlan", subnetInfo.VLAN) + if err := as.destroy(); err != nil { + return err } + delete(a.vlanAssignees, subnetInfo.VLAN) } - - a.assignedIPs.Delete(ip) - return true, nil + delete(a.assignedIPs, ip.String()) + return nil } // AssignedIPs return the IPs that are assigned to the dummy device. -func (a *ipAssigner) AssignedIPs() sets.Set[string] { +func (a *ipAssigner) AssignedIPs() map[string]*crdv1b1.SubnetInfo { a.mutex.RLock() defer a.mutex.RUnlock() // Return a copy. - return a.assignedIPs.Union(nil) + copy := map[string]*crdv1b1.SubnetInfo{} + for k, v := range a.assignedIPs { + copy[k] = v + } + return copy } -// InitIPs loads the IPs from the dummy device and replaces the IPs that are assigned to it +// InitIPs loads the IPs from the dummy/vlan devices and replaces the IPs that are assigned to it // with the given ones. 
This function also adds the given IPs to the ARP/NDP responder if // applicable. It can be used to recover the IP assigner to the desired state after Agent restarts. -func (a *ipAssigner) InitIPs(ips sets.Set[string]) error { - a.mutex.Lock() - defer a.mutex.Unlock() - if a.dummyDevice != nil { - assigned, err := a.loadIPAddresses() - if err != nil { - return fmt.Errorf("error when loading IP addresses from the system: %v", err) - } - for ip := range ips.Difference(assigned) { - addr := util.NewIPNet(net.ParseIP(ip)) - if err := netlink.AddrAdd(a.dummyDevice, &netlink.Addr{IPNet: addr}); err != nil { - if !errors.Is(err, unix.EEXIST) { - return fmt.Errorf("failed to add IP %v to interface %s: %v", ip, a.dummyDevice.Attrs().Name, err) - } - } - } - for ip := range assigned.Difference(ips) { - addr := util.NewIPNet(net.ParseIP(ip)) - if err := netlink.AddrDel(a.dummyDevice, &netlink.Addr{IPNet: addr}); err != nil { - if !errors.Is(err, unix.EADDRNOTAVAIL) { - return fmt.Errorf("failed to delete IP %v from interface %s: %v", ip, a.dummyDevice.Attrs().Name, err) - } - } - } +func (a *ipAssigner) InitIPs(desired map[string]*crdv1b1.SubnetInfo) error { + if err := a.loadIPAddresses(); err != nil { + return fmt.Errorf("error when loading IP addresses from the system: %v", err) } - for ipStr := range ips { - ip := net.ParseIP(ipStr) - var err error - if utilnet.IsIPv4(ip) && a.arpResponder != nil { - err = a.arpResponder.AddIP(ip) - } - if utilnet.IsIPv6(ip) && a.ndpResponder != nil { - err = a.ndpResponder.AddIP(ip) + staleIPs := sets.StringKeySet(a.assignedIPs) + for ip, desiredSubnetInfo := range desired { + if _, err := a.AssignIP(ip, desiredSubnetInfo, true); err != nil { + return err } - if err != nil { + staleIPs.Delete(ip) + } + for ip := range staleIPs { + if _, err := a.UnassignIP(ip); err != nil { return err } - a.advertise(ip) } - a.assignedIPs = ips.Union(nil) return nil } +func (a *ipAssigner) GetInterfaceID(subnetInfo *crdv1b1.SubnetInfo) (int, bool) { + as, _ := a.getAssignee(subnetInfo, false) + // The assignee doesn't exist, meaning the IP has been unassigned previously. + if as == nil { + return 0, false + } + return as.logicalInterface.Index, true +} + // Run starts the ARP responder and NDP responder. func (a *ipAssigner) Run(ch <-chan struct{}) { - if a.arpResponder != nil { - go a.arpResponder.Run(ch) + if a.defaultAssignee.arpResponder != nil { + go a.defaultAssignee.arpResponder.Run(ch) } - if a.ndpResponder != nil { - go a.ndpResponder.Run(ch) + if a.defaultAssignee.ndpResponder != nil { + go a.defaultAssignee.ndpResponder.Run(ch) } <-ch } + +// getAssignee gets or creates the vlan device for the subnet if it doesn't exist. +func (a *ipAssigner) getAssignee(subnetInfo *crdv1b1.SubnetInfo, createIfNotExist bool) (*assignee, error) { + // Use the default assignee if subnet info is nil or the vlan is not set. 
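+	// Only a non-zero VLAN requires a dedicated sub-interface; a different gateway or prefix length alone is handled on the default device.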
+ if subnetInfo == nil || subnetInfo.VLAN == 0 { + return a.defaultAssignee, nil + } + if as, exists := a.vlanAssignees[subnetInfo.VLAN]; exists { + return as, nil + } + if !createIfNotExist { + return nil, nil + } + + name := fmt.Sprintf("%s%d", vlanInterfacePrefix, subnetInfo.VLAN) + klog.InfoS("Creating VLAN sub-interface", "interface", name, "parent", a.externalInterface.Name, "vlan", subnetInfo.VLAN) + vlan := &netlink.Vlan{ + LinkAttrs: netlink.LinkAttrs{ + Name: name, + ParentIndex: a.externalInterface.Index, + }, + VlanId: int(subnetInfo.VLAN), + } + if err := netlink.LinkAdd(vlan); err != nil { + if !errors.Is(err, unix.EEXIST) { + return nil, fmt.Errorf("error creating VLAN sub-interface for VLAN %d", subnetInfo.VLAN) + } + } + // Loose mode is needed because incoming traffic received on the interface is expected to be received on the parent + // external interface when looking up the main table. To make it look up the custom table, we will need to restore + // the mark on the reply traffic and turn on src_valid_mark on this interface, which is more complicated. + if err := util.EnsureRPFilterOnInterface(name, 2); err != nil { + return nil, err + } + as, err := a.addVLANAssignee(vlan, subnetInfo.VLAN) + if err != nil { + return nil, err + } + return as, nil +} + +func (a *ipAssigner) addVLANAssignee(link netlink.Link, vlan int32) (*assignee, error) { + if err := netlink.LinkSetUp(link); err != nil { + return nil, fmt.Errorf("error setting up interface %v", link) + } + iface, err := net.InterfaceByName(link.Attrs().Name) + if err != nil { + return nil, err + } + // VLAN interface can answer ARP/NDP directly, no need to create userspace responders. + as := &assignee{ + logicalInterface: iface, + link: link, + ips: sets.New[string](), + } + a.vlanAssignees[vlan] = as + return as, nil +} + +func getIPNet(ip net.IP, subnetInfo *crdv1b1.SubnetInfo) *net.IPNet { + ones, bits := 32, 32 + if ip.To4() == nil { + ones, bits = 128, 128 + } + if subnetInfo != nil { + ones = int(subnetInfo.PrefixLength) + } + return &net.IPNet{IP: ip, Mask: net.CIDRMask(ones, bits)} +} diff --git a/pkg/agent/ipassigner/testing/mock_ipassigner.go b/pkg/agent/ipassigner/testing/mock_ipassigner.go index 3406707fdaf..340a96fbc6b 100644 --- a/pkg/agent/ipassigner/testing/mock_ipassigner.go +++ b/pkg/agent/ipassigner/testing/mock_ipassigner.go @@ -26,8 +26,8 @@ package testing import ( reflect "reflect" + v1beta1 "antrea.io/antrea/pkg/apis/crd/v1beta1" gomock "go.uber.org/mock/gomock" - sets "k8s.io/apimachinery/pkg/util/sets" ) // MockIPAssigner is a mock of IPAssigner interface. @@ -54,25 +54,25 @@ func (m *MockIPAssigner) EXPECT() *MockIPAssignerMockRecorder { } // AssignIP mocks base method. -func (m *MockIPAssigner) AssignIP(arg0 string, arg1 bool) (bool, error) { +func (m *MockIPAssigner) AssignIP(arg0 string, arg1 *v1beta1.SubnetInfo, arg2 bool) (bool, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AssignIP", arg0, arg1) + ret := m.ctrl.Call(m, "AssignIP", arg0, arg1, arg2) ret0, _ := ret[0].(bool) ret1, _ := ret[1].(error) return ret0, ret1 } // AssignIP indicates an expected call of AssignIP. 
-func (mr *MockIPAssignerMockRecorder) AssignIP(arg0, arg1 any) *gomock.Call { +func (mr *MockIPAssignerMockRecorder) AssignIP(arg0, arg1, arg2 any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AssignIP", reflect.TypeOf((*MockIPAssigner)(nil).AssignIP), arg0, arg1) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AssignIP", reflect.TypeOf((*MockIPAssigner)(nil).AssignIP), arg0, arg1, arg2) } // AssignedIPs mocks base method. -func (m *MockIPAssigner) AssignedIPs() sets.Set[string] { +func (m *MockIPAssigner) AssignedIPs() map[string]*v1beta1.SubnetInfo { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "AssignedIPs") - ret0, _ := ret[0].(sets.Set[string]) + ret0, _ := ret[0].(map[string]*v1beta1.SubnetInfo) return ret0 } @@ -82,8 +82,23 @@ func (mr *MockIPAssignerMockRecorder) AssignedIPs() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AssignedIPs", reflect.TypeOf((*MockIPAssigner)(nil).AssignedIPs)) } +// GetInterfaceID mocks base method. +func (m *MockIPAssigner) GetInterfaceID(arg0 *v1beta1.SubnetInfo) (int, bool) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetInterfaceID", arg0) + ret0, _ := ret[0].(int) + ret1, _ := ret[1].(bool) + return ret0, ret1 +} + +// GetInterfaceID indicates an expected call of GetInterfaceID. +func (mr *MockIPAssignerMockRecorder) GetInterfaceID(arg0 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetInterfaceID", reflect.TypeOf((*MockIPAssigner)(nil).GetInterfaceID), arg0) +} + // InitIPs mocks base method. -func (m *MockIPAssigner) InitIPs(arg0 sets.Set[string]) error { +func (m *MockIPAssigner) InitIPs(arg0 map[string]*v1beta1.SubnetInfo) error { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "InitIPs", arg0) ret0, _ := ret[0].(error) diff --git a/pkg/agent/openflow/client_test.go b/pkg/agent/openflow/client_test.go index 11107f32b56..8a4167c199a 100644 --- a/pkg/agent/openflow/client_test.go +++ b/pkg/agent/openflow/client_test.go @@ -1617,7 +1617,7 @@ func Test_client_InstallSNATMarkFlows(t *testing.T) { snatIP: net.ParseIP("192.168.77.100"), trafficShapingEnabled: false, expectedFlows: []string{ - "cookie=0x1040000000000, table=EgressMark, priority=200,ct_state=+new+trk,ip,tun_dst=192.168.77.100 actions=set_field:0x64/0xff->pkt_mark,set_field:0x20/0xf0->reg0,goto_table:L2ForwardingCalc", + "cookie=0x1040000000000, table=EgressMark, priority=200,ct_state=+trk,ip,tun_dst=192.168.77.100 actions=set_field:0x64/0xff->pkt_mark,set_field:0x20/0xf0->reg0,goto_table:L2ForwardingCalc", }, }, { @@ -1625,7 +1625,7 @@ func Test_client_InstallSNATMarkFlows(t *testing.T) { snatIP: net.ParseIP("fec0:192:168:77::100"), trafficShapingEnabled: false, expectedFlows: []string{ - "cookie=0x1040000000000, table=EgressMark, priority=200,ct_state=+new+trk,ipv6,tun_ipv6_dst=fec0:192:168:77::100 actions=set_field:0x64/0xff->pkt_mark,set_field:0x20/0xf0->reg0,goto_table:L2ForwardingCalc", + "cookie=0x1040000000000, table=EgressMark, priority=200,ct_state=+trk,ipv6,tun_ipv6_dst=fec0:192:168:77::100 actions=set_field:0x64/0xff->pkt_mark,set_field:0x20/0xf0->reg0,goto_table:L2ForwardingCalc", }, }, { @@ -1683,7 +1683,7 @@ func Test_client_InstallPodSNATFlows(t *testing.T) { trafficShapingEnabled: false, snatMark: uint32(100), expectedFlows: []string{ - "cookie=0x1040000000000, table=EgressMark, priority=200,ct_state=+new+trk,ip,in_port=100 actions=set_field:0x64/0xff->pkt_mark,set_field:0x20/0xf0->reg0,goto_table:L2ForwardingCalc", + "cookie=0x1040000000000, 
table=EgressMark, priority=200,ct_state=+trk,ip,in_port=100 actions=set_field:0x64/0xff->pkt_mark,set_field:0x20/0xf0->reg0,goto_table:L2ForwardingCalc", }, }, { diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index fd38572aad0..7f77b81362b 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -2327,11 +2327,10 @@ func (f *featureEgress) snatIPFromTunnelFlow(snatIP net.IP, mark uint32) binding Action().LoadPktMarkRange(mark, snatPktMarkRange). Action().LoadRegMark(ToGatewayRegMark) if f.enableEgressTrafficShaping { - // To apply rate-limit on all traffic, instead of just the first one, remove ct_state=+new. + // To apply rate-limit on all traffic. fb = fb.Action().GotoTable(EgressQoSTable.GetID()) } else { - fb = fb.MatchCTStateNew(true). - Action().GotoStage(stageSwitching) + fb = fb.Action().GotoStage(stageSwitching) } return fb.Done() } @@ -2352,11 +2351,10 @@ func (f *featureEgress) snatRuleFlow(ofPort uint32, snatIP net.IP, snatMark uint Action().LoadPktMarkRange(snatMark, snatPktMarkRange). Action().LoadRegMark(ToGatewayRegMark) if f.enableEgressTrafficShaping { - // To apply rate-limit on all traffic, instead of just the first one, remove ct_state=+new. + // To apply rate-limit on all traffic. fb = fb.Action().GotoTable(EgressQoSTable.GetID()) } else { - fb = fb.MatchCTStateNew(true). - Action().GotoStage(stageSwitching) + fb = fb.Action().GotoStage(stageSwitching) } return fb.Done() } diff --git a/pkg/agent/route/interfaces.go b/pkg/agent/route/interfaces.go index 30b86097bc3..4debd207d25 100644 --- a/pkg/agent/route/interfaces.go +++ b/pkg/agent/route/interfaces.go @@ -59,6 +59,21 @@ type Interface interface { // DeleteSNATRule should delete rule to SNAT outgoing traffic with the mark. DeleteSNATRule(mark uint32) error + // RestoreEgressRoutesAndRules restores the routes and rules configured on the system for Egress to the cache. + RestoreEgressRoutesAndRules(minTableID, maxTableID int) error + + // AddEgressRoutes creates a route table which routes Egress traffic to the provided gateway via the device. + AddEgressRoutes(tableID uint32, dev int, gateway net.IP, prefixLength int) error + + // DeleteEgressRoutes deletes the routes installed by AddEgressRoute. + DeleteEgressRoutes(tableID uint32) error + + // AddEgressRule creates an IP rule which makes Egress traffic with the provided mark look up the specified table. + AddEgressRule(tableID uint32, mark uint32) error + + // DeleteEgressRule deletes the IP rule installed by AddEgressRule. + DeleteEgressRule(tableID uint32, mark uint32) error + // AddNodePort adds configurations when a NodePort Service is created. AddNodePort(nodePortAddresses []net.IP, port uint16, protocol binding.Protocol) error diff --git a/pkg/agent/route/route_linux.go b/pkg/agent/route/route_linux.go index e4c6c4dfcec..2420e4ab6cf 100644 --- a/pkg/agent/route/route_linux.go +++ b/pkg/agent/route/route_linux.go @@ -124,6 +124,8 @@ type Client struct { clusterNodeIPs sync.Map // clusterNodeIP6s stores the IPv6 of all other Nodes in the cluster clusterNodeIP6s sync.Map + // egressRoutes caches ip routes about Egresses. + egressRoutes sync.Map // The latest calculated Service CIDRs can be got from serviceCIDRProvider. 
serviceCIDRProvider servicecidr.Interface } @@ -233,6 +235,7 @@ type routeKey struct { linkIndex int dst string gw string + tableID int } func (c *Client) syncRoute() error { @@ -250,6 +253,7 @@ func (c *Client) syncRoute() error { linkIndex: r.LinkIndex, dst: r.Dst.String(), gw: r.Gw.String(), + tableID: r.Table, }) } restoreRoute := func(route *netlink.Route) bool { @@ -257,6 +261,7 @@ func (c *Client) syncRoute() error { linkIndex: route.LinkIndex, dst: route.Dst.String(), gw: route.Gw.String(), + tableID: route.Table, }) { return true } @@ -280,6 +285,14 @@ func (c *Client) syncRoute() error { return restoreRoute(route) }) } + c.egressRoutes.Range(func(_, v any) bool { + for _, route := range v.([]*netlink.Route) { + if !restoreRoute(route) { + return false + } + } + return true + }) // These routes are installed automatically by the kernel when the address is configured on // the interface (with "proto kernel"). If these routes are deleted manually by mistake, we // restore them as part of this sync (without "proto kernel"). An alternative would be to @@ -980,6 +993,38 @@ func (c *Client) listIPRoutesOnGW() ([]netlink.Route, error) { return routes, nil } +// RestoreEgressRoutesAndRules simply deletes all IP routes and rules created for Egress for now. +// It may be better to keep the ones whose Egress IPs are still on this Node, but it's a bit hard to achieve it at the +// moment because the marks are not permanent and could change upon restart. +func (c *Client) RestoreEgressRoutesAndRules(minTableID, maxTableID int) error { + klog.InfoS("Restoring IP routes and rules for Egress") + routes, err := c.netlink.RouteList(nil, netlink.FAMILY_ALL) + if err != nil { + return err + } + for i := range routes { + route := routes[i] + // Not routes created for Egress. + if route.Table < minTableID || route.Table > maxTableID { + continue + } + c.netlink.RouteDel(&route) + } + rules, err := c.netlink.RuleList(netlink.FAMILY_ALL) + if err != nil { + return err + } + for i := range rules { + rule := rules[i] + // Not rules created for Egress. + if rule.Table < minTableID || rule.Table > maxTableID { + continue + } + c.netlink.RuleDel(&rule) + } + return nil +} + // getIPv6Gateways returns the IPv6 gateway addresses of the given CIDRs. 
func getIPv6Gateways(podCIDRs []string) sets.Set[string] { ipv6GWs := sets.New[string]() @@ -1278,6 +1323,88 @@ func (c *Client) DeleteSNATRule(mark uint32) error { return c.iptables.DeleteRule(protocol, iptables.NATTable, antreaPostRoutingChain, c.snatRuleSpec(snatIP, mark)) } +func (c *Client) AddEgressRoutes(tableID uint32, dev int, gateway net.IP, prefixLength int) error { + var dst *net.IPNet + if gateway.To4() != nil { + mask := net.CIDRMask(prefixLength, 32) + dst = &net.IPNet{ + IP: gateway.To4().Mask(mask), + Mask: mask, + } + } else { + mask := net.CIDRMask(prefixLength, 128) + dst = &net.IPNet{ + IP: gateway.Mask(mask), + Mask: mask, + } + } + // Install routes for the subnet, for example: + // tableID=101, dev=eth.10, gateway=172.20.10.1, prefixLength=24 + // $ ip route show table 101 + // 172.20.10.0/24 dev eth0.10 table 101 + // default via 172.20.10.1 dev eth0.10 table 101 + localRoute := &netlink.Route{ + Scope: netlink.SCOPE_LINK, + Dst: dst, + LinkIndex: dev, + Table: int(tableID), + } + defaultRoute := &netlink.Route{ + LinkIndex: dev, + Gw: gateway, + Table: int(tableID), + } + if err := c.netlink.RouteReplace(localRoute); err != nil { + return err + } + if err := c.netlink.RouteReplace(defaultRoute); err != nil { + return err + } + c.egressRoutes.Store(tableID, []*netlink.Route{localRoute, defaultRoute}) + return nil +} + +func (c *Client) DeleteEgressRoutes(tableID uint32) error { + value, exists := c.egressRoutes.Load(tableID) + if !exists { + return nil + } + routes := value.([]*netlink.Route) + for _, route := range routes { + if err := c.netlink.RouteDel(route); err != nil { + if err.Error() != "no such process" { + return err + } + } + } + c.egressRoutes.Delete(tableID) + return nil +} + +func (c *Client) AddEgressRule(tableID uint32, mark uint32) error { + rule := netlink.NewRule() + rule.Table = int(tableID) + rule.Mark = int(mark) + rule.Mask = int(types.SNATIPMarkMask) + if err := c.netlink.RuleAdd(rule); err != nil { + return fmt.Errorf("error adding ip rule %v: %w", rule, err) + } + return nil +} + +func (c *Client) DeleteEgressRule(tableID uint32, mark uint32) error { + rule := netlink.NewRule() + rule.Table = int(tableID) + rule.Mark = int(mark) + rule.Mask = int(types.SNATIPMarkMask) + if err := c.netlink.RuleDel(rule); err != nil { + if err.Error() != "no such process" { + return fmt.Errorf("error deleting ip rule %v: %w", rule, err) + } + } + return nil +} + // addVirtualServiceIPRoute is used to add a route which is used to route the packets whose destination IP is a virtual // IP to Antrea gateway. 
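For reference, a minimal sketch of the kernel state these helpers are expected to produce, reusing the values from the comment above (table 101, sub-interface eth0.10, gateway 172.20.10.1, prefix length 24) plus an illustrative pkt mark of 0x1 masked by SNATIPMarkMask (0xff). The rule priority is kernel-assigned and the exact output format may differ:

    $ ip route show table 101
    172.20.10.0/24 dev eth0.10 scope link
    default via 172.20.10.1 dev eth0.10
    $ ip rule list
    ...:    from all fwmark 0x1/0xff lookup 101

Both delete paths treat the "no such process" netlink error as a no-op, so state that was already removed out of band does not fail the cleanup.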
func (c *Client) addVirtualServiceIPRoute(isIPv6 bool) error { diff --git a/pkg/agent/route/route_linux_test.go b/pkg/agent/route/route_linux_test.go index e9f14ade813..35e999fd0a4 100644 --- a/pkg/agent/route/route_linux_test.go +++ b/pkg/agent/route/route_linux_test.go @@ -59,9 +59,12 @@ func TestSyncRoutes(t *testing.T) { nodeRoute2 := &netlink.Route{Dst: ip.MustParseCIDR("192.168.2.0/24"), Gw: net.ParseIP("1.1.1.2")} serviceRoute1 := &netlink.Route{Dst: ip.MustParseCIDR("169.254.0.253/32"), LinkIndex: 10} serviceRoute2 := &netlink.Route{Dst: ip.MustParseCIDR("169.254.0.252/32"), Gw: net.ParseIP("169.254.0.253")} - mockNetlink.EXPECT().RouteList(nil, netlink.FAMILY_ALL).Return([]netlink.Route{*nodeRoute1, *serviceRoute1}, nil) + egressRoute1 := &netlink.Route{Scope: netlink.SCOPE_LINK, Dst: ip.MustParseCIDR("10.10.10.0/24"), LinkIndex: 10, Table: 101} + egressRoute2 := &netlink.Route{Gw: net.ParseIP("10.10.10.1"), LinkIndex: 10, Table: 101} + mockNetlink.EXPECT().RouteList(nil, netlink.FAMILY_ALL).Return([]netlink.Route{*nodeRoute1, *serviceRoute1, *egressRoute1}, nil) mockNetlink.EXPECT().RouteReplace(nodeRoute2) mockNetlink.EXPECT().RouteReplace(serviceRoute2) + mockNetlink.EXPECT().RouteReplace(egressRoute2) mockNetlink.EXPECT().RouteReplace(&netlink.Route{ LinkIndex: 10, Dst: ip.MustParseCIDR("192.168.0.0/24"), @@ -95,10 +98,49 @@ func TestSyncRoutes(t *testing.T) { c.nodeRoutes.Store("192.168.2.0/24", []*netlink.Route{nodeRoute2}) c.serviceRoutes.Store("169.254.0.253/32", serviceRoute1) c.serviceRoutes.Store("169.254.0.252/32", serviceRoute2) + c.egressRoutes.Store(101, []*netlink.Route{egressRoute1, egressRoute2}) assert.NoError(t, c.syncRoute()) } +func TestRestoreEgressRoutesAndRules(t *testing.T) { + ctrl := gomock.NewController(t) + mockNetlink := netlinktest.NewMockInterface(ctrl) + + // route1 and route2 should be removed + route1 := &netlink.Route{Scope: netlink.SCOPE_LINK, Dst: ip.MustParseCIDR("10.10.10.0/24"), LinkIndex: 10, Table: 101} + route2 := &netlink.Route{Gw: net.ParseIP("10.10.10.1"), LinkIndex: 10, Table: 101} + route3 := &netlink.Route{Dst: ip.MustParseCIDR("192.168.1.0/24"), Gw: net.ParseIP("1.1.1.1")} + route4 := &netlink.Route{Gw: net.ParseIP("192.168.1.1"), LinkIndex: 8} + // rule1 should be removed + rule1 := netlink.NewRule() + rule1.Table = 101 + rule1.Mark = 1 + rule1.Mask = int(types.SNATIPMarkMask) + rule2 := netlink.NewRule() + rule2.Table = 50 + rule2.Mark = 10 + rule2.Mask = int(types.SNATIPMarkMask) + + mockNetlink.EXPECT().RouteList(nil, netlink.FAMILY_ALL).Return([]netlink.Route{*route1, *route2, *route3, *route4}, nil) + mockNetlink.EXPECT().RuleList(netlink.FAMILY_ALL).Return([]netlink.Rule{*rule1, *rule2}, nil) + mockNetlink.EXPECT().RouteDel(route1) + mockNetlink.EXPECT().RouteDel(route2) + mockNetlink.EXPECT().RuleDel(rule1) + c := &Client{ + netlink: mockNetlink, + proxyAll: true, + nodeRoutes: sync.Map{}, + serviceRoutes: sync.Map{}, + nodeConfig: &config.NodeConfig{ + GatewayConfig: &config.GatewayConfig{LinkIndex: 10, IPv4: net.ParseIP("192.168.0.1"), IPv6: net.ParseIP("aabb:ccdd::1")}, + PodIPv4CIDR: ip.MustParseCIDR("192.168.0.0/24"), + PodIPv6CIDR: ip.MustParseCIDR("aabb:ccdd::/64"), + }, + } + assert.NoError(t, c.RestoreEgressRoutesAndRules(101, 120)) +} + func TestSyncIPSet(t *testing.T) { podCIDRStr := "172.16.10.0/24" _, podCIDR, _ := net.ParseCIDR(podCIDRStr) @@ -1721,3 +1763,111 @@ func TestAddAndDeleteNodeIP(t *testing.T) { }) } } + +func TestEgressRoutes(t *testing.T) { + tests := []struct { + name string + tableID uint32 + 
dev int + gateway net.IP + prefixLength int + expectedCalls func(mockNetlink *netlinktest.MockInterfaceMockRecorder) + }{ + { + name: "IPv4", + tableID: 101, + dev: 10, + gateway: net.ParseIP("1.1.1.1"), + prefixLength: 24, + expectedCalls: func(mockNetlink *netlinktest.MockInterfaceMockRecorder) { + mockNetlink.RouteReplace(&netlink.Route{Dst: ip.MustParseCIDR("1.1.1.0/24"), Scope: netlink.SCOPE_LINK, LinkIndex: 10, Table: 101}) + mockNetlink.RouteReplace(&netlink.Route{Gw: net.ParseIP("1.1.1.1"), LinkIndex: 10, Table: 101}) + + mockNetlink.RouteDel(&netlink.Route{Dst: ip.MustParseCIDR("1.1.1.0/24"), Scope: netlink.SCOPE_LINK, LinkIndex: 10, Table: 101}) + mockNetlink.RouteDel(&netlink.Route{Gw: net.ParseIP("1.1.1.1"), LinkIndex: 10, Table: 101}) + }, + }, + { + name: "IPv6", + tableID: 102, + dev: 11, + gateway: net.ParseIP("1122:3344::5566"), + prefixLength: 80, + expectedCalls: func(mockNetlink *netlinktest.MockInterfaceMockRecorder) { + mockNetlink.RouteReplace(&netlink.Route{Dst: ip.MustParseCIDR("1122:3344::/80"), Scope: netlink.SCOPE_LINK, LinkIndex: 11, Table: 102}) + mockNetlink.RouteReplace(&netlink.Route{Gw: net.ParseIP("1122:3344::5566"), LinkIndex: 11, Table: 102}) + + mockNetlink.RouteDel(&netlink.Route{Dst: ip.MustParseCIDR("1122:3344::/80"), Scope: netlink.SCOPE_LINK, LinkIndex: 11, Table: 102}) + mockNetlink.RouteDel(&netlink.Route{Gw: net.ParseIP("1122:3344::5566"), LinkIndex: 11, Table: 102}) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + mockNetlink := netlinktest.NewMockInterface(ctrl) + c := &Client{ + netlink: mockNetlink, + nodeConfig: nodeConfig, + } + tt.expectedCalls(mockNetlink.EXPECT()) + + assert.NoError(t, c.AddEgressRoutes(tt.tableID, tt.dev, tt.gateway, tt.prefixLength)) + assert.NoError(t, c.DeleteEgressRoutes(tt.tableID)) + c.egressRoutes.Range(func(key, value any) bool { + t.Errorf("The egressRoutes should be empty but contains %v:%v", key, value) + return true + }) + }) + } +} + +func TestEgressRule(t *testing.T) { + tests := []struct { + name string + tableID uint32 + mark uint32 + expectedCalls func(mockNetlink *netlinktest.MockInterfaceMockRecorder) + }{ + { + name: "normal", + tableID: 101, + mark: 1, + expectedCalls: func(mockNetlink *netlinktest.MockInterfaceMockRecorder) { + rule := netlink.NewRule() + rule.Table = 101 + rule.Mark = 1 + rule.Mask = int(types.SNATIPMarkMask) + mockNetlink.RuleAdd(rule) + mockNetlink.RuleDel(rule) + }, + }, + { + name: "not found", + tableID: 101, + mark: 1, + expectedCalls: func(mockNetlink *netlinktest.MockInterfaceMockRecorder) { + rule := netlink.NewRule() + rule.Table = 101 + rule.Mark = 1 + rule.Mask = int(types.SNATIPMarkMask) + mockNetlink.RuleAdd(rule) + mockNetlink.RuleDel(rule).Return(fmt.Errorf("no such process")) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + mockNetlink := netlinktest.NewMockInterface(ctrl) + c := &Client{ + netlink: mockNetlink, + nodeConfig: nodeConfig, + } + tt.expectedCalls(mockNetlink.EXPECT()) + + assert.NoError(t, c.AddEgressRule(tt.tableID, tt.mark)) + assert.NoError(t, c.DeleteEgressRule(tt.tableID, tt.mark)) + }) + } +} diff --git a/pkg/agent/route/route_windows.go b/pkg/agent/route/route_windows.go index 7a67d6475b7..e2d5fb7b977 100644 --- a/pkg/agent/route/route_windows.go +++ b/pkg/agent/route/route_windows.go @@ -573,3 +573,23 @@ func (c *Client) DeleteRouteForLink(dstCIDR *net.IPNet, linkIndex int) error { func (c *Client) 
ClearConntrackEntryForService(svcIP net.IP, svcPort uint16, endpointIP net.IP, protocol binding.Protocol) error { return errors.New("ClearConntrackEntryForService is not implemented on Windows") } + +func (c *Client) RestoreEgressRoutesAndRules(minTableID, maxTableID int) error { + return errors.New("RestoreEgressRoutesAndRules is not implemented on Windows") +} + +func (c *Client) AddEgressRoutes(tableID uint32, dev int, gateway net.IP, prefixLength int) error { + return errors.New("AddEgressRoutes is not implemented on Windows") +} + +func (c *Client) DeleteEgressRoutes(tableID uint32) error { + return errors.New("DeleteEgressRoutes is not implemented on Windows") +} + +func (c *Client) AddEgressRule(tableID uint32, mark uint32) error { + return errors.New("AddEgressRule is not implemented on Windows") +} + +func (c *Client) DeleteEgressRule(tableID uint32, mark uint32) error { + return errors.New("DeleteEgressRule is not implemented on Windows") +} diff --git a/pkg/agent/route/testing/mock_route.go b/pkg/agent/route/testing/mock_route.go index c6a3e39921f..d6d8a725617 100644 --- a/pkg/agent/route/testing/mock_route.go +++ b/pkg/agent/route/testing/mock_route.go @@ -55,6 +55,34 @@ func (m *MockInterface) EXPECT() *MockInterfaceMockRecorder { return m.recorder } +// AddEgressRoutes mocks base method. +func (m *MockInterface) AddEgressRoutes(arg0 uint32, arg1 int, arg2 net.IP, arg3 int) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AddEgressRoutes", arg0, arg1, arg2, arg3) + ret0, _ := ret[0].(error) + return ret0 +} + +// AddEgressRoutes indicates an expected call of AddEgressRoutes. +func (mr *MockInterfaceMockRecorder) AddEgressRoutes(arg0, arg1, arg2, arg3 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddEgressRoutes", reflect.TypeOf((*MockInterface)(nil).AddEgressRoutes), arg0, arg1, arg2, arg3) +} + +// AddEgressRule mocks base method. +func (m *MockInterface) AddEgressRule(arg0, arg1 uint32) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AddEgressRule", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// AddEgressRule indicates an expected call of AddEgressRule. +func (mr *MockInterfaceMockRecorder) AddEgressRule(arg0, arg1 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddEgressRule", reflect.TypeOf((*MockInterface)(nil).AddEgressRule), arg0, arg1) +} + // AddExternalIPRoute mocks base method. func (m *MockInterface) AddExternalIPRoute(arg0 net.IP) error { m.ctrl.T.Helper() @@ -153,6 +181,34 @@ func (mr *MockInterfaceMockRecorder) ClearConntrackEntryForService(arg0, arg1, a return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ClearConntrackEntryForService", reflect.TypeOf((*MockInterface)(nil).ClearConntrackEntryForService), arg0, arg1, arg2, arg3) } +// DeleteEgressRoutes mocks base method. +func (m *MockInterface) DeleteEgressRoutes(arg0 uint32) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteEgressRoutes", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteEgressRoutes indicates an expected call of DeleteEgressRoutes. +func (mr *MockInterfaceMockRecorder) DeleteEgressRoutes(arg0 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteEgressRoutes", reflect.TypeOf((*MockInterface)(nil).DeleteEgressRoutes), arg0) +} + +// DeleteEgressRule mocks base method. 
+func (m *MockInterface) DeleteEgressRule(arg0, arg1 uint32) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteEgressRule", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteEgressRule indicates an expected call of DeleteEgressRule. +func (mr *MockInterfaceMockRecorder) DeleteEgressRule(arg0, arg1 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteEgressRule", reflect.TypeOf((*MockInterface)(nil).DeleteEgressRule), arg0, arg1) +} + // DeleteExternalIPRoute mocks base method. func (m *MockInterface) DeleteExternalIPRoute(arg0 net.IP) error { m.ctrl.T.Helper() @@ -279,6 +335,20 @@ func (mr *MockInterfaceMockRecorder) Reconcile(arg0 any) *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Reconcile", reflect.TypeOf((*MockInterface)(nil).Reconcile), arg0) } +// RestoreEgressRoutesAndRules mocks base method. +func (m *MockInterface) RestoreEgressRoutesAndRules(arg0, arg1 int) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RestoreEgressRoutesAndRules", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// RestoreEgressRoutesAndRules indicates an expected call of RestoreEgressRoutesAndRules. +func (mr *MockInterfaceMockRecorder) RestoreEgressRoutesAndRules(arg0, arg1 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RestoreEgressRoutesAndRules", reflect.TypeOf((*MockInterface)(nil).RestoreEgressRoutesAndRules), arg0, arg1) +} + // Run mocks base method. func (m *MockInterface) Run(arg0 <-chan struct{}) { m.ctrl.T.Helper() diff --git a/pkg/agent/types/marks.go b/pkg/agent/types/net.go similarity index 80% rename from pkg/agent/types/marks.go rename to pkg/agent/types/net.go index 7dd65bc4d77..439b8612b72 100644 --- a/pkg/agent/types/marks.go +++ b/pkg/agent/types/net.go @@ -28,3 +28,11 @@ var ( // SNAT IP for a "Pod -> external" egress packet, that is to be SNAT'd. SNATIPMarkMask = uint32(0xFF) ) + +// IP Route tables +const ( + // MinEgressRouteTable to MaxEgressRouteTable are the route table IDs that can be configured on a Node for Egress traffic. + // Each distinct subnet uses one route table. 20 subnets should be enough. + MinEgressRouteTable = 101 + MaxEgressRouteTable = 120 +) diff --git a/pkg/agent/util/net_linux.go b/pkg/agent/util/net_linux.go index ca0890d1024..80459d737c1 100644 --- a/pkg/agent/util/net_linux.go +++ b/pkg/agent/util/net_linux.go @@ -342,6 +342,11 @@ func EnsureARPAnnounceOnInterface(ifaceName string, value int) error { return sysctl.EnsureSysctlNetValue(path, value) } +func EnsureRPFilterOnInterface(ifaceName string, value int) error { + path := fmt.Sprintf("ipv4/conf/%s/rp_filter", ifaceName) + return sysctl.EnsureSysctlNetValue(path, value) +} + func getRoutesOnInterface(linkIndex int) ([]interface{}, error) { link, err := netlinkUtil.LinkByIndex(linkIndex) if err != nil { diff --git a/pkg/agent/util/netlink/netlink_linux.go b/pkg/agent/util/netlink/netlink_linux.go index ac053fbedee..253f0dd1ddc 100644 --- a/pkg/agent/util/netlink/netlink_linux.go +++ b/pkg/agent/util/netlink/netlink_linux.go @@ -22,6 +22,12 @@ import ( // Interface is created to allow testing. 
type Interface interface { + RuleAdd(rule *netlink.Rule) error + + RuleDel(rule *netlink.Rule) error + + RuleList(family int) ([]netlink.Rule, error) + RouteReplace(route *netlink.Route) error RouteList(link netlink.Link, family int) ([]netlink.Route, error) diff --git a/pkg/agent/util/netlink/testing/mock_netlink_linux.go b/pkg/agent/util/netlink/testing/mock_netlink_linux.go index 4c79a1efbb6..f61b0276b5c 100644 --- a/pkg/agent/util/netlink/testing/mock_netlink_linux.go +++ b/pkg/agent/util/netlink/testing/mock_netlink_linux.go @@ -340,3 +340,46 @@ func (mr *MockInterfaceMockRecorder) RouteReplace(arg0 any) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RouteReplace", reflect.TypeOf((*MockInterface)(nil).RouteReplace), arg0) } + +// RuleAdd mocks base method. +func (m *MockInterface) RuleAdd(arg0 *netlink.Rule) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RuleAdd", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// RuleAdd indicates an expected call of RuleAdd. +func (mr *MockInterfaceMockRecorder) RuleAdd(arg0 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RuleAdd", reflect.TypeOf((*MockInterface)(nil).RuleAdd), arg0) +} + +// RuleDel mocks base method. +func (m *MockInterface) RuleDel(arg0 *netlink.Rule) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RuleDel", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// RuleDel indicates an expected call of RuleDel. +func (mr *MockInterfaceMockRecorder) RuleDel(arg0 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RuleDel", reflect.TypeOf((*MockInterface)(nil).RuleDel), arg0) +} + +// RuleList mocks base method. +func (m *MockInterface) RuleList(arg0 int) ([]netlink.Rule, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RuleList", arg0) + ret0, _ := ret[0].([]netlink.Rule) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// RuleList indicates an expected call of RuleList. +func (mr *MockInterfaceMockRecorder) RuleList(arg0 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RuleList", reflect.TypeOf((*MockInterface)(nil).RuleList), arg0) +} diff --git a/pkg/apis/crd/v1beta1/types.go b/pkg/apis/crd/v1beta1/types.go index 9e40241400f..f969a4e4ce3 100644 --- a/pkg/apis/crd/v1beta1/types.go +++ b/pkg/apis/crd/v1beta1/types.go @@ -210,6 +210,9 @@ type ExternalIPPool struct { type ExternalIPPoolSpec struct { // The IP ranges of this IP pool, e.g. 10.10.0.0/24, 10.10.10.2-10.10.10.20, 10.10.10.30-10.10.10.30. IPRanges []IPRange `json:"ipRanges"` + // The Subnet info of this IP pool. If set, all IP ranges in the IP pool should share the same subnet attributes. + // Currently, it's only used when an IP is allocated from the pool for Egress, and is ignored otherwise. + SubnetInfo *SubnetInfo `json:"subnetInfo,omitempty"` // The Nodes that the external IPs can be assigned to. If empty, it means all Nodes. NodeSelector metav1.LabelSelector `json:"nodeSelector"` } @@ -224,6 +227,16 @@ type IPRange struct { End string `json:"end,omitempty"` } +// SubnetInfo specifies subnet attributes for IP Range. +type SubnetInfo struct { + // Gateway IP for this subnet, e.g. 10.10.1.1. + Gateway string `json:"gateway"` + // Prefix length for the subnet, e.g. 24. + PrefixLength int32 `json:"prefixLength"` + // VLAN ID for this subnet. Default is 0. Valid value is 0~4094. 
+ VLAN int32 `json:"vlan,omitempty"` +} + type ExternalIPPoolStatus struct { Usage IPPoolUsage `json:"usage,omitempty"` } diff --git a/pkg/apis/crd/v1beta1/util.go b/pkg/apis/crd/v1beta1/util.go index 09c49b696a0..615db552e2c 100644 --- a/pkg/apis/crd/v1beta1/util.go +++ b/pkg/apis/crd/v1beta1/util.go @@ -22,3 +22,18 @@ func GetEgressCondition(conditions []EgressCondition, conditionType EgressCondit } return nil } + +func CompareSubnetInfo(a, b *SubnetInfo, ignoringGateway bool) bool { + if a == nil && b == nil { + return true + } + if a == nil || b == nil { + return false + } + if !ignoringGateway { + if a.Gateway != b.Gateway { + return false + } + } + return a.VLAN == b.VLAN && a.PrefixLength == b.PrefixLength +} diff --git a/pkg/apis/crd/v1beta1/zz_generated.deepcopy.go b/pkg/apis/crd/v1beta1/zz_generated.deepcopy.go index 404f9fb8360..7cde65971e8 100644 --- a/pkg/apis/crd/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/crd/v1beta1/zz_generated.deepcopy.go @@ -649,6 +649,11 @@ func (in *ExternalIPPoolSpec) DeepCopyInto(out *ExternalIPPoolSpec) { *out = make([]IPRange, len(*in)) copy(*out, *in) } + if in.SubnetInfo != nil { + in, out := &in.SubnetInfo, &out.SubnetInfo + *out = new(SubnetInfo) + **out = **in + } in.NodeSelector.DeepCopyInto(&out.NodeSelector) return } @@ -1508,6 +1513,22 @@ func (in *Source) DeepCopy() *Source { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SubnetInfo) DeepCopyInto(out *SubnetInfo) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SubnetInfo. +func (in *SubnetInfo) DeepCopy() *SubnetInfo { + if in == nil { + return nil + } + out := new(SubnetInfo) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
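To make the new API concrete, a hypothetical ExternalIPPool manifest using subnetInfo could look like the following; the addresses mirror the valid case in the webhook tests below, and the field names follow the SubnetInfo type above:

    apiVersion: crd.antrea.io/v1beta1
    kind: ExternalIPPool
    metadata:
      name: egress-vlan-pool
    spec:
      ipRanges:
        - cidr: 10.10.10.0/24      # must be contained in the subnet defined by gateway/prefixLength
      subnetInfo:
        gateway: 10.10.0.1
        prefixLength: 16
        vlan: 2                    # optional; 0 (or omitted) means untagged
      nodeSelector: {}             # empty selector means all Nodes

CompareSubnetInfo above reports two SubnetInfos as equal when their VLAN and prefixLength match, and, unless ignoringGateway is set, their gateway as well.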
func (in *TCPHeader) DeepCopyInto(out *TCPHeader) { *out = *in diff --git a/pkg/apiserver/handlers/featuregates/handler_test.go b/pkg/apiserver/handlers/featuregates/handler_test.go index e7a3d9c1f76..d4f812ddcaa 100644 --- a/pkg/apiserver/handlers/featuregates/handler_test.go +++ b/pkg/apiserver/handlers/featuregates/handler_test.go @@ -56,6 +56,7 @@ func Test_getGatesResponse(t *testing.T) { {Component: "agent", Name: "AntreaProxy", Status: "Enabled", Version: "GA"}, {Component: "agent", Name: "CleanupStaleUDPSvcConntrack", Status: "Disabled", Version: "ALPHA"}, {Component: "agent", Name: "Egress", Status: egressStatus, Version: "BETA"}, + {Component: "agent", Name: "EgressSeparateSubnet", Status: "Disabled", Version: "ALPHA"}, {Component: "agent", Name: "EgressTrafficShaping", Status: "Disabled", Version: "ALPHA"}, {Component: "agent", Name: "EndpointSlice", Status: "Enabled", Version: "GA"}, {Component: "agent", Name: "ExternalNode", Status: "Disabled", Version: "ALPHA"}, diff --git a/pkg/apiserver/openapi/zz_generated.openapi.go b/pkg/apiserver/openapi/zz_generated.openapi.go index fa1140155bf..626f6d18b8f 100644 --- a/pkg/apiserver/openapi/zz_generated.openapi.go +++ b/pkg/apiserver/openapi/zz_generated.openapi.go @@ -130,6 +130,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "antrea.io/antrea/pkg/apis/crd/v1beta1.PeerService": schema_pkg_apis_crd_v1beta1_PeerService(ref), "antrea.io/antrea/pkg/apis/crd/v1beta1.Rule": schema_pkg_apis_crd_v1beta1_Rule(ref), "antrea.io/antrea/pkg/apis/crd/v1beta1.Source": schema_pkg_apis_crd_v1beta1_Source(ref), + "antrea.io/antrea/pkg/apis/crd/v1beta1.SubnetInfo": schema_pkg_apis_crd_v1beta1_SubnetInfo(ref), "antrea.io/antrea/pkg/apis/crd/v1beta1.TCPHeader": schema_pkg_apis_crd_v1beta1_TCPHeader(ref), "antrea.io/antrea/pkg/apis/crd/v1beta1.TLSProtocol": schema_pkg_apis_crd_v1beta1_TLSProtocol(ref), "antrea.io/antrea/pkg/apis/crd/v1beta1.Tier": schema_pkg_apis_crd_v1beta1_Tier(ref), @@ -3766,6 +3767,12 @@ func schema_pkg_apis_crd_v1beta1_ExternalIPPoolSpec(ref common.ReferenceCallback }, }, }, + "subnetInfo": { + SchemaProps: spec.SchemaProps{ + Description: "The Subnet info of this IP pool. If set, all IP ranges in the IP pool should share the same subnet attributes. Currently, it's only used when an IP is allocated from the pool for Egress, and is ignored otherwise.", + Ref: ref("antrea.io/antrea/pkg/apis/crd/v1beta1.SubnetInfo"), + }, + }, "nodeSelector": { SchemaProps: spec.SchemaProps{ Description: "The Nodes that the external IPs can be assigned to. If empty, it means all Nodes.", @@ -3778,7 +3785,7 @@ func schema_pkg_apis_crd_v1beta1_ExternalIPPoolSpec(ref common.ReferenceCallback }, }, Dependencies: []string{ - "antrea.io/antrea/pkg/apis/crd/v1beta1.IPRange", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector"}, + "antrea.io/antrea/pkg/apis/crd/v1beta1.IPRange", "antrea.io/antrea/pkg/apis/crd/v1beta1.SubnetInfo", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector"}, } } @@ -5272,6 +5279,43 @@ func schema_pkg_apis_crd_v1beta1_Source(ref common.ReferenceCallback) common.Ope } } +func schema_pkg_apis_crd_v1beta1_SubnetInfo(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "SubnetInfo specifies subnet attributes for IP Range.", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "gateway": { + SchemaProps: spec.SchemaProps{ + Description: "Gateway IP for this subnet, e.g. 
10.10.1.1.", + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "prefixLength": { + SchemaProps: spec.SchemaProps{ + Description: "Prefix length for the subnet, e.g. 24.", + Default: 0, + Type: []string{"integer"}, + Format: "int32", + }, + }, + "vlan": { + SchemaProps: spec.SchemaProps{ + Description: "VLAN ID for this subnet. Default is 0. Valid value is 0~4094.", + Type: []string{"integer"}, + Format: "int32", + }, + }, + }, + Required: []string{"gateway", "prefixLength"}, + }, + }, + } +} + func schema_pkg_apis_crd_v1beta1_TCPHeader(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ diff --git a/pkg/controller/externalippool/validate.go b/pkg/controller/externalippool/validate.go index f95ecaf6e6a..6a5719f2e11 100644 --- a/pkg/controller/externalippool/validate.go +++ b/pkg/controller/externalippool/validate.go @@ -17,6 +17,7 @@ package externalippool import ( "encoding/json" "fmt" + "net" admv1 "k8s.io/api/admission/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -24,6 +25,7 @@ import ( "k8s.io/klog/v2" crdv1beta1 "antrea.io/antrea/pkg/apis/crd/v1beta1" + "antrea.io/antrea/pkg/util/ip" ) func (c *ExternalIPPoolController) ValidateExternalIPPool(review *admv1.AdmissionReview) *admv1.AdmissionResponse { @@ -48,12 +50,15 @@ func (c *ExternalIPPoolController) ValidateExternalIPPool(review *admv1.Admissio switch review.Request.Operation { case admv1.Create: - // This shouldn't happen with the webhook configuration we include in the Antrea YAML manifests. klog.V(2).Info("Validating CREATE request for ExternalIPPool") - // Always allow CREATE request. + if msg, allowed = validateIPRangesAndSubnetInfo(newObj.Spec.IPRanges, newObj.Spec.SubnetInfo); !allowed { + break + } case admv1.Update: klog.V(2).Info("Validating UPDATE request for ExternalIPPool") - + if msg, allowed = validateIPRangesAndSubnetInfo(newObj.Spec.IPRanges, newObj.Spec.SubnetInfo); !allowed { + break + } oldIPRangeSet := getIPRangeSet(oldObj.Spec.IPRanges) newIPRangeSet := getIPRangeSet(newObj.Spec.IPRanges) deletedIPRanges := oldIPRangeSet.Difference(newIPRangeSet) @@ -77,6 +82,48 @@ func (c *ExternalIPPoolController) ValidateExternalIPPool(review *admv1.Admissio Result: result, } } + +func validateIPRangesAndSubnetInfo(ipRanges []crdv1beta1.IPRange, subnetInfo *crdv1beta1.SubnetInfo) (string, bool) { + if subnetInfo == nil { + return "", true + } + gatewayIP := net.ParseIP(subnetInfo.Gateway) + var mask net.IPMask + if gatewayIP.To4() != nil { + if subnetInfo.PrefixLength <= 0 || subnetInfo.PrefixLength >= 32 { + return fmt.Sprintf("invalid prefixLength %d", subnetInfo.PrefixLength), false + } + mask = net.CIDRMask(int(subnetInfo.PrefixLength), 32) + } else { + if subnetInfo.PrefixLength <= 0 || subnetInfo.PrefixLength >= 128 { + return fmt.Sprintf("invalid prefixLength %d", subnetInfo.PrefixLength), false + } + mask = net.CIDRMask(int(subnetInfo.PrefixLength), 128) + } + subnet := &net.IPNet{ + IP: gatewayIP.Mask(mask), + Mask: mask, + } + for _, ipRange := range ipRanges { + if ipRange.CIDR != "" { + _, cidr, err := net.ParseCIDR(ipRange.CIDR) + if err != nil { + return err.Error(), false + } + if !ip.IPNetContains(subnet, cidr) { + return fmt.Sprintf("cidr %s must be a strict subset of the subnet", ipRange.CIDR), false + } + } else { + start := net.ParseIP(ipRange.Start) + end := net.ParseIP(ipRange.End) + if !subnet.Contains(start) || !subnet.Contains(end) { + return fmt.Sprintf("IP range %s-%s must be a strict subset of the subnet", 
ipRange.Start, ipRange.End), false + } + } + } + return "", true +} + func getIPRangeSet(ipRanges []crdv1beta1.IPRange) sets.Set[string] { set := sets.New[string]() for _, ipRange := range ipRanges { diff --git a/pkg/controller/externalippool/validate_test.go b/pkg/controller/externalippool/validate_test.go index fd45823cb14..13422bb6b48 100644 --- a/pkg/controller/externalippool/validate_test.go +++ b/pkg/controller/externalippool/validate_test.go @@ -24,6 +24,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/cache" + + crdv1b1 "antrea.io/antrea/pkg/apis/crd/v1beta1" ) func marshal(object runtime.Object) []byte { @@ -31,6 +33,11 @@ func marshal(object runtime.Object) []byte { return raw } +func mutateExternalIPPool(pool *crdv1b1.ExternalIPPool, mutate func(*crdv1b1.ExternalIPPool)) *crdv1b1.ExternalIPPool { + mutate(pool) + return pool +} + func TestControllerValidateExternalIPPool(t *testing.T) { tests := []struct { name string @@ -38,7 +45,7 @@ func TestControllerValidateExternalIPPool(t *testing.T) { expectedResponse *admv1.AdmissionResponse }{ { - name: "CREATE operation should be allowed", + name: "CREATE operation without SubnetInfo should be allowed", request: &admv1.AdmissionRequest{ Name: "foo", Operation: "CREATE", @@ -46,6 +53,98 @@ func TestControllerValidateExternalIPPool(t *testing.T) { }, expectedResponse: &admv1.AdmissionResponse{Allowed: true}, }, + { + name: "CREATE operation with valid SubnetInfo should be allowed", + request: &admv1.AdmissionRequest{ + Name: "foo", + Operation: "CREATE", + Object: runtime.RawExtension{Raw: marshal(mutateExternalIPPool(newExternalIPPool("foo", "10.10.10.0/24", "", ""), func(pool *crdv1b1.ExternalIPPool) { + pool.Spec.SubnetInfo = &crdv1b1.SubnetInfo{ + Gateway: "10.10.0.1", + PrefixLength: 16, + VLAN: 2, + } + }))}, + }, + expectedResponse: &admv1.AdmissionResponse{Allowed: true}, + }, + { + name: "CREATE operation with invalid SubnetInfo should not be allowed", + request: &admv1.AdmissionRequest{ + Name: "foo", + Operation: "CREATE", + Object: runtime.RawExtension{Raw: marshal(mutateExternalIPPool(newExternalIPPool("foo", "10.10.10.0/24", "", ""), func(pool *crdv1b1.ExternalIPPool) { + pool.Spec.SubnetInfo = &crdv1b1.SubnetInfo{ + Gateway: "10.10.11.1", + PrefixLength: 64, + VLAN: 2, + } + }))}, + }, + expectedResponse: &admv1.AdmissionResponse{ + Allowed: false, + Result: &metav1.Status{ + Message: "invalid prefixLength 64", + }, + }, + }, + { + name: "CREATE operation with unmatched SubnetInfo should not be allowed", + request: &admv1.AdmissionRequest{ + Name: "foo", + Operation: "CREATE", + Object: runtime.RawExtension{Raw: marshal(mutateExternalIPPool(newExternalIPPool("foo", "10.10.10.0/24", "", ""), func(pool *crdv1b1.ExternalIPPool) { + pool.Spec.SubnetInfo = &crdv1b1.SubnetInfo{ + Gateway: "10.10.11.1", + PrefixLength: 24, + VLAN: 2, + } + }))}, + }, + expectedResponse: &admv1.AdmissionResponse{ + Allowed: false, + Result: &metav1.Status{ + Message: "cidr 10.10.10.0/24 must be a strict subset of the subnet", + }, + }, + }, + { + name: "Adding matched SubnetInfo should be allowed", + request: &admv1.AdmissionRequest{ + Name: "foo", + Operation: "UPDATE", + OldObject: runtime.RawExtension{Raw: marshal(newExternalIPPool("foo", "10.10.10.0/24", "10.10.20.1", "10.10.20.2"))}, + Object: runtime.RawExtension{Raw: marshal(mutateExternalIPPool(newExternalIPPool("foo", "10.10.10.0/24", "10.10.20.1", "10.10.20.2"), func(pool *crdv1b1.ExternalIPPool) { + 
pool.Spec.SubnetInfo = &crdv1b1.SubnetInfo{ + Gateway: "10.10.0.1", + PrefixLength: 16, + VLAN: 2, + } + }))}, + }, + expectedResponse: &admv1.AdmissionResponse{Allowed: true}, + }, + { + name: "Adding unmatched SubnetInfo should not be allowed", + request: &admv1.AdmissionRequest{ + Name: "foo", + Operation: "UPDATE", + OldObject: runtime.RawExtension{Raw: marshal(newExternalIPPool("foo", "10.10.10.0/24", "10.10.20.1", "10.10.20.2"))}, + Object: runtime.RawExtension{Raw: marshal(mutateExternalIPPool(newExternalIPPool("foo", "10.10.10.0/24", "10.10.20.1", "10.10.20.2"), func(pool *crdv1b1.ExternalIPPool) { + pool.Spec.SubnetInfo = &crdv1b1.SubnetInfo{ + Gateway: "10.10.10.1", + PrefixLength: 24, + VLAN: 2, + } + }))}, + }, + expectedResponse: &admv1.AdmissionResponse{ + Allowed: false, + Result: &metav1.Status{ + Message: "IP range 10.10.20.1-10.10.20.2 must be a strict subset of the subnet", + }, + }, + }, { name: "Deleting IPRange should not be allowed", request: &admv1.AdmissionRequest{ diff --git a/pkg/features/antrea_features.go b/pkg/features/antrea_features.go index e2b5cb801c8..7b3730a46f8 100644 --- a/pkg/features/antrea_features.go +++ b/pkg/features/antrea_features.go @@ -146,6 +146,10 @@ const ( // alpha: v1.14 // Enable Egress traffic shaping. EgressTrafficShaping featuregate.Feature = "EgressTrafficShaping" + + // alpha: v1.15 + // Allow users to allocate Egress IPs from a separate subnet different from the default Node subnet. + EgressSeparateSubnet featuregate.Feature = "EgressSeparateSubnet" ) var ( @@ -184,6 +188,7 @@ var ( LoadBalancerModeDSR: {Default: false, PreRelease: featuregate.Alpha}, AdminNetworkPolicy: {Default: false, PreRelease: featuregate.Alpha}, EgressTrafficShaping: {Default: false, PreRelease: featuregate.Alpha}, + EgressSeparateSubnet: {Default: false, PreRelease: featuregate.Alpha}, } // AgentGates consists of all known feature gates for the Antrea Agent. @@ -211,6 +216,7 @@ var ( Traceflow, TrafficControl, EgressTrafficShaping, + EgressSeparateSubnet, ) // ControllerGates consists of all known feature gates for the Antrea Controller. @@ -255,6 +261,7 @@ var ( LoadBalancerModeDSR: {}, CleanupStaleUDPSvcConntrack: {}, EgressTrafficShaping: {}, + EgressSeparateSubnet: {}, } // supportedFeaturesOnExternalNode records the features supported on an external // Node. 
Antrea Agent checks the enabled features if it is running on an diff --git a/test/e2e/egress_test.go b/test/e2e/egress_test.go index ddb5634948e..075ce134138 100644 --- a/test/e2e/egress_test.go +++ b/test/e2e/egress_test.go @@ -23,6 +23,7 @@ import ( "testing" "time" + "github.com/containernetworking/plugins/pkg/ip" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" @@ -46,6 +47,10 @@ func skipIfEgressDisabled(tb testing.TB) { skipIfFeatureDisabled(tb, features.Egress, true, true) } +func skipIfEgressSeparateSubnetDisabled(tb testing.TB) { + skipIfFeatureDisabled(tb, features.EgressSeparateSubnet, true, false) +} + func TestEgress(t *testing.T) { skipIfHasWindowsNodes(t) skipIfNumNodesLessThan(t, 2) @@ -61,6 +66,7 @@ func TestEgress(t *testing.T) { skipIfEncapModeIsNot(t, data, config.TrafficEncapModeEncap) t.Run("testEgressClientIP", func(t *testing.T) { testEgressClientIP(t, data) }) + t.Run("testEgressClientIPFromVLANSubnet", func(t *testing.T) { testEgressClientIPFromVLANSubnet(t, data) }) t.Run("testEgressCRUD", func(t *testing.T) { testEgressCRUD(t, data) }) t.Run("testEgressUpdateEgressIP", func(t *testing.T) { testEgressUpdateEgressIP(t, data) }) t.Run("testEgressUpdateNodeSelector", func(t *testing.T) { testEgressUpdateNodeSelector(t, data) }) @@ -287,6 +293,156 @@ func testEgressClientIP(t *testing.T, data *TestData) { } } +func testEgressClientIPFromVLANSubnet(t *testing.T, data *TestData) { + skipIfEgressSeparateSubnetDisabled(t) + tests := []struct { + name string + serverIP string + vlanSubnet string + vlanGateway string + vlanID int + }{ + { + name: "ipv4-cluster", + serverIP: externalInfo.externalServerIPv4, + vlanSubnet: externalInfo.vlanSubnetIPv4, + vlanGateway: externalInfo.vlanGatewayIPv4, + vlanID: externalInfo.vlanID, + }, + { + name: "ipv6-cluster", + serverIP: externalInfo.externalServerIPv6, + vlanSubnet: externalInfo.vlanSubnetIPv6, + vlanGateway: externalInfo.vlanGatewayIPv6, + vlanID: externalInfo.vlanID, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.serverIP == "" { + t.Skipf("Skipping test as the server IP is not set") + } + if tt.vlanSubnet == "" { + t.Skipf("Skipping test as the vlan subnet is not set") + } + if utilnet.IsIPv6String(tt.serverIP) { + skipIfNotIPv6Cluster(t) + } else { + skipIfNotIPv4Cluster(t) + } + + clientNode := workerNodeName(1) + clientPod1 := fmt.Sprintf("clientpod1-%s", tt.name) + clientPod2 := fmt.Sprintf("clientpod2-%s", tt.name) + if err := data.createBusyboxPodOnNode(clientPod1, data.testNamespace, clientNode, false); err != nil { + t.Fatalf("Failed to create client Pod %s: %v", clientPod1, err) + } + defer deletePodWrapper(t, data, data.testNamespace, clientPod1) + if err := data.podWaitForRunning(defaultTimeout, clientPod1, data.testNamespace); err != nil { + t.Fatalf("Error when waiting for Pod '%s' to be in the Running state", clientPod1) + } + if err := data.createBusyboxPodOnNode(clientPod2, data.testNamespace, clientNode, false); err != nil { + t.Fatalf("Failed to create applied Pod %s: %v", clientPod2, err) + } + defer deletePodWrapper(t, data, data.testNamespace, clientPod2) + if err := data.podWaitForRunning(defaultTimeout, clientPod2, data.testNamespace); err != nil { + t.Fatalf("Error when waiting for Pod '%s' to be in the Running state", clientPod2) + } + + _, cidr, _ := net.ParseCIDR(tt.vlanSubnet) + prefixLength, _ := cidr.Mask.Size() + ipRange := v1beta1.IPRange{CIDR: tt.vlanSubnet} + subnet := v1beta1.SubnetInfo{ + Gateway: 
tt.vlanGateway, + PrefixLength: int32(prefixLength), + VLAN: int32(tt.vlanID), + } + pool := data.createExternalIPPool(t, "pool-vlan", ipRange, &subnet, nil, nil) + defer data.crdClient.CrdV1beta1().ExternalIPPools().Delete(context.TODO(), pool.Name, metav1.DeleteOptions{}) + + // Specify the Egress IP to the next IP of the gateway IP. + egressIP := ip.NextIP(net.ParseIP(tt.vlanGateway)).String() + egress := data.createEgress(t, "egress-vlan", nil, map[string]string{"antrea-e2e": clientPod1}, pool.Name, egressIP, nil) + defer data.crdClient.CrdV1beta1().Egresses().Delete(context.TODO(), egress.Name, metav1.DeleteOptions{}) + // Use Poll to wait the interval before the first run to detect the case that the IP is assigned to any Node + // when it's not supposed to. + err := wait.Poll(500*time.Millisecond, 3*time.Second, func() (done bool, err error) { + egress, err = data.crdClient.CrdV1beta1().Egresses().Get(context.TODO(), egress.Name, metav1.GetOptions{}) + if err != nil { + return false, err + } + if !k8s.SemanticIgnoringTime.DeepEqual([]v1beta1.EgressCondition{ + {Type: v1beta1.IPAssigned, Status: v1.ConditionTrue, Reason: "Assigned", Message: "EgressIP is successfully assigned to EgressNode"}, + {Type: v1beta1.IPAllocated, Status: v1.ConditionTrue, Reason: "Allocated", Message: "EgressIP is successfully allocated"}, + }, egress.Status.Conditions) { + return false, nil + } + return true, nil + }) + require.NoError(t, err, "Egress didn't meet expected conditions, current status: %v", egress.Status) + + serverIPStr := tt.serverIP + // By default, Pod will be SNATed to Node IP. + defaultClientIP := workerNodeIPv4(1) + if utilnet.IsIPv6String(tt.serverIP) { + serverIPStr = fmt.Sprintf("[%s]", tt.serverIP) + defaultClientIP = workerNodeIPv6(1) + } + + // getClientIP gets the translated client IP by accessing the API that replies the request's client IP. + getClientIP := func(pod string) (string, string, error) { + url := fmt.Sprintf("%s:8080/clientip", serverIPStr) + return data.runWgetCommandOnBusyboxWithRetry(pod, data.testNamespace, url, 5) + } + + // assertClientIP asserts the Pod is translated to the provided client IP. 
+ assertClientIP := func(pod string, clientIP string) { + var exeErr error + var stdout, stderr string + if err := wait.Poll(100*time.Millisecond, 5*time.Second, func() (done bool, err error) { + stdout, stderr, exeErr = getClientIP(pod) + if exeErr != nil { + return false, nil + } + + // The stdout return is in this format: x.x.x.x:port or [xx:xx:xx::x]:port + host, _, err := net.SplitHostPort(stdout) + if err != nil { + return false, nil + } + return clientIP == host, nil + }); err != nil { + t.Fatalf("Failed to get expected client IP %s for Pod %s, stdout: %s, stderr: %s, err: %v", clientIP, pod, stdout, stderr, exeErr) + } + } + + assertClientIP(clientPod1, egress.Spec.EgressIP) + assertClientIP(clientPod2, defaultClientIP) + + t.Log("Updating the Egress's AppliedTo to clientPod2 only") + egress.Spec.AppliedTo = v1beta1.AppliedTo{ + PodSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"antrea-e2e": clientPod2}, + }, + } + egress, err = data.crdClient.CrdV1beta1().Egresses().Update(context.TODO(), egress, metav1.UpdateOptions{}) + if err != nil { + t.Fatalf("Failed to update Egress %v: %v", egress, err) + } + assertClientIP(clientPod1, defaultClientIP) + assertClientIP(clientPod2, egress.Spec.EgressIP) + + t.Log("Deleting the Egress") + err = data.crdClient.CrdV1beta1().Egresses().Delete(context.TODO(), egress.Name, metav1.DeleteOptions{}) + if err != nil { + t.Fatalf("Failed to delete Egress %v: %v", egress, err) + } + assertClientIP(clientPod1, defaultClientIP) + assertClientIP(clientPod2, defaultClientIP) + }) + } +} + func testEgressCRUD(t *testing.T, data *TestData) { tests := []struct { name string @@ -373,7 +529,7 @@ func testEgressCRUD(t *testing.T, data *TestData) { } else { skipIfNotIPv4Cluster(t) } - pool := data.createExternalIPPool(t, "crud-pool-", tt.ipRange, tt.nodeSelector.MatchExpressions, tt.nodeSelector.MatchLabels) + pool := data.createExternalIPPool(t, "crud-pool-", tt.ipRange, nil, tt.nodeSelector.MatchExpressions, tt.nodeSelector.MatchLabels) defer data.crdClient.CrdV1beta1().ExternalIPPools().Delete(context.TODO(), pool.Name, metav1.DeleteOptions{}) egress := data.createEgress(t, "crud-egress-", nil, map[string]string{"foo": "bar"}, pool.Name, "", nil) @@ -496,9 +652,9 @@ func testEgressUpdateEgressIP(t *testing.T, data *TestData) { } else { skipIfNotIPv4Cluster(t) } - originalPool := data.createExternalIPPool(t, "originalpool-", tt.originalIPRange, nil, map[string]string{v1.LabelHostname: tt.originalNode}) + originalPool := data.createExternalIPPool(t, "originalpool-", tt.originalIPRange, nil, nil, map[string]string{v1.LabelHostname: tt.originalNode}) defer data.crdClient.CrdV1beta1().ExternalIPPools().Delete(context.TODO(), originalPool.Name, metav1.DeleteOptions{}) - newPool := data.createExternalIPPool(t, "newpool-", tt.newIPRange, nil, map[string]string{v1.LabelHostname: tt.newNode}) + newPool := data.createExternalIPPool(t, "newpool-", tt.newIPRange, nil, nil, map[string]string{v1.LabelHostname: tt.newNode}) defer data.crdClient.CrdV1beta1().ExternalIPPools().Delete(context.TODO(), newPool.Name, metav1.DeleteOptions{}) egress := data.createEgress(t, "egress-", nil, map[string]string{"foo": "bar"}, originalPool.Name, "", nil) @@ -646,7 +802,7 @@ func testEgressMigration(t *testing.T, data *TestData, triggerFunc, revertFunc f Values: sets.List(nodeCandidates), }, } - externalIPPoolTwoNodes := data.createExternalIPPool(t, "pool-with-two-nodes-", *ipRange, matchExpressions, nil) + externalIPPoolTwoNodes := data.createExternalIPPool(t, 
"pool-with-two-nodes-", *ipRange, nil, matchExpressions, nil) defer data.crdClient.CrdV1beta1().ExternalIPPools().Delete(context.TODO(), externalIPPoolTwoNodes.Name, metav1.DeleteOptions{}) egress := data.createEgress(t, "migration-egress-", nil, map[string]string{"foo": "bar"}, externalIPPoolTwoNodes.Name, "", nil) @@ -859,11 +1015,12 @@ func setupIPNeighborChecker(data *TestData, t *testing.T, observerNode, node1, n return checkIPNeighbor, nil } -func (data *TestData) createExternalIPPool(t *testing.T, generateName string, ipRange v1beta1.IPRange, matchExpressions []metav1.LabelSelectorRequirement, matchLabels map[string]string) *v1beta1.ExternalIPPool { +func (data *TestData) createExternalIPPool(t *testing.T, generateName string, ipRange v1beta1.IPRange, subnet *v1beta1.SubnetInfo, matchExpressions []metav1.LabelSelectorRequirement, matchLabels map[string]string) *v1beta1.ExternalIPPool { pool := &v1beta1.ExternalIPPool{ ObjectMeta: metav1.ObjectMeta{GenerateName: generateName}, Spec: v1beta1.ExternalIPPoolSpec{ - IPRanges: []v1beta1.IPRange{ipRange}, + IPRanges: []v1beta1.IPRange{ipRange}, + SubnetInfo: subnet, NodeSelector: metav1.LabelSelector{ MatchExpressions: matchExpressions, MatchLabels: matchLabels, diff --git a/test/e2e/framework.go b/test/e2e/framework.go index 7973fb7f9d2..8a9919d93f1 100644 --- a/test/e2e/framework.go +++ b/test/e2e/framework.go @@ -188,7 +188,19 @@ type ClusterInfo struct { k8sServicePort int32 } +type ExternalInfo struct { + externalServerIPv4 string + externalServerIPv6 string + + vlanSubnetIPv4 string + vlanGatewayIPv4 string + vlanSubnetIPv6 string + vlanGatewayIPv6 string + vlanID int +} + var clusterInfo ClusterInfo +var externalInfo ExternalInfo type TestOptions struct { providerName string @@ -206,6 +218,10 @@ type TestOptions struct { // deployAntrea determines whether to deploy Antrea before running tests. It requires antrea.yml to be present in // the home directory of the control-plane Node. Note it doesn't affect the tests that redeploy Antrea themselves. 
deployAntrea bool + + externalServerIPs string + vlanSubnets string + vlanID int } type flowVisibilityTestOptions struct { @@ -466,6 +482,38 @@ func (data *TestData) RunCommandOnNodeExt(nodeName, cmd string, envs map[string] return data.provider.RunCommandOnNodeExt(nodeName, cmd, envs, stdin, sudo) } +func (data *TestData) collectExternalInfo() error { + ips := strings.Split(testOptions.externalServerIPs, ",") + for _, ip := range ips { + parsedIP := net.ParseIP(ip) + if parsedIP == nil { + continue + } + if parsedIP.To4() != nil { + externalInfo.externalServerIPv4 = ip + } else { + externalInfo.externalServerIPv6 = ip + } + } + + subnets := strings.Split(testOptions.vlanSubnets, ",") + for _, subnet := range subnets { + gatewayIP, _, err := net.ParseCIDR(subnet) + if err != nil { + continue + } + if gatewayIP.To4() != nil { + externalInfo.vlanSubnetIPv4 = subnet + externalInfo.vlanGatewayIPv4 = gatewayIP.String() + } else { + externalInfo.vlanSubnetIPv6 = subnet + externalInfo.vlanGatewayIPv6 = gatewayIP.String() + } + } + externalInfo.vlanID = testOptions.vlanID + return nil +} + func (data *TestData) collectClusterInfo() error { // retrieve K8s server version // this needs to be done first, as there may be dependencies on the diff --git a/test/e2e/main_test.go b/test/e2e/main_test.go index a1cd1ca0eb6..d9ae5cd04aa 100644 --- a/test/e2e/main_test.go +++ b/test/e2e/main_test.go @@ -95,6 +95,9 @@ func testMain(m *testing.M) int { flag.StringVar(&testOptions.skipCases, "skip-cases", "", "Key words to skip cases") flag.StringVar(&testOptions.linuxVMs, "linuxVMs", "", "hostname of Linux VMs") flag.StringVar(&testOptions.windowsVMs, "windowsVMs", "", "hostname of Windows VMs") + flag.StringVar(&testOptions.externalServerIPs, "external-server-ips", "", "IP addresses of external server, at most one IP per IP family") + flag.StringVar(&testOptions.vlanSubnets, "vlan-subnets", "", "IP subnets of the VLAN network the Nodes reside in, at most one subnet per IP family") + flag.IntVar(&testOptions.vlanID, "vlan-id", 0, "ID of the VLAN network the Nodes reside in") flag.Parse() cleanupLogging := testOptions.setupLogging() @@ -129,6 +132,9 @@ func testMain(m *testing.M) int { log.Printf("Service IPv6 network: '%s'", clusterInfo.svcV6NetworkCIDR) } log.Printf("Num nodes: %d", clusterInfo.numNodes) + if err := testData.collectExternalInfo(); err != nil { + log.Fatalf("Error when collecting external information: %v", err) + } err = ensureAntreaRunning(testData) if err != nil { log.Fatalf("Error when deploying Antrea: %v", err) diff --git a/test/e2e/service_externalip_test.go b/test/e2e/service_externalip_test.go index f5d69d8b66d..aaae30c6b8f 100644 --- a/test/e2e/service_externalip_test.go +++ b/test/e2e/service_externalip_test.go @@ -207,7 +207,7 @@ func testServiceExternalTrafficPolicyLocal(t *testing.T, data *TestData) { var err error var service *v1.Service var eps *v1.Endpoints - ipPool := data.createExternalIPPool(t, "test-service-pool-", tt.ipRange, tt.nodeSelector.MatchExpressions, tt.nodeSelector.MatchLabels) + ipPool := data.createExternalIPPool(t, "test-service-pool-", tt.ipRange, nil, tt.nodeSelector.MatchExpressions, tt.nodeSelector.MatchLabels) defer data.crdClient.CrdV1alpha2().ExternalIPPools().Delete(context.TODO(), ipPool.Name, metav1.DeleteOptions{}) annotation := map[string]string{ @@ -324,7 +324,7 @@ func testServiceWithExternalIPCRUD(t *testing.T, data *TestData) { } var err error var service *v1.Service - ipPool := data.createExternalIPPool(t, "crud-pool-", tt.ipRange, 
tt.nodeSelector.MatchExpressions, tt.nodeSelector.MatchLabels) + ipPool := data.createExternalIPPool(t, "crud-pool-", tt.ipRange, nil, tt.nodeSelector.MatchExpressions, tt.nodeSelector.MatchLabels) defer data.crdClient.CrdV1alpha2().ExternalIPPools().Delete(context.TODO(), ipPool.Name, metav1.DeleteOptions{}) annotation := map[string]string{ @@ -414,9 +414,9 @@ func testServiceUpdateExternalIP(t *testing.T, data *TestData) { skipIfNotIPv4Cluster(t) } - originalPool := data.createExternalIPPool(t, "originalpool-", tt.originalIPRange, nil, map[string]string{v1.LabelHostname: tt.originalNode}) + originalPool := data.createExternalIPPool(t, "originalpool-", tt.originalIPRange, nil, nil, map[string]string{v1.LabelHostname: tt.originalNode}) defer data.crdClient.CrdV1alpha2().ExternalIPPools().Delete(context.TODO(), originalPool.Name, metav1.DeleteOptions{}) - newPool := data.createExternalIPPool(t, "newpool-", tt.newIPRange, nil, map[string]string{v1.LabelHostname: tt.newNode}) + newPool := data.createExternalIPPool(t, "newpool-", tt.newIPRange, nil, nil, map[string]string{v1.LabelHostname: tt.newNode}) defer data.crdClient.CrdV1alpha2().ExternalIPPools().Delete(context.TODO(), newPool.Name, metav1.DeleteOptions{}) annotation := map[string]string{ @@ -499,7 +499,7 @@ func testServiceNodeFailure(t *testing.T, data *TestData) { Values: sets.List(nodeCandidates), }, } - externalIPPoolTwoNodes := data.createExternalIPPool(t, "pool-with-two-nodes-", tt.ipRange, matchExpressions, nil) + externalIPPoolTwoNodes := data.createExternalIPPool(t, "pool-with-two-nodes-", tt.ipRange, nil, matchExpressions, nil) defer data.crdClient.CrdV1alpha2().ExternalIPPools().Delete(context.TODO(), externalIPPoolTwoNodes.Name, metav1.DeleteOptions{}) annotation := map[string]string{ antreaagenttypes.ServiceExternalIPPoolAnnotationKey: externalIPPoolTwoNodes.Name, @@ -573,7 +573,7 @@ func testExternalIPAccess(t *testing.T, data *TestData) { } nodes := []string{nodeName(0), nodeName(1)} ipRange := v1beta1.IPRange{CIDR: tt.externalIPCIDR} - ipPool := data.createExternalIPPool(t, "ippool-", ipRange, nil, nil) + ipPool := data.createExternalIPPool(t, "ippool-", ipRange, nil, nil, nil) defer data.crdClient.CrdV1alpha2().ExternalIPPools().Delete(context.TODO(), ipPool.Name, metav1.DeleteOptions{}) agnhosts := []string{"agnhost-0", "agnhost-1"} // Create agnhost Pods on each Node. 
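The options added above can be passed straight to the e2e suite; a hypothetical invocation (server IPs, subnets and VLAN ID are placeholders, and the usual provider flags are omitted) might look like:

    go test ./test/e2e -run 'TestEgress/testEgressClientIPFromVLANSubnet' \
        -external-server-ips=172.20.20.100,fd00:20::100 \
        -vlan-subnets=172.20.20.1/24,fd00:20::1/64 \
        -vlan-id=20

Note that collectExternalInfo parses each -vlan-subnets entry as <gateway IP>/<prefix length>, taking the address part as the VLAN gateway.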
diff --git a/test/integration/agent/ip_assigner_linux_test.go b/test/integration/agent/ip_assigner_linux_test.go index 8c161f40756..1016502753e 100644 --- a/test/integration/agent/ip_assigner_linux_test.go +++ b/test/integration/agent/ip_assigner_linux_test.go @@ -25,6 +25,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "antrea.io/antrea/pkg/agent/ipassigner" + crdv1b1 "antrea.io/antrea/pkg/apis/crd/v1beta1" ) const dummyDeviceName = "antrea-dummy0" @@ -40,16 +41,21 @@ func TestIPAssigner(t *testing.T) { require.NoError(t, err, "Failed to find the dummy device") defer netlink.LinkDel(dummyDevice) - _, err = ipAssigner.AssignIP("x", false) + _, err = ipAssigner.AssignIP("x", nil, false) assert.Error(t, err, "Assigning an invalid IP should fail") ip1 := "10.10.10.10" ip2 := "10.10.10.11" ip3 := "2021:124:6020:1006:250:56ff:fea7:36c2" - desiredIPs := sets.New[string](ip1, ip2, ip3) - - for ip := range desiredIPs { - _, errAssign := ipAssigner.AssignIP(ip, false) + ip1VLAN20 := "10.10.20.10" + ip2VLAN20 := "10.10.20.11" + ip1VLAN30 := "10.10.30.10" + subnet20 := &crdv1b1.SubnetInfo{PrefixLength: 24, VLAN: 20} + subnet30 := &crdv1b1.SubnetInfo{PrefixLength: 24, VLAN: 30} + desiredIPs := map[string]*crdv1b1.SubnetInfo{ip1: nil, ip2: nil, ip3: nil, ip1VLAN20: subnet20, ip2VLAN20: subnet20, ip1VLAN30: subnet30} + + for ip, subnetInfo := range desiredIPs { + _, errAssign := ipAssigner.AssignIP(ip, subnetInfo, false) cmd := exec.Command("ip", "addr") out, err := cmd.CombinedOutput() if err != nil { @@ -60,33 +66,53 @@ func TestIPAssigner(t *testing.T) { assert.Equal(t, desiredIPs, ipAssigner.AssignedIPs(), "Assigned IPs don't match") + vlan20Device, err := netlink.LinkByName("antrea-ext.20") + require.NoError(t, err, "Failed to find the VLAN 20 device") + defer netlink.LinkDel(vlan20Device) + vlan30Device, err := netlink.LinkByName("antrea-ext.30") + require.NoError(t, err, "Failed to find the VLAN 30 device") + defer netlink.LinkDel(vlan30Device) + actualIPs, err := listIPAddresses(dummyDevice) require.NoError(t, err, "Failed to list IP addresses") - assert.Equal(t, desiredIPs, actualIPs, "Actual IPs don't match") + assert.Equal(t, sets.New[string](fmt.Sprintf("%s/32", ip1), fmt.Sprintf("%s/32", ip2), fmt.Sprintf("%s/128", ip3)), actualIPs, "Actual IPs don't match") + actualIPs, err = listIPAddresses(vlan20Device) + require.NoError(t, err, "Failed to list IP addresses") + assert.Equal(t, sets.New[string](fmt.Sprintf("%s/%d", ip1VLAN20, subnet20.PrefixLength), fmt.Sprintf("%s/%d", ip2VLAN20, subnet20.PrefixLength)), actualIPs, "Actual IPs don't match") + actualIPs, err = listIPAddresses(vlan30Device) + require.NoError(t, err, "Failed to list IP addresses") + assert.Equal(t, sets.New[string](fmt.Sprintf("%s/%d", ip1VLAN30, subnet30.PrefixLength)), actualIPs, "Actual IPs don't match") newIPAssigner, err := ipassigner.NewIPAssigner(nodeLinkName, dummyDeviceName) require.NoError(t, err, "Initializing new IP assigner failed") - assert.Equal(t, sets.New[string](), newIPAssigner.AssignedIPs(), "Assigned IPs don't match") + assert.Equal(t, map[string]*crdv1b1.SubnetInfo{}, newIPAssigner.AssignedIPs(), "Assigned IPs don't match") ip4 := "2021:124:6020:1006:250:56ff:fea7:36c4" - newDesiredIPs := sets.New[string](ip1, ip2, ip4) + newDesiredIPs := map[string]*crdv1b1.SubnetInfo{ip1: nil, ip2: nil, ip4: nil, ip1VLAN20: subnet20} err = newIPAssigner.InitIPs(newDesiredIPs) require.NoError(t, err, "InitIPs failed") assert.Equal(t, newDesiredIPs, newIPAssigner.AssignedIPs(), "Assigned IPs don't match") actualIPs, 
err = listIPAddresses(dummyDevice) require.NoError(t, err, "Failed to list IP addresses") - assert.Equal(t, newDesiredIPs, actualIPs, "Actual IPs don't match") + assert.Equal(t, sets.New[string](fmt.Sprintf("%s/32", ip1), fmt.Sprintf("%s/32", ip2), fmt.Sprintf("%s/128", ip4)), actualIPs, "Actual IPs don't match") + actualIPs, err = listIPAddresses(vlan20Device) + require.NoError(t, err, "Failed to list IP addresses") + assert.Equal(t, sets.New[string](fmt.Sprintf("%s/%d", ip1VLAN20, subnet20.PrefixLength)), actualIPs, "Actual IPs don't match") + _, err = netlink.LinkByName("antrea-ext.30") + require.Error(t, err, "VLAN 30 device should be deleted but was not") for ip := range newDesiredIPs { _, err = newIPAssigner.UnassignIP(ip) assert.NoError(t, err, "Failed to unassign a valid IP") } - assert.Equal(t, sets.New[string](), newIPAssigner.AssignedIPs(), "Assigned IPs don't match") + assert.Equal(t, map[string]*crdv1b1.SubnetInfo{}, newIPAssigner.AssignedIPs(), "Assigned IPs don't match") actualIPs, err = listIPAddresses(dummyDevice) require.NoError(t, err, "Failed to list IP addresses") assert.Equal(t, sets.New[string](), actualIPs, "Actual IPs don't match") + _, err = netlink.LinkByName("antrea-ext.20") + require.Error(t, err, "VLAN 20 device should be deleted but was not") } func listIPAddresses(device netlink.Link) (sets.Set[string], error) { @@ -96,7 +122,9 @@ func listIPAddresses(device netlink.Link) (sets.Set[string], error) { } addresses := sets.New[string]() for _, addr := range addrList { - addresses.Insert(addr.IP.String()) + if addr.IP.IsGlobalUnicast() { + addresses.Insert(addr.IPNet.String()) + } } return addresses, nil } diff --git a/test/integration/agent/openflow_test.go b/test/integration/agent/openflow_test.go index 210145441df..feed0be4863 100644 --- a/test/integration/agent/openflow_test.go +++ b/test/integration/agent/openflow_test.go @@ -1699,7 +1699,7 @@ func expectedExternalFlows(ipProtoStr, gwMACStr string) []expectTableFlows { } func prepareEgressMarkFlows(snatIP net.IP, mark, podOFPort, podOFPortRemote uint32, vMAC, localGwMAC net.HardwareAddr, trafficShaping bool) []expectTableFlows { - var ipProtoStr, tunDstFieldName, nextTableName, ctStateMatch string + var ipProtoStr, tunDstFieldName, nextTableName string if snatIP.To4() != nil { tunDstFieldName = "tun_dst" ipProtoStr = "ip" @@ -1709,21 +1709,19 @@ func prepareEgressMarkFlows(snatIP net.IP, mark, podOFPort, podOFPortRemote uint } if trafficShaping { nextTableName = "EgressQoS" - ctStateMatch = "+trk" } else { nextTableName = "L2ForwardingCalc" - ctStateMatch = "+new+trk" } return []expectTableFlows{ { "EgressMark", []*ofTestUtils.ExpectFlow{ { - MatchStr: fmt.Sprintf("priority=200,ct_state=%s,%s,%s=%s", ctStateMatch, ipProtoStr, tunDstFieldName, snatIP), + MatchStr: fmt.Sprintf("priority=200,ct_state=+trk,%s,%s=%s", ipProtoStr, tunDstFieldName, snatIP), ActStr: fmt.Sprintf("set_field:0x%x/0xff->pkt_mark,set_field:0x20/0xf0->reg0,goto_table:%s", mark, nextTableName), }, { - MatchStr: fmt.Sprintf("priority=200,ct_state=%s,%s,in_port=%d", ctStateMatch, ipProtoStr, podOFPort), + MatchStr: fmt.Sprintf("priority=200,ct_state=+trk,%s,in_port=%d", ipProtoStr, podOFPort), ActStr: fmt.Sprintf("set_field:0x%x/0xff->pkt_mark,set_field:0x20/0xf0->reg0,goto_table:%s", mark, nextTableName), }, {
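As a quick manual check on the Egress Node, the VLAN sub-interfaces follow the antrea-ext.<VLAN ID> naming used by the integration test above. Assuming VLAN 20, something like the following (exact output depends on the iproute2 version) should show the 802.1Q tag and the Egress IPs assigned from that subnet:

    $ ip -d link show antrea-ext.20     # expect "vlan protocol 802.1Q id 20" in the details
    $ ip addr show antrea-ext.20        # expect the Egress IPs allocated from the VLAN 20 subnet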