From 34cb15e3b1d0dab68f490bcf64ca4d9ff66a338b Mon Sep 17 00:00:00 2001 From: Quan Tian Date: Thu, 4 Jan 2024 12:57:29 +0800 Subject: [PATCH] Address comments Signed-off-by: Quan Tian --- ci/kind/kind-setup.sh | 13 +- docs/egress.md | 8 +- .../controller/egress/egress_controller.go | 2 +- pkg/agent/ipassigner/ip_assigner_linux.go | 9 +- pkg/agent/route/interfaces.go | 2 +- pkg/agent/route/route_linux.go | 2 +- test/e2e/egress_test.go | 168 +++++++----------- test/e2e/framework.go | 4 +- 8 files changed, 92 insertions(+), 116 deletions(-) diff --git a/ci/kind/kind-setup.sh b/ci/kind/kind-setup.sh index 05c2341b129..7258a24c44c 100755 --- a/ci/kind/kind-setup.sh +++ b/ci/kind/kind-setup.sh @@ -71,7 +71,7 @@ where: bridge network created by kind. --vlan-subnets: specifies the subnets of the VLAN to which all Nodes will be connected, in addition to the primary network. The IP expression of the subnet will be used as the gateway IP. For example, '--vlan-subnets 10.100.100.1/24' means - 10.100.100.1/24 will be assigned to the VLAN sub-interface of the network. + that a VLAN sub-interface will be created on the primary Docker bridge, and it will be assigned the 10.100.100.1/24 address. --vlan-id: specifies the ID of the VLAN to which all Nodes will be connected, in addition to the primary network. Note, '--vlan-subnets' and '--vlan-id' must be specified together. --extra-networks: an extra network creates a separate Docker bridge network (named 'antrea-') with the assigned @@ -256,7 +256,7 @@ function configure_vlan_subnets { docker_run_with_host_net ip link set $vlan_interface up IFS=',' read -r -a vlan_subnets <<< "$VLAN_SUBNETS" for s in "${vlan_subnets[@]}" ; do - echo "configuring extra IP $s to vlan interface $vlan_interface" + echo "Configuring extra IP $s to vlan interface $vlan_interface" docker_run_with_host_net ip addr add dev $vlan_interface $s done docker_run_with_host_net iptables -t filter -A FORWARD -i $bridge_interface -o $vlan_interface -j ACCEPT @@ -274,9 +274,9 @@ function delete_vlan_subnets { for interface in $found_vlan_interfaces ; do if [[ $interface =~ ${vlan_interface_prefix}[0-9]+@${bridge_interface} ]]; then interface_name=${interface%@*} - docker_run_with_host_net ip link del $interface_name docker_run_with_host_net iptables -t filter -D FORWARD -i $bridge_interface -o $interface_name -j ACCEPT || true docker_run_with_host_net iptables -t filter -D FORWARD -o $bridge_interface -i $interface_name -j ACCEPT || true + docker_run_with_host_net ip link del $interface_name fi done } @@ -643,6 +643,13 @@ if [[ $ACTION == "destroy" ]]; then exit 0 fi +if [[ -n "$VLAN_SUBNETS" || -n "$VLAN_ID" ]]; then + if [[ -z "$VLAN_SUBNETS" || -z "$VLAN_ID" ]]; then + echoerr "'--vlan-subnets' and '--vlan-id' must be specified together" + exit 1 + fi +fi + kind_version=$(kind version | awk '{print $2}') kind_version=${kind_version:1} # strip leading 'v' function version_lt() { test "$(printf '%s\n' "$@" | sort -rV | head -n 1)" != "$1"; } diff --git a/docs/egress.md b/docs/egress.md index e1f717be99e..f3c263c3614 100644 --- a/docs/egress.md +++ b/docs/egress.md @@ -201,9 +201,9 @@ of this IP pool. Each IP range may consist of a `cidr` or a pair of `start` and ### SubnetInfo -By default, it's assumed that the IPs allocated from the pool are in the same -subnet as the Node IPs. Starting with Antrea v1.15, IPs can be allocated from a -subnet different from the Node IPs. +By default, it's assumed that the IPs allocated from an ExternalIPPool are in +the same subnet as the Node IPs. Starting with Antrea v1.15, IPs can be +allocated from a subnet different from the Node IPs. The optional `subnetInfo` field contains the subnet attributes of the IPs in this pool. When using a different subnet: @@ -218,7 +218,7 @@ specified VLAN ID. Correspondingly, it's expected that reply traffic towards these Egress IPs is also tagged with the specified VLAN ID when arriving at the Egress Node. -An example of ExternalIPPool using a different subnet is as below: +An example of ExternalIPPool using a non-default subnet is as below: ```yaml apiVersion: crd.antrea.io/v1beta1 diff --git a/pkg/agent/controller/egress/egress_controller.go b/pkg/agent/controller/egress/egress_controller.go index 32d44fa8c3e..523584bec1d 100644 --- a/pkg/agent/controller/egress/egress_controller.go +++ b/pkg/agent/controller/egress/egress_controller.go @@ -644,7 +644,7 @@ func (c *EgressController) uninstallPolicyRoute(ipState *egressIPState) error { return fmt.Errorf("error deleting ip rule for mark %v: %w", ipState.mark, err) } rt.marks.Delete(ipState.mark) - // Delete the route table If it is not used by any Egress. + // Delete the route table if it is not used by any Egress. if rt.marks.Len() == 0 { if err := c.routeClient.DeleteEgressRoutes(rt.tableID); err != nil { return fmt.Errorf("error deleting route table for subnet %v: %w", ipState.subnetInfo, err) diff --git a/pkg/agent/ipassigner/ip_assigner_linux.go b/pkg/agent/ipassigner/ip_assigner_linux.go index 4192ccbfadd..6256bae61b2 100644 --- a/pkg/agent/ipassigner/ip_assigner_linux.go +++ b/pkg/agent/ipassigner/ip_assigner_linux.go @@ -40,13 +40,13 @@ import ( // It can be used to determine whether it's safe to delete an interface when it's no longer used. const vlanInterfacePrefix = "antrea-ext." -// assignee is the unit that IPs are assigned to. All IPs from the same subnet share an assignee. +// assignee is the unit that IPs are assigned to. All IPs from the same VLAN share an assignee. type assignee struct { // logicalInterface is the interface IPs should be logically assigned to. It's also used for IP advertisement. // The field must not be nil. logicalInterface *net.Interface // link is used for IP link management and IP address add/del operation. The field can be nil if IPs don't need to - // assigned to an interface physically. + // be assigned to an interface physically. link netlink.Link // arpResponder is used for ARP responder for IPv4 address. The field should be nil if the interface can respond to // ARP queries itself. @@ -80,7 +80,7 @@ func (as *assignee) deletable() bool { func (as *assignee) destroy() error { if err := netlink.LinkDel(as.link); err != nil { - return fmt.Errorf("error deleting interface %v", as.link) + return fmt.Errorf("error deleting interface %v: %w", as.link, err) } return nil } @@ -433,6 +433,7 @@ func (a *ipAssigner) AssignedIPs() map[string]*crdv1b1.SubnetInfo { // InitIPs loads the IPs from the dummy/vlan devices and replaces the IPs that are assigned to it // with the given ones. This function also adds the given IPs to the ARP/NDP responder if // applicable. It can be used to recover the IP assigner to the desired state after Agent restarts. +// It's not thread-safe and should only be called once for initialization before calling other methods. func (a *ipAssigner) InitIPs(desired map[string]*crdv1b1.SubnetInfo) error { if err := a.loadIPAddresses(); err != nil { return fmt.Errorf("error when loading IP addresses from the system: %v", err) @@ -453,6 +454,8 @@ func (a *ipAssigner) InitIPs(desired map[string]*crdv1b1.SubnetInfo) error { } func (a *ipAssigner) GetInterfaceID(subnetInfo *crdv1b1.SubnetInfo) (int, bool) { + a.mutex.RLock() + defer a.mutex.RUnlock() as, _ := a.getAssignee(subnetInfo, false) // The assignee doesn't exist, meaning the IP has been unassigned previously. if as == nil { diff --git a/pkg/agent/route/interfaces.go b/pkg/agent/route/interfaces.go index 4debd207d25..fd0d725d5c7 100644 --- a/pkg/agent/route/interfaces.go +++ b/pkg/agent/route/interfaces.go @@ -59,7 +59,7 @@ type Interface interface { // DeleteSNATRule should delete rule to SNAT outgoing traffic with the mark. DeleteSNATRule(mark uint32) error - // RestoreEgressRoutesAndRules restores the routes and rules configured on the system for Egress to the cache. + // RestoreEgressRoutesAndRules restores the routes and rules configured on the system for Egresses to the cache. RestoreEgressRoutesAndRules(minTableID, maxTableID int) error // AddEgressRoutes creates a route table which routes Egress traffic to the provided gateway via the device. diff --git a/pkg/agent/route/route_linux.go b/pkg/agent/route/route_linux.go index 2420e4ab6cf..dd37f23d421 100644 --- a/pkg/agent/route/route_linux.go +++ b/pkg/agent/route/route_linux.go @@ -993,7 +993,7 @@ func (c *Client) listIPRoutesOnGW() ([]netlink.Route, error) { return routes, nil } -// RestoreEgressRoutesAndRules simply deletes all IP routes and rules created for Egress for now. +// RestoreEgressRoutesAndRules simply deletes all IP routes and rules created for Egresses for now. // It may be better to keep the ones whose Egress IPs are still on this Node, but it's a bit hard to achieve it at the // moment because the marks are not permanent and could change upon restart. func (c *Client) RestoreEgressRoutesAndRules(minTableID, maxTableID int) error { diff --git a/test/e2e/egress_test.go b/test/e2e/egress_test.go index 075ce134138..c108d9d8174 100644 --- a/test/e2e/egress_test.go +++ b/test/e2e/egress_test.go @@ -154,57 +154,10 @@ func testEgressClientIP(t *testing.T, data *TestData) { serverIPStr = fmt.Sprintf("[%s]", tt.serverIP) } - // getClientIP gets the translated client IP by accessing the API that replies the request's client IP. - getClientIP := func(pod string) (string, string, error) { - url := fmt.Sprintf("%s:8080/clientip", serverIPStr) - return data.runWgetCommandOnBusyboxWithRetry(pod, data.testNamespace, url, 5) - } - - // assertClientIP asserts the Pod is translated to the provided client IP. - assertClientIP := func(pod string, clientIPs ...string) { - var exeErr error - var stdout, stderr string - if err := wait.Poll(100*time.Millisecond, 5*time.Second, func() (done bool, err error) { - stdout, stderr, exeErr = getClientIP(pod) - if exeErr != nil { - return false, nil - } - - // The stdout return is in this format: x.x.x.x:port or [xx:xx:xx::x]:port - host, _, err := net.SplitHostPort(stdout) - if err != nil { - return false, nil - } - for _, cip := range clientIPs { - if cip == host { - return true, nil - } - } - return false, nil - }); err != nil { - t.Fatalf("Failed to get expected client IPs %s for Pod %s, stdout: %s, stderr: %s, err: %v", clientIPs, pod, stdout, stderr, exeErr) - } - } - - // assertConnError asserts the Pod is not able to access the API that replies the request's client IP. - assertConnError := func(pod string) { - var exeErr error - var stdout, stderr string - if err := wait.Poll(100*time.Millisecond, 2*time.Second, func() (done bool, err error) { - stdout, stderr, exeErr = getClientIP(pod) - if exeErr != nil { - return true, nil - } - return false, nil - }); err != nil { - t.Fatalf("Failed to get expected error, stdout: %v, stderr: %v, err: %v", stdout, stderr, exeErr) - } - } - // As the fake server runs in a netns of the Egress Node, only egress Node can reach the server, Pods running on // other Nodes cannot reach it before Egress is added. - assertClientIP(localPod, tt.localIP0, tt.localIP1) - assertConnError(remotePod) + assertClientIP(data, t, localPod, busyboxContainerName, serverIPStr, tt.localIP0, tt.localIP1) + assertConnError(data, t, remotePod, busyboxContainerName, serverIPStr) t.Logf("Creating an Egress applying to all e2e Pods") matchExpressions := []metav1.LabelSelectorRequirement{ @@ -215,8 +168,8 @@ func testEgressClientIP(t *testing.T, data *TestData) { } egress := data.createEgress(t, "egress-", matchExpressions, nil, "", egressNodeIP, nil) defer data.crdClient.CrdV1beta1().Egresses().Delete(context.TODO(), egress.Name, metav1.DeleteOptions{}) - assertClientIP(localPod, egressNodeIP) - assertClientIP(remotePod, egressNodeIP) + assertClientIP(data, t, localPod, busyboxContainerName, serverIPStr, egressNodeIP) + assertClientIP(data, t, remotePod, busyboxContainerName, serverIPStr, egressNodeIP) var err error err = wait.Poll(time.Millisecond*100, time.Second, func() (bool, error) { @@ -257,8 +210,8 @@ func testEgressClientIP(t *testing.T, data *TestData) { if err != nil { t.Fatalf("Failed to update Egress %v: %v", egress, err) } - assertClientIP(localPod, tt.localIP0, tt.localIP1) - assertClientIP(remotePod, egressNodeIP) + assertClientIP(data, t, localPod, busyboxContainerName, serverIPStr, tt.localIP0, tt.localIP1) + assertClientIP(data, t, remotePod, busyboxContainerName, serverIPStr, egressNodeIP) t.Log("Updating the Egress's AppliedTo to localPod only") egress.Spec.AppliedTo = v1beta1.AppliedTo{ @@ -270,8 +223,8 @@ func testEgressClientIP(t *testing.T, data *TestData) { if err != nil { t.Fatalf("Failed to update Egress %v: %v", egress, err) } - assertClientIP(localPod, egressNodeIP) - assertConnError(remotePod) + assertClientIP(data, t, localPod, busyboxContainerName, serverIPStr, egressNodeIP) + assertConnError(data, t, remotePod, busyboxContainerName, serverIPStr) t.Logf("Updating the Egress's EgressIP to %s", tt.localIP1) egress.Spec.EgressIP = tt.localIP1 @@ -279,16 +232,16 @@ func testEgressClientIP(t *testing.T, data *TestData) { if err != nil { t.Fatalf("Failed to update Egress %v: %v", egress, err) } - assertClientIP(localPod, tt.localIP1) - assertConnError(remotePod) + assertClientIP(data, t, localPod, busyboxContainerName, serverIPStr, tt.localIP1) + assertConnError(data, t, remotePod, busyboxContainerName, serverIPStr) t.Log("Deleting the Egress") err = data.crdClient.CrdV1beta1().Egresses().Delete(context.TODO(), egress.Name, metav1.DeleteOptions{}) if err != nil { t.Fatalf("Failed to delete Egress %v: %v", egress, err) } - assertClientIP(localPod, tt.localIP0, tt.localIP1) - assertConnError(remotePod) + assertClientIP(data, t, localPod, busyboxContainerName, serverIPStr, tt.localIP0, tt.localIP1) + assertConnError(data, t, remotePod, busyboxContainerName, serverIPStr) }) } } @@ -334,14 +287,14 @@ func testEgressClientIPFromVLANSubnet(t *testing.T, data *TestData) { clientNode := workerNodeName(1) clientPod1 := fmt.Sprintf("clientpod1-%s", tt.name) clientPod2 := fmt.Sprintf("clientpod2-%s", tt.name) - if err := data.createBusyboxPodOnNode(clientPod1, data.testNamespace, clientNode, false); err != nil { + if err := data.createToolboxPodOnNode(clientPod1, data.testNamespace, clientNode, false); err != nil { t.Fatalf("Failed to create client Pod %s: %v", clientPod1, err) } defer deletePodWrapper(t, data, data.testNamespace, clientPod1) if err := data.podWaitForRunning(defaultTimeout, clientPod1, data.testNamespace); err != nil { t.Fatalf("Error when waiting for Pod '%s' to be in the Running state", clientPod1) } - if err := data.createBusyboxPodOnNode(clientPod2, data.testNamespace, clientNode, false); err != nil { + if err := data.createToolboxPodOnNode(clientPod2, data.testNamespace, clientNode, false); err != nil { t.Fatalf("Failed to create applied Pod %s: %v", clientPod2, err) } defer deletePodWrapper(t, data, data.testNamespace, clientPod2) @@ -349,9 +302,11 @@ func testEgressClientIPFromVLANSubnet(t *testing.T, data *TestData) { t.Fatalf("Error when waiting for Pod '%s' to be in the Running state", clientPod2) } + gatewayIP := net.ParseIP(tt.vlanGateway) _, cidr, _ := net.ParseCIDR(tt.vlanSubnet) prefixLength, _ := cidr.Mask.Size() - ipRange := v1beta1.IPRange{CIDR: tt.vlanSubnet} + // We need only 1 Egress IP, set the range to include the next IP of the gateway IP. + ipRange := v1beta1.IPRange{Start: ip.NextIP(gatewayIP).String(), End: ip.NextIP(gatewayIP).String()} subnet := v1beta1.SubnetInfo{ Gateway: tt.vlanGateway, PrefixLength: int32(prefixLength), @@ -360,13 +315,9 @@ func testEgressClientIPFromVLANSubnet(t *testing.T, data *TestData) { pool := data.createExternalIPPool(t, "pool-vlan", ipRange, &subnet, nil, nil) defer data.crdClient.CrdV1beta1().ExternalIPPools().Delete(context.TODO(), pool.Name, metav1.DeleteOptions{}) - // Specify the Egress IP to the next IP of the gateway IP. - egressIP := ip.NextIP(net.ParseIP(tt.vlanGateway)).String() - egress := data.createEgress(t, "egress-vlan", nil, map[string]string{"antrea-e2e": clientPod1}, pool.Name, egressIP, nil) + egress := data.createEgress(t, "egress-vlan", nil, map[string]string{"antrea-e2e": clientPod1}, pool.Name, "", nil) defer data.crdClient.CrdV1beta1().Egresses().Delete(context.TODO(), egress.Name, metav1.DeleteOptions{}) - // Use Poll to wait the interval before the first run to detect the case that the IP is assigned to any Node - // when it's not supposed to. - err := wait.Poll(500*time.Millisecond, 3*time.Second, func() (done bool, err error) { + err := wait.PollImmediate(500*time.Millisecond, 3*time.Second, func() (done bool, err error) { egress, err = data.crdClient.CrdV1beta1().Egresses().Get(context.TODO(), egress.Name, metav1.GetOptions{}) if err != nil { return false, err @@ -389,35 +340,8 @@ func testEgressClientIPFromVLANSubnet(t *testing.T, data *TestData) { defaultClientIP = workerNodeIPv6(1) } - // getClientIP gets the translated client IP by accessing the API that replies the request's client IP. - getClientIP := func(pod string) (string, string, error) { - url := fmt.Sprintf("%s:8080/clientip", serverIPStr) - return data.runWgetCommandOnBusyboxWithRetry(pod, data.testNamespace, url, 5) - } - - // assertClientIP asserts the Pod is translated to the provided client IP. - assertClientIP := func(pod string, clientIP string) { - var exeErr error - var stdout, stderr string - if err := wait.Poll(100*time.Millisecond, 5*time.Second, func() (done bool, err error) { - stdout, stderr, exeErr = getClientIP(pod) - if exeErr != nil { - return false, nil - } - - // The stdout return is in this format: x.x.x.x:port or [xx:xx:xx::x]:port - host, _, err := net.SplitHostPort(stdout) - if err != nil { - return false, nil - } - return clientIP == host, nil - }); err != nil { - t.Fatalf("Failed to get expected client IP %s for Pod %s, stdout: %s, stderr: %s, err: %v", clientIP, pod, stdout, stderr, exeErr) - } - } - - assertClientIP(clientPod1, egress.Spec.EgressIP) - assertClientIP(clientPod2, defaultClientIP) + assertClientIP(data, t, clientPod1, toolboxContainerName, serverIPStr, egress.Spec.EgressIP) + assertClientIP(data, t, clientPod2, toolboxContainerName, serverIPStr, defaultClientIP) t.Log("Updating the Egress's AppliedTo to clientPod2 only") egress.Spec.AppliedTo = v1beta1.AppliedTo{ @@ -429,16 +353,16 @@ func testEgressClientIPFromVLANSubnet(t *testing.T, data *TestData) { if err != nil { t.Fatalf("Failed to update Egress %v: %v", egress, err) } - assertClientIP(clientPod1, defaultClientIP) - assertClientIP(clientPod2, egress.Spec.EgressIP) + assertClientIP(data, t, clientPod1, toolboxContainerName, serverIPStr, defaultClientIP) + assertClientIP(data, t, clientPod2, toolboxContainerName, serverIPStr, egress.Spec.EgressIP) t.Log("Deleting the Egress") err = data.crdClient.CrdV1beta1().Egresses().Delete(context.TODO(), egress.Name, metav1.DeleteOptions{}) if err != nil { t.Fatalf("Failed to delete Egress %v: %v", egress, err) } - assertClientIP(clientPod1, defaultClientIP) - assertClientIP(clientPod2, defaultClientIP) + assertClientIP(data, t, clientPod1, toolboxContainerName, serverIPStr, defaultClientIP) + assertClientIP(data, t, clientPod2, toolboxContainerName, serverIPStr, defaultClientIP) }) } } @@ -1068,3 +992,45 @@ func (data *TestData) waitForEgressRealized(egress *v1beta1.Egress) (*v1beta1.Eg } return egress, nil } + +// assertClientIP asserts the Pod is translated to the provided client IP. +func assertClientIP(data *TestData, t *testing.T, pod, container, server string, clientIPs ...string) { + var exeErr error + var stdout, stderr string + err := wait.Poll(100*time.Millisecond, 5*time.Second, func() (done bool, err error) { + url := fmt.Sprintf("%s:8080/clientip", server) + stdout, stderr, exeErr = data.runWgetCommandFromTestPodWithRetry(pod, data.testNamespace, container, url, 5) + if exeErr != nil { + return false, nil + } + + // The stdout return is in this format: x.x.x.x:port or [xx:xx:xx::x]:port + host, _, err := net.SplitHostPort(stdout) + if err != nil { + return false, nil + } + for _, cip := range clientIPs { + if cip == host { + return true, nil + } + } + return false, nil + }) + require.NoError(t, err, "Failed to get expected client IPs %s for Pod %s, stdout: %s, stderr: %s, err: %v", clientIPs, pod, stdout, stderr, exeErr) +} + +// assertConnError asserts the Pod is not able to access the API that replies the request's client IP. +func assertConnError(data *TestData, t *testing.T, pod, container, server string) { + var exeErr error + var stdout, stderr string + err := wait.Poll(100*time.Millisecond, 2*time.Second, func() (done bool, err error) { + url := fmt.Sprintf("%s:8080/clientip", server) + stdout, stderr, exeErr = data.runWgetCommandFromTestPodWithRetry(pod, data.testNamespace, url, container, 5) + if exeErr != nil { + return true, nil + } + return false, nil + }) + require.NoError(t, err, "Failed to get expected error, stdout: %v, stderr: %v, err: %v", stdout, stderr, exeErr) + +} diff --git a/test/e2e/framework.go b/test/e2e/framework.go index 8a9919d93f1..d9e9af1cffa 100644 --- a/test/e2e/framework.go +++ b/test/e2e/framework.go @@ -487,7 +487,7 @@ func (data *TestData) collectExternalInfo() error { for _, ip := range ips { parsedIP := net.ParseIP(ip) if parsedIP == nil { - continue + return fmt.Errorf("invalid external server IP %s", ip) } if parsedIP.To4() != nil { externalInfo.externalServerIPv4 = ip @@ -500,7 +500,7 @@ func (data *TestData) collectExternalInfo() error { for _, subnet := range subnets { gatewayIP, _, err := net.ParseCIDR(subnet) if err != nil { - continue + return fmt.Errorf("invalid vlan subnet %s: %w", subnet, err) } if gatewayIP.To4() != nil { externalInfo.vlanSubnetIPv4 = subnet