Skip to content

Commit

Permalink
Fix flaky TestClusterIPv4 e2e tests (#2866)
Browse files Browse the repository at this point in the history
The tests checking access from the Node / hostNetwork were broken on
Kind clusters: wget is used to check connectivity, but wget is not
installed on the Kind Node image. This issue was going unnoticed however
because the tests were not checking for errors properly.

The tests checking access from the Pod network were flaky (at least with
kube-proxy) because no more than one connection was attempted, with
little delay between Service creation and the connectivity
test. Instead, we replace wget with agnhost, and we make up to 5
connection attempts.

Fixes #2860

Signed-off-by: Antonin Bas <[email protected]>
  • Loading branch information
antoninbas authored Oct 14, 2021
1 parent 00716fc commit 784dc4c
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 23 deletions.
2 changes: 1 addition & 1 deletion test/e2e/basic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ func testDeletePod(t *testing.T, data *TestData) {
podName := randName("test-pod-")

t.Logf("Creating an agnhost test Pod on '%s'", nodeName)
if err := data.createAgnhostPodOnNode(podName, testNamespace, nodeName); err != nil {
if err := data.createAgnhostPodOnNode(podName, testNamespace, nodeName, false); err != nil {
t.Fatalf("Error when creating agnhost test Pod: %v", err)
}
if err := data.podWaitForRunning(defaultTimeout, podName, testNamespace); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/connectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func (data *TestData) testPodConnectivitySameNode(t *testing.T) {
t.Logf("Creating %d agnhost Pods on '%s'", numPods, workerNode)
for i := range podInfos {
podInfos[i].os = clusterInfo.nodesOS[workerNode]
if err := data.createAgnhostPodOnNode(podInfos[i].name, testNamespace, workerNode); err != nil {
if err := data.createAgnhostPodOnNode(podInfos[i].name, testNamespace, workerNode, false); err != nil {
t.Fatalf("Error when creating agnhost test Pod '%s': %v", podInfos[i], err)
}
defer deletePodWrapper(t, data, podInfos[i].name)
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -2092,9 +2092,9 @@ func (data *TestData) copyNodeFiles(nodeName string, fileName string, covDir str

// createAgnhostPodOnNode creates a Pod in the test namespace with a single agnhost container. The
// Pod will be scheduled on the specified Node (if nodeName is not empty).
func (data *TestData) createAgnhostPodOnNode(name string, ns string, nodeName string) error {
func (data *TestData) createAgnhostPodOnNode(name string, ns string, nodeName string, hostNetwork bool) error {
sleepDuration := 3600 // seconds
return data.createPodOnNode(name, ns, nodeName, agnhostImage, []string{"sleep", strconv.Itoa(sleepDuration)}, nil, nil, nil, false, nil)
return data.createPodOnNode(name, ns, nodeName, agnhostImage, []string{"sleep", strconv.Itoa(sleepDuration)}, nil, nil, nil, hostNetwork, nil)
}

func (data *TestData) createDaemonSet(name string, ns string, ctrName string, image string, cmd []string, args []string) (*appsv1.DaemonSet, func() error, error) {
Expand Down
56 changes: 37 additions & 19 deletions test/e2e/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ import (
"net"
"strings"
"testing"
"time"

"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/wait"
)

func TestClusterIPv4(t *testing.T) {
Expand All @@ -43,10 +45,17 @@ func testClusterIP(t *testing.T, isIPv6 bool) {
defer teardownTest(t, data)

nodes := []string{nodeName(0), nodeName(1)}
var busyboxes []string
clients := make(map[string]string)
for idx, node := range nodes {
podName, _, _ := createAndWaitForPod(t, data, data.createBusyboxPodOnNode, fmt.Sprintf("busybox-%d-", idx), node, testNamespace, false)
busyboxes = append(busyboxes, podName)
podName, _, _ := createAndWaitForPod(t, data, data.createAgnhostPodOnNode, fmt.Sprintf("client-%d-", idx), node, testNamespace, false)
require.NoError(t, err)
clients[node] = podName
}
hostNetworkClients := make(map[string]string)
for idx, node := range nodes {
podName, _, _ := createAndWaitForPod(t, data, data.createAgnhostPodOnNode, fmt.Sprintf("hostnet-client-%d-", idx), node, testNamespace, true)
require.NoError(t, err)
hostNetworkClients[node] = podName
}

nginx := fmt.Sprintf("nginx-%v", isIPv6)
Expand All @@ -62,39 +71,48 @@ func testClusterIP(t *testing.T, isIPv6 bool) {

createAndWaitForPod(t, data, data.createNginxPodOnNode, nginx, nodeName(0), testNamespace, false)
t.Run("Non-HostNetwork Endpoints", func(t *testing.T) {
testClusterIPCases(t, data, url, nodes, busyboxes)
testClusterIPCases(t, data, url, clients, hostNetworkClients)
})

require.NoError(t, data.deletePod(testNamespace, nginx))
createAndWaitForPod(t, data, data.createNginxPodOnNode, hostNginx, nodeName(0), testNamespace, true)
t.Run("HostNetwork Endpoints", func(t *testing.T) {
testClusterIPCases(t, data, url, nodes, busyboxes)
testClusterIPCases(t, data, url, clients, hostNetworkClients)
})
}

func testClusterIPCases(t *testing.T, data *TestData, url string, nodes, pods []string) {
func testClusterIPCases(t *testing.T, data *TestData, url string, clients, hostNetworkClients map[string]string) {
t.Run("All Nodes can access Service ClusterIP", func(t *testing.T) {
skipIfProxyAllDisabled(t, data)
skipIfKubeProxyEnabled(t, data)
for _, node := range nodes {
testClusterIPFromNode(t, url, node)
for node, pod := range hostNetworkClients {
testClusterIPFromPod(t, data, url, node, pod, true)
}
})
t.Run("Pods from all Nodes can access Service ClusterIP", func(t *testing.T) {
for _, pod := range pods {
testClusterIPFromPod(t, data, url, pod)
for node, pod := range clients {
testClusterIPFromPod(t, data, url, node, pod, false)
}
})
}

func testClusterIPFromPod(t *testing.T, data *TestData, url, podName string) {
_, _, err := data.runCommandFromPod(testNamespace, podName, busyboxContainerName, []string{"wget", "-O", "-", url, "-T", "1"})
require.NoError(t, err, "Pod should be able to connect Service ClusterIP")
}

func testClusterIPFromNode(t *testing.T, url, nodeName string) {
_, _, _, err := RunCommandOnNode(nodeName, strings.Join([]string{"wget", "-O", "-", url, "-T", "1"}, " "))
require.NoError(t, err, "Node should be able to connect Service ClusterIP")
func testClusterIPFromPod(t *testing.T, data *TestData, url, nodeName, podName string, hostNetwork bool) {
cmd := []string{"/agnhost", "connect", url, "--timeout=1s", "--protocol=tcp"}
err := wait.PollImmediate(1*time.Second, 5*time.Second, func() (bool, error) {
t.Logf(strings.Join(cmd, " "))
stdout, stderr, err := data.runCommandFromPod(testNamespace, podName, agnhostContainerName, cmd)
t.Logf("stdout: %s - stderr: %s - err: %v", stdout, stderr, err)
if err == nil {
return true, nil
}
return false, nil
})
if err != nil {
t.Errorf(
"Pod '%s' on Node '%s' (hostNetwork: %t) should be able to connect to Service ClusterIP",
podName, nodeName, hostNetwork,
)
}
}

// TestNodePortWindows tests NodePort Service on Windows Node. It is a temporary test to replace upstream Kubernetes one:
Expand All @@ -119,7 +137,7 @@ func TestNodePortWindows(t *testing.T) {
// It doesn't need to be the control-plane for e2e test and other Linux workers will work as well. However, in this
// e2e framework, nodeName(0)/Control-plane Node is guaranteed to be a Linux one.
clientName := "agnhost-client"
require.NoError(t, data.createAgnhostPodOnNode(clientName, testNamespace, nodeName(0)))
require.NoError(t, data.createAgnhostPodOnNode(clientName, testNamespace, nodeName(0), false))
defer data.deletePodAndWait(defaultTimeout, clientName, testNamespace)
_, err = data.podWaitForIPs(defaultTimeout, clientName, testNamespace)
require.NoError(t, err)
Expand Down

0 comments on commit 784dc4c

Please sign in to comment.