Skip to content

Commit

Permalink
Merge pull request #15957 from danwinship/service-test-flake
Browse files Browse the repository at this point in the history
Automatic merge from submit-queue (batch tested with PRs 15942, 15940, 15957, 15858, 15946)

Make service e2e tests retry to avoid flakes

This is an attempt to fix #14385; given that our tests tend to flake but the upstream service tests don't, it seems like we should make our tests more like theirs. So this replaces our `checkConnectivityToHost` code with code mostly copied from the upstream `execSourceipTest` (which, among other things, retries on failure until the timeout is reached).

There are actually a lot of changes we could make to our tests to use new upstream code, but I wanted to keep this simple for now to avoid introducing new flakes.

Fixes #14385 (hopefully)
  • Loading branch information
openshift-merge-robot authored Aug 26, 2017
2 parents 0518674 + 460db29 commit f29206e
Showing 1 changed file with 32 additions and 27 deletions.
59 changes: 32 additions & 27 deletions test/extended/networking/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package networking
import (
"fmt"
"os"
"strings"
"time"

testexutil "github.com/openshift/origin/test/extended/util"
Expand Down Expand Up @@ -163,32 +164,36 @@ func launchWebserverService(f *e2e.Framework, serviceName string, nodeName strin
return
}

func checkConnectivityToHost(f *e2e.Framework, nodeName string, podName string, host string, timeout int) error {
contName := fmt.Sprintf("%s-container", podName)
pod := &kapiv1.Pod{
TypeMeta: metav1.TypeMeta{
Kind: "Pod",
},
ObjectMeta: metav1.ObjectMeta{
Name: podName,
},
Spec: kapiv1.PodSpec{
Containers: []kapiv1.Container{
{
Name: contName,
Image: "gcr.io/google_containers/busybox",
Command: []string{"wget", fmt.Sprintf("--timeout=%d", timeout), "-s", host},
},
},
NodeName: nodeName,
RestartPolicy: kapiv1.RestartPolicyNever,
},
func checkConnectivityToHost(f *e2e.Framework, nodeName string, podName string, host string, timeout time.Duration) error {
e2e.Logf("Creating an exec pod on node %v", nodeName)
execPodName := e2e.CreateExecPodOrFail(f.ClientSet, f.Namespace.Name, fmt.Sprintf("execpod-sourceip-%s", nodeName), func(pod *kapiv1.Pod) {
pod.Spec.NodeName = nodeName
})
defer func() {
e2e.Logf("Cleaning up the exec pod")
err := f.ClientSet.Core().Pods(f.Namespace.Name).Delete(execPodName, nil)
Expect(err).NotTo(HaveOccurred())
}()
execPod, err := f.ClientSet.Core().Pods(f.Namespace.Name).Get(execPodName, metav1.GetOptions{})
e2e.ExpectNoError(err)

var stdout string
e2e.Logf("Waiting up to %v to wget %s", timeout, host)
cmd := fmt.Sprintf("wget -T 30 -qO- %s", host)
for start := time.Now(); time.Since(start) < timeout; time.Sleep(2) {
stdout, err = e2e.RunHostCmd(execPod.Namespace, execPod.Name, cmd)
if err != nil {
e2e.Logf("got err: %v, retry until timeout", err)
continue
}
// Need to check output because wget -q might omit the error.
if strings.TrimSpace(stdout) == "" {
e2e.Logf("got empty stdout, retry until timeout")
continue
}
break
}
podClient := f.ClientSet.CoreV1().Pods(f.Namespace.Name)
_, err := podClient.Create(pod)
expectNoError(err)
defer podClient.Delete(podName, nil)
return waitForPodSuccessInNamespace(f.ClientSet, podName, contName, f.Namespace.Name)
return err
}

func pluginIsolatesNamespaces() bool {
Expand Down Expand Up @@ -226,7 +231,7 @@ func checkPodIsolation(f1, f2 *e2e.Framework, nodeType NodeType) error {
defer f1.ClientSet.CoreV1().Pods(f1.Namespace.Name).Delete(podName, nil)
ip := e2e.LaunchWebserverPod(f1, podName, serverNode.Name)

return checkConnectivityToHost(f2, clientNode.Name, "isolation-wget", ip, 10)
return checkConnectivityToHost(f2, clientNode.Name, "isolation-wget", ip, 10*time.Second)
}

func checkServiceConnectivity(serverFramework, clientFramework *e2e.Framework, nodeType NodeType) error {
Expand All @@ -247,7 +252,7 @@ func checkServiceConnectivity(serverFramework, clientFramework *e2e.Framework, n
defer serverFramework.ClientSet.CoreV1().Services(serverFramework.Namespace.Name).Delete(podName, nil)
ip := launchWebserverService(serverFramework, podName, serverNode.Name)

return checkConnectivityToHost(clientFramework, clientNode.Name, "service-wget", ip, 10)
return checkConnectivityToHost(clientFramework, clientNode.Name, "service-wget", ip, 10*time.Second)
}

func InSingleTenantContext(body func()) {
Expand Down

0 comments on commit f29206e

Please sign in to comment.