Skip to content
This repository has been archived by the owner on Oct 24, 2023. It is now read-only.

Commit

Permalink
test: address 4 E2E test flakes (#1997)
Browse files Browse the repository at this point in the history
1. validate service URLs w/ retries
2. don’t enforce “shell out command” timeouts everywhere
3. standardize Linux stability tests timeout to 3 secs per test
4. standardize some Windows stability tests timeout to 1 minute per test
  • Loading branch information
jackfrancis authored Sep 20, 2019
1 parent 31ec26c commit 0ec70c1
Show file tree
Hide file tree
Showing 11 changed files with 104 additions and 66 deletions.
10 changes: 9 additions & 1 deletion test/e2e/kubernetes/deployment/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
)

const (
commandTimeout = 1 * time.Minute
validateDeploymentNotExistRetries = 3
deploymentGetAfterCreateTimeout = 1 * time.Minute
)
Expand Down Expand Up @@ -73,6 +72,7 @@ type Container struct {
// CreateLinuxDeploy will create a deployment for a given image with a name in a namespace
// --overrides='{ "spec":{"template":{"spec": {"nodeSelector":{"beta.kubernetes.io/os":"linux"}}}}}'
func CreateLinuxDeploy(image, name, namespace, miscOpts string) (*Deployment, error) {
var commandTimeout time.Duration
var cmd *exec.Cmd
overrides := `{ "spec":{"template":{"spec": {"nodeSelector":{"beta.kubernetes.io/os":"linux"}}}}}`
if miscOpts != "" {
Expand Down Expand Up @@ -118,6 +118,7 @@ func CreateLinuxDeployDeleteIfExists(pattern, image, name, namespace, miscOpts s
// RunLinuxDeploy will create a deployment that runs a bash command in a pod
// --overrides='{ "spec":{"template":{"spec": {"nodeSelector":{"beta.kubernetes.io/os":"linux"}}}}}'
func RunLinuxDeploy(image, name, namespace, command string, replicas int) (*Deployment, error) {
var commandTimeout time.Duration
overrides := `{ "spec":{"template":{"spec": {"nodeSelector":{"beta.kubernetes.io/os":"linux"}}}}}`
cmd := exec.Command("k", "run", name, "-n", namespace, "--image", image, "--image-pull-policy=IfNotPresent", "--replicas", strconv.Itoa(replicas), "--overrides", overrides, "--command", "--", "/bin/sh", "-c", command)
out, err := util.RunAndLogCommand(cmd, commandTimeout)
Expand Down Expand Up @@ -148,6 +149,7 @@ func RunLinuxDeployDeleteIfExists(pattern, image, name, namespace, command strin

// CreateWindowsDeploy will create a deployment for a given image with a name in a namespace and create a service mapping a hostPort
func CreateWindowsDeploy(pattern, image, name, namespace, miscOpts string) (*Deployment, error) {
var commandTimeout time.Duration
overrides := `{ "spec":{"template":{"spec": {"nodeSelector":{"beta.kubernetes.io/os":"windows"}}}}}`
var args []string
args = append(args, "run", name)
Expand All @@ -173,6 +175,7 @@ func CreateWindowsDeploy(pattern, image, name, namespace, miscOpts string) (*Dep

// CreateWindowsDeployWithHostport will create a deployment for a given image with a name in a namespace and create a service mapping a hostPort
func CreateWindowsDeployWithHostport(image, name, namespace string, port int, hostport int) (*Deployment, error) {
var commandTimeout time.Duration
overrides := `{ "spec":{"template":{"spec": {"nodeSelector":{"beta.kubernetes.io/os":"windows"}}}}}`
cmd := exec.Command("k", "run", name, "-n", namespace, "--image", image, "--image-pull-policy=IfNotPresent", "--port", strconv.Itoa(port), "--hostport", strconv.Itoa(hostport), "--overrides", overrides)
out, err := util.RunAndLogCommand(cmd, commandTimeout)
Expand Down Expand Up @@ -287,6 +290,7 @@ func GetAllByPrefix(prefix, namespace string) ([]Deployment, error) {

// Describe will describe a deployment resource
func (d *Deployment) Describe() error {
var commandTimeout time.Duration
cmd := exec.Command("k", "describe", "deployment", d.Metadata.Name, "-n", d.Metadata.Namespace)
out, err := util.RunAndLogCommand(cmd, commandTimeout)
log.Printf("\n%s\n", string(out))
Expand All @@ -295,6 +299,7 @@ func (d *Deployment) Describe() error {

// Delete will delete a deployment in a given namespace
func (d *Deployment) Delete(retries int) error {
var commandTimeout time.Duration
var kubectlOutput []byte
var kubectlError error
for i := 0; i < retries; i++ {
Expand Down Expand Up @@ -328,6 +333,7 @@ func (d *Deployment) Delete(retries int) error {

// Expose will create a load balancer and expose the deployment on a given port
func (d *Deployment) Expose(svcType string, targetPort, exposedPort int) error {
var commandTimeout time.Duration
cmd := exec.Command("k", "expose", "deployment", d.Metadata.Name, "--type", svcType, "-n", d.Metadata.Namespace, "--target-port", strconv.Itoa(targetPort), "--port", strconv.Itoa(exposedPort))
out, err := util.RunAndLogCommand(cmd, commandTimeout)
if err != nil {
Expand Down Expand Up @@ -361,6 +367,7 @@ func (d *Deployment) ExposeDeleteIfExist(pattern, namespace, svcType string, tar

// ScaleDeployment scales a deployment to n instances
func (d *Deployment) ScaleDeployment(n int) error {
var commandTimeout time.Duration
cmd := exec.Command("k", "scale", fmt.Sprintf("--replicas=%d", n), "deployment", d.Metadata.Name)
out, err := util.RunAndLogCommand(cmd, commandTimeout)
if err != nil {
Expand All @@ -372,6 +379,7 @@ func (d *Deployment) ScaleDeployment(n int) error {

// CreateDeploymentHPA applies autoscale characteristics to deployment
func (d *Deployment) CreateDeploymentHPA(cpuPercent, min, max int) error {
var commandTimeout time.Duration
cmd := exec.Command("k", "autoscale", "deployment", d.Metadata.Name, fmt.Sprintf("--cpu-percent=%d", cpuPercent),
fmt.Sprintf("--min=%d", min), fmt.Sprintf("--max=%d", max))
out, err := util.RunAndLogCommand(cmd, commandTimeout)
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/kubernetes/hpa/hpa.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ import (
"github.com/pkg/errors"
)

const commandTimeout = 1 * time.Minute

type List struct {
HPAs []HPA `json:"items"`
}
Expand Down Expand Up @@ -132,6 +130,7 @@ func GetAllByPrefix(prefix, namespace string) ([]HPA, error) {

// Describe will describe a HPA resource
func (h *HPA) Describe() error {
var commandTimeout time.Duration
cmd := exec.Command("k", "describe", "hpa", h.Metadata.Name, "-n", h.Metadata.Namespace)
out, err := util.RunAndLogCommand(cmd, commandTimeout)
log.Printf("\n%s\n", string(out))
Expand All @@ -140,6 +139,7 @@ func (h *HPA) Describe() error {

// Delete will delete a HPA in a given namespace
func (h *HPA) Delete(retries int) error {
var commandTimeout time.Duration
var kubectlOutput []byte
var kubectlError error
for i := 0; i < retries; i++ {
Expand Down
3 changes: 1 addition & 2 deletions test/e2e/kubernetes/job/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ import (
"github.com/pkg/errors"
)

const commandTimeout = 1 * time.Minute

// List is a container that holds all jobs returned from doing a kubectl get jobs
type List struct {
Jobs []Job `json:"items"`
Expand Down Expand Up @@ -311,6 +309,7 @@ func DescribeJobs(jobPrefix, namespace string) {

// Describe will describe a Job resource
func (j *Job) Describe() error {
var commandTimeout time.Duration
cmd := exec.Command("k", "describe", "jobs/", j.Metadata.Name, "-n", j.Metadata.Namespace)
out, err := util.RunAndLogCommand(cmd, commandTimeout)
log.Printf("\n%s\n", string(out))
Expand Down
36 changes: 19 additions & 17 deletions test/e2e/kubernetes/kubernetes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ const (
sleepBetweenRetriesWhenWaitingForPodReady = 1 * time.Second
sleepBetweenRetriesRemoteSSHCommand = 3 * time.Second
timeoutWhenWaitingForPodOutboundAccess = 1 * time.Minute
stabilityCommandTimeout = 1 * time.Second
stabilityCommandTimeout = 3 * time.Second
windowsCommandTimeout = 1 * time.Minute
validateNetworkPolicyTimeout = 3 * time.Minute
validateDNSTimeout = 2 * time.Minute
Expand Down Expand Up @@ -981,7 +981,7 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
By("Ensuring we can create an ILB service attachment")
sILB, err := service.CreateServiceFromFileDeleteIfExist(filepath.Join(WorkloadDir, "ingress-nginx-ilb.yaml"), serviceName+"-ilb", "default")
Expect(err).NotTo(HaveOccurred())
svc, err := sILB.WaitForIngress(cfg.Timeout, 5*time.Second)
err = sILB.WaitForIngress(cfg.Timeout, 5*time.Second)
Expect(err).NotTo(HaveOccurred())

By("Ensuring we can create a curl pod to connect to the service")
Expand All @@ -997,12 +997,12 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
By("Ensuring we can connect to the ILB service from another pod")
var success bool
for _, curlPod := range curlPods {
pass, curlErr := curlPod.ValidateCurlConnection(svc.Status.LoadBalancer.Ingress[0]["ip"], 30*time.Second, 3*time.Minute)
pass, curlErr := curlPod.ValidateCurlConnection(sILB.Status.LoadBalancer.Ingress[0]["ip"], 30*time.Second, 3*time.Minute)
if curlErr == nil && pass {
success = true
break
} else {
e := svc.Describe()
e := sILB.Describe()
if e != nil {
log.Printf("Unable to describe service\n: %s", e)
}
Expand All @@ -1013,21 +1013,21 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
By("Ensuring we can create an ELB service attachment")
sELB, err := service.CreateServiceFromFileDeleteIfExist(filepath.Join(WorkloadDir, "ingress-nginx-elb.yaml"), serviceName+"-elb", "default")
Expect(err).NotTo(HaveOccurred())
svc, err = sELB.WaitForIngress(cfg.Timeout, 5*time.Second)
err = sELB.WaitForIngress(cfg.Timeout, 5*time.Second)
Expect(err).NotTo(HaveOccurred())

By("Ensuring we can connect to the ELB service on the service IP")
valid := sELB.Validate("(Welcome to nginx)", 5, 30*time.Second, cfg.Timeout)
Expect(valid).To(BeTrue())
err = sELB.ValidateWithRetry("(Welcome to nginx)", 30*time.Second, cfg.Timeout)
Expect(err).NotTo(HaveOccurred())
By("Ensuring we can connect to the ELB service from another pod")
success = false
for _, curlPod := range curlPods {
pass, curlErr := curlPod.ValidateCurlConnection(svc.Status.LoadBalancer.Ingress[0]["ip"], 30*time.Second, 3*time.Minute)
pass, curlErr := curlPod.ValidateCurlConnection(sELB.Status.LoadBalancer.Ingress[0]["ip"], 30*time.Second, 3*time.Minute)
if curlErr == nil && pass {
success = true
break
} else {
e := svc.Describe()
e := sELB.Describe()
if e != nil {
log.Printf("Unable to describe service\n: %s", e)
}
Expand Down Expand Up @@ -1571,10 +1571,12 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
Expect(err).NotTo(HaveOccurred())
iisService, err := service.Get(deploymentName, "default")
Expect(err).NotTo(HaveOccurred())
err = iisService.WaitForIngress(cfg.Timeout, 5*time.Second)
Expect(err).NotTo(HaveOccurred())

By("Verifying that the service is reachable and returns the default IIS start page")
valid := iisService.Validate("(IIS Windows Server)", 10, sleepBetweenRetriesWhenWaitingForPodReady, cfg.Timeout)
Expect(valid).To(BeTrue())
err = iisService.ValidateWithRetry("(IIS Windows Server)", sleepBetweenRetriesWhenWaitingForPodReady, cfg.Timeout)
Expect(err).NotTo(HaveOccurred())

By("Checking that each pod can reach the internet")
var iisPods []pod.Pod
Expand Down Expand Up @@ -1603,8 +1605,8 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
Expect(len(iisPods)).To(Equal(5))

By("Verifying that the service is reachable and returns the default IIS start page")
valid = iisService.Validate("(IIS Windows Server)", 10, sleepBetweenRetriesWhenWaitingForPodReady, cfg.Timeout)
Expect(valid).To(BeTrue())
err = iisService.ValidateWithRetry("(IIS Windows Server)", sleepBetweenRetriesWhenWaitingForPodReady, cfg.Timeout)
Expect(err).NotTo(HaveOccurred())

By("Checking that each pod can reach the internet")
iisPods, err = iisDeploy.Pods()
Expand Down Expand Up @@ -1634,8 +1636,8 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
Expect(len(iisPods)).To(Equal(2))

By("Verifying that the service is reachable and returns the default IIS start page")
valid = iisService.Validate("(IIS Windows Server)", 10, sleepBetweenRetriesWhenWaitingForPodReady, cfg.Timeout)
Expect(valid).To(BeTrue())
err = iisService.ValidateWithRetry("(IIS Windows Server)", sleepBetweenRetriesWhenWaitingForPodReady, cfg.Timeout)
Expect(err).NotTo(HaveOccurred())

By("Checking that each pod can reach the internet")
iisPods, err = iisDeploy.Pods()
Expand Down Expand Up @@ -1700,14 +1702,14 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
By("Connecting to Windows from another Windows deployment")
name := fmt.Sprintf("windows-2-windows-%s", cfg.Name)
command := fmt.Sprintf("iwr -UseBasicParsing -TimeoutSec 60 %s", windowsService.Metadata.Name)
successes, err := pod.RunCommandMultipleTimes(pod.RunWindowsPod, windowsImages.ServerCore, name, command, cfg.StabilityIterations, 1*time.Second, retryCommandsTimeout, windowsCommandTimeout)
successes, err := pod.RunCommandMultipleTimes(pod.RunWindowsPod, windowsImages.ServerCore, name, command, cfg.StabilityIterations, 1*time.Second, windowsCommandTimeout, retryCommandsTimeout)
Expect(err).NotTo(HaveOccurred())
Expect(successes).To(Equal(cfg.StabilityIterations))

By("Connecting to Linux from Windows deployment")
name = fmt.Sprintf("windows-2-linux-%s", cfg.Name)
command = fmt.Sprintf("iwr -UseBasicParsing -TimeoutSec 60 %s", linuxService.Metadata.Name)
successes, err = pod.RunCommandMultipleTimes(pod.RunWindowsPod, windowsImages.ServerCore, name, command, cfg.StabilityIterations, 1*time.Second, retryCommandsTimeout, windowsCommandTimeout)
successes, err = pod.RunCommandMultipleTimes(pod.RunWindowsPod, windowsImages.ServerCore, name, command, cfg.StabilityIterations, 1*time.Second, windowsCommandTimeout, retryCommandsTimeout)
Expect(err).NotTo(HaveOccurred())
Expect(successes).To(Equal(cfg.StabilityIterations))

Expand Down
4 changes: 2 additions & 2 deletions test/e2e/kubernetes/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ import (

const (
//ServerVersion is used to parse out the version of the API running
ServerVersion = `(Server Version:\s)+(.*)`
commandTimeout = 1 * time.Minute
ServerVersion = `(Server Version:\s)+(.*)`
)

// Node represents the kubernetes Node Resource
Expand Down Expand Up @@ -162,6 +161,7 @@ func DescribeNodes() {

// Describe will describe a node resource
func (n *Node) Describe() error {
var commandTimeout time.Duration
cmd := exec.Command("k", "describe", "node", n.Metadata.Name)
out, err := util.RunAndLogCommand(cmd, commandTimeout)
log.Printf("\n%s\n", string(out))
Expand Down
3 changes: 1 addition & 2 deletions test/e2e/kubernetes/persistentvolume/persistentvolume.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ import (
"github.com/Azure/aks-engine/test/e2e/kubernetes/util"
)

const commandTimeout = 1 * time.Minute

// PersistentVolume is used to parse data from kubectl get pv
type PersistentVolume struct {
Metadata Metadata `json:"metadata"`
Expand Down Expand Up @@ -86,6 +84,7 @@ func DescribePVs() {

// Describe will describe a pv resource
func (pv *PersistentVolume) Describe() error {
var commandTimeout time.Duration
cmd := exec.Command("k", "describe", "pv", pv.Metadata.Name)
out, err := util.RunAndLogCommand(cmd, commandTimeout)
log.Printf("\n%s\n", string(out))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ import (
"github.com/pkg/errors"
)

const commandTimeout = 1 * time.Minute

type List struct {
PersistentVolumeClaims []PersistentVolumeClaim `json:"items"`
}
Expand Down Expand Up @@ -179,6 +177,7 @@ func DescribePVCs(pvcPrefix, namespace string) {

// Describe will describe a pvc resource
func (pvc *PersistentVolumeClaim) Describe() error {
var commandTimeout time.Duration
cmd := exec.Command("k", "describe", "pvc", pvc.Metadata.Name, "-n", pvc.Metadata.Namespace)
out, err := util.RunAndLogCommand(cmd, commandTimeout)
log.Printf("\n%s\n", string(out))
Expand All @@ -187,6 +186,7 @@ func (pvc *PersistentVolumeClaim) Describe() error {

// Delete will delete a PersistentVolumeClaim in a given namespace
func (pvc *PersistentVolumeClaim) Delete(retries int) error {
var commandTimeout time.Duration
var kubectlOutput []byte
var kubectlError error
for i := 0; i < retries; i++ {
Expand Down
4 changes: 3 additions & 1 deletion test/e2e/kubernetes/pod/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (

const (
testDir string = "testdirectory"
commandTimeout = 1 * time.Minute
deleteTimeout = 5 * time.Minute
validatePodNotExistRetries = 3
)
Expand Down Expand Up @@ -1112,6 +1111,7 @@ func (p *Pod) CheckWindowsOutboundConnection(sleep, timeout time.Duration) (bool

// ValidateHostPort will attempt to run curl against the POD's hostIP and hostPort
func (p *Pod) ValidateHostPort(check string, attempts int, sleep time.Duration, master, sshKeyPath string) bool {
var commandTimeout time.Duration
hostIP := p.Status.HostIP
if len(p.Spec.Containers) == 0 || len(p.Spec.Containers[0].Ports) == 0 {
log.Printf("Unexpected POD container spec: %v. Should have hostPort.\n", p.Spec)
Expand All @@ -1138,6 +1138,7 @@ func (p *Pod) ValidateHostPort(check string, attempts int, sleep time.Duration,

// Logs will get logs from all containers in a pod
func (p *Pod) Logs() error {
var commandTimeout time.Duration
for _, container := range p.Spec.Containers {
cmd := exec.Command("k", "logs", p.Metadata.Name, "-c", container.Name, "-n", p.Metadata.Namespace)
out, err := util.RunAndLogCommand(cmd, commandTimeout)
Expand All @@ -1151,6 +1152,7 @@ func (p *Pod) Logs() error {

// Describe will describe a pod resource
func (p *Pod) Describe() error {
var commandTimeout time.Duration
cmd := exec.Command("k", "describe", "pod", p.Metadata.Name, "-n", p.Metadata.Namespace)
out, err := util.RunAndLogCommand(cmd, commandTimeout)
log.Printf("\n%s\n", string(out))
Expand Down
Loading

0 comments on commit 0ec70c1

Please sign in to comment.