Skip to content

Commit

Permalink
Merge pull request #2239 from jackfrancis/log-collector-timeout-retries
Browse files Browse the repository at this point in the history
test: timeout log collection operations
  • Loading branch information
k8s-ci-robot authored Apr 20, 2022
2 parents d1c2f87 + 03d8cb9 commit 1703ef3
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 17 deletions.
24 changes: 17 additions & 7 deletions test/e2e/azure_logcollector.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"net/http"
"path/filepath"
"strings"
"time"

apierrors "k8s.io/apimachinery/pkg/api/errors"

Expand All @@ -48,6 +49,11 @@ import (
// AzureLogCollector collects logs from a CAPZ workload cluster.
type AzureLogCollector struct{}

const (
// collectLogInterval is the polling interval handed to retryWithTimeout
// when a log collection command is executed on a node.
collectLogInterval = 3 * time.Second
// collectLogTimeout is the overall timeout handed to retryWithTimeout
// for a single log collection command.
collectLogTimeout = 1 * time.Minute
)

// Compile-time assertion that *AzureLogCollector implements
// framework.ClusterLogCollector.
var _ framework.ClusterLogCollector = &AzureLogCollector{}

// CollectMachineLog collects logs from a machine.
Expand Down Expand Up @@ -118,18 +124,22 @@ func (k AzureLogCollector) CollectMachinePoolLog(ctx context.Context, management

// collectLogsFromNode collects logs from various sources by ssh'ing into the node
func collectLogsFromNode(ctx context.Context, managementClusterClient client.Client, cluster *clusterv1.Cluster, hostname string, isWindows bool, outputPath string) error {
Logf("INFO: Collecting logs for node %s in cluster %s in namespace %s\n", hostname, cluster.Name, cluster.Namespace)
nodeOSType := "Linux"
if isWindows {
nodeOSType = "Windows"
}
Logf("INFO: Collecting logs for %s node %s in cluster %s in namespace %s\n", nodeOSType, hostname, cluster.Name, cluster.Namespace)

controlPlaneEndpoint := cluster.Spec.ControlPlaneEndpoint.Host

execToPathFn := func(outputFileName, command string, args ...string) func() error {
return func() error {
f, err := fileOnHost(filepath.Join(outputPath, outputFileName))
if err != nil {
return err
}
defer f.Close()
return retryWithExponentialBackOff(func() error {
return retryWithTimeout(collectLogInterval, collectLogTimeout, func() error {
f, err := fileOnHost(filepath.Join(outputPath, outputFileName))
if err != nil {
return err
}
defer f.Close()
return execOnHost(controlPlaneEndpoint, hostname, sshPort, f, command, args...)
})
}
Expand Down
14 changes: 4 additions & 10 deletions test/e2e/retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,9 @@ const (
retryBackoffSteps = 3
)

// retryWithExponentialBackOff retries the function until it returns nil,
// or until the number of attempts (steps) has reached the maximum value.
func retryWithExponentialBackOff(fn func() error) error {
backoff := wait.Backoff{
Duration: retryBackoffInitialDuration,
Factor: retryBackoffFactor,
Jitter: retryBackoffJitter,
Steps: retryBackoffSteps,
}
// retryWithTimeout retries the function until it returns nil,
// or until the timeout is reached.
func retryWithTimeout(interval, timeout time.Duration, fn func() error) error {
retryFn := func(fn func() error) func() (bool, error) {
return func() (bool, error) {
err := fn()
Expand All @@ -51,5 +45,5 @@ func retryWithExponentialBackOff(fn func() error) error {
return false, err
}
}
return wait.ExponentialBackoff(backoff, retryFn(fn))
return wait.PollImmediate(interval, timeout, retryFn(fn))
}

0 comments on commit 1703ef3

Please sign in to comment.