From 2e319ea7379b3ec920b87baef31d6e0142d7dec4 Mon Sep 17 00:00:00 2001
From: Jack Francis <jackfrancis@gmail.com>
Date: Wed, 20 Apr 2022 13:49:23 -0700
Subject: [PATCH] clusterctl: retry github i/o operations

---
 .../client/repository/repository_github.go    | 104 +++++++++++++-----
 .../repository/repository_github_test.go      |  13 +++
 2 files changed, 90 insertions(+), 27 deletions(-)

diff --git a/cmd/clusterctl/client/repository/repository_github.go b/cmd/clusterctl/client/repository/repository_github.go
index ee0ddbe6b069..6d1f1b8ea4b1 100644
--- a/cmd/clusterctl/client/repository/repository_github.go
+++ b/cmd/clusterctl/client/repository/repository_github.go
@@ -24,11 +24,13 @@ import (
 	"net/url"
 	"path/filepath"
 	"strings"
+	"time"
 
 	"github.com/google/go-github/v33/github"
 	"github.com/pkg/errors"
 	"golang.org/x/oauth2"
 	"k8s.io/apimachinery/pkg/util/version"
+	"k8s.io/apimachinery/pkg/util/wait"
 
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	"sigs.k8s.io/cluster-api/cmd/clusterctl/client/config"
@@ -44,9 +46,11 @@ const (
 var (
 	// Caches used to limit the number of GitHub API calls.
 
-	cacheVersions = map[string][]string{}
-	cacheReleases = map[string]*github.RepositoryRelease{}
-	cacheFiles    = map[string][]byte{}
+	cacheVersions              = map[string][]string{}
+	cacheReleases              = map[string]*github.RepositoryRelease{}
+	cacheFiles                 = map[string][]byte{}
+	retryableOperationInterval = 10 * time.Second
+	retryableOperationTimeout  = 1 * time.Minute
 )
 
 // gitHubRepository provides support for providers hosted on GitHub.
@@ -216,9 +220,24 @@ func (g *gitHubRepository) getVersions() ([]string, error) {
 
 	// get all the releases
 	// NB. currently Github API does not support result ordering, so it not possible to limit results
-	releases, _, err := client.Repositories.ListReleases(context.TODO(), g.owner, g.repository, nil)
-	if err != nil {
-		return nil, g.handleGithubErr(err, "failed to get the list of releases")
+	var releases []*github.RepositoryRelease
+	var retryError error
+	_ = wait.PollImmediate(retryableOperationInterval, retryableOperationTimeout, func() (bool, error) {
+		var listReleasesErr error
+		releases, _, listReleasesErr = client.Repositories.ListReleases(context.TODO(), g.owner, g.repository, nil)
+		if listReleasesErr != nil {
+			retryError = g.handleGithubErr(listReleasesErr, "failed to get the list of releases")
+			// return immediately if we are rate limited
+			if _, ok := listReleasesErr.(*github.RateLimitError); ok {
+				return false, retryError
+			}
+			return false, nil
+		}
+		retryError = nil
+		return true, nil
+	})
+	if retryError != nil {
+		return nil, retryError
 	}
 	versions := []string{}
 	for _, r := range releases {
@@ -247,13 +266,24 @@ func (g *gitHubRepository) getReleaseByTag(tag string) (*github.RepositoryReleas
 
 	client := g.getClient()
 
-	release, _, err := client.Repositories.GetReleaseByTag(context.TODO(), g.owner, g.repository, tag)
-	if err != nil {
-		return nil, g.handleGithubErr(err, "failed to read release %q", tag)
-	}
-
-	if release == nil {
-		return nil, errors.Errorf("failed to get release %q", tag)
+	var release *github.RepositoryRelease
+	var retryError error
+	_ = wait.PollImmediate(retryableOperationInterval, retryableOperationTimeout, func() (bool, error) {
+		var getReleasesErr error
+		release, _, getReleasesErr = client.Repositories.GetReleaseByTag(context.TODO(), g.owner, g.repository, tag)
+		if getReleasesErr != nil {
+			retryError = g.handleGithubErr(getReleasesErr, "failed to read release %q", tag)
+			// return immediately if we are rate limited
+			if _, ok := getReleasesErr.(*github.RateLimitError); ok {
+				return false, retryError
+			}
+			return false, nil
+		}
+		retryError = nil
+		return true, nil
+	})
+	if retryError != nil {
+		return nil, retryError
 	}
 
 	cacheReleases[cacheID] = release
@@ -284,23 +314,43 @@ func (g *gitHubRepository) downloadFilesFromRelease(release *github.RepositoryRe
 		return nil, errors.Errorf("failed to get file %q from %q release", fileName, *release.TagName)
 	}
 
-	reader, redirect, err := client.Repositories.DownloadReleaseAsset(context.TODO(), g.owner, g.repository, *assetID, http.DefaultClient)
-	if err != nil {
-		return nil, g.handleGithubErr(err, "failed to download file %q from %q release", *release.TagName, fileName)
-	}
-	if redirect != "" {
-		req, err := http.NewRequestWithContext(ctx, http.MethodGet, redirect, http.NoBody)
-		if err != nil {
-			return nil, errors.Wrapf(err, "failed to download file %q from %q release via redirect location %q: failed to create request", *release.TagName, fileName, redirect)
+	var reader io.ReadCloser
+	var retryError error
+	_ = wait.PollImmediate(retryableOperationInterval, retryableOperationTimeout, func() (bool, error) {
+		var redirect string
+		var downloadReleaseError error
+		reader, redirect, downloadReleaseError = client.Repositories.DownloadReleaseAsset(context.TODO(), g.owner, g.repository, *assetID, http.DefaultClient)
+		if downloadReleaseError != nil {
+			retryError = g.handleGithubErr(downloadReleaseError, "failed to download file %q from %q release", *release.TagName, fileName)
+			// return immediately if we are rate limited
+			if _, ok := downloadReleaseError.(*github.RateLimitError); ok {
+				return false, retryError
+			}
+			return false, nil
 		}
-
-		response, err := http.DefaultClient.Do(req) //nolint:bodyclose // (NB: The reader is actually closed in a defer)
-		if err != nil {
-			return nil, errors.Wrapf(err, "failed to download file %q from %q release via redirect location %q", *release.TagName, fileName, redirect)
+		if redirect != "" {
+			req, err := http.NewRequestWithContext(ctx, http.MethodGet, redirect, http.NoBody)
+			if err != nil {
+				retryError = errors.Wrapf(err, "failed to download file %q from %q release via redirect location %q: failed to create request", *release.TagName, fileName, redirect)
+				return false, nil
+			}
+
+			response, err := http.DefaultClient.Do(req) //nolint:bodyclose // (NB: The reader is actually closed in a defer)
+			if err != nil {
+				retryError = errors.Wrapf(err, "failed to download file %q from %q release via redirect location %q", *release.TagName, fileName, redirect)
+				return false, nil
+			}
+			reader = response.Body
 		}
-		reader = response.Body
+		retryError = nil
+		return true, nil
+	})
+	if reader != nil {
+		defer reader.Close()
+	}
+	if retryError != nil {
+		return nil, retryError
 	}
-	defer reader.Close()
 
 	// Read contents from the reader (redirect or not), and return.
 	content, err := io.ReadAll(reader)
diff --git a/cmd/clusterctl/client/repository/repository_github_test.go b/cmd/clusterctl/client/repository/repository_github_test.go
index d6809c1daebe..c1ef05b528ea 100644
--- a/cmd/clusterctl/client/repository/repository_github_test.go
+++ b/cmd/clusterctl/client/repository/repository_github_test.go
@@ -20,6 +20,7 @@ import (
 	"fmt"
 	"net/http"
 	"testing"
+	"time"
 
 	"github.com/google/go-github/v33/github"
 	. "github.com/onsi/gomega"
@@ -31,6 +32,8 @@ import (
 )
 
 func Test_githubRepository_newGitHubRepository(t *testing.T) {
+	retryableOperationInterval = 200 * time.Millisecond
+	retryableOperationTimeout = 1 * time.Second
 	type field struct {
 		providerConfig config.Provider
 		variableClient config.VariablesClient
@@ -156,6 +159,8 @@ func Test_githubRepository_getComponentsPath(t *testing.T) {
 }
 
 func Test_githubRepository_getFile(t *testing.T) {
+	retryableOperationInterval = 200 * time.Millisecond
+	retryableOperationTimeout = 1 * time.Second
 	client, mux, teardown := test.NewFakeGitHub()
 	defer teardown()
 
@@ -228,6 +233,8 @@ func Test_githubRepository_getFile(t *testing.T) {
 }
 
 func Test_gitHubRepository_getVersions(t *testing.T) {
+	retryableOperationInterval = 200 * time.Millisecond
+	retryableOperationTimeout = 1 * time.Second
 	client, mux, teardown := test.NewFakeGitHub()
 	defer teardown()
 
@@ -284,6 +291,8 @@ func Test_gitHubRepository_getVersions(t *testing.T) {
 }
 
 func Test_gitHubRepository_getLatestContractRelease(t *testing.T) {
+	retryableOperationInterval = 200 * time.Millisecond
+	retryableOperationTimeout = 1 * time.Second
 	client, mux, teardown := test.NewFakeGitHub()
 	defer teardown()
 
@@ -540,6 +549,8 @@ func Test_gitHubRepository_getLatestPatchRelease(t *testing.T) {
 }
 
 func Test_gitHubRepository_getReleaseByTag(t *testing.T) {
+	retryableOperationInterval = 200 * time.Millisecond
+	retryableOperationTimeout = 1 * time.Second
 	client, mux, teardown := test.NewFakeGitHub()
 	defer teardown()
 
@@ -605,6 +616,8 @@ func Test_gitHubRepository_getReleaseByTag(t *testing.T) {
 }
 
 func Test_gitHubRepository_downloadFilesFromRelease(t *testing.T) {
+	retryableOperationInterval = 200 * time.Millisecond
+	retryableOperationTimeout = 1 * time.Second
 	client, mux, teardown := test.NewFakeGitHub()
 	defer teardown()