From 7d0c44fb03a996501f10e3af4a718fd008fe9bed Mon Sep 17 00:00:00 2001
From: Joyce Liu <12664976+yt3liu@users.noreply.github.com>
Date: Wed, 29 May 2019 05:39:30 -0700
Subject: [PATCH] Fixes flaky knative serving install (#834)

* Wait on istio before installing knative serving

* retry knative serving install if it fails

* retry the serving install once after 60 second if it fails the first time

* Added logs for the retry serving install and wait on istio components

* Update comment for waiting on istio installation if necessary

Co-Authored-By: Adriano Cunha <35786489+adrcunha@users.noreply.github.com>
---
 scripts/e2e-tests.sh |  4 +++-
 scripts/library.sh   | 11 ++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/scripts/e2e-tests.sh b/scripts/e2e-tests.sh
index e47d4615c5..fb2bced10f 100755
--- a/scripts/e2e-tests.sh
+++ b/scripts/e2e-tests.sh
@@ -279,7 +279,7 @@ function setup_test_cluster() {
   header "Setting up test cluster"
 
   # Set the actual project the test cluster resides in
-  # It will be a project assigned by Boskos if test is running on Prow, 
+  # It will be a project assigned by Boskos if test is running on Prow,
   # otherwise will be ${GCP_PROJECT} set up by user.
   readonly export E2E_PROJECT_ID="$(gcloud config get-value project)"
 
@@ -314,6 +314,8 @@ function setup_test_cluster() {
   set +o pipefail
 
   if (( ! SKIP_KNATIVE_SETUP )) && function_exists knative_setup; then
+    # Wait for Istio installation to complete, if necessary, before calling knative_setup.
+    (( ! SKIP_ISTIO_ADDON )) && (wait_until_batch_job_complete istio-system || return 1)
     knative_setup || fail_test "Knative setup failed"
   fi
   if function_exists test_setup; then
diff --git a/scripts/library.sh b/scripts/library.sh
index 94204611ef..4d7206bf1b 100755
--- a/scripts/library.sh
+++ b/scripts/library.sh
@@ -332,7 +332,16 @@ function start_latest_knative_serving() {
   header "Starting Knative Serving"
   subheader "Installing Knative Serving"
   echo "Installing Serving from ${KNATIVE_SERVING_RELEASE}"
-  kubectl apply -f ${KNATIVE_SERVING_RELEASE} || return 1
+  # Some CRDs defined in serving YAML are also referenced by other components in serving. As it takes
+  # time for CRDs to become effective, there is a race condition between when the CRDs are effective
+  # and when the resources that references those CRDs are created.
+  # The current workaround is to re-apply serving.yaml if it fails. Remove the retry logic after the
+  # race condition is fixed. (https://github.com/knative/serving/issues/4176)
+  if ! kubectl apply -f ${KNATIVE_SERVING_RELEASE}; then
+    echo "Install failed, waiting 60s and then retrying..."
+    sleep 60
+    kubectl apply -f ${KNATIVE_SERVING_RELEASE} || return 1
+  fi
   wait_until_pods_running knative-serving || return 1
 }
 