Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: run analyses when apps are not synced and healthy after configu… #840

Merged
merged 1 commit into from
Nov 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 61 additions & 3 deletions install-platform.sh
Original file line number Diff line number Diff line change
Expand Up @@ -127,14 +127,72 @@ wait_until_apps_synced_healthy() {

if [ ${all_apps_synced} != "true" ] ; then
echo "not all apps synced and healthy after limit reached :("
echo "status of all pods"
kubectl get pods -A
analyze_all_unhealthy_apps "${apps}"
exit 1
else
echo "all apps are synced."
fi
}

analyze_all_unhealthy_apps() {
local apps=$1
for app in ${apps} ; do
if kubectl get application -n argocd ${app} > /dev/null 2>&1 ; then
sync_status=$(kubectl get application -n argocd ${app} -o jsonpath='{.status.sync.status}')
health_status=$(kubectl get application -n argocd ${app} -o jsonpath='{.status.health.status}')

if [[ "${sync_status}" != ${synced} ]] || [[ "${health_status}" != ${healthy} ]] ; then
analyze_app ${app}
fi
fi
done
}

analyze_app() {
local app=$1

# get target namespace for app
app_namespace=$( kubectl get applications -n argocd ${app} -o=jsonpath='{.spec.destination.namespace}' )

echo "------------------"
echo "starting analyzing unhealthy/unsynced app '${app}'"
echo "------------------"

# get application spec and status
echo "------------------"
echo "kubectl get application -n argocd ${app} -o yaml"
kubectl get application -n argocd ${app} -o yaml
echo "------------------"

# get events in this namespace
echo "------------------"
echo "kubectl get events -n ${app_namespace} --sort-by='.lastTimestamp'"
kubectl get events -n ${app_namespace} --sort-by='.lastTimestamp'
echo "------------------"

# get pods for app
echo "------------------"
echo "kubectl get pods -n ${app_namespace}"
kubectl get pods -n ${app_namespace}
echo "------------------"

# describe pods for app
echo "------------------"
echo "kubectl describe pod -n ${app_namespace}"
kubectl describe pod -n ${app_namespace}
echo "------------------"

# get logs
echo "------------------"
echo "kubectl get pods -o name -n ${app_namespace} | xargs -I {} kubectl logs -n ${app_namespace} {}"
kubectl get pods -o name -n ${app_namespace} | xargs -I {} kubectl logs -n ${app_namespace} {} --all-containers=true
echo "------------------"

echo "------------------"
echo "finished analyzing degraded app '${app}'"
echo "------------------"
}


if [ "${KUBRIX_CREATE_K3D_CLUSTER}" == true ] ; then
# do we need to set this always? I had DNS issues on the train
Expand Down Expand Up @@ -254,7 +312,7 @@ argocd_apps=$(cat $target_chart_value_file | awk '/^ - name:/ { printf "%s", "s
argocd_apps_without_individual=$(cat $target_chart_value_file | egrep -Ev "backstage|kargo" | awk '/^ - name:/ { printf "%s", "sx-"$3" "}' )

# max wait for 20 minutes until all apps except backstage and kargo are synced and healthy
wait_until_apps_synced_healthy "${argocd_apps_without_individual}" "Synced" "Healthy" ${MAX_WAIT_TIME:-1200}
wait_until_apps_synced_healthy "${argocd_apps_without_individual}" "Synced" "Healthy" ${KUBRIX_BOOTSTRAP_MAX_WAIT_TIME:-1200}

# apply argocd-secret to set a secretKey
kubectl apply -f platform-apps/charts/argocd/manual-secret/argocd-secret.yaml
Expand Down