Skip to content

Commit

Permalink
Extract post-mortem composite GHA
Browse files Browse the repository at this point in the history
Extracting the post-mortem to a composite GHA that can be used by other
projects.
As part of this change, GitHub specific prints have been added to make
the GHA log easier to consume.

Signed-off-by: Mike Kolesnik <[email protected]>
  • Loading branch information
mkolesnik authored and skitt committed Feb 2, 2021
1 parent e6b9eb0 commit 7b83031
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 30 deletions.
5 changes: 1 addition & 4 deletions .github/workflows/consuming.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,7 @@ jobs:

- name: Post mortem
if: failure()
run: |
df -h
free -h
make post-mortem
uses: ./gh-actions/post-mortem

lint-consuming:
name: Lint
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:

- name: Post mortem
if: failure()
run: make post-mortem
uses: ./gh-actions/post-mortem

- name: Clean up E2E deployment
run: make cleanup
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/scripts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:

- name: Post mortem
if: failure()
run: make post-mortem
uses: ./gh-actions/post-mortem

- name: Clean up clusters
run: make cleanup
Expand Down Expand Up @@ -58,7 +58,7 @@ jobs:

- name: Post mortem
if: failure()
run: make post-mortem
uses: ./gh-actions/post-mortem

- name: Clean up deployment
run: make cleanup
Expand All @@ -81,7 +81,7 @@ jobs:

- name: Post mortem
if: failure()
run: make post-mortem
uses: ./gh-actions/post-mortem

post_mortem:
name: Post Mortem
Expand Down
5 changes: 1 addition & 4 deletions .github/workflows/upgrade-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,4 @@ jobs:
- name: Post Mortem
if: failure()
run: |
df -h
free -h
make post-mortem
uses: ./gh-actions/post-mortem
19 changes: 19 additions & 0 deletions gh-actions/post-mortem/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Composite action: reports host disk and RAM usage, then runs `make post-mortem`.
# Each part is wrapped in ::group::/::endgroup:: workflow commands so that it
# shows up as its own section in the GitHub Actions log.
name: 'Post Mortem'
description: 'Autopsy'
runs:
using: "composite"
steps:
# Step 1: host resource report (disk space, then RAM), one log group each.
- shell: bash
run: |
echo "::group::Report available disk space"
df -h
echo "::endgroup::"
echo "::group::Report available RAM"
free -h
echo "::endgroup::"
# Step 2: run the `post-mortem` make target inside a "Running post mortem" group.
- shell: bash
run: |
echo "::group::Running post mortem"
make post-mortem
echo "::endgroup::"
11 changes: 0 additions & 11 deletions scripts/shared/lib/utils
Original file line number Diff line number Diff line change
Expand Up @@ -89,17 +89,6 @@ function run_subm_clusters() {
run_parallel "${subm_clusters[*]}" "$@"
}

# Run a command once per cluster, one cluster after another.
# Arguments:
#   $1 - cluster list expression; it is expanded through `eval echo`, so
#        both a plain space-separated list and a brace expression work.
#   $2 - command to execute; it runs with the $cluster variable set.
#   $3... - passed through to the command unchanged.
# Outputs: every line the command prints, prefixed with "[<cluster>] ".
function run_sequential() {
    local cluster_expr=$1
    local runner=$2

    # The loop variable is deliberately named `cluster` and not declared
    # local here: the invoked command reads it.
    for cluster in $(eval echo "${cluster_expr}"); do
        ${runner} "${@:3}" \
            | sed -e "s/^/[${cluster}] /"
    done
}

function registry_running() {
docker ps --filter name="^/?$KIND_REGISTRY$" | grep $KIND_REGISTRY
return $?
Expand Down
21 changes: 14 additions & 7 deletions scripts/shared/post_mortem.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,26 @@ source ${SCRIPTS_DIR}/lib/utils

### Functions ###

# Print a section header for the post-mortem log.
# Arguments: $* - words of the section title (joined into one string).
# Outputs:   a separator line, an "::endgroup::" marker that closes the
#            previously opened log group, a "::group::<title>" marker that
#            opens a new one, and a banner line repeating the title.
function print_section() {
    local title="$*"

    printf '%s\n' \
        "====================================================================" \
        "::endgroup::" \
        "::group::${title}" \
        "======================= ${title} ======================="
}

function post_analyze() {
echo "======================= Post mortem $cluster ======================="
print_section "** Pods not running in $cluster **"
kubectl get all --all-namespaces
for pod in $(kubectl get pods -A | tail -n +2 | grep -v Running | sed 's/ */;/g'); do
ns=$(echo $pod | cut -f1 -d';')
name=$(echo $pod | cut -f2 -d';')
echo "======================= $name - $ns ============================"
print_section "NS: $ns; Pod: $name"
kubectl -n $ns describe pod $name
kubectl -n $ns logs $name
echo "===================== END $name - $ns =========================="
done

# TODO (revisit): The following is added to debug intermittent globalnet failures.
print_section "** Globalnet related logs in $cluster **"
namespace="kube-system"
for pod in $(kubectl get pods --selector=k8s-app=kube-proxy -n $namespace -o jsonpath='{.items[*].metadata.name}'); do
echo "+++++++++++++++++++++: Logs for Pod $pod in namespace $namespace :++++++++++++++++++++++"
Expand Down Expand Up @@ -70,12 +77,12 @@ function post_analyze() {

echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
subctl show all

echo "===================== END Post mortem $cluster ====================="
return 0
}

### Main ###

declare_kubeconfig
clusters=($(kind get clusters))
run_sequential "${clusters[*]}" post_analyze
for cluster in $(kind get clusters); do
post_analyze
done

0 comments on commit 7b83031

Please sign in to comment.