From 1fc40746f28632279a00b0a66d3ffd539a1cecb1 Mon Sep 17 00:00:00 2001 From: Andrew Scribner Date: Wed, 26 Apr 2023 16:15:18 -0400 Subject: [PATCH 1/2] feat: update integrate.yaml env setup, debug logging * refactors integrate.yaml to set up its environment similar to our other repositories * improves integrate.yaml's debug logging, adding juju-crashdump and fixing a bug where pytest-operator would delete the kubernetes model before we collected logs. --- .github/workflows/integrate.yaml | 75 ++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/.github/workflows/integrate.yaml b/.github/workflows/integrate.yaml index 6b7d2a2f..005f23a4 100644 --- a/.github/workflows/integrate.yaml +++ b/.github/workflows/integrate.yaml @@ -49,77 +49,88 @@ jobs: - name: Check out repo uses: actions/checkout@v3 - - uses: balchua/microk8s-actions@v0.3.2 + - name: Setup operator environment + uses: charmed-kubernetes/actions-operator@main with: - # TODO: Pinned to <1.25 until we update to istio 1.16 - channel: '1.24/stable' - addons: '["dns", "storage", "rbac", "metallb:10.64.140.43-10.64.140.49"]' - - - name: Install dependencies - run: | - set -eux - sudo snap install charmcraft --classic --channel=latest/candidate - sudo snap install juju --classic - sudo snap install juju-bundle --classic - sudo snap install juju-wait --classic - sudo apt update - sudo apt install tox - - - name: Wait before bootstrap + provider: microk8s + channel: 1.24/stable + charmcraft-channel: latest/candidate + # TODO: Unpin this when this bug is resolved: https://bugs.launchpad.net/juju/+bug/1992833. + # In particular, these tests failed deploying the prometheus-k8s charm where it gets an error in + # the "metrics-endpoint-relation-changed" hook. + bootstrap-options: --agent-version="2.9.34" + microk8s-addons: "dns storage rbac metallb:10.64.140.43-10.64.140.49" + # TODO: Remove once the actions-operator does this automatically + - name: Configure kubectl run: | - set -eux - sg microk8s -c 'microk8s status --wait-ready' - kubectl wait --for=condition=available --timeout=5m -nkube-system deployment/coredns deployment/hostpath-provisioner + sg microk8s -c "microk8s config > ~/.kube/config" - - name: Bootstrap + - name: Add model run: | - # Pinning juju agent to 2.9.42 to keep compatibility with pythonlib-juju<3 - sg microk8s -c 'juju bootstrap microk8s uk8s --agent-version=2.9.42' - sg microk8s -c 'juju add-model kubeflow' + juju add-model kubeflow + juju switch kubeflow + # DEBUG: added keep-models to ensure our logs are available during dumping - run: sg microk8s -c 'KUBECONFIG=/home/runner/.kube/config tox -e integration - -- --model kubeflow --destructive-mode' + -- --model kubeflow --destructive-mode --keep-models' timeout-minutes: 80 - name: Setup Debug Artifact Collection run: mkdir tmp - if: failure() + if: always() + + - name: Collect juju-crashdump from pytest-operator + run: mv juju-crashdump-* tmp + if: always() - name: Collect charmcraft logs - if: failure() + if: always() run: cat /home/runner/snap/charmcraft/common/cache/charmcraft/log/charmcraft-*.log | tee tmp/charmcraft.log - name: Collect Juju status - if: failure() + if: always() run: juju status | tee tmp/juju-status.txt - name: Collect Juju log - if: failure() - run: juju debug-log --replay --no-tail | tee tmp/juju-status.txt + if: always() + run: juju debug-log --replay --no-tail | tee tmp/juju-debug-log.txt + + - name: Collect Juju status log + if: always() + run: | + juju show-status-log istio-pilot/0 | tee tmp/juju-status-log-istio-pilot.txt + juju show-status-log istio-ingressgateway/0 | tee tmp/juju-status-log-istio-ingressgateway.txt - name: Collect Kube status - if: failure() + if: always() run: | kubectl get all -A | tee tmp/kube-summary.txt kubectl describe virtualservices -A | tee tmp/kube-virtualservices.txt kubectl describe gateways -A | tee tmp/kube-gateways.txt + kubectl describe envoyfilters -A | tee tmp/kube-envoyfilters.txt kubectl describe deployments -A | tee tmp/kube-deployments.txt kubectl describe replicasets -A | tee tmp/kubectl-replicasets.txt kubectl exec -n kubeflow istio-pilot-0 --container charm -- agents/unit-istio-pilot-0/charm/istioctl analyze -n kubeflow | tee tmp/istioctl-analyze.txt - name: Collect Kube logs - if: failure() + if: always() run: | kubectl logs -n kubeflow --tail 1000 -lapp.kubernetes.io/name=istio-pilot -c charm | tee tmp/istio-pilot.log kubectl logs -n kubeflow --tail 1000 -lapp.kubernetes.io/name=istio-ingressgateway-operator -c charm | tee tmp/istio-ingressgateway-operator.log + # This should get output by pytest-operator, but it is blocked by --keep-models until + # https://github.com/charmed-kubernetes/pytest-operator/pull/108 is merged + - name: Collect juju-crashdump explicitly + run: juju-crashdump -o tmp + - name: Upload debug artifacts - if: failure() + if: always() uses: actions/upload-artifact@v3 with: name: test-run-artifacts path: tmp + if-no-files-found: error integration-observability: name: Observability Integration Test From b9315c99da130cf115c70c1c3d2a2247ddce3cb0 Mon Sep 17 00:00:00 2001 From: Andrew Scribner Date: Wed, 26 Apr 2023 17:16:06 -0400 Subject: [PATCH 2/2] fix: integrate.yaml log copying --- .github/workflows/integrate.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integrate.yaml b/.github/workflows/integrate.yaml index 005f23a4..63c98f5e 100644 --- a/.github/workflows/integrate.yaml +++ b/.github/workflows/integrate.yaml @@ -80,7 +80,10 @@ jobs: if: always() - name: Collect juju-crashdump from pytest-operator - run: mv juju-crashdump-* tmp + run: | + for f in `ls juju-crashdump-*`; do + cp $f tmp + done if: always() - name: Collect charmcraft logs