Create AKS cluster, deploy CKF and run bundle test #12
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Create AKS cluster, deploy CKF and run bundle test | |
on: | |
workflow_dispatch: | |
inputs: | |
bundle_version: | |
description: 'Comma-separated list of bundle versions e.g. "1.7","1.8"' | |
default: '"1.8"' | |
required: true | |
k8s_version: | |
description: 'Kubernetes version to be used for the AKS cluster' | |
required: false | |
uats_branch: | |
description: 'Branch to run the UATs from e.g. main or track/1.8. By default, the workflow uses main.' | |
required: false | |
schedule: | |
- cron: "17 0 * * 2" | |
jobs: | |
deploy-ckf-to-aks: | |
runs-on: ubuntu-22.04 | |
strategy: | |
matrix: | |
bundle_version: ${{ fromJSON(format('[{0}]', inputs.bundle_version || '"1.8","latest"')) }} | |
fail-fast: false | |
env: | |
AZURE_CORE_OUTPUT: none | |
K8S_VERSION: ${{ inputs.k8s_version || fromJSON('{"1.8":"1.28", "latest":"1.28"}')[matrix.bundle_version] }} | |
JUJU_VERSION: ${{ fromJSON('{"1.8":"3.1","latest":"3.1"}')[ matrix.bundle_version ] }} | |
UATS_BRANCH: ${{ inputs.uats_branch || fromJSON('{"1.8":"main", "latest":"main"}')[matrix.bundle_version] }} | |
PYTHON_VERSION: "3.8" | |
steps: | |
- name: Checkout repository | |
uses: actions/checkout@v2 | |
# Remove once https://github.com/canonical/bundle-kubeflow/issues/761 | |
# is resolved and applied to uats repository. | |
- name: Install python ${{ env.PYTHON_VERSION }} | |
run: | | |
sudo add-apt-repository ppa:deadsnakes/ppa -y | |
sudo apt update -y | |
sudo apt install python${{ env.PYTHON_VERSION }} python${{ env.PYTHON_VERSION }}-distutils python${{ env.PYTHON_VERSION }}-venv -y | |
- name: Install CLI tools | |
run: | | |
wget https://bootstrap.pypa.io/get-pip.py | |
python${{ env.PYTHON_VERSION }} get-pip.py | |
python${{ env.PYTHON_VERSION }} -m pip install tox | |
sudo snap install juju --classic --channel=${{ env.JUJU_VERSION }}/stable | |
sudo snap install charmcraft --classic | |
juju version | |
- uses: azure/login@v1 | |
with: | |
creds: ${{ secrets.BUNDLE_KUBEFLOW_AKS_SERVICE_PRINCIPAL }} | |
- name: Create resource group and cluster | |
run: | | |
# We need to remove the dot from version | |
# due to cluster naming restrictions | |
version=${{ matrix.bundle_version }} | |
KF_VERSION="kf-${version//.}" | |
RESOURCE_GROUP=${KF_VERSION}-ResourceGroup | |
NAME=${KF_VERSION}-AKSCluster | |
LOCATION=westeurope | |
echo "RESOURCE_GROUP=${RESOURCE_GROUP}" >> $GITHUB_ENV | |
echo "NAME=${NAME}" >> $GITHUB_ENV | |
echo "LOCATION=${LOCATION}" >> $GITHUB_ENV | |
az group create --name ${RESOURCE_GROUP} --location ${LOCATION} | |
az aks create \ | |
--resource-group ${RESOURCE_GROUP} \ | |
--name ${NAME} \ | |
--kubernetes-version ${{ env.K8S_VERSION }} \ | |
--node-count 2 \ | |
--node-vm-size Standard_D8s_v3 \ | |
--node-osdisk-size 100 \ | |
--node-osdisk-type Managed \ | |
--os-sku Ubuntu \ | |
--no-ssh-key | |
- name: Add AKS cloud to juju and bootstrap controller | |
run: | | |
az aks get-credentials --resource-group ${{ env.RESOURCE_GROUP }} --name ${{ env.NAME }} --admin | |
juju add-k8s aks --client | |
juju bootstrap aks aks-controller | |
juju add-model kubeflow | |
- name: Test bundle deployment | |
run: | | |
tox -vve test_bundle_deployment-${{ matrix.bundle_version }} -- --model kubeflow --keep-models -vv -s | |
- name: Run Kubeflow UATs | |
run: | | |
git clone https://github.com/canonical/charmed-kubeflow-uats.git ~/charmed-kubeflow-uats | |
cd ~/charmed-kubeflow-uats | |
git checkout ${{ env.UATS_BRANCH }} | |
tox -e kubeflow-remote | |
# On failure, capture debugging resources | |
- name: Save debug artifacts | |
uses: canonical/kubeflow-ci/actions/dump-charm-debug-artifacts@main | |
if: always() | |
- name: Get juju status | |
run: juju status | |
if: failure() || cancelled() | |
- name: Get juju debug logs | |
run: juju debug-log --replay --no-tail | |
if: failure() || cancelled() | |
- name: Get all kubernetes resources | |
run: kubectl get all -A | |
if: failure() || cancelled() | |
- name: Describe all pods | |
if: failure() || cancelled() | |
run: kubectl describe pods --all-namespaces | |
- name: Get logs from pods with status = Pending | |
run: kubectl -n kubeflow get pods | tail -n +2 | grep Pending | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100 | |
if: failure() || cancelled() | |
- name: Get logs from pods with status = Failed | |
run: kubectl -n kubeflow get pods | tail -n +2 | grep Failed | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100 | |
if: failure() || cancelled() | |
- name: Get logs from pods with status = CrashLoopBackOff | |
run: kubectl -n kubeflow get pods | tail -n +2 | grep CrashLoopBackOff | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100 | |
if: failure() || cancelled() | |
- name: Delete AKS cluster | |
if: always() | |
run: az aks delete --resource-group ${{ env.RESOURCE_GROUP }} --name ${{ env.NAME }} --yes | |
- name: Delete resource groups | |
if: always() | |
run: | | |
az group delete --name ${{ env.RESOURCE_GROUP }} --yes | |
if [ "$(az group exists --name MC_${{ env.RESOURCE_GROUP }}_${{ env.NAME }}_${{ env.LOCATION }})" = "true" ]; then | |
az group delete --name MC_${{ env.RESOURCE_GROUP }}_${{ env.NAME }}_${{ env.LOCATION }} --yes | |
fi | |
- name: Check that resource groups have been deleted, else fail | |
if: always() | |
run: | | |
if [ "$(az group exists --name ${{ env.RESOURCE_GROUP }} )" = "true" ] || [ "$(az group exists --name MC_${{ env.RESOURCE_GROUP }}_${{ env.NAME }}_${{ env.LOCATION }})" = "true" ]; then | |
exit 1 | |
fi |