From 5e46102bf3ce4fe2dd4f2bd2aa9ad07aa5cd5268 Mon Sep 17 00:00:00 2001
From: Orfeas Kourkakis <orfeas.kourkakis@canonical.com>
Date: Wed, 13 Dec 2023 11:58:47 +0200
Subject: [PATCH] ci: Integrate UATs to full-bundle-tests.yaml (#739)

- tests-bundle: Update tests-bundle/1.8
  * Remove dex-auth from the first assertion since this will be waiting
    for oidc-gatekeeper relation data
  * Remove argo-server since this is not part of the bundle anymore
  * Adjust timeout and idle_period since download now takes a lot less time
    and a big idle_period makes charms flaky: charms are active but go into
    maintenance status during this period.
- tests-bundle: Unpin juju in tests-bundle/1.8.
- ci: Refactor workflow to use Aproxy, Python 3.8 and operator action.
- ci: Add UATs to workflow
- ci: Add save debug artifacts to workflow
- ci: Extract bundle test path from bundle source in `full_bundle_tests` workflow
  Add `get_release_from_bundle_source` script and use it in the workflow.
  This way, the workflow doesn't require `bundle-test-path` as input.
---
 .github/workflows/full-bundle-tests.yaml  | 199 ++++++++++++++--------
 scripts/get_release_from_bundle_source.py |  36 ++++
 tests-bundle/1.8/requirements.txt         |   2 +-
 tests-bundle/1.8/test_release_1-8.py      |  11 +-
 4 files changed, 167 insertions(+), 81 deletions(-)
 create mode 100755 scripts/get_release_from_bundle_source.py

diff --git a/.github/workflows/full-bundle-tests.yaml b/.github/workflows/full-bundle-tests.yaml
index 42eadfb1..c0665390 100644
--- a/.github/workflows/full-bundle-tests.yaml
+++ b/.github/workflows/full-bundle-tests.yaml
@@ -1,24 +1,44 @@
-name: Tests
+name: Deploy bundle and run UATs on self-hosted runners
 
 on:
   workflow_dispatch:
     inputs:
-      bundle-test-path:
-        description: 'Test folder to run'
-        required: true
       bundle-source:
         description: 'Either `--channel <channel_name>` or `--file <bundle_file>.yaml`'
         required: true
+      uats-branch:
+        description: Branch to run the UATs from e.g. main or track/1.7
+        required: false
+        default: main
+      microk8s-channel:
+        description: Microk8s channel e.g. 1.25-strict/stable
+        required: false
+        default: "1.25-strict/stable"
+      juju-channel:
+        description: Juju channel e.g. 3.1/stable
+        required: false
+        default: "3.1/stable"
   workflow_call:
     inputs:
-      bundle-test-path:
-        description: 'Test folder to run'
-        type: string
-        required: true
       bundle-source:
         description: 'Either `--channel <channel_name>` or `--file <bundle_file>.yaml`'
         type: string
         required: true
+      uats-branch:
+        description: Branch to run the UATs from e.g. main or track/1.7
+        required: false
+        type: string
+        default: main
+      microk8s-channel:
+        description: Microk8s channel e.g. 1.25-strict/stable
+        required: false
+        type: string
+        default: "1.25-strict/stable"
+      juju-channel:
+        description: Juju channel e.g. 3.1/stable
+        required: false
+        type: string
+        default: "3.1/stable"
 
 jobs:
   test-bundle:
@@ -33,98 +53,122 @@ jobs:
 
           echo "MY_ADDONS=hostpath-storage ingress dns:$dns_server rbac registry metallb:'10.64.140.43-10.64.140.49,192.168.0.105-192.168.0.111'" >> $GITHUB_OUTPUT
 
-      - name: Install tools
+      # This is needed in order to configure internet connection for self-hosted runner.
+      # Source: https://discourse.canonical.com/t/introducing-aproxy-a-transparent-proxy-for-github-self-hosted-runners/2566
+      - name: Setup aproxy
         run: |
+          sudo snap install aproxy --edge
+          sudo snap set aproxy proxy=squid.internal:3128
+          sudo nft -f - << EOF
+          define default-ip = $(ip route get $(ip route show 0.0.0.0/0 | grep -oP 'via \K\S+') | grep -oP 'src \K\S+')
+          define private-ips = { 10.0.0.0/8, 127.0.0.1/8, 172.16.0.0/12, 192.168.0.0/16 }
+          table ip aproxy
+          flush table ip aproxy
+          table ip aproxy {
+                chain prerouting {
+                        type nat hook prerouting priority dstnat; policy accept;
+                        ip daddr != \$private-ips tcp dport { 80, 443 } counter dnat to \$default-ip:8443
+                }
+
+                chain output {
+                        type nat hook output priority -100; policy accept;
+                        ip daddr != \$private-ips tcp dport { 80, 443 } counter dnat to \$default-ip:8443
+                }
+          }
+          EOF
+
+      - name: Setup Python 3.8
+        run: |
+          echo "deb-src http://archive.ubuntu.com/ubuntu/ jammy main" | sudo tee -a /etc/apt/sources.list
+
           sudo apt-get update -yqq
-          sudo apt-get install -yqq python3-pip
-          sudo --preserve-env=http_proxy,https_proxy,no_proxy pip3 install tox
-          sudo snap install charmcraft --classic
-          sudo snap install firefox
+          sudo apt-get build-dep -yqq python3
+          sudo apt-get install -yqq pkg-config
 
-      - name: Setup microk8s
-        run: |
-          sudo snap install microk8s --classic --channel=1.24/stable
-          sudo usermod -a -G microk8s $USER
+          sudo apt-get install -yqq build-essential gdb lcov pkg-config \
+            libbz2-dev libffi-dev libgdbm-dev libgdbm-compat-dev liblzma-dev \
+            libncurses5-dev libreadline6-dev libsqlite3-dev libssl-dev \
+            lzma lzma-dev tk-dev uuid-dev zlib1g-dev
 
-#      - name: update registry
-#        run: |
-#          sg microk8s -c "cat > /var/snap/microk8s/current/args/certs.d/docker.io/hosts.toml << EOF
-#          server = \"https://rocks.canonical.com\"
-#          [host.\"https://rocks.canonical.com\"]
-#          capabilities = [\"pull\", \"resolve\"]
-#          EOF"
-#
-#      - run: |
-#          sudo cat /var/snap/microk8s/current/args/certs.d/docker.io/hosts.toml
+          curl -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer | bash
 
-      - name: update credentials
-        run: |
-          sg microk8s -c "cat >> /var/snap/microk8s/current/args/containerd-template.toml << EOF
-          [plugins.\"io.containerd.grpc.v1.cri\".registry.configs.\"registry-1.docker.io\".auth]
-          username = \"${{ secrets.MAKSIM_DOCKERHUB_USER }}\"
-          password = \"${{ secrets.MAKSIM_DOCKERHUB_PASSWORD }}\"
-          EOF"
+          export PYENV_ROOT="$HOME/.pyenv"
+          command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"
 
-      - name: Restart microk8s
-        run: |
-          sg microk8s -c "microk8s stop"
-          sg microk8s -c "microk8s start"
-          sg microk8s -c "microk8s status --wait-ready --timeout 150"
+          pyenv install 3.8.16
+          pyenv global 3.8.16
 
-      - name: Enable addons
-        run: |
-          sg microk8s -c "microk8s enable ${{ steps.dns-name.outputs.MY_ADDONS }}"
+          # configure environment variables to be available in subsequent steps
+          echo "PYENV_ROOT=$PYENV_ROOT" >> "$GITHUB_ENV"
+          echo "PATH=$PATH" >> "$GITHUB_ENV"
 
-      - name: Wait for microk8s to be ready and give time for addons
+      - name: Install tox
         run: |
-          sleep 90
-          sg microk8s -c "microk8s status --wait-ready --timeout 150"
-          sg microk8s -c "mkdir -p ~/.kube"
-          sg microk8s -c "microk8s config > ~/.kube/config"
+          eval "$(pyenv init -)"
+          pip install tox
+
+      - name: Setup operator environment
+        uses: charmed-kubernetes/actions-operator@main
+        with:
+          provider: microk8s
+          channel: ${{ inputs.microk8s-channel }}
+          juju-channel: ${{ inputs.juju-channel }}
+          charmcraft-channel: latest/candidate
+          microk8s-addons: ${{ steps.dns-name.outputs.MY_ADDONS }}
 
-      - name: Install and bootstrap juju
+      - name: Wait for microk8s to be ready and configure .kube/config
         run: |
-          sudo snap install juju --classic --channel=2.9/stable
-          sg microk8s -c 'juju bootstrap microk8s bundle-controller --model-default test-mode=true --model-default logging-config="<root>=DEBUG" --agent-version="2.9.44" --debug --verbose'
+          sudo microk8s status --wait-ready --timeout 150
+          sg microk8s -c "mkdir -p ~/.kube"
+          sudo microk8s config > ~/.kube/config
 
       - name: Show all pods status
         run: |
-          sg microk8s -c "microk8s kubectl get pods --all-namespaces"
+          sudo microk8s kubectl get pods --all-namespaces
 
       - name: Increase file system limits
         run: |
           sudo sysctl fs.inotify.max_user_instances=1280
           sudo sysctl fs.inotify.max_user_watches=655360
 
-      - run: |
+      - name: Configure Juju model
+        run: |
           sg microk8s -c "juju add-model kubeflow --config default-series=focal --config automatically-retry-hooks=true"
           sg microk8s -c "juju model-config"
           sg microk8s -c "juju status"
 
-      - run: |
-          # required for gecko driver
-          export XDG_RUNTIME_DIR="/run/user/$(id -u)"
-          export DBUS_SESSION_BUS_ADDRESS="unix:path=$XDG_RUNTIME_DIR/bus"
-          echo "$(id -u)"
-          loginctl enable-linger $USER
-          sudo apt-get install dbus-user-session -yqq
-          systemctl --user start dbus.service
+      - name: Get release from bundle-source input
+        id: get-release-from-bundle-source
+        run: python scripts/get_release_from_bundle_source.py "${{ inputs.bundle-source }}"
+
+      - name: Get bundle test path for ${{ steps.get-release-from-bundle-source.outputs.release }}
+        id: bundle-test-path
+        run: python scripts/get_bundle_test_path.py ${{ steps.get-release-from-bundle-source.outputs.release }}
 
       - name: Run bundle tests
         run: |
-          export BUNDLE_TEST_PATH=${{ inputs.bundle-test-path }}
+          eval "$(pyenv init -)"
+          export BUNDLE_TEST_PATH=${{ steps.bundle-test-path.outputs.bundle_test_path }}
           export GH_TOKEN=${{ secrets.GITHUB_TOKEN }}
           sg microk8s -c "tox -e full_bundle_tests -- ${{ inputs.bundle-source }}"
 
-      - name: Upload selenium screenshots
-        if: failure()
-        uses: actions/upload-artifact@v3
-        with:
-          name: selenium-screenshots
-          path: |
-            sel-screenshots
-            **/sel-screenshots
-            **/**/sel-screenshots
+      - name: Checkout Kubeflow UATs
+        run: |
+          git clone https://github.com/canonical/charmed-kubeflow-uats.git ~/charmed-kubeflow-uats
+          cd ~/charmed-kubeflow-uats
+          git checkout ${{ inputs.uats-branch }}
+      - name: Run UATs
+        run: |
+          eval "$(pyenv init -)"
+          sg microk8s -c "tox -c ~/charmed-kubeflow-uats/ -e kubeflow"
+
+      - name: Save debug artifacts
+        uses: canonical/kubeflow-ci/actions/dump-charm-debug-artifacts@main
+        if: always()
+
+      - name: Dump Aproxy logs on failure
+        if: failure() || cancelled()
+        run: sudo snap logs aproxy.aproxy -n=all
 
       - name: Run connectivity check
         if: always()
@@ -137,22 +181,27 @@ jobs:
           sg microk8s -c "juju status"
 
           echo "Dumping k8s logs"
-          sg microk8s -c "microk8s kubectl get all --all-namespaces"
+          sudo microk8s kubectl get all --all-namespaces
+
+      - name: Dump juju debug-log on failure
+        if: failure() || cancelled()
+        run: |
+          sg microk8s -c "juju debug-log --replay --no-tail"
 
       - name: Descript all pods
-        if: failure()
+        if: failure() || cancelled()
         run: |
-          sg microk8s -c "microk8s kubectl describe pods --all-namespaces"
+          sudo microk8s kubectl describe pods --all-namespaces
 
       - name: Get logs from failed pods
         if: failure() || cancelled()
         run: |
-          POD_LIST=$(sg microk8s -c "microk8s kubectl get pods --all-namespaces -o 'custom-columns=NAME:.metadata.name,NAMESPACE:.metadata.namespace,CONTAINERS:.status.containerStatuses[*].ready'" | awk '$3 == "false" {print $1,$2}')
+          POD_LIST=$(sudo microk8s kubectl get pods --all-namespaces -o 'custom-columns=NAME:.metadata.name,NAMESPACE:.metadata.namespace,CONTAINERS:.status.containerStatuses[*].ready' | awk '$3 == "false" {print $1,$2}')
 
           if [ -n "$POD_LIST" ]; then
             echo "Actual Logs"
             while read -r POD NAMESPACE; do
               echo "\n\n\nPod: $POD"
-              sg microk8s -c "microk8s kubectl logs -n $NAMESPACE $POD"
+              sudo microk8s kubectl logs -n $NAMESPACE $POD
             done <<< "$POD_LIST"
           fi
diff --git a/scripts/get_release_from_bundle_source.py b/scripts/get_release_from_bundle_source.py
new file mode 100755
index 00000000..11682a40
--- /dev/null
+++ b/scripts/get_release_from_bundle_source.py
@@ -0,0 +1,36 @@
+# Get the release from the bundle source given as input
+import os
+import re
+import sys
+
+def get_release_from_bundle_source() -> None:
+    """Append `release=<release>` to $GITHUB_OUTPUT for the bundle source in argv[1].
+
+    Bundle source should be `--channel <channel_name>` (or `--channel=<channel_name>`)
+    or `--file <bundle_file>.yaml`, e.g. `--channel 1.8/stable` or
+    `--file releases/1.8/stable/kubeflow/bundle.yaml`; outer whitespace is ignored.
+    Raises Exception if the bundle source is missing or doesn't have that format.
+    """
+    if len(sys.argv) <= 1:
+        raise Exception("No bundle source given as input.")
+
+    bundle_source = sys.argv[1].strip()
+    try:
+        if bundle_source.startswith("--channel"):
+            # Split on `=` or any whitespace run so repeated spaces don't yield "".
+            release = re.split(r"[=\s]+", bundle_source)[1]
+        elif bundle_source.startswith("--file"):
+            # `<dir>/<track>/<risk>/...`: track and risk are path parts 1 and 2.
+            _, track, risk = bundle_source.split("/")[:3]
+            release = f"{track}/{risk}"
+        else:
+            raise ValueError(f"unsupported bundle source: {bundle_source!r}")
+        if not all(release.split("/")):
+            raise ValueError(f"empty track or risk in release: {release!r}")
+    except (IndexError, ValueError) as err:
+        raise Exception("Bundle source doesn't have expected format.") from err
+    print(f"Returning release={release}.")
+    with open(os.environ['GITHUB_OUTPUT'], 'a') as fh:
+        print(f'release={release}', file=fh)
+
+get_release_from_bundle_source()
diff --git a/tests-bundle/1.8/requirements.txt b/tests-bundle/1.8/requirements.txt
index aefbb87d..bcc3fae4 100644
--- a/tests-bundle/1.8/requirements.txt
+++ b/tests-bundle/1.8/requirements.txt
@@ -2,6 +2,6 @@ lightkube
 pytest
 pytest-operator
 kfp<2.0.0
-juju<3.0.0
+juju<4.0
 selenium>=4.8.3
 webdriver_manager>=3.8.5
diff --git a/tests-bundle/1.8/test_release_1-8.py b/tests-bundle/1.8/test_release_1-8.py
index ee269a3c..91bb227c 100644
--- a/tests-bundle/1.8/test_release_1-8.py
+++ b/tests-bundle/1.8/test_release_1-8.py
@@ -21,8 +21,7 @@ async def test_deploy(ops_test: OpsTest, lightkube_client, deploy_cmd):
     apps = [
         'admission-webhook',
         'argo-controller',
-        'argo-server',
-        'dex-auth',
+        # 'dex-auth', # this is expected to wait for OIDC
         'envoy',
         # 'istio-ingressgateway',  # this is expected to wait for OIDC
         # 'istio-pilot',  # this is expected to wait for OIDC
@@ -64,7 +63,7 @@ async def test_deploy(ops_test: OpsTest, lightkube_client, deploy_cmd):
         status="active",
         raise_on_blocked=False,
         raise_on_error=False,
-        timeout=from_minutes(minutes=180),
+        timeout=from_minutes(minutes=30),
     )
     print("All applications are active")
 
@@ -77,15 +76,18 @@ async def test_deploy(ops_test: OpsTest, lightkube_client, deploy_cmd):
     await ops_test.model.applications["oidc-gatekeeper"].set_config({"public-url": url})
 
     # append apps since they should be configured now
+    apps.append("dex-auth")
     apps.append("oidc-gatekeeper")
     apps.append("istio-ingressgateway")
     apps.append("istio-pilot")
+    apps.append("kubeflow-profiles")
+    apps.append("tensorboard-controller")
     await ops_test.model.wait_for_idle(
         apps=apps,
         status="active",
         raise_on_blocked=False,
         raise_on_error=False,
-        timeout=from_minutes(minutes=100),
+        timeout=from_minutes(minutes=30),
     )
 
     if rc != 0:
@@ -97,7 +99,6 @@ async def test_deploy(ops_test: OpsTest, lightkube_client, deploy_cmd):
         raise_on_blocked=False,
         raise_on_error=True,
         timeout=from_minutes(minutes=30),
-        idle_period=from_minutes(minutes=3),
     )