Skip to content

azure-e2e-test

azure-e2e-test #285

name: azure-e2e-test
permissions:
id-token: write
contents: read
env:
CLUSTER_NAME_TEMPLATE: "{0}-e2e-test-${{ github.run_id }}-${{ github.run_attempt }}"
TEST_PROVISION_PATH_TEMPLATE: "${{ github.workspace }}/provision_azure${{ github.run_id }}_{0}.properties"
CLOUD_PROVIDER: "azure"
BUILTIN_CLOUD_PROVIDERS: "azure"
TEST_E2E_CREATE_RG: "no"
ACR_URL: "${{ vars.AZURE_ACR_URL }}"
TEST_TAGS: "owner=github-actions,run=${{ github.run_id }}-${{ github.run_attempt }}"
on:
schedule:
# Runs "at midnight every day" (see https://crontab.guru)
# will base on default branch `main`
- cron: '0 0 * * *'
workflow_dispatch:
inputs:
podvm-image-id:
type: string
description: prebuilt podvm image
caa-image:
type: string
description: prebuilt caa image
kbs-image-id:
type: string
description: prebuild kbs image
kbs-image-tag:
type: string
description: prebuild kbs image tag
jobs:
generate-podvm-image-version:
if: github.event.inputs.podvm-image-id == ''
runs-on: ubuntu-latest
outputs:
image-version: "${{ steps.generate-image-version.outputs.image-version }}"
steps:
- name: Generate PodVM image version
id: generate-image-version
run: |
unique_version="$(date '+%m.%d.%H%M%S')${{ github.run_attempt }}"
echo "Generated unique version for the image as: ${unique_version}"
echo "image-version=${unique_version}" >> "$GITHUB_OUTPUT"
build-podvm-image:
uses: ./.github/workflows/azure-podvm-image-build.yml
needs:
- generate-podvm-image-version
if: github.event.inputs.podvm-image-id == ''
secrets: inherit
with:
image-version: ${{ needs.generate-podvm-image-version.outputs.image-version }}
build-caa-container-image:
if: github.event.inputs.caa-image == ''
runs-on: ubuntu-latest
defaults:
run:
working-directory: src/cloud-api-adaptor
outputs:
caa-image: "${{ steps.build-container.outputs.caa-image }}"
steps:
- uses: actions/checkout@v3
- name: Extract go version number
run: echo "GO_VERSION=$(yq -e '.tools.golang' versions.yaml)" >> "$GITHUB_ENV"
- name: Set up Go environment
uses: actions/setup-go@v4
with:
go-version: ${{ env.GO_VERSION }}
cache-dependency-path: go.sum
- uses: azure/login@v1
name: 'Az CLI login'
with:
client-id: ${{ secrets.AZURE_CLIENT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Install build dependencies
run: |
sudo apt-get update -y
sudo apt-get install -y libvirt-dev
- name: Build container image
id: build-container
env:
ARCHES: "linux/amd64"
RELEASE_BUILD: "false"
run: |
az acr login --name "$ACR_URL"
# This builds image in the following format:
# $ACR_URL/cloud-api-adaptor:dev-COMMIT
make image "registry=${ACR_URL}"
echo "caa-image=${ACR_URL}/cloud-api-adaptor:dev-${GITHUB_SHA}" >> "$GITHUB_OUTPUT"
install-aks:
runs-on: ubuntu-latest
defaults:
run:
working-directory: src/cloud-api-adaptor
needs:
- generate-podvm-image-version
- build-caa-container-image
# when none of required steps failed, skipped is ok
if: always() && !failure() && !cancelled()
strategy:
matrix:
parameters:
- id: "tdx"
machine_type: "Standard_DC2es_v5"
- id: "snp"
machine_type: "Standard_DC2as_v5"
steps:
- uses: actions/checkout@v3
- name: Extract go version number
run: echo "GO_VERSION=$(yq -e '.tools.golang' versions.yaml)" >> "$GITHUB_ENV"
- name: Set up Go environment
uses: actions/setup-go@v4
with:
go-version: ${{ env.GO_VERSION }}
- name: Set Provisioner Environment Variables
run: |
echo "TEST_PROVISION_FILE=${{ format(env.TEST_PROVISION_PATH_TEMPLATE, matrix.parameters.id) }}" >> "$GITHUB_ENV"
echo "CLUSTER_NAME=${{ format(env.CLUSTER_NAME_TEMPLATE, matrix.parameters.id) }}" >> "$GITHUB_ENV"
- name: Extract provisioner environment
run: |
echo "KBS_IMAGE=$(yq -e '.oci.kbs.registry' versions.yaml)" >> "$GITHUB_ENV"
echo "KBS_IMAGE_TAG=$(yq -e '.oci.kbs.tag' versions.yaml)" >> "$GITHUB_ENV"
- name: Create provisioner file
env:
AZURE_IMAGE_ID: ${{ github.event.inputs.podvm-image-id || format('/CommunityGalleries/{0}/images/{1}/Versions/{2}', vars.AZURE_COMMUNITY_GALLERY_NAME, vars.AZURE_PODVM_IMAGE_DEF_NAME, needs.generate-podvm-image-version.outputs.image-version) }}
CAA_IMAGE: "${{ github.event.inputs.caa-image || needs.build-caa-container-image.outputs.caa-image }}"
KBS_IMAGE: ${{ github.event.inputs.kbs-image-id || env.KBS_IMAGE }}
KBS_IMAGE_TAG: ${{ github.event.inputs.kbs-image-tag || env.KBS_IMAGE_TAG}}
AZURE_INSTANCE_SIZE: ${{ matrix.parameters.machine_type }}
run: |
echo "Provisioner file for ${{ matrix.parameters.id }}"
cat << EOF > "$TEST_PROVISION_FILE"
AZURE_SUBSCRIPTION_ID="${{ secrets.AZURE_SUBSCRIPTION_ID }}"
AZURE_CLIENT_ID="${{ secrets.AZURE_CLIENT_ID }}"
RESOURCE_GROUP_NAME="${{ secrets.AZURE_RESOURCE_GROUP }}"
CLUSTER_NAME="${{ env.CLUSTER_NAME }}"
LOCATION="${{ secrets.AZURE_REGION }}"
SSH_KEY_ID="id_rsa.pub"
AZURE_IMAGE_ID="$AZURE_IMAGE_ID"
IS_CI_MANAGED_CLUSTER="true"
MANAGED_IDENTITY_NAME="${{ secrets.AZURE_MANAGED_IDENTITY_NAME}}"
CAA_IMAGE="${CAA_IMAGE}"
KBS_IMAGE="${KBS_IMAGE}"
KBS_IMAGE_TAG="${KBS_IMAGE_TAG}"
AZURE_INSTANCE_SIZE="${AZURE_INSTANCE_SIZE}"
TAGS="${{ env.TEST_TAGS }}"
EOF
cat "$TEST_PROVISION_FILE"
# assert that no variable is unset
! grep -E '=x$|=""$' "$TEST_PROVISION_FILE"
- name: Create public ssh key
run: ssh-keygen -t rsa -b 4096 -f install/overlays/azure/id_rsa -N "" -C [email protected]
- name: Save the configuration created here
uses: actions/upload-artifact@v3
with:
path: |
src/cloud-api-adaptor/install/overlays/azure/id_rsa.pub
${{ env.TEST_PROVISION_FILE }}
name: e2e-configuration
- uses: azure/login@v1
name: 'Az CLI login'
with:
client-id: ${{ secrets.AZURE_CLIENT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
# Install AKS cluster in parallel with the podvm image build.
- name: Run provisioner
working-directory: src/cloud-api-adaptor/test/tools
run: |
echo "Creating cluster for ${{ matrix.parameters.id }} machine"
make caa-provisioner-cli
./caa-provisioner-cli -action=createcluster
run-e2e-test:
runs-on: ubuntu-latest
defaults:
run:
working-directory: src/cloud-api-adaptor
needs:
- install-aks
- build-podvm-image
# when none of required steps failed, build-podvm-image can be skipped
if: always() && !failure() && !cancelled()
strategy:
matrix:
parameters:
- id: "tdx"
machine_type: "Standard_DC2es_v5"
- id: "snp"
machine_type: "Standard_DC2as_v5"
steps:
- uses: actions/checkout@v3
- name: Extract go version number
run: echo "GO_VERSION=$(yq -e '.tools.golang' versions.yaml)" >> "$GITHUB_ENV"
- name: Set up Go environment
uses: actions/setup-go@v4
with:
go-version: ${{ env.GO_VERSION }}
- name: Set Provisioner Environment Variables
run: |
echo "TEST_PROVISION_FILE=${{ format(env.TEST_PROVISION_PATH_TEMPLATE, matrix.parameters.id) }}" >> "$GITHUB_ENV"
echo "CLUSTER_NAME=${{ format(env.CLUSTER_NAME_TEMPLATE, matrix.parameters.id) }}" >> "$GITHUB_ENV"
- uses: azure/login@v1
name: 'Az CLI login'
with:
client-id: ${{ secrets.AZURE_CLIENT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
- name: Restore the configuration created before
uses: actions/download-artifact@v3
with:
name: e2e-configuration
- name: Add AKS Cluster Subnet ID to test provision file
run: |
NODE_RESOURCE_GROUP="$(az aks show -g ${{ secrets.AZURE_RESOURCE_GROUP }} -n "$CLUSTER_NAME" --query nodeResourceGroup -o tsv)"
SUBNET_ID="$(az network vnet list -g "$NODE_RESOURCE_GROUP" --query '[0].subnets[0].id' -o tsv)"
test -n "$SUBNET_ID"
echo "AZURE_SUBNET_ID=\"${SUBNET_ID}\"" >> "$TEST_PROVISION_FILE"
- name: Extract kbs reference
run: echo "KBS_VERSION=$(yq -e '.git.kbs.reference' versions.yaml)" >> "$GITHUB_ENV"
- name: Checkout kbs Repository
run: |
rm -rf test/trustee
git clone https://github.com/confidential-containers/trustee test/trustee
pushd test/trustee
git checkout "${KBS_VERSION}"
popd
- name: Run e2e test
env:
TEST_PROVISION: "no"
DEPLOY_KBS: "yes"
run: |
# Since we install the cluster in parallel, we need to get the credentials here.
echo "running e2e test for ${{ matrix.parameters.id }} machine"
az aks get-credentials \
--resource-group ${{ secrets.AZURE_RESOURCE_GROUP }} \
--name "${CLUSTER_NAME}"
make test-e2e
cleanup:
runs-on: ubuntu-latest
needs:
- run-e2e-test
- generate-podvm-image-version
if: always()
strategy:
matrix:
parameters:
- id: "tdx"
machine_type: "Standard_DC2es_v5"
- id: "snp"
machine_type: "Standard_DC2as_v5"
steps:
- uses: azure/login@v1
name: 'Az CLI login'
with:
client-id: ${{ secrets.AZURE_CLIENT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
- name: Delete coco namespace
# We want to delete the coco namespace because CAA might still spawn resources
# which prevents deletion of the AKS cluster
run: |
az aks get-credentials \
--resource-group ${{ secrets.AZURE_RESOURCE_GROUP }} \
--name "${{ format(env.CLUSTER_NAME_TEMPLATE, matrix.parameters.id) }}" || true
namespace="confidential-containers-system"
kubectl patch namespace "$namespace" -p '{"metadata":{"finalizers": null }}' || true
kubectl delete namespace "$namespace" || true
- name: Remove podvm image
if: github.event.inputs.podvm-image-id == ''
run: |
# Delete the Pod VM image from the gallery.
az sig image-version delete \
--resource-group ${{ secrets.AZURE_RESOURCE_GROUP }} \
--gallery-name ${{ secrets.AZURE_PODVM_GALLERY_NAME }} \
--gallery-image-definition ${{ vars.AZURE_PODVM_IMAGE_DEF_NAME }} \
--gallery-image-version "${{ needs.generate-podvm-image-version.outputs.image-version }}" || true
- name: Remove container image
if: github.event.inputs.caa-image == ''
run: |
# Delete the CAA container image built for this run.
suffix=".azurecr.io"
registry_name="${ACR_URL%"${suffix}"}"
az acr repository delete \
--name "${registry_name}" \
--image "${ACR_URL}/cloud-api-adaptor:dev-${GITHUB_SHA}" \
--yes || true
- name: Remove dangling VMs
# Remove any VMs that might have been left behind in failed test runs
run: |
vms=$(az resource list \
--tag owner=github-actions \
--tag run="${{ github.run_id }}-${{ github.run_attempt }}" \
-o tsv --query "[?type == 'Microsoft.Compute/virtualMachines'].name")
for vm in $vms; do
az vm delete -n "$vm" -g "${{ secrets.AZURE_RESOURCE_GROUP }}" --yes || true
done
- name: Remove dangling NICs
# Remove any NICs that might have been left behind in failed test runs
# NICs are reserved for 180s for VMs, even if they never launched
run: |
nics=$(az resource list \
--tag owner=github-actions \
--tag run="${{ github.run_id }}-${{ github.run_attempt }}" \
-o tsv --query "[?type == 'Microsoft.Network/networkInterfaces'].name")
sleep 180
for nic in $nics; do
az network nic delete -n "$nic" -g "${{ secrets.AZURE_RESOURCE_GROUP }}" || true
done
- name: Remove AKS cluster
run: |
# Delete the cluster even if it has been deleted already or does not exists.
az aks delete \
--name "${{ format(env.CLUSTER_NAME_TEMPLATE, matrix.parameters.id) }}" \
--resource-group ${{ secrets.AZURE_RESOURCE_GROUP }} \
--no-wait \
--yes || true