From dc742606462fc5c47d23d15319aacca6c897974e Mon Sep 17 00:00:00 2001 From: Luca Comellini Date: Wed, 22 May 2024 10:48:13 -0700 Subject: [PATCH] Use OpenTofu to create GCP infrastructure --- .editorconfig | 2 +- .github/workflows/nfr.yml | 61 +++++----- .gitignore | 29 +++++ .pre-commit-config.yaml | 8 +- tests/Makefile | 35 +----- tests/scripts/add-local-ip-auth-networks.sh | 10 -- tests/scripts/cleanup-router.sh | 8 -- tests/scripts/cleanup-vm.sh | 17 --- tests/scripts/create-and-setup-gcp-vm.sh | 68 ----------- tests/scripts/create-gke-cluster.sh | 40 ------- tests/scripts/create-gke-router.sh | 15 --- tests/scripts/delete-gke-cluster.sh | 7 -- tests/scripts/run-tests-gcp-vm.sh | 21 ++-- tests/scripts/sync-files-to-vm.sh | 9 +- tests/scripts/vars.env-example | 12 -- tests/tofu/.terraform.lock.hcl | 53 +++++++++ tests/tofu/config.tf | 65 ++++++++++ tests/tofu/main.tf | 124 ++++++++++++++++++++ tests/tofu/network.tf | 44 +++++++ tests/tofu/outputs.tf | 30 +++++ tests/tofu/variables.tf | 44 +++++++ 21 files changed, 451 insertions(+), 251 deletions(-) delete mode 100755 tests/scripts/add-local-ip-auth-networks.sh delete mode 100755 tests/scripts/cleanup-router.sh delete mode 100755 tests/scripts/cleanup-vm.sh delete mode 100755 tests/scripts/create-and-setup-gcp-vm.sh delete mode 100755 tests/scripts/create-gke-cluster.sh delete mode 100755 tests/scripts/create-gke-router.sh delete mode 100755 tests/scripts/delete-gke-cluster.sh create mode 100644 tests/tofu/.terraform.lock.hcl create mode 100644 tests/tofu/config.tf create mode 100644 tests/tofu/main.tf create mode 100644 tests/tofu/network.tf create mode 100644 tests/tofu/outputs.tf create mode 100644 tests/tofu/variables.tf diff --git a/.editorconfig b/.editorconfig index 302cfc4277..f1e641b04c 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,6 +8,6 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = tab -[*.{md,yml,yaml}] +[*.{md,yml,yaml,tf}] indent_size = 2 indent_style = space diff --git 
a/.github/workflows/nfr.yml b/.github/workflows/nfr.yml index c61d4dbff6..7f751e9891 100644 --- a/.github/workflows/nfr.yml +++ b/.github/workflows/nfr.yml @@ -88,7 +88,6 @@ jobs: id: auth uses: google-github-actions/auth@6fc4af4b145ae7821d527454aa9bd537d1f2dc5f # v2.1.7 with: - token_format: access_token workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY }} service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} @@ -98,44 +97,48 @@ jobs: project_id: ${{ secrets.GCP_PROJECT_ID }} install_components: kubectl + - name: Setup OpenTofu + uses: opentofu/setup-opentofu@592200bd4b9bbf4772ace78f887668b1aee8f716 # v1.0.5 + with: + tofu_version: 1.8.7 # renovate: datasource=github-tags depName=opentofu/opentofu + tofu_wrapper: false + + - name: Initialize OpenTofu + working-directory: ./tests/tofu + run: | + tofu version + tofu init + + - name: Setup tfvars file + working-directory: ./tests/tofu + run: | + cat <<EOF > terraform.tfvars + gke_project = "${{ secrets.GCP_PROJECT_ID }}" + gke_cluster_name = "nfr-tests-${{ github.run_id }}-${{ matrix.type }}" + gke_num_nodes = 12 + gke_machine_type = "n2d-standard-16" + gke_nodes_service_account = "${{ secrets.GKE_NODES_SERVICE_ACCOUNT }}" + vm_service_account = "${{ secrets.GCP_SERVICE_ACCOUNT }}" + ngf_branch = "${{ github.ref_name }}" + EOF + - name: Setup dotenv file working-directory: ./tests/scripts run: | - echo "RESOURCE_NAME=nfr-tests-${{ github.run_id }}-${{ matrix.type }}" >> vars.env echo "TAG=${{ needs.vars.outputs.image_tag }}" >> vars.env echo "PREFIX=ghcr.io/nginxinc/nginx-gateway-fabric" >> vars.env echo "NGINX_PREFIX=ghcr.io/nginxinc/nginx-gateway-fabric/nginx" >> vars.env echo "NGINX_PLUS_PREFIX=us-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/nginx-gateway-fabric/nginx-plus" >> vars.env - echo "GKE_CLUSTER_NAME=nfr-tests-${{ github.run_id }}-${{ matrix.type }}" >> vars.env - echo "GKE_CLUSTER_ZONE=us-west1-b" >> vars.env - echo "GKE_CLUSTER_REGION=us-west1" >> vars.env echo "GKE_PROJECT=${{ 
secrets.GCP_PROJECT_ID }}" >> vars.env - echo "GKE_SVC_ACCOUNT=${{ secrets.GCP_SERVICE_ACCOUNT }}" >> vars.env - echo "GKE_NODES_SERVICE_ACCOUNT=${{ secrets.GKE_NODES_SERVICE_ACCOUNT }}" >> vars.env - echo "NETWORK_TAGS=nfr-tests-${{ github.run_id }}-${{ matrix.type }}" >> vars.env - echo "NGF_BRANCH=${{ github.ref_name }}" >> vars.env - echo "SOURCE_IP_RANGE=$(curl -sS -4 icanhazip.com)/32" >> vars.env - echo "ADD_VM_IP_AUTH_NETWORKS=true" >> vars.env echo "PLUS_ENABLED=${{ matrix.type == 'plus' }}" >> vars.env echo "GINKGO_LABEL=" >> vars.env echo "NGF_VERSION=${{ needs.vars.outputs.version }}" >> vars.env - echo "GKE_NUM_NODES=12" >> vars.env - echo "GKE_MACHINE_TYPE=n2d-standard-16" >> vars.env echo "PLUS_USAGE_ENDPOINT=${{ secrets.JWT_PLUS_REPORTING_ENDPOINT }}" >> vars.env - - name: Setup license file for plus - if: matrix.type == 'plus' - env: - PLUS_LICENSE: ${{ secrets.JWT_PLUS_REPORTING }} - run: echo "${PLUS_LICENSE}" > license.jwt - - - name: Create GKE cluster - working-directory: ./tests - run: make create-gke-cluster CI=true - - - name: Create and setup VM - working-directory: ./tests - run: make create-and-setup-vm + - name: Create GKE cluster and VM + working-directory: ./tests/tofu + run: | + tofu apply --auto-approve - name: Run Tests working-directory: ./tests @@ -153,12 +156,10 @@ jobs: path: tests/results/**/*-${{ matrix.type }}.* - name: Cleanup - working-directory: ./tests + working-directory: ./tests/tofu if: always() run: | - bash scripts/cleanup-vm.sh true - make delete-gke-cluster - rm -rf scripts/vars.env + tofu destroy --auto-approve pr-results: name: Open PR with results diff --git a/.gitignore b/.gitignore index 4dbda581d7..a542411f55 100644 --- a/.gitignore +++ b/.gitignore @@ -61,3 +61,32 @@ internal/mode/static/nginx/modules/coverage # temporary files used for manifest generation config/base/deploy.yaml config/base/deploy.yaml.bak + +# Local .terraform directories +**/.terraform/* + +# .tfstate files +*.tfstate +*.tfstate.* + +# 
Crash log files +crash.log +crash.*.log + +# Exclude all .tfvars files, which are likely to contain sensitive data, such as +# password, private keys, and other secrets. These should not be part of version +# control as they are data points which are potentially sensitive and subject +# to change depending on the environment. +*.tfvars +*.tfvars.json + +# Ignore override files as they are usually used to override resources locally and so +# are not checked in +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Ignore CLI configuration files +.terraformrc +terraform.rc diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 38549023a8..d3065a348d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -80,7 +80,7 @@ repos: - --template-files=README.md.gotmpl - repo: https://github.com/scop/pre-commit-shfmt - rev: v3.8.0-1 + rev: v3.10.0-2 hooks: - id: shfmt-src args: [-w, -s, -i, "4"] @@ -95,6 +95,12 @@ repos: - "--skip-auto-generation=required,additionalProperties" - --append-newline + - repo: https://github.com/tofuutils/pre-commit-opentofu + rev: v2.1.0 + hooks: + - id: tofu_fmt + # - id: tofu_docs + ci: skip: [golangci-lint-full, prettier, markdownlint-cli2, yamllint] autofix_prs: false diff --git a/tests/Makefile b/tests/Makefile index b4e34787ee..ee29ee85fd 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -90,19 +90,11 @@ ifeq ($(PLUS_ENABLED),true) endif .PHONY: setup-gcp-and-run-nfr-tests -setup-gcp-and-run-nfr-tests: create-gke-router create-and-setup-vm nfr-test ## Create and setup a GKE router and GCP VM for tests and run the NFR tests +setup-gcp-and-run-nfr-tests: create-gcp-resources nfr-test ## Create the GCP resources (GKE cluster and VM) for tests and run the NFR tests -.PHONY: create-gke-cluster +.PHONY: create-gcp-resources -create-gke-cluster: ## Create a GKE cluster - ./scripts/create-gke-cluster.sh $(CI) - -.PHONY: create-and-setup-vm -create-and-setup-vm: ## Create and setup a GCP VM for tests - ./scripts/create-and-setup-gcp-vm.sh 
- -.PHONY: create-gke-router -create-gke-router: ## Create a GKE router to allow egress traffic from private nodes (allows for external image pulls) - ./scripts/create-gke-router.sh +create-gcp-resources: ## Create a GKE cluster and a GCP VM for tests + tofu -chdir=tofu apply .PHONY: sync-files-to-vm sync-files-to-vm: ## Syncs your local NGF files with the NGF repo on the VM @@ -124,7 +116,7 @@ stop-longevity-test: nfr-test ## Stop the longevity test and collects results .PHONY: .vm-nfr-test .vm-nfr-test: ## Runs the NFR tests on the GCP VM (called by `nfr-test`) go run github.com/onsi/ginkgo/v2/ginkgo --randomize-all --randomize-suites --keep-going --fail-on-pending \ - --trace -r -v --buildvcs --force-newlines $(GITHUB_OUTPUT) \ + --trace -r -vv --buildvcs --force-newlines $(GITHUB_OUTPUT) \ --label-filter "nfr" $(GINKGO_FLAGS) --timeout 5h ./suite -- --gateway-api-version=$(GW_API_VERSION) \ --gateway-api-prev-version=$(GW_API_PREV_VERSION) --image-tag=$(TAG) --version-under-test=$(NGF_VERSION) \ --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) --nginx-plus-image-repo=$(NGINX_PLUS_PREFIX) \ @@ -151,23 +143,8 @@ test-with-plus: PLUS_ENABLED=true test-with-plus: check-for-plus-usage-endpoint test ## Runs the functional tests for NGF with NGINX Plus on your default k8s cluster .PHONY: cleanup-gcp -cleanup-gcp: cleanup-router cleanup-vm delete-gke-cluster ## Cleanup all GCP resources - -.PHONY: cleanup-router -cleanup-router: ## Delete the GKE router - ./scripts/cleanup-router.sh - -.PHONY: cleanup-vm -cleanup-vm: ## Delete the test GCP VM and delete the firewall rule - ./scripts/cleanup-vm.sh - -.PHONY: delete-gke-cluster -delete-gke-cluster: ## Delete the GKE cluster - ./scripts/delete-gke-cluster.sh - -.PHONY: add-local-ip-to-cluster -add-local-ip-to-cluster: ## Add local IP to the GKE cluster master-authorized-networks - ./scripts/add-local-ip-auth-networks.sh +cleanup-gcp: ## Cleanup all GCP resources + tofu -chdir=tofu destroy HELM_PARAMETERS += 
--set nameOverride=nginx-gateway --set nginxGateway.kind=skip --set service.create=false --skip-schema-validation diff --git a/tests/scripts/add-local-ip-auth-networks.sh b/tests/scripts/add-local-ip-auth-networks.sh deleted file mode 100755 index 2ba06012f5..0000000000 --- a/tests/scripts/add-local-ip-auth-networks.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -set -eo pipefail - -source scripts/vars.env - -CURRENT_AUTH_NETWORK=$(gcloud container clusters describe "${GKE_CLUSTER_NAME}" --zone="${GKE_CLUSTER_ZONE}" \ - --format="value(masterAuthorizedNetworksConfig.cidrBlocks[0])" | sed 's/cidrBlock=//') - -gcloud container clusters update "${GKE_CLUSTER_NAME}" --zone="${GKE_CLUSTER_ZONE}" --enable-master-authorized-networks --master-authorized-networks="${SOURCE_IP_RANGE}","${CURRENT_AUTH_NETWORK}" diff --git a/tests/scripts/cleanup-router.sh b/tests/scripts/cleanup-router.sh deleted file mode 100755 index ee3ea524b2..0000000000 --- a/tests/scripts/cleanup-router.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -set -o pipefail - -source scripts/vars.env - -gcloud compute routers nats delete "${RESOURCE_NAME}" --quiet --router "${RESOURCE_NAME}" --router-region "${GKE_CLUSTER_REGION}" -gcloud compute routers delete "${RESOURCE_NAME}" --quiet --region "${GKE_CLUSTER_REGION}" diff --git a/tests/scripts/cleanup-vm.sh b/tests/scripts/cleanup-vm.sh deleted file mode 100755 index ecb0420d18..0000000000 --- a/tests/scripts/cleanup-vm.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash - -set -o pipefail - -source scripts/vars.env - -skip_gke_master_control_node_access="${1:-false}" - -# Remove VM IP from GKE master control node access, if required -if [ "${ADD_VM_IP_AUTH_NETWORKS}" = "true" ] && [ "${skip_gke_master_control_node_access}" != "true" ]; then - CURRENT_AUTH_NETWORK=$(gcloud container clusters describe "${GKE_CLUSTER_NAME}" --zone "${GKE_CLUSTER_ZONE}" \ - --format="value(masterAuthorizedNetworksConfig.cidrBlocks[0])" | sed 
's/cidrBlock=//') - gcloud container clusters update "${GKE_CLUSTER_NAME}" --zone "${GKE_CLUSTER_ZONE}" --enable-master-authorized-networks --master-authorized-networks="${CURRENT_AUTH_NETWORK}" -fi - -gcloud compute instances delete "${RESOURCE_NAME}" --quiet --project="${GKE_PROJECT}" --zone="${GKE_CLUSTER_ZONE}" -gcloud compute firewall-rules delete "${RESOURCE_NAME}" --quiet --project="${GKE_PROJECT}" diff --git a/tests/scripts/create-and-setup-gcp-vm.sh b/tests/scripts/create-and-setup-gcp-vm.sh deleted file mode 100755 index f837ba9ede..0000000000 --- a/tests/scripts/create-and-setup-gcp-vm.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash - -set -o pipefail - -SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) -REPO_DIR=$(dirname $(dirname "$SCRIPT_DIR")) - -source scripts/vars.env - -gcloud compute firewall-rules create "${RESOURCE_NAME}" \ - --project="${GKE_PROJECT}" \ - --direction=INGRESS \ - --priority=1000 \ - --network=default \ - --action=ALLOW \ - --rules=tcp:22 \ - --source-ranges="${SOURCE_IP_RANGE}" \ - --target-tags="${NETWORK_TAGS}" - -gcloud compute instances create "${RESOURCE_NAME}" --project="${GKE_PROJECT}" --zone="${GKE_CLUSTER_ZONE}" --machine-type=n2-standard-2 \ - --network-interface=network-tier=PREMIUM,stack-type=IPV4_ONLY,subnet=default --maintenance-policy=MIGRATE \ - --provisioning-model=STANDARD --service-account="${GKE_SVC_ACCOUNT}" \ - --scopes=https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring.write,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append,https://www.googleapis.com/auth/cloud-platform \ - --tags="${NETWORK_TAGS}" --create-disk=auto-delete=yes,boot=yes,device-name="${RESOURCE_NAME}",image-family=projects/"${GKE_PROJECT}"/global/images/ngf-debian,mode=rw,size=20 --no-shielded-secure-boot 
--shielded-vtpm --shielded-integrity-monitoring --labels=goog-ec-src=vm_add-gcloud --reservation-affinity=any - -# Add VM IP to GKE master control node access, if required -if [ "${ADD_VM_IP_AUTH_NETWORKS}" = "true" ]; then - EXTERNAL_IP=$(gcloud compute instances describe "${RESOURCE_NAME}" --project="${GKE_PROJECT}" --zone="${GKE_CLUSTER_ZONE}" \ - --format='value(networkInterfaces[0].accessConfigs[0].natIP)') - CURRENT_AUTH_NETWORK=$(gcloud container clusters describe "${GKE_CLUSTER_NAME}" --zone="${GKE_CLUSTER_ZONE}" \ - --format="value(masterAuthorizedNetworksConfig.cidrBlocks[0])" | sed 's/cidrBlock=//') - gcloud container clusters update "${GKE_CLUSTER_NAME}" --zone="${GKE_CLUSTER_ZONE}" --enable-master-authorized-networks --master-authorized-networks="${EXTERNAL_IP}"/32,"${CURRENT_AUTH_NETWORK}" -fi - -# Poll for SSH connectivity -MAX_RETRIES=10 -RETRY_INTERVAL=5 -for ((i = 1; i <= MAX_RETRIES; i++)); do - echo "Attempt $i to connect to the VM..." - gcloud compute ssh username@"${RESOURCE_NAME}" --zone="${GKE_CLUSTER_ZONE}" --project="${GKE_PROJECT}" --quiet --command="echo 'VM is ready'" - if [ $? -eq 0 ]; then - echo "SSH connection successful. VM is ready." - break - fi - echo "Waiting for ${RETRY_INTERVAL} seconds before the next attempt..." 
- sleep ${RETRY_INTERVAL} -done - -gcloud compute scp --zone "${GKE_CLUSTER_ZONE}" --project="${GKE_PROJECT}" "${SCRIPT_DIR}"/vars.env username@"${RESOURCE_NAME}":~ - -if [ -n "${NGF_REPO}" ] && [ "${NGF_REPO}" != "nginxinc" ]; then - gcloud compute ssh --zone "${GKE_CLUSTER_ZONE}" --project="${GKE_PROJECT}" username@"${RESOURCE_NAME}" \ - --command="bash -i </dev/null && pwd) - "${SCRIPT_DIR}"/add-local-ip-auth-networks.sh -fi diff --git a/tests/scripts/create-gke-router.sh b/tests/scripts/create-gke-router.sh deleted file mode 100755 index 599986638b..0000000000 --- a/tests/scripts/create-gke-router.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash - -set -eo pipefail - -source scripts/vars.env - -gcloud compute routers create "${RESOURCE_NAME}" \ - --region "${GKE_CLUSTER_REGION}" \ - --network default - -gcloud compute routers nats create "${RESOURCE_NAME}" \ - --router-region "${GKE_CLUSTER_REGION}" \ - --router "${RESOURCE_NAME}" \ - --nat-all-subnet-ip-ranges \ - --auto-allocate-nat-external-ips diff --git a/tests/scripts/delete-gke-cluster.sh b/tests/scripts/delete-gke-cluster.sh deleted file mode 100755 index 2cebd452ad..0000000000 --- a/tests/scripts/delete-gke-cluster.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -set -eo pipefail - -source scripts/vars.env - -gcloud container clusters delete "${GKE_CLUSTER_NAME}" --zone "${GKE_CLUSTER_ZONE}" --project "${GKE_PROJECT}" --quiet diff --git a/tests/scripts/run-tests-gcp-vm.sh b/tests/scripts/run-tests-gcp-vm.sh index 3cff298694..eca305a00a 100755 --- a/tests/scripts/run-tests-gcp-vm.sh +++ b/tests/scripts/run-tests-gcp-vm.sh @@ -4,11 +4,14 @@ set -eo pipefail SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) -source scripts/vars.env +GKE_CLUSTER_ZONE=$(tofu -chdir=tofu output -raw k8s_cluster_zone) +GKE_CLUSTER_NAME=$(tofu -chdir=tofu output -raw k8s_cluster_name) +GKE_PROJECT=$(tofu -chdir=tofu output -raw project_id) +VM_NAME=$(tofu -chdir=tofu output -raw vm_name) 
-gcloud compute scp --zone "${GKE_CLUSTER_ZONE}" --project="${GKE_PROJECT}" "${SCRIPT_DIR}"/vars.env username@"${RESOURCE_NAME}":~ +gcloud compute scp --zone "${GKE_CLUSTER_ZONE}" --project="${GKE_PROJECT}" "${SCRIPT_DIR}"/vars.env username@"${VM_NAME}":~ -gcloud compute ssh --zone "${GKE_CLUSTER_ZONE}" --project="${GKE_PROJECT}" username@"${RESOURCE_NAME}" \ +gcloud compute ssh --zone "${GKE_CLUSTER_ZONE}" --project="${GKE_PROJECT}" username@"${VM_NAME}" \ --command="export START_LONGEVITY=${START_LONGEVITY} &&\ export STOP_LONGEVITY=${STOP_LONGEVITY} &&\ export CI=${CI} &&\ @@ -23,9 +26,9 @@ fi ## Use rsync if running locally (faster); otherwise if in the pipeline don't download an SSH config if [ "${CI}" = "false" ]; then gcloud compute config-ssh --ssh-config-file ngf-gcp.ssh >/dev/null - rsync -ave 'ssh -F ngf-gcp.ssh' username@"${RESOURCE_NAME}"."${GKE_CLUSTER_ZONE}"."${GKE_PROJECT}":~/nginx-gateway-fabric/tests/results . + rsync -ave 'ssh -F ngf-gcp.ssh' username@"${VM_NAME}"."${GKE_CLUSTER_ZONE}"."${GKE_PROJECT}":~/nginx-gateway-fabric/tests/results . else - gcloud compute scp --zone "${GKE_CLUSTER_ZONE}" --project="${GKE_PROJECT}" --recurse username@"${RESOURCE_NAME}":~/nginx-gateway-fabric/tests/results . + gcloud compute scp --zone "${GKE_CLUSTER_ZONE}" --project="${GKE_PROJECT}" --recurse username@"${VM_NAME}":~/nginx-gateway-fabric/tests/results . 
fi ## If tearing down the longevity test, we need to collect logs from gcloud and add to the results @@ -44,15 +47,15 @@ if [ "${STOP_LONGEVITY}" = "true" ]; then printf "\n## Error Logs\n\n" >>"${results}" ## ngf error logs - ngfErrText=$(gcloud logging read --project="${GKE_PROJECT}" 'resource.labels.cluster_name='"${RESOURCE_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx-gateway AND labels."k8s-pod/app_kubernetes_io/instance"=ngf-longevity AND severity=ERROR AND SEARCH("error")' --format "value(textPayload)") - ngfErrJSON=$(gcloud logging read --project="${GKE_PROJECT}" 'resource.labels.cluster_name='"${RESOURCE_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx-gateway AND labels."k8s-pod/app_kubernetes_io/instance"=ngf-longevity AND severity=ERROR AND SEARCH("error")' --format "value(jsonPayload)") + ngfErrText=$(gcloud logging read --project="${GKE_PROJECT}" 'resource.labels.cluster_name='"${GKE_CLUSTER_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx-gateway AND labels."k8s-pod/app_kubernetes_io/instance"=ngf-longevity AND severity=ERROR AND SEARCH("error")' --format "value(textPayload)") + ngfErrJSON=$(gcloud logging read --project="${GKE_PROJECT}" 'resource.labels.cluster_name='"${GKE_CLUSTER_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx-gateway AND labels."k8s-pod/app_kubernetes_io/instance"=ngf-longevity AND severity=ERROR AND SEARCH("error")' --format "value(jsonPayload)") printf "### nginx-gateway\n%s\n%s\n\n" "${ngfErrText}" "${ngfErrJSON}" >>"${results}" ## nginx error logs - ngxErr=$(gcloud logging read --project="${GKE_PROJECT}" 'resource.labels.cluster_name='"${RESOURCE_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx AND labels."k8s-pod/app_kubernetes_io/instance"=ngf-longevity AND severity=ERROR AND SEARCH("`[warn]`") OR SEARCH("`[error]`") OR SEARCH("`[emerg]`")' --format 
"value(textPayload)") + ngxErr=$(gcloud logging read --project="${GKE_PROJECT}" 'resource.labels.cluster_name='"${GKE_CLUSTER_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx AND labels."k8s-pod/app_kubernetes_io/instance"=ngf-longevity AND severity=ERROR AND SEARCH("`[warn]`") OR SEARCH("`[error]`") OR SEARCH("`[emerg]`")' --format "value(textPayload)") printf "### nginx\n%s\n\n" "${ngxErr}" >>"${results}" ## nginx non-200 responses (also filter out 499 since wrk cancels connections) - ngxNon200=$(gcloud logging read --project="${GKE_PROJECT}" 'resource.labels.cluster_name='"${RESOURCE_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx AND labels."k8s-pod/app_kubernetes_io/instance"=ngf-longevity AND "GET" "HTTP/1.1" -"200" -"499" -"client prematurely closed connection"' --format "value(textPayload)") + ngxNon200=$(gcloud logging read --project="${GKE_PROJECT}" 'resource.labels.cluster_name='"${GKE_CLUSTER_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx AND labels."k8s-pod/app_kubernetes_io/instance"=ngf-longevity AND "GET" "HTTP/1.1" -"200" -"499" -"client prematurely closed connection"' --format "value(textPayload)") printf "%s\n\n" "${ngxNon200}" >>"${results}" fi diff --git a/tests/scripts/sync-files-to-vm.sh b/tests/scripts/sync-files-to-vm.sh index 5b44c03c16..d4823f803f 100755 --- a/tests/scripts/sync-files-to-vm.sh +++ b/tests/scripts/sync-files-to-vm.sh @@ -2,10 +2,11 @@ set -eo pipefail -source scripts/vars.env - -NGF_DIR=$(dirname "$PWD")/ +NGF_DIR=$(dirname "${PWD}")/ +GKE_CLUSTER_ZONE=$(tofu -chdir=tofu output -raw k8s_cluster_zone) +GKE_PROJECT=$(tofu -chdir=tofu output -raw project_id) +VM_NAME=$(tofu -chdir=tofu output -raw vm_name) gcloud compute config-ssh --ssh-config-file ngf-gcp.ssh >/dev/null -rsync -ave 'ssh -F ngf-gcp.ssh' "${NGF_DIR}" username@"${RESOURCE_NAME}"."${GKE_CLUSTER_ZONE}"."${GKE_PROJECT}":~/nginx-gateway-fabric +rsync -Putae 'ssh -F 
ngf-gcp.ssh' "${NGF_DIR}" username@"${VM_NAME}"."${GKE_CLUSTER_ZONE}"."${GKE_PROJECT}":~/nginx-gateway-fabric diff --git a/tests/scripts/vars.env-example b/tests/scripts/vars.env-example index b8f1276fb5..5fb8177615 100644 --- a/tests/scripts/vars.env-example +++ b/tests/scripts/vars.env-example @@ -1,24 +1,12 @@ # WARNING: This is an example - please copy this file to vars.env and complete appropriately for your environment -RESOURCE_NAME= TAG= PREFIX= NGINX_PREFIX= NGINX_PLUS_PREFIX= -GKE_CLUSTER_NAME= -GKE_CLUSTER_ZONE= -GKE_CLUSTER_REGION= -GKE_PROJECT= -GKE_SVC_ACCOUNT= -GKE_NODES_SERVICE_ACCOUNT= -NETWORK_TAGS= NGF_REPO= NGF_BRANCH=main GINKGO_LABEL= GINKGO_FLAGS= -SOURCE_IP_RANGE= -ADD_VM_IP_AUTH_NETWORKS= PLUS_ENABLED= PLUS_USAGE_ENDPOINT= NGF_VERSION= -GKE_MACHINE_TYPE= -GKE_NUM_NODES= diff --git a/tests/tofu/.terraform.lock.hcl b/tests/tofu/.terraform.lock.hcl new file mode 100644 index 0000000000..36ab06c229 --- /dev/null +++ b/tests/tofu/.terraform.lock.hcl @@ -0,0 +1,53 @@ +# This file is maintained automatically by "tofu init". +# Manual edits may be lost in future updates. 
+ +provider "registry.opentofu.org/hashicorp/cloudinit" { + version = "2.3.5" + hashes = [ + "h1:pGyOmbhV9UkT+Vd9rSZQ5BSh4MwZXAfKl2OQEce0WDo=", + "zh:18d857ff5090bb50bc7314c11852709001ab33db1c9a957327125335ee105b0b", + "zh:31909baeeab00b70c871c4f310b0bbe12334ab2bda2adf4e1d51b6447f2ee6ce", + "zh:4ebd2975c7b0a4d142cc2002483316503087a0a4ab8947a54df8d71832e3cdee", + "zh:5c151543b94d1f8191257ca5e656c47f3d4524211ca60d462f8ab3f2c890e2ff", + "zh:67dfd1063ddfae0bcf1704c69fc17704ce4243dacdc862c9c184dcb6141ee568", + "zh:7f6a18a3d1dc5f2d1770516ff7f2c267ceef08071dbba485d632e04c107e9a8f", + "zh:993373080b67bd32a3ee6ec106dcb7891664cbbac515272af22ffd84dde68d0c", + "zh:a72fe5757f7456491a5e7b91dd088f993082470b6acae8cf1ed0c922c81adb18", + "zh:a7987554dc22fc16a2cca9b47cbed1a7d4f93c264b56bfefc819bf1a5b28e59a", + "zh:e1ca388d9eb2edc34ed26564004bcd2f6384f49956c757363886a535b275ac9c", + ] +} + +provider "registry.opentofu.org/hashicorp/google" { + version = "6.14.1" + hashes = [ + "h1:p8D03uf0J1d8m7uvfCFZFX4DV791BgdgGpjC1TrJI+Y=", + "zh:03bee255c6d2f3164205d19d0c2a4131a620b581fb39a12ae809fe1734818dfe", + "zh:442f732f3650e59b328d4e0ba42fd96c6badfc4d43a55bb909d721fe45248b54", + "zh:50c1726822905915ee318a03de69017a22ca9294bc692d7f34bf0f60ffdde919", + "zh:8b609b1b6b8cf9e24af15c61206f2e0f104a13ac52db3d4520721477c2487926", + "zh:8e193095770bda41a1da35f83a3d6cecd5c321e065a2e325eea365669d34117b", + "zh:dbd939bf4632620166b9fc39961c37dfb2d3581b1461efdc3aa61692bdc26053", + "zh:e6ad386aa479af3076d8f2112682458f5ad4fc561a8750083337d875c38dc166", + "zh:f2df91fac48c1d2de97a24f9c3de20223ee7d7134e921c79f6a7c0ff2afd67fe", + "zh:fafc79e55b90ca5c3f8d95c641e5f3e77a97a7a93d70a713dcecb6a18c0ab2a9", + "zh:fc4f1321277287462363552945e97374d7529493ec7bfc3f7424309dcd41427e", + ] +} + +provider "registry.opentofu.org/hashicorp/http" { + version = "3.4.5" + hashes = [ + "h1:P3NFKZbtHuQ6mmoDVpg4WYlDJ+yK4cchzkjTPzBWG3Y=", + "zh:055a4431d33bb89b9848193152433eaead7cc2e6746d3436a5922419de2112bf", + 
"zh:0bfabafea9f5e36802fcfc5a800831ec1767d896af889abc610014d02b09bdc2", + "zh:300b4983fe1b43bd0a7dac1f94b30b3814f11c824224dd83fb45a521c02cea60", + "zh:68f6958314ca5dc0868be70e37ec123b99b8828aa49f27fd2fdd13df05d31ab1", + "zh:c29f098a597250adc2a7d9f99acbce3c9e07d37f1c5cfded5df4309738cf613c", + "zh:c33607397f9c9302c0cd797c8b7484c9c6cfa09c3489d4b55af17df20b204368", + "zh:d519ca364a224110428b390ee06e963a3ec4dfdd1ac816c9f32e647567957cf5", + "zh:e4a9c7c0ac31a0192362ef43449390cdf00d2cf6f13061ef730b177eaf00ac45", + "zh:f25223c062f274d8f89bb96017e73586030a205bc91cdad266a9954d0def2a23", + "zh:fd4dc824ebae2f3a66318df364bec83b88e9a52e7f66b00dafa29a796d9a94ab", + ] +} diff --git a/tests/tofu/config.tf b/tests/tofu/config.tf new file mode 100644 index 0000000000..a28b1a9efc --- /dev/null +++ b/tests/tofu/config.tf @@ -0,0 +1,65 @@ +locals { + kubeconfig_data = { + apiVersion = "v1" + kind = "Config" + preferences = { + colors = true + } + current-context = google_container_cluster.primary.name + contexts = [ + { + name = google_container_cluster.primary.name + context = { + cluster = google_container_cluster.primary.name + user = google_container_cluster.primary.name + } + } + ] + clusters = [ + { + name = google_container_cluster.primary.name + cluster = { + server = "https://${google_container_cluster.primary.private_cluster_config.0.private_endpoint}" + certificate-authority-data = google_container_cluster.primary.master_auth[0].cluster_ca_certificate + } + } + ] + users = [ + { + name = google_container_cluster.primary.name + user = { + exec = { + apiVersion = "client.authentication.k8s.io/v1beta1" + command = "gke-gcloud-auth-plugin" + interactiveMode = "Never" + provideClusterInfo = true + } + } + } + ] + } + kubeconfig = yamlencode(local.kubeconfig_data) +} + +data "cloudinit_config" "kubeconfig_setup" { + gzip = false + base64_encode = false + + part { + content_type = "text/cloud-config" + content = <<-EOF + #cloud-config + write_files: + - path: 
/home/username/.kube/config + content: ${indent(10, yamlencode(local.kubeconfig))} + permissions: '0600' + owner: username:username + + runcmd: + - echo "Kubeconfig has been written to /home/username/.kube/config" + - | + sudo -i -u username bash -c "cd /home/username/nginx-gateway-fabric/tests && git fetch -pP --all && git checkout ${var.ngf_branch} && git pull" + echo "Branch ${var.ngf_branch} has been checked out." + EOF + } +} diff --git a/tests/tofu/main.tf b/tests/tofu/main.tf new file mode 100644 index 0000000000..c6a231253a --- /dev/null +++ b/tests/tofu/main.tf @@ -0,0 +1,124 @@ +provider "google" { + project = var.gke_project + region = var.gke_cluster_region +} + +data "http" "myip" { + url = "https://ipv4.icanhazip.com" +} + +data "google_client_config" "current" {} + +data "google_compute_zones" "available" {} + +locals { + google_zone = data.google_compute_zones.available.names[0] +} + +resource "google_container_cluster" "primary" { + name = var.gke_cluster_name + project = data.google_client_config.current.project + + location = local.google_zone + initial_node_count = 1 + remove_default_node_pool = true + + network = google_compute_network.vpc.self_link + subnetwork = google_compute_subnetwork.subnet.self_link + node_config { + service_account = var.gke_nodes_service_account + kubelet_config { + cpu_manager_policy = "" + insecure_kubelet_readonly_port_enabled = "FALSE" + } + } + + logging_config { + enable_components = ["SYSTEM_COMPONENTS", "WORKLOADS"] + } + + deletion_protection = false + resource_labels = { + env = "ngf-tests" + } + + master_authorized_networks_config { + cidr_blocks { + cidr_block = "${chomp(data.http.myip.response_body)}/32" + display_name = "local-ip" + } + cidr_blocks { + cidr_block = google_compute_subnetwork.subnet.ip_cidr_range + display_name = "vpc" + } + } + + private_cluster_config { + enable_private_nodes = true + private_endpoint_subnetwork = google_compute_subnetwork.subnet.self_link + } +} + +resource 
"google_container_node_pool" "primary_nodes" { + name = "${var.gke_cluster_name}-nodes" + cluster = google_container_cluster.primary.id + node_count = var.gke_num_nodes + + node_config { + machine_type = var.gke_machine_type + service_account = var.gke_nodes_service_account + metadata = { + block-project-ssh-keys = "TRUE" + disable-legacy-endpoints = "true" + } + tags = ["ngf-tests-${var.gke_cluster_name}-nodes"] + shielded_instance_config { + enable_secure_boot = true + } + kubelet_config { + cpu_manager_policy = "" + insecure_kubelet_readonly_port_enabled = "FALSE" + } + } + + lifecycle { + ignore_changes = [ + initial_node_count + ] + } + +} + +resource "google_compute_instance" "vm" { + name = "${var.gke_cluster_name}-vm" + machine_type = "n2-standard-2" + zone = local.google_zone + allow_stopping_for_update = true + tags = ["ngf-tests-${var.gke_cluster_name}-vm"] + + boot_disk { + initialize_params { + image = "ngf-debian" + } + } + shielded_instance_config { + enable_secure_boot = true + } + network_interface { + network = google_compute_network.vpc.self_link + subnetwork = google_compute_subnetwork.subnet.self_link + + access_config { + nat_ip = google_compute_address.vpc-ip.address + } + } + + service_account { + email = var.vm_service_account + scopes = ["cloud-platform"] + } + metadata = { + user-data = data.cloudinit_config.kubeconfig_setup.rendered + block-project-ssh-keys = "TRUE" + } +} diff --git a/tests/tofu/network.tf b/tests/tofu/network.tf new file mode 100644 index 0000000000..5f45c3a8ce --- /dev/null +++ b/tests/tofu/network.tf @@ -0,0 +1,44 @@ +resource "google_compute_network" "vpc" { + name = "${var.gke_cluster_name}-vpc" + auto_create_subnetworks = "false" + project = data.google_client_config.current.project +} + +resource "google_compute_subnetwork" "subnet" { + name = "${var.gke_cluster_name}-subnet" + network = google_compute_network.vpc.self_link + ip_cidr_range = "10.10.0.0/24" + private_ip_google_access = true +} + +resource 
"google_compute_router" "router" { + name = "${var.gke_cluster_name}-router" + network = google_compute_network.vpc.self_link +} + +resource "google_compute_router_nat" "nat" { + name = "${var.gke_cluster_name}-nat" + router = google_compute_router.router.name + nat_ip_allocate_option = "AUTO_ONLY" + source_subnetwork_ip_ranges_to_nat = "LIST_OF_SUBNETWORKS" + subnetwork { + name = google_compute_subnetwork.subnet.self_link + source_ip_ranges_to_nat = ["ALL_IP_RANGES"] + } +} + +resource "google_compute_firewall" "ssh" { + name = "${var.gke_cluster_name}-ssh" + network = google_compute_network.vpc.self_link + allow { + protocol = "tcp" + ports = ["22"] + } + source_ranges = ["${chomp(data.http.myip.response_body)}/32"] +} + +resource "google_compute_address" "vpc-ip" { + name = "${var.gke_cluster_name}-vpc-ip" + address_type = "EXTERNAL" + network_tier = "PREMIUM" +} diff --git a/tests/tofu/outputs.tf b/tests/tofu/outputs.tf new file mode 100644 index 0000000000..ef0bb0b834 --- /dev/null +++ b/tests/tofu/outputs.tf @@ -0,0 +1,30 @@ +output "region" { + value = data.google_client_config.current.region + description = "GCloud Region" +} + +output "k8s_cluster_zone" { + value = google_container_cluster.primary.location + description = "GKE Cluster Zone" +} + +output "project_id" { + value = data.google_client_config.current.project + description = "GCloud Project ID" + sensitive = true +} + +output "k8s_cluster_name" { + value = google_container_cluster.primary.name + description = "GKE Cluster Name" +} + +output "k8s_cluster_version" { + value = google_container_cluster.primary.master_version + description = "GKE Cluster Version" +} + +output "vm_name" { + value = google_compute_instance.vm.name + description = "VM Name" +} diff --git a/tests/tofu/variables.tf b/tests/tofu/variables.tf new file mode 100644 index 0000000000..c8da860aee --- /dev/null +++ b/tests/tofu/variables.tf @@ -0,0 +1,44 @@ +variable "gke_project" { + description = "The project ID where the GKE 
cluster will be created." + type = string +} + +variable "gke_cluster_name" { + description = "The name of the GKE cluster." + type = string +} + +variable "gke_cluster_region" { + description = "The region where the GKE cluster will be created." + type = string + default = "us-west1" +} + +variable "gke_machine_type" { + description = "The type of machine to use for the nodes." + type = string + default = "e2-medium" +} + +variable "gke_num_nodes" { + description = "The number of nodes to create in the cluster." + type = number + default = 3 +} + +variable "gke_nodes_service_account" { + description = "The service account to use for the nodes." + type = string +} + +variable "vm_service_account" { + description = "The service account to use for the VM." + type = string + +} + +variable "ngf_branch" { + description = "The branch of the NGF repository to use." + type = string + default = "main" +}