From 41defd45614262e4b0e99974bf2f1febe6715bdc Mon Sep 17 00:00:00 2001 From: Marc Lopez Rubio Date: Tue, 5 Jul 2022 09:01:17 +0800 Subject: [PATCH] smoketest: Add APM Managed tests (#8477) Adds two new smoke tests which migrate the APM Server from standalone to managed mode. All tests ingest data and assert its existence in ES after the deployment has been created / upgraded. - `legacy-managed.sh`: Creates a `7.17.latest` deployment, migrates from standalone to managed mode. - `standalone-major-managed.sh`: Creates a `7.17.latest` deployment, upgrades to the `8.latest.latest`, and migrates from standalone to managed mode. The second test takes significantly longer since major version upgrades take a decent amount of time (~10m). Signed-off-by: Marc Lopez Rubio --- Makefile | 4 +- .../{test.sh => basic-upgrade.sh} | 13 +--- testing/smoke/basic_upgrade/legacy-managed.sh | 23 ++++++ testing/smoke/basic_upgrade/main.tf | 7 +- .../basic_upgrade/standalone-major-managed.sh | 30 ++++++++ testing/smoke/lib.sh | 73 ++++++++++++++----- 6 files changed, 120 insertions(+), 30 deletions(-) rename testing/smoke/basic_upgrade/{test.sh => basic-upgrade.sh} (79%) create mode 100755 testing/smoke/basic_upgrade/legacy-managed.sh create mode 100755 testing/smoke/basic_upgrade/standalone-major-managed.sh diff --git a/Makefile b/Makefile index 3966535b168..9533b15d6ae 100644 --- a/Makefile +++ b/Makefile @@ -290,4 +290,6 @@ SMOKETEST_VERSIONS ?= latest .PHONY: smoketest smoketest: - @ for version in $(shell echo $(SMOKETEST_VERSIONS) | tr ',' ' '); do cd ./testing/smoke/basic_upgrade && ./test.sh $$version; cd - ; done + @ echo "-> Running smoke tests for versions: $(SMOKETEST_VERSIONS)..." + @ for version in $(shell echo $(SMOKETEST_VERSIONS) | tr ',' ' '); do cd ./testing/smoke/basic_upgrade && ./basic-upgrade.sh $$version; if [ $$version == 7.17 ]; then ./legacy-managed.sh && ./standalone-major-managed.sh; fi; cd -; done + @ echo "-> Smoke tests passed!" 
diff --git a/testing/smoke/basic_upgrade/test.sh b/testing/smoke/basic_upgrade/basic-upgrade.sh similarity index 79% rename from testing/smoke/basic_upgrade/test.sh rename to testing/smoke/basic_upgrade/basic-upgrade.sh index 2a7279f3a9e..3bab3a67615 100755 --- a/testing/smoke/basic_upgrade/test.sh +++ b/testing/smoke/basic_upgrade/basic-upgrade.sh @@ -25,28 +25,19 @@ else exit 5 fi +echo "-> Running basic upgrade smoke test for version ${VERSION}" + . $(git rev-parse --show-toplevel)/testing/smoke/lib.sh trap "terraform_destroy" EXIT terraform_apply ${PREV_LATEST_VERSION} ${INTEGRATIONS_SERVER} - -ELASTICSEARCH_URL=$(terraform output -raw elasticsearch_url) -ELASTICSEARCH_USER=$(terraform output -raw elasticsearch_username) -ELASTICSEARCH_PASS=$(terraform output -raw elasticsearch_password) -APM_AUTH_HEADER="Authorization: Bearer $(terraform output -raw apm_secret_token)" -APM_SERVER_URL=$(terraform output -raw apm_server_url) - healthcheck 1 send_events ${ASSERT_EVENTS_FUNC} ${PREV_LATEST_VERSION} echo "-> Upgrading APM Server to ${LATEST_VERSION}" -echo stack_version=\"${LATEST_VERSION}\" > terraform.tfvars terraform_apply ${LATEST_VERSION} ${INTEGRATIONS_SERVER} - healthcheck 1 send_events ${ASSERT_EVENTS_FUNC} ${LATEST_VERSION} - -echo "-> Smoke tests passed!" diff --git a/testing/smoke/basic_upgrade/legacy-managed.sh b/testing/smoke/basic_upgrade/legacy-managed.sh new file mode 100755 index 00000000000..4d5dd05d68a --- /dev/null +++ b/testing/smoke/basic_upgrade/legacy-managed.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -eo pipefail + +VERSION=7.17 +LATEST_VERSION=$(curl -s --fail https://artifacts-api.elastic.co/v1/versions/${VERSION} | jq -r '.version.builds[0].version') + +echo "-> Running ${LATEST_VERSION} standalone to ${LATEST_VERSION} managed upgrade" + +. 
$(git rev-parse --show-toplevel)/testing/smoke/lib.sh + +trap "terraform_destroy" EXIT + +terraform_apply ${LATEST_VERSION} +healthcheck 1 +send_events +legacy_assert_events ${LATEST_VERSION} + +echo "-> Upgrading APM Server to managed mode" +upgrade_managed ${LATEST_VERSION} +healthcheck 1 +send_events +data_stream_assert_events ${LATEST_VERSION} diff --git a/testing/smoke/basic_upgrade/main.tf b/testing/smoke/basic_upgrade/main.tf index fc63c1a74ec..8d547157ce5 100644 --- a/testing/smoke/basic_upgrade/main.tf +++ b/testing/smoke/basic_upgrade/main.tf @@ -50,9 +50,14 @@ output "apm_server_url" { description = "The APM Server URL" } +output "kibana_url" { + value = module.ec_deployment.kibana_url + description = "The Kibana URL" +} + output "elasticsearch_url" { value = module.ec_deployment.elasticsearch_url - description = "The APM Server URL" + description = "The Elasticsearch URL" } output "elasticsearch_username" { diff --git a/testing/smoke/basic_upgrade/standalone-major-managed.sh b/testing/smoke/basic_upgrade/standalone-major-managed.sh new file mode 100755 index 00000000000..6dadc210543 --- /dev/null +++ b/testing/smoke/basic_upgrade/standalone-major-managed.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -eo pipefail + +VERSION=7.17 +LATEST_VERSION=$(curl -s --fail https://artifacts-api.elastic.co/v1/versions/${VERSION} | jq -r '.version.builds[0].version') +VERSIONS=$(curl -s --fail https://artifacts-api.elastic.co/v1/versions) +NEXT_MAJOR_LATEST=$(echo ${VERSIONS} | jq -r '.versions[]' | grep -v 'SNAPSHOT' | grep '^8' | tail -1) + +echo "-> Running ${LATEST_VERSION} standalone to ${NEXT_MAJOR_LATEST} to ${NEXT_MAJOR_LATEST} managed" + +. 
$(git rev-parse --show-toplevel)/testing/smoke/lib.sh + +trap "terraform_destroy" EXIT + +terraform_apply ${LATEST_VERSION} +healthcheck 1 +send_events +legacy_assert_events ${LATEST_VERSION} + +terraform_apply ${NEXT_MAJOR_LATEST} +healthcheck 1 +send_events +data_stream_assert_events ${NEXT_MAJOR_LATEST} + +upgrade_managed ${NEXT_MAJOR_LATEST} +healthcheck 1 +send_events +# Assert there are 2 instances of the same event, since we ingested data twice. +data_stream_assert_events ${NEXT_MAJOR_LATEST} 2 diff --git a/testing/smoke/lib.sh b/testing/smoke/lib.sh index 745892c75a5..820337e1bcd 100644 --- a/testing/smoke/lib.sh +++ b/testing/smoke/lib.sh @@ -6,14 +6,28 @@ terraform_apply() { if [[ ! -z ${2} ]] && [[ ${2} ]]; then echo integrations_server=true >> terraform.tfvars; fi terraform init terraform apply -auto-approve + + if [[ ${EXPORTED_AUTH} ]]; then + return + fi + ELASTICSEARCH_URL=$(terraform output -raw elasticsearch_url) + ELASTICSEARCH_USER=$(terraform output -raw elasticsearch_username) + ELASTICSEARCH_PASS=$(terraform output -raw elasticsearch_password) + APM_AUTH_HEADER="Authorization: Bearer $(terraform output -raw apm_secret_token)" + APM_SERVER_URL=$(terraform output -raw apm_server_url) + KIBANA_URL=$(terraform output -raw kibana_url) + EXPORTED_AUTH=true } terraform_destroy() { exit_code=$? + if [[ ${exit_code} -gt 0 ]]; then + echo "-> Smoke tests FAILED!!" + fi echo "-> Destroying the underlying infrastructure..." terraform destroy -auto-approve rm -f terraform.tfvars - exit $exit_code + exit ${exit_code} } assert_document() { @@ -21,28 +35,31 @@ assert_document() { local FIELD=${2} local VALUE=${3} local VERSION=${4} + local ENTRIES=${5} + if [[ -z ${ENTRIES} ]]; then ENTRIES=1; fi local AUTH=${ELASTICSEARCH_USER}:${ELASTICSEARCH_PASS} local URL=${ELASTICSEARCH_URL}/${INDEX}/_search # RESULT needs to be a global variable in order to be able to parse - # the whole result in assert_single_entry. 
Passing it as a string + # the whole result in assert_entry. Passing it as a string # argument doesn't work well. RESULT=$(curl -s -u ${AUTH} -XGET "${URL}" -H 'Content-Type: application/json' -d"{\"query\":{\"bool\":{\"must\":[{\"match\":{\"${FIELD}\":\"${VALUE}\"}},{\"match\":{\"observer.version\":\"${VERSION}\"}}]}}}") echo "-> Asserting ${INDEX} contains expected documents documents..." - assert_single_entry ${FIELD} ${VALUE} + assert_entry ${FIELD} ${VALUE} ${ENTRIES} } -assert_single_entry() { +assert_entry() { local FIELD=${1} local VALUE=${2} + local ENTRIES=${3} local HITS=$(echo ${RESULT} | jq .hits.total.value) local MSG="${FIELD}=${VALUE}" - if [[ ${HITS} -ne 1 ]]; then - echo "Didn't find the indexed document ${MSG}, total hits ${HITS}" + if [[ ${HITS} -ne ${ENTRIES} ]]; then + echo "Didn't find ${ENTRIES} indexed documents ${MSG}, total hits ${HITS}" echo ${RESULT} exit 2 else - echo "-> Asserted 1 ${MSG} exists" + echo "-> Asserted ${ENTRIES} ${MSG} exists" fi } @@ -56,16 +73,17 @@ send_events() { curl --fail --data-binary @${INTAKE_DATA} -H "${APM_AUTH_HEADER}" -H "${INTAKE_HEADER}" ${APM_SERVER_INTAKE} # TODO(marclop). It would be best to query Elasticsearch until at least X documents have been ingested. 
- sleep 5 + sleep 10 } legacy_assert_events() { local INDEX="apm-${1}" local VERSION=${1} - assert_document "${INDEX}-error-*" "error.id" "9876543210abcdeffedcba0123456789" ${VERSION} - assert_document "${INDEX}-span-*" "span.id" "1234567890aaaade" ${VERSION} - assert_document "${INDEX}-transaction-*" "transaction.id" "4340a8e0df1906ecbfa9" ${VERSION} - assert_document "${INDEX}-metric-*" "transaction.type" "request" ${VERSION} + local ENTRIES=${2} + assert_document "${INDEX}-error-*" "error.id" "9876543210abcdeffedcba0123456789" ${VERSION} ${ENTRIES} + assert_document "${INDEX}-span-*" "span.id" "1234567890aaaade" ${VERSION} ${ENTRIES} + assert_document "${INDEX}-transaction-*" "transaction.id" "4340a8e0df1906ecbfa9" ${VERSION} ${ENTRIES} + assert_document "${INDEX}-metric-*" "transaction.type" "request" ${VERSION} ${ENTRIES} } data_stream_assert_events() { @@ -73,16 +91,17 @@ data_stream_assert_events() { local ERRORS_INDEX="logs-apm.error-*" local METRICS_INDEX="metrics-apm.internal-*" local VERSION=${1} - assert_document ${ERRORS_INDEX} "error.id" "9876543210abcdeffedcba0123456789" ${VERSION} - assert_document ${TRACES_INDEX} "span.id" "1234567890aaaade" ${VERSION} - assert_document ${TRACES_INDEX} "transaction.id" "4340a8e0df1906ecbfa9" ${VERSION} - assert_document ${METRICS_INDEX} "transaction.type" "request" ${VERSION} + local ENTRIES=${2} + assert_document ${ERRORS_INDEX} "error.id" "9876543210abcdeffedcba0123456789" ${VERSION} ${ENTRIES} + assert_document ${TRACES_INDEX} "span.id" "1234567890aaaade" ${VERSION} ${ENTRIES} + assert_document ${TRACES_INDEX} "transaction.id" "4340a8e0df1906ecbfa9" ${VERSION} ${ENTRIES} + assert_document ${METRICS_INDEX} "transaction.type" "request" ${VERSION} ${ENTRIES} } healthcheck() { local PUBLISH_READY=$(curl -s --fail -H "${APM_AUTH_HEADER}" ${APM_SERVER_URL} | jq '.publish_ready') if [[ ! 
${PUBLISH_READY} ]]; then - local MAX_RETRIES=5 + local MAX_RETRIES=10 if [[ ${1} -gt 0 ]] && [[ ${1} -lt ${MAX_RETRIES} ]]; then echo "-> APM Server isn't ready to receive events, retrying (${1}/${MAX_RETRIES})..." sleep $((1 * ${1})) @@ -96,3 +115,23 @@ healthcheck() { echo "-> APM Server ready!" fi } + +upgrade_managed() { + local CURR_VERSION=${1} + local AUTH=${ELASTICSEARCH_USER}:${ELASTICSEARCH_PASS} + local URL_MIGRATE=${KIBANA_URL}/internal/apm/fleet/cloud_apm_package_policy + + echo "-> Upgrading APM Server ${CURR_VERSION} to managed mode..." + local RESULT=$(curl -s --fail -H 'kbn-xsrf: true' -u "${AUTH}" -XPOST ${URL_MIGRATE}) + local ENABLED=$(echo ${RESULT} | jq '.cloudApmPackagePolicy.enabled') + + if [[ ! ${ENABLED} ]]; then + echo "-> Failed migrating and enabling the APM Integration" + exit 6 + fi + + # Allow the new server to start serving requests. Waiting for an arbitrary 70 seconds + # period is not ideal, but there aren't any other APIs we can query. + echo "-> Waiting for 70 seconds for the APM Server to become available..." + sleep 70 +}