Skip to content

Commit

Permalink
smoketest: Add APM Managed tests (#8477)
Browse files Browse the repository at this point in the history
Adds two new smoke tests which migrate the APM Server from standalone to
managed mode. All tests ingest data and assert its existence in ES
after the deployment has been created / upgraded.

- `legacy-managed.sh`: Creates a `7.17.latest` deployment, migrates from
    standalone to managed mode.
- `standalone-major-managed.sh`: Creates a `7.17.latest` deployment,
    upgrades to the `8.latest.latest`, and migrates from standalone to
    managed mode.

The second test takes significantly longer since major version upgrades
take a decent amount of time (~10m).

Signed-off-by: Marc Lopez Rubio <[email protected]>
  • Loading branch information
marclop authored Jul 5, 2022
1 parent 183586d commit 41defd4
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 30 deletions.
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -290,4 +290,6 @@ SMOKETEST_VERSIONS ?= latest

# Runs the smoke tests for every comma-separated entry in SMOKETEST_VERSIONS.
# The basic upgrade test runs for each version; the standalone -> managed
# migration tests only run for 7.17. Each script runs in a subshell so the
# working directory is restored automatically, and any failing script aborts
# the recipe immediately instead of being masked by the loop's last command.
.PHONY: smoketest
smoketest:
	@ echo "-> Running smoke tests for versions: $(SMOKETEST_VERSIONS)..."
	@ for version in $(shell echo $(SMOKETEST_VERSIONS) | tr ',' ' '); do \
		( cd ./testing/smoke/basic_upgrade && ./basic-upgrade.sh "$$version" ) || exit 1; \
		if [ "$$version" = "7.17" ]; then \
			( cd ./testing/smoke/basic_upgrade && ./legacy-managed.sh && ./standalone-major-managed.sh ) || exit 1; \
		fi; \
	done
	@ echo "-> Smoke tests passed!"
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,19 @@ else
exit 5
fi

echo "-> Running basic upgrade smoke test for version ${VERSION}"

. $(git rev-parse --show-toplevel)/testing/smoke/lib.sh

trap "terraform_destroy" EXIT

terraform_apply ${PREV_LATEST_VERSION} ${INTEGRATIONS_SERVER}

ELASTICSEARCH_URL=$(terraform output -raw elasticsearch_url)
ELASTICSEARCH_USER=$(terraform output -raw elasticsearch_username)
ELASTICSEARCH_PASS=$(terraform output -raw elasticsearch_password)
APM_AUTH_HEADER="Authorization: Bearer $(terraform output -raw apm_secret_token)"
APM_SERVER_URL=$(terraform output -raw apm_server_url)

healthcheck 1
send_events
${ASSERT_EVENTS_FUNC} ${PREV_LATEST_VERSION}

echo "-> Upgrading APM Server to ${LATEST_VERSION}"
echo stack_version=\"${LATEST_VERSION}\" > terraform.tfvars
terraform_apply ${LATEST_VERSION} ${INTEGRATIONS_SERVER}

healthcheck 1
send_events
${ASSERT_EVENTS_FUNC} ${LATEST_VERSION}

echo "-> Smoke tests passed!"
23 changes: 23 additions & 0 deletions testing/smoke/basic_upgrade/legacy-managed.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
#
# Smoke test: creates a 7.17.latest deployment with a standalone APM Server,
# ingests events and asserts they were indexed in the legacy indices, then
# migrates the server to managed mode and asserts the newly ingested events
# exist in the data streams.
#
# Relies on the helpers sourced from testing/smoke/lib.sh (terraform_apply,
# terraform_destroy, healthcheck, send_events, upgrade_managed and the
# *_assert_events functions).

set -eo pipefail

VERSION=7.17
LATEST_VERSION=$(curl -s --fail "https://artifacts-api.elastic.co/v1/versions/${VERSION}" | jq -r '.version.builds[0].version')

# `jq -r` prints the literal string "null" and still exits 0 when the path is
# missing from the response, so guard against deploying a bogus version.
if [[ -z "${LATEST_VERSION}" || "${LATEST_VERSION}" == "null" ]]; then
  echo "-> Unable to resolve the latest ${VERSION} version" >&2
  exit 1
fi

echo "-> Running ${LATEST_VERSION} standalone to ${LATEST_VERSION} managed upgrade"

# Quoted so that a repository path containing spaces is sourced correctly.
. "$(git rev-parse --show-toplevel)/testing/smoke/lib.sh"

# Always tear down the deployment, whether the test passes or fails.
trap terraform_destroy EXIT

terraform_apply "${LATEST_VERSION}"
healthcheck 1
send_events
legacy_assert_events "${LATEST_VERSION}"

echo "-> Upgrading APM Server to managed mode"
upgrade_managed "${LATEST_VERSION}"
healthcheck 1
send_events
data_stream_assert_events "${LATEST_VERSION}"
7 changes: 6 additions & 1 deletion testing/smoke/basic_upgrade/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,14 @@ output "apm_server_url" {
description = "The APM Server URL"
}

# Exposes the Kibana URL reported by the ec_deployment module so that callers
# can read it with `terraform output -raw kibana_url`.
output "kibana_url" {
value = module.ec_deployment.kibana_url
description = "The Kibana URL"
}

# Exposes the Elasticsearch URL reported by the ec_deployment module. The
# stale, incorrect "The APM Server URL" description has been dropped: an
# output block may only carry a single `description` argument.
output "elasticsearch_url" {
  value       = module.ec_deployment.elasticsearch_url
  description = "The Elasticsearch URL"
}

output "elasticsearch_username" {
Expand Down
30 changes: 30 additions & 0 deletions testing/smoke/basic_upgrade/standalone-major-managed.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#
# Smoke test: creates a 7.17.latest deployment with a standalone APM Server,
# upgrades it to the latest non-snapshot 8.x release, and finally migrates the
# server to managed mode. Events are ingested and asserted after every step.
#
# Relies on the helpers sourced from testing/smoke/lib.sh (terraform_apply,
# terraform_destroy, healthcheck, send_events, upgrade_managed and the
# *_assert_events functions).

set -eo pipefail

VERSION=7.17
LATEST_VERSION=$(curl -s --fail "https://artifacts-api.elastic.co/v1/versions/${VERSION}" | jq -r '.version.builds[0].version')
VERSIONS=$(curl -s --fail https://artifacts-api.elastic.co/v1/versions)
# Anchor on "8." so that only 8.x versions are matched; the last entry of the
# API listing is taken as the latest one.
NEXT_MAJOR_LATEST=$(echo "${VERSIONS}" | jq -r '.versions[]' | grep -v 'SNAPSHOT' | grep '^8\.' | tail -1)

# `jq -r` prints the literal string "null" and still exits 0 when the path is
# missing from the response, so guard against deploying a bogus version.
if [[ -z "${LATEST_VERSION}" || "${LATEST_VERSION}" == "null" ]]; then
  echo "-> Unable to resolve the latest ${VERSION} version" >&2
  exit 1
fi
if [[ -z "${NEXT_MAJOR_LATEST}" || "${NEXT_MAJOR_LATEST}" == "null" ]]; then
  echo "-> Unable to resolve the latest 8.x version" >&2
  exit 1
fi

echo "-> Running ${LATEST_VERSION} standalone to ${NEXT_MAJOR_LATEST} to ${NEXT_MAJOR_LATEST} managed"

# Quoted so that a repository path containing spaces is sourced correctly.
. "$(git rev-parse --show-toplevel)/testing/smoke/lib.sh"

# Always tear down the deployment, whether the test passes or fails.
trap terraform_destroy EXIT

# Step 1: standalone 7.17.
terraform_apply "${LATEST_VERSION}"
healthcheck 1
send_events
legacy_assert_events "${LATEST_VERSION}"

# Step 2: major version upgrade, still standalone.
terraform_apply "${NEXT_MAJOR_LATEST}"
healthcheck 1
send_events
data_stream_assert_events "${NEXT_MAJOR_LATEST}"

# Step 3: migrate to managed mode on the new major version.
upgrade_managed "${NEXT_MAJOR_LATEST}"
healthcheck 1
send_events
# Assert there are 2 instances of the same event, since we ingested data twice.
data_stream_assert_events "${NEXT_MAJOR_LATEST}" 2
73 changes: 56 additions & 17 deletions testing/smoke/lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,60 @@ terraform_apply() {
if [[ ! -z ${2} ]] && [[ ${2} ]]; then echo integrations_server=true >> terraform.tfvars; fi
terraform init
terraform apply -auto-approve

if [[ ${EXPORTED_AUTH} ]]; then
return
fi
ELASTICSEARCH_URL=$(terraform output -raw elasticsearch_url)
ELASTICSEARCH_USER=$(terraform output -raw elasticsearch_username)
ELASTICSEARCH_PASS=$(terraform output -raw elasticsearch_password)
APM_AUTH_HEADER="Authorization: Bearer $(terraform output -raw apm_secret_token)"
APM_SERVER_URL=$(terraform output -raw apm_server_url)
KIBANA_URL=$(terraform output -raw kibana_url)
EXPORTED_AUTH=true
}

#######################################
# EXIT trap handler: reports a failed run, destroys the Terraform-managed
# infrastructure and removes the generated terraform.tfvars file.
# Globals:   none
# Arguments: none (reads the status of the last command via $?)
# Outputs:   progress messages on stdout
# Returns:   never returns; exits with the status the script was exiting
#            with, or with the status of `terraform destroy` when the script
#            itself succeeded but the teardown failed.
#######################################
terraform_destroy() {
  # Must be captured first, before any other command overwrites $?.
  local exit_code=$?
  local destroy_status=0

  if [[ ${exit_code} -gt 0 ]]; then
    echo "-> Smoke tests FAILED!!"
  fi

  echo "-> Destroying the underlying infrastructure..."
  # Capture the status instead of letting errexit abort the handler, so the
  # tfvars file is always cleaned up and the original exit code is preserved.
  terraform destroy -auto-approve || destroy_status=$?
  rm -f terraform.tfvars

  # Do not report success when the infrastructure could not be destroyed.
  if [[ ${exit_code} -eq 0 ]]; then
    exit_code=${destroy_status}
  fi
  exit "${exit_code}"
}

#######################################
# Searches an Elasticsearch index for documents matching FIELD=VALUE that were
# produced by the given observer version, and asserts the number of hits.
# Globals:   ELASTICSEARCH_USER, ELASTICSEARCH_PASS, ELASTICSEARCH_URL (read)
#            RESULT (written)
# Arguments: $1 - index name or pattern
#            $2 - field to match
#            $3 - expected field value
#            $4 - expected observer.version
#            $5 - expected number of hits (optional, defaults to 1)
# Outputs:   progress messages on stdout
# Returns:   0 on success; exits with status 2 when the assertion fails
#######################################
assert_document() {
  local INDEX=${1}
  local FIELD=${2}
  local VALUE=${3}
  local VERSION=${4}
  local ENTRIES=${5:-1}
  local AUTH=${ELASTICSEARCH_USER}:${ELASTICSEARCH_PASS}
  local URL=${ELASTICSEARCH_URL}/${INDEX}/_search
  local QUERY="{\"query\":{\"bool\":{\"must\":[{\"match\":{\"${FIELD}\":\"${VALUE}\"}},{\"match\":{\"observer.version\":\"${VERSION}\"}}]}}}"
  # RESULT needs to be a global variable in order to be able to parse
  # the whole result in assert_entry. Passing it as a string
  # argument doesn't work well.
  RESULT=$(curl -s -u "${AUTH}" -XGET "${URL}" -H 'Content-Type: application/json' -d"${QUERY}")

  echo "-> Asserting ${INDEX} contains expected documents documents..."
  assert_entry "${FIELD}" "${VALUE}" "${ENTRIES}"
}

#######################################
# Asserts that the search response stored in RESULT reports exactly the
# expected number of hits.
# Globals:   RESULT (read)
# Arguments: $1 - field that was matched (used in messages only)
#            $2 - value that was matched (used in messages only)
#            $3 - expected number of hits (optional, defaults to 1)
# Outputs:   assertion outcome on stdout; the raw response on failure
# Returns:   0 on success; exits with status 2 on mismatch
#######################################
assert_entry() {
  local FIELD=${1}
  local VALUE=${2}
  local ENTRIES=${3:-1}
  local MSG="${FIELD}=${VALUE}"
  local HITS
  # Declared separately so a jq failure is not masked by `local`.
  HITS=$(echo "${RESULT}" | jq .hits.total.value) || HITS=""

  # String comparison: an empty or "null" hit count can never be mistaken for
  # a number, which an arithmetic comparison would silently evaluate to 0.
  if [[ "${HITS}" != "${ENTRIES}" ]]; then
    echo "Didn't find ${ENTRIES} indexed documents ${MSG}, total hits ${HITS}"
    echo "${RESULT}"
    exit 2
  else
    echo "-> Asserted ${ENTRIES} ${MSG} exists"
  fi
}

# Backward-compatible alias for the previous helper name: asserts exactly one
# hit for FIELD=VALUE.
assert_single_entry() {
  assert_entry "${1}" "${2}" 1
}

Expand All @@ -56,33 +73,35 @@ send_events() {
curl --fail --data-binary @${INTAKE_DATA} -H "${APM_AUTH_HEADER}" -H "${INTAKE_HEADER}" ${APM_SERVER_INTAKE}

# TODO(marclop). It would be best to query Elasticsearch until at least X documents have been ingested.
sleep 5
sleep 10
}

#######################################
# Asserts that the expected error, span, transaction and metric documents
# exist in the legacy (apm-<version>-*) indices.
# Arguments: $1 - APM Server version; used for the index prefix and passed
#                 on as the expected observer version
#            $2 - expected number of matching documents per assertion
#                 (optional, defaults to 1)
# Returns:   status of the last assert_document call
#######################################
legacy_assert_events() {
  local VERSION=${1}
  local ENTRIES=${2:-1}
  local INDEX="apm-${VERSION}"
  # Each assertion runs exactly once and honours ENTRIES.
  assert_document "${INDEX}-error-*" "error.id" "9876543210abcdeffedcba0123456789" "${VERSION}" "${ENTRIES}"
  assert_document "${INDEX}-span-*" "span.id" "1234567890aaaade" "${VERSION}" "${ENTRIES}"
  assert_document "${INDEX}-transaction-*" "transaction.id" "4340a8e0df1906ecbfa9" "${VERSION}" "${ENTRIES}"
  assert_document "${INDEX}-metric-*" "transaction.type" "request" "${VERSION}" "${ENTRIES}"
}

#######################################
# Asserts that the expected error, span, transaction and metric documents
# exist in the APM data streams.
# Arguments: $1 - expected observer version
#            $2 - expected number of matching documents per assertion
#                 (optional, defaults to 1)
# Returns:   status of the last assert_document call
#######################################
data_stream_assert_events() {
  local TRACES_INDEX="traces-apm-*"
  local ERRORS_INDEX="logs-apm.error-*"
  local METRICS_INDEX="metrics-apm.internal-*"
  local VERSION=${1}
  local ENTRIES=${2:-1}
  # The index patterns are quoted so the shell never expands them against
  # files in the working directory. Each assertion runs exactly once.
  assert_document "${ERRORS_INDEX}" "error.id" "9876543210abcdeffedcba0123456789" "${VERSION}" "${ENTRIES}"
  assert_document "${TRACES_INDEX}" "span.id" "1234567890aaaade" "${VERSION}" "${ENTRIES}"
  assert_document "${TRACES_INDEX}" "transaction.id" "4340a8e0df1906ecbfa9" "${VERSION}" "${ENTRIES}"
  assert_document "${METRICS_INDEX}" "transaction.type" "request" "${VERSION}" "${ENTRIES}"
}

healthcheck() {
local PUBLISH_READY=$(curl -s --fail -H "${APM_AUTH_HEADER}" ${APM_SERVER_URL} | jq '.publish_ready')
if [[ ! ${PUBLISH_READY} ]]; then
local MAX_RETRIES=5
local MAX_RETRIES=10
if [[ ${1} -gt 0 ]] && [[ ${1} -lt ${MAX_RETRIES} ]]; then
echo "-> APM Server isn't ready to receive events, retrying (${1}/${MAX_RETRIES})..."
sleep $((1 * ${1}))
Expand All @@ -96,3 +115,23 @@ healthcheck() {
echo "-> APM Server ready!"
fi
}

#######################################
# Migrates a standalone APM Server to managed mode through the Kibana
# cloud_apm_package_policy endpoint, then waits for the new server to start.
# Globals:   ELASTICSEARCH_USER, ELASTICSEARCH_PASS, KIBANA_URL (read)
# Arguments: $1 - current APM Server version (used in messages only)
# Outputs:   progress messages on stdout
# Returns:   0 on success; exits with status 6 when the request fails or the
#            response does not report the package policy as enabled
#######################################
upgrade_managed() {
  local CURR_VERSION=${1}
  local AUTH=${ELASTICSEARCH_USER}:${ELASTICSEARCH_PASS}
  local URL_MIGRATE=${KIBANA_URL}/internal/apm/fleet/cloud_apm_package_policy
  local RESULT
  local ENABLED

  echo "-> Upgrading APM Server ${CURR_VERSION} to managed mode..."
  # A failed request leaves RESULT empty, which is reported by the check below
  # rather than aborting silently under errexit.
  RESULT=$(curl -s --fail -H 'kbn-xsrf: true' -u "${AUTH}" -XPOST "${URL_MIGRATE}") || RESULT=""
  ENABLED=$(echo "${RESULT}" | jq '.cloudApmPackagePolicy.enabled') || ENABLED=""

  # jq prints "false" or "null" when the policy is disabled or missing. Both
  # are non-empty strings, so compare against "true" explicitly.
  if [[ "${ENABLED}" != "true" ]]; then
    echo "-> Failed migrating and enabling the APM Integration"
    exit 6
  fi

  # Allow the new server to start serving requests. Waiting for an arbitrary 70 seconds
  # period is not ideal, but there aren't any other APIs we can query.
  echo "-> Waiting for 70 seconds for the APM Server to become available..."
  sleep 70
}

0 comments on commit 41defd4

Please sign in to comment.