Soak tests #861
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Soak tests | |
env: | |
AWS_DEFAULT_REGION: us-east-1 | |
on: | |
schedule: | |
- cron: '0 14 * * 1,3,5' # Mon, Wed, Fri morning PST | |
workflow_dispatch: | |
inputs: | |
soak_config: | |
description: 'set memory/cpu threshold, soak time (s), emitter interval' | |
required: false | |
default: '-t 1800' | |
permissions: | |
id-token: write | |
contents: read | |
jobs: | |
soaking-test: | |
runs-on: ubuntu-22.04 | |
name: Soak Test - (${{ matrix.language }}, ${{ matrix.sample-app }}, ${{ matrix.instrumentation-type }}, ${{ matrix.architecture }}) | |
strategy: | |
fail-fast: false | |
matrix: | |
# FIXME: (enowell) Both .NET and Go Sample Apps want to Soak Test | |
# the same collector-only lambda layer. We count on Soaking Tests to | |
# test whether a layer is ready for release. However, the current | |
# workflow can only test one Sample App per Lambda Layer. We should | |
# create a separate workflow to soak-test Layers with multiple | |
# soak tests. | |
language: [ go, java, nodejs, python ] | |
sample-app: [ aws-sdk ] | |
instrumentation-type: [ wrapper ] | |
architecture: [ amd64, arm64 ] | |
include: | |
# FIXME: (enowell) Same problem as above, we cannot Soak Test the | |
# other java app (okhttp) because it will create its own Lambda Layer | |
# instead of soak test the same one as the `aws-sdk` sample app. | |
- language: java | |
sample-app: aws-sdk | |
instrumentation-type: agent | |
architecture: amd64 | |
- language: java | |
sample-app: aws-sdk | |
instrumentation-type: agent | |
architecture: arm64 | |
outputs: | |
go-wrapper-error: ${{ steps.set-layer-if-error-output.outputs.go-wrapper-error }} | |
nodejs-wrapper-error: ${{ steps.set-layer-if-error-output.outputs.nodejs-wrapper-error }} | |
python-wrapper-error: ${{ steps.set-layer-if-error-output.outputs.python-wrapper-error }} | |
java-agent-error: ${{ steps.set-layer-if-error-output.outputs.java-agent-error }} | |
java-wrapper-error: ${{ steps.set-layer-if-error-output.outputs.java-wrapper-error }} | |
# NOTE: (enowell) When we release a Lambda Layer, we will ALWAYS release | |
# all the architectures TOGETHER. So all architectures will be at the same | |
# version. | |
go-wrapper-version: ${{ steps.set-collector-layer-version-output.outputs.go-wrapper-version }} | |
nodejs-wrapper-version: ${{ steps.set-sdk-layer-version-output.outputs.nodejs-wrapper-version }} | |
python-wrapper-version: ${{ steps.set-sdk-layer-version-output.outputs.python-wrapper-version }} | |
java-agent-version: ${{ steps.set-sdk-layer-version-output.outputs.java-agent-version }} | |
java-wrapper-version: ${{ steps.set-sdk-layer-version-output.outputs.java-wrapper-version }} | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-go@v5 | |
with: | |
go-version: '~1.21.10' | |
check-latest: true | |
- uses: actions/setup-java@v4 | |
if: ${{ matrix.language == 'java' }} | |
with: | |
distribution: corretto | |
java-version: '17' | |
- name: Cache (Java) | |
uses: actions/cache@v4 | |
if: ${{ matrix.language == 'java' }} | |
with: | |
path: | | |
~/go/pkg/mod | |
~/.gradle/caches | |
~/.gradle/wrapper | |
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}-go-${{ hashFiles('**/go.sum') }} | |
restore-keys: | | |
${{ runner.os }}-gradle- | |
- name: Get default soaking test configuration | |
# CPU baseline was obtained empirically based on the max value observed for Go in a complete run of soak test | |
if: ${{ matrix.language != 'java' }} | |
run: | | |
echo SOAKING_TEST_CONFIG="-c 120 -m 70" | tee --append $GITHUB_ENV | |
- name: Get java soaking test configuration | |
# NOTE (enowell): Java's JVM is heavy and needs more memory than others. | |
if: ${{ matrix.language == 'java' }} | |
run: | | |
echo SOAKING_TEST_CONFIG="-c 200 -m 90" | tee --append $GITHUB_ENV | |
- uses: actions/setup-node@v4 | |
if: ${{ matrix.language == 'nodejs' }} | |
with: | |
node-version: '16' | |
- name: Cache (NodeJS) | |
uses: actions/cache@v4 | |
if: ${{ matrix.language == 'nodejs' }} | |
with: | |
path: | | |
~/go/pkg/mod | |
~/.npm | |
key: ${{ runner.os }}-node-${{ hashFiles('**/package.json') }}-go-${{ hashFiles('**/go.sum') }} | |
restore-keys: | | |
${{ runner.os }}-node- | |
- uses: actions/setup-python@v5 | |
if: ${{ matrix.language == 'python' }} | |
with: | |
python-version: '3.x' | |
- name: Cache (Python) | |
uses: actions/cache@v4 | |
if: ${{ matrix.language == 'python' }} | |
with: | |
path: | | |
~/go/pkg/mod | |
~/.cache/pip | |
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-go-${{ hashFiles('**/go.sum') }} | |
restore-keys: | | |
${{ runner.os }}-pip- | |
- uses: actions/setup-dotnet@v4 | |
if: ${{ matrix.language == 'dotnet' }} | |
with: | |
dotnet-version: '6.0.405' | |
- uses: aws-actions/[email protected] | |
with: | |
role-to-assume: ${{ secrets.INTEG_TEST_LAMBDA_ROLE_ARN }} | |
mask-aws-account-id: false | |
aws-region: ${{ env.AWS_DEFAULT_REGION }} | |
# Default session duration is 1 hour with OIDC. | |
role-duration-seconds: 14400 # 4 hours | |
- name: Patch ADOT | |
run: ./patch-upstream.sh | |
# Login to ECR since may be needed for Python build image. | |
- name: Login to Public ECR | |
uses: docker/login-action@v3 | |
with: | |
registry: public.ecr.aws | |
- name: Build layers / functions | |
run: GOARCH=${{ matrix.architecture }} ./build.sh ${{ matrix.architecture }} | |
working-directory: ${{ matrix.language }} | |
- name: Get Lambda Layer `amd64` architecture value | |
if: ${{ matrix.architecture == 'amd64' }} | |
run: echo LAMBDA_FUNCTION_ARCH=x86_64 | tee --append $GITHUB_ENV | |
- name: Get Lambda Layer `arm64` architecture value | |
if: ${{ matrix.architecture == 'arm64' }} | |
run: echo LAMBDA_FUNCTION_ARCH=arm64 | tee --append $GITHUB_ENV | |
- name: Get terraform directory | |
run: | | |
echo TERRAFORM_DIRECTORY=${{ matrix.language }}/integration-tests/${{ matrix.sample-app }}/${{ matrix.instrumentation-type }} | | |
tee --append $GITHUB_ENV | |
- uses: hashicorp/setup-terraform@v2 | |
with: | |
terraform_version: 1.3.1 | |
- name: Initialize terraform | |
run: terraform init | |
working-directory: ${{ env.TERRAFORM_DIRECTORY }} | |
- name: Get terraform Lambda function name | |
run: | | |
echo TERRAFORM_LAMBDA_FUNCTION_NAME=lambda-${{ matrix.language }}-${{ matrix.sample-app }}-${{ matrix.instrumentation-type }}-${{ matrix.architecture }}-${{ github.run_id }} | | |
tee --append $GITHUB_ENV | |
# NOTE: (enowell) We don't need to include `sample-app` in the Lambda | |
# Layer name because different apps should be use the same layer, not | |
# create their own. However, if we ever Soak Test multiple apps, we need | |
# to BE CAREFUL about not creating duplicate layer with the same name. | |
- name: Get terraform Lambda layer name | |
run: | | |
echo TERRAFORM_LAMBDA_LAYER_NAME=aws-otel-${{ matrix.language }}-${{ matrix.instrumentation-type }}-${{ matrix.architecture }}-${{ github.sha }} | | |
tee --append $GITHUB_ENV | |
- name: Apply terraform | |
run: terraform apply -auto-approve | |
working-directory: ${{ env.TERRAFORM_DIRECTORY }} | |
env: | |
TF_VAR_sdk_layer_name: ${{ env.TERRAFORM_LAMBDA_LAYER_NAME }} | |
TF_VAR_collector_layer_name: ${{ env.TERRAFORM_LAMBDA_LAYER_NAME }} | |
TF_VAR_function_name: ${{ env.TERRAFORM_LAMBDA_FUNCTION_NAME }} | |
TF_VAR_architecture: ${{ env.LAMBDA_FUNCTION_ARCH }} | |
- name: Extract endpoint | |
id: extract-endpoint | |
run: terraform output -raw api-gateway-url | |
working-directory: ${{ env.TERRAFORM_DIRECTORY }} | |
- name: Extract AMP endpoint | |
if: ${{ matrix.language == 'java' && matrix.sample-app == 'aws-sdk' && matrix.instrumentation-type == 'agent' }} | |
id: extract-amp-endpoint | |
run: terraform output -raw amp_endpoint | |
working-directory: ${{ env.TERRAFORM_DIRECTORY }} | |
- name: Extract SDK layer arn | |
id: extract-sdk-layer-arn | |
if: ${{ matrix.language != 'dotnet' && matrix.language != 'go' }} | |
run: terraform output -raw sdk_layer_arn | |
working-directory: ${{ env.TERRAFORM_DIRECTORY }} | |
- name: Extract Collector layer arn | |
id: extract-collector-layer-arn | |
if: ${{ matrix.language == 'dotnet' || matrix.language == 'go' }} | |
run: terraform output -raw collector_layer_arn | |
working-directory: ${{ env.TERRAFORM_DIRECTORY }} | |
# NOTE: (enowell) `terraform output` outputs additional text we are | |
# not interested in because the `hashicorp/setup-terraform@v1` has a | |
# wrapper. We solve this by using separate steps, because this text | |
# doesn't show up when accessed in later steps. | |
# | |
# See more: https://github.com/hashicorp/setup-terraform/issues/20 | |
- name: Set SDK layer version output | |
id: set-sdk-layer-version-output | |
if: ${{ matrix.language != 'dotnet' && matrix.language != 'go' }} | |
run: | | |
version=$(echo "${{ steps.extract-sdk-layer-arn.outputs.stdout }}" | cut -d : -f 8) | |
echo "Found version number: $version" | |
echo "${{ matrix.language }}-${{ matrix.instrumentation-type }}-version=$version" >> $GITHUB_OUTPUT | |
working-directory: ${{ env.TERRAFORM_DIRECTORY }} | |
- name: Set Collector layer version output | |
id: set-collector-layer-version-output | |
if: ${{ matrix.language == 'dotnet' || matrix.language == 'go' }} | |
run: | | |
version=$(echo "${{ steps.extract-collector-layer-arn.outputs.stdout }}" | cut -d : -f 8) | |
echo "Found version number: $version" | |
echo "${{ matrix.language }}-${{ matrix.instrumentation-type }}-version=$version" >> $GITHUB_OUTPUT | |
working-directory: ${{ env.TERRAFORM_DIRECTORY }} | |
- name: Send request to endpoint | |
run: curl -sS ${{ steps.extract-endpoint.outputs.stdout }} | |
- name: Checkout test framework | |
uses: actions/checkout@v4 | |
with: | |
repository: aws-observability/aws-otel-test-framework | |
path: test-framework | |
- name: validate trace sample | |
run: | | |
cp adot/utils/expected-templates/${{ matrix.language }}-${{ matrix.sample-app }}-${{ matrix.instrumentation-type }}.json \ | |
test-framework/validator/src/main/resources/expected-data-template/lambdaExpectedTrace.mustache | |
cd test-framework | |
./gradlew :validator:run --args="-c default-lambda-validation.yml --endpoint ${{ steps.extract-endpoint.outputs.stdout }} --region ${{ env.AWS_DEFAULT_REGION }}" | |
- name: validate java agent metric sample | |
if: ${{ matrix.language == 'java' && matrix.sample-app == 'aws-sdk' && matrix.instrumentation-type == 'agent' }} | |
run: | | |
cp adot/utils/expected-templates/${{ matrix.language }}-${{ matrix.sample-app }}-${{ matrix.instrumentation-type }}-metric.json \ | |
test-framework/validator/src/main/resources/expected-data-template/ampExpectedMetric.mustache | |
cd test-framework | |
./gradlew :validator:run --args="-c prometheus-static-metric-validation.yml --cortex-instance-endpoint ${{ steps.extract-amp-endpoint.outputs.stdout }} --region ${{ env.AWS_DEFAULT_REGION }}" | |
- name: Run soak test | |
run: | |
>- | |
docker run | |
--rm | |
-e AWS_DEFAULT_REGION | |
-e AWS_ACCESS_KEY_ID | |
-e AWS_SECRET_ACCESS_KEY | |
-e AWS_SESSION_TOKEN | |
public.ecr.aws/aws-otel-test/lambda-soak:latest | |
-n ${{ env.TERRAFORM_LAMBDA_FUNCTION_NAME }} | |
-e ${{ steps.extract-endpoint.outputs.stdout }} | |
${{ github.event.inputs.soak_config }} | |
${{ env.SOAKING_TEST_CONFIG }} | |
-a ${{ matrix.architecture }} | |
- name: Set output if layer Soak Tests has error | |
id: set-layer-if-error-output | |
if: ${{ failure() }} | |
run: echo "${{ matrix.language }}-${{ matrix.instrumentation-type }}-error=FAILED" >> $GITHUB_OUTPUT | |
- name: Remove sdk layers from terraform management to prevent deletion. | |
if: ${{ matrix.language != 'go' }} | |
run: terraform state rm aws_lambda_layer_version.sdk_layer | |
working-directory: ${{ env.TERRAFORM_DIRECTORY }} | |
- name: Remove collector layers from terraform management to prevent deletion. | |
if: ${{ matrix.language == 'go' }} | |
run: terraform state rm aws_lambda_layer_version.collector_layer | |
working-directory: ${{ env.TERRAFORM_DIRECTORY }} | |
- name: Destroy terraform | |
if: always() | |
run: terraform destroy -auto-approve | |
working-directory: ${{ env.TERRAFORM_DIRECTORY }} | |
env: | |
TF_VAR_architecture: ${{ env.LAMBDA_FUNCTION_ARCH }} | |
output-keywords: | |
if: ${{ always() }} | |
name: Output (${{ matrix.language }}, ${{ matrix.instrumentation-type }}) Layer Keyword | |
runs-on: ubuntu-22.04 | |
needs: | |
- soaking-test | |
strategy: | |
fail-fast: false | |
matrix: | |
language: [ go, java, nodejs, python ] | |
instrumentation-type: [ wrapper ] | |
include: | |
- language: java | |
instrumentation-type: agent | |
steps: | |
- name: Confirm none of the architecture soak tests for the layer failed | |
run: | | |
AT_LEAST_ONE_LAYER_SOAK_TEST_FAILED=$( | |
echo '${{ toJSON(needs.soaking-test.outputs) }}' | | |
jq ' | |
."${{ matrix.language }}-${{ matrix.instrumentation-type }}-error" == "FAILED" | |
' || echo false | |
) | |
[[ $AT_LEAST_ONE_LAYER_SOAK_TEST_FAILED == false ]] | |
- name: Output keyword for (${{ matrix.language }}, ${{ matrix.instrumentation-type }}) layer | |
run: | | |
VERSION=$( | |
echo '${{ toJSON(needs.soaking-test.outputs) }}' | | |
jq -r '."${{ matrix.language }}-${{ matrix.instrumentation-type }}-version"' | |
) | |
echo "::warning::Layer ARN Keyword: arn:aws:lambda:${{ env.AWS_DEFAULT_REGION }}:611364707713:layer:aws-otel-${{ matrix.language }}-${{ matrix.instrumentation-type }}-<ARCHITECTURE>-${{ github.sha }}:$VERSION" | |
publish-soaking-status: | |
needs: [soaking-test] | |
if: ${{ always() }} | |
uses: ./.github/workflows/publish-status.yml | |
with: | |
namespace: 'ADOT/GitHubActions' | |
repository: ${{ github.repository }} | |
branch: ${{ github.ref_name }} | |
workflow: soaking | |
success: ${{ needs.soaking-test.result == 'success' }} | |
region: us-west-2 | |
secrets: | |
roleArn: ${{ secrets.METRICS_ROLE_ARN }} |