Skip to content

Commit

Permalink
feat: AWS S3 instrumentation of aws-sdk package (#2112)
Browse files Browse the repository at this point in the history
Note that this only includes 'aws-sdk@2' instrumentation. Instrumentation
of '@awk-sdk/client-s3' (aka "v3" or the "Modular AWS SDK for JavaScript")
will be done in a separate change.

This adds localstack to the test suite for testing the S3 API.

Fixes: #1954
  • Loading branch information
trentm authored Jun 24, 2021
1 parent 98d2aa1 commit 9ece910
Show file tree
Hide file tree
Showing 25 changed files with 811 additions and 9 deletions.
1 change: 1 addition & 0 deletions .ci/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ def generateStepForWindows(Map params = [:]){
"ELASTIC_APM_ASYNC_HOOKS=${ELASTIC_APM_ASYNC_HOOKS}",
"CASSANDRA_HOST=${linuxIp}",
"ES_HOST=${linuxIp}",
"LOCALSTACK_HOST=${linuxIp}",
"MEMCACHED_HOST=${linuxIp}",
"MONGODB_HOST=${linuxIp}",
"MSSQL_HOST=${linuxIp}",
Expand Down
8 changes: 8 additions & 0 deletions .ci/docker/docker-compose-all.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ services:
extends:
file: docker-compose.yml
service: memcached
localstack:
extends:
file: docker-compose.yml
service: localstack
node_tests:
extends:
file: docker-compose-node-test.yml
Expand All @@ -54,6 +58,8 @@ services:
condition: service_healthy
memcached:
condition: service_healthy
localstack:
condition: service_healthy

volumes:
nodepgdata:
Expand All @@ -68,3 +74,5 @@ volumes:
driver: local
nodecassandradata:
driver: local
nodelocalstackdata:
driver: local
8 changes: 8 additions & 0 deletions .ci/docker/docker-compose-edge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ services:
extends:
file: docker-compose.yml
service: elasticsearch
localstack:
extends:
file: docker-compose.yml
service: localstack
memcached:
extends:
file: docker-compose.yml
Expand Down Expand Up @@ -42,6 +46,8 @@ services:
condition: service_healthy
elasticsearch:
condition: service_healthy
localstack:
condition: service_healthy
memcached:
condition: service_healthy
mongodb:
Expand All @@ -64,6 +70,8 @@ volumes:
driver: local
nodemysqldata:
driver: local
nodelocalstackdata:
driver: local
nodeesdata:
driver: local
nodecassandradata:
Expand Down
14 changes: 14 additions & 0 deletions .ci/docker/docker-compose-localstack.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
version: '2.1'

services:
localstack:
extends:
file: docker-compose.yml
service: localstack
node_tests:
extends:
file: docker-compose-node-test.yml
service: node_tests
depends_on:
localstack:
condition: service_healthy
1 change: 1 addition & 0 deletions .ci/docker/docker-compose-node-edge-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ services:
PGHOST: 'postgres'
PGUSER: 'postgres'
MEMCACHED_HOST: 'memcached'
LOCALSTACK_HOST: 'localstack'
NODE_VERSION: ${NODE_VERSION}
NVM_NODEJS_ORG_MIRROR: ${NVM_NODEJS_ORG_MIRROR}
ELASTIC_APM_ASYNC_HOOKS: ${ELASTIC_APM_ASYNC_HOOKS}
Expand Down
1 change: 1 addition & 0 deletions .ci/docker/docker-compose-node-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ services:
PGHOST: 'postgres'
PGUSER: 'postgres'
MEMCACHED_HOST: 'memcached'
LOCALSTACK_HOST: 'localstack'
NODE_VERSION: ${NODE_VERSION}
TAV: ${TAV_MODULE}
ELASTIC_APM_ASYNC_HOOKS: ${ELASTIC_APM_ASYNC_HOOKS}
Expand Down
18 changes: 18 additions & 0 deletions .ci/docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,22 @@ services:
timeout: 10s
retries: 5

localstack:
# https://hub.docker.com/r/localstack/localstack/tags
image: localstack/localstack:0.12.12
environment:
- LOCALSTACK_SERVICES=s3
- DATA_DIR=/var/lib/localstack
ports:
- "4566:4566"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:4566/health"]
interval: 30s
timeout: 10s
retries: 5
volumes:
- nodelocalstackdata:/var/lib/localstack

volumes:
nodepgdata:
driver: local
Expand All @@ -127,3 +143,5 @@ volumes:
driver: local
nodecassandradata:
driver: local
nodelocalstackdata:
driver: local
2 changes: 1 addition & 1 deletion .ci/scripts/docker-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ fi
# Skip for node v8 because it results in this warning:
# openssl config failed: error:25066067:DSO support routines:DLFCN_LOAD:could not load the shared library
if [[ $major_node_version -gt 8 ]]; then
export NODE_OPTIONS="${NODE_OPTIONS:+${NODE_OPTIONS}} --openssl-config=./test/openssl-config-for-testing.cnf"
export NODE_OPTIONS="${NODE_OPTIONS:+${NODE_OPTIONS}} --openssl-config=$(pwd)/test/openssl-config-for-testing.cnf"
fi

# Workaround to git <2.7
Expand Down
5 changes: 5 additions & 0 deletions .ci/scripts/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@ elif [[ -n "${TAV_MODULE}" ]]; then
memcached)
DOCKER_COMPOSE_FILE=docker-compose-memcached.yml
;;
aws-sdk)
DOCKER_COMPOSE_FILE=docker-compose-localstack.yml
;;
*)
# Just the "node_tests" container. No additional services needed for testing.
DOCKER_COMPOSE_FILE=docker-compose-node-test.yml
Expand All @@ -210,6 +213,8 @@ else
DOCKER_COMPOSE_FILE=docker-compose-all.yml
fi

ELASTIC_APM_ASYNC_HOOKS=${ELASTIC_APM_ASYNC_HOOKS:-true}

set +e
NVM_NODEJS_ORG_MIRROR=${NVM_NODEJS_ORG_MIRROR} \
ELASTIC_APM_ASYNC_HOOKS=${ELASTIC_APM_ASYNC_HOOKS} \
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,16 @@ jobs:
volumes:
- nodeesdata:/usr/share/elasticsearch/data

localstack:
image: localstack/localstack:0.12.12
env:
LOCALSTACK_SERVICES: 's3'
DATA_DIR: '/var/lib/localstack'
ports:
- "4566:4566"
volumes:
- nodelocalstackdata:/var/lib/localstack

strategy:
matrix:
node:
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ Notes:
[float]
===== Features
* Add instrumentation of all AWS S3 methods when using the
https://www.npmjs.com/package/aws-sdk[JavaScript AWS SDK v2] (`aws-sdk`).
[float]
===== Bug fixes
Expand Down
2 changes: 1 addition & 1 deletion docs/supported-technologies.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ The Node.js agent will automatically instrument the following modules to give yo
[options="header"]
|=======================================================================
|Module |Version |Note
|https://www.npmjs.com/package/aws-sdk[aws-sdk] |>1 <3 |Will instrument SQS send/receive/delete messages
|https://www.npmjs.com/package/aws-sdk[aws-sdk] |>1 <3 |Will instrument SQS send/receive/delete messages, all S3 methods
|https://www.npmjs.com/package/cassandra-driver[cassandra-driver] |>=3.0.0 |Will instrument all queries
|https://www.npmjs.com/package/elasticsearch[elasticsearch] |>=8.0.0 |Will instrument all queries
|https://www.npmjs.com/package/@elastic/elasticsearch[@elastic/elasticsearch] |>=7.0.0 <8.0.0 |Will instrument all queries
Expand Down
1 change: 1 addition & 0 deletions lib/agent.js
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ const EMPTY_OPTS = {}
//
// Usage:
// captureError(err, opts, cb)
// captureError(err, opts)
// captureError(err, cb)
//
// where:
Expand Down
11 changes: 9 additions & 2 deletions lib/instrumentation/modules/aws-sdk.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
'use strict'
const semver = require('semver')
const shimmer = require('../shimmer')
const { instrumentationS3 } = require('./aws-sdk/s3')
const { instrumentationSqs } = require('./aws-sdk/sqs')

const instrumentorFromSvcId = {
s3: instrumentationS3,
sqs: instrumentationSqs
}

// Called in place of AWS.Request.send and AWS.Request.promise
//
// Determines which amazon service an API request is for
// and then passes call on to an appropriate instrumentation
// function.
function instrumentOperation (orig, origArguments, request, AWS, agent, { version, enabled }) {
if (request.service.serviceIdentifier === 'sqs') {
return instrumentationSqs(orig, origArguments, request, AWS, agent, { version, enabled })
const instrumentor = instrumentorFromSvcId[request.service.serviceIdentifier]
if (instrumentor) {
return instrumentor(orig, origArguments, request, AWS, agent, { version, enabled })
}

// if we're still here, then we still need to call the original method
Expand Down
129 changes: 129 additions & 0 deletions lib/instrumentation/modules/aws-sdk/s3.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
'use strict'

// Instrument AWS S3 operations via the 'aws-sdk' package.

const constants = require('../../../constants')

const TYPE = 'storage'
const SUBTYPE = 's3'

// Return the PascalCase operation name from `request.operation` by undoing to
// `lowerFirst()` from
// https://github.com/aws/aws-sdk-js/blob/c0c44b8a4e607aae521686898f39a3e359f727e4/lib/model/api.js#L63-L65
//
// For example: 'headBucket' -> 'HeadBucket'
function opNameFromOperation (operation) {
return operation[0].toUpperCase() + operation.slice(1)
}

// Return an APM "resource" string for the bucket, Access Point ARN, or Outpost
// ARN. ARNs are normalized to a shorter resource name.
//
// Known ARN patterns:
// - arn:aws:s3:<region>:<account-id>:accesspoint/<accesspoint-name>
// - arn:aws:s3-outposts:<region>:<account>:outpost/<outpost-id>/bucket/<bucket-name>
// - arn:aws:s3-outposts:<region>:<account>:outpost/<outpost-id>/accesspoint/<accesspoint-name>
//
// In general that is:
// arn:$partition:$service:$region:$accountId:$resource
//
// This parses using the same "split on colon" used by the JavaScript AWS SDK v3.
// https://github.com/aws/aws-sdk-js-v3/blob/v3.18.0/packages/util-arn-parser/src/index.ts#L14-L37
function resourceFromBucket (bucket) {
let resource = null
if (bucket) {
resource = bucket
if (resource.startsWith('arn:')) {
resource = bucket.split(':').slice(5).join(':')
}
}
return resource
}

// Instrument an [email protected] operation (i.e. a AWS.Request.send or
// AWS.Request.promise).
//
// @param {AWS.Request} request https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Request.html
function instrumentationS3 (orig, origArguments, request, AWS, agent, { version, enabled }) {
const opName = opNameFromOperation(request.operation)
let name = 'S3 ' + opName
const resource = resourceFromBucket(request.params && request.params.Bucket)
if (resource) {
name += ' ' + resource
}

const span = agent.startSpan(name, TYPE, SUBTYPE, opName)
if (span) {
request.on('complete', function onComplete (response) {
// `response` is an AWS.Response
// https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Response.html

// Determining the bucket's region.
// `request.httpRequest.region` isn't documented, but the aws-sdk@2
// lib/services/s3.js will set it to the bucket's determined region.
// This can be asynchronously determined -- e.g. if it differs from the
// configured service endpoint region -- so this won't be set until
// 'complete'.
const region = request.httpRequest && request.httpRequest.region

// Destination context.
// '.httpRequest.endpoint' might differ from '.service.endpoint' if
// the bucket is in a different region.
const endpoint = request.httpRequest && request.httpRequest.endpoint
const destContext = {
service: {
name: SUBTYPE,
type: TYPE
}
}
if (endpoint) {
destContext.address = endpoint.hostname
destContext.port = endpoint.port
}
if (resource) {
destContext.service.resource = resource
}
if (region) {
destContext.cloud = { region }
}
span.setDestinationContext(destContext)

if (response) {
// Follow the spec for HTTP client span outcome.
// https://github.com/elastic/apm/blob/master/specs/agents/tracing-instrumentation-http.md#outcome
//
// For example, a S3 GetObject conditional request (e.g. using the
// IfNoneMatch param) will respond with response.error=NotModifed and
// statusCode=304. This is a *successful* outcome.
const statusCode = response.httpResponse && response.httpResponse.statusCode
if (statusCode) {
span._setOutcomeFromHttpStatusCode(statusCode)
} else {
// `statusCode` will be undefined for errors before sending a request, e.g.:
// InvalidConfiguration: Custom endpoint is not compatible with access point ARN
span._setOutcomeFromErrorCapture(constants.OUTCOME_FAILURE)
}

if (response.error && (!statusCode || statusCode >= 400)) {
agent.captureError(response.error, { skipOutcome: true })
}
}

// Workaround a bug in the agent's handling of `span.sync`.
//
// The bug: Currently this span.sync is not set `false` because there is
// an HTTP span created (for this S3 request) in the same async op. That
// HTTP span becomes the "active span" for this async op, and *it* gets
// marked as sync=false in `before()` in async-hooks.js.
span.sync = false

span.end()
})
}

return orig.apply(request, origArguments)
}

module.exports = {
instrumentationS3
}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@
"thunky": "^1.1.0",
"typescript": "^3.7.5",
"untildify": "^4.0.0",
"vasync": "^2.2.0",
"wait-on": "^3.3.0",
"ws": "^7.2.1"
},
Expand Down
File renamed without changes.
5 changes: 4 additions & 1 deletion test/docker-compose.ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@ version: '2.1'
services:
node_tests:
image: node:${NODE_VERSION}
environment:
environment:
MONGODB_HOST: 'mongodb'
REDIS_HOST: 'redis'
ES_HOST: 'elasticsearch'
MSSQL_HOST: 'mssql'
MYSQL_HOST: 'mysql'
CASSANDRA_HOST: 'cassandra'
MEMCACHED_HOST: 'memcached'
LOCALSTACK_HOST: 'localstack'
PGHOST: 'postgres'
PGUSER: 'postgres'
depends_on:
Expand All @@ -30,3 +31,5 @@ services:
condition: service_started
memcached:
condition: service_started
localstack:
condition: service_started
Loading

0 comments on commit 9ece910

Please sign in to comment.