Skip to content

Commit

Permalink
Common: put good bad and pii inside output {} in config (close #493)
Browse files Browse the repository at this point in the history
  • Loading branch information
benjben committed Oct 13, 2021
1 parent 9fb30c1 commit 36bd76f
Show file tree
Hide file tree
Showing 11 changed files with 264 additions and 240 deletions.
12 changes: 7 additions & 5 deletions config/config.kinesis.minimal.hocon
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
"streamName": "collector-payloads"
}

"good": {
"streamName": "enriched"
}
"output": {
"good": {
"streamName": "enriched"
}

"bad": {
"streamName": "bad"
"bad": {
"streamName": "bad"
}
}
}
256 changes: 130 additions & 126 deletions config/config.kinesis.reference.hocon
Original file line number Diff line number Diff line change
Expand Up @@ -37,124 +37,128 @@
}
}

# Enriched events output
"good": {
"type": "Kinesis"

# Name of the Kinesis stream to write to
"streamName": "enriched"

# Region where the Kinesis stream is located
# This field is optional if it can be resolved with AWS region provider chain.
# It checks places like env variables, system properties, AWS profile file.
# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/regions/providers/DefaultAwsRegionProviderChain.html
"region": "eu-central-1"

# Optional. How the output stream/topic will be partitioned in Kinesis
# Possible partition keys are: event_id, event_fingerprint, domain_userid, network_userid,
# user_ipaddress, domain_sessionid, user_fingerprint
# Refer to https://github.com/snowplow/snowplow/wiki/canonical-event-model to know what the
# possible partition keys correspond to.
# Otherwise, the partition key will be a random UUID.
# "partitionKey" = "user_id"

# The delay threshold to use for batching
# Default is 200 milliseconds
"delayThreshold": 200 milliseconds

# Max number of items in the batch to collect before emitting
# Default is 500
"maxBatchSize": 500

# Max size of the batch in bytes before emitting
# Default is 5MB
"maxBatchBytes": 5000000

# Minimum and maximum backoff periods
"backoffPolicy": {
# Default is 100 ms
"minBackoff": 100 milliseconds
# Default is 10 s
"maxBackoff": 10 seconds
"output" {

# Enriched events output
"good": {
"type": "Kinesis"

# Name of the Kinesis stream to write to
"streamName": "enriched"

# Region where the Kinesis stream is located
# This field is optional if it can be resolved with AWS region provider chain.
# It checks places like env variables, system properties, AWS profile file.
# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/regions/providers/DefaultAwsRegionProviderChain.html
"region": "eu-central-1"

# Optional. How the output stream/topic will be partitioned in Kinesis
# Possible partition keys are: event_id, event_fingerprint, domain_userid, network_userid,
# user_ipaddress, domain_sessionid, user_fingerprint
# Refer to https://github.com/snowplow/snowplow/wiki/canonical-event-model to know what the
# possible partition keys correspond to.
# Otherwise, the partition key will be a random UUID.
# "partitionKey" = "user_id"

# The delay threshold to use for batching
# Default is 200 milliseconds
"delayThreshold": 200 milliseconds

# Max number of items in the batch to collect before emitting
# Default is 500
"maxBatchSize": 500

# Max size of the batch in bytes before emitting
# Default is 5MB
"maxBatchBytes": 5000000

# Minimum and maximum backoff periods
"backoffPolicy": {
# Default is 100 ms
"minBackoff": 100 milliseconds
# Default is 10 s
"maxBackoff": 10 seconds
}
}
}

# Pii events output
"pii": {
"type": "Kinesis"

# Name of the Kinesis stream to write to
"streamName": "pii"

# Region where the Kinesis stream is located
# This field is optional if it can be resolved with AWS region provider chain.
# It checks places like env variables, system properties, AWS profile file.
# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/regions/providers/DefaultAwsRegionProviderChain.html
"region": "eu-central-1"

# Optional. How the output stream/topic will be partitioned in Kinesis
# Possible partition keys are: event_id, event_fingerprint, domain_userid, network_userid,
# user_ipaddress, domain_sessionid, user_fingerprint
# Refer to https://github.com/snowplow/snowplow/wiki/canonical-event-model to know what the
# possible partition keys correspond to.
# Otherwise, the partition key will be a random UUID.
# "partitionKey" = "user_id"

# The delay threshold to use for batching
# Default is 200 milliseconds
"delayThreshold": 200 milliseconds

# Max number of items in the batch to collect before emitting
# Default is 500
"maxBatchSize": 500

# Max size of the batch in bytes before emitting
# Default is 5MB
"maxBatchBytes": 5000000

# Minimum and maximum backoff periods
"backoffPolicy": {
# Default is 100 ms
"minBackoff": 100 milliseconds
# Default is 10 s
"maxBackoff": 10 seconds

# Bad rows output
"bad": {
"type": "Kinesis"

# Name of the Kinesis stream to write to
"streamName": "bad"

# Region where the Kinesis stream is located
# This field is optional if it can be resolved with AWS region provider chain.
# It checks places like env variables, system properties, AWS profile file.
# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/regions/providers/DefaultAwsRegionProviderChain.html
# AWS region in which the Kinesis stream resides.
"region": "eu-central-1"

# The delay threshold to use for batching
# Default is 200 milliseconds
"delayThreshold": 200 milliseconds

# Max number of items in the batch to collect before emitting
# Default is 500
"maxBatchSize": 500

# Max size of the batch in bytes before emitting
# Default is 5MB
"maxBatchBytes": 5000000

# Minimum and maximum backoff periods
"backoffPolicy": {
# Default is 100 ms
"minBackoff": 100 milliseconds
# Default is 10 s
"maxBackoff": 10 seconds
}
}
}

# Bad rows output
"bad": {
"type": "Kinesis"

# Name of the Kinesis stream to write to
"streamName": "bad"

# Region where the Kinesis stream is located
# This field is optional if it can be resolved with AWS region provider chain.
# It checks places like env variables, system properties, AWS profile file.
# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/regions/providers/DefaultAwsRegionProviderChain.html
"region": "eu-central-1"

# The delay threshold to use for batching
# Default is 200 milliseconds
"delayThreshold": 200 milliseconds

# Max number of items in the batch to collect before emitting
# Default is 500
"maxBatchSize": 500

# Max size of the batch in bytes before emitting
# Default is 5MB
"maxBatchBytes": 5000000

# Minimum and maximum backoff periods
"backoffPolicy": {
# Default is 100 ms
"minBackoff": 100 milliseconds
# Default is 10 s
"maxBackoff": 10 seconds
# Pii events output
"pii": {
"type": "Kinesis"

# Name of the Kinesis stream to write to
"streamName": "pii"

# Region where the Kinesis stream is located
# This field is optional if it can be resolved with AWS region provider chain.
# It checks places like env variables, system properties, AWS profile file.
# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/regions/providers/DefaultAwsRegionProviderChain.html
"region": "eu-central-1"

# Optional. How the output stream/topic will be partitioned in Kinesis
# Possible partition keys are: event_id, event_fingerprint, domain_userid, network_userid,
# user_ipaddress, domain_sessionid, user_fingerprint
# Refer to https://github.com/snowplow/snowplow/wiki/canonical-event-model to know what the
# possible partition keys correspond to.
# Otherwise, the partition key will be a random UUID.
# "partitionKey" = "user_id"

# The delay threshold to use for batching
# Default is 200 milliseconds
"delayThreshold": 200 milliseconds

# Max number of items in the batch to collect before emitting
# Default is 500
"maxBatchSize": 500

# Max size of the batch in bytes before emitting
# Default is 5MB
"maxBatchBytes": 5000000

# Minimum and maximum backoff periods
"backoffPolicy": {
# Default is 100 ms
"minBackoff": 100 milliseconds
# Default is 10 s
"maxBackoff": 10 seconds
}
}
}

# Optional. Concurrency of the app
"concurrency" : {
# Controls the maximum number of events that can be waiting to get sunk
Expand All @@ -164,52 +168,52 @@
# PubSub: should at least exceed the number events that can be sunk within the delay threshold
# used for batching (default 200ms)
"output": 10000

# Maximum number of events that can get enriched at the same time
# Default: 64
"enrichment": 64
}

# Optional, period after which enrich assets should be checked for updates
# no assets will be updated if the key is absent
"assetsUpdatePeriod": "7 days"

"monitoring": {

# Optional, for tracking runtime exceptions
"sentry": {
"dsn": "http://sentry.acme.com"
}

# Optional, configure how metrics are reported
"metrics": {

# Send metrics to a StatsD server on localhost
"statsd": {

"hostname": "localhost"
"port": 8125

# Required, how frequently to report metrics
"period": "10 seconds"

# Any key-value pairs to be tagged on every StatsD metric
"tags": {
"app": enrich
}

# Optional, override the default metric prefix
# "prefix": "snowplow.enrich."
}

# Log to stdout using Slf4j
"stdout": {
"period": "10 seconds"

# Optional, override the default metric prefix
# "prefix": "snowplow.enrich."
}

# Optional, cloudwatch metrics are enabled by default
"cloudwatch": false
}
Expand Down
Loading

0 comments on commit 36bd76f

Please sign in to comment.