From 8a8476b215d009581fadac66c9a0996f0ee7ac83 Mon Sep 17 00:00:00 2001 From: "pavel.voropaev" Date: Fri, 3 Dec 2021 16:31:46 +0000 Subject: [PATCH] Validate enriched event against atomic schema before emitting (close #517) --- config/config.kinesis.extended.hocon | 7 + config/config.pubsub.extended.hocon | 7 + .../EnrichBench.scala | 45 +- .../EtlPipelineBench.scala | 4 +- .../ThriftLoaderBench.scala | 4 +- .../snowplow/enrich/common/fs2/Enrich.scala | 9 +- .../enrich/common/fs2/Environment.scala | 11 +- .../snowplow/enrich/common/fs2/Run.scala | 6 +- .../enrich/common/fs2/config/ConfigFile.scala | 12 +- .../enrich/common/fs2/config/io.scala | 11 + .../schemas/com.acme/output/jsonschema/1-0-0 | 14 + .../schemas/com.acme/test/jsonschema/1-0-1 | 18 + .../call_complete/jsonschema/1-0-2 | 143 +++++ .../hit/jsonschema/1-0-0 | 30 + .../page_view/jsonschema/1-0-0 | 31 + .../contact_creation/jsonschema/1-0-0 | 64 ++ .../spiders_and_robots/jsonschema/1-0-0 | 32 + .../com.mailchimp/subscribe/jsonschema/1-0-0 | 67 ++ .../message_delivered/jsonschema/1-0-0 | 56 ++ .../message_sent/jsonschema/1-0-0 | 111 ++++ .../com.marketo/event/jsonschema/2-0-0 | 601 ++++++++++++++++++ .../com.olark/transcript/jsonschema/1-0-0 | 211 ++++++ .../com.pagerduty/incident/jsonschema/1-0-0 | 207 ++++++ .../incident_assign/jsonschema/1-0-0 | 30 + .../com.sendgrid/processed/jsonschema/2-0-0 | 89 +++ .../signup_form_submitted/jsonschema/1-0-0 | 30 + .../jsonschema/1-0-0 | 167 +++++ .../jsonschema/1-0-0 | 84 +++ .../pii_enrichment_config/jsonschema/2-0-0 | 164 +++++ .../jsonschema/1-0-0 | 211 ++++++ .../yauaa_enrichment_config/jsonschema/1-0-0 | 38 ++ .../segment_webhook_config/jsonschema/1-0-0 | 37 ++ .../atomic/jsonschema/1-0-0 | 489 ++++++++++++++ .../campaign_attribution/jsonschema/1-0-1 | 74 +++ .../contexts/jsonschema/1-0-0 | 32 + .../contexts/jsonschema/1-0-1 | 30 + .../jsonschema/1-0-0 | 46 ++ .../enrichments/jsonschema/1-0-0 | 30 + .../geolocation_context/jsonschema/1-0-0 | 41 ++ 
.../geolocation_context/jsonschema/1-1-0 | 44 ++ .../ip_lookups/jsonschema/2-0-0 | 90 +++ .../mobile_context/jsonschema/1-0-0 | 43 ++ .../payload_data/jsonschema/1-0-0 | 246 +++++++ .../payload_data/jsonschema/1-0-4 | 261 ++++++++ .../referer_parser/jsonschema/2-0-0 | 44 ++ .../screen_view/jsonschema/1-0-0 | 22 + .../unstruct_event/jsonschema/1-0-0 | 25 + .../uri_redirect/jsonschema/1-0-0 | 21 + .../status_change/jsonschema/1-0-0 | 45 ++ .../com.unbounce/form_post/jsonschema/1-0-0 | 38 ++ .../nl.basjes/yauaa_context/jsonschema/1-0-3 | 231 +++++++ .../enrich/common/fs2/EnrichSpec.scala | 4 +- .../common/fs2/blackbox/BlackBoxTesting.scala | 4 +- .../blackbox/adapters/Tp2AdapterSpec.scala | 2 +- ...versionEnrichmentTransactionItemSpec.scala | 2 +- ...yConversionEnrichmentTransactionSpec.scala | 10 +- .../blackbox/misc/TransactionItemSpec.scala | 2 +- .../fs2/blackbox/misc/TransactionSpec.scala | 8 +- .../common/fs2/config/ConfigFileSpec.scala | 9 + .../common/fs2/config/ParsedConfigsSpec.scala | 3 + .../common/fs2/test/SchemaRegistry.scala | 231 ------- .../common/fs2/test/TestEnvironment.scala | 32 +- .../common/EtlPipeline.scala | 8 +- .../enrichments/EnrichmentManager.scala | 92 ++- .../common/enrichments/Transform.scala | 14 +- .../CurrencyConversionEnrichment.scala | 14 +- .../common/outputs/EnrichedEvent.scala | 208 +++++- .../common/utils/ConversionUtils.scala | 61 +- .../atomic/jsonschema/1-0-0 | 489 ++++++++++++++ .../EtlPipelineSpec.scala | 12 +- .../enrichments/EnrichmentManagerSpec.scala | 86 ++- .../CurrencyConversionEnrichmentSpec.scala | 444 ++++++++----- .../pii/PiiPseudonymizerEnrichmentSpec.scala | 3 +- .../outputs/EnrichedEventSpec.scala | 20 +- .../src/main/resources/application.conf | 4 + .../src/main/resources/application.conf | 4 + .../good/TransactionItemSpec.scala | 2 +- .../good/TransactionSpec.scala | 2 +- 78 files changed, 5651 insertions(+), 522 deletions(-) create mode 100644 
modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/output/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/test/jsonschema/1-0-1 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.callrail/call_complete/jsonschema/1-0-2 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/hit/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/page_view/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.hubspot/contact_creation/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.iab.snowplow/spiders_and_robots/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailchimp/subscribe/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailgun/message_delivered/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mandrill/message_sent/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.marketo/event/jsonschema/2-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.olark/transcript/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pagerduty/incident/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pingdom/incident_assign/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.sendgrid/processed/jsonschema/2-0-0 create mode 100644 
modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow-website/signup_form_submitted/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/api_request_enrichment_config/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/iab_spiders_and_robots_enrichment/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/2-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/sql_query_enrichment_config/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/yauaa_enrichment_config/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.input-adapters/segment_webhook_config/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/campaign_attribution/jsonschema/1-0-1 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/currency_conversion_config/jsonschema/1-0-0 create mode 100644 
modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/enrichments/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-1-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/ip_lookups/jsonschema/2-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/referer_parser/jsonschema/2-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.statusgator/status_change/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.unbounce/form_post/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/nl.basjes/yauaa_context/jsonschema/1-0-3 
delete mode 100644 modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/SchemaRegistry.scala create mode 100644 modules/common/src/test/resources/iglu-schemas/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 diff --git a/config/config.kinesis.extended.hocon b/config/config.kinesis.extended.hocon index 499a78e21..0ec76ad3a 100644 --- a/config/config.kinesis.extended.hocon +++ b/config/config.kinesis.extended.hocon @@ -389,4 +389,11 @@ # Version of the terraform module that deployed the app moduleVersion = 1.0.0 } + + # Optional. To activate/deactivate enrich features + "featureFlags" : { + # If activated, enriched events will get validated against atomic schema. + # If not valid, a bad row will be emitted instead of the enriched event. + "validateEnrichedEvents": true + } } diff --git a/config/config.pubsub.extended.hocon b/config/config.pubsub.extended.hocon index 340b0c5e3..2bf800e05 100644 --- a/config/config.pubsub.extended.hocon +++ b/config/config.pubsub.extended.hocon @@ -188,4 +188,11 @@ # Version of the terraform module that deployed the app moduleVersion = 1.0.0 } + + # Optional. To activate/deactivate enrich features + "featureFlags" : { + # If activated, enriched events will get validated against atomic schema. + # If not valid, a bad row will be emitted instead of the enriched event. 
+ "validateEnrichedEvents": true + } } diff --git a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala index dcd6d9d28..ffe6715ce 100644 --- a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala @@ -18,6 +18,8 @@ import java.util.concurrent.TimeUnit import cats.effect.{ContextShift, IO, Clock, Blocker} +import io.circe.literal._ + import fs2.Stream import com.snowplowanalytics.iglu.client.Client @@ -43,10 +45,41 @@ class EnrichBench { implicit val ioClock: Clock[IO] = Clock.create[IO] + val client = Client.parseDefault[IO](json""" + { + "schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-1", + "data": { + "cacheSize": 500, + "repositories": [ + { + "name": "Iglu Central", + "priority": 0, + "vendorPrefixes": [ "com.snowplowanalytics" ], + "connection": { + "http": { + "uri": "http://iglucentral.com" + } + } + }, + { + "name": "Iglu Central - GCP Mirror", + "priority": 1, + "vendorPrefixes": [ "com.snowplowanalytics" ], + "connection": { + "http": { + "uri": "http://mirror01.iglucentral.com" + } + } + } + ] + } + } + """).rethrowT.unsafeRunSync() + @Benchmark def measureEnrichWithMinimalPayload(state: EnrichBench.BenchState) = { implicit val CS: ContextShift[IO] = state.contextShift - Enrich.enrichWith[IO](IO.pure(EnrichmentRegistry()), state.blocker, Client.IgluCentral, None, (_: Option[Long]) => IO.unit)(state.raw).unsafeRunSync() + Enrich.enrichWith[IO](IO.pure(EnrichmentRegistry()), client, None, (_: Option[Long]) => IO.unit)(state.raw).unsafeRunSync() } @Benchmark @@ -83,19 +116,19 @@ object EnrichBench { raw = EnrichSpec.payload[IO] val input = Stream.emits(List( - EnrichSpec.colllectorPayload.copy( + EnrichSpec.collectorPayload.copy( querystring = new 
BasicNameValuePair("ip", "125.12.2.40") :: EnrichSpec.querystring ), - EnrichSpec.colllectorPayload.copy( + EnrichSpec.collectorPayload.copy( querystring = new BasicNameValuePair("ip", "125.12.2.41") :: EnrichSpec.querystring ), - EnrichSpec.colllectorPayload.copy( + EnrichSpec.collectorPayload.copy( querystring = new BasicNameValuePair("ip", "125.12.2.42") :: EnrichSpec.querystring ), - EnrichSpec.colllectorPayload.copy( + EnrichSpec.collectorPayload.copy( querystring = new BasicNameValuePair("ip", "125.12.2.43") :: EnrichSpec.querystring ), - EnrichSpec.colllectorPayload.copy( + EnrichSpec.collectorPayload.copy( querystring = new BasicNameValuePair("ip", "125.12.2.44") :: EnrichSpec.querystring ), )).repeatN(10).map(cp => Payload(cp.toRaw, IO.unit)).covary[IO] diff --git a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala index b7c2d0b96..89a0e34b0 100644 --- a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala @@ -49,13 +49,13 @@ class EtlPipelineBench { @Benchmark def measureProcessEventsIO(state: EtlPipelineBench.BenchState) = { - val payload = EnrichSpec.colllectorPayload + val payload = EnrichSpec.collectorPayload EtlPipeline.processEvents[IO](state.adapterRegistry, state.enrichmentRegistryIo, Client.IgluCentral, Enrich.processor, state.dateTime, Validated.Valid(Some(payload))).unsafeRunSync() } @Benchmark def measureProcessEventsId(state: EtlPipelineBench.BenchState) = { - val payload = EnrichSpec.colllectorPayload + val payload = EnrichSpec.collectorPayload EtlPipeline.processEvents[Id](state.adapterRegistry, state.enrichmentRegistryId, state.clientId, Enrich.processor, state.dateTime, Validated.Valid(Some(payload))) } } diff --git 
a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala index b1b9e388f..0eea9f9f4 100644 --- a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala @@ -30,7 +30,7 @@ class ThriftLoaderBench { @Benchmark def measureNormalize(state: ThriftLoaderBench.BenchState) = { - Enrich.encodeEvent(state.event) + Enrich.serializeEnriched(state.event) } } @@ -42,7 +42,7 @@ object ThriftLoaderBench { @Setup(Level.Trial) def setup(): Unit = { - data = EnrichSpec.colllectorPayload.toRaw + data = EnrichSpec.collectorPayload.toRaw event = new EnrichedEvent() event.setApp_id("foo") diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Enrich.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Enrich.scala index 6572815bb..f342bb7f8 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Enrich.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Enrich.scala @@ -72,7 +72,7 @@ object Enrich { val registry: F[EnrichmentRegistry[F]] = env.enrichments.get.map(_.registry) val enrich: Enrich[F] = { implicit val rl: RegistryLookup[F] = env.registryLookup - enrichWith[F](registry, env.igluClient, env.sentry, env.processor) + enrichWith[F](registry, env.igluClient, env.sentry, env.processor, env.validateEnriched) } val enriched = @@ -101,7 +101,8 @@ object Enrich { enrichRegistry: F[EnrichmentRegistry[F]], igluClient: Client[F, Json], sentry: Option[SentryClient], - processor: Processor + processor: Processor, + validateEnriched: Boolean )( row: Array[Byte] ): F[Result] = { @@ -113,7 +114,7 @@ object Enrich { _ <- 
Logger[F].debug(payloadToString(payload)) etlTstamp <- Clock[F].realTime(TimeUnit.MILLISECONDS).map(millis => new DateTime(millis)) registry <- enrichRegistry - enriched <- EtlPipeline.processEvents[F](adapterRegistry, registry, igluClient, processor, etlTstamp, payload) + enriched <- EtlPipeline.processEvents[F](adapterRegistry, registry, igluClient, processor, etlTstamp, payload, validateEnriched) } yield (enriched, collectorTstamp) result.handleErrorWith(sendToSentry[F](row, sentry, processor, collectorTstamp)) @@ -153,7 +154,7 @@ object Enrich { ): BadRow.GenericError = { val base64 = new String(Base64.getEncoder.encode(row)) val rawPayload = BadRowPayload.RawPayload(base64) - val failure = Failure.GenericFailure(time, NonEmptyList.one(error.toString)) + val failure = Failure.GenericFailure(time, NonEmptyList.one(ConversionUtils.cleanStackTrace(error))) BadRow.GenericError(processor, failure, rawPayload) } diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Environment.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Environment.scala index 783f94f06..bd671d1cd 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Environment.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Environment.scala @@ -77,6 +77,8 @@ import com.snowplowanalytics.snowplow.enrich.common.fs2.config.io.Input.Kinesis * @param streamsSettings parameters used to configure the streams * @param region region in the cloud where enrich runs * @param cloud cloud where enrich runs (AWS or GCP) + * @param validateEnriched Whether enriched event should be validated according + * to atomic schema * @tparam A type emitted by the source (e.g. `ConsumerRecord` for PubSub). 
* getPayload must be defined for this type, as well as checkpointing */ @@ -103,7 +105,8 @@ final case class Environment[F[_], A]( processor: Processor, streamsSettings: Environment.StreamsSettings, region: Option[String], - cloud: Option[Telemetry.Cloud] + cloud: Option[Telemetry.Cloud], + validateEnriched: Boolean ) object Environment { @@ -147,7 +150,8 @@ object Environment { processor: Processor, maxRecordSize: Int, cloud: Option[Telemetry.Cloud], - getRegion: => Option[String] + getRegion: => Option[String], + validateEnriched: Boolean ): Resource[F, Environment[F, A]] = { val file = parsedConfigs.configFile for { @@ -189,7 +193,8 @@ object Environment { processor, StreamsSettings(file.concurrency, maxRecordSize), getRegionFromConfig(file).orElse(getRegion), - cloud + cloud, + validateEnriched ) } diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Run.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Run.scala index 853403091..8b0f5cf0c 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Run.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Run.scala @@ -97,7 +97,8 @@ object Run { processor, maxRecordSize, cloud, - getRegion + getRegion, + file.featureFlags.validateEnrichedEvents ) runEnvironment[F, Array[Byte]](env) case _ => @@ -116,7 +117,8 @@ object Run { processor, maxRecordSize, cloud, - getRegion + getRegion, + file.featureFlags.validateEnrichedEvents ) runEnvironment[F, A](env) } diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFile.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFile.scala index f4d0e4615..d480e739c 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFile.scala +++ 
b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFile.scala @@ -27,7 +27,7 @@ import pureconfig.ConfigSource import pureconfig.module.catseffect.syntax._ import pureconfig.module.circe._ -import com.snowplowanalytics.snowplow.enrich.common.fs2.config.io.{Concurrency, Input, Monitoring, Output, Outputs, Telemetry} +import com.snowplowanalytics.snowplow.enrich.common.fs2.config.io.{Concurrency, FeatureFlags, Input, Monitoring, Output, Outputs, Telemetry} /** * Parsed HOCON configuration file @@ -37,6 +37,7 @@ import com.snowplowanalytics.snowplow.enrich.common.fs2.config.io.{Concurrency, * @param assetsUpdatePeriod time after which assets should be updated, in minutes * @param monitoring configuration for sentry and metrics * @param telemetry configuration for telemetry + * @param featureFlags to activate/deactivate enrich features */ final case class ConfigFile( input: Input, @@ -44,7 +45,8 @@ final case class ConfigFile( concurrency: Concurrency, assetsUpdatePeriod: Option[FiniteDuration], monitoring: Option[Monitoring], - telemetry: Telemetry + telemetry: Telemetry, + featureFlags: FeatureFlags ) object ConfigFile { @@ -55,13 +57,13 @@ object ConfigFile { implicit val configFileDecoder: Decoder[ConfigFile] = deriveConfiguredDecoder[ConfigFile].emap { - case ConfigFile(_, _, _, Some(aup), _, _) if aup._1 <= 0L => + case ConfigFile(_, _, _, Some(aup), _, _, _) if aup._1 <= 0L => "assetsUpdatePeriod in config file cannot be less than 0".asLeft // TODO: use newtype // Remove pii output if streamName and region empty - case c @ ConfigFile(_, Outputs(good, Some(Output.Kinesis(s, _, _, _, _, _, _, _, _, _, _, _, _)), bad), _, _, _, _) if s.isEmpty => + case c @ ConfigFile(_, Outputs(good, Some(Output.Kinesis(s, _, _, _, _, _, _, _, _, _, _, _, _)), bad), _, _, _, _, _) if s.isEmpty => c.copy(output = Outputs(good, None, bad)).asRight // Remove pii output if topic empty - case c @ ConfigFile(_, Outputs(good, 
Some(Output.PubSub(t, _, _, _, _)), bad), _, _, _, _) if t.isEmpty => + case c @ ConfigFile(_, Outputs(good, Some(Output.PubSub(t, _, _, _, _)), bad), _, _, _, _, _) if t.isEmpty => c.copy(output = Outputs(good, None, bad)).asRight case other => other.asRight } diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/io.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/io.scala index 86b445928..585bfb71b 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/io.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/io.scala @@ -353,4 +353,15 @@ object io { implicit val telemetryEncoder: Encoder[Telemetry] = deriveConfiguredEncoder[Telemetry] } + + case class FeatureFlags( + validateEnrichedEvents: Boolean + ) + + object FeatureFlags { + implicit val telemetryDecoder: Decoder[FeatureFlags] = + deriveConfiguredDecoder[FeatureFlags] + implicit val telemetryEncoder: Encoder[FeatureFlags] = + deriveConfiguredEncoder[FeatureFlags] + } } diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/output/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/output/jsonschema/1-0-0 new file mode 100644 index 000000000..942562087 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/output/jsonschema/1-0-0 @@ -0,0 +1,14 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.acme", + "name": "output", + "format": "jsonschema", + "version": "1-0-0" + }, + "properties": { + "output": { + "type": "string" + } + } +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/test/jsonschema/1-0-1 
b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/test/jsonschema/1-0-1 new file mode 100644 index 000000000..58d9b2f51 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/test/jsonschema/1-0-1 @@ -0,0 +1,18 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.acme", + "name": "test", + "format": "jsonschema", + "version": "1-0-1" + }, + "properties": { + "path": { + "properties": { + "id": { + "type": "integer" + } + } + } + } +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.callrail/call_complete/jsonschema/1-0-2 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.callrail/call_complete/jsonschema/1-0-2 new file mode 100644 index 000000000..4328e6cd7 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.callrail/call_complete/jsonschema/1-0-2 @@ -0,0 +1,143 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a CallRail call completion event", + "self": { + "vendor": "com.callrail", + "name": "call_complete", + "format": "jsonschema", + "version": "1-0-2" + }, + + "type": "object", + "properties": { + "answered": { + "type": ["boolean", "null"] + }, + "customer_city": { + "type": ["string", "null"] + }, + "customer_country": { + "type": ["string", "null"] + }, + "customer_name": { + "type": ["string", "null"] + }, + "customer_phone_number": { + "type": ["string", "null"] + }, + "customer_state": { + "type": ["string", "null"] + }, + "customer_zip": { + "type": ["string", "null"] + }, + "callercity": { + "type": ["string", "null"] + }, + "callercountry": { + "type": ["string", "null"] + }, + "callername": { + "type": ["string", "null"] + }, + "callernum": { + "type": ["string", "null"] + }, + "callerstate": { + "type": ["string", 
"null"] + }, + "callerzip": { + "type": ["string", "null"] + }, + "callsource": { + "type": ["string", "null"] + }, + "datetime": { + "type": "string", + "format": "date-time" + }, + "destinationnum": { + "type": ["string", "null"] + }, + "duration": { + "type": ["number", "null"] + }, + "first_call": { + "type": ["boolean", "null"] + }, + "device_type": { + "type": ["string", "null"] + }, + "ga": { + "type": ["string", "null"] + }, + "gclid": { + "type": ["string", "null"] + }, + "id": { + "type": "string" + }, + "ip": { + "type": ["string", "null"] + }, + "keywords": { + "type": ["string", "null"] + }, + "kissmetrics_id": { + "type": ["string", "null"] + }, + "landingpage": { + "type": ["string", "null"] + }, + "recording": { + "type": ["string", "null"] + }, + "referrer": { + "type": ["string", "null"] + }, + "referrermedium": { + "type": ["string", "null"] + }, + "trackingnum": { + "type": ["string", "null"] + }, + "transcription": { + "type": ["string", "null"] + }, + "utm_campaign": { + "type": ["string", "null"] + }, + "utm_content": { + "type": ["string", "null"] + }, + "utm_medium": { + "type": ["string", "null"] + }, + "utm_source": { + "type": ["string", "null"] + }, + "utm_term": { + "type": ["string", "null"] + }, + "utma": { + "type": ["string", "null"] + }, + "utmb": { + "type": ["string", "null"] + }, + "utmc": { + "type": ["string", "null"] + }, + "utmv": { + "type": ["string", "null"] + }, + "utmx": { + "type": ["string", "null"] + }, + "utmz": { + "type": ["string", "null"] + } + }, + "required": ["datetime", "id"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/hit/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/hit/jsonschema/1-0-0 new file mode 100644 index 000000000..b71cb200c --- /dev/null +++ 
b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/hit/jsonschema/1-0-0 @@ -0,0 +1,30 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Google Analytics hit entity", + "self": { + "vendor": "com.google.analytics.measurement-protocol", + "name": "hit", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "type": { + "enum": [ + "event", + "exception", + "item", + "pageview", + "screenview", + "social", + "timing", + "transaction" + ] + }, + "nonInteractionHit": { + "type": ["boolean", "null"] + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/page_view/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/page_view/jsonschema/1-0-0 new file mode 100644 index 000000000..e813a7baa --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/page_view/jsonschema/1-0-0 @@ -0,0 +1,31 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Google Analytics pageview hit", + "self": { + "vendor": "com.google.analytics.measurement-protocol", + "name": "page_view", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "documentLocationUrl": { + "type": ["string", "null"], + "maxLength": 2048 + }, + "documentHostName": { + "type": ["string", "null"], + "maxLength": 100 + }, + "documentPath": { + "type": ["string", "null"], + "maxLength": 2048 + }, + "documentTitle": { + "type": ["string", "null"], + "maxLength": 1500 + } + }, + "additionalProperties": false +} diff --git 
a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.hubspot/contact_creation/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.hubspot/contact_creation/jsonschema/1-0-0 new file mode 100644 index 000000000..280c3847f --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.hubspot/contact_creation/jsonschema/1-0-0 @@ -0,0 +1,64 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Webhook event for creating a new contact.", + "self": { + "vendor": "com.hubspot", + "name": "contact_creation", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "objectId": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647, + "description": "The ID of the object that was created/changed/deleted. For contacts this is the vid; for companies, the companyId; and for deals the dealId." + }, + "changeSource": { + "type": "string", + "maxLength": 128, + "description": "The source of this change. Can be any of the change sources that you find on contact property histories." + }, + "eventId": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647, + "description": "The unique ID of the event that triggered this notification." + }, + "subscriptionId": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647, + "description": "The ID of the subscription that caused us to send you a notification of this event." + }, + "portalId": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647, + "description": "The customer's portalId that this event came from." + }, + "appId": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647, + "description": "The ID of your application. 
(In case you have multiple applications pointing to the same webhook URL.)" + }, + "occurredAt": { + "type": "string", + "format": "date-time", + "description": "When this event occurred." + }, + "attemptNumber": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647, + "description": "Which attempt this is to notify your service of this event (starting at 0). If your service times-out or throws an error as described in 'Retries' below, we will attempt to send the notification to your service again." + }, + "changeFlag": { + "type": "string", + "maxLength": 128 + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.iab.snowplow/spiders_and_robots/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.iab.snowplow/spiders_and_robots/jsonschema/1-0-0 new file mode 100644 index 000000000..84d7f7fda --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.iab.snowplow/spiders_and_robots/jsonschema/1-0-0 @@ -0,0 +1,32 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a context generated by the IAB Spiders & Robots enrichment", + "self": { + "vendor": "com.iab.snowplow", + "name": "spiders_and_robots", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "spiderOrRobot": { + "description": "true if the IP address or user agent checked against the list is a spider or robot, false otherwise", + "type": "boolean" + }, + "category": { + "description": "Category based on activity if the IP/UA is a spider or robot, BROWSER otherwise", + "enum": ["SPIDER_OR_ROBOT", "ACTIVE_SPIDER_OR_ROBOT", "INACTIVE_SPIDER_OR_ROBOT", "BROWSER"] + }, + "reason": { + "description": "Type of failed check if the IP/UA is a spider or robot, PASSED_ALL otherwise", + "enum": ["FAILED_IP_EXCLUDE", "FAILED_UA_INCLUDE", 
"FAILED_UA_EXCLUDE", "PASSED_ALL"] + }, + "primaryImpact": { + "description": "Whether the spider or robot would affect page impression measurement, ad impression measurement, both or none", + "enum": ["PAGE_IMPRESSIONS", "AD_IMPRESSIONS", "PAGE_AND_AD_IMPRESSIONS", "UNKNOWN", "NONE"] + } + }, + "required": ["spiderOrRobot", "category", "reason", "primaryImpact"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailchimp/subscribe/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailchimp/subscribe/jsonschema/1-0-0 new file mode 100644 index 000000000..7b339054e --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailchimp/subscribe/jsonschema/1-0-0 @@ -0,0 +1,67 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Mailchimp subscribe event", + "self": { + "vendor": "com.mailchimp", + "name": "subscribe", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "data": { + "type": "object", + "properties": { + "email": { + "type": "string" + }, + "email_type": { + "type": "string" + }, + "id": { + "type": "string" + }, + "web_id": { + "type": "string" + }, + "ip_opt": { + "type": "string" + }, + "ip_signup": { + "type": "string" + }, + "list_id": { + "type": "string" + }, + "merges": { + "type": "object", + "properties": { + "EMAIL": { + "type": ["string", "null"] + }, + "FNAME": { + "type": ["string", "null"] + }, + "LNAME": { + "type": ["string", "null"] + }, + "INTERESTS": { + "type": ["string", "null"] + } + }, + "additionalProperties": true + } + }, + "additionalProperties": false + }, + "fired_at": { + "type": "string", + "format": "date-time" + }, + "type": { + "type": "string" + } + }, + "additionalProperties": false +} diff --git 
a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailgun/message_delivered/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailgun/message_delivered/jsonschema/1-0-0 new file mode 100644 index 000000000..30acbb975 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailgun/message_delivered/jsonschema/1-0-0 @@ -0,0 +1,56 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Mailgun message delivered event", + "self": { + "vendor": "com.mailgun", + "name": "message_delivered", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "event": { + "description": "Event name ('delivered').", + "type": "string", + "maxLength": 16 + }, + "recipient": { + "description": "Intended recipient.", + "type": "string", + "format": "email", + "maxLength": 256 + }, + "domain": { + "description": "Domain that sent the original message.", + "type": "string", + "format": "hostname", + "maxLength": 256 + }, + "messageHeaders": { + "description": "String list of all MIME headers of the original message dumped to a JSON string (order of headers preserved).", + "type": "string" + }, + "messageId": { + "description": "The Id of the message that delivered.", + "type": "string", + "maxLength": 1024 + }, + "timestamp": { + "description": "Timestamp in ISO8601 format.", + "type": "string", + "format": "date-time" + }, + "token": { + "description": "Randomly generated string with length 50 (see securing webhooks).", + "type": "string", + "maxLength": 50, + "minLength": 50 + }, + "signature": { + "description": "String with hexadecimal digits generate by HMAC algorithm (see securing webhooks).", + "type": "string", + "maxLength": 128 + } + }, + "additionalProperties": true +} diff --git 
a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mandrill/message_sent/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mandrill/message_sent/jsonschema/1-0-0 new file mode 100644 index 000000000..755d23ea8 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mandrill/message_sent/jsonschema/1-0-0 @@ -0,0 +1,111 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Mandrill message sent event", + "self": { + "vendor": "com.mandrill", + "name": "message_sent", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "msg": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "_version": { + "type": "string" + }, + "clicks": { + "type": "array" + }, + "email": { + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "user_id": { + "type": "number" + } + }, + "additionalProperties": true + }, + "opens": { + "type": "array" + }, + "sender": { + "type": "string" + }, + "state": { + "type": "string" + }, + "subject": { + "type": "string" + }, + "tags": { + "type": "array", + "items": { + "type": "string" + } + }, + "ts": { + "type": "string", + "format": "date-time" + }, + "reject": { + "type": ["string", "null"] + }, + "resends": { + "type": "array" + }, + "smtp_events": { + "type": "array", + "items": { + "type": "object", + "properties": { + "ts": { + "type": "string", + "format": "date-time" + }, + "type": { + "type": "string" + }, + "diag": { + "type": "string" + }, + "source_ip": { + "type": "string" + }, + "destination_ip": { + "type": "string" + }, + "size": { + "type": "integer" + } + }, + "additionalProperties": true + } + }, + "subaccount": { + "type": ["string", "null"] + }, + "template": { + "type": ["string", "null"] + } + }, + "additionalProperties": 
false + }, + "ts": { + "type": "string", + "format": "date-time" + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.marketo/event/jsonschema/2-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.marketo/event/jsonschema/2-0-0 new file mode 100644 index 000000000..b7f5de6c1 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.marketo/event/jsonschema/2-0-0 @@ -0,0 +1,601 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Marketo description", + "self": { + "vendor": "com.marketo", + "name": "event", + "format": "jsonschema", + "version": "2-0-0" + }, + "type": "object", + "properties": { + "name": { + "type": "string", + "maxLength": 128 + }, + "description": { + "type": ["string", "null"], + "maxLength": 255 + }, + "step": { + "type": ["integer", "null"], + "minimum": 0, + "maximum": 32767 + }, + "campaign": { + "type": ["object", "null"], + "properties": { + "id": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "name": { + "type": "string", + "maxLength": 255 + }, + "description": { + "type": ["string", "null"], + "maxLength": 255 + } + }, + "required": ["id", "name"] + }, + "company": { + "type": ["object", "null"], + "properties": { + "account_owner_email_address": { + "type": "string", + "format": "email" + }, + "account_owner_first_name": { + "type": "string", + "maxLength": 255 + }, + "account_owner_last_name": { + "type": "string", + "maxLength": 255 + }, + "annual_revenue": { + "type": "number", + "minimum": 0, + "maximum": 2147483647 + }, + "billing_address": { + "type": "string", + "maxLength": 255 + }, + "billing_city": { + "type": "string", + "maxLength": 255 + }, + "billing_country": { + "type": "string", + "maxLength": 255 + }, + "billing_postal_code": { + "type": "string", + "maxLength": 255 + }, + "billing_state": 
{ + "type": "string", + "maxLength": 255 + }, + "name": { + "type": "string", + "maxLength": 255 + }, + "notes": { + "type": "string", + "maxLength": 255 + }, + "industry": { + "type": "string", + "maxLength": 255 + }, + "main_phone": { + "type": "string", + "maxLength": 255 + }, + "num_employees": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "parent_company_name": { + "type": "string", + "maxLength": 255 + }, + "sic_code": { + "type": "string", + "maxLength": 40 + }, + "site": { + "type": "string", + "maxLength": 255 + }, + "website": { + "type": "string", + "maxLength": 255 + } + } + }, + "lead": { + "type": ["object", "null"], + "properties": { + "acquisition_date": { + "type": "string", + "format": "date-time" + }, + "acquisition_program_name": { + "type": "string", + "maxLength": 255 + }, + "acquisition_program": { + "type": "string", + "maxLength": 255 + }, + "address": { + "type": "string", + "maxLength": 255 + }, + "anonymous_ip": { + "type": "string", + "maxLength": 45 + }, + "black_listed": { + "type": "boolean" + }, + "city": { + "type": "string", + "maxLength": 255 + }, + "country": { + "type": "string", + "maxLength": 255 + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "date_of_birth": { + "type": "string", + "format": "date" + }, + "department": { + "type": "string", + "maxLength": 255 + }, + "do_not_call_reason": { + "type": "string", + "maxLength": 512 + }, + "do_not_call": { + "type": "boolean" + }, + "email_address": { + "type": "string", + "format": "email" + }, + "email_invalid_cause": { + "type": "string", + "maxLength": 255 + }, + "email_invalid": { + "type": "boolean" + }, + "email_suspended_at": { + "type": "string", + "format": "date-time" + }, + "email_suspended_cause": { + "type": "string", + "maxLength": 2000 + }, + "email_suspended": { + "type": "boolean" + }, + "fax_number": { + "type": "string", + "maxLength": 255 + }, + "first_name": { + "type": "string", + "maxLength": 255 + }, + 
"full_name": { + "type": "string", + "maxLength": 255 + }, + "inferred_city": { + "type": "string", + "maxLength": 255 + }, + "inferred_company": { + "type": "string", + "maxLength": 255 + }, + "inferred_country": { + "type": "string", + "maxLength": 255 + }, + "inferred_metropolitan_area": { + "type": "string", + "maxLength": 255 + }, + "inferred_phone_area_code": { + "type": "string", + "maxLength": 255 + }, + "inferred_postal_code": { + "type": "string", + "maxLength": 255 + }, + "inferred_state_region": { + "type": "string", + "maxLength": 255 + }, + "is_customer": { + "type": "boolean" + }, + "is_partner": { + "type": "boolean" + }, + "job_title": { + "type": "string", + "maxLength": 255 + }, + "last_interesting_moment_date": { + "type": "string", + "format": "date-time" + }, + "last_interesting_moment_description": { + "type": "string", + "maxLength": 255 + }, + "last_interesting_moment_source": { + "type": "string", + "maxLength": 255 + }, + "last_interesting_moment_type": { + "type": "string", + "maxLength": 255 + }, + "last_name": { + "type": "string", + "maxLength": 255 + }, + "lead_owner_email_address": { + "type": "string", + "format": "email" + }, + "lead_owner_first_name": { + "type": "string", + "maxLength": 255 + }, + "lead_owner_job_title": { + "type": "string", + "maxLength": 255 + }, + "lead_owner_last_name": { + "type": "string", + "maxLength": 255 + }, + "lead_owner_phone_number": { + "type": "string", + "maxLength": 255 + }, + "lead_rating": { + "type": "string", + "maxLength": 255 + }, + "lead_score": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "lead_source": { + "type": "string", + "maxLength": 255 + }, + "lead_status": { + "type": "string", + "maxLength": 255 + }, + "lead_marketing_suspended": { + "type": "boolean" + }, + "facebook_display_name": { + "type": "string", + "maxLength": 255 + }, + "facebook_id": { + "type": "string", + "maxLength": 512 + }, + "facebook_photo_url": { + "type": "string", + "maxLength": 
255 + }, + "facebook_profile_url": { + "type": "string", + "maxLength": 255 + }, + "facebook_reach": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "facebook_referred_enrollments": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "facebook_referred_visits": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "gender": { + "type": "string", + "maxLength": 6 + }, + "last_referred_enrollment": { + "type": "string", + "format": "date-time" + }, + "last_referred_visit": { + "type": "string", + "format": "date-time" + }, + "linkedin_display_name": { + "type": "string", + "maxLength": 255 + }, + "linkedin_id": { + "type": "string", + "maxLength": 512 + }, + "linkedin_photo_url": { + "type": "string", + "maxLength": 512 + }, + "linkedin_profile_url": { + "type": "string", + "maxLength": 512 + }, + "linkedin_reach": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "linkedin_referred_enrollments": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "linkedin_referred_visits": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "syndication_id": { + "type": "string", + "maxLength": 512 + }, + "total_referred_enrollments": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "total_referred_visits": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "twitter_display_name": { + "type": "string", + "maxLength": 255 + }, + "twitter_id": { + "type": "string", + "maxLength": 512 + }, + "twitter_photo_url": { + "type": "string", + "maxLength": 512 + }, + "twitter_profile_url": { + "type": "string", + "maxLength": 512 + }, + "twitter_reach": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "twitter_referred_enrollments": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "twitter_referred_visits": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "middle_name": { + "type": 
"string", + "maxLength": 255 + }, + "mobile_phone_number": { + "type": "string", + "maxLength": 255 + }, + "number_of_optys": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "original_referrer": { + "type": "string", + "maxLength": 255 + }, + "original_search_engine": { + "type": "string", + "maxLength": 255 + }, + "original_search_phrase": { + "type": "string", + "maxLength": 255 + }, + "original_source_info": { + "type": "string", + "maxLength": 255 + }, + "original_source_type": { + "type": "string", + "maxLength": 255 + }, + "person_notes": { + "type": "string", + "maxLength": 512 + }, + "person_timezone": { + "type": "string", + "maxLength": 255 + }, + "phone_number": { + "type": "string", + "maxLength": 255 + }, + "postal_code": { + "type": "string", + "maxLength": 255 + }, + "priority": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "registration_source_info": { + "type": "string", + "maxLength": 2000 + }, + "registration_source_type": { + "type": "string", + "maxLength": 255 + }, + "relative_score": { + "type": "integer", + "minimum": 0, + "maximum": 5 + }, + "relative_urgency": { + "type": "integer", + "minimum": 0, + "maximum": 5 + }, + "role": { + "type": "string", + "maxLength": 50 + }, + "salutation": { + "type": "string", + "maxLength": 255 + }, + "state": { + "type": "string", + "maxLength": 255 + }, + "total_opty_amount": { + "type": "number", + "minimum": 0, + "maximum": 2147483647 + }, + "total_opty_expected_revenue": { + "type": "number", + "minimum": 0, + "maximum": 2147483647 + }, + "unsubscribed_reason": { + "type": "string", + "maxLength": 512 + }, + "unsubscribed": { + "type": "boolean" + }, + "updated_at": { + "type": "string", + "format": "date-time" + }, + "urgency": { + "type": "integer", + "minimum": 0, + "maximum": 32767 + } + } + }, + "program": { + "type": ["object", "null"], + "properties": { + "id": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "name": { + "type": 
"string", + "maxLength": 255 + }, + "description": { + "type": "string", + "maxLength": 2000 + } + } + }, + "social": { + "type": ["object", "null"], + "properties": { + "promo_code": { + "type": "string", + "maxLength": 255 + }, + "share_url": { + "type": "string", + "maxLength": 2000 + }, + "email": { + "type": "string", + "format": "email" + } + } + }, + "datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "forward_to_friend_link": { + "type": ["string", "null"], + "maxLength": 255 + }, + "munchkinId": { + "type": ["string", "null"], + "maxLength": 255 + }, + "unsubscribe_link": { + "type": ["string", "null"], + "maxLength": 2000 + }, + "view_as_webpage_link": { + "type": ["string", "null"], + "maxLength": 2000 + }, + "sp_send_alert_info": { + "type": ["string", "null"], + "maxLength": 2000 + } + }, + "additionalProperties": true, + "required": ["name"] +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.olark/transcript/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.olark/transcript/jsonschema/1-0-0 new file mode 100644 index 000000000..5839eb007 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.olark/transcript/jsonschema/1-0-0 @@ -0,0 +1,211 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for an Olark transcript event", + "self": { + "vendor": "com.olark", + "name": "transcript", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "tags": { + "type": "array", + "items": { + "type": "string", + "maxLength": 128 + } + }, + "items": { + "type": "array", + "items": { + "type": "object", + "properties": { + "body": { + "type": "string" + }, + "timestamp": { + "type": "string", + "format": "date-time" + }, + "operatorId": { + "type": "string", + "maxLength": 64 + }, + "kind": { + "type": "string", + 
"maxLength": 64 + }, + "nickname": { + "type": "string", + "maxLength": 64 + }, + "visitorNickname": { + "type": "string", + "maxLength": 64 + } + }, + "additionalProperties": false + } + }, + "groups": { + "type": "array", + "items": { + "type": "object", + "properties": { + "kind": { + "type": "string", + "maxLength": 128 + }, + "name": { + "type": "string", + "maxLength": 128 + }, + "id": { + "type": "string", + "maxLength": 128 + } + }, + "additionalProperties": false + } + }, + "visitor": { + "type": "object", + "properties": { + "organization": { + "type": "string", + "maxLength": 128 + }, + "city": { + "type": "string", + "maxLength": 128 + }, + "ip": { + "type": "string", + "format": "ipv4" + }, + "phoneNumber": { + "type": "string", + "maxLength": 64 + }, + "browser": { + "type": "string", + "maxLength": 128 + }, + "fullName": { + "type": "string", + "maxLength": 256 + }, + "emailAddress": { + "type": "string", + "format": "email", + "maxLength": 256 + }, + "country": { + "type": "string", + "maxLength": 128 + }, + "operatingSystem": { + "type": "string", + "maxLength": 128 + }, + "id": { + "type": "string", + "maxLength": 64 + }, + "countryCode": { + "type": "string", + "maxLength": 8 + }, + "region": { + "type": "string", + "maxLength": 8 + }, + "conversationBeginPage": { + "type": "string", + "format": "uri", + "maxLength": 8192 + }, + "chatFeedback": { + "type": "object", + "properties": { + "knowledge": { + "type": "integer", + "maximum": 32767, + "minimum": 0 + }, + "friendliness": { + "type": "integer", + "maximum": 32767, + "minimum": 0 + }, + "responsiveness": { + "type": "integer", + "maximum": 32767, + "minimum": 0 + }, + "overallChat": { + "type": "integer", + "maximum": 32767, + "minimum": 0 + }, + "comments": { + "type": "string" + } + }, + "additionalProperties": false + }, + "kind": { + "type": "string", + "maxLength": 64 + } + }, + "additionalProperties": true + }, + "id": { + "type": "string", + "maxLength": 128 + }, + "kind": { + "type": 
"string", + "maxLength": 64 + }, + "manuallySubmitted": { + "type": "boolean" + }, + "operators": { + "type": "object", + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "username": { + "type": "string", + "maxLength": 128 + }, + "emailAddress": { + "type": "string", + "format": "email", + "maxLength": 256 + }, + "id": { + "type": "string", + "maxLength": 128 + }, + "kind": { + "type": "string", + "maxLength": 64 + }, + "nickname": { + "type": "string", + "maxLength": 64 + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pagerduty/incident/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pagerduty/incident/jsonschema/1-0-0 new file mode 100644 index 000000000..c490000fb --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pagerduty/incident/jsonschema/1-0-0 @@ -0,0 +1,207 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a PagerDuty incident event", + "self": { + "vendor": "com.pagerduty", + "name": "incident", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "id": { + "type": "string" + }, + "created_on": { + "type": "string", + "format": "date-time" + }, + "data": { + "type": "object", + "properties": { + "incident": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "incident_number": { + "type": "integer" + }, + "created_on": { + "type": "string", + "format": "date-time" + }, + "status": { + "type": "string" + }, + "pending_actions": { + "type": "array" + }, + "html_url": { + "type": "string" + }, + "incident_key": { + "type": [ "string", "null" ] + }, + "service": { + "type": "object", + "properties": { + "id": 
{ + "type": "string" + }, + "name": { + "type": "string" + }, + "html_url": { + "type": "string" + }, + "deleted_at": { + "type": "null" + } + }, + "additionalProperties": false + }, + "escalation_policy": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "deleted_at": { + "type": "null" + } + }, + "additionalProperties": false + }, + "assigned_to_user": { + "type": [ "object", "null" ], + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "email": { + "type": "string" + }, + "html_url": { + "type": "string" + } + }, + "additionalProperties": false + }, + "trigger_summary_data": { + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "subject": { + "type": "string" + }, + "client": { + "type": "string" + } + }, + "additionalProperties": false + }, + "trigger_details_html_url": { + "type": "string" + }, + "trigger_type": { + "type": "string" + }, + "last_status_change_on": { + "type": "string", + "format": "date-time" + }, + "last_status_change_by": { + "type": [ "object", "null" ], + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "email": { + "type": "string" + }, + "html_url": { + "type": "string" + } + }, + "additionalProperties": false + }, + "number_of_escalations": { + "type": "integer" + }, + "assigned_to": { + "type": "array", + "items": { + "type": "object", + "properties": { + "at": { + "type": "string" + }, + "object": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "email": { + "type": "string" + }, + "html_url": { + "type": "string" + }, + "type": { + "type": "string" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "resolved_by_user": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "email": { + "type": "string" + }, + 
"html_url": { + "type": "string" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pingdom/incident_assign/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pingdom/incident_assign/jsonschema/1-0-0 new file mode 100644 index 000000000..29dcbc0ef --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pingdom/incident_assign/jsonschema/1-0-0 @@ -0,0 +1,30 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Pingdom incident assigned event", + "self": { + "vendor": "com.pingdom", + "name": "incident_assign", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "check": { + "type": "string" + }, + "checkname": { + "type": "string" + }, + "host": { + "type": "string" + }, + "incidentid": { + "type": "integer" + }, + "description": { + "type": "string" + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.sendgrid/processed/jsonschema/2-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.sendgrid/processed/jsonschema/2-0-0 new file mode 100644 index 000000000..c0def0617 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.sendgrid/processed/jsonschema/2-0-0 @@ -0,0 +1,89 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a SendGrid processed event. 
Property descriptions derived from the SendGrid documentation: https://sendgrid.com/docs/for-developers/tracking-events/event/", + "self": { + "vendor": "com.sendgrid", + "name": "processed", + "version": "2-0-0", + "format": "jsonschema" + }, + "type": "object", + "properties": { + "timestamp": { + "description": "The timestamp of when the message was sent", + "type": "string", + "format": "date-time" + }, + "email": { + "description": "The email address of the recipient", + "type": "string", + "format": "email" + }, + "newsletter": { + "description": "Legacy Marketing Email tool fields", + "type": "object", + "properties": { + "newsletter_user_list_id": { + "type": "string" + }, + "newsletter_id": { + "type": "string" + }, + "newsletter_send_id": { + "type": "string" + } + }, + "additionalProperties": true + }, + "sg_event_id": { + "description": "A unique ID to this event that you can use for deduplication purposes. These IDs are up to 100 characters long and are URL safe.", + "type": "string", + "minLength": 22, + "maxLength": 4096 + }, + "smtp-id": { + "description": "A unique ID attached to the message by the originating system", + "type": "string" + }, + "category": { + "description": "Categories are custom tags that you set for the purpose of organizing your emails. Categories can be set as an array or string, and they will be returned as such when posted in your event endpoint.", + "type": ["array", "string"], + "items": { + "type": "string" + } + }, + "asm_group_id": { + "description": "The ID of the unsubscribe group the recipient’s email address is included in. ASM IDs correspond to the ID that is returned when you create an unsubscribe group.", + "type": "integer", + "maximum": 32767, + "minimum": 0 + }, + "sg_message_id": { + "description": "A unique, internal SendGrid ID for the message. 
The first half of this is pulled from the smtp-id.", + "type": "string" + }, + "send_at": { + "description": "To schedule a send request for a large batch of emails, use the send_at parameter which will send all emails at approximately the same time. send_at is a UNIX timestamp.", + "type": "integer", + "maximum": 2147483647, + "minimum": 0 + }, + "marketing_campaign_id": { + "description": "For emails sent through our Marketing Campaigns feature, we add Marketing Campaigns specific parameters to the Event Webhook. Both marketing_campaign_name and marketing_campaign_id are displayed as unique arguments in the event data.", + "type": "integer" + }, + "marketing_campaign_name": { + "description": "For emails sent through our Marketing Campaigns feature, we add Marketing Campaigns specific parameters to the Event Webhook. Both marketing_campaign_name and marketing_campaign_id are displayed as unique arguments in the event data.", + "type": "string" + }, + "marketing_campaign_version": { + "description": "Displayed in the event data for emails sent as part of an A/B Test. 
The value for marketing_campaign_version are returned as A, B, C, etc.", + "type": "string" + }, + "marketing_campaign_split_id": { + "description": "Marketing campaign split id", + "type": "integer" + } + }, + "additionalProperties": true +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow-website/signup_form_submitted/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow-website/signup_form_submitted/jsonschema/1-0-0 new file mode 100644 index 000000000..46a019700 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow-website/signup_form_submitted/jsonschema/1-0-0 @@ -0,0 +1,30 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for unit tests", + "self": { + "vendor": "com.snowplowanalytics.snowplow-website", + "name": "signup_form_submitted", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "name": { + "type": ["string", "null"] + }, + "email": { + "type": ["string", "null"] + }, + "company": { + "type": ["string", "null"] + }, + "eventsPerMonth": { + "type": ["string", "null"] + }, + "serviceType": { + "type": ["string", "null"] + } + }, + "additionalProperties": false +} + diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/api_request_enrichment_config/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/api_request_enrichment_config/jsonschema/1-0-0 new file mode 100644 index 000000000..45b9c6289 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/api_request_enrichment_config/jsonschema/1-0-0 @@ -0,0 +1,167 @@ +{ + "$schema": 
"http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for API Request Enrichment configuration", + "self": { + "vendor": "com.snowplowanalytics.snowplow.enrichments", + "name": "api_request_enrichment_config", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + + "parameters": { + "type": "object", + "properties": { + "inputs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { + "type": "string", + "pattern": "^[A-Za-z0-9_-]+$" + }, + "pojo": { + "type": "object", + "properties": { + "field": { + "type": "string" + } + }, + "additionalProperties": false + }, + "json": { + "type": "object", + "properties": { + "field": { + "type": "string", + "enum": ["unstruct_event", "contexts", "derived_contexts"] + }, + "schemaCriterion": { + "type": "string", + "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/([1-9][0-9]*|\\*)-((?:0|[1-9][0-9]*)|\\*)-((?:0|[1-9][0-9]*)|\\*)$" + }, + "jsonPath": { + "type": "string", + "pattern": "^\\$.*$" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false, + "minProperties": 2, + "maxProperties": 2, + "required": ["key"] + } + }, + "api": { + "type": "object", + "minProperties": 1, + "maxProperties": 1, + "properties": { + "http": { + "type": "object", + "properties": { + "method": { + "type": "string", + "enum": ["GET", "POST", "PUT"] + }, + "uri": { + "type": "string" + }, + "timeout": { + "type": "integer", + "minimum": 1, + "maximum": 60000 + }, + "authentication": { + "type": "object", + "properties": { + "httpBasic": { + "type": "object", + "properties": { + "username": { + "type": "string" + }, + "password": { + "type": "string" + } + }, + "required": ["username", "password"], + "additionalProperties": false + } + }, + "additionalProperties": 
false + } + }, + "required": ["method", "uri", "timeout", "authentication"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "outputs": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "properties": { + "schema": { + "type": "string", + "pattern": "^iglu:([a-zA-Z0-9-_.]+)/([a-zA-Z0-9-_]+)/([a-zA-Z0-9-_]+)/([1-9][0-9]*(?:-(?:0|[1-9][0-9]*)){2})$" + }, + "json": { + "type": "object", + "properties": { + "jsonPath": { + "type": "string", + "pattern": "^\\$.*$" + } + }, + "required": ["jsonPath"], + "additionalProperties": false + } + }, + "required": ["schema"], + "minProperties": 2, + "maxProperties": 2, + "additionalProperties": false + } + }, + "cache": { + "type": "object", + "properties": { + "size": { + "type": "integer", + "minimum": 1 + }, + "ttl": { + "type": "integer", + "minimum": 0, + "maximum": 86400 + } + }, + "additionalProperties": false, + "required": ["size", "ttl"] + } + }, + "additionalProperties": false, + "required": ["inputs", "api", "outputs", "cache"] + } + }, + "additionalProperties": false, + "required": ["name", "vendor", "enabled", "parameters"] +} + diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/iab_spiders_and_robots_enrichment/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/iab_spiders_and_robots_enrichment/jsonschema/1-0-0 new file mode 100644 index 000000000..5f898ff5f --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/iab_spiders_and_robots_enrichment/jsonschema/1-0-0 @@ -0,0 +1,84 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for IAB Spiders & Robots enrichment config", + "self": { + "vendor": "com.snowplowanalytics.snowplow.enrichments", + "name": 
"iab_spiders_and_robots_enrichment", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + + "parameters": { + "type": "object", + "properties": { + "ipFile": { + "description": "Path to IP address exclude file", + "type": "object", + "properties": { + "database": { + "enum": ["ip_exclude_current_cidr.txt"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["database", "uri"] + }, + "excludeUseragentFile": { + "description": "Path to user agent exclude file", + "type": "object", + "properties": { + "database": { + "enum": ["exclude_current.txt"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["database", "uri"] + }, + "includeUseragentFile": { + "description": "Path to user agent include file", + "type": "object", + "properties": { + "database": { + "enum": ["include_current.txt"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["database", "uri"] + } + }, + "required": [ + "ipFile", + "excludeUseragentFile", + "includeUseragentFile" + ], + "additionalProperties": false + } + }, + "required": [ + "vendor", + "name", + "enabled", + "parameters" + ], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/2-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/2-0-0 new file mode 100644 index 000000000..45d678710 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/2-0-0 @@ -0,0 +1,164 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + 
"description": "Schema for PII pseudonymization enrichment", + "self": { + "vendor": "com.snowplowanalytics.snowplow.enrichments", + "name": "pii_enrichment_config", + "format": "jsonschema", + "version": "2-0-0" + }, + "type": "object", + "properties": { + "vendor": { + "type": "string", + "description": "The name of the vendor for this config (the only valid value for scala-common enrich is com.snowplowanalytics.snowplow.enrichments)" + }, + "name": { + "type": "string", + "description": "The name of the config (the only valid value for scala-common enrich is pii_enrichment_config)" + }, + "enabled": { + "type": "boolean", + "description": "Whether to enable this enrichment" + }, + "emitEvent": { + "type": "boolean", + "description": "Whether to emit identification events from this enrichment" + }, + "parameters": { + "type": "object", + "properties": { + "pii": { + "description": "List of all the fields for which pdeudonymization will be performed", + "type": "array", + "items": { + "type": "object", + "properties": { + "pojo": { + "description": "Scalar field which contains a single string value, on which pseudonymization will be performed in the enire field (e.g. 
`user-123`)", + "type": "object", + "properties": { + "field": { + "enum": [ + "user_id", + "user_ipaddress", + "user_fingerprint", + "domain_userid", + "network_userid", + "ip_organization", + "ip_domain", + "tr_orderid", + "ti_orderid", + "mkt_term", + "mkt_content", + "se_category", + "se_action", + "se_label", + "se_property", + "mkt_clickid", + "refr_domain_userid", + "domain_sessionid" + ] + } + }, + "required": [ + "field" + ], + "additionalProperties": false + }, + "json": { + "description": "JSON field which contains a JSON string value, on which pseudonymization will be performed on a specific JSON path", + "type": "object", + "properties": { + "field": { + "enum": [ + "contexts", + "derived_contexts", + "unstruct_event" + ] + }, + "schemaCriterion": { + "type": "string", + "pattern": "^iglu:([a-zA-Z0-9-_.]+)/([a-zA-Z0-9-_]+)/([a-zA-Z0-9-_]+)/([1-9][0-9]*|\\*)-((?:0|[1-9][0-9]*)|\\*)-((?:0|[1-9][0-9]*)|\\*)$" + }, + "jsonPath": { + "type": "string", + "pattern": "^\\$.*$" + } + }, + "required": [ + "field", + "schemaCriterion", + "jsonPath" + ], + "additionalProperties": false + } + }, + "oneOf": [ + { + "required": [ + "pojo" + ] + }, + { + "required": [ + "json" + ] + } + ], + "additionalProperties": false + } + }, + "strategy": { + "description": "The pseudonymization strategy which will be applied to all the fields specified in the `pii` section", + "type": "object", + "properties": { + "pseudonymize": { + "description": "Pseudonymization strategy that hashes using a specified algorithm", + "type": "object", + "properties": { + "hashFunction": { + "description": "The hash function that will be used by this strategy", + "enum": [ + "MD2", + "MD5", + "SHA-1", + "SHA-256", + "SHA-384", + "SHA-512" + ] + }, + "salt": { + "description": "A salt that will be added to the field during hashing", + "type": "string" + } + }, + "required": [ + "hashFunction", + "salt" + ], + "additionalProperties": false + } + }, + "required": [ + "pseudonymize" + ], + 
"additionalProperties": false + } + }, + "required": [ + "pii", + "strategy" + ], + "additionalProperties": false + } + }, + "required": [ + "vendor", + "name", + "enabled", + "emitEvent", + "parameters" + ], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/sql_query_enrichment_config/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/sql_query_enrichment_config/jsonschema/1-0-0 new file mode 100644 index 000000000..941407ee6 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/sql_query_enrichment_config/jsonschema/1-0-0 @@ -0,0 +1,211 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for SQL Query enrichment configuration", + "self": { + "vendor": "com.snowplowanalytics.snowplow.enrichments", + "name": "sql_query_enrichment_config", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + + "parameters": { + "type": "object", + "properties": { + "inputs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "placeholder": { + "type": "integer", + "minimum": 1, + "maximum": 64 + }, + "pojo": { + "type": "object", + "properties": { + "field": { + "type": "string" + } + }, + "additionalProperties": false, + "required": ["field"] + }, + "json": { + "type": "object", + "properties": { + "field": { + "type": "string", + "enum": ["unstruct_event", "contexts", "derived_contexts"] + }, + "schemaCriterion": { + "type": "string", + "pattern": 
"^iglu:([a-zA-Z0-9-_.]+)/([a-zA-Z0-9-_]+)/([a-zA-Z0-9-_]+)/([1-9][0-9]*|\\*)-((?:0|[1-9][0-9]*)|\\*)-((?:0|[1-9][0-9]*)|\\*)$" + }, + "jsonPath": { + "type": "string" + } + }, + "additionalProperties": false, + "required": ["field", "schemaCriterion", "jsonPath"] + } + }, + "additionalProperties": false, + "minProperties": 2, + "maxProperties": 2, + "required": ["placeholder"] + } + }, + "database": { + "oneOf": [ + + { + "type": "object", + "properties": { + "postgresql": { + "type": "object", + "properties": { + "host": { + "type": "string" + }, + "port": { + "type": "integer", + "minimum": 1, + "maximum": 65535 + }, + "sslMode": { + "type": "boolean" + }, + "username": { + "type": "string", + "minLength": 1 + }, + "password": { + "type": "string" + }, + "database": { + "type": "string", + "minLength": 1 + } + }, + "required": ["host", "port", "sslMode", "username", "password", "database"], + "additionalProperties": false + } + }, + "required": ["postgresql"], + "additionalProperties": false + }, + + { + "type": "object", + "properties": { + "mysql": { + "type": "object", + "properties": { + "host": { + "type": "string" + }, + "port": { + "type": "integer", + "minimum": 1, + "maximum": 65535 + }, + "sslMode": { + "type": "boolean" + }, + "username": { + "type": "string", + "minLength": 1 + }, + "password": { + "type": "string" + }, + "database": { + "type": "string", + "minLength": 1 + } + }, + "required": ["host", "port", "sslMode", "username", "password", "database"], + "additionalProperties": false + } + }, + "required": ["mysql"] + } + ], + "additionalProperties": true + }, + "query": { + "type": "object", + "properties": { + "sql": { + "type": "string" + } + }, + "required": ["sql"], + "additionalProperties": false + }, + "output": { + "type": "object", + "properties": { + "expectedRows": { + "type": "string", + "enum": ["AT_LEAST_ONE", "AT_LEAST_ZERO", "AT_MOST_ONE", "EXACTLY_ONE"] + }, + "json": { + "type": "object", + "properties": { + "schema": { + 
"type": "string", + "pattern": "^iglu:([a-zA-Z0-9-_.]+)/([a-zA-Z0-9-_]+)/([a-zA-Z0-9-_]+)/([1-9][0-9]*(?:-(?:0|[1-9][0-9]*)){2})$" + }, + "propertyNames": { + "type": "string", + "enum": ["AS_IS", "CAMEL_CASE", "PASCAL_CASE", "SNAKE_CASE", "LOWER_CASE", "UPPER_CASE"] + }, + "describes": { + "type": "string", + "enum": ["ALL_ROWS", "EVERY_ROW"] + } + }, + "required": ["schema", "propertyNames", "describes"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "cache": { + "type": "object", + "properties": { + "size": { + "type": "integer", + "minimum": 0 + }, + "ttl": { + "type": "integer", + "minimum": 0, + "maximum": 86400 + } + }, + "additionalProperties": false, + "required": ["size", "ttl"] + } + }, + "additionalProperties": false, + "required": ["inputs", "database", "query", "output", "cache"] + } + }, + "additionalProperties": false, + "required": ["name", "vendor", "enabled", "parameters"] +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/yauaa_enrichment_config/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/yauaa_enrichment_config/jsonschema/1-0-0 new file mode 100644 index 000000000..31d30f048 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/yauaa_enrichment_config/jsonschema/1-0-0 @@ -0,0 +1,38 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for YAUAA enrichment config", + "self": { + "vendor": "com.snowplowanalytics.snowplow.enrichments", + "name": "yauaa_enrichment_config", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "parameters": { + "type": 
"object", + "properties": { + "cacheSize": { + "type": "integer" + } + }, + "additionalProperties": false + } + }, + "required": [ + "vendor", + "name", + "enabled" + ], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.input-adapters/segment_webhook_config/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.input-adapters/segment_webhook_config/jsonschema/1-0-0 new file mode 100644 index 000000000..4c7301ba3 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.input-adapters/segment_webhook_config/jsonschema/1-0-0 @@ -0,0 +1,37 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema to configure the Segment webhook adapter", + "self": { + "vendor": "com.snowplowanalytics.snowplow.input-adapters", + "name": "segment_webhook_config", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "parameters": { + "type": "object", + "properties": { + "mappings": { + "type": "object", + "patternProperties": { + ".*": { + "type": "string" + } + } + } + }, + "required": ["mappings"], + "additionalProperties": false + } + }, + "required": ["name", "vendor", "parameters"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 new file mode 100644 index 000000000..f3cc406d3 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 @@ -0,0 +1,489 @@ +{ 
+ "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for an atomic canonical Snowplow event", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "atomic", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "app_id": { + "type": ["string", "null"], + "maxLength": 255 + }, + "platform": { + "type": ["string", "null"], + "maxLength": 255 + }, + "etl_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "collector_tstamp": { + "type": "string", + "format": "date-time" + }, + "dvce_created_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "event": { + "type": ["string", "null"], + "maxLength": 128 + }, + "event_id": { + "type": "string", + "maxLength": 36 + }, + "txn_id": { + "type": ["integer", "null"] + }, + "name_tracker": { + "type": ["string", "null"], + "maxLength": 128 + }, + "v_tracker": { + "type": ["string", "null"], + "maxLength": 100 + }, + "v_collector": { + "type": "string", + "maxLength": 100 + }, + "v_etl": { + "type": "string", + "maxLength": 100 + }, + "user_id": { + "type": ["string", "null"], + "maxLength": 255 + }, + "user_ipaddress": { + "type": ["string", "null"], + "maxLength": 128 + }, + "user_fingerprint": { + "type": ["string", "null"], + "maxLength": 128 + }, + "domain_userid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "domain_sessionidx": { + "type": ["integer", "null"] + }, + "network_userid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "geo_country": { + "type": ["string", "null"], + "maxLength": 2 + }, + "geo_region": { + "type": ["string", "null"], + "maxLength": 3 + }, + "geo_city": { + "type": ["string", "null"], + "maxLength": 75 + }, + "geo_zipcode": { + "type": ["string", "null"], + "maxLength": 15 + }, + "geo_latitude": { + "type": ["number", "null"] + }, + "geo_longitude": { + "type": ["number", "null"] + }, + "geo_region_name": { 
+ "type": ["string", "null"], + "maxLength": 100 + }, + "ip_isp": { + "type": ["string", "null"], + "maxLength": 100 + }, + "ip_organization": { + "type": ["string", "null"], + "maxLength": 128 + }, + "ip_domain": { + "type": ["string", "null"], + "maxLength": 128 + }, + "ip_netspeed": { + "type": ["string", "null"], + "maxLength": 100 + }, + "page_url": { + "type": ["string", "null"], + "maxLength": 4096 + }, + "page_title": { + "type": ["string", "null"], + "maxLength": 2000 + }, + "page_referrer": { + "type": ["string", "null"], + "maxLength": 4096 + }, + "page_urlscheme": { + "type": ["string", "null"], + "maxLength": 16 + }, + "page_urlhost": { + "type": ["string", "null"], + "maxLength": 255 + }, + "page_urlport": { + "type": ["integer", "null"] + }, + "page_urlpath": { + "type": ["string", "null"], + "maxLength": 3000 + }, + "page_urlquery": { + "type": ["string", "null"], + "maxLength": 6000 + }, + "page_urlfragment": { + "type": ["string", "null"], + "maxLength": 3000 + }, + "refr_urlscheme": { + "type": ["string", "null"], + "maxLength": 16 + }, + "refr_urlhost": { + "type": ["string", "null"], + "maxLength": 255 + }, + "refr_urlport": { + "type": ["integer", "null"] + }, + "refr_urlpath": { + "type": ["string", "null"], + "maxLength": 6000 + }, + "refr_urlquery": { + "type": ["string", "null"], + "maxLength": 6000 + }, + "refr_urlfragment": { + "type": ["string", "null"], + "maxLength": 3000 + }, + "refr_medium": { + "type": ["string", "null"], + "maxLength": 25 + }, + "refr_source": { + "type": ["string", "null"], + "maxLength": 50 + }, + "refr_term": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_medium": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_source": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_term": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_content": { + "type": ["string", "null"], + "maxLength": 500 + }, + "mkt_campaign": { + "type": ["string", "null"], + "maxLength": 
255 + }, + "se_category": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "se_action": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "se_label": { + "type": ["string", "null"], + "maxLength": 4096 + }, + "se_property": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "se_value": { + "type": ["number", "null"] + }, + "tr_orderid": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_affiliation": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_total": { + "type": ["number", "null"] + }, + "tr_tax": { + "type": ["number", "null"] + }, + "tr_shipping": { + "type": ["number", "null"] + }, + "tr_city": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_state": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_country": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_orderid": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_sku": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_name": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_category": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_price": { + "type": ["number", "null"] + }, + "ti_quantity": { + "type": ["integer", "null"] + }, + "pp_xoffset_min": { + "type": ["integer", "null"] + }, + "pp_xoffset_max": { + "type": ["integer", "null"] + }, + "pp_yoffset_min": { + "type": ["integer", "null"] + }, + "pp_yoffset_max": { + "type": ["integer", "null"] + }, + "useragent": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "br_name": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_family": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_version": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_type": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_renderengine": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_lang": { + "type": ["string", "null"], + "maxLength": 255 + }, + "br_features_pdf": { + 
"type": ["boolean", "null"] + }, + "br_features_flash": { + "type": ["boolean", "null"] + }, + "br_features_java": { + "type": ["boolean", "null"] + }, + "br_features_director": { + "type": ["boolean", "null"] + }, + "br_features_quicktime": { + "type": ["boolean", "null"] + }, + "br_features_realplayer": { + "type": ["boolean", "null"] + }, + "br_features_windowsmedia": { + "type": ["boolean", "null"] + }, + "br_features_gears": { + "type": ["boolean", "null"] + }, + "br_features_silverlight": { + "type": ["boolean", "null"] + }, + "br_cookies": { + "type": ["boolean", "null"] + }, + "br_colordepth": { + "type": ["string", "null"], + "maxLength": 12 + }, + "br_viewwidth": { + "type": ["integer", "null"] + }, + "br_viewheight": { + "type": ["integer", "null"] + }, + "os_name": { + "type": ["string", "null"], + "maxLength": 50 + }, + "os_family": { + "type": ["string", "null"], + "maxLength": 50 + }, + "os_manufacturer": { + "type": ["string", "null"], + "maxLength": 50 + }, + "os_timezone": { + "type": ["string", "null"], + "maxLength": 255 + }, + "dvce_type": { + "type": ["string", "null"], + "maxLength": 50 + }, + "dvce_ismobile": { + "type": ["boolean", "null"] + }, + "dvce_screenwidth": { + "type": ["integer", "null"] + }, + "dvce_screenheight": { + "type": ["integer", "null"] + }, + "doc_charset": { + "type": ["string", "null"], + "maxLength": 128 + }, + "doc_width": { + "type": ["integer", "null"] + }, + "doc_height": { + "type": ["integer", "null"] + }, + "tr_currency": { + "type": ["string", "null"], + "maxLength": 3 + }, + "tr_total_base": { + "type": ["number", "null"] + }, + "tr_tax_base": { + "type": ["number", "null"] + }, + "tr_shipping_base": { + "type": ["number", "null"] + }, + "ti_currency": { + "type": ["string", "null"], + "maxLength": 3 + }, + "ti_price_base": { + "type": ["number", "null"] + }, + "base_currency": { + "type": ["string", "null"], + "maxLength": 3 + }, + "geo_timezone": { + "type": ["string", "null"], + "maxLength": 64 + }, + 
"mkt_clickid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "mkt_network": { + "type": ["string", "null"], + "maxLength": 64 + }, + "etl_tags": { + "type": ["string", "null"], + "maxLength": 500 + }, + "dvce_sent_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "refr_domain_userid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "refr_dvce_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "domain_sessionid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "derived_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "event_vendor": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "event_name": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "event_format": { + "type": ["string", "null"], + "maxLength": 128 + }, + "event_version": { + "type": ["string", "null"], + "maxLength": 128 + }, + "event_fingerprint": { + "type": ["string", "null"], + "maxLength": 128 + }, + "true_tstamp": { + "type": ["string", "null"], + "format": "date-time" + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/campaign_attribution/jsonschema/1-0-1 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/campaign_attribution/jsonschema/1-0-1 new file mode 100644 index 000000000..59ab71a5c --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/campaign_attribution/jsonschema/1-0-1 @@ -0,0 +1,74 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a campaign attribution enrichment", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "campaign_attribution", + "format": "jsonschema", + "version": "1-0-1" + }, + + "type": "object", + "properties": { + "vendor": { + "type": 
"string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "parameters": { + "type": "object", + "properties": { + "mapping": { + "type": ["string", "null"], + "enum": ["static", "script"] + }, + "fields": { + "type": "object", + "properties": { + "mktMedium": { + "$ref": "#/definitions/stringArray" + }, + "mktSource": { + "$ref": "#/definitions/stringArray" + }, + "mktTerm": { + "$ref": "#/definitions/stringArray" + }, + "mktContent": { + "$ref": "#/definitions/stringArray" + }, + "mktCampaign": { + "$ref": "#/definitions/stringArray" + }, + "mktClickId": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "required": ["mktMedium", "mktSource", "mktTerm", "mktContent", "mktCampaign"], + "additionalProperties": false + } + }, + "required": ["fields"], + "additionalProperties": false + } + }, + + "definitions": { + "stringArray": { + "type": "array", + "items": { + "type": "string" + } + } + }, + + "required": ["name", "vendor", "enabled", "parameters"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0 new file mode 100644 index 000000000..3ddce8ba5 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0 @@ -0,0 +1,32 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for custom contexts", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "contexts", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "array", + + "items": { + + "type": "object", + + "properties": { + + "schema": { + "type": "string", + "pattern": 
"^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + }, + + "data": {} + }, + + "required": ["schema", "data"], + "additionalProperties": false + }, + + "minItems": 1 +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1 new file mode 100644 index 000000000..e6a9a225c --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1 @@ -0,0 +1,30 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for custom contexts", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "contexts", + "format": "jsonschema", + "version": "1-0-1" + }, + + "type": "array", + + "items": { + + "type": "object", + + "properties": { + + "schema": { + "type": "string", + "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + }, + + "data": {} + }, + + "required": ["schema", "data"], + "additionalProperties": false + } +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/currency_conversion_config/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/currency_conversion_config/jsonschema/1-0-0 new file mode 100644 index 000000000..5d55cda9b --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/currency_conversion_config/jsonschema/1-0-0 @@ -0,0 +1,46 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for configuration of currency-conversion enrichment", + "self": { + "vendor": 
"com.snowplowanalytics.snowplow", + "name": "currency_conversion_config", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "parameters": { + "type": "object", + "properties": { + "apiKey": { + "type": "string" + }, + "baseCurrency": { + "type": "string" + }, + "rateAt": { + "enum": ["EOD_PRIOR"] + }, + "accountType": { + "type": "string", + "enum": ["DEVELOPER", "ENTERPRISE", "UNLIMITED"] + } + }, + "required": ["apiKey", "baseCurrency", "rateAt", "accountType"], + "additionalProperties": false + } + }, + "required": ["name", "vendor", "enabled", "parameters"], + "additionalProperties": false +} + diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/enrichments/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/enrichments/jsonschema/1-0-0 new file mode 100644 index 000000000..ed185417b --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/enrichments/jsonschema/1-0-0 @@ -0,0 +1,30 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for an array of enrichments", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "enrichments", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "array", + + "items": { + + "type": "object", + + "properties": { + + "schema": { + "type": "string", + "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + }, + + "data": {} + }, + + "required": ["schema", "data"], + "additionalProperties": false + } +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0 
b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0 new file mode 100644 index 000000000..20c7457e9 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0 @@ -0,0 +1,41 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for client geolocation contexts", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "geolocation_context", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "latitude": { + "type": "number", + "minimum": -90, + "maximum": 90 + }, + "longitude": { + "type": "number", + "minimum": -180, + "maximum": 180 + }, + "latitudeLongitudeAccuracy": { + "type": "number" + }, + "altitude": { + "type": "number" + }, + "altitudeAccuracy": { + "type": "number" + }, + "bearing": { + "type": "number" + }, + "speed": { + "type": "number" + } + }, + "required": ["latitude", "longitude"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-1-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-1-0 new file mode 100644 index 000000000..2e4bb97ca --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-1-0 @@ -0,0 +1,44 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for client geolocation contexts", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "geolocation_context", + "format": "jsonschema", + "version": "1-1-0" + }, + + "type": "object", + 
"properties": { + "latitude": { + "type": "number", + "minimum": -90, + "maximum": 90 + }, + "longitude": { + "type": "number", + "minimum": -180, + "maximum": 180 + }, + "latitudeLongitudeAccuracy": { + "type": ["number", "null"] + }, + "altitude": { + "type": ["number", "null"] + }, + "altitudeAccuracy": { + "type": ["number", "null"] + }, + "bearing": { + "type": ["number", "null"] + }, + "speed": { + "type": ["number", "null"] + }, + "timestamp": { + "type": ["integer", "null"] + } + }, + "required": ["latitude", "longitude"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/ip_lookups/jsonschema/2-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/ip_lookups/jsonschema/2-0-0 new file mode 100644 index 000000000..475b3f580 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/ip_lookups/jsonschema/2-0-0 @@ -0,0 +1,90 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for MaxMind GeoIP2 ip lookups enrichment", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "ip_lookups", + "format": "jsonschema", + "version": "2-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string", + "maxLength": 256 + }, + "name": { + "type": "string", + "maxLength": 256 + }, + "enabled": { + "type": "boolean" + }, + "parameters": { + "type": "object", + "properties": { + + "geo": { + "type": "object", + "properties": { + "database": { + "enum": ["GeoLite2-City.mmdb", "GeoIP2-City.mmdb"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["database", "uri"] + }, + + "isp": { + "type": "object", + "properties": { + "database": { + "enum": ["GeoIP2-ISP.mmdb"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + 
"required": ["database", "uri"] + }, + + "domain": { + "type": "object", + "properties": { + "database": { + "enum": ["GeoIP2-Domain.mmdb"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["database", "uri"] + }, + + "connectionType": { + "type": "object", + "properties": { + "database": { + "enum": ["GeoIP2-Connection-Type.mmdb"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["database", "uri"] + } + + }, + "additionalProperties": false + } + }, + "required": ["name", "vendor", "enabled", "parameters"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0 new file mode 100644 index 000000000..fe89dc373 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0 @@ -0,0 +1,43 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for mobile contexts", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "mobile_context", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "osType": { + "type": "string" + }, + "osVersion": { + "type": "string" + }, + "deviceManufacturer": { + "type": "string" + }, + "deviceModel": { + "type": "string" + }, + "carrier": { + "type": ["string", "null"] + }, + "openIdfa": { + "type": "string" + }, + "appleIdfa": { + "type": "string" + }, + "appleIdfv": { + "type": "string" + }, + "androidIdfa": { + "type": "string" + } + }, + "required": ["osType", "osVersion", "deviceManufacturer", "deviceModel"], + "additionalProperties": false +} diff --git 
a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-0 new file mode 100644 index 000000000..f5265aeb6 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-0 @@ -0,0 +1,246 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Snowplow payload", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "payload_data", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "array", + "items":{ + + "type": "object", + "properties": { + "tna": { + "type": "string" + }, + "aid": { + "type": "string" + }, + "p": { + "type": "string" + }, + "dtm": { + "type": "string" + }, + "tz": { + "type": "string" + }, + "e": { + "type": "string" + }, + "tid": { + "type": "string" + }, + "eid": { + "type": "string" + }, + "tv": { + "type": "string" + }, + "duid": { + "type": "string" + }, + "nuid": { + "type": "string" + }, + "uid": { + "type": "string" + }, + "vid": { + "type": "string" + }, + "ip": { + "type": "string" + }, + "res": { + "type": "string" + }, + "url": { + "type": "string" + }, + "page": { + "type": "string" + }, + "refr": { + "type": "string" + }, + "fp": { + "type": "string" + }, + "ctype": { + "type": "string" + }, + "cookie": { + "type": "string" + }, + "lang": { + "type": "string" + }, + "f_pdf": { + "type": "string" + }, + "f_qt": { + "type": "string" + }, + "f_realp": { + "type": "string" + }, + "f_wma": { + "type": "string" + }, + "f_dir": { + "type": "string" + }, + "f_fla": { + "type": "string" + }, + "f_java": { + "type": "string" + }, + "f_gears": { + "type": "string" + }, + "f_ag": { + "type": "string" + }, + "cd": { + "type": "string" + }, + "ds": { + 
"type": "string" + }, + "cs": { + "type": "string" + }, + "vp": { + "type": "string" + }, + "mac": { + "type": "string" + }, + "pp_mix": { + "type": "string" + }, + "pp_max": { + "type": "string" + }, + "pp_miy": { + "type": "string" + }, + "pp_may": { + "type": "string" + }, + "ad_ba": { + "type": "string" + }, + "ad_ca": { + "type": "string" + }, + "ad_ad": { + "type": "string" + }, + "ad_uid": { + "type": "string" + }, + "tr_id": { + "type": "string" + }, + "tr_af": { + "type": "string" + }, + "tr_tt": { + "type": "string" + }, + "tr_tx": { + "type": "string" + }, + "tr_sh": { + "type": "string" + }, + "tr_ci": { + "type": "string" + }, + "tr_st": { + "type": "string" + }, + "tr_co": { + "type": "string" + }, + "tr_cu": { + "type": "string" + }, + "ti_id": { + "type": "string" + }, + "ti_sk": { + "type": "string" + }, + "ti_nm": { + "type": "string" + }, + "ti_na": { + "type": "string" + }, + "ti_ca": { + "type": "string" + }, + "ti_pr": { + "type": "string" + }, + "ti_qu": { + "type": "string" + }, + "ti_cu": { + "type": "string" + }, + "sa": { + "type": "string" + }, + "sn": { + "type": "string" + }, + "st": { + "type": "string" + }, + "sp": { + "type": "string" + }, + "se_ca": { + "type": "string" + }, + "se_ac": { + "type": "string" + }, + "se_la": { + "type": "string" + }, + "se_pr": { + "type": "string" + }, + "se_va": { + "type": "string" + }, + "ue_na": { + "type": "string" + }, + "ue_pr": { + "type": "string" + }, + "ue_px": { + "type": "string" + }, + "co": { + "type": "string" + }, + "cx": { + "type": "string" + } + }, + "required": ["tv", "p", "e"], + "additionalProperties": false + }, + "minItems": 1 +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4 new file mode 100644 index 000000000..2e8cab60f --- /dev/null +++ 
b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4 @@ -0,0 +1,261 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Snowplow payload", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "payload_data", + "format": "jsonschema", + "version": "1-0-4" + }, + + "type": "array", + "items":{ + + "type": "object", + "properties": { + "tna": { + "type": "string" + }, + "aid": { + "type": "string" + }, + "p": { + "type": "string" + }, + "dtm": { + "type": "string" + }, + "tz": { + "type": "string" + }, + "e": { + "type": "string" + }, + "tid": { + "type": "string" + }, + "eid": { + "type": "string" + }, + "tv": { + "type": "string" + }, + "duid": { + "type": "string" + }, + "nuid": { + "type": "string" + }, + "uid": { + "type": "string" + }, + "vid": { + "type": "string" + }, + "ip": { + "type": "string" + }, + "res": { + "type": "string" + }, + "url": { + "type": "string" + }, + "page": { + "type": "string" + }, + "refr": { + "type": "string" + }, + "fp": { + "type": "string" + }, + "ctype": { + "type": "string" + }, + "cookie": { + "type": "string" + }, + "lang": { + "type": "string" + }, + "f_pdf": { + "type": "string" + }, + "f_qt": { + "type": "string" + }, + "f_realp": { + "type": "string" + }, + "f_wma": { + "type": "string" + }, + "f_dir": { + "type": "string" + }, + "f_fla": { + "type": "string" + }, + "f_java": { + "type": "string" + }, + "f_gears": { + "type": "string" + }, + "f_ag": { + "type": "string" + }, + "cd": { + "type": "string" + }, + "ds": { + "type": "string" + }, + "cs": { + "type": "string" + }, + "vp": { + "type": "string" + }, + "mac": { + "type": "string" + }, + "pp_mix": { + "type": "string" + }, + "pp_max": { + "type": "string" + }, + "pp_miy": { + "type": "string" + }, + "pp_may": { + "type": "string" + }, + "ad_ba": { + "type": "string" + }, + "ad_ca": { + 
"type": "string" + }, + "ad_ad": { + "type": "string" + }, + "ad_uid": { + "type": "string" + }, + "tr_id": { + "type": "string" + }, + "tr_af": { + "type": "string" + }, + "tr_tt": { + "type": "string" + }, + "tr_tx": { + "type": "string" + }, + "tr_sh": { + "type": "string" + }, + "tr_ci": { + "type": "string" + }, + "tr_st": { + "type": "string" + }, + "tr_co": { + "type": "string" + }, + "tr_cu": { + "type": "string" + }, + "ti_id": { + "type": "string" + }, + "ti_sk": { + "type": "string" + }, + "ti_nm": { + "type": "string" + }, + "ti_na": { + "type": "string" + }, + "ti_ca": { + "type": "string" + }, + "ti_pr": { + "type": "string" + }, + "ti_qu": { + "type": "string" + }, + "ti_cu": { + "type": "string" + }, + "sa": { + "type": "string" + }, + "sn": { + "type": "string" + }, + "st": { + "type": "string" + }, + "sp": { + "type": "string" + }, + "se_ca": { + "type": "string" + }, + "se_ac": { + "type": "string" + }, + "se_la": { + "type": "string" + }, + "se_pr": { + "type": "string" + }, + "se_va": { + "type": "string" + }, + "ue_na": { + "type": "string" + }, + "ue_pr": { + "type": "string" + }, + "ue_px": { + "type": "string" + }, + "co": { + "type": "string" + }, + "cx": { + "type": "string" + }, + "ua": { + "type": "string" + }, + "tnuid": { + "type": "string" + }, + "stm": { + "type": "string" + }, + "sid": { + "type": "string" + }, + "ttm": { + "type": "string" + } + }, + "required": ["tv", "p", "e"], + "additionalProperties": false + }, + "minItems": 1 +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/referer_parser/jsonschema/2-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/referer_parser/jsonschema/2-0-0 new file mode 100644 index 000000000..18e936687 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/referer_parser/jsonschema/2-0-0 @@ -0,0 +1,44 @@ +{ + "$schema": 
"http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for referer-parser customization enrichment", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "referer_parser", + "format": "jsonschema", + "version": "2-0-0" + }, + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "parameters": { + "type": "object", + "properties": { + "internalDomains": { + "type": "array", + "items": { + "type": "string" + } + }, + "database": { + "type": "string" + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["internalDomains", "database", "uri"], + "additionalProperties": false + } + }, + "required": ["name", "vendor", "enabled", "parameters"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0 new file mode 100644 index 000000000..edccfa3dc --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0 @@ -0,0 +1,22 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a screen view event", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "screen_view", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "id": { + "type": "string" + } + }, + "minProperties": 1, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0 
b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0 new file mode 100644 index 000000000..c64c8c538 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0 @@ -0,0 +1,25 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Snowplow unstructured event", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "unstruct_event", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + + "properties": { + + "schema": { + "type": "string", + "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + }, + + "data": {} + }, + + "required": ["schema", "data"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0 new file mode 100644 index 000000000..a3b0ad550 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0 @@ -0,0 +1,21 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a URI redirect through a Snowplow event collector", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "uri_redirect", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "uri": { + "type": "string", + "format": "uri", + "maxLength": 8192 + } + }, + "required": ["uri"], + "additionalProperties": false +} diff --git 
a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.statusgator/status_change/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.statusgator/status_change/jsonschema/1-0-0 new file mode 100644 index 000000000..15db59eba --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.statusgator/status_change/jsonschema/1-0-0 @@ -0,0 +1,45 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a StatusGator status change event", + "self": { + "vendor": "com.statusgator", + "name": "status_change", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "serviceName": { + "type": "string", + "maxLength": 128 + }, + "faviconUrl": { + "type": "string", + "format": "uri", + "maxLength": 8192 + }, + "statusPageUrl": { + "type": "string", + "format": "uri", + "maxLength": 8192 + }, + "homePageUrl": { + "type": "string", + "format": "uri", + "maxLength": 8192 + }, + "currentStatus": { + "type": "string", + "maxLength": 128 + }, + "lastStatus": { + "type": "string", + "maxLength": 128 + }, + "occurredAt": { + "type": "string", + "format": "date-time" + } + }, + "additionalProperties": true +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.unbounce/form_post/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.unbounce/form_post/jsonschema/1-0-0 new file mode 100644 index 000000000..35306fabb --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.unbounce/form_post/jsonschema/1-0-0 @@ -0,0 +1,38 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for Unbounce (https://documentation.unbounce.com/hc/en-us/articles/203510044-Using-a-Webhook#content6)", + "self": { + "vendor": 
"com.unbounce", + "name": "form_post", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "pageId": { + "description": "The identifier Unbounce uses to uniquely identify your page. Eventually you’ll be able to use this with our API to manipulate and get information about your page. This is a 36-character UUID, for example, 'a2838d98-4cf4-11df-a3fd-00163e372d58'.", + "type": "string", + "maxLength": 36 + }, + "pageName": { + "description": "The name you gave your page, for example 'My Guaranteed to Convert Landing Page'.", + "type": "string" + }, + "variant": { + "description": "This identifies the page variant that the visitor saw when they visited your page, and will be a lower-case letter. The first variant is 'a', the next, 'b', and so on. If you have more than 26 variants the sequence will continue with 'aa', 'ab', etc.", + "type": "string", + "maxLength": 8 + }, + "pageUrl": { + "description": "The URL of the page that contains your form.", + "type": "string", + "format": "uri", + "maxLength": 8192 + }, + "data.json": { + "type": "object", + "additionalProperties": true + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/nl.basjes/yauaa_context/jsonschema/1-0-3 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/nl.basjes/yauaa_context/jsonschema/1-0-3 new file mode 100644 index 000000000..b3bc226ee --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/nl.basjes/yauaa_context/jsonschema/1-0-3 @@ -0,0 +1,231 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a context generated by the YAUAA enrichment after parsing the user agent", + "self": { + "vendor": "nl.basjes", + "name": "yauaa_context", + "format": "jsonschema", + "version": "1-0-3" + }, + "type": "object", + "properties": { + "deviceClass": { + 
"description": "See https://yauaa.basjes.nl/README-Output.html", + "enum": ["Desktop", "Anonymized", "Unknown", "UNKNOWN", "Mobile", "Tablet", "Phone", "Watch", "Virtual Reality", "eReader", "Set-top box", "TV", "Game Console", "Handheld Game Console", "Voice", "Robot", "Robot Mobile", "Spy", "Hacker", "Augmented Reality", "Robot Imitator"] + }, + "deviceName": { + "description": "Example: Google Nexus 6", + "type": "string", + "maxLength": 100 + }, + "deviceBrand": { + "description": "Example: Google", + "type": "string", + "maxLength": 50 + }, + "deviceCpu": { + "type": "string", + "maxLength": 50 + }, + "deviceCpuBits": { + "type": "string", + "maxLength": 20 + }, + "deviceFirmwareVersion": { + "type": "string", + "maxLength": 100 + }, + "deviceVersion": { + "type": "string", + "maxLength": 100 + }, + "operatingSystemClass": { + "description": "See https://yauaa.basjes.nl/README-Output.html", + "enum": ["Desktop", "Mobile", "Cloud", "Embedded", "Game Console", "Hacker", "Anonymized", "Unknown"] + }, + "operatingSystemName": { + "description": "Examples: Linux, Android.", + "type": "string", + "maxLength": 100 + }, + "operatingSystemVersion": { + "type": "string", + "maxLength": 50 + }, + "operatingSystemNameVersion": { + "type": "string", + "maxLength": 150 + }, + "operatingSystemVersionBuild": { + "type": "string", + "maxLength": 100 + }, + "layoutEngineClass": { + "description": "See https://yauaa.basjes.nl/README-Output.html", + "enum": ["Browser", "Mobile App", "Hacker", "Robot", "Unknown", "Special", "Cloud", "eReader"] + }, + "layoutEngineName": { + "type": "string", + "maxLength": 100 + }, + "layoutEngineVersion": { + "type": "string", + "maxLength": 50 + }, + "layoutEngineVersionMajor": { + "type": "string", + "maxLength": 20 + }, + "layoutEngineNameVersion": { + "type": "string", + "maxLength": 150 + }, + "layoutEngineNameVersionMajor": { + "type": "string", + "maxLength": 120 + }, + "layoutEngineBuild": { + "type": "string", + "maxLength": 100 + }, + 
"agentClass": { + "description": "See https://yauaa.basjes.nl/README-Output.html", + "enum": ["Browser", "Browser Webview", "Mobile App", "Robot", "Robot Mobile", "Cloud Application", "Email Client", "Voice", "Special", "Testclient", "Hacker", "Unknown", "Desktop App", "eReader"] + }, + "agentName": { + "description": "Example: Chrome.", + "type": "string", + "maxLength": 100 + }, + "agentVersion": { + "type": "string", + "maxLength": 100 + }, + "agentVersionMajor": { + "type": "string", + "maxLength": 100 + }, + "agentNameVersion": { + "type": "string", + "maxLength": 200 + }, + "agentNameVersionMajor": { + "type": "string", + "maxLength": 120 + }, + "agentBuild": { + "type": "string", + "maxLength": 100 + }, + "agentLanguage": { + "type": "string", + "maxLength": 50 + }, + "agentLanguageCode": { + "type": "string", + "maxLength": 20 + }, + "agentInformationEmail": { + "type": "string", + "format": "email" + }, + "agentInformationUrl": { + "type": "string" + }, + "agentSecurity": { + "type": "string", + "enum": ["Weak security", "Strong security", "Unknown", "Hacker", "No security"] + }, + "agentUuid": { + "type": "string" + }, + "webviewAppName": { + "type": "string" + }, + "webviewAppVersion": { + "type": "string" + }, + "webviewAppVersionMajor": { + "type": "string", + "maxLength": 50 + }, + "webviewAppNameVersionMajor": { + "type": "string", + "maxLength": 50 + }, + "facebookCarrier": { + "type": "string" + }, + "facebookDeviceClass": { + "type": "string", + "maxLength": 1024 + }, + "facebookDeviceName": { + "type": "string", + "maxLength": 1024 + }, + "facebookDeviceVersion": { + "type": "string" + }, + "facebookFBOP": { + "type": "string" + }, + "facebookFBSS": { + "type": "string" + }, + "facebookOperatingSystemName": { + "type": "string" + }, + "facebookOperatingSystemVersion": { + "type": "string" + }, + "anonymized": { + "type": "string" + }, + "hackerAttackVector": { + "type": "string" + }, + "hackerToolkit": { + "type": "string" + }, + "koboAffiliate": 
{ + "type": "string" + }, + "koboPlatformId": { + "type": "string" + }, + "iECompatibilityVersion": { + "type": "string", + "maxLength": 100 + }, + "iECompatibilityVersionMajor": { + "type": "string", + "maxLength": 50 + }, + "iECompatibilityNameVersion": { + "type": "string", + "maxLength": 50 + }, + "iECompatibilityNameVersionMajor": { + "type": "string", + "maxLength": 70 + }, + "carrier": { + "type": "string" + }, + "gSAInstallationID": { + "type": "string" + }, + "networkType": { + "type": "string" + }, + "operatingSystemNameVersionMajor": { + "type": "string" + }, + "operatingSystemVersionMajor": { + "type": "string" + } + }, + "required": ["deviceClass"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/EnrichSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/EnrichSpec.scala index ad9028d44..0e5a2e457 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/EnrichSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/EnrichSpec.scala @@ -70,7 +70,7 @@ class EnrichSpec extends Specification with CatsIO with ScalaCheck { ) Enrich - .enrichWith(TestEnvironment.enrichmentReg.pure[IO], TestEnvironment.igluClient, None, EnrichSpec.processor)( + .enrichWith(TestEnvironment.enrichmentReg.pure[IO], TestEnvironment.igluClient, None, EnrichSpec.processor, true)( EnrichSpec.payload ) .map(normalizeResult) @@ -85,7 +85,7 @@ class EnrichSpec extends Specification with CatsIO with ScalaCheck { prop { (collectorPayload: CollectorPayload) => val payload = collectorPayload.toRaw Enrich - .enrichWith(TestEnvironment.enrichmentReg.pure[IO], TestEnvironment.igluClient, None, EnrichSpec.processor)(payload) + .enrichWith(TestEnvironment.enrichmentReg.pure[IO], TestEnvironment.igluClient, None, EnrichSpec.processor, true)(payload) .map(normalizeResult) .map { case 
List(Validated.Valid(e)) => e.event must beSome("page_view") diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/BlackBoxTesting.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/BlackBoxTesting.scala index 893c61661..5b7f7b1cd 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/BlackBoxTesting.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/BlackBoxTesting.scala @@ -48,7 +48,7 @@ import com.snowplowanalytics.snowplow.enrich.common.fs2.EnrichSpec object BlackBoxTesting extends Specification with CatsIO { val igluClient: Client[IO, Json] = - Client[IO, Json](Resolver(List(Registry.IgluCentral), None), CirceValidator) + Client[IO, Json](Resolver(List(Registry.EmbeddedRegistry), None), CirceValidator) private val serializer: TSerializer = new TSerializer() @@ -90,7 +90,7 @@ object BlackBoxTesting extends Specification with CatsIO { enrichmentConfig: Option[Json] = None ) = Enrich - .enrichWith(getEnrichmentRegistry(enrichmentConfig), igluClient, None, EnrichSpec.processor)( + .enrichWith(getEnrichmentRegistry(enrichmentConfig), igluClient, None, EnrichSpec.processor, true)( input ) .map { diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/adapters/Tp2AdapterSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/adapters/Tp2AdapterSpec.scala index eaf46972d..139d19450 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/adapters/Tp2AdapterSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/adapters/Tp2AdapterSpec.scala @@ -35,7 +35,7 @@ class Tp2AdapterSpec extends Specification with CatsIO { contentType = "application/json".some ) Enrich - 
.enrichWith(TestEnvironment.enrichmentReg.pure[IO], BlackBoxTesting.igluClient, None, EnrichSpec.processor)( + .enrichWith(TestEnvironment.enrichmentReg.pure[IO], BlackBoxTesting.igluClient, None, EnrichSpec.processor, true)( input ) .map { diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionItemSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionItemSpec.scala index 441080a8a..aa2262884 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionItemSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionItemSpec.scala @@ -47,7 +47,7 @@ class CurrencyConversionEnrichmentTransactionItemSpec extends Specification with "ti_sku" -> "PBZ1001", "ti_quantity" -> "2", "ti_category" -> "APPAREL", - "ti_price" -> "2000", + "ti_price" -> "2000.0", "ti_price_base" -> "2240.45", "ti_name" -> "Blue t-shirt", "collector_tstamp" -> "2019-07-01 19:23:03.000" diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionSpec.scala index 60aab5233..fc3076395 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionSpec.scala @@ -44,15 +44,15 @@ class 
CurrencyConversionEnrichmentTransactionpec extends Specification with Cats "base_currency" -> "EUR", "tr_currency" -> "USD", "tr_affiliation" -> "pb", - "tr_total" -> "8000", + "tr_total" -> "8000.0", "tr_total_base" -> "7087.49", - "tr_tax" -> "200", + "tr_tax" -> "200.0", "tr_tax_base" -> "177.19", - "tr_shipping" -> "50", - "tr_shipping_base" -> "44.30", + "tr_shipping" -> "50.0", + "tr_shipping_base" -> "44.3", "tr_orderid" -> "order-123", "tr_state" -> "England", - "txn_id" -> "028288", + "txn_id" -> "28288", "tr_country" -> "UK", "tr_city" -> "London", "collector_tstamp" -> "2019-07-01 19:23:03.000" diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionItemSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionItemSpec.scala index b1b53752b..4e616f7c2 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionItemSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionItemSpec.scala @@ -40,7 +40,7 @@ class TransactionItemSpec extends Specification with CatsIO { "ti_quantity" -> "2", "ti_currency" -> "", "ti_category" -> "APPAREL", - "ti_price" -> "2000", + "ti_price" -> "2000.0", "ti_price_base" -> "" ) BlackBoxTesting.runTest(input, expected) diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionSpec.scala index 2625b919f..114e55a4b 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionSpec.scala @@ -38,15 +38,15 @@ class 
TransactionSpec extends Specification with CatsIO { "event_version" -> "1-0-0", "event" -> "transaction", "tr_affiliation" -> "pb", - "tr_total" -> "8000", + "tr_total" -> "8000.0", "tr_total_base" -> "", - "tr_tax" -> "200", + "tr_tax" -> "200.0", "tr_tax_base" -> "", - "tr_shipping" -> "50", + "tr_shipping" -> "50.0", "tr_shipping_base" -> "", "tr_orderid" -> "order-123", "tr_state" -> "England", - "txn_id" -> "028288", + "txn_id" -> "28288", "tr_country" -> "UK", "tr_city" -> "London", "contexts" -> json"""{"data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"http://snowplowanalytics.com/"}}],"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0"}""".noSpaces diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFileSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFileSpec.scala index 14440226b..7c2236f58 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFileSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFileSpec.scala @@ -64,6 +64,9 @@ class ConfigFileSpec extends Specification with CatsIO { Some("665bhft5u6udjf"), Some("enrich-kinesis-ce"), Some("1.0.0") + ), + io.FeatureFlags( + true ) ) ConfigFile.parse[IO](configPath.asRight).value.map(result => result must beRight(expected)) @@ -158,6 +161,9 @@ class ConfigFileSpec extends Specification with CatsIO { Some("665bhft5u6udjf"), Some("enrich-kinesis-ce"), Some("1.0.0") + ), + io.FeatureFlags( + true ) ) ConfigFile.parse[IO](configPath.asRight).value.map(result => result must beRight(expected)) @@ -208,6 +214,9 @@ class ConfigFileSpec extends Specification with CatsIO { "collectorUri": "collector-g.snowplowanalytics.com", "collectorPort": "443", "secure": true + }, + "featureFlags" : { + 
"validateEnrichedEvents": true } }""" diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ParsedConfigsSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ParsedConfigsSpec.scala index cfa620dd1..7d92405ba 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ParsedConfigsSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ParsedConfigsSpec.scala @@ -65,6 +65,9 @@ class ParsedConfigsSpec extends Specification with CatsIO { Some("665bhft5u6udjf"), Some("enrich-kinesis-ce"), Some("1.0.0") + ), + io.FeatureFlags( + true ) ) diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/SchemaRegistry.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/SchemaRegistry.scala deleted file mode 100644 index d2094a868..000000000 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/SchemaRegistry.scala +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2020-2021 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.snowplow.enrich.common.fs2.test - -import io.circe.Json -import io.circe.literal._ - -import com.snowplowanalytics.iglu.core.SelfDescribingSchema -import com.snowplowanalytics.iglu.core.circe.implicits._ - -/** - * In-memory test registry to avoid unnecessary HTTP and FS IO. All schemas used in [[TestEnvironment]] - * Iglu Client - */ -object SchemaRegistry { - val acmeTest: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "com.acme", - "name": "test", - "format": "jsonschema", - "version": "1-0-1" - }, - "properties": { - "path": { - "properties": { - "id": { - "type": "integer" - } - } - } - } - }""" - - val acmeOutput: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "com.acme", - "name": "output", - "format": "jsonschema", - "version": "1-0-0" - }, - "properties": { - "output": { - "type": "string" - } - } - }""" - - // Defined on Iglu Central - val unstructEvent: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "com.snowplowanalytics.snowplow", - "name": "unstruct_event", - "format": "jsonschema", - "version": "1-0-0" - }, - "type": "object", - "properties": { - "schema": { - "type": "string", - "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$$" - }, - "data": {} - }, - "required": ["schema", "data"], - "additionalProperties": false - }""" - - // Defined on Iglu Central - val contexts: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "com.snowplowanalytics.snowplow", - "name": "contexts", - "format": "jsonschema", - 
"version": "1-0-1" - }, - "type": "array", - "items": { - "type": "object", - "properties": { - "schema": { - "type": "string", - "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$$" - }, - "data": {} - }, - "required": ["schema", "data"], - "additionalProperties": false - } - }""" - - // Defined on Iglu Central - val geolocationContext: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "com.snowplowanalytics.snowplow", - "name": "geolocation_context", - "format": "jsonschema", - "version": "1-1-0" - }, - "type": "object", - "properties": { - "latitude": { "type": "number", "minimum": -90, "maximum": 90 }, - "longitude": { "type": "number", "minimum": -180, "maximum": 180 }, - "latitudeLongitudeAccuracy": { "type": ["number", "null"] }, - "altitude": { "type": ["number", "null"] }, - "altitudeAccuracy": { "type": ["number", "null"] }, - "bearing": { "type": ["number", "null"] }, - "speed": { "type": ["number", "null"] }, - "timestamp": { "type": ["integer", "null"] } - }, - "required": ["latitude", "longitude"], - "additionalProperties": false - }""" - - // Defined on Iglu Central - val iabAbdRobots: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "com.iab.snowplow", - "name": "spiders_and_robots", - "format": "jsonschema", - "version": "1-0-0" - }, - "type": "object", - "properties": { - "spiderOrRobot": {"type": "boolean" }, - "category": {"enum": ["SPIDER_OR_ROBOT", "ACTIVE_SPIDER_OR_ROBOT", "INACTIVE_SPIDER_OR_ROBOT", "BROWSER"]}, - "reason": {"enum": ["FAILED_IP_EXCLUDE", "FAILED_UA_INCLUDE", "FAILED_UA_EXCLUDE", "PASSED_ALL"]}, - "primaryImpact": {"enum": ["PAGE_IMPRESSIONS", "AD_IMPRESSIONS", "PAGE_AND_AD_IMPRESSIONS", "UNKNOWN", "NONE"]} - }, - "required": ["spiderOrRobot", "category", 
"reason", "primaryImpact"], - "additionalProperties": false - }""" - - val yauaaContext: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "nl.basjes", - "name": "yauaa_context", - "format": "jsonschema", - "version": "1-0-3" - }, - "type": "object", - "properties": { - "deviceClass": {"enum":["Desktop","Anonymized","Unknown","UNKNOWN","Mobile","Tablet","Phone","Watch","Virtual Reality","eReader","Set-top box","TV","Game Console","Handheld Game Console","Voice","Robot","Robot Mobile","Spy","Hacker"]}, - "deviceName": {"type":"string","maxLength": 100 }, - "deviceBrand": {"type":"string","maxLength": 50 }, - "deviceCpu": {"type":"string","maxLength": 50 }, - "deviceCpuBits": {"type":"string","maxLength": 20 }, - "deviceFirmwareVersion": {"type":"string","maxLength": 100 }, - "deviceVersion": {"type":"string","maxLength": 100 }, - "operatingSystemClass": {"enum":["Desktop","Mobile","Cloud","Embedded","Game Console","Hacker","Anonymized","Unknown"] }, - "operatingSystemName": {"type":"string","maxLength": 100 }, - "operatingSystemVersion": {"type":"string","maxLength": 50 }, - "operatingSystemNameVersion": {"type":"string","maxLength": 150 }, - "operatingSystemVersionBuild": {"type":"string","maxLength": 100 }, - "layoutEngineClass": {"enum":["Browser", "Mobile App", "Hacker", "Robot", "Unknown"] }, - "layoutEngineName": {"type":"string","maxLength": 100 }, - "layoutEngineVersion": {"type":"string","maxLength": 50 }, - "layoutEngineVersionMajor": {"type":"string","maxLength": 20 }, - "layoutEngineNameVersion": {"type":"string","maxLength": 150 }, - "layoutEngineNameVersionMajor": {"type":"string","maxLength": 120 }, - "layoutEngineBuild": {"type":"string","maxLength": 100 }, - "agentClass": {"enum":["Browser", "Browser Webview", "Mobile App", "Robot", "Robot Mobile", "Cloud Application", "Email Client", "Voice", "Special", "Testclient", "Hacker", 
"Unknown"] }, - "agentName": {"type":"string","maxLength": 100 }, - "agentVersion": {"type":"string","maxLength": 100 }, - "agentVersionMajor": {"type":"string","maxLength": 100 }, - "agentNameVersion": {"type":"string","maxLength": 200 }, - "agentNameVersionMajor": {"type":"string","maxLength": 120 }, - "agentBuild": {"type":"string","maxLength": 100 }, - "agentLanguage": {"type":"string","maxLength": 50 }, - "agentLanguageCode": {"type":"string","maxLength": 20 }, - "agentInformationEmail": {"type":"string","format": "email" }, - "agentInformationUrl": {"type":"string"}, - "agentSecurity": {"type":"string","enum":["Weak security", "Strong security", "Unknown", "Hacker"] }, - "agentUuid": {"type":"string"}, - "webviewAppName": {"type":"string"}, - "webviewAppVersion": {"type":"string"}, - "webviewAppVersionMajor": {"type":"string","maxLength":50}, - "webviewAppNameVersionMajor": {"type":"string","maxLength":50}, - "facebookCarrier": {"type":"string"}, - "facebookDeviceClass": {"type":"string","maxLength":1024}, - "facebookDeviceName": {"type":"string","maxLength":1024}, - "facebookDeviceVersion": {"type":"string"}, - "facebookFBOP": {"type":"string"}, - "facebookFBSS": {"type":"string"}, - "facebookOperatingSystemName": {"type":"string"}, - "facebookOperatingSystemVersion": {"type":"string"}, - "anonymized": {"type":"string"}, - "hackerAttackVector": {"type":"string"}, - "hackerToolkit": {"type":"string"}, - "koboAffiliate": {"type":"string"}, - "koboPlatformId": {"type":"string"}, - "iECompatibilityVersion": {"type":"string","maxLength":100}, - "iECompatibilityVersionMajor": {"type":"string","maxLength":50}, - "iECompatibilityNameVersion": {"type":"string","maxLength":50}, - "iECompatibilityNameVersionMajor": {"type":"string","maxLength":70}, - "carrier": {"type":"string"}, - "gSAInstallationID": {"type":"string"}, - "networkType": {"type":"string"}, - "operatingSystemNameVersionMajor": {"type":"string"}, - "operatingSystemVersionMajor": {"type":"string"} - }, - 
"required": ["deviceClass"], - "additionalProperties": false - }""" - - private[test] implicit def jsonToSchema(json: Json): SelfDescribingSchema[Json] = - SelfDescribingSchema.parse(json).getOrElse(throw new IllegalStateException("InMemory SchemaRegistry JSON cannot be parsed as schema")) -} diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/TestEnvironment.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/TestEnvironment.scala index 0592cfa88..f777d5897 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/TestEnvironment.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/TestEnvironment.scala @@ -109,19 +109,8 @@ object TestEnvironment extends CatsIO { val ioBlocker: Resource[IO, Blocker] = Blocker[IO] - val embeddedRegistry = - Registry.InMemory( - Registry.Config("snowplow-enrich-pubsub embedded test registry", 1, List("com.acme")), - List( - SchemaRegistry.unstructEvent, - SchemaRegistry.contexts, - SchemaRegistry.geolocationContext, - SchemaRegistry.iabAbdRobots, - SchemaRegistry.yauaaContext, - SchemaRegistry.acmeTest, - SchemaRegistry.acmeOutput - ) - ) + val embeddedRegistry = Registry.EmbeddedRegistry + val igluClient: Client[IO, Json] = Client[IO, Json](Resolver(List(embeddedRegistry), None), CirceValidator) @@ -166,15 +155,20 @@ object TestEnvironment extends CatsIO { EnrichSpec.processor, StreamsSettings(Concurrency(10000, 64), 1024 * 1024), None, - None + None, + true ) _ <- Resource.eval(logger.info("TestEnvironment initialized")) } yield TestEnvironment(environment, counter, goodRef.get, piiRef.get, badRef.get) - def parseBad(bytes: Array[Byte]): BadRow = - parser - .parse(new String(bytes, UTF_8)) - .getOrElse(throw new RuntimeException("Error parsing bad row json")) + def parseBad(bytes: Array[Byte]): BadRow = { + val badRowStr = new String(bytes, UTF_8) + 
val parsed = + parser + .parse(badRowStr) + .getOrElse(throw new RuntimeException(s"Error parsing bad row json: $badRowStr")) + parsed .as[BadRow] - .getOrElse(throw new RuntimeException("Error decoding bad row")) + .getOrElse(throw new RuntimeException(s"Error decoding bad row: $parsed")) + } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/EtlPipeline.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/EtlPipeline.scala index 2834007ee..1ad9ebd7b 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/EtlPipeline.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/EtlPipeline.scala @@ -46,6 +46,8 @@ object EtlPipeline { * @param processor The ETL application (Spark/Beam/Stream enrich) and its version * @param etlTstamp The ETL timestamp * @param input The ValidatedMaybeCanonicalInput + * @param validateEnrichedEvent Whether enriched event should be validated according + * to atomic schema * @return the ValidatedMaybeCanonicalOutput. 
Thanks to flatMap, will include any validation * errors contained within the ValidatedMaybeCanonicalInput */ @@ -55,7 +57,8 @@ object EtlPipeline { client: Client[F, Json], processor: Processor, etlTstamp: DateTime, - input: ValidatedNel[BadRow, Option[CollectorPayload]] + input: ValidatedNel[BadRow, Option[CollectorPayload]], + validateEnrichedEvent: Boolean = false // backward-compatibility ): F[List[Validated[BadRow, EnrichedEvent]]] = input match { case Validated.Valid(Some(payload)) => @@ -70,7 +73,8 @@ object EtlPipeline { client, processor, etlTstamp, - event + event, + validateEnrichedEvent ) .toValidated } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala index 482b031b3..71461e230 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala @@ -16,26 +16,24 @@ package enrichments import java.nio.charset.Charset import java.net.URI import java.time.Instant - import org.joda.time.DateTime - import io.circe.Json - import cats.Monad import cats.data.{EitherT, NonEmptyList, OptionT, ValidatedNel} import cats.effect.Clock import cats.implicits._ +import com.snowplowanalytics.refererparser._ + import com.snowplowanalytics.iglu.client.Client import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup -import com.snowplowanalytics.iglu.core.SelfDescribingData +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.snowplow.badrows._ import com.snowplowanalytics.snowplow.badrows.{FailureDetails, Payload, Processor} - -import com.snowplowanalytics.refererparser._ +import 
com.snowplowanalytics.snowplow.badrows.FailureDetails.EnrichmentFailure import adapters.RawEvent import enrichments.{EventEnrichments => EE} @@ -50,12 +48,16 @@ import utils.{IgluUtils, ConversionUtils => CU} object EnrichmentManager { + val atomicSchema: SchemaKey = SchemaKey("com.snowplowanalytics.snowplow", "atomic", "jsonschema", SchemaVer.Full(1, 0, 0)) + /** * Run the enrichment workflow * @param registry Contain configuration for all enrichments to apply * @param client Iglu Client, for schema lookups and validation * @param processor Meta information about processing asset, for bad rows * @param etlTstamp ETL timestamp + * @param validateEnrichedEvent Whether enriched event should be validated according + * to atomic schema * @param raw Canonical input event to enrich * @return Enriched event or bad row if a problem occured */ @@ -64,7 +66,8 @@ object EnrichmentManager { client: Client[F, Json], processor: Processor, etlTstamp: DateTime, - raw: RawEvent + raw: RawEvent, + validateEnrichedEvent: Boolean = false // backward-compatibility ): EitherT[F, BadRow, EnrichedEvent] = for { enriched <- EitherT.fromEither[F](setupEnrichedEvent(raw, etlTstamp, processor)) @@ -92,6 +95,10 @@ object EnrichmentManager { enriched.pii = pii.asString } } + _ <- if (validateEnrichedEvent) + validateEnriched(enriched, raw, processor, client) + else + EitherT.rightT[F, BadRow](Monad[F].unit) } yield enriched /** @@ -497,14 +504,13 @@ object EnrichmentManager { currencyConversion match { case Some(currency) => event.base_currency = currency.baseCurrency.getCode - // Note that stringToMaybeDouble is applied to either-valid-or-null event POJO + // Note that jFloatToDouble is applied to either-valid-or-null event POJO // properties, so we don't expect any of these four vals to be a Failure - val trTax = CU.stringToMaybeDouble("tr_tx", event.tr_tax).toValidatedNel - val tiPrice = CU.stringToMaybeDouble("ti_pr", event.ti_price).toValidatedNel - val trTotal = 
CU.stringToMaybeDouble("tr_tt", event.tr_total).toValidatedNel - val trShipping = CU.stringToMaybeDouble("tr_sh", event.tr_shipping).toValidatedNel + val trTax = CU.jFloatToDouble("tr_tx", event.tr_tax).toValidatedNel + val tiPrice = CU.jFloatToDouble("ti_pr", event.ti_price).toValidatedNel + val trTotal = CU.jFloatToDouble("tr_tt", event.tr_total).toValidatedNel + val trShipping = CU.jFloatToDouble("tr_sh", event.tr_shipping).toValidatedNel (for { - // better-monadic-for convertedCu <- EitherT( (trTotal, trTax, trShipping, tiPrice) .mapN { @@ -522,12 +528,14 @@ object EnrichmentManager { .sequence .map(_.flatMap(_.toEither)) ) - _ = { - event.tr_total_base = convertedCu._1.orNull - event.tr_tax_base = convertedCu._2.orNull - event.tr_shipping_base = convertedCu._3.orNull - event.ti_price_base = convertedCu._4.orNull - } + trTotalBase <- EitherT.fromEither[F](CU.doubleToJFloat("tr_total_base ", convertedCu._1).leftMap(e => NonEmptyList.one(e))) + _ = trTotalBase.map(t => event.tr_total_base = t) + trTaxBase <- EitherT.fromEither[F](CU.doubleToJFloat("tr_tax_base ", convertedCu._2).leftMap(e => NonEmptyList.one(e))) + _ = trTaxBase.map(t => event.tr_tax_base = t) + trShippingBase <- EitherT.fromEither[F](CU.doubleToJFloat("tr_shipping_base ", convertedCu._3).leftMap(e => NonEmptyList.one(e))) + _ = trShippingBase.map(t => event.tr_shipping_base = t) + tiPriceBase <- EitherT.fromEither[F](CU.doubleToJFloat("ti_price_base ", convertedCu._4).leftMap(e => NonEmptyList.one(e))) + _ = tiPriceBase.map(t => event.ti_price_base = t) } yield ()).value case None => Monad[F].pure(().asRight) } @@ -746,4 +754,50 @@ object EnrichmentManager { Failure.EnrichmentFailures(Instant.now(), fs), Payload.EnrichmentPayload(pee, re) ) + + private def validateEnriched[F[_]: Clock: Monad: RegistryLookup]( + enriched: EnrichedEvent, + raw: RawEvent, + processor: Processor, + client: Client[F, Json] + ): EitherT[F, BadRow, Unit] = + EnrichedEvent + .toAtomic(enriched) + .leftMap(err => + 
EnrichmentManager.buildEnrichmentFailuresBadRow( + NonEmptyList( + EnrichmentFailure( + None, + FailureDetails.EnrichmentFailureMessage.Simple( + "Error during conversion of enriched event to the atomic format" + ) + ), + List(EnrichmentFailure(None, FailureDetails.EnrichmentFailureMessage.Simple(s"${CU.cleanStackTrace(err)}"))) + ), + EnrichedEvent.toPartiallyEnrichedEvent(enriched), + RawEvent.toRawEvent(raw), + processor + ) + ) + .toEitherT[F] + .flatMap(atomic => + client + .check(SelfDescribingData(atomicSchema, atomic)) + .leftMap(err => + EnrichmentManager.buildEnrichmentFailuresBadRow( + NonEmptyList( + EnrichmentFailure( + None, + FailureDetails.EnrichmentFailureMessage.Simple( + s"Enriched event not valid against ${atomicSchema.toSchemaUri}" + ) + ), + List(EnrichmentFailure(None, FailureDetails.EnrichmentFailureMessage.IgluError(atomicSchema, err))) + ), + EnrichedEvent.toPartiallyEnrichedEvent(enriched), + RawEvent.toRawEvent(raw), + processor + ) + ) + ) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala index 0937ea881..039bbdd4e 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala @@ -51,7 +51,7 @@ object Transform { ("ip", (ME.extractIp, "user_ipaddress")), ("aid", (ME.toTsvSafe, "app_id")), ("p", (ME.extractPlatform, "platform")), - ("tid", (CU.validateInteger, "txn_id")), + ("tid", (CU.stringToJInteger2, "txn_id")), ("uid", (ME.toTsvSafe, "user_id")), ("duid", (ME.toTsvSafe, "domain_userid")), ("nuid", (ME.toTsvSafe, "network_userid")), @@ -94,21 +94,21 @@ object Transform { ("ev_ac", (ME.toTsvSafe, "se_action")), // LEGACY tracker var. 
Leave for backwards compat ("ev_la", (ME.toTsvSafe, "se_label")), // LEGACY tracker var. Leave for backwards compat ("ev_pr", (ME.toTsvSafe, "se_property")), // LEGACY tracker var. Leave for backwards compat - ("ev_va", (CU.stringToDoubleLike, "se_value")), // LEGACY tracker var. Leave for backwards compat + ("ev_va", (CU.stringToJFloat2, "se_value")), // LEGACY tracker var. Leave for backwards compat ("se_ca", (ME.toTsvSafe, "se_category")), ("se_ac", (ME.toTsvSafe, "se_action")), ("se_la", (ME.toTsvSafe, "se_label")), ("se_pr", (ME.toTsvSafe, "se_property")), - ("se_va", (CU.stringToDoubleLike, "se_value")), + ("se_va", (CU.stringToJFloat2, "se_value")), // Custom unstructured events ("ue_pr", (JU.extractUnencJson, "unstruct_event")), ("ue_px", (JU.extractBase64EncJson, "unstruct_event")), // Ecommerce transactions ("tr_id", (ME.toTsvSafe, "tr_orderid")), ("tr_af", (ME.toTsvSafe, "tr_affiliation")), - ("tr_tt", (CU.stringToDoubleLike, "tr_total")), - ("tr_tx", (CU.stringToDoubleLike, "tr_tax")), - ("tr_sh", (CU.stringToDoubleLike, "tr_shipping")), + ("tr_tt", (CU.stringToJFloat2, "tr_total")), + ("tr_tx", (CU.stringToJFloat2, "tr_tax")), + ("tr_sh", (CU.stringToJFloat2, "tr_shipping")), ("tr_ci", (ME.toTsvSafe, "tr_city")), ("tr_st", (ME.toTsvSafe, "tr_state")), ("tr_co", (ME.toTsvSafe, "tr_country")), @@ -118,7 +118,7 @@ object Transform { ("ti_na", (ME.toTsvSafe, "ti_name")), // ERROR in Tracker Protocol ("ti_nm", (ME.toTsvSafe, "ti_name")), ("ti_ca", (ME.toTsvSafe, "ti_category")), - ("ti_pr", (CU.stringToDoubleLike, "ti_price")), + ("ti_pr", (CU.stringToJFloat2, "ti_price")), ("ti_qu", (CU.stringToJInteger2, "ti_quantity")), // Page pings ("pp_mix", (CU.stringToJInteger2, "pp_xoffset_min")), diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala 
b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala index 5a4157208..88d1b6ae7 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala @@ -117,7 +117,7 @@ final case class CurrencyConversionEnrichment[F[_]: Monad]( initialCurrency: Option[Either[FailureDetails.EnrichmentFailure, CurrencyUnit]], value: Option[Double], tstamp: ZonedDateTime - ): F[Either[FailureDetails.EnrichmentFailure, Option[String]]] = + ): F[Either[FailureDetails.EnrichmentFailure, Option[Double]]] = (initialCurrency, value) match { case (Some(ic), Some(v)) => (for { @@ -131,8 +131,14 @@ final case class CurrencyConversionEnrichment[F[_]: Monad]( money.map( _.bimap( l => mkEnrichmentFailure(Right(l)), - r => (r.getAmount().toPlainString()).some - ) + r => + Either.catchNonFatal(r.getAmount().doubleValue) match { + case Left(e) => + Left(mkEnrichmentFailure(Left(e))) + case Right(a) => + Right(a.some) + } + ).flatten ) ) } yield res).value @@ -160,7 +166,7 @@ final case class CurrencyConversionEnrichment[F[_]: Monad]( collectorTstamp: Option[DateTime] ): F[ValidatedNel[ FailureDetails.EnrichmentFailure, - (Option[String], Option[String], Option[String], Option[String]) + (Option[Double], Option[Double], Option[Double], Option[Double]) ]] = collectorTstamp match { case Some(tstamp) => diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/outputs/EnrichedEvent.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/outputs/EnrichedEvent.scala index 543a837ba..2fe1d79a0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/outputs/EnrichedEvent.scala +++ 
b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/outputs/EnrichedEvent.scala @@ -2,7 +2,7 @@ * Copyright (c) 2012-2021 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. + * and you may not use this file except in compliance with the Apache License Version 2.0. * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. * * Unless required by applicable law or agreed to in writing, @@ -15,9 +15,14 @@ package com.snowplowanalytics.snowplow.enrich.common.outputs import java.lang.{Integer => JInteger} import java.lang.{Float => JFloat} import java.lang.{Byte => JByte} +import java.time.format.DateTimeFormatter import scala.beans.BeanProperty +import cats.implicits._ + +import io.circe.Json + import com.snowplowanalytics.snowplow.badrows.Payload.PartiallyEnrichedEvent /** @@ -37,7 +42,7 @@ import com.snowplowanalytics.snowplow.badrows.Payload.PartiallyEnrichedEvent // TODO: make the EnrichedEvent Avro-format, not Redshift-specific class EnrichedEvent extends Serializable { - // The application (site, game, app etc) this event belongs to, and the tracker platform + // The application (site, game, app etc) this event belongs to, and the tracker platform @BeanProperty var app_id: String = _ @BeanProperty var platform: String = _ @@ -46,10 +51,10 @@ class EnrichedEvent extends Serializable { @BeanProperty var collector_tstamp: String = _ @BeanProperty var dvce_created_tstamp: String = _ - // Transaction (i.e. this logging event) + // Transaction (i.e.
this logging event) @BeanProperty var event: String = _ @BeanProperty var event_id: String = _ - @BeanProperty var txn_id: String = _ + @BeanProperty var txn_id: JInteger = _ // Versioning @BeanProperty var name_tracker: String = _ @@ -121,8 +126,7 @@ class EnrichedEvent extends Serializable { @BeanProperty var se_action: String = _ @BeanProperty var se_label: String = _ @BeanProperty var se_property: String = _ - @BeanProperty var se_value: String = - _ // Technically should be a Double but may be rendered incorrectly by Cascading with scientific notification (which Redshift can't process) + @BeanProperty var se_value: JFloat = _ // Unstructured Event @BeanProperty var unstruct_event: String = _ @@ -130,9 +134,9 @@ class EnrichedEvent extends Serializable { // Ecommerce transaction (from querystring) @BeanProperty var tr_orderid: String = _ @BeanProperty var tr_affiliation: String = _ - @BeanProperty var tr_total: String = _ - @BeanProperty var tr_tax: String = _ - @BeanProperty var tr_shipping: String = _ + @BeanProperty var tr_total: JFloat = _ + @BeanProperty var tr_tax: JFloat = _ + @BeanProperty var tr_shipping: JFloat = _ @BeanProperty var tr_city: String = _ @BeanProperty var tr_state: String = _ @BeanProperty var tr_country: String = _ @@ -142,7 +146,7 @@ class EnrichedEvent extends Serializable { @BeanProperty var ti_sku: String = _ @BeanProperty var ti_name: String = _ @BeanProperty var ti_category: String = _ - @BeanProperty var ti_price: String = _ + @BeanProperty var ti_price: JFloat = _ @BeanProperty var ti_quantity: JInteger = _ // Page Pings @@ -199,11 +203,11 @@ class EnrichedEvent extends Serializable { // Currency @BeanProperty var tr_currency: String = _ - @BeanProperty var tr_total_base: String = _ - @BeanProperty var tr_tax_base: String = _ - @BeanProperty var tr_shipping_base: String = _ + @BeanProperty var tr_total_base: JFloat = _ + @BeanProperty var tr_tax_base: JFloat = _ + @BeanProperty var tr_shipping_base: JFloat = _ @BeanProperty
var ti_currency: String = _ - @BeanProperty var ti_price_base: String = _ + @BeanProperty var ti_price_base: JFloat = _ @BeanProperty var base_currency: String = _ // Geolocation @@ -249,6 +253,162 @@ class EnrichedEvent extends Serializable { } object EnrichedEvent { + + private val JsonSchemaDateTimeFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS") + + private def toKv[T]( + k: String, + v: T, + f: T => Json + ): Option[(String, Json)] = + Option(v).map(value => (k, f(value))) + + private def toKv(k: String, s: String): Option[(String, Json)] = toKv(k, s, Json.fromString) + private def toKv(k: String, i: JInteger): Option[(String, Json)] = toKv(k, i, (jInt: JInteger) => Json.fromInt(jInt)) + private def toKv(k: String, f: JFloat): Option[(String, Json)] = toKv(k, f, (jFloat: JFloat) => Json.fromFloatOrNull(jFloat)) + private def toKv(k: String, b: JByte): Option[(String, Json)] = toKv(k, b, (jByte: JByte) => Json.fromBoolean(jByte != 0)) + private def toDateKv(k: String, s: String): Option[(String, Json)] = + toKv( + k, + s, + (s: String) => Json.fromString(DateTimeFormatter.ISO_DATE_TIME.format(JsonSchemaDateTimeFormat.parse(s))) + ) + + def toAtomic(enriched: EnrichedEvent): Either[Throwable, Json] = + Either.catchNonFatal( + Json.fromFields( + toKv("app_id", enriched.app_id) ++ + toKv("platform", enriched.platform) ++ + toDateKv("etl_tstamp", enriched.etl_tstamp) ++ + toDateKv("collector_tstamp", enriched.collector_tstamp) ++ + toDateKv("dvce_created_tstamp", enriched.dvce_created_tstamp) ++ + toKv("event", enriched.event) ++ + toKv("event_id", enriched.event_id) ++ + toKv("txn_id", enriched.txn_id) ++ + toKv("name_tracker", enriched.name_tracker) ++ + toKv("v_tracker", enriched.v_tracker) ++ + toKv("v_collector", enriched.v_collector) ++ + toKv("v_etl", enriched.v_etl) ++ + toKv("user_id", enriched.user_id) ++ + toKv("user_ipaddress", enriched.user_ipaddress) ++ + toKv("user_fingerprint", enriched.user_fingerprint) ++ + toKv("domain_userid", 
enriched.domain_userid) ++ + toKv("domain_sessionidx", enriched.domain_sessionidx) ++ + toKv("network_userid", enriched.network_userid) ++ + toKv("geo_country", enriched.geo_country) ++ + toKv("geo_region", enriched.geo_region) ++ + toKv("geo_city", enriched.geo_city) ++ + toKv("geo_zipcode", enriched.geo_zipcode) ++ + toKv("geo_latitude", enriched.geo_latitude) ++ + toKv("geo_longitude", enriched.geo_longitude) ++ + toKv("geo_region_name", enriched.geo_region_name) ++ + toKv("ip_isp", enriched.ip_isp) ++ + toKv("ip_organization", enriched.ip_organization) ++ + toKv("ip_domain", enriched.ip_domain) ++ + toKv("ip_netspeed", enriched.ip_netspeed) ++ + toKv("page_url", enriched.page_url) ++ + toKv("page_title", enriched.page_title) ++ + toKv("page_referrer", enriched.page_referrer) ++ + toKv("page_urlscheme", enriched.page_urlscheme) ++ + toKv("page_urlhost", enriched.page_urlhost) ++ + toKv("page_urlport", enriched.page_urlport) ++ + toKv("page_urlpath", enriched.page_urlpath) ++ + toKv("page_urlquery", enriched.page_urlquery) ++ + toKv("page_urlfragment", enriched.page_urlfragment) ++ + toKv("refr_urlscheme", enriched.refr_urlscheme) ++ + toKv("refr_urlhost", enriched.refr_urlhost) ++ + toKv("refr_urlport", enriched.refr_urlport) ++ + toKv("refr_urlpath", enriched.refr_urlpath) ++ + toKv("refr_urlquery", enriched.refr_urlquery) ++ + toKv("refr_urlfragment", enriched.refr_urlfragment) ++ + toKv("refr_medium", enriched.refr_medium) ++ + toKv("refr_source", enriched.refr_source) ++ + toKv("refr_term", enriched.refr_term) ++ + toKv("mkt_medium", enriched.mkt_medium) ++ + toKv("mkt_source", enriched.mkt_source) ++ + toKv("mkt_term", enriched.mkt_term) ++ + toKv("mkt_content", enriched.mkt_content) ++ + toKv("mkt_campaign", enriched.mkt_campaign) ++ + toKv("se_category", enriched.se_category) ++ + toKv("se_action", enriched.se_action) ++ + toKv("se_label", enriched.se_label) ++ + toKv("se_property", enriched.se_property) ++ + toKv("se_value", enriched.se_value) ++ + 
toKv("tr_orderid", enriched.tr_orderid) ++ + toKv("tr_affiliation", enriched.tr_affiliation) ++ + toKv("tr_total", enriched.tr_total) ++ + toKv("tr_tax", enriched.tr_tax) ++ + toKv("tr_shipping", enriched.tr_shipping) ++ + toKv("tr_city", enriched.tr_city) ++ + toKv("tr_state", enriched.tr_state) ++ + toKv("tr_country", enriched.tr_country) ++ + toKv("ti_orderid", enriched.ti_orderid) ++ + toKv("ti_sku", enriched.ti_sku) ++ + toKv("ti_name", enriched.ti_name) ++ + toKv("ti_category", enriched.ti_category) ++ + toKv("ti_price", enriched.ti_price) ++ + toKv("ti_quantity", enriched.ti_quantity) ++ + toKv("pp_xoffset_min", enriched.pp_xoffset_min) ++ + toKv("pp_xoffset_max", enriched.pp_xoffset_max) ++ + toKv("pp_yoffset_min", enriched.pp_yoffset_min) ++ + toKv("pp_yoffset_max", enriched.pp_yoffset_max) ++ + toKv("useragent", enriched.useragent) ++ + toKv("br_name", enriched.br_name) ++ + toKv("br_family", enriched.br_family) ++ + toKv("br_version", enriched.br_version) ++ + toKv("br_type", enriched.br_type) ++ + toKv("br_renderengine", enriched.br_renderengine) ++ + toKv("br_lang", enriched.br_lang) ++ + toKv("br_features_pdf", enriched.br_features_pdf) ++ + toKv("br_features_flash", enriched.br_features_flash) ++ + toKv("br_features_java", enriched.br_features_java) ++ + toKv("br_features_director", enriched.br_features_director) ++ + toKv("br_features_quicktime", enriched.br_features_quicktime) ++ + toKv("br_features_realplayer", enriched.br_features_realplayer) ++ + toKv("br_features_windowsmedia", enriched.br_features_windowsmedia) ++ + toKv("br_features_gears", enriched.br_features_gears) ++ + toKv("br_features_silverlight", enriched.br_features_silverlight) ++ + toKv("br_cookies", enriched.br_cookies) ++ + toKv("br_colordepth", enriched.br_colordepth) ++ + toKv("br_viewwidth", enriched.br_viewwidth) ++ + toKv("br_viewheight", enriched.br_viewheight) ++ + toKv("os_name", enriched.os_name) ++ + toKv("os_family", enriched.os_family) ++ + toKv("os_manufacturer", 
enriched.os_manufacturer) ++ + toKv("os_timezone", enriched.os_timezone) ++ + toKv("dvce_type", enriched.dvce_type) ++ + toKv("dvce_ismobile", enriched.dvce_ismobile) ++ + toKv("dvce_screenwidth", enriched.dvce_screenwidth) ++ + toKv("dvce_screenheight", enriched.dvce_screenheight) ++ + toKv("doc_charset", enriched.doc_charset) ++ + toKv("doc_width", enriched.doc_width) ++ + toKv("doc_height", enriched.doc_height) ++ + toKv("tr_currency", enriched.tr_currency) ++ + toKv("tr_total_base", enriched.tr_total_base) ++ + toKv("tr_tax_base", enriched.tr_tax_base) ++ + toKv("tr_shipping_base", enriched.tr_shipping_base) ++ + toKv("ti_currency", enriched.ti_currency) ++ + toKv("ti_price_base", enriched.ti_price_base) ++ + toKv("base_currency", enriched.base_currency) ++ + toKv("geo_timezone", enriched.geo_timezone) ++ + toKv("mkt_clickid", enriched.mkt_clickid) ++ + toKv("mkt_network", enriched.mkt_network) ++ + toKv("etl_tags", enriched.etl_tags) ++ + toDateKv("dvce_sent_tstamp", enriched.dvce_sent_tstamp) ++ + toKv("refr_domain_userid", enriched.refr_domain_userid) ++ + toDateKv("refr_dvce_tstamp", enriched.refr_dvce_tstamp) ++ + toKv("domain_sessionid", enriched.domain_sessionid) ++ + toDateKv("derived_tstamp", enriched.derived_tstamp) ++ + toKv("event_vendor", enriched.event_vendor) ++ + toKv("event_name", enriched.event_name) ++ + toKv("event_format", enriched.event_format) ++ + toKv("event_version", enriched.event_version) ++ + toKv("event_fingerprint", enriched.event_fingerprint) ++ + toDateKv("true_tstamp", enriched.true_tstamp) + ) + ) + def toPartiallyEnrichedEvent(enrichedEvent: EnrichedEvent): PartiallyEnrichedEvent = PartiallyEnrichedEvent( app_id = Option(enrichedEvent.app_id), @@ -258,7 +418,7 @@ object EnrichedEvent { dvce_created_tstamp = Option(enrichedEvent.dvce_created_tstamp), event = Option(enrichedEvent.event), event_id = Option(enrichedEvent.event_id), - txn_id = Option(enrichedEvent.txn_id), + txn_id = Option(enrichedEvent.txn_id).map(_.toString), 
name_tracker = Option(enrichedEvent.name_tracker), v_tracker = Option(enrichedEvent.v_tracker), v_collector = Option(enrichedEvent.v_collector), @@ -308,13 +468,13 @@ object EnrichedEvent { se_action = Option(enrichedEvent.se_action), se_label = Option(enrichedEvent.se_label), se_property = Option(enrichedEvent.se_property), - se_value = Option(enrichedEvent.se_value), + se_value = Option(enrichedEvent.se_value).map(_.toString), unstruct_event = Option(enrichedEvent.unstruct_event), tr_orderid = Option(enrichedEvent.tr_orderid), tr_affiliation = Option(enrichedEvent.tr_affiliation), - tr_total = Option(enrichedEvent.tr_total), - tr_tax = Option(enrichedEvent.tr_tax), - tr_shipping = Option(enrichedEvent.tr_shipping), + tr_total = Option(enrichedEvent.tr_total).map(_.toString), + tr_tax = Option(enrichedEvent.tr_tax).map(_.toString), + tr_shipping = Option(enrichedEvent.tr_shipping).map(_.toString), tr_city = Option(enrichedEvent.tr_city), tr_state = Option(enrichedEvent.tr_state), tr_country = Option(enrichedEvent.tr_country), @@ -322,7 +482,7 @@ object EnrichedEvent { ti_sku = Option(enrichedEvent.ti_sku), ti_name = Option(enrichedEvent.ti_name), ti_category = Option(enrichedEvent.ti_category), - ti_price = Option(enrichedEvent.ti_price), + ti_price = Option(enrichedEvent.ti_price).map(_.toString), ti_quantity = Option(enrichedEvent.ti_quantity).map(Integer2int), pp_xoffset_min = Option(enrichedEvent.pp_xoffset_min).map(Integer2int), pp_xoffset_max = Option(enrichedEvent.pp_xoffset_max).map(Integer2int), @@ -360,11 +520,11 @@ object EnrichedEvent { doc_width = Option(enrichedEvent.doc_width).map(Integer2int), doc_height = Option(enrichedEvent.doc_height).map(Integer2int), tr_currency = Option(enrichedEvent.tr_currency), - tr_total_base = Option(enrichedEvent.tr_total_base), - tr_tax_base = Option(enrichedEvent.tr_tax_base), - tr_shipping_base = Option(enrichedEvent.tr_shipping_base), + tr_total_base = Option(enrichedEvent.tr_total_base).map(_.toString), + 
tr_tax_base = Option(enrichedEvent.tr_tax_base).map(_.toString), + tr_shipping_base = Option(enrichedEvent.tr_shipping_base).map(_.toString), ti_currency = Option(enrichedEvent.ti_currency), - ti_price_base = Option(enrichedEvent.ti_price_base), + ti_price_base = Option(enrichedEvent.ti_price_base).map(_.toString), base_currency = Option(enrichedEvent.base_currency), geo_timezone = Option(enrichedEvent.geo_timezone), mkt_clickid = Option(enrichedEvent.mkt_clickid), diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala index 5f7ec84a6..fc9be8673 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala @@ -13,13 +13,14 @@ package com.snowplowanalytics.snowplow.enrich.common package utils -import java.lang.{Byte => JByte, Integer => JInteger} +import java.lang.{Byte => JByte, Float => JFloat, Integer => JInteger} import java.lang.reflect.Field import java.math.{BigDecimal => JBigDecimal} import java.net.{InetAddress, URI, URLDecoder, URLEncoder} import java.nio.charset.Charset import java.nio.charset.StandardCharsets.UTF_8 import java.util.UUID +import java.io.{PrintWriter, StringWriter} import scala.collection.JavaConverters._ import scala.util.Try @@ -340,6 +341,25 @@ object ConversionUtils { FailureDetails.EnrichmentFailure(None, f) } + val stringToJFloat: String => Either[String, JFloat] = str => + if (Option(str).isEmpty) + null.asInstanceOf[JFloat].asRight + else + Either + .catchNonFatal(JFloat.valueOf(str)) + .leftMap(e => s"cannot be converted to java.lang.Float. 
Error : ${e.getMessage}") + + val stringToJFloat2: (String, String) => Either[FailureDetails.EnrichmentFailure, JFloat] = + (field, str) => + stringToJFloat(str).leftMap { e => + val f = FailureDetails.EnrichmentFailureMessage.InputData( + field, + Option(str), + e + ) + FailureDetails.EnrichmentFailure(None, f) + } + /** * Convert a String to a String containing a Redshift-compatible Double. * Necessary because Redshift does not support all Java Double syntaxes e.g. "3.4028235E38" @@ -398,6 +418,40 @@ object ConversionUtils { ) ) + /** Convert a java Float to a Double */ + def jFloatToDouble(field: String, f: JFloat): Either[FailureDetails.EnrichmentFailure, Option[Double]] = + Either + .catchNonFatal { + Option(f).map(_.toDouble) + } + .leftMap(_ => + FailureDetails.EnrichmentFailure( + None, + FailureDetails.EnrichmentFailureMessage.InputData( + field, + Option(f).map(_.toString), + "cannot be converted to Double" + ) + ) + ) + + /** Convert a Double to a java Float */ + def doubleToJFloat(field: String, d: Option[Double]): Either[FailureDetails.EnrichmentFailure, Option[JFloat]] = + Either + .catchNonFatal { + d.map(dd => JFloat.valueOf(dd.toFloat)) + } + .leftMap(_ => + FailureDetails.EnrichmentFailure( + None, + FailureDetails.EnrichmentFailureMessage.InputData( + field, + d.map(_.toString), + "cannot be converted to java Float" + ) + ) + ) + /** * Converts a String to a Double with two decimal places. Used to honor schemas with * multipleOf 0.01. 
@@ -537,4 +591,9 @@ object ConversionUtils { } .mkString("\t") + def cleanStackTrace(t: Throwable): String = { + val sw = new StringWriter + t.printStackTrace(new PrintWriter(sw)) + sw.toString + } } diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 new file mode 100644 index 000000000..0b7ff5790 --- /dev/null +++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 @@ -0,0 +1,489 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for an atomic canonical Snowplow event", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "atomic", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "app_id": { + "type": ["string", "null"], + "maxLength": 255 + }, + "platform": { + "type": ["string", "null"], + "maxLength": 255 + }, + "etl_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "collector_tstamp": { + "type": "string", + "format": "date-time" + }, + "dvce_created_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "event": { + "type": ["string", "null"], + "maxLength": 128 + }, + "event_id": { + "type": "string", + "maxLength": 36 + }, + "txn_id": { + "type": ["integer", "null"] + }, + "name_tracker": { + "type": ["string", "null"], + "maxLength": 128 + }, + "v_tracker": { + "type": ["string", "null"], + "maxLength": 100 + }, + "v_collector": { + "type": "string", + "maxLength": 100 + }, + "v_etl": { + "type": "string", + "maxLength": 100 + }, + "user_id": { + "type": ["string", "null"], + "maxLength": 255 + }, + "user_ipaddress": { + "type": ["string", "null"], + "maxLength": 128 + }, + "user_fingerprint": { + "type": ["string", "null"], + "maxLength": 
128 + }, + "domain_userid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "domain_sessionidx": { + "type": ["integer", "null"] + }, + "network_userid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "geo_country": { + "type": ["string", "null"], + "maxLength": 2 + }, + "geo_region": { + "type": ["string", "null"], + "maxLength": 3 + }, + "geo_city": { + "type": ["string", "null"], + "maxLength": 75 + }, + "geo_zipcode": { + "type": ["string", "null"], + "maxLength": 15 + }, + "geo_latitude": { + "type": ["number", "null"] + }, + "geo_longitude": { + "type": ["number", "null"] + }, + "geo_region_name": { + "type": ["string", "null"], + "maxLength": 100 + }, + "ip_isp": { + "type": ["string", "null"], + "maxLength": 100 + }, + "ip_organization": { + "type": ["string", "null"], + "maxLength": 128 + }, + "ip_domain": { + "type": ["string", "null"], + "maxLength": 128 + }, + "ip_netspeed": { + "type": ["string", "null"], + "maxLength": 100 + }, + "page_url": { + "type": ["string", "null"], + "maxLength": 4096 + }, + "page_title": { + "type": ["string", "null"], + "maxLength": 2000 + }, + "page_referrer": { + "type": ["string", "null"], + "maxLength": 4096 + }, + "page_urlscheme": { + "type": ["string", "null"], + "maxLength": 16 + }, + "page_urlhost": { + "type": ["string", "null"], + "maxLength": 255 + }, + "page_urlport": { + "type": ["integer", "null"] + }, + "page_urlpath": { + "type": ["string", "null"], + "maxLength": 3000 + }, + "page_urlquery": { + "type": ["string", "null"], + "maxLength": 6000 + }, + "page_urlfragment": { + "type": ["string", "null"], + "maxLength": 3000 + }, + "refr_urlscheme": { + "type": ["string", "null"], + "maxLength": 16 + }, + "refr_urlhost": { + "type": ["string", "null"], + "maxLength": 255 + }, + "refr_urlport": { + "type": ["integer", "null"] + }, + "refr_urlpath": { + "type": ["string", "null"], + "maxLength": 6000 + }, + "refr_urlquery": { + "type": ["string", "null"], + "maxLength": 6000 + }, + 
"refr_urlfragment": { + "type": ["string", "null"], + "maxLength": 3000 + }, + "refr_medium": { + "type": ["string", "null"], + "maxLength": 25 + }, + "refr_source": { + "type": ["string", "null"], + "maxLength": 50 + }, + "refr_term": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_medium": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_source": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_term": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_content": { + "type": ["string", "null"], + "maxLength": 500 + }, + "mkt_campaign": { + "type": ["string", "null"], + "maxLength": 255 + }, + "se_category": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "se_action": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "se_label": { + "type": ["string", "null"], + "maxLength": 4096 + }, + "se_property": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "se_value": { + "type": ["number", "null"] + }, + "tr_orderid": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_affiliation": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_total": { + "type": ["number", "null"] + }, + "tr_tax": { + "type": ["number", "null"] + }, + "tr_shipping": { + "type": ["number", "null"] + }, + "tr_city": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_state": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_country": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_orderid": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_sku": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_name": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_category": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_price": { + "type": ["number", "null"] + }, + "ti_quantity": { + "type": ["integer", "null"] + }, + "pp_xoffset_min": { + "type": ["integer", "null"] + }, + "pp_xoffset_max": { + "type": ["integer", "null"] 
+ }, + "pp_yoffset_min": { + "type": ["integer", "null"] + }, + "pp_yoffset_max": { + "type": ["integer", "null"] + }, + "useragent": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "br_name": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_family": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_version": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_type": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_renderengine": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_lang": { + "type": ["string", "null"], + "maxLength": 255 + }, + "br_features_pdf": { + "type": ["boolean", "null"] + }, + "br_features_flash": { + "type": ["boolean", "null"] + }, + "br_features_java": { + "type": ["boolean", "null"] + }, + "br_features_director": { + "type": ["boolean", "null"] + }, + "br_features_quicktime": { + "type": ["boolean", "null"] + }, + "br_features_realplayer": { + "type": ["boolean", "null"] + }, + "br_features_windowsmedia": { + "type": ["boolean", "null"] + }, + "br_features_gears": { + "type": ["boolean", "null"] + }, + "br_features_silverlight": { + "type": ["boolean", "null"] + }, + "br_cookies": { + "type": ["boolean", "null"] + }, + "br_colordepth": { + "type": ["string", "null"], + "maxLength": 12 + }, + "br_viewwidth": { + "type": ["integer", "null"] + }, + "br_viewheight": { + "type": ["integer", "null"] + }, + "os_name": { + "type": ["string", "null"], + "maxLength": 50 + }, + "os_family": { + "type": ["string", "null"], + "maxLength": 50 + }, + "os_manufacturer": { + "type": ["string", "null"], + "maxLength": 50 + }, + "os_timezone": { + "type": ["string", "null"], + "maxLength": 255 + }, + "dvce_type": { + "type": ["string", "null"], + "maxLength": 50 + }, + "dvce_ismobile": { + "type": ["boolean", "null"] + }, + "dvce_screenwidth": { + "type": ["integer", "null"] + }, + "dvce_screenheight": { + "type": ["integer", "null"] + }, + "doc_charset": { + "type": ["string", "null"], + 
"maxLength": 128 + }, + "doc_width": { + "type": ["integer", "null"] + }, + "doc_height": { + "type": ["integer", "null"] + }, + "tr_currency": { + "type": ["string", "null"], + "maxLength": 3 + }, + "tr_total_base": { + "type": ["number", "null"] + }, + "tr_tax_base": { + "type": ["number", "null"] + }, + "tr_shipping_base": { + "type": ["number", "null"] + }, + "ti_currency": { + "type": ["string", "null"], + "maxLength": 3 + }, + "ti_price_base": { + "type": ["number", "null"] + }, + "base_currency": { + "type": ["string", "null"], + "maxLength": 3 + }, + "geo_timezone": { + "type": ["string", "null"], + "maxLength": 64 + }, + "mkt_clickid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "mkt_network": { + "type": ["string", "null"], + "maxLength": 64 + }, + "etl_tags": { + "type": ["string", "null"], + "maxLength": 500 + }, + "dvce_sent_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "refr_domain_userid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "refr_dvce_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "domain_sessionid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "derived_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "event_vendor": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "event_name": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "event_format": { + "type": ["string", "null"], + "maxLength": 128 + }, + "event_version": { + "type": ["string", "null"], + "maxLength": 128 + }, + "event_fingerprint": { + "type": ["string", "null"], + "maxLength": 128 + }, + "true_tstamp": { + "type": ["string", "null"], + "format": "date-time" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala index 
afd3f47a6..0709cf0e9 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala @@ -64,7 +64,8 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { client, processor, dateTime, - Some(collectorPayloadBatched).validNel + Some(collectorPayloadBatched).validNel, + true ) output must be like { case a :: b :: c :: d :: Nil => @@ -84,7 +85,8 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { client, processor, dateTime, - Some(collectorPayload).validNel + Some(collectorPayload).validNel, + true ) ) must beValid.like { case Validated.Valid(_: EnrichedEvent) :: Nil => ok @@ -100,7 +102,8 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { client, processor, dateTime, - invalidCollectorPayload + invalidCollectorPayload, + true ) must be like { case Validated.Invalid(_: BadRow.CPFormatViolation) :: Nil => ok case other => ko(s"One invalid CPFormatViolation expected, got ${other}") @@ -115,7 +118,8 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { client, processor, dateTime, - collectorPayload.validNel[BadRow] + collectorPayload.validNel[BadRow], + true ) must beEqualTo(Nil) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala index 6cb2b608b..92f688d92 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala @@ -20,6 +20,7 @@ import cats.data.NonEmptyList import io.circe.literal._ import org.joda.time.DateTime import com.snowplowanalytics.snowplow.badrows._ 
+import com.snowplowanalytics.snowplow.badrows.FailureDetails.EnrichmentFailureMessage import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer} import loaders._ import adapters.RawEvent @@ -36,7 +37,6 @@ import enrichments.registry.{IabEnrichment, JavascriptScriptEnrichment, YauaaEnr import org.apache.commons.codec.digest.DigestUtils import org.specs2.mutable.Specification import org.specs2.matcher.EitherMatchers - import SpecHelpers._ class EnrichmentManagerSpec extends Specification with EitherMatchers { @@ -69,7 +69,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { client, processor, timestamp, - rawEvent + rawEvent, + true ) enriched.value must beLeft.like { @@ -102,7 +103,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { client, processor, timestamp, - rawEvent + rawEvent, + true ) enriched.value must beLeft.like { case _: BadRow.SchemaViolations => ok @@ -144,7 +146,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { client, processor, timestamp, - rawEvent + rawEvent, + true ) enriched.value must beLeft.like { case BadRow.EnrichmentFailures( @@ -207,7 +210,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { client, processor, timestamp, - rawEvent + rawEvent, + true ) enriched.value must beLeft.like { case BadRow.EnrichmentFailures( @@ -266,7 +270,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { client, processor, timestamp, - rawEvent + rawEvent, + true ) enriched.value must beRight } @@ -326,7 +331,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { client, processor, timestamp, - rawEvent + rawEvent, + true ) enriched.value must beRight } @@ -386,7 +392,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { client, processor, timestamp, - rawEvent + rawEvent, + true ) enriched.value must beRight } @@ -446,7 +453,8 @@ class EnrichmentManagerSpec extends 
Specification with EitherMatchers { client, processor, timestamp, - rawEvent + rawEvent, + true ) enriched.value must beLeft } @@ -507,7 +515,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { client, processor, timestamp, - rawEvent + rawEvent, + true ) enriched.value must beLeft } @@ -574,7 +583,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { client, processor, timestamp, - rawEvent + rawEvent, + true ) enriched.value must beLeft } @@ -594,7 +604,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { client, processor, timestamp, - rawEvent + rawEvent, + true ) enriched.value.map(_.useragent) must beRight(qs_ua) enriched.value.map(_.derived_contexts) must beRight((_: String).contains("\"agentName\":\"Firefox\"")) @@ -613,7 +624,8 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { client, processor, timestamp, - rawEvent + rawEvent, + true ) enriched.value.map(_.useragent) must beRight("header-useragent") } @@ -686,6 +698,41 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { EnrichmentManager.getCollectorVersionSet(input) must beRight(()) } } + + "validateEnriched" should { + "create a bad row if a field is oversized (tv)" >> { + EnrichmentManager + .enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + RawEvent(api, fatBody, None, source, context), + true + ) + .swap + .map { + case BadRow.EnrichmentFailures(_, failure, _) => + failure.messages.map(_.message match { + case EnrichmentFailureMessage.Simple(error) => error + case EnrichmentFailureMessage.IgluError(schemaKey, _) => schemaKey + case _ => None + }) + case _ => None + } + .getOrElse(None) === NonEmptyList( + s"Enriched event not valid against ${EnrichmentManager.atomicSchema.toSchemaUri}", + List(EnrichmentManager.atomicSchema) + ) + } + + "allow normal raw events" >> { + EnrichmentManager + .enrichEvent(enrichmentReg, client, processor, timestamp, RawEvent(api, 
leanBody, None, source, context), true) + .map(_ => true) + .getOrElse(false) must beTrue + } + } } object EnrichmentManagerSpec { @@ -706,6 +753,18 @@ object EnrichmentManagerSpec { None ) + val leanBody = Map( + "e" -> "pp", + "tv" -> "js-0.13.1", + "p" -> "web" + ).toOpt + + val fatBody = Map( + "e" -> "pp", + "tv" -> s"${"s" * 500}", + "p" -> "web" + ).toOpt + val iabEnrichment = IabEnrichment .parse( json"""{ @@ -739,4 +798,5 @@ object EnrichmentManagerSpec { .getOrElse(throw new RuntimeException("IAB enrichment couldn't be initialised")) // to make sure it's not none .enrichment[Id] .some + } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala index 90b13e5b6..fb34fcac8 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala @@ -28,22 +28,284 @@ import org.joda.money.CurrencyUnit import org.joda.time.DateTime import org.specs2.Specification -import org.specs2.matcher.DataTables -object CurrencyConversionEnrichmentSpec { - val OerApiKey = "OER_KEY" -} +class CurrencyConversionEnrichmentSpec extends Specification { + import CurrencyConversionEnrichmentSpec._ -/** Tests the convertCurrencies function */ -import CurrencyConversionEnrichmentSpec._ -class CurrencyConversionEnrichmentSpec extends Specification with DataTables { def is = skipAllIf(sys.env.get(OerApiKey).isEmpty) ^ s2""" - Failure test for Currency Conversion $e1 - Success test for Currency Conversion $e2 + Failure for invalid transaction currency $e1 + Failure for invalid transaction item currency $e2 + Failure for invalid OER API key $e3 + 
Success for all fields absent $e4 + Success for all fields absent except currency $e5 + Success for no transaction currency, tax, or shipping $e6 + Success for no transaction currency or total $e7 + Success for no transaction currency $e8 + Success for transaction item null $e9 + Success for valid app id and API key $e10 + Success for both currencies null $e11 + Success for converting to the same currency $e12 + Success for valid app id and API key $e13 """ + def e1 = { + val input = + Input( + Some("RUP"), + Some(11.00), + Some(1.17), + Some(0.00), + None, + Some(17.99), + Some(coTstamp) + ) + val expected: Result = Validated.Invalid( + NonEmptyList.of( + ef( + FailureDetails.EnrichmentFailureMessage + .InputData("tr_currency", Some("RUP"), "Unknown currency 'RUP'") + ), + ef( + FailureDetails.EnrichmentFailureMessage + .InputData("tr_currency", Some("RUP"), "Unknown currency 'RUP'") + ), + ef( + FailureDetails.EnrichmentFailureMessage + .InputData("tr_currency", Some("RUP"), "Unknown currency 'RUP'") + ) + ) + ) + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e2 = { + val input = + Input( + None, + Some(12.00), + Some(0.7), + Some(0.00), + Some("HUL"), + Some(1.99), + Some(coTstamp) + ) + val expected: Result = ef( + FailureDetails.EnrichmentFailureMessage.InputData( + "ti_currency", + Some("HUL"), + "Unknown currency 'HUL'" + ) + ).invalidNel + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e3 = { + val input = + Input( + None, + Some(13.00), + Some(3.67), + Some(0.00), + Some("GBP"), + Some(2.99), + Some(coTstamp) + ) + val wrongKey = "8A8A8A8A8A8A8A8A8A8A8A8AA8A8A8A8" + val expected: Result = ef( + FailureDetails.EnrichmentFailureMessage.Simple( + "Open Exchange Rates error, type: [OtherErrors], message: [invalid_app_id]" + ) + ).invalidNel + val actual = runEnrichment(input, wrongKey) + actual must beEqualTo(expected) + } + + def e4 = { + val input = + Input( + None, + None, + None, + None, + 
None, + None, + None + ) + val expected: Result = + ef( + FailureDetails.EnrichmentFailureMessage.InputData( + "collector_tstamp", + None, + "missing" + ) + ).invalidNel + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e5 = { + val input = + Input( + Some("GBP"), + None, + None, + None, + Some("GBP"), + None, + None + ) + val expected: Result = + ef( + FailureDetails.EnrichmentFailureMessage + .InputData("collector_tstamp", None, "missing") + ).invalidNel + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e6 = { + val input = + Input( + Some("GBP"), + Some(11.00), + None, + None, + None, + None, + Some(coTstamp) + ) + val expected: Result = (Some(12.75), None, None, None).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e7 = { + val input = + Input( + Some("GBP"), + None, + Some(2.67), + Some(0.00), + None, + None, + Some(coTstamp) + ) + val expected: Result = (None, Some(3.09), Some(0.00), None).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e8 = { + val input = + Input( + None, + None, + None, + None, + Some("GBP"), + Some(12.99), + Some(coTstamp) + ) + val expected: Result = (None, None, None, Some(15.05)).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e9 = { + val input = + Input( + Some("GBP"), + Some(11.00), + Some(2.67), + Some(0.00), + None, + None, + Some(coTstamp) + ) + val expected: Result = (Some(12.75), Some(3.09), Some(0.00), None).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e10 = { + val input = + Input( + None, + Some(14.00), + Some(4.67), + Some(0.00), + Some("GBP"), + Some(10.99), + Some(coTstamp) + ) + val expected: Result = + (None, None, None, Some(12.74)).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e11 = { + val input = + Input( + None, + Some(11.00), + Some(2.67), + 
Some(0.00), + None, + Some(12.99), + Some(coTstamp) + ) + val expected: Result = (None, None, None, None).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e12 = { + val input = + Input( + Some("EUR"), + Some(11.00), + Some(2.67), + Some(0.00), + Some("EUR"), + Some(12.99), + Some(coTstamp) + ) + val expected: Result = + ( + Some(11.00), + Some(2.67), + Some(0.00), + Some(12.99) + ).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e13 = { + val input = + Input( + Some("GBP"), + Some(16.00), + Some(2.67), + Some(0.00), + None, + Some(10.00), + Some(coTstamp) + ) + val expected: Result = (Some(18.54), Some(3.09), Some(0.00), None).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } +} + +object CurrencyConversionEnrichmentSpec { + val OerApiKey = "OER_KEY" + lazy val validAppKey = sys.env .getOrElse(OerApiKey, throw new IllegalStateException( @@ -52,7 +314,7 @@ class CurrencyConversionEnrichmentSpec extends Specification with DataTables { ) type Result = ValidatedNel[ FailureDetails.EnrichmentFailure, - (Option[String], Option[String], Option[String], Option[String]) + (Option[Double], Option[Double], Option[Double], Option[Double]) ] val schemaKey = SchemaKey("vendor", "name", "format", SchemaVer.Full(1, 0, 0)) val ef: FailureDetails.EnrichmentFailureMessage => FailureDetails.EnrichmentFailure = m => @@ -60,141 +322,33 @@ class CurrencyConversionEnrichmentSpec extends Specification with DataTables { FailureDetails.EnrichmentInformation(schemaKey, "currency-conversion").some, m ) - val currencyInvalidRup: Result = Validated.Invalid( - NonEmptyList.of( - ef( - FailureDetails.EnrichmentFailureMessage - .InputData("tr_currency", Some("RUP"), "Unknown currency 'RUP'") - ), - ef( - FailureDetails.EnrichmentFailureMessage - .InputData("tr_currency", Some("RUP"), "Unknown currency 'RUP'") - ), - ef( - FailureDetails.EnrichmentFailureMessage - .InputData("tr_currency", 
Some("RUP"), "Unknown currency 'RUP'") - ) - ) - ) - val currencyInvalidHul: Result = ef( - FailureDetails.EnrichmentFailureMessage.InputData( - "ti_currency", - Some("HUL"), - "Unknown currency 'HUL'" - ) - ).invalidNel - val invalidAppKeyFailure: Result = ef( - FailureDetails.EnrichmentFailureMessage.Simple( - "Open Exchange Rates error, type: [OtherErrors], message: [invalid_app_id]" - ) - ).invalidNel val coTstamp: DateTime = new DateTime(2011, 3, 13, 0, 0) - def e1 = - "SPEC NAME" || "TRANSACTION CURRENCY" | "API KEY" | "TOTAL AMOUNT" | "TOTAL TAX" | "SHIPPING" | "TRANSACTION ITEM CURRENCY" | "TRANSACTION ITEM PRICE" | "DATETIME" | "CONVERTED TUPLE" | - "Invalid transaction currency" !! Some("RUP") ! validAppKey ! Some(11.00) ! Some(1.17) ! Some( - 0.00 - ) ! None ! Some(17.99) ! Some(coTstamp) ! currencyInvalidRup | - "Invalid transaction item currency" !! None ! validAppKey ! Some(12.00) ! Some(0.7) ! Some( - 0.00 - ) ! Some("HUL") ! Some(1.99) ! Some(coTstamp) ! currencyInvalidHul | - "Invalid OER API key" !! None ! "8A8A8A8A8A8A8A8A8A8A8A8AA8A8A8A8" ! Some(13.00) ! Some(3.67) ! Some( - 0.00 - ) ! Some("GBP") ! Some(2.99) ! Some(coTstamp) ! invalidAppKeyFailure |> { - ( - _, - trCurrency, - apiKey, - trAmountTotal, - trAmountTax, - trAmountShipping, - tiCurrency, - tiPrice, - dateTime, - expected - ) => - (for { - e <- CurrencyConversionConf(schemaKey, DeveloperAccount, apiKey, CurrencyUnit.EUR) - .enrichment[Id] - res <- e.convertCurrencies( - trCurrency, - trAmountTotal, - trAmountTax, - trAmountShipping, - tiCurrency, - tiPrice, - dateTime - ) - } yield res) must_== expected - } - - def e2 = - "SPEC NAME" || "TRANSACTION CURRENCY" | "API KEY" | "TOTAL AMOUNT" | "TOTAL TAX" | "SHIPPING" | "TRANSACTION ITEM CURRENCY" | "TRANSACTION ITEM PRICE" | "DATETIME" | "CONVERTED TUPLE" | - "All fields absent" !! None ! validAppKey ! None ! None ! None ! None ! None ! None ! 
ef( - FailureDetails.EnrichmentFailureMessage.InputData( - "collector_tstamp", - None, - "missing" - ) - ).invalidNel | - "All fields absent except currency" !! Some("GBP") ! validAppKey ! None ! None ! None ! Some( - "GBP" - ) ! None ! None ! ef( - FailureDetails.EnrichmentFailureMessage - .InputData("collector_tstamp", None, "missing") - ).invalidNel | - "No transaction currency, tax, or shipping" !! Some("GBP") ! validAppKey ! Some(11.00) ! None ! None ! None ! None ! Some( - coTstamp - ) ! (Some("12.75"), None, None, None).valid | - "No transaction currency or total" !! Some("GBP") ! validAppKey ! None ! Some(2.67) ! Some( - 0.00 - ) ! None ! None ! Some(coTstamp) ! (None, Some("3.09"), Some("0.00"), None).valid | - "No transaction currency" !! None ! validAppKey ! None ! None ! None ! Some("GBP") ! Some( - 12.99 - ) ! Some(coTstamp) ! (None, None, None, Some("15.05")).valid | - "Transaction Item Null" !! Some("GBP") ! validAppKey ! Some(11.00) ! Some(2.67) ! Some(0.00) ! None ! None ! Some( - coTstamp - ) ! (Some("12.75"), Some("3.09"), Some("0.00"), None).valid | - "Valid APP ID and API key" !! None ! validAppKey ! Some(14.00) ! Some(4.67) ! Some(0.00) ! Some( - "GBP" - ) ! Some(10.99) ! Some(coTstamp) ! (None, None, None, Some("12.74")).valid | - "Both Currency Null" !! None ! validAppKey ! Some(11.00) ! Some(2.67) ! Some(0.00) ! None ! Some( - 12.99 - ) ! Some(coTstamp) ! (None, None, None, None).valid | - "Convert to the same currency" !! Some("EUR") ! validAppKey ! Some(11.00) ! Some(2.67) ! Some( - 0.00 - ) ! Some("EUR") ! Some(12.99) ! Some(coTstamp) ! ( - Some("11.00"), - Some("2.67"), - Some("0.00"), - Some("12.99") - ).valid | - "Valid APP ID and API key" !! Some("GBP") ! validAppKey ! Some(16.00) ! Some(2.67) ! Some( - 0.00 - ) ! None ! Some(10.00) ! Some(coTstamp) ! 
(Some("18.54"), Some("3.09"), Some("0.00"), None).valid |> { - ( - _, - trCurrency, - apiKey, - trAmountTotal, - trAmountTax, - trAmountShipping, - tiCurrency, - tiPrice, - dateTime, - expected - ) => - (for { - e <- CurrencyConversionConf(schemaKey, DeveloperAccount, apiKey, CurrencyUnit.EUR).enrichment[Id] - res <- e.convertCurrencies( - trCurrency, - trAmountTotal, - trAmountTax, - trAmountShipping, - tiCurrency, - tiPrice, - dateTime - ) - } yield res) must_== expected - } + case class Input( + trCurrency: Option[String], + trTotal: Option[Double], + trTax: Option[Double], + trShipping: Option[Double], + tiCurrency: Option[String], + tiPrice: Option[Double], + collectorTstamp: Option[DateTime] + ) + + def runEnrichment( + input: Input, + apiKey: String = validAppKey + ) = + for { + e <- CurrencyConversionConf(schemaKey, DeveloperAccount, apiKey, CurrencyUnit.EUR) + .enrichment[Id] + res <- e.convertCurrencies( + input.trCurrency, + input.trTotal, + input.trTax, + input.trShipping, + input.tiCurrency, + input.tiPrice, + input.collectorTstamp + ) + } yield res } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala index 7bec98d2d..b71351eb0 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala @@ -174,7 +174,8 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher client, Processor("spark", "0.0.0"), new DateTime(1500000000L), - input + input, + true ) } diff --git 
a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala index a3d337e4f..6254d5488 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala @@ -47,7 +47,7 @@ class EnrichedEventSpec extends Specification { testField(_.collector_tstamp = "etl_tstamp", _.collector_tstamp) testField(_.event = "event", _.event) testField(_.event_id = "event_id", _.event_id) - testField(_.txn_id = "txn_id", _.txn_id) + testField(_.txn_id = JInteger.valueOf(0), _.txn_id) testField(_.name_tracker = "name_tracker", _.name_tracker) testField(_.v_tracker = "v_tracker", _.v_tracker) testField(_.v_collector = "v_collector", _.v_collector) @@ -97,13 +97,13 @@ class EnrichedEventSpec extends Specification { testField(_.se_action = "se_action", _.se_action) testField(_.se_label = "se_label", _.se_label) testField(_.se_property = "se_property", _.se_property) - testField(_.se_value = "se_value", _.se_value) + testField(_.se_value = JFloat.valueOf("0.0"), _.se_value) testField(_.unstruct_event = "unstruct_event", _.unstruct_event) testField(_.tr_orderid = "tr_orderid", _.tr_orderid) testField(_.tr_affiliation = "tr_affiliation", _.tr_affiliation) - testField(_.tr_total = "tr_total", _.tr_total) - testField(_.tr_tax = "tr_tax", _.tr_tax) - testField(_.tr_shipping = "tr_shipping", _.tr_shipping) + testField(_.tr_total = JFloat.valueOf("0.0"), _.tr_total) + testField(_.tr_tax = JFloat.valueOf("0.0"), _.tr_tax) + testField(_.tr_shipping = JFloat.valueOf("0.0"), _.tr_shipping) testField(_.tr_city = "tr_city", _.tr_city) testField(_.tr_state = "tr_state", _.tr_state) testField(_.tr_country = "tr_country", _.tr_country) @@ -111,7 +111,7 @@ class EnrichedEventSpec extends 
Specification { testField(_.ti_sku = "ti_sku", _.ti_sku) testField(_.ti_name = "ti_name", _.ti_name) testField(_.ti_category = "ti_category", _.ti_category) - testField(_.ti_price = "ti_price", _.ti_price) + testField(_.ti_price = JFloat.valueOf("0.0"), _.ti_price) testField(_.ti_quantity = JInteger.valueOf(0), _.ti_quantity) testField(_.pp_xoffset_min = JInteger.valueOf(0), _.pp_xoffset_min) testField(_.pp_xoffset_max = JInteger.valueOf(0), _.pp_xoffset_max) @@ -149,11 +149,11 @@ class EnrichedEventSpec extends Specification { testField(_.doc_width = JInteger.valueOf(0), _.doc_width) testField(_.doc_height = JInteger.valueOf(0), _.doc_height) testField(_.tr_currency = "tr_currency", _.tr_currency) - testField(_.tr_total_base = "tr_total_base", _.tr_total_base) - testField(_.tr_tax_base = "tr_tax_base", _.tr_tax_base) - testField(_.tr_shipping_base = "tr_shipping_base", _.tr_shipping_base) + testField(_.tr_total_base = JFloat.valueOf("0.0"), _.tr_total_base) + testField(_.tr_tax_base = JFloat.valueOf("0.0"), _.tr_tax_base) + testField(_.tr_shipping_base = JFloat.valueOf("0.0"), _.tr_shipping_base) testField(_.ti_currency = "ti_currency", _.ti_currency) - testField(_.ti_price_base = "ti_price_base", _.ti_price_base) + testField(_.ti_price_base = JFloat.valueOf("0.0"), _.ti_price_base) testField(_.base_currency = "base_currency", _.base_currency) testField(_.geo_timezone = "geo_timezone", _.geo_timezone) testField(_.mkt_clickid = "mkt_clickid", _.mkt_clickid) diff --git a/modules/kinesis/src/main/resources/application.conf b/modules/kinesis/src/main/resources/application.conf index 3d8228cb2..44ed9b377 100644 --- a/modules/kinesis/src/main/resources/application.conf +++ b/modules/kinesis/src/main/resources/application.conf @@ -74,4 +74,8 @@ "collectorPort": 443 "secure": true } + + "featureFlags" : { + "validateEnrichedEvents": true + } } diff --git a/modules/pubsub/src/main/resources/application.conf b/modules/pubsub/src/main/resources/application.conf index 
05df53957..f2fe0d1af 100644 --- a/modules/pubsub/src/main/resources/application.conf +++ b/modules/pubsub/src/main/resources/application.conf @@ -43,4 +43,8 @@ "collectorPort": 443 "secure": true } + + "featureFlags" : { + "validateEnrichedEvents": true + } } diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionItemSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionItemSpec.scala index d52242209..62cfd57ad 100644 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionItemSpec.scala +++ b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionItemSpec.scala @@ -98,7 +98,7 @@ object TransactionItemSpec { "1002", "Red shoes", "", - "4000", + "4000.0", "1", "", "", diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionSpec.scala index a2e7c5e36..3f73b6ee8 100644 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionSpec.scala +++ b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionSpec.scala @@ -88,7 +88,7 @@ object TransactionSpec { "", "order-123", "", - "8000", + "8000.0", "", "", "",