From 2873de8a0aaf6268dee401e2e7423890f7c2b8be Mon Sep 17 00:00:00 2001 From: "pavel.voropaev" Date: Fri, 3 Dec 2021 16:31:46 +0000 Subject: [PATCH] Validate enriched event against atomic schema before emitting (close #517) --- config/config.file.extended.hocon | 16 + config/config.kinesis.extended.hocon | 16 + config/config.pubsub.extended.hocon | 16 + .../EnrichBench.scala | 45 +- .../EtlPipelineBench.scala | 4 +- .../ThriftLoaderBench.scala | 4 +- .../snowplow/enrich/common/fs2/Enrich.scala | 19 +- .../enrich/common/fs2/Environment.scala | 11 +- .../snowplow/enrich/common/fs2/Run.scala | 6 +- .../enrich/common/fs2/config/ConfigFile.scala | 12 +- .../enrich/common/fs2/config/io.scala | 11 + .../enrich/common/fs2/io/Metrics.scala | 22 +- .../enrich/common/fs2/io/StatsDReporter.scala | 3 +- .../schemas/com.acme/output/jsonschema/1-0-0 | 14 + .../schemas/com.acme/test/jsonschema/1-0-1 | 18 + .../call_complete/jsonschema/1-0-2 | 143 +++++ .../hit/jsonschema/1-0-0 | 30 + .../page_view/jsonschema/1-0-0 | 31 + .../contact_creation/jsonschema/1-0-0 | 64 ++ .../spiders_and_robots/jsonschema/1-0-0 | 32 + .../com.mailchimp/subscribe/jsonschema/1-0-0 | 67 ++ .../message_delivered/jsonschema/1-0-0 | 56 ++ .../message_sent/jsonschema/1-0-0 | 111 ++++ .../com.marketo/event/jsonschema/2-0-0 | 601 ++++++++++++++++++ .../com.olark/transcript/jsonschema/1-0-0 | 211 ++++++ .../com.pagerduty/incident/jsonschema/1-0-0 | 207 ++++++ .../incident_assign/jsonschema/1-0-0 | 30 + .../com.sendgrid/processed/jsonschema/2-0-0 | 89 +++ .../signup_form_submitted/jsonschema/1-0-0 | 30 + .../jsonschema/1-0-0 | 167 +++++ .../jsonschema/1-0-0 | 84 +++ .../pii_enrichment_config/jsonschema/2-0-0 | 164 +++++ .../jsonschema/1-0-0 | 211 ++++++ .../yauaa_enrichment_config/jsonschema/1-0-0 | 38 ++ .../segment_webhook_config/jsonschema/1-0-0 | 37 ++ .../atomic/jsonschema/1-0-0 | 489 ++++++++++++++ .../campaign_attribution/jsonschema/1-0-1 | 74 +++ .../contexts/jsonschema/1-0-0 | 32 + 
.../contexts/jsonschema/1-0-1 | 30 + .../jsonschema/1-0-0 | 46 ++ .../enrichments/jsonschema/1-0-0 | 30 + .../geolocation_context/jsonschema/1-0-0 | 41 ++ .../geolocation_context/jsonschema/1-1-0 | 44 ++ .../ip_lookups/jsonschema/2-0-0 | 90 +++ .../mobile_context/jsonschema/1-0-0 | 43 ++ .../payload_data/jsonschema/1-0-0 | 246 +++++++ .../payload_data/jsonschema/1-0-4 | 261 ++++++++ .../referer_parser/jsonschema/2-0-0 | 44 ++ .../screen_view/jsonschema/1-0-0 | 22 + .../unstruct_event/jsonschema/1-0-0 | 25 + .../uri_redirect/jsonschema/1-0-0 | 21 + .../status_change/jsonschema/1-0-0 | 45 ++ .../com.unbounce/form_post/jsonschema/1-0-0 | 38 ++ .../nl.basjes/yauaa_context/jsonschema/1-0-3 | 231 +++++++ .../enrich/common/fs2/EnrichSpec.scala | 6 +- .../common/fs2/blackbox/BlackBoxTesting.scala | 4 +- .../blackbox/adapters/Tp2AdapterSpec.scala | 2 +- ...versionEnrichmentTransactionItemSpec.scala | 2 +- ...yConversionEnrichmentTransactionSpec.scala | 10 +- .../blackbox/misc/TransactionItemSpec.scala | 2 +- .../fs2/blackbox/misc/TransactionSpec.scala | 8 +- .../common/fs2/config/ConfigFileSpec.scala | 9 + .../common/fs2/config/ParsedConfigsSpec.scala | 3 + .../common/fs2/io/StatsDReporterSpec.scala | 10 +- .../enrich/common/fs2/test/Counter.scala | 8 +- .../common/fs2/test/SchemaRegistry.scala | 231 ------- .../common/fs2/test/TestEnvironment.scala | 32 +- .../common/EtlPipeline.scala | 12 +- .../enrichments/EnrichmentManager.scala | 118 +++- .../common/enrichments/Transform.scala | 14 +- .../CurrencyConversionEnrichment.scala | 14 +- .../common/outputs/EnrichedEvent.scala | 208 +++++- .../common/utils/ConversionUtils.scala | 61 +- .../atomic/jsonschema/1-0-0 | 489 ++++++++++++++ .../AcceptInvalid.scala | 21 + .../EtlPipelineSpec.scala | 16 +- .../enrichments/EnrichmentManagerSpec.scala | 153 +++-- .../CurrencyConversionEnrichmentSpec.scala | 444 ++++++++----- .../pii/PiiPseudonymizerEnrichmentSpec.scala | 11 +- .../outputs/EnrichedEventSpec.scala | 20 +- 
.../src/main/resources/application.conf | 4 + .../src/main/resources/application.conf | 4 + .../sources/Source.scala | 6 +- .../good/TransactionItemSpec.scala | 2 +- .../good/TransactionSpec.scala | 2 +- 85 files changed, 5836 insertions(+), 562 deletions(-) create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/output/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/test/jsonschema/1-0-1 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.callrail/call_complete/jsonschema/1-0-2 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/hit/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/page_view/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.hubspot/contact_creation/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.iab.snowplow/spiders_and_robots/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailchimp/subscribe/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailgun/message_delivered/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mandrill/message_sent/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.marketo/event/jsonschema/2-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.olark/transcript/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pagerduty/incident/jsonschema/1-0-0 create mode 100644 
modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pingdom/incident_assign/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.sendgrid/processed/jsonschema/2-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow-website/signup_form_submitted/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/api_request_enrichment_config/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/iab_spiders_and_robots_enrichment/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/2-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/sql_query_enrichment_config/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/yauaa_enrichment_config/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.input-adapters/segment_webhook_config/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/campaign_attribution/jsonschema/1-0-1 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0 create mode 100644 
modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/currency_conversion_config/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/enrichments/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-1-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/ip_lookups/jsonschema/2-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/referer_parser/jsonschema/2-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0 create mode 100644 
modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.statusgator/status_change/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.unbounce/form_post/jsonschema/1-0-0 create mode 100644 modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/nl.basjes/yauaa_context/jsonschema/1-0-3 delete mode 100644 modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/SchemaRegistry.scala create mode 100644 modules/common/src/test/resources/iglu-schemas/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 create mode 100644 modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/AcceptInvalid.scala diff --git a/config/config.file.extended.hocon b/config/config.file.extended.hocon index 227029a11..fbf678c0f 100644 --- a/config/config.file.extended.hocon +++ b/config/config.file.extended.hocon @@ -125,4 +125,20 @@ # Version of the terraform module that deployed the app moduleVersion = 1.0.0 } + + # Optional. To activate/deactivate enrich features that are still in beta + # or that are here for transition. + # This section might change in future versions + "featureFlags" : { + + # Enrich 3.0.0 introduces the validation of the enriched events against atomic schema + # before emitting. + # If set to false, a bad row will be emitted instead of the enriched event + # if validation fails. + # If set to true, invalid enriched events will be emitted, as before. + # WARNING: this feature flag will be removed in a future version + # and it will become impossible to emit invalid enriched events. 
+ # More details: https://github.com/snowplow/enrich/issues/517#issuecomment-1033910690 + "acceptInvalid": false + } } diff --git a/config/config.kinesis.extended.hocon b/config/config.kinesis.extended.hocon index 499a78e21..94eb7c3df 100644 --- a/config/config.kinesis.extended.hocon +++ b/config/config.kinesis.extended.hocon @@ -389,4 +389,20 @@ # Version of the terraform module that deployed the app moduleVersion = 1.0.0 } + + # Optional. To activate/deactivate enrich features that are still in beta + # or that are here for transition. + # This section might change in future versions + "featureFlags" : { + + # Enrich 3.0.0 introduces the validation of the enriched events against atomic schema + # before emitting. + # If set to false, a bad row will be emitted instead of the enriched event + # if validation fails. + # If set to true, invalid enriched events will be emitted, as before. + # WARNING: this feature flag will be removed in a future version + # and it will become impossible to emit invalid enriched events. + # More details: https://github.com/snowplow/enrich/issues/517#issuecomment-1033910690 + "acceptInvalid": false + } } diff --git a/config/config.pubsub.extended.hocon b/config/config.pubsub.extended.hocon index 340b0c5e3..c1e50ee40 100644 --- a/config/config.pubsub.extended.hocon +++ b/config/config.pubsub.extended.hocon @@ -188,4 +188,20 @@ # Version of the terraform module that deployed the app moduleVersion = 1.0.0 } + + # Optional. To activate/deactivate enrich features that are still in beta + # or that are here for transition. + # This section might change in future versions + "featureFlags" : { + + # Enrich 3.0.0 introduces the validation of the enriched events against atomic schema + # before emitting. + # If set to false, a bad row will be emitted instead of the enriched event + # if validation fails. + # If set to true, invalid enriched events will be emitted, as before. 
+ # WARNING: this feature flag will be removed in a future version + # and it will become impossible to emit invalid enriched events. + # More details: https://github.com/snowplow/enrich/issues/517#issuecomment-1033910690 + "acceptInvalid": false + } } diff --git a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala index b2d4ada4b..ebe8c0c1b 100644 --- a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala @@ -18,6 +18,8 @@ import java.util.concurrent.TimeUnit import cats.effect.{ContextShift, IO, Clock, Blocker} +import io.circe.literal._ + import fs2.Stream import com.snowplowanalytics.iglu.client.Client @@ -43,10 +45,41 @@ class EnrichBench { implicit val ioClock: Clock[IO] = Clock.create[IO] + val client = Client.parseDefault[IO](json""" + { + "schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-1", + "data": { + "cacheSize": 500, + "repositories": [ + { + "name": "Iglu Central", + "priority": 0, + "vendorPrefixes": [ "com.snowplowanalytics" ], + "connection": { + "http": { + "uri": "http://iglucentral.com" + } + } + }, + { + "name": "Iglu Central - GCP Mirror", + "priority": 1, + "vendorPrefixes": [ "com.snowplowanalytics" ], + "connection": { + "http": { + "uri": "http://mirror01.iglucentral.com" + } + } + } + ] + } + } + """).rethrowT.unsafeRunSync() + @Benchmark def measureEnrichWithMinimalPayload(state: EnrichBench.BenchState) = { implicit val CS: ContextShift[IO] = state.contextShift - Enrich.enrichWith[IO](IO.pure(EnrichmentRegistry()), state.blocker, Client.IgluCentral, None, (_: Option[Long]) => IO.unit)(state.raw).unsafeRunSync() + Enrich.enrichWith[IO](IO.pure(EnrichmentRegistry()), client, None, (_: Option[Long]) => IO.unit)(state.raw).unsafeRunSync() } @Benchmark 
@@ -83,19 +116,19 @@ object EnrichBench { raw = EnrichSpec.payload[IO] val input = Stream.emits(List( - EnrichSpec.colllectorPayload.copy( + EnrichSpec.collectorPayload.copy( querystring = new BasicNameValuePair("ip", "125.12.2.40") :: EnrichSpec.querystring ), - EnrichSpec.colllectorPayload.copy( + EnrichSpec.collectorPayload.copy( querystring = new BasicNameValuePair("ip", "125.12.2.41") :: EnrichSpec.querystring ), - EnrichSpec.colllectorPayload.copy( + EnrichSpec.collectorPayload.copy( querystring = new BasicNameValuePair("ip", "125.12.2.42") :: EnrichSpec.querystring ), - EnrichSpec.colllectorPayload.copy( + EnrichSpec.collectorPayload.copy( querystring = new BasicNameValuePair("ip", "125.12.2.43") :: EnrichSpec.querystring ), - EnrichSpec.colllectorPayload.copy( + EnrichSpec.collectorPayload.copy( querystring = new BasicNameValuePair("ip", "125.12.2.44") :: EnrichSpec.querystring ), )).repeatN(10).map(cp => Payload(cp.toRaw, IO.unit)).covary[IO] diff --git a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala index ff2aa55e5..e61f594fe 100644 --- a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala @@ -49,13 +49,13 @@ class EtlPipelineBench { @Benchmark def measureProcessEventsIO(state: EtlPipelineBench.BenchState) = { - val payload = EnrichSpec.colllectorPayload + val payload = EnrichSpec.collectorPayload EtlPipeline.processEvents[IO](state.adapterRegistry, state.enrichmentRegistryIo, Client.IgluCentral, Enrich.processor, state.dateTime, Validated.Valid(Some(payload))).unsafeRunSync() } @Benchmark def measureProcessEventsId(state: EtlPipelineBench.BenchState) = { - val payload = EnrichSpec.colllectorPayload + val payload = EnrichSpec.collectorPayload 
EtlPipeline.processEvents[Id](state.adapterRegistry, state.enrichmentRegistryId, state.clientId, Enrich.processor, state.dateTime, Validated.Valid(Some(payload))) } } diff --git a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala index 736402125..5193248fc 100644 --- a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala @@ -30,7 +30,7 @@ class ThriftLoaderBench { @Benchmark def measureNormalize(state: ThriftLoaderBench.BenchState) = { - Enrich.encodeEvent(state.event) + Enrich.serializeEnriched(state.event) } } @@ -42,7 +42,7 @@ object ThriftLoaderBench { @Setup(Level.Trial) def setup(): Unit = { - data = EnrichSpec.colllectorPayload.toRaw + data = EnrichSpec.collectorPayload.toRaw event = new EnrichedEvent() event.setApp_id("foo") diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Enrich.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Enrich.scala index 6ca6e2aea..f3a552cf5 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Enrich.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Enrich.scala @@ -72,7 +72,7 @@ object Enrich { val registry: F[EnrichmentRegistry[F]] = env.enrichments.get.map(_.registry) val enrich: Enrich[F] = { implicit val rl: RegistryLookup[F] = env.registryLookup - enrichWith[F](registry, env.igluClient, env.sentry, env.processor) + enrichWith[F](registry, env.igluClient, env.sentry, env.processor, env.acceptInvalid, env.metrics.invalidCount) } val enriched = @@ -101,7 +101,9 @@ object Enrich { enrichRegistry: F[EnrichmentRegistry[F]], igluClient: Client[F, Json], sentry: 
Option[SentryClient], - processor: Processor + processor: Processor, + acceptInvalid: Boolean, + invalidCount: F[Unit] )( row: Array[Byte] ): F[Result] = { @@ -113,7 +115,16 @@ object Enrich { _ <- Logger[F].debug(payloadToString(payload)) etlTstamp <- Clock[F].realTime(TimeUnit.MILLISECONDS).map(millis => new DateTime(millis)) registry <- enrichRegistry - enriched <- EtlPipeline.processEvents[F](adapterRegistry, registry, igluClient, processor, etlTstamp, payload) + enriched <- EtlPipeline.processEvents[F]( + adapterRegistry, + registry, + igluClient, + processor, + etlTstamp, + payload, + acceptInvalid, + invalidCount + ) } yield (enriched, collectorTstamp) result.handleErrorWith(sendToSentry[F](row, sentry, processor, collectorTstamp)) @@ -153,7 +164,7 @@ object Enrich { ): BadRow.GenericError = { val base64 = new String(Base64.getEncoder.encode(row)) val rawPayload = BadRowPayload.RawPayload(base64) - val failure = Failure.GenericFailure(time, NonEmptyList.one(error.toString)) + val failure = Failure.GenericFailure(time, NonEmptyList.one(ConversionUtils.cleanStackTrace(error))) BadRow.GenericError(processor, failure, rawPayload) } diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Environment.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Environment.scala index 682590412..dea076101 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Environment.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Environment.scala @@ -77,6 +77,8 @@ import com.snowplowanalytics.snowplow.enrich.common.fs2.config.io.Input.Kinesis * @param streamsSettings parameters used to configure the streams * @param region region in the cloud where enrich runs * @param cloud cloud where enrich runs (AWS or GCP) + * @param acceptInvalid Whether enriched event not valid against atomic schema should be + * emitted. 
If false they will be emitted as bad rows. * @tparam A type emitted by the source (e.g. `ConsumerRecord` for PubSub). * getPayload must be defined for this type, as well as checkpointing */ @@ -103,7 +105,8 @@ final case class Environment[F[_], A]( processor: Processor, streamsSettings: Environment.StreamsSettings, region: Option[String], - cloud: Option[Telemetry.Cloud] + cloud: Option[Telemetry.Cloud], + acceptInvalid: Boolean ) object Environment { @@ -147,7 +150,8 @@ object Environment { processor: Processor, maxRecordSize: Int, cloud: Option[Telemetry.Cloud], - getRegion: => Option[String] + getRegion: => Option[String], + acceptInvalid: Boolean ): Resource[F, Environment[F, A]] = { val file = parsedConfigs.configFile for { @@ -189,7 +193,8 @@ object Environment { processor, StreamsSettings(file.concurrency, maxRecordSize), getRegionFromConfig(file).orElse(getRegion), - cloud + cloud, + acceptInvalid ) } diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Run.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Run.scala index 853403091..d846fb1e5 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Run.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/Run.scala @@ -97,7 +97,8 @@ object Run { processor, maxRecordSize, cloud, - getRegion + getRegion, + file.featureFlags.acceptInvalid ) runEnvironment[F, Array[Byte]](env) case _ => @@ -116,7 +117,8 @@ object Run { processor, maxRecordSize, cloud, - getRegion + getRegion, + file.featureFlags.acceptInvalid ) runEnvironment[F, A](env) } diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFile.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFile.scala index 04fe30307..644b11ae9 100644 --- 
a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFile.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFile.scala @@ -27,7 +27,7 @@ import pureconfig.ConfigSource import pureconfig.module.catseffect.syntax._ import pureconfig.module.circe._ -import com.snowplowanalytics.snowplow.enrich.common.fs2.config.io.{Concurrency, Input, Monitoring, Output, Outputs, Telemetry} +import com.snowplowanalytics.snowplow.enrich.common.fs2.config.io.{Concurrency, FeatureFlags, Input, Monitoring, Output, Outputs, Telemetry} /** * Parsed HOCON configuration file @@ -37,6 +37,7 @@ import com.snowplowanalytics.snowplow.enrich.common.fs2.config.io.{Concurrency, * @param assetsUpdatePeriod time after which assets should be updated, in minutes * @param monitoring configuration for sentry and metrics * @param telemetry configuration for telemetry + * @param featureFlags to activate/deactivate enrich features */ final case class ConfigFile( input: Input, @@ -44,7 +45,8 @@ final case class ConfigFile( concurrency: Concurrency, assetsUpdatePeriod: Option[FiniteDuration], monitoring: Option[Monitoring], - telemetry: Telemetry + telemetry: Telemetry, + featureFlags: FeatureFlags ) object ConfigFile { @@ -55,13 +57,13 @@ object ConfigFile { implicit val configFileDecoder: Decoder[ConfigFile] = deriveConfiguredDecoder[ConfigFile].emap { - case ConfigFile(_, _, _, Some(aup), _, _) if aup._1 <= 0L => + case ConfigFile(_, _, _, Some(aup), _, _, _) if aup._1 <= 0L => "assetsUpdatePeriod in config file cannot be less than 0".asLeft // TODO: use newtype // Remove pii output if streamName and region empty - case c @ ConfigFile(_, Outputs(good, Some(Output.Kinesis(s, _, _, _, _, _, _, _, _, _, _, _, _)), bad), _, _, _, _) if s.isEmpty => + case c @ ConfigFile(_, Outputs(good, Some(Output.Kinesis(s, _, _, _, _, _, _, _, _, _, _, _, _)), bad), _, _, _, _, _) if s.isEmpty => c.copy(output = 
Outputs(good, None, bad)).asRight // Remove pii output if topic empty - case c @ ConfigFile(_, Outputs(good, Some(Output.PubSub(t, _, _, _, _)), bad), _, _, _, _) if t.isEmpty => + case c @ ConfigFile(_, Outputs(good, Some(Output.PubSub(t, _, _, _, _)), bad), _, _, _, _, _) if t.isEmpty => c.copy(output = Outputs(good, None, bad)).asRight case other => other.asRight } diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/io.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/io.scala index c13a1810d..08fcdc64c 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/io.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/io.scala @@ -353,4 +353,15 @@ object io { implicit val telemetryEncoder: Encoder[Telemetry] = deriveConfiguredEncoder[Telemetry] } + + case class FeatureFlags( + acceptInvalid: Boolean + ) + + object FeatureFlags { + implicit val telemetryDecoder: Decoder[FeatureFlags] = + deriveConfiguredDecoder[FeatureFlags] + implicit val telemetryEncoder: Encoder[FeatureFlags] = + deriveConfiguredEncoder[FeatureFlags] + } } diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/io/Metrics.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/io/Metrics.scala index 6e71d5bdd..9a7313ba1 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/io/Metrics.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/io/Metrics.scala @@ -44,6 +44,9 @@ trait Metrics[F[_]] { /** Increment bad events */ def badCount: F[Unit] + + /** Increment invalid enriched events count */ + def invalidCount: F[Unit] } object Metrics { @@ -53,12 +56,14 @@ object Metrics { val RawCounterName = "raw" val GoodCounterName = "good" val BadCounterName = 
"bad" + val InvalidCounterName = "invalid_enriched" final case class MetricSnapshot( enrichLatency: Option[Long], // milliseconds rawCount: Int, goodCount: Int, - badCount: Int + badCount: Int, + invalidCount: Int ) trait Reporter[F[_]] { @@ -123,13 +128,18 @@ object Metrics { def badCount: F[Unit] = refsStatsd.badCount.update(_ + 1) *> refsStdout.badCount.update(_ + 1) + + def invalidCount: F[Unit] = + refsStatsd.invalidCount.update(_ + 1) *> + refsStdout.invalidCount.update(_ + 1) } private final case class MetricRefs[F[_]]( latency: Ref[F, Option[Long]], // milliseconds rawCount: Ref[F, Int], goodCount: Ref[F, Int], - badCount: Ref[F, Int] + badCount: Ref[F, Int], + invalidCount: Ref[F, Int] ) private object MetricRefs { @@ -139,7 +149,8 @@ object Metrics { rawCounter <- Ref.of[F, Int](0) goodCounter <- Ref.of[F, Int](0) badCounter <- Ref.of[F, Int](0) - } yield MetricRefs(latency, rawCounter, goodCounter, badCounter) + invalidCounter <- Ref.of[F, Int](0) + } yield MetricRefs(latency, rawCounter, goodCounter, badCounter, invalidCounter) def snapshot[F[_]: Monad](refs: MetricRefs[F]): F[MetricSnapshot] = for { @@ -147,7 +158,8 @@ object Metrics { rawCount <- refs.rawCount.getAndSet(0) goodCount <- refs.goodCount.getAndSet(0) badCount <- refs.badCount.getAndSet(0) - } yield MetricSnapshot(latency, rawCount, goodCount, badCount) + invalidCount <- refs.invalidCount.getAndSet(0) + } yield MetricSnapshot(latency, rawCount, goodCount, badCount, invalidCount) } def reporterStream[F[_]: Sync: Timer: ContextShift]( @@ -173,6 +185,7 @@ object Metrics { _ <- logger.info(s"${MetricsReporters.normalizeMetric(config.prefix, RawCounterName)} = ${snapshot.rawCount}") _ <- logger.info(s"${MetricsReporters.normalizeMetric(config.prefix, GoodCounterName)} = ${snapshot.goodCount}") _ <- logger.info(s"${MetricsReporters.normalizeMetric(config.prefix, BadCounterName)} = ${snapshot.badCount}") + _ <- logger.info(s"${MetricsReporters.normalizeMetric(config.prefix, InvalidCounterName)} = 
${snapshot.invalidCount}") _ <- snapshot.enrichLatency .map(latency => logger.info(s"${MetricsReporters.normalizeMetric(config.prefix, LatencyGaugeName)} = $latency")) .getOrElse(Sync[F].unit) @@ -186,6 +199,7 @@ object Metrics { def rawCount(nb: Int): F[Unit] = Applicative[F].unit def goodCount: F[Unit] = Applicative[F].unit def badCount: F[Unit] = Applicative[F].unit + def invalidCount: F[Unit] = Applicative[F].unit } } diff --git a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/io/StatsDReporter.scala b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/io/StatsDReporter.scala index 5a08ee0e6..4b6a38d45 100644 --- a/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/io/StatsDReporter.scala +++ b/modules/common-fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/io/StatsDReporter.scala @@ -76,7 +76,8 @@ object StatsDReporter { List( Metrics.RawCounterName -> snapshot.rawCount.toString, Metrics.GoodCounterName -> snapshot.goodCount.toString, - Metrics.BadCounterName -> snapshot.badCount.toString + Metrics.BadCounterName -> snapshot.badCount.toString, + Metrics.InvalidCounterName -> snapshot.invalidCount.toString ) ++ snapshot.enrichLatency.map(l => Metrics.LatencyGaugeName -> l.toString) def sendMetric[F[_]: ContextShift: Sync]( diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/output/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/output/jsonschema/1-0-0 new file mode 100644 index 000000000..942562087 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/output/jsonschema/1-0-0 @@ -0,0 +1,14 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.acme", + "name": "output", + "format": "jsonschema", + "version": "1-0-0" + }, + "properties": { 
+ "output": { + "type": "string" + } + } +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/test/jsonschema/1-0-1 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/test/jsonschema/1-0-1 new file mode 100644 index 000000000..58d9b2f51 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.acme/test/jsonschema/1-0-1 @@ -0,0 +1,18 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.acme", + "name": "test", + "format": "jsonschema", + "version": "1-0-1" + }, + "properties": { + "path": { + "properties": { + "id": { + "type": "integer" + } + } + } + } +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.callrail/call_complete/jsonschema/1-0-2 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.callrail/call_complete/jsonschema/1-0-2 new file mode 100644 index 000000000..4328e6cd7 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.callrail/call_complete/jsonschema/1-0-2 @@ -0,0 +1,143 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a CallRail call completion event", + "self": { + "vendor": "com.callrail", + "name": "call_complete", + "format": "jsonschema", + "version": "1-0-2" + }, + + "type": "object", + "properties": { + "answered": { + "type": ["boolean", "null"] + }, + "customer_city": { + "type": ["string", "null"] + }, + "customer_country": { + "type": ["string", "null"] + }, + "customer_name": { + "type": ["string", "null"] + }, + "customer_phone_number": { + "type": ["string", "null"] + }, + "customer_state": { + "type": ["string", "null"] + }, + "customer_zip": { + "type": ["string", "null"] + }, + "callercity": { + "type": ["string", "null"] + }, + "callercountry": { + "type": ["string", 
"null"] + }, + "callername": { + "type": ["string", "null"] + }, + "callernum": { + "type": ["string", "null"] + }, + "callerstate": { + "type": ["string", "null"] + }, + "callerzip": { + "type": ["string", "null"] + }, + "callsource": { + "type": ["string", "null"] + }, + "datetime": { + "type": "string", + "format": "date-time" + }, + "destinationnum": { + "type": ["string", "null"] + }, + "duration": { + "type": ["number", "null"] + }, + "first_call": { + "type": ["boolean", "null"] + }, + "device_type": { + "type": ["string", "null"] + }, + "ga": { + "type": ["string", "null"] + }, + "gclid": { + "type": ["string", "null"] + }, + "id": { + "type": "string" + }, + "ip": { + "type": ["string", "null"] + }, + "keywords": { + "type": ["string", "null"] + }, + "kissmetrics_id": { + "type": ["string", "null"] + }, + "landingpage": { + "type": ["string", "null"] + }, + "recording": { + "type": ["string", "null"] + }, + "referrer": { + "type": ["string", "null"] + }, + "referrermedium": { + "type": ["string", "null"] + }, + "trackingnum": { + "type": ["string", "null"] + }, + "transcription": { + "type": ["string", "null"] + }, + "utm_campaign": { + "type": ["string", "null"] + }, + "utm_content": { + "type": ["string", "null"] + }, + "utm_medium": { + "type": ["string", "null"] + }, + "utm_source": { + "type": ["string", "null"] + }, + "utm_term": { + "type": ["string", "null"] + }, + "utma": { + "type": ["string", "null"] + }, + "utmb": { + "type": ["string", "null"] + }, + "utmc": { + "type": ["string", "null"] + }, + "utmv": { + "type": ["string", "null"] + }, + "utmx": { + "type": ["string", "null"] + }, + "utmz": { + "type": ["string", "null"] + } + }, + "required": ["datetime", "id"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/hit/jsonschema/1-0-0 
b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/hit/jsonschema/1-0-0 new file mode 100644 index 000000000..b71cb200c --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/hit/jsonschema/1-0-0 @@ -0,0 +1,30 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Google Analytics hit entity", + "self": { + "vendor": "com.google.analytics.measurement-protocol", + "name": "hit", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "type": { + "enum": [ + "event", + "exception", + "item", + "pageview", + "screenview", + "social", + "timing", + "transaction" + ] + }, + "nonInteractionHit": { + "type": ["boolean", "null"] + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/page_view/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/page_view/jsonschema/1-0-0 new file mode 100644 index 000000000..e813a7baa --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.google.analytics.measurement-protocol/page_view/jsonschema/1-0-0 @@ -0,0 +1,31 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Google Analytics pageview hit", + "self": { + "vendor": "com.google.analytics.measurement-protocol", + "name": "page_view", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "documentLocationUrl": { + "type": ["string", "null"], + "maxLength": 2048 + }, + "documentHostName": { + "type": ["string", "null"], + "maxLength": 100 + }, + "documentPath": { + "type": ["string", 
"null"], + "maxLength": 2048 + }, + "documentTitle": { + "type": ["string", "null"], + "maxLength": 1500 + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.hubspot/contact_creation/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.hubspot/contact_creation/jsonschema/1-0-0 new file mode 100644 index 000000000..280c3847f --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.hubspot/contact_creation/jsonschema/1-0-0 @@ -0,0 +1,64 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Webhook event for creating a new contact.", + "self": { + "vendor": "com.hubspot", + "name": "contact_creation", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "objectId": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647, + "description": "The ID of the object that was created/changed/deleted. For contacts this is the vid; for companies, the companyId; and for deals the dealId." + }, + "changeSource": { + "type": "string", + "maxLength": 128, + "description": "The source of this change. Can be any of the change sources that you find on contact property histories." + }, + "eventId": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647, + "description": "The unique ID of the event that triggered this notification." + }, + "subscriptionId": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647, + "description": "The ID of the subscription that caused us to send you a notification of this event." + }, + "portalId": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647, + "description": "The customer's portalId that this event came from." + }, + "appId": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647, + "description": "The ID of your application. 
(In case you have multiple applications pointing to the same webhook URL.)" + }, + "occurredAt": { + "type": "string", + "format": "date-time", + "description": "When this event occurred." + }, + "attemptNumber": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647, + "description": "Which attempt this is to notify your service of this event (starting at 0). If your service times out or throws an error as described in 'Retries' below, we will attempt to send the notification to your service again." + }, + "changeFlag": { + "type": "string", + "maxLength": 128 + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.iab.snowplow/spiders_and_robots/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.iab.snowplow/spiders_and_robots/jsonschema/1-0-0 new file mode 100644 index 000000000..84d7f7fda --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.iab.snowplow/spiders_and_robots/jsonschema/1-0-0 @@ -0,0 +1,32 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a context generated by the IAB Spiders & Robots enrichment", + "self": { + "vendor": "com.iab.snowplow", + "name": "spiders_and_robots", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "spiderOrRobot": { + "description": "true if the IP address or user agent checked against the list is a spider or robot, false otherwise", + "type": "boolean" + }, + "category": { + "description": "Category based on activity if the IP/UA is a spider or robot, BROWSER otherwise", + "enum": ["SPIDER_OR_ROBOT", "ACTIVE_SPIDER_OR_ROBOT", "INACTIVE_SPIDER_OR_ROBOT", "BROWSER"] + }, + "reason": { + "description": "Type of failed check if the IP/UA is a spider or robot, PASSED_ALL otherwise", + "enum": ["FAILED_IP_EXCLUDE", "FAILED_UA_INCLUDE",
"FAILED_UA_EXCLUDE", "PASSED_ALL"] + }, + "primaryImpact": { + "description": "Whether the spider or robot would affect page impression measurement, ad impression measurement, both or none", + "enum": ["PAGE_IMPRESSIONS", "AD_IMPRESSIONS", "PAGE_AND_AD_IMPRESSIONS", "UNKNOWN", "NONE"] + } + }, + "required": ["spiderOrRobot", "category", "reason", "primaryImpact"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailchimp/subscribe/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailchimp/subscribe/jsonschema/1-0-0 new file mode 100644 index 000000000..7b339054e --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailchimp/subscribe/jsonschema/1-0-0 @@ -0,0 +1,67 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Mailchimp subscribe event", + "self": { + "vendor": "com.mailchimp", + "name": "subscribe", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "data": { + "type": "object", + "properties": { + "email": { + "type": "string" + }, + "email_type": { + "type": "string" + }, + "id": { + "type": "string" + }, + "web_id": { + "type": "string" + }, + "ip_opt": { + "type": "string" + }, + "ip_signup": { + "type": "string" + }, + "list_id": { + "type": "string" + }, + "merges": { + "type": "object", + "properties": { + "EMAIL": { + "type": ["string", "null"] + }, + "FNAME": { + "type": ["string", "null"] + }, + "LNAME": { + "type": ["string", "null"] + }, + "INTERESTS": { + "type": ["string", "null"] + } + }, + "additionalProperties": true + } + }, + "additionalProperties": false + }, + "fired_at": { + "type": "string", + "format": "date-time" + }, + "type": { + "type": "string" + } + }, + "additionalProperties": false +} diff --git 
a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailgun/message_delivered/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailgun/message_delivered/jsonschema/1-0-0 new file mode 100644 index 000000000..30acbb975 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mailgun/message_delivered/jsonschema/1-0-0 @@ -0,0 +1,56 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Mailgun message delivered event", + "self": { + "vendor": "com.mailgun", + "name": "message_delivered", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "event": { + "description": "Event name ('delivered').", + "type": "string", + "maxLength": 16 + }, + "recipient": { + "description": "Intended recipient.", + "type": "string", + "format": "email", + "maxLength": 256 + }, + "domain": { + "description": "Domain that sent the original message.", + "type": "string", + "format": "hostname", + "maxLength": 256 + }, + "messageHeaders": { + "description": "String list of all MIME headers of the original message dumped to a JSON string (order of headers preserved).", + "type": "string" + }, + "messageId": { + "description": "The ID of the message that was delivered.", + "type": "string", + "maxLength": 1024 + }, + "timestamp": { + "description": "Timestamp in ISO8601 format.", + "type": "string", + "format": "date-time" + }, + "token": { + "description": "Randomly generated string with length 50 (see securing webhooks).", + "type": "string", + "maxLength": 50, + "minLength": 50 + }, + "signature": { + "description": "String with hexadecimal digits generated by the HMAC algorithm (see securing webhooks).", + "type": "string", + "maxLength": 128 + } + }, + "additionalProperties": true +} diff --git
a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mandrill/message_sent/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mandrill/message_sent/jsonschema/1-0-0 new file mode 100644 index 000000000..755d23ea8 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.mandrill/message_sent/jsonschema/1-0-0 @@ -0,0 +1,111 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Mandrill message sent event", + "self": { + "vendor": "com.mandrill", + "name": "message_sent", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "msg": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "_version": { + "type": "string" + }, + "clicks": { + "type": "array" + }, + "email": { + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "user_id": { + "type": "number" + } + }, + "additionalProperties": true + }, + "opens": { + "type": "array" + }, + "sender": { + "type": "string" + }, + "state": { + "type": "string" + }, + "subject": { + "type": "string" + }, + "tags": { + "type": "array", + "items": { + "type": "string" + } + }, + "ts": { + "type": "string", + "format": "date-time" + }, + "reject": { + "type": ["string", "null"] + }, + "resends": { + "type": "array" + }, + "smtp_events": { + "type": "array", + "items": { + "type": "object", + "properties": { + "ts": { + "type": "string", + "format": "date-time" + }, + "type": { + "type": "string" + }, + "diag": { + "type": "string" + }, + "source_ip": { + "type": "string" + }, + "destination_ip": { + "type": "string" + }, + "size": { + "type": "integer" + } + }, + "additionalProperties": true + } + }, + "subaccount": { + "type": ["string", "null"] + }, + "template": { + "type": ["string", "null"] + } + }, + "additionalProperties": 
false + }, + "ts": { + "type": "string", + "format": "date-time" + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.marketo/event/jsonschema/2-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.marketo/event/jsonschema/2-0-0 new file mode 100644 index 000000000..b7f5de6c1 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.marketo/event/jsonschema/2-0-0 @@ -0,0 +1,601 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Marketo description", + "self": { + "vendor": "com.marketo", + "name": "event", + "format": "jsonschema", + "version": "2-0-0" + }, + "type": "object", + "properties": { + "name": { + "type": "string", + "maxLength": 128 + }, + "description": { + "type": ["string", "null"], + "maxLength": 255 + }, + "step": { + "type": ["integer", "null"], + "minimum": 0, + "maximum": 32767 + }, + "campaign": { + "type": ["object", "null"], + "properties": { + "id": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "name": { + "type": "string", + "maxLength": 255 + }, + "description": { + "type": ["string", "null"], + "maxLength": 255 + } + }, + "required": ["id", "name"] + }, + "company": { + "type": ["object", "null"], + "properties": { + "account_owner_email_address": { + "type": "string", + "format": "email" + }, + "account_owner_first_name": { + "type": "string", + "maxLength": 255 + }, + "account_owner_last_name": { + "type": "string", + "maxLength": 255 + }, + "annual_revenue": { + "type": "number", + "minimum": 0, + "maximum": 2147483647 + }, + "billing_address": { + "type": "string", + "maxLength": 255 + }, + "billing_city": { + "type": "string", + "maxLength": 255 + }, + "billing_country": { + "type": "string", + "maxLength": 255 + }, + "billing_postal_code": { + "type": "string", + "maxLength": 255 + }, + "billing_state": 
{ + "type": "string", + "maxLength": 255 + }, + "name": { + "type": "string", + "maxLength": 255 + }, + "notes": { + "type": "string", + "maxLength": 255 + }, + "industry": { + "type": "string", + "maxLength": 255 + }, + "main_phone": { + "type": "string", + "maxLength": 255 + }, + "num_employees": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "parent_company_name": { + "type": "string", + "maxLength": 255 + }, + "sic_code": { + "type": "string", + "maxLength": 40 + }, + "site": { + "type": "string", + "maxLength": 255 + }, + "website": { + "type": "string", + "maxLength": 255 + } + } + }, + "lead": { + "type": ["object", "null"], + "properties": { + "acquisition_date": { + "type": "string", + "format": "date-time" + }, + "acquisition_program_name": { + "type": "string", + "maxLength": 255 + }, + "acquisition_program": { + "type": "string", + "maxLength": 255 + }, + "address": { + "type": "string", + "maxLength": 255 + }, + "anonymous_ip": { + "type": "string", + "maxLength": 45 + }, + "black_listed": { + "type": "boolean" + }, + "city": { + "type": "string", + "maxLength": 255 + }, + "country": { + "type": "string", + "maxLength": 255 + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "date_of_birth": { + "type": "string", + "format": "date" + }, + "department": { + "type": "string", + "maxLength": 255 + }, + "do_not_call_reason": { + "type": "string", + "maxLength": 512 + }, + "do_not_call": { + "type": "boolean" + }, + "email_address": { + "type": "string", + "format": "email" + }, + "email_invalid_cause": { + "type": "string", + "maxLength": 255 + }, + "email_invalid": { + "type": "boolean" + }, + "email_suspended_at": { + "type": "string", + "format": "date-time" + }, + "email_suspended_cause": { + "type": "string", + "maxLength": 2000 + }, + "email_suspended": { + "type": "boolean" + }, + "fax_number": { + "type": "string", + "maxLength": 255 + }, + "first_name": { + "type": "string", + "maxLength": 255 + }, + 
"full_name": { + "type": "string", + "maxLength": 255 + }, + "inferred_city": { + "type": "string", + "maxLength": 255 + }, + "inferred_company": { + "type": "string", + "maxLength": 255 + }, + "inferred_country": { + "type": "string", + "maxLength": 255 + }, + "inferred_metropolitan_area": { + "type": "string", + "maxLength": 255 + }, + "inferred_phone_area_code": { + "type": "string", + "maxLength": 255 + }, + "inferred_postal_code": { + "type": "string", + "maxLength": 255 + }, + "inferred_state_region": { + "type": "string", + "maxLength": 255 + }, + "is_customer": { + "type": "boolean" + }, + "is_partner": { + "type": "boolean" + }, + "job_title": { + "type": "string", + "maxLength": 255 + }, + "last_interesting_moment_date": { + "type": "string", + "format": "date-time" + }, + "last_interesting_moment_description": { + "type": "string", + "maxLength": 255 + }, + "last_interesting_moment_source": { + "type": "string", + "maxLength": 255 + }, + "last_interesting_moment_type": { + "type": "string", + "maxLength": 255 + }, + "last_name": { + "type": "string", + "maxLength": 255 + }, + "lead_owner_email_address": { + "type": "string", + "format": "email" + }, + "lead_owner_first_name": { + "type": "string", + "maxLength": 255 + }, + "lead_owner_job_title": { + "type": "string", + "maxLength": 255 + }, + "lead_owner_last_name": { + "type": "string", + "maxLength": 255 + }, + "lead_owner_phone_number": { + "type": "string", + "maxLength": 255 + }, + "lead_rating": { + "type": "string", + "maxLength": 255 + }, + "lead_score": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "lead_source": { + "type": "string", + "maxLength": 255 + }, + "lead_status": { + "type": "string", + "maxLength": 255 + }, + "lead_marketing_suspended": { + "type": "boolean" + }, + "facebook_display_name": { + "type": "string", + "maxLength": 255 + }, + "facebook_id": { + "type": "string", + "maxLength": 512 + }, + "facebook_photo_url": { + "type": "string", + "maxLength": 
255 + }, + "facebook_profile_url": { + "type": "string", + "maxLength": 255 + }, + "facebook_reach": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "facebook_referred_enrollments": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "facebook_referred_visits": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "gender": { + "type": "string", + "maxLength": 6 + }, + "last_referred_enrollment": { + "type": "string", + "format": "date-time" + }, + "last_referred_visit": { + "type": "string", + "format": "date-time" + }, + "linkedin_display_name": { + "type": "string", + "maxLength": 255 + }, + "linkedin_id": { + "type": "string", + "maxLength": 512 + }, + "linkedin_photo_url": { + "type": "string", + "maxLength": 512 + }, + "linkedin_profile_url": { + "type": "string", + "maxLength": 512 + }, + "linkedin_reach": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "linkedin_referred_enrollments": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "linkedin_referred_visits": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "syndication_id": { + "type": "string", + "maxLength": 512 + }, + "total_referred_enrollments": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "total_referred_visits": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "twitter_display_name": { + "type": "string", + "maxLength": 255 + }, + "twitter_id": { + "type": "string", + "maxLength": 512 + }, + "twitter_photo_url": { + "type": "string", + "maxLength": 512 + }, + "twitter_profile_url": { + "type": "string", + "maxLength": 512 + }, + "twitter_reach": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "twitter_referred_enrollments": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "twitter_referred_visits": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "middle_name": { + "type": 
"string", + "maxLength": 255 + }, + "mobile_phone_number": { + "type": "string", + "maxLength": 255 + }, + "number_of_optys": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "original_referrer": { + "type": "string", + "maxLength": 255 + }, + "original_search_engine": { + "type": "string", + "maxLength": 255 + }, + "original_search_phrase": { + "type": "string", + "maxLength": 255 + }, + "original_source_info": { + "type": "string", + "maxLength": 255 + }, + "original_source_type": { + "type": "string", + "maxLength": 255 + }, + "person_notes": { + "type": "string", + "maxLength": 512 + }, + "person_timezone": { + "type": "string", + "maxLength": 255 + }, + "phone_number": { + "type": "string", + "maxLength": 255 + }, + "postal_code": { + "type": "string", + "maxLength": 255 + }, + "priority": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "registration_source_info": { + "type": "string", + "maxLength": 2000 + }, + "registration_source_type": { + "type": "string", + "maxLength": 255 + }, + "relative_score": { + "type": "integer", + "minimum": 0, + "maximum": 5 + }, + "relative_urgency": { + "type": "integer", + "minimum": 0, + "maximum": 5 + }, + "role": { + "type": "string", + "maxLength": 50 + }, + "salutation": { + "type": "string", + "maxLength": 255 + }, + "state": { + "type": "string", + "maxLength": 255 + }, + "total_opty_amount": { + "type": "number", + "minimum": 0, + "maximum": 2147483647 + }, + "total_opty_expected_revenue": { + "type": "number", + "minimum": 0, + "maximum": 2147483647 + }, + "unsubscribed_reason": { + "type": "string", + "maxLength": 512 + }, + "unsubscribed": { + "type": "boolean" + }, + "updated_at": { + "type": "string", + "format": "date-time" + }, + "urgency": { + "type": "integer", + "minimum": 0, + "maximum": 32767 + } + } + }, + "program": { + "type": ["object", "null"], + "properties": { + "id": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "name": { + "type": 
"string", + "maxLength": 255 + }, + "description": { + "type": "string", + "maxLength": 2000 + } + } + }, + "social": { + "type": ["object", "null"], + "properties": { + "promo_code": { + "type": "string", + "maxLength": 255 + }, + "share_url": { + "type": "string", + "maxLength": 2000 + }, + "email": { + "type": "string", + "format": "email" + } + } + }, + "datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "forward_to_friend_link": { + "type": ["string", "null"], + "maxLength": 255 + }, + "munchkinId": { + "type": ["string", "null"], + "maxLength": 255 + }, + "unsubscribe_link": { + "type": ["string", "null"], + "maxLength": 2000 + }, + "view_as_webpage_link": { + "type": ["string", "null"], + "maxLength": 2000 + }, + "sp_send_alert_info": { + "type": ["string", "null"], + "maxLength": 2000 + } + }, + "additionalProperties": true, + "required": ["name"] +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.olark/transcript/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.olark/transcript/jsonschema/1-0-0 new file mode 100644 index 000000000..5839eb007 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.olark/transcript/jsonschema/1-0-0 @@ -0,0 +1,211 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for an Olark transcript event", + "self": { + "vendor": "com.olark", + "name": "transcript", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "tags": { + "type": "array", + "items": { + "type": "string", + "maxLength": 128 + } + }, + "items": { + "type": "array", + "items": { + "type": "object", + "properties": { + "body": { + "type": "string" + }, + "timestamp": { + "type": "string", + "format": "date-time" + }, + "operatorId": { + "type": "string", + "maxLength": 64 + }, + "kind": { + "type": "string", + 
"maxLength": 64 + }, + "nickname": { + "type": "string", + "maxLength": 64 + }, + "visitorNickname": { + "type": "string", + "maxLength": 64 + } + }, + "additionalProperties": false + } + }, + "groups": { + "type": "array", + "items": { + "type": "object", + "properties": { + "kind": { + "type": "string", + "maxLength": 128 + }, + "name": { + "type": "string", + "maxLength": 128 + }, + "id": { + "type": "string", + "maxLength": 128 + } + }, + "additionalProperties": false + } + }, + "visitor": { + "type": "object", + "properties": { + "organization": { + "type": "string", + "maxLength": 128 + }, + "city": { + "type": "string", + "maxLength": 128 + }, + "ip": { + "type": "string", + "format": "ipv4" + }, + "phoneNumber": { + "type": "string", + "maxLength": 64 + }, + "browser": { + "type": "string", + "maxLength": 128 + }, + "fullName": { + "type": "string", + "maxLength": 256 + }, + "emailAddress": { + "type": "string", + "format": "email", + "maxLength": 256 + }, + "country": { + "type": "string", + "maxLength": 128 + }, + "operatingSystem": { + "type": "string", + "maxLength": 128 + }, + "id": { + "type": "string", + "maxLength": 64 + }, + "countryCode": { + "type": "string", + "maxLength": 8 + }, + "region": { + "type": "string", + "maxLength": 8 + }, + "conversationBeginPage": { + "type": "string", + "format": "uri", + "maxLength": 8192 + }, + "chatFeedback": { + "type": "object", + "properties": { + "knowledge": { + "type": "integer", + "maximum": 32767, + "minimum": 0 + }, + "friendliness": { + "type": "integer", + "maximum": 32767, + "minimum": 0 + }, + "responsiveness": { + "type": "integer", + "maximum": 32767, + "minimum": 0 + }, + "overallChat": { + "type": "integer", + "maximum": 32767, + "minimum": 0 + }, + "comments": { + "type": "string" + } + }, + "additionalProperties": false + }, + "kind": { + "type": "string", + "maxLength": 64 + } + }, + "additionalProperties": true + }, + "id": { + "type": "string", + "maxLength": 128 + }, + "kind": { + "type": 
"string", + "maxLength": 64 + }, + "manuallySubmitted": { + "type": "boolean" + }, + "operators": { + "type": "object", + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "username": { + "type": "string", + "maxLength": 128 + }, + "emailAddress": { + "type": "string", + "format": "email", + "maxLength": 256 + }, + "id": { + "type": "string", + "maxLength": 128 + }, + "kind": { + "type": "string", + "maxLength": 64 + }, + "nickname": { + "type": "string", + "maxLength": 64 + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pagerduty/incident/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pagerduty/incident/jsonschema/1-0-0 new file mode 100644 index 000000000..c490000fb --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pagerduty/incident/jsonschema/1-0-0 @@ -0,0 +1,207 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a PagerDuty incident event", + "self": { + "vendor": "com.pagerduty", + "name": "incident", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "id": { + "type": "string" + }, + "created_on": { + "type": "string", + "format": "date-time" + }, + "data": { + "type": "object", + "properties": { + "incident": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "incident_number": { + "type": "integer" + }, + "created_on": { + "type": "string", + "format": "date-time" + }, + "status": { + "type": "string" + }, + "pending_actions": { + "type": "array" + }, + "html_url": { + "type": "string" + }, + "incident_key": { + "type": [ "string", "null" ] + }, + "service": { + "type": "object", + "properties": { + "id": 
{ + "type": "string" + }, + "name": { + "type": "string" + }, + "html_url": { + "type": "string" + }, + "deleted_at": { + "type": "null" + } + }, + "additionalProperties": false + }, + "escalation_policy": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "deleted_at": { + "type": "null" + } + }, + "additionalProperties": false + }, + "assigned_to_user": { + "type": [ "object", "null" ], + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "email": { + "type": "string" + }, + "html_url": { + "type": "string" + } + }, + "additionalProperties": false + }, + "trigger_summary_data": { + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "subject": { + "type": "string" + }, + "client": { + "type": "string" + } + }, + "additionalProperties": false + }, + "trigger_details_html_url": { + "type": "string" + }, + "trigger_type": { + "type": "string" + }, + "last_status_change_on": { + "type": "string", + "format": "date-time" + }, + "last_status_change_by": { + "type": [ "object", "null" ], + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "email": { + "type": "string" + }, + "html_url": { + "type": "string" + } + }, + "additionalProperties": false + }, + "number_of_escalations": { + "type": "integer" + }, + "assigned_to": { + "type": "array", + "items": { + "type": "object", + "properties": { + "at": { + "type": "string" + }, + "object": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "email": { + "type": "string" + }, + "html_url": { + "type": "string" + }, + "type": { + "type": "string" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "resolved_by_user": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "email": { + "type": "string" + }, + 
"html_url": { + "type": "string" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pingdom/incident_assign/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pingdom/incident_assign/jsonschema/1-0-0 new file mode 100644 index 000000000..29dcbc0ef --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.pingdom/incident_assign/jsonschema/1-0-0 @@ -0,0 +1,30 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Pingdom incident assigned event", + "self": { + "vendor": "com.pingdom", + "name": "incident_assign", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "check": { + "type": "string" + }, + "checkname": { + "type": "string" + }, + "host": { + "type": "string" + }, + "incidentid": { + "type": "integer" + }, + "description": { + "type": "string" + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.sendgrid/processed/jsonschema/2-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.sendgrid/processed/jsonschema/2-0-0 new file mode 100644 index 000000000..c0def0617 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.sendgrid/processed/jsonschema/2-0-0 @@ -0,0 +1,89 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a SendGrid processed event. 
Property descriptions derived from the SendGrid documentation: https://sendgrid.com/docs/for-developers/tracking-events/event/", + "self": { + "vendor": "com.sendgrid", + "name": "processed", + "version": "2-0-0", + "format": "jsonschema" + }, + "type": "object", + "properties": { + "timestamp": { + "description": "The timestamp of when the message was sent", + "type": "string", + "format": "date-time" + }, + "email": { + "description": "The email address of the recipient", + "type": "string", + "format": "email" + }, + "newsletter": { + "description": "Legacy Marketing Email tool fields", + "type": "object", + "properties": { + "newsletter_user_list_id": { + "type": "string" + }, + "newsletter_id": { + "type": "string" + }, + "newsletter_send_id": { + "type": "string" + } + }, + "additionalProperties": true + }, + "sg_event_id": { + "description": "A unique ID to this event that you can use for deduplication purposes. These IDs are up to 100 characters long and are URL safe.", + "type": "string", + "minLength": 22, + "maxLength": 4096 + }, + "smtp-id": { + "description": "A unique ID attached to the message by the originating system", + "type": "string" + }, + "category": { + "description": "Categories are custom tags that you set for the purpose of organizing your emails. Categories can be set as an array or string, and they will be returned as such when posted in your event endpoint.", + "type": ["array", "string"], + "items": { + "type": "string" + } + }, + "asm_group_id": { + "description": "The ID of the unsubscribe group the recipient’s email address is included in. ASM IDs correspond to the ID that is returned when you create an unsubscribe group.", + "type": "integer", + "maximum": 32767, + "minimum": 0 + }, + "sg_message_id": { + "description": "A unique, internal SendGrid ID for the message. 
The first half of this is pulled from the smtp-id.", + "type": "string" + }, + "send_at": { + "description": "To schedule a send request for a large batch of emails, use the send_at parameter which will send all emails at approximately the same time. send_at is a UNIX timestamp.", + "type": "integer", + "maximum": 2147483647, + "minimum": 0 + }, + "marketing_campaign_id": { + "description": "For emails sent through our Marketing Campaigns feature, we add Marketing Campaigns specific parameters to the Event Webhook. Both marketing_campaign_name and marketing_campaign_id are displayed as unique arguments in the event data.", + "type": "integer" + }, + "marketing_campaign_name": { + "description": "For emails sent through our Marketing Campaigns feature, we add Marketing Campaigns specific parameters to the Event Webhook. Both marketing_campaign_name and marketing_campaign_id are displayed as unique arguments in the event data.", + "type": "string" + }, + "marketing_campaign_version": { + "description": "Displayed in the event data for emails sent as part of an A/B Test. 
The values for marketing_campaign_version are returned as A, B, C, etc.", + "type": "string" + }, + "marketing_campaign_split_id": { + "description": "Marketing campaign split id", + "type": "integer" + } + }, + "additionalProperties": true +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow-website/signup_form_submitted/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow-website/signup_form_submitted/jsonschema/1-0-0 new file mode 100644 index 000000000..46a019700 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow-website/signup_form_submitted/jsonschema/1-0-0 @@ -0,0 +1,30 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for unit tests", + "self": { + "vendor": "com.snowplowanalytics.snowplow-website", + "name": "signup_form_submitted", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "name": { + "type": ["string", "null"] + }, + "email": { + "type": ["string", "null"] + }, + "company": { + "type": ["string", "null"] + }, + "eventsPerMonth": { + "type": ["string", "null"] + }, + "serviceType": { + "type": ["string", "null"] + } + }, + "additionalProperties": false +} + diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/api_request_enrichment_config/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/api_request_enrichment_config/jsonschema/1-0-0 new file mode 100644 index 000000000..45b9c6289 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/api_request_enrichment_config/jsonschema/1-0-0 @@ -0,0 +1,167 @@ +{ + "$schema":
"http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for API Request Enrichment configuration", + "self": { + "vendor": "com.snowplowanalytics.snowplow.enrichments", + "name": "api_request_enrichment_config", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + + "parameters": { + "type": "object", + "properties": { + "inputs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { + "type": "string", + "pattern": "^[A-Za-z0-9_-]+$" + }, + "pojo": { + "type": "object", + "properties": { + "field": { + "type": "string" + } + }, + "additionalProperties": false + }, + "json": { + "type": "object", + "properties": { + "field": { + "type": "string", + "enum": ["unstruct_event", "contexts", "derived_contexts"] + }, + "schemaCriterion": { + "type": "string", + "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/([1-9][0-9]*|\\*)-((?:0|[1-9][0-9]*)|\\*)-((?:0|[1-9][0-9]*)|\\*)$" + }, + "jsonPath": { + "type": "string", + "pattern": "^\\$.*$" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false, + "minProperties": 2, + "maxProperties": 2, + "required": ["key"] + } + }, + "api": { + "type": "object", + "minProperties": 1, + "maxProperties": 1, + "properties": { + "http": { + "type": "object", + "properties": { + "method": { + "type": "string", + "enum": ["GET", "POST", "PUT"] + }, + "uri": { + "type": "string" + }, + "timeout": { + "type": "integer", + "minimum": 1, + "maximum": 60000 + }, + "authentication": { + "type": "object", + "properties": { + "httpBasic": { + "type": "object", + "properties": { + "username": { + "type": "string" + }, + "password": { + "type": "string" + } + }, + "required": ["username", "password"], + "additionalProperties": false + } + }, + "additionalProperties": 
false + } + }, + "required": ["method", "uri", "timeout", "authentication"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "outputs": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "properties": { + "schema": { + "type": "string", + "pattern": "^iglu:([a-zA-Z0-9-_.]+)/([a-zA-Z0-9-_]+)/([a-zA-Z0-9-_]+)/([1-9][0-9]*(?:-(?:0|[1-9][0-9]*)){2})$" + }, + "json": { + "type": "object", + "properties": { + "jsonPath": { + "type": "string", + "pattern": "^\\$.*$" + } + }, + "required": ["jsonPath"], + "additionalProperties": false + } + }, + "required": ["schema"], + "minProperties": 2, + "maxProperties": 2, + "additionalProperties": false + } + }, + "cache": { + "type": "object", + "properties": { + "size": { + "type": "integer", + "minimum": 1 + }, + "ttl": { + "type": "integer", + "minimum": 0, + "maximum": 86400 + } + }, + "additionalProperties": false, + "required": ["size", "ttl"] + } + }, + "additionalProperties": false, + "required": ["inputs", "api", "outputs", "cache"] + } + }, + "additionalProperties": false, + "required": ["name", "vendor", "enabled", "parameters"] +} + diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/iab_spiders_and_robots_enrichment/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/iab_spiders_and_robots_enrichment/jsonschema/1-0-0 new file mode 100644 index 000000000..5f898ff5f --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/iab_spiders_and_robots_enrichment/jsonschema/1-0-0 @@ -0,0 +1,84 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for IAB Spiders & Robots enrichment config", + "self": { + "vendor": "com.snowplowanalytics.snowplow.enrichments", + "name": 
"iab_spiders_and_robots_enrichment", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + + "parameters": { + "type": "object", + "properties": { + "ipFile": { + "description": "Path to IP address exclude file", + "type": "object", + "properties": { + "database": { + "enum": ["ip_exclude_current_cidr.txt"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["database", "uri"] + }, + "excludeUseragentFile": { + "description": "Path to user agent exclude file", + "type": "object", + "properties": { + "database": { + "enum": ["exclude_current.txt"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["database", "uri"] + }, + "includeUseragentFile": { + "description": "Path to user agent include file", + "type": "object", + "properties": { + "database": { + "enum": ["include_current.txt"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["database", "uri"] + } + }, + "required": [ + "ipFile", + "excludeUseragentFile", + "includeUseragentFile" + ], + "additionalProperties": false + } + }, + "required": [ + "vendor", + "name", + "enabled", + "parameters" + ], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/2-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/2-0-0 new file mode 100644 index 000000000..45d678710 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/2-0-0 @@ -0,0 +1,164 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + 
"description": "Schema for PII pseudonymization enrichment", + "self": { + "vendor": "com.snowplowanalytics.snowplow.enrichments", + "name": "pii_enrichment_config", + "format": "jsonschema", + "version": "2-0-0" + }, + "type": "object", + "properties": { + "vendor": { + "type": "string", + "description": "The name of the vendor for this config (the only valid value for scala-common enrich is com.snowplowanalytics.snowplow.enrichments)" + }, + "name": { + "type": "string", + "description": "The name of the config (the only valid value for scala-common enrich is pii_enrichment_config)" + }, + "enabled": { + "type": "boolean", + "description": "Whether to enable this enrichment" + }, + "emitEvent": { + "type": "boolean", + "description": "Whether to emit identification events from this enrichment" + }, + "parameters": { + "type": "object", + "properties": { + "pii": { + "description": "List of all the fields for which pseudonymization will be performed", + "type": "array", + "items": { + "type": "object", + "properties": { + "pojo": { + "description": "Scalar field which contains a single string value, on which pseudonymization will be performed in the entire field (e.g.
`user-123`)", + "type": "object", + "properties": { + "field": { + "enum": [ + "user_id", + "user_ipaddress", + "user_fingerprint", + "domain_userid", + "network_userid", + "ip_organization", + "ip_domain", + "tr_orderid", + "ti_orderid", + "mkt_term", + "mkt_content", + "se_category", + "se_action", + "se_label", + "se_property", + "mkt_clickid", + "refr_domain_userid", + "domain_sessionid" + ] + } + }, + "required": [ + "field" + ], + "additionalProperties": false + }, + "json": { + "description": "JSON field which contains a JSON string value, on which pseudonymization will be performed on a specific JSON path", + "type": "object", + "properties": { + "field": { + "enum": [ + "contexts", + "derived_contexts", + "unstruct_event" + ] + }, + "schemaCriterion": { + "type": "string", + "pattern": "^iglu:([a-zA-Z0-9-_.]+)/([a-zA-Z0-9-_]+)/([a-zA-Z0-9-_]+)/([1-9][0-9]*|\\*)-((?:0|[1-9][0-9]*)|\\*)-((?:0|[1-9][0-9]*)|\\*)$" + }, + "jsonPath": { + "type": "string", + "pattern": "^\\$.*$" + } + }, + "required": [ + "field", + "schemaCriterion", + "jsonPath" + ], + "additionalProperties": false + } + }, + "oneOf": [ + { + "required": [ + "pojo" + ] + }, + { + "required": [ + "json" + ] + } + ], + "additionalProperties": false + } + }, + "strategy": { + "description": "The pseudonymization strategy which will be applied to all the fields specified in the `pii` section", + "type": "object", + "properties": { + "pseudonymize": { + "description": "Pseudonymization strategy that hashes using a specified algorithm", + "type": "object", + "properties": { + "hashFunction": { + "description": "The hash function that will be used by this strategy", + "enum": [ + "MD2", + "MD5", + "SHA-1", + "SHA-256", + "SHA-384", + "SHA-512" + ] + }, + "salt": { + "description": "A salt that will be added to the field during hashing", + "type": "string" + } + }, + "required": [ + "hashFunction", + "salt" + ], + "additionalProperties": false + } + }, + "required": [ + "pseudonymize" + ], +
"additionalProperties": false + } + }, + "required": [ + "pii", + "strategy" + ], + "additionalProperties": false + } + }, + "required": [ + "vendor", + "name", + "enabled", + "emitEvent", + "parameters" + ], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/sql_query_enrichment_config/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/sql_query_enrichment_config/jsonschema/1-0-0 new file mode 100644 index 000000000..941407ee6 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/sql_query_enrichment_config/jsonschema/1-0-0 @@ -0,0 +1,211 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for SQL Query enrichment configuration", + "self": { + "vendor": "com.snowplowanalytics.snowplow.enrichments", + "name": "sql_query_enrichment_config", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + + "parameters": { + "type": "object", + "properties": { + "inputs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "placeholder": { + "type": "integer", + "minimum": 1, + "maximum": 64 + }, + "pojo": { + "type": "object", + "properties": { + "field": { + "type": "string" + } + }, + "additionalProperties": false, + "required": ["field"] + }, + "json": { + "type": "object", + "properties": { + "field": { + "type": "string", + "enum": ["unstruct_event", "contexts", "derived_contexts"] + }, + "schemaCriterion": { + "type": "string", + "pattern": 
"^iglu:([a-zA-Z0-9-_.]+)/([a-zA-Z0-9-_]+)/([a-zA-Z0-9-_]+)/([1-9][0-9]*|\\*)-((?:0|[1-9][0-9]*)|\\*)-((?:0|[1-9][0-9]*)|\\*)$" + }, + "jsonPath": { + "type": "string" + } + }, + "additionalProperties": false, + "required": ["field", "schemaCriterion", "jsonPath"] + } + }, + "additionalProperties": false, + "minProperties": 2, + "maxProperties": 2, + "required": ["placeholder"] + } + }, + "database": { + "oneOf": [ + + { + "type": "object", + "properties": { + "postgresql": { + "type": "object", + "properties": { + "host": { + "type": "string" + }, + "port": { + "type": "integer", + "minimum": 1, + "maximum": 65535 + }, + "sslMode": { + "type": "boolean" + }, + "username": { + "type": "string", + "minLength": 1 + }, + "password": { + "type": "string" + }, + "database": { + "type": "string", + "minLength": 1 + } + }, + "required": ["host", "port", "sslMode", "username", "password", "database"], + "additionalProperties": false + } + }, + "required": ["postgresql"], + "additionalProperties": false + }, + + { + "type": "object", + "properties": { + "mysql": { + "type": "object", + "properties": { + "host": { + "type": "string" + }, + "port": { + "type": "integer", + "minimum": 1, + "maximum": 65535 + }, + "sslMode": { + "type": "boolean" + }, + "username": { + "type": "string", + "minLength": 1 + }, + "password": { + "type": "string" + }, + "database": { + "type": "string", + "minLength": 1 + } + }, + "required": ["host", "port", "sslMode", "username", "password", "database"], + "additionalProperties": false + } + }, + "required": ["mysql"] + } + ], + "additionalProperties": true + }, + "query": { + "type": "object", + "properties": { + "sql": { + "type": "string" + } + }, + "required": ["sql"], + "additionalProperties": false + }, + "output": { + "type": "object", + "properties": { + "expectedRows": { + "type": "string", + "enum": ["AT_LEAST_ONE", "AT_LEAST_ZERO", "AT_MOST_ONE", "EXACTLY_ONE"] + }, + "json": { + "type": "object", + "properties": { + "schema": { + 
"type": "string", + "pattern": "^iglu:([a-zA-Z0-9-_.]+)/([a-zA-Z0-9-_]+)/([a-zA-Z0-9-_]+)/([1-9][0-9]*(?:-(?:0|[1-9][0-9]*)){2})$" + }, + "propertyNames": { + "type": "string", + "enum": ["AS_IS", "CAMEL_CASE", "PASCAL_CASE", "SNAKE_CASE", "LOWER_CASE", "UPPER_CASE"] + }, + "describes": { + "type": "string", + "enum": ["ALL_ROWS", "EVERY_ROW"] + } + }, + "required": ["schema", "propertyNames", "describes"], + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "cache": { + "type": "object", + "properties": { + "size": { + "type": "integer", + "minimum": 0 + }, + "ttl": { + "type": "integer", + "minimum": 0, + "maximum": 86400 + } + }, + "additionalProperties": false, + "required": ["size", "ttl"] + } + }, + "additionalProperties": false, + "required": ["inputs", "database", "query", "output", "cache"] + } + }, + "additionalProperties": false, + "required": ["name", "vendor", "enabled", "parameters"] +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/yauaa_enrichment_config/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/yauaa_enrichment_config/jsonschema/1-0-0 new file mode 100644 index 000000000..31d30f048 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.enrichments/yauaa_enrichment_config/jsonschema/1-0-0 @@ -0,0 +1,38 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for YAUAA enrichment config", + "self": { + "vendor": "com.snowplowanalytics.snowplow.enrichments", + "name": "yauaa_enrichment_config", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "parameters": { + "type": 
"object", + "properties": { + "cacheSize": { + "type": "integer" + } + }, + "additionalProperties": false + } + }, + "required": [ + "vendor", + "name", + "enabled" + ], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.input-adapters/segment_webhook_config/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.input-adapters/segment_webhook_config/jsonschema/1-0-0 new file mode 100644 index 000000000..4c7301ba3 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow.input-adapters/segment_webhook_config/jsonschema/1-0-0 @@ -0,0 +1,37 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema to configure the Segment webhook adapter", + "self": { + "vendor": "com.snowplowanalytics.snowplow.input-adapters", + "name": "segment_webhook_config", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "parameters": { + "type": "object", + "properties": { + "mappings": { + "type": "object", + "patternProperties": { + ".*": { + "type": "string" + } + } + } + }, + "required": ["mappings"], + "additionalProperties": false + } + }, + "required": ["name", "vendor", "parameters"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 new file mode 100644 index 000000000..f3cc406d3 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 @@ -0,0 +1,489 @@ +{ 
+ "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for an atomic canonical Snowplow event", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "atomic", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "app_id": { + "type": ["string", "null"], + "maxLength": 255 + }, + "platform": { + "type": ["string", "null"], + "maxLength": 255 + }, + "etl_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "collector_tstamp": { + "type": "string", + "format": "date-time" + }, + "dvce_created_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "event": { + "type": ["string", "null"], + "maxLength": 128 + }, + "event_id": { + "type": "string", + "maxLength": 36 + }, + "txn_id": { + "type": ["integer", "null"] + }, + "name_tracker": { + "type": ["string", "null"], + "maxLength": 128 + }, + "v_tracker": { + "type": ["string", "null"], + "maxLength": 100 + }, + "v_collector": { + "type": "string", + "maxLength": 100 + }, + "v_etl": { + "type": "string", + "maxLength": 100 + }, + "user_id": { + "type": ["string", "null"], + "maxLength": 255 + }, + "user_ipaddress": { + "type": ["string", "null"], + "maxLength": 128 + }, + "user_fingerprint": { + "type": ["string", "null"], + "maxLength": 128 + }, + "domain_userid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "domain_sessionidx": { + "type": ["integer", "null"] + }, + "network_userid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "geo_country": { + "type": ["string", "null"], + "maxLength": 2 + }, + "geo_region": { + "type": ["string", "null"], + "maxLength": 3 + }, + "geo_city": { + "type": ["string", "null"], + "maxLength": 75 + }, + "geo_zipcode": { + "type": ["string", "null"], + "maxLength": 15 + }, + "geo_latitude": { + "type": ["number", "null"] + }, + "geo_longitude": { + "type": ["number", "null"] + }, + "geo_region_name": { 
+ "type": ["string", "null"], + "maxLength": 100 + }, + "ip_isp": { + "type": ["string", "null"], + "maxLength": 100 + }, + "ip_organization": { + "type": ["string", "null"], + "maxLength": 128 + }, + "ip_domain": { + "type": ["string", "null"], + "maxLength": 128 + }, + "ip_netspeed": { + "type": ["string", "null"], + "maxLength": 100 + }, + "page_url": { + "type": ["string", "null"], + "maxLength": 4096 + }, + "page_title": { + "type": ["string", "null"], + "maxLength": 2000 + }, + "page_referrer": { + "type": ["string", "null"], + "maxLength": 4096 + }, + "page_urlscheme": { + "type": ["string", "null"], + "maxLength": 16 + }, + "page_urlhost": { + "type": ["string", "null"], + "maxLength": 255 + }, + "page_urlport": { + "type": ["integer", "null"] + }, + "page_urlpath": { + "type": ["string", "null"], + "maxLength": 3000 + }, + "page_urlquery": { + "type": ["string", "null"], + "maxLength": 6000 + }, + "page_urlfragment": { + "type": ["string", "null"], + "maxLength": 3000 + }, + "refr_urlscheme": { + "type": ["string", "null"], + "maxLength": 16 + }, + "refr_urlhost": { + "type": ["string", "null"], + "maxLength": 255 + }, + "refr_urlport": { + "type": ["integer", "null"] + }, + "refr_urlpath": { + "type": ["string", "null"], + "maxLength": 6000 + }, + "refr_urlquery": { + "type": ["string", "null"], + "maxLength": 6000 + }, + "refr_urlfragment": { + "type": ["string", "null"], + "maxLength": 3000 + }, + "refr_medium": { + "type": ["string", "null"], + "maxLength": 25 + }, + "refr_source": { + "type": ["string", "null"], + "maxLength": 50 + }, + "refr_term": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_medium": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_source": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_term": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_content": { + "type": ["string", "null"], + "maxLength": 500 + }, + "mkt_campaign": { + "type": ["string", "null"], + "maxLength": 
255 + }, + "se_category": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "se_action": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "se_label": { + "type": ["string", "null"], + "maxLength": 4096 + }, + "se_property": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "se_value": { + "type": ["number", "null"] + }, + "tr_orderid": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_affiliation": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_total": { + "type": ["number", "null"] + }, + "tr_tax": { + "type": ["number", "null"] + }, + "tr_shipping": { + "type": ["number", "null"] + }, + "tr_city": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_state": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_country": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_orderid": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_sku": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_name": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_category": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_price": { + "type": ["number", "null"] + }, + "ti_quantity": { + "type": ["integer", "null"] + }, + "pp_xoffset_min": { + "type": ["integer", "null"] + }, + "pp_xoffset_max": { + "type": ["integer", "null"] + }, + "pp_yoffset_min": { + "type": ["integer", "null"] + }, + "pp_yoffset_max": { + "type": ["integer", "null"] + }, + "useragent": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "br_name": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_family": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_version": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_type": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_renderengine": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_lang": { + "type": ["string", "null"], + "maxLength": 255 + }, + "br_features_pdf": { + 
"type": ["boolean", "null"] + }, + "br_features_flash": { + "type": ["boolean", "null"] + }, + "br_features_java": { + "type": ["boolean", "null"] + }, + "br_features_director": { + "type": ["boolean", "null"] + }, + "br_features_quicktime": { + "type": ["boolean", "null"] + }, + "br_features_realplayer": { + "type": ["boolean", "null"] + }, + "br_features_windowsmedia": { + "type": ["boolean", "null"] + }, + "br_features_gears": { + "type": ["boolean", "null"] + }, + "br_features_silverlight": { + "type": ["boolean", "null"] + }, + "br_cookies": { + "type": ["boolean", "null"] + }, + "br_colordepth": { + "type": ["string", "null"], + "maxLength": 12 + }, + "br_viewwidth": { + "type": ["integer", "null"] + }, + "br_viewheight": { + "type": ["integer", "null"] + }, + "os_name": { + "type": ["string", "null"], + "maxLength": 50 + }, + "os_family": { + "type": ["string", "null"], + "maxLength": 50 + }, + "os_manufacturer": { + "type": ["string", "null"], + "maxLength": 50 + }, + "os_timezone": { + "type": ["string", "null"], + "maxLength": 255 + }, + "dvce_type": { + "type": ["string", "null"], + "maxLength": 50 + }, + "dvce_ismobile": { + "type": ["boolean", "null"] + }, + "dvce_screenwidth": { + "type": ["integer", "null"] + }, + "dvce_screenheight": { + "type": ["integer", "null"] + }, + "doc_charset": { + "type": ["string", "null"], + "maxLength": 128 + }, + "doc_width": { + "type": ["integer", "null"] + }, + "doc_height": { + "type": ["integer", "null"] + }, + "tr_currency": { + "type": ["string", "null"], + "maxLength": 3 + }, + "tr_total_base": { + "type": ["number", "null"] + }, + "tr_tax_base": { + "type": ["number", "null"] + }, + "tr_shipping_base": { + "type": ["number", "null"] + }, + "ti_currency": { + "type": ["string", "null"], + "maxLength": 3 + }, + "ti_price_base": { + "type": ["number", "null"] + }, + "base_currency": { + "type": ["string", "null"], + "maxLength": 3 + }, + "geo_timezone": { + "type": ["string", "null"], + "maxLength": 64 + }, + 
"mkt_clickid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "mkt_network": { + "type": ["string", "null"], + "maxLength": 64 + }, + "etl_tags": { + "type": ["string", "null"], + "maxLength": 500 + }, + "dvce_sent_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "refr_domain_userid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "refr_dvce_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "domain_sessionid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "derived_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "event_vendor": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "event_name": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "event_format": { + "type": ["string", "null"], + "maxLength": 128 + }, + "event_version": { + "type": ["string", "null"], + "maxLength": 128 + }, + "event_fingerprint": { + "type": ["string", "null"], + "maxLength": 128 + }, + "true_tstamp": { + "type": ["string", "null"], + "format": "date-time" + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/campaign_attribution/jsonschema/1-0-1 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/campaign_attribution/jsonschema/1-0-1 new file mode 100644 index 000000000..59ab71a5c --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/campaign_attribution/jsonschema/1-0-1 @@ -0,0 +1,74 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a campaign attribution enrichment", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "campaign_attribution", + "format": "jsonschema", + "version": "1-0-1" + }, + + "type": "object", + "properties": { + "vendor": { + "type": 
"string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "parameters": { + "type": "object", + "properties": { + "mapping": { + "type": ["string", "null"], + "enum": ["static", "script"] + }, + "fields": { + "type": "object", + "properties": { + "mktMedium": { + "$ref": "#/definitions/stringArray" + }, + "mktSource": { + "$ref": "#/definitions/stringArray" + }, + "mktTerm": { + "$ref": "#/definitions/stringArray" + }, + "mktContent": { + "$ref": "#/definitions/stringArray" + }, + "mktCampaign": { + "$ref": "#/definitions/stringArray" + }, + "mktClickId": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "required": ["mktMedium", "mktSource", "mktTerm", "mktContent", "mktCampaign"], + "additionalProperties": false + } + }, + "required": ["fields"], + "additionalProperties": false + } + }, + + "definitions": { + "stringArray": { + "type": "array", + "items": { + "type": "string" + } + } + }, + + "required": ["name", "vendor", "enabled", "parameters"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0 new file mode 100644 index 000000000..3ddce8ba5 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0 @@ -0,0 +1,32 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for custom contexts", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "contexts", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "array", + + "items": { + + "type": "object", + + "properties": { + + "schema": { + "type": "string", + "pattern": 
"^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + }, + + "data": {} + }, + + "required": ["schema", "data"], + "additionalProperties": false + }, + + "minItems": 1 +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1 new file mode 100644 index 000000000..e6a9a225c --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1 @@ -0,0 +1,30 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for custom contexts", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "contexts", + "format": "jsonschema", + "version": "1-0-1" + }, + + "type": "array", + + "items": { + + "type": "object", + + "properties": { + + "schema": { + "type": "string", + "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + }, + + "data": {} + }, + + "required": ["schema", "data"], + "additionalProperties": false + } +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/currency_conversion_config/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/currency_conversion_config/jsonschema/1-0-0 new file mode 100644 index 000000000..5d55cda9b --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/currency_conversion_config/jsonschema/1-0-0 @@ -0,0 +1,46 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for configuration of currency-conversion enrichment", + "self": { + "vendor": 
"com.snowplowanalytics.snowplow", + "name": "currency_conversion_config", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "parameters": { + "type": "object", + "properties": { + "apiKey": { + "type": "string" + }, + "baseCurrency": { + "type": "string" + }, + "rateAt": { + "enum": ["EOD_PRIOR"] + }, + "accountType": { + "type": "string", + "enum": ["DEVELOPER", "ENTERPRISE", "UNLIMITED"] + } + }, + "required": ["apiKey", "baseCurrency", "rateAt", "accountType"], + "additionalProperties": false + } + }, + "required": ["name", "vendor", "enabled", "parameters"], + "additionalProperties": false +} + diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/enrichments/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/enrichments/jsonschema/1-0-0 new file mode 100644 index 000000000..ed185417b --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/enrichments/jsonschema/1-0-0 @@ -0,0 +1,30 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for an array of enrichments", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "enrichments", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "array", + + "items": { + + "type": "object", + + "properties": { + + "schema": { + "type": "string", + "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + }, + + "data": {} + }, + + "required": ["schema", "data"], + "additionalProperties": false + } +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0 
b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0 new file mode 100644 index 000000000..20c7457e9 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0 @@ -0,0 +1,41 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for client geolocation contexts", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "geolocation_context", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "latitude": { + "type": "number", + "minimum": -90, + "maximum": 90 + }, + "longitude": { + "type": "number", + "minimum": -180, + "maximum": 180 + }, + "latitudeLongitudeAccuracy": { + "type": "number" + }, + "altitude": { + "type": "number" + }, + "altitudeAccuracy": { + "type": "number" + }, + "bearing": { + "type": "number" + }, + "speed": { + "type": "number" + } + }, + "required": ["latitude", "longitude"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-1-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-1-0 new file mode 100644 index 000000000..2e4bb97ca --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-1-0 @@ -0,0 +1,44 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for client geolocation contexts", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "geolocation_context", + "format": "jsonschema", + "version": "1-1-0" + }, + + "type": "object", + 
"properties": { + "latitude": { + "type": "number", + "minimum": -90, + "maximum": 90 + }, + "longitude": { + "type": "number", + "minimum": -180, + "maximum": 180 + }, + "latitudeLongitudeAccuracy": { + "type": ["number", "null"] + }, + "altitude": { + "type": ["number", "null"] + }, + "altitudeAccuracy": { + "type": ["number", "null"] + }, + "bearing": { + "type": ["number", "null"] + }, + "speed": { + "type": ["number", "null"] + }, + "timestamp": { + "type": ["integer", "null"] + } + }, + "required": ["latitude", "longitude"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/ip_lookups/jsonschema/2-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/ip_lookups/jsonschema/2-0-0 new file mode 100644 index 000000000..475b3f580 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/ip_lookups/jsonschema/2-0-0 @@ -0,0 +1,90 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for MaxMind GeoIP2 ip lookups enrichment", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "ip_lookups", + "format": "jsonschema", + "version": "2-0-0" + }, + + "type": "object", + "properties": { + "vendor": { + "type": "string", + "maxLength": 256 + }, + "name": { + "type": "string", + "maxLength": 256 + }, + "enabled": { + "type": "boolean" + }, + "parameters": { + "type": "object", + "properties": { + + "geo": { + "type": "object", + "properties": { + "database": { + "enum": ["GeoLite2-City.mmdb", "GeoIP2-City.mmdb"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["database", "uri"] + }, + + "isp": { + "type": "object", + "properties": { + "database": { + "enum": ["GeoIP2-ISP.mmdb"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + 
"required": ["database", "uri"] + }, + + "domain": { + "type": "object", + "properties": { + "database": { + "enum": ["GeoIP2-Domain.mmdb"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["database", "uri"] + }, + + "connectionType": { + "type": "object", + "properties": { + "database": { + "enum": ["GeoIP2-Connection-Type.mmdb"] + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["database", "uri"] + } + + }, + "additionalProperties": false + } + }, + "required": ["name", "vendor", "enabled", "parameters"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0 new file mode 100644 index 000000000..fe89dc373 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0 @@ -0,0 +1,43 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for mobile contexts", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "mobile_context", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "osType": { + "type": "string" + }, + "osVersion": { + "type": "string" + }, + "deviceManufacturer": { + "type": "string" + }, + "deviceModel": { + "type": "string" + }, + "carrier": { + "type": ["string", "null"] + }, + "openIdfa": { + "type": "string" + }, + "appleIdfa": { + "type": "string" + }, + "appleIdfv": { + "type": "string" + }, + "androidIdfa": { + "type": "string" + } + }, + "required": ["osType", "osVersion", "deviceManufacturer", "deviceModel"], + "additionalProperties": false +} diff --git 
a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-0 new file mode 100644 index 000000000..f5265aeb6 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-0 @@ -0,0 +1,246 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Snowplow payload", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "payload_data", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "array", + "items":{ + + "type": "object", + "properties": { + "tna": { + "type": "string" + }, + "aid": { + "type": "string" + }, + "p": { + "type": "string" + }, + "dtm": { + "type": "string" + }, + "tz": { + "type": "string" + }, + "e": { + "type": "string" + }, + "tid": { + "type": "string" + }, + "eid": { + "type": "string" + }, + "tv": { + "type": "string" + }, + "duid": { + "type": "string" + }, + "nuid": { + "type": "string" + }, + "uid": { + "type": "string" + }, + "vid": { + "type": "string" + }, + "ip": { + "type": "string" + }, + "res": { + "type": "string" + }, + "url": { + "type": "string" + }, + "page": { + "type": "string" + }, + "refr": { + "type": "string" + }, + "fp": { + "type": "string" + }, + "ctype": { + "type": "string" + }, + "cookie": { + "type": "string" + }, + "lang": { + "type": "string" + }, + "f_pdf": { + "type": "string" + }, + "f_qt": { + "type": "string" + }, + "f_realp": { + "type": "string" + }, + "f_wma": { + "type": "string" + }, + "f_dir": { + "type": "string" + }, + "f_fla": { + "type": "string" + }, + "f_java": { + "type": "string" + }, + "f_gears": { + "type": "string" + }, + "f_ag": { + "type": "string" + }, + "cd": { + "type": "string" + }, + "ds": { + 
"type": "string" + }, + "cs": { + "type": "string" + }, + "vp": { + "type": "string" + }, + "mac": { + "type": "string" + }, + "pp_mix": { + "type": "string" + }, + "pp_max": { + "type": "string" + }, + "pp_miy": { + "type": "string" + }, + "pp_may": { + "type": "string" + }, + "ad_ba": { + "type": "string" + }, + "ad_ca": { + "type": "string" + }, + "ad_ad": { + "type": "string" + }, + "ad_uid": { + "type": "string" + }, + "tr_id": { + "type": "string" + }, + "tr_af": { + "type": "string" + }, + "tr_tt": { + "type": "string" + }, + "tr_tx": { + "type": "string" + }, + "tr_sh": { + "type": "string" + }, + "tr_ci": { + "type": "string" + }, + "tr_st": { + "type": "string" + }, + "tr_co": { + "type": "string" + }, + "tr_cu": { + "type": "string" + }, + "ti_id": { + "type": "string" + }, + "ti_sk": { + "type": "string" + }, + "ti_nm": { + "type": "string" + }, + "ti_na": { + "type": "string" + }, + "ti_ca": { + "type": "string" + }, + "ti_pr": { + "type": "string" + }, + "ti_qu": { + "type": "string" + }, + "ti_cu": { + "type": "string" + }, + "sa": { + "type": "string" + }, + "sn": { + "type": "string" + }, + "st": { + "type": "string" + }, + "sp": { + "type": "string" + }, + "se_ca": { + "type": "string" + }, + "se_ac": { + "type": "string" + }, + "se_la": { + "type": "string" + }, + "se_pr": { + "type": "string" + }, + "se_va": { + "type": "string" + }, + "ue_na": { + "type": "string" + }, + "ue_pr": { + "type": "string" + }, + "ue_px": { + "type": "string" + }, + "co": { + "type": "string" + }, + "cx": { + "type": "string" + } + }, + "required": ["tv", "p", "e"], + "additionalProperties": false + }, + "minItems": 1 +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4 new file mode 100644 index 000000000..2e8cab60f --- /dev/null +++ 
b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4 @@ -0,0 +1,261 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Snowplow payload", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "payload_data", + "format": "jsonschema", + "version": "1-0-4" + }, + + "type": "array", + "items":{ + + "type": "object", + "properties": { + "tna": { + "type": "string" + }, + "aid": { + "type": "string" + }, + "p": { + "type": "string" + }, + "dtm": { + "type": "string" + }, + "tz": { + "type": "string" + }, + "e": { + "type": "string" + }, + "tid": { + "type": "string" + }, + "eid": { + "type": "string" + }, + "tv": { + "type": "string" + }, + "duid": { + "type": "string" + }, + "nuid": { + "type": "string" + }, + "uid": { + "type": "string" + }, + "vid": { + "type": "string" + }, + "ip": { + "type": "string" + }, + "res": { + "type": "string" + }, + "url": { + "type": "string" + }, + "page": { + "type": "string" + }, + "refr": { + "type": "string" + }, + "fp": { + "type": "string" + }, + "ctype": { + "type": "string" + }, + "cookie": { + "type": "string" + }, + "lang": { + "type": "string" + }, + "f_pdf": { + "type": "string" + }, + "f_qt": { + "type": "string" + }, + "f_realp": { + "type": "string" + }, + "f_wma": { + "type": "string" + }, + "f_dir": { + "type": "string" + }, + "f_fla": { + "type": "string" + }, + "f_java": { + "type": "string" + }, + "f_gears": { + "type": "string" + }, + "f_ag": { + "type": "string" + }, + "cd": { + "type": "string" + }, + "ds": { + "type": "string" + }, + "cs": { + "type": "string" + }, + "vp": { + "type": "string" + }, + "mac": { + "type": "string" + }, + "pp_mix": { + "type": "string" + }, + "pp_max": { + "type": "string" + }, + "pp_miy": { + "type": "string" + }, + "pp_may": { + "type": "string" + }, + "ad_ba": { + "type": "string" + }, + "ad_ca": { + 
"type": "string" + }, + "ad_ad": { + "type": "string" + }, + "ad_uid": { + "type": "string" + }, + "tr_id": { + "type": "string" + }, + "tr_af": { + "type": "string" + }, + "tr_tt": { + "type": "string" + }, + "tr_tx": { + "type": "string" + }, + "tr_sh": { + "type": "string" + }, + "tr_ci": { + "type": "string" + }, + "tr_st": { + "type": "string" + }, + "tr_co": { + "type": "string" + }, + "tr_cu": { + "type": "string" + }, + "ti_id": { + "type": "string" + }, + "ti_sk": { + "type": "string" + }, + "ti_nm": { + "type": "string" + }, + "ti_na": { + "type": "string" + }, + "ti_ca": { + "type": "string" + }, + "ti_pr": { + "type": "string" + }, + "ti_qu": { + "type": "string" + }, + "ti_cu": { + "type": "string" + }, + "sa": { + "type": "string" + }, + "sn": { + "type": "string" + }, + "st": { + "type": "string" + }, + "sp": { + "type": "string" + }, + "se_ca": { + "type": "string" + }, + "se_ac": { + "type": "string" + }, + "se_la": { + "type": "string" + }, + "se_pr": { + "type": "string" + }, + "se_va": { + "type": "string" + }, + "ue_na": { + "type": "string" + }, + "ue_pr": { + "type": "string" + }, + "ue_px": { + "type": "string" + }, + "co": { + "type": "string" + }, + "cx": { + "type": "string" + }, + "ua": { + "type": "string" + }, + "tnuid": { + "type": "string" + }, + "stm": { + "type": "string" + }, + "sid": { + "type": "string" + }, + "ttm": { + "type": "string" + } + }, + "required": ["tv", "p", "e"], + "additionalProperties": false + }, + "minItems": 1 +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/referer_parser/jsonschema/2-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/referer_parser/jsonschema/2-0-0 new file mode 100644 index 000000000..18e936687 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/referer_parser/jsonschema/2-0-0 @@ -0,0 +1,44 @@ +{ + "$schema": 
"http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for referer-parser customization enrichment", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "referer_parser", + "format": "jsonschema", + "version": "2-0-0" + }, + "type": "object", + "properties": { + "vendor": { + "type": "string" + }, + "name": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "parameters": { + "type": "object", + "properties": { + "internalDomains": { + "type": "array", + "items": { + "type": "string" + } + }, + "database": { + "type": "string" + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["internalDomains", "database", "uri"], + "additionalProperties": false + } + }, + "required": ["name", "vendor", "enabled", "parameters"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0 new file mode 100644 index 000000000..edccfa3dc --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0 @@ -0,0 +1,22 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a screen view event", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "screen_view", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "id": { + "type": "string" + } + }, + "minProperties": 1, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0 
b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0 new file mode 100644 index 000000000..c64c8c538 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0 @@ -0,0 +1,25 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a Snowplow unstructured event", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "unstruct_event", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + + "properties": { + + "schema": { + "type": "string", + "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + }, + + "data": {} + }, + + "required": ["schema", "data"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0 new file mode 100644 index 000000000..a3b0ad550 --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0 @@ -0,0 +1,21 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a URI redirect through a Snowplow event collector", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "uri_redirect", + "format": "jsonschema", + "version": "1-0-0" + }, + + "type": "object", + "properties": { + "uri": { + "type": "string", + "format": "uri", + "maxLength": 8192 + } + }, + "required": ["uri"], + "additionalProperties": false +} diff --git 
a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.statusgator/status_change/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.statusgator/status_change/jsonschema/1-0-0 new file mode 100644 index 000000000..15db59eba --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.statusgator/status_change/jsonschema/1-0-0 @@ -0,0 +1,45 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a StatusGator status change event", + "self": { + "vendor": "com.statusgator", + "name": "status_change", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "serviceName": { + "type": "string", + "maxLength": 128 + }, + "faviconUrl": { + "type": "string", + "format": "uri", + "maxLength": 8192 + }, + "statusPageUrl": { + "type": "string", + "format": "uri", + "maxLength": 8192 + }, + "homePageUrl": { + "type": "string", + "format": "uri", + "maxLength": 8192 + }, + "currentStatus": { + "type": "string", + "maxLength": 128 + }, + "lastStatus": { + "type": "string", + "maxLength": 128 + }, + "occurredAt": { + "type": "string", + "format": "date-time" + } + }, + "additionalProperties": true +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.unbounce/form_post/jsonschema/1-0-0 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.unbounce/form_post/jsonschema/1-0-0 new file mode 100644 index 000000000..35306fabb --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/com.unbounce/form_post/jsonschema/1-0-0 @@ -0,0 +1,38 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for Unbounce (https://documentation.unbounce.com/hc/en-us/articles/203510044-Using-a-Webhook#content6)", + "self": { + "vendor": 
"com.unbounce", + "name": "form_post", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "pageId": { + "description": "The identifier Unbounce uses to uniquely identify your page. Eventually you’ll be able to use this with our API to manipulate and get information about your page. This is a 36-character UUID, for example, 'a2838d98-4cf4-11df-a3fd-00163e372d58'.", + "type": "string", + "maxLength": 36 + }, + "pageName": { + "description": "The name you gave your page, for example 'My Guaranteed to Convert Landing Page'.", + "type": "string" + }, + "variant": { + "description": "This identifies the page variant that the visitor saw when they visited your page, and will be a lower-case letter. The first variant is 'a', the next, 'b', and so on. If you have more than 26 variants the sequence will continue with 'aa', 'ab', etc.", + "type": "string", + "maxLength": 8 + }, + "pageUrl": { + "description": "The URL of the page that contains your form.", + "type": "string", + "format": "uri", + "maxLength": 8192 + }, + "data.json": { + "type": "object", + "additionalProperties": true + } + }, + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/nl.basjes/yauaa_context/jsonschema/1-0-3 b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/nl.basjes/yauaa_context/jsonschema/1-0-3 new file mode 100644 index 000000000..b3bc226ee --- /dev/null +++ b/modules/common-fs2/src/test/resources/iglu-client-embedded/schemas/nl.basjes/yauaa_context/jsonschema/1-0-3 @@ -0,0 +1,231 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for a context generated by the YAUAA enrichment after parsing the user agent", + "self": { + "vendor": "nl.basjes", + "name": "yauaa_context", + "format": "jsonschema", + "version": "1-0-3" + }, + "type": "object", + "properties": { + "deviceClass": { + 
"description": "See https://yauaa.basjes.nl/README-Output.html", + "enum": ["Desktop", "Anonymized", "Unknown", "UNKNOWN", "Mobile", "Tablet", "Phone", "Watch", "Virtual Reality", "eReader", "Set-top box", "TV", "Game Console", "Handheld Game Console", "Voice", "Robot", "Robot Mobile", "Spy", "Hacker", "Augmented Reality", "Robot Imitator"] + }, + "deviceName": { + "description": "Example: Google Nexus 6", + "type": "string", + "maxLength": 100 + }, + "deviceBrand": { + "description": "Example: Google", + "type": "string", + "maxLength": 50 + }, + "deviceCpu": { + "type": "string", + "maxLength": 50 + }, + "deviceCpuBits": { + "type": "string", + "maxLength": 20 + }, + "deviceFirmwareVersion": { + "type": "string", + "maxLength": 100 + }, + "deviceVersion": { + "type": "string", + "maxLength": 100 + }, + "operatingSystemClass": { + "description": "See https://yauaa.basjes.nl/README-Output.html", + "enum": ["Desktop", "Mobile", "Cloud", "Embedded", "Game Console", "Hacker", "Anonymized", "Unknown"] + }, + "operatingSystemName": { + "description": "Examples: Linux, Android.", + "type": "string", + "maxLength": 100 + }, + "operatingSystemVersion": { + "type": "string", + "maxLength": 50 + }, + "operatingSystemNameVersion": { + "type": "string", + "maxLength": 150 + }, + "operatingSystemVersionBuild": { + "type": "string", + "maxLength": 100 + }, + "layoutEngineClass": { + "description": "See https://yauaa.basjes.nl/README-Output.html", + "enum": ["Browser", "Mobile App", "Hacker", "Robot", "Unknown", "Special", "Cloud", "eReader"] + }, + "layoutEngineName": { + "type": "string", + "maxLength": 100 + }, + "layoutEngineVersion": { + "type": "string", + "maxLength": 50 + }, + "layoutEngineVersionMajor": { + "type": "string", + "maxLength": 20 + }, + "layoutEngineNameVersion": { + "type": "string", + "maxLength": 150 + }, + "layoutEngineNameVersionMajor": { + "type": "string", + "maxLength": 120 + }, + "layoutEngineBuild": { + "type": "string", + "maxLength": 100 + }, + 
"agentClass": { + "description": "See https://yauaa.basjes.nl/README-Output.html", + "enum": ["Browser", "Browser Webview", "Mobile App", "Robot", "Robot Mobile", "Cloud Application", "Email Client", "Voice", "Special", "Testclient", "Hacker", "Unknown", "Desktop App", "eReader"] + }, + "agentName": { + "description": "Example: Chrome.", + "type": "string", + "maxLength": 100 + }, + "agentVersion": { + "type": "string", + "maxLength": 100 + }, + "agentVersionMajor": { + "type": "string", + "maxLength": 100 + }, + "agentNameVersion": { + "type": "string", + "maxLength": 200 + }, + "agentNameVersionMajor": { + "type": "string", + "maxLength": 120 + }, + "agentBuild": { + "type": "string", + "maxLength": 100 + }, + "agentLanguage": { + "type": "string", + "maxLength": 50 + }, + "agentLanguageCode": { + "type": "string", + "maxLength": 20 + }, + "agentInformationEmail": { + "type": "string", + "format": "email" + }, + "agentInformationUrl": { + "type": "string" + }, + "agentSecurity": { + "type": "string", + "enum": ["Weak security", "Strong security", "Unknown", "Hacker", "No security"] + }, + "agentUuid": { + "type": "string" + }, + "webviewAppName": { + "type": "string" + }, + "webviewAppVersion": { + "type": "string" + }, + "webviewAppVersionMajor": { + "type": "string", + "maxLength": 50 + }, + "webviewAppNameVersionMajor": { + "type": "string", + "maxLength": 50 + }, + "facebookCarrier": { + "type": "string" + }, + "facebookDeviceClass": { + "type": "string", + "maxLength": 1024 + }, + "facebookDeviceName": { + "type": "string", + "maxLength": 1024 + }, + "facebookDeviceVersion": { + "type": "string" + }, + "facebookFBOP": { + "type": "string" + }, + "facebookFBSS": { + "type": "string" + }, + "facebookOperatingSystemName": { + "type": "string" + }, + "facebookOperatingSystemVersion": { + "type": "string" + }, + "anonymized": { + "type": "string" + }, + "hackerAttackVector": { + "type": "string" + }, + "hackerToolkit": { + "type": "string" + }, + "koboAffiliate": 
{ + "type": "string" + }, + "koboPlatformId": { + "type": "string" + }, + "iECompatibilityVersion": { + "type": "string", + "maxLength": 100 + }, + "iECompatibilityVersionMajor": { + "type": "string", + "maxLength": 50 + }, + "iECompatibilityNameVersion": { + "type": "string", + "maxLength": 50 + }, + "iECompatibilityNameVersionMajor": { + "type": "string", + "maxLength": 70 + }, + "carrier": { + "type": "string" + }, + "gSAInstallationID": { + "type": "string" + }, + "networkType": { + "type": "string" + }, + "operatingSystemNameVersionMajor": { + "type": "string" + }, + "operatingSystemVersionMajor": { + "type": "string" + } + }, + "required": ["deviceClass"], + "additionalProperties": false +} diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/EnrichSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/EnrichSpec.scala index c043d107e..807c5c1e8 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/EnrichSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/EnrichSpec.scala @@ -70,7 +70,7 @@ class EnrichSpec extends Specification with CatsIO with ScalaCheck { ) Enrich - .enrichWith(TestEnvironment.enrichmentReg.pure[IO], TestEnvironment.igluClient, None, EnrichSpec.processor)( + .enrichWith(TestEnvironment.enrichmentReg.pure[IO], TestEnvironment.igluClient, None, EnrichSpec.processor, false, IO.unit)( EnrichSpec.payload ) .map(normalizeResult) @@ -85,7 +85,9 @@ class EnrichSpec extends Specification with CatsIO with ScalaCheck { prop { (collectorPayload: CollectorPayload) => val payload = collectorPayload.toRaw Enrich - .enrichWith(TestEnvironment.enrichmentReg.pure[IO], TestEnvironment.igluClient, None, EnrichSpec.processor)(payload) + .enrichWith(TestEnvironment.enrichmentReg.pure[IO], TestEnvironment.igluClient, None, EnrichSpec.processor, false, IO.unit)( + payload + ) 
.map(normalizeResult) .map { case List(Validated.Valid(e)) => e.event must beSome("page_view") diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/BlackBoxTesting.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/BlackBoxTesting.scala index 893c61661..363051ae2 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/BlackBoxTesting.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/BlackBoxTesting.scala @@ -48,7 +48,7 @@ import com.snowplowanalytics.snowplow.enrich.common.fs2.EnrichSpec object BlackBoxTesting extends Specification with CatsIO { val igluClient: Client[IO, Json] = - Client[IO, Json](Resolver(List(Registry.IgluCentral), None), CirceValidator) + Client[IO, Json](Resolver(List(Registry.EmbeddedRegistry), None), CirceValidator) private val serializer: TSerializer = new TSerializer() @@ -90,7 +90,7 @@ object BlackBoxTesting extends Specification with CatsIO { enrichmentConfig: Option[Json] = None ) = Enrich - .enrichWith(getEnrichmentRegistry(enrichmentConfig), igluClient, None, EnrichSpec.processor)( + .enrichWith(getEnrichmentRegistry(enrichmentConfig), igluClient, None, EnrichSpec.processor, false, IO.unit)( input ) .map { diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/adapters/Tp2AdapterSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/adapters/Tp2AdapterSpec.scala index eaf46972d..ab832a9d2 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/adapters/Tp2AdapterSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/adapters/Tp2AdapterSpec.scala @@ -35,7 +35,7 @@ class Tp2AdapterSpec extends Specification with CatsIO { contentType = 
"application/json".some ) Enrich - .enrichWith(TestEnvironment.enrichmentReg.pure[IO], BlackBoxTesting.igluClient, None, EnrichSpec.processor)( + .enrichWith(TestEnvironment.enrichmentReg.pure[IO], BlackBoxTesting.igluClient, None, EnrichSpec.processor, false, IO.unit)( input ) .map { diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionItemSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionItemSpec.scala index 441080a8a..aa2262884 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionItemSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionItemSpec.scala @@ -47,7 +47,7 @@ class CurrencyConversionEnrichmentTransactionItemSpec extends Specification with "ti_sku" -> "PBZ1001", "ti_quantity" -> "2", "ti_category" -> "APPAREL", - "ti_price" -> "2000", + "ti_price" -> "2000.0", "ti_price_base" -> "2240.45", "ti_name" -> "Blue t-shirt", "collector_tstamp" -> "2019-07-01 19:23:03.000" diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionSpec.scala index 60aab5233..fc3076395 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/enrichments/CurrencyConversionEnrichmentTransactionSpec.scala @@ -44,15 
+44,15 @@ class CurrencyConversionEnrichmentTransactionpec extends Specification with Cats "base_currency" -> "EUR", "tr_currency" -> "USD", "tr_affiliation" -> "pb", - "tr_total" -> "8000", + "tr_total" -> "8000.0", "tr_total_base" -> "7087.49", - "tr_tax" -> "200", + "tr_tax" -> "200.0", "tr_tax_base" -> "177.19", - "tr_shipping" -> "50", - "tr_shipping_base" -> "44.30", + "tr_shipping" -> "50.0", + "tr_shipping_base" -> "44.3", "tr_orderid" -> "order-123", "tr_state" -> "England", - "txn_id" -> "028288", + "txn_id" -> "28288", "tr_country" -> "UK", "tr_city" -> "London", "collector_tstamp" -> "2019-07-01 19:23:03.000" diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionItemSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionItemSpec.scala index b1b53752b..4e616f7c2 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionItemSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionItemSpec.scala @@ -40,7 +40,7 @@ class TransactionItemSpec extends Specification with CatsIO { "ti_quantity" -> "2", "ti_currency" -> "", "ti_category" -> "APPAREL", - "ti_price" -> "2000", + "ti_price" -> "2000.0", "ti_price_base" -> "" ) BlackBoxTesting.runTest(input, expected) diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionSpec.scala index 2625b919f..114e55a4b 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/blackbox/misc/TransactionSpec.scala @@ -38,15 
+38,15 @@ class TransactionSpec extends Specification with CatsIO { "event_version" -> "1-0-0", "event" -> "transaction", "tr_affiliation" -> "pb", - "tr_total" -> "8000", + "tr_total" -> "8000.0", "tr_total_base" -> "", - "tr_tax" -> "200", + "tr_tax" -> "200.0", "tr_tax_base" -> "", - "tr_shipping" -> "50", + "tr_shipping" -> "50.0", "tr_shipping_base" -> "", "tr_orderid" -> "order-123", "tr_state" -> "England", - "txn_id" -> "028288", + "txn_id" -> "28288", "tr_country" -> "UK", "tr_city" -> "London", "contexts" -> json"""{"data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"http://snowplowanalytics.com/"}}],"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0"}""".noSpaces diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFileSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFileSpec.scala index ea6ebd3c5..75a192754 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFileSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ConfigFileSpec.scala @@ -64,6 +64,9 @@ class ConfigFileSpec extends Specification with CatsIO { Some("665bhft5u6udjf"), Some("enrich-kinesis-ce"), Some("1.0.0") + ), + io.FeatureFlags( + false ) ) ConfigFile.parse[IO](configPath.asRight).value.map(result => result must beRight(expected)) @@ -158,6 +161,9 @@ class ConfigFileSpec extends Specification with CatsIO { Some("665bhft5u6udjf"), Some("enrich-kinesis-ce"), Some("1.0.0") + ), + io.FeatureFlags( + false ) ) ConfigFile.parse[IO](configPath.asRight).value.map(result => result must beRight(expected)) @@ -208,6 +214,9 @@ class ConfigFileSpec extends Specification with CatsIO { "collectorUri": "collector-g.snowplowanalytics.com", "collectorPort": "443", "secure": true + }, + "featureFlags" : { + 
"acceptInvalid": false } }""" diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ParsedConfigsSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ParsedConfigsSpec.scala index cfa620dd1..c0a24f2ab 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ParsedConfigsSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/config/ParsedConfigsSpec.scala @@ -65,6 +65,9 @@ class ParsedConfigsSpec extends Specification with CatsIO { Some("665bhft5u6udjf"), Some("enrich-kinesis-ce"), Some("1.0.0") + ), + io.FeatureFlags( + false ) ) diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/io/StatsDReporterSpec.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/io/StatsDReporterSpec.scala index 361b9e3d9..be6db020b 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/io/StatsDReporterSpec.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/io/StatsDReporterSpec.scala @@ -23,7 +23,7 @@ class StatsDReporterSpec extends Specification { "StatsDeporter" should { "serialize metrics" in { - val snapshot = Metrics.MetricSnapshot(Some(10000L), 10, 20, 30) + val snapshot = Metrics.MetricSnapshot(Some(10000L), 10, 20, 30, 0) val result = StatsDReporter.serializedMetrics(snapshot, TestConfig) @@ -32,13 +32,14 @@ class StatsDReporterSpec extends Specification { "snowplow.test.raw:10|c|#tag1:abc", "snowplow.test.good:20|c|#tag1:abc", "snowplow.test.bad:30|c|#tag1:abc", - "snowplow.test.latency:10000|g|#tag1:abc" + "snowplow.test.latency:10000|g|#tag1:abc", + "snowplow.test.invalid_enriched:0|c|#tag1:abc" ) ) } "serialize metrics when latency is empty" in { - val snapshot = Metrics.MetricSnapshot(None, 10, 20, 30) + val snapshot = 
Metrics.MetricSnapshot(None, 10, 20, 30, 40) val result = StatsDReporter.serializedMetrics(snapshot, TestConfig) @@ -46,7 +47,8 @@ class StatsDReporterSpec extends Specification { exactly( "snowplow.test.raw:10|c|#tag1:abc", "snowplow.test.good:20|c|#tag1:abc", - "snowplow.test.bad:30|c|#tag1:abc" + "snowplow.test.bad:30|c|#tag1:abc", + "snowplow.test.invalid_enriched:40|c|#tag1:abc" ) ) } diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/Counter.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/Counter.scala index 77d6c2655..ffdabca3e 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/Counter.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/Counter.scala @@ -29,11 +29,12 @@ case class Counter( raw: Long, good: Long, bad: Long, - latency: Option[Long] + latency: Option[Long], + invalid: Long ) object Counter { - val empty: Counter = Counter(0L, 0L, 0L, None) + val empty: Counter = Counter(0L, 0L, 0L, None, 0L) def make[F[_]: Sync]: F[Ref[F, Counter]] = Ref.of[F, Counter](empty) @@ -56,5 +57,8 @@ object Counter { def badCount: F[Unit] = ref.update(cnt => cnt.copy(bad = cnt.bad + 1)) + + def invalidCount: F[Unit] = + ref.update(cnt => cnt.copy(invalid = cnt.invalid + 1)) } } diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/SchemaRegistry.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/SchemaRegistry.scala deleted file mode 100644 index e89d47ce7..000000000 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/SchemaRegistry.scala +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2020-2022 Snowplow Analytics Ltd. All rights reserved. 
- * - * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. - * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the Apache License Version 2.0 is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. - */ -package com.snowplowanalytics.snowplow.enrich.common.fs2.test - -import io.circe.Json -import io.circe.literal._ - -import com.snowplowanalytics.iglu.core.SelfDescribingSchema -import com.snowplowanalytics.iglu.core.circe.implicits._ - -/** - * In-memory test registry to avoid unnecessary HTTP and FS IO. All schemas used in [[TestEnvironment]] - * Iglu Client - */ -object SchemaRegistry { - val acmeTest: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "com.acme", - "name": "test", - "format": "jsonschema", - "version": "1-0-1" - }, - "properties": { - "path": { - "properties": { - "id": { - "type": "integer" - } - } - } - } - }""" - - val acmeOutput: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "com.acme", - "name": "output", - "format": "jsonschema", - "version": "1-0-0" - }, - "properties": { - "output": { - "type": "string" - } - } - }""" - - // Defined on Iglu Central - val unstructEvent: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "com.snowplowanalytics.snowplow", - 
"name": "unstruct_event", - "format": "jsonschema", - "version": "1-0-0" - }, - "type": "object", - "properties": { - "schema": { - "type": "string", - "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$$" - }, - "data": {} - }, - "required": ["schema", "data"], - "additionalProperties": false - }""" - - // Defined on Iglu Central - val contexts: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "com.snowplowanalytics.snowplow", - "name": "contexts", - "format": "jsonschema", - "version": "1-0-1" - }, - "type": "array", - "items": { - "type": "object", - "properties": { - "schema": { - "type": "string", - "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$$" - }, - "data": {} - }, - "required": ["schema", "data"], - "additionalProperties": false - } - }""" - - // Defined on Iglu Central - val geolocationContext: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "com.snowplowanalytics.snowplow", - "name": "geolocation_context", - "format": "jsonschema", - "version": "1-1-0" - }, - "type": "object", - "properties": { - "latitude": { "type": "number", "minimum": -90, "maximum": 90 }, - "longitude": { "type": "number", "minimum": -180, "maximum": 180 }, - "latitudeLongitudeAccuracy": { "type": ["number", "null"] }, - "altitude": { "type": ["number", "null"] }, - "altitudeAccuracy": { "type": ["number", "null"] }, - "bearing": { "type": ["number", "null"] }, - "speed": { "type": ["number", "null"] }, - "timestamp": { "type": ["integer", "null"] } - }, - "required": ["latitude", "longitude"], - "additionalProperties": false - }""" - - // Defined on Iglu Central - val iabAbdRobots: SelfDescribingSchema[Json] = - json"""{ - "$$schema": 
"http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "com.iab.snowplow", - "name": "spiders_and_robots", - "format": "jsonschema", - "version": "1-0-0" - }, - "type": "object", - "properties": { - "spiderOrRobot": {"type": "boolean" }, - "category": {"enum": ["SPIDER_OR_ROBOT", "ACTIVE_SPIDER_OR_ROBOT", "INACTIVE_SPIDER_OR_ROBOT", "BROWSER"]}, - "reason": {"enum": ["FAILED_IP_EXCLUDE", "FAILED_UA_INCLUDE", "FAILED_UA_EXCLUDE", "PASSED_ALL"]}, - "primaryImpact": {"enum": ["PAGE_IMPRESSIONS", "AD_IMPRESSIONS", "PAGE_AND_AD_IMPRESSIONS", "UNKNOWN", "NONE"]} - }, - "required": ["spiderOrRobot", "category", "reason", "primaryImpact"], - "additionalProperties": false - }""" - - val yauaaContext: SelfDescribingSchema[Json] = - json"""{ - "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", - "self": { - "vendor": "nl.basjes", - "name": "yauaa_context", - "format": "jsonschema", - "version": "1-0-3" - }, - "type": "object", - "properties": { - "deviceClass": {"enum":["Desktop","Anonymized","Unknown","UNKNOWN","Mobile","Tablet","Phone","Watch","Virtual Reality","eReader","Set-top box","TV","Game Console","Handheld Game Console","Voice","Robot","Robot Mobile","Spy","Hacker"]}, - "deviceName": {"type":"string","maxLength": 100 }, - "deviceBrand": {"type":"string","maxLength": 50 }, - "deviceCpu": {"type":"string","maxLength": 50 }, - "deviceCpuBits": {"type":"string","maxLength": 20 }, - "deviceFirmwareVersion": {"type":"string","maxLength": 100 }, - "deviceVersion": {"type":"string","maxLength": 100 }, - "operatingSystemClass": {"enum":["Desktop","Mobile","Cloud","Embedded","Game Console","Hacker","Anonymized","Unknown"] }, - "operatingSystemName": {"type":"string","maxLength": 100 }, - "operatingSystemVersion": {"type":"string","maxLength": 50 }, - "operatingSystemNameVersion": {"type":"string","maxLength": 150 }, - "operatingSystemVersionBuild": 
{"type":"string","maxLength": 100 }, - "layoutEngineClass": {"enum":["Browser", "Mobile App", "Hacker", "Robot", "Unknown"] }, - "layoutEngineName": {"type":"string","maxLength": 100 }, - "layoutEngineVersion": {"type":"string","maxLength": 50 }, - "layoutEngineVersionMajor": {"type":"string","maxLength": 20 }, - "layoutEngineNameVersion": {"type":"string","maxLength": 150 }, - "layoutEngineNameVersionMajor": {"type":"string","maxLength": 120 }, - "layoutEngineBuild": {"type":"string","maxLength": 100 }, - "agentClass": {"enum":["Browser", "Browser Webview", "Mobile App", "Robot", "Robot Mobile", "Cloud Application", "Email Client", "Voice", "Special", "Testclient", "Hacker", "Unknown"] }, - "agentName": {"type":"string","maxLength": 100 }, - "agentVersion": {"type":"string","maxLength": 100 }, - "agentVersionMajor": {"type":"string","maxLength": 100 }, - "agentNameVersion": {"type":"string","maxLength": 200 }, - "agentNameVersionMajor": {"type":"string","maxLength": 120 }, - "agentBuild": {"type":"string","maxLength": 100 }, - "agentLanguage": {"type":"string","maxLength": 50 }, - "agentLanguageCode": {"type":"string","maxLength": 20 }, - "agentInformationEmail": {"type":"string","format": "email" }, - "agentInformationUrl": {"type":"string"}, - "agentSecurity": {"type":"string","enum":["Weak security", "Strong security", "Unknown", "Hacker"] }, - "agentUuid": {"type":"string"}, - "webviewAppName": {"type":"string"}, - "webviewAppVersion": {"type":"string"}, - "webviewAppVersionMajor": {"type":"string","maxLength":50}, - "webviewAppNameVersionMajor": {"type":"string","maxLength":50}, - "facebookCarrier": {"type":"string"}, - "facebookDeviceClass": {"type":"string","maxLength":1024}, - "facebookDeviceName": {"type":"string","maxLength":1024}, - "facebookDeviceVersion": {"type":"string"}, - "facebookFBOP": {"type":"string"}, - "facebookFBSS": {"type":"string"}, - "facebookOperatingSystemName": {"type":"string"}, - "facebookOperatingSystemVersion": {"type":"string"}, 
- "anonymized": {"type":"string"}, - "hackerAttackVector": {"type":"string"}, - "hackerToolkit": {"type":"string"}, - "koboAffiliate": {"type":"string"}, - "koboPlatformId": {"type":"string"}, - "iECompatibilityVersion": {"type":"string","maxLength":100}, - "iECompatibilityVersionMajor": {"type":"string","maxLength":50}, - "iECompatibilityNameVersion": {"type":"string","maxLength":50}, - "iECompatibilityNameVersionMajor": {"type":"string","maxLength":70}, - "carrier": {"type":"string"}, - "gSAInstallationID": {"type":"string"}, - "networkType": {"type":"string"}, - "operatingSystemNameVersionMajor": {"type":"string"}, - "operatingSystemVersionMajor": {"type":"string"} - }, - "required": ["deviceClass"], - "additionalProperties": false - }""" - - private[test] implicit def jsonToSchema(json: Json): SelfDescribingSchema[Json] = - SelfDescribingSchema.parse(json).getOrElse(throw new IllegalStateException("InMemory SchemaRegistry JSON cannot be parsed as schema")) -} diff --git a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/TestEnvironment.scala b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/TestEnvironment.scala index b6812d9b1..58d772400 100644 --- a/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/TestEnvironment.scala +++ b/modules/common-fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/common/fs2/test/TestEnvironment.scala @@ -109,19 +109,8 @@ object TestEnvironment extends CatsIO { val ioBlocker: Resource[IO, Blocker] = Blocker[IO] - val embeddedRegistry = - Registry.InMemory( - Registry.Config("snowplow-enrich-pubsub embedded test registry", 1, List("com.acme")), - List( - SchemaRegistry.unstructEvent, - SchemaRegistry.contexts, - SchemaRegistry.geolocationContext, - SchemaRegistry.iabAbdRobots, - SchemaRegistry.yauaaContext, - SchemaRegistry.acmeTest, - SchemaRegistry.acmeOutput - ) - ) + val embeddedRegistry = 
Registry.EmbeddedRegistry + val igluClient: Client[IO, Json] = Client[IO, Json](Resolver(List(embeddedRegistry), None), CirceValidator) @@ -166,15 +155,20 @@ object TestEnvironment extends CatsIO { EnrichSpec.processor, StreamsSettings(Concurrency(10000, 64), 1024 * 1024), None, - None + None, + true ) _ <- Resource.eval(logger.info("TestEnvironment initialized")) } yield TestEnvironment(environment, counter, goodRef.get, piiRef.get, badRef.get) - def parseBad(bytes: Array[Byte]): BadRow = - parser - .parse(new String(bytes, UTF_8)) - .getOrElse(throw new RuntimeException("Error parsing bad row json")) + def parseBad(bytes: Array[Byte]): BadRow = { + val badRowStr = new String(bytes, UTF_8) + val parsed = + parser + .parse(badRowStr) + .getOrElse(throw new RuntimeException(s"Error parsing bad row json: $badRowStr")) + parsed .as[BadRow] - .getOrElse(throw new RuntimeException("Error decoding bad row")) + .getOrElse(throw new RuntimeException(s"Error decoding bad row: $parsed")) + } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/EtlPipeline.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/EtlPipeline.scala index a3347a953..093d5f0e1 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/EtlPipeline.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/EtlPipeline.scala @@ -46,6 +46,10 @@ object EtlPipeline { * @param processor The ETL application (Spark/Beam/Stream enrich) and its version * @param etlTstamp The ETL timestamp * @param input The ValidatedMaybeCanonicalInput + * @param acceptInvalid Whether enriched events that are invalid against + * atomic schema should be emitted as enriched events. + * If not they will be emitted as bad rows + * @param invalidCount Function to increment the count of invalid events * @return the ValidatedMaybeCanonicalOutput. 
Thanks to flatMap, will include any validation * errors contained within the ValidatedMaybeCanonicalInput */ @@ -55,7 +59,9 @@ object EtlPipeline { client: Client[F, Json], processor: Processor, etlTstamp: DateTime, - input: ValidatedNel[BadRow, Option[CollectorPayload]] + input: ValidatedNel[BadRow, Option[CollectorPayload]], + acceptInvalid: Boolean, + invalidCount: F[Unit] ): F[List[Validated[BadRow, EnrichedEvent]]] = input match { case Validated.Valid(Some(payload)) => @@ -70,7 +76,9 @@ object EtlPipeline { client, processor, etlTstamp, - event + event, + acceptInvalid, + invalidCount ) .toValidated } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala index 832fc134d..f3cdf264b 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala @@ -16,26 +16,25 @@ package enrichments import java.nio.charset.Charset import java.net.URI import java.time.Instant - import org.joda.time.DateTime - +import org.slf4j.LoggerFactory import io.circe.Json - import cats.Monad import cats.data.{EitherT, NonEmptyList, OptionT, ValidatedNel} import cats.effect.Clock import cats.implicits._ +import com.snowplowanalytics.refererparser._ + import com.snowplowanalytics.iglu.client.Client import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup -import com.snowplowanalytics.iglu.core.SelfDescribingData +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.snowplow.badrows._ import com.snowplowanalytics.snowplow.badrows.{FailureDetails, Payload, Processor} - -import 
com.snowplowanalytics.refererparser._ +import com.snowplowanalytics.snowplow.badrows.FailureDetails.EnrichmentFailure import adapters.RawEvent import enrichments.{EventEnrichments => EE} @@ -50,6 +49,10 @@ import utils.{IgluUtils, ConversionUtils => CU} object EnrichmentManager { + val atomicSchema: SchemaKey = SchemaKey("com.snowplowanalytics.snowplow", "atomic", "jsonschema", SchemaVer.Full(1, 0, 0)) + + private val logger = LoggerFactory.getLogger("InvalidEnriched") + /** * Run the enrichment workflow * @param registry Contain configuration for all enrichments to apply @@ -57,6 +60,10 @@ object EnrichmentManager { * @param processor Meta information about processing asset, for bad rows * @param etlTstamp ETL timestamp * @param raw Canonical input event to enrich + * @param acceptInvalid Whether enriched events that are invalid against + * atomic schema should be emitted as enriched events. + * If not they will be emitted as bad rows + * @param invalidCount Function to increment the count of invalid events * @return Enriched event or bad row if a problem occured */ def enrichEvent[F[_]: Monad: RegistryLookup: Clock]( @@ -64,7 +71,9 @@ object EnrichmentManager { client: Client[F, Json], processor: Processor, etlTstamp: DateTime, - raw: RawEvent + raw: RawEvent, + acceptInvalid: Boolean, + invalidCount: F[Unit] ): EitherT[F, BadRow, EnrichedEvent] = for { enriched <- EitherT.fromEither[F](setupEnrichedEvent(raw, etlTstamp, processor)) @@ -92,6 +101,7 @@ object EnrichmentManager { enriched.pii = pii.asString } } + _ <- validateEnriched(enriched, raw, processor, client, acceptInvalid, invalidCount) } yield enriched /** @@ -497,14 +507,13 @@ object EnrichmentManager { currencyConversion match { case Some(currency) => event.base_currency = currency.baseCurrency.getCode - // Note that stringToMaybeDouble is applied to either-valid-or-null event POJO + // Note that jFloatToDouble is applied to either-valid-or-null event POJO // properties, so we don't expect any of these 
four vals to be a Failure - val trTax = CU.stringToMaybeDouble("tr_tx", event.tr_tax).toValidatedNel - val tiPrice = CU.stringToMaybeDouble("ti_pr", event.ti_price).toValidatedNel - val trTotal = CU.stringToMaybeDouble("tr_tt", event.tr_total).toValidatedNel - val trShipping = CU.stringToMaybeDouble("tr_sh", event.tr_shipping).toValidatedNel + val trTax = CU.jFloatToDouble("tr_tx", event.tr_tax).toValidatedNel + val tiPrice = CU.jFloatToDouble("ti_pr", event.ti_price).toValidatedNel + val trTotal = CU.jFloatToDouble("tr_tt", event.tr_total).toValidatedNel + val trShipping = CU.jFloatToDouble("tr_sh", event.tr_shipping).toValidatedNel (for { - // better-monadic-for convertedCu <- EitherT( (trTotal, trTax, trShipping, tiPrice) .mapN { @@ -522,12 +531,14 @@ object EnrichmentManager { .sequence .map(_.flatMap(_.toEither)) ) - _ = { - event.tr_total_base = convertedCu._1.orNull - event.tr_tax_base = convertedCu._2.orNull - event.tr_shipping_base = convertedCu._3.orNull - event.ti_price_base = convertedCu._4.orNull - } + trTotalBase <- EitherT.fromEither[F](CU.doubleToJFloat("tr_total_base ", convertedCu._1).leftMap(e => NonEmptyList.one(e))) + _ = trTotalBase.map(t => event.tr_total_base = t) + trTaxBase <- EitherT.fromEither[F](CU.doubleToJFloat("tr_tax_base ", convertedCu._2).leftMap(e => NonEmptyList.one(e))) + _ = trTaxBase.map(t => event.tr_tax_base = t) + trShippingBase <- EitherT.fromEither[F](CU.doubleToJFloat("tr_shipping_base ", convertedCu._3).leftMap(e => NonEmptyList.one(e))) + _ = trShippingBase.map(t => event.tr_shipping_base = t) + tiPriceBase <- EitherT.fromEither[F](CU.doubleToJFloat("ti_price_base ", convertedCu._4).leftMap(e => NonEmptyList.one(e))) + _ = tiPriceBase.map(t => event.ti_price_base = t) } yield ()).value case None => Monad[F].pure(().asRight) } @@ -746,4 +757,73 @@ object EnrichmentManager { Failure.EnrichmentFailures(Instant.now(), fs), Payload.EnrichmentPayload(pee, re) ) + + /** + * Validates enriched events against atomic schema. 
+ * For now it's possible to accept enriched events that are not valid. + * See https://github.com/snowplow/enrich/issues/517#issuecomment-1033910690 + */ + private def validateEnriched[F[_]: Clock: Monad: RegistryLookup]( + enriched: EnrichedEvent, + raw: RawEvent, + processor: Processor, + client: Client[F, Json], + acceptInvalid: Boolean, + invalidCount: F[Unit] + ): EitherT[F, BadRow, Unit] = + EitherT( + for { + validated <- EnrichedEvent + .toAtomic(enriched) + .leftMap(err => + EnrichmentManager.buildEnrichmentFailuresBadRow( + NonEmptyList( + EnrichmentFailure( + None, + FailureDetails.EnrichmentFailureMessage.Simple( + "Error during conversion of enriched event to the atomic format" + ) + ), + List(EnrichmentFailure(None, FailureDetails.EnrichmentFailureMessage.Simple(err.toString))) + ), + EnrichedEvent.toPartiallyEnrichedEvent(enriched), + RawEvent.toRawEvent(raw), + processor + ) + ) + .toEitherT[F] + .flatMap(atomic => + client + .check(SelfDescribingData(atomicSchema, atomic)) + .leftMap(err => + EnrichmentManager.buildEnrichmentFailuresBadRow( + NonEmptyList( + EnrichmentFailure( + None, + FailureDetails.EnrichmentFailureMessage.Simple( + s"Enriched event not valid against ${atomicSchema.toSchemaUri}" + ) + ), + List(EnrichmentFailure(None, FailureDetails.EnrichmentFailureMessage.IgluError(atomicSchema, err))) + ), + EnrichedEvent.toPartiallyEnrichedEvent(enriched), + RawEvent.toRawEvent(raw), + processor + ) + ) + ) + .value + validation <- validated match { + case Left(br) if !acceptInvalid => + Monad[F].pure(Left(br)) + case Left(br) => + for { + _ <- invalidCount + _ <- Monad[F].pure(logger.debug(s"Enriched event not valid against atomic schema. 
Bad row: ${br.compact}")) + } yield Right(()) + case _ => + Monad[F].pure(Right(())) + } + } yield validation + ) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala index 645ea51fe..3c1596519 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala @@ -51,7 +51,7 @@ object Transform { ("ip", (ME.extractIp, "user_ipaddress")), ("aid", (ME.toTsvSafe, "app_id")), ("p", (ME.extractPlatform, "platform")), - ("tid", (CU.validateInteger, "txn_id")), + ("tid", (CU.stringToJInteger2, "txn_id")), ("uid", (ME.toTsvSafe, "user_id")), ("duid", (ME.toTsvSafe, "domain_userid")), ("nuid", (ME.toTsvSafe, "network_userid")), @@ -94,21 +94,21 @@ object Transform { ("ev_ac", (ME.toTsvSafe, "se_action")), // LEGACY tracker var. Leave for backwards compat ("ev_la", (ME.toTsvSafe, "se_label")), // LEGACY tracker var. Leave for backwards compat ("ev_pr", (ME.toTsvSafe, "se_property")), // LEGACY tracker var. Leave for backwards compat - ("ev_va", (CU.stringToDoubleLike, "se_value")), // LEGACY tracker var. Leave for backwards compat + ("ev_va", (CU.stringToJFloat2, "se_value")), // LEGACY tracker var. 
Leave for backwards compat ("se_ca", (ME.toTsvSafe, "se_category")), ("se_ac", (ME.toTsvSafe, "se_action")), ("se_la", (ME.toTsvSafe, "se_label")), ("se_pr", (ME.toTsvSafe, "se_property")), - ("se_va", (CU.stringToDoubleLike, "se_value")), + ("se_va", (CU.stringToJFloat2, "se_value")), // Custom unstructured events ("ue_pr", (JU.extractUnencJson, "unstruct_event")), ("ue_px", (JU.extractBase64EncJson, "unstruct_event")), // Ecommerce transactions ("tr_id", (ME.toTsvSafe, "tr_orderid")), ("tr_af", (ME.toTsvSafe, "tr_affiliation")), - ("tr_tt", (CU.stringToDoubleLike, "tr_total")), - ("tr_tx", (CU.stringToDoubleLike, "tr_tax")), - ("tr_sh", (CU.stringToDoubleLike, "tr_shipping")), + ("tr_tt", (CU.stringToJFloat2, "tr_total")), + ("tr_tx", (CU.stringToJFloat2, "tr_tax")), + ("tr_sh", (CU.stringToJFloat2, "tr_shipping")), ("tr_ci", (ME.toTsvSafe, "tr_city")), ("tr_st", (ME.toTsvSafe, "tr_state")), ("tr_co", (ME.toTsvSafe, "tr_country")), @@ -118,7 +118,7 @@ object Transform { ("ti_na", (ME.toTsvSafe, "ti_name")), // ERROR in Tracker Protocol ("ti_nm", (ME.toTsvSafe, "ti_name")), ("ti_ca", (ME.toTsvSafe, "ti_category")), - ("ti_pr", (CU.stringToDoubleLike, "ti_price")), + ("ti_pr", (CU.stringToJFloat2, "ti_price")), ("ti_qu", (CU.stringToJInteger2, "ti_quantity")), // Page pings ("pp_mix", (CU.stringToJInteger2, "pp_xoffset_min")), diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala index c0e7ff011..c2727ca10 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala @@ -117,7 +117,7 @@ final case class 
CurrencyConversionEnrichment[F[_]: Monad]( initialCurrency: Option[Either[FailureDetails.EnrichmentFailure, CurrencyUnit]], value: Option[Double], tstamp: ZonedDateTime - ): F[Either[FailureDetails.EnrichmentFailure, Option[String]]] = + ): F[Either[FailureDetails.EnrichmentFailure, Option[Double]]] = (initialCurrency, value) match { case (Some(ic), Some(v)) => (for { @@ -131,8 +131,14 @@ final case class CurrencyConversionEnrichment[F[_]: Monad]( money.map( _.bimap( l => mkEnrichmentFailure(Right(l)), - r => (r.getAmount().toPlainString()).some - ) + r => + Either.catchNonFatal(r.getAmount().doubleValue) match { + case Left(e) => + Left(mkEnrichmentFailure(Left(e))) + case Right(a) => + Right(a.some) + } + ).flatten ) ) } yield res).value @@ -160,7 +166,7 @@ final case class CurrencyConversionEnrichment[F[_]: Monad]( collectorTstamp: Option[DateTime] ): F[ValidatedNel[ FailureDetails.EnrichmentFailure, - (Option[String], Option[String], Option[String], Option[String]) + (Option[Double], Option[Double], Option[Double], Option[Double]) ]] = collectorTstamp match { case Some(tstamp) => diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/outputs/EnrichedEvent.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/outputs/EnrichedEvent.scala index c60574b6c..aa4db0c56 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/outputs/EnrichedEvent.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/outputs/EnrichedEvent.scala @@ -2,7 +2,7 @@ * Copyright (c) 2012-2022 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, - * and you may not use this file except in compliance with the Apache License Version 2.0. + * and you may not use this file except in compliance with the Apache License Version 2.0. 
* You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. * * Unless required by applicable law or agreed to in writing, @@ -15,9 +15,14 @@ package com.snowplowanalytics.snowplow.enrich.common.outputs import java.lang.{Integer => JInteger} import java.lang.{Float => JFloat} import java.lang.{Byte => JByte} +import java.time.format.DateTimeFormatter import scala.beans.BeanProperty +import cats.implicits._ + +import io.circe.Json + import com.snowplowanalytics.snowplow.badrows.Payload.PartiallyEnrichedEvent /** @@ -37,7 +42,7 @@ import com.snowplowanalytics.snowplow.badrows.Payload.PartiallyEnrichedEvent // TODO: make the EnrichedEvent Avro-format, not Redshift-specific class EnrichedEvent extends Serializable { - // The application (site, game, app etc) this event belongs to, and the tracker platform + // The application (site, game, app etc) this event belongs to, and the tracker platform @BeanProperty var app_id: String = _ @BeanProperty var platform: String = _ @@ -46,10 +51,10 @@ class EnrichedEvent extends Serializable { @BeanProperty var collector_tstamp: String = _ @BeanProperty var dvce_created_tstamp: String = _ - // Transaction (i.e. this logging event) + // Transaction (i.e. 
this logging event) @BeanProperty var event: String = _ @BeanProperty var event_id: String = _ - @BeanProperty var txn_id: String = _ + @BeanProperty var txn_id: JInteger = _ // Versioning @BeanProperty var name_tracker: String = _ @@ -121,8 +126,7 @@ class EnrichedEvent extends Serializable { @BeanProperty var se_action: String = _ @BeanProperty var se_label: String = _ @BeanProperty var se_property: String = _ - @BeanProperty var se_value: String = - _ // Technically should be a Double but may be rendered incorrectly by Cascading with scientific notification (which Redshift can't process) + @BeanProperty var se_value: JFloat = _ // Unstructured Event @BeanProperty var unstruct_event: String = _ @@ -130,9 +134,9 @@ class EnrichedEvent extends Serializable { // Ecommerce transaction (from querystring) @BeanProperty var tr_orderid: String = _ @BeanProperty var tr_affiliation: String = _ - @BeanProperty var tr_total: String = _ - @BeanProperty var tr_tax: String = _ - @BeanProperty var tr_shipping: String = _ + @BeanProperty var tr_total: JFloat = _ + @BeanProperty var tr_tax: JFloat = _ + @BeanProperty var tr_shipping: JFloat = _ @BeanProperty var tr_city: String = _ @BeanProperty var tr_state: String = _ @BeanProperty var tr_country: String = _ @@ -142,7 +146,7 @@ class EnrichedEvent extends Serializable { @BeanProperty var ti_sku: String = _ @BeanProperty var ti_name: String = _ @BeanProperty var ti_category: String = _ - @BeanProperty var ti_price: String = _ + @BeanProperty var ti_price: JFloat = _ @BeanProperty var ti_quantity: JInteger = _ // Page Pings @@ -199,11 +203,11 @@ class EnrichedEvent extends Serializable { // Currency @BeanProperty var tr_currency: String = _ - @BeanProperty var tr_total_base: String = _ - @BeanProperty var tr_tax_base: String = _ - @BeanProperty var tr_shipping_base: String = _ + @BeanProperty var tr_total_base: JFloat = _ + @BeanProperty var tr_tax_base: JFloat = _ + @BeanProperty var tr_shipping_base: JFloat = _ @BeanProperty 
var ti_currency: String = _ - @BeanProperty var ti_price_base: String = _ + @BeanProperty var ti_price_base: JFloat = _ @BeanProperty var base_currency: String = _ // Geolocation @@ -249,6 +253,162 @@ class EnrichedEvent extends Serializable { } object EnrichedEvent { + + private val JsonSchemaDateTimeFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS") + + private def toKv[T]( + k: String, + v: T, + f: T => Json + ): Option[(String, Json)] = + Option(v).map(value => (k, f(value))) + + private def toKv(k: String, s: String): Option[(String, Json)] = toKv(k, s, Json.fromString) + private def toKv(k: String, i: JInteger): Option[(String, Json)] = toKv(k, i, (jInt: JInteger) => Json.fromInt(jInt)) + private def toKv(k: String, f: JFloat): Option[(String, Json)] = toKv(k, f, (jFloat: JFloat) => Json.fromFloatOrNull(jFloat)) + private def toKv(k: String, b: JByte): Option[(String, Json)] = toKv(k, b, (jByte: JByte) => Json.fromBoolean(jByte != 0)) + private def toDateKv(k: String, s: String): Option[(String, Json)] = + toKv( + k, + s, + (s: String) => Json.fromString(DateTimeFormatter.ISO_DATE_TIME.format(JsonSchemaDateTimeFormat.parse(s))) + ) + + def toAtomic(enriched: EnrichedEvent): Either[Throwable, Json] = + Either.catchNonFatal( + Json.fromFields( + toKv("app_id", enriched.app_id) ++ + toKv("platform", enriched.platform) ++ + toDateKv("etl_tstamp", enriched.etl_tstamp) ++ + toDateKv("collector_tstamp", enriched.collector_tstamp) ++ + toDateKv("dvce_created_tstamp", enriched.dvce_created_tstamp) ++ + toKv("event", enriched.event) ++ + toKv("event_id", enriched.event_id) ++ + toKv("txn_id", enriched.txn_id) ++ + toKv("name_tracker", enriched.name_tracker) ++ + toKv("v_tracker", enriched.v_tracker) ++ + toKv("v_collector", enriched.v_collector) ++ + toKv("v_etl", enriched.v_etl) ++ + toKv("user_id", enriched.user_id) ++ + toKv("user_ipaddress", enriched.user_ipaddress) ++ + toKv("user_fingerprint", enriched.user_fingerprint) ++ + toKv("domain_userid", 
enriched.domain_userid) ++ + toKv("domain_sessionidx", enriched.domain_sessionidx) ++ + toKv("network_userid", enriched.network_userid) ++ + toKv("geo_country", enriched.geo_country) ++ + toKv("geo_region", enriched.geo_region) ++ + toKv("geo_city", enriched.geo_city) ++ + toKv("geo_zipcode", enriched.geo_zipcode) ++ + toKv("geo_latitude", enriched.geo_latitude) ++ + toKv("geo_longitude", enriched.geo_longitude) ++ + toKv("geo_region_name", enriched.geo_region_name) ++ + toKv("ip_isp", enriched.ip_isp) ++ + toKv("ip_organization", enriched.ip_organization) ++ + toKv("ip_domain", enriched.ip_domain) ++ + toKv("ip_netspeed", enriched.ip_netspeed) ++ + toKv("page_url", enriched.page_url) ++ + toKv("page_title", enriched.page_title) ++ + toKv("page_referrer", enriched.page_referrer) ++ + toKv("page_urlscheme", enriched.page_urlscheme) ++ + toKv("page_urlhost", enriched.page_urlhost) ++ + toKv("page_urlport", enriched.page_urlport) ++ + toKv("page_urlpath", enriched.page_urlpath) ++ + toKv("page_urlquery", enriched.page_urlquery) ++ + toKv("page_urlfragment", enriched.page_urlfragment) ++ + toKv("refr_urlscheme", enriched.refr_urlscheme) ++ + toKv("refr_urlhost", enriched.refr_urlhost) ++ + toKv("refr_urlport", enriched.refr_urlport) ++ + toKv("refr_urlpath", enriched.refr_urlpath) ++ + toKv("refr_urlquery", enriched.refr_urlquery) ++ + toKv("refr_urlfragment", enriched.refr_urlfragment) ++ + toKv("refr_medium", enriched.refr_medium) ++ + toKv("refr_source", enriched.refr_source) ++ + toKv("refr_term", enriched.refr_term) ++ + toKv("mkt_medium", enriched.mkt_medium) ++ + toKv("mkt_source", enriched.mkt_source) ++ + toKv("mkt_term", enriched.mkt_term) ++ + toKv("mkt_content", enriched.mkt_content) ++ + toKv("mkt_campaign", enriched.mkt_campaign) ++ + toKv("se_category", enriched.se_category) ++ + toKv("se_action", enriched.se_action) ++ + toKv("se_label", enriched.se_label) ++ + toKv("se_property", enriched.se_property) ++ + toKv("se_value", enriched.se_value) ++ + 
toKv("tr_orderid", enriched.tr_orderid) ++ + toKv("tr_affiliation", enriched.tr_affiliation) ++ + toKv("tr_total", enriched.tr_total) ++ + toKv("tr_tax", enriched.tr_tax) ++ + toKv("tr_shipping", enriched.tr_shipping) ++ + toKv("tr_city", enriched.tr_city) ++ + toKv("tr_state", enriched.tr_state) ++ + toKv("tr_country", enriched.tr_country) ++ + toKv("ti_orderid", enriched.ti_orderid) ++ + toKv("ti_sku", enriched.ti_sku) ++ + toKv("ti_name", enriched.ti_name) ++ + toKv("ti_category", enriched.ti_category) ++ + toKv("ti_price", enriched.ti_price) ++ + toKv("ti_quantity", enriched.ti_quantity) ++ + toKv("pp_xoffset_min", enriched.pp_xoffset_min) ++ + toKv("pp_xoffset_max", enriched.pp_xoffset_max) ++ + toKv("pp_yoffset_min", enriched.pp_yoffset_min) ++ + toKv("pp_yoffset_max", enriched.pp_yoffset_max) ++ + toKv("useragent", enriched.useragent) ++ + toKv("br_name", enriched.br_name) ++ + toKv("br_family", enriched.br_family) ++ + toKv("br_version", enriched.br_version) ++ + toKv("br_type", enriched.br_type) ++ + toKv("br_renderengine", enriched.br_renderengine) ++ + toKv("br_lang", enriched.br_lang) ++ + toKv("br_features_pdf", enriched.br_features_pdf) ++ + toKv("br_features_flash", enriched.br_features_flash) ++ + toKv("br_features_java", enriched.br_features_java) ++ + toKv("br_features_director", enriched.br_features_director) ++ + toKv("br_features_quicktime", enriched.br_features_quicktime) ++ + toKv("br_features_realplayer", enriched.br_features_realplayer) ++ + toKv("br_features_windowsmedia", enriched.br_features_windowsmedia) ++ + toKv("br_features_gears", enriched.br_features_gears) ++ + toKv("br_features_silverlight", enriched.br_features_silverlight) ++ + toKv("br_cookies", enriched.br_cookies) ++ + toKv("br_colordepth", enriched.br_colordepth) ++ + toKv("br_viewwidth", enriched.br_viewwidth) ++ + toKv("br_viewheight", enriched.br_viewheight) ++ + toKv("os_name", enriched.os_name) ++ + toKv("os_family", enriched.os_family) ++ + toKv("os_manufacturer", 
enriched.os_manufacturer) ++ + toKv("os_timezone", enriched.os_timezone) ++ + toKv("dvce_type", enriched.dvce_type) ++ + toKv("dvce_ismobile", enriched.dvce_ismobile) ++ + toKv("dvce_screenwidth", enriched.dvce_screenwidth) ++ + toKv("dvce_screenheight", enriched.dvce_screenheight) ++ + toKv("doc_charset", enriched.doc_charset) ++ + toKv("doc_width", enriched.doc_width) ++ + toKv("doc_height", enriched.doc_height) ++ + toKv("tr_currency", enriched.tr_currency) ++ + toKv("tr_total_base", enriched.tr_total_base) ++ + toKv("tr_tax_base", enriched.tr_tax_base) ++ + toKv("tr_shipping_base", enriched.tr_shipping_base) ++ + toKv("ti_currency", enriched.ti_currency) ++ + toKv("ti_price_base", enriched.ti_price_base) ++ + toKv("base_currency", enriched.base_currency) ++ + toKv("geo_timezone", enriched.geo_timezone) ++ + toKv("mkt_clickid", enriched.mkt_clickid) ++ + toKv("mkt_network", enriched.mkt_network) ++ + toKv("etl_tags", enriched.etl_tags) ++ + toDateKv("dvce_sent_tstamp", enriched.dvce_sent_tstamp) ++ + toKv("refr_domain_userid", enriched.refr_domain_userid) ++ + toDateKv("refr_dvce_tstamp", enriched.refr_dvce_tstamp) ++ + toKv("domain_sessionid", enriched.domain_sessionid) ++ + toDateKv("derived_tstamp", enriched.derived_tstamp) ++ + toKv("event_vendor", enriched.event_vendor) ++ + toKv("event_name", enriched.event_name) ++ + toKv("event_format", enriched.event_format) ++ + toKv("event_version", enriched.event_version) ++ + toKv("event_fingerprint", enriched.event_fingerprint) ++ + toDateKv("true_tstamp", enriched.true_tstamp) + ) + ) + def toPartiallyEnrichedEvent(enrichedEvent: EnrichedEvent): PartiallyEnrichedEvent = PartiallyEnrichedEvent( app_id = Option(enrichedEvent.app_id), @@ -258,7 +418,7 @@ object EnrichedEvent { dvce_created_tstamp = Option(enrichedEvent.dvce_created_tstamp), event = Option(enrichedEvent.event), event_id = Option(enrichedEvent.event_id), - txn_id = Option(enrichedEvent.txn_id), + txn_id = Option(enrichedEvent.txn_id).map(_.toString), 
name_tracker = Option(enrichedEvent.name_tracker), v_tracker = Option(enrichedEvent.v_tracker), v_collector = Option(enrichedEvent.v_collector), @@ -308,13 +468,13 @@ object EnrichedEvent { se_action = Option(enrichedEvent.se_action), se_label = Option(enrichedEvent.se_label), se_property = Option(enrichedEvent.se_property), - se_value = Option(enrichedEvent.se_value), + se_value = Option(enrichedEvent.se_value).map(_.toString), unstruct_event = Option(enrichedEvent.unstruct_event), tr_orderid = Option(enrichedEvent.tr_orderid), tr_affiliation = Option(enrichedEvent.tr_affiliation), - tr_total = Option(enrichedEvent.tr_total), - tr_tax = Option(enrichedEvent.tr_tax), - tr_shipping = Option(enrichedEvent.tr_shipping), + tr_total = Option(enrichedEvent.tr_total).map(_.toString), + tr_tax = Option(enrichedEvent.tr_tax).map(_.toString), + tr_shipping = Option(enrichedEvent.tr_shipping).map(_.toString), tr_city = Option(enrichedEvent.tr_city), tr_state = Option(enrichedEvent.tr_state), tr_country = Option(enrichedEvent.tr_country), @@ -322,7 +482,7 @@ object EnrichedEvent { ti_sku = Option(enrichedEvent.ti_sku), ti_name = Option(enrichedEvent.ti_name), ti_category = Option(enrichedEvent.ti_category), - ti_price = Option(enrichedEvent.ti_price), + ti_price = Option(enrichedEvent.ti_price).map(_.toString), ti_quantity = Option(enrichedEvent.ti_quantity).map(Integer2int), pp_xoffset_min = Option(enrichedEvent.pp_xoffset_min).map(Integer2int), pp_xoffset_max = Option(enrichedEvent.pp_xoffset_max).map(Integer2int), @@ -360,11 +520,11 @@ object EnrichedEvent { doc_width = Option(enrichedEvent.doc_width).map(Integer2int), doc_height = Option(enrichedEvent.doc_height).map(Integer2int), tr_currency = Option(enrichedEvent.tr_currency), - tr_total_base = Option(enrichedEvent.tr_total_base), - tr_tax_base = Option(enrichedEvent.tr_tax_base), - tr_shipping_base = Option(enrichedEvent.tr_shipping_base), + tr_total_base = Option(enrichedEvent.tr_total_base).map(_.toString), + 
tr_tax_base = Option(enrichedEvent.tr_tax_base).map(_.toString), + tr_shipping_base = Option(enrichedEvent.tr_shipping_base).map(_.toString), ti_currency = Option(enrichedEvent.ti_currency), - ti_price_base = Option(enrichedEvent.ti_price_base), + ti_price_base = Option(enrichedEvent.ti_price_base).map(_.toString), base_currency = Option(enrichedEvent.base_currency), geo_timezone = Option(enrichedEvent.geo_timezone), mkt_clickid = Option(enrichedEvent.mkt_clickid), diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala index 30cb4cf11..5079c6d8b 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala @@ -13,13 +13,14 @@ package com.snowplowanalytics.snowplow.enrich.common package utils -import java.lang.{Byte => JByte, Integer => JInteger} +import java.lang.{Byte => JByte, Float => JFloat, Integer => JInteger} import java.lang.reflect.Field import java.math.{BigDecimal => JBigDecimal} import java.net.{InetAddress, URI, URLDecoder, URLEncoder} import java.nio.charset.Charset import java.nio.charset.StandardCharsets.UTF_8 import java.util.UUID +import java.io.{PrintWriter, StringWriter} import scala.collection.JavaConverters._ import scala.util.Try @@ -340,6 +341,25 @@ object ConversionUtils { FailureDetails.EnrichmentFailure(None, f) } + val stringToJFloat: String => Either[String, JFloat] = str => + if (Option(str).isEmpty) + null.asInstanceOf[JFloat].asRight + else + Either + .catchNonFatal(JFloat.valueOf(str)) + .leftMap(e => s"cannot be converted to java.lang.Float. 
Error : ${e.getMessage}") + + val stringToJFloat2: (String, String) => Either[FailureDetails.EnrichmentFailure, JFloat] = + (field, str) => + stringToJFloat(str).leftMap { e => + val f = FailureDetails.EnrichmentFailureMessage.InputData( + field, + Option(str), + e + ) + FailureDetails.EnrichmentFailure(None, f) + } + /** * Convert a String to a String containing a Redshift-compatible Double. * Necessary because Redshift does not support all Java Double syntaxes e.g. "3.4028235E38" @@ -398,6 +418,40 @@ object ConversionUtils { ) ) + /** Convert a java Float to a Double */ + def jFloatToDouble(field: String, f: JFloat): Either[FailureDetails.EnrichmentFailure, Option[Double]] = + Either + .catchNonFatal { + Option(f).map(_.toDouble) + } + .leftMap(_ => + FailureDetails.EnrichmentFailure( + None, + FailureDetails.EnrichmentFailureMessage.InputData( + field, + Option(f).map(_.toString), + "cannot be converted to Double" + ) + ) + ) + + /** Convert a Double to a java Float */ + def doubleToJFloat(field: String, d: Option[Double]): Either[FailureDetails.EnrichmentFailure, Option[JFloat]] = + Either + .catchNonFatal { + d.map(dd => JFloat.valueOf(dd.toFloat)) + } + .leftMap(_ => + FailureDetails.EnrichmentFailure( + None, + FailureDetails.EnrichmentFailureMessage.InputData( + field, + d.map(_.toString), + "cannot be converted to java Float" + ) + ) + ) + /** * Converts a String to a Double with two decimal places. Used to honor schemas with * multipleOf 0.01. 
@@ -537,4 +591,9 @@ object ConversionUtils { } .mkString("\t") + def cleanStackTrace(t: Throwable): String = { + val sw = new StringWriter + t.printStackTrace(new PrintWriter(sw)) + sw.toString + } } diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 new file mode 100644 index 000000000..0b7ff5790 --- /dev/null +++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.snowplowanalytics.snowplow/atomic/jsonschema/1-0-0 @@ -0,0 +1,489 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for an atomic canonical Snowplow event", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "atomic", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "app_id": { + "type": ["string", "null"], + "maxLength": 255 + }, + "platform": { + "type": ["string", "null"], + "maxLength": 255 + }, + "etl_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "collector_tstamp": { + "type": "string", + "format": "date-time" + }, + "dvce_created_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "event": { + "type": ["string", "null"], + "maxLength": 128 + }, + "event_id": { + "type": "string", + "maxLength": 36 + }, + "txn_id": { + "type": ["integer", "null"] + }, + "name_tracker": { + "type": ["string", "null"], + "maxLength": 128 + }, + "v_tracker": { + "type": ["string", "null"], + "maxLength": 100 + }, + "v_collector": { + "type": "string", + "maxLength": 100 + }, + "v_etl": { + "type": "string", + "maxLength": 100 + }, + "user_id": { + "type": ["string", "null"], + "maxLength": 255 + }, + "user_ipaddress": { + "type": ["string", "null"], + "maxLength": 128 + }, + "user_fingerprint": { + "type": ["string", "null"], + "maxLength": 
128 + }, + "domain_userid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "domain_sessionidx": { + "type": ["integer", "null"] + }, + "network_userid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "geo_country": { + "type": ["string", "null"], + "maxLength": 2 + }, + "geo_region": { + "type": ["string", "null"], + "maxLength": 3 + }, + "geo_city": { + "type": ["string", "null"], + "maxLength": 75 + }, + "geo_zipcode": { + "type": ["string", "null"], + "maxLength": 15 + }, + "geo_latitude": { + "type": ["number", "null"] + }, + "geo_longitude": { + "type": ["number", "null"] + }, + "geo_region_name": { + "type": ["string", "null"], + "maxLength": 100 + }, + "ip_isp": { + "type": ["string", "null"], + "maxLength": 100 + }, + "ip_organization": { + "type": ["string", "null"], + "maxLength": 128 + }, + "ip_domain": { + "type": ["string", "null"], + "maxLength": 128 + }, + "ip_netspeed": { + "type": ["string", "null"], + "maxLength": 100 + }, + "page_url": { + "type": ["string", "null"], + "maxLength": 4096 + }, + "page_title": { + "type": ["string", "null"], + "maxLength": 2000 + }, + "page_referrer": { + "type": ["string", "null"], + "maxLength": 4096 + }, + "page_urlscheme": { + "type": ["string", "null"], + "maxLength": 16 + }, + "page_urlhost": { + "type": ["string", "null"], + "maxLength": 255 + }, + "page_urlport": { + "type": ["integer", "null"] + }, + "page_urlpath": { + "type": ["string", "null"], + "maxLength": 3000 + }, + "page_urlquery": { + "type": ["string", "null"], + "maxLength": 6000 + }, + "page_urlfragment": { + "type": ["string", "null"], + "maxLength": 3000 + }, + "refr_urlscheme": { + "type": ["string", "null"], + "maxLength": 16 + }, + "refr_urlhost": { + "type": ["string", "null"], + "maxLength": 255 + }, + "refr_urlport": { + "type": ["integer", "null"] + }, + "refr_urlpath": { + "type": ["string", "null"], + "maxLength": 6000 + }, + "refr_urlquery": { + "type": ["string", "null"], + "maxLength": 6000 + }, + 
"refr_urlfragment": { + "type": ["string", "null"], + "maxLength": 3000 + }, + "refr_medium": { + "type": ["string", "null"], + "maxLength": 25 + }, + "refr_source": { + "type": ["string", "null"], + "maxLength": 50 + }, + "refr_term": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_medium": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_source": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_term": { + "type": ["string", "null"], + "maxLength": 255 + }, + "mkt_content": { + "type": ["string", "null"], + "maxLength": 500 + }, + "mkt_campaign": { + "type": ["string", "null"], + "maxLength": 255 + }, + "se_category": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "se_action": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "se_label": { + "type": ["string", "null"], + "maxLength": 4096 + }, + "se_property": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "se_value": { + "type": ["number", "null"] + }, + "tr_orderid": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_affiliation": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_total": { + "type": ["number", "null"] + }, + "tr_tax": { + "type": ["number", "null"] + }, + "tr_shipping": { + "type": ["number", "null"] + }, + "tr_city": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_state": { + "type": ["string", "null"], + "maxLength": 255 + }, + "tr_country": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_orderid": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_sku": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_name": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_category": { + "type": ["string", "null"], + "maxLength": 255 + }, + "ti_price": { + "type": ["number", "null"] + }, + "ti_quantity": { + "type": ["integer", "null"] + }, + "pp_xoffset_min": { + "type": ["integer", "null"] + }, + "pp_xoffset_max": { + "type": ["integer", "null"] 
+ }, + "pp_yoffset_min": { + "type": ["integer", "null"] + }, + "pp_yoffset_max": { + "type": ["integer", "null"] + }, + "useragent": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "br_name": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_family": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_version": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_type": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_renderengine": { + "type": ["string", "null"], + "maxLength": 50 + }, + "br_lang": { + "type": ["string", "null"], + "maxLength": 255 + }, + "br_features_pdf": { + "type": ["boolean", "null"] + }, + "br_features_flash": { + "type": ["boolean", "null"] + }, + "br_features_java": { + "type": ["boolean", "null"] + }, + "br_features_director": { + "type": ["boolean", "null"] + }, + "br_features_quicktime": { + "type": ["boolean", "null"] + }, + "br_features_realplayer": { + "type": ["boolean", "null"] + }, + "br_features_windowsmedia": { + "type": ["boolean", "null"] + }, + "br_features_gears": { + "type": ["boolean", "null"] + }, + "br_features_silverlight": { + "type": ["boolean", "null"] + }, + "br_cookies": { + "type": ["boolean", "null"] + }, + "br_colordepth": { + "type": ["string", "null"], + "maxLength": 12 + }, + "br_viewwidth": { + "type": ["integer", "null"] + }, + "br_viewheight": { + "type": ["integer", "null"] + }, + "os_name": { + "type": ["string", "null"], + "maxLength": 50 + }, + "os_family": { + "type": ["string", "null"], + "maxLength": 50 + }, + "os_manufacturer": { + "type": ["string", "null"], + "maxLength": 50 + }, + "os_timezone": { + "type": ["string", "null"], + "maxLength": 255 + }, + "dvce_type": { + "type": ["string", "null"], + "maxLength": 50 + }, + "dvce_ismobile": { + "type": ["boolean", "null"] + }, + "dvce_screenwidth": { + "type": ["integer", "null"] + }, + "dvce_screenheight": { + "type": ["integer", "null"] + }, + "doc_charset": { + "type": ["string", "null"], + 
"maxLength": 128 + }, + "doc_width": { + "type": ["integer", "null"] + }, + "doc_height": { + "type": ["integer", "null"] + }, + "tr_currency": { + "type": ["string", "null"], + "maxLength": 3 + }, + "tr_total_base": { + "type": ["number", "null"] + }, + "tr_tax_base": { + "type": ["number", "null"] + }, + "tr_shipping_base": { + "type": ["number", "null"] + }, + "ti_currency": { + "type": ["string", "null"], + "maxLength": 3 + }, + "ti_price_base": { + "type": ["number", "null"] + }, + "base_currency": { + "type": ["string", "null"], + "maxLength": 3 + }, + "geo_timezone": { + "type": ["string", "null"], + "maxLength": 64 + }, + "mkt_clickid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "mkt_network": { + "type": ["string", "null"], + "maxLength": 64 + }, + "etl_tags": { + "type": ["string", "null"], + "maxLength": 500 + }, + "dvce_sent_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "refr_domain_userid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "refr_dvce_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "domain_sessionid": { + "type": ["string", "null"], + "maxLength": 128 + }, + "derived_tstamp": { + "type": ["string", "null"], + "format": "date-time" + }, + "event_vendor": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "event_name": { + "type": ["string", "null"], + "maxLength": 1000 + }, + "event_format": { + "type": ["string", "null"], + "maxLength": 128 + }, + "event_version": { + "type": ["string", "null"], + "maxLength": 128 + }, + "event_fingerprint": { + "type": ["string", "null"], + "maxLength": 128 + }, + "true_tstamp": { + "type": ["string", "null"], + "format": "date-time" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/AcceptInvalid.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/AcceptInvalid.scala new file mode 
100644 index 000000000..1c7229a63 --- /dev/null +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/AcceptInvalid.scala @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2022-2022 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.common + +/* For transition, until EnrichmentManager.validateEnriched never accepts invalid any more + * See https://github.com/snowplow/enrich/issues/517#issuecomment-1033910690 + */ +object AcceptInvalid { + val acceptInvalid = false + val countInvalid = () +} diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala index 81c01aa9f..48032c6bf 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala @@ -64,7 +64,9 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { client, processor, dateTime, - Some(collectorPayloadBatched).validNel + Some(collectorPayloadBatched).validNel, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) output must be like { case a :: b :: c :: d :: Nil => @@ -84,7 +86,9 @@ class EtlPipelineSpec 
extends Specification with ValidatedMatchers { client, processor, dateTime, - Some(collectorPayload).validNel + Some(collectorPayload).validNel, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) ) must beValid.like { case Validated.Valid(_: EnrichedEvent) :: Nil => ok @@ -100,7 +104,9 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { client, processor, dateTime, - invalidCollectorPayload + invalidCollectorPayload, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) must be like { case Validated.Invalid(_: BadRow.CPFormatViolation) :: Nil => ok case other => ko(s"One invalid CPFormatViolation expected, got ${other}") @@ -115,7 +121,9 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { client, processor, dateTime, - collectorPayload.validNel[BadRow] + collectorPayload.validNel[BadRow], + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) must beEqualTo(Nil) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala index 3cc48cf9f..d9fc78111 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala @@ -20,6 +20,7 @@ import cats.data.NonEmptyList import io.circe.literal._ import org.joda.time.DateTime import com.snowplowanalytics.snowplow.badrows._ +import com.snowplowanalytics.snowplow.badrows.FailureDetails.EnrichmentFailureMessage import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer} import loaders._ import adapters.RawEvent @@ -36,7 +37,6 @@ import enrichments.registry.{IabEnrichment, JavascriptScriptEnrichment, YauaaEnr import org.apache.commons.codec.digest.DigestUtils import 
org.specs2.mutable.Specification import org.specs2.matcher.EitherMatchers - import SpecHelpers._ class EnrichmentManagerSpec extends Specification with EitherMatchers { @@ -64,12 +64,14 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { """ ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent( + val enriched = EnrichmentManager.enrichEvent[Id]( enrichmentReg, client, processor, timestamp, - rawEvent + rawEvent, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) enriched.value must beLeft.like { @@ -97,12 +99,14 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { }""" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent( + val enriched = EnrichmentManager.enrichEvent[Id]( enrichmentReg, client, processor, timestamp, - rawEvent + rawEvent, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) enriched.value must beLeft.like { case _: BadRow.SchemaViolations => ok @@ -139,12 +143,14 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "p" -> "web" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent( + val enriched = EnrichmentManager.enrichEvent[Id]( enrichmentReg, client, processor, timestamp, - rawEvent + rawEvent, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) enriched.value must beLeft.like { case BadRow.EnrichmentFailures( @@ -202,12 +208,14 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "p" -> "web" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent( + val enriched = EnrichmentManager.enrichEvent[Id]( enrichmentReg, client, processor, timestamp, - rawEvent + rawEvent, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) enriched.value must beLeft.like { case 
BadRow.EnrichmentFailures( @@ -261,12 +269,14 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { }""" ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) - val enriched = EnrichmentManager.enrichEvent( + val enriched = EnrichmentManager.enrichEvent[Id]( enrichmentReg, client, processor, timestamp, - rawEvent + rawEvent, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) enriched.value must beRight } @@ -321,12 +331,14 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ) ).some ) - val enriched = EnrichmentManager.enrichEvent( + val enriched = EnrichmentManager.enrichEvent[Id]( enrichmentReg, client, processor, timestamp, - rawEvent + rawEvent, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) enriched.value must beRight } @@ -381,12 +393,14 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ) ).some ) - val enriched = EnrichmentManager.enrichEvent( + val enriched = EnrichmentManager.enrichEvent[Id]( enrichmentReg, client, processor, timestamp, - rawEvent + rawEvent, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) enriched.value must beRight } @@ -441,12 +455,14 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ) ).some ) - val enriched = EnrichmentManager.enrichEvent( + val enriched = EnrichmentManager.enrichEvent[Id]( enrichmentReg, client, processor, timestamp, - rawEvent + rawEvent, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) enriched.value must beLeft } @@ -501,14 +517,15 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ) ).some ) - def enriched = - EnrichmentManager.enrichEvent( - enrichmentReg, - client, - processor, - timestamp, - rawEvent - ) + val enriched = EnrichmentManager.enrichEvent[Id]( + enrichmentReg, + client, + processor, + timestamp, + rawEvent, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid + ) enriched.value must beLeft } @@ -568,14 
+585,15 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ) ).some ) - def enriched = - EnrichmentManager.enrichEvent( - enrichmentReg, - client, - processor, - timestamp, - rawEvent - ) + val enriched = EnrichmentManager.enrichEvent[Id]( + enrichmentReg, + client, + processor, + timestamp, + rawEvent, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid + ) enriched.value must beLeft } @@ -589,12 +607,14 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ).toOpt val contextWithUa = context.copy(useragent = Some("header-useragent")) val rawEvent = RawEvent(api, parameters, None, source, contextWithUa) - val enriched = EnrichmentManager.enrichEvent( + val enriched = EnrichmentManager.enrichEvent[Id]( enrichmentReg, client, processor, timestamp, - rawEvent + rawEvent, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) enriched.value.map(_.useragent) must beRight(qs_ua) enriched.value.map(_.derived_contexts) must beRight((_: String).contains("\"agentName\":\"Firefox\"")) @@ -608,12 +628,14 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ).toOpt val contextWithUa = context.copy(useragent = Some("header-useragent")) val rawEvent = RawEvent(api, parameters, None, source, contextWithUa) - val enriched = EnrichmentManager.enrichEvent( + val enriched = EnrichmentManager.enrichEvent[Id]( enrichmentReg, client, processor, timestamp, - rawEvent + rawEvent, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) enriched.value.map(_.useragent) must beRight("header-useragent") } @@ -686,6 +708,50 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { EnrichmentManager.getCollectorVersionSet(input) must beRight(()) } } + + "validateEnriched" should { + "create a bad row if a field is oversized" >> { + EnrichmentManager + .enrichEvent[Id]( + enrichmentReg, + client, + processor, + timestamp, + RawEvent(api, fatBody, None, source, context), + false, + 
AcceptInvalid.countInvalid + ) + .swap + .map { + case BadRow.EnrichmentFailures(_, failure, _) => + failure.messages.map(_.message match { + case EnrichmentFailureMessage.Simple(error) => error + case EnrichmentFailureMessage.IgluError(schemaKey, _) => schemaKey + case _ => None + }) + case _ => None + } + .getOrElse(None) === NonEmptyList( + s"Enriched event not valid against ${EnrichmentManager.atomicSchema.toSchemaUri}", + List(EnrichmentManager.atomicSchema) + ) + } + + "not create a bad row if a field is oversized and acceptInvalid is set to true" >> { + EnrichmentManager + .enrichEvent[Id]( + enrichmentReg, + client, + processor, + timestamp, + RawEvent(api, fatBody, None, source, context), + true, + AcceptInvalid.countInvalid + ) + .map(_ => true) + .getOrElse(false) must beTrue + } + } } object EnrichmentManagerSpec { @@ -706,6 +772,18 @@ object EnrichmentManagerSpec { None ) + val leanBody = Map( + "e" -> "pp", + "tv" -> "js-0.13.1", + "p" -> "web" + ).toOpt + + val fatBody = Map( + "e" -> "pp", + "tv" -> s"${"s" * 500}", + "p" -> "web" + ).toOpt + val iabEnrichment = IabEnrichment .parse( json"""{ @@ -739,4 +817,5 @@ object EnrichmentManagerSpec { .getOrElse(throw new RuntimeException("IAB enrichment couldn't be initialised")) // to make sure it's not none .enrichment[Id] .some + } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala index cbc2b6787..44a668540 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala @@ -28,22 +28,284 @@ import org.joda.money.CurrencyUnit import 
org.joda.time.DateTime import org.specs2.Specification -import org.specs2.matcher.DataTables -object CurrencyConversionEnrichmentSpec { - val OerApiKey = "OER_KEY" -} +class CurrencyConversionEnrichmentSpec extends Specification { + import CurrencyConversionEnrichmentSpec._ -/** Tests the convertCurrencies function */ -import CurrencyConversionEnrichmentSpec._ -class CurrencyConversionEnrichmentSpec extends Specification with DataTables { def is = skipAllIf(sys.env.get(OerApiKey).isEmpty) ^ s2""" - Failure test for Currency Conversion $e1 - Success test for Currency Conversion $e2 + Failure for invalid transaction currency $e1 + Failure for invalid transaction item currency $e2 + Failure for invalid OER API key $e3 + Success for all fields absent $e4 + Success for all fields absent except currency $e5 + Success for no transaction currency, tax, or shipping $e6 + Success for no transaction currency or total $e7 + Success for no transaction currency $e8 + Success for transaction item null $e9 + Success for valid app id and API key $e10 + Success for both currencies null $e11 + Success for converting to the same currency $e12 + Success for valid app id and API key $e13 """ + def e1 = { + val input = + Input( + Some("RUP"), + Some(11.00), + Some(1.17), + Some(0.00), + None, + Some(17.99), + Some(coTstamp) + ) + val expected: Result = Validated.Invalid( + NonEmptyList.of( + ef( + FailureDetails.EnrichmentFailureMessage + .InputData("tr_currency", Some("RUP"), "Unknown currency 'RUP'") + ), + ef( + FailureDetails.EnrichmentFailureMessage + .InputData("tr_currency", Some("RUP"), "Unknown currency 'RUP'") + ), + ef( + FailureDetails.EnrichmentFailureMessage + .InputData("tr_currency", Some("RUP"), "Unknown currency 'RUP'") + ) + ) + ) + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e2 = { + val input = + Input( + None, + Some(12.00), + Some(0.7), + Some(0.00), + Some("HUL"), + Some(1.99), + Some(coTstamp) + ) + val expected: Result = ef( + 
FailureDetails.EnrichmentFailureMessage.InputData( + "ti_currency", + Some("HUL"), + "Unknown currency 'HUL'" + ) + ).invalidNel + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e3 = { + val input = + Input( + None, + Some(13.00), + Some(3.67), + Some(0.00), + Some("GBP"), + Some(2.99), + Some(coTstamp) + ) + val wrongKey = "8A8A8A8A8A8A8A8A8A8A8A8AA8A8A8A8" + val expected: Result = ef( + FailureDetails.EnrichmentFailureMessage.Simple( + "Open Exchange Rates error, type: [OtherErrors], message: [invalid_app_id]" + ) + ).invalidNel + val actual = runEnrichment(input, wrongKey) + actual must beEqualTo(expected) + } + + def e4 = { + val input = + Input( + None, + None, + None, + None, + None, + None, + None + ) + val expected: Result = + ef( + FailureDetails.EnrichmentFailureMessage.InputData( + "collector_tstamp", + None, + "missing" + ) + ).invalidNel + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e5 = { + val input = + Input( + Some("GBP"), + None, + None, + None, + Some("GBP"), + None, + None + ) + val expected: Result = + ef( + FailureDetails.EnrichmentFailureMessage + .InputData("collector_tstamp", None, "missing") + ).invalidNel + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e6 = { + val input = + Input( + Some("GBP"), + Some(11.00), + None, + None, + None, + None, + Some(coTstamp) + ) + val expected: Result = (Some(12.75), None, None, None).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e7 = { + val input = + Input( + Some("GBP"), + None, + Some(2.67), + Some(0.00), + None, + None, + Some(coTstamp) + ) + val expected: Result = (None, Some(3.09), Some(0.00), None).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e8 = { + val input = + Input( + None, + None, + None, + None, + Some("GBP"), + Some(12.99), + Some(coTstamp) + ) + val expected: Result = (None, None, None, 
Some(15.05)).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e9 = { + val input = + Input( + Some("GBP"), + Some(11.00), + Some(2.67), + Some(0.00), + None, + None, + Some(coTstamp) + ) + val expected: Result = (Some(12.75), Some(3.09), Some(0.00), None).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e10 = { + val input = + Input( + None, + Some(14.00), + Some(4.67), + Some(0.00), + Some("GBP"), + Some(10.99), + Some(coTstamp) + ) + val expected: Result = + (None, None, None, Some(12.74)).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e11 = { + val input = + Input( + None, + Some(11.00), + Some(2.67), + Some(0.00), + None, + Some(12.99), + Some(coTstamp) + ) + val expected: Result = (None, None, None, None).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e12 = { + val input = + Input( + Some("EUR"), + Some(11.00), + Some(2.67), + Some(0.00), + Some("EUR"), + Some(12.99), + Some(coTstamp) + ) + val expected: Result = + ( + Some(11.00), + Some(2.67), + Some(0.00), + Some(12.99) + ).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } + + def e13 = { + val input = + Input( + Some("GBP"), + Some(16.00), + Some(2.67), + Some(0.00), + None, + Some(10.00), + Some(coTstamp) + ) + val expected: Result = (Some(18.54), Some(3.09), Some(0.00), None).valid + val actual = runEnrichment(input) + actual must beEqualTo(expected) + } +} + +object CurrencyConversionEnrichmentSpec { + val OerApiKey = "OER_KEY" + lazy val validAppKey = sys.env .getOrElse(OerApiKey, throw new IllegalStateException( @@ -52,7 +314,7 @@ class CurrencyConversionEnrichmentSpec extends Specification with DataTables { ) type Result = ValidatedNel[ FailureDetails.EnrichmentFailure, - (Option[String], Option[String], Option[String], Option[String]) + (Option[Double], Option[Double], Option[Double], Option[Double]) ] val 
schemaKey = SchemaKey("vendor", "name", "format", SchemaVer.Full(1, 0, 0)) val ef: FailureDetails.EnrichmentFailureMessage => FailureDetails.EnrichmentFailure = m => @@ -60,141 +322,33 @@ class CurrencyConversionEnrichmentSpec extends Specification with DataTables { FailureDetails.EnrichmentInformation(schemaKey, "currency-conversion").some, m ) - val currencyInvalidRup: Result = Validated.Invalid( - NonEmptyList.of( - ef( - FailureDetails.EnrichmentFailureMessage - .InputData("tr_currency", Some("RUP"), "Unknown currency 'RUP'") - ), - ef( - FailureDetails.EnrichmentFailureMessage - .InputData("tr_currency", Some("RUP"), "Unknown currency 'RUP'") - ), - ef( - FailureDetails.EnrichmentFailureMessage - .InputData("tr_currency", Some("RUP"), "Unknown currency 'RUP'") - ) - ) - ) - val currencyInvalidHul: Result = ef( - FailureDetails.EnrichmentFailureMessage.InputData( - "ti_currency", - Some("HUL"), - "Unknown currency 'HUL'" - ) - ).invalidNel - val invalidAppKeyFailure: Result = ef( - FailureDetails.EnrichmentFailureMessage.Simple( - "Open Exchange Rates error, type: [OtherErrors], message: [invalid_app_id]" - ) - ).invalidNel val coTstamp: DateTime = new DateTime(2011, 3, 13, 0, 0) - def e1 = - "SPEC NAME" || "TRANSACTION CURRENCY" | "API KEY" | "TOTAL AMOUNT" | "TOTAL TAX" | "SHIPPING" | "TRANSACTION ITEM CURRENCY" | "TRANSACTION ITEM PRICE" | "DATETIME" | "CONVERTED TUPLE" | - "Invalid transaction currency" !! Some("RUP") ! validAppKey ! Some(11.00) ! Some(1.17) ! Some( - 0.00 - ) ! None ! Some(17.99) ! Some(coTstamp) ! currencyInvalidRup | - "Invalid transaction item currency" !! None ! validAppKey ! Some(12.00) ! Some(0.7) ! Some( - 0.00 - ) ! Some("HUL") ! Some(1.99) ! Some(coTstamp) ! currencyInvalidHul | - "Invalid OER API key" !! None ! "8A8A8A8A8A8A8A8A8A8A8A8AA8A8A8A8" ! Some(13.00) ! Some(3.67) ! Some( - 0.00 - ) ! Some("GBP") ! Some(2.99) ! Some(coTstamp) ! 
invalidAppKeyFailure |> { - ( - _, - trCurrency, - apiKey, - trAmountTotal, - trAmountTax, - trAmountShipping, - tiCurrency, - tiPrice, - dateTime, - expected - ) => - (for { - e <- CurrencyConversionConf(schemaKey, DeveloperAccount, apiKey, CurrencyUnit.EUR) - .enrichment[Id] - res <- e.convertCurrencies( - trCurrency, - trAmountTotal, - trAmountTax, - trAmountShipping, - tiCurrency, - tiPrice, - dateTime - ) - } yield res) must_== expected - } - - def e2 = - "SPEC NAME" || "TRANSACTION CURRENCY" | "API KEY" | "TOTAL AMOUNT" | "TOTAL TAX" | "SHIPPING" | "TRANSACTION ITEM CURRENCY" | "TRANSACTION ITEM PRICE" | "DATETIME" | "CONVERTED TUPLE" | - "All fields absent" !! None ! validAppKey ! None ! None ! None ! None ! None ! None ! ef( - FailureDetails.EnrichmentFailureMessage.InputData( - "collector_tstamp", - None, - "missing" - ) - ).invalidNel | - "All fields absent except currency" !! Some("GBP") ! validAppKey ! None ! None ! None ! Some( - "GBP" - ) ! None ! None ! ef( - FailureDetails.EnrichmentFailureMessage - .InputData("collector_tstamp", None, "missing") - ).invalidNel | - "No transaction currency, tax, or shipping" !! Some("GBP") ! validAppKey ! Some(11.00) ! None ! None ! None ! None ! Some( - coTstamp - ) ! (Some("12.75"), None, None, None).valid | - "No transaction currency or total" !! Some("GBP") ! validAppKey ! None ! Some(2.67) ! Some( - 0.00 - ) ! None ! None ! Some(coTstamp) ! (None, Some("3.09"), Some("0.00"), None).valid | - "No transaction currency" !! None ! validAppKey ! None ! None ! None ! Some("GBP") ! Some( - 12.99 - ) ! Some(coTstamp) ! (None, None, None, Some("15.05")).valid | - "Transaction Item Null" !! Some("GBP") ! validAppKey ! Some(11.00) ! Some(2.67) ! Some(0.00) ! None ! None ! Some( - coTstamp - ) ! (Some("12.75"), Some("3.09"), Some("0.00"), None).valid | - "Valid APP ID and API key" !! None ! validAppKey ! Some(14.00) ! Some(4.67) ! Some(0.00) ! Some( - "GBP" - ) ! Some(10.99) ! Some(coTstamp) ! 
(None, None, None, Some("12.74")).valid | - "Both Currency Null" !! None ! validAppKey ! Some(11.00) ! Some(2.67) ! Some(0.00) ! None ! Some( - 12.99 - ) ! Some(coTstamp) ! (None, None, None, None).valid | - "Convert to the same currency" !! Some("EUR") ! validAppKey ! Some(11.00) ! Some(2.67) ! Some( - 0.00 - ) ! Some("EUR") ! Some(12.99) ! Some(coTstamp) ! ( - Some("11.00"), - Some("2.67"), - Some("0.00"), - Some("12.99") - ).valid | - "Valid APP ID and API key" !! Some("GBP") ! validAppKey ! Some(16.00) ! Some(2.67) ! Some( - 0.00 - ) ! None ! Some(10.00) ! Some(coTstamp) ! (Some("18.54"), Some("3.09"), Some("0.00"), None).valid |> { - ( - _, - trCurrency, - apiKey, - trAmountTotal, - trAmountTax, - trAmountShipping, - tiCurrency, - tiPrice, - dateTime, - expected - ) => - (for { - e <- CurrencyConversionConf(schemaKey, DeveloperAccount, apiKey, CurrencyUnit.EUR).enrichment[Id] - res <- e.convertCurrencies( - trCurrency, - trAmountTotal, - trAmountTax, - trAmountShipping, - tiCurrency, - tiPrice, - dateTime - ) - } yield res) must_== expected - } + case class Input( + trCurrency: Option[String], + trTotal: Option[Double], + trTax: Option[Double], + trShipping: Option[Double], + tiCurrency: Option[String], + tiPrice: Option[Double], + collectorTstamp: Option[DateTime] + ) + + def runEnrichment( + input: Input, + apiKey: String = validAppKey + ) = + for { + e <- CurrencyConversionConf(schemaKey, DeveloperAccount, apiKey, CurrencyUnit.EUR) + .enrichment[Id] + res <- e.convertCurrencies( + input.trCurrency, + input.trTotal, + input.trTax, + input.trShipping, + input.tiCurrency, + input.tiPrice, + input.collectorTstamp + ) + } yield res } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala index 4af1049d7..85cce54df 100644 --- 
a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala @@ -25,6 +25,9 @@ import org.joda.time.DateTime import org.apache.commons.codec.digest.DigestUtils +import org.specs2.Specification +import org.specs2.matcher.ValidatedMatchers + import com.snowplowanalytics.iglu.core._ import com.snowplowanalytics.iglu.client.Client @@ -43,9 +46,7 @@ import com.snowplowanalytics.snowplow.enrich.common.loaders._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent import com.snowplowanalytics.snowplow.enrich.common.utils.BlockerF import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._ - -import org.specs2.Specification -import org.specs2.matcher.ValidatedMatchers +import com.snowplowanalytics.snowplow.enrich.common.AcceptInvalid class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatchers { def is = s2""" @@ -174,7 +175,9 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher client, Processor("spark", "0.0.0"), new DateTime(1500000000L), - input + input, + AcceptInvalid.acceptInvalid, + AcceptInvalid.countInvalid ) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala index e661baa91..004321499 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala @@ -47,7 +47,7 @@ class EnrichedEventSpec extends Specification { testField(_.collector_tstamp = "etl_tstamp", _.collector_tstamp) testField(_.event = "event", _.event) 
testField(_.event_id = "event_id", _.event_id) - testField(_.txn_id = "txn_id", _.txn_id) + testField(_.txn_id = JInteger.valueOf(0), _.txn_id) testField(_.name_tracker = "name_tracker", _.name_tracker) testField(_.v_tracker = "v_tracker", _.v_tracker) testField(_.v_collector = "v_collector", _.v_collector) @@ -97,13 +97,13 @@ class EnrichedEventSpec extends Specification { testField(_.se_action = "se_action", _.se_action) testField(_.se_label = "se_label", _.se_label) testField(_.se_property = "se_property", _.se_property) - testField(_.se_value = "se_value", _.se_value) + testField(_.se_value = JFloat.valueOf("0.0"), _.se_value) testField(_.unstruct_event = "unstruct_event", _.unstruct_event) testField(_.tr_orderid = "tr_orderid", _.tr_orderid) testField(_.tr_affiliation = "tr_affiliation", _.tr_affiliation) - testField(_.tr_total = "tr_total", _.tr_total) - testField(_.tr_tax = "tr_tax", _.tr_tax) - testField(_.tr_shipping = "tr_shipping", _.tr_shipping) + testField(_.tr_total = JFloat.valueOf("0.0"), _.tr_total) + testField(_.tr_tax = JFloat.valueOf("0.0"), _.tr_tax) + testField(_.tr_shipping = JFloat.valueOf("0.0"), _.tr_shipping) testField(_.tr_city = "tr_city", _.tr_city) testField(_.tr_state = "tr_state", _.tr_state) testField(_.tr_country = "tr_country", _.tr_country) @@ -111,7 +111,7 @@ class EnrichedEventSpec extends Specification { testField(_.ti_sku = "ti_sku", _.ti_sku) testField(_.ti_name = "ti_name", _.ti_name) testField(_.ti_category = "ti_category", _.ti_category) - testField(_.ti_price = "ti_price", _.ti_price) + testField(_.ti_price = JFloat.valueOf("0.0"), _.ti_price) testField(_.ti_quantity = JInteger.valueOf(0), _.ti_quantity) testField(_.pp_xoffset_min = JInteger.valueOf(0), _.pp_xoffset_min) testField(_.pp_xoffset_max = JInteger.valueOf(0), _.pp_xoffset_max) @@ -149,11 +149,11 @@ class EnrichedEventSpec extends Specification { testField(_.doc_width = JInteger.valueOf(0), _.doc_width) testField(_.doc_height = JInteger.valueOf(0), 
_.doc_height) testField(_.tr_currency = "tr_currency", _.tr_currency) - testField(_.tr_total_base = "tr_total_base", _.tr_total_base) - testField(_.tr_tax_base = "tr_tax_base", _.tr_tax_base) - testField(_.tr_shipping_base = "tr_shipping_base", _.tr_shipping_base) + testField(_.tr_total_base = JFloat.valueOf("0.0"), _.tr_total_base) + testField(_.tr_tax_base = JFloat.valueOf("0.0"), _.tr_tax_base) + testField(_.tr_shipping_base = JFloat.valueOf("0.0"), _.tr_shipping_base) testField(_.ti_currency = "ti_currency", _.ti_currency) - testField(_.ti_price_base = "ti_price_base", _.ti_price_base) + testField(_.ti_price_base = JFloat.valueOf("0.0"), _.ti_price_base) testField(_.base_currency = "base_currency", _.base_currency) testField(_.geo_timezone = "geo_timezone", _.geo_timezone) testField(_.mkt_clickid = "mkt_clickid", _.mkt_clickid) diff --git a/modules/kinesis/src/main/resources/application.conf b/modules/kinesis/src/main/resources/application.conf index 3d8228cb2..41ae4a2e5 100644 --- a/modules/kinesis/src/main/resources/application.conf +++ b/modules/kinesis/src/main/resources/application.conf @@ -74,4 +74,8 @@ "collectorPort": 443 "secure": true } + + "featureFlags" : { + "acceptInvalid": false + } } diff --git a/modules/pubsub/src/main/resources/application.conf b/modules/pubsub/src/main/resources/application.conf index 05df53957..655282874 100644 --- a/modules/pubsub/src/main/resources/application.conf +++ b/modules/pubsub/src/main/resources/application.conf @@ -43,4 +43,8 @@ "collectorPort": 443 "secure": true } + + "featureFlags" : { + "acceptInvalid": false + } } diff --git a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/Source.scala b/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/Source.scala index fe700d40c..acbc6d4c5 100644 --- a/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/Source.scala +++ 
b/modules/stream/common/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/Source.scala @@ -159,13 +159,15 @@ abstract class Source( val canonicalInput: ValidatedNel[BadRow, Option[CollectorPayload]] = ThriftLoader.toCollectorPayload(binaryData, processor) Either.catchNonFatal( - EtlPipeline.processEvents( + EtlPipeline.processEvents[Id]( adapterRegistry, enrichmentRegistry, client, processor, new DateTime(System.currentTimeMillis), - canonicalInput + canonicalInput, + true, // See https://github.com/snowplow/enrich/issues/517#issuecomment-1033910690 + () // See https://github.com/snowplow/enrich/issues/517#issuecomment-1033910690 ) ) match { case Left(throwable) => diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionItemSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionItemSpec.scala index eca8f4860..875ee00a8 100644 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionItemSpec.scala +++ b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionItemSpec.scala @@ -98,7 +98,7 @@ object TransactionItemSpec { "1002", "Red shoes", "", - "4000", + "4000.0", "1", "", "", diff --git a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionSpec.scala b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionSpec.scala index 8fdcde70e..e0f87afc1 100644 --- a/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionSpec.scala +++ b/modules/stream/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/good/TransactionSpec.scala @@ -88,7 +88,7 @@ object TransactionSpec { "", "order-123", "", - "8000", + "8000.0", "", "", "",