diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala
index a70d887..c6424bd 100644
--- a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala
+++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala
@@ -242,12 +242,211 @@ case class Event(
 
 object Event {
 
+  /**
+   * Maximum allowed length of each string field, keyed by field name.
+   * A field without an entry is treated as unbounded by the length checks.
+   * `event_id` has an entry but is not among the fields `validator` checks.
+   */
+  val FIELD_SIZES: Map[String, Int] = Map(
+    "app_id" -> 255,
+    "platform" -> 255,
+    "event" -> 128,
+    "event_id" -> 36,
+    "name_tracker" -> 128,
+    "v_tracker" -> 100,
+    "v_collector" -> 100,
+    "v_etl" -> 100,
+    "user_id" -> 255,
+    "user_ipaddress" -> 128,
+    "user_fingerprint" -> 128,
+    "domain_userid" -> 128,
+    "network_userid" -> 128,
+    "geo_country" -> 2,
+    "geo_region" -> 3,
+    "geo_city" -> 75,
+    "geo_zipcode" -> 15,
+    "geo_region_name" -> 100,
+    "ip_isp" -> 100,
+    "ip_organization" -> 128,
+    "ip_domain" -> 128,
+    "ip_netspeed" -> 100,
+    "page_url" -> 4096,
+    "page_title" -> 2000,
+    "page_referrer" -> 4096,
+    "page_urlscheme" -> 16,
+    "page_urlhost" -> 255,
+    "page_urlpath" -> 3000,
+    "page_urlquery" -> 6000,
+    "page_urlfragment" -> 3000,
+    "refr_urlscheme" -> 16,
+    "refr_urlhost" -> 255,
+    "refr_urlpath" -> 6000,
+    "refr_urlquery" -> 6000,
+    "refr_urlfragment" -> 3000,
+    "refr_medium" -> 25,
+    "refr_source" -> 50,
+    "refr_term" -> 255,
+    "mkt_medium" -> 255,
+    "mkt_source" -> 255,
+    "mkt_term" -> 255,
+    "mkt_content" -> 500,
+    "mkt_campaign" -> 255,
+    "se_category" -> 1000,
+    "se_action" -> 1000,
+    "se_label" -> 4096,
+    "se_property" -> 1000,
+    "tr_orderid" -> 255,
+    "tr_affiliation" -> 255,
+    "tr_city" -> 255,
+    "tr_state" -> 255,
+    "tr_country" -> 255,
+    "ti_orderid" -> 255,
+    "ti_sku" -> 255,
+    "ti_name" -> 255,
+    "ti_category" -> 255,
+    "useragent" -> 1000,
+    "br_name" -> 50,
+    "br_family" -> 50,
+    "br_version" -> 50,
+    "br_type" -> 50,
+    "br_renderengine" -> 50,
+    "br_lang" -> 255,
+    "br_colordepth" -> 12,
+    "os_name" -> 50,
+    "os_family" -> 50,
+    "os_manufacturer" -> 50,
+    "os_timezone" -> 255,
+    "dvce_type" -> 50,
+    "doc_charset" -> 128,
+    "tr_currency" -> 3,
+    "ti_currency" -> 3,
+    "base_currency" -> 3,
+    "geo_timezone" -> 64,
+    "mkt_clickid" -> 128,
+    "mkt_network" -> 64,
+    "etl_tags" -> 500,
+    "refr_domain_userid" -> 128,
+    "domain_sessionid" -> 128,
+    "event_vendor" -> 1000,
+    "event_name" -> 1000,
+    "event_format" -> 128,
+    "event_version" -> 128,
+    "event_fingerprint" -> 128
+  )
+
+  /** Returns a one-element error list when `value` is longer than the limit registered for field `k`, otherwise an empty list. */
+  private def validateStr(
+    k: String,
+    value: String
+  ): List[String] = {
+    // Resolve the limit once so the comparison and the message cannot disagree
+    val maxLength = FIELD_SIZES.getOrElse(k, Int.MaxValue)
+    if (value.length > maxLength)
+      List(s"Field $k value $value longer than maximum allowed size $maxLength")
+    else
+      List.empty[String]
+  }
+
+  /** Same check for optional fields; an absent value produces no error. */
+  private def validateStr(
+    k: String,
+    v: Option[String]
+  ): List[String] =
+    v.fold(List.empty[String])(validateStr(k, _))
+
+  /** Concatenates, in field order, the length violations of the string fields of `e` checked below. */
+  private def validator(e: Event): List[String] =
+    validateStr("app_id", e.app_id) ++
+      validateStr("platform", e.platform) ++
+      validateStr("event", e.event) ++
+      validateStr("name_tracker", e.name_tracker) ++
+      validateStr("v_tracker", e.v_tracker) ++
+      validateStr("v_collector", e.v_collector) ++
+      validateStr("v_etl", e.v_etl) ++
+      validateStr("user_id", e.user_id) ++
+      validateStr("user_ipaddress", e.user_ipaddress) ++
+      validateStr("user_fingerprint", e.user_fingerprint) ++
+      validateStr("domain_userid", e.domain_userid) ++
+      validateStr("network_userid", e.network_userid) ++
+      validateStr("geo_country", e.geo_country) ++
+      validateStr("geo_region", e.geo_region) ++
+      validateStr("geo_city", e.geo_city) ++
+      validateStr("geo_zipcode", e.geo_zipcode) ++
+      validateStr("geo_region_name", e.geo_region_name) ++
+      validateStr("ip_isp", e.ip_isp) ++
+      validateStr("ip_organization", e.ip_organization) ++
+      validateStr("ip_domain", e.ip_domain) ++
+      validateStr("ip_netspeed", e.ip_netspeed) ++
+      validateStr("page_url", e.page_url) ++
+      validateStr("page_title", e.page_title) ++
+      validateStr("page_referrer", e.page_referrer) ++
+      validateStr("page_urlscheme", e.page_urlscheme) ++
+      validateStr("page_urlhost", e.page_urlhost) ++
+      validateStr("page_urlpath", e.page_urlpath) ++
+      validateStr("page_urlquery", e.page_urlquery) ++
+      validateStr("page_urlfragment", e.page_urlfragment) ++
+      validateStr("refr_urlscheme", e.refr_urlscheme) ++
+      validateStr("refr_urlhost", e.refr_urlhost) ++
+      validateStr("refr_urlpath", e.refr_urlpath) ++
+      validateStr("refr_urlquery", e.refr_urlquery) ++
+      validateStr("refr_urlfragment", e.refr_urlfragment) ++
+      validateStr("refr_medium", e.refr_medium) ++
+      validateStr("refr_source", e.refr_source) ++
+      validateStr("refr_term", e.refr_term) ++
+      validateStr("mkt_medium", e.mkt_medium) ++
+      validateStr("mkt_source", e.mkt_source) ++
+      validateStr("mkt_term", e.mkt_term) ++
+      validateStr("mkt_content", e.mkt_content) ++
+      validateStr("mkt_campaign", e.mkt_campaign) ++
+      validateStr("se_category", e.se_category) ++
+      validateStr("se_action", e.se_action) ++
+      validateStr("se_label", e.se_label) ++
+      validateStr("se_property", e.se_property) ++
+      validateStr("tr_orderid", e.tr_orderid) ++
+      validateStr("tr_affiliation", e.tr_affiliation) ++
+      validateStr("tr_city", e.tr_city) ++
+      validateStr("tr_state", e.tr_state) ++
+      validateStr("tr_country", e.tr_country) ++
+      validateStr("ti_orderid", e.ti_orderid) ++
+      validateStr("ti_sku", e.ti_sku) ++
+      validateStr("ti_name", e.ti_name) ++
+      validateStr("ti_category", e.ti_category) ++
+      validateStr("useragent", e.useragent) ++
+      validateStr("br_name", e.br_name) ++
+      validateStr("br_family", e.br_family) ++
+      validateStr("br_version", e.br_version) ++
+      validateStr("br_type", e.br_type) ++
+      validateStr("br_renderengine", e.br_renderengine) ++
+      validateStr("br_lang", e.br_lang) ++
+      validateStr("br_colordepth", e.br_colordepth) ++
+      validateStr("os_name", e.os_name) ++
+      validateStr("os_family", e.os_family) ++
+      validateStr("os_manufacturer", e.os_manufacturer) ++
+      validateStr("os_timezone", e.os_timezone) ++
+      validateStr("dvce_type", e.dvce_type) ++
+      validateStr("doc_charset", e.doc_charset) ++
+      validateStr("tr_currency", e.tr_currency) ++
+      validateStr("ti_currency", e.ti_currency) ++
+      validateStr("base_currency", e.base_currency) ++
+      validateStr("geo_timezone", e.geo_timezone) ++
+      validateStr("mkt_clickid", e.mkt_clickid) ++
+      validateStr("mkt_network", e.mkt_network) ++
+      validateStr("etl_tags", e.etl_tags) ++
+      validateStr("refr_domain_userid", e.refr_domain_userid) ++
+      validateStr("domain_sessionid", e.domain_sessionid) ++
+      validateStr("event_vendor", e.event_vendor) ++
+      validateStr("event_name", e.event_name) ++
+      validateStr("event_format", e.event_format) ++
+      validateStr("event_version", e.event_version) ++
+      validateStr("event_fingerprint", e.event_fingerprint)
+
   /**
    * Automatically derived Circe encoder
    */
   implicit val jsonEncoder: Encoder.AsObject[Event] = deriveEncoder[Event]
 
+  /** Derived Circe decoder that additionally fails when `validator` reports a length violation */
-  implicit def eventDecoder: Decoder[Event] = deriveDecoder[Event]
+  implicit def eventDecoder: Decoder[Event] = deriveDecoder[Event].ensure(validator)
 
   /**
    * Derived TSV parser for the Event class
diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingError.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingError.scala
index 5db1363..4facccc 100644
--- a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingError.scala
+++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingError.scala
@@ -78,7 +78,7 @@ object ParsingError {
         case InvalidValue(key, value, message) =>
           Json.obj(
             "type" := "InvalidValue",
-            "key" := key,
+            "key" := key.name,
             "value" := value,
             "message" := message
           )
diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoder.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoder.scala
index c200941..dd5113d 100644
--- a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoder.scala
+++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoder.scala
@@ -52,13 +52,23 @@ private[decode] object ValueDecoder {
   implicit final val stringColumnDecoder: ValueDecoder[String] =
     fromFunc[String] {
       case (key, value) =>
-        if (value.isEmpty) InvalidValue(key, value, s"Field $key cannot be empty").asLeft else value.asRight
+        // Resolve the limit once so the comparison and the message cannot disagree
+        val maxLength = Event.FIELD_SIZES.getOrElse(key.name, Int.MaxValue)
+        if (value.length > maxLength)
+          InvalidValue(key, value, s"Field ${key.name} value $value longer than maximum allowed size $maxLength").asLeft
+        else if (value.isEmpty) InvalidValue(key, value, s"Field ${key.name} cannot be empty").asLeft
+        else value.asRight
     }
 
   implicit final val stringOptionColumnDecoder: ValueDecoder[Option[String]] =
     fromFunc[Option[String]] {
-      case (_, value) =>
-        if (value.isEmpty) none[String].asRight else value.some.asRight
+      case (k, value) =>
+        // Resolve the limit once so the comparison and the message cannot disagree
+        val maxLength = Event.FIELD_SIZES.getOrElse(k.name, Int.MaxValue)
+        if (value.length > maxLength)
+          InvalidValue(k, value, s"Field ${k.name} value $value longer than maximum allowed size $maxLength").asLeft
+        else if (value.isEmpty) none[String].asRight
+        else value.some.asRight
     }
 
   implicit final val intColumnDecoder: ValueDecoder[Option[Int]] =
diff --git a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventGen.scala b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventGen.scala
index 9600153..bc0924a 100644
--- a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventGen.scala
+++ b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventGen.scala
@@ -87,7 +87,7 @@ object EventGen {
 
   val event: Gen[Event] =
     for {
-      app_id <- Gen.option(strGen(512, Gen.alphaNumChar))
+      app_id <- Gen.option(strGen(Event.FIELD_SIZES("app_id"), Gen.alphaNumChar))
       platform <- Gen.option(platform)
       etl_tstamp <- Gen.option(instantGen)
       collector_tstamp <- instantGen
@@ -95,85 +95,85 @@ object EventGen {
       event <- Gen.option(eventType)
       event_id <- Gen.uuid
       txn_id <- Gen.option(Gen.chooseNum(1, 10000))
-      name_tracker <- Gen.option(strGen(256, Gen.alphaNumChar))
-      v_tracker <- Gen.option(strGen(256, Gen.alphaNumChar))
-      v_collector <- strGen(512, Gen.alphaNumChar)
-      v_etl <- strGen(512, Gen.alphaNumChar)
+      name_tracker <- Gen.option(strGen(Event.FIELD_SIZES("name_tracker"), Gen.alphaNumChar))
+      v_tracker <- Gen.option(strGen(Event.FIELD_SIZES("v_tracker"), Gen.alphaNumChar))
+      v_collector <- strGen(Event.FIELD_SIZES("v_collector"), Gen.alphaNumChar)
+      v_etl <- strGen(Event.FIELD_SIZES("v_etl"), Gen.alphaNumChar)
       user_id <- Gen.option(Gen.uuid).map(_.map(_.toString()))
       user_ipaddress <- Gen.option(ipAddress)
-      user_fingerprint <- Gen.option(strGen(512, Gen.alphaNumChar))
+      user_fingerprint <- Gen.option(strGen(Event.FIELD_SIZES("user_fingerprint"), Gen.alphaNumChar))
       domain_userid <- Gen.option(Gen.uuid).map(_.map(_.toString()))
       domain_sessionidx <- Gen.option(Gen.chooseNum(1, 10000))
       network_userid <- Gen.option(Gen.uuid).map(_.map(_.toString()))
-      geo_country <- Gen.option(strGen(3, Gen.alphaUpperChar))
-      geo_region <- Gen.option(strGen(100, Gen.alphaNumChar))
-      geo_city <- Gen.option(strGen(512, Gen.alphaChar))
-      geo_zipcode <- Gen.option(strGen(6, Gen.alphaNumChar))
+      geo_country <- Gen.option(strGen(Event.FIELD_SIZES("geo_country"), Gen.alphaUpperChar))
+      geo_region <- Gen.option(strGen(Event.FIELD_SIZES("geo_region"), Gen.alphaNumChar))
+      geo_city <- Gen.option(strGen(Event.FIELD_SIZES("geo_city"), Gen.alphaChar))
+      geo_zipcode <- Gen.option(strGen(Event.FIELD_SIZES("geo_zipcode"), Gen.alphaNumChar))
       geo_latitude <- Gen.option(Arbitrary.arbitrary[Double])
       geo_longitude <- Gen.option(Arbitrary.arbitrary[Double])
-      geo_region_name <- Gen.option(strGen(512, Gen.alphaChar))
-      ip_isp <- Gen.option(strGen(512, Gen.alphaNumChar))
-      ip_organization <- Gen.option(strGen(512, Gen.alphaNumChar))
-      ip_domain <- Gen.option(strGen(512, Gen.alphaNumChar))
-      ip_netspeed <- Gen.option(strGen(50, Gen.alphaNumChar))
-      page_url <- Gen.option(strGen(512, Gen.alphaNumChar))
-      page_title <- Gen.option(strGen(512, Gen.alphaNumChar))
-      page_referrer <- Gen.option(strGen(512, Gen.alphaNumChar))
+      geo_region_name <- Gen.option(strGen(Event.FIELD_SIZES("geo_region_name"), Gen.alphaChar))
+      ip_isp <- Gen.option(strGen(Event.FIELD_SIZES("ip_isp"), Gen.alphaNumChar))
+      ip_organization <- Gen.option(strGen(Event.FIELD_SIZES("ip_organization"), Gen.alphaNumChar))
+      ip_domain <- Gen.option(strGen(Event.FIELD_SIZES("ip_domain"), Gen.alphaNumChar))
+      ip_netspeed <- Gen.option(strGen(Event.FIELD_SIZES("ip_netspeed"), Gen.alphaNumChar))
+      page_url <- Gen.option(strGen(Event.FIELD_SIZES("page_url"), Gen.alphaNumChar))
+      page_title <- Gen.option(strGen(Event.FIELD_SIZES("page_title"), Gen.alphaNumChar))
+      page_referrer <- Gen.option(strGen(Event.FIELD_SIZES("page_referrer"), Gen.alphaNumChar))
       page_urlscheme <- Gen.option(Gen.oneOf("http", "https"))
-      page_urlhost <- Gen.option(strGen(512, Gen.alphaNumChar))
+      page_urlhost <- Gen.option(strGen(Event.FIELD_SIZES("page_urlhost"), Gen.alphaNumChar))
       page_urlport <- Gen.option(Gen.chooseNum(1, 65000))
-      page_urlpath <- Gen.option(strGen(512, Gen.alphaNumChar))
+      page_urlpath <- Gen.option(strGen(Event.FIELD_SIZES("page_urlpath"), Gen.alphaNumChar))
       page_urlquery <- Gen.option(queryString)
-      page_urlfragment <- Gen.option(strGen(512, Gen.alphaNumChar))
-      refr_urlscheme <- Gen.option(strGen(10, Gen.alphaNumChar))
-      refr_urlhost <- Gen.option(strGen(512, Gen.alphaNumChar))
+      page_urlfragment <- Gen.option(strGen(Event.FIELD_SIZES("page_urlfragment"), Gen.alphaNumChar))
+      refr_urlscheme <- Gen.option(strGen(Event.FIELD_SIZES("refr_urlscheme"), Gen.alphaNumChar))
+      refr_urlhost <- Gen.option(strGen(Event.FIELD_SIZES("refr_urlhost"), Gen.alphaNumChar))
       refr_urlport <- Gen.option(Gen.chooseNum(1, 65000))
-      refr_urlpath <- Gen.option(strGen(512, Gen.alphaNumChar))
-      refr_urlquery <- Gen.option(strGen(512, Gen.alphaNumChar))
-      refr_urlfragment <- Gen.option(strGen(512, Gen.alphaNumChar))
-      refr_medium <- Gen.option(strGen(512, Gen.alphaNumChar))
-      refr_source <- Gen.option(strGen(512, Gen.alphaNumChar))
-      refr_term <- Gen.option(strGen(512, Gen.alphaNumChar))
-      mkt_medium <- Gen.option(strGen(512, Gen.alphaNumChar))
-      mkt_source <- Gen.option(strGen(512, Gen.alphaNumChar))
-      mkt_term <- Gen.option(strGen(512, Gen.alphaNumChar))
-      mkt_content <- Gen.option(strGen(512, Gen.alphaNumChar))
-      mkt_campaign <- Gen.option(strGen(512, Gen.alphaNumChar))
+      refr_urlpath <- Gen.option(strGen(Event.FIELD_SIZES("refr_urlpath"), Gen.alphaNumChar))
+      refr_urlquery <- Gen.option(strGen(Event.FIELD_SIZES("refr_urlquery"), Gen.alphaNumChar))
+      refr_urlfragment <- Gen.option(strGen(Event.FIELD_SIZES("refr_urlfragment"), Gen.alphaNumChar))
+      refr_medium <- Gen.option(strGen(Event.FIELD_SIZES("refr_medium"), Gen.alphaNumChar))
+      refr_source <- Gen.option(strGen(Event.FIELD_SIZES("refr_source"), Gen.alphaNumChar))
+      refr_term <- Gen.option(strGen(Event.FIELD_SIZES("refr_term"), Gen.alphaNumChar))
+      mkt_medium <- Gen.option(strGen(Event.FIELD_SIZES("mkt_medium"), Gen.alphaNumChar))
+      mkt_source <- Gen.option(strGen(Event.FIELD_SIZES("mkt_source"), Gen.alphaNumChar))
+      mkt_term <- Gen.option(strGen(Event.FIELD_SIZES("mkt_term"), Gen.alphaNumChar))
+      mkt_content <- Gen.option(strGen(Event.FIELD_SIZES("mkt_content"), Gen.alphaNumChar))
+      mkt_campaign <- Gen.option(strGen(Event.FIELD_SIZES("mkt_campaign"), Gen.alphaNumChar))
       contexts <- Gen.oneOf(contexts, Contexts(Nil))
-      se_category <- Gen.option(strGen(512, Gen.alphaNumChar))
-      se_action <- Gen.option(strGen(512, Gen.alphaNumChar))
-      se_label <- Gen.option(strGen(512, Gen.alphaNumChar))
-      se_property <- Gen.option(strGen(512, Gen.alphaNumChar))
+      se_category <- Gen.option(strGen(Event.FIELD_SIZES("se_category"), Gen.alphaNumChar))
+      se_action <- Gen.option(strGen(Event.FIELD_SIZES("se_action"), Gen.alphaNumChar))
+      se_label <- Gen.option(strGen(Event.FIELD_SIZES("se_label"), Gen.alphaNumChar))
+      se_property <- Gen.option(strGen(Event.FIELD_SIZES("se_property"), Gen.alphaNumChar))
       se_value <- Gen.option(Arbitrary.arbitrary[Double])
       unstruct_event = event match {
         case Some("unstruct") => unstruct
         case _ => UnstructEvent(None)
       }
       tr_orderid <- Gen.option(Gen.uuid).map(_.map(_.toString()))
-      tr_affiliation <- Gen.option(strGen(512, Gen.alphaNumChar))
+      tr_affiliation <- Gen.option(strGen(Event.FIELD_SIZES("tr_affiliation"), Gen.alphaNumChar))
       tr_total <- Gen.option(Arbitrary.arbitrary[Double])
       tr_tax <- Gen.option(Arbitrary.arbitrary[Double])
       tr_shipping <- Gen.option(Arbitrary.arbitrary[Double])
-      tr_city <- Gen.option(strGen(512, Gen.alphaNumChar))
-      tr_state <- Gen.option(strGen(512, Gen.alphaNumChar))
-      tr_country <- Gen.option(strGen(512, Gen.alphaNumChar))
+      tr_city <- Gen.option(strGen(Event.FIELD_SIZES("tr_city"), Gen.alphaNumChar))
+      tr_state <- Gen.option(strGen(Event.FIELD_SIZES("tr_state"), Gen.alphaNumChar))
+      tr_country <- Gen.option(strGen(Event.FIELD_SIZES("tr_country"), Gen.alphaNumChar))
       ti_orderid <- Gen.option(Gen.uuid).map(_.map(_.toString()))
-      ti_sku <- Gen.option(strGen(512, Gen.alphaNumChar))
-      ti_name <- Gen.option(strGen(512, Gen.alphaNumChar))
-      ti_category <- Gen.option(strGen(512, Gen.alphaNumChar))
+      ti_sku <- Gen.option(strGen(Event.FIELD_SIZES("ti_sku"), Gen.alphaNumChar))
+      ti_name <- Gen.option(strGen(Event.FIELD_SIZES("ti_name"), Gen.alphaNumChar))
+      ti_category <- Gen.option(strGen(Event.FIELD_SIZES("ti_category"), Gen.alphaNumChar))
       ti_price <- Gen.option(Arbitrary.arbitrary[Double])
       ti_quantity <- Gen.option(Gen.chooseNum(1, 100))
       pp_xoffset_min <- Gen.option(Gen.chooseNum(1, 10000))
       pp_xoffset_max <- Gen.option(Gen.chooseNum(1, 10000))
       pp_yoffset_min <- Gen.option(Gen.chooseNum(1, 10000))
       pp_yoffset_max <- Gen.option(Gen.chooseNum(1, 10000))
-      useragent <- Gen.option(strGen(512, Gen.alphaNumChar))
-      br_name <- Gen.option(strGen(512, Gen.alphaNumChar))
-      br_family <- Gen.option(strGen(512, Gen.alphaNumChar))
-      br_version <- Gen.option(strGen(512, Gen.alphaNumChar))
-      br_type <- Gen.option(strGen(512, Gen.alphaNumChar))
-      br_renderengine <- Gen.option(strGen(512, Gen.alphaNumChar))
-      br_lang <- Gen.option(strGen(512, Gen.alphaNumChar))
+      useragent <- Gen.option(strGen(Event.FIELD_SIZES("useragent"), Gen.alphaNumChar))
+      br_name <- Gen.option(strGen(Event.FIELD_SIZES("br_name"), Gen.alphaNumChar))
+      br_family <- Gen.option(strGen(Event.FIELD_SIZES("br_family"), Gen.alphaNumChar))
+      br_version <- Gen.option(strGen(Event.FIELD_SIZES("br_version"), Gen.alphaNumChar))
+      br_type <- Gen.option(strGen(Event.FIELD_SIZES("br_type"), Gen.alphaNumChar))
+      br_renderengine <- Gen.option(strGen(Event.FIELD_SIZES("br_renderengine"), Gen.alphaNumChar))
+      br_lang <- Gen.option(strGen(Event.FIELD_SIZES("br_lang"), Gen.alphaNumChar))
       br_features_pdf <- Gen.option(Arbitrary.arbitrary[Boolean])
       br_features_flash <- Gen.option(Arbitrary.arbitrary[Boolean])
       br_features_java <- Gen.option(Arbitrary.arbitrary[Boolean])
@@ -184,31 +184,31 @@ object EventGen {
       br_features_gears <- Gen.option(Arbitrary.arbitrary[Boolean])
       br_features_silverlight <- Gen.option(Arbitrary.arbitrary[Boolean])
       br_cookies <- Gen.option(Arbitrary.arbitrary[Boolean])
-      br_colordepth <- Gen.option(strGen(512, Gen.alphaNumChar))
+      br_colordepth <- Gen.option(strGen(Event.FIELD_SIZES("br_colordepth"), Gen.alphaNumChar))
       br_viewwidth <- Gen.option(Gen.chooseNum(1, 10000))
       br_viewheight <- Gen.option(Gen.chooseNum(1, 10000))
-      os_name <- Gen.option(strGen(512, Gen.alphaNumChar))
-      os_family <- Gen.option(strGen(512, Gen.alphaNumChar))
-      os_manufacturer <- Gen.option(strGen(512, Gen.alphaNumChar))
-      os_timezone <- Gen.option(strGen(512, Gen.alphaNumChar))
-      dvce_type <- Gen.option(strGen(512, Gen.alphaNumChar))
+      os_name <- Gen.option(strGen(Event.FIELD_SIZES("os_name"), Gen.alphaNumChar))
+      os_family <- Gen.option(strGen(Event.FIELD_SIZES("os_family"), Gen.alphaNumChar))
+      os_manufacturer <- Gen.option(strGen(Event.FIELD_SIZES("os_manufacturer"), Gen.alphaNumChar))
+      os_timezone <- Gen.option(strGen(Event.FIELD_SIZES("os_timezone"), Gen.alphaNumChar))
+      dvce_type <- Gen.option(strGen(Event.FIELD_SIZES("dvce_type"), Gen.alphaNumChar))
       dvce_ismobile <- Gen.option(Arbitrary.arbitrary[Boolean])
       dvce_screenwidth <- Gen.option(Gen.chooseNum(1, 10000))
       dvce_screenheight <- Gen.option(Gen.chooseNum(1, 10000))
-      doc_charset <- Gen.option(strGen(512, Gen.alphaNumChar))
+      doc_charset <- Gen.option(strGen(Event.FIELD_SIZES("doc_charset"), Gen.alphaNumChar))
       doc_width <- Gen.option(Gen.chooseNum(1, 10000))
       doc_height <- Gen.option(Gen.chooseNum(1, 10000))
-      tr_currency <- Gen.option(strGen(512, Gen.alphaNumChar))
+      tr_currency <- Gen.option(strGen(Event.FIELD_SIZES("tr_currency"), Gen.alphaNumChar))
       tr_total_base <- Gen.option(Arbitrary.arbitrary[Double])
       tr_tax_base <- Gen.option(Arbitrary.arbitrary[Double])
       tr_shipping_base <- Gen.option(Arbitrary.arbitrary[Double])
-      ti_currency <- Gen.option(strGen(512, Gen.alphaNumChar))
+      ti_currency <- Gen.option(strGen(Event.FIELD_SIZES("ti_currency"), Gen.alphaNumChar))
       ti_price_base <- Gen.option(Arbitrary.arbitrary[Double])
-      base_currency <- Gen.option(strGen(512, Gen.alphaNumChar))
-      geo_timezone <- Gen.option(strGen(512, Gen.alphaNumChar))
+      base_currency <- Gen.option(strGen(Event.FIELD_SIZES("base_currency"), Gen.alphaNumChar))
+      geo_timezone <- Gen.option(strGen(Event.FIELD_SIZES("geo_timezone"), Gen.alphaNumChar))
       mkt_clickid <- Gen.option(Gen.uuid).map(_.map(_.toString()))
-      mkt_network <- Gen.option(strGen(512, Gen.alphaNumChar))
-      etl_tags <- Gen.option(strGen(512, Gen.alphaNumChar))
+      mkt_network <- Gen.option(strGen(Event.FIELD_SIZES("mkt_network"), Gen.alphaNumChar))
+      etl_tags <- Gen.option(strGen(Event.FIELD_SIZES("etl_tags"), Gen.alphaNumChar))
       dvce_sent_tstamp <- Gen.option(instantGen)
       refr_domain_userid <- Gen.option(Gen.uuid).map(_.map(_.toString()))
       refr_dvce_tstamp <- Gen.option(instantGen)
@@ -218,8 +218,8 @@ object EventGen {
       event_vendor <- Gen.option(Gen.identifier)
       event_name <- Gen.option(Gen.identifier)
       event_format <- Gen.option("jsonschema")
-      event_version <- Gen.option(strGen(10, Gen.alphaNumChar))
-      event_fingerprint <- Gen.option(strGen(512, Gen.alphaNumChar))
+      event_version <- Gen.option(strGen(Event.FIELD_SIZES("event_version"), Gen.alphaNumChar))
+      event_fingerprint <- Gen.option(strGen(Event.FIELD_SIZES("event_fingerprint"), Gen.alphaNumChar))
       true_tstamp <- Gen.option(instantGen)
     } yield Event(
       app_id,
diff --git a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingErrorSpec.scala b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingErrorSpec.scala
index 3cd0fd9..5e475f9 100644
--- a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingErrorSpec.scala
+++ b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingErrorSpec.scala
@@ -13,21 +13,27 @@
 package com.snowplowanalytics.snowplow.analytics.scalasdk
 
 import cats.data.NonEmptyList
-
-import io.circe.{Decoder, Json}
+import io.circe.{Decoder, DecodingFailure, Json}
 import io.circe.syntax._
-import io.circe.parser._
-
 import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError._
 import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError.RowDecodingErrorInfo._
+
 import org.specs2.Specification
+import java.time.Instant
+import io.circe.parser._
+import cats.data.Validated.Invalid
+
+import java.util.UUID
+
 
 class ParsingErrorSpec extends Specification {
   def is = s2"""
   ParsingError encoder-decoder
     works correctly with NotTSV error $e1
     works correctly with FieldNumberMismatch error $e2
     works correctly with RowDecodingError $e3
+    works correctly with TSV oversized columns $e4
+    works correctly with JSON oversized columns $e5
   """
 
   def e1 = {
@@ -95,6 +101,36 @@ class ParsingErrorSpec extends Specification {
     (decoded must beEqualTo(expected)) and (encoded must beEqualTo(errorJson))
   }
 
+  def e4 = {
+    val badEvent = Event.minimal(UUID.randomUUID(), Instant.now(), "v" * 101, "v_etl").copy(geo_country = Some("sssss"))
+    val expected = Invalid(
+      RowDecodingError(
+        NonEmptyList.of(
+          InvalidValue(Symbol("v_collector"), "v" * 101, s"Field v_collector value ${"v" * 101} longer than maximum allowed size 100"),
+          InvalidValue(Symbol("geo_country"), "sssss", s"Field geo_country value sssss longer than maximum allowed size 2")
+        )
+      )
+    )
+    Event.parse(badEvent.toTsv) must beEqualTo(expected)
+  }
+
+  def e5 = {
+    // v_collector and geo_country are both oversized; this pins that `decode` reports only the v_collector failure
+    decode[Event](s"""{
+      "app_id" : "bbb05861-0f11-4986-b23b-87e6e22609b1",
+      "collector_tstamp" : "2021-12-06T15:47:07.920430Z",
+      "event_id" : "bbb05861-0f11-4986-b23b-87e6e22609be",
+      "v_collector" : "${"v" * 101}",
+      "v_etl" : "v_etl",
+      "geo_country" : "sssss",
+      "contexts" : {},
+      "unstruct_event": {},
+      "derived_contexts" : {}
+    }""".stripMargin) must beEqualTo(
+      Left(DecodingFailure(s"Field v_collector value ${"v" * 101} longer than maximum allowed size 100", List()))
+    )
+  }
+
   private def parseJson(jsonStr: String): Json =
     parse(jsonStr).getOrElse(throw new RuntimeException("Failed to parse expected JSON."))