From 052450f84eec6bbd59756006896a856390160cb8 Mon Sep 17 00:00:00 2001 From: Ian Streeter Date: Fri, 15 Nov 2024 13:07:59 +0000 Subject: [PATCH 1/4] Preserve original field order when merging parquet Fields We use the `Migrations.mergeSchema` function in the loaders, for combining a series of schemas (e.g. `1-0-0`, `1-0-1`, `1-0-2`) into a reconciled column Before this PR, `mergeSchemas` contained some logic to preserve field order... but it was the wrong logic. It preserved field order of the newer schema, ignoring whether a field was present in the older schema. After this PR, we preserve field order of the older schema. New fields added in a later schema are appended to the end of the field list. This feature change is needed for loaders that only allow field additions when they are appended to the end of a struct. E.g. the Lake Loader for Hudi/Glue. --- .../iglu.schemaddl/parquet/Field.scala | 15 +- .../iglu.schemaddl/parquet/Migrations.scala | 32 +- .../iglu/schemaddl/parquet/FieldSpec.scala | 72 ++++- .../schemaddl/parquet/MigrationSpec.scala | 302 +++++++++++------- 4 files changed, 280 insertions(+), 141 deletions(-) diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Field.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Field.scala index 270cc35c..ed47a1b0 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Field.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Field.scala @@ -43,16 +43,15 @@ object Field { Field(name, Type.Array(constructedType.value, itemNullability), nullability) } - def normalize(field: Field): Field = { - val fieldType = field.fieldType match { - case Type.Struct(fields) => Type.Struct(collapseDuplicateFields(fields.map(normalize))) - case Type.Array(Type.Struct(fields), nullability) => Type.Array( - Type.Struct(collapseDuplicateFields(fields.map(normalize))) - , nullability) + def normalize(field: Field): Field = + field.copy(name = normalizeName(field), fieldType = normalizeType(field.fieldType)) + + private def normalizeType(t: Type): Type = + t match { + case Type.Struct(fields) => Type.Struct(collapseDuplicateFields(fields.map(normalize)).sortBy(_.name)) + case Type.Array(el, nullability) => Type.Array(normalizeType(el), nullability) case other => other } - field.copy(name = normalizeName(field), fieldType = fieldType) - } private def collapseDuplicateFields(normFields: NonEmptyVector[Field]): NonEmptyVector[Field] = { val endMap = normFields diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala index 44b2ac93..8a3b4df4 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala @@ -81,7 +81,10 @@ object Migrations { val mergedType: Option[Type] = sourceType match { case sourceStruct@Struct(sourceFields) => targetType match { case targetStruct@Type.Struct(targetFields) => - val forwardMigration = sourceFields.map(srcField => MigrationFieldPair(srcField.name :: path, srcField, targetStruct.focus(srcField.name)).migrations) + val forwardMigration = sourceFields.map { + srcField => + MigrationFieldPair(srcField.name :: path, srcField, targetStruct.focus(srcField.name)).migrations + } // Comparing struct target fields to the source. This will detect additions. val reverseMigration = targetFields.map(tgtField => MigrationFieldPair(tgtField.name :: path, tgtField, sourceStruct.focus(tgtField.name)).migrations) @@ -96,22 +99,23 @@ object Migrations { val tgtFields = reverseMigration.toVector.traverse(_.result).toVector.flatten val tgtFieldNames = tgtFields.map(_.name) val allSrcFields = forwardMigration.toVector.traverse(_.result).toVector.flatten - val allSrcFieldMap = allSrcFields.map(f => f.name -> f).toMap - // swap fields in src and target as they would be rearranged in nested structs or arrays - val reorderedTgtFields = tgtFields.map { t => - allSrcFieldMap.get(t.name) match { - case Some(value) if value.fieldType.isInstanceOf[Struct] => value - case Some(value) if value.fieldType.isInstanceOf[Array] => value - case _ => t - } + val allSrcFieldNames = allSrcFields.map(_.name) + + val srcFields: Vector[Field] = allSrcFields.map { + srcField => + if (tgtFieldNames.contains(srcField.name)) + srcField + else + // drop not null constraints from removed fields. + srcField.copy(nullability = Type.Nullability.Nullable) + } + val newTgtFields = tgtFields.filter { + tgtField => + !allSrcFieldNames.contains(tgtField.name) } - val srcFields: Vector[Field] = allSrcFields.filter(srcField => !tgtFieldNames.contains(srcField.name)).map( - // drop not null constrains from removed fields. - _.copy(nullability = Type.Nullability.Nullable) - ) // failed migration would produce no fields in source - NonEmptyVector.fromVector(reorderedTgtFields ++ srcFields).map { nonEmpty => + NonEmptyVector.fromVector(srcFields ++ newTgtFields).map { nonEmpty => Type.Struct(nonEmpty) } diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/FieldSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/FieldSpec.scala index 0d4aae20..fbebf905 100644 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/FieldSpec.scala +++ b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/FieldSpec.scala @@ -36,6 +36,8 @@ class FieldSpec extends org.specs2.Specification { def is = s2""" normalName handles camel case and disallowed characters $e13 normalize would collapse colliding names $e14 normalize would collapse colliding names with deterministic type selection $e15 + normalize would sort fields in order of name $e16 + normalize would sort nested fields in order of name $e17 """ // a helper @@ -409,15 +411,15 @@ class FieldSpec extends org.specs2.Specification { def is = s2""" fieldType = Type.Struct( fields = NonEmptyVector.of( Field( - name = "_ga", + name = "__b", fieldType = Type.Integer, - nullability = Nullable, - accessors = Set("_ga", "_Ga") + nullability = Nullable ), Field( - name = "__b", + name = "_ga", fieldType = Type.Integer, - nullability = Nullable + nullability = Nullable, + accessors = Set("_ga", "_Ga") ), ) ), @@ -458,5 +460,65 @@ class FieldSpec extends org.specs2.Specification { def is = s2""" (input1 must_== expected) and (input2 must_== expected) } + def e16 = { + val input = { + val struct = Type.Struct( + NonEmptyVector.of( + Field("Apple", Type.String, Nullable), + Field("cherry", Type.String, Nullable), + Field("banana", Type.String, Nullable), + Field("Damson", Type.String, Nullable) + ) + ) + Field("top", struct, Nullable) + } + + val expected = { + val struct = Type.Struct( + NonEmptyVector.of( + Field("apple", Type.String, Nullable, Set("Apple")), + Field("banana", Type.String, Nullable, Set("banana")), + Field("cherry", Type.String, Nullable, Set("cherry")), + Field("damson", Type.String, Nullable, Set("Damson")) + ) + ) + Field("top", struct, Nullable) + } + + Field.normalize(input) must beEqualTo(expected) + } + + def e17 = { + val input = { + val nested = Type.Struct( + NonEmptyVector.of( + Field("Apple", Type.String, Nullable), + Field("cherry", Type.String, Nullable), + Field("banana", Type.String, Nullable), + Field("Damson", Type.String, Nullable) + ) + ) + val arr = Field("nested_arr", Type.Array(nested, Nullable), Nullable) + val struct = Field("nested_obj", nested, Nullable) + Field("top", Type.Struct(NonEmptyVector.of(arr, struct)), Nullable) + } + + val expected = { + val nested = Type.Struct( + NonEmptyVector.of( + Field("apple", Type.String, Nullable, Set("Apple")), + Field("banana", Type.String, Nullable, Set("banana")), + Field("cherry", Type.String, Nullable, Set("cherry")), + Field("damson", Type.String, Nullable, Set("Damson")) + ) + ) + val arr = Field("nested_arr", Type.Array(nested, Nullable), Nullable) + val struct = Field("nested_obj", nested, Nullable) + Field("top", Type.Struct(NonEmptyVector.of(arr, struct)), Nullable) + } + + Field.normalize(input) must beEqualTo(expected) + } + private def fieldNormalName(name: String) = Field.normalizeName(Field(name, Type.String, nullability = Nullable)) } diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala index 1ff70339..8c43e653 100644 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala +++ b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala @@ -1,9 +1,12 @@ package com.snowplowanalytics.iglu.schemaddl.parquet +import cats.data.NonEmptyVector import cats.syntax.either._ + import com.snowplowanalytics.iglu.schemaddl.SpecHelpers import com.snowplowanalytics.iglu.schemaddl.parquet.Migrations._ import com.snowplowanalytics.iglu.schemaddl.parquet.Type.Struct +import com.snowplowanalytics.iglu.schemaddl.parquet.Type.Nullability.{Required, Nullable} class MigrationSpec extends org.specs2.Specification { @@ -23,11 +26,12 @@ class MigrationSpec extends org.specs2.Specification { Produce migration for removal fields in structs in arrays $e10 Error for type casting in nested arrays $e11 Produce migration for nullables arrays $e12 - Preserve ordering in nested arrays and structs $e13 - Suggest version change correctly $e14 - Collapse field name collisions $e15 - Drop not null constraint when field is removed in next generation $e16 - Drop not null constraint when field is added in next generation $e17 + Suggest version change correctly $e13 + Collapse field name collisions $e14 + Drop not null constraint when field is removed in next generation $e15 + Drop not null constraint when field is added in next generation $e16 + Preserve ordering of fields in a struct $e17 + Preserve ordering in nested arrays and structs $e18 """ def e1 = { @@ -60,9 +64,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(leanBase, schema2) should beRight(schema2) and ( - Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Schema key addition at /objectKey/string2Key") + Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Schema key addition at /object_key/string2_key") ) } @@ -87,9 +91,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get - Migrations.mergeSchemas(leanBase, schema2).leftMap(_.map(_.toString)) should beLeft(List("Incompatible type change String to Long at /objectKey/nestedKey1")) and ( - Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Incompatible type change String to Long at /objectKey/nestedKey1") + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + Migrations.mergeSchemas(leanBase, schema2).leftMap(_.map(_.toString)) should beLeft(List("Incompatible type change String to Long at /object_key/nested_key1")) and ( + Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Incompatible type change String to Long at /object_key/nested_key1") ) } @@ -113,10 +117,10 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(leanBase, schema2) should beRight(leanBase) and ( - Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Key removal at /objectKey/nestedKey3")) + Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Key removal at /object_key/nested_key3")) } // Produce migration for new top-level fields $e5 @@ -129,7 +133,7 @@ class MigrationSpec extends org.specs2.Specification { | "type": "string", | "maxLength": 500 | }, - | "string2Key": { + | "stringKey2": { | "type": "string", | "maxLength": 500 | }, @@ -144,9 +148,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(leanBase, schema2) should beRight(schema2) and ( - Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Schema key addition at /string2Key") + Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Schema key addition at /string_key2") ) } @@ -170,9 +174,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get - Migrations.mergeSchemas(leanBase, schema2).leftMap(_.toString) should beLeft(Set("Incompatible type change String to Long at /stringKey")) - Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Incompatible type change String to Long at /stringKey") + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + Migrations.mergeSchemas(leanBase, schema2).leftMap(_.toString) should beLeft(Set("Incompatible type change String to Long at /string_key")) + Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Incompatible type change String to Long at /string_key") } // Produce migration for removal of top-level fields $e7 @@ -192,9 +196,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(leanBase, schema2) should beRight(leanBase) and ( - Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Key removal at /stringKey")) + Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Key removal at /string_key")) } // Error with invalid type change in arrays AND nullable change $e8 @@ -257,7 +261,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get + val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) val input2 = SpecHelpers.parseSchema( """ @@ -277,11 +281,11 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) - Migrations.mergeSchemas(schema1, schema2).leftMap(_.toString) should beRight(schema2) and ( + Migrations.mergeSchemas(schema1, schema2) should beRight(schema2) and ( Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual - Set("Schema key addition at /arrayKey/[arrayDown]/nestedKey3") + Set("Schema key addition at /array_key/[arrayDown]/nested_key3") ) } @@ -304,7 +308,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get + val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) val input2 = SpecHelpers.parseSchema( """ @@ -324,11 +328,11 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) - Migrations.mergeSchemas(schema2, schema1).leftMap(_.toString) should beRight(schema2) and ( + Migrations.mergeSchemas(schema2, schema1) should beRight(schema2) and ( Migrations.assessSchemaMigration(schema2, schema1).map(_.toString) shouldEqual - Set("Key removal at /arrayKey/[arrayDown]/nestedKey3") + Set("Key removal at /array_key/[arrayDown]/nested_key3") ) } @@ -399,7 +403,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get + val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) val input2 = SpecHelpers.parseSchema( """ @@ -419,89 +423,16 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get - - Migrations.mergeSchemas(schema1, schema2).leftMap(_.toString) should beRight(schema1) and ( - Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set( - "Changing nullable property to required at /arrayKey/[arrayDown]/nestedKey2" - )) - } - - // Preserve ordering in nested arrays and structs $e13 - def e13 = { - val input1 = SpecHelpers.parseSchema( - """ - |{"type": "object", - |"properties": { - | "arrayKey": { - | "type": "array", - | "items": { - | "type": "object", - | "properties": { - | "nestedKey1": { "type": "integer" }, - | "nestedKey2": { "type": "integer" } - | }, - | "required": ["nestedKey2"] - | } - | } - | }, - | "objectKey": { - | "type": "object", - | "properties": { - | "nestedKey1": { "type": "string" }, - | "nestedKey2": { "type": ["integer", "null"] }, - | "nestedKey3": { "type": "boolean" }, - | "string2Key": { - | "type": "string", - | "maxLength": 500 - | } - | } - |} - |} - """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get - - val input2 = SpecHelpers.parseSchema( - """ - |{"type": "object", - |"properties": { - | "arrayKey": { - | "type": "array", - | "items": { - | "type": "object", - | "properties": { - | "nestedKey1": { "type": "integer" }, - | "nestedKey0": { "type": "integer" }, - | "nestedKey2": { "type": "integer" } - | }, - | "required": ["nestedKey2"] - | }} - | }, - | "objectKey": { - | "type": "object", - | "properties": { - | "nestedKey1": { "type": "string" }, - | "nestedKey0": { "type": "integer" }, - | "nestedKey2": { "type": ["integer", "null"] }, - | "nestedKey3": { "type": "boolean" }, - | "string2Key": { - | "type": "string", - | "maxLength": 500 - | } - | } - |} - |} - """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) - Migrations.mergeSchemas(schema1, schema2).leftMap(_.toString) should beRight(schema2) and ( + Migrations.mergeSchemas(schema1, schema2) should beRight(schema1) and ( Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set( - "Schema key addition at /arrayKey/[arrayDown]/nestedKey0" + "Changing nullable property to required at /array_key/[arrayDown]/nested_key2" )) } - // Suggest version change correctly $e14 - def e14 = { + // Suggest version change correctly $e13 + def e13 = { val major: ParquetSchemaMigrations = Set(IncompatibleType(List("/"), Type.Boolean, Type.Double)) val patch: ParquetSchemaMigrations = Set(KeyAddition(Nil, Type.Boolean)) @@ -509,7 +440,7 @@ class MigrationSpec extends org.specs2.Specification { isSchemaMigrationBreakingFromMigrations(patch) shouldEqual false } - def e15 = { + def e14 = { val input1 = SpecHelpers.parseSchema( """ |{"type": "object", @@ -531,11 +462,11 @@ class MigrationSpec extends org.specs2.Specification { val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(schema1, schema2).map( - f => f.fieldType.asInstanceOf[Struct].fields.head.accessors.mkString(",") - ) should beRight("colliding_key,collidingKey") + f => f.fieldType.asInstanceOf[Struct].fields.head.accessors + ) should beRight(beEqualTo(Set("colliding_key", "collidingKey"))) } - def e16 = { + def e15 = { val input1 = SpecHelpers.parseSchema( """ |{"type": "object", @@ -563,7 +494,7 @@ class MigrationSpec extends org.specs2.Specification { } - def e17 = { + def e16 = { val input1 = SpecHelpers.parseSchema( """ |{"type": "object", @@ -590,10 +521,153 @@ class MigrationSpec extends org.specs2.Specification { ) should beRight(true) } + + // Preserve ordering of fields in a struct $e17 + def e17 = { + val struct1 = Type.Struct( + NonEmptyVector.of( + Field("zzz", Type.String, Nullable), + Field("yyy", Type.String, Nullable), + Field("xxx", Type.String, Nullable), + ) + ) + + val struct2 = Type.Struct( + NonEmptyVector.of( + Field("xxx", Type.String, Required), + Field("new1", Type.String, Required), + Field("yyy", Type.String, Required), + Field("new2", Type.String, Required), + Field("zzz", Type.String, Required), + ) + ) + + val expectedStruct = Type.Struct( + NonEmptyVector.of( + // original fields + Field("zzz", Type.String, Nullable), + Field("yyy", Type.String, Nullable), + Field("xxx", Type.String, Nullable), + // added fields + Field("new1", Type.String, Nullable), + Field("new2", Type.String, Nullable), + ) + ) + + val field1 = Field("top", struct1, Required) + val field2 = Field("top", struct2, Required) + val expected = Field("top", expectedStruct, Required) + + Migrations.mergeSchemas(field1, field2) must beRight(expected) + } + + // Preserve ordering in nested arrays and structs $e18 + def e18 = { + val input1 = SpecHelpers.parseSchema( + """ + |{"type": "object", + |"properties": { + | "arrayKey": { + | "type": "array", + | "items": { + | "type": "object", + | "properties": { + | "nestedKey1": { "type": "integer" }, + | "nestedKey2": { "type": "integer" } + | }, + | "required": ["nestedKey2"] + | } + | }, + | "objectKey": { + | "type": "object", + | "properties": { + | "nestedKey1": { "type": "string" }, + | "nestedKey2": { "type": ["integer", "null"] }, + | "nestedKey3": { "type": "boolean" }, + | "string2Key": { + | "type": "string", + | "maxLength": 500 + | } + | } + | } + |} + |} + """.stripMargin) + val schema1 = Field.normalize((Field.build("top", input1, enforceValuePresence = true).get)) + + val input2 = SpecHelpers.parseSchema( + """ + |{"type": "object", + |"properties": { + | "arrayKey": { + | "type": "array", + | "items": { + | "type": "object", + | "properties": { + | "nestedKey1": { "type": "integer" }, + | "nestedKey0": { "type": "integer" }, + | "nestedKey2": { "type": "integer" } + | }, + | "required": ["nestedKey2"] + | }}, + | "objectKey": { + | "type": "object", + | "properties": { + | "nestedKey1": { "type": "string" }, + | "nestedKey0": { "type": "integer" }, + | "nestedKey2": { "type": ["integer", "null"] }, + | "nestedKey3": { "type": "boolean" }, + | "string2Key": { + | "type": "string", + | "maxLength": 500 + | } + | } + | } + |} + |} + """.stripMargin) + val schema2 = Field.normalize((Field.build("top", input2, enforceValuePresence = true).get)) + + val expected = { + + val arrayStruct = Struct( + NonEmptyVector.of( + Field("nested_key1", Type.Long, Nullable, Set("nestedKey1")), + Field("nested_key2", Type.Long, Required, Set("nestedKey2")), + Field("nested_key0", Type.Long, Nullable, Set("nestedKey0")) + ) + ) + + val objectStruct = Struct( + NonEmptyVector.of( + Field("nested_key1", Type.String, Nullable, Set("nestedKey1")), + Field("nested_key2", Type.Long, Nullable, Set("nestedKey2")), + Field("nested_key3", Type.Boolean, Nullable, Set("nestedKey3")), + Field("string2_key", Type.String, Nullable, Set("string2Key")), + Field("nested_key0", Type.Long, Nullable, Set("nestedKey0")) + ) + ) + + val topStruct = Struct( + NonEmptyVector.of( + Field("array_key", Type.Array(arrayStruct, Required), Nullable, Set("arrayKey")), + Field("object_key", objectStruct, Nullable, Set("objectKey")) + ) + ) + Field("top", topStruct, Required) + } + + Migrations.mergeSchemas(schema1, schema2) should beRight(expected) and ( + Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set( + "Schema key addition at /array_key/[arrayDown]/nested_key0", + "Schema key addition at /object_key/nested_key0" + )) + } + } object MigrationSpec { - val leanBase: Field = Field.build("top", SpecHelpers.parseSchema( + val leanBase: Field = Field.normalize(Field.build("top", SpecHelpers.parseSchema( """ |{"type": "object", |"properties": { @@ -611,5 +685,5 @@ object MigrationSpec { | } |} |} - """.stripMargin), enforceValuePresence = false).get + """.stripMargin), enforceValuePresence = false).get) } From 31a6e1914ddad0ca12fe2909369c2eb6717f877c Mon Sep 17 00:00:00 2001 From: Ian Streeter Date: Fri, 22 Nov 2024 08:56:03 +0000 Subject: [PATCH 2/4] Review feedback --- .../iglu.schemaddl/parquet/Migrations.scala | 5 ++++- .../iglu/schemaddl/parquet/MigrationSpec.scala | 16 ++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala index 8a3b4df4..a13f331c 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala @@ -87,7 +87,10 @@ object Migrations { } // Comparing struct target fields to the source. This will detect additions. - val reverseMigration = targetFields.map(tgtField => MigrationFieldPair(tgtField.name :: path, tgtField, sourceStruct.focus(tgtField.name)).migrations) + val reverseMigration = targetFields.map { + tgtField => + MigrationFieldPair(tgtField.name :: path, tgtField, sourceStruct.focus(tgtField.name)).migrations + } migrations ++= forwardMigration.iterator.flatMap(_.migrations) diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala index 8c43e653..e3709b2a 100644 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala +++ b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala @@ -526,18 +526,18 @@ class MigrationSpec extends org.specs2.Specification { def e17 = { val struct1 = Type.Struct( NonEmptyVector.of( - Field("zzz", Type.String, Nullable), - Field("yyy", Type.String, Nullable), + Field("vvv", Type.String, Nullable), Field("xxx", Type.String, Nullable), + Field("zzz", Type.String, Nullable), ) ) val struct2 = Type.Struct( NonEmptyVector.of( + Field("vvv", Type.String, Required), + Field("www", Type.String, Required), Field("xxx", Type.String, Required), - Field("new1", Type.String, Required), Field("yyy", Type.String, Required), - Field("new2", Type.String, Required), Field("zzz", Type.String, Required), ) ) @@ -545,12 +545,12 @@ class MigrationSpec extends org.specs2.Specification { val expectedStruct = Type.Struct( NonEmptyVector.of( // original fields - Field("zzz", Type.String, Nullable), - Field("yyy", Type.String, Nullable), + Field("vvv", Type.String, Nullable), Field("xxx", Type.String, Nullable), + Field("zzz", Type.String, Nullable), // added fields - Field("new1", Type.String, Nullable), - Field("new2", Type.String, Nullable), + Field("www", Type.String, Nullable), + Field("yyy", Type.String, Nullable), ) ) From db04e30ceb867151087d4aad96dfbb84b72b4b23 Mon Sep 17 00:00:00 2001 From: Ian Streeter Date: Fri, 22 Nov 2024 10:34:38 +0000 Subject: [PATCH 3/4] Amendment: Fields should be normalized as part of merging schemas --- .../iglu.schemaddl/parquet/Migrations.scala | 6 +- .../schemaddl/parquet/MigrationSpec.scala | 96 ++++++++++++------- 2 files changed, 63 insertions(+), 39 deletions(-) diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala index a13f331c..fa2a9947 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala @@ -182,7 +182,7 @@ object Migrations { Generates list of all migration for the Schema pair. Top level of schema is always Struct. */ def assessSchemaMigration(source: Field, target: Field): ParquetSchemaMigrations = - MigrationFieldPair(Nil, source, Some(target)).migrations.migrations + MigrationFieldPair(Nil, Field.normalize(source), Some(Field.normalize(target))).migrations.migrations // [parquet] to access this in tests @@ -194,7 +194,7 @@ object Migrations { }) def mergeSchemas(source: Field, target: Field): Either[List[Breaking], Field] = { - val merged = MigrationFieldPair(Nil, source, Some(target)).migrations + val merged = MigrationFieldPair(Nil, Field.normalize(source), Some(Field.normalize(target))).migrations merged.result match { case Some(field) => field.asRight case None => merged.migrations.foldLeft(List.empty[Breaking])((accErr, migration) => migration match { @@ -205,5 +205,5 @@ object Migrations { } def isSchemaMigrationBreaking(source: Field, target: Field): Boolean = - isSchemaMigrationBreakingFromMigrations(MigrationFieldPair(Nil, source, Some(target)).migrations.migrations) + isSchemaMigrationBreakingFromMigrations(MigrationFieldPair(Nil, Field.normalize(source), Some(Field.normalize(target))).migrations.migrations) } diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala index e3709b2a..5d781fe9 100644 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala +++ b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala @@ -21,7 +21,7 @@ class MigrationSpec extends org.specs2.Specification { Produce migration for new top-level fields $e5 Error for type casting in top-level fields $e6 Produce migration for removal of top-level fields $e7 - Error with invalid type change in arrays AND nullable change $e8 + Error with invalid type change in arrays AND nullable change $e8 Produce migration for adding fields in structs in arrays $e9 Produce migration for removal fields in structs in arrays $e10 Error for type casting in nested arrays $e11 @@ -30,8 +30,9 @@ class MigrationSpec extends org.specs2.Specification { Collapse field name collisions $e14 Drop not null constraint when field is removed in next generation $e15 Drop not null constraint when field is added in next generation $e16 - Preserve ordering of fields in a struct $e17 - Preserve ordering in nested arrays and structs $e18 + Normalize the field order as part of merging schemas $e17 + Preserve ordering of fields in a struct $e18 + Preserve ordering in nested arrays and structs $e19 """ def e1 = { @@ -64,8 +65,8 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) - Migrations.mergeSchemas(leanBase, schema2) should beRight(schema2) and ( + val schema2 = Field.build("top", input2, enforceValuePresence = false).get + Migrations.mergeSchemas(leanBase, schema2) should beRight(Field.normalize(schema2)) and ( Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Schema key addition at /object_key/string2_key") ) } @@ -91,7 +92,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + val schema2 = Field.build("top", input2, enforceValuePresence = false).get Migrations.mergeSchemas(leanBase, schema2).leftMap(_.map(_.toString)) should beLeft(List("Incompatible type change String to Long at /object_key/nested_key1")) and ( Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Incompatible type change String to Long at /object_key/nested_key1") ) @@ -117,7 +118,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + val schema2 = Field.build("top", input2, enforceValuePresence = false).get Migrations.mergeSchemas(leanBase, schema2) should beRight(leanBase) and ( Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Key removal at /object_key/nested_key3")) @@ -148,8 +149,8 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) - Migrations.mergeSchemas(leanBase, schema2) should beRight(schema2) and ( + val schema2 = Field.build("top", input2, enforceValuePresence = false).get + Migrations.mergeSchemas(leanBase, schema2) should beRight(Field.normalize(schema2)) and ( Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Schema key addition at /string_key2") ) } @@ -174,7 +175,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + val schema2 = Field.build("top", input2, enforceValuePresence = false).get Migrations.mergeSchemas(leanBase, schema2).leftMap(_.toString) should beLeft(Set("Incompatible type change String to Long at /string_key")) Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Incompatible type change String to Long at /string_key") } @@ -196,7 +197,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + val schema2 = Field.build("top", input2, enforceValuePresence = false).get Migrations.mergeSchemas(leanBase, schema2) should beRight(leanBase) and ( Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Key removal at /string_key")) } @@ -234,11 +235,11 @@ class MigrationSpec extends org.specs2.Specification { val schema2 = Field.build("top", input2, enforceValuePresence = false).get Migrations.mergeSchemas(schema1, schema2).leftMap(_.map(_.toString)) should beLeft(List( - "Incompatible type change Long to String at /arrayKey/[arrayDown]" + "Incompatible type change Long to String at /array_key/[arrayDown]" )) and ( Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set( - "Changing nullable property to required at /arrayKey/[arrayDown]", - "Incompatible type change Long to String at /arrayKey/[arrayDown]" + "Changing nullable property to required at /array_key/[arrayDown]", + "Incompatible type change Long to String at /array_key/[arrayDown]" )) } @@ -261,7 +262,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) + val schema1 = Field.build("top", input1, enforceValuePresence = false).get val input2 = SpecHelpers.parseSchema( """ @@ -281,9 +282,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + val schema2 = Field.build("top", input2, enforceValuePresence = false).get - Migrations.mergeSchemas(schema1, schema2) should beRight(schema2) and ( + Migrations.mergeSchemas(schema1, schema2) should beRight(Field.normalize(schema2)) and ( Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set("Schema key addition at /array_key/[arrayDown]/nested_key3") ) @@ -308,7 +309,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) + val schema1 = Field.build("top", input1, enforceValuePresence = false).get val input2 = SpecHelpers.parseSchema( """ @@ -328,9 +329,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + val schema2 = Field.build("top", input2, enforceValuePresence = false).get - Migrations.mergeSchemas(schema2, schema1) should beRight(schema2) and ( + Migrations.mergeSchemas(schema2, schema1) should beRight(Field.normalize(schema2)) and ( Migrations.assessSchemaMigration(schema2, schema1).map(_.toString) shouldEqual Set("Key removal at /array_key/[arrayDown]/nested_key3") ) @@ -377,10 +378,10 @@ class MigrationSpec extends org.specs2.Specification { val schema2 = Field.build("top", input2, enforceValuePresence = false).get Migrations.mergeSchemas(schema1, schema2).leftMap(_.map(_.toString)) should beLeft(List( - "Incompatible type change String to Long at /arrayKey/[arrayDown]/nestedKey1" + "Incompatible type change String to Long at /array_key/[arrayDown]/nested_key1" )) and ( Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set( - "Incompatible type change String to Long at /arrayKey/[arrayDown]/nestedKey1" + "Incompatible type change String to Long at /array_key/[arrayDown]/nested_key1" )) } @@ -403,7 +404,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) + val schema1 = Field.build("top", input1, enforceValuePresence = false).get val input2 = SpecHelpers.parseSchema( """ @@ -423,9 +424,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + val schema2 = Field.build("top", input2, enforceValuePresence = false).get - Migrations.mergeSchemas(schema1, schema2) should beRight(schema1) and ( + Migrations.mergeSchemas(schema1, schema2) should beRight(Field.normalize(schema1)) and ( Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set( "Changing nullable property to required at /array_key/[arrayDown]/nested_key2" )) @@ -449,7 +450,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) + val schema1 = Field.build("top", input1, enforceValuePresence = false).get val input2 = SpecHelpers.parseSchema( """ @@ -459,7 +460,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + val schema2 = Field.build("top", input2, enforceValuePresence = false).get Migrations.mergeSchemas(schema1, schema2).map( f => f.fieldType.asInstanceOf[Struct].fields.head.accessors @@ -476,7 +477,7 @@ class MigrationSpec extends org.specs2.Specification { |"required" : ["k1"] |} """.stripMargin) - val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) + val schema1 = Field.build("top", input1, enforceValuePresence = false).get val input2 = SpecHelpers.parseSchema( """ @@ -486,7 +487,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + val schema2 = Field.build("top", input2, enforceValuePresence = false).get Migrations.mergeSchemas(schema1, schema2).map( f => f.fieldType.asInstanceOf[Struct].fields.last.nullability.nullable @@ -503,7 +504,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) + val schema1 = Field.build("top", input1, enforceValuePresence = false).get val input2 = SpecHelpers.parseSchema( """ @@ -514,7 +515,7 @@ class MigrationSpec extends org.specs2.Specification { | "required" : ["k2"] |} """.stripMargin) - val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + val schema2 = Field.build("top", input2, enforceValuePresence = false).get Migrations.mergeSchemas(schema1, schema2).map( f => f.fieldType.asInstanceOf[Struct].fields.forall(_.nullability.nullable) @@ -522,8 +523,31 @@ class MigrationSpec extends org.specs2.Specification { } - // Preserve ordering of fields in a struct $e17 + // Normalize field order as part of merging schemas $e17 def e17 = { + val inputStruct = Type.Struct( + NonEmptyVector.of( + Field("ccc", Type.String, Nullable), + Field("aaa", Type.String, Nullable), + Field("bbb", Type.String, Nullable), + ) + ) + + val expectedStruct = Type.Struct( + NonEmptyVector.of( + Field("aaa", Type.String, Nullable), + Field("bbb", Type.String, Nullable), + Field("ccc", Type.String, Nullable), + ) + ) + val input = Field("top", inputStruct, Required) + val expected = Field("top", expectedStruct, Required) + + Migrations.mergeSchemas(input, input) must beRight(expected) + } + + // Preserve ordering of fields in a struct $e18 + def e18 = { val struct1 = Type.Struct( NonEmptyVector.of( Field("vvv", Type.String, Nullable), @@ -561,8 +585,8 @@ class MigrationSpec extends org.specs2.Specification { Migrations.mergeSchemas(field1, field2) must beRight(expected) } - // Preserve ordering in nested arrays and structs $e18 - def e18 = { + // Preserve ordering in nested arrays and structs $e19 + def e19 = { val input1 = SpecHelpers.parseSchema( """ |{"type": "object", @@ -593,7 +617,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.normalize((Field.build("top", input1, enforceValuePresence = true).get)) + val schema1 = (Field.build("top", input1, enforceValuePresence = true).get) val input2 = SpecHelpers.parseSchema( """ @@ -626,7 +650,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.normalize((Field.build("top", input2, enforceValuePresence = true).get)) + val schema2 = (Field.build("top", input2, enforceValuePresence = true).get) val expected = { From 89d7603678fcc07ec356a89485c999e20b9e55bb Mon Sep 17 00:00:00 2001 From: Ian Streeter Date: Fri, 22 Nov 2024 13:08:39 +0000 Subject: [PATCH 4/4] Revert "Amendment: Fields should be normalized as part of merging schemas" This reverts commit db04e30ceb867151087d4aad96dfbb84b72b4b23. --- .../iglu.schemaddl/parquet/Migrations.scala | 6 +- .../schemaddl/parquet/MigrationSpec.scala | 96 +++++++------------ 2 files changed, 39 insertions(+), 63 deletions(-) diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala index fa2a9947..a13f331c 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala @@ -182,7 +182,7 @@ object Migrations { Generates list of all migration for the Schema pair. Top level of schema is always Struct. */ def assessSchemaMigration(source: Field, target: Field): ParquetSchemaMigrations = - MigrationFieldPair(Nil, Field.normalize(source), Some(Field.normalize(target))).migrations.migrations + MigrationFieldPair(Nil, source, Some(target)).migrations.migrations // [parquet] to access this in tests @@ -194,7 +194,7 @@ object Migrations { }) def mergeSchemas(source: Field, target: Field): Either[List[Breaking], Field] = { - val merged = MigrationFieldPair(Nil, Field.normalize(source), Some(Field.normalize(target))).migrations + val merged = MigrationFieldPair(Nil, source, Some(target)).migrations merged.result match { case Some(field) => field.asRight case None => merged.migrations.foldLeft(List.empty[Breaking])((accErr, migration) => migration match { @@ -205,5 +205,5 @@ object Migrations { } def isSchemaMigrationBreaking(source: Field, target: Field): Boolean = - isSchemaMigrationBreakingFromMigrations(MigrationFieldPair(Nil, Field.normalize(source), Some(Field.normalize(target))).migrations.migrations) + isSchemaMigrationBreakingFromMigrations(MigrationFieldPair(Nil, source, Some(target)).migrations.migrations) } diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala index 5d781fe9..e3709b2a 100644 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala +++ b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala @@ -21,7 +21,7 @@ class MigrationSpec extends org.specs2.Specification { Produce migration for new top-level fields $e5 Error for type casting in top-level fields $e6 Produce migration for removal of top-level fields $e7 - Error with invalid type change in arrays AND nullable change $e8 + Error with invalid type change in arrays AND nullable change $e8 Produce migration for adding fields in structs in arrays $e9 Produce migration for removal fields in structs in arrays $e10 Error for type casting in nested arrays $e11 @@ -30,9 +30,8 @@ class MigrationSpec extends org.specs2.Specification { Collapse field name collisions $e14 Drop not null constraint when field is removed in next generation $e15 Drop not null constraint when field is added in next generation $e16 - Normalize the field order as part of merging schemas $e17 - Preserve ordering of fields in a struct $e18 - Preserve ordering in nested arrays and structs $e19 + Preserve ordering of fields in a struct $e17 + Preserve ordering in nested arrays and structs $e18 """ def e1 = { @@ -65,8 +64,8 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get - Migrations.mergeSchemas(leanBase, schema2) should beRight(Field.normalize(schema2)) and ( + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + Migrations.mergeSchemas(leanBase, schema2) should beRight(schema2) and ( Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Schema key addition at /object_key/string2_key") ) } @@ -92,7 +91,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(leanBase, schema2).leftMap(_.map(_.toString)) should beLeft(List("Incompatible type change String to Long at /object_key/nested_key1")) and ( Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Incompatible type change String to Long at /object_key/nested_key1") ) @@ -118,7 +117,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(leanBase, schema2) should beRight(leanBase) and ( Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Key removal at /object_key/nested_key3")) @@ -149,8 +148,8 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get - Migrations.mergeSchemas(leanBase, schema2) should beRight(Field.normalize(schema2)) and ( + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + Migrations.mergeSchemas(leanBase, schema2) should beRight(schema2) and ( Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Schema key addition at /string_key2") ) } @@ -175,7 +174,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(leanBase, schema2).leftMap(_.toString) should beLeft(Set("Incompatible type change String to Long at /string_key")) Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Incompatible type change String to Long at /string_key") } @@ -197,7 +196,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(leanBase, schema2) should beRight(leanBase) and ( Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Key removal at /string_key")) } @@ -235,11 +234,11 @@ class MigrationSpec extends org.specs2.Specification { val schema2 = Field.build("top", input2, enforceValuePresence = false).get Migrations.mergeSchemas(schema1, schema2).leftMap(_.map(_.toString)) should beLeft(List( - "Incompatible type change Long to String at /array_key/[arrayDown]" + "Incompatible type change Long to String at /arrayKey/[arrayDown]" )) and ( Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set( - "Changing nullable property to required at /array_key/[arrayDown]", - "Incompatible type change Long to String at /array_key/[arrayDown]" + "Changing nullable property to required at /arrayKey/[arrayDown]", + "Incompatible type change Long to String at /arrayKey/[arrayDown]" )) } @@ -262,7 +261,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get + val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) val input2 = SpecHelpers.parseSchema( """ @@ -282,9 +281,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) - Migrations.mergeSchemas(schema1, schema2) should beRight(Field.normalize(schema2)) and ( + Migrations.mergeSchemas(schema1, schema2) should beRight(schema2) and ( Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set("Schema key addition at /array_key/[arrayDown]/nested_key3") ) @@ -309,7 +308,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get + val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) val input2 = SpecHelpers.parseSchema( """ @@ -329,9 +328,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) - Migrations.mergeSchemas(schema2, schema1) should beRight(Field.normalize(schema2)) and ( + Migrations.mergeSchemas(schema2, schema1) should beRight(schema2) and ( Migrations.assessSchemaMigration(schema2, schema1).map(_.toString) shouldEqual Set("Key removal at /array_key/[arrayDown]/nested_key3") ) @@ -378,10 +377,10 @@ class MigrationSpec extends org.specs2.Specification { val schema2 = Field.build("top", input2, enforceValuePresence = false).get Migrations.mergeSchemas(schema1, schema2).leftMap(_.map(_.toString)) should beLeft(List( - "Incompatible type change String to Long at /array_key/[arrayDown]/nested_key1" + "Incompatible type change String to Long at /arrayKey/[arrayDown]/nestedKey1" )) and ( Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set( - "Incompatible type change String to Long at /array_key/[arrayDown]/nested_key1" + "Incompatible type change String to Long at /arrayKey/[arrayDown]/nestedKey1" )) } @@ -404,7 +403,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get + val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) val input2 = SpecHelpers.parseSchema( """ @@ -424,9 +423,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) - Migrations.mergeSchemas(schema1, schema2) should beRight(Field.normalize(schema1)) and ( + Migrations.mergeSchemas(schema1, schema2) should beRight(schema1) and ( Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set( "Changing nullable property to required at /array_key/[arrayDown]/nested_key2" )) @@ -450,7 +449,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get + val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) val input2 = SpecHelpers.parseSchema( """ @@ -460,7 +459,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(schema1, schema2).map( f => f.fieldType.asInstanceOf[Struct].fields.head.accessors @@ -477,7 +476,7 @@ class MigrationSpec extends org.specs2.Specification { |"required" : ["k1"] |} """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get + val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) val input2 = SpecHelpers.parseSchema( """ @@ -487,7 +486,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(schema1, schema2).map( f => f.fieldType.asInstanceOf[Struct].fields.last.nullability.nullable @@ -504,7 +503,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get + val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) val input2 = SpecHelpers.parseSchema( """ @@ -515,7 +514,7 @@ class MigrationSpec extends org.specs2.Specification { | "required" : ["k2"] |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(schema1, schema2).map( f => f.fieldType.asInstanceOf[Struct].fields.forall(_.nullability.nullable) @@ -523,31 +522,8 @@ class MigrationSpec extends org.specs2.Specification { } - // Normalize field order as part of merging schemas $e17 + // Preserve ordering of fields in a struct $e17 def e17 = { - val inputStruct = Type.Struct( - NonEmptyVector.of( - Field("ccc", Type.String, Nullable), - Field("aaa", Type.String, Nullable), - Field("bbb", Type.String, Nullable), - ) - ) - - val expectedStruct = Type.Struct( - NonEmptyVector.of( - Field("aaa", Type.String, Nullable), - Field("bbb", Type.String, Nullable), - Field("ccc", Type.String, Nullable), - ) - ) - val input = Field("top", inputStruct, Required) - val expected = Field("top", expectedStruct, Required) - - Migrations.mergeSchemas(input, input) must beRight(expected) - } - - // Preserve ordering of fields in a struct $e18 - def e18 = { val struct1 = Type.Struct( NonEmptyVector.of( Field("vvv", Type.String, Nullable), @@ -585,8 +561,8 @@ class MigrationSpec extends org.specs2.Specification { Migrations.mergeSchemas(field1, field2) must beRight(expected) } - // Preserve ordering in nested arrays and structs $e19 - def e19 = { + // Preserve ordering in nested arrays and structs $e18 + def e18 = { val input1 = SpecHelpers.parseSchema( """ |{"type": "object", @@ -617,7 +593,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = (Field.build("top", input1, enforceValuePresence = true).get) + val schema1 = Field.normalize((Field.build("top", input1, enforceValuePresence = true).get)) val input2 = SpecHelpers.parseSchema( """ @@ -650,7 +626,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = (Field.build("top", input2, enforceValuePresence = true).get) + val schema2 = Field.normalize((Field.build("top", input2, enforceValuePresence = true).get)) val expected = {