From 052450f84eec6bbd59756006896a856390160cb8 Mon Sep 17 00:00:00 2001 From: Ian Streeter Date: Fri, 15 Nov 2024 13:07:59 +0000 Subject: [PATCH] Preserve original field order when merging parquet Fields We use the `Migrations.mergeSchema` function in the loaders, for combining a series of schemas (e.g. `1-0-0`, `1-0-1`, `1-0-2`) into a reconciled column Before this PR, `mergeSchemas` contained some logic to preserve field order... but it was the wrong logic. It preserved field order of the newer schema, ignoring whether a field was present in the older schema. After this PR, we preserve field order of the older schema. New fields added in a later schema are appended to the end of the field list. This feature change is needed for loaders that only allow field additions when they are appended to the end of a struct. E.g. the Lake Loader for Hudi/Glue. --- .../iglu.schemaddl/parquet/Field.scala | 15 +- .../iglu.schemaddl/parquet/Migrations.scala | 32 +- .../iglu/schemaddl/parquet/FieldSpec.scala | 72 ++++- .../schemaddl/parquet/MigrationSpec.scala | 302 +++++++++++------- 4 files changed, 280 insertions(+), 141 deletions(-) diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Field.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Field.scala index 270cc35c..ed47a1b0 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Field.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Field.scala @@ -43,16 +43,15 @@ object Field { Field(name, Type.Array(constructedType.value, itemNullability), nullability) } - def normalize(field: Field): Field = { - val fieldType = field.fieldType match { - case Type.Struct(fields) => Type.Struct(collapseDuplicateFields(fields.map(normalize))) - case Type.Array(Type.Struct(fields), nullability) => Type.Array( - Type.Struct(collapseDuplicateFields(fields.map(normalize))) - , nullability) + def normalize(field: Field): Field = + field.copy(name = normalizeName(field), fieldType = normalizeType(field.fieldType)) + + private def normalizeType(t: Type): Type = + t match { + case Type.Struct(fields) => Type.Struct(collapseDuplicateFields(fields.map(normalize)).sortBy(_.name)) + case Type.Array(el, nullability) => Type.Array(normalizeType(el), nullability) case other => other } - field.copy(name = normalizeName(field), fieldType = fieldType) - } private def collapseDuplicateFields(normFields: NonEmptyVector[Field]): NonEmptyVector[Field] = { val endMap = normFields diff --git a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala index 44b2ac93..8a3b4df4 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics/iglu.schemaddl/parquet/Migrations.scala @@ -81,7 +81,10 @@ object Migrations { val mergedType: Option[Type] = sourceType match { case sourceStruct@Struct(sourceFields) => targetType match { case targetStruct@Type.Struct(targetFields) => - val forwardMigration = sourceFields.map(srcField => MigrationFieldPair(srcField.name :: path, srcField, targetStruct.focus(srcField.name)).migrations) + val forwardMigration = sourceFields.map { + srcField => + MigrationFieldPair(srcField.name :: path, srcField, targetStruct.focus(srcField.name)).migrations + } // Comparing struct target fields to the source. This will detect additions. val reverseMigration = targetFields.map(tgtField => MigrationFieldPair(tgtField.name :: path, tgtField, sourceStruct.focus(tgtField.name)).migrations) @@ -96,22 +99,23 @@ object Migrations { val tgtFields = reverseMigration.toVector.traverse(_.result).toVector.flatten val tgtFieldNames = tgtFields.map(_.name) val allSrcFields = forwardMigration.toVector.traverse(_.result).toVector.flatten - val allSrcFieldMap = allSrcFields.map(f => f.name -> f).toMap - // swap fields in src and target as they would be rearranged in nested structs or arrays - val reorderedTgtFields = tgtFields.map { t => - allSrcFieldMap.get(t.name) match { - case Some(value) if value.fieldType.isInstanceOf[Struct] => value - case Some(value) if value.fieldType.isInstanceOf[Array] => value - case _ => t - } + val allSrcFieldNames = allSrcFields.map(_.name) + + val srcFields: Vector[Field] = allSrcFields.map { + srcField => + if (tgtFieldNames.contains(srcField.name)) + srcField + else + // drop not null constraints from removed fields. + srcField.copy(nullability = Type.Nullability.Nullable) + } + val newTgtFields = tgtFields.filter { + tgtField => + !allSrcFieldNames.contains(tgtField.name) } - val srcFields: Vector[Field] = allSrcFields.filter(srcField => !tgtFieldNames.contains(srcField.name)).map( - // drop not null constrains from removed fields. - _.copy(nullability = Type.Nullability.Nullable) - ) // failed migration would produce no fields in source - NonEmptyVector.fromVector(reorderedTgtFields ++ srcFields).map { nonEmpty => + NonEmptyVector.fromVector(srcFields ++ newTgtFields).map { nonEmpty => Type.Struct(nonEmpty) } diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/FieldSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/FieldSpec.scala index 0d4aae20..fbebf905 100644 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/FieldSpec.scala +++ b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/FieldSpec.scala @@ -36,6 +36,8 @@ class FieldSpec extends org.specs2.Specification { def is = s2""" normalName handles camel case and disallowed characters $e13 normalize would collapse colliding names $e14 normalize would collapse colliding names with deterministic type selection $e15 + normalize would sort fields in order of name $e16 + normalize would sort nested fields in order of name $e17 """ // a helper @@ -409,15 +411,15 @@ class FieldSpec extends org.specs2.Specification { def is = s2""" fieldType = Type.Struct( fields = NonEmptyVector.of( Field( - name = "_ga", + name = "__b", fieldType = Type.Integer, - nullability = Nullable, - accessors = Set("_ga", "_Ga") + nullability = Nullable ), Field( - name = "__b", + name = "_ga", fieldType = Type.Integer, - nullability = Nullable + nullability = Nullable, + accessors = Set("_ga", "_Ga") ), ) ), @@ -458,5 +460,65 @@ class FieldSpec extends org.specs2.Specification { def is = s2""" (input1 must_== expected) and (input2 must_== expected) } + def e16 = { + val input = { + val struct = Type.Struct( + NonEmptyVector.of( + Field("Apple", Type.String, Nullable), + Field("cherry", Type.String, Nullable), + Field("banana", Type.String, Nullable), + Field("Damson", Type.String, Nullable) + ) + ) + Field("top", struct, Nullable) + } + + val expected = { + val struct = Type.Struct( + NonEmptyVector.of( + Field("apple", Type.String, Nullable, Set("Apple")), + Field("banana", Type.String, Nullable, Set("banana")), + Field("cherry", Type.String, Nullable, Set("cherry")), + Field("damson", Type.String, Nullable, Set("Damson")) + ) + ) + Field("top", struct, Nullable) + } + + Field.normalize(input) must beEqualTo(expected) + } + + def e17 = { + val input = { + val nested = Type.Struct( + NonEmptyVector.of( + Field("Apple", Type.String, Nullable), + Field("cherry", Type.String, Nullable), + Field("banana", Type.String, Nullable), + Field("Damson", Type.String, Nullable) + ) + ) + val arr = Field("nested_arr", Type.Array(nested, Nullable), Nullable) + val struct = Field("nested_obj", nested, Nullable) + Field("top", Type.Struct(NonEmptyVector.of(arr, struct)), Nullable) + } + + val expected = { + val nested = Type.Struct( + NonEmptyVector.of( + Field("apple", Type.String, Nullable, Set("Apple")), + Field("banana", Type.String, Nullable, Set("banana")), + Field("cherry", Type.String, Nullable, Set("cherry")), + Field("damson", Type.String, Nullable, Set("Damson")) + ) + ) + val arr = Field("nested_arr", Type.Array(nested, Nullable), Nullable) + val struct = Field("nested_obj", nested, Nullable) + Field("top", Type.Struct(NonEmptyVector.of(arr, struct)), Nullable) + } + + Field.normalize(input) must beEqualTo(expected) + } + private def fieldNormalName(name: String) = Field.normalizeName(Field(name, Type.String, nullability = Nullable)) } diff --git a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala index 1ff70339..8c43e653 100644 --- a/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala +++ b/modules/core/src/test/scala/com/snowplowanalytics/iglu/schemaddl/parquet/MigrationSpec.scala @@ -1,9 +1,12 @@ package com.snowplowanalytics.iglu.schemaddl.parquet +import cats.data.NonEmptyVector import cats.syntax.either._ + import com.snowplowanalytics.iglu.schemaddl.SpecHelpers import com.snowplowanalytics.iglu.schemaddl.parquet.Migrations._ import com.snowplowanalytics.iglu.schemaddl.parquet.Type.Struct +import com.snowplowanalytics.iglu.schemaddl.parquet.Type.Nullability.{Required, Nullable} class MigrationSpec extends org.specs2.Specification { @@ -23,11 +26,12 @@ class MigrationSpec extends org.specs2.Specification { Produce migration for removal fields in structs in arrays $e10 Error for type casting in nested arrays $e11 Produce migration for nullables arrays $e12 - Preserve ordering in nested arrays and structs $e13 - Suggest version change correctly $e14 - Collapse field name collisions $e15 - Drop not null constraint when field is removed in next generation $e16 - Drop not null constraint when field is added in next generation $e17 + Suggest version change correctly $e13 + Collapse field name collisions $e14 + Drop not null constraint when field is removed in next generation $e15 + Drop not null constraint when field is added in next generation $e16 + Preserve ordering of fields in a struct $e17 + Preserve ordering in nested arrays and structs $e18 """ def e1 = { @@ -60,9 +64,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(leanBase, schema2) should beRight(schema2) and ( - Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Schema key addition at /objectKey/string2Key") + Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Schema key addition at /object_key/string2_key") ) } @@ -87,9 +91,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get - Migrations.mergeSchemas(leanBase, schema2).leftMap(_.map(_.toString)) should beLeft(List("Incompatible type change String to Long at /objectKey/nestedKey1")) and ( - Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Incompatible type change String to Long at /objectKey/nestedKey1") + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + Migrations.mergeSchemas(leanBase, schema2).leftMap(_.map(_.toString)) should beLeft(List("Incompatible type change String to Long at /object_key/nested_key1")) and ( + Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Incompatible type change String to Long at /object_key/nested_key1") ) } @@ -113,10 +117,10 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(leanBase, schema2) should beRight(leanBase) and ( - Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Key removal at /objectKey/nestedKey3")) + Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Key removal at /object_key/nested_key3")) } // Produce migration for new top-level fields $e5 @@ -129,7 +133,7 @@ class MigrationSpec extends org.specs2.Specification { | "type": "string", | "maxLength": 500 | }, - | "string2Key": { + | "stringKey2": { | "type": "string", | "maxLength": 500 | }, @@ -144,9 +148,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(leanBase, schema2) should beRight(schema2) and ( - Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Schema key addition at /string2Key") + Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Schema key addition at /string_key2") ) } @@ -170,9 +174,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get - Migrations.mergeSchemas(leanBase, schema2).leftMap(_.toString) should beLeft(Set("Incompatible type change String to Long at /stringKey")) - Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Incompatible type change String to Long at /stringKey") + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) + Migrations.mergeSchemas(leanBase, schema2).leftMap(_.toString) should beLeft(Set("Incompatible type change String to Long at /string_key")) + Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Incompatible type change String to Long at /string_key") } // Produce migration for removal of top-level fields $e7 @@ -192,9 +196,9 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(leanBase, schema2) should beRight(leanBase) and ( - Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Key removal at /stringKey")) + Migrations.assessSchemaMigration(leanBase, schema2).map(_.toString) shouldEqual Set("Key removal at /string_key")) } // Error with invalid type change in arrays AND nullable change $e8 @@ -257,7 +261,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get + val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) val input2 = SpecHelpers.parseSchema( """ @@ -277,11 +281,11 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) - Migrations.mergeSchemas(schema1, schema2).leftMap(_.toString) should beRight(schema2) and ( + Migrations.mergeSchemas(schema1, schema2) should beRight(schema2) and ( Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual - Set("Schema key addition at /arrayKey/[arrayDown]/nestedKey3") + Set("Schema key addition at /array_key/[arrayDown]/nested_key3") ) } @@ -304,7 +308,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get + val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) val input2 = SpecHelpers.parseSchema( """ @@ -324,11 +328,11 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) - Migrations.mergeSchemas(schema2, schema1).leftMap(_.toString) should beRight(schema2) and ( + Migrations.mergeSchemas(schema2, schema1) should beRight(schema2) and ( Migrations.assessSchemaMigration(schema2, schema1).map(_.toString) shouldEqual - Set("Key removal at /arrayKey/[arrayDown]/nestedKey3") + Set("Key removal at /array_key/[arrayDown]/nested_key3") ) } @@ -399,7 +403,7 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get + val schema1 = Field.normalize(Field.build("top", input1, enforceValuePresence = false).get) val input2 = SpecHelpers.parseSchema( """ @@ -419,89 +423,16 @@ class MigrationSpec extends org.specs2.Specification { |} |} """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get - - Migrations.mergeSchemas(schema1, schema2).leftMap(_.toString) should beRight(schema1) and ( - Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set( - "Changing nullable property to required at /arrayKey/[arrayDown]/nestedKey2" - )) - } - - // Preserve ordering in nested arrays and structs $e13 - def e13 = { - val input1 = SpecHelpers.parseSchema( - """ - |{"type": "object", - |"properties": { - | "arrayKey": { - | "type": "array", - | "items": { - | "type": "object", - | "properties": { - | "nestedKey1": { "type": "integer" }, - | "nestedKey2": { "type": "integer" } - | }, - | "required": ["nestedKey2"] - | } - | } - | }, - | "objectKey": { - | "type": "object", - | "properties": { - | "nestedKey1": { "type": "string" }, - | "nestedKey2": { "type": ["integer", "null"] }, - | "nestedKey3": { "type": "boolean" }, - | "string2Key": { - | "type": "string", - | "maxLength": 500 - | } - | } - |} - |} - """.stripMargin) - val schema1 = Field.build("top", input1, enforceValuePresence = false).get - - val input2 = SpecHelpers.parseSchema( - """ - |{"type": "object", - |"properties": { - | "arrayKey": { - | "type": "array", - | "items": { - | "type": "object", - | "properties": { - | "nestedKey1": { "type": "integer" }, - | "nestedKey0": { "type": "integer" }, - | "nestedKey2": { "type": "integer" } - | }, - | "required": ["nestedKey2"] - | }} - | }, - | "objectKey": { - | "type": "object", - | "properties": { - | "nestedKey1": { "type": "string" }, - | "nestedKey0": { "type": "integer" }, - | "nestedKey2": { "type": ["integer", "null"] }, - | "nestedKey3": { "type": "boolean" }, - | "string2Key": { - | "type": "string", - | "maxLength": 500 - | } - | } - |} - |} - """.stripMargin) - val schema2 = Field.build("top", input2, enforceValuePresence = false).get + val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) - Migrations.mergeSchemas(schema1, schema2).leftMap(_.toString) should beRight(schema2) and ( + Migrations.mergeSchemas(schema1, schema2) should beRight(schema1) and ( Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set( - "Schema key addition at /arrayKey/[arrayDown]/nestedKey0" + "Changing nullable property to required at /array_key/[arrayDown]/nested_key2" )) } - // Suggest version change correctly $e14 - def e14 = { + // Suggest version change correctly $e13 + def e13 = { val major: ParquetSchemaMigrations = Set(IncompatibleType(List("/"), Type.Boolean, Type.Double)) val patch: ParquetSchemaMigrations = Set(KeyAddition(Nil, Type.Boolean)) @@ -509,7 +440,7 @@ class MigrationSpec extends org.specs2.Specification { isSchemaMigrationBreakingFromMigrations(patch) shouldEqual false } - def e15 = { + def e14 = { val input1 = SpecHelpers.parseSchema( """ |{"type": "object", @@ -531,11 +462,11 @@ class MigrationSpec extends org.specs2.Specification { val schema2 = Field.normalize(Field.build("top", input2, enforceValuePresence = false).get) Migrations.mergeSchemas(schema1, schema2).map( - f => f.fieldType.asInstanceOf[Struct].fields.head.accessors.mkString(",") - ) should beRight("colliding_key,collidingKey") + f => f.fieldType.asInstanceOf[Struct].fields.head.accessors + ) should beRight(beEqualTo(Set("colliding_key", "collidingKey"))) } - def e16 = { + def e15 = { val input1 = SpecHelpers.parseSchema( """ |{"type": "object", @@ -563,7 +494,7 @@ class MigrationSpec extends org.specs2.Specification { } - def e17 = { + def e16 = { val input1 = SpecHelpers.parseSchema( """ |{"type": "object", @@ -590,10 +521,153 @@ class MigrationSpec extends org.specs2.Specification { ) should beRight(true) } + + // Preserve ordering of fields in a struct $e17 + def e17 = { + val struct1 = Type.Struct( + NonEmptyVector.of( + Field("zzz", Type.String, Nullable), + Field("yyy", Type.String, Nullable), + Field("xxx", Type.String, Nullable), + ) + ) + + val struct2 = Type.Struct( + NonEmptyVector.of( + Field("xxx", Type.String, Required), + Field("new1", Type.String, Required), + Field("yyy", Type.String, Required), + Field("new2", Type.String, Required), + Field("zzz", Type.String, Required), + ) + ) + + val expectedStruct = Type.Struct( + NonEmptyVector.of( + // original fields + Field("zzz", Type.String, Nullable), + Field("yyy", Type.String, Nullable), + Field("xxx", Type.String, Nullable), + // added fields + Field("new1", Type.String, Nullable), + Field("new2", Type.String, Nullable), + ) + ) + + val field1 = Field("top", struct1, Required) + val field2 = Field("top", struct2, Required) + val expected = Field("top", expectedStruct, Required) + + Migrations.mergeSchemas(field1, field2) must beRight(expected) + } + + // Preserve ordering in nested arrays and structs $e18 + def e18 = { + val input1 = SpecHelpers.parseSchema( + """ + |{"type": "object", + |"properties": { + | "arrayKey": { + | "type": "array", + | "items": { + | "type": "object", + | "properties": { + | "nestedKey1": { "type": "integer" }, + | "nestedKey2": { "type": "integer" } + | }, + | "required": ["nestedKey2"] + | } + | }, + | "objectKey": { + | "type": "object", + | "properties": { + | "nestedKey1": { "type": "string" }, + | "nestedKey2": { "type": ["integer", "null"] }, + | "nestedKey3": { "type": "boolean" }, + | "string2Key": { + | "type": "string", + | "maxLength": 500 + | } + | } + | } + |} + |} + """.stripMargin) + val schema1 = Field.normalize((Field.build("top", input1, enforceValuePresence = true).get)) + + val input2 = SpecHelpers.parseSchema( + """ + |{"type": "object", + |"properties": { + | "arrayKey": { + | "type": "array", + | "items": { + | "type": "object", + | "properties": { + | "nestedKey1": { "type": "integer" }, + | "nestedKey0": { "type": "integer" }, + | "nestedKey2": { "type": "integer" } + | }, + | "required": ["nestedKey2"] + | }}, + | "objectKey": { + | "type": "object", + | "properties": { + | "nestedKey1": { "type": "string" }, + | "nestedKey0": { "type": "integer" }, + | "nestedKey2": { "type": ["integer", "null"] }, + | "nestedKey3": { "type": "boolean" }, + | "string2Key": { + | "type": "string", + | "maxLength": 500 + | } + | } + | } + |} + |} + """.stripMargin) + val schema2 = Field.normalize((Field.build("top", input2, enforceValuePresence = true).get)) + + val expected = { + + val arrayStruct = Struct( + NonEmptyVector.of( + Field("nested_key1", Type.Long, Nullable, Set("nestedKey1")), + Field("nested_key2", Type.Long, Required, Set("nestedKey2")), + Field("nested_key0", Type.Long, Nullable, Set("nestedKey0")) + ) + ) + + val objectStruct = Struct( + NonEmptyVector.of( + Field("nested_key1", Type.String, Nullable, Set("nestedKey1")), + Field("nested_key2", Type.Long, Nullable, Set("nestedKey2")), + Field("nested_key3", Type.Boolean, Nullable, Set("nestedKey3")), + Field("string2_key", Type.String, Nullable, Set("string2Key")), + Field("nested_key0", Type.Long, Nullable, Set("nestedKey0")) + ) + ) + + val topStruct = Struct( + NonEmptyVector.of( + Field("array_key", Type.Array(arrayStruct, Required), Nullable, Set("arrayKey")), + Field("object_key", objectStruct, Nullable, Set("objectKey")) + ) + ) + Field("top", topStruct, Required) + } + + Migrations.mergeSchemas(schema1, schema2) should beRight(expected) and ( + Migrations.assessSchemaMigration(schema1, schema2).map(_.toString) shouldEqual Set( + "Schema key addition at /array_key/[arrayDown]/nested_key0", + "Schema key addition at /object_key/nested_key0" + )) + } + } object MigrationSpec { - val leanBase: Field = Field.build("top", SpecHelpers.parseSchema( + val leanBase: Field = Field.normalize(Field.build("top", SpecHelpers.parseSchema( """ |{"type": "object", |"properties": { @@ -611,5 +685,5 @@ object MigrationSpec { | } |} |} - """.stripMargin), enforceValuePresence = false).get + """.stripMargin), enforceValuePresence = false).get) }