Skip to content

Commit

Permalink
PARQUET-2425: Support non-grouped repeated fields in AvroSchemaConverter
Browse files (browse the repository at this point in the history)
  • Loading branch information
clairemcginty committed Jan 29, 2024
1 parent 15ad966 commit 407488a
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -300,9 +300,8 @@ private Schema convertFields(String name, List<Type> parquetFields, Map<String,
Integer nameCount = names.merge(name, 1, (oldValue, value) -> oldValue + 1);
for (Type parquetType : parquetFields) {
Schema fieldSchema = convertField(parquetType, names);
if (parquetType.isRepetition(REPEATED)) {
throw new UnsupportedOperationException(
"REPEATED not supported outside LIST or MAP. Type: " + parquetType);
if (parquetType.isRepetition(REPEATED)) { // If a repeated field is ungrouped, treat as REQUIRED per spec
fields.add(new Schema.Field(parquetType.getName(), Schema.createArray(fieldSchema)));
} else if (parquetType.isRepetition(Type.Repetition.OPTIONAL)) {
fields.add(new Schema.Field(parquetType.getName(), optional(fieldSchema), null, NULL_VALUE));
} else { // REQUIRED
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,24 @@ public void testOldAvroListOfLists() throws Exception {
+ "}");
}

/**
 * Checks Parquet-to-Avro conversion of a {@code repeated} primitive field that is declared
 * directly in the message, i.e. not wrapped in a LIST or MAP group (PARQUET-2425).
 *
 * <p>The expected Avro record carries a single field named {@code repeatedField} whose type is
 * a plain array of {@code int}; the field is not a nullable union and has no default value.
 */
@Test
public void testConvertUngroupedRepeatedField() throws Exception {
  // Avro side: a record with one array-of-int field.
  final String expectedAvroJson = "{" + " \"type\" : \"record\","
      + " \"name\" : \"SchemaWithRepeatedField\","
      + " \"fields\" : [ {"
      + " \"name\" : \"repeatedField\","
      + " \"type\" : {"
      + " \"type\" : \"array\","
      + " \"items\" : \"int\""
      + " }"
      + " }]"
      + "}";
  final Schema expectedAvroSchema = new Schema.Parser().parse(expectedAvroJson);

  // Parquet side: the repeated int32 sits at the top level of the message, with no enclosing group.
  final String parquetMessage = "message SchemaWithRepeatedField { repeated int32 repeatedField; }";

  testParquetToAvroConversion(NEW_BEHAVIOR, expectedAvroSchema, parquetMessage);
}

@Test
public void testOldThriftListOfLists() throws Exception {
Schema listOfLists = optional(Schema.createArray(Schema.createArray(Schema.create(INT))));
Expand Down

0 comments on commit 407488a

Please sign in to comment.