Skip to content

Commit

Permalink
PARQUET-2425: Support non-grouped repeated fields in AvroSchemaConverter
Browse files — browse the repository at this point in the history
  • Loading branch information
clairemcginty committed Jan 29, 2024
1 parent 15ad966 commit a45a817
Showing 2 changed files with 17 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -300,9 +300,8 @@ private Schema convertFields(String name, List<Type> parquetFields, Map<String,
Integer nameCount = names.merge(name, 1, (oldValue, value) -> oldValue + 1);
for (Type parquetType : parquetFields) {
Schema fieldSchema = convertField(parquetType, names);
if (parquetType.isRepetition(REPEATED)) {
throw new UnsupportedOperationException(
"REPEATED not supported outside LIST or MAP. Type: " + parquetType);
if (parquetType.isRepetition(REPEATED)) { // If a repeated field is ungrouped, treat as REQUIRED per spec
fields.add(new Schema.Field(parquetType.getName(), Schema.createArray(fieldSchema)));
} else if (parquetType.isRepetition(Type.Repetition.OPTIONAL)) {
fields.add(new Schema.Field(parquetType.getName(), optional(fieldSchema), null, NULL_VALUE));
} else { // REQUIRED
Original file line number Diff line number Diff line change
@@ -422,6 +422,21 @@ public void testOldAvroListOfLists() throws Exception {
+ "}");
}

/**
 * Runs {@code testParquetToAvroConversion} under {@code NEW_BEHAVIOR} for a Parquet message
 * whose only field is a bare {@code repeated int32} (not wrapped in a LIST or MAP group),
 * expecting an Avro record with a single field of type array-of-int.
 */
@Test
public void testConvertUngroupedRepeatedField() throws Exception {
  // Expected Avro side: a record with one array<int> field named "repeatedField".
  final String expectedAvroJson = "{\"type\": \"record\","
      + " \"name\": \"SchemaWithRepeatedField\","
      + " \"fields\": [{"
      + " \"name\": \"repeatedField\","
      + " \"type\": {\"type\": \"array\",\"items\": \"int\"}"
      + " }]"
      + "}";
  final Schema expectedAvroSchema = new Schema.Parser().parse(expectedAvroJson);

  // Parquet side: the repeated primitive sits directly under the message root.
  final String parquetMessage = "message SchemaWithRepeatedField { repeated int32 repeatedField; }";

  testParquetToAvroConversion(NEW_BEHAVIOR, expectedAvroSchema, parquetMessage);
}

@Test
public void testOldThriftListOfLists() throws Exception {
Schema listOfLists = optional(Schema.createArray(Schema.createArray(Schema.create(INT))));

0 comments on commit a45a817

Please sign in to comment.