Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-36696: [Go] Improve the MapOf and ListOf helpers #36697

Merged
merged 2 commits into from
Jul 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 41 additions & 21 deletions go/parquet/schema/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,43 +24,62 @@ import (
// ListOf is a convenience helper function to create a properly structured
// list structure according to the Parquet Spec.
//
// <list-repetition> group <name> (LIST) {
// repeated group list {
// <element-repetition> <element-type> element;
// }
// }
// <list-repetition> group <name> (LIST) {
// repeated group list {
// <element-repetition> <element-type> element;
// }
// }
//
// <list-repetition> can only be optional or required. panics if repeated.
// <element-repetition> can only be optional or required. panics if repeated.
// <list-repetition> can only be optional or required.
// <element-repetition> can only be optional or required.
func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) {
if rep == parquet.Repetitions.Repeated || n.RepetitionType() == parquet.Repetitions.Repeated {
return nil, xerrors.New("parquet: listof repetition and element repetition must not be repeated.")
return ListOfWithName(n.Name(), n, rep, fieldID)
}

// ListOf is a convenience helper function to create a properly structured
// list structure according to the Parquet Spec.
//
// <list-repetition> group <name> (LIST) {
// repeated group list {
// <element-repetition> <element-type> element;
// }
// }
//
// <list-repetition> can only be optional or required.
// <element-repetition> can only be optional or required.
func ListOfWithName(listName string, element Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) {
if rep == parquet.Repetitions.Repeated {
return nil, xerrors.Errorf("parquet: listof repetition must not be repeated, got :%s", rep)
}
listName := n.Name()

switch n := n.(type) {
if element.RepetitionType() == parquet.Repetitions.Repeated {
return nil, xerrors.Errorf("parquet: element repetition must not be repeated, got: %s", element.RepetitionType())
}

switch n := element.(type) {
case *PrimitiveNode:
n.name = "element"
case *GroupNode:
n.name = "element"
}

list, err := NewGroupNode("list" /* name */, parquet.Repetitions.Repeated, FieldList{n}, -1 /* fieldID */)
list, err := NewGroupNode("list" /* name */, parquet.Repetitions.Repeated, FieldList{element}, -1 /* fieldID */)
if err != nil {
return nil, err
}

return NewGroupNodeLogical(listName, rep, FieldList{list}, ListLogicalType{}, fieldID)
}

// MapOf is a convenience helper function to create a properly structured
// parquet map node setup according to the Parquet Spec.
//
// <map-repetition> group <name> (MAP) {
// repeated group key_value {
// required <key-type> key;
// <value-repetition> <value-type> value;
// }
// }
// <map-repetition> group <name> (MAP) {
// repeated group key_value {
// required <key-type> key;
// <value-repetition> <value-type> value;
// }
// }
//
// key node will be renamed to "key", value node if not nil will be renamed to "value"
//
Expand All @@ -69,14 +88,15 @@ func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) {
// the key node *must* be required repetition. panics if optional or repeated
//
// value node can be nil (omitted) or have a repetition of required or optional *only*.
// panics if value node is not nil and has a repetition of repeated.
func MapOf(name string, key Node, value Node, mapRep parquet.Repetition, fieldID int32) (*GroupNode, error) {
if mapRep == parquet.Repetitions.Repeated {
return nil, xerrors.New("parquet: map repetition cannot be Repeated")
return nil, xerrors.Errorf("parquet: map repetition cannot be Repeated, got: %s", mapRep)
}

if key.RepetitionType() != parquet.Repetitions.Required {
return nil, xerrors.New("parquet: map key repetition must be Required")
return nil, xerrors.Errorf("parquet: map key repetition must be Required, got: %s", key.RepetitionType())
}

if value != nil {
if value.RepetitionType() == parquet.Repetitions.Repeated {
return nil, xerrors.New("parquet: map value cannot have repetition Repeated")
Expand Down
19 changes: 19 additions & 0 deletions go/parquet/schema/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,25 @@ func TestListOfNested(t *testing.T) {
}`, strings.TrimSpace(buf.String()))
}

func TestListOfWithNameNested(t *testing.T) {
n, err := schema.ListOfWithName("arrays", schema.NewInt32Node("element", parquet.Repetitions.Required, -1), parquet.Repetitions.Required, -1)
assert.NoError(t, err)
final, err := schema.ListOf(n, parquet.Repetitions.Required, -1)
assert.NoError(t, err)

var buf bytes.Buffer
schema.PrintSchema(final, &buf, 4)
assert.Equal(t,
`required group field_id=-1 arrays (List) {
repeated group field_id=-1 list {
required group field_id=-1 element (List) {
repeated group field_id=-1 list {
required int32 field_id=-1 element;
}
}
}
}`, strings.TrimSpace(buf.String()))
}
func TestMapOfNestedTypes(t *testing.T) {
n, err := schema.NewGroupNode("student", parquet.Repetitions.Required, schema.FieldList{
schema.NewByteArrayNode("name", parquet.Repetitions.Required, -1),
Expand Down