remove duplicate AppendNull in case of null struct data
Bugfix to remove duplicate AppendNull in the case of null parent struct data.

Change: added a check so that when StructBuilder.AppendNull() is called for nil struct data, the struct's child fields are skipped.

Reason:
StructBuilder.AppendNull() recursively calls appendNull for all of a struct's subfields; also calling AppendNull on the child fields was incorrectly adding extra offsets, causing data to be dropped or to end up in the wrong rows.
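
For illustration only (not part of the commit), here is a minimal Go sketch of the behavior the fix relies on; the module import path/version and the field name "x" are assumptions:

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v14/arrow" // assumed module version; adjust to the vendored arrow release
	"github.com/apache/arrow/go/v14/arrow/array"
	"github.com/apache/arrow/go/v14/arrow/memory"
)

func main() {
	mem := memory.NewGoAllocator()
	// A struct type with one child field; the name "x" is illustrative.
	dtype := arrow.StructOf(arrow.Field{Name: "x", Type: arrow.PrimitiveTypes.Int64, Nullable: true})
	sb := array.NewStructBuilder(mem, dtype)
	defer sb.Release()

	// AppendNull on the parent recursively appends a null to every child
	// builder, so the child already advances by one slot for this row.
	sb.AppendNull()
	fmt.Println("child length after parent AppendNull:", sb.FieldBuilder(0).Len()) // expected: 1

	// Appending to the child again for the same row would add a second slot,
	// which is the duplicate append this commit avoids: the struct's
	// builderFunc now returns NullStructData so loadDatum skips the sub-fields.
}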
loicalleyne committed Nov 13, 2023
1 parent af89ca4 commit 91bb9e2
Showing 1 changed file with 48 additions and 11 deletions.
go/arrow/avro/reader_types.go
@@ -19,6 +19,7 @@ package avro
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"math/big"

@@ -42,6 +43,10 @@ type dataLoader struct {
children []*dataLoader
}

var (
NullStructData = errors.New("null struct data")
)

func newDataLoader() *dataLoader { return &dataLoader{idx: 0, depth: 0} }

// drawTree takes the tree of field builders produced by mapFieldBuilders()
@@ -81,29 +86,49 @@ func (d *dataLoader) drawTree(field *fieldPos) {
}
}

// loadDatum loads decoded Avro data to the schema fields' builder functions.
// Since array.StructBuilder.AppendNull() will recursively append null to all of the
// struct's fields, in the case of nil being passed to a struct's builderFunc it will
// return a NullStructData error to signal that all its sub-fields can be skipped.
func (d *dataLoader) loadDatum(data any) error {
if d.list == nil && d.mapField == nil {
if d.mapValue != nil {
d.mapValue.appendFunc(data)
}
var NullParent *fieldPos
for _, f := range d.fields {
if f.parent == NullParent {
continue
}
if d.mapValue == nil {
err := f.appendFunc(f.getValue(data))
if err != nil {
if err == NullStructData {
NullParent = f
continue
}
return err
}
} else {
switch dt := data.(type) {
case nil:
err := f.appendFunc(dt)
if err != nil {
if err == NullStructData {
NullParent = f
continue
}
return err
}
case []any:
if len(d.children) < 1 {
for _, e := range dt {
err := f.appendFunc(e)
if err != nil {
if err == NullStructData {
NullParent = f
continue
}
return err
}
}
@@ -115,6 +140,10 @@ func (d *dataLoader) loadDatum(data any) error {
case map[string]any:
err := f.appendFunc(f.getValue(dt))
if err != nil {
if err == NullStructData {
NullParent = f
continue
}
return err
}
}
@@ -147,9 +176,17 @@ func (d *dataLoader) loadDatum(data any) error {
if d.item != nil {
d.item.appendFunc(e)
}
var NullParent *fieldPos
for _, f := range d.fields {
if f.parent == NullParent {
continue
}
err := f.appendFunc(f.getValue(e))
if err != nil {
if err == NullStructData {
NullParent = f
continue
}
return err
}
}
@@ -168,9 +205,17 @@ func (d *dataLoader) loadDatum(data any) error {
if d.item != nil {
d.item.appendFunc(e)
}
var NullParent *fieldPos
for _, f := range d.fields {
if f.parent == NullParent {
continue
}
err := f.appendFunc(f.getValue(e))
if err != nil {
if err == NullStructData {
NullParent = f
continue
}
return err
}
}
@@ -188,6 +233,7 @@ func (d *dataLoader) loadDatum(data any) error {
case nil:
d.mapField.appendFunc(dt)
case map[string]any:

d.mapField.appendFunc(dt)
for k, v := range dt {
d.mapKey.appendFunc(k)
Expand All @@ -197,7 +243,6 @@ func (d *dataLoader) loadDatum(data any) error {
d.children[0].loadDatum(v)
}
}

}
}
}
@@ -511,8 +556,9 @@ func mapFieldBuilders(b array.Builder, field arrow.Field, parent *fieldPos) {
switch data.(type) {
case nil:
bt.AppendNull()
return NullStructData
default:
appendStructData(bt, data)
bt.Append(true)
}
return nil
}
@@ -828,12 +874,3 @@ func appendTimestampData(b *array.TimestampBuilder, data interface{}) {
}
}
}

func appendStructData(b *array.StructBuilder, data interface{}) {
switch data.(type) {
case nil:
b.AppendNull()
default:
b.Append(true)
}
}
