Skip to content

Commit

Permalink
ARROW-17499: [Go] Shift MakeArrayOfNull to array Package (#13944)
Browse files Browse the repository at this point in the history
Authored-by: Matt Topol <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
  • Loading branch information
zeroshade authored Aug 22, 2022
1 parent 510eb6f commit 62461b0
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 4 deletions.
153 changes: 153 additions & 0 deletions go/arrow/array/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,156 @@ func DictArrayFromJSON(mem memory.Allocator, dt *arrow.DictionaryType, indicesJS

return NewDictionaryArray(dt, indices, dict), nil
}

// getMaxBufferLen returns the size, in bytes, of the largest single buffer
// needed to build an all-null array of type dt with the given length. It
// considers the validity bitmap, the buffers of dt itself and, recursively,
// the buffers of any child or dictionary types.
//
// It panics if creating an all-null array is not implemented for dt.
func getMaxBufferLen(dt arrow.DataType, length int) int {
	// The validity bitmap needs one bit per element; this is the lower bound.
	bufferLen := int(bitutil.BytesForBits(int64(length)))

	// maxOf compares against the running maximum held in bufferLen.
	maxOf := func(bl int) int {
		if bl > bufferLen {
			return bl
		}
		return bufferLen
	}

	switch dt := dt.(type) {
	case *arrow.NullType:
		// nullArrayFactory.create has a dedicated NullType case that adds no
		// buffers beyond the bitmap-sized slice, so the bitmap size suffices.
		// Without this case the null type fell through to the panic below.
		return bufferLen
	case *arrow.DictionaryType:
		bufferLen = maxOf(getMaxBufferLen(dt.ValueType, length))
		return maxOf(getMaxBufferLen(dt.IndexType, length))
	case *arrow.FixedSizeBinaryType:
		return maxOf(dt.ByteWidth * length)
	case arrow.FixedWidthDataType:
		// bit widths are rounded up to whole bytes per element
		return maxOf(int(bitutil.BytesForBits(int64(dt.BitWidth()))) * length)
	case *arrow.StructType:
		for _, f := range dt.Fields() {
			bufferLen = maxOf(getMaxBufferLen(f.Type, length))
		}
		return bufferLen
	case *arrow.SparseUnionType:
		// type codes
		bufferLen = maxOf(length)
		// creates children of the same length of the union
		for _, f := range dt.Fields() {
			bufferLen = maxOf(getMaxBufferLen(f.Type, length))
		}
		return bufferLen
	case *arrow.DenseUnionType:
		// type codes
		bufferLen = maxOf(length)
		// offsets
		bufferLen = maxOf(arrow.Int32SizeBytes * length)
		// create children of length 1
		for _, f := range dt.Fields() {
			bufferLen = maxOf(getMaxBufferLen(f.Type, 1))
		}
		return bufferLen
	case arrow.OffsetsDataType:
		// length+1 offsets are required to delimit length elements
		return maxOf(dt.OffsetTypeTraits().BytesRequired(length + 1))
	case *arrow.FixedSizeListType:
		// the child holds dt.Len() values for every list element
		return maxOf(getMaxBufferLen(dt.Elem(), int(dt.Len())*length))
	case arrow.ExtensionType:
		return maxOf(getMaxBufferLen(dt.StorageType(), length))
	default:
		panic(fmt.Errorf("arrow/array: arrayofnull not implemented for type %s", dt))
	}
}

// nullArrayFactory builds the ArrayData for an all-null array of a given
// data type and length.
type nullArrayFactory struct {
// mem is the allocator used for any buffers the factory allocates.
mem memory.Allocator
// dt is the data type of the array to create.
dt arrow.DataType
// len is the number of elements in the array to create.
len int
// buf is the scratch buffer backing the array's buffers; create allocates
// it when nil and createChild passes it on to child factories.
buf *memory.Buffer
}

// create builds the ArrayData for an array of n.len elements of type n.dt in
// which every element is null (the null count passed to NewData is n.len).
//
// All buffers of the result, and of any children, are backed by the single
// scratch buffer n.buf. If n.buf is nil it is allocated here, sized by
// getMaxBufferLen, and this call's reference to it is released on return.
func (n *nullArrayFactory) create() *Data {
	if n.buf == nil {
		bufLen := getMaxBufferLen(n.dt, n.len)
		n.buf = memory.NewResizableBuffer(n.mem)
		n.buf.Resize(bufLen)
		defer n.buf.Release()
	}

	var (
		dt = n.dt
		// bufs[0] is the validity bitmap: one bit per element
		bufs      = []*memory.Buffer{memory.SliceBuffer(n.buf, 0, int(bitutil.BytesForBits(int64(n.len))))}
		childData []arrow.ArrayData
		dictData  arrow.ArrayData
	)
	// Release via a closure so the current bufs[0] is inspected at return
	// time. `defer bufs[0].Release()` would bind the receiver immediately and
	// release the bitmap slice a second time after the union case below has
	// already released it and set it to nil.
	defer func() {
		if bufs[0] != nil {
			bufs[0].Release()
		}
	}()

	// extension types are laid out exactly like their storage type
	if ex, ok := dt.(arrow.ExtensionType); ok {
		dt = ex.StorageType()
	}

	if nf, ok := dt.(arrow.NestedType); ok {
		childData = make([]arrow.ArrayData, len(nf.Fields()))
	}

	switch dt := dt.(type) {
	case *arrow.NullType:
		// nothing beyond the bitmap-sized slice
	case *arrow.DictionaryType:
		// indices buffer, plus an empty dictionary of the value type
		bufs = append(bufs, n.buf)
		arr := MakeArrayOfNull(n.mem, dt.ValueType, 0)
		defer arr.Release()
		dictData = arr.Data()
	case arrow.FixedWidthDataType:
		// values buffer
		bufs = append(bufs, n.buf)
	case arrow.BinaryDataType:
		// two further buffers, both backed by the shared scratch buffer
		bufs = append(bufs, n.buf, n.buf)
	case arrow.OffsetsDataType:
		// one further buffer and a single empty child
		bufs = append(bufs, n.buf)
		childData[0] = n.createChild(dt, 0, 0)
		defer childData[0].Release()
	case *arrow.FixedSizeListType:
		// the child holds dt.Len() values for every list element
		childData[0] = n.createChild(dt, 0, n.len*int(dt.Len()))
		defer childData[0].Release()
	case *arrow.StructType:
		for i := range dt.Fields() {
			childData[i] = n.createChild(dt, i, n.len)
			defer childData[i].Release()
		}
	case arrow.UnionType:
		// unions carry no validity bitmap: drop it and leave slot 0 nil
		bufs[0].Release()
		bufs[0] = nil
		bufs = append(bufs, n.buf)
		// buffer is zeroed, but 0 may not be a valid type code
		if codes := dt.TypeCodes(); len(codes) > 0 && codes[0] != 0 {
			bufs[1] = memory.NewResizableBuffer(n.mem)
			bufs[1].Resize(n.len)
			defer bufs[1].Release()
			memory.Set(bufs[1].Bytes(), byte(codes[0]))
		}

		// for sparse unions we create children with the same length
		childLen := n.len
		if dt.Mode() == arrow.DenseMode {
			// for dense unions, offsets are all 0 and make children
			// with length 1
			bufs = append(bufs, n.buf)
			childLen = 1
		}
		for i := range dt.Fields() {
			childData[i] = n.createChild(dt, i, childLen)
			defer childData[i].Release()
		}
	}

	// The result keeps the original type (n.dt), not the storage type.
	out := NewData(n.dt, n.len, bufs, childData, n.len, 0)
	if dictData != nil {
		out.SetDictionary(dictData)
	}
	return out
}

// createChild builds the all-null ArrayData for the i-th field of the nested
// type dt, with the given length, reusing this factory's allocator and shared
// scratch buffer.
//
// The field is looked up on dt rather than on n.dt: when n.dt is an extension
// type, create passes the extension's storage type here, and asserting
// arrow.NestedType on the extension type itself can panic. dt must implement
// arrow.NestedType and i must be a valid field index.
func (n *nullArrayFactory) createChild(dt arrow.DataType, i, length int) *Data {
	childFactory := &nullArrayFactory{
		mem: n.mem,
		dt:  dt.(arrow.NestedType).Fields()[i].Type,
		len: length,
		buf: n.buf,
	}
	return childFactory.create()
}

// MakeArrayOfNull returns a new array of the given data type containing
// length elements, all of which are null. Buffers are allocated from mem.
func MakeArrayOfNull(mem memory.Allocator, dt arrow.DataType, length int) arrow.Array {
	factory := nullArrayFactory{mem: mem, dt: dt, len: length}
	nullData := factory.create()
	// drop this function's reference to the data once the array is built
	defer nullData.Release()
	return MakeFromData(nullData)
}
8 changes: 4 additions & 4 deletions go/arrow/flight/flightsql/sqlite_server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,9 @@ func (s *FlightSqliteServerSuite) TestCommandGetTables() {
s.NoError(err)
defer rdr.Release()

catalogName := scalar.MakeArrayOfNull(arrow.BinaryTypes.String, 3, s.mem)
catalogName := array.MakeArrayOfNull(s.mem, arrow.BinaryTypes.String, 3)
defer catalogName.Release()
schemaName := scalar.MakeArrayOfNull(arrow.BinaryTypes.String, 3, s.mem)
schemaName := array.MakeArrayOfNull(s.mem, arrow.BinaryTypes.String, 3)
defer schemaName.Release()

tableName := s.fromJSON(arrow.BinaryTypes.String, `["foreignTable", "intTable", "sqlite_sequence"]`)
Expand Down Expand Up @@ -243,9 +243,9 @@ func (s *FlightSqliteServerSuite) TestCommandGetTablesWithExistingTableTypeFilte
s.NoError(err)
defer rdr.Release()

catalogName := scalar.MakeArrayOfNull(arrow.BinaryTypes.String, 3, s.mem)
catalogName := array.MakeArrayOfNull(s.mem, arrow.BinaryTypes.String, 3)
defer catalogName.Release()
schemaName := scalar.MakeArrayOfNull(arrow.BinaryTypes.String, 3, s.mem)
schemaName := array.MakeArrayOfNull(s.mem, arrow.BinaryTypes.String, 3)
defer schemaName.Release()

tableName := s.fromJSON(arrow.BinaryTypes.String, `["foreignTable", "intTable", "sqlite_sequence"]`)
Expand Down
2 changes: 2 additions & 0 deletions go/arrow/scalar/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,8 @@ func GetScalar(arr arrow.Array, idx int) (Scalar, error) {
}

// MakeArrayOfNull creates an array of size length which is all null of the given data type.
//
// Deprecated: Use array.MakeArrayOfNull
func MakeArrayOfNull(dt arrow.DataType, length int, mem memory.Allocator) arrow.Array {
var (
buffers = []*memory.Buffer{nil}
Expand Down

0 comments on commit 62461b0

Please sign in to comment.