-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ARROW-17275: [Go][Integration] Handle Large offset types in IPC read/write #13770
Changes from all commits
df7f105
cb2f193
312ed14
85e9f5c
51b57e3
716de4f
6ec8283
4e07896
f5478df
9c92586
807cdf1
b99f9e1
941aafb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,6 +76,7 @@ func TestConcatenate(t *testing.T) { | |
{arrow.BinaryTypes.String}, | ||
{arrow.BinaryTypes.LargeString}, | ||
{arrow.ListOf(arrow.PrimitiveTypes.Int8)}, | ||
{arrow.LargeListOf(arrow.PrimitiveTypes.Int8)}, | ||
{arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Int8)}, | ||
{arrow.StructOf()}, | ||
{arrow.MapOf(arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8)}, | ||
|
@@ -158,6 +159,32 @@ func (cts *ConcatTestSuite) generateArr(size int64, nullprob float64) arrow.Arra | |
bldr := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) | ||
defer bldr.Release() | ||
|
||
valid := make([]bool, len(offsetsVector)-1) | ||
for i := range valid { | ||
valid[i] = true | ||
} | ||
bldr.AppendValues(offsetsVector, valid) | ||
vb := bldr.ValueBuilder().(*array.Int8Builder) | ||
for i := 0; i < values.Len(); i++ { | ||
if values.IsValid(i) { | ||
vb.Append(values.Value(i)) | ||
} else { | ||
vb.AppendNull() | ||
} | ||
} | ||
return bldr.NewArray() | ||
case arrow.LARGE_LIST: | ||
valuesSize := size * 8 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm... so, the way I understand this code, Meaning that the multiplier here is arbitrary and you could have e.g. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yea, I'm doing * 8 here since it's expected to be 64-bit integers for the test There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, I understand that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
TL;DR: |
||
values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) | ||
defer values.Release() | ||
offsetsVector := cts.largeoffsets(int64(valuesSize), int32(size)) | ||
// ensure the first and last offsets encompass the whole values | ||
offsetsVector[0] = 0 | ||
offsetsVector[len(offsetsVector)-1] = int64(valuesSize) | ||
|
||
bldr := array.NewLargeListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) | ||
defer bldr.Release() | ||
|
||
valid := make([]bool, len(offsetsVector)-1) | ||
for i := range valid { | ||
valid[i] = true | ||
|
@@ -263,6 +290,16 @@ func (cts *ConcatTestSuite) offsets(length, slicecount int32) []int32 { | |
return offsets | ||
} | ||
|
||
func (cts *ConcatTestSuite) largeoffsets(length int64, slicecount int32) []int64 { | ||
offsets := make([]int64, slicecount+1) | ||
dist := rand.New(rand.NewSource(cts.seed)) | ||
for i := range offsets { | ||
offsets[i] = dist.Int63n(length + 1) | ||
} | ||
sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] }) | ||
return offsets | ||
} | ||
|
||
func (cts *ConcatTestSuite) TestCheckConcat() { | ||
for _, sz := range cts.sizes { | ||
cts.Run(fmt.Sprintf("size %d", sz), func() { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the PR title incorrect? This is updating the compatibility matrix for Large List support, not Large String and Large Binary which are already ticked for Go.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're absolutely right that I need to rename the PR (I ended up tying the change with the IPC fixes to my other changes adding the LargeList type so i just incorporated it). Also, I honestly don't know why the Large String and Large Binary were ticked, they weren't supported until this change.