Skip to content

Commit

Permalink
GH-38242: [Java] Fix incorrect internal struct accounting for DenseUn…
Browse files Browse the repository at this point in the history
…ionVector#getBufferSizeFor (#38305)

### What changes are included in this PR?

Fix incorrect implementation of `DenseUnionVector.getBufferSizeFor`.
Sum the type id counts before calling `getBufferSizeFor` on the union's child vectors.

### Are these changes tested?

Yes. A test verifies the OOB read (requires bounds check), and an example count is correct, fails as expected before fix.

**This PR contains a "Critical Fix".** 

For users of DenseUnionVector the size can calculated incorrectly, as well as cause of out-of-bounds buffer reads which may return garbage (or potentially segfaulting) if bounds checking is off.

* Closes: #38242

Authored-by: Dan Stone <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
wotbrew authored Oct 23, 2023
1 parent 6ce6ec2 commit 3793560
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 2 deletions.
20 changes: 18 additions & 2 deletions java/vector/src/main/codegen/templates/DenseUnionVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -730,8 +730,24 @@ public int getBufferSizeFor(final int count) {
if (count == 0) {
return 0;
}
return (int) (count * TYPE_WIDTH + (long) count * OFFSET_WIDTH
+ DataSizeRoundingUtil.divideBy8Ceil(count) + internalStruct.getBufferSizeFor(count));
int[] counts = new int[Byte.MAX_VALUE + 1];
for (int i = 0; i < count; i++) {
byte typeId = getTypeId(i);
if (typeId != -1) {
counts[typeId] += 1;
}
}
long childBytes = 0;
for (int typeId = 0; typeId < childVectors.length; typeId++) {
ValueVector childVector = childVectors[typeId];
if (childVector != null) {
childBytes += childVector.getBufferSizeFor(counts[typeId]);
}
}
return (int) (count * TYPE_WIDTH + (long) count * OFFSET_WIDTH + childBytes);
}
@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.arrow.vector.complex;

import static org.junit.jupiter.api.Assertions.*;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BaseValueVector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.holders.NullableIntHolder;
import org.apache.arrow.vector.holders.NullableVarBinaryHolder;
import org.apache.arrow.vector.types.UnionMode;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.junit.jupiter.api.Test;

public class TestDenseUnionBufferSize {
@Test
public void testBufferSize() {
try (BufferAllocator allocator = new RootAllocator();
DenseUnionVector duv = new DenseUnionVector("duv", allocator,
FieldType.nullable(new ArrowType.Union(UnionMode.Dense, null)), null)) {

byte aTypeId = 42;
byte bTypeId = 7;

duv.addVector(aTypeId, new IntVector("a", FieldType.notNullable(new ArrowType.Int(32, true)), allocator));
duv.addVector(bTypeId, new VarBinaryVector("b", FieldType.notNullable(new ArrowType.Binary()), allocator));

NullableIntHolder intHolder = new NullableIntHolder();
NullableVarBinaryHolder varBinaryHolder = new NullableVarBinaryHolder();

int aCount = BaseValueVector.INITIAL_VALUE_ALLOCATION + 1;
for (int i = 0; i < aCount; i++) {
duv.setTypeId(i, aTypeId);
duv.setSafe(i, intHolder);
}

int bCount = 2;
for (int i = 0; i < bCount; i++) {
duv.setTypeId(i + aCount, bTypeId);
duv.setSafe(i + aCount, varBinaryHolder);
}

int count = aCount + bCount;
duv.setValueCount(count);

// will not necessarily see an error unless bounds checking is on.
assertDoesNotThrow(duv::getBufferSize);

IntVector intVector = duv.getIntVector(aTypeId);
VarBinaryVector varBinaryVector = duv.getVarBinaryVector(bTypeId);

long overhead = DenseUnionVector.TYPE_WIDTH + DenseUnionVector.OFFSET_WIDTH;

assertEquals(overhead * count + intVector.getBufferSize() + varBinaryVector.getBufferSize(),
duv.getBufferSize());

assertEquals(overhead * (aCount + 1) + intVector.getBufferSizeFor(aCount) + varBinaryVector.getBufferSizeFor(1),
duv.getBufferSizeFor(aCount + 1));

}
}
}

0 comments on commit 3793560

Please sign in to comment.