From 2785a73e9adf33e5f1ddd22c45ff9b1c46b69161 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Tue, 18 Jun 2019 22:19:03 -0700 Subject: [PATCH] ARROW-5583: [Java] When the isSet of a NullableValueHolder is 0, the buffer field should not be used For each variable-width vector, like the VarCharVector, it has a set method that uses a NullableValueHolder as the input parameter. When the isSet field is set to 0, it means the value to set is null, so the buffer field of the NullableValueHolder is invalid, and should not be used. For example, the user may set a null value in the VarCharVector with the following code snippet: NullableVarCharHolder holder = new NullableVarCharHolder(); holder.isSet = 0; ... varCharVector.set(i, holder); Please note that in the code above, the holder.buffer is not set, so it is null. According to the VarCharVector#set method, it will set the bytes using holder.buffer even if holder.isSet equals 0. This will lead to an exception. Author: liyafan82 Closes #4543 from liyafan82/fly_0613_varlen and squashes the following commits: d792e0a38 Revise the code for setSafe methods 43a51c20b Resolve comments 40c1548e6 When the isSet of a NullableValueHolder is 0, the buffer field should not be used --- .../apache/arrow/vector/VarBinaryVector.java | 23 +++- .../apache/arrow/vector/VarCharVector.java | 23 +++- .../apache/arrow/vector/TestValueVector.java | 122 ++++++++++++++++++ 3 files changed, 154 insertions(+), 14 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java index ce5f6eddfeef8..093ffac909921 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java @@ -242,10 +242,14 @@ public void set(int index, NullableVarBinaryHolder holder) { assert index >= 0; fillHoles(index); BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet); - final int dataLength = holder.end - holder.start; final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); + if (holder.isSet != 0) { + final int dataLength = holder.end - holder.start; + offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength); + valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); + } else { + offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset); + } lastSet = index; } @@ -259,13 +263,18 @@ public void set(int index, NullableVarBinaryHolder holder) { */ public void setSafe(int index, NullableVarBinaryHolder holder) { assert index >= 0; - final int dataLength = holder.end - holder.start; fillEmpties(index); - handleSafe(index, dataLength); BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet); final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); + if (holder.isSet != 0) { + final int dataLength = holder.end - holder.start; + handleSafe(index, dataLength); + offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength); + valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); + } else { + handleSafe(index, 0); + offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset); + } lastSet = index; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java index 317d588a7dde9..5b2623c500b78 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java @@ -241,10 +241,14 @@ public void set(int index, NullableVarCharHolder holder) { assert index >= 0; fillHoles(index); BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet); - final int dataLength = holder.end - holder.start; final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); + if (holder.isSet != 0) { + final int dataLength = holder.end - holder.start; + offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength); + valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); + } else { + offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset); + } lastSet = index; } @@ -258,13 +262,18 @@ public void set(int index, NullableVarCharHolder holder) { */ public void setSafe(int index, NullableVarCharHolder holder) { assert index >= 0; - final int dataLength = holder.end - holder.start; fillEmpties(index); - handleSafe(index, dataLength); BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet); final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); + if (holder.isSet != 0) { + final int dataLength = holder.end - holder.start; + handleSafe(index, dataLength); + offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength); + valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); + } else { + handleSafe(index, 0); + offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset); + } lastSet = index; } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 2361389f60f06..0c1ae548667b7 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -35,6 +35,8 @@ import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.holders.NullableVarBinaryHolder; +import org.apache.arrow.vector.holders.NullableVarCharHolder; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -2051,4 +2053,124 @@ public void testDefaultAllocNewAll() { } } + + @Test + public void testSetNullableVarCharHolder() { + try (VarCharVector vector = new VarCharVector("", allocator)) { + vector.allocateNew(100, 10); + + NullableVarCharHolder nullHolder = new NullableVarCharHolder(); + nullHolder.isSet = 0; + + NullableVarCharHolder stringHolder = new NullableVarCharHolder(); + stringHolder.isSet = 1; + + String str = "hello"; + ArrowBuf buf = allocator.buffer(16); + buf.setBytes(0, str.getBytes()); + + stringHolder.start = 0; + stringHolder.end = str.length(); + stringHolder.buffer = buf; + + vector.set(0, nullHolder); + vector.set(1, stringHolder); + + // verify results + assertTrue(vector.isNull(0)); + assertEquals(str, new String(vector.get(1))); + + buf.close(); + } + } + + @Test + public void testSetNullableVarCharHolderSafe() { + try (VarCharVector vector = new VarCharVector("", allocator)) { + vector.allocateNew(5, 1); + + NullableVarCharHolder nullHolder = new NullableVarCharHolder(); + nullHolder.isSet = 0; + + NullableVarCharHolder stringHolder = new NullableVarCharHolder(); + stringHolder.isSet = 1; + + String str = "hello world"; + ArrowBuf buf = allocator.buffer(16); + buf.setBytes(0, str.getBytes()); + + stringHolder.start = 0; + stringHolder.end = str.length(); + stringHolder.buffer = buf; + + vector.setSafe(0, stringHolder); + vector.setSafe(1, nullHolder); + + // verify results + assertEquals(str, new String(vector.get(0))); + assertTrue(vector.isNull(1)); + + buf.close(); + } + } + + @Test + public void testSetNullableVarBinaryHolder() { + try (VarBinaryVector vector = new VarBinaryVector("", allocator)) { + vector.allocateNew(100, 10); + + NullableVarBinaryHolder nullHolder = new NullableVarBinaryHolder(); + nullHolder.isSet = 0; + + NullableVarBinaryHolder binHolder = new NullableVarBinaryHolder(); + binHolder.isSet = 1; + + String str = "hello"; + ArrowBuf buf = allocator.buffer(16); + buf.setBytes(0, str.getBytes()); + + binHolder.start = 0; + binHolder.end = str.length(); + binHolder.buffer = buf; + + vector.set(0, nullHolder); + vector.set(1, binHolder); + + // verify results + assertTrue(vector.isNull(0)); + assertEquals(str, new String(vector.get(1))); + + buf.close(); + } + } + + @Test + public void testSetNullableVarBinaryHolderSafe() { + try (VarBinaryVector vector = new VarBinaryVector("", allocator)) { + vector.allocateNew(5, 1); + + NullableVarBinaryHolder nullHolder = new NullableVarBinaryHolder(); + nullHolder.isSet = 0; + + NullableVarBinaryHolder binHolder = new NullableVarBinaryHolder(); + binHolder.isSet = 1; + + String str = "hello world"; + ArrowBuf buf = allocator.buffer(16); + buf.setBytes(0, str.getBytes()); + + binHolder.start = 0; + binHolder.end = str.length(); + binHolder.buffer = buf; + + vector.setSafe(0, binHolder); + vector.setSafe(1, nullHolder); + + // verify results + assertEquals(str, new String(vector.get(0))); + assertTrue(vector.isNull(1)); + + buf.close(); + } + } }