From d62d9901fe42cc3e8eb510594c8c4f592042a4d0 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Mon, 2 Nov 2020 17:44:28 +0000 Subject: [PATCH] support BigEndian in Decimal256Vector --- .../apache/arrow/vector/Decimal256Vector.java | 108 +++++++++++------- .../arrow/vector/util/DecimalUtility.java | 2 +- .../arrow/vector/TestDecimal256Vector.java | 5 +- 3 files changed, 73 insertions(+), 42 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java index ed10468095b88..c5fef82d05276 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java @@ -20,6 +20,7 @@ import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; import java.math.BigDecimal; +import java.nio.ByteOrder; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -43,6 +44,7 @@ */ public final class Decimal256Vector extends BaseFixedWidthVector { public static final byte TYPE_WIDTH = 32; + private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; private final FieldReader reader; private final int precision; @@ -197,7 +199,7 @@ public void set(int index, ArrowBuf buffer) { /** * Set the decimal element at given index to the provided array of bytes. - * Decimal256 is now implemented as Little Endian. This API allows the user + * Decimal256 is now implemented as Native Endian. This API allows the user * to pass a decimal value in the form of byte array in BE byte order. * *

Consumers of Arrow code can use this API instead of first swapping @@ -218,25 +220,38 @@ public void setBigEndian(int index, byte[] value) { valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH); long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH; - // swap bytes to convert BE to LE - for (int byteIdx = 0; byteIdx < length; ++byteIdx) { - PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]); - } - - if (length == TYPE_WIDTH) { - return; - } - if (length == 0) { PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH, (byte) 0); - } else if (length < TYPE_WIDTH) { - // sign extend - final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00); - PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad); + return; + } + if (LITTLE_ENDIAN) { + // swap bytes to convert BE to LE + for (int byteIdx = 0; byteIdx < length; ++byteIdx) { + PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]); + } + + if (length == TYPE_WIDTH) { + return; + } + + if (length < TYPE_WIDTH) { + // sign extend + final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00); + PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad); + return; + } } else { - throw new IllegalArgumentException( - "Invalid decimal value length. Valid length in [1 - 32], got " + length); + if (length <= TYPE_WIDTH) { + // copy data from value to outAddress + PlatformDependent.copyMemory(value, 0, outAddress + Decimal256Vector.TYPE_WIDTH - length, length); + // sign extend + final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00); + PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad); + return; + } } + throw new IllegalArgumentException( + "Invalid decimal value length. Valid length in [1 - 32], got " + length); } /** @@ -255,7 +270,7 @@ public void set(int index, long start, ArrowBuf buffer) { * Sets the element at given index using the buffer whose size maybe <= 32 bytes. * @param index index to write the decimal to * @param start start of value in the buffer - * @param buffer contains the decimal in little endian bytes + * @param buffer contains the decimal in native endian bytes * @param length length of the value in the buffer */ public void setSafe(int index, long start, ArrowBuf buffer, int length) { @@ -268,12 +283,22 @@ public void setSafe(int index, long start, ArrowBuf buffer, int length) { long inAddress = buffer.memoryAddress() + start; long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH; - PlatformDependent.copyMemory(inAddress, outAddress, length); - // sign extend - if (length < 32) { - byte msb = PlatformDependent.getByte(inAddress + length - 1); - final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad); + if (LITTLE_ENDIAN) { + PlatformDependent.copyMemory(inAddress, outAddress, length); + // sign extend + if (length < TYPE_WIDTH) { + byte msb = PlatformDependent.getByte(inAddress + length - 1); + final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); + PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad); + } + } else { + PlatformDependent.copyMemory(inAddress, outAddress + Decimal256Vector.TYPE_WIDTH - length, length); + // sign extend + if (length < TYPE_WIDTH) { + byte msb = PlatformDependent.getByte(inAddress); + final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); + PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad); + } } } @@ -296,16 +321,26 @@ public void setBigEndianSafe(int index, long start, ArrowBuf buffer, int length) // not using buffer.getByte() to avoid boundary checks for every byte. long inAddress = buffer.memoryAddress() + start; long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH; - // swap bytes to convert BE to LE - for (int byteIdx = 0; byteIdx < length; ++byteIdx) { - byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx); - PlatformDependent.putByte(outAddress + byteIdx, val); - } - // sign extend - if (length < 32) { - byte msb = PlatformDependent.getByte(inAddress); - final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad); + if (LITTLE_ENDIAN) { + // swap bytes to convert BE to LE + for (int byteIdx = 0; byteIdx < length; ++byteIdx) { + byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx); + PlatformDependent.putByte(outAddress + byteIdx, val); + } + // sign extend + if (length < 32) { + byte msb = PlatformDependent.getByte(inAddress); + final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); + PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad); + } + } else { + PlatformDependent.copyMemory(inAddress, outAddress + Decimal256Vector.TYPE_WIDTH - length, length); + // sign extend + if (length < TYPE_WIDTH) { + byte msb = PlatformDependent.getByte(inAddress); + final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); + PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad); + } } } @@ -329,12 +364,7 @@ public void set(int index, BigDecimal value) { */ public void set(int index, long value) { BitVectorHelper.setBit(validityBuffer, index); - final long addressOfValue = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH; - PlatformDependent.putLong(addressOfValue, value); - final long padValue = Long.signum(value) == -1 ? -1L : 0L; - PlatformDependent.putLong(addressOfValue + Long.BYTES, padValue); - PlatformDependent.putLong(addressOfValue + 2 * Long.BYTES, padValue); - PlatformDependent.putLong(addressOfValue + 3 * Long.BYTES, padValue); + DecimalUtility.writeLongToArrowBuf(value, valueBuffer, index, TYPE_WIDTH); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java index 4280314aaaa88..4ffec1f7b6b1d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java @@ -139,7 +139,7 @@ public static void writeLongToArrowBuf(long value, ArrowBuf bytebuf, int index, throw new UnsupportedOperationException("DeciimalUtility.writeLongToArrowBuf() currently supports " + "128-bit or 256-bit width data"); } - final long addressOfValue = bytebuf.memoryAddress() + (long) index * DECIMAL_BYTE_LENGTH; + final long addressOfValue = bytebuf.memoryAddress() + (long) index * byteWidth; final long padValue = Long.signum(value) == -1 ? -1L : 0L; if (LITTLE_ENDIAN) { PlatformDependent.putLong(addressOfValue, value); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java index 7aa48f4bfb455..82c912cef2fdd 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java @@ -210,11 +210,12 @@ public void testBigDecimalReadWrite() { } /** - * Test {@link Decimal256Vector#setBigEndian(int, byte[])} which takes BE layout input and stores in LE layout. + * Test {@link Decimal256Vector#setBigEndian(int, byte[])} which takes BE layout input and stores in native-endian + * (NE) layout. * Cases to cover: input byte array in different lengths in range [1-16] and negative values. */ @Test - public void decimalBE2LE() { + public void decimalBE2NE() { try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal", new ArrowType.Decimal(23, 2, 256), allocator)) { decimalVector.allocateNew();