Skip to content

Commit

Permalink
Address comment
Browse files Browse the repository at this point in the history
  • Loading branch information
boneanxs committed Mar 7, 2024
1 parent 67bdfd5 commit ab1d17a
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 27 deletions.
19 changes: 12 additions & 7 deletions velox/vector/arrow/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,17 @@ TypePtr importFromArrowImpl(
// Parse "d:".
int precision = std::stoi(&format[2], &sz);
// Parse ",".
int scale = std::stoi(&format[2 + sz + 1], &sz);
int idx = 2 + sz + 1;
int scale = std::stoi(&format[idx], &sz);
// Handle bitwidth.
if (format[idx + sz] == ',') {
int bitWidth = std::stoi(&format[idx + sz + 1], &sz);
if (bitWidth != 128) {
VELOX_USER_FAIL(
"Conversion failed for '{}'. Velox decimal does not support custom bitwidth.",
format);
}
}
return DECIMAL(precision, scale);
} catch (std::invalid_argument&) {
VELOX_USER_FAIL(
Expand Down Expand Up @@ -1434,12 +1444,7 @@ VectorPtr createShortDecimalVector(
}

return createFlatVector<TypeKind::BIGINT>(
pool,
type,
nulls,
length,
values,
nullCount);
pool, type, nulls, length, values, nullCount);
}

bool isREE(const ArrowSchema& arrowSchema) {
Expand Down
48 changes: 28 additions & 20 deletions velox/vector/arrow/tests/ArrowBridgeArrayTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1045,21 +1045,27 @@ class ArrowBridgeArrayImportTest : public ArrowBridgeArrayExportTest {
// Takes a vector with input data, generates an input ArrowArray and Velox
// Vector (using vector maker). Then converts ArrowArray into Velox vector and
// assert that both Velox vectors are semantically the same.
template <typename T>
template <typename TOutput, typename TInput = TOutput>
void testArrowImport(
const char* format,
const std::vector<std::optional<T>>& inputValues) {
const std::vector<std::optional<TInput>>& inputValues) {
ArrowContextHolder holder;
auto arrowArray = fillArrowArray(inputValues, holder);

auto arrowSchema = makeArrowSchema(format);
auto output = importFromArrow(arrowSchema, arrowArray, pool_.get());
assertVectorContent(inputValues, output, arrowArray.null_count);
if constexpr (
std::is_same_v<TInput, int128_t> && std::is_same_v<TOutput, int64_t>) {
assertShortDecimalVectorContent(
inputValues, output, arrowArray.null_count);
} else {
assertVectorContent(inputValues, output, arrowArray.null_count);
}

// Buffer views are not reusable. Strings might need to create an additional
// buffer, depending on the string sizes, in which case the buffers could be
// reusable. So we don't check them in here.
if constexpr (!std::is_same_v<T, std::string>) {
if constexpr (!std::is_same_v<TInput, std::string>) {
EXPECT_FALSE(BaseVector::isVectorWritable(output));
} else {
size_t totalLength = 0;
Expand Down Expand Up @@ -1102,21 +1108,6 @@ class ArrowBridgeArrayImportTest : public ArrowBridgeArrayExportTest {
}
}

void testShortDecimalImport() {
auto arrowSchema = makeArrowSchema("d:5,2");
ArrowContextHolder holder;
auto arrowArray = fillArrowArray(
std::vector<std::optional<int128_t>>{
1, -1, 0, 12345, -12345, std::nullopt},
holder);
auto output = importFromArrow(arrowSchema, arrowArray, pool_.get());
assertVectorContent(
std::vector<std::optional<int64_t>>{
1, -1, 0, 12345, -12345, std::nullopt},
output,
1);
}

void testImportScalar() {
testArrowImport<bool>("b", {});
testArrowImport<bool>("b", {true});
Expand Down Expand Up @@ -1149,7 +1140,8 @@ class ArrowBridgeArrayImportTest : public ArrowBridgeArrayExportTest {
testArrowImport<Timestamp>(
"ttn", {Timestamp(0, 0), std::nullopt, Timestamp(1699308257, 1234)});

testShortDecimalImport();
testArrowImport<int64_t, int128_t>(
"d:5,2", {1, -1, 0, 12345, -12345, std::nullopt});
}

template <typename TOutput, typename TInput>
Expand Down Expand Up @@ -1229,6 +1221,22 @@ class ArrowBridgeArrayImportTest : public ArrowBridgeArrayExportTest {
}

private:
void assertShortDecimalVectorContent(
const std::vector<std::optional<int128_t>>& expectedValues,
const VectorPtr& actual,
size_t nullCount) {
std::vector<std::optional<int64_t>> decValues;
decValues.reserve(expectedValues.size());
for (const auto& value : expectedValues) {
if (value) {
decValues.emplace_back(static_cast<int64_t>(*value));
} else {
decValues.emplace_back(std::nullopt);
}
}
assertVectorContent(decValues, actual, nullCount);
}

// Creates timestamp from bigint and asserts the content of actual vector with
// the expected timestamp values.
void assertTimestampVectorContent(
Expand Down
4 changes: 4 additions & 0 deletions velox/vector/arrow/tests/ArrowBridgeSchemaTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,10 @@ TEST_F(ArrowBridgeSchemaImportTest, scalar) {
VELOX_ASSERT_THROW(
*testSchemaImport("d2,15"),
"Unable to convert 'd2,15' ArrowSchema decimal format to Velox decimal");
EXPECT_EQ(*DECIMAL(10, 4), *testSchemaImport("d:10,4,128"));
VELOX_ASSERT_THROW(
*testSchemaImport("d:10,4,256"),
"Conversion failed for 'd:10,4,256'. Velox decimal does not support custom bitwidth.");
}

TEST_F(ArrowBridgeSchemaImportTest, complexTypes) {
Expand Down

0 comments on commit ab1d17a

Please sign in to comment.