From 7f43495f1f33215e33cd93c0a49da1f27bebfdaa Mon Sep 17 00:00:00 2001 From: Laith Sakka Date: Tue, 6 Jun 2023 17:31:19 -0700 Subject: [PATCH] Add DynamicRowView and its reader (#5075) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/5075 A DynamicRowView can represent a row vector of any width and of any types, without having to be typed. It has two functions: 1. size() : width of the struct. 2. at(int i) : returns a generic view for the field i. This simplifies and enables reading vectors of rows with unknown or unlimited types. This is also needed to support row types in the GenericWriter copy_from operation. This can't be used at the top level in the simple function interface because our function signatures do not support it, but generics can be cast to it. It is needed to enable row support in the generic writer copy_from operation, which will be added. Reviewed By: kevinwilfong, kagamiori Differential Revision: D46299159 fbshipit-source-id: 8b86a1dc0d9ccabaded98cf77745b852d1318a59 --- velox/expression/ComplexViewTypes.h | 43 +++++++++- velox/expression/VectorReaders.h | 45 ++++++++++ velox/expression/tests/RowViewTest.cpp | 109 ++++++++++++++++++++++++- 3 files changed, 192 insertions(+), 5 deletions(-) diff --git a/velox/expression/ComplexViewTypes.h b/velox/expression/ComplexViewTypes.h index 40c5d3385434..0eb028b50ec8 100644 --- a/velox/expression/ComplexViewTypes.h +++ b/velox/expression/ComplexViewTypes.h @@ -878,6 +878,45 @@ class MapView { vector_size_t size_; }; +class GenericView; + +// A view type that is used to represent a row of any size of any children +// types. Function `at(index)` returns a generic view for the field at `index`. 
+template +class DynamicRowView { + using readers_t = std::vector>>; + + public: + DynamicRowView(const readers_t* childReaders, vector_size_t offset) + : childReaders_{*childReaders}, offset_{offset} {} + + vector_size_t offset() const { + return offset_; + } + + using elem_n_t = typename std::conditional< + returnsOptionalValues, + OptionalAccessor, + GenericView>::type; + + template + elem_n_t at(IndexT index) { + if constexpr (returnsOptionalValues) { + return elem_n_t{childReaders_[index].get(), offset_}; + } else { + return childReaders_[index]->operator[](offset_); + } + } + + size_t size() const { + return childReaders_.size(); + } + + private: + const readers_t& childReaders_; + vector_size_t offset_; +}; + template class RowView { using reader_t = std::tuple>...>; @@ -894,10 +933,6 @@ class RowView { return offset_; } - vector_size_t childVectorAt() const { - return offset_; - } - public: RowView(const reader_t* childReaders, vector_size_t offset) : childReaders_{childReaders}, offset_{offset} {} diff --git a/velox/expression/VectorReaders.h b/velox/expression/VectorReaders.h index 9cc09caf1d4c..9343471e1e5b 100644 --- a/velox/expression/VectorReaders.h +++ b/velox/expression/VectorReaders.h @@ -702,6 +702,51 @@ struct VectorReader> { mutable std::optional castType_ = std::nullopt; }; +template <> +struct VectorReader { + using in_vector_t = RowVector; + using exec_in_t = DynamicRowView; + using exec_null_free_in_t = DynamicRowView; + + explicit VectorReader(const DecodedVector* decoded) + : decoded_(*decoded), + vector_(detail::getDecoded(decoded_)), + childrenDecoders_{vector_.childrenSize()} { + for (int i = 0; i < vector_.childrenSize(); i++) { + childReaders_.push_back(std::make_unique>( + detail::decode(childrenDecoders_[i], *vector_.childAt(i)))); + } + } + + exec_in_t operator[](size_t offset) const { + auto index = decoded_.index(offset); + return {&childReaders_, index}; + } + + exec_null_free_in_t readNullFree(size_t offset) const { + auto 
index = decoded_.index(offset); + return {&childReaders_, index}; + } + + bool isSet(size_t offset) const { + return !decoded_.isNullAt(offset); + } + + bool mayHaveNulls() const { + return decoded_.mayHaveNulls(); + } + + const BaseVector* baseVector() const { + return decoded_.base(); + } + + private: + const DecodedVector& decoded_; + const in_vector_t& vector_; + std::vector childrenDecoders_; + std::vector>> childReaders_; +}; + template struct VectorReader> : public VectorReader { explicit VectorReader(const DecodedVector* decoded) diff --git a/velox/expression/tests/RowViewTest.cpp b/velox/expression/tests/RowViewTest.cpp index 51fb430fc166..edc5ed0720fe 100644 --- a/velox/expression/tests/RowViewTest.cpp +++ b/velox/expression/tests/RowViewTest.cpp @@ -14,9 +14,11 @@ * limitations under the License. */ +#include #include -#include "gtest/gtest.h" + #include "velox/expression/VectorReaders.h" +#include "velox/functions/Udf.h" #include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" namespace { @@ -204,4 +206,109 @@ TEST_F(NullableRowViewTest, materialize) { ASSERT_EQ(reader[0].materialize(), expected); } +class DynamicRowViewTest : public functions::test::FunctionBaseTest {}; + +TEST_F(DynamicRowViewTest, emptyRow) { + auto rowVector = vectorMaker_.rowVector({}); + rowVector->resize(10); + DecodedVector decoded; + exec::VectorReader reader(decode(decoded, *rowVector.get())); + ASSERT_FALSE(reader.mayHaveNulls()); + for (int i = 0; i < 10; i++) { + ASSERT_EQ(reader[i].size(), 0); + ASSERT_TRUE(reader.isSet(i)); + } +} + +TEST_F(DynamicRowViewTest, mixedRow) { + auto arrayVector = + vectorMaker_.arrayVector({{1}, {2, 3}, {3, 4, 5}}); + + auto rowVector = vectorMaker_.rowVector( + {makeFlatVector({1, 2, 3}), + makeFlatVector({true, false, true}), + arrayVector}); + + DecodedVector decoded; + exec::VectorReader reader(decode(decoded, *rowVector.get())); + ASSERT_FALSE(reader.mayHaveNulls()); + + for (int i = 0; i < 3; i++) { + 
ASSERT_TRUE(reader.isSet(i)); + } + auto dynamicRowView = reader[1]; + auto nullFreeDynamicRowView = reader.readNullFree(1); + + EXPECT_FALSE(dynamicRowView.at(0)->tryCastTo()); + EXPECT_FALSE(dynamicRowView.at(0)->tryCastTo()); + EXPECT_TRUE(dynamicRowView.at(0)->tryCastTo()); + + for (int i = 0; i < 3; i++) { + ASSERT_EQ(reader[i].at(0)->castTo(), i + 1); + ASSERT_EQ(reader.readNullFree(i).at(0).castTo(), i + 1); + } + EXPECT_FALSE(nullFreeDynamicRowView.at(1).tryCastTo()); + EXPECT_FALSE(nullFreeDynamicRowView.at(1).tryCastTo()); + EXPECT_TRUE(nullFreeDynamicRowView.at(1).tryCastTo()); + + for (int i = 0; i < 3; i++) { + ASSERT_EQ(reader[i].at(1)->castTo(), (i % 2 == 0)); + ASSERT_EQ(reader.readNullFree(i).at(1).castTo(), (i % 2 == 0)); + } + + EXPECT_FALSE(nullFreeDynamicRowView.at(2).tryCastTo>()); + EXPECT_FALSE(nullFreeDynamicRowView.at(2).tryCastTo>()); + auto arrayView = reader[2].at(2)->castTo>(); + ASSERT_EQ(arrayView.size(), 3); + ASSERT_EQ(arrayView[0], 3); + ASSERT_EQ(arrayView[1], 4); + ASSERT_EQ(arrayView[2], 5); +} + +TEST_F(DynamicRowViewTest, rowWithNullsInFields) { + auto rowVector = vectorMaker_.rowVector( + {makeNullableFlatVector({1, std::nullopt, 2})}); + + DecodedVector decoded; + exec::VectorReader reader(decode(decoded, *rowVector.get())); + ASSERT_FALSE(reader.mayHaveNulls()); + ASSERT_TRUE(reader[0].at(0)); + ASSERT_FALSE(reader[1].at(0)); + ASSERT_TRUE(reader[2].at(0)); +} + +template +struct StructWidthIfRow { + VELOX_DEFINE_FUNCTION_TYPES(T); + // TODO: Ideally we would like to use DynamicRow instead of Any and make this + // strictly typed. But function signature does not support expressions + // row(...). 
+ void call(int64_t& out, const arg_type& input) { + if (auto dyanmicRowView = input.template tryCastTo()) { + out = dyanmicRowView->size(); + } else { + out = 0; + } + } +}; + +TEST_F(DynamicRowViewTest, castToDynamicRowInFunction) { + registerFunction({"struct_width"}); + { + auto flatVector = makeFlatVector({1, 2}); + + // Input is not struct. + auto result = evaluate("struct_width(c0)", makeRowVector({flatVector})); + test::assertEqualVectors(makeFlatVector({0, 0}), result); + + result = evaluate( + "struct_width(c0)", makeRowVector({makeRowVector({flatVector})})); + test::assertEqualVectors(makeFlatVector({1, 1}), result); + + result = evaluate( + "struct_width(c0)", + makeRowVector({makeRowVector({flatVector, flatVector})})); + test::assertEqualVectors(makeFlatVector({2, 2}), result); + } +} } // namespace