Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DynamicRowView and its reader #5075

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 39 additions & 4 deletions velox/expression/ComplexViewTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,45 @@ class MapView {
vector_size_t size_;
};

// Forward declaration: DynamicRowView names GenericView as its null-free
// element type before the full definition is visible.
class GenericView;

// A lightweight, non-owning view over one row whose number of fields and
// field types are only known at runtime. Every field is exposed through a
// type-erased accessor; `at(index)` returns that accessor for the field at
// `index`.
//
// When `returnsOptionalValues` is true, `at` returns an `OptionalAccessor<Any>`
// built from the child reader and the row offset. Otherwise it returns the
// `GenericView` produced by the child reader's `operator[]`.
//
// The view stores a reference to the vector of child readers passed at
// construction, so that vector must outlive the view.
template <bool returnsOptionalValues>
class DynamicRowView {
  using readers_t = std::vector<std::unique_ptr<VectorReader<Any>>>;

 public:
  // `childReaders` is dereferenced immediately and must not be null.
  // `offset` is the position handed to every child reader by `at`.
  DynamicRowView(const readers_t* childReaders, vector_size_t offset)
      : childReaders_{*childReaders}, offset_{offset} {}

  // Position of this row as seen by the child readers.
  vector_size_t offset() const {
    return offset_;
  }

  // Accessor type returned by `at`, selected by `returnsOptionalValues`.
  using elem_n_t = std::
      conditional_t<returnsOptionalValues, OptionalAccessor<Any>, GenericView>;

  // Returns the accessor for the field at `index`. No bounds check is
  // performed; callers are expected to keep `index` below `size()`.
  //
  // Declared const because it only reads the view's state. This lets a view
  // that is held by const reference access its fields.
  template <typename IndexT>
  elem_n_t at(IndexT index) const {
    if constexpr (returnsOptionalValues) {
      return elem_n_t{childReaders_[index].get(), offset_};
    } else {
      return childReaders_[index]->operator[](offset_);
    }
  }

  // Number of fields in the row (one child reader per field).
  size_t size() const {
    return childReaders_.size();
  }

 private:
  const readers_t& childReaders_;
  vector_size_t offset_;
};

template <bool returnsOptionalValues, typename... T>
class RowView {
using reader_t = std::tuple<std::unique_ptr<VectorReader<T>>...>;
Expand All @@ -894,10 +933,6 @@ class RowView {
return offset_;
}

vector_size_t childVectorAt() const {
return offset_;
}

public:
RowView(const reader_t* childReaders, vector_size_t offset)
: childReaders_{childReaders}, offset_{offset} {}
Expand Down
45 changes: 45 additions & 0 deletions velox/expression/VectorReaders.h
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,51 @@ struct VectorReader<Generic<T>> {
mutable std::optional<const std::type_info*> castType_ = std::nullopt;
};

template <>
struct VectorReader<DynamicRow> {
using in_vector_t = RowVector;
using exec_in_t = DynamicRowView<true>;
using exec_null_free_in_t = DynamicRowView<false>;

explicit VectorReader(const DecodedVector* decoded)
: decoded_(*decoded),
vector_(detail::getDecoded<in_vector_t>(decoded_)),
childrenDecoders_{vector_.childrenSize()} {
for (int i = 0; i < vector_.childrenSize(); i++) {
childReaders_.push_back(std::make_unique<VectorReader<Any>>(
detail::decode(childrenDecoders_[i], *vector_.childAt(i))));
}
}

exec_in_t operator[](size_t offset) const {
auto index = decoded_.index(offset);
return {&childReaders_, index};
}

exec_null_free_in_t readNullFree(size_t offset) const {
auto index = decoded_.index(offset);
return {&childReaders_, index};
}

bool isSet(size_t offset) const {
return !decoded_.isNullAt(offset);
}

bool mayHaveNulls() const {
return decoded_.mayHaveNulls();
}

const BaseVector* baseVector() const {
return decoded_.base();
}

private:
const DecodedVector& decoded_;
const in_vector_t& vector_;
std::vector<DecodedVector> childrenDecoders_;
std::vector<std::unique_ptr<VectorReader<Any>>> childReaders_;
};

template <typename T>
struct VectorReader<CustomType<T>> : public VectorReader<typename T::type> {
explicit VectorReader(const DecodedVector* decoded)
Expand Down
109 changes: 108 additions & 1 deletion velox/expression/tests/RowViewTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
* limitations under the License.
*/

#include <gtest/gtest.h>
#include <optional>
#include "gtest/gtest.h"

#include "velox/expression/VectorReaders.h"
#include "velox/functions/Udf.h"
#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h"

namespace {
Expand Down Expand Up @@ -204,4 +206,109 @@ TEST_F(NullableRowViewTest, materialize) {
ASSERT_EQ(reader[0].materialize(), expected);
}

class DynamicRowViewTest : public functions::test::FunctionBaseTest {};

TEST_F(DynamicRowViewTest, emptyRow) {
auto rowVector = vectorMaker_.rowVector({});
rowVector->resize(10);
DecodedVector decoded;
exec::VectorReader<DynamicRow> reader(decode(decoded, *rowVector.get()));
ASSERT_FALSE(reader.mayHaveNulls());
for (int i = 0; i < 10; i++) {
ASSERT_EQ(reader[i].size(), 0);
ASSERT_TRUE(reader.isSet(i));
}
}

// Reads a row with an int32 field, a bool field and an array<int32> field
// through both the nullable and the null-free views, and checks that casts to
// the actual field type succeed while casts to other types are rejected.
TEST_F(DynamicRowViewTest, mixedRow) {
  constexpr int kNumRows = 3;
  constexpr int kIntField = 0;
  constexpr int kBoolField = 1;
  constexpr int kArrayField = 2;

  auto arrayChild =
      vectorMaker_.arrayVector<int32_t>({{1}, {2, 3}, {3, 4, 5}});

  auto rows = vectorMaker_.rowVector(
      {makeFlatVector<int32_t>({1, 2, 3}),
       makeFlatVector<bool>({true, false, true}),
       arrayChild});

  DecodedVector decodedRows;
  exec::VectorReader<DynamicRow> rowReader(decode(decodedRows, *rows));
  ASSERT_FALSE(rowReader.mayHaveNulls());

  for (int row = 0; row < kNumRows; ++row) {
    ASSERT_TRUE(rowReader.isSet(row));
  }

  auto nullableView = rowReader[1];
  auto nullFreeView = rowReader.readNullFree(1);

  // int32 field: only the exact type casts.
  EXPECT_FALSE(nullableView.at(kIntField)->tryCastTo<int64_t>());
  EXPECT_FALSE(nullableView.at(kIntField)->tryCastTo<Varchar>());
  EXPECT_TRUE(nullableView.at(kIntField)->tryCastTo<int32_t>());

  for (int row = 0; row < kNumRows; ++row) {
    ASSERT_EQ(rowReader[row].at(kIntField)->castTo<int32_t>(), row + 1);
    ASSERT_EQ(
        rowReader.readNullFree(row).at(kIntField).castTo<int32_t>(), row + 1);
  }

  // bool field.
  EXPECT_FALSE(nullFreeView.at(kBoolField).tryCastTo<int64_t>());
  EXPECT_FALSE(nullFreeView.at(kBoolField).tryCastTo<Varchar>());
  EXPECT_TRUE(nullFreeView.at(kBoolField).tryCastTo<bool>());

  for (int row = 0; row < kNumRows; ++row) {
    ASSERT_EQ(rowReader[row].at(kBoolField)->castTo<bool>(), (row % 2 == 0));
    ASSERT_EQ(
        rowReader.readNullFree(row).at(kBoolField).castTo<bool>(),
        (row % 2 == 0));
  }

  // array<int32> field.
  EXPECT_FALSE(nullFreeView.at(kArrayField).tryCastTo<Array<Varchar>>());
  EXPECT_FALSE(nullFreeView.at(kArrayField).tryCastTo<Array<int64_t>>());

  auto lastRowArray = rowReader[2].at(kArrayField)->castTo<Array<int32_t>>();
  ASSERT_EQ(lastRowArray.size(), 3);
  ASSERT_EQ(lastRowArray[0], 3);
  ASSERT_EQ(lastRowArray[1], 4);
  ASSERT_EQ(lastRowArray[2], 5);
}

// Rows themselves are non-null, but a field inside a row may be null; the
// nullable view must report that per field.
TEST_F(DynamicRowViewTest, rowWithNullsInFields) {
  auto nullableChild = makeNullableFlatVector<int64_t>({1, std::nullopt, 2});
  auto rows = vectorMaker_.rowVector({nullableChild});

  DecodedVector decodedRows;
  exec::VectorReader<DynamicRow> rowReader(decode(decodedRows, *rows));

  ASSERT_FALSE(rowReader.mayHaveNulls());
  ASSERT_TRUE(rowReader[0].at(0));
  ASSERT_FALSE(rowReader[1].at(0));
  ASSERT_TRUE(rowReader[2].at(0));
}

// Test function: writes the number of fields of `input` when it can be viewed
// as a DynamicRow, and 0 otherwise.
template <typename T>
struct StructWidthIfRow {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  // TODO: Ideally we would like to use DynamicRow instead of Any and make this
  // strictly typed. But function signature does not support expressions
  // row(...).
  void call(int64_t& out, const arg_type<Any>& input) {
    auto rowView = input.template tryCastTo<DynamicRow>();
    if (!rowView) {
      // Input is not a row.
      out = 0;
      return;
    }
    out = rowView->size();
  }
};

// Registers StructWidthIfRow as `struct_width` and evaluates it on a non-row
// column, a one-field row column and a two-field row column.
TEST_F(DynamicRowViewTest, castToDynamicRowInFunction) {
  registerFunction<StructWidthIfRow, int64_t, Any>({"struct_width"});

  auto column = makeFlatVector<int64_t>({1, 2});

  // Input is not struct.
  auto nonRowResult = evaluate("struct_width(c0)", makeRowVector({column}));
  test::assertEqualVectors(makeFlatVector<int64_t>({0, 0}), nonRowResult);

  // Input is a struct with a single field.
  auto oneFieldResult =
      evaluate("struct_width(c0)", makeRowVector({makeRowVector({column})}));
  test::assertEqualVectors(makeFlatVector<int64_t>({1, 1}), oneFieldResult);

  // Input is a struct with two fields.
  auto twoFieldResult = evaluate(
      "struct_width(c0)", makeRowVector({makeRowVector({column, column})}));
  test::assertEqualVectors(makeFlatVector<int64_t>({2, 2}), twoFieldResult);
}
} // namespace