Skip to content

Commit

Permalink
Add DynamicRowView and its reader (#5075)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #5075

A DynamicRowView can represent a row vector of any width of any types, without having to be typed.
It has two functions:
1. size() : width of the struct.
2. at(int i) : return generic view for the field i.

This simplifies and enables reading vectors of rows with unknown
or unlimited types. This is also needed to support row types
in the GenericWriter copy_from operation.

This can't be used in the simple function interface as a top-level type because
our function signature does not support it, but generics can be cast
to it. It is needed to enable row
support in the generic writer copy_from operation, which will be added.

Reviewed By: kevinwilfong, kagamiori

Differential Revision: D46299159

fbshipit-source-id: 8b86a1dc0d9ccabaded98cf77745b852d1318a59
  • Loading branch information
laithsakka authored and facebook-github-bot committed Jun 7, 2023
1 parent 148676b commit 7f43495
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 5 deletions.
43 changes: 39 additions & 4 deletions velox/expression/ComplexViewTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,45 @@ class MapView {
vector_size_t size_;
};

class GenericView;

// A view type that is used to represent a row of any size of any children
// types. Function `at(index)` returns a generic view for the field at `index`.
//
// The view does not own the child readers: it keeps a reference to the vector
// passed to the constructor, so that vector must outlive the view.
//
// `returnsOptionalValues` selects what `at` returns:
//   - true:  an OptionalAccessor<Any> for the field.
//   - false: a GenericView, obtained directly from the child reader.
template <bool returnsOptionalValues>
class DynamicRowView {
  using readers_t = std::vector<std::unique_ptr<VectorReader<Any>>>;

 public:
  // Type returned by `at`.
  using elem_n_t = std::conditional_t<
      returnsOptionalValues,
      OptionalAccessor<Any>,
      GenericView>;

  // `childReaders` must be non-null; it is dereferenced immediately and only a
  // reference to the pointed-to vector is stored. `offset` is the row index
  // forwarded to the child readers by `at`.
  DynamicRowView(const readers_t* childReaders, vector_size_t offset)
      : childReaders_{*childReaders}, offset_{offset} {}

  // Row index this view reads from in each child reader.
  vector_size_t offset() const {
    return offset_;
  }

  // Returns the view of field `index` for this row. `index` is not bounds
  // checked; it must be less than size().
  //
  // Marked const because reading a field does not modify the view, which
  // allows `at` to be called through const views as well.
  template <typename IndexT>
  elem_n_t at(IndexT index) const {
    if constexpr (returnsOptionalValues) {
      return elem_n_t{childReaders_[index].get(), offset_};
    } else {
      return childReaders_[index]->operator[](offset_);
    }
  }

  // Number of fields (children) in the row.
  size_t size() const {
    return childReaders_.size();
  }

 private:
  const readers_t& childReaders_;
  vector_size_t offset_;
};

template <bool returnsOptionalValues, typename... T>
class RowView {
using reader_t = std::tuple<std::unique_ptr<VectorReader<T>>...>;
Expand All @@ -894,10 +933,6 @@ class RowView {
return offset_;
}

vector_size_t childVectorAt() const {
return offset_;
}

public:
RowView(const reader_t* childReaders, vector_size_t offset)
: childReaders_{childReaders}, offset_{offset} {}
Expand Down
45 changes: 45 additions & 0 deletions velox/expression/VectorReaders.h
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,51 @@ struct VectorReader<Generic<T>> {
mutable std::optional<const std::type_info*> castType_ = std::nullopt;
};

template <>
struct VectorReader<DynamicRow> {
using in_vector_t = RowVector;
using exec_in_t = DynamicRowView<true>;
using exec_null_free_in_t = DynamicRowView<false>;

explicit VectorReader(const DecodedVector* decoded)
: decoded_(*decoded),
vector_(detail::getDecoded<in_vector_t>(decoded_)),
childrenDecoders_{vector_.childrenSize()} {
for (int i = 0; i < vector_.childrenSize(); i++) {
childReaders_.push_back(std::make_unique<VectorReader<Any>>(
detail::decode(childrenDecoders_[i], *vector_.childAt(i))));
}
}

exec_in_t operator[](size_t offset) const {
auto index = decoded_.index(offset);
return {&childReaders_, index};
}

exec_null_free_in_t readNullFree(size_t offset) const {
auto index = decoded_.index(offset);
return {&childReaders_, index};
}

bool isSet(size_t offset) const {
return !decoded_.isNullAt(offset);
}

bool mayHaveNulls() const {
return decoded_.mayHaveNulls();
}

const BaseVector* baseVector() const {
return decoded_.base();
}

private:
const DecodedVector& decoded_;
const in_vector_t& vector_;
std::vector<DecodedVector> childrenDecoders_;
std::vector<std::unique_ptr<VectorReader<Any>>> childReaders_;
};

template <typename T>
struct VectorReader<CustomType<T>> : public VectorReader<typename T::type> {
explicit VectorReader(const DecodedVector* decoded)
Expand Down
109 changes: 108 additions & 1 deletion velox/expression/tests/RowViewTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
* limitations under the License.
*/

#include <gtest/gtest.h>
#include <optional>
#include "gtest/gtest.h"

#include "velox/expression/VectorReaders.h"
#include "velox/functions/Udf.h"
#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h"

namespace {
Expand Down Expand Up @@ -204,4 +206,109 @@ TEST_F(NullableRowViewTest, materialize) {
ASSERT_EQ(reader[0].materialize(), expected);
}

// Fixture for the DynamicRowView / VectorReader<DynamicRow> tests below. It
// adds nothing of its own on top of FunctionBaseTest.
class DynamicRowViewTest : public functions::test::FunctionBaseTest {};

TEST_F(DynamicRowViewTest, emptyRow) {
  // A row vector without children, resized so that it still has rows to read.
  constexpr int kNumRows = 10;
  auto emptyRows = vectorMaker_.rowVector({});
  emptyRows->resize(kNumRows);

  DecodedVector decodedRows;
  exec::VectorReader<DynamicRow> rowReader(decode(decodedRows, *emptyRows));
  ASSERT_FALSE(rowReader.mayHaveNulls());

  // Every row is set and exposes zero fields.
  for (int row = 0; row < kNumRows; ++row) {
    ASSERT_EQ(rowReader[row].size(), 0);
    ASSERT_TRUE(rowReader.isSet(row));
  }
}

TEST_F(DynamicRowViewTest, mixedRow) {
  constexpr int kNumRows = 3;

  // Row layout: (int32, bool, array<int32>).
  auto arrayColumn =
      vectorMaker_.arrayVector<int32_t>({{1}, {2, 3}, {3, 4, 5}});
  auto rows = vectorMaker_.rowVector(
      {makeFlatVector<int32_t>({1, 2, 3}),
       makeFlatVector<bool>({true, false, true}),
       arrayColumn});

  DecodedVector decodedRows;
  exec::VectorReader<DynamicRow> rowReader(decode(decodedRows, *rows));
  ASSERT_FALSE(rowReader.mayHaveNulls());

  for (int row = 0; row < kNumRows; ++row) {
    ASSERT_TRUE(rowReader.isSet(row));
  }

  // Both flavors of the view for the second row.
  auto optionalView = rowReader[1];
  auto nullFreeView = rowReader.readNullFree(1);

  // Field 0 is int32: only the exact type casts successfully.
  EXPECT_FALSE(optionalView.at(0)->tryCastTo<int64_t>());
  EXPECT_FALSE(optionalView.at(0)->tryCastTo<Varchar>());
  EXPECT_TRUE(optionalView.at(0)->tryCastTo<int32_t>());

  for (int row = 0; row < kNumRows; ++row) {
    ASSERT_EQ(rowReader[row].at(0)->castTo<int32_t>(), row + 1);
    ASSERT_EQ(rowReader.readNullFree(row).at(0).castTo<int32_t>(), row + 1);
  }

  // Field 1 is bool.
  EXPECT_FALSE(nullFreeView.at(1).tryCastTo<int64_t>());
  EXPECT_FALSE(nullFreeView.at(1).tryCastTo<Varchar>());
  EXPECT_TRUE(nullFreeView.at(1).tryCastTo<bool>());

  for (int row = 0; row < kNumRows; ++row) {
    ASSERT_EQ(rowReader[row].at(1)->castTo<bool>(), (row % 2 == 0));
    ASSERT_EQ(rowReader.readNullFree(row).at(1).castTo<bool>(), (row % 2 == 0));
  }

  // Field 2 is array<int32>: arrays of other element types are rejected.
  EXPECT_FALSE(nullFreeView.at(2).tryCastTo<Array<Varchar>>());
  EXPECT_FALSE(nullFreeView.at(2).tryCastTo<Array<int64_t>>());

  // The last row holds the array {3, 4, 5}.
  auto lastRowArray = rowReader[2].at(2)->castTo<Array<int32_t>>();
  ASSERT_EQ(lastRowArray.size(), 3);
  ASSERT_EQ(lastRowArray[0], 3);
  ASSERT_EQ(lastRowArray[1], 4);
  ASSERT_EQ(lastRowArray[2], 5);
}

TEST_F(DynamicRowViewTest, rowWithNullsInFields) {
  // Single-column row vector; only the middle row has a null field.
  auto rows = vectorMaker_.rowVector(
      {makeNullableFlatVector<int64_t>({1, std::nullopt, 2})});

  DecodedVector decodedRows;
  exec::VectorReader<DynamicRow> rowReader(decode(decodedRows, *rows));

  // The rows themselves are never null; only the field inside row 1 is.
  ASSERT_FALSE(rowReader.mayHaveNulls());

  ASSERT_TRUE(rowReader[0].at(0));
  ASSERT_FALSE(rowReader[1].at(0));
  ASSERT_TRUE(rowReader[2].at(0));
}

template <typename T>
struct StructWidthIfRow {
VELOX_DEFINE_FUNCTION_TYPES(T);
// TODO: Ideally we would like to use DynamicRow instead of Any and make this
// strictly typed. But function signature does not support expressions
// row(...).
void call(int64_t& out, const arg_type<Any>& input) {
if (auto dyanmicRowView = input.template tryCastTo<DynamicRow>()) {
out = dyanmicRowView->size();
} else {
out = 0;
}
}
};

TEST_F(DynamicRowViewTest, castToDynamicRowInFunction) {
  registerFunction<StructWidthIfRow, int64_t, Any>({"struct_width"});

  auto column = makeFlatVector<int64_t>({1, 2});

  // Evaluates struct_width over column c0 of `input` and checks that both
  // rows produce `width`.
  auto expectWidth = [&](const auto& input, int64_t width) {
    auto actual = evaluate("struct_width(c0)", input);
    test::assertEqualVectors(makeFlatVector<int64_t>({width, width}), actual);
  };

  // Input is not struct.
  expectWidth(makeRowVector({column}), 0);

  // Input is a struct with one field.
  expectWidth(makeRowVector({makeRowVector({column})}), 1);

  // Input is a struct with two fields.
  expectWidth(makeRowVector({makeRowVector({column, column})}), 2);
}
} // namespace

0 comments on commit 7f43495

Please sign in to comment.