Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement zip_view for external sort. #4930

Merged
merged 2 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 175 additions & 16 deletions tiledb/common/alt_var_length_view.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@
* The difference between `alt_var_length_view` and `var_length_view` is that
* `alt_var_length_view` maintains a materialized range of subranges, whereas
* `var_length_view` creates subrange views on the fly as proxy objects. As a
* result
* * An `alt_var_length_view` does not need to refer to the offsets array after
* it is constructed
* * An `alt_var_length_view` can be sorted
* result:
* - An `alt_var_length_view` does not need to refer to the offsets array
* after it is constructed
* - An `alt_var_length_view` can be sorted
*
*
* Usage example:
Expand Down Expand Up @@ -69,12 +69,15 @@
* @tparam I Type of the index range, assumed to be a random access range.
*
* @todo R could be a view rather than a range.
* @todo Would using `std::ranges::view_interface` be better than `view_base`?
*/
template <
std::ranges::random_access_range R,
std::ranges::random_access_range I>
class alt_var_length_view : public std::ranges::view_base {
// Forward reference of the iterator over the range of variable length data
/**
* Forward reference of the iterator over the range of variable length data
*/
template <class Value>
struct private_iterator;

Expand Down Expand Up @@ -115,30 +118,167 @@ class alt_var_length_view : public std::ranges::view_base {
/** Move assignment: defaulted, so the compiler-generated version is used. */
alt_var_length_view& operator=(alt_var_length_view&&) = default;

/** Primary constructor. All offsets are contained in the input (notably, the
* index to the end of the data range). */
/**
 * Constructor taking iterator pairs for the data and index ranges, arrow
 * format.
 *
 * In this format the index range holds one more offset than there are
 * subranges: subrange `i` spans
 * `[data_begin + begin_index[i], data_begin + begin_index[i + 1])`.
 *
 * An index range with fewer than two offsets delimits no subranges and
 * produces an empty view.
 *
 * @param data_begin Iterator to the start of the data range.
 * @param data_end Iterator to the end of the data range (not used).
 * @param begin_index Iterator to the first offset.
 * @param index_end Iterator one past the last offset.
 */
alt_var_length_view(
    std::ranges::iterator_t<R> data_begin,
    [[maybe_unused]] std::ranges::iterator_t<R> data_end,
    std::ranges::iterator_t<const I> begin_index,
    std::ranges::iterator_t<const I> index_end) {
  // N + 1 offsets delimit N subranges.
  auto num_subranges = index_end - begin_index - 1;

  // An empty index range yields -1 here. Stop before `reserve`, which would
  // otherwise convert the negative count to a huge unsigned size.
  if (num_subranges <= 0) {
    return;
  }

  subranges_.reserve(num_subranges);

  // Index with the iterator difference type so the counter is never narrower
  // than the count it is compared against.
  for (decltype(num_subranges) i = 0; i < num_subranges; ++i) {
    subranges_.emplace_back(
        data_begin + begin_index[i], data_begin + begin_index[i + 1]);
  }
}

/**
 * Constructor taking iterator pairs for the data and index ranges, tiledb
 * format.
 *
 * In this format the index range holds exactly one starting offset per
 * subrange. The end of the final subrange is not stored in the index range
 * and is supplied separately as `missing_index`.
 *
 * An empty index range produces an empty view.
 *
 * @param data_begin Iterator to the start of the data range.
 * @param data_end Iterator to the end of the data range (not used).
 * @param begin_index Iterator to the first offset.
 * @param index_end Iterator one past the last offset.
 * @param missing_index Offset into the data at which the last subrange ends.
 */
alt_var_length_view(
    std::ranges::iterator_t<R> data_begin,
    [[maybe_unused]] std::ranges::iterator_t<R> data_end,
    std::ranges::iterator_t<const I> begin_index,
    std::ranges::iterator_t<const I> index_end,
    data_index_type missing_index) {
  auto num_subranges = index_end - begin_index;

  // Without any offsets there is no final subrange to close; returning here
  // avoids reading `begin_index[-1]` below.
  if (num_subranges <= 0) {
    return;
  }

  subranges_.reserve(num_subranges);

  // All subranges but the last are delimited by consecutive offsets.
  for (decltype(num_subranges) i = 0; i < num_subranges - 1; ++i) {
    subranges_.emplace_back(
        data_begin + begin_index[i], data_begin + begin_index[i + 1]);
  }

  // The last subrange runs from the final offset to `missing_index`.
  subranges_.emplace_back(
      data_begin + begin_index[num_subranges - 1],
      data_begin + missing_index);
}

/**
 * Constructor taking iterator pairs for the data and index ranges, with
 * sizes, arrow format.
 *
 * Only the first `n_index` offsets starting at `begin_index` are used; they
 * delimit `n_index - 1` subranges, subrange `i` spanning
 * `[data_begin + begin_index[i], data_begin + begin_index[i + 1])`.
 *
 * An `n_index` smaller than two delimits no subranges and produces an empty
 * view.
 *
 * @param data_begin Iterator to the start of the data range.
 * @param data_end Iterator to the end of the data range (not used).
 * @param n_data Number of data elements (not used).
 * @param begin_index Iterator to the first offset.
 * @param index_end Iterator one past the last offset (not used).
 * @param n_index Number of offsets to read from `begin_index`.
 */
alt_var_length_view(
    std::ranges::iterator_t<R> data_begin,
    [[maybe_unused]] std::ranges::iterator_t<R> data_end,
    [[maybe_unused]] std::ranges::range_difference_t<R> n_data,
    std::ranges::iterator_t<const I> begin_index,
    [[maybe_unused]] std::ranges::iterator_t<const I> index_end,
    std::ranges::range_difference_t<I> n_index) {
  // N + 1 offsets delimit N subranges.
  auto num_subranges = n_index - 1;

  // `n_index == 0` yields -1 here. Stop before `reserve`, which would
  // otherwise convert the negative count to a huge unsigned size.
  if (num_subranges <= 0) {
    return;
  }

  subranges_.reserve(num_subranges);

  for (decltype(num_subranges) i = 0; i < num_subranges; ++i) {
    subranges_.emplace_back(
        data_begin + begin_index[i], data_begin + begin_index[i + 1]);
  }
}

/**
 * Constructor taking iterator pairs for the data and index ranges, with
 * sizes, tiledb format.
 *
 * Only the first `n_index` offsets starting at `begin_index` are used, one
 * starting offset per subrange. The end of the final subrange is supplied
 * separately as `missing_index`.
 *
 * An `n_index` of zero (or less) produces an empty view.
 *
 * @param data_begin Iterator to the start of the data range.
 * @param data_end Iterator to the end of the data range (not used).
 * @param n_data Number of data elements (not used).
 * @param begin_index Iterator to the first offset.
 * @param index_end Iterator one past the last offset (not used).
 * @param n_index Number of offsets to read from `begin_index`.
 * @param missing_index Offset into the data at which the last subrange ends.
 */
alt_var_length_view(
    std::ranges::iterator_t<R> data_begin,
    [[maybe_unused]] std::ranges::iterator_t<R> data_end,
    [[maybe_unused]] std::ranges::range_difference_t<R> n_data,
    std::ranges::iterator_t<const I> begin_index,
    [[maybe_unused]] std::ranges::iterator_t<const I> index_end,
    std::ranges::range_difference_t<I> n_index,
    data_index_type missing_index) {
  auto num_subranges = n_index;

  // Without any offsets there is no final subrange to close; returning here
  // avoids reading `begin_index[-1]` below.
  if (num_subranges <= 0) {
    return;
  }

  subranges_.reserve(num_subranges);

  // All subranges but the last are delimited by consecutive offsets.
  for (decltype(num_subranges) i = 0; i < num_subranges - 1; ++i) {
    subranges_.emplace_back(
        data_begin + begin_index[i], data_begin + begin_index[i + 1]);
  }

  // The last subrange runs from the final offset to `missing_index`.
  subranges_.emplace_back(
      data_begin + begin_index[num_subranges - 1],
      data_begin + missing_index);
}

/**
 * Constructor taking ranges for the data and index ranges, arrow format.
 *
 * In this format `index` holds one more offset than there are subranges:
 * subrange `i` spans `[begin(data) + index[i], begin(data) + index[i + 1])`.
 *
 * An `index` with fewer than two offsets delimits no subranges and produces
 * an empty view.
 *
 * @param data The range of data elements to be partitioned.
 * @param index The range of offsets into `data`.
 */
alt_var_length_view(R& data, const I& index) {
  const auto num_offsets = std::ranges::size(index);

  // Check before subtracting: for an empty `index`, `num_offsets - 1` would
  // wrap around when the size type is unsigned and be handed to `reserve`.
  if (num_offsets < 2) {
    return;
  }

  // N + 1 offsets delimit N subranges.
  const auto num_subranges = num_offsets - 1;
  auto data_begin(std::ranges::begin(data));
  auto index_begin(std::ranges::begin(index));

  subranges_.reserve(num_subranges);

  for (size_t i = 0; i < num_subranges; ++i) {
    subranges_.emplace_back(
        data_begin + index_begin[i], data_begin + index_begin[i + 1]);
  }
}

/**
 * Constructor taking ranges for the data and index ranges, tiledb format.
 *
 * Each pair of consecutive offsets in `index` delimits one subrange of
 * `data`; the final subrange runs from `index.back()` to `missing_index`.
 *
 * @param data The range of data elements to be partitioned.
 * @param index The range of starting offsets into `data`.
 * @param missing_index Offset into `data` at which the last subrange ends.
 *
 * NOTE(review): this span appears to interleave removed and added lines of a
 * diff -- the `std::ranges::size(index) - 1` reserve and loop header look
 * like the pre-change versions of the `num_subranges` lines. Confirm against
 * the merged file.
 *
 * NOTE(review): an empty `index` is not handled: `num_subranges - 1` would
 * presumably wrap (unsigned size) and `index.back()` has no element to
 * return. Confirm callers never pass an empty index.
 */
alt_var_length_view(R& data, const I& index, data_index_type missing_index) {
auto num_subranges = std::ranges::size(index);
auto data_begin(std::ranges::begin(data));
[[maybe_unused]] auto index_begin(std::ranges::begin(index));

subranges_.reserve(num_subranges);

subranges_.reserve(std::ranges::size(index) - 1);
for (size_t i = 0; i < std::ranges::size(index) - 1; ++i) {
for (size_t i = 0; i < num_subranges - 1; ++i) {
subranges_.emplace_back(data_begin + index[i], data_begin + index[i + 1]);
}
subranges_.emplace_back(
data_begin + index.back(), data_begin + missing_index);
}

/** Constructor. The offsets do not contain the final index value (which would
* be the end of the data range), so the final index is passed in as a
* separate argument.
/**
 * Constructor taking ranges for the data and index ranges, with sizes, arrow
 * format.
 *
 * The first `n_index` offsets of `index` delimit `n_index - 1` subranges of
 * `data`; subrange `i` spans
 * `[begin(data) + index[i], begin(data) + index[i + 1])`.
 *
 * @param data The range of data elements to be partitioned.
 * @param n_data Number of data elements (not used).
 * @param index The range of offsets into `data`.
 * @param n_index Number of offsets of `index` to use.
 *
 * NOTE(review): this span appears to interleave removed and added lines of a
 * diff -- the three-argument signature taking `end_index` and the
 * `std::ranges::size(index) - 1` reserve and loop header look like the
 * pre-change lines. Confirm against the merged file.
 *
 * NOTE(review): with `n_index == 0`, `num_subranges` is -1 and is passed to
 * `reserve`. Confirm callers never pass zero.
 */
alt_var_length_view(R& data, const I& index, data_index_type end_index) {
alt_var_length_view(
R& data,
[[maybe_unused]] std::ranges::range_difference_t<R> n_data,
const I& index,
std::ranges::range_difference_t<I> n_index) {
auto num_subranges = n_index - 1;
auto data_begin(std::ranges::begin(data));
auto index_begin(std::ranges::begin(index));

subranges_.reserve(std::ranges::size(index) - 1);
subranges_.reserve(num_subranges);

for (size_t i = 0; i < std::ranges::size(index) - 1; ++i) {
for (long i = 0; i < num_subranges; ++i) {
subranges_.emplace_back(
data_begin + index_begin[i], data_begin + index_begin[i + 1]);
}
}

/**
 * Constructor taking ranges for the data and index ranges, with sizes,
 * tiledb format.
 *
 * The first `n_index` offsets of `index` each start one subrange of `data`.
 * Consecutive offsets delimit all subranges but the last, which runs from
 * `index[n_index - 1]` to `missing_index`.
 *
 * @param data The range of data elements to be partitioned.
 * @param n_data Number of data elements (not used).
 * @param index The range of starting offsets into `data`.
 * @param n_index Number of offsets of `index` to use.
 * @param missing_index Offset into `data` at which the last subrange ends.
 *
 * NOTE(review): this span appears to interleave removed and added lines of a
 * diff -- the `emplace_back` using `index.back()` and `end_index` (which is
 * not a parameter here) looks like a pre-change line. Confirm against the
 * merged file.
 *
 * NOTE(review): with `n_index == 0` the final `emplace_back` reads
 * `index[-1]`. Confirm callers never pass zero.
 */
alt_var_length_view(
R& data,
[[maybe_unused]] std::ranges::range_difference_t<R> n_data,
const I& index,
std::ranges::range_difference_t<I> n_index,
data_index_type missing_index) {
auto num_subranges = n_index;
auto data_begin(std::ranges::begin(data));
[[maybe_unused]] auto index_begin(std::ranges::begin(index));

subranges_.reserve(num_subranges);

for (long i = 0; i < num_subranges - 1; ++i) {
subranges_.emplace_back(data_begin + index[i], data_begin + index[i + 1]);
}
subranges_.emplace_back(data_begin + index.back(), data_begin + end_index);
subranges_.emplace_back(
data_begin + index[num_subranges - 1], data_begin + missing_index);
}

/** Return iterator to the beginning of the var length view */
Expand Down Expand Up @@ -180,4 +320,23 @@ class alt_var_length_view : public std::ranges::view_base {
std::vector<var_length_type> subranges_;
};

/**
 * Deduction guide: (data iterator pair, index iterator pair).
 * The view is deduced over subranges of the two iterator types.
 */
template <class DataIter, class IndexIter>
alt_var_length_view(DataIter, DataIter, IndexIter, IndexIter)
    -> alt_var_length_view<
        std::ranges::subrange<DataIter>,
        std::ranges::subrange<IndexIter>>;

/**
 * Deduction guide: (data iterator pair, index iterator pair, one trailing
 * argument of any type).
 */
template <class DataIter, class IndexIter, class Trailing>
alt_var_length_view(DataIter, DataIter, IndexIter, IndexIter, Trailing)
    -> alt_var_length_view<
        std::ranges::subrange<DataIter>,
        std::ranges::subrange<IndexIter>>;

/**
 * Deduction guide: (data iterator pair, data count, index iterator pair,
 * index count).
 */
template <class DataIter, class IndexIter, class DataCount, class IndexCount>
alt_var_length_view(
    DataIter, DataIter, DataCount, IndexIter, IndexIter, IndexCount)
    -> alt_var_length_view<
        std::ranges::subrange<DataIter>,
        std::ranges::subrange<IndexIter>>;

/**
 * Deduction guide: (data iterator pair, data count, index iterator pair,
 * index count, one trailing argument of any type).
 */
template <
    class DataIter,
    class IndexIter,
    class DataCount,
    class IndexCount,
    class Trailing>
alt_var_length_view(
    DataIter, DataIter, DataCount, IndexIter, IndexIter, IndexCount, Trailing)
    -> alt_var_length_view<
        std::ranges::subrange<DataIter>,
        std::ranges::subrange<IndexIter>>;
#endif // TILEDB_ALT_VAR_LENGTH_VIEW_H
2 changes: 1 addition & 1 deletion tiledb/common/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ commence(unit_test memory_tracker_types)
conclude(unit_test)

commence(unit_test common_utils)
this_target_sources(main.cc unit_alt_var_length_view.cc unit_iterator_facade.cc unit_permutation_view.cc unit_proxy_sort.cc unit_var_length_view.cc)
this_target_sources(main.cc unit_alt_var_length_view.cc unit_iterator_facade.cc unit_permutation_view.cc unit_proxy_sort.cc unit_var_length_view.cc unit_zip_view.cc)
this_target_object_libraries(baseline)
conclude(unit_test)

127 changes: 119 additions & 8 deletions tiledb/common/test/unit_alt_var_length_view.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,44 @@ TEST_CASE(
// Simple test that the alt_var_length_view can be constructed
TEST_CASE("alt_var_length_view: Basic constructor", "[alt_var_length_view]") {
std::vector<double> r = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0};
std::vector<size_t> o = {0, 3, 6, 10};
std::vector<std::vector<double>> expected = {
{1.0, 2.0, 3.0},
{4.0, 5.0, 6.0},
{7.0, 8.0, 9.0, 10.0},
};

SECTION("iterator pair") {
auto u = alt_var_length_view(r.begin(), r.end(), o.begin(), o.end());
auto v = alt_var_length_view{r.begin(), r.end(), o.begin(), o.end()};
alt_var_length_view w(r.begin(), r.end(), o.begin(), o.end());
alt_var_length_view x{r.begin(), r.end(), o.begin(), o.end()};

SECTION("Arrow format") {
std::vector<size_t> o = {0, 3, 6, 10};
CHECK(size(u) == 3);
CHECK(size(v) == 3);
CHECK(size(w) == 3);
CHECK(size(x) == 3);

for (auto&& i : v) {
CHECK(std::ranges::equal(i, expected[&i - &*v.begin()]));
}
}
SECTION("iterator pair with size") {
auto u = alt_var_length_view(r.begin(), r.end(), 6, o.begin(), o.end(), 3);
auto v = alt_var_length_view{r.begin(), r.end(), 6, o.begin(), o.end(), 3};
alt_var_length_view w(r.begin(), r.end(), 6, o.begin(), o.end(), 3);
alt_var_length_view x{r.begin(), r.end(), 6, o.begin(), o.end(), 3};

CHECK(size(u) == 2);
CHECK(size(v) == 2);
CHECK(size(w) == 2);
CHECK(size(x) == 2);

for (auto&& i : v) {
CHECK(std::ranges::equal(i, expected[&i - &*v.begin()]));
}
}
SECTION("range") {
auto u = alt_var_length_view(r, o);
auto v = alt_var_length_view{r, o};
alt_var_length_view w(r, o);
Expand All @@ -128,18 +163,94 @@ TEST_CASE("alt_var_length_view: Basic constructor", "[alt_var_length_view]") {
CHECK(size(v) == 3);
CHECK(size(w) == 3);
CHECK(size(x) == 3);

for (auto&& i : v) {
CHECK(std::ranges::equal(i, expected[&i - &*v.begin()]));
}
}

SECTION("range with size") {
auto u = alt_var_length_view(r, 6, o, 3);
auto v = alt_var_length_view{r, 6, o, 3};
alt_var_length_view w(r, 6, o, 3);
alt_var_length_view x{r, 6, o, 3};

CHECK(size(u) == 2);
CHECK(size(v) == 2);
CHECK(size(w) == 2);
CHECK(size(x) == 2);

for (auto&& i : v) {
CHECK(std::ranges::equal(i, expected[&i - &*v.begin()]));
}
}
SECTION("TileDB format") {
std::vector<size_t> o = {0, 3, 6};
auto u = alt_var_length_view(r, o, 10);
auto v = alt_var_length_view{r, o, 10};
alt_var_length_view w(r, o, 10);
alt_var_length_view x{r, o, 10};

SECTION("iterator pair, tiledb format") {
auto u =
alt_var_length_view(r.begin(), r.end(), o.begin(), o.end() - 1, 10);
auto v =
alt_var_length_view{r.begin(), r.end(), o.begin(), o.end() - 1, 10};
alt_var_length_view w(r.begin(), r.end(), o.begin(), o.end() - 1, 10);
alt_var_length_view x{r.begin(), r.end(), o.begin(), o.end() - 1, 10};

CHECK(size(u) == 3);
CHECK(size(v) == 3);
CHECK(size(w) == 3);
CHECK(size(x) == 3);

for (auto&& i : v) {
CHECK(std::ranges::equal(i, expected[&i - &*v.begin()]));
}
}

SECTION("iterator pair with size, tiledb format") {
auto u =
alt_var_length_view(r.begin(), r.end(), 6, o.begin(), o.end(), 2, 6);
auto v =
alt_var_length_view{r.begin(), r.end(), 6, o.begin(), o.end(), 2, 6};
alt_var_length_view w(r.begin(), r.end(), 6, o.begin(), o.end(), 2, 6);
alt_var_length_view x{r.begin(), r.end(), 6, o.begin(), o.end(), 2, 6};

CHECK(size(u) == 2);
CHECK(size(v) == 2);
CHECK(size(w) == 2);
CHECK(size(x) == 2);

for (auto&& i : v) {
CHECK(std::ranges::equal(i, expected[&i - &*v.begin()]));
}
}

SECTION("range, tiledb format") {
auto u = alt_var_length_view(r, std::ranges::views::take(o, 3), 10);
auto v = alt_var_length_view{r, std::ranges::views::take(o, 3), 10};
alt_var_length_view w(r, std::ranges::views::take(o, 3), 10);
alt_var_length_view x{r, std::ranges::views::take(o, 3), 10};

CHECK(size(u) == 3);
CHECK(size(v) == 3);
CHECK(size(w) == 3);
CHECK(size(x) == 3);

for (auto&& i : v) {
CHECK(std::ranges::equal(i, expected[&i - &*v.begin()]));
}
}

SECTION("range with size, tiledb format") {
auto u = alt_var_length_view(r, 6, o, 2, 6);
auto v = alt_var_length_view{r, 6, o, 2, 6};
alt_var_length_view w(r, 6, o, 2, 6);
alt_var_length_view x{r, 6, o, 2, 6};

CHECK(size(u) == 2);
CHECK(size(v) == 2);
CHECK(size(w) == 2);
CHECK(size(x) == 2);

for (auto&& i : v) {
CHECK(std::ranges::equal(i, expected[&i - &*v.begin()]));
}
}
}

Expand Down
Loading
Loading