From d861e95970846254671b384856f4ee06b031696a Mon Sep 17 00:00:00 2001 From: Andrew Lumsdaine Date: Thu, 2 May 2024 00:56:12 -0700 Subject: [PATCH] Implement zip_view for external sort. (#4930) This PR implements a `zip_view` for zipping together a set of ranges. It is intended to implement the `std::ranges::zip_view` as defined for C++23. From https://en.cppreference.com/w/cpp/ranges/zip_view: 1. A zip_view is a range adaptor that takes one or more views, and produces a view whose ith element is a tuple-like value consisting of the ith elements of all views. The size of produced view is the minimum of sizes of all adapted views. 2. `zip()` is a customization point object that constructs a `zip_view`. Currently, the `zip_view` only supports zipping together ranges that are `random_access_range`s. In addition, the `size()` and `end()` functions are only provided if all of the ranges are `sized_range`s. The iterator from a `zip_view` is essentially a tuple of begin iterators, one for each of the zipped ranges, plus an index that keeps track of the iterator's location in the zipped ranges. The size of a `zip_view` is the size of the smallest range. The end iterator of a zip view is the begin iterator plus the size of the zip_view. Unit tests have similar coverage to the tests for the var length views. Tests also include zipping a var length view and iterating through with `std::for_each` and `for`. [sc-43639] --- TYPE: IMPROVEMENT DESC: Implement zip_view for external sort. 
--- tiledb/common/test/CMakeLists.txt | 2 +- tiledb/common/test/unit_zip_view.cc | 292 ++++++++++++++++++++++++++++ tiledb/common/zip_view.h | 261 +++++++++++++++++++++++++ 3 files changed, 554 insertions(+), 1 deletion(-) create mode 100644 tiledb/common/test/unit_zip_view.cc create mode 100644 tiledb/common/zip_view.h diff --git a/tiledb/common/test/CMakeLists.txt b/tiledb/common/test/CMakeLists.txt index c5af2d47d80..e6f7df42ed5 100644 --- a/tiledb/common/test/CMakeLists.txt +++ b/tiledb/common/test/CMakeLists.txt @@ -42,7 +42,7 @@ commence(unit_test memory_tracker_types) conclude(unit_test) commence(unit_test common_utils) - this_target_sources(main.cc unit_alt_var_length_view.cc unit_iterator_facade.cc unit_permutation_view.cc unit_proxy_sort.cc unit_var_length_view.cc) + this_target_sources(main.cc unit_alt_var_length_view.cc unit_iterator_facade.cc unit_permutation_view.cc unit_proxy_sort.cc unit_var_length_view.cc unit_zip_view.cc) this_target_object_libraries(baseline) conclude(unit_test) diff --git a/tiledb/common/test/unit_zip_view.cc b/tiledb/common/test/unit_zip_view.cc new file mode 100644 index 00000000000..0ca003509b5 --- /dev/null +++ b/tiledb/common/test/unit_zip_view.cc @@ -0,0 +1,292 @@ +/** + * @file unit_zip_view.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file implements unit tests for the zip_view class. + */ + +#include +#include +#include +#include "../zip_view.h" + +#include "../alt_var_length_view.h" + +TEST_CASE("zip_view: Null test", "[zip_view][null_test]") { + REQUIRE(true); +} + +TEST_CASE("zip_view: Range concepts", "[zip_view][concepts]") { + using test_type = zip_view, std::vector>; + + CHECK(std::ranges::range); + CHECK(!std::ranges::borrowed_range); + CHECK(std::ranges::sized_range); + CHECK(std::ranges::input_range); + CHECK(!std::ranges:: + output_range>); + CHECK(std::ranges::forward_range); + CHECK(std::ranges::bidirectional_range); + CHECK(std::ranges::random_access_range); + CHECK(!std::ranges::contiguous_range); + CHECK(std::ranges::common_range); + + // @todo Fix so that it passes on ubuntu. + // CHECK(std::ranges::viewable_range); + + // @todo: Should this be a view? 
+ CHECK(!std::ranges::view); +} + +TEST_CASE("zip_view: Iterator concepts", "[zip_view][concepts]") { + using test_type = zip_view, std::vector>; + using test_type_iterator = std::ranges::iterator_t; + using test_type_const_iterator = std::ranges::iterator_t; + + CHECK(std::input_or_output_iterator); + CHECK(std::input_or_output_iterator); + CHECK(std::input_iterator); + CHECK(std::input_iterator); + CHECK(!std::output_iterator< + test_type_iterator, + std::ranges::range_value_t>); + CHECK(!std::output_iterator< + test_type_const_iterator, + std::ranges::range_value_t>); + CHECK(std::forward_iterator); + CHECK(std::forward_iterator); + CHECK(std::bidirectional_iterator); + CHECK(std::bidirectional_iterator); + CHECK(std::random_access_iterator); + CHECK(std::random_access_iterator); +} + +// Test that the zip_view value_type satisfies the expected concepts +TEST_CASE("zip_view: value_type concepts", "[zip_view][concepts]") { + using test_type = zip_view, std::vector>; + CHECK(std::ranges::range); + + using test_iterator_type = std::ranges::iterator_t; + using test_iterator_value_type = std::iter_value_t; + using test_iterator_reference_type = + std::iter_reference_t; + + using range_value_type = std::ranges::range_value_t; + using range_reference_type = std::ranges::range_reference_t; + + CHECK(std::is_same_v); + CHECK(std::is_same_v); +} + +TEST_CASE("zip_view: constructor", "[zip_view]") { + std::vector a{1, 2, 3}; + std::vector b{4, 5, 6}; + std::vector c{7, 8, 9}; + + SECTION("Zip one range") { + auto z = zip(a); + auto it = z.begin(); + CHECK(*it == std::tuple{1}); + ++it; + CHECK(*it == std::tuple{2}); + ++it; + CHECK(*it == std::tuple{3}); + it = z.begin(); + std::get<0>(*it) = 99; + CHECK(a[0] == 99); + } + + SECTION("Zip three ranges") { + auto z = zip(a, b, c); + auto it = z.begin(); + CHECK(*it == std::tuple{1, 4, 7}); + ++it; + CHECK(*it == std::tuple{2, 5, 8}); + ++it; + CHECK(*it == std::tuple{3, 6, 9}); + it = z.begin(); + std::get<0>(*it) = 41; + 
std::get<1>(*it) = 42; + std::get<2>(*it) = 43; + CHECK(a[0] == 41); + CHECK(b[0] == 42); + CHECK(c[0] == 43); + } +} + +TEST_CASE("zip_view: size()", "[zip_view]") { + std::vector a{1, 2, 3}; + std::vector b{4, 5, 6, 7, 8, 9}; + std::vector c{10, 11, 12, 13}; + + CHECK(zip(a).size() == 3); + CHECK(zip(b).size() == 6); + CHECK(zip(c).size() == 4); + CHECK(zip(a, b).size() == 3); + CHECK(zip(a, c).size() == 3); + CHECK(zip(b, c).size() == 4); + CHECK(zip(a, b, c).size() == 3); +} + +TEST_CASE("zip_view: end()", "[zip_view]") { + std::vector a{1, 2, 3}; + std::vector b{4, 5, 6, 7, 8, 9}; + std::vector c{10, 11, 12, 13}; + + [[maybe_unused]] auto x = zip(a).begin(); + [[maybe_unused]] auto y = zip(a).end(); + + CHECK(zip(a).end() == zip(a).begin() + 3); + CHECK(zip(b).end() == zip(b).begin() + 6); + CHECK(zip(c).end() == zip(c).begin() + 4); + CHECK(zip(a, b).end() == zip(a, b).begin() + 3); + CHECK(zip(a, c).end() == zip(a, c).begin() + 3); + CHECK(zip(b, c).end() == zip(b, c).begin() + 4); + CHECK(zip(a, b, c).end() == zip(a, b, c).begin() + 3); + + CHECK(zip(a).end() - zip(a).begin() == 3); + CHECK(zip(b).end() - zip(b).begin() == 6); + CHECK(zip(c).end() - zip(c).begin() == 4); + CHECK(zip(a, b).end() - zip(a, b).begin() == 3); + CHECK(zip(a, c).end() - zip(a, c).begin() == 3); + CHECK(zip(b, c).end() - zip(b, c).begin() == 4); + CHECK(zip(a, b, c).end() - zip(a, b, c).begin() == 3); +} + +TEST_CASE("zip_view: basic iterator properties", "[zip_view]") { + std::vector a{1, 2, 3}; + std::vector b{4, 5, 6, 7, 8, 9}; + std::vector c{10, 11, 12, 13}; + + auto z = zip(a, b, c); + auto it = z.begin(); + auto it2 = z.begin(); + CHECK(it == it2); + CHECK(*it == *it2); + it++; + CHECK(it != it2); + it2++; + CHECK(it == it2); + CHECK(*it == *it2); + auto jt = z.end(); + CHECK(it != jt); + CHECK(it < jt); + CHECK(it <= jt); + CHECK(jt > it); + CHECK(jt >= it); + CHECK(jt == jt); + CHECK(jt >= jt); + CHECK(jt <= jt); + + it = z.begin(); + auto x = *it++; + CHECK(x == 
std::tuple{1, 4, 10}); + CHECK(it == z.begin() + 1); + + it = z.begin(); + auto y = *++it; + CHECK(y == std::tuple{2, 5, 11}); + CHECK(it == z.begin() + 1); + + CHECK(it[0] == *it); + CHECK(it[1] == *(it + 1)); + CHECK(it[2] == *(it + 2)); + CHECK(it[0] == std::tuple{2, 5, 11}); +} + +TEST_CASE("zip_view: alt_var_length_view", "[zip_view]") { + std::vector r = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; + std::vector o = {0, 3, 6, 10}; + auto v = alt_var_length_view{r, o}; + std::vector a{1, 2, 3}; + std::vector b{4, 5, 6, 7, 8, 9}; + std::vector c{10, 11, 12, 13}; + + auto z = zip(a, b, c, v); + auto it = z.begin(); + CHECK(std::get<0>(*it) == 1); + CHECK(std::get<1>(*it) == 4); + CHECK(std::get<2>(*it) == 10); + CHECK(std::ranges::equal( + std::get<3>(*it++), std::vector{1.0, 2.0, 3.0})); + CHECK(std::ranges::equal( + std::get<3>(*it++), std::vector{4.0, 5.0, 6.0})); + CHECK(std::ranges::equal( + std::get<3>(*it++), std::vector{7.0, 8.0, 9.0, 10.0})); +} + +TEST_CASE( + "zip_view: for, std::for_each with alt_var_length_view", "[zip_view]") { + std::vector r = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; + std::vector o = {0, 3, 6, 10}; + auto v = alt_var_length_view{r, o}; + std::vector a{8, 6, 7}; + + auto z = zip(a, v); + + SECTION("for_each") { + size_t count = 0; + std::for_each(z.begin(), z.end(), [&a, &count](auto x) { + auto&& [i, j] = x; + CHECK(i == a[count]); + switch (count) { + case 0: + CHECK(std::ranges::equal(j, std::vector{1.0, 2.0, 3.0})); + break; + case 1: + CHECK(std::ranges::equal(j, std::vector{4.0, 5.0, 6.0})); + break; + case 2: + CHECK( + std::ranges::equal(j, std::vector{7.0, 8.0, 9.0, 10.0})); + break; + } + ++count; + }); + } + SECTION("for") { + size_t count = 0; + for (auto x : z) { + auto&& [i, j] = x; + CHECK(i == a[count]); + switch (count) { + case 0: + CHECK(std::ranges::equal(j, std::vector{1.0, 2.0, 3.0})); + break; + case 1: + CHECK(std::ranges::equal(j, std::vector{4.0, 5.0, 6.0})); + break; + case 2: + 
CHECK( + std::ranges::equal(j, std::vector{7.0, 8.0, 9.0, 10.0})); + break; + } + ++count; + } + } +} diff --git a/tiledb/common/zip_view.h b/tiledb/common/zip_view.h new file mode 100644 index 00000000000..a502bda0ae7 --- /dev/null +++ b/tiledb/common/zip_view.h @@ -0,0 +1,261 @@ +/** + * @file zip_view.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file implements a zip view for zipping together a set of ranges. + * It is intended to implement the zip view as defined for C++23. From + * https://en.cppreference.com/w/cpp/ranges/zip_view: + * 1) A zip_view is a range adaptor that takes one or more views, and produces + * a view whose ith element is a tuple-like value consisting of the ith elements + * of all views. The size of produced view is the minimum of sizes of all + * adapted views. 
2) zip is a customization point object that constructs a + * zip_view. + */ + +#ifndef TILEDB_ZIP_VIEW_H +#define TILEDB_ZIP_VIEW_H + +#include +#include "iterator_facade.h" + +// @todo Should this take viewable ranges? +// template +template +class zip_view { + /** + * Forward declaration to private (random access) iterator class + * @todo generalize to non-random access ranges + */ + template + struct private_iterator; + + using iterator_type = private_iterator; + using const_iterator_type = private_iterator; + + public: + /**************************************************************************** + * Constructors + ****************************************************************************/ + + /** + * Construct a zip view from a set of ranges. The ranges are stored in a + * tuple. + * @param rngs The ranges to zip + * + * @tparam Ranges The types of the ranges to zip + * @param rngs The ranges to zip + */ + template + zip_view(Ranges&&... rngs) + : ranges_{std::forward(rngs)...} { + } + + /**************************************************************************** + * + * Iterator accessors. If all the ranges are random access ranges (which + * we assume is the case for now), an iterator is a tuple of begin iterators + * for each of + * the ranges being zipped, plus an index. + * + * @note The end iterator is only defined if all the ranges are sized ranges, + * in which case the size of the zipped view is the minimum of the sizes of + * the ranges being zipped, and the end iterator is the begin iterator plus + * the size of the zipped view. + * + ****************************************************************************/ + + /** Return an iterator to the beginning of the zipped view. */ + auto begin() { + return std::apply( + [](auto&&... rngs) { + return iterator_type(std::ranges::begin(rngs)...); + }, + ranges_); + } + + /** + * Return an iterator to the end of the zipped view. 
See above for what we + * mean by "end" + */ + auto end() + requires(std::ranges::sized_range && ...) + { + return std::apply( + [this](auto&&... rngs) { + return iterator_type(std::ranges::begin(rngs)..., this->size()); + }, + ranges_); + } + + /** Return an iterator to the beginning of a const zipped view. */ + auto begin() const { + return std::apply( + [](auto&&... rngs) { + return const_iterator_type(std::ranges::cbegin(rngs)...); + }, + ranges_); + } + + /** + * Return an iterator to the end of a const zipped view. See above for what + * we mean by "end" + */ + auto end() const + requires(std::ranges::sized_range && ...) + { + return std::apply( + [this](auto&&... rngs) { + return const_iterator_type( + std::ranges::cbegin(rngs)..., this->size()); + }, + ranges_); + } + /** Return an iterator to the beginning of a const zipped view. */ + auto cbegin() const { + return std::apply( + [](auto&&... rngs) { + return const_iterator_type(std::ranges::cbegin(rngs)...); + }, + ranges_); + } + + /** + * Return an iterator to the end of a const zipped view. See above for what + * we mean by "end" + */ + auto cend() const { + return std::apply( + [this](auto&&... rngs) { + return const_iterator_type( + std::ranges::cbegin(rngs)..., this->size()); + }, + ranges_); + } + + /** + * @brief The size of the zipped view is the size of the smallest range in the + * view. Requires that all the ranges are sized ranges. + * @return The size of the smallest range in the zip view + */ + auto size() const + requires(std::ranges::sized_range && ...) + { + return std::apply( + [](auto&&... rngs) { return std::min({std::ranges::size(rngs)...}); }, + ranges_); + } + + private: + /** + * This is a very straightforward iterator for a zip view over a set of + * random access ranges. It keeps an iterator for each of the ranges being + * zipped, along with an index into them. 
+ * @tparam Rs + */ + template + struct private_iterator : public iterator_facade> { + using value_type_ = std::tuple...>; + using index_type = + std::common_type_t...>; + + /** Default constructor */ + private_iterator() = default; + + /** Construct an iterator from a set of begin iterators */ + private_iterator( + std::ranges::iterator_t... begins, index_type index = 0) + : index_(index) + , begins_(begins...) { + } + + /************************************************************************* + * Functions needed for iterator_facade + * Here we just supply the minimum needed to make the iterator work + *************************************************************************/ + + /** + * Dereference the iterator -- the critical function for defining the + * iterator since the facade bases many type aliases and other functions + * on it and its signature + */ + value_type_ dereference() const { + return std::apply( + [this](auto&&... iters) { return value_type_(iters[index_]...); }, + begins_); + } + + /** Advance the iterator by n */ + auto advance(index_type n) { + index_ += n; + return *this; + } + + /** Return the distance to another iterator */ + auto distance_to(const private_iterator& other) const { + return other.index_ - index_; + } + + /** Compare two iterators for equality */ + bool operator==(const private_iterator& other) const { + return begins_ == other.begins_ && index_ == other.index_; + } + + /************************************************************************* + * Data members + *************************************************************************/ + + /** Current index of iterator */ + index_type index_; + + /** Begin iterators for each of the ranges being zipped */ + std::tuple...> begins_; + }; + + private: + /** The ranges being zipped */ + std::tuple ranges_; +}; + +/** + * Define "zip()" cpo for creating zip views + */ +namespace _zip { +struct _fn { + // @todo Should this take viewable ranges? 
+ // template + template + auto constexpr operator()(T&&... t) const { + return zip_view{std::forward(t)...}; + } +}; +} // namespace _zip +inline namespace _cpo { +inline constexpr auto zip = _zip::_fn{}; +} // namespace _cpo + +#endif // TILEDB_ZIP_VIEW_H