diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp
index 701ed67e666..4e4eec3547f 100644
--- a/cpp/benchmarks/fixture/nvbench_fixture.hpp
+++ b/cpp/benchmarks/fixture/nvbench_fixture.hpp
@@ -15,6 +15,7 @@
  */
 #pragma once
 
+#include <cudf/io/memory_resource.hpp>
 #include <cudf/utilities/error.hpp>
 
 #include <rmm/mr/device/arena_memory_resource.hpp>
@@ -25,12 +26,17 @@
 #include <rmm/mr/device/managed_memory_resource.hpp>
 #include <rmm/mr/device/owning_wrapper.hpp>
 #include <rmm/mr/device/pool_memory_resource.hpp>
+#include <rmm/mr/pinned_host_memory_resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <string>
 
 namespace cudf {
+
 namespace detail {
 static std::string rmm_mode_param{"--rmm_mode"};  ///< RMM mode command-line parameter name
+static std::string cuio_host_mem_param{
+  "--cuio_host_mem"};  ///< cuio host memory mode parameter name
 }  // namespace detail
 
 /**
@@ -75,6 +81,30 @@ struct nvbench_base_fixture {
       "\nExpecting: cuda, pool, async, arena, managed, or managed_pool");
   }
 
+  inline rmm::host_async_resource_ref make_cuio_host_pinned()
+  {
+    static std::shared_ptr<rmm::mr::pinned_host_memory_resource> mr =
+      std::make_shared<rmm::mr::pinned_host_memory_resource>();
+    return *mr;
+  }
+
+  inline rmm::host_async_resource_ref make_cuio_host_pinned_pool()
+  {
+    using host_pooled_mr = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;
+    static std::shared_ptr<host_pooled_mr> mr = std::make_shared<host_pooled_mr>(
+      std::make_shared<rmm::mr::pinned_host_memory_resource>().get(),
+      size_t{1} * 1024 * 1024 * 1024);
+
+    return *mr;
+  }
+
+  inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode)
+  {
+    if (mode == "pinned") return make_cuio_host_pinned();
+    if (mode == "pinned_pool") return make_cuio_host_pinned_pool();
+    CUDF_FAIL("Unknown cuio_host_mem parameter: " + mode + "\nExpecting: pinned or pinned_pool");
+  }
+
   nvbench_base_fixture(int argc, char const* const* argv)
   {
     for (int i = 1; i < argc - 1; ++i) {
@@ -82,16 +112,24 @@ struct nvbench_base_fixture {
       if (arg == detail::rmm_mode_param) {
         i++;
         rmm_mode = argv[i];
+      } else if (arg == detail::cuio_host_mem_param) {
+        i++;
+        cuio_host_mode = argv[i];
       }
     }
 
     mr = create_memory_resource(rmm_mode);
     rmm::mr::set_current_device_resource(mr.get());
     std::cout << "RMM memory resource = " << rmm_mode << "\n";
+
+    cudf::io::set_host_memory_resource(create_cuio_host_memory_resource(cuio_host_mode));
+    std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n";
   }
 
   std::shared_ptr<rmm::mr::device_memory_resource> mr;
   std::string rmm_mode{"pool"};
+
+  std::string cuio_host_mode{"pinned"};
 };
 
 }  // namespace cudf
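With both fixture-level flags in place, a benchmark binary built on this fixture can select the device and host resources from the command line, e.g. `PARQUET_READER_NVBENCH --rmm_mode pool --cuio_host_mem pinned_pool` (the binary name here is illustrative). `pinned`, the default, allocates each host buffer directly in pinned memory; `pinned_pool` serves allocations from the 1 GiB pinned pool built in `make_cuio_host_pinned_pool()`. Note that the pool's upstream is a temporary `shared_ptr` whose raw pointer is retained; this works in practice because `rmm::mr::pinned_host_memory_resource` is stateless, but a stateful upstream would need to outlive the pool.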
diff --git a/cpp/benchmarks/fixture/nvbench_main.cpp b/cpp/benchmarks/fixture/nvbench_main.cpp
index 64c4d83ac17..f46cb11a6c3 100644
--- a/cpp/benchmarks/fixture/nvbench_main.cpp
+++ b/cpp/benchmarks/fixture/nvbench_main.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,20 +21,22 @@
 #include <vector>
 
-// strip off the rmm_mode parameter before passing the
+// strip off the rmm_mode and cuio_host_mem parameters before passing the
 // remaining arguments to nvbench::option_parser
 #undef NVBENCH_MAIN_PARSE
-#define NVBENCH_MAIN_PARSE(argc, argv)         \
-  nvbench::option_parser parser;               \
-  std::vector<std::string> m_args;             \
-  for (int i = 0; i < argc; ++i) {             \
-    std::string arg = argv[i];                 \
-    if (arg == cudf::detail::rmm_mode_param) { \
-      i += 2;                                  \
-    } else {                                   \
-      m_args.push_back(arg);                   \
-    }                                          \
-  }                                            \
+#define NVBENCH_MAIN_PARSE(argc, argv)                      \
+  nvbench::option_parser parser;                            \
+  std::vector<std::string> m_args;                          \
+  for (int i = 0; i < argc; ++i) {                          \
+    std::string arg = argv[i];                              \
+    if (arg == cudf::detail::rmm_mode_param) {              \
+      i += 2;                                               \
+    } else if (arg == cudf::detail::cuio_host_mem_param) {  \
+      i += 2;                                               \
+    } else {                                                \
+      m_args.push_back(arg);                                \
+    }                                                       \
+  }                                                         \
   parser.parse(m_args)
 
 // this declares/defines the main() function using the definitions above
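The custom NVBENCH_MAIN_PARSE is needed because nvbench's option parser does not know about these fixture-level flags, so they must be removed from the argument list before it runs. One observation: since the loop header also executes `++i`, `i += 2` skips the flag, its value, and one additional argument; if other flags can follow `--rmm_mode` or `--cuio_host_mem` on the command line, `i += 1` would be the safer increment.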
diff --git a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp b/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp
new file mode 100644
index 00000000000..858501877b0
--- /dev/null
+++ b/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp
@@ -0,0 +1,208 @@
+/*
+ * Copyright 2024 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/resource_ref.hpp>
+
+#include <thrust/host_vector.h>
+
+#include <cuda/std/type_traits>
+
+#include <cstddef>
+#include <limits>
+#include <new>  // for bad_alloc
+
+namespace cudf::detail {
+
+/*! \p rmm_host_allocator is a CUDA-specific host memory allocator
+ *  that employs an `rmm::host_async_resource_ref` for allocation.
+ *
+ *  This implementation is ported from pinned_host_vector in cudf.
+ *
+ *  \see https://en.cppreference.com/w/cpp/memory/allocator
+ */
+template <typename T>
+class rmm_host_allocator;
+
+/*! \p rmm_host_allocator is a CUDA-specific host memory allocator
+ *  that employs an `rmm::host_async_resource_ref` for allocation.
+ *
+ *  This implementation is ported from pinned_host_vector in cudf.
+ *
+ *  \see https://en.cppreference.com/w/cpp/memory/allocator
+ */
+template <>
+class rmm_host_allocator<void> {
+ public:
+  using value_type      = void;            ///< The type of the elements in the allocator
+  using pointer         = void*;           ///< The type returned by address() / allocate()
+  using const_pointer   = void const*;     ///< The type returned by address()
+  using size_type       = std::size_t;     ///< The type used for the size of the allocation
+  using difference_type = std::ptrdiff_t;  ///< The type of the distance between two pointers
+
+  /**
+   * @brief converts a `rmm_host_allocator<void>` to `rmm_host_allocator<U>`
+   */
+  template <typename U>
+  struct rebind {
+    using other = rmm_host_allocator<U>;  ///< The rebound type
+  };
+};
+
+/*! \p rmm_host_allocator is a CUDA-specific host memory allocator
+ *  that employs an `rmm::host_async_resource_ref` for allocation.
+ *
+ *  The \p rmm_host_allocator provides an interface for host memory allocation through the user
+ *  provided `rmm::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of
+ *  this reference, so it is the user's responsibility to ensure that the referenced resource
+ *  outlives the \p rmm_host_allocator. This implementation is ported from pinned_host_vector in
+ *  cudf.
+ *
+ *  \see https://en.cppreference.com/w/cpp/memory/allocator
+ */
+template <typename T>
+class rmm_host_allocator {
+ public:
+  using value_type      = T;               ///< The type of the elements in the allocator
+  using pointer         = T*;              ///< The type returned by address() / allocate()
+  using const_pointer   = T const*;        ///< The type returned by address()
+  using reference       = T&;              ///< The parameter type for address()
+  using const_reference = T const&;        ///< The parameter type for address()
+  using size_type       = std::size_t;     ///< The type used for the size of the allocation
+  using difference_type = std::ptrdiff_t;  ///< The type of the distance between two pointers
+
+  typedef cuda::std::true_type propagate_on_container_move_assignment;
+
+  /**
+   * @brief converts a `rmm_host_allocator<T>` to `rmm_host_allocator<U>`
+   */
+  template <typename U>
+  struct rebind {
+    using other = rmm_host_allocator<U>;  ///< The rebound type
+  };
+
+  /**
+   * @brief Cannot declare an empty host allocator.
+   */
+  rmm_host_allocator() = delete;
+
+  /**
+   * @brief Construct from an `rmm::host_async_resource_ref`
+   */
+  rmm_host_allocator(rmm::host_async_resource_ref _mr, rmm::cuda_stream_view _stream)
+    : mr(_mr), stream(_stream)
+  {
+  }
+
+  /**
+   * @brief Copy constructor
+   */
+  rmm_host_allocator(rmm_host_allocator const& other) = default;
+
+  /**
+   * @brief Move constructor
+   */
+  rmm_host_allocator(rmm_host_allocator&& other) = default;
+
+  /**
+   * @brief Assignment operator
+   */
+  rmm_host_allocator& operator=(rmm_host_allocator const& other)
+  {
+    mr = other.mr;
+    return *this;
+  }
+
+  /**
+   * @brief rmm_host_allocator's null destructor does nothing.
+   */
+  inline ~rmm_host_allocator() {}
+
+  /**
+   * @brief This method allocates storage for objects in host memory.
+   *
+   * @param cnt The number of objects to allocate.
+   * @return a \c pointer to the newly allocated objects.
+   * @note This method does not invoke \p value_type's constructor.
+   *       It is the responsibility of the caller to initialize the
+   *       objects at the returned \c pointer.
+   */
+  inline pointer allocate(size_type cnt)
+  {
+    if (cnt > this->max_size()) { throw std::bad_alloc(); }  // end if
+    return static_cast<pointer>(
+      mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream));
+  }
+
+  /**
+   * @brief This method deallocates host memory previously allocated
+   *        with this \c rmm_host_allocator.
+   *
+   * @param p A \c pointer to the previously allocated memory.
+   * @note The second parameter is the number of objects previously allocated.
+   * @note This method does not invoke \p value_type's destructor.
+   *       It is the responsibility of the caller to destroy
+   *       the objects stored at \p p.
+   */
+  inline void deallocate(pointer p, size_type cnt)
+  {
+    mr.deallocate_async(p, cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream);
+  }
+
+  /**
+   * @brief This method returns the maximum size of the \c cnt parameter
+   *        accepted by the \p allocate() method.
+   *
+   * @return The maximum number of objects that may be allocated
+   *         by a single call to \p allocate().
+   */
+  constexpr inline size_type max_size() const
+  {
+    return (std::numeric_limits<size_type>::max)() / sizeof(T);
+  }
+
+  /**
+   * @brief This method tests this \p rmm_host_allocator for equality to
+   *        another.
+   *
+   * @param x The other \p rmm_host_allocator of interest.
+   * @return true if the two allocators reference the same memory resource
+   */
+  inline bool operator==(rmm_host_allocator const& x) const { return x.mr == mr; }
+
+  /**
+   * @brief This method tests this \p rmm_host_allocator for inequality
+   *        to another.
+   *
+   * @param x The other \p rmm_host_allocator of interest.
+   * @return true if the two allocators do not reference the same memory resource
+   */
+  inline bool operator!=(rmm_host_allocator const& x) const { return !operator==(x); }
+
+ private:
+  rmm::host_async_resource_ref mr;
+  rmm::cuda_stream_view stream;
+};
+
+/**
+ * @brief A vector class with rmm host memory allocator
+ */
+template <typename T>
+using rmm_host_vector = thrust::host_vector<T, rmm_host_allocator<T>>;
+
+}  // namespace cudf::detail
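As a usage sketch (not part of the diff): `rmm_host_allocator` has no default constructor, so an `rmm_host_vector` must be built with an explicit allocator; the resource ref and stream below are assumed to come from the caller.

    #include <cudf/detail/utilities/rmm_host_vector.hpp>

    #include <rmm/cuda_stream_view.hpp>
    #include <rmm/resource_ref.hpp>

    #include <cstddef>

    // Build a host vector whose storage is obtained, stream-ordered, from `mr`.
    cudf::detail::rmm_host_vector<int> make_staging_buffer(rmm::host_async_resource_ref mr,
                                                           rmm::cuda_stream_view stream,
                                                           std::size_t n)
    {
      auto vec =
        cudf::detail::rmm_host_vector<int>(cudf::detail::rmm_host_allocator<int>(mr, stream));
      vec.resize(n);  // allocates n * sizeof(int) bytes via mr.allocate_async(..., stream)
      return vec;
    }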
diff --git a/cpp/include/cudf/io/memory_resource.hpp b/cpp/include/cudf/io/memory_resource.hpp
new file mode 100644
index 00000000000..ea79d6a3029
--- /dev/null
+++ b/cpp/include/cudf/io/memory_resource.hpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <rmm/resource_ref.hpp>
+
+namespace cudf::io {
+
+/**
+ * @brief Set the rmm resource to be used for host memory allocations by
+ * cudf::detail::hostdevice_vector
+ *
+ * hostdevice_vector is a utility class that uses a pair of host and device-side buffers for
+ * bouncing state between the CPU and the GPU. The resource set with this function (typically a
+ * pinned memory allocator) is what it uses to allocate space for its host-side buffer.
+ *
+ * @param mr The rmm resource to be used for host-side allocations
+ * @return The previous resource that was in use
+ */
+rmm::host_async_resource_ref set_host_memory_resource(rmm::host_async_resource_ref mr);
+
+/**
+ * @brief Get the rmm resource being used for host memory allocations by
+ * cudf::detail::hostdevice_vector
+ *
+ * @return The rmm resource used for host-side allocations
+ */
+rmm::host_async_resource_ref get_host_memory_resource();
+
+}  // namespace cudf::io
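A sketch of the intended get/set pattern, mirroring the test at the bottom of this diff (the 128 MiB pool size is arbitrary; the upstream is a named static so it outlives the pool):

    #include <cudf/io/memory_resource.hpp>

    #include <rmm/mr/device/pool_memory_resource.hpp>
    #include <rmm/mr/pinned_host_memory_resource.hpp>

    #include <cstddef>

    void run_with_pooled_pinned_host_memory()
    {
      static rmm::mr::pinned_host_memory_resource upstream{};
      static rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource> pool{
        &upstream, std::size_t{128} * 1024 * 1024};

      // Install the pool, remembering the previous resource ...
      auto const last_mr = cudf::io::set_host_memory_resource(pool);
      // ... run cuIO workloads; hostdevice_vector host buffers now come from `pool` ...
      // ... and restore the previous resource afterwards.
      cudf::io::set_host_memory_resource(last_mr);
    }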
diff --git a/cpp/include/cudf/utilities/export.hpp b/cpp/include/cudf/utilities/export.hpp
new file mode 100644
index 00000000000..dcc72d3e1f6
--- /dev/null
+++ b/cpp/include/cudf/utilities/export.hpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+// Macros used for defining symbol visibility; only GCC-compatible compilers are supported
+#if (defined(__GNUC__) && !defined(__MINGW32__) && !defined(__MINGW64__))
+#define CUDF_EXPORT __attribute__((visibility("default")))
+#define CUDF_HIDDEN __attribute__((visibility("hidden")))
+#else
+#define CUDF_EXPORT
+#define CUDF_HIDDEN
+#endif
diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu
index aa4f96aa2e0..0931bb4a55c 100644
--- a/cpp/src/io/parquet/reader_impl_preprocess.cu
+++ b/cpp/src/io/parquet/reader_impl_preprocess.cu
@@ -634,8 +634,8 @@ void reader::impl::build_string_dict_indices()
   thrust::fill(
     rmm::exec_policy_nosync(_stream), str_dict_index_count.begin(), str_dict_index_count.end(), 0);
   thrust::for_each(rmm::exec_policy_nosync(_stream),
-                   pass.pages.begin(),
-                   pass.pages.end(),
+                   pass.pages.d_begin(),
+                   pass.pages.d_end(),
                    set_str_dict_index_count{str_dict_index_count, pass.chunks});
 
   size_t const total_str_dict_indexes = thrust::reduce(
diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp
index 0f8961334cf..2f7a6131e3d 100644
--- a/cpp/src/io/utilities/config_utils.cpp
+++ b/cpp/src/io/utilities/config_utils.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,11 +17,18 @@
 #include "config_utils.hpp"
 
 #include <cudf/utilities/error.hpp>
+#include <cudf/utilities/export.hpp>
+
+#include <rmm/mr/pinned_host_memory_resource.hpp>
+#include <rmm/resource_ref.hpp>
+#include <mutex>
 
 #include <cstdlib>
 #include <string>
 
-namespace cudf::io::detail {
+namespace cudf::io {
+
+namespace detail {
 
 namespace cufile_integration {
 
@@ -80,4 +87,38 @@ bool is_stable_enabled() { return is_all_enabled() or get_env_policy() == usage_
 
 }  // namespace nvcomp_integration
 
-}  // namespace cudf::io::detail
+inline std::mutex& host_mr_lock()
+{
+  static std::mutex map_lock;
+  return map_lock;
+}
+
+inline rmm::host_async_resource_ref default_pinned_mr()
+{
+  static rmm::mr::pinned_host_memory_resource default_mr{};
+  return default_mr;
+}
+
+CUDF_EXPORT inline auto& host_mr()
+{
+  static rmm::host_async_resource_ref host_mr = default_pinned_mr();
+  return host_mr;
+}
+
+}  // namespace detail
+
+rmm::host_async_resource_ref set_host_memory_resource(rmm::host_async_resource_ref mr)
+{
+  std::lock_guard lock{detail::host_mr_lock()};
+  auto last_mr = detail::host_mr();
+  detail::host_mr() = mr;
+  return last_mr;
+}
+
+rmm::host_async_resource_ref get_host_memory_resource()
+{
+  std::lock_guard lock{detail::host_mr_lock()};
+  return detail::host_mr();
+}
+
+}  // namespace cudf::io
diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp
index 3cd70801cdf..a1e8af51858 100644
--- a/cpp/src/io/utilities/hostdevice_vector.hpp
+++ b/cpp/src/io/utilities/hostdevice_vector.hpp
@@ -19,13 +19,15 @@
 #include "config_utils.hpp"
 #include "hostdevice_span.hpp"
 
-#include <cudf/detail/utilities/pinned_host_vector.hpp>
+#include <cudf/detail/utilities/rmm_host_vector.hpp>
+#include <cudf/io/memory_resource.hpp>
 #include <cudf/utilities/error.hpp>
 #include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <variant>
 
@@ -33,13 +35,6 @@
 
 namespace cudf::detail {
 
-inline bool hostdevice_vector_uses_pageable_buffer()
-{
-  static bool const use_pageable =
-    cudf::io::detail::getenv_or("LIBCUDF_IO_PREFER_PAGEABLE_TMP_MEMORY", 0);
-  return use_pageable;
-}
-
 /**
  * @brief A helper class that wraps fixed-length device memory for the GPU, and
  * a mirror host pinned memory for the CPU.
@@ -62,23 +57,12 @@ class hostdevice_vector {
   }
 
   explicit hostdevice_vector(size_t initial_size, size_t max_size, rmm::cuda_stream_view stream)
-    : d_data(0, stream)
+    : h_data({cudf::io::get_host_memory_resource(), stream}), d_data(0, stream)
   {
     CUDF_EXPECTS(initial_size <= max_size, "initial_size cannot be larger than max_size");
 
-    if (hostdevice_vector_uses_pageable_buffer()) {
-      h_data_owner = thrust::host_vector<T>();
-    } else {
-      h_data_owner = cudf::detail::pinned_host_vector<T>();
-    }
-
-    std::visit(
-      [&](auto&& v) {
-        v.reserve(max_size);
-        v.resize(initial_size);
-        host_data = v.data();
-      },
-      h_data_owner);
+    h_data.reserve(max_size);
+    h_data.resize(initial_size);
 
     current_size = initial_size;
     d_data.resize(max_size, stream);
@@ -88,7 +72,7 @@
   {
     CUDF_EXPECTS(size() < capacity(),
                  "Cannot insert data into hostdevice_vector because capacity has been exceeded.");
-    host_data[current_size++] = data;
+    h_data[current_size++] = data;
   }
 
   [[nodiscard]] size_t capacity() const noexcept { return d_data.size(); }
@@ -96,11 +80,11 @@ class hostdevice_vector {
   [[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); }
   [[nodiscard]] bool empty() const noexcept { return size() == 0; }
 
-  [[nodiscard]] T& operator[](size_t i) { return host_data[i]; }
-  [[nodiscard]] T const& operator[](size_t i) const { return host_data[i]; }
+  [[nodiscard]] T& operator[](size_t i) { return h_data[i]; }
+  [[nodiscard]] T const& operator[](size_t i) const { return h_data[i]; }
 
-  [[nodiscard]] T* host_ptr(size_t offset = 0) { return host_data + offset; }
-  [[nodiscard]] T const* host_ptr(size_t offset = 0) const { return host_data + offset; }
+  [[nodiscard]] T* host_ptr(size_t offset = 0) { return h_data.data() + offset; }
+  [[nodiscard]] T const* host_ptr(size_t offset = 0) const { return h_data.data() + offset; }
 
   [[nodiscard]] T* begin() { return host_ptr(); }
   [[nodiscard]] T const* begin() const { return host_ptr(); }
@@ -171,7 +155,7 @@ class hostdevice_vector {
    */
   [[nodiscard]] operator hostdevice_span<T>()
   {
-    return hostdevice_span<T>{host_data, d_data.data(), size()};
+    return hostdevice_span<T>{h_data.data(), d_data.data(), size()};
   }
 
   /**
@@ -186,12 +170,11 @@ class hostdevice_vector {
     CUDF_EXPECTS(offset < d_data.size(), "Offset is out of bounds.");
     CUDF_EXPECTS(count <= d_data.size() - offset,
                  "The span with given offset and count is out of bounds.");
-    return hostdevice_span<T>{host_data + offset, d_data.data() + offset, count};
+    return hostdevice_span<T>{h_data.data() + offset, d_data.data() + offset, count};
   }
 
  private:
-  std::variant<thrust::host_vector<T>, cudf::detail::pinned_host_vector<T>> h_data_owner;
-  T* host_data = nullptr;
+  cudf::detail::rmm_host_vector<T> h_data;
   size_t current_size = 0;
   rmm::device_uvector<T> d_data;
 };
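The net effect: the host side of `hostdevice_vector`'s mirrored pair is now an `rmm_host_vector<T>` allocated from whatever `cudf::io::get_host_memory_resource()` returns, and the pageable-memory fallback controlled by the `LIBCUDF_IO_PREFER_PAGEABLE_TMP_MEMORY` environment variable is gone, along with the `std::variant` bookkeeping. A behavior sketch (illustrative sizes; `stream` assumed to be a valid `rmm::cuda_stream_view`):

    // Host buffer comes from the installed host resource, device buffer from
    // the current device resource; both are reserved for max_size up front.
    cudf::detail::hostdevice_vector<int> vec(0, 256, stream);
    vec.push_back(7);  // writes h_data[0] on the host
    // a subsequent host-to-device copy makes the value visible in d_data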
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index fa9d2ee88ce..135a40b076a 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -376,6 +376,7 @@ ConfigureTest(
   utilities_tests/column_debug_tests.cpp
   utilities_tests/column_utilities_tests.cpp
   utilities_tests/column_wrapper_tests.cpp
+  utilities_tests/io_utilities_tests.cpp
   utilities_tests/lists_column_wrapper_tests.cpp
   utilities_tests/logger_tests.cpp
   utilities_tests/default_stream_tests.cpp
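The new test below exercises this end to end: it installs a pooled pinned resource, round-trips a table through a Parquet file, compares the result, and restores the previous resource. As in the benchmark fixture, the pool's upstream is created as a temporary whose raw pointer is retained, which is tolerable only because `rmm::mr::pinned_host_memory_resource` is stateless.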
diff --git a/cpp/tests/utilities_tests/io_utilities_tests.cpp b/cpp/tests/utilities_tests/io_utilities_tests.cpp
new file mode 100644
index 00000000000..6981ad71f1e
--- /dev/null
+++ b/cpp/tests/utilities_tests/io_utilities_tests.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/table_utilities.hpp>
+
+#include <cudf/io/memory_resource.hpp>
+#include <cudf/io/parquet.hpp>
+
+#include <rmm/mr/device/pool_memory_resource.hpp>
+#include <rmm/mr/pinned_host_memory_resource.hpp>
+#include <thrust/iterator/counting_iterator.h>
+
+class IoUtilitiesTest : public cudf::test::BaseFixture {};
+
+TEST(IoUtilitiesTest, HostMemoryGetAndSet)
+{
+  // Global environment for temporary files
+  auto const temp_env = static_cast<cudf::test::TempDirTestEnvironment*>(
+    ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment));
+
+  // pinned/pooled host memory resource
+  using host_pooled_mr = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;
+  host_pooled_mr mr(std::make_shared<rmm::mr::pinned_host_memory_resource>().get(),
+                    size_t{128} * 1024 * 1024);
+
+  // set new resource
+  auto last_mr = cudf::io::get_host_memory_resource();
+  cudf::io::set_host_memory_resource(mr);
+
+  constexpr int num_rows = 32 * 1024;
+  auto valids =
+    cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 2; });
+  auto values = thrust::make_counting_iterator(0);
+
+  cudf::test::fixed_width_column_wrapper<int> col(values, values + num_rows, valids);
+
+  cudf::table_view expected({col});
+  auto filepath = temp_env->get_temp_filepath("IoUtilsMemTest.parquet");
+  cudf::io::parquet_writer_options out_args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_parquet(out_args);
+
+  cudf::io::parquet_reader_options const read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto const result = cudf::io::read_parquet(read_opts);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected);
+
+  // reset memory resource back
+  cudf::io::set_host_memory_resource(last_mr);
+}