Skip to content

Commit

Permalink
Pinned vector factory that uses the global pool (#15895)
Browse files Browse the repository at this point in the history
closes #15612
Expanded the set of vector factories to cover pinned vectors. The functions return `cudf::detail::host_vector`, which uses a type-erased allocator, allowing us to utilize the runtime-configurable global pinned (previously host) resource.
The `pinned_host_vector` type has been removed as it can only support the non-pooled pinned allocations. Its use is now replaced with `cudf::detail::host_vector`.
Moved the global host (now pinned) resource out of cuIO and changed the type to host_device. User-specified resources are now required to allocate device-accessible memory. The name has been changed to pinned to reflect the new requirement.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Alessandro Bellina (https://github.com/abellina)
  - Yunsong Wang (https://github.com/PointKernel)
  - Mark Harris (https://github.com/harrism)
  - David Wendt (https://github.com/davidwendt)

URL: #15895
  • Loading branch information
vuule authored Jun 12, 2024
1 parent d2cd1d4 commit f7ba6ab
Show file tree
Hide file tree
Showing 28 changed files with 487 additions and 637 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ add_library(
src/utilities/default_stream.cpp
src/utilities/linked_column.cpp
src/utilities/logger.cpp
src/utilities/pinned_memory.cpp
src/utilities/stacktrace.cpp
src/utilities/stream_pool.cpp
src/utilities/traits.cpp
Expand Down
13 changes: 7 additions & 6 deletions cpp/benchmarks/fixture/nvbench_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/
#pragma once

#include <cudf/io/memory_resource.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/pinned_memory.hpp>

#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/arena_memory_resource.hpp>
Expand Down Expand Up @@ -81,17 +81,18 @@ struct nvbench_base_fixture {
"\nExpecting: cuda, pool, async, arena, managed, or managed_pool");
}

// Returns a reference to a process-lifetime pinned host+device-accessible memory resource.
// The shared_ptr-held static keeps the resource alive until program exit.
inline rmm::host_device_async_resource_ref make_cuio_host_pinned()
{
  static std::shared_ptr<rmm::mr::pinned_host_memory_resource> mr =
    std::make_shared<rmm::mr::pinned_host_memory_resource>();
  return *mr;
}

// Maps the benchmark's cuio_host_mem mode string to a pinned memory resource:
// "pinned"      -> a plain (non-pooled) pinned resource
// "pinned_pool" -> cudf's global pooled pinned resource
// Any other value is a usage error and fails loudly.
inline rmm::host_device_async_resource_ref create_cuio_host_memory_resource(
  std::string const& mode)
{
  if (mode == "pinned") return make_cuio_host_pinned();
  if (mode == "pinned_pool") return cudf::get_pinned_memory_resource();
  CUDF_FAIL("Unknown cuio_host_mem parameter: " + mode + "\nExpecting: pinned or pinned_pool");
}

Expand All @@ -112,14 +113,14 @@ struct nvbench_base_fixture {
rmm::mr::set_current_device_resource(mr.get());
std::cout << "RMM memory resource = " << rmm_mode << "\n";

cudf::io::set_host_memory_resource(create_cuio_host_memory_resource(cuio_host_mode));
cudf::set_pinned_memory_resource(create_cuio_host_memory_resource(cuio_host_mode));
std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n";
}

~nvbench_base_fixture()
{
  // Swap the global resource back to a non-pooled pinned resource so the
  // pool is freed before the CUDA context is destroyed:
  cudf::set_pinned_memory_resource(this->make_cuio_host_pinned());
}

std::shared_ptr<rmm::mr::device_memory_resource> mr;
Expand Down
12 changes: 12 additions & 0 deletions cpp/benchmarks/io/cuio_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/detail/utilities/logger.hpp>

#include <rmm/mr/pinned_host_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <unistd.h>

#include <cstdio>
Expand All @@ -28,6 +31,14 @@

temp_directory const cuio_source_sink_pair::tmpdir{"cudf_gbench"};

// Don't use cudf's pinned pool for the source data; a dedicated non-pooled
// resource keeps benchmark source buffers out of the pool under measurement.
rmm::host_async_resource_ref pinned_memory_resource()
{
  // Meyers singleton; value-initialize in place instead of copy-initializing
  // from a temporary.
  static rmm::mr::pinned_host_memory_resource mr{};

  return mr;
}

std::string random_file_in_dir(std::string const& dir_path)
{
// `mkstemp` modifies the template in place
Expand All @@ -41,6 +52,7 @@ std::string random_file_in_dir(std::string const& dir_path)

cuio_source_sink_pair::cuio_source_sink_pair(io_type type)
: type{type},
pinned_buffer({pinned_memory_resource(), cudf::get_default_stream()}),
d_buffer{0, cudf::get_default_stream()},
file_name{random_file_in_dir(tmpdir.path())},
void_sink{cudf::io::data_sink::create()}
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/io/cuio_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include <cudf_test/file_utilities.hpp>

#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/detail/utilities/host_vector.hpp>
#include <cudf/io/data_sink.hpp>
#include <cudf/io/datasource.hpp>

Expand Down Expand Up @@ -79,7 +79,7 @@ class cuio_source_sink_pair {

io_type const type;
std::vector<char> h_buffer;
cudf::detail::pinned_host_vector<char> pinned_buffer;
cudf::detail::host_vector<char> pinned_buffer;
rmm::device_uvector<std::byte> d_buffer;
std::string const file_name;
std::unique_ptr<cudf::io::data_sink> void_sink;
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#include <benchmarks/io/nvbench_helpers.hpp>

#include <cudf/detail/utilities/stream_pool.hpp>
#include <cudf/io/memory_resource.hpp>
#include <cudf/io/parquet.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/pinned_memory.hpp>
#include <cudf/utilities/thread_pool.hpp>

#include <nvtx3/nvtx3.hpp>
Expand Down
10 changes: 5 additions & 5 deletions cpp/benchmarks/io/text/multibyte_split.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,7 +22,6 @@
#include <cudf_test/file_utilities.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/io/text/data_chunk_source_factories.hpp>
#include <cudf/io/text/detail/bgzip_utils.hpp>
Expand Down Expand Up @@ -132,9 +131,10 @@ static void bench_multibyte_split(nvbench::state& state,

auto const delim_factor = static_cast<double>(delim_percent) / 100;
std::unique_ptr<cudf::io::datasource> datasource;
auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim);
auto host_input = std::vector<char>{};
auto host_pinned_input = cudf::detail::pinned_host_vector<char>{};
auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim);
auto host_input = std::vector<char>{};
auto host_pinned_input =
cudf::detail::make_pinned_vector_async<char>(0, cudf::get_default_stream());

if (source_type != data_chunk_source_type::device &&
source_type != data_chunk_source_type::host_pinned) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/aligned.hpp>
#include <rmm/resource_ref.hpp>

#include <thrust/host_vector.h>
Expand All @@ -32,8 +33,6 @@ namespace cudf::detail {
/*! \p rmm_host_allocator is a CUDA-specific host memory allocator
* that employs \c a `rmm::host_async_resource_ref` for allocation.
*
* This implementation is ported from pinned_host_vector in cudf.
*
* \see https://en.cppreference.com/w/cpp/memory/allocator
*/
template <typename T>
Expand All @@ -42,8 +41,6 @@ class rmm_host_allocator;
/*! \p rmm_host_allocator is a CUDA-specific host memory allocator
* that employs \c an `cudf::host_async_resource_ref` for allocation.
*
* This implementation is ported from pinned_host_vector in cudf.
*
* \see https://en.cppreference.com/w/cpp/memory/allocator
*/
template <>
Expand All @@ -70,8 +67,7 @@ class rmm_host_allocator<void> {
* The \p rmm_host_allocator provides an interface for host memory allocation through the user
* provided \c `rmm::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of
* this reference and therefore it is the user's responsibility to ensure its lifetime for the
* duration of the lifetime of the \p rmm_host_allocator. This implementation is ported from
* pinned_host_vector in cudf.
* duration of the lifetime of the \p rmm_host_allocator.
*
* \see https://en.cppreference.com/w/cpp/memory/allocator
*/
Expand Down Expand Up @@ -121,8 +117,12 @@ class rmm_host_allocator {
// Allocates storage for `cnt` objects of `value_type` via the type-erased
// resource `mr`, stream-ordered on `stream`.
// @throws std::bad_alloc when the request exceeds max_size()
inline pointer allocate(size_type cnt)
{
  if (cnt > this->max_size()) { throw std::bad_alloc(); }  // end if
  auto const result =
    mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream);
  // Synchronize to ensure the memory is allocated before thrust::host_vector initialization
  // TODO: replace thrust::host_vector with a type that does not require synchronization
  stream.synchronize();
  return static_cast<pointer>(result);
}

/**
Expand Down Expand Up @@ -182,6 +182,6 @@ class rmm_host_allocator {
* @brief A vector class with rmm host memory allocator
*/
// Host vector backed by the type-erased rmm_host_allocator, so storage can
// come from the runtime-configurable global pinned resource.
template <typename T>
using host_vector = thrust::host_vector<T, rmm_host_allocator<T>>;

} // namespace cudf::detail
Loading

0 comments on commit f7ba6ab

Please sign in to comment.