Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add general purpose host memory allocator reference to cuIO with a demo of pooled-pinned allocation. #15079

Merged
merged 13 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions cpp/benchmarks/fixture/nvbench_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
#pragma once

#include <cudf/io/config_utils.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_device.hpp>
Expand All @@ -25,12 +26,17 @@
#include <rmm/mr/device/owning_wrapper.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>
#include <rmm/mr/pinned_host_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <string>

namespace cudf {

namespace detail {
// Custom command-line flags consumed by nvbench_base_fixture; nvbench_main.cpp
// strips these (and their values) from argv before invoking nvbench's parser.
static std::string rmm_mode_param{"--rmm_mode"};  ///< RMM mode command-line parameter name
static std::string cuio_host_mem_param{
  "--cuio_host_mem"};  ///< cuio host memory mode parameter name
}  // namespace detail

/**
Expand Down Expand Up @@ -75,23 +81,55 @@ struct nvbench_base_fixture {
"\nExpecting: cuda, pool, async, arena, managed, or managed_pool");
}

inline rmm::host_async_resource_ref make_cuio_host_pinned()
{
  // Function-local static: the pinned resource is created on first use and
  // lives for the remainder of the process, so the returned ref never dangles.
  static auto pinned_mr = std::make_shared<rmm::mr::pinned_host_memory_resource>();
  return *pinned_mr;
}

inline rmm::host_async_resource_ref make_cuio_host_pinned_pool()
{
  using host_pooled_mr = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;
  // BUG FIX: the previous code passed `std::make_shared<...>().get()` to the pool;
  // that temporary shared_ptr is destroyed at the end of the full expression, so the
  // pool held a dangling upstream pointer. The upstream must outlive the pool, so
  // keep it in its own function-local static.
  static auto upstream = std::make_shared<rmm::mr::pinned_host_memory_resource>();
  static std::shared_ptr<host_pooled_mr> mr = std::make_shared<host_pooled_mr>(
    upstream.get(),
    size_t{1} * 1024 * 1024 * 1024);  // 1 GiB initial pool size

  return *mr;
}

// Maps the --cuio_host_mem mode string onto the matching host-resource factory;
// throws (via CUDF_FAIL) on any unrecognized mode.
inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode)
{
  if (mode == "pinned_pool") { return make_cuio_host_pinned_pool(); }
  if (mode == "pinned") { return make_cuio_host_pinned(); }
  CUDF_FAIL("Unknown cuio_host_mem parameter: " + mode + "\nExpecting: pinned or pinned_pool");
}

nvbench_base_fixture(int argc, char const* const* argv)
{
  // Each recognized flag is followed by a value, hence the `argc - 1` bound:
  // a flag in the final slot has no value and is ignored.
  for (int pos = 1; pos < argc - 1; ++pos) {
    std::string const arg{argv[pos]};
    if (arg == detail::rmm_mode_param) {
      rmm_mode = argv[++pos];
    } else if (arg == detail::cuio_host_mem_param) {
      cuio_host_mode = argv[++pos];
    }
  }

  // Install the device resource first, then the cuIO host resource.
  mr = create_memory_resource(rmm_mode);
  rmm::mr::set_current_device_resource(mr.get());
  std::cout << "RMM memory resource = " << rmm_mode << "\n";

  cudf::io::set_host_memory_resource(create_cuio_host_memory_resource(cuio_host_mode));
  std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n";
}

std::shared_ptr<rmm::mr::device_memory_resource> mr;
std::string rmm_mode{"pool"};

std::string cuio_host_mode{"pinned"};
};

} // namespace cudf
28 changes: 15 additions & 13 deletions cpp/benchmarks/fixture/nvbench_main.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,20 +21,22 @@

#include <vector>

// strip off the rmm_mode parameter before passing the
// strip off the rmm_mode and cuio_host_mem parameters before passing the
// remaining arguments to nvbench::option_parser
#undef NVBENCH_MAIN_PARSE
#define NVBENCH_MAIN_PARSE(argc, argv) \
nvbench::option_parser parser; \
std::vector<std::string> m_args; \
for (int i = 0; i < argc; ++i) { \
std::string arg = argv[i]; \
if (arg == cudf::detail::rmm_mode_param) { \
i += 2; \
} else { \
m_args.push_back(arg); \
} \
} \
// When a recognized flag is seen, advance `i` by one so the loop's own ++i also
// steps over the flag's value. NOTE: the previous `i += 2` combined with the
// loop increment advanced by three, silently dropping the unrelated argument
// that followed the flag's value.
#define NVBENCH_MAIN_PARSE(argc, argv)                \
  nvbench::option_parser parser;                      \
  std::vector<std::string> m_args;                    \
  for (int i = 0; i < argc; ++i) {                    \
    std::string arg = argv[i];                        \
    if (arg == cudf::detail::rmm_mode_param ||        \
        arg == cudf::detail::cuio_host_mem_param) {   \
      ++i;                                            \
    } else {                                          \
      m_args.push_back(arg);                          \
    }                                                 \
  }                                                   \
  parser.parse(m_args)

// this declares/defines the main() function using the definitions above
Expand Down
225 changes: 225 additions & 0 deletions cpp/include/cudf/detail/utilities/rmm_host_vector.hpp
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment (non-blocking): In an ideal (and not too distant) world, this entire file will be unnecessary.

One shouldn't need to define their own allocator, or vector type. We should have an cuda::mr::allocator that can be constructed from a cuda::mr::resource_ref.

I understand not wanting to wait for that, but I just want to give you a heads up on what is coming.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. This is definitely worth replacing.

Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
/*
* Copyright 2024 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/utilities/default_stream.hpp>

#include <rmm/resource_ref.hpp>

#include <cstddef>
#include <limits>
#include <new> // for bad_alloc

#include <cudf/utilities/error.hpp>
#include <thrust/host_vector.h>

namespace cudf::detail {

/*! \p rmm_host_allocator is a CUDA-specific host memory allocator
 * that employs a `rmm::host_async_resource_ref` for allocation.
 *
 * This implementation is ported from pinned_host_vector in cudf.
 *
 * \see https://en.cppreference.com/w/cpp/memory/allocator
 */
template <typename T>
class rmm_host_allocator;

/*! \p rmm_host_allocator<void> is the void specialization. It performs no
 * allocation itself; it only provides the allocator member typedefs and a
 * \p rebind so it can be converted to a concrete \p rmm_host_allocator<U>.
 *
 * \see https://en.cppreference.com/w/cpp/memory/allocator
 */
template <>
class rmm_host_allocator<void> {
 public:
  using value_type      = void;            ///< The type of the elements in the allocator
  using pointer         = void*;           ///< The type returned by address() / allocate()
  using const_pointer   = void const*;     ///< The type returned by address()
  using size_type       = std::size_t;     ///< The type used for the size of the allocation
  using difference_type = std::ptrdiff_t;  ///< The type of the distance between two pointers

  /**
   * @brief converts a `rmm_host_allocator<void>` to `rmm_host_allocator<U>`
   */
  template <typename U>
  struct rebind {
    using other = rmm_host_allocator<U>;  ///< The rebound type
  };
};

/*! \p rmm_host_allocator is a CUDA-specific host memory allocator
 * that employs a `rmm::host_async_resource_ref` for allocation.
 *
 * All (de)allocations are issued asynchronously on the stream supplied at
 * construction, via the resource's `allocate_async` / `deallocate_async`.
 *
 * This implementation is ported from pinned_host_vector in cudf.
 *
 * \see https://en.cppreference.com/w/cpp/memory/allocator
 */
template <typename T>
class rmm_host_allocator {
 public:
  using value_type      = T;         ///< The type of the elements in the allocator
  using pointer         = T*;        ///< The type returned by address() / allocate()
  using const_pointer   = T const*;  ///< The type returned by address()
  using reference       = T&;        ///< The parameter type for address()
  using const_reference = T const&;  ///< The parameter type for address()
  using size_type       = std::size_t;     ///< The type used for the size of the allocation
  using difference_type = std::ptrdiff_t;  ///< The type of the distance between two pointers

  /// Containers propagate the allocator on move assignment, so a moved-to
  /// container deallocates with the same resource that performed the allocation.
  using propagate_on_container_move_assignment = cuda::std::true_type;

  /**
   * @brief converts a `rmm_host_allocator<T>` to `rmm_host_allocator<U>`
   */
  template <typename U>
  struct rebind {
    using other = rmm_host_allocator<U>;  ///< The rebound type
  };

  /**
   * @brief Cannot declare an empty host allocator; a resource and stream are required.
   */
  rmm_host_allocator() = delete;

  /**
   * @brief Construct from a `rmm::host_async_resource_ref` and the stream on
   * which (de)allocations will be performed.
   */
  rmm_host_allocator(rmm::host_async_resource_ref _mr, rmm::cuda_stream_view _stream)
    : mr(_mr), stream(_stream)
  {
  }

  /**
   * @brief Copy constructor
   */
  rmm_host_allocator(rmm_host_allocator const& other) = default;

  /**
   * @brief Move constructor
   */
  rmm_host_allocator(rmm_host_allocator&& other) = default;

  /**
   * @brief Copy-assignment operator
   *
   * NOTE(review): only `mr` is copied; the destination's `stream` is left
   * unchanged — confirm this asymmetry is intentional.
   */
  rmm_host_allocator& operator=(rmm_host_allocator const& col)
  {
    mr = col.mr;
    return *this;
  }

  /**
   * @brief rmm_host_allocator's null destructor does nothing.
   */
  inline ~rmm_host_allocator() {}

  /**
   * @brief This method returns the address of a \c reference of
   * interest.
   *
   * @param r The \c reference of interest.
   * @return \c r's address.
   */
  inline pointer address(reference r) { return std::addressof(r); }

  /**
   * @brief This method returns the address of a \c const_reference
   * of interest.
   *
   * @param r The \c const_reference of interest.
   * @return \c r's address.
   */
  inline const_pointer address(const_reference r) { return std::addressof(r); }

  /**
   * @brief This method allocates storage for objects in host memory.
   *
   * @param cnt The number of objects to allocate.
   * @return a \c pointer to the newly allocated objects.
   * @throws std::bad_alloc when \p cnt exceeds max_size().
   * @note The second parameter to this function is meant as a
   * hint pointer to a nearby memory location, but is
   * not used by this allocator.
   * @note This method does not invoke \p value_type's constructor.
   * It is the responsibility of the caller to initialize the
   * objects at the returned \c pointer.
   */
  inline pointer allocate(size_type cnt, const_pointer /*hint*/ = 0)
  {
    // Guards the byte-count multiplication below against overflow.
    if (cnt > this->max_size()) { throw std::bad_alloc(); }
    return static_cast<pointer>(
      mr.allocate_async(cnt * sizeof(value_type), THRUST_MR_DEFAULT_ALIGNMENT, stream));
  }

  /**
   * @brief This method deallocates host memory previously allocated
   * with this \c rmm_host_allocator.
   *
   * @param p A \c pointer to the previously allocated memory.
   * @param cnt The number of objects previously allocated.
   * @note This method does not invoke \p value_type's destructor.
   * It is the responsibility of the caller to destroy
   * the objects stored at \p p.
   */
  inline void deallocate(pointer p, size_type cnt)
  {
    mr.deallocate_async(p, cnt * sizeof(value_type), THRUST_MR_DEFAULT_ALIGNMENT, stream);
  }

  /**
   * @brief This method returns the maximum size of the \c cnt parameter
   * accepted by the \p allocate() method.
   *
   * @return The maximum number of objects that may be allocated
   * by a single call to \p allocate().
   */
  constexpr inline size_type max_size() const
  {
    return (std::numeric_limits<size_type>::max)() / sizeof(T);
  }

  /**
   * @brief This method tests this \p rmm_host_allocator for equality to
   * another.
   *
   * @param x The other \p rmm_host_allocator of interest.
   * @return true iff both allocators reference the same memory resource.
   * @note The stream is deliberately not part of the comparison.
   */
  inline bool operator==(rmm_host_allocator const& x) const { return x.mr == mr; }

  /**
   * @brief This method tests this \p rmm_host_allocator for inequality
   * to another.
   *
   * @param x The other \p rmm_host_allocator of interest.
   * @return the negation of operator==
   */
  inline bool operator!=(rmm_host_allocator const& x) const { return !operator==(x); }

 private:
  rmm::host_async_resource_ref mr;  ///< Resource used for every (de)allocation
  rmm::cuda_stream_view stream;     ///< Stream the async (de)allocations run on
};

/**
 * @brief A vector class with rmm host memory allocator
 *
 * Behaves like `thrust::host_vector` but obtains its storage through the
 * `rmm::host_async_resource_ref` held by its `rmm_host_allocator`.
 */
template <typename T>
using rmm_host_vector = thrust::host_vector<T, rmm_host_allocator<T>>;

} // namespace cudf::detail
44 changes: 44 additions & 0 deletions cpp/include/cudf/io/config_utils.hpp
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/resource_ref.hpp>

namespace cudf::io {

/**
 * @brief Set the rmm resource to be used for host memory allocations by
 * cudf::detail::hostdevice_vector
 *
 * hostdevice_vector is a utility class that uses a pair of host and device-side buffers for
 * bouncing state between the cpu and the gpu. The resource set with this function (typically a
 * pinned memory allocator) is what it uses to allocate space for its host-side buffer.
 *
 * @param mr The rmm resource to be used for host-side allocations
 * @return The previous resource that was in use
 */
rmm::host_async_resource_ref set_host_memory_resource(rmm::host_async_resource_ref mr);

/**
 * @brief Get the rmm resource being used for host memory allocations by
 * cudf::detail::hostdevice_vector
 *
 * @return The rmm resource used for host-side allocations
 */
rmm::host_async_resource_ref get_host_memory_resource();

}  // namespace cudf::io
Loading
Loading