Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor pinned memory vector and ORC+Parquet writers #13206

Merged
merged 19 commits into from
Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ outputs:
- test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
- test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp
- test -f $PREFIX/include/cudf/detail/utilities/logger.hpp
- test -f $PREFIX/include/cudf/detail/utilities/pinned_allocator.hpp
- test -f $PREFIX/include/cudf/detail/utilities/pinned_host_vector.hpp
- test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp
- test -f $PREFIX/include/cudf/detail/utilities/visitor_overload.hpp
- test -f $PREFIX/include/cudf/dictionary/detail/concatenate.hpp
Expand Down
5 changes: 2 additions & 3 deletions cpp/benchmarks/io/text/multibyte_split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include <cudf_test/file_utilities.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/utilities/pinned_allocator.hpp>
#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/io/text/data_chunk_source_factories.hpp>
#include <cudf/io/text/detail/bgzip_utils.hpp>
Expand All @@ -33,7 +33,6 @@
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <thrust/host_vector.h>
#include <thrust/transform.h>

#include <nvbench/nvbench.cuh>
Expand Down Expand Up @@ -136,7 +135,7 @@ static void bench_multibyte_split(nvbench::state& state,
std::unique_ptr<cudf::io::datasource> datasource;
auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim);
auto host_input = std::vector<char>{};
auto host_pinned_input = thrust::host_vector<char, cudf::detail::pinned_allocator<char>>{};
auto host_pinned_input = cudf::detail::pinned_host_vector<char>{};

if (source_type != data_chunk_source_type::device &&
source_type != data_chunk_source_type::host_pinned) {
Expand Down
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, pinned memory is always on the host side thus not sure this renaming is really needed.

Copy link
Contributor Author

@ttnghia ttnghia Apr 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO the name *_host_vector is better expressing its purpose, similar to having thurst::host_vector instead of just thrust::vector.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I read the name as host_vector in pinned memory, so the name looks good.

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2008-2022 NVIDIA Corporation
* Copyright 2008-2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,6 +22,8 @@

#include <cudf/utilities/error.hpp>

#include <thrust/host_vector.h>

namespace cudf::detail {

/*! \p pinned_allocator is a CUDA-specific host memory allocator
Expand Down Expand Up @@ -199,4 +201,11 @@ class pinned_allocator {
return !operator==(x);
}
};

/**
* @brief A vector class with pinned host memory allocator
*/
template <typename T>
using pinned_host_vector = thrust::host_vector<T, pinned_allocator<T>>;

} // namespace cudf::detail
4 changes: 2 additions & 2 deletions cpp/include/cudf/io/detail/orc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class writer {
*/
explicit writer(std::unique_ptr<cudf::io::data_sink> sink,
orc_writer_options const& options,
SingleWriteMode mode,
single_write_mode mode,
rmm::cuda_stream_view stream);

/**
Expand All @@ -109,7 +109,7 @@ class writer {
*/
explicit writer(std::unique_ptr<cudf::io::data_sink> sink,
chunked_orc_writer_options const& options,
SingleWriteMode mode,
single_write_mode mode,
rmm::cuda_stream_view stream);

/**
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/io/detail/parquet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ class writer {
*/
explicit writer(std::vector<std::unique_ptr<data_sink>> sinks,
parquet_writer_options const& options,
SingleWriteMode mode,
single_write_mode mode,
rmm::cuda_stream_view stream);

/**
Expand All @@ -171,7 +171,7 @@ class writer {
*/
explicit writer(std::vector<std::unique_ptr<data_sink>> sinks,
chunked_parquet_writer_options const& options,
SingleWriteMode mode,
single_write_mode mode,
rmm::cuda_stream_view stream);

/**
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/io/detail/utils.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,7 +22,7 @@ namespace detail {
/**
* @brief Whether writer writes in chunks or all at once
*/
enum class SingleWriteMode : bool { YES, NO };
enum class single_write_mode : bool { YES, NO };
} // namespace detail
} // namespace io
} // namespace cudf
8 changes: 4 additions & 4 deletions cpp/src/io/functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ void write_orc(orc_writer_options const& options)
CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing");

auto writer = std::make_unique<detail_orc::writer>(
std::move(sinks[0]), options, io_detail::SingleWriteMode::YES, cudf::get_default_stream());
std::move(sinks[0]), options, io_detail::single_write_mode::YES, cudf::get_default_stream());

writer->write(options.get_table());
}
Expand All @@ -444,7 +444,7 @@ orc_chunked_writer::orc_chunked_writer(chunked_orc_writer_options const& options
CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing");

writer = std::make_unique<detail_orc::writer>(
std::move(sinks[0]), options, io_detail::SingleWriteMode::NO, cudf::get_default_stream());
std::move(sinks[0]), options, io_detail::single_write_mode::NO, cudf::get_default_stream());
}

/**
Expand Down Expand Up @@ -519,7 +519,7 @@ std::unique_ptr<std::vector<uint8_t>> write_parquet(parquet_writer_options const

auto sinks = make_datasinks(options.get_sink());
auto writer = std::make_unique<detail_parquet::writer>(
std::move(sinks), options, io_detail::SingleWriteMode::YES, cudf::get_default_stream());
std::move(sinks), options, io_detail::single_write_mode::YES, cudf::get_default_stream());

writer->write(options.get_table(), options.get_partitions());

Expand Down Expand Up @@ -575,7 +575,7 @@ parquet_chunked_writer::parquet_chunked_writer(chunked_parquet_writer_options co
auto sinks = make_datasinks(options.get_sink());

writer = std::make_unique<detail_parquet::writer>(
std::move(sinks), options, io_detail::SingleWriteMode::NO, cudf::get_default_stream());
std::move(sinks), options, io_detail::single_write_mode::NO, cudf::get_default_stream());
}

/**
Expand Down
Loading