rapidsai · rapids-bot · Aug 24, 2021 · Jun 27, 2021 · Jun 27, 2021 · Jul 2, 2021
@@ -289,6 +289,7 @@ add_library(cudf
     src/io/parquet/writer_impl.cu
     src/io/statistics/orc_column_statistics.cu
     src/io/statistics/parquet_column_statistics.cu
+    src/io/text/multibyte_split.cu
     src/io/utilities/column_buffer.cpp
     src/io/utilities/data_sink.cpp
     src/io/utilities/datasource.cpp

@@ -242,3 +242,8 @@ ConfigureBench(STRINGS_BENCH
 # - json benchmark -------------------------------------------------------------------
 ConfigureBench(JSON_BENCH
   string/json_benchmark.cpp)
+
+###################################################################################################
+# - io benchmark ---------------------------------------------------------------------
+ConfigureBench(MULTIBYTE_SPLIT_BENCHMARK
+  io/text/multibyte_split_benchmark.cpp)
@@ -33,6 +33,8 @@ using cudf::io::io_type;
   benchmark(name##_buffer_output, type_or_group, static_cast<uint32_t>(io_type::HOST_BUFFER)); \
   benchmark(name##_void_output, type_or_group, static_cast<uint32_t>(io_type::VOID));
 
+/**
+ * Declaration only; the definition is not part of this header.
+ *
+ * NOTE(review): presumably returns the path of a new, uniquely named file located in
+ * `dir_path` -- confirm against the definition before relying on whether the file already
+ * exists on return.
+ */
+std::string random_file_in_dir(std::string const& dir_path);
+
 /**
  * @brief Class to create a coupled `source_info` and `sink_info` of given type.
  */

@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/io/cuio_benchmark_common.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/file_utilities.hpp>
+
+#include <cudf/io/text/data_chunk_source_factories.hpp>
+#include <cudf/io/text/multibyte_split.hpp>
+#include <cudf/types.hpp>
+
+#include <thrust/transform.h>
+
+#include <cstdio>
+#include <fstream>
+#include <memory>
+
+using cudf::test::fixed_width_column_wrapper;
+
+temp_directory const temp_dir("cudf_gbench");
+
+/**
+ * @brief Benchmarks `cudf::io::text::multibyte_split` on a file-backed data source.
+ *
+ * The input is `state.range(0)` copies of the character 'x', so none of the delimiters below ever
+ * occurs in it. The reported throughput is `num_chars` bytes per iteration.
+ *
+ * @param state The Google Benchmark state; `state.range(0)` is the input size in bytes.
+ */
+static void BM_multibyte_split(benchmark::State& state)
+{
+  // mix of multi-byte (emoji), single-byte and two-byte delimiters.
+  auto delimiters = std::vector<std::string>({"😀", "😎", ",", "::"});
+
+  int32_t num_chars = state.range(0);
+  auto host_input   = std::string(num_chars, 'x');
+  // NOTE(review): `device_input` is only referenced by the commented-out `make_source` call
+  // below, yet it is still constructed from the full input on every run.
+  auto device_input = cudf::string_scalar(host_input);
+
+  auto temp_file_name = random_file_in_dir(temp_dir.path());
+
+  // NOTE(review): `mkstemp` rewrites its argument in place and expects a template ending in
+  // "XXXXXX"; whether `random_file_in_dir` returns such a template (or already created the file)
+  // is not visible here -- confirm this call is needed. Its return value is passed straight to
+  // `close` without being checked.
+  close(mkstemp(const_cast<char*>(temp_file_name.data())));
+  {
+    // write the whole input to the temporary file; the scope ensures the stream is gone before
+    // the file is read back.
+    auto temp_fostream = std::ofstream(temp_file_name, std::ofstream::out);
+    temp_fostream << host_input;
+    temp_fostream.close();
+  }
+
+  // wait for outstanding device work before the timed loop starts (return code is not checked).
+  cudaDeviceSynchronize();
+
+  // the alternatives below benchmark the device-resident and host-string sources instead.
+  auto source = cudf::io::text::make_source_from_file(temp_file_name);
+  // auto source = cudf::io::text::make_source(device_input);
+  // auto source = cudf::io::text::make_source(host_input);
+
+  for (auto _ : state) {
+    // NOTE(review): presumably times this scope with CUDA events and reports it as manual time;
+    // the meaning of the `true` argument is defined in synchronization.hpp -- confirm.
+    cuda_event_timer raii(state, true);
+    auto output = cudf::io::text::multibyte_split(*source, delimiters);
+  }
+
+  state.SetBytesProcessed(state.iterations() * num_chars);
+}
+
+/**
+ * @brief Fixture used to register the multibyte_split benchmarks.
+ */
+class MultibyteSplitBenchmark : public cudf::benchmark {
+};
+
+// Defines and registers a fixture benchmark called `name` which forwards to
+// `BM_multibyte_split`. The single registered size is 1 << 30 bytes, timed manually and
+// reported in milliseconds.
+#define MULTIBYTE_SPLIT_BM_BENCHMARK_DEFINE(name)                               \
+  BENCHMARK_DEFINE_F(MultibyteSplitBenchmark, name)(::benchmark::State & state) \
+  {                                                                             \
+    BM_multibyte_split(state);                                                  \
+  }                                                                             \
+  BENCHMARK_REGISTER_F(MultibyteSplitBenchmark, name)                           \
+    ->Range(1 << 30, 1 << 30)                                                   \
+    ->UseManualTime()                                                           \
+    ->Unit(benchmark::kMillisecond);
+
+MULTIBYTE_SPLIT_BM_BENCHMARK_DEFINE(multibyte_split_simple);
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <cudf/utilities/span.hpp>
+
+#include <rmm/cuda_stream_pool.hpp>
+#include <rmm/device_buffer.hpp>
+
+#include <cstdint>
+#include <memory>
+
+namespace cudf {
+namespace io {
+namespace text {
+
+/**
+ * @brief represents a possibly-shared view over device memory.
+ *
+ * The chunk only wraps a `device_span<char const>`; it does not own the memory it refers to.
+ */
+struct data_chunk {
+  /// Wraps `data` without copying the bytes it refers to.
+  data_chunk(device_span<char const> data) : _data(data) {}
+
+  /// Returns the wrapped span. Declared `const` so the conversion also works on const chunks.
+  operator cudf::device_span<char const>() const { return _data; }
+
+  /// Number of bytes in the view, narrowed to the 32-bit size type used by the reader interface.
+  uint32_t size() const { return static_cast<uint32_t>(_data.size()); }
+
+ private:
+  device_span<char const> _data;
+};
+
+/**
+ * @brief a reader capable of producing views over device memory
+ *
+ * Readers are created and owned through `std::unique_ptr<data_chunk_reader>`, so they are
+ * destroyed through a base-class pointer.
+ */
+class data_chunk_reader {
+ public:
+  // virtual so that deleting a derived reader through a base pointer runs the derived destructor.
+  virtual ~data_chunk_reader() = default;
+
+  /**
+   * @brief Produces the next chunk of data.
+   *
+   * @param size The number of bytes requested
+   * @param stream The CUDA stream passed to the implementation for its device work
+   * @return A view of the next chunk
+   */
+  virtual data_chunk get_next_chunk(uint32_t size, rmm::cuda_stream_view stream) = 0;
+};
+
+/**
+ * @brief a data source capable of creating a reader which can produce views of the data source in
+ * device memory.
+ *
+ */
+class data_chunk_source {
+ public:
+  // virtual so that deleting a derived source through a base pointer runs the derived destructor.
+  virtual ~data_chunk_source() = default;
+
+  /**
+   * @brief Creates a new reader over this source.
+   *
+   * @return An owning pointer to the newly created reader
+   */
+  virtual std::unique_ptr<data_chunk_reader> create_reader() = 0;
+};
+
+}  // namespace text
+}  // namespace io
+}  // namespace cudf
@@ -0,0 +1,201 @@
+#pragma once
+
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/io/text/data_chunk_source.hpp>
+#include <cudf/scalar/scalar.hpp>
+
+#include <rmm/device_buffer.hpp>
+
+#include <thrust/host_vector.h>
+#include <thrust/system/cuda/experimental/pinned_allocator.h>
+
+#include <fstream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+
+namespace cudf {
+namespace io {
+namespace text {
+
+namespace {
+
+/**
+ * @brief a reader which produces views of device memory which contain a copy of the data from an
+ * istream.
+ *
+ * Each call to `get_next_chunk` reads from the stream into a pinned host buffer and then issues an
+ * asynchronous copy into a device buffer that is kept per CUDA stream. The returned chunk views
+ * that per-stream buffer, which is reused (and possibly replaced) by the next call made with the
+ * same stream.
+ */
+class istream_data_chunk_reader : public data_chunk_reader {
+ public:
+  /**
+   * @brief Takes ownership of `datastream` and creates the event guarding the host buffer.
+   *
+   * @param datastream The stream the chunks are read from
+   */
+  istream_data_chunk_reader(std::unique_ptr<std::istream> datastream)
+    : _datastream(std::move(datastream)), _buffers()
+  {
+    // create an event to track the completion of the last host-to-device copy.
+    CUDA_TRY(cudaEventCreate(&prev_host_copy_event));  //
+  }
+
+  ~istream_data_chunk_reader()
+  {
+    // NOTE(review): if CUDA_TRY throws on failure, doing so from a destructor would terminate
+    // the program -- confirm whether a non-throwing check is preferable here.
+    CUDA_TRY(cudaEventDestroy(prev_host_copy_event));  //
+  }
+
+  /**
+   * @brief Returns a span of `size` bytes over the device buffer associated with `stream`.
+   *
+   * A new buffer is allocated when the stream has none yet, or when the existing one is smaller
+   * than `size`; otherwise the existing buffer is reused.
+   *
+   * @param size The number of bytes the span must cover
+   * @param stream The CUDA stream whose buffer is used, and on which a new buffer is allocated
+   * @return A span over the first `size` bytes of the stream's buffer
+   */
+  device_span<char> find_or_create_data(uint32_t size, rmm::cuda_stream_view stream)
+  {
+    auto search = _buffers.find(stream.value());
+
+    if (search == _buffers.end() || search->second.size() < size) {
+      _buffers[stream.value()] = rmm::device_buffer(size, stream);
+    }
+
+    return device_span<char>(static_cast<char*>(_buffers[stream.value()].data()), size);
+  }
+
+  /**
+   * @brief Reads up to `read_size` bytes from the stream and copies them to device memory.
+   *
+   * @param read_size The number of bytes requested; the returned chunk is smaller when the stream
+   * yields fewer bytes
+   * @param stream The CUDA stream on which the host-to-device copy is issued
+   * @return A view of the device memory the data is copied to. The copy is asynchronous with
+   * respect to the host.
+   */
+  data_chunk get_next_chunk(uint32_t read_size, rmm::cuda_stream_view stream) override
+  {
+    CUDF_FUNC_RANGE();
+
+    // synchronize on the last host-to-device copy, so we don't clobber the host buffer.
+    CUDA_TRY(cudaEventSynchronize(prev_host_copy_event));
+
+    // resize the host buffer as necessary to contain the requested number of bytes
+    if (_host_buffer.size() < read_size) { _host_buffer.resize(read_size); }
+
+    // read data from the host istream in to the pinned host memory buffer
+    _datastream->read(_host_buffer.data(), read_size);
+
+    // adjust the read size to reflect how many bytes were actually read from the data stream.
+    // gcount() never exceeds the requested count, so the narrowing cast is lossless.
+    read_size = static_cast<uint32_t>(_datastream->gcount());
+
+    // get a view over some device memory we can use to buffer the read data on to device.
+    auto chunk_span = find_or_create_data(read_size, stream);
+
+    // copy the host-pinned data on to device
+    CUDA_TRY(cudaMemcpyAsync(  //
+      chunk_span.data(),
+      _host_buffer.data(),
+      read_size,
+      cudaMemcpyHostToDevice,
+      stream.value()));
+
+    // record the host-to-device copy.
+    CUDA_TRY(cudaEventRecord(prev_host_copy_event, stream.value()));
+
+    // return the view over device memory so it can be processed.
+    return data_chunk(chunk_span);
+  }
+
+ private:
+  std::unique_ptr<std::istream> _datastream;
+  // one device buffer per CUDA stream handle.
+  std::unordered_map<cudaStream_t, rmm::device_buffer> _buffers;
+  // recorded after each host-to-device copy; waited on before the host buffer is overwritten.
+  cudaEvent_t prev_host_copy_event;
+  thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char>>
+    _host_buffer{};
+};
+
+/**
+ * @brief a reader which hands out consecutive views of the device span it was constructed with.
+ *
+ * No data is copied; every chunk is a sub-span of the input span.
+ */
+class device_span_data_chunk_reader : public data_chunk_reader {
+ public:
+  device_span_data_chunk_reader(device_span<char const> data) : _data(data) {}
+
+  data_chunk get_next_chunk(uint32_t read_size, rmm::cuda_stream_view stream) override
+  {
+    // never hand out more bytes than are left after the current position.
+    auto const remaining = _data.size() - _position;
+    if (remaining < read_size) { read_size = remaining; }
+
+    // the chunk starts where the previous one ended; advance past it for the next call.
+    auto const chunk_begin = _position;
+    _position += read_size;
+
+    // return the view over device memory so it can be processed.
+    return data_chunk(_data.subspan(chunk_begin, read_size));
+  }
+
+ private:
+  device_span<char const> _data;
+  uint64_t _position = 0;
+};
+
+/**
+ * @brief a file data source which creates an istream_data_chunk_reader
+ *
+ * Every reader opens the file anew, so each reader starts at the beginning of the file.
+ */
+class file_data_chunk_source : public data_chunk_source {
+ public:
+  // `filename` is taken by value, so move it instead of copying it a second time.
+  file_data_chunk_source(std::string filename) : _filename(std::move(filename)) {}
+  std::unique_ptr<data_chunk_reader> create_reader() override
+  {
+    // open in binary mode: the reader counts raw bytes, so no text-mode translation is wanted.
+    return std::make_unique<istream_data_chunk_reader>(
+      std::make_unique<std::ifstream>(_filename, std::ifstream::in | std::ifstream::binary));
+  }
+
+ private:
+  std::string _filename;
+};
+
+/**
+ * @brief a host string data source which creates an istream_data_chunk_reader
+ *
+ * The source keeps its own copy of the string, so it stays valid even when it was constructed
+ * from a temporary. Every reader gets a fresh `std::istringstream` over that copy.
+ */
+class string_data_chunk_source : public data_chunk_source {
+ public:
+  string_data_chunk_source(std::string const& data) : _data(data) {}
+  std::unique_ptr<data_chunk_reader> create_reader() override
+  {
+    return std::make_unique<istream_data_chunk_reader>(std::make_unique<std::istringstream>(_data));
+  }
+
+ private:
+  // owned copy; holding a reference here would dangle if the caller's string went away first.
+  std::string _data;
+};
+
+/**
+ * @brief a device span data source which creates a device_span_data_chunk_reader
+ *
+ * Only the span is stored, not the data it refers to; every reader is constructed from that same
+ * span.
+ */
+class device_span_data_chunk_source : public data_chunk_source {
+ public:
+  device_span_data_chunk_source(device_span<char const> data) : _data(data) {}
+  std::unique_ptr<data_chunk_reader> create_reader() override
+  {
+    return std::make_unique<device_span_data_chunk_reader>(_data);
+  }
+
+ private:
+  device_span<char const> _data;
+};
+
+}  // namespace
+
+/**
+ * @brief Creates a data source capable of producing device-buffered views of the given string.
+ *
+ * @note Callers should keep `data` alive and unmodified for as long as the returned source is in
+ * use.
+ *
+ * @param data The host string to read from
+ * @return An owning pointer to the new data source
+ */
+// `inline` because this function is defined in a header: without it, including the header from
+// more than one translation unit produces duplicate definitions at link time.
+inline std::unique_ptr<data_chunk_source> make_source(std::string const& data)
+{
+  return std::make_unique<string_data_chunk_source>(data);
+}
+
+/**
+ * @brief Creates a data source capable of producing device-buffered views of the file
+ *
+ * The file is not opened here; it is opened when a reader is created from the returned source.
+ *
+ * @param filename Path of the file to read from
+ * @return An owning pointer to the new data source
+ */
+// `inline` because this function is defined in a header: without it, including the header from
+// more than one translation unit produces duplicate definitions at link time.
+inline std::unique_ptr<data_chunk_source> make_source_from_file(std::string const& filename)
+{
+  return std::make_unique<file_data_chunk_source>(filename);
+}
+
+/**
+ * @brief Creates a data source capable of producing views of the given device string scalar
+ *
+ * The source only stores a span built from `data.data()` and `data.size()`; nothing is copied, so
+ * `data` must outlive the returned source and every reader created from it.
+ *
+ * @param data The string scalar whose characters are viewed
+ * @return An owning pointer to the new data source
+ */
+// `inline` because this function is defined in a header: without it, including the header from
+// more than one translation unit produces duplicate definitions at link time.
+inline std::unique_ptr<data_chunk_source> make_source(cudf::string_scalar& data)
+{
+  auto data_span = device_span<char const>(data.data(), data.size());
+  return std::make_unique<device_span_data_chunk_source>(data_span);
+}
+
+}  // namespace text
+}  // namespace io
+}  // namespace cudf
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <cudf/io/text/data_chunk_source.hpp>
+
+#include <cudf/column/column.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace cudf {
+namespace io {
+namespace text {
+
+/**
+ * @brief Processes the data produced by `source` using the given delimiters and returns the
+ * result as a column.
+ *
+ * NOTE(review): only the declaration is visible in this header; presumably the input is split at
+ * every occurrence of any of the delimiters -- confirm the exact output (column type, whether
+ * delimiters are kept) against the definition.
+ *
+ * @param source The source of the data to process
+ * @param delimeters The delimiter strings
+ * @param mr Device memory resource; defaults to the current device resource
+ * @return The resulting column
+ */
+std::unique_ptr<cudf::column> multibyte_split(
+  data_chunk_source& source,
+  std::vector<std::string> const& delimeters,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+}  // namespace text
+}  // namespace io
+}  // namespace cudf