diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu
index 71391c8c444..c5ffd75341e 100644
--- a/cpp/src/io/csv/reader_impl.cu
+++ b/cpp/src/io/csv/reader_impl.cu
@@ -40,6 +40,7 @@
 #include
 #include
 #include
+#include <unordered_set>
 
 using std::string;
 using std::vector;
@@ -336,13 +337,20 @@ table_with_metadata reader::impl::read(rmm::cuda_stream_view stream)
     for (const auto index : opts_.get_use_cols_indexes()) {
       column_flags_[index] = column_parse::enabled;
     }
-    num_active_cols_ = opts_.get_use_cols_indexes().size();
+    // Count each selected index once, even when use_cols_indexes lists it repeatedly.
+    num_active_cols_ = std::unordered_set<int>(opts_.get_use_cols_indexes().begin(),
+                                               opts_.get_use_cols_indexes().end())
+                         .size();
 
     for (const auto &name : opts_.get_use_cols_names()) {
       const auto it = std::find(col_names_.begin(), col_names_.end(), name);
       if (it != col_names_.end()) {
-        column_flags_[it - col_names_.begin()] = column_parse::enabled;
-        num_active_cols_++;
+        const auto col_index = it - col_names_.begin();
+        // Skip columns already enabled by index or by an earlier occurrence of the same name.
+        if (column_flags_[col_index] == column_parse::disabled) {
+          column_flags_[col_index] = column_parse::enabled;
+          num_active_cols_++;
+        }
       }
     }
   }
diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp
index 8996dd95e06..9c3a9a1b015 100644
--- a/cpp/tests/io/csv_test.cpp
+++ b/cpp/tests/io/csv_test.cpp
@@ -527,6 +527,45 @@ TEST_F(CsvReaderTest, MultiColumn)
   expect_column_data_equal(float64_values, view.column(14));
 }
 
+// A column requested more than once through use_cols_indexes and/or use_cols_names must be
+// returned only once.
+TEST_F(CsvReaderTest, RepeatColumn)
+{
+  constexpr auto num_rows = 10;
+  auto int16_values       = random_values<int16_t>(num_rows);
+  auto int64_values       = random_values<int64_t>(num_rows);
+  auto uint64_values      = random_values<uint64_t>(num_rows);
+  auto float32_values     = random_values<float>(num_rows);
+
+  auto filepath = temp_env->get_temp_dir() + "RepeatColumn.csv";
+  {
+    std::ostringstream line;
+    for (int i = 0; i < num_rows; ++i) {
+      line << int16_values[i] << "," << int64_values[i] << "," << uint64_values[i] << ","
+           << float32_values[i] << "\n";
+    }
+    std::ofstream outfile(filepath, std::ofstream::out);
+    outfile << line.str();
+  }
+
+  // Indexes select B, A, A and names select D, B, B: A and B are repeated and C is never
+  // requested, so the result is expected to hold exactly A, B and D.
+  cudf_io::csv_reader_options in_opts =
+    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+      .dtypes(std::vector<std::string>{"int16", "int64", "uint64", "float"})
+      .names({"A", "B", "C", "D"})
+      .use_cols_indexes({1, 0, 0})
+      .use_cols_names({"D", "B", "B"})
+      .header(-1);
+  auto result = cudf_io::read_csv(in_opts);
+
+  const auto view = result.tbl->view();
+  EXPECT_EQ(3, view.num_columns());
+  expect_column_data_equal(int16_values, view.column(0));
+  expect_column_data_equal(int64_values, view.column(1));
+  expect_column_data_equal(float32_values, view.column(2));
+}
+
 TEST_F(CsvReaderTest, Booleans)
 {
   auto filepath = temp_env->get_temp_dir() + "Booleans.csv";