From de579a59714f960fe33440811b4c49e5efeb3f3f Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Fri, 21 May 2021 16:05:18 -0400 Subject: [PATCH] Added decimal writing for CSV writer (#8296) Addresses #7110 column_to_strings_fn was specialized for fixed point type to enable support for csv writer. A test was added to validate output file created by csv writer for decimal type column. Authors: - Kumar Aatish (https://github.com/kaatish) Approvers: - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) - Vukasin Milovanovic (https://github.com/vuule) - Devavret Makkar (https://github.com/devavret) URL: https://github.com/rapidsai/cudf/pull/8296 --- cpp/src/io/csv/writer_impl.cu | 12 +++- cpp/tests/io/csv_test.cpp | 104 ++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index d2b6be5eead..13760381373 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -119,7 +119,8 @@ struct column_to_strings_fn { return not((std::is_same::value) || (std::is_integral::value) || (std::is_floating_point::value) || - (cudf::is_timestamp()) || (cudf::is_duration())); + (cudf::is_fixed_point()) || (cudf::is_timestamp()) || + (cudf::is_duration())); } explicit column_to_strings_fn( @@ -189,6 +190,15 @@ struct column_to_strings_fn { return cudf::strings::detail::from_floats(column, stream_, mr_); } + // fixed point: + // + template + std::enable_if_t(), std::unique_ptr> operator()( + column_view const& column) const + { + return cudf::strings::detail::from_fixed_point(column, stream_, mr_); + } + // timestamps: // template diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 6bc08cf24a6..e45b67505ba 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -22,9 +22,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -61,6 +63,16 @@ using table_view = cudf::table_view; auto const temp_env = static_cast( ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); +// Base test fixture for tests +struct CsvWriterTest : public cudf::test::BaseFixture { +}; + +template +struct CsvFixedPointWriterTest : public CsvWriterTest { +}; + +TYPED_TEST_CASE(CsvFixedPointWriterTest, cudf::test::FixedPointTypes); + // Base test fixture for tests struct CsvReaderTest : public cudf::test::BaseFixture { }; @@ -307,6 +319,98 @@ TYPED_TEST(CsvReaderNumericTypeTest, SingleColumn) expect_column_data_equal(std::vector(sequence, sequence + num_rows), view.column(0)); } +TYPED_TEST(CsvFixedPointWriterTest, SingleColumnNegativeScale) +{ + std::vector reference_strings = { + "1.23", "-8.76", "5.43", "-0.12", "0.25", "-0.23", "-0.27", "0.00", "0.00"}; + + auto validity = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return (i % 2 == 0) ? true : false; }); + cudf::test::strings_column_wrapper strings( + reference_strings.begin(), reference_strings.end(), validity); + + std::vector valid_reference_strings; + thrust::copy_if(thrust::host, + reference_strings.begin(), + reference_strings.end(), + thrust::make_counting_iterator(0), + std::back_inserter(valid_reference_strings), + validity.functor()); + reference_strings = valid_reference_strings; + + using DecimalType = TypeParam; + auto input_column = cudf::strings::to_fixed_point( + cudf::strings_column_view(strings), + cudf::data_type{cudf::type_to_id(), numeric::scale_type{-2}}); + + auto input_table = cudf::table_view{std::vector{*input_column}}; + + auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnNegativeScale.csv"; + + cudf_io::csv_writer_options writer_options = + cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table); + + cudf_io::write_csv(writer_options); + + std::vector result_strings; + result_strings.reserve(reference_strings.size()); + + std::ifstream read_result_file(filepath); + assert(read_result_file.is_open()); + + std::copy(std::istream_iterator(read_result_file), + std::istream_iterator(), + std::back_inserter(result_strings)); + + EXPECT_EQ(result_strings, reference_strings); +} + +TYPED_TEST(CsvFixedPointWriterTest, SingleColumnPositiveScale) +{ + std::vector reference_strings = { + "123000", "-876000", "543000", "-12000", "25000", "-23000", "-27000", "0000", "0000"}; + + auto validity = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return (i % 2 == 0) ? true : false; }); + cudf::test::strings_column_wrapper strings( + reference_strings.begin(), reference_strings.end(), validity); + + std::vector valid_reference_strings; + thrust::copy_if(thrust::host, + reference_strings.begin(), + reference_strings.end(), + thrust::make_counting_iterator(0), + std::back_inserter(valid_reference_strings), + validity.functor()); + reference_strings = valid_reference_strings; + + using DecimalType = TypeParam; + auto input_column = cudf::strings::to_fixed_point( + cudf::strings_column_view(strings), + cudf::data_type{cudf::type_to_id(), numeric::scale_type{3}}); + + auto input_table = cudf::table_view{std::vector{*input_column}}; + + auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnPositiveScale.csv"; + + cudf_io::csv_writer_options writer_options = + cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table); + + cudf_io::write_csv(writer_options); + + std::vector result_strings; + result_strings.reserve(reference_strings.size()); + + std::ifstream read_result_file(filepath); + assert(read_result_file.is_open()); + + std::copy(std::istream_iterator(read_result_file), + std::istream_iterator(), + std::back_inserter(result_strings)); + + EXPECT_EQ(result_strings, reference_strings); +} + TEST_F(CsvReaderTest, MultiColumn) { constexpr auto num_rows = 10;