Skip to content

Commit

Permalink
Added decimal writing for CSV writer (#8296)
Browse files Browse the repository at this point in the history
Addresses #7110 

`column_to_strings_fn` was specialized for fixed-point types so that the CSV writer can write decimal columns. Tests were added to validate the output file that the CSV writer produces for a decimal column.

Authors:
  - Kumar Aatish (https://github.com/kaatish)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - David Wendt (https://github.com/davidwendt)
  - Vukasin Milovanovic (https://github.com/vuule)
  - Devavret Makkar (https://github.com/devavret)

URL: #8296
  • Loading branch information
kaatish authored May 21, 2021
1 parent 5c6b92a commit de579a5
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 1 deletion.
12 changes: 11 additions & 1 deletion cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ struct column_to_strings_fn {
return not((std::is_same<column_type, cudf::string_view>::value) ||
(std::is_integral<column_type>::value) ||
(std::is_floating_point<column_type>::value) ||
(cudf::is_timestamp<column_type>()) || (cudf::is_duration<column_type>()));
(cudf::is_fixed_point<column_type>()) || (cudf::is_timestamp<column_type>()) ||
(cudf::is_duration<column_type>()));
}

explicit column_to_strings_fn(
Expand Down Expand Up @@ -189,6 +190,15 @@ struct column_to_strings_fn {
return cudf::strings::detail::from_floats(column, stream_, mr_);
}

// Fixed-point (decimal) columns.
//
// This overload participates in overload resolution only when `column_type` is a fixed-point
// type. It produces the strings column by delegating to
// cudf::strings::detail::from_fixed_point, forwarding this functor's `stream_` and `mr_`.
template <typename column_type>
std::enable_if_t<cudf::is_fixed_point<column_type>(), std::unique_ptr<column>> operator()(
  column_view const& column) const
{
  auto as_strings = cudf::strings::detail::from_fixed_point(column, stream_, mr_);
  return as_strings;
}

// timestamps:
//
template <typename column_type>
Expand Down
104 changes: 104 additions & 0 deletions cpp/tests/io/csv_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@
#include <cudf_test/type_lists.hpp>

#include <cudf/detail/iterator.cuh>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/io/csv.hpp>
#include <cudf/io/datasource.hpp>
#include <cudf/strings/convert/convert_datetime.hpp>
#include <cudf/strings/convert/convert_fixed_point.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
Expand Down Expand Up @@ -61,6 +63,16 @@ using table_view = cudf::table_view;
// Registers a TempDirTestEnvironment with googletest as a global test environment and keeps a
// typed pointer to it; the tests in this file call temp_env->get_temp_dir() to build the paths
// of the files they write.
auto const temp_env = static_cast<cudf::test::TempDirTestEnvironment*>(
::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment));

// Fixture that the CSV writer tests derive from.
struct CsvWriterTest : cudf::test::BaseFixture {};

// Typed fixture for the CSV writer fixed-point tests; the registration below runs every
// TYPED_TEST of this fixture once per type listed in cudf::test::FixedPointTypes.
template <typename T>
struct CsvFixedPointWriterTest : CsvWriterTest {};

TYPED_TEST_CASE(CsvFixedPointWriterTest, cudf::test::FixedPointTypes);

// Fixture that the CSV reader tests derive from.
struct CsvReaderTest : cudf::test::BaseFixture {};
Expand Down Expand Up @@ -307,6 +319,98 @@ TYPED_TEST(CsvReaderNumericTypeTest, SingleColumn)
expect_column_data_equal(std::vector<TypeParam>(sequence, sequence + num_rows), view.column(0));
}

// Writes a single fixed-point column with scale -2 to a CSV file and checks that the tokens
// read back from that file equal the strings the valid rows were built from.
TYPED_TEST(CsvFixedPointWriterTest, SingleColumnNegativeScale)
{
  std::vector<std::string> const reference_strings = {
    "1.23", "-8.76", "5.43", "-0.12", "0.25", "-0.23", "-0.27", "0.00", "0.00"};

  // Even-indexed rows are valid, odd-indexed rows are null.
  auto validity =
    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
  cudf::test::strings_column_wrapper strings(
    reference_strings.begin(), reference_strings.end(), validity);

  // Only the valid rows are expected in the output. The file is read back below as
  // whitespace-delimited tokens, so null rows (presumably written as empty fields -- confirm
  // against the writer's defaults) contribute no token.
  std::vector<std::string> expected_strings;
  thrust::copy_if(thrust::host,
                  reference_strings.begin(),
                  reference_strings.end(),
                  thrust::make_counting_iterator(0),
                  std::back_inserter(expected_strings),
                  validity.functor());

  using DecimalType = TypeParam;
  auto input_column = cudf::strings::to_fixed_point(
    cudf::strings_column_view(strings),
    cudf::data_type{cudf::type_to_id<DecimalType>(), numeric::scale_type{-2}});

  auto input_table = cudf::table_view{std::vector<cudf::column_view>{*input_column}};

  auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnNegativeScale.csv";

  cudf_io::csv_writer_options writer_options =
    cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table);

  cudf_io::write_csv(writer_options);

  std::ifstream read_result_file(filepath);
  // Use a googletest assertion rather than assert(): assert() is compiled out when NDEBUG is
  // defined, and otherwise aborts the whole test binary instead of failing just this test.
  ASSERT_TRUE(read_result_file.is_open()) << "Could not open " << filepath;

  std::vector<std::string> result_strings;
  result_strings.reserve(expected_strings.size());
  std::copy(std::istream_iterator<std::string>(read_result_file),
            std::istream_iterator<std::string>(),
            std::back_inserter(result_strings));

  EXPECT_EQ(result_strings, expected_strings);
}

// Writes a single fixed-point column with scale 3 to a CSV file and checks that the tokens
// read back from that file equal the strings the valid rows were built from.
TYPED_TEST(CsvFixedPointWriterTest, SingleColumnPositiveScale)
{
  std::vector<std::string> const reference_strings = {
    "123000", "-876000", "543000", "-12000", "25000", "-23000", "-27000", "0000", "0000"};

  // Even-indexed rows are valid, odd-indexed rows are null.
  auto validity =
    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
  cudf::test::strings_column_wrapper strings(
    reference_strings.begin(), reference_strings.end(), validity);

  // Only the valid rows are expected in the output. The file is read back below as
  // whitespace-delimited tokens, so null rows (presumably written as empty fields -- confirm
  // against the writer's defaults) contribute no token.
  std::vector<std::string> expected_strings;
  thrust::copy_if(thrust::host,
                  reference_strings.begin(),
                  reference_strings.end(),
                  thrust::make_counting_iterator(0),
                  std::back_inserter(expected_strings),
                  validity.functor());

  using DecimalType = TypeParam;
  auto input_column = cudf::strings::to_fixed_point(
    cudf::strings_column_view(strings),
    cudf::data_type{cudf::type_to_id<DecimalType>(), numeric::scale_type{3}});

  auto input_table = cudf::table_view{std::vector<cudf::column_view>{*input_column}};

  auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnPositiveScale.csv";

  cudf_io::csv_writer_options writer_options =
    cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table);

  cudf_io::write_csv(writer_options);

  std::ifstream read_result_file(filepath);
  // Use a googletest assertion rather than assert(): assert() is compiled out when NDEBUG is
  // defined, and otherwise aborts the whole test binary instead of failing just this test.
  ASSERT_TRUE(read_result_file.is_open()) << "Could not open " << filepath;

  std::vector<std::string> result_strings;
  result_strings.reserve(expected_strings.size());
  std::copy(std::istream_iterator<std::string>(read_result_file),
            std::istream_iterator<std::string>(),
            std::back_inserter(result_strings));

  EXPECT_EQ(result_strings, expected_strings);
}

TEST_F(CsvReaderTest, MultiColumn)
{
constexpr auto num_rows = 10;
Expand Down

0 comments on commit de579a5

Please sign in to comment.