Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support nested types for nth_element reduction #9043

Merged
merged 10 commits into from
Aug 24, 2021
29 changes: 23 additions & 6 deletions cpp/src/reductions/reductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <cudf/reduction.hpp>
#include <cudf/scalar/scalar_factories.hpp>

#include <cudf/structs/structs_column_view.hpp>
#include <rmm/cuda_stream_view.hpp>

namespace cudf {
Expand Down Expand Up @@ -112,13 +113,29 @@ std::unique_ptr<scalar> reduce(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
std::unique_ptr<scalar> result = make_default_constructed_scalar(output_dtype, stream, mr);
result->set_valid_async(false, stream);

// check if input column is empty
if (col.size() <= col.null_count()) return result;
// Return an invalid (null) default scalar when the input column has no valid rows. For nested
// types, the default scalar has to be built by hand from the input column.
if (col.size() <= col.null_count()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like we need something like a make_empty_scalar_like(column_view)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. And I created the method make_empty_scalar_like based on the above code.

if (!is_nested(output_dtype)) {
auto result = make_default_constructed_scalar(output_dtype, stream, mr);
result->set_valid_async(false, stream);
return result;
} else if (col.type().id() == type_id::LIST) {
auto result = make_list_scalar(empty_like(col)->view(), stream, mr);
result->set_valid_async(false, stream);
return result;
} else if (col.type().id() == type_id::STRUCT) {
// Struct scalar inputs must have exactly 1 row.
CUDF_EXPECTS(!col.is_empty(), "Can not create empty struct scalar");
auto result = get_element(col, 1, stream, mr);
result->set_valid_async(false, stream);
return result;
} else {
CUDF_FAIL("Unsupported data type for default scalar");
}
}

result =
std::unique_ptr<scalar> result =
aggregation_dispatcher(agg->kind, reduce_dispatch_functor{col, output_dtype, stream, mr}, agg);
return result;
sperlingxx marked this conversation as resolved.
Show resolved Hide resolved
}
Expand Down
57 changes: 57 additions & 0 deletions cpp/tests/groupby/nth_element_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,5 +405,62 @@ TYPED_TEST(groupby_nth_element_lists_test, EmptyInput)
keys, values, expected_keys, expected_values, cudf::make_nth_element_aggregation(2));
}

// Fixture for the groupby nth_element tests below whose values are struct columns.
struct groupby_nth_element_structs_test : BaseFixture {};

// Runs nth_element over a struct values column grouped by an int key column, in two scenarios:
// n = 1 without an explicit null policy, and n = 0 with null rows excluded.
TEST_F(groupby_nth_element_structs_test, Basics)
{
  using int_col    = cudf::test::fixed_width_column_wrapper<int>;
  using double_col = cudf::test::fixed_width_column_wrapper<double>;
  using string_col = cudf::test::strings_column_wrapper;
  using struct_col = cudf::test::structs_column_wrapper;

  // Ten input rows falling into four key groups (0, 1, 2 and 3).
  auto group_keys   = int_col{0, 0, 0, 1, 1, 1, 2, 2, 2, 3};
  auto field_int    = int_col{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  auto field_double = double_col{0.1, 1.2, 2.3, 3.4, 4.51, 5.3e4, 6.3231, -0.07, 832.1, 9.999};
  auto field_string = string_col{"", "a", "b", "c", "d", "e", "f", "g", "HH", "JJJ"};
  auto struct_values =
    struct_col{{field_int, field_double, field_string}, {1, 0, 1, 0, 1, 1, 1, 1, 0, 1}};

  {
    // n = 1, no explicit null policy: the expected validity {0, 1, 1, 0} marks the results for
    // key groups 0 and 3 as null.
    auto want_keys   = int_col{0, 1, 2, 3};
    auto want_int    = int_col{1, 4, 7, 0};
    auto want_double = double_col{1.2, 4.51, -0.07, 0.0};
    auto want_string = string_col{"a", "d", "g", ""};
    auto want_values = struct_col{{want_int, want_double, want_string}, {0, 1, 1, 0}};
    test_single_agg(
      group_keys, struct_values, want_keys, want_values, cudf::make_nth_element_aggregation(1));
  }

  {
    // n = 0 with null rows excluded: every group is expected to yield a valid struct.
    auto want_keys   = int_col{0, 1, 2, 3};
    auto want_int    = int_col{0, 4, 6, 9};
    auto want_double = double_col{0.1, 4.51, 6.3231, 9.999};
    auto want_string = string_col{"", "d", "f", "JJJ"};
    auto want_values = struct_col{{want_int, want_double, want_string}, {1, 1, 1, 1}};
    test_single_agg(group_keys,
                    struct_values,
                    want_keys,
                    want_values,
                    cudf::make_nth_element_aggregation(0, null_policy::EXCLUDE));
  }
}

// Runs nth_element (n = 0) on zero-row keys and zero-row struct values; the expected keys and
// values are likewise zero-row columns.
TEST_F(groupby_nth_element_structs_test, EmptyInput)
{
  using int_col    = cudf::test::fixed_width_column_wrapper<int>;
  using double_col = cudf::test::fixed_width_column_wrapper<double>;
  using string_col = cudf::test::strings_column_wrapper;
  using struct_col = cudf::test::structs_column_wrapper;

  // Inputs: no keys, and a struct column with three empty children.
  auto no_keys      = int_col{};
  auto in_int       = int_col{};
  auto in_double    = double_col{};
  auto in_string    = string_col{};
  auto empty_values = struct_col{{in_int, in_double, in_string}};

  // Expected outputs are built the same way as the inputs.
  auto want_keys   = int_col{};
  auto want_int    = int_col{};
  auto want_double = double_col{};
  auto want_string = string_col{};
  auto want_values = struct_col{{want_int, want_double, want_string}};

  test_single_agg(
    no_keys, empty_values, want_keys, want_values, cudf::make_nth_element_aggregation(0));
}
} // namespace test
} // namespace cudf
194 changes: 194 additions & 0 deletions cpp/tests/reductions/reduction_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@
#include <cudf/dictionary/encode.hpp>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/reduction.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/types.hpp>
#include <cudf/wrappers/timestamps.hpp>
#include <cudf_test/table_utilities.hpp>

#include <thrust/iterator/counting_iterator.h>

Expand Down Expand Up @@ -1872,4 +1874,196 @@ TYPED_TEST(DictionaryReductionTest, Quantile)
output_type);
}

//-------------------------------------------------------------------
sperlingxx marked this conversation as resolved.
Show resolved Hide resolved
// Fixture for reductions over LIST columns that produce a list scalar.
struct ListReductionTest : public cudf::test::BaseFixture {
  // Reduces `input_data` with `agg`, requesting a LIST output type, and checks the outcome.
  //
  // input_data          column to reduce
  // expected_value      expected contents of the resulting list scalar; compared only when
  //                     `is_valid` is true
  // succeeded_condition true if the reduction must not throw, false if it must throw
  // is_valid            expected validity of the resulting scalar
  // agg                 aggregation to apply
  void reduction_test(cudf::column_view const& input_data,
                      cudf::column_view const& expected_value,
                      bool succeeded_condition,
                      bool is_valid,
                      std::unique_ptr<aggregation> const& agg)
  {
    auto statement = [&]() {
      std::unique_ptr<cudf::scalar> result =
        cudf::reduce(input_data, agg, cudf::data_type(cudf::type_id::LIST));
      auto list_result = dynamic_cast<cudf::list_scalar*>(result.get());
      // A failed cast yields nullptr; report a test failure rather than dereferencing it.
      ASSERT_TRUE(list_result != nullptr);
      EXPECT_EQ(is_valid, list_result->is_valid());
      if (is_valid) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_value, list_result->view()); }
    };

    if (succeeded_condition) {
      CUDF_EXPECT_NO_THROW(statement());
    } else {
      EXPECT_ANY_THROW(statement());
    }
  }
};

// nth_element reduction over a LIST column: without nulls, with null rows excluded, and with
// null rows included.
TEST_F(ListReductionTest, ListReductionNthElement)
{
  using lists_col = cudf::test::lists_column_wrapper<int>;
  using ints_col  = cudf::test::fixed_width_column_wrapper<int>;

  // Case 1: no null rows; element 2 is the list {0, 5, -3}.
  lists_col no_nulls{{-3}, {2, 1}, {0, 5, -3}, {-2}, {}, {28}};
  ints_col third_list{0, 5, -3};
  auto const pick_third = cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE);
  this->reduction_test(no_nulls, third_list, true, true, pick_third);

  // The remaining cases share one column in which rows 1, 2 and 5 are null.
  std::vector<bool> row_validity{1, 0, 0, 1, 1, 0};
  lists_col with_nulls({{-3}, {2, 1}, {0, 5, -3}, {-2}, {}, {28}}, row_validity.begin());

  // Case 2: nulls excluded; element 1 among the valid rows is {-2}.
  ints_col second_valid_list{-2};
  auto const pick_second_valid =
    cudf::make_nth_element_aggregation(1, cudf::null_policy::EXCLUDE);
  this->reduction_test(with_nulls, second_valid_list, true, true, pick_second_valid);

  // Case 3: nulls included; row 1 is null, so an invalid scalar is expected.
  ints_col unused_expected{};
  auto const pick_second = cudf::make_nth_element_aggregation(1, cudf::null_policy::INCLUDE);
  this->reduction_test(with_nulls, unused_expected, true, false, pick_second);
}

// nth_element reduction over LIST columns that contain no valid rows: both cases expect the
// call to succeed and return an invalid scalar.
TEST_F(ListReductionTest, NonValidListReductionNthElement)
{
  using lists_col = cudf::test::lists_column_wrapper<int>;
  using ints_col  = cudf::test::fixed_width_column_wrapper<int>;

  auto const pick_first = [] {
    return cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE);
  };

  // Case 1: a single row that is null, i.e. col.size() <= col.null_count().
  std::vector<bool> row_validity{0};
  lists_col all_null{{{1, 2}}, row_validity.begin()};
  ints_col unused_expected{};
  this->reduction_test(all_null, unused_expected, true, false, pick_first());

  // Case 2: a column with no rows at all.
  lists_col no_rows{};
  ints_col unused_null_expected{{0}, {0}};
  this->reduction_test(no_rows, unused_null_expected, true, false, pick_first());
}

//-------------------------------------------------------------------
sperlingxx marked this conversation as resolved.
Show resolved Hide resolved
// Fixture for reductions over STRUCT columns that produce a struct scalar.
struct StructReductionTest : public cudf::test::BaseFixture {
  using StructCol = cudf::test::structs_column_wrapper;

  // Reduces `struct_column` with `agg`, requesting a STRUCT output type, and checks the outcome.
  //
  // struct_column       struct column to reduce
  // expected_value      expected fields of the resulting struct scalar; compared only when
  //                     `is_valid` is true
  // succeeded_condition true if the reduction must not throw, false if it must throw
  // is_valid            expected validity of the resulting scalar
  // agg                 aggregation to apply
  void reduction_test(StructCol const& struct_column,
                      cudf::table_view const& expected_value,
                      bool succeeded_condition,
                      bool is_valid,
                      std::unique_ptr<aggregation> const& agg)
  {
    auto statement = [&]() {
      // The requested output type must match the struct input (it was LIST, copied from the
      // list fixture).
      std::unique_ptr<cudf::scalar> result =
        cudf::reduce(struct_column, agg, cudf::data_type(cudf::type_id::STRUCT));
      auto struct_result = dynamic_cast<cudf::struct_scalar*>(result.get());
      // A failed cast yields nullptr; report a test failure rather than dereferencing it.
      ASSERT_TRUE(struct_result != nullptr);
      EXPECT_EQ(is_valid, struct_result->is_valid());
      if (is_valid) { CUDF_TEST_EXPECT_TABLES_EQUAL(expected_value, struct_result->view()); }
    };

    if (succeeded_condition) {
      CUDF_EXPECT_NO_THROW(statement());
    } else {
      EXPECT_ANY_THROW(statement());
    }
  }
};

// nth_element reduction over a STRUCT column with three int children: without struct-level
// nulls, with null rows included, and with null rows excluded.
TEST_F(StructReductionTest, StructReductionNthElement)
{
  using int_col = cudf::test::fixed_width_column_wrapper<int>;

  // Master copies of the three child columns; the third one carries its own null mask.
  auto field0 = *int_col{-3, 2, 1, 0, 5, -3, -2, 28}.release();
  auto field1 = *int_col{0, 1, 2, 3, 4, 5, 6, 7}.release();
  auto field2 =
    *int_col{{-10, 10, -100, 100, -1000, 1000, -10000, 10000}, {1, 0, 0, 1, 1, 1, 0, 1}}.release();

  // Each struct column below is constructed from a moved-in vector of children, so every case
  // gets its own copies of the master columns.
  auto clone_fields = [&]() {
    std::vector<std::unique_ptr<cudf::column>> fields;
    fields.push_back(std::make_unique<cudf::column>(field0));
    fields.push_back(std::make_unique<cudf::column>(field1));
    fields.push_back(std::make_unique<cudf::column>(field2));
    return fields;
  };

  // Case 1: no struct-level nulls; row 2 is {1, 2, null}.
  {
    StructCol plain_structs(clone_fields());
    auto want0 = int_col{1};
    auto want1 = int_col{2};
    auto want2 = int_col{{0}, {0}};
    this->reduction_test(plain_structs,
                         cudf::table_view{{want0, want1, want2}},
                         true,
                         true,
                         cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE));
  }

  // Struct-level validity shared by the two remaining cases: rows 3, 5 and 6 are null.
  std::vector<bool> row_validity{1, 1, 1, 0, 1, 0, 0, 1};

  // Case 2: nulls included; row 6 is null, so an invalid scalar is expected.
  {
    StructCol structs_for_include(clone_fields(), row_validity);
    auto want0 = int_col{{0}, {0}};
    auto want1 = int_col{{0}, {0}};
    auto want2 = int_col{{0}, {0}};
    this->reduction_test(structs_for_include,
                         cudf::table_view{{want0, want1, want2}},
                         true,
                         false,
                         cudf::make_nth_element_aggregation(6, cudf::null_policy::INCLUDE));
  }

  // Case 3: nulls excluded; element 4 among the valid rows is the last row, {28, 7, 10000}.
  {
    StructCol structs_for_exclude(clone_fields(), row_validity);
    auto want0 = int_col{{28}, {1}};
    auto want1 = int_col{{7}, {1}};
    auto want2 = int_col{{10000}, {1}};
    this->reduction_test(structs_for_exclude,
                         cudf::table_view{{want0, want1, want2}},
                         true,
                         true,
                         cudf::make_nth_element_aggregation(4, cudf::null_policy::EXCLUDE));
  }
}

// nth_element reduction over STRUCT columns that contain no valid rows.
TEST_F(StructReductionTest, NonValidStructReductionNthElement)
{
  using int_col = cudf::test::fixed_width_column_wrapper<int>;

  // Case 1: two rows, both null at the struct level (col.size() <= col.null_count()).
  // The call is expected to succeed and return an invalid scalar.
  {
    auto field0     = int_col{-3, 3};
    auto field1     = int_col{0, 0};
    auto field2     = int_col{{-10, 10}, {0, 1}};
    auto all_null   = StructCol{{field0, field1, field2}, {0, 0}};
    auto want0      = int_col{{0}, {0}};
    auto want1      = int_col{{0}, {0}};
    auto want2      = int_col{{0}, {0}};
    this->reduction_test(all_null,
                         cudf::table_view{{want0, want1, want2}},
                         true,
                         false,
                         cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE));
  }

  // Case 2: a struct column with no rows. The call is expected to throw, because an empty
  // struct scalar can not be created.
  {
    auto field0  = int_col{};
    auto field1  = int_col{};
    auto field2  = int_col{};
    auto no_rows = StructCol{{field0, field1, field2}};
    auto want0   = int_col{};
    auto want1   = int_col{};
    auto want2   = int_col{};
    this->reduction_test(no_rows,
                         cudf::table_view{{want0, want1, want2}},
                         false,
                         false,
                         cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE));
  }
}

CUDF_TEST_PROGRAM_MAIN()