Skip to content

Commit

Permalink
add pytest for groupby.diff
Browse files Browse the repository at this point in the history
  • Loading branch information
karthikeyann committed Oct 15, 2021
1 parent 575ef12 commit ee30e27
Showing 1 changed file with 83 additions and 0 deletions.
83 changes: 83 additions & 0 deletions python/cudf/cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1913,6 +1913,89 @@ def test_groupby_shift_row_zero_shift(nelem, fill_value):
)


@pytest.mark.parametrize("nelem", [2, 3, 100, 1000])
@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5])
@pytest.mark.parametrize("direction", [1, -1])
def test_groupby_diff_row(nelem, shift_perc, direction):
    """Check that cudf groupby ``diff`` agrees with pandas.

    The period is ``int(nelem * shift_perc)`` with its sign set by
    ``direction``, so both forward and backward differences are covered.
    """
    group_keys = ["x", "y"]
    value_cols = ["val", "val2"]
    periods = direction * int(nelem * shift_perc)

    host_frame = make_frame(pd.DataFrame, nelem=nelem, extra_vals=["val2"])
    device_frame = cudf.from_pandas(host_frame)

    # pandas provides the reference result for the same grouped diff.
    reference = host_frame.groupby(group_keys).diff(periods=periods)
    result = device_frame.groupby(group_keys).diff(periods=periods)

    assert_groupby_results_equal(reference[value_cols], result[value_cols])


@pytest.mark.parametrize("nelem", [10, 50, 100, 1000])
@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5])
@pytest.mark.parametrize("direction", [1, -1])
def test_groupby_diff_row_mixed_numerics(nelem, shift_perc, direction):
    """Check cudf groupby ``diff`` against pandas on mixed column types.

    Column "0" (int64, no nulls) is the grouping key; columns "1" to "5"
    are the value columns, each generated with ``null_frequency`` 0.4.
    The period is ``int(nelem * shift_perc)`` signed by ``direction``.
    """
    t = rand_dataframe(
        dtypes_meta=[
            {"dtype": "int64", "null_frequency": 0, "cardinality": 10},
            {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10},
            {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10},
            {"dtype": "decimal64", "null_frequency": 0.4, "cardinality": 10},
            {
                "dtype": "datetime64[ns]",
                "null_frequency": 0.4,
                "cardinality": 10,
            },
            {
                "dtype": "timedelta64[ns]",
                "null_frequency": 0.4,
                "cardinality": 10,
            },
        ],
        rows=nelem,
        use_threads=False,
    )
    pdf = t.to_pandas()
    gdf = cudf.from_pandas(pdf)
    n_shift = int(nelem * shift_perc) * direction

    expected = pdf.groupby(["0"]).diff(periods=n_shift)
    got = gdf.groupby(["0"]).diff(periods=n_shift)

    # Compare every non-key column, including the timedelta column "5".
    value_cols = ["1", "2", "3", "4", "5"]
    assert_groupby_results_equal(expected[value_cols], got[value_cols])


@pytest.mark.parametrize("nelem", [10, 50, 100, 1000])
def test_groupby_diff_row_zero_shift(nelem):
    """Check cudf groupby ``diff`` with ``periods=0`` against pandas.

    Column "0" (int64, no nulls) is the grouping key; columns "1" to "4"
    are the value columns, each generated with ``null_frequency`` 0.4.
    """
    t = rand_dataframe(
        dtypes_meta=[
            {"dtype": "int64", "null_frequency": 0, "cardinality": 10},
            {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10},
            {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10},
            {
                "dtype": "datetime64[ns]",
                "null_frequency": 0.4,
                "cardinality": 10,
            },
            {
                "dtype": "timedelta64[ns]",
                "null_frequency": 0.4,
                "cardinality": 10,
            },
        ],
        rows=nelem,
        use_threads=False,
    )
    pdf = t.to_pandas()
    gdf = cudf.from_pandas(pdf)

    # Exercise ``diff`` itself (not ``shift``) for the zero-period case,
    # with pandas providing the reference result.
    expected = pdf.groupby(["0"]).diff(periods=0)
    got = gdf.groupby(["0"]).diff(periods=0)

    value_cols = ["1", "2", "3", "4"]
    assert_groupby_results_equal(expected[value_cols], got[value_cols])


# TODO: test for category columns when cudf.Scalar supports category type
@pytest.mark.parametrize("nelem", [10, 100, 1000])
def test_groupby_fillna_multi_value(nelem):
Expand Down

0 comments on commit ee30e27

Please sign in to comment.