Skip to content

Commit

Permalink
Merge pull request #2859 from rgsl888prabhu/2495_tail_method_fail_on_…
Browse files Browse the repository at this point in the history
…string

[REVIEW] Fix tail method issue for string columns
  • Loading branch information
Keith Kraus authored Sep 28, 2019
2 parents 5efdfc2 + 039f3dd commit 989190f
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
- PR #2809 Add hash_df and group_split dispatch functions for dask
- PR #2843 Parquet reader: fix skip_rows when not aligned with page or row_group boundaries
- PR #2851 Deleted existing dask-cudf/record.txt
- PR #2859 Fix tail method issue for string columns
- PR #2852 Fixed `cumsum()` and `cumprod()` on boolean series.
- PR #2750 Fixed casting values to cudf::bool8 so non-zero values always cast to true
- PR #2873 Fixed dask_cudf read_partition bug by generating ParquetDatasetPiece
Expand Down
7 changes: 7 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1847,6 +1847,13 @@ def check_frame_series_equality(left, right):
check_frame_series_equality(gdf["a"].tail(-2), gdf["a"][2:])


def test_tail_for_string():
    """Check ``DataFrame.tail`` on a frame that contains a string column.

    The frame has two rows and ``tail(3)`` asks for more rows than exist,
    so the result is compared against what pandas returns for the same
    request on the equivalent pandas frame.
    """
    gdf = DataFrame()
    # Builtin ``object`` is the dtype the old ``np.object`` alias pointed to;
    # the alias was deprecated in NumPy 1.20 and removed in NumPy 1.24.
    gdf["id"] = Series(["a", "b"], dtype=object)
    gdf["v"] = Series([1, 2])
    assert_eq(gdf.tail(3), gdf.to_pandas().tail(3))


@pytest.mark.parametrize("drop", [True, False])
def test_reset_index(pdf, gdf, drop):
assert_eq(pdf.reset_index(drop=drop), gdf.reset_index(drop=drop))
Expand Down
7 changes: 1 addition & 6 deletions python/nvstrings/nvstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,12 +419,7 @@ def __getitem__(self, key):
if isinstance(key, int):
return self.gather([key])
if isinstance(key, slice):
start = 0 if key.start is None else key.start
end = self.size() if key.stop is None else key.stop
step = 1 if key.step is None or key.step == 0 else key.step
# negative slicing check
end = self.size() + end if end < 0 else end
start = self.size() + start if start < 0 else start
start, end, step = key.indices(self.size())
rtn = pyniNVStrings.n_sublist(self.m_cptr, start, end, step)
if rtn is not None:
rtn = nvstrings(rtn)
Expand Down
9 changes: 9 additions & 0 deletions python/nvstrings/tests/test_array.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Copyright (c) 2018-2019, NVIDIA CORPORATION.

from utils import assert_eq

import nvstrings


Expand Down Expand Up @@ -44,3 +46,10 @@ def test_scalar_scatter():
got = s1.scalar_scatter("+", [1, 3], 2)
expected = ["a", "+", "c", "+"]
assert got.to_host() == expected


def test_slice_negative_start():
    """Slice a two-element array with a start of -3 and no stop or step.

    The negative start reaches past the front of the array; the test
    expects both strings to come back.
    """
    device_strings = nvstrings.to_device(["a", "b"])
    # ``[-3:]`` builds the same key as ``slice(-3, None, None)``.
    sliced = device_strings[-3:]
    wanted = ["a", "b"]
    assert_eq(sliced, wanted)

0 comments on commit 989190f

Please sign in to comment.