From 051c68252b081a307c147ab129d659c7661e44df Mon Sep 17 00:00:00 2001 From: Sheilah Date: Thu, 21 Oct 2021 10:54:13 -0700 Subject: [PATCH 01/18] create new PR --- cpp/src/datetime/datetime_ops.cu | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 6e892b3e461..677bbe906ad 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -116,6 +116,34 @@ struct ceil_timestamp { } }; +struct floor_timestamp { + template + CUDA_DEVICE_CALLABLE Timestamp operator()(Timestamp const ts) const + { + using namespace cuda::std::chrono; + // want to use this with D, H, T (minute), S, L (millisecond), U + switch (COMPONENT) { + case datetime_component::DAY: + return time_point_cast(floor(ts)); + case datetime_component::HOUR: + return time_point_cast(floor(ts)); + case datetime_component::MINUTE: + return time_point_cast(floor(ts)); + case datetime_component::SECOND: + return time_point_cast(floor(ts)); + case datetime_component::MILLISECOND: + return time_point_cast(floor(ts)); + case datetime_component::MICROSECOND: + return time_point_cast(floor(ts)); + case datetime_component::NANOSECOND: + return time_point_cast(floor(ts)); + default: cudf_assert(false && "Unexpected resolution"); + } + + return {}; + } +}; + // Number of days until month indexed by leap year and month (0-based index) static __device__ int16_t const days_until_month[2][13] = { {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, // For non leap years From d75947b4f6f18415e272f952cd442ccbba455061 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Thu, 21 Oct 2021 19:02:53 -0700 Subject: [PATCH 02/18] added floor_general and floor_timestamp methods --- cpp/src/datetime/datetime_ops.cu | 69 +++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 677bbe906ad..7cbacbd0a14 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -116,6 +116,7 @@ struct ceil_timestamp { } }; +template struct floor_timestamp { template CUDA_DEVICE_CALLABLE Timestamp operator()(Timestamp const ts) const @@ -224,7 +225,7 @@ struct is_leap_year_op { // Specific function for applying ceil/floor date ops template -struct dispatch_ceil { +struct dispatch_ceil_or_floor { template std::enable_if_t(), std::unique_ptr> operator()( cudf::column_view const& column, @@ -431,7 +432,19 @@ std::unique_ptr ceil_general(column_view const& column, rmm::mr::device_memory_resource* mr) { return cudf::type_dispatcher( - column.type(), dispatch_ceil>{}, column, stream, mr); + column.type(), dispatch_ceil_or_floor>{}, column, stream, mr); +} + +template +std::unique_ptr floor_general(column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return cudf::type_dispatcher(column.type(), + dispatch_ceil_or_floor>{}, + column, + stream, + mr); } std::unique_ptr extract_year(column_view const& column, @@ -588,6 +601,58 @@ std::unique_ptr ceil_nanosecond(column_view const& column, column, rmm::cuda_stream_default, mr); } +std::unique_ptr floor_day(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + +std::unique_ptr floor_hour(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + +std::unique_ptr floor_minute(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + +std::unique_ptr floor_second(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + +std::unique_ptr floor_millisecond(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + +std::unique_ptr floor_microsecond(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + +std::unique_ptr floor_nanosecond(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + std::unique_ptr extract_year(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); From ded37c151b50c6583ddd65ab9f33a3a748c1818f Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Fri, 22 Oct 2021 04:01:20 +0000 Subject: [PATCH 03/18] Added initial cython bindings --- python/cudf/cudf/_lib/cpp/datetime.pxd | 13 +++++++++++ python/cudf/cudf/_lib/datetime.pyx | 26 ++++++++++++++++++++++ python/cudf/cudf/core/column/datetime.py | 3 +++ python/cudf/cudf/core/series.py | 7 ++++++ python/cudf/cudf/tests/test_datetime.py | 28 ++++++++++++++++++++++++ 5 files changed, 77 insertions(+) diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index c27eb324008..3a6b2e8ed19 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -23,6 +23,19 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: const column_view& column ) except + cdef unique_ptr[column] ceil_nanosecond(const column_view& column) except + + + cdef unique_ptr[column] floor_day(const column_view& column) except + + cdef unique_ptr[column] floor_hour(const column_view& column) except + + cdef unique_ptr[column] floor_minute(const column_view& column) except + + cdef unique_ptr[column] floor_second(const column_view& column) except + + cdef unique_ptr[column] floor_millisecond( + const column_view& column + ) except + + cdef unique_ptr[column] floor_microsecond( + const column_view& column + ) except + + cdef unique_ptr[column] floor_nanosecond(const column_view& column) except + + cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const column_view& months diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 4921d1b4ace..766e9faa520 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -85,6 +85,32 @@ def ceil_datetime(Column col, object field): result = Column.from_unique_ptr(move(c_result)) return result +def floor_datetime(Column col, object field): + cdef unique_ptr[column] c_result + cdef column_view col_view = col.view() + + with nogil: + # https://pandas.pydata.org/pandas-docs/version/0.25.0/reference/api/pandas.Timedelta.resolution.html + if field == "D": + c_result = move(libcudf_datetime.floor_day(col_view)) + elif field == "H": + c_result = move(libcudf_datetime.floor_hour(col_view)) + elif field == "T": + c_result = move(libcudf_datetime.floor_minute(col_view)) + elif field == "S": + c_result = move(libcudf_datetime.floor_second(col_view)) + elif field == "L": + c_result = move(libcudf_datetime.floor_millisecond(col_view)) + elif field == "U": + c_result = move(libcudf_datetime.floor_microsecond(col_view)) + elif field == "N": + c_result = move(libcudf_datetime.floor_nanosecond(col_view)) + else: + raise ValueError(f"Invalid resolution: '{field}'") + + result = Column.from_unique_ptr(move(c_result)) + return result + def is_leap_year(Column col): """Returns a boolean indicator whether the year of the date is a leap year diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index b1d69316863..159bcadc702 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -224,6 +224,9 @@ def get_dt_field(self, field: str) -> ColumnBase: def ceil(self, field: str) -> ColumnBase: return libcudf.datetime.ceil_datetime(self, field) + + def floor(self, field: str) -> ColumnBase: + return libcudf.datetime.floor_datetime(self, field) def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: if isinstance(other, cudf.Scalar): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index c8d8837cbaa..fa8519cd027 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5019,6 +5019,13 @@ def ceil(self, field): return Series( data=out_column, index=self.series._index, name=self.series.name ) + + def floor(self, field): + out_column = self.series._column.floor(field) + + return Series( + data=out_column, index=self.series._index, name=self.series.name + ) def strftime(self, date_format, *args, **kwargs): """ diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 3bbac217283..e99446bfe0f 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1631,3 +1631,31 @@ def test_ceil(data, time_type, resolution): expect = ps.dt.ceil(resolution) got = gs.dt.ceil(resolution) assert_eq(expect, got) + +@pytest.mark.parametrize( + "data", + [ + ( + [ + "2020-05-31 08:00:00", + "1999-12-31 18:40:10", + "2000-12-31 04:00:05", + "1900-02-28 07:00:06", + "1800-03-14 07:30:20", + "2100-03-14 07:30:20", + "1970-01-01 00:00:09", + "1969-12-31 12:59:10", + ] + ) + ], +) +@pytest.mark.parametrize("time_type", DATETIME_TYPES) +@pytest.mark.parametrize("resolution", ["D", "H", "T", "S", "L", "U", "N"]) +def test_floor(data, time_type, resolution): + + ps = pd.Series(data, dtype=time_type) + gs = cudf.from_pandas(ps) + + expect = ps.dt.floor(resolution) + got = gs.dt.floor(resolution) + assert_eq(expect, got) \ No newline at end of file From 58487a90e8fd9fdc04a14e39dc2bac5d1412a0fc Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Fri, 22 Oct 2021 04:33:05 +0000 Subject: [PATCH 04/18] fixed space issue --- python/cudf/cudf/tests/test_datetime.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index e99446bfe0f..12d38251a30 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1658,4 +1658,5 @@ def test_floor(data, time_type, resolution): expect = ps.dt.floor(resolution) got = gs.dt.floor(resolution) - assert_eq(expect, got) \ No newline at end of file + assert_eq(expect, got) + \ No newline at end of file From 1ee42eeb59e43dc05d5aaba7422d7422dc6b7876 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Thu, 21 Oct 2021 23:08:53 -0700 Subject: [PATCH 05/18] adding tests in libcudf --- cpp/tests/datetime/datetime_ops_test.cpp | 26 ++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 1d3e87279e5..fc7940149a0 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -827,4 +827,30 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_quarter(timestamps_s), quarter); } +TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) +{ + using T = TypeParam; + using namespace cudf::test; + using namespace cudf::datetime; + using namespace cuda::std::chrono; + + // Time in seconds since epoch + // Dates converted using epochconverter.com + auto timestamps_s = + cudf::test::fixed_width_column_wrapper{{ + 978307485L, // 2001-01-01 00:04:45 GMT + 978307498L, // 2001-01-01 00:04:58 GMT + 978307504L, // 2001-01-01 00:05:04 GMT + }}; + + auto expected_minute = + cudf::test::fixed_width_column_wrapper{{ + 978307500L, // 2001-01-01 00:05:00 GMT + 978307500L, // 2001-01-01 00:05:00 GMT + 978307560L, // 2001-01-01 00:06:00 GMT + }}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_day(timestamps_s), expected_minute); +} + CUDF_TEST_PROGRAM_MAIN() From 0f55eb4dcb2d80319714786d22b279973a45ba0f Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Fri, 22 Oct 2021 13:35:28 +0000 Subject: [PATCH 06/18] Fixed style issues --- python/cudf/cudf/_lib/cpp/datetime.pxd | 10 ++++++---- python/cudf/cudf/_lib/datetime.pyx | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index 3a6b2e8ed19..c437041ceec 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -22,8 +22,9 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] ceil_microsecond( const column_view& column ) except + - cdef unique_ptr[column] ceil_nanosecond(const column_view& column) except + - + cdef unique_ptr[column] ceil_nanosecond( + const column_view& column + ) except + cdef unique_ptr[column] floor_day(const column_view& column) except + cdef unique_ptr[column] floor_hour(const column_view& column) except + cdef unique_ptr[column] floor_minute(const column_view& column) except + @@ -34,8 +35,9 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] floor_microsecond( const column_view& column ) except + - cdef unique_ptr[column] floor_nanosecond(const column_view& column) except + - + cdef unique_ptr[column] floor_nanosecond( + const column_view& column + ) except + cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const column_view& months diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 766e9faa520..580e55a4308 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -85,6 +85,7 @@ def ceil_datetime(Column col, object field): result = Column.from_unique_ptr(move(c_result)) return result + def floor_datetime(Column col, object field): cdef unique_ptr[column] c_result cdef column_view col_view = col.view() From 2440210790c2ebd79161e695a73d871a89e12147 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Fri, 22 Oct 2021 13:40:27 +0000 Subject: [PATCH 07/18] fixed more style issues --- python/cudf/cudf/_lib/cpp/datetime.pxd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index c437041ceec..71064a555c9 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -24,7 +24,7 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: ) except + cdef unique_ptr[column] ceil_nanosecond( const column_view& column - ) except + + ) except + cdef unique_ptr[column] floor_day(const column_view& column) except + cdef unique_ptr[column] floor_hour(const column_view& column) except + cdef unique_ptr[column] floor_minute(const column_view& column) except + @@ -37,7 +37,7 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: ) except + cdef unique_ptr[column] floor_nanosecond( const column_view& column - ) except + + ) except + cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const column_view& months From 9a6458e35540709909c4856238dc267b68b00d45 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Fri, 22 Oct 2021 19:00:57 +0000 Subject: [PATCH 08/18] fixed issue with black formatting --- python/cudf/cudf/core/column/datetime.py | 5 ++- python/cudf/cudf/core/series.py | 46 ++++++++++++++++-------- python/cudf/cudf/tests/test_datetime.py | 17 ++++++--- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 159bcadc702..7492c127a67 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -224,7 +224,7 @@ def get_dt_field(self, field: str) -> ColumnBase: def ceil(self, field: str) -> ColumnBase: return libcudf.datetime.ceil_datetime(self, field) - + def floor(self, field: str) -> ColumnBase: return libcudf.datetime.floor_datetime(self, field) @@ -350,8 +350,7 @@ def as_string_column( ) def _default_na_value(self) -> DatetimeLikeScalar: - """Returns the default NA value for this column - """ + """Returns the default NA value for this column""" return np.datetime64("nat", self.time_unit) def mean(self, skipna=None, dtype=np.float64) -> ScalarLike: diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index fa8519cd027..e5524ad4ec9 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -85,7 +85,7 @@ def _append_new_row_inplace(col: ColumnLike, value: ScalarLike): """Append a scalar `value` to the end of `col` inplace. - Cast to common type if possible + Cast to common type if possible """ to_type = find_common_type([type(value), col.dtype]) val_col = as_column(value, dtype=to_type) @@ -392,7 +392,12 @@ def from_masked_array(cls, data, mask, null_count=None): return cls(data=col) def __init__( - self, data=None, index=None, dtype=None, name=None, nan_as_null=True, + self, + data=None, + index=None, + dtype=None, + name=None, + nan_as_null=True, ): if isinstance(data, pd.Series): if name is None: @@ -1363,7 +1368,8 @@ def _binaryop( operands = lhs._make_operands_for_binop(other, fill_value, reflect) return ( lhs._from_data( - data=lhs._colwise_binop(operands, fn), index=lhs._index, + data=lhs._colwise_binop(operands, fn), + index=lhs._index, ) if operands is not NotImplemented else NotImplemented @@ -2120,8 +2126,7 @@ def data(self): @property def index(self): - """The index object - """ + """The index object""" return self._index @index.setter @@ -2130,8 +2135,7 @@ def index(self, _index): @property def nullmask(self): - """The gpu buffer for the null-mask - """ + """The gpu buffer for the null-mask""" return cudf.Series(self._column.nullmask) def as_mask(self): @@ -3649,7 +3653,10 @@ def quantile( index = np.asarray(q) if len(self) == 0: result = column_empty_like( - index, dtype=self.dtype, masked=True, newsize=len(index), + index, + dtype=self.dtype, + masked=True, + newsize=len(index), ) else: index = None @@ -3702,7 +3709,10 @@ def _describe_numeric(self): data = _format_stats_values(data) return Series( - data=data, index=index, nan_as_null=False, name=self.name, + data=data, + index=index, + nan_as_null=False, + name=self.name, ) def _describe_timedelta(self): @@ -4693,7 +4703,9 @@ def quarter(self): np.int8 ) return Series._from_data( - {None: res}, index=self.series._index, name=self.series.name, + {None: res}, + index=self.series._index, + name=self.series.name, ) def isocalendar(self): @@ -4886,7 +4898,9 @@ def is_quarter_start(self): result = ((day == cudf.Scalar(1)) & first_month).fillna(False) return Series._from_data( - {None: result}, index=self.series._index, name=self.series.name, + {None: result}, + index=self.series._index, + name=self.series.name, ) @property @@ -4934,7 +4948,9 @@ def is_quarter_end(self): result = ((day == last_day) & last_month).fillna(False) return Series._from_data( - {None: result}, index=self.series._index, name=self.series.name, + {None: result}, + index=self.series._index, + name=self.series.name, ) @property @@ -5004,7 +5020,9 @@ def is_year_end(self): result = cudf._lib.copying.copy_if_else(leap, non_leap, leap_dates) result = result.fillna(False) return Series._from_data( - {None: result}, index=self.series._index, name=self.series.name, + {None: result}, + index=self.series._index, + name=self.series.name, ) def _get_dt_field(self, field): @@ -5019,7 +5037,7 @@ def ceil(self, field): return Series( data=out_column, index=self.series._index, name=self.series.name ) - + def floor(self, field): out_column = self.series._column.floor(field) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 12d38251a30..1243d807319 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -580,7 +580,11 @@ def test_datetime_dataframe(): dtype="datetime64[ns]", freq=None, ), - pd.DatetimeIndex([], dtype="datetime64[ns]", freq=None,), + pd.DatetimeIndex( + [], + dtype="datetime64[ns]", + freq=None, + ), pd.Series([1, 2, 3]).astype("datetime64[ns]"), pd.Series([1, 2, 3]).astype("datetime64[us]"), pd.Series([1, 2, 3]).astype("datetime64[ms]"), @@ -681,7 +685,11 @@ def test_to_datetime_not_implemented(): pd.Series([0, 1, -1]), pd.Series([0, 1, -1, 100, 200, 47637]), [10, 12, 1200, 15003], - pd.DatetimeIndex([], dtype="datetime64[ns]", freq=None,), + pd.DatetimeIndex( + [], + dtype="datetime64[ns]", + freq=None, + ), pd.Index([1, 2, 3, 4]), ], ) @@ -941,7 +949,8 @@ def test_datetime_subtract(data, other, data_dtype, other_dtype): ) @pytest.mark.parametrize("dtype", DATETIME_TYPES) @pytest.mark.parametrize( - "op", ["add", "sub"], + "op", + ["add", "sub"], ) def test_datetime_series_ops_with_scalars(data, other_scalars, dtype, op): gsr = cudf.Series(data=data, dtype=dtype) @@ -1632,6 +1641,7 @@ def test_ceil(data, time_type, resolution): got = gs.dt.ceil(resolution) assert_eq(expect, got) + @pytest.mark.parametrize( "data", [ @@ -1659,4 +1669,3 @@ def test_floor(data, time_type, resolution): expect = ps.dt.floor(resolution) got = gs.dt.floor(resolution) assert_eq(expect, got) - \ No newline at end of file From 4862ce4177ac985a49bd1b10cd266b9320014ca7 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Mon, 25 Oct 2021 16:33:37 +0000 Subject: [PATCH 09/18] black formatting --- python/cudf/cudf/core/series.py | 36 ++++++------------------- python/cudf/cudf/tests/test_datetime.py | 15 +++-------- 2 files changed, 11 insertions(+), 40 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index e5524ad4ec9..e70157eb606 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -392,12 +392,7 @@ def from_masked_array(cls, data, mask, null_count=None): return cls(data=col) def __init__( - self, - data=None, - index=None, - dtype=None, - name=None, - nan_as_null=True, + self, data=None, index=None, dtype=None, name=None, nan_as_null=True, ): if isinstance(data, pd.Series): if name is None: @@ -1368,8 +1363,7 @@ def _binaryop( operands = lhs._make_operands_for_binop(other, fill_value, reflect) return ( lhs._from_data( - data=lhs._colwise_binop(operands, fn), - index=lhs._index, + data=lhs._colwise_binop(operands, fn), index=lhs._index, ) if operands is not NotImplemented else NotImplemented @@ -3653,10 +3647,7 @@ def quantile( index = np.asarray(q) if len(self) == 0: result = column_empty_like( - index, - dtype=self.dtype, - masked=True, - newsize=len(index), + index, dtype=self.dtype, masked=True, newsize=len(index), ) else: index = None @@ -3709,10 +3700,7 @@ def _describe_numeric(self): data = _format_stats_values(data) return Series( - data=data, - index=index, - nan_as_null=False, - name=self.name, + data=data, index=index, nan_as_null=False, name=self.name, ) def _describe_timedelta(self): @@ -4703,9 +4691,7 @@ def quarter(self): np.int8 ) return Series._from_data( - {None: res}, - index=self.series._index, - name=self.series.name, + {None: res}, index=self.series._index, name=self.series.name, ) def isocalendar(self): @@ -4898,9 +4884,7 @@ def is_quarter_start(self): result = ((day == cudf.Scalar(1)) & first_month).fillna(False) return Series._from_data( - {None: result}, - index=self.series._index, - name=self.series.name, + {None: result}, index=self.series._index, name=self.series.name, ) @property @@ -4948,9 +4932,7 @@ def is_quarter_end(self): result = ((day == last_day) & last_month).fillna(False) return Series._from_data( - {None: result}, - index=self.series._index, - name=self.series.name, + {None: result}, index=self.series._index, name=self.series.name, ) @property @@ -5020,9 +5002,7 @@ def is_year_end(self): result = cudf._lib.copying.copy_if_else(leap, non_leap, leap_dates) result = result.fillna(False) return Series._from_data( - {None: result}, - index=self.series._index, - name=self.series.name, + {None: result}, index=self.series._index, name=self.series.name, ) def _get_dt_field(self, field): diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 1243d807319..000f9f6d117 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -580,11 +580,7 @@ def test_datetime_dataframe(): dtype="datetime64[ns]", freq=None, ), - pd.DatetimeIndex( - [], - dtype="datetime64[ns]", - freq=None, - ), + pd.DatetimeIndex([], dtype="datetime64[ns]", freq=None,), pd.Series([1, 2, 3]).astype("datetime64[ns]"), pd.Series([1, 2, 3]).astype("datetime64[us]"), pd.Series([1, 2, 3]).astype("datetime64[ms]"), @@ -685,11 +681,7 @@ def test_to_datetime_not_implemented(): pd.Series([0, 1, -1]), pd.Series([0, 1, -1, 100, 200, 47637]), [10, 12, 1200, 15003], - pd.DatetimeIndex( - [], - dtype="datetime64[ns]", - freq=None, - ), + pd.DatetimeIndex([], dtype="datetime64[ns]", freq=None,), pd.Index([1, 2, 3, 4]), ], ) @@ -949,8 +941,7 @@ def test_datetime_subtract(data, other, data_dtype, other_dtype): ) @pytest.mark.parametrize("dtype", DATETIME_TYPES) @pytest.mark.parametrize( - "op", - ["add", "sub"], + "op", ["add", "sub"], ) def test_datetime_series_ops_with_scalars(data, other_scalars, dtype, op): gsr = cudf.Series(data=data, dtype=dtype) From de4966033dfbd45a33eb0b79820dc26fdb8569aa Mon Sep 17 00:00:00 2001 From: Sheilah Date: Mon, 25 Oct 2021 10:51:27 -0700 Subject: [PATCH 10/18] changing tests to resemble TestCeilDatetime format --- cpp/tests/datetime/datetime_ops_test.cpp | 69 ++++++++++++++++++------ 1 file changed, 52 insertions(+), 17 deletions(-) diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index fc7940149a0..4390ba90cac 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -834,23 +834,58 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) using namespace cudf::datetime; using namespace cuda::std::chrono; - // Time in seconds since epoch - // Dates converted using epochconverter.com - auto timestamps_s = - cudf::test::fixed_width_column_wrapper{{ - 978307485L, // 2001-01-01 00:04:45 GMT - 978307498L, // 2001-01-01 00:04:58 GMT - 978307504L, // 2001-01-01 00:05:04 GMT - }}; - - auto expected_minute = - cudf::test::fixed_width_column_wrapper{{ - 978307500L, // 2001-01-01 00:05:00 GMT - 978307500L, // 2001-01-01 00:05:00 GMT - 978307560L, // 2001-01-01 00:06:00 GMT - }}; - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_day(timestamps_s), expected_minute); + auto start = milliseconds(-2500000000000); // Sat, 11 Oct 1890 19:33:20 GMT + auto stop_ = milliseconds(2500000000000); // Mon, 22 Mar 2049 04:26:40 GMT + + auto input = generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop_)); + + auto host_val = to_host(input); + thrust::host_vector timestamps = host_val.first; + + + thrust::host_vector floored_day(timestamps.size()); + thrust::transform(timestamps.begin(), timestamps.end(), floored_day.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_day = + fixed_width_column_wrapper(floored_day.begin(), floored_day.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_day(input), expected_day); + + + thrust::host_vector floored_hour(timestamps.size()); + thrust::transform(timestamps.begin(), timestamps.end(), floored_hour.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_hour = fixed_width_column_wrapper(floored_hour.begin(), + floored_hour.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_hour(input), expected_hour); + + + std::vector floored_minute(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_minute.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_minute = fixed_width_column_wrapper( + floored_minute.begin(), floored_minute.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_minute(input), expected_minute); + + + std::vector floored_second(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_second.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_second = fixed_width_column_wrapper( + floored_second.begin(), floored_second.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_second); + + + std::vector floored_millisecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_millisecond.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_millisecond = fixed_width_column_wrapper( + floored_millisecond.begin(), floored_millisecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_millisecond); } CUDF_TEST_PROGRAM_MAIN() From 9cbb75e68d29c73d464d3ff748d1eefe8c418c76 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Mon, 25 Oct 2021 12:17:14 -0700 Subject: [PATCH 11/18] . --- cpp/tests/datetime/datetime_ops_test.cpp | 68 +++++++++++------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 4390ba90cac..d60b533963f 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -842,7 +842,6 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) auto host_val = to_host(input); thrust::host_vector timestamps = host_val.first; - thrust::host_vector floored_day(timestamps.size()); thrust::transform(timestamps.begin(), timestamps.end(), floored_day.begin(), [](auto i) { return time_point_cast(floor(i)); @@ -851,41 +850,38 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) fixed_width_column_wrapper(floored_day.begin(), floored_day.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_day(input), expected_day); - - thrust::host_vector floored_hour(timestamps.size()); - thrust::transform(timestamps.begin(), timestamps.end(), floored_hour.begin(), [](auto i) { - return time_point_cast(floor(i)); - }); - auto expected_hour = fixed_width_column_wrapper(floored_hour.begin(), - floored_hour.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_hour(input), expected_hour); - - - std::vector floored_minute(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), floored_minute.begin(), [](auto i) { - return time_point_cast(floor(i)); - }); - auto expected_minute = fixed_width_column_wrapper( - floored_minute.begin(), floored_minute.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_minute(input), expected_minute); - - - std::vector floored_second(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), floored_second.begin(), [](auto i) { - return time_point_cast(floor(i)); - }); - auto expected_second = fixed_width_column_wrapper( - floored_second.begin(), floored_second.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_second); - - - std::vector floored_millisecond(timestamps.size()); - std::transform(timestamps.begin(), timestamps.end(), floored_millisecond.begin(), [](auto i) { - return time_point_cast(floor(i)); - }); - auto expected_millisecond = fixed_width_column_wrapper( - floored_millisecond.begin(), floored_millisecond.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_millisecond); +// thrust::host_vector floored_hour(timestamps.size()); +// thrust::transform(timestamps.begin(), timestamps.end(), floored_hour.begin(), [](auto i) { +// return time_point_cast(floor(i)); +// }); +// auto expected_hour = fixed_width_column_wrapper(floored_hour.begin(), +// floored_hour.end()); +// CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_hour(input), expected_hour); + +// std::vector floored_minute(timestamps.size()); +// std::transform(timestamps.begin(), timestamps.end(), floored_minute.begin(), [](auto i) { +// return time_point_cast(floor(i)); +// }); +// auto expected_minute = fixed_width_column_wrapper( +// floored_minute.begin(), floored_minute.end()); +// CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_minute(input), expected_minute); + +// std::vector floored_second(timestamps.size()); +// std::transform(timestamps.begin(), timestamps.end(), floored_second.begin(), [](auto i) { +// return time_point_cast(floor(i)); +// }); +// auto expected_second = fixed_width_column_wrapper( +// floored_second.begin(), floored_second.end()); +// CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_second); + +// std::vector floored_millisecond(timestamps.size()); +// std::transform(timestamps.begin(), timestamps.end(), floored_millisecond.begin(), [](auto i) { +// return time_point_cast(floor(i)); +// }); +// auto expected_millisecond = fixed_width_column_wrapper( +// floored_millisecond.begin(), floored_millisecond.end()); +// CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_millisecond); +// } CUDF_TEST_PROGRAM_MAIN() From df519dddacbde71ef2180de823ab87de2ff78ca0 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Mon, 25 Oct 2021 18:16:27 -0700 Subject: [PATCH 12/18] added declarations for floor methods in header file datetime.hpp --- cpp/include/cudf/datetime.hpp | 91 +++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index d67984daa7c..4d23d116fb4 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -378,5 +378,96 @@ std::unique_ptr ceil_nanosecond( column_view const& column, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Round up to the nearest day + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_day( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round up to the nearest hour + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_hour( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round up to the nearest minute + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_minute( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round up to the nearest second + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_second( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round up to the nearest millisecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_millisecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round up to the nearest microsecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_microsecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round up to the nearest nanosecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_nanosecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + } // namespace datetime } // namespace cudf From 0ae000fcc577f5f7adc5e3f17faf0d2ac96a3c81 Mon Sep 17 00:00:00 2001 From: Sheilah Date: Mon, 25 Oct 2021 18:17:43 -0700 Subject: [PATCH 13/18] all tests passing now --- cpp/tests/datetime/datetime_ops_test.cpp | 67 ++++++++++++------------ 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index d60b533963f..d2703b8acd0 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -846,42 +846,41 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) thrust::transform(timestamps.begin(), timestamps.end(), floored_day.begin(), [](auto i) { return time_point_cast(floor(i)); }); - auto expected_day = - fixed_width_column_wrapper(floored_day.begin(), floored_day.end()); + auto expected_day = fixed_width_column_wrapper(floored_day.begin(), + floored_day.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_day(input), expected_day); -// thrust::host_vector floored_hour(timestamps.size()); -// thrust::transform(timestamps.begin(), timestamps.end(), floored_hour.begin(), [](auto i) { -// return time_point_cast(floor(i)); -// }); -// auto expected_hour = fixed_width_column_wrapper(floored_hour.begin(), -// floored_hour.end()); -// CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_hour(input), expected_hour); - -// std::vector floored_minute(timestamps.size()); -// std::transform(timestamps.begin(), timestamps.end(), floored_minute.begin(), [](auto i) { -// return time_point_cast(floor(i)); -// }); -// auto expected_minute = fixed_width_column_wrapper( -// floored_minute.begin(), floored_minute.end()); -// CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_minute(input), expected_minute); - -// std::vector floored_second(timestamps.size()); -// std::transform(timestamps.begin(), timestamps.end(), floored_second.begin(), [](auto i) { -// return time_point_cast(floor(i)); -// }); -// auto expected_second = fixed_width_column_wrapper( -// floored_second.begin(), floored_second.end()); -// CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_second); - -// std::vector floored_millisecond(timestamps.size()); -// std::transform(timestamps.begin(), timestamps.end(), floored_millisecond.begin(), [](auto i) { -// return time_point_cast(floor(i)); -// }); -// auto expected_millisecond = fixed_width_column_wrapper( -// floored_millisecond.begin(), floored_millisecond.end()); -// CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_millisecond); -// + thrust::host_vector floored_hour(timestamps.size()); + thrust::transform(timestamps.begin(), timestamps.end(), floored_hour.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_hour = fixed_width_column_wrapper( + floored_hour.begin(), floored_hour.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_hour(input), expected_hour); + + std::vector floored_minute(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_minute.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_minute = fixed_width_column_wrapper( + floored_minute.begin(), floored_minute.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_minute(input), expected_minute); + + std::vector floored_second(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_second.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_second = fixed_width_column_wrapper( + floored_second.begin(), floored_second.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_second); + + std::vector floored_millisecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_millisecond.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_millisecond = fixed_width_column_wrapper( + floored_millisecond.begin(), floored_millisecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_millisecond); } CUDF_TEST_PROGRAM_MAIN() From c944d284efd029c678bd8f31183efecc91d699ec Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Tue, 26 Oct 2021 16:24:09 +0000 Subject: [PATCH 14/18] fix bug in function docs --- cpp/include/cudf/datetime.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 4d23d116fb4..629a5113ce1 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -379,7 +379,7 @@ std::unique_ptr ceil_nanosecond( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round up to the nearest day + * @brief Round down to the nearest day * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. @@ -392,7 +392,7 @@ std::unique_ptr floor_day( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round up to the nearest hour + * @brief Round down to the nearest hour * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. @@ -405,7 +405,7 @@ std::unique_ptr floor_hour( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round up to the nearest minute + * @brief Round down to the nearest minute * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. @@ -418,7 +418,7 @@ std::unique_ptr floor_minute( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round up to the nearest second + * @brief Round down to the nearest second * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. @@ -431,7 +431,7 @@ std::unique_ptr floor_second( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round up to the nearest millisecond + * @brief Round down to the nearest millisecond * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. @@ -444,7 +444,7 @@ std::unique_ptr floor_millisecond( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round up to the nearest microsecond + * @brief Round down to the nearest microsecond * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. @@ -457,7 +457,7 @@ std::unique_ptr floor_microsecond( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Round up to the nearest nanosecond + * @brief Round down to the nearest nanosecond * * @param column cudf::column_view of the input datetime values * @param mr Device memory resource used to allocate device memory of the returned column. From f705facb82a38c25a42d2d01d698d690f1b36793 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Wed, 27 Oct 2021 15:53:09 +0000 Subject: [PATCH 15/18] fix bug in function docs --- docs/cudf/source/api_docs/series.rst | 2 + python/cudf/cudf/core/series.py | 56 ++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 95cf58adf0e..c32a4a22ef2 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -297,6 +297,8 @@ Datetime methods strftime isocalendar + ceil + floor Timedelta properties diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 9958619291d..58e745c4fe8 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5016,6 +5016,34 @@ def _get_dt_field(self, field): ) def ceil(self, field): + """ + Perform ceil operation on the data to the specified freq. + + Parameters + ---------- + field : str + One of ["D", "H", "T", "S", "L", "U", "N"] + See `frequency aliases `_ + for more details on these aliases. + + Returns + ------- + DatetimeIndex, TimedeltaIndex, or Series + Index of the same type for a DatetimeIndex or TimedeltaIndex, + or a Series with the same index for a Series. + + Examples + -------- + >>> import cudf + >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", + ... "2001-01-01 00:05:04"], dtype="datetime64[ns]") + >>> t.dt.ceil("T") + 0 2001-01-01 00:05:00 + 1 2001-01-01 00:05:00 + 2 2001-01-01 00:06:00 + dtype: datetime64[ns] + """ out_column = self.series._column.ceil(field) return Series( @@ -5023,6 +5051,34 @@ def ceil(self, field): ) def floor(self, field): + """ + Perform floor operation on the data to the specified freq. + + Parameters + ---------- + field : str + One of ["D", "H", "T", "S", "L", "U", "N"] + See `frequency aliases `_ + for more details on these aliases. + + Returns + ------- + DatetimeIndex, TimedeltaIndex, or Series + Index of the same type for a DatetimeIndex or TimedeltaIndex, + or a Series with the same index for a Series. + + Examples + -------- + >>> import cudf + >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", + ... "2001-01-01 00:05:04"], dtype="datetime64[ns]") + >>> t.dt.floor("T") + 0 2001-01-01 00:04:00 + 1 2001-01-01 00:04:00 + 2 2001-01-01 00:05:00 + dtype: datetime64[ns] + """ out_column = self.series._column.floor(field) return Series( From 41c3b24e9a0be749abe06b7f889b21dd0579aaa3 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Wed, 27 Oct 2021 15:57:13 +0000 Subject: [PATCH 16/18] updated copyright year --- cpp/include/cudf/datetime.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 629a5113ce1..71e5968bf07 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 8757628f891d1283092a12cf143cd3dd6db4df68 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Wed, 27 Oct 2021 17:56:36 +0000 Subject: [PATCH 17/18] fixed test cases --- python/cudf/cudf/tests/test_datetime.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 000f9f6d117..85f1055da89 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1625,8 +1625,8 @@ def test_error_values(): @pytest.mark.parametrize("resolution", ["D", "H", "T", "S", "L", "U", "N"]) def test_ceil(data, time_type, resolution): - ps = pd.Series(data, dtype=time_type) - gs = cudf.from_pandas(ps) + gs = cudf.Series(data, dtype=time_type) + ps = gs.to_pandas() expect = ps.dt.ceil(resolution) got = gs.dt.ceil(resolution) @@ -1654,8 +1654,8 @@ def test_ceil(data, time_type, resolution): @pytest.mark.parametrize("resolution", ["D", "H", "T", "S", "L", "U", "N"]) def test_floor(data, time_type, resolution): - ps = pd.Series(data, dtype=time_type) - gs = cudf.from_pandas(ps) + gs = cudf.Series(data, dtype=time_type) + ps = gs.to_pandas() expect = ps.dt.floor(resolution) got = gs.dt.floor(resolution) From aba0c31278520562f5dd61aa7cc421e1c8c4546f Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Thu, 28 Oct 2021 19:13:08 +0000 Subject: [PATCH 18/18] fixing function doc issue --- python/cudf/cudf/core/series.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index de5a32eff3d..97388559aa7 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5017,9 +5017,8 @@ def ceil(self, field): Returns ------- - DatetimeIndex, TimedeltaIndex, or Series - Index of the same type for a DatetimeIndex or TimedeltaIndex, - or a Series with the same index for a Series. + Series + Series with the same index for a Series. Examples -------- @@ -5052,9 +5051,8 @@ def floor(self, field): Returns ------- - DatetimeIndex, TimedeltaIndex, or Series - Index of the same type for a DatetimeIndex or TimedeltaIndex, - or a Series with the same index for a Series. + Series + Series with the same index for a Series. Examples --------