From d6a331f755de08829f35362b62948533c3190cd9 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Mon, 19 Jul 2021 22:37:27 +0000 Subject: [PATCH 01/18] added extract_quarter --- cpp/include/cudf/datetime.hpp | 16 +++++++++++ cpp/include/cudf/detail/datetime.hpp | 5 ++++ cpp/src/datetime/datetime_ops.cu | 33 ++++++++++++++++++++++ cpp/tests/datetime/datetime_ops_test.cpp | 35 +++++++++++++++++++++++- 4 files changed, 88 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 980c824fdf2..bfe09c3c5d3 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -206,6 +206,22 @@ std::unique_ptr is_leap_year( cudf::column_view const& column, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns the quarter of the date + * + * `output[i] == true` if year of `column[i]` is a leap year + * `output[i] == false` if year of `column[i]` is not a leap year + * `output[i] is null` if `column[i]` is null + * + * @param[in] cudf::column_view of the input datetime values + * + * @returns cudf::column of datatype INT16 indicating which quarter the date is in + * @throw cudf::logic_error if input column datatype is not a TIMESTAMP + */ +std::unique_ptr extract_quarter( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace datetime } // namespace cudf diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index 9cc319b5011..3a4459d9f95 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -135,6 +135,11 @@ std::unique_ptr is_leap_year( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr extract_quarter( + cudf::column_view const& column, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + } // namespace detail } // namespace datetime } // namespace cudf diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 41f3e7dcfee..994a63e68c5 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -127,6 +127,25 @@ struct extract_day_num_of_year { } }; +// Extract the day number of the year present in the timestamp +struct extract_quarter_op { + template + CUDA_DEVICE_CALLABLE int16_t operator()(Timestamp const ts) const + { + using namespace cuda::std::chrono; + + // Only has the days - time component is chopped off, which is what we want + auto const days_since_epoch = floor(ts); + auto const date = year_month_day(days_since_epoch); + auto const month = unsigned{date.month()}; + + if (month <= 3) return 1; + if (month <= 6) return 2; + if (month <= 9) return 3; + return 4; + } +}; + struct is_leap_year_op { template CUDA_DEVICE_CALLABLE bool operator()(Timestamp const ts) const @@ -376,6 +395,13 @@ std::unique_ptr is_leap_year(column_view const& column, return apply_datetime_op(column, stream, mr); } +std::unique_ptr extract_quarter(column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return apply_datetime_op(column, stream, mr); +} + } // namespace detail std::unique_ptr extract_year(column_view const& column, rmm::mr::device_memory_resource* mr) @@ -452,5 +478,12 @@ std::unique_ptr is_leap_year(column_view const& column, rmm::mr::device_ return detail::is_leap_year(column, rmm::cuda_stream_default, mr); } +std::unique_ptr extract_quarter(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::extract_quarter(column, rmm::cuda_stream_default, mr); +} + } // namespace datetime } // namespace cudf diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index cdfc9de395c..5ed2ba57ca2 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -550,7 +550,7 @@ TEST_F(BasicDatetimeOpsTest, TestIsLeapYear) 915148800L, // 1999-01-01 00:00:00 GMT - non leap year -11663029161L, // 1600-5-31 05:40:39 GMT - leap year 707904541L, // 1992-06-07 08:09:01 GMT - leap year - 2181048447L, // 1900-11-20 09:12:33 GMT - non leap year + -2181005247L, // 1900-11-20 09:12:33 GMT - non leap year 0L, // UNIX EPOCH 1970-01-01 00:00:00 GMT - non leap year -12212553600L, // First full year of Gregorian Calandar 1583-01-01 00:00:00 - non-leap-year 0L, // null @@ -567,4 +567,37 @@ TEST_F(BasicDatetimeOpsTest, TestIsLeapYear) {true, false, true, true, true, true, true, true, false, true, true, false}}); } +TEST_F(BasicDatetimeOpsTest, TestQuarter) +{ + using namespace cudf::test; + using namespace cudf::datetime; + using namespace cuda::std::chrono; + + // Time in seconds since epoch + // Dates converted using epochconverter.com + auto timestamps_s = + cudf::test::fixed_width_column_wrapper{ + { + 1594332839L, // 2020-07-09 10:13:59 GMT - leap year + 0L, // null + 915148800L, // 1999-01-01 00:00:00 GMT - non leap year + -11663029161L, // 1600-5-31 05:40:39 GMT - leap year + 707904541L, // 1992-06-07 08:09:01 GMT - leap year + -2181005247L, // 1900-11-20 09:12:33 GMT - non leap year + 0L, // UNIX EPOCH 1970-01-01 00:00:00 GMT - non leap year + -12212553600L, // First full year of Gregorian Calandar 1583-01-01 00:00:00 - non-leap-year + 0L, // null + 13591632822L, // 2400-09-13 13:33:42 GMT - leap year + 4539564243L, // 2113-11-08 06:04:03 GMT - non leap year + 0L // null + }, + {true, false, true, true, true, true, true, true, false, true, true, false}}; + + auto quarter = cudf::test::fixed_width_column_wrapper{ + {3, 6, 1, 2, 2, 4, 1, 1, 1, 3, 4, 3}, + {true, false, true, true, true, true, true, true, false, true, true, false}}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_quarter(timestamps_s), quarter); +} + CUDF_TEST_PROGRAM_MAIN() From e79f6c77fa469c7b125676d7e5824a0969ef8caf Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Tue, 20 Jul 2021 20:31:55 +0000 Subject: [PATCH 02/18] updated documentation --- cpp/include/cudf/datetime.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index bfe09c3c5d3..3dd25d1a7ba 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -209,8 +209,10 @@ std::unique_ptr is_leap_year( /** * @brief Returns the quarter of the date * - * `output[i] == true` if year of `column[i]` is a leap year - * `output[i] == false` if year of `column[i]` is not a leap year + * `output[i] == 1` if month of `column[i]` is in first quarter + * `output[i] == 2` if month of `column[i]` is in second quarter + * `output[i] == 3` if month of `column[i]` is in third quarter + * `output[i] == 4` if month of `column[i]` is in fourth quarter * `output[i] is null` if `column[i]` is null * * @param[in] cudf::column_view of the input datetime values From 477b31949f45f34d9d9534ec22a1350cd62655e3 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Wed, 21 Jul 2021 20:22:07 +0000 Subject: [PATCH 03/18] updated rounding aand added new testcases --- cpp/src/datetime/datetime_ops.cu | 10 ++++----- cpp/tests/datetime/datetime_ops_test.cpp | 28 +++++++++++++----------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 994a63e68c5..c152e165ab5 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -29,6 +29,8 @@ #include #include +#include + namespace cudf { namespace datetime { namespace detail { @@ -127,7 +129,7 @@ struct extract_day_num_of_year { } }; -// Extract the day number of the year present in the timestamp +// Extract the the quarter to which the timestamp belongs to struct extract_quarter_op { template CUDA_DEVICE_CALLABLE int16_t operator()(Timestamp const ts) const @@ -139,10 +141,8 @@ struct extract_quarter_op { auto const date = year_month_day(days_since_epoch); auto const month = unsigned{date.month()}; - if (month <= 3) return 1; - if (month <= 6) return 2; - if (month <= 9) return 3; - return 4; + return (month + 2) / + 3; // (x + y - 1) / y = ceil(x/y), where x and y are unsigned. x = month, y = 3 } }; diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 5ed2ba57ca2..b20387c6a4a 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -578,24 +578,26 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter) auto timestamps_s = cudf::test::fixed_width_column_wrapper{ { - 1594332839L, // 2020-07-09 10:13:59 GMT - leap year + 1594332839L, // 2020-07-09 10:13:59 GMT 0L, // null - 915148800L, // 1999-01-01 00:00:00 GMT - non leap year - -11663029161L, // 1600-5-31 05:40:39 GMT - leap year - 707904541L, // 1992-06-07 08:09:01 GMT - leap year - -2181005247L, // 1900-11-20 09:12:33 GMT - non leap year - 0L, // UNIX EPOCH 1970-01-01 00:00:00 GMT - non leap year - -12212553600L, // First full year of Gregorian Calandar 1583-01-01 00:00:00 - non-leap-year + 915148800L, // 1999-01-01 00:00:00 GMT + -11663029161L, // 1600-5-31 05:40:39 GMT + 707904541L, // 1992-06-07 08:09:01 GMT + -2181005247L, // 1900-11-20 09:12:33 GMT + 0L, // UNIX EPOCH 1970-01-01 00:00:00 GMT + -12212553600L, // First full year of Gregorian Calandar 1583-01-01 00:00:00 0L, // null - 13591632822L, // 2400-09-13 13:33:42 GMT - leap year - 4539564243L, // 2113-11-08 06:04:03 GMT - non leap year - 0L // null + 13591632822L, // 2400-09-13 13:33:42 GMT + 4539564243L, // 2113-11-08 06:04:03 GMT + 0L, // null + 1608581568L, // 2020-12-21 08:12:48 GMT + 1584821568L, // 2020-03-21 08:12:48 GMT }, - {true, false, true, true, true, true, true, true, false, true, true, false}}; + {true, false, true, true, true, true, true, true, false, true, true, false, true, true}}; auto quarter = cudf::test::fixed_width_column_wrapper{ - {3, 6, 1, 2, 2, 4, 1, 1, 1, 3, 4, 3}, - {true, false, true, true, true, true, true, true, false, true, true, false}}; + {3, 6, 1, 2, 2, 4, 1, 1, 1, 3, 4, 3, 4, 4, 1}, + {true, false, true, true, true, true, true, true, false, true, true, false, true, true}}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_quarter(timestamps_s), quarter); } From 5507131d850794ebfc5bf09b61ffca233c37520e Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Wed, 21 Jul 2021 21:07:57 +0000 Subject: [PATCH 04/18] updated docs --- cpp/include/cudf/datetime.hpp | 11 ++++------- cpp/src/datetime/datetime_ops.cu | 2 -- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 3dd25d1a7ba..57b4d8a760e 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -209,15 +209,12 @@ std::unique_ptr is_leap_year( /** * @brief Returns the quarter of the date * - * `output[i] == 1` if month of `column[i]` is in first quarter - * `output[i] == 2` if month of `column[i]` is in second quarter - * `output[i] == 3` if month of `column[i]` is in third quarter - * `output[i] == 4` if month of `column[i]` is in fourth quarter - * `output[i] is null` if `column[i]` is null + * `output[i]` will be a value from {1, 2, 3, 4} corresponding to the quater of month given by + * `column[i]`. `output[i] is null` if `column[i]` is null * - * @param[in] cudf::column_view of the input datetime values + * @param[in] The input column containing datetime values * - * @returns cudf::column of datatype INT16 indicating which quarter the date is in + * @return A column of INT16 type indicating which quarter the date is in * @throw cudf::logic_error if input column datatype is not a TIMESTAMP */ std::unique_ptr extract_quarter( diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index c152e165ab5..f1e9923c025 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -29,8 +29,6 @@ #include #include -#include - namespace cudf { namespace datetime { namespace detail { From ad78c48143f466dafe8aa080c3e18ad6f9d01870 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Wed, 21 Jul 2021 21:32:28 +0000 Subject: [PATCH 05/18] updated tests --- cpp/tests/datetime/datetime_ops_test.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 34846e07c10..f6a145047b0 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -574,6 +575,7 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter) using namespace cudf::test; using namespace cudf::datetime; using namespace cuda::std::chrono; + using namespace cudf::test::iterators; // Time in seconds since epoch // Dates converted using epochconverter.com @@ -595,11 +597,10 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter) 1608581568L, // 2020-12-21 08:12:48 GMT 1584821568L, // 2020-03-21 08:12:48 GMT }, - {true, false, true, true, true, true, true, true, false, true, true, false, true, true}}; + nulls_at({1, 8, 11})}; auto quarter = cudf::test::fixed_width_column_wrapper{ - {3, 6, 1, 2, 2, 4, 1, 1, 1, 3, 4, 3, 4, 4, 1}, - {true, false, true, true, true, true, true, true, false, true, true, false, true, true}}; + {3, 6, 1, 2, 2, 4, 1, 1, 1, 3, 4, 3, 4, 1}, nulls_at({1, 8, 11})}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_quarter(timestamps_s), quarter); } From ed191d6470c520a7bae5c1230e81da47709f2135 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Thu, 22 Jul 2021 13:56:52 +0000 Subject: [PATCH 06/18] added tests and updated docs --- cpp/include/cudf/datetime.hpp | 9 +++------ cpp/tests/datetime/datetime_ops_test.cpp | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 57b4d8a760e..f8d937438ed 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -207,15 +207,12 @@ std::unique_ptr is_leap_year( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Returns the quarter of the date + * `column[i]`. It will be null` if the input row at `column[i]` is null. * - * `output[i]` will be a value from {1, 2, 3, 4} corresponding to the quater of month given by - * `column[i]`. `output[i] is null` if `column[i]` is null - * - * @param[in] The input column containing datetime values + * @throw cudf::logic_error if input column datatype is not a TIMESTAMP * + * @param The input column containing datetime values * @return A column of INT16 type indicating which quarter the date is in - * @throw cudf::logic_error if input column datatype is not a TIMESTAMP */ std::unique_ptr extract_quarter( cudf::column_view const& column, diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index f6a145047b0..b57945676b5 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -600,7 +600,7 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter) nulls_at({1, 8, 11})}; auto quarter = cudf::test::fixed_width_column_wrapper{ - {3, 6, 1, 2, 2, 4, 1, 1, 1, 3, 4, 3, 4, 1}, nulls_at({1, 8, 11})}; + {3, 0 /*null*/, 1, 2, 2, 4, 1, 1, 0 /*null*/, 3, 4, 0 /*null*/, 4, 1}, nulls_at({1, 8, 11})}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_quarter(timestamps_s), quarter); } From 8b207789e21ae15ad39b8ac21fac60d4c9b7e158 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Thu, 22 Jul 2021 15:15:19 +0000 Subject: [PATCH 07/18] updated comments --- cpp/src/datetime/datetime_ops.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index f1e9923c025..4d8acb3bd3b 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -139,8 +139,8 @@ struct extract_quarter_op { auto const date = year_month_day(days_since_epoch); auto const month = unsigned{date.month()}; - return (month + 2) / - 3; // (x + y - 1) / y = ceil(x/y), where x and y are unsigned. x = month, y = 3 + // (x + y - 1) / y = ceil(x/y), where x and y are unsigned. x = month, y = 3 + return (month + 2) / 3; } }; From 80d1ddf4e9392cb8bdbc9b4eb04f85a179ede943 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Thu, 22 Jul 2021 16:06:16 +0000 Subject: [PATCH 08/18] undo delete --- cpp/include/cudf/datetime.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index f8d937438ed..8383b504739 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -207,6 +207,9 @@ std::unique_ptr is_leap_year( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** + * @brief Returns the quarter of the date + * + * `output[i]` will be a value from {1, 2, 3, 4} corresponding to the quater of month given by * `column[i]`. It will be null` if the input row at `column[i]` is null. * * @throw cudf::logic_error if input column datatype is not a TIMESTAMP From c2bbce651da51c03f6f41297406e606c75a5d91a Mon Sep 17 00:00:00 2001 From: shaneding Date: Sun, 25 Jul 2021 09:46:13 -0400 Subject: [PATCH 09/18] Update cpp/include/cudf/datetime.hpp Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/include/cudf/datetime.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 8383b504739..3d90ac063e1 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -210,7 +210,7 @@ std::unique_ptr is_leap_year( * @brief Returns the quarter of the date * * `output[i]` will be a value from {1, 2, 3, 4} corresponding to the quater of month given by - * `column[i]`. It will be null` if the input row at `column[i]` is null. + * `column[i]`. It will be null if the input row at `column[i]` is null. * * @throw cudf::logic_error if input column datatype is not a TIMESTAMP * From fa2a8b10cc4d2a9e42abaf177bc801555630d077 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Mon, 26 Jul 2021 21:00:42 +0000 Subject: [PATCH 10/18] python bndings for quarter --- python/cudf/cudf/_lib/cpp/datetime.pxd | 1 + python/cudf/cudf/_lib/datetime.pyx | 10 ++++++++++ python/cudf/cudf/core/series.py | 23 +++++++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index 56ebc3a77fc..0f19137b508 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -18,3 +18,4 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: ) except + cdef unique_ptr[column] day_of_year(const column_view& column) except + cdef unique_ptr[column] is_leap_year(const column_view& column) except + + cdef unique_ptr[column] extract_quarter(const column_view& column) except + diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 3b13cedcfd7..325998919f7 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -67,3 +67,13 @@ def is_leap_year(Column col): c_result = move(libcudf_datetime.is_leap_year(col_view)) return Column.from_unique_ptr(move(c_result)) + + +def extract_quarter(Column col): + cdef unique_ptr[column] c_result + cdef column_view col_view = col.view() + + with nogil: + c_result = move(libcudf_datetime.extract_quarter(col_view)) + + return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 8e9f1127438..a02dd917bd4 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6407,6 +6407,29 @@ def is_leap_year(self): name=self.series.name, ) + @property + def quarter(self): + """ + Integer indicator for which quarter of the year the date belongs in. + + There are 4 quarters in a year. With the first quarter being from + January - March, second quarter being April - June, third quarter + being July - September and fourth quarter being October - December. + + Returns + ------- + Series + Integer indicating which quarter the date belongs to. + """ + res = libcudf.datetime.extract_quarter(self.series._column).fillna( + False + ) + return Series._from_data( + ColumnAccessor({None: res}), + index=self.series._index, + name=self.series.name, + ) + def _get_dt_field(self, field): out_column = self.series._column.get_dt_field(field) return Series( From 8251c3b558994bed6f1a01e38556dc5d6dbdc2eb Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Tue, 27 Jul 2021 15:43:39 +0000 Subject: [PATCH 11/18] added test for quarter --- python/cudf/cudf/core/index.py | 19 ++++++++++++++- python/cudf/cudf/tests/test_datetime.py | 31 +++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 691b6ab2e29..95793a4aafa 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -13,7 +13,7 @@ from pandas._config import get_option import cudf -from cudf._lib.datetime import is_leap_year +from cudf._lib.datetime import extract_quarter, is_leap_year from cudf._lib.filling import sequence from cudf._lib.search import search_sorted from cudf._lib.table import Table @@ -2357,6 +2357,23 @@ def is_leap_year(self): res = is_leap_year(self._values).fillna(False) return cupy.asarray(res) + @property + def quarter(self): + """ + Integer indicator for which quarter of the year the date belongs in. + + There are 4 quarters in a year. With the first quarter being from + January - March, second quarter being April - June, third quarter + being July - September and fourth quarter being October - December. + + Returns + ------- + Series + Integer indicating which quarter the date belongs to. + """ + res = extract_quarter(self._values).fillna(False) + return Int64Index(res, dtype="int64") + def to_pandas(self): nanos = self._values.astype("datetime64[ns]") return pd.DatetimeIndex(nanos.to_pandas(), name=self.name) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 022c9d93676..8ffd46680e3 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1297,3 +1297,34 @@ def test_is_leap_year(): got2 = gIndex.is_leap_year assert_eq(expect2, got2) + + +def test_quarter(): + data = [ + "2020-05-31 08:00:00", + "1999-12-31 18:40:00", + "2000-12-31 04:00:00", + "1900-02-28 07:00:00", + "1800-03-14 07:30:00", + "2100-03-14 07:30:00", + "1970-01-01 00:00:00", + "1969-12-31 12:59:00", + ] + + # Series + ps = pd.Series(data, dtype="datetime64[s]") + gs = cudf.from_pandas(ps) + + expect = ps.dt.quarter + got = gs.dt.quarter.astype(np.int64) + + assert_eq(expect, got) + + # DatetimeIndex + pIndex = pd.DatetimeIndex(data) + gIndex = cudf.from_pandas(pIndex) + + expect2 = pIndex.quarter + got2 = gIndex.quarter + + assert_eq(expect2, got2) From 38ed07c343e2f0225f9f69ddcf008f177336847f Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Tue, 3 Aug 2021 14:03:20 +0000 Subject: [PATCH 12/18] removed fillna --- python/cudf/cudf/_lib/datetime.pyx | 4 ++++ python/cudf/cudf/core/index.py | 13 +++++++++++-- python/cudf/cudf/core/series.py | 6 +----- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 325998919f7..9a434b7d214 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -70,6 +70,10 @@ def is_leap_year(Column col): def extract_quarter(Column col): + """ + Returns a column which contains the corresponding quarter of the year + for every timestamp inside the input column. + """ cdef unique_ptr[column] c_result cdef column_view col_view = col.view() diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 95793a4aafa..1d2a0718ae5 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2368,10 +2368,19 @@ def quarter(self): Returns ------- - Series + Int64Index Integer indicating which quarter the date belongs to. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> pIndex = pd.DatetimeIndex(["2020-05-31 08:00:00", + ... "1999-12-31 18:40:00"]) + >>> cudf.from_pandas(pIndex).quarter + Int64Index([2, 4], dtype='int64') """ - res = extract_quarter(self._values).fillna(False) + res = extract_quarter(self._values) return Int64Index(res, dtype="int64") def to_pandas(self): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 9afe313b57c..2c61a810d14 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6454,16 +6454,13 @@ def quarter(self): There are 4 quarters in a year. With the first quarter being from January - March, second quarter being April - June, third quarter being July - September and fourth quarter being October - December. - Boolean indicator if the date is the first day of the month. Returns ------- Series Integer indicating which quarter the date belongs to. """ - res = libcudf.datetime.extract_quarter(self.series._column).fillna( - False - ) + res = libcudf.datetime.extract_quarter(self.series._column) return Series._from_data( ColumnAccessor({None: res}), index=self.series._index, @@ -6473,7 +6470,6 @@ def quarter(self): @property def is_month_start(self): """ - Booleans indicating if dates are the first day of the month. """ return (self.day == 1).fillna(False) From d6598c69fc47f96b865ada89309375b34c058ed8 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Tue, 3 Aug 2021 14:08:12 +0000 Subject: [PATCH 13/18] added example --- python/cudf/cudf/core/series.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 2c61a810d14..f8df6b0984e 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6459,6 +6459,16 @@ def quarter(self): ------- Series Integer indicating which quarter the date belongs to. + + Examples + ------- + >>> import cudf + >>> s = cudf.Series(["2020-05-31 08:00:00","1999-12-31 18:40:00"], + ... dtype="datetime64[ms]") + >>> s.dt.quarter + 0 2 + 1 4 + dtype: int16 """ res = libcudf.datetime.extract_quarter(self.series._column) return Series._from_data( From ff31e1e353ebfe02a2b7dfcbeb47059bd6650fd8 Mon Sep 17 00:00:00 2001 From: shaneding Date: Wed, 4 Aug 2021 11:56:03 -0400 Subject: [PATCH 14/18] Update python/cudf/cudf/core/index.py Co-authored-by: Michael Wang --- python/cudf/cudf/core/index.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 1d2a0718ae5..ef4e9554613 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2373,11 +2373,10 @@ def quarter(self): Examples -------- - >>> import pandas as pd >>> import cudf - >>> pIndex = pd.DatetimeIndex(["2020-05-31 08:00:00", + >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:00:00", ... "1999-12-31 18:40:00"]) - >>> cudf.from_pandas(pIndex).quarter + >>> gIndex.quarter Int64Index([2, 4], dtype='int64') """ res = extract_quarter(self._values) From 0f0c2807d1f8dccb471e229b60d660fe6cc73649 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Sat, 7 Aug 2021 20:11:54 +0000 Subject: [PATCH 15/18] fixed styling --- python/cudf/cudf/_lib/datetime.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index ec6d229d8a6..51ceb7c0d8a 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -80,10 +80,11 @@ def extract_quarter(Column col): cdef column_view col_view = col.view() with nogil: - c_result = move(libcudf_datetime.extract_quarter(col_view)) - + c_result = move(libcudf_datetime.extract_quarter(col_view)) + return Column.from_unique_ptr(move(c_result)) - + + def days_in_month(Column col): """Extracts the number of days in the month of the date """ From b5272868b802a2c604644c546004cb0cef70b6b9 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Sat, 7 Aug 2021 20:41:06 +0000 Subject: [PATCH 16/18] fixed styling --- python/cudf/cudf/tests/test_datetime.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index fcf613ed358..e5cada6b03f 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1331,6 +1331,7 @@ def test_quarter(): assert_eq(expect2, got2) + @pytest.mark.parametrize("dtype", DATETIME_TYPES) def test_days_in_months(dtype): nrows = 1000 @@ -1349,6 +1350,7 @@ def test_days_in_months(dtype): assert_eq(ps.dt.days_in_month, gs.dt.days_in_month) + @pytest.mark.parametrize( "data", [ From 630083d7d68a8a03f360e106abaacc9dc53f77fa Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Tue, 10 Aug 2021 19:07:28 +0000 Subject: [PATCH 17/18] changed return type --- python/cudf/cudf/core/index.py | 4 ++-- python/cudf/cudf/core/series.py | 4 ++-- python/cudf/cudf/tests/test_datetime.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 38f824af664..2c6390cfd60 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2303,10 +2303,10 @@ def quarter(self): >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:00:00", ... "1999-12-31 18:40:00"]) >>> gIndex.quarter - Int64Index([2, 4], dtype='int64') + Int8Index([2, 4], dtype='int8') """ res = extract_quarter(self._values) - return Int64Index(res, dtype="int64") + return Int8Index(res, dtype="int8") def to_pandas(self): nanos = self._values.astype("datetime64[ns]") diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 98cb60f8f2d..f486de5c8a3 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5965,14 +5965,14 @@ def quarter(self): >>> s.dt.quarter 0 2 1 4 - dtype: int16 + dtype: int8 """ res = libcudf.datetime.extract_quarter(self.series._column) return Series._from_data( ColumnAccessor({None: res}), index=self.series._index, name=self.series.name, - ) + ).astype(np.int8) @property def is_month_start(self): diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index e5cada6b03f..0e176e18639 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1318,9 +1318,9 @@ def test_quarter(): gs = cudf.from_pandas(ps) expect = ps.dt.quarter - got = gs.dt.quarter.astype(np.int64) + got = gs.dt.quarter - assert_eq(expect, got) + assert_eq(expect, got, check_dtype=False) # DatetimeIndex pIndex = pd.DatetimeIndex(data) @@ -1329,7 +1329,7 @@ def test_quarter(): expect2 = pIndex.quarter got2 = gIndex.quarter - assert_eq(expect2, got2) + assert_eq(expect2.values, got2.values, check_dtype=False) @pytest.mark.parametrize("dtype", DATETIME_TYPES) From 0502eeb8f9502de6b74ac81effb075fcf3edb3b5 Mon Sep 17 00:00:00 2001 From: Shane Ding Date: Tue, 10 Aug 2021 20:44:26 +0000 Subject: [PATCH 18/18] updated files --- python/cudf/cudf/core/index.py | 2 +- python/cudf/cudf/core/series.py | 10 +++++----- python/cudf/cudf/tests/test_datetime.py | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 2c6390cfd60..dd732f6a4c1 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2294,7 +2294,7 @@ def quarter(self): Returns ------- - Int64Index + Int8Index Integer indicating which quarter the date belongs to. Examples diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index f486de5c8a3..3724bf04136 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5967,12 +5967,12 @@ def quarter(self): 1 4 dtype: int8 """ - res = libcudf.datetime.extract_quarter(self.series._column) + res = libcudf.datetime.extract_quarter(self.series._column).astype( + np.int8 + ) return Series._from_data( - ColumnAccessor({None: res}), - index=self.series._index, - name=self.series.name, - ).astype(np.int8) + {None: res}, index=self.series._index, name=self.series.name, + ) @property def is_month_start(self): diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 0e176e18639..904595ad5a5 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1329,6 +1329,7 @@ def test_quarter(): expect2 = pIndex.quarter got2 = gIndex.quarter + assert isinstance(got2, cudf.Int8Index) assert_eq(expect2.values, got2.values, check_dtype=False)