diff --git a/velox/docs/functions/spark/datetime.rst b/velox/docs/functions/spark/datetime.rst
index f19c368d8282..2b1fd9b9a768 100644
--- a/velox/docs/functions/spark/datetime.rst
+++ b/velox/docs/functions/spark/datetime.rst
@@ -52,6 +52,13 @@ These functions support TIMESTAMP and DATE input types.
     Returns null if ``string`` does not match ``format`` or if ``format``
     is invalid.
 
+.. spark:function:: week_of_year(x) -> integer
+
+    Returns the `ISO-Week`_ of the year from ``x``. The value ranges from ``1`` to ``53``.
+    A week starts on a Monday and week 1 is the first week with more than 3 days in the year.
+
+.. _ISO-Week: https://en.wikipedia.org/wiki/ISO_week_date
+
 .. spark:function:: year(x) -> integer
 
     Returns the year from ``x``.
diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h
index 8d2a83825923..6842bf875599 100644
--- a/velox/functions/sparksql/DateTimeFunctions.h
+++ b/velox/functions/sparksql/DateTimeFunctions.h
@@ -39,6 +39,65 @@ struct YearFunction : public InitSessionTimezone<T> {
   }
 };
 
+/// Returns the ISO 8601 week of the year (1 to 53) of a TIMESTAMP or DATE.
+/// Weeks start on Monday and week 1 is the first week that has more than
+/// three of its days in the year.
+template <typename T>
+struct WeekFunction : public InitSessionTimezone<T> {
+  VELOX_DEFINE_FUNCTION_TYPES(T);
+
+  /// Computes the ISO week number from a broken-down time. Only tm_year,
+  /// tm_yday and tm_wday are read.
+  FOLLY_ALWAYS_INLINE int32_t getWeek(const std::tm& time) {
+    // The computation of ISO week from date follows the algorithm here:
+    // https://en.wikipedia.org/wiki/ISO_week_date
+    // The numerator is at least 4, so integer division already floors.
+    const int32_t isoWeekday = time.tm_wday ? time.tm_wday : kDaysInWeek;
+    int32_t week = (10 + (time.tm_yday + 1) - isoWeekday) / kDaysInWeek;
+
+    if (week == 0) {
+      // The date belongs to the last ISO week of the previous year.
+      // 1-based day of year of the Monday of the current week; zero or
+      // negative when that Monday lies in the previous year.
+      const auto mondayOfWeek =
+          time.tm_yday + 1 - (time.tm_wday + kDaysInWeek - 1) % kDaysInWeek;
+      // Day of January (1 to 7) of the first Monday of the current year.
+      const auto firstMondayOfYear =
+          1 + (mondayOfWeek + kDaysInWeek - 1) % kDaysInWeek;
+
+      // The previous year has 53 weeks only if the current year starts on a
+      // Friday (first Monday is January 4th), or on a Saturday (first Monday
+      // is January 3rd) right after a leap year. Otherwise it has 52.
+      const bool previousYearHas53Weeks = firstMondayOfYear == 4 ||
+          (firstMondayOfYear == 3 &&
+           util::isLeapYear(time.tm_year + 1900 - 1));
+      week = previousYearHas53Weeks ? 53 : 52;
+    } else if (week == 53) {
+      // 1-based day of year of the Monday of the current week.
+      const auto mondayOfWeek =
+          time.tm_yday + 1 - (time.tm_wday + kDaysInWeek - 1) % kDaysInWeek;
+      const auto daysInYear = util::isLeapYear(time.tm_year + 1900) ? 366 : 365;
+      // Week 53 exists only if its Thursday is still in the current year;
+      // otherwise the date is in week 1 of the next year.
+      if (daysInYear - mondayOfWeek < 3) {
+        week = 1;
+      }
+    }
+
+    return week;
+  }
+
+  FOLLY_ALWAYS_INLINE void call(
+      int32_t& result,
+      const arg_type<Timestamp>& timestamp) {
+    result = getWeek(getDateTime(timestamp, this->timeZone_));
+  }
+
+  FOLLY_ALWAYS_INLINE void call(int32_t& result, const arg_type<Date>& date) {
+    result = getWeek(getDateTime(date));
+  }
+};
+
 template <typename T>
 struct UnixTimestampFunction {
   // unix_timestamp();
diff --git a/velox/functions/sparksql/Register.cpp b/velox/functions/sparksql/Register.cpp
index b8cc01b75e94..4431c318e020 100644
--- a/velox/functions/sparksql/Register.cpp
+++ b/velox/functions/sparksql/Register.cpp
@@ -189,6 +189,8 @@ void registerFunctions(const std::string& prefix) {
   // Register date functions.
   registerFunction<YearFunction, int32_t, Timestamp>({prefix + "year"});
   registerFunction<YearFunction, int32_t, Date>({prefix + "year"});
+  registerFunction<WeekFunction, int32_t, Timestamp>({prefix + "week_of_year"});
+  registerFunction<WeekFunction, int32_t, Date>({prefix + "week_of_year"});
 
   registerFunction<UnixTimestampFunction, int64_t>({prefix + "unix_timestamp"});
 
diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
index 88dae6a02daf..b3e25aba22ca 100644
--- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
+++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
@@ -30,6 +30,10 @@ class DateTimeFunctionsTest : public SparkFunctionBaseTest {
         {core::QueryConfig::kAdjustTimestampToTimezone, "true"},
     });
   }
+
+  int32_t parseDate(const std::string& dateStr) {
+    return DATE()->toDays(dateStr);
+  }
 };
 
 TEST_F(DateTimeFunctionsTest, year) {
@@ -66,6 +70,34 @@ TEST_F(DateTimeFunctionsTest, yearDate) {
   EXPECT_EQ(1920, year(DATE()->toDays("1920-01-01")));
 }
 
+TEST_F(DateTimeFunctionsTest, weekOfYear) {
+  const auto weekOfYear = [&](const char* dateString) {
+    auto date = std::make_optional(parseDate(dateString));
+    return evaluateOnce<int32_t, int32_t>("week_of_year(c0)", {date}, {DATE()})
+        .value();
+  };
+
+  EXPECT_EQ(1, weekOfYear("1919-12-31"));
+  EXPECT_EQ(1, weekOfYear("1920-01-01"));
+  EXPECT_EQ(1, weekOfYear("1920-01-04"));
+  EXPECT_EQ(2, weekOfYear("1920-01-05"));
+  EXPECT_EQ(53, weekOfYear("1960-01-01"));
+  EXPECT_EQ(53, weekOfYear("1960-01-03"));
+  EXPECT_EQ(1, weekOfYear("1960-01-04"));
+  EXPECT_EQ(1, weekOfYear("1969-12-31"));
+  EXPECT_EQ(1, weekOfYear("1970-01-01"));
+  EXPECT_EQ(1, weekOfYear("0001-01-01"));
+  EXPECT_EQ(52, weekOfYear("9999-12-31"));
+  EXPECT_EQ(8, weekOfYear("2008-02-20"));
+  EXPECT_EQ(15, weekOfYear("2015-04-08"));
+  EXPECT_EQ(15, weekOfYear("2013-04-08"));
+  // Years starting on a Saturday or Sunday: the first days of January belong
+  // to the last week (52 or 53) of the previous ISO year.
+  EXPECT_EQ(52, weekOfYear("2011-01-01"));
+  EXPECT_EQ(52, weekOfYear("2017-01-01"));
+  EXPECT_EQ(53, weekOfYear("2005-01-01"));
+}
+
 TEST_F(DateTimeFunctionsTest, unixTimestamp) {
   const auto unixTimestamp = [&](std::optional<StringView> dateStr) {
     return evaluateOnce<int64_t>("unix_timestamp(c0)", dateStr);