From 19113642f4bd1b78de1db81f3303a98c7ccc3398 Mon Sep 17 00:00:00 2001 From: Kevin Wilfong Date: Mon, 28 Oct 2024 10:50:06 -0700 Subject: [PATCH] Add support for z, zz, zzz in format_datetime (#11323) Summary: This diff adds support for JODA's z, zz, zzz patterns (all equivalent) in Presto's format_datetime function. This is used to format time zone abbreviations. Reviewed By: pedroerp Differential Revision: D64774281 --- velox/functions/lib/DateTimeFormatter.cpp | 31 +++++++++--- .../prestosql/tests/DateTimeFunctionsTest.cpp | 33 ++++++++++--- velox/type/tz/TimeZoneMap.cpp | 43 +--------------- velox/type/tz/TimeZoneMap.h | 49 ++++++++++++++++++- 4 files changed, 99 insertions(+), 57 deletions(-) diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index 89215fe91704..ec4f973213ad 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -1203,10 +1203,17 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const { size += std::max((int)token.pattern.minRepresentDigits, 9); break; case DateTimeFormatSpecifier::TIMEZONE: - VELOX_NYI( - "Date format specifier is not yet implemented: {} ({})", - getSpecifierName(token.pattern.specifier), - token.pattern.minRepresentDigits); + if (token.pattern.minRepresentDigits <= 3) { + // The longest abbreviation according to here is 5, e.g. some time + // zones use the offset as the abbreviation, like +0530. 
+ // https://en.wikipedia.org/wiki/List_of_tz_database_time_zones + size += 5; + } else { + VELOX_NYI( + "Date format specifier is not yet implemented: {} ({})", + getSpecifierName(token.pattern.specifier), + token.pattern.minRepresentDigits); + } break; case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID: @@ -1451,9 +1458,19 @@ int32_t DateTimeFormatter::format( } break; case DateTimeFormatSpecifier::TIMEZONE: { - // TODO: implement short name time zone, need a map from full name to - // short name - VELOX_UNSUPPORTED("time zone name is not yet supported"); + VELOX_USER_CHECK_NOT_NULL( + timezone, + "The time zone cannot be formatted if it is not present."); + if (token.pattern.minRepresentDigits <= 3) { + const std::string& abbrev = timezone->getShortName( + std::chrono::milliseconds(timestamp.toMillis()), + tz::TimeZone::TChoose::kEarliest); + std::memcpy(result, abbrev.data(), abbrev.length()); + result += abbrev.length(); + } else { + // TODO: implement full name time zone + VELOX_NYI("full time zone name is not yet supported"); + } } break; case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID: { diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index 97807df92a0e..afa4f5a8ac91 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -3269,6 +3269,28 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { EXPECT_EQ("+05:30", formatDatetime(parseTimestamp("1970-01-01"), "ZZ")); EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-01-01"), "Z")); + EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zzz")); + EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zz")); + EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "z")); + + // Test daylight savings. 
+ setQueryTimeZone("America/Los_Angeles"); + EXPECT_EQ("PST", formatDatetime(parseTimestamp("1970-01-01"), "z")); + EXPECT_EQ("PDT", formatDatetime(parseTimestamp("1970-10-01"), "z")); + EXPECT_EQ("PST", formatDatetime(parseTimestamp("2024-03-10 01:00"), "z")); + EXPECT_EQ("PDT", formatDatetime(parseTimestamp("2024-03-10 03:00"), "z")); + EXPECT_EQ("PDT", formatDatetime(parseTimestamp("2024-11-03 01:00"), "z")); + EXPECT_EQ("PST", formatDatetime(parseTimestamp("2024-11-03 02:00"), "z")); + + // Test a long abbreviation. + setQueryTimeZone("Asia/Colombo"); + EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-10-01"), "z")); + + setQueryTimeZone("Asia/Kolkata"); + // We don't support more than 3 'z's yet. + EXPECT_THROW( + formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError); + // Literal test cases. EXPECT_EQ("hello", formatDatetime(parseTimestamp("1970-01-01"), "'hello'")); EXPECT_EQ("'", formatDatetime(parseTimestamp("1970-01-01"), "''")); @@ -3313,15 +3335,14 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { EXPECT_THROW( formatDatetime(parseTimestamp("1970-01-01"), "'abcd"), VeloxUserError); - // System errors for patterns we haven't implemented yet. + // Time zone name patterns aren't supported when there isn't a time zone + // available. 
EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "z"), VeloxRuntimeError); + formatDatetime(parseTimestamp("1970-01-01"), "z"), VeloxUserError); EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zz"), VeloxRuntimeError); + formatDatetime(parseTimestamp("1970-01-01"), "zz"), VeloxUserError); EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zzz"), VeloxRuntimeError); - EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError); + formatDatetime(parseTimestamp("1970-01-01"), "zzz"), VeloxUserError); } TEST_F(DateTimeFunctionsTest, formatDateTimeTimezone) { diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index 93dacadba677..8b5972118f75 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -20,9 +20,7 @@ #include #include #include -#include "velox/common/base/Exceptions.h" #include "velox/common/testutil/TestValue.h" -#include "velox/external/date/tz.h" using facebook::velox::common::testutil::TestValue; @@ -222,36 +220,8 @@ std::string normalizeTimeZone(const std::string& originalZoneId) { } return originalZoneId; } - -template -void validateRangeImpl(time_point timePoint) { - using namespace velox::date; - static constexpr auto kMinYear = date::year::min(); - static constexpr auto kMaxYear = date::year::max(); - - auto year = year_month_day(floor(timePoint)).year(); - - if (year < kMinYear || year > kMaxYear) { - // This is a special case where we intentionally throw - // VeloxRuntimeError to avoid it being suppressed by TRY(). 
- VELOX_FAIL_UNSUPPORTED_INPUT_UNCATCHABLE( - "Timepoint is outside of supported year range: [{}, {}], got {}", - (int)kMinYear, - (int)kMaxYear, - (int)year); - } -} - } // namespace -void validateRange(time_point timePoint) { - validateRangeImpl(timePoint); -} - -void validateRange(time_point timePoint) { - validateRangeImpl(timePoint); -} - std::string getTimeZoneName(int64_t timeZoneID) { return locateZone(timeZoneID, true)->name(); } @@ -337,17 +307,7 @@ TimeZone::seconds TimeZone::to_sys( return (timePoint - offset_).time_since_epoch(); } - if (choose == TimeZone::TChoose::kFail) { - // By default, throws. - return date::zoned_time{tz_, timePoint}.get_sys_time().time_since_epoch(); - } - - auto dateChoose = (choose == TimeZone::TChoose::kEarliest) - ? date::choose::earliest - : date::choose::latest; - return date::zoned_time{tz_, timePoint, dateChoose} - .get_sys_time() - .time_since_epoch(); + return getZonedTime(timePoint, choose).get_sys_time().time_since_epoch(); } TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const { @@ -360,5 +320,4 @@ TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const { } return date::zoned_time{tz_, timePoint}.get_local_time().time_since_epoch(); } - } // namespace facebook::velox::tz diff --git a/velox/type/tz/TimeZoneMap.h b/velox/type/tz/TimeZoneMap.h index 9554d7506328..ea0a5f518d58 100644 --- a/velox/type/tz/TimeZoneMap.h +++ b/velox/type/tz/TimeZoneMap.h @@ -18,6 +18,8 @@ #include #include +#include "velox/common/base/Exceptions.h" +#include "velox/external/date/tz.h" namespace facebook::velox::date { class time_zone; @@ -68,8 +70,24 @@ int16_t getTimeZoneID(int32_t offsetMinutes); template using time_point = std::chrono::time_point; -void validateRange(time_point timePoint); -void validateRange(time_point timePoint); +template +void validateRange(time_point timePoint) { + using namespace velox::date; + static constexpr auto kMinYear = date::year::min(); + static constexpr auto kMaxYear = 
date::year::max(); + + auto year = year_month_day(floor(timePoint)).year(); + + if (year < kMinYear || year > kMaxYear) { + // This is a special case where we intentionally throw + // VeloxRuntimeError to avoid it being suppressed by TRY(). + VELOX_FAIL_UNSUPPORTED_INPUT_UNCATCHABLE( + "Timepoint is outside of supported year range: [{}, {}], got {}", + (int)kMinYear, + (int)kMaxYear, + (int)year); + } +} /// TimeZone is the proxy object for time zone management. It provides access to /// time zone names, their IDs (as defined in TimeZoneDatabase.cpp and @@ -151,7 +169,34 @@ class TimeZone { return tz_; } + /// Returns the short name (abbreviation) of the time zone for the given + /// timestamp. Note that the timestamp is needed for time zones that support + /// daylight savings time as the short name will change depending on the date + /// (e.g. PST/PDT). + template + std::string getShortName(TDuration timestamp, TChoose choose = TChoose::kFail) + const { + date::local_time timePoint{timestamp}; + validateRange(date::sys_time(timestamp)); + + return getZonedTime(timePoint, choose).get_info().abbrev; + } + private: + template + date::zoned_time getZonedTime( + date::local_time timestamp, + TChoose choose) const { + if (choose == TChoose::kFail) { + // By default, throws. + return date::zoned_time{tz_, timestamp}; + } + + auto dateChoose = (choose == TChoose::kEarliest) ? date::choose::earliest + : date::choose::latest; + return date::zoned_time{tz_, timestamp, dateChoose}; + } + const date::time_zone* tz_{nullptr}; const std::chrono::minutes offset_{0}; const std::string timeZoneName_;