diff --git a/CMake/resolve_dependency_modules/boost.cmake b/CMake/resolve_dependency_modules/boost.cmake index bdf6633d0375..842cba6f1334 100644 --- a/CMake/resolve_dependency_modules/boost.cmake +++ b/CMake/resolve_dependency_modules/boost.cmake @@ -13,30 +13,13 @@ # limitations under the License. include_guard(GLOBAL) -if(CMAKE_SYSTEM_NAME MATCHES "Darwin") - if(ON_APPLE_M1) - list(APPEND CMAKE_PREFIX_PATH "/opt/homebrew/opt/icu4c") - else() - list(APPEND CMAKE_PREFIX_PATH "/usr/local/opt/icu4c") - endif() -endif() - -# ICU is only needed with Boost build from source -set_source(ICU) -resolve_dependency( - ICU - COMPONENTS - data - i18n - io - uc - tu - test) - add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/boost) -if(${ICU_SOURCE} STREQUAL "BUNDLED") - # ensure ICU is built before Boost - add_dependencies(boost_regex ICU ICU::i18n) + +if(ICU_SOURCE) + if(${ICU_SOURCE} STREQUAL "BUNDLED") + # ensure ICU is built before Boost + add_dependencies(boost_regex ICU ICU::i18n) + endif() endif() # This prevents system boost from leaking in diff --git a/CMakeLists.txt b/CMakeLists.txt index 97ba92170c0f..168eee277044 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -385,6 +385,25 @@ endif() set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +if(CMAKE_SYSTEM_NAME MATCHES "Darwin") + if(ON_APPLE_M1) + list(APPEND CMAKE_PREFIX_PATH "/opt/homebrew/opt/icu4c") + else() + list(APPEND CMAKE_PREFIX_PATH "/usr/local/opt/icu4c") + endif() +endif() + +set_source(ICU) +resolve_dependency( + ICU + COMPONENTS + data + i18n + io + uc + tu + test) + set(BOOST_INCLUDE_LIBRARIES atomic context diff --git a/scripts/setup-ubuntu.sh b/scripts/setup-ubuntu.sh index 5874e98e9651..5822d902e848 100755 --- a/scripts/setup-ubuntu.sh +++ b/scripts/setup-ubuntu.sh @@ -92,7 +92,6 @@ function install_velox_deps_from_apt { libc-ares-dev \ libcurl4-openssl-dev \ libssl-dev \ - libicu-dev \ libdouble-conversion-dev \ libgoogle-glog-dev \ libbz2-dev \ @@ -111,6 +110,7 @@ function install_velox_deps_from_apt { flex \ libfl-dev \ tzdata + ${SUDO} apt remove -y libicu-dev } function install_fmt { @@ -297,4 +297,3 @@ function install_apt_deps { fi fi ) - diff --git a/velox/functions/lib/CMakeLists.txt b/velox/functions/lib/CMakeLists.txt index bdff97bba2d9..5cee3af311d1 100644 --- a/velox/functions/lib/CMakeLists.txt +++ b/velox/functions/lib/CMakeLists.txt @@ -24,7 +24,8 @@ velox_link_libraries(velox_functions_util velox_vector velox_common_base) velox_add_library(velox_functions_lib_date_time_formatter DateTimeFormatter.cpp DateTimeFormatterBuilder.cpp) -velox_link_libraries(velox_functions_lib_date_time_formatter velox_type_tz) +velox_link_libraries(velox_functions_lib_date_time_formatter velox_type_tz + ICU::i18n ICU::uc) velox_add_library( velox_functions_lib diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index ec4f973213ad..5676a63efc8c 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -1209,10 +1209,10 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const { // https://en.wikipedia.org/wiki/List_of_tz_database_time_zones size += 5; } else { - VELOX_NYI( - "Date format specifier is not yet implemented: {} ({})", - getSpecifierName(token.pattern.specifier), - token.pattern.minRepresentDigits); + // The longest time zone long name is 40, Australian Central Western + // Standard Time. + // https://www.timeanddate.com/time/zones/ + size += 50; } break; @@ -1468,8 +1468,11 @@ int32_t DateTimeFormatter::format( std::memcpy(result, abbrev.data(), abbrev.length()); result += abbrev.length(); } else { - // TODO: implement full name time zone - VELOX_NYI("full time zone name is not yet supported"); + std::string longName = timezone->getLongName( + std::chrono::milliseconds(timestamp.toMillis()), + tz::TimeZone::TChoose::kEarliest); + std::memcpy(result, longName.data(), longName.length()); + result += longName.length(); } } break; diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index afa4f5a8ac91..30090173d8f8 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -3272,6 +3272,12 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zzz")); EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zz")); EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "z")); + EXPECT_EQ( + "India Standard Time", + formatDatetime(parseTimestamp("1970-01-01"), "zzzz")); + EXPECT_EQ( + "India Standard Time", + formatDatetime(parseTimestamp("1970-01-01"), "zzzzzzzzzzzzzzzzzzzzzz")); // Test daylight savings. setQueryTimeZone("America/Los_Angeles"); @@ -3281,16 +3287,47 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { EXPECT_EQ("PDT", formatDatetime(parseTimestamp("2024-03-10 03:00"), "z")); EXPECT_EQ("PDT", formatDatetime(parseTimestamp("2024-11-03 01:00"), "z")); EXPECT_EQ("PST", formatDatetime(parseTimestamp("2024-11-03 02:00"), "z")); + EXPECT_EQ( + "Pacific Standard Time", + formatDatetime(parseTimestamp("1970-01-01"), "zzzz")); + EXPECT_EQ( + "Pacific Daylight Time", + formatDatetime(parseTimestamp("1970-10-01"), "zzzz")); + EXPECT_EQ( + "Pacific Standard Time", + formatDatetime(parseTimestamp("2024-03-10 01:00"), "zzzz")); + EXPECT_EQ( + "Pacific Daylight Time", + formatDatetime(parseTimestamp("2024-03-10 03:00"), "zzzz")); + EXPECT_EQ( + "Pacific Daylight Time", + formatDatetime(parseTimestamp("2024-11-03 01:00"), "zzzz")); + EXPECT_EQ( + "Pacific Standard Time", + formatDatetime(parseTimestamp("2024-11-03 02:00"), "zzzz")); + + // Test ambiguous time. + EXPECT_EQ( + "PDT", formatDatetime(parseTimestamp("2024-11-03 01:30:00"), "zzz")); + EXPECT_EQ( + "Pacific Daylight Time", + formatDatetime(parseTimestamp("2024-11-03 01:30:00"), "zzzz")); // Test a long abbreviation. setQueryTimeZone("Asia/Colombo"); EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-10-01"), "z")); + EXPECT_EQ( + "India Standard Time", + formatDatetime(parseTimestamp("1970-10-01"), "zzzz")); - setQueryTimeZone("Asia/Kolkata"); - // We don't support more than 3 'z's yet. - EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError); + // Test a long long name. + setQueryTimeZone("Australia/Eucla"); + EXPECT_EQ("+0845", formatDatetime(parseTimestamp("1970-10-01"), "z")); + EXPECT_EQ( + "Australian Central Western Standard Time", + formatDatetime(parseTimestamp("1970-10-01"), "zzzz")); + setQueryTimeZone("Asia/Kolkata"); // Literal test cases. EXPECT_EQ("hello", formatDatetime(parseTimestamp("1970-01-01"), "'hello'")); EXPECT_EQ("'", formatDatetime(parseTimestamp("1970-01-01"), "''")); diff --git a/velox/type/tz/CMakeLists.txt b/velox/type/tz/CMakeLists.txt index b7d96feb1262..614b594000a7 100644 --- a/velox/type/tz/CMakeLists.txt +++ b/velox/type/tz/CMakeLists.txt @@ -23,4 +23,6 @@ velox_link_libraries( velox_external_date Boost::regex fmt::fmt - Folly::folly) + Folly::folly + ICU::i18n + ICU::uc) diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index 2b7e8968ed04..b68c88c6be04 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -20,6 +20,16 @@ #include #include #include + +#include +#include +#include + +// The ICU libraries define TRUE/FALSE macros which frequently conflict with +// other libraries that use these as enum/variable names. +#undef TRUE +#undef FALSE + #include "velox/common/base/Exceptions.h" #include "velox/common/testutil/TestValue.h" #include "velox/external/date/tz.h" @@ -379,4 +389,48 @@ std::string TimeZone::getShortName( return getZonedTime(tz_, timePoint, choose).get_info().abbrev; } + +std::string TimeZone::getLongName( + TimeZone::milliseconds timestamp, + TimeZone::TChoose choose) const { + static const icu::Locale locale("en", "US"); + + validateRange(date::sys_time(timestamp)); + + // Time zone offsets only have one name. + if (tz_ == nullptr) { + return timeZoneName_; + } + + // Special case for UTC. ICU uses "GMT" for some reason which is an + // abbreviation. + if (timeZoneID_ == 0) { + return "Coordinated Universal Time"; + } + + // Get the ICU TimeZone by name + std::unique_ptr tz(icu::TimeZone::createTimeZone( + icu::UnicodeString(timeZoneName_.data(), timeZoneName_.length()))); + VELOX_USER_CHECK_NOT_NULL(tz); + + // According to the documentation this is how to determine if DST applies to + // a given timestamp in a given time zone. + // https://howardhinnant.github.io/date/tz.html#sys_info + date::local_time timePoint{timestamp}; + bool isDst = getZonedTime(tz_, timePoint, choose).get_info().save != + std::chrono::minutes(0); + + // Construct the long name for the time zone. + // Note that ICU does not have DST information for many time zones prior to + // 1970, so it's important to specify it explicitly. + icu::UnicodeString longName; + tz->getDisplayName( + isDst, icu::TimeZone::EDisplayType::LONG, locale, longName); + + // Convert the UnicodeString back to a string and write it out + std::string longNameStr; + longName.toUTF8String(longNameStr); + + return longNameStr; +} } // namespace facebook::velox::tz diff --git a/velox/type/tz/TimeZoneMap.h b/velox/type/tz/TimeZoneMap.h index c7e5c0f6087e..c04cc308657b 100644 --- a/velox/type/tz/TimeZoneMap.h +++ b/velox/type/tz/TimeZoneMap.h @@ -160,6 +160,14 @@ class TimeZone { milliseconds timestamp, TChoose choose = TChoose::kFail) const; + /// Returns the long name of the time zone for the given timestamp, e.g. + /// Pacific Standard Time. Note that the timestamp is needed for time zones + /// that support daylight savings time as the long name will change depending + /// on the date (e.g. Pacific Standard Time vs Pacific Daylight Time). + std::string getLongName( + milliseconds timestamp, + TChoose choose = TChoose::kFail) const; + private: const date::time_zone* tz_{nullptr}; const std::chrono::minutes offset_{0}; diff --git a/velox/type/tz/tests/TimeZoneMapTest.cpp b/velox/type/tz/tests/TimeZoneMapTest.cpp index 0d0fa21cb43c..364fd2baf297 100644 --- a/velox/type/tz/tests/TimeZoneMapTest.cpp +++ b/velox/type/tz/tests/TimeZoneMapTest.cpp @@ -259,5 +259,29 @@ TEST(TimeZoneMapTest, getShortName) { EXPECT_EQ("PST", toShortName("America/Los_Angeles", ts)); } +TEST(TimeZoneMapTest, getLongName) { + auto toLongName = [&](std::string_view name, size_t ts) { + const auto* tz = locateZone(name); + EXPECT_NE(tz, nullptr); + return tz->getLongName(milliseconds{ts}); + }; + + // Test an offset that maps to an actual time zone. + EXPECT_EQ("Coordinated Universal Time", toLongName("+00:00", 0)); + + // Test offsets that do not map to named time zones. + EXPECT_EQ("+00:01", toLongName("+00:01", 0)); + EXPECT_EQ("-00:01", toLongName("-00:01", 0)); + EXPECT_EQ("+01:00", toLongName("+01:00", 0)); + EXPECT_EQ("-01:01", toLongName("-01:01", 0)); + + // In "2024-07-25", America/Los_Angeles was in daylight savings time (UTC-07). + size_t ts = 1721890800000; + EXPECT_EQ("Pacific Daylight Time", toLongName("America/Los_Angeles", ts)); + + // In "2024-01-01", it was not (UTC-08). + ts = 1704096000000; + EXPECT_EQ("Pacific Standard Time", toLongName("America/Los_Angeles", ts)); +} } // namespace } // namespace facebook::velox::tz