From f28c9287b335263c249405b09fa5ec2efcd6ae5f Mon Sep 17 00:00:00 2001 From: birdstorm Date: Fri, 11 Nov 2022 18:39:25 +0800 Subject: [PATCH 1/8] support extract datetime from string Signed-off-by: birdstorm --- dbms/src/Common/MyDuration.h | 12 +- dbms/src/Common/MyTime.cpp | 442 ++++++++++++++---- dbms/src/Common/MyTime.h | 7 +- dbms/src/Common/StringUtils/StringUtils.h | 96 ++++ dbms/src/Flash/Coprocessor/DAGUtils.cpp | 1 + dbms/src/Functions/FunctionsDateTime.cpp | 1 + dbms/src/Functions/FunctionsDateTime.h | 306 ++++++++++-- dbms/src/Functions/FunctionsDuration.h | 93 +--- .../tests/gtest_datetime_extract.cpp | 107 ++--- .../expr/extract_datetime_from_string.test | 26 ++ 10 files changed, 789 insertions(+), 302 deletions(-) create mode 100644 tests/fullstack-test/expr/extract_datetime_from_string.test diff --git a/dbms/src/Common/MyDuration.h b/dbms/src/Common/MyDuration.h index 8c454d23d9f..5c5798abb29 100644 --- a/dbms/src/Common/MyDuration.h +++ b/dbms/src/Common/MyDuration.h @@ -41,6 +41,12 @@ class MyDuration static constexpr Int64 NANOS_PER_MINUTE = 60 * NANOS_PER_SECOND; static constexpr Int64 NANOS_PER_HOUR = 60 * NANOS_PER_MINUTE; + static const int8_t DefaultFsp = 6; + + Int64 nanos; + UInt8 fsp; + +public: static constexpr Int64 MAX_HOUR_PART = 838; static constexpr Int64 MAX_MINUTE_PART = 59; static constexpr Int64 MAX_SECOND_PART = 59; @@ -48,12 +54,6 @@ class MyDuration static constexpr Int64 MAX_NANOS = MAX_HOUR_PART * NANOS_PER_HOUR + MAX_MINUTE_PART * NANOS_PER_MINUTE + MAX_SECOND_PART * NANOS_PER_SECOND + MAX_MICRO_PART * NANOS_PER_MICRO; static_assert(MAX_NANOS > 0); - static const int8_t DefaultFsp = 6; - - Int64 nanos; - UInt8 fsp; - -public: MyDuration() = default; explicit MyDuration(Int64 nanos_) : nanos(nanos_) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 49e061968f7..4b60d919378 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -262,35 +263,39 @@ bool isPunctuation(char c) return (c >= 0x21 && c <= 0x2F) || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B && c <= 0x7E); } -std::tuple, String, bool, String, String, String, String> splitDatetime(String format) +std::tuple, String, bool, String, String, String, String, bool> splitDatetime(String format) { std::vector seps; String frac; - bool has_tz = false; + bool has_tz = false, truncated = false; auto [tz_idx, tz_sign, tz_hour, tz_sep, tz_minute] = getTimeZone(format); if (tz_idx > 0) { has_tz = true; - while (tz_idx > 0 && isPunctuation(format[tz_idx - 1])) + for (; tz_idx > 0 && isPunctuation(format[tz_idx - 1]); tz_idx--) { // in case of multiple separators, e.g. 2020-10--10 - tz_idx--; } format = format.substr(0, tz_idx); } - int frac_idx = getFracIndex(format); + size_t frac_idx = getFracIndex(format); if (frac_idx > 0) { - frac = format.substr(frac_idx + 1); - while (frac_idx > 0 && isPunctuation(format[frac_idx - 1])) + size_t frac_end = frac_idx + 1; + for (; frac_end < format.length() && isdigit(format[frac_end]); frac_end++) + { + // + } + truncated = (frac_end != format.length()); + frac = format.substr(frac_idx + 1, frac_end); + for (; frac_idx > 0 && isPunctuation(format[frac_idx - 1]); frac_idx--) { // in case of multiple separators, e.g. 2020-10-10 11:00:00..123456 - frac_idx--; } format = format.substr(0, frac_idx); } seps = parseDateFormat(format); - return std::make_tuple(std::move(seps), std::move(frac), std::move(has_tz), std::move(tz_sign), std::move(tz_hour), std::move(tz_sep), std::move(tz_minute)); + return std::make_tuple(std::move(seps), std::move(frac), std::move(has_tz), std::move(tz_sign), std::move(tz_hour), std::move(tz_sep), std::move(tz_minute), std::move(truncated)); } @@ -677,6 +682,50 @@ inline bool numberToDateTime(Int64 number, MyDateTime & result, bool allowZeroDa return getDatetime(number, result); } +// returns frac, overflow, matched. eg., "999" fsp=2 will overflow. +std::tuple parseFrac(const String & str, int8_t fsp) +{ + if (str.empty()) + { + return {0, false, true}; + } + if (fsp == -1) + { + fsp = 6; + } + if (fsp < 0 || fsp > 6) + { + return {0, false, false}; + } + try + { + int len = str.length(); + if (fsp >= len) + { + UInt32 tmp = std::stoul(str); + return {tmp * std::pow(10, 6 - len), false, true}; + } + + // Round when fsp < string length. + UInt32 tmp = std::stoul(str.substr(0, fsp + 1)); + tmp = (tmp + 5) / 10; + if (tmp >= std::pow(10, fsp)) + { + // overflow + return {0, true, true}; + } + // Get the final frac, with 6 digit number + // 1236 round 3 -> 124 -> 124000 + // 0312 round 2 -> 3 -> 30000 + // 999 round 2 -> 100 -> overflow + return {tmp * std::pow(10, 6 - fsp), false, true}; + } + catch (std::exception & e) + { + return {0, false, false}; + } +} + // isFloat is true means that the input string is float format like "1212.111" std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t fsp, CheckTimeFunc checkTimeFunc, bool isFloat) { @@ -686,17 +735,15 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t bool hhmmss = false; - auto [seps, frac_str, has_tz, tz_sign, tz_hour, tz_sep, tz_minute] = splitDatetime(str); - - bool truncated_or_incorrect = false; + auto [seps, frac_str, has_tz, tz_sign, tz_hour, tz_sep, tz_minute, truncated_or_incorrect] = splitDatetime(str); - // no_absorb tests if can absorb FSP or TZ - auto no_absorb = [](const std::vector & seps) { + // no_absorb tests if it can absorb FSP or TZ + auto no_absorb = [](const std::vector & s) { // if we have more than 5 parts (i.e. 6), the tailing part can't be absorbed // or if we only have 1 part, but its length is longer than 4, then it is at least YYMMD, in this case, FSP can // not be absorbed, and it will be handled later, and the leading sign prevents TZ from being absorbed, because // if date part has no separators, we can't use -/+ as separators between date & time. - return seps.size() > 5 || (seps.size() == 1 && seps[0].size() > 4); + return s.size() > 5 || (s.size() == 1 && s[0].size() > 4); }; if (!frac_str.empty() && !isFloat) @@ -710,7 +757,7 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t if (has_tz && !tz_sign.empty()) { - // if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z), + // if tz_sign is empty, it's certain that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z), // therefore we could safely skip this branch. if (!no_absorb(seps) && !(!tz_minute.empty() && tz_sep.empty())) { @@ -729,6 +776,10 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t switch (seps.size()) { + case 0: + { + return {Field(), is_date}; + } // No delimiter case 1: { @@ -736,16 +787,12 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t if (isFloat) { MyDateTime date_time(0); - if (seps[0] == "0") - { - return {date_time.toPackedUInt(), is_date}; - } if (numberToDateTime(std::stoll(seps[0]), date_time)) { return {Field(), is_date}; } std::tie(year, month, day, hour, minute, second) = std::tuple(date_time.year, date_time.month, date_time.day, date_time.hour, date_time.minute, date_time.second); - if (l >= 9 && l <= 14) + if (seps[0] == "0" || (l >= 9 && l <= 14)) { hhmmss = true; } @@ -822,33 +869,38 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t // We must handle float => string => datetime, the difference is that fractional // part of float type is discarded directly, while fractional part of string type // is parsed to HH:MM:SS. - int ret = 0; - switch (frac_str.size()) - { - case 0: - is_date = true; - break; - case 1: - case 2: - { - ret = std::sscanf(frac_str.c_str(), "%2d ", &hour); //NOLINT(cert-err34-c): check conversion error manually - truncated_or_incorrect = (ret != 1); - break; - } - case 3: - case 4: - { - ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute); //NOLINT(cert-err34-c): check conversion error manually - truncated_or_incorrect = (ret != 2); - break; - } - default: + if (!isFloat) { - ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second); //NOLINT(cert-err34-c): check conversion error manually - truncated_or_incorrect = (ret != 3); - break; - } + // '20170118.123423' => 2017-01-18 12:34:23.234 + int ret = 0; + switch (frac_str.size()) + { + case 0: + is_date = true; + break; + case 1: + case 2: + { + ret = std::sscanf(frac_str.c_str(), "%2d ", &hour); //NOLINT(cert-err34-c): check conversion error manually + truncated_or_incorrect = (ret != 1); + break; + } + case 3: + case 4: + { + ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute); //NOLINT(cert-err34-c): check conversion error manually + truncated_or_incorrect = (ret != 2); + break; + } + default: + { + ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second); //NOLINT(cert-err34-c): check conversion error manually + truncated_or_incorrect = (ret != 3); + break; + } + } } + // 20170118.123423 => 2017-01-18 00:00:00 } if (l == 9 || l == 10) { @@ -867,6 +919,10 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t } break; } + case 2: + { + return {Field(), is_date}; + } case 3: { // YYYY-MM-DD @@ -900,23 +956,24 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t } default: { - return {Field(), is_date}; + // For case like `2020-05-28 23:59:59 00:00:00`, the seps should be > 6, the reluctant parts should be truncated. + seps.resize(6); + if (!scanTimeArgs(seps, {&year, &month, &day, &hour, &minute, &second})) + return {Field(), is_date}; + hhmmss = true; } } - // If str is sepereated by delimiters, the first one is year, and if the year is 2 digit, + // If str is separated by delimiters, the first one is year, and if the year is 2 digit, // we should adjust it. // TODO: adjust year is very complex, now we only consider the simplest way. if (seps[0].size() <= 2 && !isFloat) { - if (year == 0 && month == 0 && day == 0 && hour == 0 && minute == 0 && second == 0 && frac_str.empty()) - { - // Skip a special case "00-00-00". - } - else + if (!(year == 0 && month == 0 && day == 0 && hour == 0 && minute == 0 && second == 0 && frac_str.empty())) { year = adjustYear(year); } + // Skip a special case "00-00-00". } UInt32 micro_second = 0; @@ -924,34 +981,11 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t { // If input string is "20170118.999", without hhmmss, fsp is meaningless. // TODO: this case is not only meaningless, but erroneous, please confirm. - if (static_cast(fsp) >= frac_str.size()) - { - micro_second = std::stoul(frac_str); - micro_second = micro_second * std::pow(10, 6 - frac_str.size()); - } - else + bool overflow, matched; + std::tie(micro_second, overflow, matched) = parseFrac(frac_str, fsp); + if (!matched) { - auto result_frac = frac_str.substr(0, fsp + 1); - micro_second = std::stoul(result_frac); - micro_second = (micro_second + 5) / 10; - // Overflow - if (micro_second >= std::pow(10, fsp)) - { - MyDateTime datetime(year, month, day, hour, minute, second, 0); - UInt64 result = addSeconds(datetime.toPackedUInt(), 1); - MyDateTime result_datetime(result); - year = result_datetime.year; - month = result_datetime.month; - day = result_datetime.day; - hour = result_datetime.hour; - minute = result_datetime.minute; - second = result_datetime.second; - micro_second = 0; - } - else - { - micro_second = micro_second * std::pow(10, 6 - fsp); - } + return {Field(), is_date}; } } @@ -1008,6 +1042,254 @@ Field parseMyDateTimeFromFloat(const String & str, int8_t fsp, CheckTimeFunc che return parseMyDateTimeAndJudgeIsDate(str, fsp, checkTimeFunc, true).first; } +// returns fraction, overflow, matched +std::tuple matchFrac(String & str, int8_t fsp) +{ + if (str.empty() || str[0] != '.') + { + return {0, false, false}; + } + str = str.substr(1); + auto [result, idx, matched] = number(str); + if (!matched) + { + return {0, false, false}; + } + String digits = str.substr(0, idx); + str = str.substr(idx); + return parseFrac(digits, fsp); +} + +bool matchHHMMSSDelimited(String & str, int * hhmmss, bool requireColon) +{ + auto [hour, idx, matched] = number(str); + if (!matched) + { + return false; + } + String rest = str.substr(idx); + hhmmss[0] = hour; + + for (int i = 1; i < 3; i++) + { + auto [remain, matched1] = matchColon(rest); + if (matched1) + { + int num; + std::tie(num, idx, matched) = number(remain); + if (!matched) + { + return false; + } + hhmmss[i] = num; + rest = remain.substr(idx); + } + else + { + if (i == 1 && requireColon) + { + return false; + } + break; + } + } + + str = rest; + return true; +} + +bool matchHHMMSSCompact(String & str, int * hhmmss) +{ + auto [num, idx, matched] = number(str); + if (!matched) + { + return false; + } + hhmmss[0] = num / 10000; + hhmmss[1] = (num / 100) % 100; + hhmmss[2] = num % 100; + str = str.substr(idx); + return true; +} + +bool matchDayHHMMSS(String & str, int & day, int * hhmmss) +{ + // Day HH:MM:SS + auto [result, idx, matched] = number(str); + if (!matched) + { + return false; + } + day = result; + String rest; + std::tie(rest, matched) = space1(str.substr(idx)); + if (!matched) + { + return false; + } + str = rest; + return matchHHMMSSDelimited(str, hhmmss, false); +} + +void hhmmssAddOverflow(int * hhmmss, bool & overflow) +{ + int mod[3] = {-1, 60, 60}; + for (int i = 2; i >= 0 && overflow; i--) + { + hhmmss[i]++; + if (hhmmss[i] == mod[i]) + { + overflow = true; + hhmmss[i] = 0; + } + else + { + overflow = false; + } + } +} + +bool checkHHMMSS(const int * hhmmss) +{ + return hhmmss[1] < 60 && hhmmss[2] < 60; +} + +// duration, matched +std::pair matchDuration(const String & str, int8_t fsp) +{ + if (fsp < 0 || fsp > 6 || str.empty()) + { + return {MyDuration(), false}; + } + bool negative = str[0] == '-'; + String rest; + if (negative) + { + rest = str.substr(1); + } + else + { + rest = str; + } + rest = space0(rest); + int chars_len = rest.length(); + + int hhmmss[3] = {0, 0, 0}; + int day; + + if (matchDayHHMMSS(rest, day, hhmmss)) + { + hhmmss[0] += 24 * day; + } + else if (!matchHHMMSSDelimited(rest, hhmmss, true) && !matchHHMMSSCompact(rest, hhmmss)) + { + return {MyDuration(), false}; + } + + rest = space0(rest); + auto [frac, overflow, matched] = matchFrac(rest, fsp); + if (!matched || (rest.length() > 0 && chars_len >= 12)) + { + return {MyDuration(), false}; + } + + if (overflow) + { + hhmmssAddOverflow(hhmmss, overflow); + frac = 0; + } + + if (!checkHHMMSS(hhmmss)) + { + return {MyDuration(), false}; + } + + if (hhmmss[0] > MyDuration::MAX_HOUR_PART) + { + if (negative) + { + return {MyDuration(-MyDuration::MAX_NANOS, fsp), true}; + } + else + { + return {MyDuration(MyDuration::MAX_NANOS, fsp), true}; + } + } + + Int64 nanosecond = (hhmmss[0] * 3600 + hhmmss[1] * 60 + hhmmss[2]) * 1000000000LL + frac * 1000LL; + if (negative) + { + nanosecond = -nanosecond; + } + return {MyDuration(nanosecond, fsp), true}; +} + +bool canFallbackToDateTime(const String & str) +{ + auto [digits, rest, matched] = digit(str, 1); + if (!matched) + { + return false; + } + if (digits.length() == 12 || digits.length() == 14) + { + return true; + } + std::tie(rest, matched) = anyPunct(rest); + if (!matched) + { + return false; + } + String tmp; + std::tie(tmp, rest, matched) = digit(rest, 1); + if (!matched) + { + return false; + } + std::tie(rest, matched) = anyPunct(rest); + if (!matched) + { + return false; + } + std::tie(tmp, rest, matched) = digit(rest, 1); + if (!matched) + { + return false; + } + return !rest.empty() && (rest[0] == ' ' || rest[0] == 'T'); +} + +Field parseMyDuration(const String & str, int8_t fsp) +{ + String rest = Poco::trim(str); + auto [duration, matched] = matchDuration(rest, fsp); + if (matched) + { + return Field(duration.nanoSecond()); + } + if (!canFallbackToDateTime(rest)) + { + return Field(); + } + Field packed_uint_value = parseMyDateTime(rest, fsp); + if (packed_uint_value.isNull()) + { + return Field(); + } + UInt64 packed_uint = packed_uint_value.template safeGet(); + MyDateTime datetime(packed_uint); + duration = datetime.convertToMyDuration(fsp); + + return Field(duration.nanoSecond()); +} + +MyDuration MyDateTime::convertToMyDuration(int fsp) +{ + Int64 frac = micro_second * 1000LL; + // TODO: check convert validation + return MyDuration((hour * 3600LL + minute * 60LL + second) * 1000000000LL + frac, fsp); +} + String MyDateTime::toString(int fsp) const { const static String format = "%Y-%m-%d %H:%i:%s"; diff --git a/dbms/src/Common/MyTime.h b/dbms/src/Common/MyTime.h index cdfad663540..f5d422cc895 100644 --- a/dbms/src/Common/MyTime.h +++ b/dbms/src/Common/MyTime.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -129,6 +130,8 @@ struct MyDateTime : public MyTimeBase String toString(int fsp) const; + MyDuration convertToMyDuration(int fsp); + static MyDateTime getSystemDateTimeByTimezone(const TimezoneInfo &, UInt8 fsp); }; @@ -178,7 +181,7 @@ struct MyDateTimeParser // Parsing method. Parse from ctx.view[ctx.pos]. // If success, update `datetime`, `ctx` and return true. - // If fail, return false. + // If fails, return false. using ParserCallback = std::function; std::vector parsers; }; @@ -197,6 +200,8 @@ Field parseMyDateTime(const String & str, int8_t fsp = DefaultFsp, CheckTimeFunc Field parseMyDateTimeFromFloat(const String & str, int8_t fsp = DefaultFsp, CheckTimeFunc checkTimeFunc = DefaultCheckTimeFunc); std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t fsp = DefaultFsp, CheckTimeFunc checkTimeFunc = DefaultCheckTimeFunc, bool isFloat = DefaultIsFloat); +Field parseMyDuration(const String & str, int8_t fsp = DefaultFsp); + void convertTimeZone(UInt64 from_time, UInt64 & to_time, const DateLUTImpl & time_zone_from, const DateLUTImpl & time_zone_to, bool throw_exception = false); void convertTimeZoneByOffset(UInt64 from_time, UInt64 & to_time, bool from_utc, Int64 offset, bool throw_exception = false); diff --git a/dbms/src/Common/StringUtils/StringUtils.h b/dbms/src/Common/StringUtils/StringUtils.h index 61b85f0912b..47c35a2fb2e 100644 --- a/dbms/src/Common/StringUtils/StringUtils.h +++ b/dbms/src/Common/StringUtils/StringUtils.h @@ -149,3 +149,99 @@ inline bool equalsCaseInsensitive(char a, char b) { return a == b || (isAlphaASCII(a) && alternateCaseIfAlphaASCII(a) == b); } + +inline bool isPunctuation(char c) +{ + return (c >= 0x21 && c <= 0x2F) || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B && c <= 0x7E); +} + +// match at lease x digits +inline std::tuple digit(const std::string & str, int x) +{ + for (size_t i = 0, times = x; i < times && i < str.length() && isdigit(str[i]); i++, x--) + { + // find x digit + } + if (x > 0) + { + return {"", str, false}; + } + size_t idx = 0; + for (; idx < str.length() && isdigit(str[idx]); idx++) + { + // remove all remaining spaces + } + return {str.substr(0, idx), str.substr(idx), true}; +} + +// remove at lease x spaces +inline std::pair space(const std::string & str, int x) +{ + for (size_t i = 0, times = x; i < times && i < str.length() && isspace(str[i]); i++, x--) + { + // find x spaces + } + if (x > 0) + { + return {str, false}; + } + size_t idx = 0; + for (; idx < str.length() && isspace(str[idx]); idx++) + { + // remove all remaining spaces + } + return {str.substr(idx), true}; +} + +// remove at least one space +inline std::pair space1(const std::string & str) +{ + return space(str, 1); +} + +// remove at least zero space +inline std::string space0(const std::string & str) +{ + return space(str, 0).first; +} + +inline std::pair anyPunct(const std::string & str) +{ + if (!str.empty() && isPunctuation(str[0])) + { + return {str.substr(1), true}; + } + return {str, false}; +} + +inline std::pair matchColon(const std::string & str) +{ + std::string rest = space0(str); + if (rest.empty() || rest[0] != ':') + { + return {str, false}; + } + rest = rest.substr(1); + rest = space0(rest); + return {rest, true}; +} + +// number, idx of rest string, matched +inline std::tuple number(const std::string & str) +{ + size_t idx = 0; + for (; idx < str.length() && isdigit(str[idx]); idx++) + { + // find digit string + } + std::string digits = str.substr(0, idx); + try + { + int result = std::stoi(digits); + return {result, idx, true}; + } + catch (std::exception & e) + { + return {0, 0, false}; + } +} \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index c2383c6698c..e130091e615 100755 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -584,6 +584,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::FromUnixTime1Arg, "fromUnixTime"}, {tipb::ScalarFuncSig::FromUnixTime2Arg, "fromUnixTime"}, {tipb::ScalarFuncSig::ExtractDatetime, "extractMyDateTime"}, + {tipb::ScalarFuncSig::ExtractDatetimeFromString, "extractMyDateTimeFromString"}, {tipb::ScalarFuncSig::ExtractDuration, "extractMyDuration"}, //{tipb::ScalarFuncSig::AddDateStringString, "cast"}, diff --git a/dbms/src/Functions/FunctionsDateTime.cpp b/dbms/src/Functions/FunctionsDateTime.cpp index 4ff382ae28a..556701402e2 100644 --- a/dbms/src/Functions/FunctionsDateTime.cpp +++ b/dbms/src/Functions/FunctionsDateTime.cpp @@ -249,6 +249,7 @@ void registerFunctionsDateTime(FunctionFactory & factory) factory.registerFunction(FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/dbms/src/Functions/FunctionsDateTime.h b/dbms/src/Functions/FunctionsDateTime.h index ef6bcce81aa..bfac6ccec36 100644 --- a/dbms/src/Functions/FunctionsDateTime.h +++ b/dbms/src/Functions/FunctionsDateTime.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -2753,8 +2754,6 @@ class FunctionExtractMyDateTime : public IFunction if (!arguments[0]->isString()) throw TiFlashException(fmt::format("First argument for function {} (unit) must be String", getName()), Errors::Coprocessor::BadRequest); - // TODO: Support Extract from string, see https://github.com/pingcap/tidb/issues/22700 - // if (!(arguments[1]->isString() || arguments[1]->isDateOrDateTime())) if (!arguments[1]->isMyDateOrMyDateTime()) throw TiFlashException( fmt::format("Illegal type {} of second argument of function {}. Must be DateOrDateTime.", arguments[1]->getName(), getName()), @@ -2814,42 +2813,282 @@ class FunctionExtractMyDateTime : public IFunction template static void dispatch(const ColumnPtr col_from, PaddedPODArray & vec_to) { - if (const auto * from = checkAndGetColumn(col_from.get()); from) + if (const auto * from = checkAndGetColumn(col_from.get()); from) { - const auto & data = from->getChars(); - const auto & offsets = from->getOffsets(); - if (checkColumnConst(from)) + const auto & data = from->getData(); + vectorDatetime(data, vec_to); + } + } + + template + static void vectorDatetime(const ColumnUInt64::Container & vec_from, PaddedPODArray & vec_to) + { + vec_to.resize(vec_from.size()); + for (size_t i = 0; i < vec_from.size(); i++) + { + vec_to[i] = F(vec_from[i]); + } + } +}; + +struct ExtractMyDurationImpl +{ + static Int64 signMultiplier(const MyDuration & duration) + { + return duration.isNeg() ? -1 : 1; + } + + static Int64 extractHour(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * duration.hours(); + } + + static Int64 extractMinute(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * duration.minutes(); + } + + static Int64 extractSecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * duration.seconds(); + } + + static Int64 extractMicrosecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * duration.microSecond(); + } + + static Int64 extractSecondMicrosecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * (duration.seconds() * 1000000LL + duration.microSecond()); + } + + static Int64 extractMinuteMicrosecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * ((duration.minutes() * 100LL + duration.seconds()) * 1000000LL + duration.microSecond()); + } + + static Int64 extractMinuteSecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * (duration.minutes() * 100LL + duration.seconds()); + } + + static Int64 extractHourMicrosecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * ((duration.hours() * 10000LL + duration.minutes() * 100LL + duration.seconds()) * 1000000LL + duration.microSecond()); + } + + static Int64 extractHourSecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * (duration.hours() * 10000LL + duration.minutes() * 100LL + duration.seconds()); + } + + static Int64 extractHourMinute(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * (duration.hours() * 100LL + duration.minutes()); + } + + static Int64 extractDayMicrosecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * ((duration.hours() * 10000LL + duration.minutes() * 100LL + duration.seconds()) * 1000000LL + duration.microSecond()); + } + + static Int64 extractDaySecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * (duration.hours() * 10000LL + duration.minutes() * 100LL + duration.seconds()); + } + + static Int64 extractDayMinute(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * (duration.hours() * 100LL + duration.minutes()); + } + + static Int64 extractDayHour(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * duration.hours(); + } +}; + +struct ExtractMyDateTimeFromStringImpl +{ + static Int64 extractDayMicrosecond(String dtStr) + { + Field duration_field = parseMyDuration(dtStr); + Int64 result = 0; + if (duration_field.isNull()) + { + // TODO: should return null + return 0; + } + MyDuration duration(duration_field.template safeGet()); + result = ExtractMyDurationImpl::extractDayMicrosecond(duration.nanoSecond()); + + Field datetime_field = parseMyDateTime(dtStr); + if (!datetime_field.isNull()) + { + MyDateTime datetime(datetime_field.template safeGet()); + if (datetime.hour == duration.hours() && datetime.minute == duration.minutes() && datetime.second == duration.seconds() && datetime.year > 0) { - StringRef string_ref(data.data(), offsets[0] - 1); - constantString(string_ref, from->size(), vec_to); + return ExtractMyDateTimeImpl::extractDayMicrosecond(datetime.toPackedUInt()); } - else + } + return result; + } + + static Int64 extractDaySecond(String dtStr) + { + Field duration_field = parseMyDuration(dtStr); + Int64 result = 0; + if (duration_field.isNull()) + { + // TODO: should return null + return 0; + } + MyDuration duration(duration_field.template safeGet()); + result = ExtractMyDurationImpl::extractDaySecond(duration.nanoSecond()); + + Field datetime_field = parseMyDateTime(dtStr); + if (!datetime_field.isNull()) + { + MyDateTime datetime(datetime_field.template safeGet()); + if (datetime.hour == duration.hours() && datetime.minute == duration.minutes() && datetime.second == duration.seconds() && datetime.year > 0) { - vectorString(data, offsets, vec_to); + return ExtractMyDateTimeImpl::extractDaySecond(datetime.toPackedUInt()); } } - else if (const auto * from = checkAndGetColumn(col_from.get()); from) + return result; + } + + static Int64 extractDayMinute(String dtStr) + { + Field duration_field = parseMyDuration(dtStr); + Int64 result = 0; + if (duration_field.isNull()) { - const auto & data = from->getData(); - if (checkColumnConst(from)) + // TODO: should return null + return 0; + } + MyDuration duration(duration_field.template safeGet()); + result = ExtractMyDurationImpl::extractDayMinute(duration.nanoSecond()); + + Field datetime_field = parseMyDateTime(dtStr); + if (!datetime_field.isNull()) + { + MyDateTime datetime(datetime_field.template safeGet()); + if (datetime.hour == duration.hours() && datetime.minute == duration.minutes() && datetime.second == duration.seconds() && datetime.year > 0) { - constantDatetime(from->getUInt(0), from->size(), vec_to); + return ExtractMyDateTimeImpl::extractDayMinute(datetime.toPackedUInt()); } - else + } + return result; + } + + static Int64 extractDayHour(String dtStr) + { + Field duration_field = parseMyDuration(dtStr); + Int64 result = 0; + if (duration_field.isNull()) + { + // TODO: should return null + return 0; + } + MyDuration duration(duration_field.template safeGet()); + result = ExtractMyDurationImpl::extractDayHour(duration.nanoSecond()); + + Field datetime_field = parseMyDateTime(dtStr); + if (!datetime_field.isNull()) + { + MyDateTime datetime(datetime_field.template safeGet()); + if (datetime.hour == duration.hours() && datetime.minute == duration.minutes() && datetime.second == duration.seconds() && datetime.year > 0) { - vectorDatetime(data, vec_to); + return ExtractMyDateTimeImpl::extractDayHour(datetime.toPackedUInt()); } } + return result; + } +}; + +class FunctionExtractMyDateTimeFromString : public IFunction +{ +public: + static constexpr auto name = "extractMyDateTimeFromString"; + static FunctionPtr create(const Context &) { return std::make_shared(); }; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!arguments[0]->isString()) + throw TiFlashException(fmt::format("First argument for function {} (unit) must be String", getName()), Errors::Coprocessor::BadRequest); + + if (!arguments[1]->isString()) + throw TiFlashException( + fmt::format("Illegal type {} of second argument of function {}. Must be String.", arguments[1]->getName(), getName()), + Errors::Coprocessor::BadRequest); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + const auto * unit_column = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); + if (!unit_column) + throw TiFlashException( + fmt::format("First argument for function {} must be constant String", getName()), + Errors::Coprocessor::BadRequest); + + String unit = Poco::toLower(unit_column->getValue()); + + auto col_from = block.getByPosition(arguments[1]).column; + + size_t rows = block.rows(); + auto col_to = ColumnInt64::create(rows); + auto & vec_to = col_to->getData(); + + if (unit == "day_microsecond") + dispatch(col_from, vec_to); + else if (unit == "day_second") + dispatch(col_from, vec_to); + else if (unit == "day_minute") + dispatch(col_from, vec_to); + else if (unit == "day_hour") + dispatch(col_from, vec_to); + else + throw TiFlashException(fmt::format("Function {} does not support '{}' unit", getName(), unit), Errors::Coprocessor::BadRequest); + + block.getByPosition(result).column = std::move(col_to); } +private: + using Func = Int64 (*)(String); + template - static void constantString(const StringRef & from, size_t size, PaddedPODArray & vec_to) + static void dispatch(const ColumnPtr col_from, PaddedPODArray & vec_to) { - vec_to.resize(size); - auto from_value = get(parseMyDateTime(from.toString())); - for (size_t i = 0; i < size; ++i) + if (const auto * from = checkAndGetColumn(col_from.get()); from) { - vec_to[i] = F(from_value); + const auto & data = from->getChars(); + const auto & offsets = from->getOffsets(); + vectorString(data, offsets, vec_to); } } @@ -2859,38 +3098,17 @@ class FunctionExtractMyDateTime : public IFunction const ColumnString::Offsets & offsets_from, PaddedPODArray & vec_to) { - vec_to.resize(offsets_from.size() + 1); + vec_to.resize(offsets_from.size()); size_t current_offset = 0; for (size_t i = 0; i < offsets_from.size(); i++) { size_t next_offset = offsets_from[i]; size_t string_size = next_offset - current_offset - 1; StringRef string_value(&vec_from[current_offset], string_size); - auto packed_value = get(parseMyDateTime(string_value.toString())); - vec_to[i] = F(packed_value); + vec_to[i] = F(string_value.toString()); current_offset = next_offset; } } - - template - static void constantDatetime(const UInt64 & from, size_t size, PaddedPODArray & vec_to) - { - vec_to.resize(size); - for (size_t i = 0; i < size; ++i) - { - vec_to[i] = F(from); - } - } - - template - static void vectorDatetime(const ColumnUInt64::Container & vec_from, PaddedPODArray & vec_to) - { - vec_to.resize(vec_from.size()); - for (size_t i = 0; i < vec_from.size(); i++) - { - vec_to[i] = F(vec_from[i]); - } - } }; struct SysDateWithFsp diff --git a/dbms/src/Functions/FunctionsDuration.h b/dbms/src/Functions/FunctionsDuration.h index 1cb046b4d40..1a40c741856 100644 --- a/dbms/src/Functions/FunctionsDuration.h +++ b/dbms/src/Functions/FunctionsDuration.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -88,98 +89,6 @@ class FunctionMyDurationToSec : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override; }; -struct ExtractMyDurationImpl -{ - static Int64 signMultiplier(const MyDuration & duration) - { - return duration.isNeg() ? -1 : 1; - } - - static Int64 extractHour(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * duration.hours(); - } - - static Int64 extractMinute(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * duration.minutes(); - } - - static Int64 extractSecond(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * duration.seconds(); - } - - static Int64 extractMicrosecond(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * duration.microSecond(); - } - - static Int64 extractSecondMicrosecond(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * (duration.seconds() * 1000000LL + duration.microSecond()); - } - - static Int64 extractMinuteMicrosecond(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * ((duration.minutes() * 100LL + duration.seconds()) * 1000000LL + duration.microSecond()); - } - - static Int64 extractMinuteSecond(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * (duration.minutes() * 100LL + duration.seconds()); - } - - static Int64 extractHourMicrosecond(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * ((duration.hours() * 10000LL + duration.minutes() * 100LL + duration.seconds()) * 1000000LL + duration.microSecond()); - } - - static Int64 extractHourSecond(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * (duration.hours() * 10000LL + duration.minutes() * 100LL + duration.seconds()); - } - - static Int64 extractHourMinute(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * (duration.hours() * 100LL + duration.minutes()); - } - - static Int64 extractDayMicrosecond(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * ((duration.hours() * 10000LL + duration.minutes() * 100LL + duration.seconds()) * 1000000LL + duration.microSecond()); - } - - static Int64 extractDaySecond(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * (duration.hours() * 10000LL + duration.minutes() * 100LL + duration.seconds()); - } - - static Int64 extractDayMinute(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * (duration.hours() * 100LL + duration.minutes()); - } - - static Int64 extractDayHour(Int64 nano) - { - MyDuration duration(nano); - return signMultiplier(duration) * duration.hours(); - } -}; - class FunctionExtractMyDuration : public IFunction { public: diff --git a/dbms/src/Functions/tests/gtest_datetime_extract.cpp b/dbms/src/Functions/tests/gtest_datetime_extract.cpp index d0ff74ac3e9..95f88a4a421 100644 --- a/dbms/src/Functions/tests/gtest_datetime_extract.cpp +++ b/dbms/src/Functions/tests/gtest_datetime_extract.cpp @@ -13,9 +13,7 @@ // limitations under the License. #include -#include #include -#include #include #include #include @@ -26,7 +24,6 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsign-compare" -#include #pragma GCC diagnostic pop @@ -39,62 +36,41 @@ class TestDateTimeExtract : public DB::tests::FunctionTest { }; -// Disabled for now, since we haven't supported ExtractFromString yet -TEST_F(TestDateTimeExtract, DISABLED_ExtractFromString) +TEST_F(TestDateTimeExtract, ExtractFromString) try { - const Context context = TiFlashTestEnv::getContext(); - - auto & factory = FunctionFactory::instance(); - std::vector units{ - "year", - "quarter", - "month", - "week", - "day", "day_microsecond", "day_second", "day_minute", "day_hour", - "year_month", }; String datetime_value{"2021/1/29 12:34:56.123456"}; - std::vector results{2021, 1, 1, 4, 29, 29123456123456, 29123456, 291234, 2912, 202101}; + std::vector results{29123456123456, 29123456, 291234, 2912}; for (size_t i = 0; i < units.size(); ++i) { const auto & unit = units[i]; - Block block; - - MutableColumnPtr col_units = ColumnString::create(); - col_units->insert(Field(unit.c_str(), unit.size())); - col_units = ColumnConst::create(col_units->getPtr(), 1); - - auto col_datetime = ColumnString::create(); - col_datetime->insert(Field(datetime_value.data(), datetime_value.size())); - ColumnWithTypeAndName unit_ctn = ColumnWithTypeAndName(std::move(col_units), std::make_shared(), "unit"); - ColumnWithTypeAndName datetime_ctn - = ColumnWithTypeAndName(std::move(col_datetime), std::make_shared(), "datetime_value"); - block.insert(unit_ctn); - block.insert(datetime_ctn); - // for result from extract - block.insert({}); - - // test extract - auto func_builder_ptr = factory.tryGet("extractMyDateTime", context); - ASSERT_TRUE(func_builder_ptr != nullptr); + const auto & result = results[i]; + // nullable/non-null string + ASSERT_COLUMN_EQ(toNullableVec({result}), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), toNullableVec({datetime_value}))); + ASSERT_COLUMN_EQ(toVec({result}), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), toVec({datetime_value}))); + // const string + ASSERT_COLUMN_EQ(createConstColumn(1, result), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), createConstColumn(1, {datetime_value}))); + } - ColumnNumbers arg_cols_idx{0, 1}; - size_t res_col_idx = 2; - func_builder_ptr->build({unit_ctn, datetime_ctn})->execute(block, arg_cols_idx, res_col_idx); - const IColumn * ctn_res = block.getByPosition(res_col_idx).column.get(); - const auto * col_res = checkAndGetColumn(ctn_res); + datetime_value = "12:34:56.123456"; + results = {123456123456, 123456, 1234, 12}; - Field res_field; - col_res->get(0, res_field); - Int64 s = res_field.get(); - EXPECT_EQ(results[i], s); + for (size_t i = 0; i < units.size(); ++i) + { + const auto & unit = units[i]; + const auto & result = results[i]; + // nullable/non-null string + ASSERT_COLUMN_EQ(toNullableVec({result}), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), toNullableVec({datetime_value}))); + ASSERT_COLUMN_EQ(toVec({result}), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), toVec({datetime_value}))); + // const string + ASSERT_COLUMN_EQ(createConstColumn(1, result), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), createConstColumn(1, {datetime_value}))); } } CATCH @@ -102,10 +78,6 @@ CATCH TEST_F(TestDateTimeExtract, ExtractFromMyDateTime) try { - const Context context = TiFlashTestEnv::getContext(); - - auto & factory = FunctionFactory::instance(); - std::vector units{ "year", "quarter", @@ -124,37 +96,14 @@ try for (size_t i = 0; i < units.size(); ++i) { const auto & unit = units[i]; - Block block; - - MutableColumnPtr col_units = ColumnString::create(); - col_units->insert(Field(unit.c_str(), unit.size())); - col_units = ColumnConst::create(col_units->getPtr(), 1); - - auto col_datetime = ColumnUInt64::create(); - col_datetime->insert(Field(datetime_value.toPackedUInt())); - ColumnWithTypeAndName unit_ctn = ColumnWithTypeAndName(std::move(col_units), std::make_shared(), "unit"); - ColumnWithTypeAndName datetime_ctn - = ColumnWithTypeAndName(std::move(col_datetime), std::make_shared(), "datetime_value"); - - block.insert(unit_ctn); - block.insert(datetime_ctn); - // for result from extract - block.insert({}); - - // test extract - auto func_builder_ptr = factory.tryGet("extractMyDateTime", context); - ASSERT_TRUE(func_builder_ptr != nullptr); - - ColumnNumbers arg_cols_idx{0, 1}; - size_t res_col_idx = 2; - func_builder_ptr->build({unit_ctn, datetime_ctn})->execute(block, arg_cols_idx, res_col_idx); - const IColumn * ctn_res = block.getByPosition(res_col_idx).column.get(); - const auto * col_res = checkAndGetColumn(ctn_res); - - Field res_field; - col_res->get(0, res_field); - Int64 s = res_field.get(); - EXPECT_EQ(results[i], s); + const auto & result = results[i]; + // nullable/non-null datetime + ASSERT_COLUMN_EQ(toNullableVec({result}), executeFunction("extractMyDateTime", createConstColumn(1, {unit}), createDateTimeColumn({datetime_value}, 6))); + ASSERT_COLUMN_EQ(toVec({result}), executeFunction("extractMyDateTime", createConstColumn(1, {unit}), createDateTimeColumn({datetime_value}, 6))); + // const datetime + ASSERT_COLUMN_EQ(createConstColumn(1, result), executeFunction("extractMyDateTime", createConstColumn(1, {unit}), createDateTimeColumnConst(1, {datetime_value}, 6))); + // null + ASSERT_COLUMN_EQ(toNullableVec({std::nullopt}), executeFunction("extractMyDateTime", createConstColumn(1, {unit}), createDateTimeColumn({std::nullopt}, 6))); } } CATCH diff --git a/tests/fullstack-test/expr/extract_datetime_from_string.test b/tests/fullstack-test/expr/extract_datetime_from_string.test new file mode 100644 index 00000000000..2eae2baf23c --- /dev/null +++ b/tests/fullstack-test/expr/extract_datetime_from_string.test @@ -0,0 +1,26 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mysql> drop table if exists test.t +mysql> create table test.t (a varchar(50)) +mysql> insert into test.t values('2021-03-13 12:34:56.123456') +mysql> alter table test.t set tiflash replica 1 +func> wait_table test t + +mysql> set session tidb_isolation_read_engines='tiflash'; select a from test.t where extract(day_microsecond from a) = 13123456123456 and extract(day_second from a) = 13123456 and extract(day_minute from a) = 131234 and extract(day_hour from a) = 1312; +a +2021-03-13 12:34:56.123456 + + +mysql> drop table if exists test.t From a6df07d97742a144baa4f78c9a2f11485b355b43 Mon Sep 17 00:00:00 2001 From: birdstorm Date: Mon, 14 Nov 2022 02:17:32 +0800 Subject: [PATCH 2/8] format Signed-off-by: birdstorm --- dbms/src/Common/MyTime.cpp | 2 +- dbms/src/Functions/FunctionsDuration.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 4b60d919378..28edd49f7d4 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include +#include #include #include #include diff --git a/dbms/src/Functions/FunctionsDuration.h b/dbms/src/Functions/FunctionsDuration.h index 1a40c741856..b61193310dc 100644 --- a/dbms/src/Functions/FunctionsDuration.h +++ b/dbms/src/Functions/FunctionsDuration.h @@ -24,8 +24,8 @@ #include #include #include -#include #include +#include #include #include #include From f625e3d080d97efbaed1dad6781e4cbcdd9729fb Mon Sep 17 00:00:00 2001 From: birdstorm Date: Wed, 16 Nov 2022 12:10:28 +0800 Subject: [PATCH 3/8] fix tests Signed-off-by: birdstorm --- dbms/src/Common/MyTime.cpp | 74 +++++++++++++++++++++++--------------- dbms/src/Common/MyTime.h | 2 +- 2 files changed, 47 insertions(+), 29 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 28edd49f7d4..08fc2665d95 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -76,33 +76,18 @@ bool scanTimeArgs(const std::vector & seps, std::initializer_list return true; } -// find index of fractional point. -int getFracIndex(const String & format) -{ - int idx = -1; - for (int i = int(format.size()) - 1; i >= 0; i--) - { - if (std::ispunct(format[i])) - { - if (format[i] == '.') - { - idx = i; - } - break; - } - } - return idx; -} - // helper for date part splitting, punctuation characters are valid separators anywhere, // while space and 'T' are valid separators only between date and time. -bool isValidSeperator(char c, int previous_parts) +bool isValidSeparator(char c, int previous_parts) { if (isPunctuation(c)) return true; // for https://github.com/pingcap/tics/issues/4036 - return previous_parts == 2 && (c == 'T' || isWhitespaceASCII(c)); + if (previous_parts == 2 && (c == 'T' || isWhitespaceASCII(c))) + return true; + + return previous_parts > 4 && !isdigit(c); } std::vector parseDateFormat(String format) @@ -112,25 +97,31 @@ std::vector parseDateFormat(String format) if (format.empty()) return {}; - if (!std::isdigit(format[0]) || !std::isdigit(format[format.size() - 1])) + // Date format must start with number. + if (!std::isdigit(format[0])) { return {}; } + size_t start = 0; + // Initialize `seps` with capacity of 6. The input `format` is typically + // a date time of the form "2006-01-02 15:04:05", which has 6 numeric parts + // (the fractional second part is usually removed by `splitDateTime`). + // Setting `seps`'s capacity to 6 avoids reallocation in this common case. std::vector seps; seps.reserve(6); - size_t start = 0; - for (size_t i = 0; i < format.size(); i++) + + for (size_t i = 1; i < format.length() - 1; i++) { - if (isValidSeperator(format[i], seps.size())) + if (isValidSeparator(format[i], seps.size())) { int previous_parts = seps.size(); - seps.push_back(format.substr(start, i - start)); + seps.push_back(format.substr(start, i)); start = i + 1; - for (size_t j = i + 1; j < format.size(); j++) + for (size_t j = i + 1; j < format.length(); j++) { - if (!isValidSeperator(format[j], previous_parts)) + if (!isValidSeparator(format[j], previous_parts)) break; start++; i++; @@ -263,6 +254,32 @@ bool isPunctuation(char c) return (c >= 0x21 && c <= 0x2F) || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B && c <= 0x7E); } +// getFracIndex finds the last '.' for get fracStr, index = -1 means fracStr not found. +// but for format like '2019.01.01 00:00:00', the index should be -1. +// It will not be affected by the time zone suffix. For format like '2020-01-01 12:00:00.123456+05:00', the index should be 19. +int getFracIndex(const String & format) +{ + auto [tz_idx, tz_sign, tz_hour, tz_sep, tz_minute] = getTimeZone(format); + int end = format.length() - 1; + if (tz_idx != -1) + { + end = tz_idx - 1; + } + int idx = -1; + for (int i = end; i >= 0; i--) + { + if (isPunctuation(format[i])) + { + if (format[i] == '.') + { + idx = i; + } + break; + } + } + return idx; +} + std::tuple, String, bool, String, String, String, String, bool> splitDatetime(String format) { std::vector seps; @@ -278,9 +295,10 @@ std::tuple, String, bool, String, String, String, String, bo } format = format.substr(0, tz_idx); } - size_t frac_idx = getFracIndex(format); + int frac_idx = getFracIndex(format); if (frac_idx > 0) { + // Only contain digits size_t frac_end = frac_idx + 1; for (; frac_end < format.length() && isdigit(format[frac_end]); frac_end++) { diff --git a/dbms/src/Common/MyTime.h b/dbms/src/Common/MyTime.h index f5d422cc895..2a04e6574b8 100644 --- a/dbms/src/Common/MyTime.h +++ b/dbms/src/Common/MyTime.h @@ -227,7 +227,7 @@ inline time_t getEpochSecond(const MyDateTime & my_time, const DateLUTImpl & tim bool isPunctuation(char c); -bool isValidSeperator(char c, int previous_parts); +bool isValidSeparator(char c, int previous_parts); // Build CoreTime value with checking overflow of internal bit fields, return true if input is invalid. // Note that this function will not check if the input is logically a valid datetime value. From f9cadfa3d0d0870e03127afed2698093c5315e88 Mon Sep 17 00:00:00 2001 From: birdstorm Date: Wed, 16 Nov 2022 13:26:49 +0800 Subject: [PATCH 4/8] add overflow test Signed-off-by: birdstorm --- dbms/src/Common/MyTime.cpp | 78 +++++++++++++---------- dbms/src/Common/StringUtils/StringUtils.h | 2 +- dbms/src/Common/tests/gtest_mytime.cpp | 2 + 3 files changed, 46 insertions(+), 36 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 08fc2665d95..c00a41b375f 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -594,6 +594,39 @@ bool noNeedCheckTime(Int32, Int32, Int32, Int32, Int32, Int32) return true; } +UInt64 addSeconds(UInt64 t, Int64 delta) +{ + // todo support zero date + if (t == 0) + { + return t; + } + MyDateTime my_time(t); + Int64 current_second = my_time.hour * MyTimeBase::SECOND_IN_ONE_HOUR + my_time.minute * MyTimeBase::SECOND_IN_ONE_MINUTE + my_time.second; + current_second += delta; + if (current_second >= 0) + { + Int64 days = current_second / MyTimeBase::SECOND_IN_ONE_DAY; + current_second = current_second % MyTimeBase::SECOND_IN_ONE_DAY; + if (days != 0) + addDays(my_time, days); + } + else + { + Int64 days = (-current_second) / MyTimeBase::SECOND_IN_ONE_DAY; + if ((-current_second) % MyTimeBase::SECOND_IN_ONE_DAY != 0) + { + days++; + } + current_second += days * MyTimeBase::SECOND_IN_ONE_DAY; + addDays(my_time, -days); + } + my_time.hour = current_second / MyTimeBase::SECOND_IN_ONE_HOUR; + my_time.minute = (current_second % MyTimeBase::SECOND_IN_ONE_HOUR) / MyTimeBase::SECOND_IN_ONE_MINUTE; + my_time.second = current_second % MyTimeBase::SECOND_IN_ONE_MINUTE; + return my_time.toPackedUInt(); +} + // Return true if the time is invalid. inline bool getDatetime(const Int64 & num, MyDateTime & result) { @@ -995,11 +1028,12 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t } UInt32 micro_second = 0; - if (hhmmss && !frac_str.empty()) + bool overflow; + if (hhmmss) { // If input string is "20170118.999", without hhmmss, fsp is meaningless. // TODO: this case is not only meaningless, but erroneous, please confirm. - bool overflow, matched; + bool matched; std::tie(micro_second, overflow, matched) = parseFrac(frac_str, fsp); if (!matched) { @@ -1014,6 +1048,13 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t MyDateTime result(year, month, day, hour, minute, second, micro_second); + if (overflow) + { + // fraction part overflow, add one second to result + MyDateTime tmp(addSeconds(result.toPackedUInt(), 1)); + result = tmp; + } + if (has_tz) { if (!hhmmss) @@ -1596,39 +1637,6 @@ bool toCoreTimeChecked(const UInt64 & year, const UInt64 & month, const UInt64 & return false; } -UInt64 addSeconds(UInt64 t, Int64 delta) -{ - // todo support zero date - if (t == 0) - { - return t; - } - MyDateTime my_time(t); - Int64 current_second = my_time.hour * MyTimeBase::SECOND_IN_ONE_HOUR + my_time.minute * MyTimeBase::SECOND_IN_ONE_MINUTE + my_time.second; - current_second += delta; - if (current_second >= 0) - { - Int64 days = current_second / MyTimeBase::SECOND_IN_ONE_DAY; - current_second = current_second % MyTimeBase::SECOND_IN_ONE_DAY; - if (days != 0) - addDays(my_time, days); - } - else - { - Int64 days = (-current_second) / MyTimeBase::SECOND_IN_ONE_DAY; - if ((-current_second) % MyTimeBase::SECOND_IN_ONE_DAY != 0) - { - days++; - } - current_second += days * MyTimeBase::SECOND_IN_ONE_DAY; - addDays(my_time, -days); - } - my_time.hour = current_second / MyTimeBase::SECOND_IN_ONE_HOUR; - my_time.minute = (current_second % MyTimeBase::SECOND_IN_ONE_HOUR) / MyTimeBase::SECOND_IN_ONE_MINUTE; - my_time.second = current_second % MyTimeBase::SECOND_IN_ONE_MINUTE; - return my_time.toPackedUInt(); -} - void fillMonthAndDay(int day_num, int & month, int & day, const int * accumulated_days_per_month) { month = day_num / 31; diff --git a/dbms/src/Common/StringUtils/StringUtils.h b/dbms/src/Common/StringUtils/StringUtils.h index 47c35a2fb2e..b66648cb827 100644 --- a/dbms/src/Common/StringUtils/StringUtils.h +++ b/dbms/src/Common/StringUtils/StringUtils.h @@ -244,4 +244,4 @@ inline std::tuple number(const std::string & str) { return {0, 0, false}; } -} \ No newline at end of file +} diff --git a/dbms/src/Common/tests/gtest_mytime.cpp b/dbms/src/Common/tests/gtest_mytime.cpp index 10f356711c7..5f005adbe07 100644 --- a/dbms/src/Common/tests/gtest_mytime.cpp +++ b/dbms/src/Common/tests/gtest_mytime.cpp @@ -121,6 +121,8 @@ try { checkParseMyDateTime(str, expected, type_with_fraction); } + DataTypeMyDateTime tp(2); + checkParseMyDateTime("2010-12-31 23:59:59.99999", "2011-01-01 00:00:00.00", tp); } catch (Exception & e) { From 81c21db900b06e09c1a5ca6de46bfd8a28215a62 Mon Sep 17 00:00:00 2001 From: birdstorm Date: Wed, 16 Nov 2022 14:15:45 +0800 Subject: [PATCH 5/8] minor Signed-off-by: birdstorm --- dbms/src/Common/MyTime.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index c00a41b375f..907cb735359 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -1028,7 +1028,7 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t } UInt32 micro_second = 0; - bool overflow; + bool overflow = false; if (hhmmss) { // If input string is "20170118.999", without hhmmss, fsp is meaningless. From a14fa4517c078a78239d9cc9bbe994cffa444e65 Mon Sep 17 00:00:00 2001 From: birdstorm Date: Thu, 17 Nov 2022 18:20:52 +0800 Subject: [PATCH 6/8] resolve comments Signed-off-by: birdstorm --- dbms/src/Common/MyTime.cpp | 6 --- dbms/src/Common/MyTime.h | 2 - dbms/src/Common/tests/gtest_mytime.cpp | 51 ++++++++++++++++++ dbms/src/Functions/FunctionsDateTime.h | 26 ++++----- .../tests/gtest_datetime_extract.cpp | 53 ++++++++++--------- 5 files changed, 92 insertions(+), 46 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 907cb735359..be5713554b3 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -248,12 +248,6 @@ std::tuple getTimeZone(const String & liter return std::make_tuple(-1, "", "", "", ""); } -// TODO: make unified helper -bool isPunctuation(char c) -{ - return (c >= 0x21 && c <= 0x2F) || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B && c <= 0x7E); -} - // getFracIndex finds the last '.' for get fracStr, index = -1 means fracStr not found. // but for format like '2019.01.01 00:00:00', the index should be -1. // It will not be affected by the time zone suffix. For format like '2020-01-01 12:00:00.123456+05:00', the index should be 19. diff --git a/dbms/src/Common/MyTime.h b/dbms/src/Common/MyTime.h index 2a04e6574b8..aa591c683a2 100644 --- a/dbms/src/Common/MyTime.h +++ b/dbms/src/Common/MyTime.h @@ -225,8 +225,6 @@ inline time_t getEpochSecond(const MyDateTime & my_time, const DateLUTImpl & tim return time_zone.makeDateTime(my_time.year, my_time.month, my_time.day, my_time.hour, my_time.minute, my_time.second); } -bool isPunctuation(char c); - bool isValidSeparator(char c, int previous_parts); // Build CoreTime value with checking overflow of internal bit fields, return true if input is invalid. diff --git a/dbms/src/Common/tests/gtest_mytime.cpp b/dbms/src/Common/tests/gtest_mytime.cpp index 5f005adbe07..829c551411d 100644 --- a/dbms/src/Common/tests/gtest_mytime.cpp +++ b/dbms/src/Common/tests/gtest_mytime.cpp @@ -121,6 +121,57 @@ try { checkParseMyDateTime(str, expected, type_with_fraction); } + cases_with_fsp = { + {"2012-12-31 11:30:45", "2012-12-31 11:30:45"}, + {"0000-00-00 00:00:00", "0000-00-00 00:00:00"}, + {"0001-01-01 00:00:00", "0001-01-01 00:00:00"}, + {"00-12-31 11:30:45", "2000-12-31 11:30:45"}, + {"12-12-31 11:30:45", "2012-12-31 11:30:45"}, + {"2012-12-31", "2012-12-31 00:00:00"}, + {"20121231", "2012-12-31 00:00:00"}, + {"121231", "2012-12-31 00:00:00"}, + {"2012^12^31 11+30+45", "2012-12-31 11:30:45"}, + {"2012^12^31T11+30+45", "2012-12-31 11:30:45"}, + {"2012-2-1 11:30:45", "2012-02-01 11:30:45"}, + {"12-2-1 11:30:45", "2012-02-01 11:30:45"}, + {"20121231113045", "2012-12-31 11:30:45"}, + {"121231113045", "2012-12-31 11:30:45"}, + {"2012-02-29", "2012-02-29 00:00:00"}, + {"00-00-00", "0000-00-00 00:00:00"}, + // {"00-00-00 00:00:00.123", "2000-00-00 00:00:00.123"}, + {"11111111111", "2011-11-11 11:11:01"}, + {"1701020301.", "2017-01-02 03:01:00"}, + // {"1701020304.1", "2017-01-02 03:04:01.0"}, + // {"1701020302.11", "2017-01-02 03:02:11.00"}, + {"170102036", "2017-01-02 03:06:00"}, + {"170102039.", "2017-01-02 03:09:00"}, + // {"170102037.11", "2017-01-02 03:07:11.00"}, + {"2018-01-01 18", "2018-01-01 18:00:00"}, + {"18-01-01 18", "2018-01-01 18:00:00"}, + // {"2018.01.01", "2018-01-01 00:00:00.00"}, + // {"2020.10.10 10.10.10", "2020-10-10 10:10:10.00"}, + // {"2020-10-10 10-10.10", "2020-10-10 10:10:10.00"}, + // {"2020-10-10 10.10", "2020-10-10 10:10:00.00"}, + // {"2018.01.01", "2018-01-01 00:00:00.00"}, + {"2018.01.01 00:00:00", "2018-01-01 00:00:00"}, + {"2018/01/01-00:00:00", "2018-01-01 00:00:00"}, + {"4710072", "2047-10-07 02:00:00"}, + {"2016-06-01 00:00:00 00:00:00", "2016-06-01 00:00:00"}, + {"2020-06-01 00:00:00ads!,?*da;dsx", "2020-06-01 00:00:00"}, + + {"2020-05-28 23:59:59 00:00:00", "2020-05-28 23:59:59"}, + {"2020-05-28 23:59:59-00:00:00", "2020-05-28 23:59:59"}, + {"2020-05-28 23:59:59T T00:00:00", "2020-05-28 23:59:59"}, + {"2020-10-22 10:31-10:12", "2020-10-22 10:31:10"}, + {"2018.01.01 01:00:00", "2018-01-01 01:00:00"}, + + // {"2020-01-01 12:00:00.123456+05:00", "2020-01-01 07:00:00.123456"} + }; + DataTypeMyDateTime type_with_zero_fraction(0); + for (auto & [str, expected] : cases_with_fsp) + { + checkParseMyDateTime(str, expected, type_with_zero_fraction); + } DataTypeMyDateTime tp(2); checkParseMyDateTime("2010-12-31 23:59:59.99999", "2011-01-01 00:00:00.00", tp); } diff --git a/dbms/src/Functions/FunctionsDateTime.h b/dbms/src/Functions/FunctionsDateTime.h index bfac6ccec36..834e618707d 100644 --- a/dbms/src/Functions/FunctionsDateTime.h +++ b/dbms/src/Functions/FunctionsDateTime.h @@ -2752,12 +2752,12 @@ class FunctionExtractMyDateTime : public IFunction DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!arguments[0]->isString()) - throw TiFlashException(fmt::format("First argument for function {} (unit) must be String", getName()), Errors::Coprocessor::BadRequest); + throw Exception(fmt::format("First argument for function {} (unit) must be String", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (!arguments[1]->isMyDateOrMyDateTime()) - throw TiFlashException( + throw Exception( fmt::format("Illegal type {} of second argument of function {}. Must be DateOrDateTime.", arguments[1]->getName(), getName()), - Errors::Coprocessor::BadRequest); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared(); } @@ -2769,9 +2769,9 @@ class FunctionExtractMyDateTime : public IFunction { const auto * unit_column = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); if (!unit_column) - throw TiFlashException( + throw Exception( fmt::format("First argument for function {} must be constant String", getName()), - Errors::Coprocessor::BadRequest); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); String unit = Poco::toLower(unit_column->getValue()); @@ -2802,7 +2802,7 @@ class FunctionExtractMyDateTime : public IFunction else if (unit == "year_month") dispatch(col_from, vec_to); else - throw TiFlashException(fmt::format("Function {} does not support '{}' unit", getName(), unit), Errors::Coprocessor::BadRequest); + throw Exception(fmt::format("Function {} does not support '{}' unit", getName(), unit), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); block.getByPosition(result).column = std::move(col_to); } @@ -2824,7 +2824,7 @@ class FunctionExtractMyDateTime : public IFunction static void vectorDatetime(const ColumnUInt64::Container & vec_from, PaddedPODArray & vec_to) { vec_to.resize(vec_from.size()); - for (size_t i = 0; i < vec_from.size(); i++) + for (size_t i = 0; i < vec_from.size(); ++i) { vec_to[i] = F(vec_from[i]); } @@ -3035,12 +3035,12 @@ class FunctionExtractMyDateTimeFromString : public IFunction DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!arguments[0]->isString()) - throw TiFlashException(fmt::format("First argument for function {} (unit) must be String", getName()), Errors::Coprocessor::BadRequest); + throw Exception(fmt::format("First argument for function {} (unit) must be String", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (!arguments[1]->isString()) - throw TiFlashException( + throw Exception( fmt::format("Illegal type {} of second argument of function {}. Must be String.", arguments[1]->getName(), getName()), - Errors::Coprocessor::BadRequest); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared(); } @@ -3052,9 +3052,9 @@ class FunctionExtractMyDateTimeFromString : public IFunction { const auto * unit_column = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); if (!unit_column) - throw TiFlashException( + throw Exception( fmt::format("First argument for function {} must be constant String", getName()), - Errors::Coprocessor::BadRequest); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); String unit = Poco::toLower(unit_column->getValue()); @@ -3073,7 +3073,7 @@ class FunctionExtractMyDateTimeFromString : public IFunction else if (unit == "day_hour") dispatch(col_from, vec_to); else - throw TiFlashException(fmt::format("Function {} does not support '{}' unit", getName(), unit), Errors::Coprocessor::BadRequest); + throw Exception(fmt::format("Function {} does not support '{}' unit", getName(), unit), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); block.getByPosition(result).column = std::move(col_to); } diff --git a/dbms/src/Functions/tests/gtest_datetime_extract.cpp b/dbms/src/Functions/tests/gtest_datetime_extract.cpp index 95f88a4a421..62eebbfabc5 100644 --- a/dbms/src/Functions/tests/gtest_datetime_extract.cpp +++ b/dbms/src/Functions/tests/gtest_datetime_extract.cpp @@ -39,38 +39,41 @@ class TestDateTimeExtract : public DB::tests::FunctionTest TEST_F(TestDateTimeExtract, ExtractFromString) try { + auto test = [&](const std::vector & units, const String & datetime_value, const std::vector & results) { + for (size_t i = 0; i < units.size(); ++i) + { + const auto & unit = units[i]; + const auto & result = results[i]; + // nullable/non-null string + ASSERT_COLUMN_EQ(toNullableVec({result}), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), toNullableVec({datetime_value}))); + ASSERT_COLUMN_EQ(toVec({result}), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), toVec({datetime_value}))); + // const string + ASSERT_COLUMN_EQ(createConstColumn(1, result), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), createConstColumn(1, {datetime_value}))); + // null + ASSERT_COLUMN_EQ(toNullableVec({std::nullopt}), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), toNullableVec({std::nullopt}))); + } + }; + std::vector units{ "day_microsecond", "day_second", "day_minute", "day_hour", }; - String datetime_value{"2021/1/29 12:34:56.123456"}; - std::vector results{29123456123456, 29123456, 291234, 2912}; - - for (size_t i = 0; i < units.size(); ++i) - { - const auto & unit = units[i]; - const auto & result = results[i]; - // nullable/non-null string - ASSERT_COLUMN_EQ(toNullableVec({result}), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), toNullableVec({datetime_value}))); - ASSERT_COLUMN_EQ(toVec({result}), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), toVec({datetime_value}))); - // const string - ASSERT_COLUMN_EQ(createConstColumn(1, result), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), createConstColumn(1, {datetime_value}))); - } - - datetime_value = "12:34:56.123456"; - results = {123456123456, 123456, 1234, 12}; - - for (size_t i = 0; i < units.size(); ++i) + std::vector>> test_cases = { + {"2021/1/29 12:34:56.123456", {29123456123456, 29123456, 291234, 2912}}, + {"12:34:56.123456", {123456123456, 123456, 1234, 12}}, + {" \t\r2012^12^31T11+30+45 \n ", {31113045000000, 31113045, 311130, 3111}}, + {"20121231113045", {31113045000000, 31113045, 311130, 3111}}, + {"121231113045", {31113045000000, 31113045, 311130, 3111}}, + // {"1701020304.1", {2030401000000, 2030401, 20304, 203}}, + {"2018-01-01 18", {1180000000000, 1180000, 11800, 118}}, + {"18-01-01 18", {1180000000000, 1180000, 11800, 118}}, + {"2020-01-01 12:00:00.123456+05:00", {1070000123456, 1070000, 10700, 107}}, + }; + for (auto & [datetime_value, results] : test_cases) { - const auto & unit = units[i]; - const auto & result = results[i]; - // nullable/non-null string - ASSERT_COLUMN_EQ(toNullableVec({result}), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), toNullableVec({datetime_value}))); - ASSERT_COLUMN_EQ(toVec({result}), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), toVec({datetime_value}))); - // const string - ASSERT_COLUMN_EQ(createConstColumn(1, result), executeFunction("extractMyDateTimeFromString", createConstColumn(1, {unit}), createConstColumn(1, {datetime_value}))); + test(units, datetime_value, results); } } CATCH From 88eacebdcea7b574b757fa8a3e0189659b87d5ac Mon Sep 17 00:00:00 2001 From: birdstorm Date: Sat, 26 Nov 2022 16:43:56 +0800 Subject: [PATCH 7/8] resolve comments Signed-off-by: birdstorm --- dbms/src/Common/MyTime.cpp | 4 ++-- dbms/src/Common/StringUtils/StringUtils.h | 8 ++++---- dbms/src/Functions/FunctionsDateTime.h | 16 ++++++++-------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index be5713554b3..8fd2d28c7f9 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -1208,7 +1208,7 @@ bool checkHHMMSS(const int * hhmmss) } // duration, matched -std::pair matchDuration(const String & str, int8_t fsp) +std::pair parseMyDurationImpl(const String & str, int8_t fsp) { if (fsp < 0 || fsp > 6 || str.empty()) { @@ -1315,7 +1315,7 @@ bool canFallbackToDateTime(const String & str) Field parseMyDuration(const String & str, int8_t fsp) { String rest = Poco::trim(str); - auto [duration, matched] = matchDuration(rest, fsp); + auto [duration, matched] = parseMyDurationImpl(rest, fsp); if (matched) { return Field(duration.nanoSecond()); diff --git a/dbms/src/Common/StringUtils/StringUtils.h b/dbms/src/Common/StringUtils/StringUtils.h index b66648cb827..72aec33b350 100644 --- a/dbms/src/Common/StringUtils/StringUtils.h +++ b/dbms/src/Common/StringUtils/StringUtils.h @@ -167,7 +167,7 @@ inline std::tuple digit(const std::string & str, return {"", str, false}; } size_t idx = 0; - for (; idx < str.length() && isdigit(str[idx]); idx++) + for (auto sz = str.length(); idx < sz && isdigit(str[idx]); idx++) { // remove all remaining spaces } @@ -177,7 +177,7 @@ inline std::tuple digit(const std::string & str, // remove at lease x spaces inline std::pair space(const std::string & str, int x) { - for (size_t i = 0, times = x; i < times && i < str.length() && isspace(str[i]); i++, x--) + for (size_t i = 0, times = x, sz = str.length(); i < times && i < sz && isspace(str[i]); i++, x--) { // find x spaces } @@ -186,7 +186,7 @@ inline std::pair space(const std::string & str, int x) return {str, false}; } size_t idx = 0; - for (; idx < str.length() && isspace(str[idx]); idx++) + for (auto sz = str.length(); idx < sz && isspace(str[idx]); idx++) { // remove all remaining spaces } @@ -230,7 +230,7 @@ inline std::pair matchColon(const std::string & str) inline std::tuple number(const std::string & str) { size_t idx = 0; - for (; idx < str.length() && isdigit(str[idx]); idx++) + for (auto sz = str.length(); idx < sz && isdigit(str[idx]); idx++) { // find digit string } diff --git a/dbms/src/Functions/FunctionsDateTime.h b/dbms/src/Functions/FunctionsDateTime.h index 834e618707d..a5461d43e11 100644 --- a/dbms/src/Functions/FunctionsDateTime.h +++ b/dbms/src/Functions/FunctionsDateTime.h @@ -2824,7 +2824,7 @@ class FunctionExtractMyDateTime : public IFunction static void vectorDatetime(const ColumnUInt64::Container & vec_from, PaddedPODArray & vec_to) { vec_to.resize(vec_from.size()); - for (size_t i = 0; i < vec_from.size(); ++i) + for (size_t i = 0, sz = vec_from.size(); i < sz; ++i) { vec_to[i] = F(vec_from[i]); } @@ -3100,7 +3100,7 @@ class FunctionExtractMyDateTimeFromString : public IFunction { vec_to.resize(offsets_from.size()); size_t current_offset = 0; - for (size_t i = 0; i < offsets_from.size(); i++) + for (size_t i = 0, sz = offsets_from.size(); i < sz; i++) { size_t next_offset = offsets_from[i]; size_t string_size = next_offset - current_offset - 1; @@ -3350,7 +3350,7 @@ struct TiDBLastDayTransformerImpl typename ColumnVector::Container & vec_to, typename ColumnVector::Container & vec_null_map) { - for (size_t i = 0; i < vec_from.size(); ++i) + for (size_t i = 0, sz = vec_from.size(); i < sz; ++i) { bool is_null = false; MyTimeBase val(vec_from[i]); @@ -3385,7 +3385,7 @@ struct TiDBDayOfWeekTransformerImpl typename ColumnVector::Container & vec_to, typename ColumnVector::Container & vec_null_map) { - for (size_t i = 0; i < vec_from.size(); ++i) + for (size_t i = 0, sz = vec_from.size(); i < sz; ++i) { bool is_null = false; MyTimeBase val(vec_from[i]); @@ -3422,7 +3422,7 @@ struct TiDBDayOfYearTransformerImpl typename ColumnVector::Container & vec_to, typename ColumnVector::Container & vec_null_map) { - for (size_t i = 0; i < vec_from.size(); ++i) + for (size_t i = 0, sz = vec_from.size(); i < sz; ++i) { bool is_null = false; MyTimeBase val(vec_from[i]); @@ -3457,7 +3457,7 @@ struct TiDBWeekOfYearTransformerImpl typename ColumnVector::Container & vec_null_map) { bool is_null = false; - for (size_t i = 0; i < vec_from.size(); ++i) + for (size_t i = 0, sz = vec_from.size(); i < sz; ++i) { MyTimeBase val(vec_from[i]); vec_to[i] = execute(context, val, is_null); @@ -3495,7 +3495,7 @@ struct TiDBToSecondsTransformerImpl typename ColumnVector::Container & vec_null_map) { bool is_null = false; - for (size_t i = 0; i < vec_from.size(); ++i) + for (size_t i = 0, sz = vec_from.size(); i < sz; ++i) { MyTimeBase val(vec_from[i]); vec_to[i] = execute(context, val, is_null); @@ -3531,7 +3531,7 @@ struct TiDBToDaysTransformerImpl typename ColumnVector::Container & vec_null_map) { bool is_null = false; - for (size_t i = 0; i < vec_from.size(); ++i) + for (size_t i = 0, sz = vec_from.size(); i < sz; ++i) { MyTimeBase val(vec_from[i]); vec_to[i] = execute(context, val, is_null); From 7cbd83378bf70362dddc5cbe8d4f4eebf306436e Mon Sep 17 00:00:00 2001 From: birdstorm Date: Thu, 1 Dec 2022 13:08:37 +0900 Subject: [PATCH 8/8] resolve comments --- dbms/src/Common/StringUtils/StringUtils.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/src/Common/StringUtils/StringUtils.h b/dbms/src/Common/StringUtils/StringUtils.h index 72aec33b350..cd5e56a82dc 100644 --- a/dbms/src/Common/StringUtils/StringUtils.h +++ b/dbms/src/Common/StringUtils/StringUtils.h @@ -158,7 +158,7 @@ inline bool isPunctuation(char c) // match at lease x digits inline std::tuple digit(const std::string & str, int x) { - for (size_t i = 0, times = x; i < times && i < str.length() && isdigit(str[i]); i++, x--) + for (size_t i = 0, times = x; i < times && i < str.length() && isdigit(str[i]); ++i, --x) { // find x digit } @@ -167,7 +167,7 @@ inline std::tuple digit(const std::string & str, return {"", str, false}; } size_t idx = 0; - for (auto sz = str.length(); idx < sz && isdigit(str[idx]); idx++) + for (auto sz = str.length(); idx < sz && isdigit(str[idx]); ++idx) { // remove all remaining spaces } @@ -177,7 +177,7 @@ inline std::tuple digit(const std::string & str, // remove at lease x spaces inline std::pair space(const std::string & str, int x) { - for (size_t i = 0, times = x, sz = str.length(); i < times && i < sz && isspace(str[i]); i++, x--) + for (size_t i = 0, times = x, sz = str.length(); i < times && i < sz && isspace(str[i]); ++i, --x) { // find x spaces } @@ -186,7 +186,7 @@ inline std::pair space(const std::string & str, int x) return {str, false}; } size_t idx = 0; - for (auto sz = str.length(); idx < sz && isspace(str[idx]); idx++) + for (auto sz = str.length(); idx < sz && isspace(str[idx]); ++idx) { // remove all remaining spaces } @@ -230,7 +230,7 @@ inline std::pair matchColon(const std::string & str) inline std::tuple number(const std::string & str) { size_t idx = 0; - for (auto sz = str.length(); idx < sz && isdigit(str[idx]); idx++) + for (auto sz = str.length(); idx < sz && isdigit(str[idx]); ++idx) { // find digit string }