From fececd9375bd3b5de2318788bb08fa84e7c5deed Mon Sep 17 00:00:00 2001 From: birdstorm Date: Fri, 11 Nov 2022 11:37:52 +0800 Subject: [PATCH] Support extract duration (#6205) close pingcap/tiflash#5636 --- dbms/src/Common/MyDuration.cpp | 6 +- dbms/src/Common/MyDuration.h | 15 +- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 2 +- dbms/src/Flash/tests/gtest_log_search.cpp | 2 +- dbms/src/Functions/FunctionsDateTime.h | 11 - dbms/src/Functions/FunctionsDuration.cpp | 1 + dbms/src/Functions/FunctionsDuration.h | 197 +++++++++++++++++- .../tests/gtest_datetime_extract.cpp | 4 +- .../tests/gtest_duration_extract.cpp | 76 +++++++ dbms/src/TestUtils/FunctionTestUtils.h | 54 +++++ .../fullstack-test/expr/extract_duration.test | 30 +++ 11 files changed, 376 insertions(+), 22 deletions(-) create mode 100644 dbms/src/Functions/tests/gtest_duration_extract.cpp create mode 100644 tests/fullstack-test/expr/extract_duration.test diff --git a/dbms/src/Common/MyDuration.cpp b/dbms/src/Common/MyDuration.cpp index d0ecf6e2f0a..97aa995a9ce 100644 --- a/dbms/src/Common/MyDuration.cpp +++ b/dbms/src/Common/MyDuration.cpp @@ -43,17 +43,17 @@ Int32 MyDuration::hours() const Int32 MyDuration::minutes() const { - return std::abs(nanos) / NANOS_PER_MINUTE % 60; + return (std::abs(nanos) / NANOS_PER_MINUTE) % 60; } Int32 MyDuration::seconds() const { - return std::abs(nanos) / NANOS_PER_SECOND % 60; + return (std::abs(nanos) / NANOS_PER_SECOND) % 60; } Int32 MyDuration::microSecond() const { - return std::abs(nanos) / NANOS_PER_MICRO % 1000000; + return (std::abs(nanos) / NANOS_PER_MICRO) % 1000000; } String MyDuration::toString() const diff --git a/dbms/src/Common/MyDuration.h b/dbms/src/Common/MyDuration.h index 730d31b232b..8c454d23d9f 100644 --- a/dbms/src/Common/MyDuration.h +++ b/dbms/src/Common/MyDuration.h @@ -48,11 +48,22 @@ class MyDuration static constexpr Int64 MAX_NANOS = MAX_HOUR_PART * NANOS_PER_HOUR + MAX_MINUTE_PART * NANOS_PER_MINUTE + MAX_SECOND_PART * NANOS_PER_SECOND + MAX_MICRO_PART * NANOS_PER_MICRO; static_assert(MAX_NANOS > 0); + static const int8_t DefaultFsp = 6; + Int64 nanos; UInt8 fsp; public: MyDuration() = default; + explicit MyDuration(Int64 nanos_) + : nanos(nanos_) + , fsp(DefaultFsp) + { + if (nanos_ > MAX_NANOS || nanos_ < -MAX_NANOS) + { + throw Exception(fmt::format("nanos must >= {} and <= {}", -MAX_NANOS, MAX_NANOS), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + } + } MyDuration(Int64 nanos_, UInt8 fsp_) : nanos(nanos_) , fsp(fsp_) @@ -61,13 +72,13 @@ class MyDuration { throw Exception(fmt::format("nanos must >= {} and <= {}", -MAX_NANOS, MAX_NANOS), ErrorCodes::ARGUMENT_OUT_OF_BOUND); } - if (fsp > 6) + if (fsp > 6 || fsp < 0) throw Exception("fsp must >= 0 and <= 6", ErrorCodes::ARGUMENT_OUT_OF_BOUND); } MyDuration(Int32 neg, Int32 hour, Int32 minute, Int32 second, Int32 microsecond, UInt8 fsp) : MyDuration(neg * (hour * NANOS_PER_HOUR + minute * NANOS_PER_MINUTE + second * NANOS_PER_SECOND + microsecond * NANOS_PER_MICRO), fsp) { - if (fsp > 6) + if (fsp > 6 || fsp < 0) throw Exception("fsp must >= 0 and <= 6", ErrorCodes::ARGUMENT_OUT_OF_BOUND); if (minute > MAX_MINUTE_PART || minute < 0) throw Exception("minute must >= 0 and <= 59", ErrorCodes::ARGUMENT_OUT_OF_BOUND); diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index e43cb7f9a12..c2383c6698c 100755 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -584,7 +584,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::FromUnixTime1Arg, "fromUnixTime"}, {tipb::ScalarFuncSig::FromUnixTime2Arg, "fromUnixTime"}, {tipb::ScalarFuncSig::ExtractDatetime, "extractMyDateTime"}, - //{tipb::ScalarFuncSig::ExtractDuration, "cast"}, + {tipb::ScalarFuncSig::ExtractDuration, "extractMyDuration"}, //{tipb::ScalarFuncSig::AddDateStringString, "cast"}, {tipb::ScalarFuncSig::AddDateStringInt, "date_add"}, diff --git a/dbms/src/Flash/tests/gtest_log_search.cpp b/dbms/src/Flash/tests/gtest_log_search.cpp index 99cc1aa63aa..87334dfb30c 100644 --- a/dbms/src/Flash/tests/gtest_log_search.cpp +++ b/dbms/src/Flash/tests/gtest_log_search.cpp @@ -41,7 +41,7 @@ inline Int64 getTimezoneAndOffset(int tz_sign, int tz_hour, int tz_min) inline void getTimezoneString(char * tzs, int tz_sign, int tz_hour, int tz_min) { - sprintf(tzs, "%c%02d:%02d", tz_sign > 0 ? '+' : '-', tz_hour, tz_min); + snprintf(tzs, 10, "%c%02d:%02d", tz_sign > 0 ? '+' : '-', tz_hour, tz_min); } TEST_F(LogSearchTest, LogSearch) diff --git a/dbms/src/Functions/FunctionsDateTime.h b/dbms/src/Functions/FunctionsDateTime.h index fa33a54f811..ef6bcce81aa 100644 --- a/dbms/src/Functions/FunctionsDateTime.h +++ b/dbms/src/Functions/FunctionsDateTime.h @@ -2802,17 +2802,6 @@ class FunctionExtractMyDateTime : public IFunction dispatch(col_from, vec_to); else if (unit == "year_month") dispatch(col_from, vec_to); - /// TODO: support ExtractDuration - // else if (unit == "hour"); - // else if (unit == "minute"); - // else if (unit == "second"); - // else if (unit == "microsecond"); - // else if (unit == "second_microsecond"); - // else if (unit == "minute_microsecond"); - // else if (unit == "minute_second"); - // else if (unit == "hour_microsecond"); - // else if (unit == "hour_second"); - // else if (unit == "hour_minute"); else throw TiFlashException(fmt::format("Function {} does not support '{}' unit", getName(), unit), Errors::Coprocessor::BadRequest); diff --git a/dbms/src/Functions/FunctionsDuration.cpp b/dbms/src/Functions/FunctionsDuration.cpp index 9ccafd2794d..751b46d90b4 100644 --- a/dbms/src/Functions/FunctionsDuration.cpp +++ b/dbms/src/Functions/FunctionsDuration.cpp @@ -215,5 +215,6 @@ void registerFunctionsDuration(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); } } // namespace DB diff --git a/dbms/src/Functions/FunctionsDuration.h b/dbms/src/Functions/FunctionsDuration.h index 5bc54d425f4..1cb046b4d40 100644 --- a/dbms/src/Functions/FunctionsDuration.h +++ b/dbms/src/Functions/FunctionsDuration.h @@ -14,11 +14,11 @@ #pragma once +#include #include #include #include #include -#include #include #include #include @@ -88,4 +88,197 @@ class FunctionMyDurationToSec : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override; }; -} // namespace DB \ No newline at end of file +struct ExtractMyDurationImpl +{ + static Int64 signMultiplier(const MyDuration & duration) + { + return duration.isNeg() ? -1 : 1; + } + + static Int64 extractHour(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * duration.hours(); + } + + static Int64 extractMinute(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * duration.minutes(); + } + + static Int64 extractSecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * duration.seconds(); + } + + static Int64 extractMicrosecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * duration.microSecond(); + } + + static Int64 extractSecondMicrosecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * (duration.seconds() * 1000000LL + duration.microSecond()); + } + + static Int64 extractMinuteMicrosecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * ((duration.minutes() * 100LL + duration.seconds()) * 1000000LL + duration.microSecond()); + } + + static Int64 extractMinuteSecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * (duration.minutes() * 100LL + duration.seconds()); + } + + static Int64 extractHourMicrosecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * ((duration.hours() * 10000LL + duration.minutes() * 100LL + duration.seconds()) * 1000000LL + duration.microSecond()); + } + + static Int64 extractHourSecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * (duration.hours() * 10000LL + duration.minutes() * 100LL + duration.seconds()); + } + + static Int64 extractHourMinute(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * (duration.hours() * 100LL + duration.minutes()); + } + + static Int64 extractDayMicrosecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * ((duration.hours() * 10000LL + duration.minutes() * 100LL + duration.seconds()) * 1000000LL + duration.microSecond()); + } + + static Int64 extractDaySecond(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * (duration.hours() * 10000LL + duration.minutes() * 100LL + duration.seconds()); + } + + static Int64 extractDayMinute(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * (duration.hours() * 100LL + duration.minutes()); + } + + static Int64 extractDayHour(Int64 nano) + { + MyDuration duration(nano); + return signMultiplier(duration) * duration.hours(); + } +}; + +class FunctionExtractMyDuration : public IFunction +{ +public: + static constexpr auto name = "extractMyDuration"; + + static FunctionPtr create(const Context &) { return std::make_shared(); }; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!arguments[0]->isString()) + throw TiFlashException(fmt::format("First argument for function {} (unit) must be String", getName()), Errors::Coprocessor::BadRequest); + + if (!arguments[1]->isMyTime()) + throw TiFlashException( + fmt::format("Illegal type {} of second argument of function {}. Must be Duration.", arguments[1]->getName(), getName()), + Errors::Coprocessor::BadRequest); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + const auto * unit_column = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); + if (!unit_column) + throw TiFlashException( + fmt::format("First argument for function {} must be constant String", getName()), + Errors::Coprocessor::BadRequest); + + String unit = Poco::toLower(unit_column->getValue()); + + auto col_from = block.getByPosition(arguments[1]).column; + + size_t rows = block.rows(); + auto col_to = ColumnInt64::create(rows); + auto & vec_to = col_to->getData(); + + if (unit == "hour") + dispatch(col_from, vec_to); + else if (unit == "minute") + dispatch(col_from, vec_to); + else if (unit == "second") + dispatch(col_from, vec_to); + else if (unit == "microsecond") + dispatch(col_from, vec_to); + else if (unit == "second_microsecond") + dispatch(col_from, vec_to); + else if (unit == "minute_microsecond") + dispatch(col_from, vec_to); + else if (unit == "minute_second") + dispatch(col_from, vec_to); + else if (unit == "hour_microsecond") + dispatch(col_from, vec_to); + else if (unit == "hour_second") + dispatch(col_from, vec_to); + else if (unit == "hour_minute") + dispatch(col_from, vec_to); + else if (unit == "day_microsecond") + dispatch(col_from, vec_to); + else if (unit == "day_second") + dispatch(col_from, vec_to); + else if (unit == "day_minute") + dispatch(col_from, vec_to); + else if (unit == "day_hour") + dispatch(col_from, vec_to); + else + throw TiFlashException(fmt::format("Function {} does not support '{}' unit", getName(), unit), Errors::Coprocessor::BadRequest); + + block.getByPosition(result).column = std::move(col_to); + } + +private: + using Func = Int64 (*)(Int64); + + template + static void dispatch(const ColumnPtr col_from, PaddedPODArray & vec_to) + { + if (const auto * from = checkAndGetColumn(col_from.get()); from) + { + const auto & data = from->getData(); + vectorDuration(data, vec_to); + } + } + + template + static void vectorDuration(const ColumnInt64::Container & vec_from, PaddedPODArray & vec_to) + { + vec_to.resize(vec_from.size()); + for (size_t i = 0; i < vec_from.size(); i++) + { + vec_to[i] = F(vec_from[i]); + } + } +}; + +} // namespace DB diff --git a/dbms/src/Functions/tests/gtest_datetime_extract.cpp b/dbms/src/Functions/tests/gtest_datetime_extract.cpp index 1f6e585b002..d0ff74ac3e9 100644 --- a/dbms/src/Functions/tests/gtest_datetime_extract.cpp +++ b/dbms/src/Functions/tests/gtest_datetime_extract.cpp @@ -89,7 +89,7 @@ try size_t res_col_idx = 2; func_builder_ptr->build({unit_ctn, datetime_ctn})->execute(block, arg_cols_idx, res_col_idx); const IColumn * ctn_res = block.getByPosition(res_col_idx).column.get(); - const ColumnInt64 * col_res = checkAndGetColumn(ctn_res); + const auto * col_res = checkAndGetColumn(ctn_res); Field res_field; col_res->get(0, res_field); @@ -149,7 +149,7 @@ try size_t res_col_idx = 2; func_builder_ptr->build({unit_ctn, datetime_ctn})->execute(block, arg_cols_idx, res_col_idx); const IColumn * ctn_res = block.getByPosition(res_col_idx).column.get(); - const ColumnInt64 * col_res = checkAndGetColumn(ctn_res); + const auto * col_res = checkAndGetColumn(ctn_res); Field res_field; col_res->get(0, res_field); diff --git a/dbms/src/Functions/tests/gtest_duration_extract.cpp b/dbms/src/Functions/tests/gtest_duration_extract.cpp new file mode 100644 index 00000000000..7ffebb30d63 --- /dev/null +++ b/dbms/src/Functions/tests/gtest_duration_extract.cpp @@ -0,0 +1,76 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-compare" + +#pragma GCC diagnostic pop + +namespace DB +{ +namespace tests +{ +// TODO: rewrite using executeFunction() +class TestDurationExtract : public DB::tests::FunctionTest +{ +}; + +TEST_F(TestDurationExtract, ExtractFromMyDuration) +try +{ + std::vector units{ + "hour", + "minute", + "second", + "microsecond", + "second_microsecond", + "minute_microsecond", + "minute_second", + "hour_microsecond", + "hour_second", + "hour_minute", + "day_microsecond", + "day_second", + "day_minute", + "day_hour", + }; + MyDuration duration_value(1, 838, 34, 56, 123456, 6); + std::vector results{838, 34, 56, 123456, 56123456, 3456123456, 3456, 8383456123456, 8383456, 83834, 8383456123456, 8383456, 83834, 838}; + + for (size_t i = 0; i < units.size(); ++i) + { + const auto & unit = units[i]; + const auto & result = results[i]; + // nullable/non-null duration + ASSERT_COLUMN_EQ(toNullableVec({result}), executeFunction("extractMyDuration", createConstColumn(1, {unit}), createDurationColumn({duration_value}, 6))); + ASSERT_COLUMN_EQ(toVec({result}), executeFunction("extractMyDuration", createConstColumn(1, {unit}), createDurationColumn({duration_value}, 6))); + // const duration + ASSERT_COLUMN_EQ(createConstColumn(1, result), executeFunction("extractMyDuration", createConstColumn(1, {unit}), createDurationColumnConst(1, {duration_value}, 6))); + // null + ASSERT_COLUMN_EQ(toNullableVec({std::nullopt}), executeFunction("extractMyDuration", createConstColumn(1, {unit}), createDurationColumn({std::nullopt}, 6))); + } +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/TestUtils/FunctionTestUtils.h b/dbms/src/TestUtils/FunctionTestUtils.h index af38a5f8d37..a6d7049420c 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.h +++ b/dbms/src/TestUtils/FunctionTestUtils.h @@ -377,6 +377,60 @@ ColumnWithTypeAndName createDateTimeColumnConst(size_t size, const std::optional return {std::move(col), data_type_ptr, "datetime"}; } +template +ColumnWithTypeAndName createDurationColumn(std::initializer_list> init, int fraction) +{ + DataTypePtr data_type_ptr = std::make_shared(fraction); + if constexpr (is_nullable) + { + data_type_ptr = makeNullable(data_type_ptr); + } + auto col = data_type_ptr->createColumn(); + for (const auto & dt : init) + { + if (dt.has_value()) + col->insert(Field(dt->nanoSecond())); + else + { + if constexpr (is_nullable) + { + col->insert(Null()); + } + else + { + throw Exception("Null value for not nullable DataTypeMyDuration"); + } + } + } + return {std::move(col), data_type_ptr, "duration"}; +} + +template +ColumnWithTypeAndName createDurationColumnConst(size_t size, const std::optional & duration, int fraction) +{ + DataTypePtr data_type_ptr = std::make_shared(fraction); + if constexpr (is_nullable) + { + data_type_ptr = makeNullable(data_type_ptr); + } + + ColumnPtr col; + if (duration.has_value()) + col = data_type_ptr->createColumnConst(size, Field(duration->nanoSecond())); + else + { + if constexpr (is_nullable) + { + col = data_type_ptr->createColumnConst(size, Field(Null())); + } + else + { + throw Exception("Null value for not nullable DataTypeMyDuration"); + } + } + return {std::move(col), data_type_ptr, "duration"}; +} + // parse a string into decimal field. template typename TypeTraits::FieldType parseDecimal( diff --git a/tests/fullstack-test/expr/extract_duration.test b/tests/fullstack-test/expr/extract_duration.test new file mode 100644 index 00000000000..bc7bc98524e --- /dev/null +++ b/tests/fullstack-test/expr/extract_duration.test @@ -0,0 +1,30 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mysql> drop table if exists test.t +mysql> create table test.t (a time(6)) +mysql> insert into test.t values('838:34:56.123456') +mysql> insert into test.t values('-838:34:56.123456') +mysql> alter table test.t set tiflash replica 1 +func> wait_table test t + +mysql> set session tidb_isolation_read_engines='tiflash'; select a from test.t where extract(hour from a) = 838 and extract(minute from a) = 34 and extract(second from a) = 56 and extract(microsecond from a) = 123456; +a +838:34:56.123456 + +mysql> set session tidb_isolation_read_engines='tiflash'; select a from test.t where extract(second_microsecond from a) = -56123456 and extract(minute_microsecond from a) = -3456123456 and extract(minute_second from a) = -3456 and extract(hour_microsecond from a) = -8383456123456 and extract(hour_second from a) = -8383456 and extract(hour_minute from a) = -83834 and extract(day_microsecond from a) = -8383456123456 and extract(day_second from a) = -8383456 and extract(day_minute from a) = -83834 and extract(day_hour from a) = -838; +a +-838:34:56.123456 + +mysql> drop table if exists test.t