From bf0d5fad4138f9206da4608d79acb6483e5eb8c6 Mon Sep 17 00:00:00 2001
From: lizhenhuan <1916038084@qq.com>
Date: Wed, 14 Sep 2022 09:30:58 +0800
Subject: [PATCH] Implement Space function push down #5113 (#5239)

close pingcap/tiflash#5113
---
 dbms/src/Flash/Coprocessor/DAGUtils.cpp       |   2 +-
 dbms/src/Functions/FunctionsString.cpp        | 166 +++++++++++++++++
 .../Functions/tests/gtest_strings_space.cpp   |  83 ++++++++
 tests/fullstack-test/expr/space.test          |  37 ++++
 4 files changed, 287 insertions(+), 1 deletion(-)
 create mode 100644 dbms/src/Functions/tests/gtest_strings_space.cpp
 create mode 100644 tests/fullstack-test/expr/space.test

diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
index 5ae35452cc0..1c4a8e521e9 100755
--- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
@@ -660,7 +660,7 @@ const std::unordered_map scalar_func_map({
     //{tipb::ScalarFuncSig::Right, "cast"},
     {tipb::ScalarFuncSig::RpadUTF8, "rpadUTF8"},
     {tipb::ScalarFuncSig::Rpad, "rpad"},
-    //{tipb::ScalarFuncSig::Space, "cast"},
+    {tipb::ScalarFuncSig::Space, "space"},
     {tipb::ScalarFuncSig::Strcmp, "strcmp"},
     {tipb::ScalarFuncSig::Substring2ArgsUTF8, "substringUTF8"},
     {tipb::ScalarFuncSig::Substring3ArgsUTF8, "substringUTF8"},
diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp
index 8d70b368149..81f7eeb9248 100644
--- a/dbms/src/Functions/FunctionsString.cpp
+++ b/dbms/src/Functions/FunctionsString.cpp
@@ -4392,6 +4392,171 @@ class FunctionRepeat : public IFunction
 };
 
 
+/// SPACE(n): returns a string of n space characters.
+/// n <= 0 yields the empty string; n > MAX_BLOB_WIDTH yields NULL.
+/// NOTE(review): template arguments in this class were missing from the reviewed copy of the
+/// patch and are restored from context; confirm them against the repository.
+class FunctionSpace : public IFunction
+{
+public:
+    static constexpr auto name = "space";
+
+    /// Largest accepted count, matching TiDB's mysql.MaxBlobWidth: space(MAX_BLOB_WIDTH + 1) returns NULL.
+    static constexpr auto MAX_BLOB_WIDTH = 16777216;
+    /// Per-row size guess, used only to pre-reserve the output buffer for non-constant input.
+    static constexpr auto APPROX_STRING_SIZE = 64;
+
+    FunctionSpace() = default;
+
+    static FunctionPtr create(const Context & /*context*/)
+    {
+        return std::make_shared<FunctionSpace>();
+    }
+
+    std::string getName() const override { return name; }
+    size_t getNumberOfArguments() const override { return 1; }
+
+    /// Accepts one integer argument; any other type throws ILLEGAL_TYPE_OF_ARGUMENT.
+    /// The result type is always nullable because an oversized count produces NULL.
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (!arguments[0]->isInteger())
+            throw Exception(
+                fmt::format("Illegal type {} of first argument of function {}", arguments[0]->getName(), getName()),
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        return arguments[0]->onlyNull()
+            ? makeNullable(std::make_shared<DataTypeNothing>())
+            : makeNullable(std::make_shared<DataTypeString>());
+    }
+
+    /// Tries each integer width in turn; the executeSpace instantiation whose column
+    /// type matches fills the result column. Throws if none matches.
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
+    {
+        const bool handled = executeSpace<UInt8>(block, arguments, result)
+            || executeSpace<UInt16>(block, arguments, result)
+            || executeSpace<UInt32>(block, arguments, result)
+            || executeSpace<UInt64>(block, arguments, result)
+            || executeSpace<Int8>(block, arguments, result)
+            || executeSpace<Int16>(block, arguments, result)
+            || executeSpace<Int32>(block, arguments, result)
+            || executeSpace<Int64>(block, arguments, result);
+        if (!handled)
+            throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }
+
+private:
+    /// Evaluates the function if the argument column stores `IntType` values (constant or not).
+    /// Returns false without touching `block` when the column has another element type.
+    template <typename IntType>
+    bool executeSpace(
+        Block & block,
+        const ColumnNumbers & arguments,
+        const size_t result) const
+    {
+        const auto c0_col_column = block.getByPosition(arguments[0]).column;
+
+        // Resolve the concrete column type before allocating anything: executeImpl calls this
+        // for up to eight integer types, and a mismatch should not pay for result buffers.
+        const ColumnConst * col_const_space_num = nullptr;
+        const ColumnVector<IntType> * col_vector_space_num = nullptr;
+        if (c0_col_column->isColumnConst())
+            col_const_space_num = checkAndGetColumnConst<ColumnVector<IntType>>(c0_col_column.get());
+        else
+            col_vector_space_num = checkAndGetColumn<ColumnVector<IntType>>(c0_col_column.get());
+        if (col_const_space_num == nullptr && col_vector_space_num == nullptr)
+            return false;
+
+        const size_t val_num = block.rows();
+        auto result_null_map = ColumnUInt8::create(val_num);
+        auto col_res = ColumnString::create();
+        auto & col_res_data = col_res->getChars();
+        auto & col_res_offsets = col_res->getOffsets();
+        // Size the offsets by the same row count the fill loops iterate over.
+        col_res_offsets.resize(val_num);
+
+        if (col_const_space_num != nullptr)
+        {
+            auto space_num_values = col_const_space_num->getValue<IntType>();
+            // Cap at INT64_MAX before narrowing to Int64 (accurate::lessOp is presumably a sign-safe '<').
+            Int64 space_num = accurate::lessOp(INT64_MAX, space_num_values) ? INT64_MAX : space_num_values;
+            executeConst(space_num, val_num, result_null_map->getData(), col_res_data, col_res_offsets);
+        }
+        else
+        {
+            executeVector(col_vector_space_num, val_num, result_null_map->getData(), col_res_data, col_res_offsets);
+        }
+
+        block.getByPosition(result).column = ColumnNullable::create(std::move(col_res), std::move(result_null_map));
+        return true;
+    }
+
+    /// Fills each of the `val_num` rows with the same string of `space_num` spaces.
+    /// A negative count yields the empty string; a count above MAX_BLOB_WIDTH marks
+    /// every row NULL (stored as an empty string with its null flag set).
+    static void executeConst(
+        Int64 space_num,
+        size_t val_num,
+        ColumnUInt8::Container & result_null_map_data,
+        ColumnString::Chars_t & res_data,
+        ColumnString::Offsets & res_offsets)
+    {
+        const bool is_null = space_num > MAX_BLOB_WIDTH;
+        // Negative counts and NULL rows are both stored as the empty string.
+        const size_t space_count = (is_null || space_num < 0) ? 0 : static_cast<size_t>(space_num);
+        // Each row is its spaces plus the terminating zero byte.
+        const size_t row_size = space_count + 1;
+
+        // All rows have the same size, so size the buffer once instead of once per row.
+        res_data.resize(val_num * row_size);
+
+        ColumnString::Offset res_offset = 0;
+        for (size_t row = 0; row < val_num; ++row)
+        {
+            result_null_map_data[row] = is_null;
+            memset(&res_data[res_offset], ' ', space_count);
+            res_data[res_offset + space_count] = '\0';
+            res_offset += row_size;
+            res_offsets[row] = res_offset;
+        }
+    }
+
+    /// Builds one string per row from a non-constant integer column, applying the same
+    /// rules as executeConst row by row. `IntType` is deduced as the column type itself.
+    template <typename IntType>
+    static void executeVector(
+        const IntType * col_vector_space_num,
+        size_t val_num,
+        ColumnUInt8::Container & result_null_map_data,
+        ColumnString::Chars_t & res_data,
+        ColumnString::Offsets & res_offsets)
+    {
+        ColumnString::Offset res_offset = 0;
+        // Only a starting guess; resize() below grows the buffer when rows are longer.
+        res_data.reserve(val_num * APPROX_STRING_SIZE);
+        const auto & col_vector_space_num_value = col_vector_space_num->getData();
+
+        for (size_t row = 0; row < val_num; ++row)
+        {
+            // Cap at INT64_MAX before narrowing to Int64 (accurate::lessOp is presumably a sign-safe '<').
+            Int64 space_num = accurate::lessOp(INT64_MAX, col_vector_space_num_value[row]) ? INT64_MAX : col_vector_space_num_value[row];
+            const bool is_null = space_num > MAX_BLOB_WIDTH;
+            if (is_null || space_num < 0)
+            {
+                space_num = 0;
+            }
+            result_null_map_data[row] = is_null;
+
+            res_data.resize(res_data.size() + space_num + 1);
+            // Write the spaces straight into the result buffer; no temporary std::string per row.
+            memset(&res_data[res_offset], ' ', space_num);
+            res_data[res_offset + space_num] = '\0';
+            res_offset += space_num + 1;
+            res_offsets[row] = res_offset;
+        }
+    }
+};
+
 class FunctionPosition : public IFunction
 {
 public:
@@ -5702,6 +5867,7 @@ void registerFunctionsString(FunctionFactory & factory)
     factory.registerFunction();
     factory.registerFunction();
     factory.registerFunction();
+    factory.registerFunction<FunctionSpace>();
     factory.registerFunction();
     factory.registerFunction();
 }
diff --git a/dbms/src/Functions/tests/gtest_strings_space.cpp b/dbms/src/Functions/tests/gtest_strings_space.cpp
new file mode 100644
index 00000000000..f89fc730567
--- /dev/null
+++ b/dbms/src/Functions/tests/gtest_strings_space.cpp
@@ -0,0 +1,83 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <DataTypes/DataTypeNullable.h>
+#include <Functions/FunctionFactory.h>
+#include <Interpreters/Context.h>
+#include <TestUtils/FunctionTestUtils.h>
+#include <TestUtils/TiFlashTestBasic.h>
+
+#include <string>
+#include <vector>
+
+// NOTE(review): include targets and template arguments below were missing from the reviewed copy; restored from context, confirm.
+namespace DB::tests
+{
+class StringSpace : public DB::tests::FunctionTest
+{
+public:
+    static constexpr auto func_name = "space";
+
+protected:
+    static ColumnWithTypeAndName toVec(const std::vector<String> & v)
+    {
+        return createColumn<String>(v);
+    }
+    static ColumnWithTypeAndName toNullableVec(const std::vector<std::optional<String>> & v)
+    {
+        return createColumn<Nullable<String>>(v);
+    }
+
+    static ColumnWithTypeAndName toVecInt(const std::vector<std::optional<Int64>> & v)
+    {
+        return createColumn<Nullable<Int64>>(v);
+    }
+
+    static ColumnWithTypeAndName toConst(const Int64 & v)
+    {
+        return createConstColumn<Int64>(8, v);
+    }
+};
+
+// Vector input: a positive count gives that many spaces; zero and negative counts give "".
+TEST_F(StringSpace, spaceTest)
+try
+{
+    ASSERT_COLUMN_EQ(
+        toNullableVec({std::string(2, ' '), "", std::string(10, ' '), "", std::string(6, ' ')}),
+        executeFunction(
+            func_name,
+            toVecInt({2, 0, 10, -1, 6})));
+}
+CATCH
+
+// NULL input and a count above the 16777216 limit both give NULL.
+TEST_F(StringSpace, nullTest)
+try
+{
+    ASSERT_COLUMN_EQ(
+        toNullableVec({{}, std::string(5, ' ')}),
+        executeFunction(
+            func_name,
+            toVecInt({{}, 5})));
+
+    ASSERT_COLUMN_EQ(
+        toNullableVec({{}}),
+        executeFunction(
+            func_name,
+            toVecInt({16777217})));
+}
+CATCH
+
+} // namespace DB::tests
diff --git a/tests/fullstack-test/expr/space.test b/tests/fullstack-test/expr/space.test
new file mode 100644
index 00000000000..dd5b2bb00c1
--- /dev/null
+++ b/tests/fullstack-test/expr/space.test
@@ -0,0 +1,37 @@
+# Copyright 2022 PingCAP, Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+mysql> drop table if exists test.t;
+mysql> create table if not exists test.t(a int(64));
+# Rows cover a negative count, zero, NULL, a small count, exactly the 16777216 limit, and one past it.
+mysql> insert into test.t values(-1);
+mysql> insert into test.t values(0);
+mysql> insert into test.t values(NULL);
+mysql> insert into test.t values(10);
+mysql> insert into test.t values(16777216);
+mysql> insert into test.t values(16777217);
+mysql> alter table test.t set tiflash replica 1;
+func> wait_table test t
+# Negative and zero counts give length 0; NULL input and counts above 16777216 give NULL.
+mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select length(space(a)) from test.t;
++------------------+
+| length(space(a)) |
++------------------+
+|                0 |
+|                0 |
+|             NULL |
+|               10 |
+|         16777216 |
+|             NULL |
++------------------+