Skip to content

Commit

Permalink
Implement Space function push down #5113 (#5239)
Browse files Browse the repository at this point in the history
close #5113
  • Loading branch information
lizhenhuan authored Sep 14, 2022
1 parent a6bc771 commit bf0d5fa
Show file tree
Hide file tree
Showing 4 changed files with 299 additions and 1 deletion.
2 changes: 1 addition & 1 deletion dbms/src/Flash/Coprocessor/DAGUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,7 @@ const std::unordered_map<tipb::ScalarFuncSig, String> scalar_func_map({
//{tipb::ScalarFuncSig::Right, "cast"},
{tipb::ScalarFuncSig::RpadUTF8, "rpadUTF8"},
{tipb::ScalarFuncSig::Rpad, "rpad"},
//{tipb::ScalarFuncSig::Space, "cast"},
{tipb::ScalarFuncSig::Space, "space"},
{tipb::ScalarFuncSig::Strcmp, "strcmp"},
{tipb::ScalarFuncSig::Substring2ArgsUTF8, "substringUTF8"},
{tipb::ScalarFuncSig::Substring3ArgsUTF8, "substringUTF8"},
Expand Down
178 changes: 178 additions & 0 deletions dbms/src/Functions/FunctionsString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4392,6 +4392,183 @@ class FunctionRepeat : public IFunction
};


class FunctionSpace : public IFunction
{
public:
static constexpr auto name = "space";

// tidb mysql.MaxBlobWidth space max input : space(MAX_BLOB_WIDTH+1) will return NULL
static constexpr auto MAX_BLOB_WIDTH = 16777216;
static const auto APPROX_STRING_SIZE = 64;

FunctionSpace() = default;

static FunctionPtr create(const Context & /*context*/)
{
return std::make_shared<FunctionSpace>();
}

std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!arguments[0]->isInteger())
throw Exception(
fmt::format("Illegal type {} of first argument of function {}", arguments[0]->getName(), getName()),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return arguments[0]->onlyNull()
? makeNullable(std::make_shared<DataTypeNothing>())
: makeNullable(std::make_shared<DataTypeString>());
}

void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
{
if (executeSpace<UInt8>(block, arguments, result)
|| executeSpace<UInt16>(block, arguments, result)
|| executeSpace<UInt32>(block, arguments, result)
|| executeSpace<UInt64>(block, arguments, result)
|| executeSpace<Int8>(block, arguments, result)
|| executeSpace<Int16>(block, arguments, result)
|| executeSpace<Int32>(block, arguments, result)
|| executeSpace<Int64>(block, arguments, result))
{
return;
}
else
{
throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}

private:
template <typename IntType>
bool executeSpace(
Block & block,
const ColumnNumbers & arguments,
const size_t result) const
{
auto & c0_col = block.getByPosition(arguments[0]);

auto c0_col_column = c0_col.column;

size_t val_num = block.rows();
auto result_null_map = ColumnUInt8::create(val_num);
auto col_res = ColumnString::create();
auto & col_res_data = col_res->getChars();
auto & col_res_offsets = col_res->getOffsets();

col_res_offsets.resize(c0_col_column->size());


if (c0_col_column->isColumnConst())
{
const ColumnConst * col_const_space_num = checkAndGetColumnConst<ColumnVector<IntType>>(c0_col_column.get());
if (col_const_space_num == nullptr)
{
return false;
}
auto space_num_values = col_const_space_num->getValue<IntType>();
Int64 space_num = accurate::lessOp(INT64_MAX, space_num_values) ? INT64_MAX : space_num_values;
executeConst(space_num, val_num, result_null_map->getData(), col_res_data, col_res_offsets);
}
else
{
const auto * col_vector_space_num = checkAndGetColumn<ColumnVector<IntType>>(c0_col_column.get());
if (col_vector_space_num == nullptr)
{
return false;
}
executeVector(col_vector_space_num, val_num, result_null_map->getData(), col_res_data, col_res_offsets);
}

block.getByPosition(result).column = ColumnNullable::create(std::move(col_res), std::move(result_null_map));
return true;
}

static void executeConst(
Int64 space_num,
size_t val_num,
ColumnUInt8::Container & result_null_map_data,
ColumnString::Chars_t & res_data,
ColumnString::Offsets & res_offsets)
{
ColumnString::Offset res_offset = 0;
auto is_big = false;

if (space_num < 0)
{
space_num = 0;
}

if (space_num > MAX_BLOB_WIDTH)
{
res_data.reserve(val_num);
is_big = true;
space_num = 0;
}
else
{
res_data.reserve(val_num * (space_num + 1));
}

std::string res_string(space_num, ' ');
for (size_t row = 0; row < val_num; ++row)
{
result_null_map_data[row] = false;

if (is_big)
{
result_null_map_data[row] = true;
}
res_data.resize(res_data.size() + space_num + 1);

memcpy(&res_data[res_offset], &res_string[0], space_num);

res_data[res_offset + space_num] = '\0';
res_offset += space_num + 1;
res_offsets[row] = res_offset;
}
}

template <typename IntType>
static void executeVector(
const IntType * col_vector_space_num,
size_t val_num,
ColumnUInt8::Container & result_null_map_data,
ColumnString::Chars_t & res_data,
ColumnString::Offsets & res_offsets)
{
ColumnString::Offset res_offset = 0;
res_data.reserve(val_num * APPROX_STRING_SIZE);
const auto & col_vector_space_num_value = col_vector_space_num->getData();

for (size_t row = 0; row < val_num; ++row)
{
result_null_map_data[row] = false;

Int64 space_num = accurate::lessOp(INT64_MAX, col_vector_space_num_value[row]) ? INT64_MAX : col_vector_space_num_value[row];
if (space_num < 0)
{
space_num = 0;
}
if (space_num > MAX_BLOB_WIDTH)
{
result_null_map_data[row] = true;
space_num = 0;
}
res_data.resize(res_data.size() + space_num + 1);

std::string res_string(space_num, ' ');
memcpy(&res_data[res_offset], &res_string[0], space_num);

res_data[res_offset + space_num] = '\0';
res_offset += space_num + 1;
res_offsets[row] = res_offset;
}
}
};

class FunctionPosition : public IFunction
{
public:
Expand Down Expand Up @@ -5702,6 +5879,7 @@ void registerFunctionsString(FunctionFactory & factory)
factory.registerFunction<FunctionHexStr>();
factory.registerFunction<FunctionHexInt>();
factory.registerFunction<FunctionRepeat>();
factory.registerFunction<FunctionSpace>();
factory.registerFunction<FunctionBin>();
factory.registerFunction<FunctionElt>();
}
Expand Down
83 changes: 83 additions & 0 deletions dbms/src/Functions/tests/gtest_strings_space.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsString.h>
#include <Interpreters/Context.h>
#include <TestUtils/FunctionTestUtils.h>
#include <TestUtils/TiFlashTestBasic.h>

#include <string>
#include <vector>


namespace DB::tests
{
/// Test fixture for the `space` function; provides small helpers that build
/// the input and expected-output columns used by the test cases.
class StringSpace : public DB::tests::FunctionTest
{
public:
    static constexpr auto func_name = "space";

protected:
    /// Builds a non-nullable String column from `v`.
    static ColumnWithTypeAndName toVec(const std::vector<String> & v)
    {
        return createColumn<String>(v);
    }

    /// Builds a Nullable(String) column from `v`; an empty optional becomes NULL.
    static ColumnWithTypeAndName toNullableVec(const std::vector<std::optional<String>> & v)
    {
        return createColumn<Nullable<String>>(v);
    }

    /// Builds a Nullable(Int64) column from `v`; an empty optional becomes NULL.
    static ColumnWithTypeAndName toVecInt(const std::vector<std::optional<Int64>> & v)
    {
        return createColumn<Nullable<Int64>>(v);
    }

    /// Builds a constant Int64 column holding `v`, eight rows long.
    static ColumnWithTypeAndName toConst(const Int64 & v)
    {
        constexpr size_t const_column_rows = 8;
        return createConstColumn<Int64>(const_column_rows, v);
    }
};

// test space
TEST_F(StringSpace, spaceTest)
try
{
    // Expected values are built by length rather than written as whitespace-only literals,
    // so the required number of spaces is explicit and cannot be silently altered.
    // A negative count (-1) yields an empty string, same as 0.
    ASSERT_COLUMN_EQ(
        toNullableVec({String(2, ' '), "", String(10, ' '), "", String(6, ' ')}),
        executeFunction(
            func_name,
            toVecInt({2, 0, 10, -1, 6})));
}
CATCH

// test space NULL
TEST_F(StringSpace, nullTest)
try
{
    // A NULL input row produces a NULL output row; the non-NULL row is unaffected.
    // The expected value is built by length so the five spaces are explicit.
    ASSERT_COLUMN_EQ(
        toNullableVec({{}, String(5, ' ')}),
        executeFunction(
            func_name,
            toVecInt({{}, 5})));

    // 16777217 is one past the maximum supported width (16777216), so the result is NULL.
    ASSERT_COLUMN_EQ(
        toNullableVec({{}}),
        executeFunction(
            func_name,
            toVecInt({16777217})));
}
CATCH

} // namespace DB::tests
37 changes: 37 additions & 0 deletions tests/fullstack-test/expr/space.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright 2022 PingCAP, Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Fullstack check for space(): one row per interesting input, i.e. a negative
# count, zero, NULL, an ordinary count, 16777216 and 16777217.
mysql> drop table if exists test.t;
mysql> create table if not exists test.t(a int(64));

mysql> insert into test.t values(-1);
mysql> insert into test.t values(0);
mysql> insert into test.t values(NULL);
mysql> insert into test.t values(10);
mysql> insert into test.t values(16777216);
mysql> insert into test.t values(16777217);
mysql> alter table test.t set tiflash replica 1;
func> wait_table test t

# The query is restricted to the tiflash engine with MPP enforced. Expected lengths:
# 0 for -1 and 0, NULL for NULL, 10 for 10, 16777216 for 16777216, NULL for 16777217.
mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select length(space(a)) from test.t;
+------------------+
| length(space(a)) |
+------------------+
| 0 |
| 0 |
| NULL |
| 10 |
| 16777216 |
| NULL |
+------------------+

0 comments on commit bf0d5fa

Please sign in to comment.