From 0bd1aba0ca50e4fd3b7f346981afa5673b60db03 Mon Sep 17 00:00:00 2001 From: Jiwen liu <61498169+liujiwen-up@users.noreply.github.com> Date: Mon, 9 Dec 2024 10:38:07 +0800 Subject: [PATCH] [cherry-pick](branch3.0) impl scalar functions trim_in, ltrim_in and rtrim_in (#42642) pick from master: https://github.com/apache/doris/pull/41681 --- be/src/vec/functions/function_string.cpp | 160 ++++- .../doris/catalog/BuiltinScalarFunctions.java | 6 + .../executable/StringArithmetic.java | 69 +++ .../expressions/functions/scalar/LtrimIn.java | 84 +++ .../expressions/functions/scalar/RtrimIn.java | 84 +++ .../expressions/functions/scalar/TrimIn.java | 84 +++ .../visitor/ScalarFunctionVisitor.java | 15 + .../string_functions/test_trim_in.out | 547 ++++++++++++++++++ .../string_functions/test_trim_in.groovy | 204 +++++++ 9 files changed, 1250 insertions(+), 3 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/LtrimIn.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RtrimIn.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TrimIn.java create mode 100644 regression-test/data/query_p0/sql_functions/string_functions/test_trim_in.out create mode 100644 regression-test/suites/query_p0/sql_functions/string_functions/test_trim_in.groovy diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index f98589b1965ccc..1876ed499f4d9e 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -508,6 +509,15 @@ struct NameLTrim { struct NameRTrim { static constexpr auto name = "rtrim"; }; +struct NameTrimIn { + static constexpr auto name = "trim_in"; +}; +struct NameLTrimIn { + static constexpr auto name = "ltrim_in"; +}; +struct NameRTrimIn { + static constexpr 
auto name = "rtrim_in"; +}; template struct TrimUtil { static Status vector(const ColumnString::Chars& str_data, @@ -535,6 +545,135 @@ struct TrimUtil { return Status::OK(); } }; +template +struct TrimInUtil { + static Status vector(const ColumnString::Chars& str_data, + const ColumnString::Offsets& str_offsets, const StringRef& remove_str, + ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { + const size_t offset_size = str_offsets.size(); + res_offsets.resize(offset_size); + res_data.reserve(str_data.size()); + bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) && + simd::VStringFunctions::is_ascii(StringRef( + reinterpret_cast(str_data.data()), str_data.size())); + + if (all_ascii) { + return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets); + } else { + return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets); + } + } + +private: + static Status impl_vectors_ascii(const ColumnString::Chars& str_data, + const ColumnString::Offsets& str_offsets, + const StringRef& remove_str, ColumnString::Chars& res_data, + ColumnString::Offsets& res_offsets) { + const size_t offset_size = str_offsets.size(); + std::bitset<128> char_lookup; + const char* remove_begin = remove_str.data; + const char* remove_end = remove_str.data + remove_str.size; + + while (remove_begin < remove_end) { + char_lookup.set(static_cast(*remove_begin)); + remove_begin += 1; + } + + for (size_t i = 0; i < offset_size; ++i) { + const char* str_begin = + reinterpret_cast(str_data.data() + str_offsets[i - 1]); + const char* str_end = reinterpret_cast(str_data.data() + str_offsets[i]); + const char* left_trim_pos = str_begin; + const char* right_trim_pos = str_end; + + if constexpr (is_ltrim) { + while (left_trim_pos < str_end) { + if (!char_lookup.test(static_cast(*left_trim_pos))) { + break; + } + ++left_trim_pos; + } + } + + if constexpr (is_rtrim) { + while (right_trim_pos > left_trim_pos) { + --right_trim_pos; + if 
(!char_lookup.test(static_cast(*right_trim_pos))) { + ++right_trim_pos; + break; + } + } + } + + res_data.insert_assume_reserved(left_trim_pos, right_trim_pos); + res_offsets[i] = res_data.size(); + } + + return Status::OK(); + } + + static Status impl_vectors_utf8(const ColumnString::Chars& str_data, + const ColumnString::Offsets& str_offsets, + const StringRef& remove_str, ColumnString::Chars& res_data, + ColumnString::Offsets& res_offsets) { + const size_t offset_size = str_offsets.size(); + res_offsets.resize(offset_size); + res_data.reserve(str_data.size()); + + std::unordered_set char_lookup; + const char* remove_begin = remove_str.data; + const char* remove_end = remove_str.data + remove_str.size; + + while (remove_begin < remove_end) { + size_t byte_len, char_len; + std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length( + remove_begin, remove_end, 1); + char_lookup.insert(std::string_view(remove_begin, byte_len)); + remove_begin += byte_len; + } + + for (size_t i = 0; i < offset_size; ++i) { + const char* str_begin = + reinterpret_cast(str_data.data() + str_offsets[i - 1]); + const char* str_end = reinterpret_cast(str_data.data() + str_offsets[i]); + const char* left_trim_pos = str_begin; + const char* right_trim_pos = str_end; + + if constexpr (is_ltrim) { + while (left_trim_pos < str_end) { + size_t byte_len, char_len; + std::tie(byte_len, char_len) = + simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos, + str_end, 1); + if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) == + char_lookup.end()) { + break; + } + left_trim_pos += byte_len; + } + } + + if constexpr (is_rtrim) { + while (right_trim_pos > left_trim_pos) { + const char* prev_char_pos = right_trim_pos; + do { /* step back one UTF-8 character; bounded by left_trim_pos so malformed input (stray continuation bytes) cannot scan in front of the string */ + --prev_char_pos; + } while (prev_char_pos > left_trim_pos && (*prev_char_pos & 0xC0) == 0x80); + size_t byte_len = right_trim_pos - prev_char_pos; + if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) == + char_lookup.end()) { + break; + } +
right_trim_pos = prev_char_pos; + } + } + + res_data.insert_assume_reserved(left_trim_pos, right_trim_pos); + res_offsets[i] = res_data.size(); + } + return Status::OK(); + } +}; // This is an implementation of a parameter for the Trim function. template struct Trim1Impl { @@ -583,14 +722,23 @@ struct Trim2Impl { const auto* remove_str_raw = col_right->get_chars().data(); const ColumnString::Offset remove_str_size = col_right->get_offsets()[0]; const StringRef remove_str(remove_str_raw, remove_str_size); + if (remove_str.size == 1) { RETURN_IF_ERROR((TrimUtil::vector( col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), col_res->get_offsets()))); } else { - RETURN_IF_ERROR((TrimUtil::vector( - col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), - col_res->get_offsets()))); + if constexpr (std::is_same::value || + std::is_same::value || + std::is_same::value) { + RETURN_IF_ERROR((TrimInUtil::vector( + col->get_chars(), col->get_offsets(), remove_str, + col_res->get_chars(), col_res->get_offsets()))); + } else { + RETURN_IF_ERROR((TrimUtil::vector( + col->get_chars(), col->get_offsets(), remove_str, + col_res->get_chars(), col_res->get_offsets()))); + } } block.replace_by_position(result, std::move(col_res)); } else { @@ -1023,6 +1171,12 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_function>>(); factory.register_function>>(); factory.register_function>>(); + factory.register_function>>(); + factory.register_function>>(); + factory.register_function>>(); + factory.register_function>>(); + factory.register_function>>(); + factory.register_function>>(); factory.register_function(); factory.register_function>(); factory.register_function>(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java index b0eaa75e3ca29f..44220dda10683e 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java @@ -277,6 +277,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.Lower; import org.apache.doris.nereids.trees.expressions.functions.scalar.Lpad; import org.apache.doris.nereids.trees.expressions.functions.scalar.Ltrim; +import org.apache.doris.nereids.trees.expressions.functions.scalar.LtrimIn; import org.apache.doris.nereids.trees.expressions.functions.scalar.MakeDate; import org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsKey; import org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsValue; @@ -356,6 +357,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.RoundBankers; import org.apache.doris.nereids.trees.expressions.functions.scalar.Rpad; import org.apache.doris.nereids.trees.expressions.functions.scalar.Rtrim; +import org.apache.doris.nereids.trees.expressions.functions.scalar.RtrimIn; import org.apache.doris.nereids.trees.expressions.functions.scalar.SecToTime; import org.apache.doris.nereids.trees.expressions.functions.scalar.Second; import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondCeil; @@ -435,6 +437,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.Tokenize; import org.apache.doris.nereids.trees.expressions.functions.scalar.Translate; import org.apache.doris.nereids.trees.expressions.functions.scalar.Trim; +import org.apache.doris.nereids.trees.expressions.functions.scalar.TrimIn; import org.apache.doris.nereids.trees.expressions.functions.scalar.Truncate; import org.apache.doris.nereids.trees.expressions.functions.scalar.Unhex; import org.apache.doris.nereids.trees.expressions.functions.scalar.UnixTimestamp; @@ -755,6 +758,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(Lower.class, "lcase", "lower"), scalar(Lpad.class, "lpad"), 
scalar(Ltrim.class, "ltrim"), + scalar(LtrimIn.class, "ltrim_in"), scalar(MakeDate.class, "makedate"), scalar(MapContainsKey.class, "map_contains_key"), scalar(MapContainsValue.class, "map_contains_value"), @@ -830,6 +834,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(RoundBankers.class, "round_bankers"), scalar(Rpad.class, "rpad"), scalar(Rtrim.class, "rtrim"), + scalar(RtrimIn.class, "rtrim_in"), scalar(Second.class, "second"), scalar(SecondCeil.class, "second_ceil"), scalar(SecondFloor.class, "second_floor"), @@ -914,6 +919,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(ToQuantileState.class, "to_quantile_state"), scalar(Translate.class, "translate"), scalar(Trim.class, "trim"), + scalar(TrimIn.class, "trim_in"), scalar(Truncate.class, "truncate"), scalar(Unhex.class, "unhex"), scalar(UnixTimestamp.class, "unix_timestamp"), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java index 802aac3c2233fe..b4591de6af01b9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java @@ -151,6 +151,27 @@ private static String trimImpl(String first, String second, boolean left, boolea return result; } + private static String trimInImpl(String first, String second, boolean left, boolean right) { + StringBuilder result = new StringBuilder(first); + + if (left) { + int start = 0; + while (start < result.length() && second.indexOf(result.charAt(start)) != -1) { + start++; + } + result.delete(0, start); + } + if (right) { + int end = result.length(); + while (end > 0 && second.indexOf(result.charAt(end - 1)) != -1) { + end--; + } + result.delete(end, 
result.length()); + } + + return result.toString(); + } + /** * Executable arithmetic functions Trim */ @@ -199,6 +220,54 @@ public static Expression rtrimVarcharVarchar(StringLikeLiteral first, StringLike return castStringLikeLiteral(first, trimImpl(first.getValue(), second.getValue(), false, true)); } + /** + * Executable arithmetic functions Trim_In + */ + @ExecFunction(name = "trim_in") + public static Expression trimInVarchar(StringLikeLiteral first) { + return castStringLikeLiteral(first, trimInImpl(first.getValue(), " ", true, true)); + } + + /** + * Executable arithmetic functions Trim_In + */ + @ExecFunction(name = "trim_in") + public static Expression trimInVarcharVarchar(StringLikeLiteral first, StringLikeLiteral second) { + return castStringLikeLiteral(first, trimInImpl(first.getValue(), second.getValue(), true, true)); + } + + /** + * Executable arithmetic functions ltrim_in + */ + @ExecFunction(name = "ltrim_in") + public static Expression ltrimInVarchar(StringLikeLiteral first) { + return castStringLikeLiteral(first, trimInImpl(first.getValue(), " ", true, false)); + } + + /** + * Executable arithmetic functions ltrim_in + */ + @ExecFunction(name = "ltrim_in") + public static Expression ltrimInVarcharVarchar(StringLikeLiteral first, StringLikeLiteral second) { + return castStringLikeLiteral(first, trimInImpl(first.getValue(), second.getValue(), true, false)); + } + + /** + * Executable arithmetic functions rtrim_in + */ + @ExecFunction(name = "rtrim_in") + public static Expression rtrimInVarchar(StringLikeLiteral first) { + return castStringLikeLiteral(first, trimInImpl(first.getValue(), " ", false, true)); + } + + /** + * Executable arithmetic functions rtrim_in + */ + @ExecFunction(name = "rtrim_in") + public static Expression rtrimInVarcharVarchar(StringLikeLiteral first, StringLikeLiteral second) { + return castStringLikeLiteral(first, trimInImpl(first.getValue(), second.getValue(), false, true)); + } + /** * Executable arithmetic functions 
Replace */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/LtrimIn.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/LtrimIn.java new file mode 100644 index 00000000000000..b54723638280e8 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/LtrimIn.java @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; +import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.VarcharType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'ltrim_in'.
This class is generated by GenerateFunction. + */ +public class LtrimIn extends ScalarFunction + implements UnaryExpression, ExplicitlyCastableSignature, PropagateNullable { + + private static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT) + .args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, StringType.INSTANCE), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE) + ); + + private LtrimIn(List args) { + super("ltrim_in", args); + } + + /** + * constructor with 1 argument. + */ + public LtrimIn(Expression arg) { + super("ltrim_in", arg); + } + + /** + * constructor with 2 argument. + */ + public LtrimIn(Expression arg0, Expression arg1) { + super("ltrim_in", arg0, arg1); + } + + /** + * withChildren. + */ + @Override + public LtrimIn withChildren(List children) { + Preconditions.checkArgument(children.size() == 1 || children.size() == 2); + return new LtrimIn(children); + } + + @Override + public List getSignatures() { + return SIGNATURES; + } + + @Override + public R accept(ExpressionVisitor visitor, C context) { + return visitor.visitLtrimIn(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RtrimIn.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RtrimIn.java new file mode 100644 index 00000000000000..035201ef93cb52 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RtrimIn.java @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; +import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.VarcharType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'rtrim_in'. This class is generated by GenerateFunction. 
+ */ +public class RtrimIn extends ScalarFunction + implements UnaryExpression, ExplicitlyCastableSignature, PropagateNullable { + + private static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT) + .args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, StringType.INSTANCE), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE) + ); + + private RtrimIn(List args) { + super("rtrim_in", args); + } + + /** + * constructor with 1 argument. + */ + public RtrimIn(Expression arg) { + super("rtrim_in", arg); + } + + /** + * constructor with 2 argument. + */ + public RtrimIn(Expression arg0, Expression arg1) { + super("rtrim_in", arg0, arg1); + } + + /** + * withChildren. + */ + @Override + public RtrimIn withChildren(List children) { + Preconditions.checkArgument(children.size() == 1 || children.size() == 2); + return new RtrimIn(children); + } + + @Override + public List getSignatures() { + return SIGNATURES; + } + + @Override + public R accept(ExpressionVisitor visitor, C context) { + return visitor.visitRtrimIn(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TrimIn.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TrimIn.java new file mode 100644 index 00000000000000..978f71e09784b5 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TrimIn.java @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; +import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.VarcharType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'trim_in'. This class is generated by GenerateFunction. 
+ */ +public class TrimIn extends ScalarFunction + implements UnaryExpression, ExplicitlyCastableSignature, PropagateNullable { + + private static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT) + .args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, StringType.INSTANCE), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE) + ); + + private TrimIn(List args) { + super("trim_in", args); + } + + /** + * constructor with 1 argument. + */ + public TrimIn(Expression arg) { + super("trim_in", arg); + } + + /** + * constructor with 2 argument. + */ + public TrimIn(Expression arg0, Expression arg1) { + super("trim_in", arg0, arg1); + } + + /** + * withChildren. + */ + @Override + public TrimIn withChildren(List children) { + Preconditions.checkArgument(children.size() == 1 || children.size() == 2); + return new TrimIn(children); + } + + @Override + public List getSignatures() { + return SIGNATURES; + } + + @Override + public R accept(ExpressionVisitor visitor, C context) { + return visitor.visitTrimIn(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java index 367e91b0abc54d..c63663edd56001 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java @@ -280,6 +280,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.Lower; import org.apache.doris.nereids.trees.expressions.functions.scalar.Lpad; import org.apache.doris.nereids.trees.expressions.functions.scalar.Ltrim; +import 
org.apache.doris.nereids.trees.expressions.functions.scalar.LtrimIn; import org.apache.doris.nereids.trees.expressions.functions.scalar.MakeDate; import org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsKey; import org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsValue; @@ -355,6 +356,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.RoundBankers; import org.apache.doris.nereids.trees.expressions.functions.scalar.Rpad; import org.apache.doris.nereids.trees.expressions.functions.scalar.Rtrim; +import org.apache.doris.nereids.trees.expressions.functions.scalar.RtrimIn; import org.apache.doris.nereids.trees.expressions.functions.scalar.ScalarFunction; import org.apache.doris.nereids.trees.expressions.functions.scalar.Second; import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondCeil; @@ -432,6 +434,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.Tokenize; import org.apache.doris.nereids.trees.expressions.functions.scalar.Translate; import org.apache.doris.nereids.trees.expressions.functions.scalar.Trim; +import org.apache.doris.nereids.trees.expressions.functions.scalar.TrimIn; import org.apache.doris.nereids.trees.expressions.functions.scalar.Truncate; import org.apache.doris.nereids.trees.expressions.functions.scalar.Unhex; import org.apache.doris.nereids.trees.expressions.functions.scalar.UnixTimestamp; @@ -1539,6 +1542,10 @@ default R visitLtrim(Ltrim ltrim, C context) { return visitScalarFunction(ltrim, context); } + default R visitLtrimIn(LtrimIn ltrimIn, C context) { + return visitScalarFunction(ltrimIn, context); + } + default R visitMakeDate(MakeDate makeDate, C context) { return visitScalarFunction(makeDate, context); } @@ -1791,6 +1798,10 @@ default R visitRtrim(Rtrim rtrim, C context) { return visitScalarFunction(rtrim, context); } + default R visitRtrimIn(RtrimIn rtrimIn, C context) { + return visitScalarFunction(rtrimIn, context); + } + default R 
visitSecond(Second second, C context) { return visitScalarFunction(second, context); } @@ -2087,6 +2098,10 @@ default R visitTrim(Trim trim, C context) { return visitScalarFunction(trim, context); } + default R visitTrimIn(TrimIn trimIn, C context) { + return visitScalarFunction(trimIn, context); + } + default R visitTruncate(Truncate truncate, C context) { return visitScalarFunction(truncate, context); } diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_trim_in.out b/regression-test/data/query_p0/sql_functions/string_functions/test_trim_in.out new file mode 100644 index 00000000000000..d62ae2744e3a12 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_trim_in.out @@ -0,0 +1,547 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !empty_nullable -- + +-- !empty_not_nullable -- + +-- !all_null -- +\N +\N +\N + +-- !nullable -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + +-- !not_nullable -- + + hello + hello world + hello world +abcd +c浙江省杭州市a +浙江省杭州市 + +-- !nullable -- + + + + ehlowrd +ac +余杭区 +西湖区 + +-- !not_nullable_null -- +\N +\N +\N +\N +\N +\N +\N + +-- !nullable_null -- +\N +\N +\N +\N +\N +\N +\N + +-- !const_nullable -- +\N +\N +\N +\N +\N +\N +\N + +-- !partial_const_nullable -- +\N +\N +\N +\N +\N +\N +\N + +-- !const_partial_nullable_no_null -- +ab +ab +ab +ab +ab +ab +ab + +-- !const_other_not_nullable -- + + hello + hello world + hello world +abcd +c浙江省杭州市a +浙江省杭州市 + +-- !const_not_nullable -- +abdc +abdc +abdc +abdc +abdc +abdc +abdc + +-- !1 -- + + +-- !2 -- + + +-- !3 -- +hello + +-- !4 -- +hello + +-- !5 -- +hello + +-- !6 -- +hello world + +-- !7 -- + + +-- !8 -- +hello + +-- !9 -- +hello + +-- !10 -- +hello + +-- !11 -- +hello world + +-- !12 -- + + +-- !13 -- + + +-- !14 -- + hello 
+ +-- !15 -- + hello + +-- !16 -- +hello + +-- !17 -- + hello world + +-- !18 -- + + +-- !19 -- + hello + +-- !20 -- + hello + +-- !21 -- +hello + +-- !22 -- + hello world + +-- !23 -- + + +-- !24 -- + + +-- !25 -- + hello + +-- !26 -- +hello + +-- !27 -- +hello + +-- !28 -- +llo + +-- !29 -- +hello + +-- !30 -- +llo + +-- !31 -- +hello + +-- !32 -- +hello world + +-- !33 -- +llo world + +-- !34 -- + + +-- !35 -- +hello world + +-- !36 -- + + +-- !37 -- + + +-- !38 -- + hello + +-- !39 -- +hello + +-- !40 -- +llo + +-- !41 -- +hello + +-- !42 -- +llo + +-- !43 -- +hello + +-- !44 -- +hello world + +-- !45 -- +llo world + +-- !46 -- + + +-- !47 -- +hello world + +-- !48 -- + + +-- !49 -- + + +-- !50 -- + hello + +-- !51 -- + hello + +-- !52 -- + he + +-- !53 -- +hello + +-- !54 -- +he + +-- !55 -- +hello + +-- !56 -- + hello world + +-- !57 -- + hello wor + +-- !58 -- + + +-- !59 -- + hello world + +-- !60 -- +abc + +-- !61 -- + + +-- !62 -- + + +-- !63 -- + hello + +-- !64 -- + hello + +-- !65 -- + hello + +-- !66 -- + hello + +-- !67 -- + hello + +-- !68 -- +hello + +-- !69 -- + hello world + +-- !70 -- + hello world + +-- !71 -- + + +-- !72 -- + hello world + +-- !73 -- + +abcd +c浙江省杭州市a +llo +llo world +llo world +浙江省杭州市 + +-- !74 -- + +abcd +c浙江省杭州市a +llo +llo world +llo world +浙江省杭州市 + +-- !75 -- + + hello + hello world + hello world +abcd +c浙江省杭州市a +浙江省杭州市 + +-- !76 -- + + + +ac +lowrd +余杭区 +西湖区 + +-- !77 -- + + + +ac +lowrd +余杭区 +西湖区 + +-- !78 -- + + + + ehlowrd +ac +余杭区 +西湖区 + +-- !79 -- +\N +\N +\N +\N +\N +\N +\N + +-- !80 -- +\N +\N +\N +\N +\N +\N +\N + +-- !81 -- +\N +\N +\N +\N +\N +\N +\N + +-- !82 -- +\N +\N +\N +\N +\N +\N +\N + +-- !83 -- +\N +\N +\N +\N +\N +\N +\N + +-- !84 -- +\N +\N +\N +\N +\N +\N +\N + +-- !85 -- + + hello + hello world + hello world +abcd +c浙江省杭州市a +浙江 + +-- !86 -- + + hello + hello world + hello world +abcd +c浙江省杭州市a +浙江省杭州市 + +-- !87 -- + + hello + hello world + hello world +abcd +c浙江省杭州市a +浙江 + +-- !88 -- + + eh + 
ehlowrd +ac +he +区 +西湖区 + +-- !89 -- + + eh + ehlowrd +ac +he +区 +西湖区 + +-- !90 -- + + eh + ehlowrd +ac +he +余杭区 +西湖区 + +-- !91 -- + + hello + hello world + hello world +bcd +c浙江 +浙江 + +-- !92 -- + + hello + hello world + hello world +abcd +c浙江省杭州市a +浙江省杭州市 + +-- !93 -- + + hello + hello world + hello world +abcd +c浙江省杭州市a +浙江 + diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_trim_in.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_trim_in.groovy new file mode 100644 index 00000000000000..ae6790fb0693e1 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_trim_in.groovy @@ -0,0 +1,204 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_trim_in") { + // this table has nothing todo. 
just make it easier to generate queries + sql " drop table if exists hits_two_args " + sql """ create table hits_two_args( + nothing boolean + ) + properties("replication_num" = "1"); + """ + sql "insert into hits_two_args values(true);" + + sql " drop table if exists test_trim_in" + sql """ + create table test_trim_in ( + k0 int, + a varchar not null, + b varchar null + ) + DISTRIBUTED BY HASH(k0) + PROPERTIES + ( + "replication_num" = "1" + ); + """ + order_qt_empty_nullable "select trim_in(a, 'x') from test_trim_in" + order_qt_empty_not_nullable "select trim_in(b, 'x') from test_trim_in" + + sql "insert into test_trim_in values (1, '1', null), (1, '1', null), (1, '1', null)" + order_qt_all_null "select trim_in(b, NULL) from test_trim_in" + + sql "truncate table test_trim_in" + sql """ insert into test_trim_in values (1, "", ""), (2, "abcd", "ac"), (3, ' hello ', 'he '), + (4, ' hello world ', ' ehlowrd'),(5, ' hello world ', ' eh'),(6,'浙江省杭州市','余杭区'),(6,'c浙江省杭州市a','西湖区'); + """ + + /// all values + order_qt_nullable """ + SELECT atan2(t.arg1_two_args, t.ARG2) as result + FROM ( + SELECT hits_two_args.nothing, TABLE1.arg1_two_args, TABLE1.order1, TABLE2.ARG2, TABLE2.order2 + FROM hits_two_args + CROSS JOIN ( + SELECT b as arg1_two_args, k0 as order1 + FROM test_trim_in + ) as TABLE1 + CROSS JOIN ( + SELECT b as ARG2, k0 as order2 + FROM test_trim_in + ) as TABLE2 + )t; + """ + + /// nullables + order_qt_not_nullable "select trim_in(a, 'he') from test_trim_in" + order_qt_nullable "select trim_in(b, 'he') from test_trim_in" + order_qt_not_nullable_null "select trim_in(a, NULL) from test_trim_in" + order_qt_nullable_null "select trim_in(b, NULL) from test_trim_in" + + /// consts.
most by BE-UT + order_qt_const_nullable "select trim_in(NULL,NULL) from test_trim_in" + order_qt_partial_const_nullable "select trim_in(NULL, 'he') from test_trim_in" + order_qt_const_partial_nullable_no_null "select trim_in(nullable('abcd'), 'cde') from test_trim_in" + order_qt_const_other_not_nullable "select trim_in(a, 'x') from test_trim_in" + order_qt_const_not_nullable "select trim_in('abdc', 'df') from test_trim_in" + + + + /// folding + def re_fe + def re_be + def re_no_fold + def check_three_ways = { test_sql -> + re_fe = order_sql "select/*+SET_VAR(enable_fold_constant_by_be=false)*/ ${test_sql}" + re_be = order_sql "select/*+SET_VAR(enable_fold_constant_by_be=true)*/ ${test_sql}" + re_no_fold = order_sql "select/*+SET_VAR(debug_skip_fold_constant=true)*/ ${test_sql}" + logger.info("check on sql ${test_sql}") + assertEquals(re_fe, re_be) + assertEquals(re_fe, re_no_fold) + } + + check_three_ways "trim_in(' hello world ', ' ld')" + check_three_ways "ltrim_in(' hello world ', ' ld')" + check_three_ways "rtrim_in(' hello world ', ' ld')" + check_three_ways "trim_in(' hello world ', ' ehlowrd')" + check_three_ways "ltrim_in(' hello world ', ' ehlowrd')" + check_three_ways "rtrim_in(' hello world ', ' ehlowrd')" + check_three_ways "trim_in(' hello world ', '')" + check_three_ways "ltrim_in(' hello world ', '')" + check_three_ways "rtrim_in(' hello world ', '')" + check_three_ways "trim_in(' hello world ', ' ')" + check_three_ways "ltrim_in(' hello world ', ' ')" + check_three_ways "rtrim_in(' hello world ', ' ')" + + order_qt_1 "SELECT ltrim_in('');" + order_qt_2 "SELECT ltrim_in(' ');" + order_qt_3 "SELECT ltrim_in(' hello ');" + order_qt_4 "SELECT ltrim_in(' hello');" + order_qt_5 "SELECT ltrim_in('hello ');" + order_qt_6 "SELECT ltrim_in(' hello world ');" + order_qt_7 "SELECT ltrim_in(CAST('' AS CHAR(20)));" + order_qt_8 "SELECT ltrim_in(CAST(' hello ' AS CHAR(9)));" + order_qt_9 "SELECT ltrim_in(CAST(' hello' AS CHAR(7)));" + order_qt_10 "SELECT 
ltrim_in(CAST('hello ' AS CHAR(7)));" + order_qt_11 "SELECT ltrim_in(CAST(' hello world ' AS CHAR(13)));" + order_qt_12 "SELECT rtrim_in('');" + order_qt_13 "SELECT rtrim_in(' ');" + order_qt_14 "SELECT rtrim_in(' hello ');" + order_qt_15 "SELECT rtrim_in(' hello');" + order_qt_16 "SELECT rtrim_in('hello ');" + order_qt_17 "SELECT rtrim_in(' hello world ');" + order_qt_18 "SELECT rtrim_in(CAST('' AS CHAR(20)));" + order_qt_19 "SELECT rtrim_in(CAST(' hello ' AS CHAR(9)));" + order_qt_20 "SELECT rtrim_in(CAST(' hello' AS CHAR(7)));" + order_qt_21 "SELECT rtrim_in(CAST('hello ' AS CHAR(7)));" + order_qt_22 "SELECT rtrim_in(CAST(' hello world ' AS CHAR(13)));" + order_qt_23 "SELECT ltrim_in('', '');" + order_qt_24 "SELECT ltrim_in(' ', '');" + order_qt_25 "SELECT ltrim_in(' hello ', '');" + order_qt_26 "SELECT ltrim_in(' hello ', ' ');" + order_qt_27 "SELECT ltrim_in(' hello ', ' ');" + order_qt_28 "SELECT ltrim_in(' hello ', 'he ');" + order_qt_29 "SELECT ltrim_in(' hello', ' ');" + order_qt_30 "SELECT ltrim_in(' hello', 'e h');" + order_qt_31 "SELECT ltrim_in('hello ', 'l');" + order_qt_32 "SELECT ltrim_in(' hello world ', ' ');" + order_qt_33 "SELECT ltrim_in(' hello world ', ' eh');" + order_qt_34 "SELECT ltrim_in(' hello world ', ' ehlowrd');" + order_qt_35 "SELECT ltrim_in(' hello world ', ' x');" + order_qt_36 "SELECT ltrim_in(CAST('' AS CHAR(1)), '');" + order_qt_37 "SELECT ltrim_in(CAST(' ' AS CHAR(3)), '');" + order_qt_38 "SELECT ltrim_in(CAST(' hello ' AS CHAR(9)), '');" + order_qt_39 "SELECT ltrim_in(CAST(' hello ' AS CHAR(9)), ' ');" + order_qt_40 "SELECT ltrim_in(CAST(' hello ' AS CHAR(9)), 'he ');" + order_qt_41 "SELECT ltrim_in(CAST(' hello' AS CHAR(7)), ' ');" + order_qt_42 "SELECT ltrim_in(CAST(' hello' AS CHAR(7)), 'e h');" + order_qt_43 "SELECT ltrim_in(CAST('hello ' AS CHAR(7)), 'l');" + order_qt_44 "SELECT ltrim_in(CAST(' hello world ' AS CHAR(13)), ' ');" + order_qt_45 "SELECT ltrim_in(CAST(' hello world ' AS CHAR(13)), ' eh');" + order_qt_46 
"SELECT ltrim_in(CAST(' hello world ' AS CHAR(13)), ' ehlowrd');" + order_qt_47 "SELECT ltrim_in(CAST(' hello world ' AS CHAR(13)), ' x');" + order_qt_48 "SELECT rtrim_in('', '');" + order_qt_49 "SELECT rtrim_in(' ', '');" + order_qt_50 "SELECT rtrim_in(' hello ', '');" + order_qt_51 "SELECT rtrim_in(' hello ', ' ');" + order_qt_52 "SELECT rtrim_in(' hello ', 'lo ');" + order_qt_53 "SELECT rtrim_in('hello ', ' ');" + order_qt_54 "SELECT rtrim_in('hello ', 'l o');" + order_qt_55 "SELECT rtrim_in('hello ', 'l');" + order_qt_56 "SELECT rtrim_in(' hello world ', ' ');" + order_qt_57 "SELECT rtrim_in(' hello world ', ' ld');" + order_qt_58 "SELECT rtrim_in(' hello world ', ' ehlowrd');" + order_qt_59 "SELECT rtrim_in(' hello world ', ' x');" + order_qt_60 "SELECT rtrim_in(CAST('abc def' AS CHAR(7)), 'def');" + order_qt_61 "SELECT rtrim_in(CAST('' AS CHAR(1)), '');" + order_qt_62 "SELECT rtrim_in(CAST(' ' AS CHAR(3)), '');" + order_qt_63 "SELECT rtrim_in(CAST(' hello ' AS CHAR(9)), '');" + order_qt_64 "SELECT rtrim_in(CAST(' hello ' AS CHAR(9)), ' ');" + order_qt_65 "SELECT rtrim_in(CAST(' hello ' AS CHAR(9)), 'he ');" + order_qt_66 "SELECT rtrim_in(CAST(' hello' AS CHAR(7)), ' ');" + order_qt_67 "SELECT rtrim_in(CAST(' hello' AS CHAR(7)), 'e h');" + order_qt_68 "SELECT rtrim_in(CAST('hello ' AS CHAR(7)), 'l');" + order_qt_69 "SELECT rtrim_in(CAST(' hello world ' AS CHAR(13)), ' ');" + order_qt_70 "SELECT rtrim_in(CAST(' hello world ' AS CHAR(13)), ' eh');" + order_qt_71 "SELECT rtrim_in(CAST(' hello world ' AS CHAR(13)), ' ehlowrd');" + order_qt_72 "SELECT rtrim_in(CAST(' hello world ' AS CHAR(13)), ' x');" + + order_qt_73 "SELECT trim_in(a, ' eh') from test_trim_in;" + order_qt_74 "SELECT ltrim_in(a, ' eh') from test_trim_in;" + order_qt_75 "SELECT rtrim_in(a, ' eh') from test_trim_in;" + order_qt_76 "SELECT trim_in(b, ' eh') from test_trim_in;" + order_qt_77 "SELECT ltrim_in(b, ' eh') from test_trim_in;" + order_qt_78 "SELECT rtrim_in(b, ' eh') from test_trim_in;" + 
order_qt_79 "SELECT trim_in(a, NULL) from test_trim_in;" + order_qt_80 "SELECT ltrim_in(a, NULL) from test_trim_in;" + order_qt_81 "SELECT rtrim_in(a, NULL) from test_trim_in;" + order_qt_82 "SELECT trim_in(b, NULL) from test_trim_in;" + order_qt_83 "SELECT ltrim_in(b, NULL) from test_trim_in;" + order_qt_84 "SELECT rtrim_in(b, NULL) from test_trim_in;" + order_qt_85 "SELECT trim_in(a, '省市杭州') from test_trim_in;" + order_qt_86 "SELECT ltrim_in(a, '省市杭州') from test_trim_in;" + order_qt_87 "SELECT rtrim_in(a, '省市杭州') from test_trim_in;" + order_qt_88 "SELECT trim_in(b, '杭余') from test_trim_in;" + order_qt_89 "SELECT ltrim_in(b, '杭余') from test_trim_in;" + order_qt_90 "SELECT rtrim_in(b, '杭余') from test_trim_in;" + order_qt_91 "SELECT trim_in(a, '省市a杭州') from test_trim_in;" + order_qt_92 "SELECT ltrim_in(a, '省市b杭州') from test_trim_in;" + order_qt_93 "SELECT rtrim_in(a, '省市c杭州') from test_trim_in;" +}