diff --git a/.gitmodules b/.gitmodules index 021da59cf22..89b90ab67dc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -140,3 +140,6 @@ [submodule "contrib/qpl"] path = contrib/qpl url = https://github.com/intel/qpl.git +[submodule "contrib/simdjson"] + path = contrib/simdjson + url = https://github.com/simdjson/simdjson diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1794f4c3937..77f3c1994c8 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -187,3 +187,5 @@ endif () add_subdirectory(magic_enum) add_subdirectory(aws-cmake) + +add_subdirectory(simdjson) diff --git a/contrib/simdjson b/contrib/simdjson new file mode 160000 index 00000000000..17cb457ffd0 --- /dev/null +++ b/contrib/simdjson @@ -0,0 +1 @@ +Subproject commit 17cb457ffd017a354fd0d3361da4ea21311722dc diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index d3f06952d14..e21a7e0a169 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -184,6 +184,7 @@ target_link_libraries (tiflash_common_io prometheus-cpp::pull cpptoml magic_enum + simdjson libsymbolization ${RE2_LIBRARY} ${RE2_ST_LIBRARY} diff --git a/dbms/src/Common/VectorWriter.h b/dbms/src/Common/VectorWriter.h new file mode 100644 index 00000000000..a7eb7ffd43d --- /dev/null +++ b/dbms/src/Common/VectorWriter.h @@ -0,0 +1,124 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include + +#include + +namespace DB +{ +template +class VectorWriter +{ +public: + using Position = char *; + + explicit VectorWriter(VectorType & vector_, size_t initial_size = 16) + : vector(vector_) + { + if (vector.size() < initial_size) + vector.resize(initial_size); + pos = reinterpret_cast(vector.data()); + end = reinterpret_cast(vector.data() + vector.size()); + } + + inline void write(char x) + { + reserveForNextSize(1); + *pos = x; + ++pos; + } + + void write(const char * from, size_t n) + { + if (unlikely(n == 0)) + return; + reserveForNextSize(n); + std::memcpy(pos, from, n); + pos += n; + } + + void setOffset(size_t new_offset) + { + if (new_offset > vector.size()) + { + size_t request_size = (new_offset - count()); + reserveForNextSize(request_size); + } + pos = reinterpret_cast(vector.data() + new_offset); + } + + void advance(size_t n) { setOffset(offset() + n); } + + size_t offset() { return pos - reinterpret_cast(vector.data()); } + + size_t count() { return offset(); } + + ~VectorWriter() + { + vector.resize(count()); + pos = nullptr; + end = nullptr; + } + +private: + size_t remainingSize() const { return static_cast(end - pos); } + + void reserve(size_t new_size) + { + size_t pos_offset = offset(); + vector.resize(new_size); + pos = reinterpret_cast(vector.data() + pos_offset); + end = reinterpret_cast(vector.data() + vector.size()); + } + + void reserveForNextSize(size_t request_size = 1) + { + assert(request_size > 0); + if (remainingSize() < request_size) + { + size_t old_size = vector.size(); + size_t new_size = std::max(old_size + request_size, std::ceil(old_size * 1.5)); + reserve(new_size); + } + } + +private: + static_assert(sizeof(typename VectorType::value_type) == sizeof(char)); + VectorType & vector; + + Position pos = nullptr; + Position end = nullptr; +}; + +template +inline void writeChar(char x, VectorWriter & writer) +{ + writer.write(x); +} + +template +inline void writeVarUInt(UInt64 x, 
VectorWriter & writer) +{ + while (x >= 0x80) + { + writeChar(static_cast(x) | 0x80, writer); + x >>= 7; + } + writeChar(x, writer); +} +} // namespace DB diff --git a/dbms/src/Common/tests/gtest_simdjson.cpp b/dbms/src/Common/tests/gtest_simdjson.cpp new file mode 100644 index 00000000000..fcfe8b8a47e --- /dev/null +++ b/dbms/src/Common/tests/gtest_simdjson.cpp @@ -0,0 +1,216 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB::tests +{ +TEST(TestSIMDJson, error) +{ + simdjson::dom::parser parser; + { + std::string json_str{}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.error()); + } + { + std::string json_str{"[]]"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.error()); + } + { + std::string json_str{"fsdfhsdjhfjsdhfj"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.error()); + } + { + std::string json_str{"{}}"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.error()); + } + { + std::string json_str{"[[], [[fdjfhdjf]]]"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.error()); + } +} + +TEST(TestSIMDJson, literal) +{ + simdjson::dom::parser parser; + { + std::string json_str{"0"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_number()); + auto actual = res.get_double(); + ASSERT_TRUE(!actual.error()); + ASSERT_EQ(actual.value_unsafe(), 0); + } + { + std::string json_str{"1"}; + auto res = parser.parse(json_str); + 
ASSERT_TRUE(res.is_number()); + auto actual = res.get_double(); + ASSERT_TRUE(!actual.error()); + ASSERT_EQ(actual.value_unsafe(), 1); + } + { + std::string json_str{"-1"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_number()); + auto actual = res.get_double(); + ASSERT_TRUE(!actual.error()); + ASSERT_EQ(actual.value_unsafe(), -1); + } + { + std::string json_str{"1.111"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_number()); + auto actual = res.get_double(); + ASSERT_TRUE(!actual.error()); + ASSERT_EQ(actual.value_unsafe(), 1.111); + } + { + std::string json_str{"-1.111"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_number()); + auto actual = res.get_double(); + ASSERT_TRUE(!actual.error()); + ASSERT_EQ(actual.value_unsafe(), -1.111); + } + { + std::string json_str{"true"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_bool()); + auto actual = res.get_bool(); + ASSERT_TRUE(!actual.error()); + ASSERT_EQ(actual.value_unsafe(), true); + } + { + std::string json_str{"false"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_bool()); + auto actual = res.get_bool(); + ASSERT_TRUE(!actual.error()); + ASSERT_EQ(actual.value_unsafe(), false); + } + { + std::string json_str{"null"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_null()); + } + { + std::string json_str{"\"a\""}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_string()); + auto actual = res.get_string(); + ASSERT_TRUE(!actual.error()); + ASSERT_EQ(std::string(actual.value_unsafe()), "a"); + } +} + +TEST(TestSIMDJson, array) +{ + simdjson::dom::parser parser; + { + std::string json_str{"[]"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_array()); + auto array = res.get_array(); + ASSERT_TRUE(!array.error()); + const auto & actual = array.value_unsafe(); + ASSERT_EQ(actual.size(), 0); + } + { + std::string json_str{"[1, 2]"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_array()); + auto array = 
res.get_array(); + ASSERT_TRUE(!array.error()); + const auto & actual = array.value_unsafe(); + ASSERT_EQ(actual.size(), 2); + } + { + std::string json_str{"[1,2]"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_array()); + auto array = res.get_array(); + ASSERT_TRUE(!array.error()); + const auto & actual = array.value_unsafe(); + ASSERT_EQ(actual.size(), 2); + } + { + std::string json_str{"[[]]"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_array()); + auto array = res.get_array(); + ASSERT_TRUE(!array.error()); + const auto & actual = array.value_unsafe(); + ASSERT_EQ(actual.size(), 1); + ASSERT_TRUE(actual.at(0).is_array()); + } +} + +TEST(TestSIMDJson, object) +{ + simdjson::dom::parser parser; + { + std::string json_str{"{}"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_object()); + auto obj = res.get_object(); + ASSERT_TRUE(!obj.error()); + const auto & actual = obj.value_unsafe(); + ASSERT_EQ(actual.size(), 0); + } + { + std::string json_str{R"({"a":"b"})"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_object()); + auto obj = res.get_object(); + ASSERT_TRUE(!obj.error()); + const auto & actual = obj.value_unsafe(); + ASSERT_EQ(actual.size(), 1); + const auto & value = actual.at_key("a"); + ASSERT_TRUE(value.is_string()); + ASSERT_EQ(std::string(value.get_string().value_unsafe()), "b"); + } + { + std::string json_str{R"({"a" : "b"})"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_object()); + auto obj = res.get_object(); + ASSERT_TRUE(!obj.error()); + const auto & actual = obj.value_unsafe(); + ASSERT_EQ(actual.size(), 1); + const auto & value = actual.at_key("a"); + ASSERT_TRUE(value.is_string()); + ASSERT_EQ(std::string(value.get_string().value_unsafe()), "b"); + } + { + std::string json_str{R"({"a" : "b", "c":"d"})"}; + auto res = parser.parse(json_str); + ASSERT_TRUE(res.is_object()); + auto obj = res.get_object(); + ASSERT_TRUE(!obj.error()); + const auto & actual = obj.value_unsafe(); + 
ASSERT_EQ(actual.size(), 2); + const auto & value = actual.at_key("c"); + ASSERT_TRUE(value.is_string()); + ASSERT_EQ(std::string(value.get_string().value_unsafe()), "d"); + } +} + +} // namespace DB::tests diff --git a/dbms/src/Common/tests/gtest_vector_writer.cpp b/dbms/src/Common/tests/gtest_vector_writer.cpp new file mode 100644 index 00000000000..107f4c5c676 --- /dev/null +++ b/dbms/src/Common/tests/gtest_vector_writer.cpp @@ -0,0 +1,93 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include + +#include +#include + +namespace DB +{ +namespace tests +{ +class TestVectorWriter : public testing::Test +{ +}; + +TEST_F(TestVectorWriter, test) +try +{ + PaddedPODArray vector; + { + VectorWriter writer(vector, 1); + ASSERT_EQ(writer.offset(), 0); + + writer.write('a'); + ASSERT_EQ(writer.offset(), 1); + + writer.advance(3); + ASSERT_EQ(writer.offset(), 4); + + PaddedPODArray tmp; + tmp.resize_fill(5, 'a'); + writer.write(reinterpret_cast(tmp.data()), tmp.size()); + ASSERT_EQ(writer.offset(), 9); + + writer.setOffset(1); + writer.write('a'); + writer.write('b'); + writer.write('c'); + writer.setOffset(9); + } + ASSERT_EQ(vector.size(), 9); + StringRef str(reinterpret_cast(vector.data()), vector.size()); + ASSERT_EQ(str, "aabcaaaaa"); +} +CATCH + +TEST_F(TestVectorWriter, allocFirst) +try +{ + PaddedPODArray vector; + { + VectorWriter writer(vector, 1); + ASSERT_EQ(writer.offset(), 0); + + writer.advance(3); + ASSERT_EQ(writer.offset(), 3); + + writer.write('a'); + ASSERT_EQ(writer.offset(), 4); + + PaddedPODArray tmp; + tmp.resize_fill(5, 'a'); + writer.write(reinterpret_cast(tmp.data()), tmp.size()); + ASSERT_EQ(writer.offset(), 9); + + writer.setOffset(0); + writer.write('a'); + writer.write('b'); + writer.write('c'); + writer.setOffset(9); + } + ASSERT_EQ(vector.size(), 9); + StringRef str(reinterpret_cast(vector.data()), vector.size()); + ASSERT_EQ(str, "abcaaaaaa"); +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp index 8fd7047e845..0b36f292668 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -280,6 +281,61 @@ String DAGExpressionAnalyzerHelper::buildCastFunction( return buildCastFunctionInternal(analyzer, {name, 
type_expr_name}, false, expr.field_type(), actions); } +String DAGExpressionAnalyzerHelper::buildCastAsJsonWithInputTiDBField( + DAGExpressionAnalyzer * analyzer, + const tipb::Expr & expr, + const ExpressionActionsPtr & actions) +{ + if unlikely (expr.children_size() != 1) + throw TiFlashException("Cast function only support one argument", Errors::Coprocessor::BadRequest); + if unlikely (!exprHasValidFieldType(expr)) + throw TiFlashException("CAST function without valid field type", Errors::Coprocessor::BadRequest); + + const auto & input_expr = expr.children(0); + auto func_name = getFunctionName(expr); + + String arg = analyzer->getActions(input_expr, actions); + const auto & collator = getCollatorFromExpr(expr); + String result_name = genFuncString(func_name, {arg}, {collator}); + if (actions->getSampleBlock().has(result_name)) + return result_name; + + const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, analyzer->getContext()); + auto * function_build_ptr = function_builder.get(); + if (auto * function_builder = dynamic_cast(function_build_ptr); function_builder) + { + auto * function_impl = function_builder->getFunctionImpl().get(); + if (auto * function_cast_int_as_json = dynamic_cast(function_impl); + function_cast_int_as_json) + { + function_cast_int_as_json->setInputTiDBFieldType(input_expr.field_type()); + } + else if (auto * function_cast_string_as_json = dynamic_cast(function_impl); + function_cast_string_as_json) + { + function_cast_string_as_json->setInputTiDBFieldType(input_expr.field_type()); + function_cast_string_as_json->setOutputTiDBFieldType(expr.field_type()); + } + else if (auto * function_cast_time_as_json = dynamic_cast(function_impl); + function_cast_time_as_json) + { + function_cast_time_as_json->setInputTiDBFieldType(input_expr.field_type()); + } + else + { + throw Exception(fmt::format("Unexpected func {} in buildCastAsJsonWithInputTiDBField", func_name)); + } + } + else + { + throw 
Exception(fmt::format("Unexpected func {} in buildCastAsJsonWithInputTiDBField", func_name)); + } + + const ExpressionAction & action = ExpressionAction::applyFunction(function_builder, {arg}, result_name, collator); + actions->add(action); + return result_name; +} + template String DAGExpressionAnalyzerHelper::buildDateAddOrSubFunction( DAGExpressionAnalyzer * analyzer, @@ -478,6 +534,9 @@ DAGExpressionAnalyzerHelper::FunctionBuilderMap DAGExpressionAnalyzerHelper::fun {"ifNull", DAGExpressionAnalyzerHelper::buildIfNullFunction}, {"multiIf", DAGExpressionAnalyzerHelper::buildMultiIfFunction}, {"tidb_cast", DAGExpressionAnalyzerHelper::buildCastFunction}, + {"cast_int_as_json", DAGExpressionAnalyzerHelper::buildCastAsJsonWithInputTiDBField}, + {"cast_string_as_json", DAGExpressionAnalyzerHelper::buildCastAsJsonWithInputTiDBField}, + {"cast_time_as_json", DAGExpressionAnalyzerHelper::buildCastAsJsonWithInputTiDBField}, {"and", DAGExpressionAnalyzerHelper::buildLogicalFunction}, {"or", DAGExpressionAnalyzerHelper::buildLogicalFunction}, {"xor", DAGExpressionAnalyzerHelper::buildLogicalFunction}, diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h index d5960e926f8..5313ad6b530 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h @@ -71,6 +71,11 @@ class DAGExpressionAnalyzerHelper const tipb::Expr & expr, const ExpressionActionsPtr & actions); + static String buildCastAsJsonWithInputTiDBField( + DAGExpressionAnalyzer * analyzer, + const tipb::Expr & expr, + const ExpressionActionsPtr & actions); + template static String buildDateAddOrSubFunction( DAGExpressionAnalyzer * analyzer, diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 1134d40ce0b..5da7f08fb26 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -25,6 
+25,7 @@ #include #include + namespace DB { const Int8 VAR_SIZE = 0; @@ -78,7 +79,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::CastIntAsDecimal, "tidb_cast"}, {tipb::ScalarFuncSig::CastIntAsTime, "tidb_cast"}, //{tipb::ScalarFuncSig::CastIntAsDuration, "cast"}, - //{tipb::ScalarFuncSig::CastIntAsJson, "cast"}, + {tipb::ScalarFuncSig::CastIntAsJson, "cast_int_as_json"}, {tipb::ScalarFuncSig::CastRealAsInt, "tidb_cast"}, {tipb::ScalarFuncSig::CastRealAsReal, "tidb_cast"}, @@ -86,7 +87,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::CastRealAsDecimal, "tidb_cast"}, {tipb::ScalarFuncSig::CastRealAsTime, "tidb_cast"}, //{tipb::ScalarFuncSig::CastRealAsDuration, "cast"}, - //{tipb::ScalarFuncSig::CastRealAsJson, "cast"}, + {tipb::ScalarFuncSig::CastRealAsJson, "cast_real_as_json"}, {tipb::ScalarFuncSig::CastDecimalAsInt, "tidb_cast"}, {tipb::ScalarFuncSig::CastDecimalAsReal, "tidb_cast"}, @@ -94,7 +95,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::CastDecimalAsDecimal, "tidb_cast"}, {tipb::ScalarFuncSig::CastDecimalAsTime, "tidb_cast"}, //{tipb::ScalarFuncSig::CastDecimalAsDuration, "cast"}, - //{tipb::ScalarFuncSig::CastDecimalAsJson, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsJson, "cast_decimal_as_json"}, {tipb::ScalarFuncSig::CastStringAsInt, "tidb_cast"}, {tipb::ScalarFuncSig::CastStringAsReal, "tidb_cast"}, @@ -102,7 +103,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::CastStringAsDecimal, "tidb_cast"}, {tipb::ScalarFuncSig::CastStringAsTime, "tidb_cast"}, //{tipb::ScalarFuncSig::CastStringAsDuration, "cast"}, - //{tipb::ScalarFuncSig::CastStringAsJson, "cast"}, + {tipb::ScalarFuncSig::CastStringAsJson, "cast_string_as_json"}, {tipb::ScalarFuncSig::CastTimeAsInt, "tidb_cast"}, {tipb::ScalarFuncSig::CastTimeAsReal, "tidb_cast"}, @@ -110,7 +111,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::CastTimeAsDecimal, "tidb_cast"}, 
{tipb::ScalarFuncSig::CastTimeAsTime, "tidb_cast"}, {tipb::ScalarFuncSig::CastTimeAsDuration, "tidb_cast"}, - //{tipb::ScalarFuncSig::CastTimeAsJson, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsJson, "cast_time_as_json"}, //{tipb::ScalarFuncSig::CastDurationAsInt, "cast"}, //{tipb::ScalarFuncSig::CastDurationAsReal, "cast"}, @@ -118,7 +119,7 @@ const std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::CastDurationAsDecimal, "cast"}, //{tipb::ScalarFuncSig::CastDurationAsTime, "cast"}, {tipb::ScalarFuncSig::CastDurationAsDuration, "tidb_cast"}, - //{tipb::ScalarFuncSig::CastDurationAsJson, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsJson, "cast_duration_as_json"}, //{tipb::ScalarFuncSig::CastJsonAsInt, "cast"}, //{tipb::ScalarFuncSig::CastJsonAsReal, "cast"}, @@ -126,7 +127,7 @@ const std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::CastJsonAsDecimal, "cast"}, //{tipb::ScalarFuncSig::CastJsonAsTime, "cast"}, //{tipb::ScalarFuncSig::CastJsonAsDuration, "cast"}, - //{tipb::ScalarFuncSig::CastJsonAsJson, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsJson, "cast_json_as_json"}, {tipb::ScalarFuncSig::CoalesceInt, "coalesce"}, {tipb::ScalarFuncSig::CoalesceReal, "coalesce"}, @@ -1431,6 +1432,16 @@ bool hasUnsignedFlag(const tipb::FieldType & tp) return tp.flag() & TiDB::ColumnFlagUnsigned; } +bool hasIsBooleanFlag(const tipb::FieldType & tp) +{ + return tp.flag() & TiDB::ColumnFlagIsBooleanFlag; +} + +bool hasParseToJSONFlag(const tipb::FieldType & tp) +{ + return tp.flag() & TiDB::ColumnFlagParseToJSON; +} + void assertBlockSchema(const DataTypes & expected_types, const Block & block, const String & context_description) { assertBlockSchema( diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h index b31e9c364d8..98b55269b22 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -74,6 +74,8 @@ bool isUnsupportedEncodeType(const std::vector & types, tipb::E 
TiDB::TiDBCollatorPtr getCollatorFromExpr(const tipb::Expr & expr); TiDB::TiDBCollatorPtr getCollatorFromFieldType(const tipb::FieldType & field_type); bool hasUnsignedFlag(const tipb::FieldType & tp); +bool hasIsBooleanFlag(const tipb::FieldType & tp); +bool hasParseToJSONFlag(const tipb::FieldType & tp); void assertBlockSchema(const DataTypes & expected_types, const Block & block, const String & context_description); diff --git a/dbms/src/Functions/FunctionsJson.cpp b/dbms/src/Functions/FunctionsJson.cpp index 4b3e01d836c..cbbf304f33e 100644 --- a/dbms/src/Functions/FunctionsJson.cpp +++ b/dbms/src/Functions/FunctionsJson.cpp @@ -19,10 +19,17 @@ namespace DB { void registerFunctionsJson(FunctionFactory & factory) { - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } // namespace DB diff --git a/dbms/src/Functions/FunctionsJson.h b/dbms/src/Functions/FunctionsJson.h index 0efdf6b0a11..587bb933b15 100644 --- a/dbms/src/Functions/FunctionsJson.h +++ b/dbms/src/Functions/FunctionsJson.h @@ -17,16 +17,30 @@ #include #include #include +#include +#include +#include +#include +#include #include #include #include +#include #include #include #include +#include #include #include +#include +#include +#include +#include #include +#include +#include +#include namespace DB { @@ -39,13 +53,15 @@ namespace DB * cast_json_as_string(json_object) * json_length(json_object) * json_array(json_object...) 
+ * cast(column as json) * */ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; -} +extern const int UNKNOWN_TYPE; +} // namespace ErrorCodes inline bool isNullJsonBinary(size_t size) { @@ -54,11 +70,11 @@ inline bool isNullJsonBinary(size_t size) using namespace GatherUtils; -class FunctionsJsonExtract : public IFunction +class FunctionJsonExtract : public IFunction { public: static constexpr auto name = "json_extract"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } @@ -224,7 +240,7 @@ class FunctionsJsonExtract : public IFunction offsets_to.resize(rows); ColumnUInt8::MutablePtr col_null_map = ColumnUInt8::create(rows, 0); ColumnUInt8::Container & vec_null_map = col_null_map->getData(); - WriteBufferFromVector write_buffer(data_to); + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to); size_t current_offset = 0; for (size_t i = 0; i < rows; ++i) { @@ -275,11 +291,11 @@ class FunctionsJsonExtract : public IFunction }; -class FunctionsJsonUnquote : public IFunction +class FunctionJsonUnquote : public IFunction { public: static constexpr auto name = "json_unquote"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } @@ -310,7 +326,7 @@ class FunctionsJsonUnquote : public IFunction ColumnString::Offsets & offsets_to = col_to->getOffsets(); offsets_to.resize(rows); ColumnUInt8::MutablePtr col_null_map = ColumnUInt8::create(rows, 0); - WriteBufferFromVector write_buffer(data_to); + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to); size_t current_offset = 0; for (size_t i = 0; i < block.rows(); ++i) { @@ -332,11 +348,11 @@ class FunctionsJsonUnquote : public IFunction }; -class FunctionsCastJsonAsString : public IFunction +class FunctionCastJsonAsString : 
public IFunction { public: static constexpr auto name = "cast_json_as_string"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } @@ -369,7 +385,7 @@ class FunctionsCastJsonAsString : public IFunction offsets_to.resize(rows); ColumnUInt8::MutablePtr col_null_map = ColumnUInt8::create(rows, 0); ColumnUInt8::Container & vec_null_map = col_null_map->getData(); - WriteBufferFromVector write_buffer(data_to); + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to); size_t current_offset = 0; for (size_t i = 0; i < block.rows(); ++i) { @@ -456,11 +472,11 @@ class FunctionJsonLength : public IFunction }; -class FunctionsJsonArray : public IFunction +class FunctionJsonArray : public IFunction { public: static constexpr auto name = "json_array"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } @@ -491,12 +507,28 @@ class FunctionsJsonArray : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { - auto nested_block = createBlockWithNestedColumns(block, arguments, result); + if (arguments.empty()) + { + // clang-format off + const UInt8 empty_array_json_value[] = { + JsonBinary::TYPE_CODE_ARRAY, // array_type + 0x0, 0x0, 0x0, 0x0, // element_count + 0x8, 0x0, 0x0, 0x0}; // total_size + // clang-format on + auto empty_array_json = ColumnString::create(); + empty_array_json->insertData( + reinterpret_cast(empty_array_json_value), + sizeof(empty_array_json_value) / sizeof(UInt8)); + block.getByPosition(result).column = ColumnConst::create(std::move(empty_array_json), block.rows()); + return; + } + + auto nested_block = createBlockWithNestedColumns(block, arguments); StringSources sources; for (auto column_number : 
arguments) { sources.push_back( - block.getByPosition(column_number).type->onlyNull() + block.getByPosition(column_number).column->onlyNull() ? nullptr : createDynamicStringSource(*nested_block.getByPosition(column_number).column)); } @@ -504,24 +536,76 @@ class FunctionsJsonArray : public IFunction auto rows = block.rows(); auto col_to = ColumnString::create(); auto & data_to = col_to->getChars(); - WriteBufferFromVector write_buffer(data_to); auto & offsets_to = col_to->getOffsets(); offsets_to.resize(rows); + std::vector nullmaps; + nullmaps.reserve(sources.size()); + bool is_input_nullable = false; + for (auto column_number : arguments) + { + const auto & col = block.getByPosition(column_number).column; + if (col->isColumnNullable()) + { + const auto & column_nullable = static_cast(*col); + nullmaps.push_back(&(column_nullable.getNullMapData())); + is_input_nullable = true; + } + else + { + nullmaps.push_back(nullptr); + } + } + + if (is_input_nullable) + doExecuteImpl(sources, rows, data_to, offsets_to, nullmaps); + else + doExecuteImpl(sources, rows, data_to, offsets_to, nullmaps); + + block.getByPosition(result).column = std::move(col_to); + } + +private: + template + static void doExecuteImpl( + StringSources & sources, + size_t rows, + ColumnString::Chars_t & data_to, + ColumnString::Offsets & offsets_to, + const std::vector & nullmaps) + { + // rows * json_type. + size_t reserve_size = rows; + // for only null: null literal. + // for non only null: size of data_from. + for (const auto & source : sources) + reserve_size += source ? 
source->getSizeForReserve() : rows; + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to, reserve_size); + std::vector jsons; jsons.reserve(sources.size()); for (size_t i = 0; i < rows; ++i) { for (size_t col = 0; col < sources.size(); ++col) { - if (sources[col] && !block.getByPosition(arguments[col]).column->isNullAt(i)) + if constexpr (is_input_nullable) { - const auto & data_from = sources[col]->getWhole(); - jsons.emplace_back(data_from.data[0], StringRef(&data_from.data[1], data_from.size - 1)); + const auto * nullmap = nullmaps[col]; + if (!sources[col] || (nullmap && (*nullmap)[i])) + { + jsons.emplace_back(JsonBinary::TYPE_CODE_LITERAL, StringRef(&JsonBinary::LITERAL_NIL, 1)); + } + else + { + const auto & data_from = sources[col]->getWhole(); + jsons.emplace_back(data_from.data[0], StringRef(&data_from.data[1], data_from.size - 1)); + } } else { - jsons.emplace_back(JsonBinary::TYPE_CODE_LITERAL, StringRef(&JsonBinary::LITERAL_NIL, 1)); + assert(sources[col]); + const auto & data_from = sources[col]->getWhole(); + jsons.emplace_back(data_from.data[0], StringRef(&data_from.data[1], data_from.size - 1)); } } JsonBinary::buildBinaryJsonArrayInBuffer(jsons, write_buffer); @@ -530,11 +614,747 @@ class FunctionsJsonArray : public IFunction offsets_to[i] = write_buffer.count(); for (const auto & source : sources) { - if (source) + if constexpr (is_input_nullable) + { + if (source) + source->next(); + } + else + { + assert(source); source->next(); + } } } - data_to.resize(write_buffer.count()); + } +}; + + +class FunctionCastJsonAsJson : public IFunction +{ +public: + static constexpr auto name = "cast_json_as_json"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForConstants() const override { return false; } 
+ + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { return arguments[0]; } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + auto from = block.getByPosition(arguments[0]).column; + block.getByPosition(result).column = std::move(from); + } +}; + +class FunctionCastRealAsJson : public IFunction +{ +public: + static constexpr auto name = "cast_real_as_json"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForNulls() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if unlikely (!arguments[0]->isFloatingPoint()) + throw Exception( + fmt::format("Illegal type {} of argument of function {}", arguments[0]->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + auto rows = block.rows(); + auto col_to = ColumnString::create(); + auto & data_to = col_to->getChars(); + auto & offsets_to = col_to->getOffsets(); + offsets_to.resize(rows); + + const auto & from = block.getByPosition(arguments[0]); + if (from.type->getTypeId() == TypeIndex::Float32) + { + doExecute(data_to, offsets_to, from.column); + } + else + { + doExecute(data_to, offsets_to, from.column); + } + block.getByPosition(result).column = std::move(col_to); + } + +private: + template + static void doExecute( + ColumnString::Chars_t & data_to, + ColumnString::Offsets & offsets_to, + const ColumnPtr & column_ptr_from) + { + const auto * column_from = checkAndGetColumn>(column_ptr_from.get()); + RUNTIME_CHECK(column_from); + const auto & data_from = column_from->getData(); + // 
json_type + char 0 of string end + value + size_t reserve_size = data_from.size() * (1 + 1 + sizeof(Float64)); + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to, reserve_size); + for (size_t i = 0; i < data_from.size(); ++i) + { + JsonBinary::appendNumber(write_buffer, static_cast(data_from[i])); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } +}; + +class FunctionCastDecimalAsJson : public IFunction +{ +public: + static constexpr auto name = "cast_decimal_as_json"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForNulls() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if unlikely (!arguments[0]->isDecimal()) + throw Exception( + fmt::format("Illegal type {} of argument of function {}", arguments[0]->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + auto rows = block.rows(); + auto col_to = ColumnString::create(); + auto & data_to = col_to->getChars(); + auto & offsets_to = col_to->getOffsets(); + offsets_to.resize(rows); + + const auto & from = block.getByPosition(arguments[0]); + TypeIndex from_type_index = from.type->getTypeId(); + switch (from_type_index) + { + case TypeIndex::Decimal32: + doExecute(data_to, offsets_to, from.column); + break; + case TypeIndex::Decimal64: + doExecute(data_to, offsets_to, from.column); + break; + case TypeIndex::Decimal128: + doExecute(data_to, offsets_to, from.column); + break; + case TypeIndex::Decimal256: + doExecute(data_to, offsets_to, from.column); + break; + default: + throw Exception( + fmt::format( + "Illegal type {} 
of argument of function {}", + magic_enum::enum_name(from_type_index), + getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + block.getByPosition(result).column = std::move(col_to); + } + +private: + template + static void doExecute( + ColumnString::Chars_t & data_to, + ColumnString::Offsets & offsets_to, + const ColumnPtr & column_ptr_from) + { + const auto * column_from = checkAndGetColumn>(column_ptr_from.get()); + RUNTIME_CHECK(column_from); + // json_type + char 0 of string end + value + size_t reserve_size = column_from->size() * (1 + 1 + sizeof(Float64)); + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to, reserve_size); + for (size_t i = 0; i < column_from->size(); ++i) + { + const auto & field = (*column_from)[i].template safeGet>(); + // same as https://github.com/pingcap/tidb/blob/90628349860718bb84c94fe7dc1e1f9bd9da4348/pkg/expression/builtin_cast.go#L854-L865 + // https://github.com/pingcap/tidb/issues/48796 + // TODO `select json_type(cast(1111.11 as json))` should return `DECIMAL`, we return `DOUBLE` now. 
+ JsonBinary::appendNumber(write_buffer, static_cast(field)); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } +}; + +class FunctionCastIntAsJson : public IFunction +{ +public: + static constexpr auto name = "cast_int_as_json"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForNulls() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + void setInputTiDBFieldType(const tipb::FieldType & tidb_tp_) { input_tidb_tp = &tidb_tp_; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (unlikely(!arguments[0]->isInteger())) + throw Exception( + fmt::format("Illegal type {} of argument of function {}", arguments[0]->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + auto col_to = ColumnString::create(); + auto & data_to = col_to->getChars(); + auto & offsets_to = col_to->getOffsets(); + auto rows = block.rows(); + offsets_to.resize(rows); + + const auto & int_base_type = block.getByPosition(arguments[0]).type; + bool is_types_valid = getIntType(int_base_type, [&](const auto & int_type, bool) { + using IntType = std::decay_t; + using IntFieldType = typename IntType::FieldType; + const auto & from = block.getByPosition(arguments[0]); + // In raw function test, input_tidb_tp is nullptr. 
+ if (unlikely(input_tidb_tp == nullptr) || !hasIsBooleanFlag(*input_tidb_tp)) + { + if constexpr (std::is_unsigned_v) + doExecute(data_to, offsets_to, from.column); + else + doExecute(data_to, offsets_to, from.column); + } + else + { + doExecute(data_to, offsets_to, from.column); + } + + block.getByPosition(result).column = std::move(col_to); + return true; + }); + + if (unlikely(!is_types_valid)) + throw Exception( + fmt::format("Illegal types {} arguments of function {}", int_base_type->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + +private: + template + static bool getIntType(DataTypePtr type, F && f) + { + return castTypeToEither< + DataTypeInt8, + DataTypeInt16, + DataTypeInt32, + DataTypeInt64, + DataTypeUInt8, + DataTypeUInt16, + DataTypeUInt32, + DataTypeUInt64>(type.get(), std::forward(f)); + } + + template + static void doExecute( + ColumnString::Chars_t & data_to, + ColumnString::Offsets & offsets_to, + const ColumnPtr & column_ptr_from) + { + const auto * column_from = checkAndGetColumn>(column_ptr_from.get()); + RUNTIME_CHECK(column_from); + const auto & data_from = column_from->getData(); + + // json_type + char 0 of string end + value + size_t reserve_size = 0; + if constexpr (std::is_same_v) + reserve_size = data_from.size() * (1 + 1 + 1); + else + reserve_size = data_from.size() * (1 + 1 + sizeof(ToType)); + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to, reserve_size); + + for (size_t i = 0; i < data_from.size(); ++i) + { + JsonBinary::appendNumber(write_buffer, static_cast(data_from[i])); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + +private: + const tipb::FieldType * input_tidb_tp = nullptr; +}; + +class FunctionCastStringAsJson : public IFunction +{ +public: + static constexpr auto name = "cast_string_as_json"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const 
override { return 1; } + + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + + void setInputTiDBFieldType(const tipb::FieldType & tidb_tp_) { input_tidb_tp = &tidb_tp_; } + void setOutputTiDBFieldType(const tipb::FieldType & tidb_tp_) { output_tidb_tp = &tidb_tp_; } + void setCollator(const TiDB::TiDBCollatorPtr & collator_) override { collator = collator_; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto & input_type = arguments[0]; + if (input_type->onlyNull()) + { + return input_type; + } + + if unlikely (!removeNullable(input_type)->isStringOrFixedString()) + throw Exception( + fmt::format("Illegal type {} of argument of function {}", arguments[0]->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + auto return_type = std::make_shared(); + return input_type->isNullable() ? makeNullable(return_type) : return_type; + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + const auto & from = block.getByPosition(arguments[0]); + if (from.type->onlyNull()) + { + auto only_null_column = from.column; + block.getByPosition(result).column = std::move(only_null_column); + return; + } + + auto nested_block = createBlockWithNestedColumns(block, arguments); + auto input_source = createDynamicStringSource(*nested_block.getByPosition(arguments[0]).column); + + auto col_to = ColumnString::create(); + auto & data_to = col_to->getChars(); + auto & offsets_to = col_to->getOffsets(); + auto rows = block.rows(); + offsets_to.resize(rows); + + // In raw function test, input_tidb_tp/output_tidb_tp is nullptr. 
+ if (collator && collator->isBinary()) + { + auto tmp_null_map = ColumnUInt8::create(0, 0); + if (unlikely(input_tidb_tp == nullptr)) + { + doExecuteForBinary( + data_to, + offsets_to, + input_source, + tmp_null_map->getData(), + TiDB::TypeVarchar, + -1, + block.rows()); + } + else if (input_tidb_tp->tp() == TiDB::TypeString) + { + if (from.column->isColumnNullable()) + { + const auto & column_nullable = static_cast(*from.column); + doExecuteForBinary( + data_to, + offsets_to, + input_source, + column_nullable.getNullMapData(), + input_tidb_tp->tp(), + input_tidb_tp->flen(), + block.rows()); + } + else + { + doExecuteForBinary( + data_to, + offsets_to, + input_source, + tmp_null_map->getData(), + input_tidb_tp->tp(), + input_tidb_tp->flen(), + block.rows()); + } + } + else + { + doExecuteForBinary( + data_to, + offsets_to, + input_source, + tmp_null_map->getData(), + input_tidb_tp->tp(), + input_tidb_tp->flen(), + block.rows()); + } + } + else if ((unlikely(output_tidb_tp == nullptr)) || hasParseToJSONFlag(*output_tidb_tp)) + { + if (from.column->isColumnNullable()) + { + const auto & column_nullable = static_cast(*from.column); + doExecuteForParsingJson( + data_to, + offsets_to, + input_source, + column_nullable.getNullMapData(), + block.rows()); + } + else + { + auto tmp_null_map = ColumnUInt8::create(0, 0); + doExecuteForParsingJson( + data_to, + offsets_to, + input_source, + tmp_null_map->getData(), + block.rows()); + } + } + else + { + doExecuteForOthers(data_to, offsets_to, input_source, block.rows()); + } + + if (from.column->isColumnNullable()) + { + auto null_map = static_cast(*from.column).getNullMapColumnPtr(); + block.getByPosition(result).column = ColumnNullable::create(std::move(col_to), std::move(null_map)); + } + else + { + block.getByPosition(result).column = std::move(col_to); + } + } + +private: + template + static void doExecuteForBinary( + ColumnString::Chars_t & data_to, + ColumnString::Offsets & offsets_to, + const std::unique_ptr & 
data_from, + const NullMap & null_map_from, + UInt8 from_type_code, + Int32 flen, + size_t size) + { + size_t reserve_size = 0; + if constexpr (is_binary_str) + { + if (flen <= 0) + { + // json_type + from_type_code + size of data_from. + reserve_size += (size * (1 + 1) + data_from->getSizeForReserve()); + } + else + { + // for non-null value: char 0 of string end + json_type + from_type_code + flen. + size_t size_of_non_null_value = (1 + 1 + 1 + flen); + if constexpr (nullable_input_for_binary_str) + { + auto null_count = countBytesInFilter(null_map_from.data(), null_map_from.size()); + // for null value: char 0 of string end. + reserve_size += (null_count + (size - null_count) * size_of_non_null_value); + } + else + { + reserve_size += (size * size_of_non_null_value); + } + } + } + else + { + // json_type + from_type_code + size of data_from. + reserve_size += (size * (1 + 1) + data_from->getSizeForReserve()); + } + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to, reserve_size); + ColumnString::Chars_t tmp_buf; + for (size_t i = 0; i < size; ++i) + { + const auto & slice = data_from->getWhole(); + if constexpr (is_binary_str) + { + if constexpr (nullable_input_for_binary_str) + { + if (null_map_from[i]) + { + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + data_from->next(); + continue; + } + } + if (unlikely(flen <= 0)) + { + JsonBinary::appendOpaque( + write_buffer, + JsonBinary::Opaque{from_type_code, StringRef{slice.data, slice.size}}); + } + else + { + auto size_t_flen = static_cast(flen); + if (slice.size >= size_t_flen) + { + JsonBinary::appendOpaque( + write_buffer, + JsonBinary::Opaque{from_type_code, StringRef{slice.data, size_t_flen}}); + } + else + { + if (tmp_buf.size() < size_t_flen) + tmp_buf.resize(size_t_flen); + std::memcpy(tmp_buf.data(), slice.data, slice.size); + std::fill(tmp_buf.data() + slice.size, tmp_buf.data() + size_t_flen, 0); + JsonBinary::appendOpaque( + write_buffer, + JsonBinary::Opaque{from_type_code, 
StringRef{tmp_buf.data(), size_t_flen}}); + } + } + } + else + { + JsonBinary::appendOpaque( + write_buffer, + JsonBinary::Opaque{from_type_code, StringRef{slice.data, slice.size}}); + } + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + data_from->next(); + } + } + + template + static void doExecuteForParsingJson( + ColumnString::Chars_t & data_to, + ColumnString::Offsets & offsets_to, + const std::unique_ptr & data_from, + const NullMap & null_map_from, + size_t size) + { + // json_type + size of data_from. + size_t reserve_size = size + data_from->getSizeForReserve(); + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to, reserve_size); + simdjson::dom::parser parser; + for (size_t i = 0; i < size; ++i) + { + if constexpr (is_nullable) + { + if (null_map_from[i]) + { + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + data_from->next(); + continue; + } + } + + const auto & slice = data_from->getWhole(); + if (unlikely(slice.size == 0)) + throw Exception("Invalid JSON text: The document is empty."); + + const auto & json_elem = parser.parse(slice.data, slice.size); + if (unlikely(json_elem.error())) + { + throw Exception(fmt::format( + "Invalid JSON text: The document root must not be followed by other values, details: {}", + simdjson::error_message(json_elem.error()))); + } + JsonBinary::appendSIMDJsonElem(write_buffer, json_elem.value_unsafe()); + + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + data_from->next(); + } + } + + static void doExecuteForOthers( + ColumnString::Chars_t & data_to, + ColumnString::Offsets & offsets_to, + const std::unique_ptr & data_from, + size_t size) + { + // json_type + size of data_from + size_t reserve_size = size + data_from->getSizeForReserve(); + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to, reserve_size); + for (size_t i = 0; i < size; ++i) + { + const auto & slice = data_from->getWhole(); + JsonBinary::appendStringRef(write_buffer, 
StringRef{slice.data, slice.size}); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + data_from->next(); + } + } + +private: + const tipb::FieldType * input_tidb_tp = nullptr; + const tipb::FieldType * output_tidb_tp = nullptr; + TiDB::TiDBCollatorPtr collator = nullptr; +}; + +class FunctionCastTimeAsJson : public IFunction +{ +public: + static constexpr auto name = "cast_time_as_json"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForNulls() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + void setInputTiDBFieldType(const tipb::FieldType & tidb_tp_) { input_tidb_tp = &tidb_tp_; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if unlikely (!arguments[0]->isMyDateOrMyDateTime()) + throw Exception( + fmt::format("Illegal type {} of argument of function {}", arguments[0]->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + auto col_to = ColumnString::create(); + auto & data_to = col_to->getChars(); + auto & offsets_to = col_to->getOffsets(); + auto rows = block.rows(); + offsets_to.resize(rows); + + const auto & from = block.getByPosition(arguments[0]); + if (checkDataType(from.type.get())) + { + // In raw function test, input_tidb_tp is nullptr. 
+ bool is_timestamp = (unlikely(input_tidb_tp == nullptr)) || input_tidb_tp->tp() == TiDB::TypeTimestamp; + if (is_timestamp) + doExecute(data_to, offsets_to, from.column); + else + doExecute(data_to, offsets_to, from.column); + } + else if (checkDataType(from.type.get())) + { + doExecute(data_to, offsets_to, from.column); + } + + block.getByPosition(result).column = std::move(col_to); + } + +private: + template + static void doExecute( + ColumnString::Chars_t & data_to, + ColumnString::Offsets & offsets_to, + const ColumnPtr & column_ptr_from) + { + const auto * column_from + = checkAndGetColumn>(column_ptr_from.get()); + RUNTIME_CHECK(column_from); + const auto & data_from = column_from->getData(); + // json_type + char 0 of string end + value + size_t reserve_size = data_from.size() * (1 + 1 + sizeof(UInt64)); + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to, reserve_size); + for (size_t i = 0; i < data_from.size(); ++i) + { + if constexpr (std::is_same_v) + { + MyDate date(data_from[i]); + JsonBinary::appendDate(write_buffer, date); + } + else + { + MyDateTime date_time(data_from[i]); + if constexpr (is_timestamp) + JsonBinary::appendTimestamp(write_buffer, date_time); + else + JsonBinary::appendDatetime(write_buffer, date_time); + } + + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + +private: + const tipb::FieldType * input_tidb_tp = nullptr; +}; + +class FunctionCastDurationAsJson : public IFunction +{ +public: + static constexpr auto name = "cast_duration_as_json"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForNulls() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if unlikely 
(!arguments[0]->isMyTime()) + throw Exception( + fmt::format("Illegal type {} of argument of function {}", arguments[0]->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + auto col_to = ColumnString::create(); + auto & data_to = col_to->getChars(); + auto & offsets_to = col_to->getOffsets(); + auto rows = block.rows(); + offsets_to.resize(rows); + + const auto & from = block.getByPosition(arguments[0]); + if (likely(checkDataType(from.type.get()))) + { + const auto & col_from = checkAndGetColumn>(from.column.get()); + const auto & data_from = col_from->getData(); + // json_type + char 0 of string end + value + size_t reserve_size = data_from.size() * (1 + 1 + sizeof(UInt64) + sizeof(UInt32)); + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to, reserve_size); + for (size_t i = 0; i < data_from.size(); ++i) + { + // from https://github.com/pingcap/tidb/blob/3543275dcf4b6454eb874c1362c87d31a963da6d/pkg/expression/builtin_cast.go#L921 + // fsp always is MaxFsp. 
+ JsonBinary::appendDuration(write_buffer, data_from[i], 6); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + else + { + throw Exception( + fmt::format("Illegal type {} of argument of function {}", from.type->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } block.getByPosition(result).column = std::move(col_to); } diff --git a/dbms/src/Functions/FunctionsTiDBConversion.h b/dbms/src/Functions/FunctionsTiDBConversion.h index a4dfa02ac12..e251396f0d7 100644 --- a/dbms/src/Functions/FunctionsTiDBConversion.h +++ b/dbms/src/Functions/FunctionsTiDBConversion.h @@ -2273,7 +2273,7 @@ class FunctionTiDBCast final : public IFunctionBase context_); }; - // todo support convert to json type + // cast to json been implemented in FunctionsJson.h throw Exception{"tidb_cast to " + to_type->getName() + " is not supported", ErrorCodes::CANNOT_CONVERT_TYPE}; } diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index 7041a8b701d..ecaf2b1b9a0 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -426,6 +426,8 @@ class DefaultFunctionBuilder : public IFunctionBuilder function->getLambdaArgumentTypes(arguments); } + std::shared_ptr getFunctionImpl() const { return function; } + private: std::shared_ptr function; }; diff --git a/dbms/src/Functions/tests/gtest_cast_as_json.cpp b/dbms/src/Functions/tests/gtest_cast_as_json.cpp new file mode 100644 index 00000000000..93cefdccedc --- /dev/null +++ b/dbms/src/Functions/tests/gtest_cast_as_json.cpp @@ -0,0 +1,383 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace DB::tests +{ +/** + * Because most functions(except cast string as json) have + * ``` + * bool useDefaultImplementationForNulls() const override { return true; } + * bool useDefaultImplementationForConstants() const override { return true; } + * ``` + * there is no need to test const, null_value, and only value. + * + * CastIntAsJson, CastStringAsJson and CastDurationAsJson can only test the case where input_tidb_tp/output_tidb_tp is nullptr + */ +class TestCastAsJson : public DB::tests::FunctionTest +{ +public: + template + ColumnWithTypeAndName executeFunctionWithCast(const String & func_name, const ColumnsWithTypeAndName & columns) + { + ColumnWithTypeAndName json_column; + if constexpr (is_raw) + { + json_column = executeFunction(func_name, columns, nullptr, true); + } + else + { + json_column = executeFunction(func_name, columns); + } + // The `json_binary` should be cast as a string to improve readability. 
+ return executeFunction("cast_json_as_string", {json_column}); + } + + template + void executeAndAssert(const String & func_name, const Input & input, const String & expect) + { + ASSERT_COLUMN_EQ( + createColumn>({expect}), + executeFunctionWithCast(func_name, {createColumn({input})})); + } + + template + typename std::enable_if, void>::type executeAndAssert( + const String & func_name, + const DecimalField & input, + const String & expect) + { + auto meta = std::make_tuple(19, input.getScale()); + ASSERT_COLUMN_EQ( + createColumn>({expect}), + executeFunctionWithCast(func_name, {createColumn(meta, {input})})); + } + + template + typename std::enable_if, void>::type testForInt() + { + // Only raw function test is tested, so input_tidb_tp is always nullptr. + String func_name = "cast_int_as_json"; + executeAndAssert(func_name, 0, "0"); + executeAndAssert(func_name, 99, "99"); + if constexpr (std::is_signed_v) + { + executeAndAssert(func_name, -99, "-99"); + } + executeAndAssert( + func_name, + std::numeric_limits::max(), + fmt::format("{}", std::numeric_limits::max())); + executeAndAssert( + func_name, + std::numeric_limits::min(), + fmt::format("{}", std::numeric_limits::min())); + executeAndAssert( + func_name, + std::numeric_limits::lowest(), + fmt::format("{}", std::numeric_limits::lowest())); + } +}; + +TEST_F(TestCastAsJson, CastJsonAsJson) +try +{ + /// prepare + // [] + // clang-format off + const UInt8 empty_array[] = { + JsonBinary::TYPE_CODE_ARRAY, // array_type + 0x0, 0x0, 0x0, 0x0, // element_count + 0x8, 0x0, 0x0, 0x0}; // total_size + // clang-format on + ColumnWithTypeAndName json_column; + { + auto empty_array_json = ColumnString::create(); + empty_array_json->insertData(reinterpret_cast(empty_array), sizeof(empty_array) / sizeof(UInt8)); + empty_array_json->insertData(reinterpret_cast(empty_array), sizeof(empty_array) / sizeof(UInt8)); + json_column = ColumnWithTypeAndName(std::move(empty_array_json), std::make_shared()); + } + + auto 
gen_column_expect = [](const String & value) { + // double for rows_count 2. + return createColumn>({value, value}); + }; + + auto res = executeFunctionWithCast("cast_json_as_json", {json_column}); + auto expect = gen_column_expect("[]"); + ASSERT_COLUMN_EQ(expect, res); +} +CATCH + +TEST_F(TestCastAsJson, CastIntAsJson) +try +{ + testForInt(); + testForInt(); + testForInt(); + testForInt(); + + testForInt(); + testForInt(); + testForInt(); + testForInt(); +} +CATCH + +TEST_F(TestCastAsJson, CastRealAsJson) +try +{ + const String func_name = "cast_real_as_json"; + + /// Float32 + executeAndAssert(func_name, 0, "0"); + executeAndAssert(func_name, 999.999f, "999.9990234375"); + executeAndAssert(func_name, -999.999f, "-999.9990234375"); + executeAndAssert( + func_name, + std::numeric_limits::max(), + fmt::format("{}", static_cast(std::numeric_limits::max()))); + executeAndAssert( + func_name, + std::numeric_limits::min(), + fmt::format("{}", static_cast(std::numeric_limits::min()))); + executeAndAssert( + func_name, + std::numeric_limits::lowest(), + fmt::format("{}", static_cast(std::numeric_limits::lowest()))); + + /// Float64 + executeAndAssert(func_name, 0, "0"); + executeAndAssert(func_name, 999.999, "999.999"); + executeAndAssert(func_name, -999.999, "-999.999"); + executeAndAssert( + func_name, + std::numeric_limits::max(), + fmt::format("{}", std::numeric_limits::max())); + executeAndAssert( + func_name, + std::numeric_limits::min(), + fmt::format("{}", std::numeric_limits::min())); + executeAndAssert( + func_name, + std::numeric_limits::lowest(), + fmt::format("{}", std::numeric_limits::lowest())); +} +CATCH + +TEST_F(TestCastAsJson, CastDecimalAsJson) +try +{ + const String func_name = "cast_decimal_as_json"; + + using DecimalField32 = DecimalField; + using DecimalField64 = DecimalField; + using DecimalField128 = DecimalField; + using DecimalField256 = DecimalField; + + /// Decimal32 + executeAndAssert(func_name, DecimalField32(1011, 1), "101.1"); + 
executeAndAssert(func_name, DecimalField32(-1011, 1), "-101.1"); + executeAndAssert(func_name, DecimalField32(9999, 1), "999.9"); + executeAndAssert(func_name, DecimalField32(-9999, 1), "-999.9"); + + /// Decimal64 + executeAndAssert(func_name, DecimalField64(1011, 1), "101.1"); + executeAndAssert(func_name, DecimalField64(-1011, 1), "-101.1"); + executeAndAssert(func_name, DecimalField64(9999, 1), "999.9"); + executeAndAssert(func_name, DecimalField64(-9999, 1), "-999.9"); + + /// Decimal128 + executeAndAssert(func_name, DecimalField128(1011, 1), "101.1"); + executeAndAssert(func_name, DecimalField128(-1011, 1), "-101.1"); + executeAndAssert(func_name, DecimalField128(9999, 1), "999.9"); + executeAndAssert(func_name, DecimalField128(-9999, 1), "-999.9"); + + /// Decimal256 + executeAndAssert(func_name, DecimalField256(static_cast(1011), 1), "101.1"); + executeAndAssert(func_name, DecimalField256(static_cast(-1011), 1), "-101.1"); + executeAndAssert(func_name, DecimalField256(static_cast(9999), 1), "999.9"); + executeAndAssert(func_name, DecimalField256(static_cast(-9999), 1), "-999.9"); +} +CATCH + +TEST_F(TestCastAsJson, CastStringAsJson) +try +{ + // Only raw function test is tested, so output_tidb_tp is always nullptr and only the case of parsing json is tested here. + // Because of `bool useDefaultImplementationForNulls() const override { return true; }`, null column is need to be tested here. + + const String func_name = "cast_string_as_json"; + + /// case1 only null + { + ColumnWithTypeAndName only_null_const = createOnlyNullColumnConst(1); + ColumnsWithTypeAndName input{only_null_const}; + auto res = executeFunction(func_name, input, nullptr, true); + ASSERT_COLUMN_EQ(only_null_const, res); + } + + /// case2 nullable column + { + ColumnWithTypeAndName nullable_column = createColumn>({{}, "[]"}); + auto res = executeFunctionWithCast(func_name, {nullable_column}); + ASSERT_COLUMN_EQ(nullable_column, res); + } + // invalid json text. 
+ { + ColumnWithTypeAndName nullable_column = createColumn>({""}); + ASSERT_THROW(executeFunctionWithCast(func_name, {nullable_column}), Exception); + } + { + ColumnWithTypeAndName nullable_column = createColumn>({"dsadhgashg"}); + ASSERT_THROW(executeFunctionWithCast(func_name, {nullable_column}), Exception); + } + + /// case3 not null + // invalid json text + { + // empty document + ColumnWithTypeAndName nullable_column = createColumn>({""}); + ASSERT_THROW(executeFunctionWithCast(func_name, {nullable_column}), Exception); + } + { + // invaild json + ColumnWithTypeAndName nullable_column = createColumn>({"a"}); + ASSERT_THROW(executeFunctionWithCast(func_name, {nullable_column}), Exception); + } + { + // invaild json + ColumnWithTypeAndName nullable_column = createColumn>({"{fds, 1}"}); + ASSERT_THROW(executeFunctionWithCast(func_name, {nullable_column}), Exception); + } + { + // too deep + ColumnWithTypeAndName nullable_column = createColumn>( + {"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]"}); + ASSERT_THROW(executeFunctionWithCast(func_name, {nullable_column}), Exception); + } + // valid json text + // a. literal + executeAndAssert(func_name, "0", "0"); + executeAndAssert(func_name, "1", "1"); + executeAndAssert(func_name, "-1", "-1"); + executeAndAssert(func_name, "1.11", "1.11"); + executeAndAssert(func_name, "-1.11", "-1.11"); + executeAndAssert(func_name, "\"a\"", "\"a\""); + executeAndAssert(func_name, "true", "true"); + executeAndAssert(func_name, "false", "false"); + executeAndAssert(func_name, "null", "null"); + // b. 
json array + executeAndAssert(func_name, "[]", "[]"); + executeAndAssert(func_name, "[1, 1000, 2.22, \"a\", null]", "[1, 1000, 2.22, \"a\", null]"); + executeAndAssert( + func_name, + R"([1, 1000, 2.22, "a", null, {"a":1.11}])", + R"([1, 1000, 2.22, "a", null, {"a": 1.11}])"); + executeAndAssert( + func_name, + "[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]", + "[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]"); + // c. json object + executeAndAssert(func_name, "{}", "{}"); + executeAndAssert(func_name, "{\"a\":1}", "{\"a\": 1}"); + executeAndAssert( + func_name, + R"({"a":null,"b":1,"c":1.11,"d":[],"e":{}})", + R"({"a": null, "b": 1, "c": 1.11, "d": [], "e": {}})"); + executeAndAssert( + func_name, + R"({"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{}}}}}}}}}}})", + R"({"a": {"a": {"a": {"a": {"a": {"a": {"a": {"a": {"a": {"a": {}}}}}}}}}}})"); +} +CATCH + +TEST_F(TestCastAsJson, CastTimeAsJson) +try +{ + static auto const datetime_type_ptr = std::make_shared(6); + static auto const date_type_ptr = std::make_shared(); + + // DataTypeMyDateTime + { + auto data_col_ptr + = createColumn( + {MyDateTime(2023, 1, 2, 3, 4, 5, 6).toPackedUInt(), MyDateTime(0, 0, 0, 0, 0, 0, 0).toPackedUInt()}) + .column; + ColumnWithTypeAndName input(data_col_ptr, datetime_type_ptr, ""); + auto res = executeFunctionWithCast("cast_time_as_json", {input}); + auto expect + = createColumn>({"\"2023-01-02 03:04:05.000006\"", "\"0000-00-00 00:00:00.000000\""}); + ASSERT_COLUMN_EQ(expect, res); + } + + // DataTypeMyDate + // Only raw function test is tested, so input_tidb_tp is always nullptr and only the case of TiDB::TypeTimestamp is tested here. 
+ { + auto data_col_ptr = createColumn( + {MyDate(2023, 12, 31).toPackedUInt(), MyDate(0, 0, 0).toPackedUInt()}) + .column; + ColumnWithTypeAndName input(data_col_ptr, date_type_ptr, ""); + auto res = executeFunctionWithCast("cast_time_as_json", {input}); + auto expect = createColumn>({"\"2023-12-31\"", "\"0000-00-00\""}); + ASSERT_COLUMN_EQ(expect, res); + } +} +CATCH + +TEST_F(TestCastAsJson, CastDurationAsJson) +try +{ + { + ColumnWithTypeAndName input( + // 22hour, 22min, 22s, 222ms + createColumn({(22 * 3600000 + 22 * 60000 + 22 * 1000 + 222) * 1000000L, + -1 * (22 * 3600000 + 22 * 60000 + 22 * 1000 + 222) * 1000000L}) + .column, + std::make_shared(6), + ""); + + auto res = executeFunctionWithCast("cast_duration_as_json", {input}); + auto expect = createColumn>({"\"22:22:22.222000\"", "\"-22:22:22.222000\""}); + ASSERT_COLUMN_EQ(expect, res); + } + { + ColumnWithTypeAndName input( + // 22hour, 22min, 22s, 222ms + createColumn({(22 * 3600000 + 22 * 60000 + 22 * 1000 + 222) * 1000000L, + -1 * (22 * 3600000 + 22 * 60000 + 22 * 1000 + 222) * 1000000L}) + .column, + std::make_shared(1), + ""); + + auto res = executeFunctionWithCast("cast_duration_as_json", {input}); + auto expect = createColumn>({"\"22:22:22.222000\"", "\"-22:22:22.222000\""}); + ASSERT_COLUMN_EQ(expect, res); + } +} +CATCH + +} // namespace DB::tests diff --git a/dbms/src/Functions/tests/gtest_json_array.cpp b/dbms/src/Functions/tests/gtest_json_array.cpp index caa4a51ac88..2ad33f0c309 100644 --- a/dbms/src/Functions/tests/gtest_json_array.cpp +++ b/dbms/src/Functions/tests/gtest_json_array.cpp @@ -103,7 +103,7 @@ try // json_array() { auto res = executeFunctionWithCast({}, inputs); - auto expect = gen_column_expect("[]"); + auto expect = createConstColumn>(rows_count, "[]"); ASSERT_COLUMN_EQ(expect, res); } diff --git a/dbms/src/Storages/KVStore/tests/gtest_json_binary.cpp b/dbms/src/Storages/KVStore/tests/gtest_json_binary.cpp index 9a0a5729510..ed574628854 100644 --- 
a/dbms/src/Storages/KVStore/tests/gtest_json_binary.cpp +++ b/dbms/src/Storages/KVStore/tests/gtest_json_binary.cpp @@ -656,7 +656,7 @@ try auto col_to = ColumnString::create(); ColumnString::Chars_t & data_to = col_to->getChars(); ColumnString::Offsets & offsets_to = col_to->getOffsets(); - WriteBufferFromVector write_buffer(data_to); + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to); bool found = i.bj.extract(path_expr_container_vec, write_buffer); ASSERT_TRUE(found == i.found); if (found) @@ -693,7 +693,7 @@ try auto col_to = ColumnString::create(); ColumnString::Chars_t & data_to = col_to->getChars(); - WriteBufferFromVector write_buffer(data_to); + JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to); { /// Simple cache success check std::vector path_expr_container_vec; diff --git a/dbms/src/TiDB/Decode/JsonBinary.cpp b/dbms/src/TiDB/Decode/JsonBinary.cpp index 68562c5d268..aea8bf2f0f6 100644 --- a/dbms/src/TiDB/Decode/JsonBinary.cpp +++ b/dbms/src/TiDB/Decode/JsonBinary.cpp @@ -28,13 +28,18 @@ #include #pragma GCC diagnostic pop +#include + namespace DB { namespace ErrorCodes { extern const int LOGICAL_ERROR; -} +extern const int UNKNOWN_TYPE; +} // namespace ErrorCodes +namespace +{ constexpr char ELEM_SEPARATOR[] = ", "; constexpr char KEY_VALUE_SEPARATOR[] = ": "; constexpr size_t HEADER_SIZE = 8; // element size + data size. 
@@ -64,6 +69,303 @@ inline T decodeNumeric(size_t & cursor, const StringRef & raw_value) return res; } +template +inline void encodeNumeric(JsonBinary::JsonBinaryWriteBuffer & write_buffer, T value) +{ + toLittleEndianInPlace(value); + const char * from = reinterpret_cast(&value); + write_buffer.write(from, sizeof(T)); +} + +template +struct NeedDecode +{ +}; + +template <> +struct NeedDecode +{ + using Type = String; +}; + +template <> +struct NeedDecode +{ + using Type = void; +}; + +template +typename NeedDecode::Type DecodeJson(size_t & cursor, const String & raw_value) +{ + size_t base = cursor; + UInt8 type = raw_value[cursor++]; + size_t size = 0; + + switch (type) // JSON Root element type + { + case JsonBinary::TYPE_CODE_OBJECT: + cursor += 4; + size = decodeNumeric(cursor, raw_value); + break; + case JsonBinary::TYPE_CODE_ARRAY: + cursor += 4; + size = decodeNumeric(cursor, raw_value); + break; + case JsonBinary::TYPE_CODE_LITERAL: + size = 1; + break; + case JsonBinary::TYPE_CODE_INT64: + case JsonBinary::TYPE_CODE_UINT64: + case JsonBinary::TYPE_CODE_FLOAT64: + size = 8; + break; + case JsonBinary::TYPE_CODE_STRING: + size = DecodeVarUInt(cursor, raw_value); + size += (cursor - base - 1); + break; + default: + throw Exception( + "DecodeJsonBinary: Unknown JSON Element Type:" + std::to_string(type), + ErrorCodes::LOGICAL_ERROR); + } + + ++size; + cursor = base + size; + if constexpr (!doDecode) + return static_cast::Type>(0); + else + return static_cast::Type>(raw_value.substr(base, size)); +} + +String encodeBase64(const StringRef & str) +{ + std::ostringstream oss; + Poco::Base64Encoder encoder(oss, Poco::BASE64_NO_PADDING); + encoder.rdbuf()->setLineLength(0); /// No newline characters would be added + Poco::MemoryInputStream mis(str.data, str.size); + Poco::StreamCopier::copyStream(mis, encoder); + encoder.close(); + return oss.str(); +} + +bool jsonFinished(std::vector & json_binary_vec, bool one) +{ + return one && !json_binary_vec.empty(); +} + 
+inline JsonBinary::JsonType getJsonType(const simdjson::dom::element & elem) +{ + if (elem.is_object()) + { + return JsonBinary::TYPE_CODE_OBJECT; + } + else if (elem.is_array()) + { + return JsonBinary::TYPE_CODE_ARRAY; + } + else if (elem.is_bool() || elem.is_null()) + { + return JsonBinary::TYPE_CODE_LITERAL; + } + else if (elem.is_int64()) + { + return JsonBinary::TYPE_CODE_INT64; + } + else if (elem.is_uint64()) + { + return JsonBinary::TYPE_CODE_UINT64; + } + else if (elem.is_double()) + { + return JsonBinary::TYPE_CODE_FLOAT64; + } + else if (elem.is_string()) + { + return JsonBinary::TYPE_CODE_STRING; + } + else + { + throw Exception(ErrorCodes::UNKNOWN_TYPE, "unknown type: {}", magic_enum::enum_name(elem.type())); + } +} + +inline UInt64 appendValueOfSIMDJsonElem( + JsonBinary::JsonBinaryWriteBuffer & write_buffer, + const simdjson::dom::element & elem); + +// return depth. +inline UInt64 appendValueEntryAndData( + size_t buffer_start_pos, + UInt32 & data_offset, + const simdjson::dom::element & value, + JsonBinary::JsonBinaryWriteBuffer & write_buffer) +{ + auto type = getJsonType(value); + write_buffer.write(type); + if (type == JsonBinary::TYPE_CODE_LITERAL) + { + /// Literal values are inlined in the value entry, total takes 4 bytes + auto depth = appendValueOfSIMDJsonElem(write_buffer, value); // 1 byte + write_buffer.write(0); + write_buffer.write(0); + write_buffer.write(0); + return depth; + } + else + { + encodeNumeric(write_buffer, data_offset); + auto tmp_entry_pos = write_buffer.offset(); + + // write value data. + write_buffer.setOffset(data_offset + buffer_start_pos); + auto depth = appendValueOfSIMDJsonElem(write_buffer, value); + /// update data_offset + data_offset = write_buffer.offset() - buffer_start_pos; + + write_buffer.setOffset(tmp_entry_pos); + return depth; + } +} + +// return depth. 
+inline UInt64 appendValueOfSIMDJsonElem( + JsonBinary::JsonBinaryWriteBuffer & write_buffer, + const simdjson::dom::element & elem) +{ + if (elem.is_object()) + { + /// elem_count(4 bytes) + /// total_size(4 bytes) + /// key_entries(obj.size() * KEY_ENTRY_SIZE) + /// value_entries(obj.size() * VALUE_ENTRY_SIZE) + /// key_datas + /// value_datas + + const auto & obj = elem.get_object(); + UInt32 buffer_start_pos = write_buffer.offset(); + + // 1. write elem count + UInt32 element_count = obj.size(); + encodeNumeric(write_buffer, element_count); + + // 2. advance for total size + auto total_size_pos = write_buffer.offset(); + write_buffer.advance(4); + + // 3. write key entry with key offset. + UInt32 data_offset_start = HEADER_SIZE + obj.size() * (KEY_ENTRY_SIZE + VALUE_ENTRY_SIZE); + UInt32 data_offset = data_offset_start; + for (const auto & [key, _] : obj) + { + encodeNumeric(write_buffer, data_offset); + data_offset += key.size(); + if (unlikely(key.size() > std::numeric_limits::max())) + throw Exception("TiDB/TiFlash does not yet support JSON objects with the key length >= 65536"); + UInt16 key_len = key.size(); + encodeNumeric(write_buffer, key_len); + } + UInt32 value_entry_start_pos = write_buffer.offset(); + + // 4. write key value. + write_buffer.setOffset(buffer_start_pos + data_offset_start); + for (const auto & [key, _] : obj) + write_buffer.write(key.data(), key.size()); + + // 5. write value entry with value offset and value data. + write_buffer.setOffset(value_entry_start_pos); + UInt64 max_child_depth = 0; + for (const auto & [_, value] : obj) + { + auto child_depth = appendValueEntryAndData(buffer_start_pos, data_offset, value, write_buffer); + max_child_depth = std::max(max_child_depth, child_depth); + } + UInt64 depth = max_child_depth + 1; + JsonBinary::assertJsonDepth(depth); + + // 6. write total size in total_size_offset. 
+ UInt32 total_size = data_offset; + write_buffer.setOffset(total_size_pos); + encodeNumeric(write_buffer, total_size); + write_buffer.setOffset(buffer_start_pos + data_offset); + + return depth; + } + else if (elem.is_array()) + { + /// elem_count(4 bytes) + /// total_size(4 bytes) + /// value_entries(array.size() * VALUE_ENTRY_SIZE) + /// value_datas + + const auto & array = elem.get_array(); + UInt32 buffer_start_pos = write_buffer.offset(); + + // 1. write elem count + UInt32 element_count = array.size(); + encodeNumeric(write_buffer, element_count); + + // 2. advance for total size + auto total_size_pos = write_buffer.offset(); + write_buffer.advance(4); + + // 3. write value entry with value offset and value data. + UInt32 data_offset = HEADER_SIZE + array.size() * VALUE_ENTRY_SIZE; + UInt64 max_child_depth = 0; + for (const auto & value : array) + { + auto child_depth = appendValueEntryAndData(buffer_start_pos, data_offset, value, write_buffer); + max_child_depth = std::max(max_child_depth, child_depth); + } + UInt64 depth = max_child_depth + 1; + JsonBinary::assertJsonDepth(depth); + + // 4. write total size in total_size_offset. + UInt32 total_size = data_offset; + write_buffer.setOffset(total_size_pos); + encodeNumeric(write_buffer, total_size); + write_buffer.setOffset(buffer_start_pos + data_offset); + + return depth; + } + else if (elem.is_bool()) + { + write_buffer.write(elem.get_bool().value_unsafe() ?
JsonBinary::LITERAL_TRUE : JsonBinary::LITERAL_FALSE); + return 1; + } + else if (elem.is_int64()) + { + encodeNumeric(write_buffer, elem.get_int64().value_unsafe()); + return 1; + } + else if (elem.is_uint64()) + { + encodeNumeric(write_buffer, elem.get_uint64().value_unsafe()); + return 1; + } + else if (elem.is_double()) + { + encodeNumeric(write_buffer, elem.get_double().value_unsafe()); + return 1; + } + else if (elem.is_string()) + { + const auto & value = elem.get_string().value_unsafe(); + writeVarUInt(static_cast(value.size()), write_buffer); + write_buffer.write(value.data(), value.size()); + return 1; + } + else if (elem.is_null()) + { + write_buffer.write(JsonBinary::LITERAL_NIL); + return 1; + } + else + { + throw Exception(ErrorCodes::UNKNOWN_TYPE, "unknown type: {}", magic_enum::enum_name(elem.type())); + } +} +} // namespace + char JsonBinary::getChar(size_t offset) const { RUNTIME_CHECK(offset < data.size); @@ -178,66 +480,6 @@ JsonBinary::Opaque JsonBinary::getOpaque() const return Opaque{opaque_type, getSubRef(data_start, data_length)}; } -template -struct NeedDecode -{ -}; - -template <> -struct NeedDecode -{ - using Type = String; -}; - -template <> -struct NeedDecode -{ - using Type = void; -}; - -template -typename NeedDecode::Type DecodeJson(size_t & cursor, const String & raw_value) -{ - size_t base = cursor; - UInt8 type = raw_value[cursor++]; - size_t size = 0; - - switch (type) // JSON Root element type - { - case JsonBinary::TYPE_CODE_OBJECT: - cursor += 4; - size = decodeNumeric(cursor, raw_value); - break; - case JsonBinary::TYPE_CODE_ARRAY: - cursor += 4; - size = decodeNumeric(cursor, raw_value); - break; - case JsonBinary::TYPE_CODE_LITERAL: - size = 1; - break; - case JsonBinary::TYPE_CODE_INT64: - case JsonBinary::TYPE_CODE_UINT64: - case JsonBinary::TYPE_CODE_FLOAT64: - size = 8; - break; - case JsonBinary::TYPE_CODE_STRING: - size = DecodeVarUInt(cursor, raw_value); - size += (cursor - base - 1); - break; - default: - throw 
Exception( - "DecodeJsonBinary: Unknown JSON Element Type:" + std::to_string(type), - ErrorCodes::LOGICAL_ERROR); - } - - size++; - cursor = base + size; - if constexpr (!doDecode) - return static_cast::Type>(0); - else - return static_cast::Type>(raw_value.substr(base, size)); -} - void JsonBinary::SkipJson(size_t & cursor, const String & raw_value) { DecodeJson(cursor, raw_value); @@ -415,17 +657,6 @@ void JsonBinary::marshalStringTo(JsonBinaryWriteBuffer & write_buffer, const Str write_buffer.write('"'); } -String encodeBase64(const StringRef & str) -{ - std::ostringstream oss; - Poco::Base64Encoder encoder(oss, Poco::BASE64_NO_PADDING); - encoder.rdbuf()->setLineLength(0); /// No newline characters would be added - Poco::MemoryInputStream mis(str.data, str.size); - Poco::StreamCopier::copyStream(mis, encoder); - encoder.close(); - return oss.str(); -} - void JsonBinary::marshalOpaqueTo(JsonBinaryWriteBuffer & write_buffer, const Opaque & opaque) { const String base64_padding_ends[] = {"", "===", "==", "="}; @@ -506,9 +737,10 @@ String JsonBinary::toString() const { ColumnString::Chars_t data_to; data_to.reserve(data.size * 3 / 2); - JsonBinaryWriteBuffer write_buffer(data_to); - marshalTo(write_buffer); - write_buffer.finalize(); + { + JsonBinaryWriteBuffer write_buffer(data_to); + marshalTo(write_buffer); + } return String(reinterpret_cast(data_to.data()), data_to.size()); } @@ -520,9 +752,10 @@ void JsonBinary::toStringInBuffer(JsonBinaryWriteBuffer & write_buffer) const String JsonBinary::unquoteJsonString(const StringRef & ref) { String result; - WriteBufferFromVector write_buffer(result); - unquoteJsonStringInBuffer(ref, write_buffer); - write_buffer.finalize(); + { + WriteBufferFromVector write_buffer(result); + unquoteJsonStringInBuffer(ref, write_buffer); + } return result; } @@ -602,11 +835,6 @@ bool JsonBinary::extract( return found; } -bool jsonFinished(std::vector & json_binary_vec, bool one) -{ - return one && !json_binary_vec.empty(); -} - 
std::optional JsonBinary::searchObjectKey(JsonPathObjectKey & key) const { auto element_count = getElementCount(); @@ -785,9 +1013,7 @@ void JsonBinary::buildBinaryJsonElementsInBuffer( } else { - auto endian_value_offset = value_offset; - toLittleEndianInPlace(endian_value_offset); - write_buffer.write(reinterpret_cast(&endian_value_offset), sizeof(endian_value_offset)); + encodeNumeric(write_buffer, value_offset); /// update value_offset value_offset += bj.data.size; } @@ -815,11 +1041,9 @@ void JsonBinary::buildBinaryJsonArrayInBuffer( write_buffer.write(TYPE_CODE_ARRAY); UInt32 element_count = json_binary_vec.size(); - toLittleEndianInPlace(element_count); - write_buffer.write(reinterpret_cast(&element_count), sizeof(element_count)); + encodeNumeric(write_buffer, element_count); - toLittleEndianInPlace(total_size); - write_buffer.write(reinterpret_cast(&total_size), sizeof(total_size)); + encodeNumeric(write_buffer, total_size); buildBinaryJsonElementsInBuffer(json_binary_vec, write_buffer); } @@ -840,4 +1064,87 @@ UInt64 JsonBinary::getJsonLength(const std::string_view & raw_value) return 1; } } + +void JsonBinary::appendNumber(JsonBinaryWriteBuffer & write_buffer, bool value) +{ + write_buffer.write(TYPE_CODE_LITERAL); + write_buffer.write(value ? 
LITERAL_TRUE : LITERAL_FALSE); +} + +void JsonBinary::appendNumber(JsonBinaryWriteBuffer & write_buffer, UInt64 value) +{ + write_buffer.write(TYPE_CODE_UINT64); + encodeNumeric(write_buffer, value); +} + +void JsonBinary::appendNumber(JsonBinaryWriteBuffer & write_buffer, Int64 value) +{ + write_buffer.write(TYPE_CODE_INT64); + encodeNumeric(write_buffer, value); +} + +void JsonBinary::appendNumber(JsonBinaryWriteBuffer & write_buffer, Float64 value) +{ + write_buffer.write(TYPE_CODE_FLOAT64); + encodeNumeric(write_buffer, value); +} + +void JsonBinary::appendStringRef(JsonBinaryWriteBuffer & write_buffer, const StringRef & value) +{ + write_buffer.write(TYPE_CODE_STRING); + writeVarUInt(static_cast(value.size), write_buffer); + write_buffer.write(value.data, value.size); +} + +void JsonBinary::appendOpaque(JsonBinaryWriteBuffer & write_buffer, const Opaque & value) +{ + write_buffer.write(TYPE_CODE_OPAQUE); + write_buffer.write(value.type); + writeVarUInt(static_cast(value.data.size), write_buffer); + write_buffer.write(value.data.data, value.data.size); +} + +void JsonBinary::appendDate(JsonBinaryWriteBuffer & write_buffer, const MyDate & value) +{ + write_buffer.write(TYPE_CODE_DATE); + encodeNumeric(write_buffer, value.toCoreTime()); +} + +void JsonBinary::appendTimestamp(JsonBinaryWriteBuffer & write_buffer, const MyDateTime & value) +{ + write_buffer.write(TYPE_CODE_TIMESTAMP); + encodeNumeric(write_buffer, value.toCoreTime()); +} + +void JsonBinary::appendDatetime(JsonBinaryWriteBuffer & write_buffer, const MyDateTime & value) +{ + write_buffer.write(TYPE_CODE_DATETIME); + encodeNumeric(write_buffer, value.toCoreTime()); +} + +void JsonBinary::appendDuration(JsonBinaryWriteBuffer & write_buffer, Int64 duration, UInt64 fsp) +{ + write_buffer.write(TYPE_CODE_DURATION); + encodeNumeric(write_buffer, static_cast(duration)); + encodeNumeric(write_buffer, static_cast(fsp)); +} + +void JsonBinary::appendNull(JsonBinaryWriteBuffer & write_buffer) +{ + 
write_buffer.write(JsonBinary::TYPE_CODE_LITERAL); + write_buffer.write(JsonBinary::LITERAL_NIL); +} + +void JsonBinary::assertJsonDepth(UInt64 depth) +{ + if (unlikely(depth > (1 + MAX_JSON_DEPTH))) + throw Exception( + fmt::format("Invalid JSON text: The JSON document exceeds the maximum depth {}.", MAX_JSON_DEPTH)); +} + +void JsonBinary::appendSIMDJsonElem(JsonBinaryWriteBuffer & write_buffer, const simdjson::dom::element & elem) +{ + write_buffer.write(getJsonType(elem)); + appendValueOfSIMDJsonElem(write_buffer, elem); +} } // namespace DB diff --git a/dbms/src/TiDB/Decode/JsonBinary.h b/dbms/src/TiDB/Decode/JsonBinary.h index cb7b1a663e0..13323e2454c 100644 --- a/dbms/src/TiDB/Decode/JsonBinary.h +++ b/dbms/src/TiDB/Decode/JsonBinary.h @@ -15,12 +15,15 @@ #pragma once #include +#include #include #include +#include #include #include #include #include +#include #include @@ -101,7 +104,7 @@ class JsonBinary public: using JsonType = UInt8; using DupCheckSet = std::unique_ptr>; - using JsonBinaryWriteBuffer = WriteBufferFromVector; + using JsonBinaryWriteBuffer = VectorWriter; static constexpr JsonType TYPE_CODE_OBJECT = 0x01; // TypeCodeObject indicates the JSON is an object. static constexpr JsonType TYPE_CODE_ARRAY = 0x03; // TypeCodeArray indicates the JSON is an array. static constexpr JsonType TYPE_CODE_LITERAL = 0x04; // TypeCodeLiteral indicates the JSON is a literal. @@ -119,6 +122,8 @@ class JsonBinary static constexpr UInt8 LITERAL_TRUE = 0x01; // LiteralTrue represents JSON true. static constexpr UInt8 LITERAL_FALSE = 0x02; // LiteralFalse represents JSON false. 
+ static constexpr UInt64 MAX_JSON_DEPTH = 100; + /// Opaque represents a raw database binary type struct Opaque { @@ -163,6 +168,22 @@ class JsonBinary static UInt64 getJsonLength(const std::string_view & raw_value); + static void appendNumber(JsonBinaryWriteBuffer & write_buffer, bool value); + static void appendNumber(JsonBinaryWriteBuffer & write_buffer, UInt64 value); + static void appendNumber(JsonBinaryWriteBuffer & write_buffer, Int64 value); + static void appendNumber(JsonBinaryWriteBuffer & write_buffer, Float64 value); + static void appendStringRef(JsonBinaryWriteBuffer & write_buffer, const StringRef & value); + static void appendOpaque(JsonBinaryWriteBuffer & write_buffer, const Opaque & value); + static void appendDate(JsonBinaryWriteBuffer & write_buffer, const MyDate & value); + static void appendTimestamp(JsonBinaryWriteBuffer & write_buffer, const MyDateTime & value); + static void appendDatetime(JsonBinaryWriteBuffer & write_buffer, const MyDateTime & value); + static void appendDuration(JsonBinaryWriteBuffer & write_buffer, Int64 duration, UInt64 fsp); + static void appendNull(JsonBinaryWriteBuffer & write_buffer); + + static void appendSIMDJsonElem(JsonBinaryWriteBuffer & write_buffer, const simdjson::dom::element & elem); + + static void assertJsonDepth(UInt64 depth); + private: Int64 getInt64() const; UInt64 getUInt64() const; @@ -204,6 +225,7 @@ class JsonBinary static void marshalOpaqueTo(JsonBinaryWriteBuffer & write_buffer, const Opaque & o); static void marshalDurationTo(JsonBinaryWriteBuffer & write_buffer, Int64 duration, UInt32 fsp); +private: JsonType type; /// 'data' doesn't contain type byte. 
/// In this way, when we construct new JsonBinary object for child field, new object's 'data' field can directly reference original object's data memory as a slice diff --git a/dbms/src/TiDB/Schema/TiDB.h b/dbms/src/TiDB/Schema/TiDB.h index 26af887824f..82603700d0c 100644 --- a/dbms/src/TiDB/Schema/TiDB.h +++ b/dbms/src/TiDB/Schema/TiDB.h @@ -120,6 +120,8 @@ enum TP M(OnUpdateNow, (1 << 13)) \ M(PartKey, (1 << 14)) \ M(Num, (1 << 15)) \ + M(ParseToJSON, (1 << 18)) \ + M(IsBooleanFlag, (1 << 19)) \ M(GeneratedColumn, (1 << 23)) enum ColumnFlag diff --git a/tests/fullstack-test/expr/cast_as_json.test b/tests/fullstack-test/expr/cast_as_json.test new file mode 100644 index 00000000000..9e5e6c046a2 --- /dev/null +++ b/tests/fullstack-test/expr/cast_as_json.test @@ -0,0 +1,357 @@ +# Copyright 2023 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# prepare data +mysql> drop table if exists test.test_int; +mysql> CREATE TABLE test.test_int (bool_col boolean, `tiny_us` tinyint(20) unsigned,`tiny_s` tinyint(20),`small_us` smallint(20) unsigned,`small_s` smallint(20),`medium_us` mediumint(20) unsigned,`medium_s` mediumint(20),`int_us` int(20) unsigned,`int_s` int(20),`bigint_us` bigint(20) unsigned,`bigint_s` bigint(20)); +mysql> insert into test.test_int values(null, null, null, null, null, null, null, null, null, null, null); +mysql> insert into test.test_int values(false, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +mysql> insert into test.test_int values(true, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1); +mysql> alter table test.test_int set tiflash replica 1; + +mysql> drop table if exists test.test_float; +mysql> CREATE TABLE test.test_float (float_s float, `float_us` float unsigned, `double_s` double,`double_us` double unsigned); +mysql> insert into test.test_float values(null, null, null, null); +mysql> insert into test.test_float values(0, 0, 0, 0); +mysql> insert into test.test_float values(-999.999, 999.999, -999.999, 999.999); +mysql> alter table test.test_float set tiflash replica 1; + +mysql> drop table if exists test.test_str; +mysql> CREATE TABLE test.test_str (flag tinyint,str_ch CHAR,str_vch VARCHAR(100),str_tb TINYBLOB,str_tt TINYTEXT,str_b BLOB,str_t TEXT,str_mb MEDIUMBLOB,str_mt MEDIUMTEXT,str_lb LONGBLOB,str_lt LONGTEXT); +mysql> insert into test.test_str values(0, null, null, null, null, null, null, null, null, null, null); +mysql> insert into test.test_str values(1, '', '', '', '', '', '', '', '', '', ''); +mysql> insert into test.test_str values(2, 'a', '[1, \"1\", {\"1\":\"1\"}]', 'helloword', '[1, \"1\", {\"1\":\"1\"}]', 'helloword', '[1, \"1\", {\"1\":\"1\"}]', 'helloword', '[1, \"1\", {\"1\":\"1\"}]', 'helloword', '[1, \"1\", {\"1\":\"1\"}]'); #NO_UNESCAPE +mysql> insert into test.test_str values(3, null, '\"a\"', null, null, null, null, null, null, null, null); #NO_UNESCAPE +mysql> insert into 
test.test_str values(3, null, '1', null, null, null, null, null, null, null, null); #NO_UNESCAPE +mysql> insert into test.test_str values(3, null, '1.11', null, null, null, null, null, null, null, null); #NO_UNESCAPE +mysql> insert into test.test_str values(3, null, 'true', null, null, null, null, null, null, null, null); #NO_UNESCAPE +mysql> alter table test.test_str set tiflash replica 1; + +mysql> drop table if exists test.test_binary; +mysql> CREATE TABLE test.test_binary (col_b binary(5), col_br varbinary(5)); +mysql> insert into test.test_binary values(null, null); +mysql> insert into test.test_binary values('12345', '12345'); +mysql> insert into test.test_binary values('12', '12'); +mysql> insert into test.test_binary values('', ''); +mysql> alter table test.test_binary set tiflash replica 1; + +mysql> drop table if exists test.test_bin_str; +mysql> create table test.test_bin_str(col varchar(5)) COLLATE=binary; +mysql> insert into test.test_bin_str values(null); +mysql> insert into test.test_bin_str values('12345'); +mysql> insert into test.test_bin_str values('12'); +mysql> insert into test.test_bin_str values(''); +mysql> alter table test.test_bin_str set tiflash replica 1; + +mysql> drop table if exists test.test_long_str; +mysql> create table test.test_long_str(col varchar(500)); +mysql> insert into test.test_long_str values('[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]'); +mysql> alter table test.test_long_str set tiflash replica 1; + +mysql> drop table if exists test.test_time; +mysql> CREATE TABLE test.test_time (col_d DATE, col_dt DATETIME, col_ts TIMESTAMP, col_y YEAR); +mysql> insert into test.test_time values(null, null, null, null); +mysql> insert into test.test_time values('2023-11-14', '2023-11-14 21:59:59', '2023-11-14 21:59:59', '2023'); +mysql> alter table 
test.test_time set tiflash replica 1; + +mysql> drop table if exists test.test_duration; +mysql> CREATE TABLE test.test_duration (col1 time(1), col6 time(6)); +mysql> insert into test.test_duration values(null, null); +mysql> insert into test.test_duration values('08:00:00', '08:00:00'); +mysql> alter table test.test_duration set tiflash replica 1; + +mysql> drop table if exists test.test_json; +mysql> CREATE TABLE test.test_json (col json); +mysql> insert into test.test_json values(null); +mysql> insert into test.test_json values('[1, 2, null, [], {}, 1.11, \"a\", \"a\"]'); #NO_UNESCAPE +mysql> insert into test.test_json values('{\"a\":1, \"b\":1.11, \"c\":[], \"d\":\"d\", \"e\":{}}'); #NO_UNESCAPE +mysql> alter table test.test_json set tiflash replica 1; + +mysql> drop table if exists test.test_decimal; +mysql> CREATE TABLE test.test_decimal (col32 decimal(9, 1), col64 decimal(18, 1), col128 decimal(38, 1), col256 decimal(65, 1)); +mysql> insert into test.test_decimal values(null, null, null, null); +mysql> insert into test.test_decimal values(0.1, 0.1, 0.1, 0.1); +mysql> alter table test.test_decimal set tiflash replica 1; + +func> wait_table test test_int +func> wait_table test test_float +func> wait_table test test_str +func> wait_table test test_binary +func> wait_table test test_bin_str +func> wait_table test test_long_str +func> wait_table test test_time +func> wait_table test test_duration +func> wait_table test test_json +func> wait_table test test_decimal + +# test +## cast int as json +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(bool_col as json), cast(bool_col > 0 as json) from test.test_int; ++------------------------+----------------------------+ +| cast(bool_col as json) | cast(bool_col > 0 as json) | ++------------------------+----------------------------+ +| NULL | NULL | +| 0 | false | +| 1 | true | ++------------------------+----------------------------+ +mysql> set 
@@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(tiny_us as json), cast(tiny_s as json) from test.test_int; ++-----------------------+----------------------+ +| cast(tiny_us as json) | cast(tiny_s as json) | ++-----------------------+----------------------+ +| NULL | NULL | +| 0 | 0 | +| 1 | -1 | ++-----------------------+----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(small_us as json), cast(small_s as json) from test.test_int; ++------------------------+-----------------------+ +| cast(small_us as json) | cast(small_s as json) | ++------------------------+-----------------------+ +| NULL | NULL | +| 0 | 0 | +| 1 | -1 | ++------------------------+-----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(medium_us as json), cast(medium_s as json) from test.test_int; ++-------------------------+------------------------+ +| cast(medium_us as json) | cast(medium_s as json) | ++-------------------------+------------------------+ +| NULL | NULL | +| 0 | 0 | +| 1 | -1 | ++-------------------------+------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(int_us as json), cast(int_s as json) from test.test_int; ++----------------------+---------------------+ +| cast(int_us as json) | cast(int_s as json) | ++----------------------+---------------------+ +| NULL | NULL | +| 0 | 0 | +| 1 | -1 | ++----------------------+---------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(bigint_us as json), cast(bigint_s as json) from test.test_int; ++-------------------------+------------------------+ +| cast(bigint_us as json) | cast(bigint_s as json) | ++-------------------------+------------------------+ +| NULL | NULL | +| 0 | 0 | +| 1 | -1 | ++-------------------------+------------------------+ + +## cast 
real as json +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(float_s as json), cast(float_us as json) from test.test_float; ++-----------------------+------------------------+ +| cast(float_s as json) | cast(float_us as json) | ++-----------------------+------------------------+ +| NULL | NULL | +| 0 | 0 | +| -999.9990234375 | 999.9990234375 | ++-----------------------+------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(double_s as json), cast(double_us as json) from test.test_float; ++------------------------+-------------------------+ +| cast(double_s as json) | cast(double_us as json) | ++------------------------+-------------------------+ +| NULL | NULL | +| 0 | 0 | +| -999.999 | 999.999 | ++------------------------+-------------------------+ + +## cast string as json +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_ch as json) from test.test_str where flag = 0; ++----------------------+ +| cast(str_ch as json) | ++----------------------+ +| NULL | ++----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_ch as json) from test.test_str where flag = 1; +{#REGEXP}.*Invalid.* +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_ch as json) from test.test_str where flag = 2; +{#REGEXP}.*Invalid.* +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_vch as json) from test.test_str where flag = 0; ++-----------------------+ +| cast(str_vch as json) | ++-----------------------+ +| NULL | ++-----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_vch as json) from test.test_str where flag = 1; +{#REGEXP}.*Invalid.* +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; 
select cast(str_vch as json) from test.test_str where flag = 2; ++-----------------------+ +| cast(str_vch as json) | ++-----------------------+ +| [1, "1", {"1": "1"}] | ++-----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_vch as json) from test.test_str where flag = 3; ++-----------------------+ +| cast(str_vch as json) | ++-----------------------+ +| "a" | +| 1 | +| 1.11 | +| true | ++-----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_tb as json) from test.test_str where flag in (0, 1, 2); ++-------------------------------+ +| cast(str_tb as json) | ++-------------------------------+ +| NULL | +| "base64:type249:" | +| "base64:type249:aGVsbG93b3Jk" | ++-------------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_tt as json) from test.test_str where flag = 0; ++----------------------+ +| cast(str_tt as json) | ++----------------------+ +| NULL | ++----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_tt as json) from test.test_str where flag = 1; +{#REGEXP}.*Invalid.* +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_tt as json) from test.test_str where flag = 2; ++----------------------+ +| cast(str_tt as json) | ++----------------------+ +| [1, "1", {"1": "1"}] | ++----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_b as json) from test.test_str where flag in (0, 1, 2); ++-------------------------------+ +| cast(str_b as json) | ++-------------------------------+ +| NULL | +| "base64:type252:" | +| "base64:type252:aGVsbG93b3Jk" | ++-------------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_t as json) from 
test.test_str where flag = 0; ++---------------------+ +| cast(str_t as json) | ++---------------------+ +| NULL | ++---------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_t as json) from test.test_str where flag = 1; +{#REGEXP}.*Invalid.* +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_t as json) from test.test_str where flag = 2; ++----------------------+ +| cast(str_t as json) | ++----------------------+ +| [1, "1", {"1": "1"}] | ++----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_mb as json) from test.test_str where flag in (0, 1, 2); ++-------------------------------+ +| cast(str_mb as json) | ++-------------------------------+ +| NULL | +| "base64:type250:" | +| "base64:type250:aGVsbG93b3Jk" | ++-------------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_mt as json) from test.test_str where flag = 0; ++----------------------+ +| cast(str_mt as json) | ++----------------------+ +| NULL | ++----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_mt as json) from test.test_str where flag = 1; +{#REGEXP}.*Invalid.* +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_mt as json) from test.test_str where flag = 2; ++----------------------+ +| cast(str_mt as json) | ++----------------------+ +| [1, "1", {"1": "1"}] | ++----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_lb as json) from test.test_str where flag in (0, 1, 2); ++-------------------------------+ +| cast(str_lb as json) | ++-------------------------------+ +| NULL | +| "base64:type251:" | +| "base64:type251:aGVsbG93b3Jk" | ++-------------------------------+ +mysql> set 
@@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_lt as json) from test.test_str where flag = 0; ++----------------------+ +| cast(str_lt as json) | ++----------------------+ +| NULL | ++----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_lt as json) from test.test_str where flag = 1; +{#REGEXP}.*Invalid.* +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(str_lt as json) from test.test_str where flag = 2; ++----------------------+ +| cast(str_lt as json) | ++----------------------+ +| [1, "1", {"1": "1"}] | ++----------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(col_b as json), cast(col_br as json) from test.test_binary; ++---------------------------+--------------------------+ +| cast(col_b as json) | cast(col_br as json) | ++---------------------------+--------------------------+ +| NULL | NULL | +| "base64:type254:MTIzNDU=" | "base64:type15:MTIzNDU=" | +| "base64:type254:MTIAAAA=" | "base64:type15:MTI=" | +| "base64:type254:AAAAAAA=" | "base64:type15:" | ++---------------------------+--------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(col as json) from test.test_bin_str; ++--------------------------+ +| cast(col as json) | ++--------------------------+ +| NULL | +| "base64:type15:MTIzNDU=" | +| "base64:type15:MTI=" | +| "base64:type15:" | ++--------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(col as json) from test.test_long_str; +{#REGEXP}.*Invalid.* + +## cast time as json +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(col_d as json),cast(col_dt as json),cast(col_ts as json),cast(col_y as json) from test.test_time; 
++---------------------+------------------------------+------------------------------+---------------------+ +| cast(col_d as json) | cast(col_dt as json) | cast(col_ts as json) | cast(col_y as json) | ++---------------------+------------------------------+------------------------------+---------------------+ +| NULL | NULL | NULL | NULL | +| "2023-11-14" | "2023-11-14 21:59:59.000000" | "2023-11-14 21:59:59.000000" | 2023 | ++---------------------+------------------------------+------------------------------+---------------------+ + +## cast duration as json +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(col1 as json), cast(col6 as json) from test.test_duration; ++--------------------+--------------------+ +| cast(col1 as json) | cast(col6 as json) | ++--------------------+--------------------+ +| NULL | NULL | +| "08:00:00.000000" | "08:00:00.000000" | ++--------------------+--------------------+ + +## cast json as json +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(col as json) from test.test_json; ++-------------------------------------------------+ +| cast(col as json) | ++-------------------------------------------------+ +| NULL | +| [1, 2, null, [], {}, 1.11, "a", "a"] | +| {"a": 1, "b": 1.11, "c": [], "d": "d", "e": {}} | ++-------------------------------------------------+ + +# cast decimal as json +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select cast(col32 as json),cast(col64 as json),cast(col128 as json),cast(col256 as json) from test.test_decimal; ++---------------------+---------------------+----------------------+----------------------+ +| cast(col32 as json) | cast(col64 as json) | cast(col128 as json) | cast(col256 as json) | ++---------------------+---------------------+----------------------+----------------------+ +| NULL | NULL | NULL | NULL | +| 0.1 | 0.1 | 0.1 | 0.1 | 
++---------------------+---------------------+----------------------+----------------------+ + +# Clean up. +mysql> drop table if exists test.test_int; +mysql> drop table if exists test.test_float; +mysql> drop table if exists test.test_str; +mysql> drop table if exists test.test_binary; +mysql> drop table if exists test.test_bin_str; +mysql> drop table if exists test.test_long_str; +mysql> drop table if exists test.test_time; +mysql> drop table if exists test.test_duration; +mysql> drop table if exists test.test_json; +mysql> drop table if exists test.test_decimal;