From 9af32a42de6b89553adbe2c198a061c35df7b01f Mon Sep 17 00:00:00 2001
From: Phi
Date: Fri, 9 Feb 2024 14:24:06 +0100
Subject: [PATCH] Add json_array_append function

---
 extension/json/CMakeLists.txt                 |   1 +
 extension/json/include/json_executors.hpp     |  41 +++++++
 extension/json/include/json_functions.hpp     |   1 +
 extension/json/json_config.py                 |   1 +
 extension/json/json_functions.cpp             |   1 +
 .../json/json_functions/json_array_append.cpp | 114 ++++++++++++++++++
 6 files changed, 159 insertions(+)
 create mode 100644 extension/json/json_functions/json_array_append.cpp

diff --git a/extension/json/CMakeLists.txt b/extension/json/CMakeLists.txt
index b2626205d1ee..d032d73c35c8 100644
--- a/extension/json/CMakeLists.txt
+++ b/extension/json/CMakeLists.txt
@@ -31,6 +31,7 @@ set(JSON_EXTENSION_FILES
     json_functions/json_serialize_sql.cpp
     json_functions/read_json.cpp
     json_functions/read_json_objects.cpp
+    json_functions/json_array_append.cpp
     ${YYJSON_OBJECT_FILES})
 
 build_static_extension(json ${JSON_EXTENSION_FILES})
diff --git a/extension/json/include/json_executors.hpp b/extension/json/include/json_executors.hpp
index 334170ee73b2..167666b4d636 100644
--- a/extension/json/include/json_executors.hpp
+++ b/extension/json/include/json_executors.hpp
@@ -29,6 +29,23 @@ struct JSONExecutors {
 		});
 	}
 
+	//! Single-argument JSON manipulation function: parses the input, copies it into a mutable yyjson document,
+	//! passes the mutable root and document to "fun" and writes the value returned by "fun" back as a string
+	static void UnaryMutExecute(
+	    DataChunk &args, ExpressionState &state, Vector &result,
+	    std::function<yyjson_mut_val *(yyjson_mut_val *, yyjson_mut_doc *, yyjson_alc *, Vector &)> fun) {
+		auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
+		auto alc = lstate.json_allocator.GetYYAlc();
+
+		auto &inputs = args.data[0];
+		UnaryExecutor::Execute<string_t, string_t>(inputs, result, args.size(), [&](string_t input) {
+			auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, alc);
+			auto mut_doc = yyjson_doc_mut_copy(doc, alc);
+			auto new_val = fun(mut_doc->root, mut_doc, alc, result);
+			return JSONCommon::WriteVal<yyjson_mut_val>(new_val, alc); // Write String back
+		});
+	}
+
 	//! Two-argument JSON read function (with path query), i.e. json_type('[1, 2, 3]', '$[0]')
 	template <class T>
 	static void BinaryExecute(DataChunk &args, ExpressionState &state, Vector &result,
@@ -108,6 +125,30 @@ struct JSONExecutors {
 		}
 	}
 
+	//! Two-argument JSON manipulation function: the left argument is parsed and copied into a mutable yyjson
+	//! document, "fun" receives its root, the document and the right argument (read as T) of the same row
+	template <class T>
+	static void BinaryMutExecute(
+	    DataChunk &args, ExpressionState &state, Vector &result,
+	    std::function<yyjson_mut_val *(yyjson_mut_val *, yyjson_mut_doc *, T, yyjson_alc *, Vector &)> fun) {
+		auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
+		auto alc = lstate.json_allocator.GetYYAlc();
+
+		auto &inputs_left = args.data[0];
+		auto &inputs_right = args.data[1];
+
+		BinaryExecutor::Execute<string_t, T, string_t>(
+		    inputs_left, inputs_right, result, args.size(),
+		    [&](string_t left, T right) {
+			    auto ldoc = JSONCommon::ReadDocument(left, JSONCommon::READ_FLAG, alc);
+			    auto mut_ldoc = yyjson_doc_mut_copy(ldoc, alc);
+
+			    auto new_val = fun(mut_ldoc->root, mut_ldoc, right, alc, result);
+
+			    return JSONCommon::WriteVal<yyjson_mut_val>(new_val, alc);
+		    });
+	}
+
 	//! JSON read function with list of path queries, i.e. json_type('[1, 2, 3]', ['$[0]', '$[1]'])
 	template <class T>
 	static void ExecuteMany(DataChunk &args, ExpressionState &state, Vector &result,
diff --git a/extension/json/include/json_functions.hpp b/extension/json/include/json_functions.hpp
index b3c6d3d1ea4a..427e0efcab57 100644
--- a/extension/json/include/json_functions.hpp
+++ b/extension/json/include/json_functions.hpp
@@ -102,6 +102,7 @@ class JSONFunctions {
 	static ScalarFunctionSet GetSerializeSqlFunction();
 	static ScalarFunctionSet GetDeserializeSqlFunction();
 
+	static ScalarFunctionSet GetArrayAppendFunction();
 	static PragmaFunctionSet GetExecuteJsonSerializedSqlPragmaFunction();
 
 	template <class FUNCTION_INFO>
diff --git a/extension/json/json_config.py b/extension/json/json_config.py
index 543d4f23f59d..b6c655e05b71 100644
--- a/extension/json/json_config.py
+++ b/extension/json/json_config.py
@@ -32,5 +32,6 @@
         'extension/json/json_serializer.cpp',
         'extension/json/json_deserializer.cpp',
         'extension/json/serialize_json.cpp',
+        'extension/json/json_functions/json_array_append.cpp',
     ]
 ]
diff --git a/extension/json/json_functions.cpp b/extension/json/json_functions.cpp
index 72246ab8d69d..f973ad92abae 100644
--- a/extension/json/json_functions.cpp
+++ b/extension/json/json_functions.cpp
@@ -160,6 +160,7 @@ vector<ScalarFunctionSet> JSONFunctions::GetScalarFunctions() {
 	functions.push_back(GetSerializeSqlFunction());
 	functions.push_back(GetDeserializeSqlFunction());
 
+	functions.push_back(GetArrayAppendFunction());
 	return functions;
 }
 
diff --git a/extension/json/json_functions/json_array_append.cpp b/extension/json/json_functions/json_array_append.cpp
new file mode 100644
index 000000000000..51b0a63b6642
--- /dev/null
+++ b/extension/json/json_functions/json_array_append.cpp
@@ -0,0 +1,114 @@
+#include "json_executors.hpp"
+
+namespace duckdb {
+
+//! Throws if the document passed as the first argument is not a JSON array.
+//! The document is user input, so it is validated in release builds as well instead of only being asserted.
+static void CheckIsArray(yyjson_mut_val *arr) {
+	if (!yyjson_mut_is_arr(arr)) {
+		throw InvalidInputException("json_array_append() expects a JSON array as its first argument");
+	}
+}
+
+//! json_array_append(array, element): appends "element" to the end of "array" and returns the resulting array.
+//! Every overload registered in GetArrayAppendFunction() executes this function; it dispatches on the type of
+//! the element argument.
+static void ArrayAppendFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+	D_ASSERT(args.data[0].GetType() == LogicalType::VARCHAR || args.data[0].GetType() == JSONCommon::JSONType());
+	const auto &right_type = args.data[1].GetType();
+
+	if (right_type == LogicalType::VARCHAR || right_type == JSONCommon::JSONType()) {
+		// String or JSON value: parse the element as JSON and copy it into the document that owns the array
+		JSONExecutors::BinaryMutExecute<string_t>(
+		    args, state, result,
+		    [](yyjson_mut_val *arr, yyjson_mut_doc *doc, string_t element, yyjson_alc *alc, Vector &) {
+			    CheckIsArray(arr);
+			    auto element_doc = JSONCommon::ReadDocument(element, JSONCommon::READ_FLAG, alc);
+			    yyjson_mut_arr_append(arr, yyjson_val_mut_copy(doc, element_doc->root));
+			    return arr;
+		    });
+	} else if (right_type == LogicalType::BOOLEAN) {
+		JSONExecutors::BinaryMutExecute<bool>(
+		    args, state, result,
+		    [](yyjson_mut_val *arr, yyjson_mut_doc *doc, bool element, yyjson_alc *, Vector &) {
+			    CheckIsArray(arr);
+			    yyjson_mut_arr_add_bool(doc, arr, element);
+			    return arr;
+		    });
+	} else if (right_type == LogicalType::BIGINT) {
+		JSONExecutors::BinaryMutExecute<int64_t>(
+		    args, state, result,
+		    [](yyjson_mut_val *arr, yyjson_mut_doc *doc, int64_t element, yyjson_alc *, Vector &) {
+			    CheckIsArray(arr);
+			    yyjson_mut_arr_add_int(doc, arr, element);
+			    return arr;
+		    });
+	} else if (right_type == LogicalType::UINTEGER) {
+		// The vector has to be read with the element type it actually stores (32-bit unsigned), not as int64_t
+		JSONExecutors::BinaryMutExecute<uint32_t>(
+		    args, state, result,
+		    [](yyjson_mut_val *arr, yyjson_mut_doc *doc, uint32_t element, yyjson_alc *, Vector &) {
+			    CheckIsArray(arr);
+			    yyjson_mut_arr_add_uint(doc, arr, element);
+			    return arr;
+		    });
+	} else if (right_type == LogicalType::UBIGINT) {
+		// Appended as unsigned so that values above the int64_t range are not written as negative numbers
+		JSONExecutors::BinaryMutExecute<uint64_t>(
+		    args, state, result,
+		    [](yyjson_mut_val *arr, yyjson_mut_doc *doc, uint64_t element, yyjson_alc *, Vector &) {
+			    CheckIsArray(arr);
+			    yyjson_mut_arr_add_uint(doc, arr, element);
+			    return arr;
+		    });
+	} else if (right_type == LogicalType::DOUBLE) {
+		JSONExecutors::BinaryMutExecute<double>(
+		    args, state, result,
+		    [](yyjson_mut_val *arr, yyjson_mut_doc *doc, double element, yyjson_alc *, Vector &) {
+			    CheckIsArray(arr);
+			    yyjson_mut_arr_add_real(doc, arr, element);
+			    return arr;
+		    });
+	} else {
+		// Only the element types registered in GetArrayAppendFunction() are handled above
+		throw InternalException("json_array_append(): unsupported element type %s", right_type.ToString());
+	}
+}
+
+//! Registers one json_array_append(lhs, rhs) overload that returns JSON and is executed by ArrayAppendFunction
+static void GetArrayAppendFunctionInternal(ScalarFunctionSet &set, const LogicalType &lhs, const LogicalType &rhs) {
+	set.AddFunction(ScalarFunction("json_array_append", {lhs, rhs}, JSONCommon::JSONType(), ArrayAppendFunction,
+	                               nullptr, nullptr, nullptr, JSONFunctionLocalState::Init));
+}
+
+//! Builds the json_array_append function set: the array may be VARCHAR or JSON for every supported element type
+ScalarFunctionSet JSONFunctions::GetArrayAppendFunction() {
+	ScalarFunctionSet set("json_array_append");
+
+	// Booleans are appended directly
+	GetArrayAppendFunctionInternal(set, LogicalType::VARCHAR, LogicalType::BOOLEAN);
+	GetArrayAppendFunctionInternal(set, JSONCommon::JSONType(), LogicalType::BOOLEAN);
+
+	// Integer types: narrower integer types are not registered separately, this relies on a consistent
+	// casting strategy up front that converts them to one of the types below
+	GetArrayAppendFunctionInternal(set, LogicalType::VARCHAR, LogicalType::BIGINT);
+	GetArrayAppendFunctionInternal(set, JSONCommon::JSONType(), LogicalType::BIGINT);
+	GetArrayAppendFunctionInternal(set, LogicalType::VARCHAR, LogicalType::UINTEGER);
+	GetArrayAppendFunctionInternal(set, JSONCommon::JSONType(), LogicalType::UINTEGER);
+	GetArrayAppendFunctionInternal(set, LogicalType::VARCHAR, LogicalType::UBIGINT);
+	GetArrayAppendFunctionInternal(set, JSONCommon::JSONType(), LogicalType::UBIGINT);
+
+	// Floating point types: FLOAT is covered by the automatic up-front cast to DOUBLE
+	GetArrayAppendFunctionInternal(set, LogicalType::VARCHAR, LogicalType::DOUBLE);
+	GetArrayAppendFunctionInternal(set, JSONCommon::JSONType(), LogicalType::DOUBLE);
+
+	// JSON and string values
+	GetArrayAppendFunctionInternal(set, LogicalType::VARCHAR, LogicalType::VARCHAR);
+	GetArrayAppendFunctionInternal(set, LogicalType::VARCHAR, JSONCommon::JSONType());
+	GetArrayAppendFunctionInternal(set, JSONCommon::JSONType(), JSONCommon::JSONType());
+	GetArrayAppendFunctionInternal(set, JSONCommon::JSONType(), LogicalType::VARCHAR);
+
+	return set;
+}
+
+} // namespace duckdb