Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support cast as json #8333

Merged
merged 65 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from 61 commits
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
c769c97
add appendJsonBinary
SeaRise Nov 7, 2023
6d677d9
tmp save
SeaRise Nov 8, 2023
749bc3c
tmp save
SeaRise Nov 8, 2023
bbb96e6
merge master
SeaRise Nov 8, 2023
a2e6f30
revert some useless change
SeaRise Nov 8, 2023
a5ecd2f
revert
SeaRise Nov 8, 2023
c694cfd
cast json as json
SeaRise Nov 8, 2023
137edc5
cast real as json; cast decimal as json
SeaRise Nov 8, 2023
386b886
cast int as json
SeaRise Nov 8, 2023
900030c
cast string as json step1
SeaRise Nov 8, 2023
dbe7506
tmp save
SeaRise Nov 8, 2023
ef05b96
cast string as json
SeaRise Nov 9, 2023
c7f8c2e
cast time as json
SeaRise Nov 9, 2023
1195c88
cast duration as json
SeaRise Nov 9, 2023
a58de3e
add ut for cast json as json
SeaRise Nov 9, 2023
a9e4d29
update
SeaRise Nov 13, 2023
b72f70a
ut finish
SeaRise Nov 13, 2023
71f8116
fix format
SeaRise Nov 13, 2023
40b24d8
tmp save
SeaRise Nov 13, 2023
c981d29
ut ut ut
SeaRise Nov 14, 2023
b8771b6
fmt
SeaRise Nov 14, 2023
7dbde8c
add it
SeaRise Nov 14, 2023
7e398ba
add it for decimal
SeaRise Nov 14, 2023
c82b923
revert
SeaRise Nov 14, 2023
170874f
fix #NO_UNESCAPE
SeaRise Nov 15, 2023
5c4ac21
Update tests/fullstack-test/expr/cast_as_json.test
SeaRise Nov 15, 2023
3cb5df4
Merge branch 'master' into cast_as_json
SeaRise Nov 15, 2023
4f238f7
introduce simdjson
SeaRise Nov 15, 2023
7b613c1
fix it
SeaRise Nov 15, 2023
b70e571
add ut for simdjson
SeaRise Nov 15, 2023
f4a10e1
use simdjson
SeaRise Nov 15, 2023
112558a
fmt
SeaRise Nov 15, 2023
214e992
fix obj
SeaRise Nov 15, 2023
529e402
fmt
SeaRise Nov 15, 2023
eba1300
fix and check deep
SeaRise Nov 16, 2023
cdb48ca
ut
SeaRise Nov 16, 2023
1c7401b
fmt
SeaRise Nov 16, 2023
84fec10
revert
SeaRise Nov 16, 2023
c27dfd4
revert
SeaRise Nov 16, 2023
fcdff44
Update dbms/src/Functions/tests/gtest_cast_as_json.cpp
SeaRise Nov 16, 2023
ce62b37
Update dbms/src/Functions/tests/gtest_cast_as_json.cpp
SeaRise Nov 16, 2023
9995ccb
prepare for optimize
SeaRise Nov 16, 2023
66c5a7c
optimize
SeaRise Nov 16, 2023
e680b94
add ut
SeaRise Nov 17, 2023
fcc7a6d
fmt
SeaRise Nov 17, 2023
c0e6aa8
update
SeaRise Nov 17, 2023
25b2e93
add more uts
SeaRise Nov 17, 2023
9aa59b8
minor refine
SeaRise Nov 20, 2023
b8c2bfa
Update dbms/src/Functions/FunctionsJson.h
SeaRise Nov 21, 2023
5e7717b
Merge branch 'master' into cast_as_json
SeaRise Nov 21, 2023
04bffbb
minior refine for json_array
SeaRise Nov 21, 2023
f0b1781
minior refine for json_array
SeaRise Nov 21, 2023
25195e6
more refine
SeaRise Nov 21, 2023
bc7e55d
tmp save
SeaRise Nov 22, 2023
35e8031
address comments
SeaRise Nov 22, 2023
1a83a58
fix
SeaRise Nov 22, 2023
90ca9ab
format
SeaRise Nov 23, 2023
2163ca7
Update dbms/src/Functions/FunctionsJson.h
SeaRise Nov 23, 2023
b3600ff
address comments
SeaRise Nov 23, 2023
efdeb11
fmt
SeaRise Nov 23, 2023
f662dad
fmt
SeaRise Nov 23, 2023
ae175d6
address comments
SeaRise Nov 27, 2023
2430e09
update
SeaRise Nov 27, 2023
86b2e6e
udpate
SeaRise Nov 27, 2023
8b77518
Merge branch 'master' into cast_as_json
ti-chi-bot[bot] Nov 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,6 @@
[submodule "contrib/qpl"]
path = contrib/qpl
url = https://github.com/intel/qpl.git
[submodule "contrib/simdjson"]
path = contrib/simdjson
url = https://github.com/simdjson/simdjson
2 changes: 2 additions & 0 deletions contrib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,5 @@ endif ()
add_subdirectory(magic_enum)

add_subdirectory(aws-cmake)

add_subdirectory(simdjson)
1 change: 1 addition & 0 deletions contrib/simdjson
Submodule simdjson added at 17cb45
1 change: 1 addition & 0 deletions dbms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ target_link_libraries (tiflash_common_io
prometheus-cpp::pull
cpptoml
magic_enum
simdjson
libsymbolization
${RE2_LIBRARY}
${RE2_ST_LIBRARY}
Expand Down
124 changes: 124 additions & 0 deletions dbms/src/Common/VectorWriter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <common/likely.h>
#include <common/types.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>

namespace DB
{
/// Sequential byte writer on top of a contiguous, resizable container whose
/// `value_type` is exactly one byte wide (enforced by the static_assert below).
///
/// The constructor grows the container to at least `initial_size` bytes and keeps
/// raw `pos`/`end` pointers into its storage. Writing starts at offset 0, so bytes
/// already stored in the container are overwritten rather than appended to.
/// When the free space is too small, the container is resized to
/// max(size + requested, ceil(size * 1.5)).
/// The destructor resizes the container to the number of bytes written, so the
/// container only has its final length once the writer has been destroyed.
template <typename VectorType>
class VectorWriter
{
public:
    using Position = char *;

    /// Binds the writer to `vector_` and makes sure at least `initial_size` bytes are available.
    explicit VectorWriter(VectorType & vector_, size_t initial_size = 16)
        : vector(vector_)
    {
        if (vector.size() < initial_size)
            vector.resize(initial_size);
        pos = reinterpret_cast<Position>(vector.data());
        end = reinterpret_cast<Position>(vector.data() + vector.size());
    }

    /// Writes one byte at the current position and advances by one.
    inline void write(char x)
    {
        reserveForNextSize(1);
        *pos = x;
        ++pos;
    }

    /// Copies `n` bytes starting at `from` to the current position and advances by `n`.
    /// A zero-length write is a no-op.
    void write(const char * from, size_t n)
    {
        // reserveForNextSize() asserts a positive request, so bail out before calling it.
        if (unlikely(n == 0))
            return;
        reserveForNextSize(n);
        std::memcpy(pos, from, n);
        pos += n;
    }

    /// Moves the write position to `new_offset` (measured from the start of the container),
    /// growing the container first when `new_offset` lies beyond its current size.
    void setOffset(size_t new_offset)
    {
        if (new_offset > vector.size())
        {
            // new_offset > size() >= count(), so the subtraction cannot underflow.
            size_t request_size = (new_offset - count());
            reserveForNextSize(request_size);
        }
        pos = reinterpret_cast<Position>(vector.data() + new_offset);
    }

    /// Moves the write position forward by `n` bytes without writing anything.
    void advance(size_t n) { setOffset(offset() + n); }

    /// Distance in bytes from the start of the container to the write position.
    size_t offset() const { return pos - reinterpret_cast<Position>(vector.data()); }

    /// Same value as offset().
    size_t count() const { return offset(); }

    /// Shrinks (or grows) the container to exactly the current write position.
    ~VectorWriter()
    {
        vector.resize(count());
        pos = nullptr;
        end = nullptr;
    }

private:
    /// Bytes available between the write position and the end of the container.
    size_t remainingSize() const { return static_cast<size_t>(end - pos); }

    /// Resizes the container to `new_size` and re-derives `pos`/`end`,
    /// because resizing may move the underlying storage.
    void reserve(size_t new_size)
    {
        size_t pos_offset = offset();
        vector.resize(new_size);
        pos = reinterpret_cast<Position>(vector.data() + pos_offset);
        end = reinterpret_cast<Position>(vector.data() + vector.size());
    }

    /// Ensures at least `request_size` (> 0) bytes are free after the write position.
    void reserveForNextSize(size_t request_size = 1)
    {
        assert(request_size > 0);
        if (remainingSize() < request_size)
        {
            const size_t old_size = vector.size();
            // ceil(old_size * 1.5) computed with integers only: both std::max arguments
            // are size_t, and large sizes are not rounded through a double.
            const size_t grown_size = old_size + (old_size + 1) / 2;
            reserve(std::max(old_size + request_size, grown_size));
        }
    }

private:
    static_assert(sizeof(typename VectorType::value_type) == sizeof(char));
    VectorType & vector;

    Position pos = nullptr;
    Position end = nullptr;
};

/// Writes the single byte `x` by forwarding to `writer.write(char)`.
/// `Writer` can be any type that provides such a member.
template <typename Writer>
inline void writeChar(char x, Writer & writer)
{
    writer.write(x);
}

template <typename VectorWriter>
inline void writeVarUInt(UInt64 x, VectorWriter & writer)
{
while (x >= 0x80)
{
writeChar(static_cast<UInt8>(x) | 0x80, writer);
x >>= 7;
}
writeChar(x, writer);
}
} // namespace DB
216 changes: 216 additions & 0 deletions dbms/src/Common/tests/gtest_simdjson.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include <simdjson.h>

namespace DB::tests
{
TEST(TestSIMDJson, error)
{
    // Inputs that are not valid JSON: empty text, unbalanced brackets/braces and bare words.
    const char * const invalid_inputs[] = {
        "",
        "[]]",
        "fsdfhsdjhfjsdhfj",
        "{}}",
        "[[], [[fdjfhdjf]]]",
    };

    // One parser instance is reused for every input.
    simdjson::dom::parser parser;
    for (const char * raw_input : invalid_inputs)
    {
        std::string json_str(raw_input);
        auto res = parser.parse(json_str);
        // Parsing must report an error for each of these inputs.
        ASSERT_TRUE(res.error());
    }
}

TEST(TestSIMDJson, literal)
{
    simdjson::dom::parser parser;

    // Numeric literals: each must be reported as a number and read back as the expected double.
    struct NumberCase
    {
        const char * text;
        double expected;
    };
    const NumberCase number_cases[] = {
        {"0", 0},
        {"1", 1},
        {"-1", -1},
        {"1.111", 1.111},
        {"-1.111", -1.111},
    };
    for (const auto & number_case : number_cases)
    {
        std::string json_str(number_case.text);
        auto parsed = parser.parse(json_str);
        ASSERT_TRUE(parsed.is_number());
        auto as_double = parsed.get_double();
        ASSERT_TRUE(!as_double.error());
        ASSERT_EQ(as_double.value_unsafe(), number_case.expected);
    }

    // Boolean literals.
    struct BoolCase
    {
        const char * text;
        bool expected;
    };
    const BoolCase bool_cases[] = {
        {"true", true},
        {"false", false},
    };
    for (const auto & bool_case : bool_cases)
    {
        std::string json_str(bool_case.text);
        auto parsed = parser.parse(json_str);
        ASSERT_TRUE(parsed.is_bool());
        auto as_bool = parsed.get_bool();
        ASSERT_TRUE(!as_bool.error());
        ASSERT_EQ(as_bool.value_unsafe(), bool_case.expected);
    }

    // The null literal.
    {
        std::string json_str("null");
        auto parsed = parser.parse(json_str);
        ASSERT_TRUE(parsed.is_null());
    }

    // A quoted string: the value read back does not include the surrounding quotes.
    {
        std::string json_str("\"a\"");
        auto parsed = parser.parse(json_str);
        ASSERT_TRUE(parsed.is_string());
        auto as_string = parsed.get_string();
        ASSERT_TRUE(!as_string.error());
        ASSERT_EQ(std::string(as_string.value_unsafe()), "a");
    }
}

TEST(TestSIMDJson, array)
{
    simdjson::dom::parser parser;

    // Flat arrays: only the element count is checked (with and without whitespace).
    struct SizeCase
    {
        const char * text;
        size_t expected_size;
    };
    const SizeCase size_cases[] = {
        {"[]", 0},
        {"[1, 2]", 2},
        {"[1,2]", 2},
    };
    for (const auto & size_case : size_cases)
    {
        std::string json_str(size_case.text);
        auto parsed = parser.parse(json_str);
        ASSERT_TRUE(parsed.is_array());
        auto as_array = parsed.get_array();
        ASSERT_TRUE(!as_array.error());
        const auto & elements = as_array.value_unsafe();
        ASSERT_EQ(elements.size(), size_case.expected_size);
    }

    // Nested array: the outer array has one element, which is itself an array.
    {
        std::string json_str("[[]]");
        auto parsed = parser.parse(json_str);
        ASSERT_TRUE(parsed.is_array());
        auto as_array = parsed.get_array();
        ASSERT_TRUE(!as_array.error());
        const auto & elements = as_array.value_unsafe();
        ASSERT_EQ(elements.size(), 1);
        ASSERT_TRUE(elements.at(0).is_array());
    }
}

TEST(TestSIMDJson, object)
{
    simdjson::dom::parser parser;

    // Empty object: no members.
    {
        std::string json_str("{}");
        auto parsed = parser.parse(json_str);
        ASSERT_TRUE(parsed.is_object());
        auto as_object = parsed.get_object();
        ASSERT_TRUE(!as_object.error());
        const auto & fields = as_object.value_unsafe();
        ASSERT_EQ(fields.size(), 0);
    }

    // Objects with string members: check the member count, then look one key up
    // and compare its string value.
    struct LookupCase
    {
        const char * text;
        size_t expected_size;
        const char * key;
        const char * expected_value;
    };
    const LookupCase lookup_cases[] = {
        {R"({"a":"b"})", 1, "a", "b"},
        {R"({"a" : "b"})", 1, "a", "b"},
        {R"({"a" : "b", "c":"d"})", 2, "c", "d"},
    };
    for (const auto & lookup_case : lookup_cases)
    {
        std::string json_str(lookup_case.text);
        auto parsed = parser.parse(json_str);
        ASSERT_TRUE(parsed.is_object());
        auto as_object = parsed.get_object();
        ASSERT_TRUE(!as_object.error());
        const auto & fields = as_object.value_unsafe();
        ASSERT_EQ(fields.size(), lookup_case.expected_size);
        const auto & member = fields.at_key(lookup_case.key);
        ASSERT_TRUE(member.is_string());
        ASSERT_EQ(std::string(member.get_string().value_unsafe()), lookup_case.expected_value);
    }
}

} // namespace DB::tests
Loading