Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stop using null terminators on JSON (de)serialization. #5353

Merged
merged 10 commits into from
Nov 26, 2024
9 changes: 4 additions & 5 deletions test/regression/targets/sc-18250.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

#include <test/support/tdb_catch.h>

static const char* schema_str = R"rstr(
static constexpr std::string_view schema_str = R"rstr(
{"arrayType":"dense","attributes":[{
"cellValNum":1,"compressor":"NO_COMPRESSION",
"compressorLevel":-1,"name":"a1","type":"INT32"}],
Expand Down Expand Up @@ -33,11 +33,10 @@ TEST_CASE(
status = tiledb_buffer_alloc(ctx, &buf);
REQUIRE(status == TILEDB_OK);

status =
tiledb_buffer_set_data(ctx, buf, (void*)schema_str, sizeof(schema_str));
status = tiledb_buffer_set_data(
ctx, buf, (void*)schema_str.data(), schema_str.size());
REQUIRE(status == TILEDB_OK);

status = tiledb_deserialize_array_schema(
ctx, buf, tiledb_serialization_type_t(0), 0, &schema);
status = tiledb_deserialize_array_schema(ctx, buf, TILEDB_JSON, 0, &schema);
REQUIRE(status == TILEDB_OK);
}
38 changes: 36 additions & 2 deletions test/src/unit-cppapi-array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ struct CPPArrayFx {
static const unsigned d1_tile = 10;
static const unsigned d2_tile = 5;

CPPArrayFx()
: ctx(vfs_test_setup_.ctx())
CPPArrayFx(shared_ptr<tiledb_config_t> config = nullptr)
: vfs_test_setup_(config.get())
, ctx(vfs_test_setup_.ctx())
, array_uri_{vfs_test_setup_.array_uri("cpp_unit_array")} {
Domain domain(ctx);
auto d1 = Dimension::create<int>(ctx, "d1", {{-100, 100}}, d1_tile);
Expand Down Expand Up @@ -83,6 +84,18 @@ struct CPPArrayFx {
std::string array_uri_;
};

struct CPPArrayFxJsonSerialization : public CPPArrayFx {
static Config create_config() {
Config result;
result.set("rest.server_serialization_format", "JSON");
return result;
}

CPPArrayFxJsonSerialization()
: CPPArrayFx(create_config().ptr()) {
}
};

TEST_CASE("Config", "[cppapi][config][non-rest]") {
// Primarily to instansiate operator[]/= template
tiledb::Config cfg;
Expand All @@ -92,6 +105,27 @@ TEST_CASE("Config", "[cppapi][config][non-rest]") {
CHECK((std::string)cfg["vfs.s3.use_virtual_addressing"] == "true");
}

TEST_CASE_METHOD(
CPPArrayFxJsonSerialization,
"C++ API: Arrays, REST JSON serialization",
"[cppapi][basic][rest][json]") {
SECTION("Dimensions") {
ArraySchema schema(ctx, array_uri_);
CHECK(schema.domain().ndim() == 2);
auto a = schema.domain().dimensions()[0].domain<int>();
auto b = schema.domain().dimensions()[1].domain<int>();
CHECK_THROWS(schema.domain().dimensions()[0].domain<unsigned>());
CHECK(a.first == -100);
CHECK(a.second == 100);
CHECK(b.first == 0);
CHECK(b.second == 100);
CHECK_THROWS(schema.domain().dimensions()[0].tile_extent<unsigned>() == 10);
CHECK(schema.domain().dimensions()[0].tile_extent<int>() == 10);
CHECK(schema.domain().dimensions()[1].tile_extent<int>() == 5);
CHECK(schema.domain().cell_num() == 20301);
}
}

TEST_CASE_METHOD(CPPArrayFx, "C++ API: Arrays", "[cppapi][basic][rest]") {
SECTION("Dimensions") {
ArraySchema schema(ctx, array_uri_);
Expand Down
16 changes: 0 additions & 16 deletions tiledb/sm/buffer/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -556,22 +556,6 @@ class SerializationBuffer {
buffer_ = iter;
}

/**
* Assigns a new owned buffer to the object, and appends a null terminator.
*/
template <class Iter>
inline void assign_null_terminated(const Iter& iter) {
// Clear vector and deallocate its buffer.
buffer_owner_.clear();
buffer_owner_.shrink_to_fit();
// Reserve enough space for the data and the null terminator.
buffer_owner_.reserve(
std::distance(std::cbegin(iter), std::cend(iter)) + 1);
buffer_owner_.assign(std::cbegin(iter), std::cend(iter));
buffer_owner_.push_back('\0');
buffer_ = buffer_owner_;
}

/**
* Gets the number of bytes in the buffer.
*/
Expand Down
14 changes: 0 additions & 14 deletions tiledb/sm/buffer/test/unit_serialization_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,20 +84,6 @@ TEST_CASE(
CHECK_THROWS(buff.owned_mutable_span());
}

TEST_CASE(
"SerializationBuffer: Test owned null-terminated buffer",
"[serialization_buffer][owned][null_terminated]") {
SerializationBuffer buff{
tiledb::test::get_test_memory_tracker()->get_resource(
MemoryType::SERIALIZATION_BUFFER)};
std::string_view data = "abcd";
buff.assign_null_terminated(data);
span<const char> buff_span = buff;
CHECK(buff.size() == data.size() + 1);
CHECK(memcmp(buff_span.data(), data.data(), data.size()) == 0);
CHECK(static_cast<span<const char>>(buff)[buff.size() - 1] == '\0');
}

TEST_CASE(
"SerializationBuffer: Test memory tracking",
"[serialization_buffer][memory_tracking]") {
Expand Down
35 changes: 0 additions & 35 deletions tiledb/sm/rest/rest_client_remote.cc
Original file line number Diff line number Diff line change
Expand Up @@ -217,10 +217,6 @@ RestClientRemote::get_array_schema_from_rest(const URI& uri) {
"Error getting array schema from REST; server returned no data.")),
nullopt};

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK_TUPLE(
ensure_json_null_delimited_string(&returned_data), nullopt);

auto array_schema = serialization::array_schema_deserialize(
serialization_type_, returned_data, memory_tracker_);

Expand Down Expand Up @@ -271,8 +267,6 @@ RestClientRemote::post_array_schema_from_rest(
"Error getting array schema from REST; server returned no data.");
}

// Ensure data has a null delimiter for cap'n proto if using JSON
throw_if_not_ok(ensure_json_null_delimited_string(&returned_data));
return serialization::deserialize_load_array_schema_response(
uri, config, serialization_type_, returned_data, memory_tracker_);
}
Expand Down Expand Up @@ -355,8 +349,6 @@ void RestClientRemote::post_array_from_rest(
"Error getting array from REST; server returned no data.");
}

// Ensure data has a null delimiter for cap'n proto if using JSON
throw_if_not_ok(ensure_json_null_delimited_string(&returned_data));
serialization::array_deserialize(
array, serialization_type_, returned_data, resources, memory_tracker_);
}
Expand Down Expand Up @@ -489,9 +481,6 @@ Status RestClientRemote::get_array_non_empty_domain(
Status_RestError("Error getting array non-empty domain "
"from REST; server returned no data."));

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data));

// Deserialize data returned
return serialization::nonempty_domain_deserialize(
array, returned_data, serialization_type_);
Expand Down Expand Up @@ -527,8 +516,6 @@ Status RestClientRemote::get_array_metadata_from_rest(
return LOG_STATUS(Status_RestError(
"Error getting array metadata from REST; server returned no data."));

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data));
return serialization::metadata_deserialize(
array->unsafe_metadata(),
array->config(),
Expand Down Expand Up @@ -613,8 +600,6 @@ RestClientRemote::post_enumerations_from_rest(
"Error getting enumerations from REST; server returned no data.");
}

// Ensure data has a null delimiter for cap'n proto if using JSON
throw_if_not_ok(ensure_json_null_delimited_string(&returned_data));
return serialization::deserialize_load_enumerations_response(
array_schema, config, serialization_type_, returned_data, memory_tracker);
}
Expand Down Expand Up @@ -669,8 +654,6 @@ void RestClientRemote::post_query_plan_from_rest(
"Error getting query plan from REST; server returned no data.");
}

// Ensure data has a null delimiter for cap'n proto if using JSON
throw_if_not_ok(ensure_json_null_delimited_string(&returned_data));
query_plan = serialization::deserialize_query_plan_response(
query, serialization_type_, returned_data);
}
Expand Down Expand Up @@ -1191,8 +1174,6 @@ Status RestClientRemote::get_query_est_result_sizes(
"Error getting array metadata from REST; server returned no data."));
}

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data));
return serialization::query_est_result_size_deserialize(
query, serialization_type_, true, returned_data);
}
Expand Down Expand Up @@ -1266,8 +1247,6 @@ Status RestClientRemote::post_fragment_info_from_rest(
return LOG_STATUS(Status_RestError(
"Error getting fragment info from REST; server returned no data."));

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data));
return serialization::fragment_info_deserialize(
fragment_info, serialization_type_, uri, returned_data, memory_tracker_);
}
Expand Down Expand Up @@ -1308,8 +1287,6 @@ Status RestClientRemote::post_group_metadata_from_rest(
"Error getting group metadata from REST; server returned no data."));
}

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data));
return serialization::metadata_deserialize(
group->unsafe_metadata(),
group->config(),
Expand Down Expand Up @@ -1407,8 +1384,6 @@ Status RestClientRemote::post_group_from_rest(const URI& uri, Group* group) {
return LOG_STATUS(Status_RestError(
"Error getting group from REST; server returned no data."));

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data));
return serialization::group_details_deserialize(
group, serialization_type_, returned_data);
}
Expand Down Expand Up @@ -1458,14 +1433,6 @@ void RestClientRemote::delete_group_from_rest(const URI& uri, bool recursive) {
stats_, url, serialization_type_, &returned_data, cache_key));
}

Status RestClientRemote::ensure_json_null_delimited_string(Buffer* buffer) {
if (serialization_type_ == SerializationType::JSON &&
buffer->value<char>(buffer->size() - 1) != '\0') {
RETURN_NOT_OK(buffer->write("\0", sizeof(char)));
}
return Status::Ok();
}

Status RestClientRemote::post_consolidation_to_rest(
const URI& uri, const Config& config) {
BufferList serialized{memory_tracker_};
Expand Down Expand Up @@ -1545,8 +1512,6 @@ RestClientRemote::post_consolidation_plan_from_rest(
"Error getting query plan from REST; server returned no data.");
}

// Ensure data has a null delimiter for cap'n proto if using JSON
throw_if_not_ok(ensure_json_null_delimited_string(&returned_data));
return serialization::deserialize_consolidation_plan_response(
serialization_type_, returned_data);
}
Expand Down
9 changes: 0 additions & 9 deletions tiledb/sm/rest/rest_client_remote.h
Original file line number Diff line number Diff line change
Expand Up @@ -577,15 +577,6 @@ class RestClientRemote : public RestClient {
* @return Returns the redirection URI if exists and empty string otherwise
*/
std::string redirect_uri(const std::string& cache_key);

/**
* Cap'n proto requires JSON messages to be null terminated c-style strings.
* This function checks if using JSON that returned buffers are null delimited
*
* @param buffer of server message
* @return Status
*/
Status ensure_json_null_delimited_string(Buffer* buffer);
};

} // namespace tiledb::sm
20 changes: 6 additions & 14 deletions tiledb/sm/serialization/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ Status metadata_serialize(
case SerializationType::JSON: {
::capnp::JsonCodec json;
kj::String capnp_json = json.encode(builder);
serialized_buffer.assign_null_terminated(capnp_json);
serialized_buffer.assign(capnp_json);
break;
}
case SerializationType::CAPNP: {
Expand Down Expand Up @@ -447,12 +447,9 @@ Status metadata_deserialize(
try {
switch (serialize_type) {
case SerializationType::JSON: {
::capnp::JsonCodec json;
::capnp::MallocMessageBuilder message_builder;
auto builder = message_builder.initRoot<capnp::ArrayMetadata>();
json.decode(
kj::StringPtr(static_cast<const char*>(serialized_buffer.data())),
builder);
utils::decode_json_message(serialized_buffer, builder);
auto reader = builder.asReader();

// Deserialize
Expand Down Expand Up @@ -515,7 +512,7 @@ Status array_serialize(
case SerializationType::JSON: {
::capnp::JsonCodec json;
kj::String capnp_json = json.encode(ArrayBuilder);
serialized_buffer.assign_null_terminated(capnp_json);
serialized_buffer.assign(capnp_json);
break;
}
case SerializationType::CAPNP: {
Expand Down Expand Up @@ -551,13 +548,10 @@ void array_deserialize(
try {
switch (serialize_type) {
case SerializationType::JSON: {
::capnp::JsonCodec json;
::capnp::MallocMessageBuilder message_builder;
capnp::Array::Builder array_builder =
message_builder.initRoot<capnp::Array>();
json.decode(
kj::StringPtr(static_cast<const char*>(serialized_buffer.data())),
array_builder);
utils::decode_json_message(serialized_buffer, array_builder);
capnp::Array::Reader array_reader = array_builder.asReader();
array_from_capnp(array_reader, resources, array, true, memory_tracker);
break;
Expand Down Expand Up @@ -612,7 +606,7 @@ Status array_open_serialize(
case SerializationType::JSON: {
::capnp::JsonCodec json;
kj::String capnp_json = json.encode(arrayOpenBuilder);
serialized_buffer.assign_null_terminated(capnp_json);
serialized_buffer.assign(capnp_json);
break;
}
case SerializationType::CAPNP: {
Expand Down Expand Up @@ -652,9 +646,7 @@ Status array_open_deserialize(
::capnp::MallocMessageBuilder message_builder;
capnp::ArrayOpen::Builder array_open_builder =
message_builder.initRoot<capnp::ArrayOpen>();
json.decode(
kj::StringPtr(static_cast<const char*>(serialized_buffer.data())),
array_open_builder);
utils::decode_json_message(serialized_buffer, array_open_builder, json);
capnp::ArrayOpen::Reader array_open_reader =
array_open_builder.asReader();
RETURN_NOT_OK(array_open_from_capnp(array_open_reader, array));
Expand Down
Loading
Loading