Skip to content

Commit

Permalink
Merge pull request redpanda-data#23031 from andijcr/feat/core/6657/sc…
Browse files Browse the repository at this point in the history
…hema_registry_remove_schemas_requirement

Feat/core/6657/schema registry remove schemas requirement
  • Loading branch information
andijcr authored Aug 23, 2024
2 parents 70185be + 3fbcccb commit 5e3311a
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 81 deletions.
107 changes: 40 additions & 67 deletions src/v/pandaproxy/schema_registry/json.cc
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ class schema_context {
explicit schema_context(json_schema_definition::impl const& schema)
: _schema{schema} {}

json_schema_dialect dialect() const { return _schema.ctx.dialect; }

private:
json_schema_definition::impl const& _schema;
};
Expand Down Expand Up @@ -669,18 +671,22 @@ bool is_additional_superset(
// schema | true | recurse with {}
// schema | false | recurse with {"not":{}}

auto field_name = [&] {
// in draft 2020, if checking "type": "array", "items" is used to represent
// additional tuples items instead of an "additionalItems"
auto get_field_name = [&](json_schema_dialect d) {
switch (field_type) {
case additional_field_for::object:
return "additionalProperties";
case additional_field_for::array:
return "additionalItems";
return d == json_schema_dialect::draft202012 ? "items"
: "additionalItems";
}
}();
};

// helper to parse additional__
auto get_additional_props =
[&](json::Value const& v) -> std::variant<bool, json::Value const*> {
auto it = v.FindMember(field_name);
auto get_additional_props = [&](json_schema_dialect d, json::Value const& v)
-> std::variant<bool, json::Value const*> {
auto it = v.FindMember(get_field_name(d));
if (it == v.MemberEnd()) {
return true;
}
Expand Down Expand Up @@ -723,8 +729,8 @@ bool is_additional_superset(
// check subschemas for compatibility
return is_superset(ctx, *older, *newer);
}),
get_additional_props(older),
get_additional_props(newer));
get_additional_props(ctx.older.dialect(), older),
get_additional_props(ctx.newer.dialect(), newer));
}

bool is_string_superset(json::Value const& older, json::Value const& newer) {
Expand Down Expand Up @@ -872,7 +878,7 @@ bool is_array_superset(
// for array, "items" is a schema that validates all the elements.
// for tuple in Draft4, "items" is an array of schemas to validate the
// tuple, and "additionalItems" a schema to validate extra elements.
// TODO in later drafts, tuple validation has "prefixItems" as array of
// from draft 2020, tuple validation has "prefixItems" as array of
// schemas, "items" is for validation of extra elements, "additionalItems"
// is not used.
// This superset function has a common section for tuples and array, and
Expand Down Expand Up @@ -914,19 +920,27 @@ bool is_array_superset(
return false;
}

// in draft 2020, "prefixItems" is used to represent tuples instead of an
// overloaded "items"
constexpr static auto get_tuple_items_kw = [](json_schema_dialect d) {
if (d == json_schema_dialect::draft202012) {
return "prefixItems";
}
return "items";
};

// check if the input is an array schema or a tuple schema
auto is_array = [](json::Value const& v) -> bool {
// TODO "prefixItems" is not in Draft4, it's from later drafts. if it's
// present, it's a tuple schema
auto items_it = v.FindMember("items");
auto is_array = [](json_schema_dialect d, json::Value const& v) -> bool {
auto tuple_items_it = v.FindMember(get_tuple_items_kw(d));
// default for items is `{}` so it's not a tuple schema
// v is a tuple schema if "items" is an array of schemas
auto is_tuple = items_it != v.MemberEnd() && items_it->value.IsArray();
auto is_tuple = tuple_items_it != v.MemberEnd()
&& tuple_items_it->value.IsArray();
return !is_tuple;
};

auto older_is_array = is_array(older);
auto newer_is_array = is_array(newer);
auto older_is_array = is_array(ctx.older.dialect(), older);
auto newer_is_array = is_array(ctx.newer.dialect(), newer);

if (older_is_array != newer_is_array) {
// one is a tuple and the other is not. not compatible
Expand All @@ -936,8 +950,7 @@ bool is_array_superset(

if (older_is_array) {
// both are array, only "items" is relevant and it's a schema
// TODO after draft 4 "items" can be also a boolean so this needs to
// account for that note that "additionalItems" can be defined, but it's
// note that "additionalItems" can be defined, but it's
// not used by validation because every element is validated against
// "items"
return is_superset(
Expand All @@ -956,8 +969,10 @@ bool is_array_superset(
return false;
}

auto older_tuple_schema = older["items"].GetArray();
auto newer_tuple_schema = newer["items"].GetArray();
auto older_tuple_schema
= older[get_tuple_items_kw(ctx.older.dialect())].GetArray();
auto newer_tuple_schema
= newer[get_tuple_items_kw(ctx.newer.dialect())].GetArray();
// find the first pair of schemas that do not match
auto [older_it, newer_it] = std::ranges::mismatch(
older_tuple_schema,
Expand All @@ -982,7 +997,11 @@ bool is_array_superset(
// excess elements with older["additionalItems"]

auto older_additional_schema = get_object_or_empty(
ctx.older, older, "additionalItems");
ctx.older,
older,
ctx.older.dialect() == json_schema_dialect::draft202012
? "items"
: "additionalItems");

// check that all excess schemas are compatible with
// older["additionalItems"]
Expand Down Expand Up @@ -1541,11 +1560,6 @@ bool is_superset(
for (auto not_yet_handled_keyword : {
// draft 6 unhandled keywords:
"$ref",
// draft 2019-09 unhandled keywords:
"dependentRequired",
"dependentSchemas",
// draft 2020-12 unhandled keywords:
"prefixItems",
}) {
if (
newer.HasMember(not_yet_handled_keyword)
Expand All @@ -1566,43 +1580,6 @@ bool is_superset(
return true;
}

// this function assumes that the inputs are valid schemas, and if they have a
// $schema member it will have a string value
bool check_compatible_dialects(
json::Value const& older, json::Value const& newer) {
auto get_dialect =
[](json::Value const& v) -> std::optional<json_schema_dialect> {
if (!v.IsObject()) {
// support true/false schemas
return std::nullopt;
}
auto it = v.FindMember("$schema");
if (it == v.MemberEnd()) {
return std::nullopt;
}
return from_uri(as_string_view(it->value));
};

auto older_dialect = get_dialect(older);
auto newer_dialect = get_dialect(newer);
if (!older_dialect.has_value() && !newer_dialect.has_value()) {
// no $schema, compatible
return true;
}
// basic support: require that both use the same dialect.
// TODO: schemas using different dialects could be conditionally compatible
if (older_dialect != newer_dialect) {
throw as_exception(invalid_schema(fmt::format(
"not yet implemented compatibility check between different dialects: "
"{}, {}",
older_dialect ? to_uri(older_dialect.value()) : "[not specified]",
newer_dialect ? to_uri(newer_dialect.value()) : "[not specified]")));
}

// same dialect, compatible
return true;
}

void sort(json::Value& val) {
switch (val.GetType()) {
case rapidjson::Type::kFalseType:
Expand Down Expand Up @@ -1672,10 +1649,6 @@ compatibility_result check_compatible(
const json_schema_definition& writer,
verbose is_verbose [[maybe_unused]]) {
auto is_compatible = [&]() {
// schemas might be using incompatible dialects
if (!check_compatible_dialects(reader().ctx.doc, writer().ctx.doc)) {
return false;
}
// reader is a superset of writer iff every schema that is valid for
// writer is also valid for reader
context ctx{.older{reader()}, .newer{writer()}};
Expand Down
121 changes: 107 additions & 14 deletions src/v/pandaproxy/schema_registry/test/test_json_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -619,20 +619,6 @@ static constexpr auto compatibility_test_cases = std::to_array<
= R"({"oneOf": [{"type":"number", "multipleOf": 10}, {"type": "number", "multipleOf": 1.1}]})",
.reader_is_compatible_with_writer = false,
},
// different dialects
{
.reader_schema = R"({"$schema": "http://json-schema.org/draft-04/schema"})",
.writer_schema
= R"({"$schema": "http://json-schema.org/draft-06/schema#"})",
.reader_is_compatible_with_writer = false,
.expected_exception = true,
},
{
.reader_schema = R"({"$schema": "http://json-schema.org/draft-04/schema"})",
.writer_schema = "true",
.reader_is_compatible_with_writer = false,
.expected_exception = true,
},
//***** compatible section *****
// atoms
{
Expand Down Expand Up @@ -861,6 +847,55 @@ static constexpr auto compatibility_test_cases = std::to_array<
})",
.reader_is_compatible_with_writer = true,
},
// array checks: prefixItems/items are compatible across drafts
{
.reader_schema = R"({
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "array",
"prefixItems": [
{
"type": "integer"
}
],
"items": false
})",

.writer_schema = R"({
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "array",
"items": [
{
"type": "integer"
}
],
"additionalItems": false
})",
.reader_is_compatible_with_writer = true,
},
{
.reader_schema = R"({
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "array",
"items": [
{
"type": "integer"
}
],
"additionalItems": false
})",
.writer_schema = R"({
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "array",
"prefixItems": [
{
"type": "integer"
}
],
"items": false
})",

.reader_is_compatible_with_writer = true,
},
// combinators: "not"
{
.reader_schema
Expand Down Expand Up @@ -1019,6 +1054,64 @@ static constexpr auto compatibility_test_cases = std::to_array<
= R"({"$schema": "http://json-schema.org/draft-06/schema#"})",
.reader_is_compatible_with_writer = true,
},
// different dialects
{
.reader_schema = R"({"$schema": "http://json-schema.org/draft-04/schema"})",
.writer_schema
= R"({"$schema": "http://json-schema.org/draft-06/schema#"})",
.reader_is_compatible_with_writer = true,
},
{
.reader_schema = R"({"$schema": "http://json-schema.org/draft-04/schema"})",
.writer_schema = "true",
.reader_is_compatible_with_writer = true,
},
// array checks: prefixItems/items are compatible across drafts, unspecified
// schema
{
.reader_schema = R"({
"type": "array",
"prefixItems": [
{
"type": "integer"
}
],
"items": false
})",

.writer_schema = R"({
"type": "array",
"items": [
{
"type": "integer"
}
],
"additionalItems": false
})",
.reader_is_compatible_with_writer = true,
},
{
.reader_schema = R"({
"type": "array",
"items": [
{
"type": "integer"
}
],
"additionalItems": false
})",
.writer_schema = R"({
"type": "array",
"prefixItems": [
{
"type": "integer"
}
],
"items": false
})",

.reader_is_compatible_with_writer = true,
},
});
SEASTAR_THREAD_TEST_CASE(test_compatibility_check) {
store_fixture f;
Expand Down

0 comments on commit 5e3311a

Please sign in to comment.