From a1caf51a42b8276334a7a2d0f95f0c74727b2730 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Thu, 22 Aug 2024 23:29:05 +0200 Subject: [PATCH 1/6] schema_registry/json: schema_context::dialect() method --- src/v/pandaproxy/schema_registry/json.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index f782afc9a066..46839ba0bf1a 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -191,6 +191,8 @@ class schema_context { explicit schema_context(json_schema_definition::impl const& schema) : _schema{schema} {} + json_schema_dialect dialect() const { return _schema.ctx.dialect; } + private: json_schema_definition::impl const& _schema; }; From 65c65ce81cfe5af6901bb2b19f2da7074e0ef947 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Thu, 22 Aug 2024 23:30:21 +0200 Subject: [PATCH 2/6] schema_registry/json: allow dependentRequired/Schema these two keywords are not implemented in the reference implementation. if they need to be implemented, each one implements one of the variants of "dependencies" (already implemented) --- src/v/pandaproxy/schema_registry/json.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index 46839ba0bf1a..4bd4dadd3d1e 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -1543,9 +1543,6 @@ bool is_superset( for (auto not_yet_handled_keyword : { // draft 6 unhandled keywords: "$ref", - // draft 2019-09 unhandled keywords: - "dependentRequired", - "dependentSchemas", // draft 2020-12 unhandled keywords: "prefixItems", }) { From a74acfd01056fc450a21ccba0c9bd83eb2796ba0 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Thu, 22 Aug 2024 23:35:39 +0200 Subject: [PATCH 3/6] schema_registry/json: extend is_additional_superset for draft2020 in draft 2020, if "type": "array", "items" is used instead of "additionalItems". tests shows that different drafts should be comparable so this function is expanded to compare draft2020 with older drafts --- src/v/pandaproxy/schema_registry/json.cc | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index 4bd4dadd3d1e..8ea43985ef7d 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -671,18 +671,22 @@ bool is_additional_superset( // schema | true | recurse with {} // schema | false | recurse with {"not":{}} - auto field_name = [&] { + // in draft 2020, if checking "type": "array", "items" is used to represent + // additional tuples items instead of an "additionalItems" + auto get_field_name = [&](json_schema_dialect d) { switch (field_type) { case additional_field_for::object: return "additionalProperties"; case additional_field_for::array: - return "additionalItems"; + return d == json_schema_dialect::draft202012 ? "items" + : "additionalItems"; } - }(); + }; + // helper to parse additional__ - auto get_additional_props = - [&](json::Value const& v) -> std::variant { - auto it = v.FindMember(field_name); + auto get_additional_props = [&](json_schema_dialect d, json::Value const& v) + -> std::variant { + auto it = v.FindMember(get_field_name(d)); if (it == v.MemberEnd()) { return true; } @@ -725,8 +729,8 @@ bool is_additional_superset( // check subschemas for compatibility return is_superset(ctx, *older, *newer); }), - get_additional_props(older), - get_additional_props(newer)); + get_additional_props(ctx.older.dialect(), older), + get_additional_props(ctx.newer.dialect(), newer)); } bool is_string_superset(json::Value const& older, json::Value const& newer) { From aece1141dc47667e549ff652aedd2f1a673707f2 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Thu, 22 Aug 2024 23:38:56 +0200 Subject: [PATCH 4/6] schema_registry/json: extend is_array_superset for draft2020 in draft2020, when describing a tuple, we should use "prefixItems" instead of "items" and "items" instead of "additionalItems". --- src/v/pandaproxy/schema_registry/json.cc | 41 +++++++++++++++--------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index 8ea43985ef7d..856e1f83c240 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -878,7 +878,7 @@ bool is_array_superset( // for array, "items" is a schema that validates all the elements. // for tuple in Draft4, "items" is an array of schemas to validate the // tuple, and "additionalItems" a schema to validate extra elements. - // TODO in later drafts, tuple validation has "prefixItems" as array of + // from draft 2020, tuple validation has "prefixItems" as array of // schemas, "items" is for validation of extra elements, "additionalItems" // is not used. // This superset function has a common section for tuples and array, and @@ -920,19 +920,27 @@ bool is_array_superset( return false; } + // in draft 2020, "prefixItems" is used to represent tuples instead of an + // overloaded "items" + constexpr static auto get_tuple_items_kw = [](json_schema_dialect d) { + if (d == json_schema_dialect::draft202012) { + return "prefixItems"; + } + return "items"; + }; + // check if the input is an array schema or a tuple schema - auto is_array = [](json::Value const& v) -> bool { - // TODO "prefixItems" is not in Draft4, it's from later drafts. if it's - // present, it's a tuple schema - auto items_it = v.FindMember("items"); + auto is_array = [](json_schema_dialect d, json::Value const& v) -> bool { + auto tuple_items_it = v.FindMember(get_tuple_items_kw(d)); // default for items is `{}` so it's not a tuple schema // v is a tuple schema if "items" is an array of schemas - auto is_tuple = items_it != v.MemberEnd() && items_it->value.IsArray(); + auto is_tuple = tuple_items_it != v.MemberEnd() + && tuple_items_it->value.IsArray(); return !is_tuple; }; - auto older_is_array = is_array(older); - auto newer_is_array = is_array(newer); + auto older_is_array = is_array(ctx.older.dialect(), older); + auto newer_is_array = is_array(ctx.newer.dialect(), newer); if (older_is_array != newer_is_array) { // one is a tuple and the other is not. not compatible @@ -942,8 +950,7 @@ bool is_array_superset( if (older_is_array) { // both are array, only "items" is relevant and it's a schema - // TODO after draft 4 "items" can be also a boolean so this needs to - // account for that note that "additionalItems" can be defined, but it's + // note that "additionalItems" can be defined, but it's // not used by validation because every element is validated against // "items" return is_superset( @@ -962,8 +969,10 @@ bool is_array_superset( return false; } - auto older_tuple_schema = older["items"].GetArray(); - auto newer_tuple_schema = newer["items"].GetArray(); + auto older_tuple_schema + = older[get_tuple_items_kw(ctx.older.dialect())].GetArray(); + auto newer_tuple_schema + = newer[get_tuple_items_kw(ctx.newer.dialect())].GetArray(); // find the first pair of schemas that do not match auto [older_it, newer_it] = std::ranges::mismatch( older_tuple_schema, @@ -988,7 +997,11 @@ bool is_array_superset( // excess elements with older["additionalItems"] auto older_additional_schema = get_object_or_empty( - ctx.older, older, "additionalItems"); + ctx.older, + older, + ctx.older.dialect() == json_schema_dialect::draft202012 + ? "items" + : "additionalItems"); // check that all excess schemas are compatible with // older["additionalItems"] @@ -1547,8 +1560,6 @@ bool is_superset( for (auto not_yet_handled_keyword : { // draft 6 unhandled keywords: "$ref", - // draft 2020-12 unhandled keywords: - "prefixItems", }) { if ( newer.HasMember(not_yet_handled_keyword) From ac6943000004a4b3bc4c5e2b29186ef6afde5996 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Thu, 22 Aug 2024 23:45:55 +0200 Subject: [PATCH 5/6] schema_registry/json: allow comparing differnt dialects the only outstanding diverging semantincs from different dialects was "items" in draft2020 vs the previous releases. this is solved in the previous commit so the draft check can be removed --- src/v/pandaproxy/schema_registry/json.cc | 41 ------------------- .../schema_registry/test/test_json_schema.cc | 26 ++++++------ 2 files changed, 12 insertions(+), 55 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index 856e1f83c240..a6654f42ceea 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -1580,43 +1580,6 @@ bool is_superset( return true; } -// this function assumes that the inputs are valid schemas, and if they have a -// $schema member it will have a string value -bool check_compatible_dialects( - json::Value const& older, json::Value const& newer) { - auto get_dialect = - [](json::Value const& v) -> std::optional { - if (!v.IsObject()) { - // support true/false schemas - return std::nullopt; - } - auto it = v.FindMember("$schema"); - if (it == v.MemberEnd()) { - return std::nullopt; - } - return from_uri(as_string_view(it->value)); - }; - - auto older_dialect = get_dialect(older); - auto newer_dialect = get_dialect(newer); - if (!older_dialect.has_value() && !newer_dialect.has_value()) { - // no $schema, compatible - return true; - } - // basic support: require that both use the same dialect. - // TODO: schemas using different dialects could be conditionally compatible - if (older_dialect != newer_dialect) { - throw as_exception(invalid_schema(fmt::format( - "not yet implemented compatibility check between different dialects: " - "{}, {}", - older_dialect ? to_uri(older_dialect.value()) : "[not specified]", - newer_dialect ? to_uri(newer_dialect.value()) : "[not specified]"))); - } - - // same dialect, compatible - return true; -} - void sort(json::Value& val) { switch (val.GetType()) { case rapidjson::Type::kFalseType: @@ -1686,10 +1649,6 @@ compatibility_result check_compatible( const json_schema_definition& writer, verbose is_verbose [[maybe_unused]]) { auto is_compatible = [&]() { - // schemas might be using incompatible dialects - if (!check_compatible_dialects(reader().ctx.doc, writer().ctx.doc)) { - return false; - } // reader is a superset of writer iff every schema that is valid for // writer is also valid for reader context ctx{.older{reader()}, .newer{writer()}}; diff --git a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc index 463d5b8b547e..721c25bc3ce0 100644 --- a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc +++ b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc @@ -619,20 +619,6 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"oneOf": [{"type":"number", "multipleOf": 10}, {"type": "number", "multipleOf": 1.1}]})", .reader_is_compatible_with_writer = false, }, - // different dialects - { - .reader_schema = R"({"$schema": "http://json-schema.org/draft-04/schema"})", - .writer_schema - = R"({"$schema": "http://json-schema.org/draft-06/schema#"})", - .reader_is_compatible_with_writer = false, - .expected_exception = true, - }, - { - .reader_schema = R"({"$schema": "http://json-schema.org/draft-04/schema"})", - .writer_schema = "true", - .reader_is_compatible_with_writer = false, - .expected_exception = true, - }, //***** compatible section ***** // atoms { @@ -1019,6 +1005,18 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"$schema": "http://json-schema.org/draft-06/schema#"})", .reader_is_compatible_with_writer = true, }, + // different dialects + { + .reader_schema = R"({"$schema": "http://json-schema.org/draft-04/schema"})", + .writer_schema + = R"({"$schema": "http://json-schema.org/draft-06/schema#"})", + .reader_is_compatible_with_writer = true, + }, + { + .reader_schema = R"({"$schema": "http://json-schema.org/draft-04/schema"})", + .writer_schema = "true", + .reader_is_compatible_with_writer = true, + }, }); SEASTAR_THREAD_TEST_CASE(test_compatibility_check) { store_fixture f; From 3fbcccba969ff883540c75c65582b6932196be69 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 23 Aug 2024 11:32:38 +0200 Subject: [PATCH 6/6] schema_registry/json: unit test mixed prefixItems/items checks the tests checks that draft2020/draft* schemas are correctly compared, even when the $schema is deduced --- .../schema_registry/test/test_json_schema.cc | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc index 721c25bc3ce0..8f68a6a8a3a7 100644 --- a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc +++ b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc @@ -847,6 +847,55 @@ static constexpr auto compatibility_test_cases = std::to_array< })", .reader_is_compatible_with_writer = true, }, + // array checks: prefixItems/items are compatible across drafts + { + .reader_schema = R"({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "array", + "prefixItems": [ + { + "type": "integer" + } + ], + "items": false + })", + + .writer_schema = R"({ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "array", + "items": [ + { + "type": "integer" + } + ], + "additionalItems": false + })", + .reader_is_compatible_with_writer = true, + }, + { + .reader_schema = R"({ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "array", + "items": [ + { + "type": "integer" + } + ], + "additionalItems": false + })", + .writer_schema = R"({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "array", + "prefixItems": [ + { + "type": "integer" + } + ], + "items": false + })", + + .reader_is_compatible_with_writer = true, + }, // combinators: "not" { .reader_schema @@ -1017,6 +1066,52 @@ static constexpr auto compatibility_test_cases = std::to_array< .writer_schema = "true", .reader_is_compatible_with_writer = true, }, + // array checks: prefixItems/items are compatible across drafts, unspecified + // schema + { + .reader_schema = R"({ + "type": "array", + "prefixItems": [ + { + "type": "integer" + } + ], + "items": false + })", + + .writer_schema = R"({ + "type": "array", + "items": [ + { + "type": "integer" + } + ], + "additionalItems": false + })", + .reader_is_compatible_with_writer = true, + }, + { + .reader_schema = R"({ + "type": "array", + "items": [ + { + "type": "integer" + } + ], + "additionalItems": false + })", + .writer_schema = R"({ + "type": "array", + "prefixItems": [ + { + "type": "integer" + } + ], + "items": false + })", + + .reader_is_compatible_with_writer = true, + }, }); SEASTAR_THREAD_TEST_CASE(test_compatibility_check) { store_fixture f;