diff --git a/karapace/protobuf/proto_normalizations.py b/karapace/protobuf/proto_normalizations.py index 9f65a2623..bdcb734f4 100644 --- a/karapace/protobuf/proto_normalizations.py +++ b/karapace/protobuf/proto_normalizations.py @@ -93,12 +93,17 @@ class NormalizedOneOfElement(OneOfElement): groups: Sequence[NormalizedGroupElement] -def type_field_element_with_sorted_options(type_field: FieldElement) -> NormalizedFieldElement: +def type_field_element_with_sorted_options(type_field: FieldElement, scope: str) -> NormalizedFieldElement: sorted_options = None if type_field.options is None else list(sorted(type_field.options, key=sort_by_name)) + field_type_normalized = type_field.element_type.removeprefix( + "." + ).removeprefix( # getting rid of the optional `.` before the package name + f"{scope}." + ) return NormalizedFieldElement( location=type_field.location, label=type_field.label, - element_type=type_field.element_type, + element_type=field_type_normalized, name=type_field.name, default_value=type_field.default_value, json_name=type_field.json_name, @@ -119,13 +124,14 @@ def enum_constant_element_with_sorted_options(enum_constant: EnumConstantElement ) -def enum_element_with_sorted_options(enum_element: EnumElement) -> NormalizedEnumElement: +def enum_element_with_sorted_options(enum_element: EnumElement, scope: str) -> NormalizedEnumElement: sorted_options = None if enum_element.options is None else list(sorted(enum_element.options, key=sort_by_name)) constants_with_sorted_options = ( None if enum_element.constants is None else [enum_constant_element_with_sorted_options(constant) for constant in enum_element.constants] ) + # todo: strip scope. return NormalizedEnumElement( location=enum_element.location, name=enum_element.name, @@ -163,10 +169,13 @@ def one_ofs_with_sorted_options(one_ofs: OneOfElement) -> NormalizedOneOfElement ) -def message_element_with_sorted_options(message_element: MessageElement) -> NormalizedMessageElement: +def message_element_with_sorted_options(message_element: MessageElement, scope: str) -> NormalizedMessageElement: + message_scope = f"{scope}.{message_element.name}" sorted_options = None if message_element.options is None else list(sorted(message_element.options, key=sort_by_name)) - sorted_nested_types = [type_element_with_sorted_options(nested_type) for nested_type in message_element.nested_types] - sorted_fields = [type_field_element_with_sorted_options(field) for field in message_element.fields] + sorted_nested_types = [ + type_element_with_sorted_options(nested_type, scope) for nested_type in message_element.nested_types + ] + sorted_fields = [type_field_element_with_sorted_options(field, scope) for field in message_element.fields] sorted_one_ofs = [one_ofs_with_sorted_options(one_of) for one_of in message_element.one_ofs] return NormalizedMessageElement( @@ -183,14 +192,17 @@ def message_element_with_sorted_options(message_element: MessageElement) -> Norm ) -def type_element_with_sorted_options(type_element: TypeElement) -> NormalizedTypeElement: +def type_element_with_sorted_options(type_element: TypeElement, scope: str) -> NormalizedTypeElement: sorted_nested_types: list[TypeElement] = [] + parent_name = type_element.name + message_scope = f"{scope}.{parent_name}" + for nested_type in type_element.nested_types: if isinstance(nested_type, EnumElement): - sorted_nested_types.append(enum_element_with_sorted_options(nested_type)) + sorted_nested_types.append(enum_element_with_sorted_options(nested_type, message_scope)) elif isinstance(nested_type, MessageElement): - sorted_nested_types.append(message_element_with_sorted_options(nested_type)) + sorted_nested_types.append(message_element_with_sorted_options(nested_type, message_scope)) else: raise ValueError("Unknown type element") # tried with assert_never but it did not work @@ -198,10 +210,10 @@ def type_element_with_sorted_options(type_element: TypeElement) -> NormalizedTyp type_element.nested_types = sorted_nested_types if isinstance(type_element, EnumElement): - return enum_element_with_sorted_options(type_element) + return enum_element_with_sorted_options(type_element, scope) if isinstance(type_element, MessageElement): - return message_element_with_sorted_options(type_element) + return message_element_with_sorted_options(type_element, scope) raise ValueError("Unknown type element") # tried with assert_never but it did not work @@ -251,7 +263,8 @@ def service_element_with_sorted_options(service_element: ServiceElement) -> Norm def normalize(proto_file_element: ProtoFileElement) -> NormalizedProtoFileElement: sorted_types: Sequence[NormalizedTypeElement] = [ - type_element_with_sorted_options(type_element) for type_element in proto_file_element.types + type_element_with_sorted_options(type_element, proto_file_element.package_name) + for type_element in proto_file_element.types ] sorted_options = list(sorted(proto_file_element.options, key=sort_by_name)) sorted_services: Sequence[NormalizedServiceElement] = [ diff --git a/tests/unit/protobuf/test_protobuf_normalization.py b/tests/unit/protobuf/test_protobuf_normalization.py index b772b293c..c70b6973d 100644 --- a/tests/unit/protobuf/test_protobuf_normalization.py +++ b/tests/unit/protobuf/test_protobuf_normalization.py @@ -6,10 +6,11 @@ from karapace.protobuf.location import Location from karapace.protobuf.proto_normalizations import normalize from karapace.protobuf.proto_parser import ProtoParser +from typing import Final import pytest -location: Location = Location("some/folder", "file.proto") +LOCATION: Final[Location] = Location("somefolder", "file.proto") # this would be a good case for using a property based test with a well-formed message generator @@ -511,10 +512,110 @@ ), ) def test_differently_ordered_options_normalizes_equally(ordered_schema: str, unordered_schema: str) -> None: - ordered_proto = ProtoParser.parse(location, ordered_schema) - unordered_proto = ProtoParser.parse(location, unordered_schema) + ordered_proto = ProtoParser.parse(LOCATION, ordered_schema) + unordered_proto = ProtoParser.parse(LOCATION, unordered_schema) result = CompareResult() normalize(ordered_proto).compare(normalize(unordered_proto), result) assert result.is_compatible() assert normalize(ordered_proto).to_schema() == normalize(unordered_proto).to_schema() + + +PROTO_WITH_FULLY_QUALIFIED_PATHS = """\ +syntax = "proto3"; +package my.awesome.customer.v1; + +import "my/awesome/customer/v1/nested_value.proto"; +import "google/protobuf/timestamp.proto"; + +option csharp_namespace = "my.awesome.customer.V1"; +option go_package = "github.com/customer/api/my/awesome/customer/v1;dspv1"; +option java_multiple_files = true; +option java_outer_classname = "EventValueProto"; +option java_package = "com.my.awesome.customer.v1"; +option objc_class_prefix = "TDD"; +option php_metadata_namespace = "My\\Awesome\\Customer\\V1"; +option php_namespace = "My\\Awesome\\Customer\\V1"; +option ruby_package = "My::Awesome::Customer::V1"; + +message EventValue { + .my.awesome.customer.v1.NestedValue nested_value = 1; + .google.protobuf.Timestamp created_at = 2; +} +""" + + +PROTO_WITH_SIMPLE_NAMES = """\ +syntax = "proto3"; +package my.awesome.customer.v1; + +import "my/awesome/customer/v1/nested_value.proto"; +import "google/protobuf/timestamp.proto"; + +option csharp_namespace = "my.awesome.customer.V1"; +option go_package = "github.com/customer/api/my/awesome/customer/v1;dspv1"; +option java_multiple_files = true; +option java_outer_classname = "EventValueProto"; +option java_package = "com.my.awesome.customer.v1"; +option objc_class_prefix = "TDD"; +option php_metadata_namespace = "My\\Awesome\\Customer\\V1"; +option php_namespace = "My\\Awesome\\Customer\\V1"; +option ruby_package = "My::Awesome::Customer::V1"; + +message EventValue { + NestedValue nested_value = 1; + google.protobuf.Timestamp created_at = 2; +}""" + + +def test_full_path_and_simple_names_are_equal() -> None: + """ + This test aims to ensure that after the normalization process the schema expressed as SimpleNames will match the same + schemas expressed with fully qualified references. + This does not consider the normalization process for the different ways a type can be expressed as a relative reference. + + Long explaination below: + + If we accept the specifications from buf as correct, we can look at how a type + reference is defined here: https://protobuf.com/docs/language-spec#type-references. + + The main problem with the previous implementation is that the current parser can't tell + if a fully-qualified type reference in dot notation is the same as one identified by name + alone aka simple name notation (https://protobuf.com/docs/language-spec#fully-qualified-references). + + The fix do not consider all the different ways users can define a relative reference, schemas + with different way of expressing a relative reference even if normalized, + for now will keep being considered different (https://protobuf.com/docs/language-spec#relative-references). + + Right now, our logic removes the `.package_name` (+ the Message scope) part + before comparing field modifications (in fact it re-writes the schema using the simple name notation). + + Even though the TypeTree (trie data structure) could help resolve + relative references (https://protobuf.com/docs/language-spec#relative-references), + we don't want to add this feature in the python implementation now because it might cause new bugs due to the + non-trivial behaviour of the protoc compiler. + + We plan to properly normalize the protobuf schemas later. + + We'll use protobuf descriptors (after the compilation and linking step) + to gather type references already resolved, and we will threaten all the protobuf + using always the fully qualified names. + Properly handling all path variations means reimplementing the protoc compiler behavior, + we prefer relying on the already processed proto descriptor. + + So, for now, we'll only implement a normalization for the fully-qualified references + in dot notation and by simple name alone. + + This is not changing the semantics of the message since the local scope its always the one + with max priority, so if you get rid of the fully-qualified reference protoc will resolve + the reference with the one specified in the package scope. + """ + + fully_qualitifed_simple_name_notation = ProtoParser.parse(LOCATION, PROTO_WITH_SIMPLE_NAMES) + fully_qualitifed_dot_notation = ProtoParser.parse(LOCATION, PROTO_WITH_FULLY_QUALIFIED_PATHS) + result = CompareResult() + normalize(fully_qualitifed_dot_notation).compare(normalize(fully_qualitifed_simple_name_notation), result) + assert result.is_compatible() + assert ( + normalize(fully_qualitifed_dot_notation).to_schema() == normalize(fully_qualitifed_simple_name_notation).to_schema() + )