Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: get rid of the path for fully qualified names. #912

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 62 additions & 34 deletions karapace/protobuf/proto_normalizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

from __future__ import annotations

from karapace.dependency import Dependency
from karapace.protobuf.enum_constant_element import EnumConstantElement
from karapace.protobuf.enum_element import EnumElement
from karapace.protobuf.extend_element import ExtendElement
from karapace.protobuf.field_element import FieldElement
from karapace.protobuf.group_element import GroupElement
from karapace.protobuf.known_dependency import KnownDependency
from karapace.protobuf.message_element import MessageElement
from karapace.protobuf.one_of_element import OneOfElement
from karapace.protobuf.option_element import OptionElement
Expand All @@ -19,8 +19,9 @@
from karapace.protobuf.schema import ProtobufSchema
from karapace.protobuf.service_element import ServiceElement
from karapace.protobuf.type_element import TypeElement
from karapace.schema_references import Reference
from typing import Mapping, Sequence
from karapace.protobuf.type_tree import TypeTree
from karapace.utils import remove_prefix
from typing import Sequence

import abc

Expand Down Expand Up @@ -77,28 +78,39 @@ class NormalizedGroupElement(GroupElement):
class NormalizedProtobufSchema(ProtobufSchema):
proto_file_element: NormalizedProtoFileElement

def __init__(
self,
schema: str,
references: Sequence[Reference] | None = None,
dependencies: Mapping[str, Dependency] | None = None,
proto_file_element: ProtoFileElement | None = None,
) -> None:
super().__init__(schema, references, dependencies, proto_file_element)
self.proto_file_element = normalize(self.proto_file_element)
@staticmethod
def from_protobuf_schema(protobuf_schema: ProtobufSchema) -> NormalizedProtobufSchema:
return normalize(protobuf_schema)


class NormalizedOneOfElement(OneOfElement):
fields: Sequence[NormalizedFieldElement]
groups: Sequence[NormalizedGroupElement]


def type_field_element_with_sorted_options(type_field: FieldElement) -> NormalizedFieldElement:
def normalize_type_field_element(type_field: FieldElement, package: str, type_tree: TypeTree) -> NormalizedFieldElement:
sorted_options = None if type_field.options is None else list(sorted(type_field.options, key=sort_by_name))
field_type_normalized = remove_prefix(remove_prefix(type_field.element_type, "."), f"{package}.")
reference_in_type_tree = type_tree.type_in_tree(field_type_normalized)
google_included_type = (
field_type_normalized in KnownDependency.index_simple or field_type_normalized in KnownDependency.index
)
element_type = (
field_type_normalized
# we can remove the package from the type only if there aren't clashing names in the same
# definition with the same relative prefix.
if (
reference_in_type_tree is not None
and reference_in_type_tree.is_uniquely_identifiable_type
and not google_included_type
)
or google_included_type
else type_field.element_type
)
return NormalizedFieldElement(
location=type_field.location,
label=type_field.label,
element_type=type_field.element_type,
element_type=element_type,
name=type_field.name,
default_value=type_field.default_value,
json_name=type_field.json_name,
Expand Down Expand Up @@ -135,9 +147,9 @@ def enum_element_with_sorted_options(enum_element: EnumElement) -> NormalizedEnu
)


def groups_with_sorted_options(group: GroupElement) -> NormalizedGroupElement:
def groups_with_sorted_options(group: GroupElement, package: str, type_tree: TypeTree) -> NormalizedGroupElement:
sorted_fields = (
None if group.fields is None else [type_field_element_with_sorted_options(field) for field in group.fields]
None if group.fields is None else [normalize_type_field_element(field, package, type_tree) for field in group.fields]
)
return NormalizedGroupElement(
label=group.label,
Expand All @@ -149,10 +161,10 @@ def groups_with_sorted_options(group: GroupElement) -> NormalizedGroupElement:
)


def one_ofs_with_sorted_options(one_ofs: OneOfElement) -> NormalizedOneOfElement:
def one_ofs_with_sorted_options(one_ofs: OneOfElement, package: str, type_tree: TypeTree) -> NormalizedOneOfElement:
sorted_options = None if one_ofs.options is None else list(sorted(one_ofs.options, key=sort_by_name))
sorted_fields = [type_field_element_with_sorted_options(field) for field in one_ofs.fields]
sorted_groups = [groups_with_sorted_options(group) for group in one_ofs.groups]
sorted_fields = [normalize_type_field_element(field, package, type_tree) for field in one_ofs.fields]
sorted_groups = [groups_with_sorted_options(group, package, type_tree) for group in one_ofs.groups]

return NormalizedOneOfElement(
name=one_ofs.name,
Expand All @@ -163,11 +175,15 @@ def one_ofs_with_sorted_options(one_ofs: OneOfElement) -> NormalizedOneOfElement
)


def message_element_with_sorted_options(message_element: MessageElement) -> NormalizedMessageElement:
def message_element_with_sorted_options(
message_element: MessageElement, package: str, type_tree: TypeTree
) -> NormalizedMessageElement:
sorted_options = None if message_element.options is None else list(sorted(message_element.options, key=sort_by_name))
sorted_nested_types = [type_element_with_sorted_options(nested_type) for nested_type in message_element.nested_types]
sorted_fields = [type_field_element_with_sorted_options(field) for field in message_element.fields]
sorted_one_ofs = [one_ofs_with_sorted_options(one_of) for one_of in message_element.one_ofs]
sorted_nested_types = [
type_element_with_sorted_options(nested_type, package, type_tree) for nested_type in message_element.nested_types
]
sorted_fields = [normalize_type_field_element(field, package, type_tree) for field in message_element.fields]
sorted_one_ofs = [one_ofs_with_sorted_options(one_of, package, type_tree) for one_of in message_element.one_ofs]

return NormalizedMessageElement(
location=message_element.location,
Expand All @@ -183,16 +199,16 @@ def message_element_with_sorted_options(message_element: MessageElement) -> Norm
)


def type_element_with_sorted_options(type_element: TypeElement) -> NormalizedTypeElement:
def type_element_with_sorted_options(type_element: TypeElement, package: str, type_tree: TypeTree) -> NormalizedTypeElement:
sorted_nested_types: list[TypeElement] = []

for nested_type in type_element.nested_types:
if isinstance(nested_type, EnumElement):
sorted_nested_types.append(enum_element_with_sorted_options(nested_type))
elif isinstance(nested_type, MessageElement):
sorted_nested_types.append(message_element_with_sorted_options(nested_type))
sorted_nested_types.append(message_element_with_sorted_options(nested_type, package, type_tree))
else:
raise ValueError("Unknown type element") # tried with assert_never but it did not work
raise ValueError(f"Unknown type element {type(nested_type)}") # tried with assert_never but it did not work

# doing it here since the subtypes do not declare the nested_types property
type_element.nested_types = sorted_nested_types
Expand All @@ -201,16 +217,18 @@ def type_element_with_sorted_options(type_element: TypeElement) -> NormalizedTyp
return enum_element_with_sorted_options(type_element)

if isinstance(type_element, MessageElement):
return message_element_with_sorted_options(type_element)
return message_element_with_sorted_options(type_element, package, type_tree)

raise ValueError("Unknown type element") # tried with assert_never but it did not work
raise ValueError(f"Unknown type element of type {type(type_element)}") # tried with assert_never but it did not work


def extends_element_with_sorted_options(extend_element: ExtendElement) -> NormalizedExtendElement:
def extends_element_with_sorted_options(
extend_element: ExtendElement, package: str, type_tree: TypeTree
) -> NormalizedExtendElement:
sorted_fields = (
None
if extend_element.fields is None
else [type_field_element_with_sorted_options(field) for field in extend_element.fields]
else [normalize_type_field_element(field, package, type_tree) for field in extend_element.fields]
)
return NormalizedExtendElement(
location=extend_element.location,
Expand Down Expand Up @@ -249,19 +267,22 @@ def service_element_with_sorted_options(service_element: ServiceElement) -> Norm
)


def normalize(proto_file_element: ProtoFileElement) -> NormalizedProtoFileElement:
def normalize(protobuf_schema: ProtobufSchema) -> NormalizedProtobufSchema:
proto_file_element = protobuf_schema.proto_file_element
type_tree = protobuf_schema.types_tree()
package = proto_file_element.package_name or ""
sorted_types: Sequence[NormalizedTypeElement] = [
type_element_with_sorted_options(type_element) for type_element in proto_file_element.types
type_element_with_sorted_options(type_element, package, type_tree) for type_element in proto_file_element.types
]
sorted_options = list(sorted(proto_file_element.options, key=sort_by_name))
sorted_services: Sequence[NormalizedServiceElement] = [
service_element_with_sorted_options(service) for service in proto_file_element.services
]
sorted_extend_declarations: Sequence[NormalizedExtendElement] = [
extends_element_with_sorted_options(extend) for extend in proto_file_element.extend_declarations
extends_element_with_sorted_options(extend, package, type_tree) for extend in proto_file_element.extend_declarations
]

return NormalizedProtoFileElement(
normalized_protobuf_element = NormalizedProtoFileElement(
location=proto_file_element.location,
package_name=proto_file_element.package_name,
syntax=proto_file_element.syntax,
Expand All @@ -272,3 +293,10 @@ def normalize(proto_file_element: ProtoFileElement) -> NormalizedProtoFileElemen
extend_declarations=sorted_extend_declarations,
options=sorted_options,
)

return NormalizedProtobufSchema(
protobuf_schema.schema,
protobuf_schema.references,
protobuf_schema.dependencies,
normalized_protobuf_element,
)
87 changes: 3 additions & 84 deletions karapace/protobuf/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
from karapace.protobuf.proto_parser import ProtoParser
from karapace.protobuf.serialization import deserialize, serialize
from karapace.protobuf.type_element import TypeElement
from karapace.protobuf.type_tree import SourceFileReference, TypeTree
from karapace.protobuf.utils import append_documentation, append_indented
from karapace.schema_references import Reference
from typing import Iterable, Mapping, Sequence
from typing import Mapping, Sequence

import binascii
import itertools


def add_slashes(text: str) -> str:
Expand Down Expand Up @@ -122,88 +122,6 @@ class UsedType:
used_attribute_type: str


@default_dataclass
class SourceFileReference:
reference: str
import_order: int


@default_dataclass
class TypeTree:
token: str
children: list[TypeTree]
source_reference: SourceFileReference | None

def source_reference_tree_recursive(self) -> Iterable[SourceFileReference | None]:
sources = [] if self.source_reference is None else [self.source_reference]
for child in self.children:
sources = itertools.chain(sources, child.source_reference_tree())
return sources

def source_reference_tree(self) -> Iterable[SourceFileReference]:
return filter(lambda x: x is not None, self.source_reference_tree_recursive())

def inserted_elements(self) -> int:
"""
Returns the newest element generation accessible from that node.
Where with the term generation we mean the order for which a message
has been imported.
If called on the root of the tree it corresponds with the number of
fully specified path objects present in the tree.
"""
return max(reference.import_order for reference in self.source_reference_tree())

def oldest_matching_import(self) -> int:
"""
Returns the oldest element generation accessible from that node.
Where with the term generation we mean the order for which a message
has been imported.
"""
return min(reference.import_order for reference in self.source_reference_tree())

def expand_missing_absolute_path_recursive(self, oldest_import: int) -> Sequence[str]:
"""
Method that, once called on a node, traverse all the leaf and
return the oldest imported element with the common postfix.
This is also the current behaviour
of protobuf while dealing with a not fully specified path, it seeks for
the firstly imported message with a matching path.
"""
if self.source_reference is not None:
if self.source_reference.import_order == oldest_import:
return [self.token]
return []

for child in self.children:
maybe_oldest_child = child.expand_missing_absolute_path_recursive(oldest_import)
if maybe_oldest_child is not None:
return list(maybe_oldest_child) + [self.token]

return []

def expand_missing_absolute_path(self) -> Sequence[str]:
oldest_import = self.oldest_matching_import()
expanded_missing_path = self.expand_missing_absolute_path_recursive(oldest_import)
assert (
expanded_missing_path is not None
), "each node should have, by construction, at least a leaf that is a fully specified path"
return expanded_missing_path[:-1] # skipping myself since I was matched

@property
def is_fully_qualified_type(self) -> bool:
return len(self.children) == 0

def represent(self, level=0) -> str:
spacing = " " * 3 * level
if self.is_fully_qualified_type:
return f"{spacing}>{self.token}"
child_repr = "\n".join(child.represent(level=level + 1) for child in self.children)
return f"{spacing}{self.token} ->\n{child_repr}"

def __repr__(self) -> str:
return self.represent()


def _add_new_type_recursive(
parent_tree: TypeTree,
remaining_tokens: list[str],
Expand Down Expand Up @@ -258,6 +176,7 @@ def __init__(
) -> None:
if type(schema).__name__ != "str":
raise IllegalArgumentException("Non str type of schema string")
self.schema = schema
self.cache_string = ""

if proto_file_element is not None:
Expand Down
Loading
Loading