diff --git a/bench/unmarshall_manufacturerdata.py b/bench/unmarshall_manufacturerdata.py new file mode 100644 index 00000000..89cb6156 --- /dev/null +++ b/bench/unmarshall_manufacturerdata.py @@ -0,0 +1,30 @@ +import io +import timeit + +from dbus_fast._private.unmarshaller import Unmarshaller + +# cythonize -X language_level=3 -a -i src/dbus_fast/_private/unmarshaller.py + + +bluez_mfr_data_message = ( + b"l\4\1\1x\0\0\0\232\312\n\0\225\0\0\0\1\1o\0%\0\0\0/org/bluez/hci0/dev_D0_C2_4E_08_AB_57\0\0\0\2\1s" + b"\0\37\0\0\0org.freedesktop.DBus.Properties\0\3\1s\0\21\0\0\0PropertiesChanged\0\0\0\0\0\0\0\10\1g\0" + b"\10sa{sv}as\0\0\0\7\1s\0\4\0\0\0:1.4\0\0\0\0\21\0\0\0org.bluez.Device1\0\0\0T\0\0\0\0\0\0\0\4\0\0\0" + b"RSSI\0\1n\0\252\377\0\0\20\0\0\0ManufacturerData\0\5a{qv}\0$\0\0\0u\0\2ay\0\0\0\30\0\0\0B\4\1\1p\320" + b"\302N\10\253W\322\302N\10\253V\1\0\0\0\0\0\0\0\0\0\0l\4\1\0014\0\0\0\233\312\n\0\225\0\0\0\1\1o\0%\0" +) + +stream = io.BytesIO(bluez_mfr_data_message) + +unmarshaller = Unmarshaller(stream) + + +def unmarshall_mfr_data_message(): + stream.seek(0) + unmarshaller.reset() + unmarshaller.unmarshall() + + +count = 3000000 +time = timeit.Timer(unmarshall_mfr_data_message).timeit(count) +print(f"Unmarshalling {count} bluetooth ManufacturerData messages took {time} seconds") diff --git a/src/dbus_fast/_private/unmarshaller.pxd b/src/dbus_fast/_private/unmarshaller.pxd index 5a1d0688..ad7f6807 100644 --- a/src/dbus_fast/_private/unmarshaller.pxd +++ b/src/dbus_fast/_private/unmarshaller.pxd @@ -7,23 +7,33 @@ from ..signature import SignatureType cdef unsigned int UINT32_SIZE cdef unsigned int INT16_SIZE +cdef unsigned int UINT16_SIZE + cdef unsigned int HEADER_ARRAY_OF_STRUCT_SIGNATURE_POSITION cdef unsigned int HEADER_SIGNATURE_SIZE cdef unsigned int LITTLE_ENDIAN cdef unsigned int BIG_ENDIAN cdef unsigned int PROTOCOL_VERSION + cdef str UINT32_CAST cdef str INT16_CAST +cdef str UINT16_CAST + cdef bint SYS_IS_LITTLE_ENDIAN cdef bint SYS_IS_BIG_ENDIAN cdef 
object UNPACK_HEADER_LITTLE_ENDIAN cdef object UNPACK_HEADER_BIG_ENDIAN + cdef object UINT32_UNPACK_LITTLE_ENDIAN cdef object UINT32_UNPACK_BIG_ENDIAN + cdef object INT16_UNPACK_LITTLE_ENDIAN cdef object INT16_UNPACK_BIG_ENDIAN +cdef object UINT16_UNPACK_LITTLE_ENDIAN +cdef object UINT16_UNPACK_BIG_ENDIAN + cdef object Variant cdef object Message cdef object MESSAGE_TYPE_MAP @@ -31,11 +41,15 @@ cdef object MESSAGE_FLAG_MAP cdef object HEADER_MESSAGE_ARG_NAME cdef object SIGNATURE_TREE_EMPTY -cdef object SIGNATURE_TREE_SA_SV_AS cdef object SIGNATURE_TREE_N cdef object SIGNATURE_TREE_S +cdef object SIGNATURE_TREE_SA_SV_AS cdef object SIGNATURE_TREE_SA_SV_AS_TYPES_1 cdef object SIGNATURE_TREE_SA_SV_AS_TYPES_2 +cdef object SIGNATURE_TREE_AY +cdef object SIGNATURE_TREE_AY_TYPES_0 +cdef object SIGNATURE_TREE_A_QV +cdef object SIGNATURE_TREE_A_QV_TYPES_0 cpdef get_signature_tree @@ -48,6 +62,11 @@ cdef inline short _cast_int16_native(const char * payload, unsigned int offset) cdef short *s16p = &payload[offset] return s16p[0] +cdef inline unsigned short _cast_uint16_native(const char * payload, unsigned int offset): + cdef unsigned short *u16p = &payload[offset] + return u16p[0] + + cdef class MarshallerStreamEndError(Exception): pass @@ -69,6 +88,7 @@ cdef class Unmarshaller: cdef unsigned int _is_native cdef object _uint32_unpack cdef object _int16_unpack + cdef object _uint16_unpack cpdef reset(self) @@ -89,6 +109,10 @@ cdef class Unmarshaller: cdef int _read_int16_unpack(self) + cpdef read_uint16_unpack(self, object type_) + + cdef unsigned int _read_uint16_unpack(self) + cpdef read_string_unpack(self, object type_) @cython.locals( diff --git a/src/dbus_fast/_private/unmarshaller.py b/src/dbus_fast/_private/unmarshaller.py index a3467cb5..b03dfd51 100644 --- a/src/dbus_fast/_private/unmarshaller.py +++ b/src/dbus_fast/_private/unmarshaller.py @@ -23,13 +23,17 @@ INT16_SIZE = 2 INT16_DBUS_TYPE = "n" +UINT16_CAST = "H" +UINT16_SIZE = 2 +UINT16_DBUS_TYPE = "q" + 
SYS_IS_LITTLE_ENDIAN = sys.byteorder == "little" SYS_IS_BIG_ENDIAN = sys.byteorder == "big" DBUS_TO_CTYPE = { "y": ("B", 1), # byte INT16_DBUS_TYPE: (INT16_CAST, INT16_SIZE), # int16 - "q": ("H", 2), # uint16 + UINT16_DBUS_TYPE: (UINT16_CAST, UINT16_SIZE), # uint16 "i": ("i", 4), # int32 UINT32_DBUS_TYPE: (UINT32_CAST, UINT32_SIZE), # uint32 "x": ("q", 8), # int64 @@ -39,12 +43,16 @@ } UNPACK_HEADER_LITTLE_ENDIAN = Struct("III").unpack_from -UINT32_UNPACK_BIG_ENDIAN = Struct(">I").unpack_from -INT16_UNPACK_BIG_ENDIAN = Struct(">h").unpack_from + +UINT32_UNPACK_LITTLE_ENDIAN = Struct(f"<{UINT32_CAST}").unpack_from +UINT32_UNPACK_BIG_ENDIAN = Struct(f">{UINT32_CAST}").unpack_from + +INT16_UNPACK_LITTLE_ENDIAN = Struct(f"<{INT16_CAST}").unpack_from +INT16_UNPACK_BIG_ENDIAN = Struct(f">{INT16_CAST}").unpack_from + +UINT16_UNPACK_LITTLE_ENDIAN = Struct(f"<{UINT16_CAST}").unpack_from +UINT16_UNPACK_BIG_ENDIAN = Struct(f">{UINT16_CAST}").unpack_from HEADER_SIGNATURE_SIZE = 16 HEADER_ARRAY_OF_STRUCT_SIGNATURE_POSITION = 12 @@ -53,6 +61,12 @@ SIGNATURE_TREE_EMPTY = get_signature_tree("") SIGNATURE_TREE_N = get_signature_tree("n") SIGNATURE_TREE_S = get_signature_tree("s") + +SIGNATURE_TREE_AY = get_signature_tree("ay") +SIGNATURE_TREE_AY_TYPES_0 = SIGNATURE_TREE_AY.types[0] +SIGNATURE_TREE_A_QV = get_signature_tree("a{qv}") +SIGNATURE_TREE_A_QV_TYPES_0 = SIGNATURE_TREE_A_QV.types[0] + SIGNATURE_TREE_SA_SV_AS = get_signature_tree("sa{sv}as") SIGNATURE_TREE_SA_SV_AS_TYPES_1 = SIGNATURE_TREE_SA_SV_AS.types[1] SIGNATURE_TREE_SA_SV_AS_TYPES_2 = SIGNATURE_TREE_SA_SV_AS.types[2] @@ -148,6 +162,7 @@ class Unmarshaller: "_msg_len", "_uint32_unpack", "_int16_unpack", + "_uint16_unpack", "_is_native", ) @@ -168,6 +183,7 @@ def __init__(self, stream: io.BufferedRWPair, sock: Optional[socket.socket] = No self._is_native = 0 self._uint32_unpack: Callable | None = None self._int16_unpack: Callable | None = None + self._uint16_unpack: Callable | None = None def reset(self) -> None: 
"""Reset the unmarshaller to its initial state. @@ -185,8 +201,8 @@ def reset(self) -> None: self._flag = 0 self._msg_len = 0 self._is_native = 0 - self._uint32_unpack = None - self._int16_unpack = None + # No need to reset the unpack functions, they are set in _read_header + # every time a new message is processed. @property def message(self) -> Message: @@ -253,6 +269,17 @@ def _read_uint32_unpack(self) -> int: ) return self._uint32_unpack(self._buf, self._pos - UINT32_SIZE)[0] + def read_uint16_unpack(self, type_: SignatureType) -> int: + return self._read_uint16_unpack() + + def _read_uint16_unpack(self) -> int: + self._pos += UINT16_SIZE + (-self._pos & (UINT16_SIZE - 1)) # align + if self._is_native and cython.compiled: + return _cast_uint16_native( # pragma: no cover + self._buf, self._pos - UINT16_SIZE + ) + return self._uint16_unpack(self._buf, self._pos - UINT16_SIZE)[0] + def read_int16_unpack(self, type_: SignatureType) -> int: return self._read_int16_unpack() @@ -301,6 +328,16 @@ def _read_variant(self) -> Variant: # verify in Variant is only useful on construction not unmarshalling if signature == "n": return Variant(SIGNATURE_TREE_N, self._read_int16_unpack(), False) + elif signature == "ay": + return Variant( + SIGNATURE_TREE_AY, self._read_array(SIGNATURE_TREE_AY_TYPES_0), False + ) + elif signature == "a{qv}": + return Variant( + SIGNATURE_TREE_A_QV, + self._read_array(SIGNATURE_TREE_A_QV_TYPES_0), + False, + ) tree = get_signature_tree(signature) signature_type = tree.types[0] return Variant( @@ -355,15 +392,20 @@ def _read_array(self, type_: SignatureType) -> Iterable[Any]: child_1 = child_type.children[1] child_0_token = child_0.token child_1_token = child_1.token - # Strings with variant values are the most common case # so we optimize for that by inlining the string reading # and the variant reading here - if child_0_token in "os" and child_1_token == "v": - while self._pos - beginning_pos < array_length: - self._pos += -self._pos & 7 # align 
8 - key = self._read_string_unpack() - result_dict[key] = self._read_variant() + if child_0_token in "os" and child_1_token == "v": + while self._pos - beginning_pos < array_length: + self._pos += -self._pos & 7 # align 8 + key = self._read_string_unpack() + result_dict[key] = self._read_variant() + # a{qv} (e.g. ManufacturerData); other keys fall through to else + elif child_0_token == "q" and child_1_token == "v": + while self._pos - beginning_pos < array_length: + self._pos += -self._pos & 7 # align 8 + key = self._read_uint16_unpack() + result_dict[key] = self._read_variant() else: reader_1 = self._readers[child_1_token] reader_0 = self._readers[child_0_token] @@ -447,12 +489,14 @@ def _read_header(self) -> None: ) = UNPACK_HEADER_LITTLE_ENDIAN(self._buf, 4) self._uint32_unpack = UINT32_UNPACK_LITTLE_ENDIAN self._int16_unpack = INT16_UNPACK_LITTLE_ENDIAN + self._uint16_unpack = UINT16_UNPACK_LITTLE_ENDIAN elif endian == BIG_ENDIAN: self._body_len, self._serial, self._header_len = UNPACK_HEADER_BIG_ENDIAN( self._buf, 4 ) self._uint32_unpack = UINT32_UNPACK_BIG_ENDIAN self._int16_unpack = INT16_UNPACK_BIG_ENDIAN + self._uint16_unpack = UINT16_UNPACK_BIG_ENDIAN else: raise InvalidMessageError( f"Expecting endianness as the first byte, got {endian} from {buffer}" @@ -530,6 +574,7 @@ def unmarshall(self) -> Optional[Message]: "h": read_uint32_unpack, UINT32_DBUS_TYPE: read_uint32_unpack, INT16_DBUS_TYPE: read_int16_unpack, + UINT16_DBUS_TYPE: read_uint16_unpack, } _ctype_by_endian: Dict[int, Dict[str, READER_TYPE]] = { diff --git a/tests/test_marshaller.py b/tests/test_marshaller.py index a87d97ed..abce89c2 100644 --- a/tests/test_marshaller.py +++ b/tests/test_marshaller.py @@ -9,6 +9,7 @@ from dbus_fast import Message, MessageFlag, MessageType, SignatureTree, Variant from dbus_fast._private._cython_compat import FakeCython from dbus_fast._private.unmarshaller import Unmarshaller +from dbus_fast.unpack import unpack_variants def print_buf(buf): @@ -166,6 +167,67 @@ def read(self, n) -> bytes: assert unmarshaller.message is not None +def
test_unmarshall_bluez_message(): + bluez_mfr_message = ( + "6c040101780000009aca0a009500000001016f00250000002f6f72672f626c75657a2f686369302f646576" + "5f44305f43325f34455f30385f41425f3537000000020173001f0000006f72672e667265656465736b746f" + "702e444275732e50726f7065727469657300030173001100000050726f706572746965734368616e676564" + "00000000000000080167000873617b73767d617300000007017300040000003a312e340000000011000000" + "6f72672e626c75657a2e446576696365310000005400000000000000040000005253534900016e00aaff00" + "00100000004d616e756661637475726572446174610005617b71767d002400000075000261790000001800" + "00004204010170d0c24e08ab57d2c24e08ab5601000000000000000000006c040101340000009bca0a0095" + "00000001016f002500" + ) + message_bytes = bytes.fromhex(bluez_mfr_message) + stream = io.BytesIO(message_bytes) + unmarshaller = Unmarshaller(stream) + assert unmarshaller.unmarshall() + message = unmarshaller.message + assert message is not None + assert message.body == [ + "org.bluez.Device1", + { + "ManufacturerData": Variant( + "a{qv}", + { + 117: Variant( + "ay", + bytearray( + b"B\x04\x01\x01p\xd0\xc2N\x08\xabW\xd2\xc2N\x08\xabV\x01\x00\x00\x00\x00\x00\x00" + ), + ) + }, + ), + "RSSI": Variant("n", -86), + }, + [], + ] + assert message.sender == ":1.4" + assert message.path == "/org/bluez/hci0/dev_D0_C2_4E_08_AB_57" + assert message.interface == "org.freedesktop.DBus.Properties" + assert message.member == "PropertiesChanged" + assert message.signature == "sa{sv}as" + assert message.message_type == MessageType.SIGNAL + assert message.flags == MessageFlag.NO_REPLY_EXPECTED + assert message.serial == 707226 + assert message.destination is None + unpacked = unpack_variants(message.body) + assert unpacked == [ + "org.bluez.Device1", + { + "ManufacturerData": { + 117: bytearray( + b"B\x04\x01\x01p\xd0\xc2N\x08\xabW\xd2" + b"\xc2N\x08\xabV\x01\x00\x00" + b"\x00\x00\x00\x00" + ) + }, + "RSSI": -86, + }, + [], + ] + + def test_ay_buffer(): body = [bytes(10000)] msg = 
Message(path="/test", member="test", signature="ay", body=body)