Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(python): Improve tuple and list serializer performance #1933

Merged
merged 1 commit into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion integration_tests/cpython_benchmark/fury_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,11 @@
],
60,
)
LARGE_TUPLE = tuple(range(2**20 + 1))


LIST = [[list(range(10)), list(range(10))] for _ in range(10)]
LARGE_LIST = [i for i in range(2**20 + 1)]


def mutate_dict(orig_dict, random_source):
Expand Down Expand Up @@ -169,7 +171,7 @@ def benchmark_args():
def micro_benchmark():
args = benchmark_args()
runner = pyperf.Runner()
if args.disable_cython:
if args and args.disable_cython:
os.environ["ENABLE_FURY_CYTHON_SERIALIZATION"] = "0"
sys.argv += ["--inherit-environ", "ENABLE_FURY_CYTHON_SERIALIZATION"]
runner.parse_args()
Expand All @@ -179,7 +181,13 @@ def micro_benchmark():
"fury_dict_group", fury_object, language, not args.no_ref, DICT_GROUP
)
runner.bench_func("fury_tuple", fury_object, language, not args.no_ref, TUPLE)
runner.bench_func(
"fury_large_tuple", fury_object, language, not args.no_ref, LARGE_TUPLE
)
runner.bench_func("fury_list", fury_object, language, not args.no_ref, LIST)
runner.bench_func(
"fury_large_list", fury_object, language, not args.no_ref, LARGE_LIST
)
runner.bench_func(
"fury_complex", fury_object, language, not args.no_ref, COMPLEX_OBJECT
)
Expand Down
86 changes: 48 additions & 38 deletions python/pyfury/_serialization.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ from libc.stdint cimport *
from libcpp.vector cimport vector
from cpython cimport PyObject
from cpython.ref cimport *
from cpython.list cimport PyList_New, PyList_SET_ITEM
from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM
from libcpp cimport bool as c_bool
from libcpp.utility cimport pair
from cython.operator cimport dereference as deref
Expand Down Expand Up @@ -1688,72 +1690,80 @@ cdef class ListSerializer(CollectionSerializer):
cpdef read(self, Buffer buffer):
cdef MapRefResolver ref_resolver = self.fury.ref_resolver
cdef ClassResolver class_resolver = self.fury.class_resolver
cdef list list_ = []
cdef int32_t len_ = buffer.read_varint32()
cdef list list_ = PyList_New(len_)
ref_resolver.reference(list_)
populate_list(buffer, list_, ref_resolver, class_resolver)
for i in range(len_):
elem = get_next_elenment(buffer, ref_resolver, class_resolver)
Py_INCREF(elem)
PyList_SET_ITEM(list_, i, elem)
return list_

cpdef xread(self, Buffer buffer):
cdef int32_t len_ = buffer.read_varint32()
cdef list collection_ = []
cdef list collection_ = PyList_New(len_)
self.fury.ref_resolver.reference(collection_)
for i in range(len_):
collection_.append(self.fury.xdeserialize_ref(
elem = self.fury.xdeserialize_ref(
buffer, serializer=self.elem_serializer
))
)
Py_INCREF(elem)
PyList_SET_ITEM(collection_, i, elem)
return collection_


cdef populate_list(
cdef inline get_next_elenment(
Buffer buffer,
list list_,
MapRefResolver ref_resolver,
ClassResolver class_resolver):
cdef int32_t ref_id
cdef ClassInfo classinfo
cdef int32_t len_ = buffer.read_varint32()
for i in range(len_):
ref_id = ref_resolver.try_preserve_ref_id(buffer)
if ref_id < NOT_NULL_VALUE_FLAG:
list_.append(ref_resolver.get_read_object())
continue
# indicates that the object is first read.
classinfo = class_resolver.read_classinfo(buffer)
cls = classinfo.cls
# Note that all read operations in fast paths of list/tuple/set/dict/sub_dict
# must match corresponding writing operations. Otherwise, ref tracking will
# error.
if cls is str:
list_.append(buffer.read_string())
elif cls is int:
list_.append(buffer.read_varint64())
elif cls is bool:
list_.append(buffer.read_bool())
elif cls is float:
list_.append(buffer.read_double())
else:
o = classinfo.serializer.read(buffer)
ref_resolver.set_read_object(ref_id, o)
list_.append(o)
ref_id = ref_resolver.try_preserve_ref_id(buffer)
if ref_id < NOT_NULL_VALUE_FLAG:
return ref_resolver.get_read_object()
# indicates that the object is first read.
classinfo = class_resolver.read_classinfo(buffer)
cls = classinfo.cls
# Note that all read operations in fast paths of list/tuple/set/dict/sub_dict
# must match corresponding writing operations. Otherwise, ref tracking will
# error.
if cls is str:
return buffer.read_string()
elif cls is int:
return buffer.read_varint64()
elif cls is bool:
return buffer.read_bool()
elif cls is float:
return buffer.read_double()
else:
o = classinfo.serializer.read(buffer)
ref_resolver.set_read_object(ref_id, o)
return o


@cython.final
cdef class TupleSerializer(CollectionSerializer):
cpdef inline read(self, Buffer buffer):
cdef MapRefResolver ref_resolver = self.fury.ref_resolver
cdef ClassResolver class_resolver = self.fury.class_resolver
cdef list list_ = []
populate_list(buffer, list_, ref_resolver, class_resolver)
return tuple(list_)
cdef int32_t len_ = buffer.read_varint32()
cdef tuple tuple_ = PyTuple_New(len_)
for i in range(len_):
elem = get_next_elenment(buffer, ref_resolver, class_resolver)
Py_INCREF(elem)
PyTuple_SET_ITEM(tuple_, i, elem)
return tuple_

cpdef inline xread(self, Buffer buffer):
cdef int32_t len_ = buffer.read_varint32()
cdef list collection_ = []
cdef tuple tuple_ = PyTuple_New(len_)
for i in range(len_):
collection_.append(self.fury.xdeserialize_ref(
elem = self.fury.xdeserialize_ref(
buffer, serializer=self.elem_serializer
))
return tuple(collection_)
)
Py_INCREF(elem)
PyTuple_SET_ITEM(tuple_, i, elem)
return tuple_


@cython.final
Expand Down
Loading