From 7ea4e24287a1e57836081b23f49e65e95eabc4da Mon Sep 17 00:00:00 2001 From: jakirkham Date: Mon, 24 Aug 2020 18:31:34 -0700 Subject: [PATCH] Simplify `frame_split_size` (#4067) * Drop 2nd check of `frame` in `frame_split_size` As we already checked `nbytes` and `return`ed if it was `<= n`, we already know `nbytes(frame) > n`. So there is no need to check it again. Thus we get rid of this second check. * Store `nbytes(frame)` in `frame_split_size` To avoid calling `nbytes(frame)` repeatedly, just store the result as a variable and access that. * Assign other computed values in `frame_split_size` This should also make it a bit clearer what is going on in this code. * Run `black` on `distributed/protocol/utils.py` * Always use `memoryview` in `frame_split_size` As slicing a `memoryview` is free (it just views the underlying data) and `itemsize` is easy to access from it, just always use `memoryview`. This keeps `frame_split_size` performing in a reasonable way. It also simplifies the code a bit. * Get `memoryview` of `frame` to start * Skip assigning `itemsize` This is fast and easy to access from `memoryview` and we only use it as part of computing the next two variables. So just skip assigning it and use this `memoryview` property directly. 
--- distributed/protocol/utils.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/distributed/protocol/utils.py b/distributed/protocol/utils.py index 66c9687bae..d5694a1b3f 100644 --- a/distributed/protocol/utils.py +++ b/distributed/protocol/utils.py @@ -23,21 +23,15 @@ def frame_split_size(frame, n=BIG_BYTES_SHARD_SIZE) -> list: >>> frame_split_size([b'12345', b'678'], n=3) # doctest: +SKIP [b'123', b'45', b'678'] """ - if nbytes(frame) <= n: + frame = memoryview(frame) + + if frame.nbytes <= n: return [frame] - if nbytes(frame) > n: - if isinstance(frame, (bytes, bytearray)): - frame = memoryview(frame) - try: - itemsize = frame.itemsize - except AttributeError: - itemsize = 1 + nitems = frame.nbytes // frame.itemsize + items_per_shard = n // frame.itemsize - return [ - frame[i : i + n // itemsize] - for i in range(0, nbytes(frame) // itemsize, n // itemsize) - ] + return [frame[i : i + items_per_shard] for i in range(0, nitems, items_per_shard)] def merge_frames(header, frames):