From 8448545f14ff3e9be0b1cbea6a7804a454ef0cea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9A=D0=BE=D1=80=D0=B5=D0=BD=D0=B1=D0=B5=D1=80=D0=B3=20?= =?UTF-8?q?=D0=9C=D0=B0=D1=80=D0=BA=20=28imac=29?= Date: Thu, 17 Dec 2015 15:51:16 +0500 Subject: [PATCH] Websocket XOR performance improved. Fixes #686 Both cython and pure python-based. --- aiohttp/_websocket.pyx | 52 ++++++++++++++++++++++++++++++++---------- aiohttp/websocket.py | 14 +++++++++++- 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/aiohttp/_websocket.pyx b/aiohttp/_websocket.pyx index 8e74ac134d2..a2d2c55f20c 100644 --- a/aiohttp/_websocket.pyx +++ b/aiohttp/_websocket.pyx @@ -1,20 +1,48 @@ -from cpython cimport PyBytes_FromStringAndSize, PyBytes_AsString -from cpython.ref cimport PyObject +from cpython cimport PyBytes_AsString +#from cpython cimport PyByteArray_AsString # Cython still does not export that cdef extern from "Python.h": - char* PyByteArray_AsString(object bytearray) except NULL + char* PyByteArray_AsString(bytearray ba) except NULL +from libc.stdint cimport uint32_t, uint64_t, uintmax_t def _websocket_mask_cython(bytes mask, bytearray data): - cdef Py_ssize_t mask_len, data_len, i - cdef char * in_buf - cdef char * out_buf - cdef char * mask_buf - cdef bytes ret - mask_len = len(mask) + """Note, this function mutates its `data` argument + """ + cdef: + Py_ssize_t data_len, i + # bit operations on signed integers are implementation-specific + unsigned char * in_buf + const unsigned char * mask_buf + uint32_t uint32_msk + uint64_t uint64_msk + + assert len(mask) == 4 + data_len = len(data) - in_buf = PyByteArray_AsString(data) - mask_buf = PyBytes_AsString(mask) + in_buf = <unsigned char*>PyByteArray_AsString(data) + mask_buf = <const unsigned char*>PyBytes_AsString(mask) + uint32_msk = (<uint32_t*>mask_buf)[0] + + # TODO: align in_data ptr to achieve even faster speeds + # is this needed in Python?! 
malloc() always aligns to sizeof(long) bytes + + if sizeof(uintmax_t) >= 8: + uint64_msk = uint32_msk + uint64_msk = (uint64_msk << 32) | uint32_msk + + while data_len >= 8: + (<uint64_t*>in_buf)[0] ^= uint64_msk + in_buf += 8 + data_len -= 8 + + + while data_len >= 4: + (<uint32_t*>in_buf)[0] ^= uint32_msk + in_buf += 4 + data_len -= 4 + for i in range(0, data_len): - in_buf[i] = in_buf[i] ^ mask_buf[i % 4] + in_buf[i] ^= mask_buf[i] + return data diff --git a/aiohttp/websocket.py b/aiohttp/websocket.py index 10056464e31..1853b6eac2c 100644 --- a/aiohttp/websocket.py +++ b/aiohttp/websocket.py @@ -6,6 +6,7 @@ import hashlib import os import random +import sys from struct import Struct from aiohttp import errors, hdrs @@ -172,6 +173,9 @@ def WebSocketParser(out, buf): Message(OPCODE_BINARY, data, ''), len(data)) +native_byteorder = sys.byteorder + + def _websocket_mask_python(mask, data): """Websocket masking function. @@ -184,7 +188,15 @@ def _websocket_mask_python(mask, data): version when available. """ - return bytes(b ^ mask[i % 4] for i, b in enumerate(data)) + assert len(mask) == 4 + datalen = len(data) + if datalen == 0: + # everything works without this, but that may change in a later Python. + return b'' + data = int.from_bytes(data, native_byteorder) + mask = int.from_bytes(mask * (datalen // 4) + mask[: datalen % 4], + native_byteorder) + return (data ^ mask).to_bytes(datalen, native_byteorder) if bool(os.environ.get('AIOHTTP_NO_EXTENSIONS')):