Skip to content

Commit

Permalink
segfault fix, perf improvement
Browse files Browse the repository at this point in the history
Fixed segfault in byteswap with incorect usage
Improved byteswap performance with string formats
Compile with O3 when available
Fixed NDEBUG define
  • Loading branch information
qchateau committed Oct 7, 2019
1 parent 06d1bb9 commit f1dd326
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 47 deletions.
38 changes: 19 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,25 @@ The script available in `tests/test_perf.py` measures performance comparing to t

Here are the result "on my machine" (Ubuntu in Virtualbox on a laptop):
```
byteswap list of int | x 8.204 ( 9.208us -> 1.122us)
byteswap str | x 6.433 ( 9.689us -> 1.506us)
calcsize | x149.423 ( 61.967us -> 0.415us)
compiled pack | x 43.227 ( 34.758us -> 0.804us)
compiled pack_dict | x 26.490 ( 34.951us -> 1.319us)
compiled pack_into | x 32.017 ( 39.522us -> 1.234us)
compiled pack_into_dict | x 26.817 ( 38.984us -> 1.454us)
compiled unpack | x 34.454 ( 31.814us -> 0.923us)
compiled unpack_dict | x 23.534 ( 34.071us -> 1.448us)
compiled unpack_from | x 27.170 ( 31.884us -> 1.174us)
compiled unpack_from_dict | x 22.600 ( 33.927us -> 1.501us)
pack | x 78.314 ( 105.593us -> 1.348us)
pack_dict | x 52.916 ( 106.748us -> 2.017us)
pack_into | x 82.233 ( 119.950us -> 1.459us)
pack_into_dict | x 45.214 ( 111.338us -> 2.462us)
unpack | x 82.712 ( 93.686us -> 1.133us)
unpack_dict | x 41.064 ( 91.473us -> 2.228us)
unpack_from | x 81.678 ( 95.729us -> 1.172us)
unpack_from_dict | x 40.379 ( 90.430us -> 2.240us)
byteswap list of int | x 8.779 ( 8.638us -> 0.984us)
byteswap str | x 17.466 ( 9.158us -> 0.524us)
calcsize | x139.330 ( 61.060us -> 0.438us)
compiled pack | x 47.389 ( 35.968us -> 0.759us)
compiled pack_dict | x 27.184 ( 34.588us -> 1.272us)
compiled pack_into | x 32.037 ( 38.650us -> 1.206us)
compiled pack_into_dict | x 27.343 ( 37.718us -> 1.379us)
compiled unpack | x 33.928 ( 31.278us -> 0.922us)
compiled unpack_dict | x 21.627 ( 31.597us -> 1.461us)
compiled unpack_from | x 30.622 ( 29.977us -> 0.979us)
compiled unpack_from_dict | x 20.479 ( 30.936us -> 1.511us)
pack | x 77.003 ( 103.030us -> 1.338us)
pack_dict | x 53.254 ( 103.255us -> 1.939us)
pack_into | x 82.829 ( 119.373us -> 1.441us)
pack_into_dict | x 52.173 ( 108.135us -> 2.073us)
unpack | x 78.459 ( 91.896us -> 1.171us)
unpack_dict | x 40.287 ( 89.300us -> 2.217us)
unpack_from | x 77.027 ( 91.202us -> 1.184us)
unpack_from_dict | x 39.467 ( 88.043us -> 2.231us)
```

*Disclaimer:* these results may and will vary largely depending on the number of elements and types you pack/unpack. This script is provided as-is, and I will gladly accept an improved script providing more reliable results.
Expand Down
56 changes: 30 additions & 26 deletions cbitstruct/_cbitstruct.c
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ static void c_pack(
int nbytes = (desc->bits + 7) / 8;
int padding = nbytes * 8 - desc->bits;
#if PY_LITTLE_ENDIAN
assert(nbytes <= sizeof(data));
assert(nbytes <= (int)sizeof(data));
c_byteswitch((uint8_t*)&data, nbytes);
#endif
data >>= padding;
Expand Down Expand Up @@ -410,7 +410,7 @@ static void c_unpack(
int padding = nbytes * 8 - desc->bits;
data <<= padding;
#if PY_LITTLE_ENDIAN
assert(nbytes <= sizeof(data));
assert(nbytes <= (int)sizeof(data));
c_byteswitch((uint8_t*)&data, nbytes);
#endif
}
Expand Down Expand Up @@ -456,7 +456,7 @@ static bool python_to_parsed_elements(
Py_ssize_t data_size,
CompiledFormat fmt)
{
assert(data_size >= fmt.ndescs);
assert(data_size >= fmt.ndescs - fmt.npadding);

int n = 0;
for (int i = 0; i < fmt.ndescs; ++i) {
Expand Down Expand Up @@ -676,8 +676,6 @@ static PyObject* CompiledFormat_pack_raw(
PyObject** data,
Py_ssize_t n_data)
{
assert(PyTuple_Check(args));

ParsedElement elements_stack[SMALL_FORMAT_OPTIMIZATION];
ParsedElement* elements = elements_stack;
bool use_stack = compiled_fmt.ndescs <= SMALL_FORMAT_OPTIMIZATION;
Expand Down Expand Up @@ -1170,8 +1168,6 @@ CompiledFormatDict_pack_into_impl(PyCompiledFormatDictObject *self,
/*[clinic end generated code: output=ee246de261e9c699 input=290a9a4a3e3ed942]*/
// clang-format on
{
assert(PyTuple_Check(args));

PyObject* return_value = NULL;

Py_ssize_t nnames = PySequence_Fast_GET_SIZE(self->names);
Expand Down Expand Up @@ -1461,8 +1457,6 @@ pack_into_dict_impl(PyObject *module, const char *fmt, PyObject *names,
/*[clinic end generated code: output=619b415fc187011b input=e72dec46484ec66f]*/
// clang-format on
{
assert(PyTuple_Check(args));

PyObject* return_value = NULL;
PyCompiledFormatDictObject self;
memset(&self, 0, sizeof(self));
Expand Down Expand Up @@ -1692,28 +1686,38 @@ byteswap_impl(PyObject *module, PyObject *fmt, Py_buffer *data,
goto exit;
}

int sum = 0;
for (int i = 0; i < length; ++i) {
PyObject* item = PySequence_GetItem(fmt, i);
if (!item) {
long sum = 0;
if (PyUnicode_Check(fmt)) {
const char* cfmt = PyUnicode_AsUTF8(fmt);
if (!cfmt) {
goto exit;
}

long len = -1;
if (PyUnicode_Check(item)) {
PyObject* pylong = PyLong_FromUnicodeObject(item, 10);
len = PyLong_AsLong(pylong);
Py_DECREF(pylong);
}
else {
len = PyLong_AsLong(item);
for (int i = 0; i < length; ++i) {
int len = cfmt[i] - '0';
if (len < 0 || len > 9) {
PyErr_SetString(
PyExc_ValueError, "bad value in byteswap format");
goto exit;
}
sum += len;
count_iter[i] = len;
}
}
else {
for (int i = 0; i < length; ++i) {
PyObject* item = PySequence_GetItem(fmt, i);
if (!item) {
goto exit;
}

sum += len;
count_iter[i] = len;
Py_DECREF(item);
if (len < 0 || PyErr_Occurred()) {
goto exit;
long len = PyLong_AsLong(item);
sum += len;
count_iter[i] = len;
Py_DECREF(item);
if (len == -1 && PyErr_Occurred()) {
goto exit;
}
}
}

Expand Down
1 change: 1 addition & 0 deletions cbitstruct/tests/test_cornercase.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def test_byteswap_bad_args(self):
self.assertRaises(TypeError, cbitstruct.byteswap, None, b"\xff")
self.assertRaises(TypeError, cbitstruct.byteswap, "23")
self.assertRaises(TypeError, cbitstruct.byteswap)
self.assertRaises(ValueError, cbitstruct.byteswap, "\x02\x02", b"z")

def test_calcsize_bad_args(self):
self.assertRaises(TypeError, cbitstruct.calcsize, "g32")
Expand Down
7 changes: 5 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,19 @@

extra_compile_args = []
extra_link_args = []
undef_macros = []


if sys.platform == "win32":
extra_compile_args += []
else:
extra_compile_args += ["-std=c11", "-Wall", "-Werror"]
extra_compile_args += ["-std=c11", "-Wall", "-Werror", "-O3"]


if os.environ.get("COVERAGE"):
extra_compile_args += ["-g", "-O0", "-fprofile-arcs", "-ftest-coverage"]
extra_link_args += ["-fprofile-arcs"]
undef_macros += ["NDEBUG"]


with open("README.md", "r") as fh:
Expand All @@ -25,7 +27,7 @@

setup(
name="cbitstruct",
version="1.0.2",
version="1.0.3",
author="Quentin CHATEAU",
author_email="[email protected]",
license="GPLv3",
Expand Down Expand Up @@ -57,6 +59,7 @@
extra_link_args=extra_link_args,
sources=["cbitstruct/_cbitstruct.c"],
include_dirs=["cbitstruct/"],
undef_macros=undef_macros,
)
],
packages=["cbitstruct", "cbitstruct.tests"],
Expand Down

0 comments on commit f1dd326

Please sign in to comment.