Skip to content

Commit

Permalink
gh-107137: Add _PyTupleBuilder API to the internal C API
Browse files Browse the repository at this point in the history
Add _PyTupleBuilder structure and functions:

* _PyTupleBuilder_Init()
* _PyTupleBuilder_Alloc()
* _PyTupleBuilder_Append()
* _PyTupleBuilder_AppendUnsafe()
* _PyTupleBuilder_Finish()
* _PyTupleBuilder_Dealloc()

The builder tracks the size of the tuple and resizes it in
_PyTupleBuilder_Finish() if needed. Don't allocate an empty tuple.
Allocate an array of 16 objects on the stack to avoid allocating
a small tuple. _PyTupleBuilder_Append() overallocates the tuple by 25%
to reduce the number of _PyTuple_Resize() calls.

Do not track the temporary internal tuple by the GC before
_PyTupleBuilder_Finish() creates the final complete and consistent
tuple object.

Use the _PyTupleBuilder API in itertools batched_next(),
PySequence_Tuple() and initialize_structseq_dict().

Also add helper functions:

* _PyTuple_ResizeNoTrack()
* _PyTuple_NewNoTrack()
  • Loading branch information
vstinner committed Jul 23, 2023
1 parent 0ae4870 commit 38a4c0c
Show file tree
Hide file tree
Showing 5 changed files with 241 additions and 99 deletions.
140 changes: 138 additions & 2 deletions Include/internal/pycore_tuple.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

#include "tupleobject.h" /* _PyTuple_CAST() */

extern PyObject* _PyTuple_NewNoTrack(Py_ssize_t size);
extern int _PyTuple_ResizeNoTrack(PyObject **pv, Py_ssize_t newsize);

/* runtime lifecycle */

Expand Down Expand Up @@ -73,6 +73,142 @@ typedef struct {
PyTupleObject *it_seq; /* Set to NULL when iterator is exhausted */
} _PyTupleIterObject;


// --- _PyTupleBuilder API ---------------------------------------------------

/* State of an incremental tuple construction.
 *
 * Set up with _PyTupleBuilder_Init(), filled with the Append functions and
 * turned into a tuple by _PyTupleBuilder_Finish().
 */
typedef struct _PyTupleBuilder {
    /* Inline storage used while at most 16 items have to be held, so that
       no tuple object needs to be allocated for small results. */
    PyObject *small_tuple[16];

    /* Tuple object used as storage once the inline array is too small;
       NULL as long as the inline array (or nothing) is in use. */
    PyObject *tuple;

    /* Storage currently in use: either small_tuple or the item array of
       `tuple`. NULL until the first allocation. */
    PyObject **items;

    /* Number of items stored so far. */
    size_t size;

    /* Number of items that `items` can hold. */
    size_t allocated;
} _PyTupleBuilder;

/* Make sure the builder can hold at least `size` items.
 *
 * Requests that fit in the inline small_tuple array are served from it.
 * Larger requests create an untracked tuple (or resize the existing one);
 * items already stored in the inline array are moved into the new tuple.
 *
 * Return 0 on success. Return -1 on failure: MemoryError is set for a size
 * larger than PY_SSIZE_T_MAX, otherwise the failing helper is presumably
 * responsible for setting the exception.
 */
static inline int
_PyTupleBuilder_Alloc(_PyTupleBuilder *builder, size_t size)
{
    if (size > (size_t)PY_SSIZE_T_MAX) {
        /* Tuple sizes are Py_ssize_t: larger values cannot be represented */
        PyErr_NoMemory();
        return -1;
    }
    if (size <= builder->allocated) {
        /* Already large enough (this includes size == 0) */
        return 0;
    }

    if (size <= Py_ARRAY_LENGTH(builder->small_tuple)) {
        /* A heap tuple is always larger than the inline array, so this
           branch can only be reached before any tuple was created. */
        assert(builder->tuple == NULL);
        builder->items = builder->small_tuple;
        builder->allocated = Py_ARRAY_LENGTH(builder->small_tuple);
        return 0;
    }

    assert(size >= 1);
    if (builder->tuple != NULL) {
        if (_PyTuple_ResizeNoTrack(&builder->tuple, (Py_ssize_t)size) < 0) {
            /* NOTE(review): _PyTuple_Resize() is documented to destroy the
               tuple and set the pointer to NULL on failure; presumably the
               NoTrack variant does the same. In that case `items` would
               dangle, so forget the old storage. */
            if (builder->tuple == NULL) {
                builder->items = NULL;
                builder->size = 0;
                builder->allocated = 0;
            }
            return -1;
        }
    }
    else {
        builder->tuple = _PyTuple_NewNoTrack((Py_ssize_t)size);
        if (builder->tuple == NULL) {
            /* Items stored so far stay in the inline array */
            return -1;
        }

        if (builder->size > 0) {
            /* Move the items collected in the inline array; the references
               are transferred, not duplicated. */
            memcpy(_PyTuple_ITEMS(builder->tuple),
                   builder->small_tuple,
                   builder->size * sizeof(builder->small_tuple[0]));
        }
    }
    builder->items = _PyTuple_ITEMS(builder->tuple);
    builder->allocated = size;
    return 0;
}

/* Initialize a tuple builder.
 *
 * The whole structure is zeroed. If `size` is greater than zero, room for
 * that many items is reserved with _PyTupleBuilder_Alloc().
 *
 * Return 0 on success, or the (negative) result of _PyTupleBuilder_Alloc()
 * if the reservation fails.
 */
static inline int
_PyTupleBuilder_Init(_PyTupleBuilder *builder, Py_ssize_t size)
{
    memset(builder, 0, sizeof(*builder));

    if (size <= 0) {
        /* Nothing to reserve: storage is set up lazily */
        return 0;
    }
    return _PyTupleBuilder_Alloc(builder, (size_t)size);
}

// Store item in the next free slot without checking or growing the capacity.
// The caller must have reserved enough room beforehand (see
// _PyTupleBuilder_Init() and _PyTupleBuilder_Alloc()); this is only verified
// by assertions.
static inline void
_PyTupleBuilder_AppendUnsafe(_PyTupleBuilder *builder, PyObject *item)
{
    assert(builder->items != NULL);
    assert(builder->size < builder->allocated);
    builder->items[builder->size++] = item;
}

/* Append item to the builder, growing the storage if it is full.
 *
 * When growth is needed, the capacity is over-allocated by 25% to reduce
 * the number of resize operations, and always by at least one slot.
 *
 * Return 0 on success. Return -1 on failure; in that case item has not been
 * stored and still belongs to the caller.
 */
static inline int
_PyTupleBuilder_Append(_PyTupleBuilder *builder, PyObject *item)
{
    if (builder->size >= (size_t)PY_SSIZE_T_MAX) {
        // prevent integer overflow
        PyErr_NoMemory();
        return -1;
    }
    if (builder->size >= builder->allocated) {
        // Over-allocate by 25%. size < PY_SSIZE_T_MAX was checked above,
        // so neither addition can overflow size_t.
        size_t allocated = builder->size + (builder->size >> 2);
        if (allocated <= builder->size) {
            // For size < 4 the 25% increment is zero (in particular for an
            // empty builder with nothing allocated yet): always request
            // room for at least one more item, otherwise the unchecked
            // store below would write past the storage or through NULL.
            allocated = builder->size + 1;
        }
        if (_PyTupleBuilder_Alloc(builder, allocated) < 0) {
            return -1;
        }
    }
    _PyTupleBuilder_AppendUnsafe(builder, item);
    return 0;
}

/* Release everything held by the builder and reset it to the empty state.
 *
 * Intended for error paths, when _PyTupleBuilder_Finish() will not be
 * called. Items stored in the heap tuple are released together with the
 * tuple; items still held in the inline array are released one by one.
 */
static inline void
_PyTupleBuilder_Dealloc(_PyTupleBuilder *builder)
{
    if (builder->tuple == NULL && builder->items == builder->small_tuple) {
        // Items appended so far live in the inline array and nothing else
        // owns them: drop them here to avoid leaking references. The
        // `items` check skips the case where `items` no longer points to
        // the inline array (e.g. after the heap tuple was handed off).
        for (size_t i = 0; i < builder->size; i++) {
            Py_XDECREF(builder->small_tuple[i]);
        }
    }
    Py_CLEAR(builder->tuple);
    builder->items = NULL;
    builder->size = 0;
    builder->allocated = 0;
}

/* Complete the construction and return the resulting tuple.
 *
 * - No item stored: the builder is released and PyTuple_New(0) is returned.
 * - Items stored in the inline array: a tuple is created from them with
 *   _PyTuple_FromArraySteal().
 * - Items stored in the heap tuple: the tuple is shrunk to the number of
 *   stored items, tracked by the GC and returned.
 *
 * Return NULL on failure. The builder no longer refers to the returned
 * tuple afterwards.
 */
static inline PyObject*
_PyTupleBuilder_Finish(_PyTupleBuilder *builder)
{
    if (builder->size == 0) {
        _PyTupleBuilder_Dealloc(builder);
        // return the empty tuple singleton
        return PyTuple_New(0);
    }

    if (builder->tuple == NULL) {
        // Small result: the items are still in the inline array
        PyObject *tuple = _PyTuple_FromArraySteal(builder->items,
                                                  (Py_ssize_t)builder->size);
        builder->size = 0;
        return tuple;
    }

    // Cut the over-allocated tuple back to the number of stored items
    if (_PyTuple_ResizeNoTrack(&builder->tuple, (Py_ssize_t)builder->size) < 0) {
        _PyTupleBuilder_Dealloc(builder);
        return NULL;
    }

    PyObject *result = builder->tuple;
    // Ownership moves to the caller. Also forget the storage description:
    // `items` pointed into the tuple as it was before the resize and must
    // not be used to modify the now complete tuple.
    builder->tuple = NULL;
    builder->items = NULL;
    builder->size = 0;
    builder->allocated = 0;
    // Avoid _PyObject_GC_TRACK() to avoid including pycore_object.h
    PyObject_GC_Track(result);
    return result;
}


#ifdef __cplusplus
}
#endif
Expand Down
57 changes: 26 additions & 31 deletions Modules/itertoolsmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "pycore_moduleobject.h" // _PyModule_GetState()
#include "pycore_typeobject.h" // _PyType_GetModuleState()
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_tuple.h" // _PyTuple_ITEMS()
#include "pycore_tuple.h" // _PyTupleBuilder
#include "structmember.h" // PyMemberDef
#include <stddef.h> // offsetof()

Expand Down Expand Up @@ -193,47 +193,42 @@ batched_traverse(batchedobject *bo, visitproc visit, void *arg)
static PyObject *
batched_next(batchedobject *bo)
{
Py_ssize_t i;
Py_ssize_t n = bo->batch_size;
PyObject *it = bo->it;
PyObject *item;
PyObject *result;

if (it == NULL) {
return NULL;
}
result = PyTuple_New(n);
if (result == NULL) {

_PyTupleBuilder builder;
Py_ssize_t n = bo->batch_size;
if (_PyTupleBuilder_Init(&builder, n) < 0) {
return NULL;
}

iternextfunc iternext = *Py_TYPE(it)->tp_iternext;
PyObject **items = _PyTuple_ITEMS(result);
for (i=0 ; i < n ; i++) {
item = iternext(it);
for (Py_ssize_t i=0 ; i < n; i++) {
PyObject *item = iternext(it);
if (item == NULL) {
goto null_item;
if (PyErr_Occurred()) {
if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
/* Input raised an exception other than StopIteration */
goto error;
}
PyErr_Clear();
// StopIteration was raised
}
if (i == 0) {
goto error;
}
break;
}
items[i] = item;
_PyTupleBuilder_AppendUnsafe(&builder, item);
}
return result;
return _PyTupleBuilder_Finish(&builder);

null_item:
if (PyErr_Occurred()) {
if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
/* Input raised an exception other than StopIteration */
Py_CLEAR(bo->it);
Py_DECREF(result);
return NULL;
}
PyErr_Clear();
}
if (i == 0) {
Py_CLEAR(bo->it);
Py_DECREF(result);
return NULL;
}
_PyTuple_Resize(&result, i);
return result;
error:
_PyTupleBuilder_Dealloc(&builder);
Py_CLEAR(bo->it);
return NULL;
}

static PyType_Slot batched_slots[] = {
Expand Down
70 changes: 26 additions & 44 deletions Objects/abstract.c
Original file line number Diff line number Diff line change
Expand Up @@ -2074,11 +2074,6 @@ PySequence_DelSlice(PyObject *s, Py_ssize_t i1, Py_ssize_t i2)
PyObject *
PySequence_Tuple(PyObject *v)
{
PyObject *it; /* iter(v) */
Py_ssize_t n; /* guess for result tuple size */
PyObject *result = NULL;
Py_ssize_t j;

if (v == NULL) {
return null_error();
}
Expand All @@ -2091,66 +2086,53 @@ PySequence_Tuple(PyObject *v)
a copy, so there's no need for exactness below. */
return Py_NewRef(v);
}
if (PyList_CheckExact(v))
if (PyList_CheckExact(v)) {
return PyList_AsTuple(v);
}

/* Get iterator. */
it = PyObject_GetIter(v);
if (it == NULL)
_PyTupleBuilder builder;
if (_PyTupleBuilder_Init(&builder, 0) < 0) {
return NULL;
}

/* Get iterator. */
PyObject *it = PyObject_GetIter(v); // iter(v)
if (it == NULL) {
goto Fail;
}

/* Guess result size and allocate space. */
n = PyObject_LengthHint(v, 10);
if (n == -1)
Py_ssize_t n = PyObject_LengthHint(v, 10); // Guess for result tuple size
if (n == -1) {
goto Fail;
result = PyTuple_New(n);
if (result == NULL)
}
if (_PyTupleBuilder_Alloc(&builder, n) < 0) {
goto Fail;
}

/* Fill the tuple. */
Py_ssize_t j;
for (j = 0; ; ++j) {
PyObject *item = PyIter_Next(it);
if (item == NULL) {
if (PyErr_Occurred())
if (PyErr_Occurred()) {
goto Fail;
}
break;
}
if (j >= n) {
size_t newn = (size_t)n;
/* The over-allocation strategy can grow a bit faster
than for lists because unlike lists the
over-allocation isn't permanent -- we reclaim
the excess before the end of this routine.
So, grow by ten and then add 25%.
*/
newn += 10u;
newn += newn >> 2;
if (newn > PY_SSIZE_T_MAX) {
/* Check for overflow */
PyErr_NoMemory();
Py_DECREF(item);
goto Fail;
}
n = (Py_ssize_t)newn;
if (_PyTuple_Resize(&result, n) != 0) {
Py_DECREF(item);
goto Fail;
}

if (_PyTupleBuilder_Append(&builder, item) < 0) {
Py_DECREF(item);
goto Fail;
}
PyTuple_SET_ITEM(result, j, item);
}

/* Cut tuple back if guess was too large. */
if (j < n &&
_PyTuple_Resize(&result, j) != 0)
goto Fail;

Py_DECREF(it);
return result;
return _PyTupleBuilder_Finish(&builder);

Fail:
Py_XDECREF(result);
Py_DECREF(it);
_PyTupleBuilder_Dealloc(&builder);
Py_XDECREF(it);
return NULL;
}

Expand Down
Loading

0 comments on commit 38a4c0c

Please sign in to comment.