From 703c1e7fc6ce1c064c6ff460e3015fc01beb1f77 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 10 Jun 2024 19:18:30 +0900 Subject: [PATCH] gh-119506: fix _io.TextIOWrapper.write() write during flush (#119507) --- Lib/test/test_io.py | 22 +++++++ ...-05-24-14-32-24.gh-issue-119506.-nMNqq.rst | 1 + Modules/_io/textio.c | 66 ++++++++++++------- 3 files changed, 67 insertions(+), 22 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 537c9fa7b9835b..1e444d2566e855 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -3960,6 +3960,28 @@ def write(self, data): t.write("x"*chunk_size) self.assertEqual([b"abcdef", b"ghi", b"x"*chunk_size], buf._write_stack) + def test_issue119506(self): + chunk_size = 8192 + + class MockIO(self.MockRawIO): + written = False + def write(self, data): + if not self.written: + self.written = True + t.write("middle") + return super().write(data) + + buf = MockIO() + t = self.TextIOWrapper(buf) + t.write("abc") + t.write("def") + # writing data which size >= chunk_size cause flushing buffer before write. + t.write("g" * chunk_size) + t.flush() + + self.assertEqual([b"abcdef", b"middle", b"g"*chunk_size], + buf._write_stack) + class PyTextIOWrapperTest(TextIOWrapperTest): io = pyio diff --git a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst new file mode 100644 index 00000000000000..f9b764ae0c49b3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst @@ -0,0 +1 @@ +Fix :meth:`!io.TextIOWrapper.write` method breaks internal buffer when the method is called again during flushing internal buffer. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 3de4c06704b8b9..ba69e2afd27570 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1701,34 +1701,56 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) bytes_len = PyBytes_GET_SIZE(b); } - if (self->pending_bytes == NULL) { - self->pending_bytes_count = 0; - self->pending_bytes = b; - } - else if (self->pending_bytes_count + bytes_len > self->chunk_size) { - // Prevent to concatenate more than chunk_size data. - if (_textiowrapper_writeflush(self) < 0) { - Py_DECREF(b); - return NULL; + // We should avoid concatinating huge data. + // Flush the buffer before adding b to the buffer if b is not small. + // https://github.com/python/cpython/issues/87426 + if (bytes_len >= self->chunk_size) { + // _textiowrapper_writeflush() calls buffer.write(). + // self->pending_bytes can be appended during buffer->write() + // or other thread. + // We need to loop until buffer becomes empty. + // https://github.com/python/cpython/issues/118138 + // https://github.com/python/cpython/issues/119506 + while (self->pending_bytes != NULL) { + if (_textiowrapper_writeflush(self) < 0) { + Py_DECREF(b); + return NULL; + } } - self->pending_bytes = b; } - else if (!PyList_CheckExact(self->pending_bytes)) { - PyObject *list = PyList_New(2); - if (list == NULL) { - Py_DECREF(b); - return NULL; - } - PyList_SET_ITEM(list, 0, self->pending_bytes); - PyList_SET_ITEM(list, 1, b); - self->pending_bytes = list; + + if (self->pending_bytes == NULL) { + assert(self->pending_bytes_count == 0); + self->pending_bytes = b; } else { - if (PyList_Append(self->pending_bytes, b) < 0) { - Py_DECREF(b); - return NULL; + if (!PyList_CheckExact(self->pending_bytes)) { + PyObject *list = PyList_New(0); + if (list == NULL) { + Py_DECREF(b); + return NULL; + } + // PyList_New() may trigger GC and other thread may call write(). + // So, we need to check the self->pending_bytes is a list again. + if (PyList_CheckExact(self->pending_bytes)) { + // Releasing empty list won't trigger GC and/or __del__. + Py_DECREF(list); + } + else { + if (PyList_Append(list, self->pending_bytes) < 0) { + Py_DECREF(list); + Py_DECREF(b); + return NULL; + } + Py_SETREF(self->pending_bytes, list); + } } + + int ret = PyList_Append(self->pending_bytes, b); Py_DECREF(b); + if (ret < 0) { + return NULL; + } } self->pending_bytes_count += bytes_len;