From d4bfaa8861693c961d9c5c581aae470e2185f1f2 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 13:28:50 -0400 Subject: [PATCH 1/8] gh-85393: Handle partial .write in TextIOWrapper Also improves non-blocking support (gh-57531) and unbuffered support (gh-61606) --- ...5-05-21-13-28-43.gh-issue-85383.NCmYBp.rst | 3 + Modules/_io/textio.c | 202 ++++++++++++++++-- 2 files changed, 192 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst diff --git a/Misc/NEWS.d/next/Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst b/Misc/NEWS.d/next/Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst new file mode 100644 index 00000000000000..363648b935ce7b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst @@ -0,0 +1,3 @@ +Improve behavior of :meth:`TextIOWrapper.write` when there is a partial write +caused by underlying buffer being :class:`RawIOBase` (:gh:`91606` :gh:`85393`) +or if there is a :exc:`BlockingIOError` :gh:`97531` diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 86328e46a7b131..2da80493ce57d0 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1573,17 +1573,73 @@ _io_TextIOWrapper_detach_impl(textio *self) return buffer; } +/* Prepend bytes to pending_bytes. + + At the front because other bytes were added via TextIOWrapper.write + during buffer.write (see: gh-119506, gh-118138). */ +static int +_textiowrapper_prepend(textio *self, PyObject *unwritten) +{ + assert(PyBytes_CheckExact(unwritten)); + + /* Ensure pending_bytes and pending_bytes_count are protected. */ + _Py_AssertHoldsTstate(); + + if (self->pending_bytes == NULL) { + self->pending_bytes = unwritten; + assert(self->pending_bytes == 0); + self->pending_bytes_count += PyBytes_GET_SIZE(unwritten); + } + else if (!PyList_CheckExact(self->pending_bytes)) { + PyObject *list = PyList_New(2); + if (list == NULL) { + Py_DECREF(unwritten); + return -1; + } + // Since Python 3.12, allocating GC object won't trigger GC and release + // GIL. See https://github.com/python/cpython/issues/97922 + assert(!PyList_CheckExact(self->pending_bytes)); + PyList_SET_ITEM(list, 0, unwritten); + PyList_SET_ITEM(list, 1, self->pending_bytes); + self->pending_bytes = list; + self->pending_bytes_count += PyBytes_GET_SIZE(unwritten); + } + else { + PyList_Insert(self->pending_bytes, 0, unwritten); + self->pending_bytes_count += PyBytes_GET_SIZE(unwritten); + Py_DECREF(unwritten); + } + return 0; +} + /* Flush the internal write buffer. This doesn't explicitly flush the underlying buffered object, though. */ static int _textiowrapper_writeflush(textio *self) { + /* Validate have exclusive access to members. + + NOTE: this code relies on GIL / @critical_section. It needs to ensure + state is "safe" before/after calls to other Python code. + + In particular: + _textiowrapper_writeflush() calls buffer.write(). self->pending_bytes + can be appended during buffer->write() or other thread. + + At the end of this function either pending_bytes needs to be NULL _or_ + there needs to be an exception. + + For more details see: gh-119506, gh-118138. */ + _Py_AssertHoldsTstate(); + if (self->pending_bytes == NULL) return 0; PyObject *pending = self->pending_bytes; + Py_ssize_t bytes_to_write = self->pending_bytes_count; PyObject *b; + /* Make pending_bytes into a contiguous bytes. */ if (PyBytes_Check(pending)) { b = Py_NewRef(pending); } @@ -1628,20 +1684,140 @@ _textiowrapper_writeflush(textio *self) assert(pos == self->pending_bytes_count); } + + /* pending_bytes is now owned by this function. */ self->pending_bytes_count = 0; self->pending_bytes = NULL; Py_DECREF(pending); PyObject *ret; - do { + /* Continue until all data written or an error occurs. */ + while (1) { + /* note: gh-119506 self->pending_bytes and self->pending_bytes_count + may be altered during this call. */ ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b); - } while (ret == NULL && _PyIO_trap_eintr()); + + /* Validate have exclusive access to members. */ + _Py_AssertHoldsTstate(); + + if (ret == NULL) { + /* PEP 475 Retry on EINTR */ + if (_PyIO_trap_eintr()) { + continue; + } + + /* Try to find out how many bytes were written before exception. */ + PyObject *exc = PyErr_GetRaisedException(); + /* If it's a BlockingIOError, use the bytes written. */ + if (PyErr_GivenExceptionMatches(exc, PyExc_BlockingIOError)) { + PyOSErrorObject *err = (PyOSErrorObject *)exc; + bytes_to_write -= err->written; + if (bytes_to_write) { + pending = PyBytes_FromStringAndSize( + PyBytes_AS_STRING(b) + err->written, + self->pending_bytes_count); + if (pending) { + /* _textiowrapper can raise an exception if it fails to + allocate a list, that just adds to active error + list. Already exiting as fast as possible. */ + _textiowrapper_prepend(self, pending); + } + else { + PyErr_SetString(PyExc_MemoryError, + "lost unwritten bytes during partial write"); + + } + } + } + /* XXX: For other exceptions this assumes all data was written. */ + /* Re-raise exceptions. */ + PyErr_SetRaisedException(exc); + Py_DECREF(b); /* note: ret is NULL / no need to decref */ + return -1; + } + + /* Try to determine bytes written from return value + + XXX: On unexpected return this matches previous behavior and asumes + all data was written. */ + /* OPEN QUESTION: None is common in CPython, should this warn? */ + if (ret == Py_None) { + Py_DECREF(b); + Py_DECREF(ret); + return 0; + } + + /* Should be an integer. */ + if (!_PyIndex_Check(ret)) { + PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "write returned '%s' not specified by BufferedIOBase or" + " TextIOBase, typically count of bytes written", + ret); + Py_DECREF(b); + Py_DECREF(ret); + return 0; + } + Py_ssize_t size = PyNumber_AsSsize_t(ret, PyExc_OverflowError); + + /* Check for unexpected return values. */ + /* Can't get out size follow return previous behavior. */ + if (size == -1 && PyErr_Occurred()) { + if (!PyErr_ExceptionMatches(PyExc_TypeError)) { + Py_DECREF(b); + Py_DECREF(ret); + return -1; + } + PyErr_Clear(); /* fall through */ + PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "buffer.write returned value '%s' not specified by" + " BufferedIOBase or TextIOBase", ret); + Py_DECREF(b); + Py_DECREF(ret); + return 0; + } + /* Negative count of bytes doesn't make sense. */ + else if (size < 0) { + PyErr_WarnFormat(PyExc_RuntimeWarning, 1, + "buffer.write returned negative count of bytes '%s'", + ret); + Py_DECREF(b); + Py_DECREF(ret); + return 0; + } + /* More written than passed in call. */ + else if (size > bytes_to_write) { + PyErr_WarnFormat(PyExc_RuntimeWarning, 1, + "buffer.write returned '%s' bytes written but was only" + " called with '%s' bytes to write", + ret, bytes_to_write); + Py_DECREF(b); + Py_DECREF(ret); + return 0; + } + + /* Wrote data! */ + bytes_to_write -= size; + assert(bytes_to_write >= 0); + + /* Avoid a new byte allocation if fully written. */ + if (bytes_to_write == 0) { + Py_DECREF(ret); + break; + } + + /* Make a new PyByte to keep type for next call to write. */ + pending = PyBytes_FromStringAndSize( + PyBytes_AS_STRING(b) + size, + self->pending_bytes_count); + Py_DECREF(b); + b = pending; + pending = NULL; + } + + /* All data owned by this function is written. Other bytes could have shown + up while running. */ + assert(self->pending_bytes_count >= 0); Py_DECREF(b); - // NOTE: We cleared buffer but we don't know how many bytes are actually written - // when an error occurred. - if (ret == NULL) - return -1; - Py_DECREF(ret); return 0; } @@ -1733,12 +1909,12 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) // Flush the buffer before adding b to the buffer if b is not small. // https://github.com/python/cpython/issues/87426 if (bytes_len >= self->chunk_size) { - // _textiowrapper_writeflush() calls buffer.write(). - // self->pending_bytes can be appended during buffer->write() - // or other thread. - // We need to loop until buffer becomes empty. - // https://github.com/python/cpython/issues/118138 - // https://github.com/python/cpython/issues/119506 + /* writeflush works to ensure all data written. + + Additional data may be written to the TextIO when the lock is + released while calling buffer.write (and show up in + pending_bytes). When that happens, flush again to avoid copying + the current bytes. */ while (self->pending_bytes != NULL) { if (_textiowrapper_writeflush(self) < 0) { Py_DECREF(b); From 947ff627f626b67bce3f5fe70a748444dd5946d5 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 16:43:33 -0400 Subject: [PATCH 2/8] Fix blurb reST --- .../Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst b/Misc/NEWS.d/next/Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst index 363648b935ce7b..787a6ed46085ed 100644 --- a/Misc/NEWS.d/next/Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst +++ b/Misc/NEWS.d/next/Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst @@ -1,3 +1,3 @@ -Improve behavior of :meth:`TextIOWrapper.write` when there is a partial write -caused by underlying buffer being :class:`RawIOBase` (:gh:`91606` :gh:`85393`) +Improve behavior of :meth:`io.TextIOWrapper.write` when there is a partial write +caused by underlying buffer being :class:`io.RawIOBase` (:gh:`91606` :gh:`85393`) or if there is a :exc:`BlockingIOError` :gh:`97531` From 4173474bb410f8243a1828f2a7b5473fbc0c5de3 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 16:55:18 -0400 Subject: [PATCH 3/8] Fix comments, better handle if write returuns a big/small int --- Modules/_io/textio.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 2da80493ce57d0..04f610d8a4abcc 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1738,8 +1738,8 @@ _textiowrapper_writeflush(textio *self) /* Try to determine bytes written from return value - XXX: On unexpected return this matches previous behavior and asumes - all data was written. */ + XXX: Unexpected return: match previous behavior assume all data was + written. */ /* OPEN QUESTION: None is common in CPython, should this warn? */ if (ret == Py_None) { Py_DECREF(b); @@ -1762,15 +1762,12 @@ _textiowrapper_writeflush(textio *self) /* Check for unexpected return values. */ /* Can't get out size follow return previous behavior. */ if (size == -1 && PyErr_Occurred()) { - if (!PyErr_ExceptionMatches(PyExc_TypeError)) { - Py_DECREF(b); - Py_DECREF(ret); - return -1; - } - PyErr_Clear(); /* fall through */ + /* Warn about the value, but do not error. */ + PyObject *exc = PyErr_GetRaisedException() PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "buffer.write returned value '%s' not specified by" - " BufferedIOBase or TextIOBase", ret); + "write returned value '%s' not specified by" + " BufferedIOBase or TextIOBase (%s)", ret, exc); + Py_DECREF(exc); Py_DECREF(b); Py_DECREF(ret); return 0; @@ -1778,8 +1775,7 @@ _textiowrapper_writeflush(textio *self) /* Negative count of bytes doesn't make sense. */ else if (size < 0) { PyErr_WarnFormat(PyExc_RuntimeWarning, 1, - "buffer.write returned negative count of bytes '%s'", - ret); + "write returned negative count of bytes '%s'", ret); Py_DECREF(b); Py_DECREF(ret); return 0; @@ -1787,9 +1783,8 @@ _textiowrapper_writeflush(textio *self) /* More written than passed in call. */ else if (size > bytes_to_write) { PyErr_WarnFormat(PyExc_RuntimeWarning, 1, - "buffer.write returned '%s' bytes written but was only" - " called with '%s' bytes to write", - ret, bytes_to_write); + "write returned '%s' bytes written but was only called" + " with '%s' bytes to write", ret, bytes_to_write); Py_DECREF(b); Py_DECREF(ret); return 0; @@ -1805,7 +1800,7 @@ _textiowrapper_writeflush(textio *self) break; } - /* Make a new PyByte to keep type for next call to write. */ + /* Make a new PyBytes to keep type for next call to write. */ pending = PyBytes_FromStringAndSize( PyBytes_AS_STRING(b) + size, self->pending_bytes_count); From 1364c3bedb3d667e8d9ebbe8f376ba2a81664393 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 17:00:43 -0400 Subject: [PATCH 4/8] Fix substring length argument --- Modules/_io/textio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 04f610d8a4abcc..9c43f5e43876fa 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1715,7 +1715,7 @@ _textiowrapper_writeflush(textio *self) if (bytes_to_write) { pending = PyBytes_FromStringAndSize( PyBytes_AS_STRING(b) + err->written, - self->pending_bytes_count); + bytes_to_write); if (pending) { /* _textiowrapper can raise an exception if it fails to allocate a list, that just adds to active error @@ -1803,7 +1803,7 @@ _textiowrapper_writeflush(textio *self) /* Make a new PyBytes to keep type for next call to write. */ pending = PyBytes_FromStringAndSize( PyBytes_AS_STRING(b) + size, - self->pending_bytes_count); + bytes_to_write); Py_DECREF(b); b = pending; pending = NULL; @@ -1811,7 +1811,6 @@ _textiowrapper_writeflush(textio *self) /* All data owned by this function is written. Other bytes could have shown up while running. */ - assert(self->pending_bytes_count >= 0); Py_DECREF(b); return 0; } From 0f1ba4766bd7c5f0ada8eac380c6fffc52d75abe Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 17:15:45 -0400 Subject: [PATCH 5/8] Fix build, more tweaks --- Modules/_io/textio.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 9c43f5e43876fa..e90bebd1ed459b 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1622,9 +1622,8 @@ _textiowrapper_writeflush(textio *self) NOTE: this code relies on GIL / @critical_section. It needs to ensure state is "safe" before/after calls to other Python code. - In particular: - _textiowrapper_writeflush() calls buffer.write(). self->pending_bytes - can be appended during buffer->write() or other thread. + In particular _textiowrapper_writeflush() calls buffer.write(). That will + modify self->pending_bytes. At the end of this function either pending_bytes needs to be NULL _or_ there needs to be an exception. @@ -1684,7 +1683,6 @@ _textiowrapper_writeflush(textio *self) assert(pos == self->pending_bytes_count); } - /* pending_bytes is now owned by this function. */ self->pending_bytes_count = 0; self->pending_bytes = NULL; @@ -1760,10 +1758,10 @@ _textiowrapper_writeflush(textio *self) Py_ssize_t size = PyNumber_AsSsize_t(ret, PyExc_OverflowError); /* Check for unexpected return values. */ - /* Can't get out size follow return previous behavior. */ + /* Can't get out size. */ if (size == -1 && PyErr_Occurred()) { /* Warn about the value, but do not error. */ - PyObject *exc = PyErr_GetRaisedException() + PyObject *exc = PyErr_GetRaisedException(); PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "write returned value '%s' not specified by" " BufferedIOBase or TextIOBase (%s)", ret, exc); From 7bb765428085773c85ed81dae4a104abdd4639bf Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 17:32:29 -0400 Subject: [PATCH 6/8] Tweak news --- .../Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst b/Misc/NEWS.d/next/Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst index 787a6ed46085ed..26027afcc49e0b 100644 --- a/Misc/NEWS.d/next/Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst +++ b/Misc/NEWS.d/next/Library/2025-05-21-13-28-43.gh-issue-85383.NCmYBp.rst @@ -1,3 +1,3 @@ -Improve behavior of :meth:`io.TextIOWrapper.write` when there is a partial write -caused by underlying buffer being :class:`io.RawIOBase` (:gh:`91606` :gh:`85393`) -or if there is a :exc:`BlockingIOError` :gh:`97531` +Improve behavior of :meth:`!io.TextIOWrapper.write` when there is a partial +write caused by underlying buffer being :class:`io.RawIOBase` or if there is a +:exc:`BlockingIOError` From 5c188f536912f2a4ba8b6889e050cf4c793e453d Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 20:39:09 -0400 Subject: [PATCH 7/8] Fix assert --- Modules/_io/textio.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index e90bebd1ed459b..0b18ea90af4978 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1587,7 +1587,7 @@ _textiowrapper_prepend(textio *self, PyObject *unwritten) if (self->pending_bytes == NULL) { self->pending_bytes = unwritten; - assert(self->pending_bytes == 0); + assert(self->pending_bytes_count == 0); self->pending_bytes_count += PyBytes_GET_SIZE(unwritten); } else if (!PyList_CheckExact(self->pending_bytes)) { @@ -1901,12 +1901,11 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) // Flush the buffer before adding b to the buffer if b is not small. // https://github.com/python/cpython/issues/87426 if (bytes_len >= self->chunk_size) { - /* writeflush works to ensure all data written. + /* writeflush ensures one pending_bytes is written. Additional data may be written to the TextIO when the lock is - released while calling buffer.write (and show up in - pending_bytes). When that happens, flush again to avoid copying - the current bytes. */ + released while calling buffer.write (and show up in pending_bytes). + When that happens, flush again to avoid copying the current bytes. */ while (self->pending_bytes != NULL) { if (_textiowrapper_writeflush(self) < 0) { Py_DECREF(b); From c5e7b2434630ccefa736876854609ad54d392e19 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 20:41:24 -0400 Subject: [PATCH 8/8] fix docstring --- Modules/_io/textio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 0b18ea90af4978..589bbdf3fe170d 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1622,8 +1622,8 @@ _textiowrapper_writeflush(textio *self) NOTE: this code relies on GIL / @critical_section. It needs to ensure state is "safe" before/after calls to other Python code. - In particular _textiowrapper_writeflush() calls buffer.write(). That will - modify self->pending_bytes. + In particular during calls to `buffer.write` self->pending_bytes may be + modified by other threads. At the end of this function either pending_bytes needs to be NULL _or_ there needs to be an exception.