Skip to content

Commit

Permalink
Add PyUnicodeWriter_DecodeUTF8Stateful()
Browse files Browse the repository at this point in the history
  • Loading branch information
vstinner committed Jun 17, 2024
1 parent d1019a0 commit 7c4cc95
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 0 deletions.
6 changes: 6 additions & 0 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,12 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8(
PyUnicodeWriter *writer,
const char *str,
Py_ssize_t size);
/* Decode the UTF-8 encoded bytes in `string` and append the result to
   `writer`.

   If `length` is negative, the length of `string` is computed with
   strlen(), so `string` must then be NUL-terminated.
   `errors` names the error handler to apply; `consumed` is forwarded to the
   internal UTF-8 decoder unchanged and may be NULL.

   Returns the decoder's result; a negative value indicates failure, in
   which case the writer position is restored to what it was before the
   call. */
PyAPI_FUNC(int) PyUnicodeWriter_DecodeUTF8Stateful(
PyUnicodeWriter *writer,
const char *string, /* UTF-8 encoded string */
Py_ssize_t length, /* size of string */
const char *errors, /* error handling */
Py_ssize_t *consumed); /* bytes consumed */
PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar(
PyUnicodeWriter *writer,
wchar_t *str,
Expand Down
34 changes: 34 additions & 0 deletions Modules/_testcapi/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,39 @@ test_unicodewriter_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
}


/* Exercise PyUnicodeWriter_DecodeUTF8Stateful() with the "ignore" and
   "replace" error handlers, separated by a '-' character, and check the
   text accumulated in the writer. Returns None on success, NULL on failure. */
static PyObject *
test_unicodewriter_decode_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *w = PyUnicodeWriter_Create(0);
    if (!w) {
        return NULL;
    }

    // Each step only runs if every previous one succeeded (short-circuit ||),
    // so the call sequence matches a chain of early exits.
    int failed =
        PyUnicodeWriter_DecodeUTF8Stateful(w, "ign\xFFore", -1, "ignore", NULL) < 0
        || PyUnicodeWriter_WriteChar(w, '-') < 0
        || PyUnicodeWriter_DecodeUTF8Stateful(w, "replace\xFF", -1, "replace", NULL) < 0;
    if (failed) {
        PyUnicodeWriter_Discard(w);
        return NULL;
    }

    // NOTE(review): the writer is not discarded when Finish fails; presumably
    // Finish releases it in every case -- confirm against its contract.
    PyObject *text = PyUnicodeWriter_Finish(w);
    if (!text) {
        return NULL;
    }

    // The invalid 0xFF byte is dropped by "ignore" and becomes U+FFFD
    // (UTF-8: EF BF BD) under "replace".
    assert(PyUnicode_EqualToUTF8(text, "ignore-replace\xef\xbf\xbd"));
    Py_DECREF(text);

    Py_RETURN_NONE;
}


static PyObject *
test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
{
Expand Down Expand Up @@ -484,6 +517,7 @@ static PyMethodDef TestMethods[] = {
{"test_unicodewriter_utf8", test_unicodewriter_utf8, METH_NOARGS},
{"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
{"test_unicodewriter_recover_error", test_unicodewriter_recover_error, METH_NOARGS},
{"test_unicodewriter_decode_utf8", test_unicodewriter_decode_utf8, METH_NOARGS},
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS},
{"test_unicodewriter_format_recover_error", test_unicodewriter_format_recover_error, METH_NOARGS},
{"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS},
Expand Down
22 changes: 22 additions & 0 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -13501,6 +13501,28 @@ PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
}


/* Decode a UTF-8 byte string and append the decoded text to `writer`.

   writer:   destination writer.
   string:   UTF-8 encoded bytes; must be NUL-terminated when `length` is
             negative, because the length is then computed with strlen().
   length:   number of bytes in `string`, or a negative value to measure it.
   errors:   name of the error handler, forwarded to the decoder.
   consumed: optional out-parameter forwarded to the decoder; may be NULL.

   Returns the decoder's result. On a negative (error) result the writer
   position is rolled back to its value before the call, and `*consumed`
   (when provided) is reset to 0 so that the caller never observes a stale
   or uninitialized byte count for output that was discarded. */
int
PyUnicodeWriter_DecodeUTF8Stateful(PyUnicodeWriter *writer,
                                   const char *string,
                                   Py_ssize_t length,
                                   const char *errors,
                                   Py_ssize_t *consumed)
{
    if (length < 0) {
        // strlen() returns size_t; make the narrowing to Py_ssize_t explicit.
        length = (Py_ssize_t)strlen(string);
    }

    _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
    Py_ssize_t old_pos = _writer->pos;
    int res = unicode_decode_utf8_writer(_writer, string, length,
                                         _Py_ERROR_UNKNOWN, errors, consumed);
    if (res < 0) {
        // Undo any partial output so the writer looks untouched on failure.
        _writer->pos = old_pos;
        // Nothing was kept, so report that nothing was consumed.
        if (consumed != NULL) {
            *consumed = 0;
        }
    }
    return res;
}


int
PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer,
wchar_t *str,
Expand Down

0 comments on commit 7c4cc95

Please sign in to comment.