diff --git a/Misc/ACKS b/Misc/ACKS
index b18fabe09ef4f81..7065267379deb13 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -609,6 +609,7 @@ Jan-Philip Gehrcke
Thomas Gellekum
Gabriel Genellina
Christos Georgiou
+Philip Georgi
Elazar (אלעזר) Gershuni
Ben Gertzfield
Nadim Ghaznavi
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index 908e6df9766731c..ac8959ebea5bf22 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -169,7 +169,7 @@
@MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c
@MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c
@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c
-@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/vectorcall_limited.c _testcapi/heaptype.c
+@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/vectorcall_limited.c _testcapi/heaptype.c _testcapi/unicode.c
# Some testing modules MUST be built as shared libraries.
*shared*
diff --git a/Modules/_testcapi/parts.h b/Modules/_testcapi/parts.h
index 4b672c9d05bddd5..c0e0f3aa1cc21be 100644
--- a/Modules/_testcapi/parts.h
+++ b/Modules/_testcapi/parts.h
@@ -3,3 +3,4 @@
int _PyTestCapi_Init_Vectorcall(PyObject *module);
int _PyTestCapi_Init_VectorcallLimited(PyObject *module);
int _PyTestCapi_Init_Heaptype(PyObject *module);
+int _PyTestCapi_Init_Unicode(PyObject *module);
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
new file mode 100644
index 000000000000000..58214249e225275
--- /dev/null
+++ b/Modules/_testcapi/unicode.c
@@ -0,0 +1,653 @@
+#include "parts.h"
+
+// Definition of the module that this file's functions are installed on.
+// Remains NULL until _PyTestCapi_Init_Unicode() stores the result of
+// PyModule_GetDef() here; nothing else in this file reads it.
+static struct PyModuleDef *_testcapimodule = NULL; // set at initialization
+
+/* Python-callable wrapper around PyCodec_IncrementalEncoder().
+ *
+ * args: (encoding[, errors]), both parsed with the "s" format unit.
+ * `errors` stays NULL when the caller omits it.
+ *
+ * Returns the result of PyCodec_IncrementalEncoder(), or NULL with an
+ * exception set when the arguments cannot be parsed.
+ */
+static PyObject *
+codec_incrementalencoder(PyObject *self, PyObject *args)
+{
+    const char *encoding;
+    const char *errors = NULL;
+
+    // NOTE(review): the name after ':' is the one reported in argument
+    // parsing errors; it reads "test_incrementalencoder" rather than this
+    // function's exported name -- confirm whether that is intentional.
+    int parsed = PyArg_ParseTuple(args, "s|s:test_incrementalencoder",
+                                  &encoding, &errors);
+    if (parsed) {
+        return PyCodec_IncrementalEncoder(encoding, errors);
+    }
+    return NULL;
+}
+
+/* Python-callable wrapper around PyCodec_IncrementalDecoder().
+ *
+ * args: (encoding[, errors]), both parsed with the "s" format unit.
+ * `errors` stays NULL when the caller omits it.
+ *
+ * Returns the result of PyCodec_IncrementalDecoder(), or NULL with an
+ * exception set when the arguments cannot be parsed.
+ */
+static PyObject *
+codec_incrementaldecoder(PyObject *self, PyObject *args)
+{
+    const char *encoding;
+    const char *errors = NULL;
+
+    // NOTE(review): the name after ':' is the one reported in argument
+    // parsing errors; it reads "test_incrementaldecoder" rather than this
+    // function's exported name -- confirm whether that is intentional.
+    int parsed = PyArg_ParseTuple(args, "s|s:test_incrementaldecoder",
+                                  &encoding, &errors);
+    if (parsed) {
+        return PyCodec_IncrementalDecoder(encoding, errors);
+    }
+    return NULL;
+}
+
+/* Self-test: a str that ends with a NUL character must not compare equal
+ * to the C string "str" under PyUnicode_CompareWithASCIIString().
+ *
+ * Returns None on success.  Returns NULL with AssertionError set when the
+ * comparison reports equality, or NULL with the pending exception when the
+ * test string cannot be created.
+ */
+static PyObject *
+test_unicode_compare_with_ascii(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    // Length 4 is passed explicitly so the trailing NUL is part of the str.
+    PyObject *py_s = PyUnicode_FromStringAndSize("str\0", 4);
+    if (py_s == NULL) {
+        return NULL;
+    }
+
+    int cmp = PyUnicode_CompareWithASCIIString(py_s, "str");
+    Py_DECREF(py_s);
+    if (cmp != 0) {
+        Py_RETURN_NONE;
+    }
+
+    // cmp == 0: the two strings compared equal, which is the failure case.
+    PyErr_SetString(PyExc_AssertionError, "Python string ending in NULL "
+                    "should not compare equal to c string.");
+    return NULL;
+}
+
+/* Self-test for PyUnicode_FromWideChar().
+ *
+ * Builds U+10ABCD from a wchar_t array (one element when wchar_t is 4 bytes,
+ * the surrogate pair 0xDBEA 0xDFCD otherwise) and checks that the result has
+ * the same length and content as the same character decoded from UTF-8.
+ * When wchar_t is 4 bytes it additionally checks that the out-of-range value
+ * 0x110000 is rejected.
+ *
+ * Returns None on success; NULL with AssertionError (or the underlying
+ * exception) set on failure.
+ */
+static PyObject *
+test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
+    const wchar_t wtext[2] = {(wchar_t)0x10ABCDu};
+    size_t wtextlen = 1;
+    const wchar_t invalid[1] = {(wchar_t)0x110000u};
+#else
+    const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu};
+    size_t wtextlen = 2;
+#endif
+    PyObject *wide, *utf8;
+
+    wide = PyUnicode_FromWideChar(wtext, wtextlen);
+    if (wide == NULL) {
+        return NULL;
+    }
+
+    // UTF-8 encoding of U+10ABCD, the character wtext represents.
+    utf8 = PyUnicode_FromString("\xf4\x8a\xaf\x8d");
+    if (utf8 == NULL) {
+        Py_DECREF(wide);
+        return NULL;
+    }
+
+    if (PyUnicode_GET_LENGTH(wide) != PyUnicode_GET_LENGTH(utf8)) {
+        Py_DECREF(wide);
+        Py_DECREF(utf8);
+        PyErr_SetString(PyExc_AssertionError,
+                        "test_widechar: "
+                        "wide string and utf8 string "
+                        "have different length");
+        return NULL;
+    }
+    if (PyUnicode_Compare(wide, utf8)) {
+        Py_DECREF(wide);
+        Py_DECREF(utf8);
+        // A non-zero result may also mean the comparison itself failed;
+        // in that case keep the exception it raised.
+        if (PyErr_Occurred()) {
+            return NULL;
+        }
+        PyErr_SetString(PyExc_AssertionError,
+                        "test_widechar: "
+                        "wide string and utf8 string "
+                        "are different");
+        return NULL;
+    }
+
+    Py_DECREF(wide);
+    Py_DECREF(utf8);
+
+#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
+    wide = PyUnicode_FromWideChar(invalid, 1);
+    if (wide == NULL) {
+        // Expected: the conversion is supposed to fail.
+        PyErr_Clear();
+    }
+    else {
+        // Release the unexpectedly created string before reporting the
+        // failure; the original code returned without dropping it.
+        Py_DECREF(wide);
+        PyErr_SetString(PyExc_AssertionError,
+                        "test_widechar: "
+                        "PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail");
+        return NULL;
+    }
+#endif
+    Py_RETURN_NONE;
+}
+
+/* Python-callable wrapper around PyUnicode_AsWideChar().
+ *
+ * args: (unicode: str, buflen: int).  A wchar_t buffer of `buflen` elements
+ * is allocated, filled by PyUnicode_AsWideChar(), and converted back to a
+ * str.
+ *
+ * Returns the tuple (str, size) where `size` is the value returned by
+ * PyUnicode_AsWideChar(); NULL with an exception set on failure.
+ */
+static PyObject *
+unicode_aswidechar(PyObject *self, PyObject *args)
+{
+    PyObject *unicode;
+    Py_ssize_t buflen;
+    if (!PyArg_ParseTuple(args, "Un", &unicode, &buflen)) {
+        return NULL;
+    }
+
+    wchar_t *wbuf = PyMem_New(wchar_t, buflen);
+    if (wbuf == NULL) {
+        return PyErr_NoMemory();
+    }
+
+    Py_ssize_t size = PyUnicode_AsWideChar(unicode, wbuf, buflen);
+    if (size == -1) {
+        PyMem_Free(wbuf);
+        return NULL;
+    }
+
+    // When fewer than buflen elements were reported, include one element
+    // past them in the returned str (presumably the terminating null that
+    // PyUnicode_AsWideChar wrote -- TODO confirm against its documentation).
+    Py_ssize_t outlen = (size < buflen) ? size + 1 : size;
+    PyObject *converted = PyUnicode_FromWideChar(wbuf, outlen);
+    PyMem_Free(wbuf);
+    if (converted == NULL) {
+        return NULL;
+    }
+
+    // "N" hands the reference to `converted` over to the new tuple.
+    return Py_BuildValue("(Nn)", converted, size);
+}
+
+/* Python-callable wrapper around PyUnicode_AsWideCharString().
+ *
+ * args: (unicode: str).
+ *
+ * Returns the tuple (str, size): `size` is the length reported by
+ * PyUnicode_AsWideCharString() and the str is rebuilt from size + 1
+ * elements of the returned buffer.  Returns NULL with an exception set on
+ * failure.
+ */
+static PyObject *
+unicode_aswidecharstring(PyObject *self, PyObject *args)
+{
+    PyObject *unicode;
+    if (!PyArg_ParseTuple(args, "U", &unicode)) {
+        return NULL;
+    }
+
+    Py_ssize_t size;
+    wchar_t *wstr = PyUnicode_AsWideCharString(unicode, &size);
+    if (wstr == NULL) {
+        return NULL;
+    }
+
+    // size + 1 also converts the element that follows the reported
+    // characters (presumably the terminating null -- TODO confirm).
+    PyObject *converted = PyUnicode_FromWideChar(wstr, size + 1);
+    PyMem_Free(wstr);
+    if (converted == NULL) {
+        return NULL;
+    }
+
+    // "N" hands the reference to `converted` over to the new tuple.
+    return Py_BuildValue("(Nn)", converted, size);
+}
+
+/* Python-callable wrapper around PyUnicode_AsUCS4().
+ *
+ * args: (unicode: str, str_len: int, copy_null: bool).
+ *
+ * A buffer of str_len + 1 Py_UCS4 elements is zero-filled, its last element
+ * is set to the marker 0xffff, and PyUnicode_AsUCS4() is asked to fill it.
+ * The whole buffer, marker position included, is returned as a str so the
+ * caller can see which elements were written.
+ *
+ * Returns the new str; NULL with an exception set on failure.  Raises
+ * ValueError when str_len is negative or so large that str_len + 1 would
+ * overflow.
+ */
+static PyObject *
+unicode_asucs4(PyObject *self, PyObject *args)
+{
+    PyObject *unicode, *result;
+    Py_UCS4 *buffer;
+    int copy_null;
+    Py_ssize_t str_len, buf_len;
+
+    if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4",
+                          &unicode, &str_len, &copy_null)) {
+        return NULL;
+    }
+
+    // A negative str_len would index before the buffer below, and
+    // PY_SSIZE_T_MAX would overflow the str_len + 1 computation.
+    if (str_len < 0 || str_len == PY_SSIZE_T_MAX) {
+        PyErr_SetString(PyExc_ValueError,
+                        "unicode_asucs4: str_len out of range");
+        return NULL;
+    }
+
+    buf_len = str_len + 1;
+    buffer = PyMem_New(Py_UCS4, buf_len);
+    if (buffer == NULL) {
+        return PyErr_NoMemory();
+    }
+    memset(buffer, 0, sizeof(Py_UCS4)*buf_len);
+    // Marker in the last element: shows whether that element was overwritten.
+    buffer[str_len] = 0xffffU;
+
+    if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) {
+        PyMem_Free(buffer);
+        return NULL;
+    }
+
+    result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len);
+    PyMem_Free(buffer);
+    return result;
+}
+
+/* Python-callable wrapper around PyUnicode_AsUTF8().
+ *
+ * args: (unicode: str).
+ *
+ * Returns a bytes object built with PyBytes_FromString() from the returned
+ * C string, or NULL with an exception set on failure.
+ */
+static PyObject *
+unicode_asutf8(PyObject *self, PyObject *args)
+{
+    PyObject *unicode;
+    if (!PyArg_ParseTuple(args, "U", &unicode)) {
+        return NULL;
+    }
+
+    const char *utf8 = PyUnicode_AsUTF8(unicode);
+    return (utf8 != NULL) ? PyBytes_FromString(utf8) : NULL;
+}
+
+/* Python-callable wrapper around PyUnicode_AsUTF8AndSize().
+ *
+ * args: (unicode: str).
+ *
+ * Returns the tuple (bytes, size) where `size` is the length reported by
+ * PyUnicode_AsUTF8AndSize() and `bytes` holds exactly that many bytes of
+ * the returned buffer.  Returns NULL with an exception set on failure.
+ */
+static PyObject *
+unicode_asutf8andsize(PyObject *self, PyObject *args)
+{
+    PyObject *unicode, *result;
+    const char *buffer;
+    Py_ssize_t utf8_len;
+
+    if (!PyArg_ParseTuple(args, "U", &unicode)) {
+        return NULL;
+    }
+
+    buffer = PyUnicode_AsUTF8AndSize(unicode, &utf8_len);
+    if (buffer == NULL) {
+        return NULL;
+    }
+
+    // Copy by the reported size rather than up to the first NUL byte, so a
+    // str containing U+0000 is not truncated and len(bytes) == size.
+    result = PyBytes_FromStringAndSize(buffer, utf8_len);
+    if (result == NULL) {
+        return NULL;
+    }
+
+    // "N" hands the reference to `result` over to the new tuple.
+    return Py_BuildValue("(Nn)", result, utf8_len);
+}
+
+/* Python-callable wrapper around PyUnicode_FindChar().
+ *
+ * args: (str, ch: unsigned int, start: int, end: int, direction: int).
+ *
+ * Returns the value from PyUnicode_FindChar() as an int, or NULL when that
+ * value is -2 (its error indicator) or the arguments cannot be parsed.
+ */
+static PyObject *
+unicode_findchar(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    unsigned int ch;
+    Py_ssize_t start, end;
+    int direction;
+
+    if (!PyArg_ParseTuple(args, "UInni:unicode_findchar", &str, &ch,
+                          &start, &end, &direction)) {
+        return NULL;
+    }
+
+    Py_ssize_t index = PyUnicode_FindChar(str, (Py_UCS4)ch,
+                                          start, end, direction);
+    if (index != -2) {
+        // Every other value, including -1, is passed through unchanged.
+        return PyLong_FromSsize_t(index);
+    }
+    return NULL;
+}
+
+/* Python-callable wrapper around PyUnicode_CopyCharacters().
+ *
+ * args: (to: str, to_start: int, from: object, from_start: int,
+ *        how_many: int).
+ *
+ * `to` itself is never modified: the destination is a new string with the
+ * same length and maximum character value as `to`, filled with 0 before
+ * the copy.
+ *
+ * Returns the tuple (destination, copied) where `copied` is the value
+ * returned by PyUnicode_CopyCharacters(); NULL with an exception set on
+ * failure.
+ */
+static PyObject *
+unicode_copycharacters(PyObject *self, PyObject *args)
+{
+    PyObject *to, *from;
+    Py_ssize_t to_start, from_start, how_many;
+    Py_ssize_t copied;
+
+    if (!PyArg_ParseTuple(args, "UnOnn:unicode_copycharacters", &to, &to_start,
+                          &from, &from_start, &how_many)) {
+        return NULL;
+    }
+
+    PyObject *to_copy = PyUnicode_New(PyUnicode_GET_LENGTH(to),
+                                      PyUnicode_MAX_CHAR_VALUE(to));
+    if (to_copy == NULL) {
+        return NULL;
+    }
+
+    if (PyUnicode_Fill(to_copy, 0, PyUnicode_GET_LENGTH(to_copy), 0U) < 0) {
+        goto error;
+    }
+
+    copied = PyUnicode_CopyCharacters(to_copy, to_start, from,
+                                      from_start, how_many);
+    if (copied < 0) {
+        goto error;
+    }
+
+    // "N" hands the reference to `to_copy` over to the new tuple.
+    return Py_BuildValue("(Nn)", to_copy, copied);
+
+error:
+    Py_DECREF(to_copy);
+    return NULL;
+}
+
+/* Check that a call which was expected to fail raised SystemError.
+ *
+ * result: value returned by the call under test (NULL means it failed).
+ * msg:    text identifying the call; only read, used in the failure message.
+ *
+ * Returns 1 and clears the exception when SystemError is pending.
+ * Returns 0 otherwise: with AssertionError set when `result` is non-NULL,
+ * or with the unexpected exception left in place.  `result` is not
+ * released here; that stays the caller's job.
+ */
+static int
+check_raised_systemerror(PyObject *result, const char *msg)
+{
+    if (result) {
+        // no exception
+        PyErr_Format(PyExc_AssertionError,
+                     "SystemError not raised: %s",
+                     msg);
+        return 0;
+    }
+    if (PyErr_ExceptionMatches(PyExc_SystemError)) {
+        // expected exception
+        PyErr_Clear();
+        return 1;
+    }
+    // unexpected exception
+    return 0;
+}
+
+/* Self-test for PyUnicode_FromFormat().
+ *
+ * Each CHECK_FORMAT_* line formats its arguments and compares the result
+ * with an expected ASCII string; an expected value of NULL means the call
+ * must raise SystemError instead.
+ *
+ * Returns None when every check passes.  Returns NULL with an exception
+ * set on the first failing check (AssertionError for a mismatch, otherwise
+ * the exception raised by the call itself).
+ */
+static PyObject *
+test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    PyObject *result = NULL;
+    PyObject *unicode = PyUnicode_FromString("None");
+    if (unicode == NULL) {
+        // Do not run the checks with an exception already pending.
+        return NULL;
+    }
+
+/* Every failure path goes through Fail so that `unicode` is released;
+ * `result` is reset to NULL after each check. */
+#define CHECK_FORMAT_2(FORMAT, EXPECTED, ARG1, ARG2) \
+    do { \
+        result = PyUnicode_FromFormat(FORMAT, ARG1, ARG2); \
+        if (EXPECTED == NULL) { \
+            if (!check_raised_systemerror(result, FORMAT)) { \
+                goto Fail; \
+            } \
+        } \
+        else if (result == NULL) { \
+            goto Fail; \
+        } \
+        else if (!_PyUnicode_EqualToASCIIString(result, EXPECTED)) { \
+            PyErr_Format(PyExc_AssertionError, \
+                         "test_string_from_format: failed at \"%s\" " \
+                         "expected \"%s\" got \"%s\"", \
+                         FORMAT, EXPECTED, PyUnicode_AsUTF8(result)); \
+            goto Fail; \
+        } \
+        Py_CLEAR(result); \
+    } while (0)
+
+#define CHECK_FORMAT_1(FORMAT, EXPECTED, ARG) \
+    CHECK_FORMAT_2(FORMAT, EXPECTED, ARG, 0)
+
+#define CHECK_FORMAT_0(FORMAT, EXPECTED) \
+    CHECK_FORMAT_2(FORMAT, EXPECTED, 0, 0)
+
+    // Unrecognized
+    CHECK_FORMAT_2("%u %? %u", NULL, 1, 2);
+
+    // "%%" (options are rejected)
+    CHECK_FORMAT_0("%%", "%");
+    CHECK_FORMAT_0("%0%", NULL);
+    CHECK_FORMAT_0("%00%", NULL);
+    CHECK_FORMAT_0("%2%", NULL);
+    CHECK_FORMAT_0("%02%", NULL);
+    CHECK_FORMAT_0("%.0%", NULL);
+    CHECK_FORMAT_0("%.2%", NULL);
+
+    // "%c"
+    CHECK_FORMAT_1("%c", "c", 'c');
+    CHECK_FORMAT_1("%0c", "c", 'c');
+    CHECK_FORMAT_1("%00c", "c", 'c');
+    CHECK_FORMAT_1("%2c", "c", 'c');
+    CHECK_FORMAT_1("%02c", "c", 'c');
+    CHECK_FORMAT_1("%.0c", "c", 'c');
+    CHECK_FORMAT_1("%.2c", "c", 'c');
+
+    // Integers
+    CHECK_FORMAT_1("%d", "123", (int)123);
+    CHECK_FORMAT_1("%i", "123", (int)123);
+    CHECK_FORMAT_1("%u", "123", (unsigned int)123);
+    CHECK_FORMAT_1("%ld", "123", (long)123);
+    CHECK_FORMAT_1("%li", "123", (long)123);
+    CHECK_FORMAT_1("%lu", "123", (unsigned long)123);
+    CHECK_FORMAT_1("%lld", "123", (long long)123);
+    CHECK_FORMAT_1("%lli", "123", (long long)123);
+    CHECK_FORMAT_1("%llu", "123", (unsigned long long)123);
+    CHECK_FORMAT_1("%zd", "123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%zi", "123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%zu", "123", (size_t)123);
+    CHECK_FORMAT_1("%x", "7b", (int)123);
+
+    CHECK_FORMAT_1("%d", "-123", (int)-123);
+    CHECK_FORMAT_1("%i", "-123", (int)-123);
+    CHECK_FORMAT_1("%ld", "-123", (long)-123);
+    CHECK_FORMAT_1("%li", "-123", (long)-123);
+    CHECK_FORMAT_1("%lld", "-123", (long long)-123);
+    CHECK_FORMAT_1("%lli", "-123", (long long)-123);
+    CHECK_FORMAT_1("%zd", "-123", (Py_ssize_t)-123);
+    CHECK_FORMAT_1("%zi", "-123", (Py_ssize_t)-123);
+    CHECK_FORMAT_1("%x", "ffffff85", (int)-123);
+
+    // Integers: width < length
+    CHECK_FORMAT_1("%1d", "123", (int)123);
+    CHECK_FORMAT_1("%1i", "123", (int)123);
+    CHECK_FORMAT_1("%1u", "123", (unsigned int)123);
+    CHECK_FORMAT_1("%1ld", "123", (long)123);
+    CHECK_FORMAT_1("%1li", "123", (long)123);
+    CHECK_FORMAT_1("%1lu", "123", (unsigned long)123);
+    CHECK_FORMAT_1("%1lld", "123", (long long)123);
+    CHECK_FORMAT_1("%1lli", "123", (long long)123);
+    CHECK_FORMAT_1("%1llu", "123", (unsigned long long)123);
+    CHECK_FORMAT_1("%1zd", "123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%1zi", "123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%1zu", "123", (size_t)123);
+    CHECK_FORMAT_1("%1x", "7b", (int)123);
+
+    CHECK_FORMAT_1("%1d", "-123", (int)-123);
+    CHECK_FORMAT_1("%1i", "-123", (int)-123);
+    CHECK_FORMAT_1("%1ld", "-123", (long)-123);
+    CHECK_FORMAT_1("%1li", "-123", (long)-123);
+    CHECK_FORMAT_1("%1lld", "-123", (long long)-123);
+    CHECK_FORMAT_1("%1lli", "-123", (long long)-123);
+    CHECK_FORMAT_1("%1zd", "-123", (Py_ssize_t)-123);
+    CHECK_FORMAT_1("%1zi", "-123", (Py_ssize_t)-123);
+    CHECK_FORMAT_1("%1x", "ffffff85", (int)-123);
+
+    // Integers: width > length (left-padded with spaces up to the width)
+    CHECK_FORMAT_1("%5d", "  123", (int)123);
+    CHECK_FORMAT_1("%5i", "  123", (int)123);
+    CHECK_FORMAT_1("%5u", "  123", (unsigned int)123);
+    CHECK_FORMAT_1("%5ld", "  123", (long)123);
+    CHECK_FORMAT_1("%5li", "  123", (long)123);
+    CHECK_FORMAT_1("%5lu", "  123", (unsigned long)123);
+    CHECK_FORMAT_1("%5lld", "  123", (long long)123);
+    CHECK_FORMAT_1("%5lli", "  123", (long long)123);
+    CHECK_FORMAT_1("%5llu", "  123", (unsigned long long)123);
+    CHECK_FORMAT_1("%5zd", "  123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%5zi", "  123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%5zu", "  123", (size_t)123);
+    CHECK_FORMAT_1("%5x", "   7b", (int)123);
+
+    CHECK_FORMAT_1("%5d", " -123", (int)-123);
+    CHECK_FORMAT_1("%5i", " -123", (int)-123);
+    CHECK_FORMAT_1("%5ld", " -123", (long)-123);
+    CHECK_FORMAT_1("%5li", " -123", (long)-123);
+    CHECK_FORMAT_1("%5lld", " -123", (long long)-123);
+    CHECK_FORMAT_1("%5lli", " -123", (long long)-123);
+    CHECK_FORMAT_1("%5zd", " -123", (Py_ssize_t)-123);
+    CHECK_FORMAT_1("%5zi", " -123", (Py_ssize_t)-123);
+    CHECK_FORMAT_1("%9x", " ffffff85", (int)-123);
+
+    // Integers: width > length, 0-flag
+    CHECK_FORMAT_1("%05d", "00123", (int)123);
+    CHECK_FORMAT_1("%05i", "00123", (int)123);
+    CHECK_FORMAT_1("%05u", "00123", (unsigned int)123);
+    CHECK_FORMAT_1("%05ld", "00123", (long)123);
+    CHECK_FORMAT_1("%05li", "00123", (long)123);
+    CHECK_FORMAT_1("%05lu", "00123", (unsigned long)123);
+    CHECK_FORMAT_1("%05lld", "00123", (long long)123);
+    CHECK_FORMAT_1("%05lli", "00123", (long long)123);
+    CHECK_FORMAT_1("%05llu", "00123", (unsigned long long)123);
+    CHECK_FORMAT_1("%05zd", "00123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%05zi", "00123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%05zu", "00123", (size_t)123);
+    CHECK_FORMAT_1("%05x", "0007b", (int)123);
+
+    // Integers: precision < length
+    CHECK_FORMAT_1("%.1d", "123", (int)123);
+    CHECK_FORMAT_1("%.1i", "123", (int)123);
+    CHECK_FORMAT_1("%.1u", "123", (unsigned int)123);
+    CHECK_FORMAT_1("%.1ld", "123", (long)123);
+    CHECK_FORMAT_1("%.1li", "123", (long)123);
+    CHECK_FORMAT_1("%.1lu", "123", (unsigned long)123);
+    CHECK_FORMAT_1("%.1lld", "123", (long long)123);
+    CHECK_FORMAT_1("%.1lli", "123", (long long)123);
+    CHECK_FORMAT_1("%.1llu", "123", (unsigned long long)123);
+    CHECK_FORMAT_1("%.1zd", "123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%.1zi", "123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%.1zu", "123", (size_t)123);
+    CHECK_FORMAT_1("%.1x", "7b", (int)123);
+
+    CHECK_FORMAT_1("%.1d", "-123", (int)-123);
+    CHECK_FORMAT_1("%.1i", "-123", (int)-123);
+    CHECK_FORMAT_1("%.1ld", "-123", (long)-123);
+    CHECK_FORMAT_1("%.1li", "-123", (long)-123);
+    CHECK_FORMAT_1("%.1lld", "-123", (long long)-123);
+    CHECK_FORMAT_1("%.1lli", "-123", (long long)-123);
+    CHECK_FORMAT_1("%.1zd", "-123", (Py_ssize_t)-123);
+    CHECK_FORMAT_1("%.1zi", "-123", (Py_ssize_t)-123);
+    CHECK_FORMAT_1("%.1x", "ffffff85", (int)-123);
+
+    // Integers: precision > length
+    CHECK_FORMAT_1("%.5d", "00123", (int)123);
+    CHECK_FORMAT_1("%.5i", "00123", (int)123);
+    CHECK_FORMAT_1("%.5u", "00123", (unsigned int)123);
+    CHECK_FORMAT_1("%.5ld", "00123", (long)123);
+    CHECK_FORMAT_1("%.5li", "00123", (long)123);
+    CHECK_FORMAT_1("%.5lu", "00123", (unsigned long)123);
+    CHECK_FORMAT_1("%.5lld", "00123", (long long)123);
+    CHECK_FORMAT_1("%.5lli", "00123", (long long)123);
+    CHECK_FORMAT_1("%.5llu", "00123", (unsigned long long)123);
+    CHECK_FORMAT_1("%.5zd", "00123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%.5zi", "00123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%.5zu", "00123", (size_t)123);
+    CHECK_FORMAT_1("%.5x", "0007b", (int)123);
+
+    // Integers: width > precision > length
+    // (zero-padded to the precision, then space-padded to the width)
+    CHECK_FORMAT_1("%7.5d", "  00123", (int)123);
+    CHECK_FORMAT_1("%7.5i", "  00123", (int)123);
+    CHECK_FORMAT_1("%7.5u", "  00123", (unsigned int)123);
+    CHECK_FORMAT_1("%7.5ld", "  00123", (long)123);
+    CHECK_FORMAT_1("%7.5li", "  00123", (long)123);
+    CHECK_FORMAT_1("%7.5lu", "  00123", (unsigned long)123);
+    CHECK_FORMAT_1("%7.5lld", "  00123", (long long)123);
+    CHECK_FORMAT_1("%7.5lli", "  00123", (long long)123);
+    CHECK_FORMAT_1("%7.5llu", "  00123", (unsigned long long)123);
+    CHECK_FORMAT_1("%7.5zd", "  00123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%7.5zi", "  00123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%7.5zu", "  00123", (size_t)123);
+    CHECK_FORMAT_1("%7.5x", "  0007b", (int)123);
+
+    // Integers: width > precision > length, 0-flag
+    CHECK_FORMAT_1("%07.5d", "0000123", (int)123);
+    CHECK_FORMAT_1("%07.5i", "0000123", (int)123);
+    CHECK_FORMAT_1("%07.5u", "0000123", (unsigned int)123);
+    CHECK_FORMAT_1("%07.5ld", "0000123", (long)123);
+    CHECK_FORMAT_1("%07.5li", "0000123", (long)123);
+    CHECK_FORMAT_1("%07.5lu", "0000123", (unsigned long)123);
+    CHECK_FORMAT_1("%07.5lld", "0000123", (long long)123);
+    CHECK_FORMAT_1("%07.5lli", "0000123", (long long)123);
+    CHECK_FORMAT_1("%07.5llu", "0000123", (unsigned long long)123);
+    CHECK_FORMAT_1("%07.5zd", "0000123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%07.5zi", "0000123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%07.5zu", "0000123", (size_t)123);
+    CHECK_FORMAT_1("%07.5x", "000007b", (int)123);
+
+    // Integers: precision > width > length
+    CHECK_FORMAT_1("%5.7d", "0000123", (int)123);
+    CHECK_FORMAT_1("%5.7i", "0000123", (int)123);
+    CHECK_FORMAT_1("%5.7u", "0000123", (unsigned int)123);
+    CHECK_FORMAT_1("%5.7ld", "0000123", (long)123);
+    CHECK_FORMAT_1("%5.7li", "0000123", (long)123);
+    CHECK_FORMAT_1("%5.7lu", "0000123", (unsigned long)123);
+    CHECK_FORMAT_1("%5.7lld", "0000123", (long long)123);
+    CHECK_FORMAT_1("%5.7lli", "0000123", (long long)123);
+    CHECK_FORMAT_1("%5.7llu", "0000123", (unsigned long long)123);
+    CHECK_FORMAT_1("%5.7zd", "0000123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%5.7zi", "0000123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%5.7zu", "0000123", (size_t)123);
+    CHECK_FORMAT_1("%5.7x", "000007b", (int)123);
+
+    // Integers: precision > width > length, 0-flag
+    CHECK_FORMAT_1("%05.7d", "0000123", (int)123);
+    CHECK_FORMAT_1("%05.7i", "0000123", (int)123);
+    CHECK_FORMAT_1("%05.7u", "0000123", (unsigned int)123);
+    CHECK_FORMAT_1("%05.7ld", "0000123", (long)123);
+    CHECK_FORMAT_1("%05.7li", "0000123", (long)123);
+    CHECK_FORMAT_1("%05.7lu", "0000123", (unsigned long)123);
+    CHECK_FORMAT_1("%05.7lld", "0000123", (long long)123);
+    CHECK_FORMAT_1("%05.7lli", "0000123", (long long)123);
+    CHECK_FORMAT_1("%05.7llu", "0000123", (unsigned long long)123);
+    CHECK_FORMAT_1("%05.7zd", "0000123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%05.7zi", "0000123", (Py_ssize_t)123);
+    CHECK_FORMAT_1("%05.7zu", "0000123", (size_t)123);
+    CHECK_FORMAT_1("%05.7x", "000007b", (int)123);
+
+    // Integers: precision = 0, arg = 0 (empty string in C);
+    // the expected result here is "0"
+    CHECK_FORMAT_1("%.0d", "0", (int)0);
+    CHECK_FORMAT_1("%.0i", "0", (int)0);
+    CHECK_FORMAT_1("%.0u", "0", (unsigned int)0);
+    CHECK_FORMAT_1("%.0ld", "0", (long)0);
+    CHECK_FORMAT_1("%.0li", "0", (long)0);
+    CHECK_FORMAT_1("%.0lu", "0", (unsigned long)0);
+    CHECK_FORMAT_1("%.0lld", "0", (long long)0);
+    CHECK_FORMAT_1("%.0lli", "0", (long long)0);
+    CHECK_FORMAT_1("%.0llu", "0", (unsigned long long)0);
+    CHECK_FORMAT_1("%.0zd", "0", (Py_ssize_t)0);
+    CHECK_FORMAT_1("%.0zi", "0", (Py_ssize_t)0);
+    CHECK_FORMAT_1("%.0zu", "0", (size_t)0);
+    CHECK_FORMAT_1("%.0x", "0", (int)0);
+
+    // Strings
+    CHECK_FORMAT_1("%s", "None", "None");
+    CHECK_FORMAT_1("%U", "None", unicode);
+    CHECK_FORMAT_1("%A", "None", Py_None);
+    CHECK_FORMAT_1("%S", "None", Py_None);
+    CHECK_FORMAT_1("%R", "None", Py_None);
+    CHECK_FORMAT_2("%V", "None", unicode, "ignored");
+    CHECK_FORMAT_2("%V", "None", NULL, "None");
+
+    // Strings: width < length
+    CHECK_FORMAT_1("%1s", "None", "None");
+    CHECK_FORMAT_1("%1U", "None", unicode);
+    CHECK_FORMAT_1("%1A", "None", Py_None);
+    CHECK_FORMAT_1("%1S", "None", Py_None);
+    CHECK_FORMAT_1("%1R", "None", Py_None);
+    CHECK_FORMAT_2("%1V", "None", unicode, "ignored");
+    CHECK_FORMAT_2("%1V", "None", NULL, "None");
+
+    // Strings: width > length
+    CHECK_FORMAT_1("%5s", " None", "None");
+    CHECK_FORMAT_1("%5U", " None", unicode);
+    CHECK_FORMAT_1("%5A", " None", Py_None);
+    CHECK_FORMAT_1("%5S", " None", Py_None);
+    CHECK_FORMAT_1("%5R", " None", Py_None);
+    CHECK_FORMAT_2("%5V", " None", unicode, "ignored");
+    CHECK_FORMAT_2("%5V", " None", NULL, "None");
+
+    // Strings: precision < length
+    CHECK_FORMAT_1("%.1s", "N", "None");
+    CHECK_FORMAT_1("%.1U", "N", unicode);
+    CHECK_FORMAT_1("%.1A", "N", Py_None);
+    CHECK_FORMAT_1("%.1S", "N", Py_None);
+    CHECK_FORMAT_1("%.1R", "N", Py_None);
+    CHECK_FORMAT_2("%.1V", "N", unicode, "ignored");
+    CHECK_FORMAT_2("%.1V", "N", NULL, "None");
+
+    // Strings: precision > length
+    CHECK_FORMAT_1("%.5s", "None", "None");
+    CHECK_FORMAT_1("%.5U", "None", unicode);
+    CHECK_FORMAT_1("%.5A", "None", Py_None);
+    CHECK_FORMAT_1("%.5S", "None", Py_None);
+    CHECK_FORMAT_1("%.5R", "None", Py_None);
+    CHECK_FORMAT_2("%.5V", "None", unicode, "ignored");
+    CHECK_FORMAT_2("%.5V", "None", NULL, "None");
+
+    // Strings: precision < length, width > length
+    // (truncated to one character, then space-padded to width 5)
+    CHECK_FORMAT_1("%5.1s", "    N", "None");
+    CHECK_FORMAT_1("%5.1U", "    N", unicode);
+    CHECK_FORMAT_1("%5.1A", "    N", Py_None);
+    CHECK_FORMAT_1("%5.1S", "    N", Py_None);
+    CHECK_FORMAT_1("%5.1R", "    N", Py_None);
+    CHECK_FORMAT_2("%5.1V", "    N", unicode, "ignored");
+    CHECK_FORMAT_2("%5.1V", "    N", NULL, "None");
+
+    // Strings: width < length, precision > length
+    CHECK_FORMAT_1("%1.5s", "None", "None");
+    CHECK_FORMAT_1("%1.5U", "None", unicode);
+    CHECK_FORMAT_1("%1.5A", "None", Py_None);
+    CHECK_FORMAT_1("%1.5S", "None", Py_None);
+    CHECK_FORMAT_1("%1.5R", "None", Py_None);
+    CHECK_FORMAT_2("%1.5V", "None", unicode, "ignored");
+    CHECK_FORMAT_2("%1.5V", "None", NULL, "None");
+
+    Py_XDECREF(unicode);
+    Py_RETURN_NONE;
+
+ Fail:
+    Py_XDECREF(result);
+    Py_XDECREF(unicode);
+    return NULL;
+
+#undef CHECK_FORMAT_2
+#undef CHECK_FORMAT_1
+#undef CHECK_FORMAT_0
+}
+
+/* Functions this file contributes to the module; added by
+ * _PyTestCapi_Init_Unicode().  Entries with METH_NOARGS are the self-tests,
+ * entries with METH_VARARGS are thin wrappers that take arguments.
+ * None of the entries has a docstring. */
+static PyMethodDef TestMethods[] = {
+    {"codec_incrementalencoder", codec_incrementalencoder, METH_VARARGS, NULL},
+    {"codec_incrementaldecoder", codec_incrementaldecoder, METH_VARARGS, NULL},
+    {"test_unicode_compare_with_ascii", test_unicode_compare_with_ascii,
+     METH_NOARGS, NULL},
+    {"test_string_from_format", test_string_from_format, METH_NOARGS, NULL},
+    {"test_widechar", test_widechar, METH_NOARGS, NULL},
+    {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS, NULL},
+    {"unicode_aswidecharstring", unicode_aswidecharstring, METH_VARARGS, NULL},
+    {"unicode_asucs4", unicode_asucs4, METH_VARARGS, NULL},
+    {"unicode_asutf8", unicode_asutf8, METH_VARARGS, NULL},
+    {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS, NULL},
+    {"unicode_findchar", unicode_findchar, METH_VARARGS, NULL},
+    {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS, NULL},
+    {NULL, NULL, 0, NULL},  // sentinel
+};
+
+/* Install this file's functions on module `m`.
+ *
+ * Also records the module's definition in the file-level _testcapimodule
+ * pointer.
+ *
+ * Returns 0 on success, -1 when PyModule_AddFunctions() fails.
+ */
+int
+_PyTestCapi_Init_Unicode(PyObject *m)
+{
+    _testcapimodule = PyModule_GetDef(m);
+
+    int status = PyModule_AddFunctions(m, TestMethods);
+    return (status < 0) ? -1 : 0;
+}
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 8004fa18bcc5284..91bdeb8b6464dfb 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1886,234 +1886,6 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args)
return return_value;
}
-static PyObject *
-test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
-{
-#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
- const wchar_t wtext[2] = {(wchar_t)0x10ABCDu};
- size_t wtextlen = 1;
- const wchar_t invalid[1] = {(wchar_t)0x110000u};
-#else
- const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu};
- size_t wtextlen = 2;
-#endif
- PyObject *wide, *utf8;
-
- wide = PyUnicode_FromWideChar(wtext, wtextlen);
- if (wide == NULL)
- return NULL;
-
- utf8 = PyUnicode_FromString("\xf4\x8a\xaf\x8d");
- if (utf8 == NULL) {
- Py_DECREF(wide);
- return NULL;
- }
-
- if (PyUnicode_GET_LENGTH(wide) != PyUnicode_GET_LENGTH(utf8)) {
- Py_DECREF(wide);
- Py_DECREF(utf8);
- return raiseTestError("test_widechar",
- "wide string and utf8 string "
- "have different length");
- }
- if (PyUnicode_Compare(wide, utf8)) {
- Py_DECREF(wide);
- Py_DECREF(utf8);
- if (PyErr_Occurred())
- return NULL;
- return raiseTestError("test_widechar",
- "wide string and utf8 string "
- "are different");
- }
-
- Py_DECREF(wide);
- Py_DECREF(utf8);
-
-#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
- wide = PyUnicode_FromWideChar(invalid, 1);
- if (wide == NULL)
- PyErr_Clear();
- else
- return raiseTestError("test_widechar",
- "PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail");
-#endif
- Py_RETURN_NONE;
-}
-
-static PyObject *
-unicode_aswidechar(PyObject *self, PyObject *args)
-{
- PyObject *unicode, *result;
- Py_ssize_t buflen, size;
- wchar_t *buffer;
-
- if (!PyArg_ParseTuple(args, "Un", &unicode, &buflen))
- return NULL;
- buffer = PyMem_New(wchar_t, buflen);
- if (buffer == NULL)
- return PyErr_NoMemory();
-
- size = PyUnicode_AsWideChar(unicode, buffer, buflen);
- if (size == -1) {
- PyMem_Free(buffer);
- return NULL;
- }
-
- if (size < buflen)
- buflen = size + 1;
- else
- buflen = size;
- result = PyUnicode_FromWideChar(buffer, buflen);
- PyMem_Free(buffer);
- if (result == NULL)
- return NULL;
-
- return Py_BuildValue("(Nn)", result, size);
-}
-
-static PyObject *
-unicode_aswidecharstring(PyObject *self, PyObject *args)
-{
- PyObject *unicode, *result;
- Py_ssize_t size;
- wchar_t *buffer;
-
- if (!PyArg_ParseTuple(args, "U", &unicode))
- return NULL;
-
- buffer = PyUnicode_AsWideCharString(unicode, &size);
- if (buffer == NULL)
- return NULL;
-
- result = PyUnicode_FromWideChar(buffer, size + 1);
- PyMem_Free(buffer);
- if (result == NULL)
- return NULL;
- return Py_BuildValue("(Nn)", result, size);
-}
-
-static PyObject *
-unicode_asucs4(PyObject *self, PyObject *args)
-{
- PyObject *unicode, *result;
- Py_UCS4 *buffer;
- int copy_null;
- Py_ssize_t str_len, buf_len;
-
- if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4", &unicode, &str_len, &copy_null)) {
- return NULL;
- }
-
- buf_len = str_len + 1;
- buffer = PyMem_NEW(Py_UCS4, buf_len);
- if (buffer == NULL) {
- return PyErr_NoMemory();
- }
- memset(buffer, 0, sizeof(Py_UCS4)*buf_len);
- buffer[str_len] = 0xffffU;
-
- if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) {
- PyMem_Free(buffer);
- return NULL;
- }
-
- result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len);
- PyMem_Free(buffer);
- return result;
-}
-
-static PyObject *
-unicode_asutf8(PyObject *self, PyObject *args)
-{
- PyObject *unicode;
- const char *buffer;
-
- if (!PyArg_ParseTuple(args, "U", &unicode)) {
- return NULL;
- }
-
- buffer = PyUnicode_AsUTF8(unicode);
- if (buffer == NULL) {
- return NULL;
- }
-
- return PyBytes_FromString(buffer);
-}
-
-static PyObject *
-unicode_asutf8andsize(PyObject *self, PyObject *args)
-{
- PyObject *unicode, *result;
- const char *buffer;
- Py_ssize_t utf8_len;
-
- if(!PyArg_ParseTuple(args, "U", &unicode)) {
- return NULL;
- }
-
- buffer = PyUnicode_AsUTF8AndSize(unicode, &utf8_len);
- if (buffer == NULL) {
- return NULL;
- }
-
- result = PyBytes_FromString(buffer);
- if (result == NULL) {
- return NULL;
- }
-
- return Py_BuildValue("(Nn)", result, utf8_len);
-}
-
-static PyObject *
-unicode_findchar(PyObject *self, PyObject *args)
-{
- PyObject *str;
- int direction;
- unsigned int ch;
- Py_ssize_t result;
- Py_ssize_t start, end;
-
- if (!PyArg_ParseTuple(args, "UInni:unicode_findchar", &str, &ch,
- &start, &end, &direction)) {
- return NULL;
- }
-
- result = PyUnicode_FindChar(str, (Py_UCS4)ch, start, end, direction);
- if (result == -2)
- return NULL;
- else
- return PyLong_FromSsize_t(result);
-}
-
-static PyObject *
-unicode_copycharacters(PyObject *self, PyObject *args)
-{
- PyObject *from, *to, *to_copy;
- Py_ssize_t from_start, to_start, how_many, copied;
-
- if (!PyArg_ParseTuple(args, "UnOnn:unicode_copycharacters", &to, &to_start,
- &from, &from_start, &how_many)) {
- return NULL;
- }
-
- if (!(to_copy = PyUnicode_New(PyUnicode_GET_LENGTH(to),
- PyUnicode_MAX_CHAR_VALUE(to)))) {
- return NULL;
- }
- if (PyUnicode_Fill(to_copy, 0, PyUnicode_GET_LENGTH(to_copy), 0U) < 0) {
- Py_DECREF(to_copy);
- return NULL;
- }
-
- if ((copied = PyUnicode_CopyCharacters(to_copy, to_start, from,
- from_start, how_many)) < 0) {
- Py_DECREF(to_copy);
- return NULL;
- }
-
- return Py_BuildValue("(Nn)", to_copy, copied);
-}
-
static PyObject *
getargs_w_star(PyObject *self, PyObject *args)
{
@@ -2164,27 +1936,6 @@ test_empty_argparse(PyObject *self, PyObject *Py_UNUSED(ignored))
}
}
-static PyObject *
-codec_incrementalencoder(PyObject *self, PyObject *args)
-{
- const char *encoding, *errors = NULL;
- if (!PyArg_ParseTuple(args, "s|s:test_incrementalencoder",
- &encoding, &errors))
- return NULL;
- return PyCodec_IncrementalEncoder(encoding, errors);
-}
-
-static PyObject *
-codec_incrementaldecoder(PyObject *self, PyObject *args)
-{
- const char *encoding, *errors = NULL;
- if (!PyArg_ParseTuple(args, "s|s:test_incrementaldecoder",
- &encoding, &errors))
- return NULL;
- return PyCodec_IncrementalDecoder(encoding, errors);
-}
-
-
/* Simple test of _PyLong_NumBits and _PyLong_Sign. */
static PyObject *
test_long_numbits(PyObject *self, PyObject *Py_UNUSED(ignored))
@@ -2847,63 +2598,6 @@ pending_threadfunc(PyObject *self, PyObject *arg)
Py_RETURN_TRUE;
}
-/* Some tests of PyUnicode_FromFormat(). This needs more tests. */
-static PyObject *
-test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
-{
- PyObject *result;
- char *msg;
-
-#define CHECK_1_FORMAT(FORMAT, TYPE) \
- result = PyUnicode_FromFormat(FORMAT, (TYPE)1); \
- if (result == NULL) \
- return NULL; \
- if (!_PyUnicode_EqualToASCIIString(result, "1")) { \
- msg = FORMAT " failed at 1"; \
- goto Fail; \
- } \
- Py_DECREF(result)
-
- CHECK_1_FORMAT("%d", int);
- CHECK_1_FORMAT("%ld", long);
- /* The z width modifier was added in Python 2.5. */
- CHECK_1_FORMAT("%zd", Py_ssize_t);
-
- /* The u type code was added in Python 2.5. */
- CHECK_1_FORMAT("%u", unsigned int);
- CHECK_1_FORMAT("%lu", unsigned long);
- CHECK_1_FORMAT("%zu", size_t);
-
- /* "%lld" and "%llu" support added in Python 2.7. */
- CHECK_1_FORMAT("%llu", unsigned long long);
- CHECK_1_FORMAT("%lld", long long);
-
- Py_RETURN_NONE;
-
- Fail:
- Py_XDECREF(result);
- return raiseTestError("test_string_from_format", msg);
-
-#undef CHECK_1_FORMAT
-}
-
-
-static PyObject *
-test_unicode_compare_with_ascii(PyObject *self, PyObject *Py_UNUSED(ignored)) {
- PyObject *py_s = PyUnicode_FromStringAndSize("str\0", 4);
- int result;
- if (py_s == NULL)
- return NULL;
- result = PyUnicode_CompareWithASCIIString(py_s, "str");
- Py_DECREF(py_s);
- if (!result) {
- PyErr_SetString(TestError, "Python string ending in NULL "
- "should not compare equal to c string.");
- return NULL;
- }
- Py_RETURN_NONE;
-}
-
/* This is here to provide a docstring for test_descr. */
static PyObject *
test_with_docstring(PyObject *self, PyObject *Py_UNUSED(ignored))
@@ -5823,12 +5517,9 @@ static PyMethodDef TestMethods[] = {
{"pyobject_repr_from_null", pyobject_repr_from_null, METH_NOARGS},
{"pyobject_str_from_null", pyobject_str_from_null, METH_NOARGS},
{"pyobject_bytes_from_null", pyobject_bytes_from_null, METH_NOARGS},
- {"test_string_from_format", (PyCFunction)test_string_from_format, METH_NOARGS},
{"test_with_docstring", test_with_docstring, METH_NOARGS,
PyDoc_STR("This is a pretty normal docstring.")},
{"test_string_to_double", test_string_to_double, METH_NOARGS},
- {"test_unicode_compare_with_ascii", test_unicode_compare_with_ascii,
- METH_NOARGS},
{"test_capsule", (PyCFunction)test_capsule, METH_NOARGS},
{"test_from_contiguous", (PyCFunction)test_from_contiguous, METH_NOARGS},
#if (defined(__linux__) || defined(__FreeBSD__)) && defined(__GNUC__)
@@ -5897,19 +5588,7 @@ static PyMethodDef TestMethods[] = {
{"getargs_et", getargs_et, METH_VARARGS},
{"getargs_es_hash", getargs_es_hash, METH_VARARGS},
{"getargs_et_hash", getargs_et_hash, METH_VARARGS},
- {"codec_incrementalencoder",
- (PyCFunction)codec_incrementalencoder, METH_VARARGS},
- {"codec_incrementaldecoder",
- (PyCFunction)codec_incrementaldecoder, METH_VARARGS},
{"test_s_code", test_s_code, METH_NOARGS},
- {"test_widechar", test_widechar, METH_NOARGS},
- {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
- {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
- {"unicode_asucs4", unicode_asucs4, METH_VARARGS},
- {"unicode_asutf8", unicode_asutf8, METH_VARARGS},
- {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
- {"unicode_findchar", unicode_findchar, METH_VARARGS},
- {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
{"_test_thread_state", test_thread_state, METH_VARARGS},
{"_pending_threadfunc", pending_threadfunc, METH_VARARGS},
#ifdef HAVE_GETTIMEOFDAY
@@ -6871,6 +6550,9 @@ PyInit__testcapi(void)
if (_PyTestCapi_Init_Heaptype(m) < 0) {
return NULL;
}
+ if (_PyTestCapi_Init_Unicode(m) < 0) {
+ return NULL;
+ }
PyState_AddModule(m, &_testcapimodule);
return m;
diff --git a/PCbuild/_testcapi.vcxproj b/PCbuild/_testcapi.vcxproj
index 0cb4e44cf734448..23bb5ec85274aef 100644
--- a/PCbuild/_testcapi.vcxproj
+++ b/PCbuild/_testcapi.vcxproj
@@ -97,6 +97,7 @@
+    <ClCompile Include="..\Modules\_testcapi\unicode.c" />
diff --git a/PCbuild/_testcapi.vcxproj.filters b/PCbuild/_testcapi.vcxproj.filters
index 4da972f279c8a3e..fc2c4345fe142ed 100644
--- a/PCbuild/_testcapi.vcxproj.filters
+++ b/PCbuild/_testcapi.vcxproj.filters
@@ -21,6 +21,9 @@
Source Files
+    <ClCompile Include="..\Modules\_testcapi\unicode.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>