From 0785c685599aaa052f85d6163872bdecb9c66486 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Thu, 30 Nov 2023 13:12:49 +0300 Subject: [PATCH] gh-111972: Make Unicode name C API capsule initialization thread-safe (#112249) --- Include/internal/pycore_ucnhash.h | 2 ++ Objects/unicodeobject.c | 32 ++++++++++++++++++++----------- Python/codecs.c | 12 +++--------- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_ucnhash.h b/Include/internal/pycore_ucnhash.h index 187dd68e7347ff..1561dfbb3150d3 100644 --- a/Include/internal/pycore_ucnhash.h +++ b/Include/internal/pycore_ucnhash.h @@ -28,6 +28,8 @@ typedef struct { } _PyUnicode_Name_CAPI; +extern _PyUnicode_Name_CAPI* _PyUnicode_GetNameCAPI(void); + #ifdef __cplusplus } #endif diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index cffc06297a9aee..10022e23c04abf 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5869,6 +5869,23 @@ PyUnicode_AsUTF16String(PyObject *unicode) return _PyUnicode_EncodeUTF16(unicode, NULL, 0); } +_PyUnicode_Name_CAPI * +_PyUnicode_GetNameCAPI(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_Name_CAPI *ucnhash_capi; + + ucnhash_capi = _Py_atomic_load_ptr(&interp->unicode.ucnhash_capi); + if (ucnhash_capi == NULL) { + ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import( + PyUnicodeData_CAPSULE_NAME, 1); + + // It's fine if we overwrite the value here. It's always the same value. 
+ _Py_atomic_store_ptr(&interp->unicode.ucnhash_capi, ucnhash_capi); + } + return ucnhash_capi; +} + /* --- Unicode Escape Codec ----------------------------------------------- */ PyObject * @@ -5884,7 +5901,6 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, PyObject *errorHandler = NULL; PyObject *exc = NULL; _PyUnicode_Name_CAPI *ucnhash_capi; - PyInterpreterState *interp = _PyInterpreterState_GET(); // so we can remember if we've seen an invalid escape char or not *first_invalid_escape = NULL; @@ -6032,19 +6048,13 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, /* \N{name} */ case 'N': - ucnhash_capi = interp->unicode.ucnhash_capi; + ucnhash_capi = _PyUnicode_GetNameCAPI(); if (ucnhash_capi == NULL) { - /* load the unicode data module */ - ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import( - PyUnicodeData_CAPSULE_NAME, 1); - if (ucnhash_capi == NULL) { - PyErr_SetString( + PyErr_SetString( PyExc_UnicodeError, "\\N escapes not supported (can't load unicodedata module)" - ); - goto onError; - } - interp->unicode.ucnhash_capi = ucnhash_capi; + ); + goto onError; } message = "malformed \\N character escape"; diff --git a/Python/codecs.c b/Python/codecs.c index 545bf82e00dca1..d8fe7b22063a80 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -931,8 +931,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) return Py_BuildValue("(Nn)", res, end); } -static _PyUnicode_Name_CAPI *ucnhash_capi = NULL; - PyObject *PyCodec_NameReplaceErrors(PyObject *exc) { if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { @@ -953,13 +951,9 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) return NULL; if (!(object = PyUnicodeEncodeError_GetObject(exc))) return NULL; - if (!ucnhash_capi) { - /* load the unicode data module */ - ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import( - PyUnicodeData_CAPSULE_NAME, 1); - if (!ucnhash_capi) { - return NULL; - } + _PyUnicode_Name_CAPI *ucnhash_capi = _PyUnicode_GetNameCAPI(); + if 
(ucnhash_capi == NULL) { + return NULL; } for (i = start, ressize = 0; i < end; ++i) { /* object is guaranteed to be "ready" */