Skip to content

Commit

Permalink
gh-111972: Make Unicode name C API capsule initialization thread-safe (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
Eclips4 authored Nov 30, 2023
1 parent 81261fa commit 0785c68
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 20 deletions.
2 changes: 2 additions & 0 deletions Include/internal/pycore_ucnhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ typedef struct {

} _PyUnicode_Name_CAPI;

/* Return the current interpreter's cached Unicode name C API pointer,
   importing the capsule named by PyUnicodeData_CAPSULE_NAME on first use.
   Returns NULL if that import fails. */
extern _PyUnicode_Name_CAPI* _PyUnicode_GetNameCAPI(void);

#ifdef __cplusplus
}
#endif
Expand Down
32 changes: 21 additions & 11 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5869,6 +5869,23 @@ PyUnicode_AsUTF16String(PyObject *unicode)
return _PyUnicode_EncodeUTF16(unicode, NULL, 0);
}

/* Return the Unicode name C API for the current interpreter.
 *
 * The pointer is cached in interp->unicode.ucnhash_capi and is read and
 * written with atomic pointer operations.  When the cache is empty, the
 * capsule named by PyUnicodeData_CAPSULE_NAME is imported and the result
 * is stored back into the cache.
 *
 * Returns NULL if the capsule import fails.
 */
_PyUnicode_Name_CAPI *
_PyUnicode_GetNameCAPI(void)
{
    PyInterpreterState *interp = _PyInterpreterState_GET();

    /* Fast path: the capsule has already been imported and cached. */
    _PyUnicode_Name_CAPI *capi =
        _Py_atomic_load_ptr(&interp->unicode.ucnhash_capi);
    if (capi != NULL) {
        return capi;
    }

    /* Slow path: import the capsule that carries the C API. */
    capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
            PyUnicodeData_CAPSULE_NAME, 1);

    /* Overwriting a value published by a racing caller is fine: a
     * successful import always yields the same pointer.  The store is
     * unconditional, so a failed import leaves NULL in the cache and a
     * later call attempts the import again. */
    _Py_atomic_store_ptr(&interp->unicode.ucnhash_capi, capi);
    return capi;
}

/* --- Unicode Escape Codec ----------------------------------------------- */

PyObject *
Expand All @@ -5884,7 +5901,6 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
_PyUnicode_Name_CAPI *ucnhash_capi;
PyInterpreterState *interp = _PyInterpreterState_GET();

// so we can remember if we've seen an invalid escape char or not
*first_invalid_escape = NULL;
Expand Down Expand Up @@ -6032,19 +6048,13 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,

/* \N{name} */
case 'N':
ucnhash_capi = interp->unicode.ucnhash_capi;
ucnhash_capi = _PyUnicode_GetNameCAPI();
if (ucnhash_capi == NULL) {
/* load the unicode data module */
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
PyUnicodeData_CAPSULE_NAME, 1);
if (ucnhash_capi == NULL) {
PyErr_SetString(
PyErr_SetString(
PyExc_UnicodeError,
"\\N escapes not supported (can't load unicodedata module)"
);
goto onError;
}
interp->unicode.ucnhash_capi = ucnhash_capi;
);
goto onError;
}

message = "malformed \\N character escape";
Expand Down
12 changes: 3 additions & 9 deletions Python/codecs.c
Original file line number Diff line number Diff line change
Expand Up @@ -931,8 +931,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
return Py_BuildValue("(Nn)", res, end);
}

static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;

PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
{
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
Expand All @@ -953,13 +951,9 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
if (!ucnhash_capi) {
/* load the unicode data module */
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
PyUnicodeData_CAPSULE_NAME, 1);
if (!ucnhash_capi) {
return NULL;
}
_PyUnicode_Name_CAPI *ucnhash_capi = _PyUnicode_GetNameCAPI();
if (ucnhash_capi == NULL) {
return NULL;
}
for (i = start, ressize = 0; i < end; ++i) {
/* object is guaranteed to be "ready" */
Expand Down

0 comments on commit 0785c68

Please sign in to comment.