Skip to content

Commit

Permalink
bpo-1635741: _PyUnicode_Name_CAPI moves to internal C API (pythonGH-22713)
Browse files Browse the repository at this point in the history

The private _PyUnicode_Name_CAPI structure of the PyCapsule API
unicodedata.ucnhash_CAPI moves to the internal C API. Moreover, the
structure gets a new state member which must be passed to the
getcode() and getname() functions.

* Move Include/ucnhash.h to Include/internal/pycore_ucnhash.h
* unicodedata module is now built with Py_BUILD_CORE_MODULE.
* unicodedata: move hashAPI variable into unicodedata_module_state.
  • Loading branch information
vstinner authored and adorilson committed Mar 11, 2021
1 parent 777ff55 commit 5ad787a
Show file tree
Hide file tree
Showing 11 changed files with 74 additions and 49 deletions.
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.10.rst
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,12 @@ Porting to Python 3.10
Unicode object without initial data.
(Contributed by Inada Naoki in :issue:`36346`.)

* The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover,
the structure gets a new ``state`` member which must be passed to the
``getcode()`` and ``getname()`` functions.
(Contributed by Victor Stinner in :issue:`1635741`.)

Deprecated
----------

Expand Down
22 changes: 15 additions & 7 deletions Include/ucnhash.h → Include/internal/pycore_ucnhash.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
/* Unicode name database interface */
#ifndef Py_LIMITED_API
#ifndef Py_UCNHASH_H
#define Py_UCNHASH_H
#ifndef Py_INTERNAL_UCNHASH_H
#define Py_INTERNAL_UCNHASH_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

/* revised ucnhash CAPI interface (exported through a "wrapper") */

#define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI"
Expand All @@ -15,22 +18,27 @@ typedef struct {
/* Size of this struct */
int size;

// state which must be passed as the first parameter to getname()
// and getcode()
void *state;

/* Get name for a given character code. Returns non-zero on
success, zero on failure. Does not set Python exceptions.
If self is NULL, data comes from the default version of the database.
If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object. */
int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
int (*getname)(void *state, PyObject *self, Py_UCS4 code,
char* buffer, int buflen,
int with_alias_and_seq);

/* Get character code for a given name. Same error handling
as for getname. */
int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code,
int (*getcode)(void *state, PyObject *self,
const char* name, int namelen, Py_UCS4* code,
int with_named_seq);

} _PyUnicode_Name_CAPI;

#ifdef __cplusplus
}
#endif
#endif /* !Py_UCNHASH_H */
#endif /* !Py_LIMITED_API */
#endif /* !Py_INTERNAL_UCNHASH_H */
2 changes: 1 addition & 1 deletion Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -1065,7 +1065,6 @@ PYTHON_HEADERS= \
$(srcdir)/Include/traceback.h \
$(srcdir)/Include/tracemalloc.h \
$(srcdir)/Include/tupleobject.h \
$(srcdir)/Include/ucnhash.h \
$(srcdir)/Include/unicodeobject.h \
$(srcdir)/Include/warnings.h \
$(srcdir)/Include/weakrefobject.h \
Expand Down Expand Up @@ -1129,6 +1128,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_sysmodule.h \
$(srcdir)/Include/internal/pycore_traceback.h \
$(srcdir)/Include/internal/pycore_tuple.h \
$(srcdir)/Include/internal/pycore_ucnhash.h \
$(srcdir)/Include/internal/pycore_unionobject.h \
$(srcdir)/Include/internal/pycore_warnings.h \
$(DTRACE_HEADERS)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, the
structure gets a new ``state`` member which must be passed to the
``getcode()`` and ``getname()`` functions. Patch by Victor Stinner.
2 changes: 1 addition & 1 deletion Modules/Setup
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ _symtable symtablemodule.c
#_json -I$(srcdir)/Include/internal -DPy_BUILD_CORE_BUILTIN _json.c # _json speedups
#_statistics _statisticsmodule.c # statistics accelerator

#unicodedata unicodedata.c # static Unicode character database
#unicodedata unicodedata.c -DPy_BUILD_CORE_BUILTIN # static Unicode character database


# Modules with some UNIX dependencies -- on by default:
Expand Down
28 changes: 15 additions & 13 deletions Modules/unicodedata.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#define PY_SSIZE_T_CLEAN

#include "Python.h"
#include "ucnhash.h"
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
#include "structmember.h" // PyMemberDef

#include <stdbool.h>
Expand Down Expand Up @@ -97,6 +97,8 @@ typedef struct {
// Borrowed reference to &UCD_Type. It is used to prepare the code
// to convert the UCD_Type static type to a heap type.
PyTypeObject *ucd_type;

_PyUnicode_Name_CAPI capi;
} unicodedata_module_state;

// bpo-1635741: Temporary global state until the unicodedata module
Expand Down Expand Up @@ -1180,10 +1182,11 @@ _getucname(unicodedata_module_state *state, PyObject *self,
}

static int
capi_getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
capi_getucname(void *state_raw, PyObject *self, Py_UCS4 code,
char* buffer, int buflen,
int with_alias_and_seq)
{
unicodedata_module_state *state = &global_module_state;
unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
return _getucname(state, self, code, buffer, buflen, with_alias_and_seq);

}
Expand Down Expand Up @@ -1323,21 +1326,15 @@ _getcode(unicodedata_module_state *state, PyObject* self,
}

static int
capi_getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code,
capi_getcode(void *state_raw, PyObject* self,
const char* name, int namelen, Py_UCS4* code,
int with_named_seq)
{
unicodedata_module_state *state = &global_module_state;
unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
return _getcode(state, self, name, namelen, code, with_named_seq);

}

static const _PyUnicode_Name_CAPI hashAPI =
{
sizeof(_PyUnicode_Name_CAPI),
capi_getucname,
capi_getcode
};

/* -------------------------------------------------------------------- */
/* Python bindings */

Expand Down Expand Up @@ -1510,6 +1507,11 @@ PyInit_unicodedata(void)
PyObject *m, *v;
unicodedata_module_state *state = &global_module_state;

state->capi.size = sizeof(_PyUnicode_Name_CAPI);
state->capi.state = state;
state->capi.getname = capi_getucname;
state->capi.getcode = capi_getcode;

Py_SET_TYPE(&UCD_Type, &PyType_Type);
state->ucd_type = &UCD_Type;

Expand All @@ -1528,7 +1530,7 @@ PyInit_unicodedata(void)
PyModule_AddObject(m, "ucd_3_2_0", v);

/* Export C API */
v = PyCapsule_New((void *)&hashAPI, PyUnicodeData_CAPSULE_NAME, NULL);
v = PyCapsule_New((void *)&state->capi, PyUnicodeData_CAPSULE_NAME, NULL);
if (v != NULL)
PyModule_AddObject(m, "ucnhash_CAPI", v);
return m;
Expand Down
31 changes: 16 additions & 15 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,16 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_bytes_methods.h" // _Py_bytes_lower()
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_interp.h" // PyInterpreterState.fs_codec
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "ucnhash.h" // _PyUnicode_Name_CAPI
#include "stringlib/eq.h" // unicode_eq()
#include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_bytes_methods.h" // _Py_bytes_lower()
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_interp.h" // PyInterpreterState.fs_codec
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
#include "stringlib/eq.h" // unicode_eq()

#ifdef MS_WINDOWS
#include <windows.h>
Expand Down Expand Up @@ -6344,7 +6344,7 @@ PyUnicode_AsUTF16String(PyObject *unicode)

/* --- Unicode Escape Codec ----------------------------------------------- */

static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;

PyObject *
_PyUnicode_DecodeUnicodeEscape(const char *s,
Expand Down Expand Up @@ -6497,11 +6497,11 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,

/* \N{name} */
case 'N':
if (ucnhash_CAPI == NULL) {
if (ucnhash_capi == NULL) {
/* load the unicode data module */
ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
PyUnicodeData_CAPSULE_NAME, 1);
if (ucnhash_CAPI == NULL) {
if (ucnhash_capi == NULL) {
PyErr_SetString(
PyExc_UnicodeError,
"\\N escapes not supported (can't load unicodedata module)"
Expand All @@ -6523,7 +6523,8 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
s++;
ch = 0xffffffff; /* in case 'getcode' messes up */
if (namelen <= INT_MAX &&
ucnhash_CAPI->getcode(NULL, start, (int)namelen,
ucnhash_capi->getcode(ucnhash_capi->state, NULL,
start, (int)namelen,
&ch, 0)) {
assert(ch <= MAX_UNICODE);
WRITE_CHAR(ch);
Expand Down
2 changes: 1 addition & 1 deletion PCbuild/pythoncore.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@
<ClInclude Include="..\Include\internal\pycore_sysmodule.h" />
<ClInclude Include="..\Include\internal\pycore_traceback.h" />
<ClInclude Include="..\Include\internal\pycore_tuple.h" />
<ClInclude Include="..\Include\internal\pycore_ucnhash.h" />
<ClInclude Include="..\Include\internal\pycore_unionobject.h" />
<ClInclude Include="..\Include\internal\pycore_warnings.h" />
<ClInclude Include="..\Include\interpreteridobject.h" />
Expand Down Expand Up @@ -252,7 +253,6 @@
<ClInclude Include="..\Include\traceback.h" />
<ClInclude Include="..\Include\tracemalloc.h" />
<ClInclude Include="..\Include\tupleobject.h" />
<ClInclude Include="..\Include\ucnhash.h" />
<ClInclude Include="..\Include\unicodeobject.h" />
<ClInclude Include="..\Include\weakrefobject.h" />
<ClInclude Include="..\Modules\_math.h" />
Expand Down
6 changes: 3 additions & 3 deletions PCbuild/pythoncore.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,6 @@
<ClInclude Include="..\Include\tupleobject.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\ucnhash.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\unicodeobject.h">
<Filter>Include</Filter>
</ClInclude>
Expand Down Expand Up @@ -573,6 +570,9 @@
<ClInclude Include="..\Include\internal\pycore_tuple.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_ucnhash.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_unionobject.h">
<Filter>Include\internal</Filter>
</ClInclude>
Expand Down
17 changes: 10 additions & 7 deletions Python/codecs.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Copyright (c) Corporation for National Research Initiatives.
#include "Python.h"
#include "pycore_interp.h" // PyInterpreterState.codec_search_path
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "ucnhash.h"
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
#include <ctype.h>

const char *Py_hexdigits = "0123456789abcdef";
Expand Down Expand Up @@ -954,7 +954,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
return Py_BuildValue("(Nn)", res, end);
}

static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;

PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
{
Expand All @@ -976,17 +976,19 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
if (!ucnhash_CAPI) {
if (!ucnhash_capi) {
/* load the unicode data module */
ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
PyUnicodeData_CAPSULE_NAME, 1);
if (!ucnhash_CAPI)
if (!ucnhash_capi) {
return NULL;
}
}
for (i = start, ressize = 0; i < end; ++i) {
/* object is guaranteed to be "ready" */
c = PyUnicode_READ_CHAR(object, i);
if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) {
if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
c, buffer, sizeof(buffer), 1)) {
replsize = 1+1+1+(int)strlen(buffer)+1;
}
else if (c >= 0x10000) {
Expand All @@ -1009,7 +1011,8 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
i < end; ++i) {
c = PyUnicode_READ_CHAR(object, i);
*outp++ = '\\';
if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) {
if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
c, buffer, sizeof(buffer), 1)) {
*outp++ = 'N';
*outp++ = '{';
strcpy((char *)outp, buffer);
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,8 @@ def detect_simple_extensions(self):
self.add(Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c']))
# static Unicode character database
self.add(Extension('unicodedata', ['unicodedata.c'],
depends=['unicodedata_db.h', 'unicodename_db.h']))
depends=['unicodedata_db.h', 'unicodename_db.h'],
extra_compile_args=['-DPy_BUILD_CORE_MODULE']))
# _opcode module
self.add(Extension('_opcode', ['_opcode.c']))
# asyncio speedups
Expand Down

0 comments on commit 5ad787a

Please sign in to comment.