Skip to content

Commit

Permalink
gh-91924: Optimize unicode_check_encoding_errors() (#93200)
Browse files Browse the repository at this point in the history
Avoid calling _PyCodec_Lookup() and PyCodec_LookupError() for the most
common built-in encodings and error handlers. These encodings and error
handlers are known to be valid, so skipping the lookup avoids creating a
temporary Unicode string object.
  • Loading branch information
vstinner authored May 26, 2022
1 parent efc5d37 commit 5f8c3fb
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -454,15 +454,29 @@ unicode_check_encoding_errors(const char *encoding, const char *errors)
return 0;
}

if (encoding != NULL) {
if (encoding != NULL
// Fast path for the most common built-in encodings. Even if the codec
// is cached, _PyCodec_Lookup() decodes the bytes string from UTF-8 to
// create a temporary Unicode string (the key in the cache).
&& strcmp(encoding, "utf-8") != 0
&& strcmp(encoding, "utf8") != 0
&& strcmp(encoding, "ascii") != 0)
{
PyObject *handler = _PyCodec_Lookup(encoding);
if (handler == NULL) {
return -1;
}
Py_DECREF(handler);
}

if (errors != NULL) {
if (errors != NULL
// Fast path for the most common built-in error handlers.
&& strcmp(errors, "strict") != 0
&& strcmp(errors, "ignore") != 0
&& strcmp(errors, "replace") != 0
&& strcmp(errors, "surrogateescape") != 0
&& strcmp(errors, "surrogatepass") != 0)
{
PyObject *handler = PyCodec_LookupError(errors);
if (handler == NULL) {
return -1;
Expand Down

0 comments on commit 5f8c3fb

Please sign in to comment.