From f4ee4da8ce39f262b506a0b102b69a3c6ae28c8b Mon Sep 17 00:00:00 2001 From: Jeremy Rimpo Date: Wed, 29 Nov 2023 00:10:10 -0600 Subject: [PATCH 1/2] Fix string conversion issues with emoji characters --- src/pybind11-qt/pybind11_qt_basic.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/pybind11-qt/pybind11_qt_basic.cpp b/src/pybind11-qt/pybind11_qt_basic.cpp index 56ddf4f..d584eae 100644 --- a/src/pybind11-qt/pybind11_qt_basic.cpp +++ b/src/pybind11-qt/pybind11_qt_basic.cpp @@ -46,6 +46,10 @@ namespace pybind11::detail { PyObject* strPtr = PyUnicode_Check(objPtr) ? PyUnicode_AsUTF8String(objPtr) : objPtr; + if (!strPtr) { + return false; + } + // Extract the character data from the python string value = QString::fromUtf8(PyBytes_AsString(strPtr)); @@ -68,8 +72,7 @@ namespace pybind11::detail { handle /* parent */) { static_assert(sizeof(QChar) == 2); - return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, src.constData(), - src.length()); + return PyUnicode_FromString(src.toStdString().c_str()); } bool type_caster::load(handle src, bool) From aac872c5f5d73bf1ee739fce4f1c18c60b6b386b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mika=C3=ABl=20Capelle?= Date: Sat, 2 Dec 2023 17:31:25 +0100 Subject: [PATCH 2/2] Fix load/cast of QString to avoid issues with surrogate pairs. --- src/pybind11-qt/pybind11_qt_basic.cpp | 46 +++++++++++++++------------ tests/python/test_qt.cpp | 8 +++++ tests/python/test_qt.py | 9 ++++++ 3 files changed, 43 insertions(+), 20 deletions(-) diff --git a/src/pybind11-qt/pybind11_qt_basic.cpp b/src/pybind11-qt/pybind11_qt_basic.cpp index d584eae..d82ec2b 100644 --- a/src/pybind11-qt/pybind11_qt_basic.cpp +++ b/src/pybind11-qt/pybind11_qt_basic.cpp @@ -35,30 +35,36 @@ namespace pybind11::detail { */ bool type_caster::load(handle src, bool) { - PyObject* objPtr = src.ptr(); - if (!PyBytes_Check(objPtr) && !PyUnicode_Check(objPtr)) { - return false; + if (PyBytes_Check(objPtr)) { + value = QString::fromUtf8(PyBytes_AsString(objPtr)); + return true; } + else if (PyUnicode_Check(objPtr)) { + switch (PyUnicode_KIND(objPtr)) { + case PyUnicode_1BYTE_KIND: + value = QString::fromUtf8(PyUnicode_AsUTF8(objPtr)); + break; + case PyUnicode_2BYTE_KIND: + value = QString::fromUtf16( + reinterpret_cast(PyUnicode_2BYTE_DATA(objPtr)), + PyUnicode_GET_LENGTH(objPtr)); + break; + case PyUnicode_4BYTE_KIND: + value = QString::fromUcs4( + reinterpret_cast(PyUnicode_4BYTE_DATA(objPtr)), + PyUnicode_GET_LENGTH(objPtr)); + break; + default: + return false; + } - // Ensure the string uses 8-bit characters - PyObject* strPtr = - PyUnicode_Check(objPtr) ? PyUnicode_AsUTF8String(objPtr) : objPtr; - - if (!strPtr) { - return false; + return true; } - - // Extract the character data from the python string - value = QString::fromUtf8(PyBytes_AsString(strPtr)); - - // Deallocate local copy if one was made - if (strPtr != objPtr) { - Py_DecRef(strPtr); + else { + return false; } - - return true; } /** @@ -71,8 +77,8 @@ namespace pybind11::detail { handle type_caster::cast(QString src, return_value_policy /* policy */, handle /* parent */) { - static_assert(sizeof(QChar) == 2); - return PyUnicode_FromString(src.toStdString().c_str()); + return PyUnicode_DecodeUTF16(reinterpret_cast(src.utf16()), + 2 * src.length(), nullptr, 0); } bool type_caster::load(handle src, bool) diff --git a/tests/python/test_qt.cpp b/tests/python/test_qt.cpp index 8d7cb1e..35b1785 100644 --- a/tests/python/test_qt.cpp +++ b/tests/python/test_qt.cpp @@ -11,6 +11,14 @@ PYBIND11_MODULE(qt, m) { // QString + m.def("create_qstring_with_emoji", []() { + return QString::fromUtf16(u"\U0001F600"); + }); + + m.def("consume_qstring_with_emoji", [](QString const& qstring) { + return qstring.length(); + }); + m.def("qstring_to_stdstring", [](QString const& qstring) { return qstring.toStdString(); }); diff --git a/tests/python/test_qt.py b/tests/python/test_qt.py index cb35db5..5bbfd49 100644 --- a/tests/python/test_qt.py +++ b/tests/python/test_qt.py @@ -10,10 +10,19 @@ def test_qstring(): assert m.qstring_to_stdstring("éàüö") == "éàüö" assert m.stdstring_to_qstring("éàüö") == "éàüö" + assert m.qstring_to_stdstring("خالد") == "خالد" + assert m.qstring_to_stdstring("🌎") == "🌎" assert m.qstring_to_int("2") == 2 assert m.int_to_qstring(2) == "2" + emoji = m.create_qstring_with_emoji() + + assert emoji.encode("utf-16be", "surrogatepass") == b"\xd8\x3d\xde\x00" + assert m.consume_qstring_with_emoji(emoji) == 2 + + assert m.consume_qstring_with_emoji("🌎") == 2 + def test_qstringlist(): assert m.qstringlist_join([""], "--") == ""