diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 6f0e6067ef..71c8ba1ca2 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -973,7 +973,7 @@ class str : public object { if (PyUnicode_Check(m_ptr)) { temp = reinterpret_steal<object>(PyUnicode_AsUTF8String(m_ptr)); if (!temp) - pybind11_fail("Unable to extract string contents! (encoding issue)"); + throw error_already_set(); } char *buffer; ssize_t length; diff --git a/tests/test_pytypes.cpp b/tests/test_pytypes.cpp index 6921796aa0..4d33d4aa1a 100644 --- a/tests/test_pytypes.cpp +++ b/tests/test_pytypes.cpp @@ -81,6 +81,9 @@ TEST_SUBMODULE(pytypes, m) { m.def("str_from_object", [](const py::object& obj) { return py::str(obj); }); m.def("repr_from_object", [](const py::object& obj) { return py::repr(obj); }); m.def("str_from_handle", [](py::handle h) { return py::str(h); }); + m.def("str_from_string_from_str", [](const py::str& obj) { + return py::str(static_cast<std::string>(obj)); + }); m.def("str_format", []() { auto s1 = "{} + {} = {}"_s.format(1, 2, 3); diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py index f17bf76b77..d005c8a35e 100644 --- a/tests/test_pytypes.py +++ b/tests/test_pytypes.py @@ -133,6 +133,14 @@ def __repr__(self): else: assert m.str_from_handle(malformed_utf8) == "b'\\x80'" + assert m.str_from_string_from_str("this is a str") == "this is a str" + ucs_surrogates_str = "\udcc3" + if env.PY2: + assert u"\udcc3" == m.str_from_string_from_str(ucs_surrogates_str) + else: + with pytest.raises(UnicodeEncodeError): + m.str_from_string_from_str(ucs_surrogates_str) + def test_bytes(doc): assert m.bytes_from_string().decode() == "foo"