Skip to content

Commit

Permalink
Raise codec errors when casting to std::string (#2903)
Browse files Browse the repository at this point in the history
* Raise codec errors when casting to std::string

Allow the codec's exception to be raised instead of RuntimeError when
casting from py::str to std::string.

Python 2 allows UCS surrogates in UTF-8 conversion, so the codec error is only raised on Python 3.

Signed-off-by: Shane Loretz <[email protected]>
Signed-off-by: Shane Loretz <[email protected]>

* Attempt to fix py2 error

* Revert all unicode literals

* Fixed

Co-authored-by: Aaron Gokaslan <[email protected]>
  • Loading branch information
sloretz and Skylion007 authored Jul 14, 2021
1 parent aca6c3b commit 7331d38
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 1 deletion.
2 changes: 1 addition & 1 deletion include/pybind11/pytypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1015,7 +1015,7 @@ class str : public object {
if (PyUnicode_Check(m_ptr)) {
temp = reinterpret_steal<object>(PyUnicode_AsUTF8String(m_ptr));
if (!temp)
- pybind11_fail("Unable to extract string contents! (encoding issue)");
+ throw error_already_set();
}
char *buffer = nullptr;
ssize_t length = 0;
Expand Down
3 changes: 3 additions & 0 deletions tests/test_pytypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ TEST_SUBMODULE(pytypes, m) {
m.def("str_from_object", [](const py::object& obj) { return py::str(obj); });
m.def("repr_from_object", [](const py::object& obj) { return py::repr(obj); });
m.def("str_from_handle", [](py::handle h) { return py::str(h); });
m.def("str_from_string_from_str", [](const py::str& obj) {
return py::str(static_cast<std::string>(obj));
});

m.def("str_format", []() {
auto s1 = "{} + {} = {}"_s.format(1, 2, 3);
Expand Down
8 changes: 8 additions & 0 deletions tests/test_pytypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,14 @@ def __repr__(self):
else:
assert m.str_from_handle(malformed_utf8) == "b'\\x80'"

assert m.str_from_string_from_str("this is a str") == "this is a str"
ucs_surrogates_str = u"\udcc3"
if env.PY2:
assert u"\udcc3" == m.str_from_string_from_str(ucs_surrogates_str)
else:
with pytest.raises(UnicodeEncodeError):
m.str_from_string_from_str(ucs_surrogates_str)


def test_bytes(doc):
assert m.bytes_from_string().decode() == "foo"
Expand Down

0 comments on commit 7331d38

Please sign in to comment.