From 1801811c07d298879f2e8c04195f7682b786924d Mon Sep 17 00:00:00 2001 From: Dan Date: Wed, 29 Jul 2020 18:53:26 -0400 Subject: [PATCH 01/28] Call PySys_SetArgv when initializing interpreter. --- include/pybind11/embed.h | 104 ++++++++++++++++++++++++-- tests/test_embed/test_interpreter.cpp | 24 ++++++ tests/test_embed/test_interpreter.py | 4 + 3 files changed, 127 insertions(+), 5 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index eae86c714c..a44e23f232 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -87,6 +87,95 @@ struct embedded_module { } }; +/// Python 2.x/3.x-compatible version of `PySys_SetArgv` +inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_path) { + // Before it was special-cased in python 3.8, passing an empty or null argv + // caused a segfault, so we have to reimplement the special case ourselves. + char** safe_argv = argv; + if (nullptr == argv || argc <= 0) { + safe_argv = new char*[1]; + if (nullptr == safe_argv) return; + safe_argv[0] = new char[1]; + if (nullptr == safe_argv[0]) { + delete[] safe_argv; + return; + } + safe_argv[0][0] = '\0'; + argc = 1; + } +#if PY_MAJOR_VERSION >= 3 + // SetArgv* on python 3 takes wchar_t, so we have to convert. + wchar_t** widened_argv = new wchar_t*[static_cast(argc)]; + for (int ii = 0; ii < argc; ++ii) { +# if PY_MINOR_VERSION >= 5 + // From Python 3.5 onwards, we're supposed to use Py_DecodeLocale to + // generate the wchar_t version of argv. + widened_argv[ii] = Py_DecodeLocale(safe_argv[ii], nullptr); +# define FREE_WIDENED_ARG(X) PyMem_RawFree(X) +# else + // Before Python 3.5, we're stuck with mbstowcs, which may or may not + // actually work. Mercifully, pyconfig.h provides this define: +# ifdef HAVE_BROKEN_MBSTOWCS + size_t count = strlen(safe_argv[ii]); +# else + size_t count = mbstowcs(nullptr, safe_argv[ii], 0); +# endif + widened_argv[ii] = nullptr; + if (count != static_cast(-1)) { + widened_argv[ii] = new wchar_t[count + 1]; + mbstowcs(widened_argv[ii], safe_argv[ii], count + 1); + } +# define FREE_WIDENED_ARG(X) delete[] X +# endif + if (nullptr == widened_argv[ii]) { + // Either we ran out of memory or had a unicode encoding issue. + // Free what we've encoded so far and bail. + for (--ii; ii >= 0; --ii) + FREE_WIDENED_ARG(widened_argv[ii]); + return; + } + } + +# if PY_MINOR_VERSION < 1 || (PY_MINOR_VERSION == 1 && PY_MICRO_VERSION < 3) +# define NEED_PYRUN_TO_SANITIZE_PATH 1 + // don't have SetArgvEx yet + PySys_SetArgv(argc, widened_argv); +# else + PySys_SetArgvEx(argc, widened_argv, add_current_dir_to_path ? 1 : 0); +# endif + + // PySys_SetArgv makes new PyUnicode objects so we can clean up this memory + if (nullptr != widened_argv) { + for (int ii = 0; ii < argc; ++ii) + if (nullptr != widened_argv[ii]) + FREE_WIDENED_ARG(widened_argv[ii]); + delete[] widened_argv; + } +# undef FREE_WIDENED_ARG +#else + // python 2.x +# if PY_MINOR_VERSION < 6 || (PY_MINOR_VERSION == 6 && PY_MICRO_VERSION < 6) +# define NEED_PYRUN_TO_SANITIZE_PATH 1 + // don't have SetArgvEx yet + PySys_SetArgv(argc, safe_argv); +# else + PySys_SetArgvEx(argc, safe_argv, add_current_dir_to_path ? 1 : 0); +# endif +#endif + +#ifdef NEED_PYRUN_TO_SANITIZE_PATH +# undef NEED_PYRUN_TO_SANITIZE_PATH + if (!add_current_dir_to_path) + PyRun_SimpleString("import sys; sys.path.pop(0)\n"); +#endif + + // if we allocated new memory to make safe_argv, we need to free it + if (safe_argv != argv) { + delete[] safe_argv[0]; + delete[] safe_argv; + } +} + PYBIND11_NAMESPACE_END(detail) /** \rst @@ -102,14 +191,16 @@ PYBIND11_NAMESPACE_END(detail) .. _Python documentation: https://docs.python.org/3/c-api/init.html#c.Py_InitializeEx \endrst */ -inline void initialize_interpreter(bool init_signal_handlers = true) { +inline void initialize_interpreter(bool init_signal_handlers = true, + int argc = 0, + char** argv = nullptr, + bool add_current_dir_to_path = true) { if (Py_IsInitialized()) pybind11_fail("The interpreter is already running"); Py_InitializeEx(init_signal_handlers ? 1 : 0); - // Make .py files in the working directory available by default - module::import("sys").attr("path").cast().append("."); + detail::set_interpreter_argv(argc, argv, add_current_dir_to_path); } /** \rst @@ -182,8 +273,11 @@ inline void finalize_interpreter() { \endrst */ class scoped_interpreter { public: - scoped_interpreter(bool init_signal_handlers = true) { - initialize_interpreter(init_signal_handlers); + scoped_interpreter(bool init_signal_handlers = true, + int argc = 0, + char** argv = nullptr, + bool add_current_dir_to_path = true) { + initialize_interpreter(init_signal_handlers, argc, argv, add_current_dir_to_path); } scoped_interpreter(const scoped_interpreter &) = delete; diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index 222bd565fb..4c4b256780 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -22,6 +22,7 @@ class Widget { std::string the_message() const { return message; } virtual int the_answer() const = 0; + virtual std::string argv0() const = 0; private: std::string message; @@ -31,6 +32,7 @@ class PyWidget final : public Widget { using Widget::Widget; int the_answer() const override { PYBIND11_OVERLOAD_PURE(int, Widget, the_answer); } + std::string argv0() const override { PYBIND11_OVERLOAD_PURE(std::string, Widget, argv0); } }; PYBIND11_EMBEDDED_MODULE(widget_module, m) { @@ -282,3 +284,25 @@ TEST_CASE("Reload module from file") { result = module.attr("test")().cast(); REQUIRE(result == 2); } + +TEST_CASE("sys.argv gets initialized properly") { + py::finalize_interpreter(); + { + py::scoped_interpreter default_scope; + auto module = py::module::import("test_interpreter"); + auto py_widget = module.attr("DerivedWidget")("The question"); + const auto &cpp_widget = py_widget.cast(); + REQUIRE(cpp_widget.argv0() == ""); + } + + { + char* argv[] = { strdup("a.out") }; + py::scoped_interpreter argv_scope(true, 1, argv); + free(argv[0]); + auto module = py::module::import("test_interpreter"); + auto py_widget = module.attr("DerivedWidget")("The question"); + const auto &cpp_widget = py_widget.cast(); + REQUIRE(cpp_widget.argv0() == "a.out"); + } + py::initialize_interpreter(); +} diff --git a/tests/test_embed/test_interpreter.py b/tests/test_embed/test_interpreter.py index 6174ede446..7b1f31ac58 100644 --- a/tests/test_embed/test_interpreter.py +++ b/tests/test_embed/test_interpreter.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from widget_module import Widget +import sys class DerivedWidget(Widget): @@ -8,3 +9,6 @@ def __init__(self, message): def the_answer(self): return 42 + + def argv0(self): + return sys.argv[0] From c3e96c26ce64641096db3efee4408e2e21ad0600 Mon Sep 17 00:00:00 2001 From: Dan Date: Wed, 29 Jul 2020 20:39:23 -0400 Subject: [PATCH 02/28] Document argc/argv parameters in initialize_interpreter. --- include/pybind11/embed.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index a44e23f232..15d7830f40 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -181,15 +181,21 @@ PYBIND11_NAMESPACE_END(detail) /** \rst Initialize the Python interpreter. No other pybind11 or CPython API functions can be called before this is done; with the exception of `PYBIND11_EMBEDDED_MODULE`. The - optional parameter can be used to skip the registration of signal handlers (see the - `Python documentation`_ for details). Calling this function again after the interpreter - has already been initialized is a fatal error. + optional `init_signal_handlers` parameter can be used to skip the registration of + signal handlers (see the `Python documentation`_ for details). Calling this function + again after the interpreter has already been initialized is a fatal error. If initializing the Python interpreter fails, then the program is terminated. (This is controlled by the CPython runtime and is an exception to pybind11's normal behavior of throwing exceptions on errors.) + The remaining optional parameters, `argc`, `argv`, and `add_current_dir_to_path` are + used to populate ``sys.argv`` and ``sys.path``. + See the |PySys_SetArgvEx documentation|_ for details. + .. _Python documentation: https://docs.python.org/3/c-api/init.html#c.Py_InitializeEx + .. |PySys_SetArgvEx documentation| replace:: ``PySys_SetArgvEx`` documentation + .. _PySys_SetArgvEx documentation: https://docs.python.org/3/c-api/init.html#c.PySys_SetArgvEx \endrst */ inline void initialize_interpreter(bool init_signal_handlers = true, int argc = 0, @@ -262,6 +268,8 @@ inline void finalize_interpreter() { Scope guard version of `initialize_interpreter` and `finalize_interpreter`. This a move-only guard and only a single instance can exist. + See `initialize_interpreter` for a discussion of its constructor arguments. + .. code-block:: cpp #include From 5145e58235cee7037d4804500e2faf80196e0fb5 Mon Sep 17 00:00:00 2001 From: Dan Date: Wed, 29 Jul 2020 22:56:59 -0400 Subject: [PATCH 03/28] Remove manual memory management from set_interpreter_argv in favor of smart pointers. --- include/pybind11/embed.h | 41 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 15d7830f40..c0d316ae92 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -11,6 +11,8 @@ #include "pybind11.h" #include "eval.h" +#include +#include #if defined(PYPY_VERSION) # error Embedding the interpreter is not supported with PyPy @@ -92,26 +94,34 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ // Before it was special-cased in python 3.8, passing an empty or null argv // caused a segfault, so we have to reimplement the special case ourselves. char** safe_argv = argv; + std::unique_ptr argv_guard; + std::unique_ptr argv_inner_guard; if (nullptr == argv || argc <= 0) { - safe_argv = new char*[1]; - if (nullptr == safe_argv) return; - safe_argv[0] = new char[1]; - if (nullptr == safe_argv[0]) { - delete[] safe_argv; - return; - } + argv_guard = std::unique_ptr(safe_argv = new char*[1]); + argv_inner_guard = std::unique_ptr(safe_argv[0] = new char[1]); safe_argv[0][0] = '\0'; argc = 1; } #if PY_MAJOR_VERSION >= 3 // SetArgv* on python 3 takes wchar_t, so we have to convert. - wchar_t** widened_argv = new wchar_t*[static_cast(argc)]; + std::unique_ptr widened_argv(new wchar_t*[static_cast(argc)]); +# if PY_MINOR_VERSION >= 5 + // Use of PyMem_RawFree here instead of PyMem_Free is as recommended by the python + // API docs: https://docs.python.org/3/c-api/sys.html#c.Py_DecodeLocale + struct pymem_rawfree_deleter { + void operator()(void* ptr) const { + PyMem_RawFree(ptr); + } + }; + std::vector< std::unique_ptr > widened_argv_entries; +# else + std::vector< std::unique_ptr > widened_argv_entries; +# endif for (int ii = 0; ii < argc; ++ii) { # if PY_MINOR_VERSION >= 5 // From Python 3.5 onwards, we're supposed to use Py_DecodeLocale to // generate the wchar_t version of argv. widened_argv[ii] = Py_DecodeLocale(safe_argv[ii], nullptr); -# define FREE_WIDENED_ARG(X) PyMem_RawFree(X) # else // Before Python 3.5, we're stuck with mbstowcs, which may or may not // actually work. Mercifully, pyconfig.h provides this define: @@ -125,15 +135,12 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ widened_argv[ii] = new wchar_t[count + 1]; mbstowcs(widened_argv[ii], safe_argv[ii], count + 1); } -# define FREE_WIDENED_ARG(X) delete[] X # endif if (nullptr == widened_argv[ii]) { // Either we ran out of memory or had a unicode encoding issue. - // Free what we've encoded so far and bail. - for (--ii; ii >= 0; --ii) - FREE_WIDENED_ARG(widened_argv[ii]); return; - } + } else + widened_argv_entries.emplace_back(widened_argv[ii]); } # if PY_MINOR_VERSION < 1 || (PY_MINOR_VERSION == 1 && PY_MICRO_VERSION < 3) @@ -168,12 +175,6 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ if (!add_current_dir_to_path) PyRun_SimpleString("import sys; sys.path.pop(0)\n"); #endif - - // if we allocated new memory to make safe_argv, we need to free it - if (safe_argv != argv) { - delete[] safe_argv[0]; - delete[] safe_argv; - } } PYBIND11_NAMESPACE_END(detail) From 62243f27e428eef6af2942a956f557158d24fe45 Mon Sep 17 00:00:00 2001 From: Dan Date: Wed, 29 Jul 2020 22:57:45 -0400 Subject: [PATCH 04/28] Use size_t for indexers in set_interpreter_argv. --- include/pybind11/embed.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index c0d316ae92..05668e523d 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -102,9 +102,10 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ safe_argv[0][0] = '\0'; argc = 1; } + size_t argv_size = static_cast(argc); #if PY_MAJOR_VERSION >= 3 // SetArgv* on python 3 takes wchar_t, so we have to convert. - std::unique_ptr widened_argv(new wchar_t*[static_cast(argc)]); + std::unique_ptr widened_argv(new wchar_t*[argv_size]); # if PY_MINOR_VERSION >= 5 // Use of PyMem_RawFree here instead of PyMem_Free is as recommended by the python // API docs: https://docs.python.org/3/c-api/sys.html#c.Py_DecodeLocale @@ -117,7 +118,7 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ # else std::vector< std::unique_ptr > widened_argv_entries; # endif - for (int ii = 0; ii < argc; ++ii) { + for (size_t ii = 0; ii < argv_size; ++ii) { # if PY_MINOR_VERSION >= 5 // From Python 3.5 onwards, we're supposed to use Py_DecodeLocale to // generate the wchar_t version of argv. From 32c14457ccca2177ca5bc6d18e91f1a8c4264f26 Mon Sep 17 00:00:00 2001 From: Dan Date: Wed, 29 Jul 2020 22:59:03 -0400 Subject: [PATCH 05/28] Minimize macros for flow control in set_interpreter_argv. --- include/pybind11/embed.h | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 05668e523d..0092a5639b 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -138,43 +138,29 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ } # endif if (nullptr == widened_argv[ii]) { - // Either we ran out of memory or had a unicode encoding issue. + // A null here indicates a character-encoding failure or the python + // interpreter out of memory. Give up. return; } else widened_argv_entries.emplace_back(widened_argv[ii]); } -# if PY_MINOR_VERSION < 1 || (PY_MINOR_VERSION == 1 && PY_MICRO_VERSION < 3) -# define NEED_PYRUN_TO_SANITIZE_PATH 1 - // don't have SetArgvEx yet - PySys_SetArgv(argc, widened_argv); -# else - PySys_SetArgvEx(argc, widened_argv, add_current_dir_to_path ? 1 : 0); -# endif - - // PySys_SetArgv makes new PyUnicode objects so we can clean up this memory - if (nullptr != widened_argv) { - for (int ii = 0; ii < argc; ++ii) - if (nullptr != widened_argv[ii]) - FREE_WIDENED_ARG(widened_argv[ii]); - delete[] widened_argv; - } -# undef FREE_WIDENED_ARG + auto pysys_argv = widened_argv.get(); #else // python 2.x -# if PY_MINOR_VERSION < 6 || (PY_MINOR_VERSION == 6 && PY_MICRO_VERSION < 6) -# define NEED_PYRUN_TO_SANITIZE_PATH 1 - // don't have SetArgvEx yet - PySys_SetArgv(argc, safe_argv); -# else - PySys_SetArgvEx(argc, safe_argv, add_current_dir_to_path ? 1 : 0); -# endif + auto pysys_argv = safe_argv; #endif -#ifdef NEED_PYRUN_TO_SANITIZE_PATH -# undef NEED_PYRUN_TO_SANITIZE_PATH +# if PY_MAJOR_VERSION == 2 && (PY_MINOR_VERSION < 6 || (PY_MINOR_VERSION == 6 && PY_MICRO_VERSION < 6)) || \ + PY_MAJOR_VERSION == 3 && (PY_MINOR_VERSION < 1 || (PY_MINOR_VERSION == 1 && PY_MICRO_VERSION < 3)) + // These python versions don't have PySys_SetArgvEx, so we have to use the workaround + // recommended by https://docs.python.org/3.5/c-api/init.html#c.PySys_SetArgvEx + // to work around CVE-2008-5983 + PySys_SetArgv(argc, pysys_argv); if (!add_current_dir_to_path) PyRun_SimpleString("import sys; sys.path.pop(0)\n"); +#else + PySys_SetArgvEx(argc, pysys_argv, add_current_dir_to_path ? 1 : 0); #endif } From 3ff967da87c92afbde35f64e97abbba33f15b848 Mon Sep 17 00:00:00 2001 From: Dan Date: Wed, 29 Jul 2020 23:05:06 -0400 Subject: [PATCH 06/28] Fix 'unused variable' warning on Py2 --- include/pybind11/embed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 0092a5639b..0fc42f194b 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -102,8 +102,8 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ safe_argv[0][0] = '\0'; argc = 1; } - size_t argv_size = static_cast(argc); #if PY_MAJOR_VERSION >= 3 + size_t argv_size = static_cast(argc); // SetArgv* on python 3 takes wchar_t, so we have to convert. std::unique_ptr widened_argv(new wchar_t*[argv_size]); # if PY_MINOR_VERSION >= 5 From 27ad85e3872498366c63dd6e9477ff5343672e6b Mon Sep 17 00:00:00 2001 From: Dan Date: Thu, 30 Jul 2020 00:49:07 -0400 Subject: [PATCH 07/28] whitespace --- include/pybind11/embed.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 0fc42f194b..d48afe3579 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -151,8 +151,8 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ auto pysys_argv = safe_argv; #endif -# if PY_MAJOR_VERSION == 2 && (PY_MINOR_VERSION < 6 || (PY_MINOR_VERSION == 6 && PY_MICRO_VERSION < 6)) || \ - PY_MAJOR_VERSION == 3 && (PY_MINOR_VERSION < 1 || (PY_MINOR_VERSION == 1 && PY_MICRO_VERSION < 3)) +#if PY_MAJOR_VERSION == 2 && (PY_MINOR_VERSION < 6 || (PY_MINOR_VERSION == 6 && PY_MICRO_VERSION < 6)) || \ + PY_MAJOR_VERSION == 3 && (PY_MINOR_VERSION < 1 || (PY_MINOR_VERSION == 1 && PY_MICRO_VERSION < 3)) // These python versions don't have PySys_SetArgvEx, so we have to use the workaround // recommended by https://docs.python.org/3.5/c-api/init.html#c.PySys_SetArgvEx // to work around CVE-2008-5983 From fb3de9f08f9185896ea7ac4a551db7d748d750dd Mon Sep 17 00:00:00 2001 From: Boris Staletic Date: Tue, 25 Aug 2020 16:46:03 -0400 Subject: [PATCH 08/28] Define wide_char_arg_deleter outside set_interpreter_argv. --- include/pybind11/embed.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index d48afe3579..1d63f066ca 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -89,6 +89,17 @@ struct embedded_module { } }; +struct wide_char_arg_deleter { + void operator()(void* ptr) const { +#if PY_VERSION_HEX >= 0x030500f0 + // API docs: https://docs.python.org/3/c-api/sys.html#c.Py_DecodeLocale + PyMem_RawFree(ptr); +#else + delete ptr; +#endif + } +}; + /// Python 2.x/3.x-compatible version of `PySys_SetArgv` inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_path) { // Before it was special-cased in python 3.8, passing an empty or null argv @@ -106,18 +117,7 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ size_t argv_size = static_cast(argc); // SetArgv* on python 3 takes wchar_t, so we have to convert. std::unique_ptr widened_argv(new wchar_t*[argv_size]); -# if PY_MINOR_VERSION >= 5 - // Use of PyMem_RawFree here instead of PyMem_Free is as recommended by the python - // API docs: https://docs.python.org/3/c-api/sys.html#c.Py_DecodeLocale - struct pymem_rawfree_deleter { - void operator()(void* ptr) const { - PyMem_RawFree(ptr); - } - }; - std::vector< std::unique_ptr > widened_argv_entries; -# else - std::vector< std::unique_ptr > widened_argv_entries; -# endif + std::vector< std::unique_ptr > widened_argv_entries; for (size_t ii = 0; ii < argv_size; ++ii) { # if PY_MINOR_VERSION >= 5 // From Python 3.5 onwards, we're supposed to use Py_DecodeLocale to From 3b8438e97b16b7ffa90b650ff4744356853eaedd Mon Sep 17 00:00:00 2001 From: Boris Staletic Date: Wed, 26 Aug 2020 11:45:12 -0400 Subject: [PATCH 09/28] Do sys.path workaround in C++ rather than eval. --- include/pybind11/embed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 1d63f066ca..0ff2a8b44b 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -158,7 +158,7 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ // to work around CVE-2008-5983 PySys_SetArgv(argc, pysys_argv); if (!add_current_dir_to_path) - PyRun_SimpleString("import sys; sys.path.pop(0)\n"); + PyList_PopItem(py::module::import("sys").attr("path").ptr(), 0); #else PySys_SetArgvEx(argc, pysys_argv, add_current_dir_to_path ? 1 : 0); #endif From 05661604102998482e40ae19ce8ae0fbf2b9090e Mon Sep 17 00:00:00 2001 From: Boris Staletic Date: Fri, 28 Aug 2020 11:37:23 -0400 Subject: [PATCH 10/28] Factor out wchar conversion to a separate function. --- include/pybind11/embed.h | 46 +++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 0ff2a8b44b..524d46b59c 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -100,6 +100,27 @@ struct wide_char_arg_deleter { } }; +wchar_t* widen_chars(char* safe_arg) { +#if PY_VERSION_HEX >= 0x030500f0 + wchar_t* widened_arg = Py_DecodeLocale(safe_arg, nullptr); +#elif HAVE_BROKEN_MBSTOWCS + size_t count = strlen(safe_arg); + widened_arg = nullptr; + if (count != static_cast(-1)) { + widened_arg = new wchar_t[count + 1]; + mbstowcs(widened_arg, safe_arg, count + 1); + } +#else + size_t count = mbstowcs(nullptr, safe_arg, 0); + widened_arg = nullptr; + if (count != static_cast(-1)) { + widened_arg = new wchar_t[count + 1]; + mbstowcs(widened_arg, safe_arg, count + 1); + } +#endif + return widened_arg; +} + /// Python 2.x/3.x-compatible version of `PySys_SetArgv` inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_path) { // Before it was special-cased in python 3.8, passing an empty or null argv @@ -116,33 +137,14 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ #if PY_MAJOR_VERSION >= 3 size_t argv_size = static_cast(argc); // SetArgv* on python 3 takes wchar_t, so we have to convert. - std::unique_ptr widened_argv(new wchar_t*[argv_size]); std::vector< std::unique_ptr > widened_argv_entries; for (size_t ii = 0; ii < argv_size; ++ii) { -# if PY_MINOR_VERSION >= 5 - // From Python 3.5 onwards, we're supposed to use Py_DecodeLocale to - // generate the wchar_t version of argv. - widened_argv[ii] = Py_DecodeLocale(safe_argv[ii], nullptr); -# else - // Before Python 3.5, we're stuck with mbstowcs, which may or may not - // actually work. Mercifully, pyconfig.h provides this define: -# ifdef HAVE_BROKEN_MBSTOWCS - size_t count = strlen(safe_argv[ii]); -# else - size_t count = mbstowcs(nullptr, safe_argv[ii], 0); -# endif - widened_argv[ii] = nullptr; - if (count != static_cast(-1)) { - widened_argv[ii] = new wchar_t[count + 1]; - mbstowcs(widened_argv[ii], safe_argv[ii], count + 1); - } -# endif - if (nullptr == widened_argv[ii]) { + widened_argv_entries.emplace_back(widen_chars(safe_argv[ii])); + if (!widened_argv_entries.back()) { // A null here indicates a character-encoding failure or the python // interpreter out of memory. Give up. return; - } else - widened_argv_entries.emplace_back(widened_argv[ii]); + } } auto pysys_argv = widened_argv.get(); From 74243c57a9d8dbde5b080920a89cab5ea6c516c6 Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 28 Aug 2020 11:40:05 -0400 Subject: [PATCH 11/28] Restore widened_argv variable declaration. --- include/pybind11/embed.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 524d46b59c..f54836daa0 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -137,6 +137,7 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ #if PY_MAJOR_VERSION >= 3 size_t argv_size = static_cast(argc); // SetArgv* on python 3 takes wchar_t, so we have to convert. + std::unique_ptr widened_argv(new wchar_t*[argv_size]); std::vector< std::unique_ptr > widened_argv_entries; for (size_t ii = 0; ii < argv_size; ++ii) { widened_argv_entries.emplace_back(widen_chars(safe_argv[ii])); @@ -144,7 +145,8 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ // A null here indicates a character-encoding failure or the python // interpreter out of memory. Give up. return; - } + } else + widened_argv[ii] = widened_argv_entries.back().get(); } auto pysys_argv = widened_argv.get(); From c55ab94824d7aaa94e046816c3cbe8806b4414e7 Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 28 Aug 2020 11:46:50 -0400 Subject: [PATCH 12/28] Fix undeclared widened_arg variable on some paths. --- include/pybind11/embed.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index f54836daa0..1fce9569f7 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -103,16 +103,13 @@ struct wide_char_arg_deleter { wchar_t* widen_chars(char* safe_arg) { #if PY_VERSION_HEX >= 0x030500f0 wchar_t* widened_arg = Py_DecodeLocale(safe_arg, nullptr); -#elif HAVE_BROKEN_MBSTOWCS - size_t count = strlen(safe_arg); - widened_arg = nullptr; - if (count != static_cast(-1)) { - widened_arg = new wchar_t[count + 1]; - mbstowcs(widened_arg, safe_arg, count + 1); - } #else + wchar_t* widened_arg = nullptr; +# if HAVE_BROKEN_MBSTOWCS + size_t count = strlen(safe_arg); +# else size_t count = mbstowcs(nullptr, safe_arg, 0); - widened_arg = nullptr; +# endif if (count != static_cast(-1)) { widened_arg = new wchar_t[count + 1]; mbstowcs(widened_arg, safe_arg, count + 1); From fe38a2473267de8d31ce7945607203b00f64f694 Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 28 Aug 2020 11:52:34 -0400 Subject: [PATCH 13/28] Use delete[] to match new wchar_t[]. --- include/pybind11/embed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 1fce9569f7..e1df90f53c 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -95,7 +95,7 @@ struct wide_char_arg_deleter { // API docs: https://docs.python.org/3/c-api/sys.html#c.Py_DecodeLocale PyMem_RawFree(ptr); #else - delete ptr; + delete[] ptr; #endif } }; From 3aa548f03803369a420b688dd294ffcdf2d9cb17 Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 28 Aug 2020 13:44:52 -0400 Subject: [PATCH 14/28] Fix compiler errors --- include/pybind11/embed.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index e1df90f53c..064b002a3c 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -100,7 +100,7 @@ struct wide_char_arg_deleter { } }; -wchar_t* widen_chars(char* safe_arg) { +inline wchar_t* widen_chars(char* safe_arg) { #if PY_VERSION_HEX >= 0x030500f0 wchar_t* widened_arg = Py_DecodeLocale(safe_arg, nullptr); #else @@ -159,7 +159,7 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ // to work around CVE-2008-5983 PySys_SetArgv(argc, pysys_argv); if (!add_current_dir_to_path) - PyList_PopItem(py::module::import("sys").attr("path").ptr(), 0); + module::import("sys").attr("path").attr("pop")(); #else PySys_SetArgvEx(argc, pysys_argv, add_current_dir_to_path ? 1 : 0); #endif From 1d7f2b3ed075c51c4c280bf6d9340ee2aadc4b02 Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 28 Aug 2020 14:40:26 -0400 Subject: [PATCH 15/28] Use PY_VERSION_HEX for a cleaner CVE-2008-5983 mode check. --- include/pybind11/embed.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 064b002a3c..a8b366b816 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -152,9 +152,8 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ auto pysys_argv = safe_argv; #endif -#if PY_MAJOR_VERSION == 2 && (PY_MINOR_VERSION < 6 || (PY_MINOR_VERSION == 6 && PY_MICRO_VERSION < 6)) || \ - PY_MAJOR_VERSION == 3 && (PY_MINOR_VERSION < 1 || (PY_MINOR_VERSION == 1 && PY_MICRO_VERSION < 3)) - // These python versions don't have PySys_SetArgvEx, so we have to use the workaround +#if PY_VERSION_HEX < 0x020606f0 || (PY_MAJOR_VERSION == 3 && PY_VERSION_HEX < 0x030103f0 + // These python versions don't have PySys_SetArgvEx, so we have to use the approach // recommended by https://docs.python.org/3.5/c-api/init.html#c.PySys_SetArgvEx // to work around CVE-2008-5983 PySys_SetArgv(argc, pysys_argv); From d5c1df933896376cf74ca34326a30d78ffd84317 Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 28 Aug 2020 14:47:17 -0400 Subject: [PATCH 16/28] Fix typo --- include/pybind11/embed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index a8b366b816..9173258830 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -152,7 +152,7 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ auto pysys_argv = safe_argv; #endif -#if PY_VERSION_HEX < 0x020606f0 || (PY_MAJOR_VERSION == 3 && PY_VERSION_HEX < 0x030103f0 +#if PY_VERSION_HEX < 0x020606f0 || (PY_MAJOR_VERSION == 3 && PY_VERSION_HEX < 0x030103f0) // These python versions don't have PySys_SetArgvEx, so we have to use the approach // recommended by https://docs.python.org/3.5/c-api/init.html#c.PySys_SetArgvEx // to work around CVE-2008-5983 From aa3b8b83d4886f2d8e13380ce82181e3f3dc8bc4 Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 28 Aug 2020 14:47:27 -0400 Subject: [PATCH 17/28] Use explicit type for deleter so delete[] works cross-compiler. --- include/pybind11/embed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 9173258830..5d2a9faf52 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -90,7 +90,7 @@ struct embedded_module { }; struct wide_char_arg_deleter { - void operator()(void* ptr) const { + void operator()(wchar_t* ptr) const { #if PY_VERSION_HEX >= 0x030500f0 // API docs: https://docs.python.org/3/c-api/sys.html#c.Py_DecodeLocale PyMem_RawFree(ptr); From 0627b120924388214c8bf066287b2317c71cb91e Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 28 Aug 2020 16:35:18 -0400 Subject: [PATCH 18/28] Always use PySys_SetArgvEx because pybind11 doesn't support pythons that don't include it. --- include/pybind11/embed.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 5d2a9faf52..6b8cd7204f 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -152,16 +152,7 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ auto pysys_argv = safe_argv; #endif -#if PY_VERSION_HEX < 0x020606f0 || (PY_MAJOR_VERSION == 3 && PY_VERSION_HEX < 0x030103f0) - // These python versions don't have PySys_SetArgvEx, so we have to use the approach - // recommended by https://docs.python.org/3.5/c-api/init.html#c.PySys_SetArgvEx - // to work around CVE-2008-5983 - PySys_SetArgv(argc, pysys_argv); - if (!add_current_dir_to_path) - module::import("sys").attr("path").attr("pop")(); -#else PySys_SetArgvEx(argc, pysys_argv, add_current_dir_to_path ? 1 : 0); -#endif } PYBIND11_NAMESPACE_END(detail) From 5c38bc5d87a177185b6348912b0269a758d7831e Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 28 Aug 2020 17:04:29 -0400 Subject: [PATCH 19/28] Remove pointless ternary operator. --- include/pybind11/embed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 6b8cd7204f..d19fd52a8e 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -152,7 +152,7 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ auto pysys_argv = safe_argv; #endif - PySys_SetArgvEx(argc, pysys_argv, add_current_dir_to_path ? 1 : 0); + PySys_SetArgvEx(argc, pysys_argv, add_current_dir_to_path); } PYBIND11_NAMESPACE_END(detail) From 8358be4ae24ea5be94459d397f92a31381df3eef Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 28 Aug 2020 17:05:29 -0400 Subject: [PATCH 20/28] Use unique_ptr.reset instead of a second initialization. --- include/pybind11/embed.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index d19fd52a8e..d52ac4d88c 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -126,8 +126,8 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ std::unique_ptr argv_guard; std::unique_ptr argv_inner_guard; if (nullptr == argv || argc <= 0) { - argv_guard = std::unique_ptr(safe_argv = new char*[1]); - argv_inner_guard = std::unique_ptr(safe_argv[0] = new char[1]); + argv_guard.reset(safe_argv = new char*[1]); + argv_inner_guard.reset(safe_argv[0] = new char[1]); safe_argv[0][0] = '\0'; argc = 1; } From 8d64831c5abc0976f1bfd6e5bcbbc77cb53ea51a Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 31 Aug 2020 14:10:18 -0400 Subject: [PATCH 21/28] Rename add_program_dir_to_path parameter to clarify intent. --- include/pybind11/embed.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index d52ac4d88c..3e7a8c2795 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -119,7 +119,7 @@ inline wchar_t* widen_chars(char* safe_arg) { } /// Python 2.x/3.x-compatible version of `PySys_SetArgv` -inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_path) { +inline void set_interpreter_argv(int argc, char** argv, bool add_program_dir_to_path) { // Before it was special-cased in python 3.8, passing an empty or null argv // caused a segfault, so we have to reimplement the special case ourselves. char** safe_argv = argv; @@ -152,7 +152,7 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_current_dir_to_ auto pysys_argv = safe_argv; #endif - PySys_SetArgvEx(argc, pysys_argv, add_current_dir_to_path); + PySys_SetArgvEx(argc, pysys_argv, add_program_dir_to_path); } PYBIND11_NAMESPACE_END(detail) @@ -168,7 +168,7 @@ PYBIND11_NAMESPACE_END(detail) is controlled by the CPython runtime and is an exception to pybind11's normal behavior of throwing exceptions on errors.) - The remaining optional parameters, `argc`, `argv`, and `add_current_dir_to_path` are + The remaining optional parameters, `argc`, `argv`, and `add_program_dir_to_path` are used to populate ``sys.argv`` and ``sys.path``. See the |PySys_SetArgvEx documentation|_ for details. @@ -179,13 +179,13 @@ PYBIND11_NAMESPACE_END(detail) inline void initialize_interpreter(bool init_signal_handlers = true, int argc = 0, char** argv = nullptr, - bool add_current_dir_to_path = true) { + bool add_program_dir_to_path = true) { if (Py_IsInitialized()) pybind11_fail("The interpreter is already running"); Py_InitializeEx(init_signal_handlers ? 1 : 0); - detail::set_interpreter_argv(argc, argv, add_current_dir_to_path); + detail::set_interpreter_argv(argc, argv, add_program_dir_to_path); } /** \rst @@ -263,8 +263,8 @@ class scoped_interpreter { scoped_interpreter(bool init_signal_handlers = true, int argc = 0, char** argv = nullptr, - bool add_current_dir_to_path = true) { - initialize_interpreter(init_signal_handlers, argc, argv, add_current_dir_to_path); + bool add_program_dir_to_path = true) { + initialize_interpreter(init_signal_handlers, argc, argv, add_program_dir_to_path); } scoped_interpreter(const scoped_interpreter &) = delete; From abc7b38f0fac326aa124fc0a410c316afe964b18 Mon Sep 17 00:00:00 2001 From: Dan Date: Sat, 10 Oct 2020 17:47:14 -0400 Subject: [PATCH 22/28] Add defined() check before evaluating HAVE_BROKEN_MBSTOWCS. --- include/pybind11/embed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 56502144c8..42a799b828 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -109,7 +109,7 @@ inline wchar_t* widen_chars(char* safe_arg) { wchar_t* widened_arg = Py_DecodeLocale(safe_arg, nullptr); #else wchar_t* widened_arg = nullptr; -# if HAVE_BROKEN_MBSTOWCS +# if defined(HAVE_BROKEN_MBSTOWCS) && HAVE_BROKEN_MBSTOWCS size_t count = strlen(safe_arg); # else size_t count = mbstowcs(nullptr, safe_arg, 0); From 7763c78a51765726ec8351e4eb5237a800924a44 Mon Sep 17 00:00:00 2001 From: Aaron Gokaslan Date: Thu, 12 Aug 2021 12:37:14 -0400 Subject: [PATCH 23/28] Apply clang-tidy fixes --- include/pybind11/embed.h | 8 ++++---- tests/test_embed/test_interpreter.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 9c34a9c15a..432185e45f 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -128,7 +128,7 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_program_dir_to_ argc = 1; } #if PY_MAJOR_VERSION >= 3 - size_t argv_size = static_cast(argc); + auto argv_size = static_cast(argc); // SetArgv* on python 3 takes wchar_t, so we have to convert. std::unique_ptr widened_argv(new wchar_t*[argv_size]); std::vector< std::unique_ptr > widened_argv_entries; @@ -138,8 +138,8 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_program_dir_to_ // A null here indicates a character-encoding failure or the python // interpreter out of memory. Give up. return; - } else - widened_argv[ii] = widened_argv_entries.back().get(); + } + widened_argv[ii] = widened_argv_entries.back().get(); } auto pysys_argv = widened_argv.get(); @@ -148,7 +148,7 @@ inline void set_interpreter_argv(int argc, char** argv, bool add_program_dir_to_ auto pysys_argv = safe_argv; #endif - PySys_SetArgvEx(argc, pysys_argv, add_program_dir_to_path); + PySys_SetArgvEx(argc, pysys_argv, static_cast(add_program_dir_to_path)); } PYBIND11_NAMESPACE_END(detail) diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index 96e3acc8a2..1cdc6112a1 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -293,7 +293,7 @@ TEST_CASE("sys.argv gets initialized properly") { auto module = py::module::import("test_interpreter"); auto py_widget = module.attr("DerivedWidget")("The question"); const auto &cpp_widget = py_widget.cast(); - REQUIRE(cpp_widget.argv0() == ""); + REQUIRE(cpp_widget.argv0().empty()); } { From ff2976d7e474137837baaba982ab591fce1b7a92 Mon Sep 17 00:00:00 2001 From: Aaron Gokaslan Date: Mon, 16 Aug 2021 10:51:32 -0400 Subject: [PATCH 24/28] Pre-commit --- tests/test_embed/test_interpreter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_embed/test_interpreter.py b/tests/test_embed/test_interpreter.py index 7b1f31ac58..5ab55a4b37 100644 --- a/tests/test_embed/test_interpreter.py +++ b/tests/test_embed/test_interpreter.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -from widget_module import Widget import sys +from widget_module import Widget + class DerivedWidget(Widget): def __init__(self, message): From 912e1c94b03c87eb5d7f04b7d575859b2da0105a Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Sat, 21 Aug 2021 12:05:29 -0400 Subject: [PATCH 25/28] refactor: use const for set_interpreter_argv --- include/pybind11/embed.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 432185e45f..849082f840 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -96,7 +96,7 @@ struct wide_char_arg_deleter { } }; -inline wchar_t* widen_chars(char* safe_arg) { +inline wchar_t* widen_chars(const char* safe_arg) { #if PY_VERSION_HEX >= 0x030500f0 wchar_t* widened_arg = Py_DecodeLocale(safe_arg, nullptr); #else @@ -115,18 +115,16 @@ inline wchar_t* widen_chars(char* safe_arg) { } /// Python 2.x/3.x-compatible version of `PySys_SetArgv` -inline void set_interpreter_argv(int argc, char** argv, bool add_program_dir_to_path) { +inline void set_interpreter_argv(int argc, const char* const* argv, bool add_program_dir_to_path) { // Before it was special-cased in python 3.8, passing an empty or null argv // caused a segfault, so we have to reimplement the special case ourselves. - char** safe_argv = argv; - std::unique_ptr argv_guard; - std::unique_ptr argv_inner_guard; - if (nullptr == argv || argc <= 0) { - argv_guard.reset(safe_argv = new char*[1]); - argv_inner_guard.reset(safe_argv[0] = new char[1]); - safe_argv[0][0] = '\0'; + bool special_case = (argv == nullptr || argc <= 0); + + const char* const empty_argv[] {"\0"}; + const char* const* safe_argv = special_case ? empty_argv : argv; + if (special_case) argc = 1; - } + #if PY_MAJOR_VERSION >= 3 auto argv_size = static_cast(argc); // SetArgv* on python 3 takes wchar_t, so we have to convert. @@ -174,7 +172,7 @@ PYBIND11_NAMESPACE_END(detail) \endrst */ inline void initialize_interpreter(bool init_signal_handlers = true, int argc = 0, - char** argv = nullptr, + const char* const* argv = nullptr, bool add_program_dir_to_path = true) { if (Py_IsInitialized() != 0) pybind11_fail("The interpreter is already running"); @@ -258,7 +256,7 @@ class scoped_interpreter { public: scoped_interpreter(bool init_signal_handlers = true, int argc = 0, - char** argv = nullptr, + const char* const* argv = nullptr, bool add_program_dir_to_path = true) { initialize_interpreter(init_signal_handlers, argc, argv, add_program_dir_to_path); } From 1ede69efd0fe239c583cd9e7d43886a8342708d8 Mon Sep 17 00:00:00 2001 From: Aaron Gokaslan Date: Sat, 21 Aug 2021 12:48:37 -0400 Subject: [PATCH 26/28] Try to fix const issue and allocate vector properly --- include/pybind11/embed.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 849082f840..c3ebaea7bc 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -130,6 +130,7 @@ inline void set_interpreter_argv(int argc, const char* const* argv, bool add_pro // SetArgv* on python 3 takes wchar_t, so we have to convert. std::unique_ptr widened_argv(new wchar_t*[argv_size]); std::vector< std::unique_ptr > widened_argv_entries; + widened_argv_entries.reserve(argv_size); for (size_t ii = 0; ii < argv_size; ++ii) { widened_argv_entries.emplace_back(widen_chars(safe_argv[ii])); if (!widened_argv_entries.back()) { @@ -143,7 +144,7 @@ inline void set_interpreter_argv(int argc, const char* const* argv, bool add_pro auto pysys_argv = widened_argv.get(); #else // python 2.x - auto pysys_argv = safe_argv; + char** pysys_argv = const_castsafe_argv; #endif PySys_SetArgvEx(argc, pysys_argv, static_cast(add_program_dir_to_path)); From 318495e4b750876ee571d2d3991c82b1b6880cf2 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Sat, 21 Aug 2021 13:28:04 -0400 Subject: [PATCH 27/28] fix: copy strings on Python 2 --- include/pybind11/embed.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index c3ebaea7bc..a204a6dbfe 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -125,8 +125,8 @@ inline void set_interpreter_argv(int argc, const char* const* argv, bool add_pro if (special_case) argc = 1; -#if PY_MAJOR_VERSION >= 3 auto argv_size = static_cast(argc); +#if PY_MAJOR_VERSION >= 3 // SetArgv* on python 3 takes wchar_t, so we have to convert. std::unique_ptr widened_argv(new wchar_t*[argv_size]); std::vector< std::unique_ptr > widened_argv_entries; @@ -144,7 +144,11 @@ inline void set_interpreter_argv(int argc, const char* const* argv, bool add_pro auto pysys_argv = widened_argv.get(); #else // python 2.x - char** pysys_argv = const_castsafe_argv; + std::vector strings{safe_argv, safe_argv+argv_size}; + std::vector char_strings{argv_size}; + for (std::size_t i=0; i(add_program_dir_to_path)); From e5c33055a74038999cbba9941cb8f41c1e4976c6 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 26 Aug 2021 12:39:43 -0700 Subject: [PATCH 28/28] Applying clang-format-diff relative to master. The only manual change is an added empty line between pybind11 and system `#include`s. ``` git diff -U0 --no-color master | python3 $HOME/clone/llvm-project/clang/tools/clang-format/clang-format-diff.py -p1 -style=file -i ``` --- include/pybind11/embed.h | 37 ++++++++++++++------------- tests/test_embed/test_interpreter.cpp | 2 +- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index a204a6dbfe..7b5d7cd24a 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -11,6 +11,7 @@ #include "pybind11.h" #include "eval.h" + #include #include @@ -86,7 +87,7 @@ struct embedded_module { }; struct wide_char_arg_deleter { - void operator()(wchar_t* ptr) const { + void operator()(wchar_t *ptr) const { #if PY_VERSION_HEX >= 0x030500f0 // API docs: https://docs.python.org/3/c-api/sys.html#c.Py_DecodeLocale PyMem_RawFree(ptr); @@ -96,16 +97,16 @@ struct wide_char_arg_deleter { } }; -inline wchar_t* widen_chars(const char* safe_arg) { +inline wchar_t *widen_chars(const char *safe_arg) { #if PY_VERSION_HEX >= 0x030500f0 - wchar_t* widened_arg = Py_DecodeLocale(safe_arg, nullptr); + wchar_t *widened_arg = Py_DecodeLocale(safe_arg, nullptr); #else - wchar_t* widened_arg = nullptr; -# if defined(HAVE_BROKEN_MBSTOWCS) && HAVE_BROKEN_MBSTOWCS + wchar_t *widened_arg = nullptr; +# if defined(HAVE_BROKEN_MBSTOWCS) && HAVE_BROKEN_MBSTOWCS size_t count = strlen(safe_arg); -# else +# else size_t count = mbstowcs(nullptr, safe_arg, 0); -# endif +# endif if (count != static_cast(-1)) { widened_arg = new wchar_t[count + 1]; mbstowcs(widened_arg, safe_arg, count + 1); @@ -115,21 +116,21 @@ inline wchar_t* widen_chars(const char* safe_arg) { } /// Python 2.x/3.x-compatible version of `PySys_SetArgv` -inline void set_interpreter_argv(int argc, const char* const* argv, bool add_program_dir_to_path) { +inline void set_interpreter_argv(int argc, const char *const *argv, bool add_program_dir_to_path) { // Before it was special-cased in python 3.8, passing an empty or null argv // caused a segfault, so we have to reimplement the special case ourselves. bool special_case = (argv == nullptr || argc <= 0); - const char* const empty_argv[] {"\0"}; - const char* const* safe_argv = special_case ? empty_argv : argv; + const char *const empty_argv[]{"\0"}; + const char *const *safe_argv = special_case ? empty_argv : argv; if (special_case) argc = 1; auto argv_size = static_cast(argc); #if PY_MAJOR_VERSION >= 3 // SetArgv* on python 3 takes wchar_t, so we have to convert. - std::unique_ptr widened_argv(new wchar_t*[argv_size]); - std::vector< std::unique_ptr > widened_argv_entries; + std::unique_ptr widened_argv(new wchar_t *[argv_size]); + std::vector> widened_argv_entries; widened_argv_entries.reserve(argv_size); for (size_t ii = 0; ii < argv_size; ++ii) { widened_argv_entries.emplace_back(widen_chars(safe_argv[ii])); @@ -144,11 +145,11 @@ inline void set_interpreter_argv(int argc, const char* const* argv, bool add_pro auto pysys_argv = widened_argv.get(); #else // python 2.x - std::vector strings{safe_argv, safe_argv+argv_size}; - std::vector char_strings{argv_size}; - for (std::size_t i=0; i strings{safe_argv, safe_argv + argv_size}; + std::vector char_strings{argv_size}; + for (std::size_t i = 0; i < argv_size; ++i) char_strings[i] = &strings[i][0]; - char** pysys_argv = char_strings.data(); + char **pysys_argv = char_strings.data(); #endif PySys_SetArgvEx(argc, pysys_argv, static_cast(add_program_dir_to_path)); @@ -177,7 +178,7 @@ PYBIND11_NAMESPACE_END(detail) \endrst */ inline void initialize_interpreter(bool init_signal_handlers = true, int argc = 0, - const char* const* argv = nullptr, + const char *const *argv = nullptr, bool add_program_dir_to_path = true) { if (Py_IsInitialized() != 0) pybind11_fail("The interpreter is already running"); @@ -261,7 +262,7 @@ class scoped_interpreter { public: scoped_interpreter(bool init_signal_handlers = true, int argc = 0, - const char* const* argv = nullptr, + const char *const *argv = nullptr, bool add_program_dir_to_path = true) { initialize_interpreter(init_signal_handlers, argc, argv, add_program_dir_to_path); } diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index b81ce13fd3..78b64be6b0 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -313,7 +313,7 @@ TEST_CASE("sys.argv gets initialized properly") { } { - char* argv[] = { strdup("a.out") }; + char *argv[] = {strdup("a.out")}; py::scoped_interpreter argv_scope(true, 1, argv); free(argv[0]); auto module = py::module::import("test_interpreter");