Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Call PySys_SetArgv when initializing interpreter. #2341

Merged
merged 33 commits into from
Aug 26, 2021
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
1801811
Call PySys_SetArgv when initializing interpreter.
drmoose Jul 29, 2020
c3e96c2
Document argc/argv parameters in initialize_interpreter.
drmoose Jul 30, 2020
5145e58
Remove manual memory management from set_interpreter_argv in favor of…
drmoose Jul 30, 2020
62243f2
Use size_t for indexers in set_interpreter_argv.
drmoose Jul 30, 2020
32c1445
Minimize macros for flow control in set_interpreter_argv.
drmoose Jul 30, 2020
3ff967d
Fix 'unused variable' warning on Py2
drmoose Jul 30, 2020
27ad85e
whitespace
drmoose Jul 30, 2020
fb3de9f
Define wide_char_arg_deleter outside set_interpreter_argv.
bstaletic Aug 25, 2020
3b8438e
Do sys.path workaround in C++ rather than eval.
bstaletic Aug 26, 2020
0566160
Factor out wchar conversion to a separate function.
bstaletic Aug 28, 2020
74243c5
Restore widened_argv variable declaration.
drmoose Aug 28, 2020
c55ab94
Fix undeclared widened_arg variable on some paths.
drmoose Aug 28, 2020
fe38a24
Use delete[] to match new wchar_t[].
drmoose Aug 28, 2020
3aa548f
Fix compiler errors
drmoose Aug 28, 2020
1d7f2b3
Use PY_VERSION_HEX for a cleaner CVE-2008-5983 mode check.
drmoose Aug 28, 2020
d5c1df9
Fix typo
drmoose Aug 28, 2020
aa3b8b8
Use explicit type for deleter so delete[] works cross-compiler.
drmoose Aug 28, 2020
0627b12
Always use PySys_SetArgvEx because pybind11 doesn't support pythons t…
drmoose Aug 28, 2020
5c38bc5
Remove pointless ternary operator.
drmoose Aug 28, 2020
8358be4
Use unique_ptr.reset instead of a second initialization.
drmoose Aug 28, 2020
8d64831
Rename add_program_dir_to_path parameter to clarify intent.
drmoose Aug 31, 2020
3ad3c6d
Merge remote-tracking branch 'upstream/master' into set_interpreter_argv
drmoose Oct 10, 2020
abc7b38
Add defined() check before evaluating HAVE_BROKEN_MBSTOWCS.
drmoose Oct 10, 2020
65881fe
Merge branch 'master' into set_interpreter_argv
Skylion007 Aug 9, 2021
7763c78
Apply clang-tidy fixes
Skylion007 Aug 12, 2021
6ddc929
Merge branch 'master' of https://github.com/pybind/pybind11 into set_…
Skylion007 Aug 16, 2021
ff2976d
Pre-commit
Skylion007 Aug 16, 2021
912e1c9
refactor: use const for set_interpreter_argv
henryiii Aug 21, 2021
52d88c3
Merge branch 'master' of https://github.com/pybind/pybind11 into set_…
Skylion007 Aug 21, 2021
1ede69e
Try to fix const issue and allocate vector properly
Skylion007 Aug 21, 2021
318495e
fix: copy strings on Python 2
henryiii Aug 21, 2021
96cb69f
Merge branch 'master' into set_interpreter_argv
Aug 26, 2021
e5c3305
Applying clang-format-diff relative to master.
Aug 26, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 87 additions & 8 deletions include/pybind11/embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

#include "pybind11.h"
#include "eval.h"
#include <memory>
#include <vector>

#if defined(PYPY_VERSION)
# error Embedding the interpreter is not supported with PyPy
Expand Down Expand Up @@ -83,29 +85,101 @@ struct embedded_module {
}
};

struct wide_char_arg_deleter {
void operator()(wchar_t* ptr) const {
#if PY_VERSION_HEX >= 0x030500f0
// API docs: https://docs.python.org/3/c-api/sys.html#c.Py_DecodeLocale
PyMem_RawFree(ptr);
#else
delete[] ptr;
#endif
}
};

inline wchar_t* widen_chars(const char* safe_arg) {
#if PY_VERSION_HEX >= 0x030500f0
wchar_t* widened_arg = Py_DecodeLocale(safe_arg, nullptr);
#else
wchar_t* widened_arg = nullptr;
# if defined(HAVE_BROKEN_MBSTOWCS) && HAVE_BROKEN_MBSTOWCS
size_t count = strlen(safe_arg);
# else
size_t count = mbstowcs(nullptr, safe_arg, 0);
# endif
if (count != static_cast<size_t>(-1)) {
widened_arg = new wchar_t[count + 1];
mbstowcs(widened_arg, safe_arg, count + 1);
}
#endif
return widened_arg;
}

/// Python 2.x/3.x-compatible version of `PySys_SetArgv`
inline void set_interpreter_argv(int argc, const char* const* argv, bool add_program_dir_to_path) {
// Before it was special-cased in python 3.8, passing an empty or null argv
// caused a segfault, so we have to reimplement the special case ourselves.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: shouldn't we add an debug assert that argc >= 0? We are casting a signed int to an unsigned size_t

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's covered by the argc <= 0 below, combined with argc = 1.

bool special_case = (argv == nullptr || argc <= 0);

const char* const empty_argv[] {"\0"};
const char* const* safe_argv = special_case ? empty_argv : argv;
if (special_case)
argc = 1;

#if PY_MAJOR_VERSION >= 3
auto argv_size = static_cast<size_t>(argc);
// SetArgv* on python 3 takes wchar_t, so we have to convert.
std::unique_ptr<wchar_t*[]> widened_argv(new wchar_t*[argv_size]);
Skylion007 marked this conversation as resolved.
Show resolved Hide resolved
std::vector< std::unique_ptr<wchar_t[], wide_char_arg_deleter> > widened_argv_entries;
Skylion007 marked this conversation as resolved.
Show resolved Hide resolved
for (size_t ii = 0; ii < argv_size; ++ii) {
widened_argv_entries.emplace_back(widen_chars(safe_argv[ii]));
if (!widened_argv_entries.back()) {
// A null here indicates a character-encoding failure or the python
// interpreter out of memory. Give up.
return;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we raise an exception here, instead of silently doing nothing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think silently doing nothing is actually OK here. Consider:

  • All this means is that PySys_SetArgvEx won't be called, but current pybind doesn't call it, so probably 95% of users won't notice or care.
  • Assuming locales are configured correctly on your machine, you are probably out of memory. As such, your exception handler is not likely to work.
  • Assuming locales are configured incorrectly, a failure here will be harder to debug than the inevitable failures in the user's python code.

That said, I'm not married to the decision. It just seemed like a reasonable fallback to revert to the old behavior.

}
widened_argv[ii] = widened_argv_entries.back().get();
}

auto pysys_argv = widened_argv.get();
#else
// python 2.x
auto pysys_argv = safe_argv;
#endif

PySys_SetArgvEx(argc, pysys_argv, static_cast<int>(add_program_dir_to_path));
}

PYBIND11_NAMESPACE_END(detail)

/** \rst
Initialize the Python interpreter. No other pybind11 or CPython API functions can be
called before this is done; with the exception of `PYBIND11_EMBEDDED_MODULE`. The
optional parameter can be used to skip the registration of signal handlers (see the
`Python documentation`_ for details). Calling this function again after the interpreter
has already been initialized is a fatal error.
optional `init_signal_handlers` parameter can be used to skip the registration of
signal handlers (see the `Python documentation`_ for details). Calling this function
again after the interpreter has already been initialized is a fatal error.

If initializing the Python interpreter fails, then the program is terminated. (This
is controlled by the CPython runtime and is an exception to pybind11's normal behavior
of throwing exceptions on errors.)

The remaining optional parameters, `argc`, `argv`, and `add_program_dir_to_path` are
used to populate ``sys.argv`` and ``sys.path``.
See the |PySys_SetArgvEx documentation|_ for details.

.. _Python documentation: https://docs.python.org/3/c-api/init.html#c.Py_InitializeEx
.. |PySys_SetArgvEx documentation| replace:: ``PySys_SetArgvEx`` documentation
.. _PySys_SetArgvEx documentation: https://docs.python.org/3/c-api/init.html#c.PySys_SetArgvEx
\endrst */
inline void initialize_interpreter(bool init_signal_handlers = true) {
inline void initialize_interpreter(bool init_signal_handlers = true,
int argc = 0,
const char* const* argv = nullptr,
bool add_program_dir_to_path = true) {
if (Py_IsInitialized() != 0)
pybind11_fail("The interpreter is already running");

Py_InitializeEx(init_signal_handlers ? 1 : 0);

// Make .py files in the working directory available by default
module_::import("sys").attr("path").cast<list>().append(".");
detail::set_interpreter_argv(argc, argv, add_program_dir_to_path);
}

/** \rst
Expand Down Expand Up @@ -167,6 +241,8 @@ inline void finalize_interpreter() {
Scope guard version of `initialize_interpreter` and `finalize_interpreter`.
This a move-only guard and only a single instance can exist.

See `initialize_interpreter` for a discussion of its constructor arguments.

.. code-block:: cpp

#include <pybind11/embed.h>
Expand All @@ -178,8 +254,11 @@ inline void finalize_interpreter() {
\endrst */
class scoped_interpreter {
public:
scoped_interpreter(bool init_signal_handlers = true) {
initialize_interpreter(init_signal_handlers);
scoped_interpreter(bool init_signal_handlers = true,
int argc = 0,
const char* const* argv = nullptr,
bool add_program_dir_to_path = true) {
initialize_interpreter(init_signal_handlers, argc, argv, add_program_dir_to_path);
}

scoped_interpreter(const scoped_interpreter &) = delete;
Expand Down
24 changes: 24 additions & 0 deletions tests/test_embed/test_interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class Widget {

std::string the_message() const { return message; }
virtual int the_answer() const = 0;
virtual std::string argv0() const = 0;

private:
std::string message;
Expand All @@ -32,6 +33,7 @@ class PyWidget final : public Widget {
using Widget::Widget;

int the_answer() const override { PYBIND11_OVERRIDE_PURE(int, Widget, the_answer); }
std::string argv0() const override { PYBIND11_OVERRIDE_PURE(std::string, Widget, argv0); }
};

PYBIND11_EMBEDDED_MODULE(widget_module, m) {
Expand Down Expand Up @@ -283,3 +285,25 @@ TEST_CASE("Reload module from file") {
result = module_.attr("test")().cast<int>();
REQUIRE(result == 2);
}

TEST_CASE("sys.argv gets initialized properly") {
py::finalize_interpreter();
{
py::scoped_interpreter default_scope;
auto module = py::module::import("test_interpreter");
auto py_widget = module.attr("DerivedWidget")("The question");
const auto &cpp_widget = py_widget.cast<const Widget &>();
REQUIRE(cpp_widget.argv0().empty());
}

{
char* argv[] = { strdup("a.out") };
py::scoped_interpreter argv_scope(true, 1, argv);
free(argv[0]);
auto module = py::module::import("test_interpreter");
auto py_widget = module.attr("DerivedWidget")("The question");
const auto &cpp_widget = py_widget.cast<const Widget &>();
REQUIRE(cpp_widget.argv0() == "a.out");
}
py::initialize_interpreter();
}
5 changes: 5 additions & 0 deletions tests/test_embed/test_interpreter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# -*- coding: utf-8 -*-
import sys

from widget_module import Widget


Expand All @@ -8,3 +10,6 @@ def __init__(self, message):

def the_answer(self):
return 42

def argv0(self):
return sys.argv[0]