diff --git a/.circleci/config.yml b/.circleci/config.yml index 74fdc1879c5..dd4ad2bcbe4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -762,9 +762,15 @@ jobs: gunicorn: <<: *machine_executor steps: - - run_test: - pattern: "gunicorn" - snapshot: true + - attach_workspace: + at: . + - checkout + - start_docker_services: + services: ddagent + - run: + command: | + mv .riot .ddriot + ./scripts/ddtest riot -v run --pass-env -s 'gunicorn' httplib: <<: *machine_executor diff --git a/ddtrace/contrib/gunicorn/__init__.py b/ddtrace/contrib/gunicorn/__init__.py index 03f69f18328..d8e6db0452a 100644 --- a/ddtrace/contrib/gunicorn/__init__.py +++ b/ddtrace/contrib/gunicorn/__init__.py @@ -1,17 +1,33 @@ """ -``ddtrace`` supports `Gunicorn `__. +**Note:** ``ddtrace-run`` and Python 2 are both not supported with `Gunicorn `__. -If the application is using the ``gevent`` worker class, ``gevent`` monkey patching must be performed before loading the -``ddtrace`` library. +``ddtrace`` only supports Gunicorn's ``gevent`` worker type when configured as follows: -There are different options to ensure this happens: +- The application is running under Python 3 +- `ddtrace-run` is not used +- The `DD_GEVENT_PATCH_ALL=1` environment variable is set +- Gunicorn's ```post_fork`` `__ hook does not import from + ``ddtrace`` +- ``import ddtrace.bootstrap.sitecustomize`` is called either in the application's main process or in the + ```post_worker_init`` `__ hook. -- If using ``ddtrace-run``, set the environment variable ``DD_GEVENT_PATCH_ALL=1``. +.. code-block:: python -- Replace ``ddtrace-run`` by using ``import ddtrace.bootstrap.sitecustomize`` as the first import of the application. + # gunicorn.conf.py + def post_fork(server, worker): + # don't touch ddtrace here + pass -- Use a `post_worker_init `_ - hook to import ``ddtrace.bootstrap.sitecustomize``. + def post_worker_init(worker): + import ddtrace.bootstrap.sitecustomize + + workers = 4 + worker_class = "gevent" + bind = "8080" + +.. code-block:: bash + + DD_GEVENT_PATCH_ALL=1 gunicorn --config gunicorn.conf.py path.to.my:app """ diff --git a/ddtrace/internal/periodic.py b/ddtrace/internal/periodic.py index e50c952b82e..ed0b7dff98f 100644 --- a/ddtrace/internal/periodic.py +++ b/ddtrace/internal/periodic.py @@ -54,15 +54,46 @@ def stop(self): self.quit.set() def _is_proper_class(self): - # DEV: Some frameworks, like e.g. gevent, seem to resuscitate some - # of the threads that were running prior to the fork of the worker - # processes. These threads are normally created via the native API - # and are exposed to the child process as _DummyThreads. We check - # whether the current thread is no longer an instance of the - # original thread class to prevent it from running in the child - # process while the state copied over from the parent is being - # cleaned up. The restarting of the thread is responsibility to the - # registered forksafe hooks. + """ + Picture this: you're running a gunicorn server under ddtrace-run (`ddtrace-run gunicorn...`). + Profiler._profiler() (a PeriodicThread) is running in the main process. + Gunicorn forks the process and sets up the resulting child process as a "gevent worker". + Because of the Profiler's _restart_on_fork hook, inside the child process, the Profiler is stopped, copied, and + started for the purpose of profiling the child process. + In _restart_on_fork, the Profiler being stopped is the one that was running before the fork, ie the one in the + main process. Copying that Profiler takes place in the child process. Inside the child process, we've now got + *one* process-local Profiler running in a thread, and that's the only Profiler running. + + ...or is it? + + As it turns out, gevent has some of its own post-fork logic that complicates things. All of the above is + accurate, except for the bit about the child process' Profiler being the only one alive. In fact, gunicorn's + "gevent worker" notices that there was a Profiler (PeriodicThread) running before the fork, and attempts to + bring it back to life after the fork. + + Outside of ddtrace-run, the only apparent problem this causes is duplicated work and decreased performance. + Under ddtrace-run, because it triggers an additional fork to which gevent's post-fork logic responds, the + thread ends up being restarted twice in the child process. This means that there are a bunch of instances of + the thread running simultaneously: + + the "correct" one started by ddtrace's _restart_on_fork + the copy of the pre-fork one restarted by gevent after the fork done by gunicorn + the copy of the pre-fork one restarted by gevent after the fork done by ddtrace-run + + This causes even more problems for PeriodicThread uses like the Profiler that rely on running as singletons per + process. + + In these situations where there are many copies of the restarted thread, the copies are conveniently marked as + such by being instances of gevent.threading._DummyThread. + + This function _is_proper_class exists as a thread-local release valve that lets these _DummyThreads stop + themselves, because there's no other sane way to join all _DummyThreads from outside of those threads + themselves. Not doing this causes the _DummyThreads to be orphaned and hang, which can degrade performance + and cause crashes in threads that assume they're singletons. + + That's why it's hard to write a test for - doing so requires waiting for the threads to die on their own, which + can make tests take a really long time. + """ return isinstance(threading.current_thread(), self.__class__) def run(self): diff --git a/docs/index.rst b/docs/index.rst index 444566290ba..5ea5d1b38b5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -101,7 +101,7 @@ contacting support. +--------------------------------------------------+---------------+----------------+ | :ref:`graphql-core ` | >= 2.0.0 | Yes | +--------------------------------------------------+---------------+----------------+ -| :ref:`gunicorn ` | >= 19.10.0 | No | +| :ref:`gunicorn ` | >= 20.0 | No | +--------------------------------------------------+---------------+----------------+ | :ref:`httplib` | \* | Yes | +--------------------------------------------------+---------------+----------------+ diff --git a/releasenotes/notes/gunicorn-issue-09308901fa00e76c.yaml b/releasenotes/notes/gunicorn-issue-09308901fa00e76c.yaml new file mode 100644 index 00000000000..8baf41ec4e7 --- /dev/null +++ b/releasenotes/notes/gunicorn-issue-09308901fa00e76c.yaml @@ -0,0 +1,4 @@ +--- +issues: + - | + gunicorn: ddtrace-run does not work with gunicorn. To instrument a gunicorn application, follow the instructions `here `_. diff --git a/riotfile.py b/riotfile.py index c93420eef1e..3d254d0a1b4 100644 --- a/riotfile.py +++ b/riotfile.py @@ -2595,13 +2595,8 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION): Venv( name="gunicorn", command="pytest {cmdargs} tests/contrib/gunicorn", - pkgs={"requests": latest}, + pkgs={"requests": latest, "gevent": latest}, venvs=[ - Venv( - pys="2.7", - # Gunicorn ended Python 2 support after 19.10.0 - pkgs={"gunicorn": "==19.10.0"}, - ), Venv( pys=select_pys(min_version="3.5"), pkgs={"gunicorn": ["==19.10.0", "==20.0.4", latest]}, diff --git a/tests/contrib/gunicorn/post_fork.py b/tests/contrib/gunicorn/post_fork.py new file mode 100644 index 00000000000..85da8039fd3 --- /dev/null +++ b/tests/contrib/gunicorn/post_fork.py @@ -0,0 +1,21 @@ +import sys + +import gevent.monkey + +from ddtrace.debugging import DynamicInstrumentation +from ddtrace.internal.remoteconfig import RemoteConfig +from ddtrace.profiling.profiler import Profiler + + +# take some notes about the relative ordering of thread creation and +# monkeypatching +monkeypatch_happened = gevent.monkey.is_module_patched("threading") + +# enabling DI here allows test cases to exercise the code paths that handle +# gevent monkeypatching of running threads +# post_fork is called before gevent.monkey.patch_all() +if sys.version_info < (3, 11): + DynamicInstrumentation.enable() + +RemoteConfig._was_enabled_after_gevent_monkeypatch = monkeypatch_happened +Profiler._was_enabled_after_gevent_monkeypatch = monkeypatch_happened diff --git a/tests/contrib/gunicorn/simple_app.py b/tests/contrib/gunicorn/simple_app.py deleted file mode 100644 index ed02339949e..00000000000 --- a/tests/contrib/gunicorn/simple_app.py +++ /dev/null @@ -1,4 +0,0 @@ -def app(_, start_response): - data = b"Hello, World!\n" - start_response("200 OK", [("Content-Type", "text/plain"), ("Content-Length", str(len(data)))]) - return iter([data]) diff --git a/tests/contrib/gunicorn/test_gunicorn.py b/tests/contrib/gunicorn/test_gunicorn.py index fa3495d9a2c..7e702dd14ae 100644 --- a/tests/contrib/gunicorn/test_gunicorn.py +++ b/tests/contrib/gunicorn/test_gunicorn.py @@ -1,8 +1,12 @@ +from contextlib import contextmanager +import json import os import subprocess import sys +import time from typing import Dict from typing import NamedTuple +from typing import Optional # noqa import pytest import tenacity @@ -11,6 +15,11 @@ from tests.webclient import Client +SERVICE_INTERVAL = 1 +# this is the most direct manifestation i can find of a bug caused by misconfigured gunicorn+ddtrace +MOST_DIRECT_KNOWN_GUNICORN_RELATED_PROFILER_ERROR_SIGNAL = b"RuntimeError: the memalloc module is already started" + + GunicornServerSettings = NamedTuple( "GunicornServerSettings", [ @@ -21,29 +30,62 @@ ("worker_class", str), ("bind", str), ("use_ddtracerun", bool), - ("post_worker_init", str), + ("import_sitecustomize_in_postworkerinit", bool), + ("start_service_in_hook_named", str), ], ) -_post_worker_init_ddtrace = """ -def post_worker_init(worker): - import ddtrace.bootstrap.sitecustomize -""" +IMPORT_SITECUSTOMIZE = "import ddtrace.bootstrap.sitecustomize" +with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "post_fork.py"), "r") as f: + code = f.readlines() +START_SERVICE = " " + " ".join(code) + + +def assert_no_profiler_error(server_process): + assert MOST_DIRECT_KNOWN_GUNICORN_RELATED_PROFILER_ERROR_SIGNAL not in server_process.stderr.read() + + +def parse_payload(data): + decoded = data + if sys.version_info[1] == 5: + decoded = data.decode("utf-8") + return json.loads(decoded) + + +def assert_remoteconfig_started_successfully(response, check_patch=True): + if sys.version_info[1] in (5, 11): + return + assert response.status_code == 200 + payload = parse_payload(response.content) + assert payload["remoteconfig"]["worker_alive"] is True + if check_patch: + assert payload["remoteconfig"]["enabled_after_gevent_monkeypatch"] is True def _gunicorn_settings_factory( - env=os.environ.copy(), # type: Dict[str, str] + env=None, # type: Dict[str, str] directory=os.getcwd(), # type: str - app_path="tests.contrib.gunicorn.simple_app:app", # type: str + app_path="tests.contrib.gunicorn.wsgi_mw_app:app", # type: str num_workers="4", # type: str worker_class="sync", # type: str bind="0.0.0.0:8080", # type: str use_ddtracerun=True, # type: bool - post_worker_init="", # type: str + import_sitecustomize_in_postworkerinit=False, # type: bool + patch_gevent=None, # type: Optional[bool] + import_sitecustomize_in_app=None, # type: Optional[bool] + start_service_in_hook_named="post_fork", # type: str ): # type: (...) -> GunicornServerSettings """Factory for creating gunicorn settings with simple defaults if settings are not defined.""" + if env is None: + env = os.environ.copy() + if patch_gevent is not None: + env["DD_GEVENT_PATCH_ALL"] = str(patch_gevent) + if import_sitecustomize_in_app is not None: + env["_DD_TEST_IMPORT_SITECUSTOMIZE"] = str(import_sitecustomize_in_app) + env["DD_REMOTECONFIG_POLL_SECONDS"] = str(SERVICE_INTERVAL) + env["DD_PROFILING_UPLOAD_INTERVAL"] = str(SERVICE_INTERVAL) return GunicornServerSettings( env=env, directory=directory, @@ -52,79 +94,103 @@ def _gunicorn_settings_factory( worker_class=worker_class, bind=bind, use_ddtracerun=use_ddtracerun, - post_worker_init=post_worker_init, + import_sitecustomize_in_postworkerinit=import_sitecustomize_in_postworkerinit, + start_service_in_hook_named=start_service_in_hook_named, ) -@pytest.fixture -def gunicorn_server_settings(): - yield _gunicorn_settings_factory() - - -@pytest.fixture -def gunicorn_server(gunicorn_server_settings, tmp_path): - cfg_file = tmp_path / "gunicorn.conf.py" +def build_config_file(gunicorn_server_settings): + post_fork = START_SERVICE if gunicorn_server_settings.start_service_in_hook_named != "post_worker_init" else "" + post_worker_init = " {sitecustomize}\n{service_start}".format( + sitecustomize=IMPORT_SITECUSTOMIZE if gunicorn_server_settings.import_sitecustomize_in_postworkerinit else "", + service_start=START_SERVICE + if gunicorn_server_settings.start_service_in_hook_named == "post_worker_init" + else "", + ) cfg = """ +def post_fork(server, worker): + pass +{post_fork} + +def post_worker_init(worker): + pass {post_worker_init} workers = {num_workers} worker_class = "{worker_class}" bind = "{bind}" """.format( - post_worker_init=gunicorn_server_settings.post_worker_init, + post_fork=post_fork, + post_worker_init=post_worker_init, bind=gunicorn_server_settings.bind, num_workers=gunicorn_server_settings.num_workers, worker_class=gunicorn_server_settings.worker_class, ) + return cfg + + +@contextmanager +def gunicorn_server(gunicorn_server_settings, tmp_path): + cfg_file = tmp_path / "gunicorn.conf.py" + cfg = build_config_file(gunicorn_server_settings) cfg_file.write_text(stringify(cfg)) cmd = [] if gunicorn_server_settings.use_ddtracerun: cmd = ["ddtrace-run"] cmd += ["gunicorn", "--config", str(cfg_file), str(gunicorn_server_settings.app_path)] print("Running %r with configuration file %s" % (" ".join(cmd), cfg)) - p = subprocess.Popen( + + server_process = subprocess.Popen( cmd, env=gunicorn_server_settings.env, cwd=gunicorn_server_settings.directory, - stdout=sys.stdout, - stderr=sys.stderr, + stderr=subprocess.PIPE, close_fds=True, preexec_fn=os.setsid, ) try: client = Client("http://%s" % gunicorn_server_settings.bind) - # Wait for the server to start up try: - client.wait(max_tries=20, delay=0.5) + client.wait(max_tries=100, delay=0.1) except tenacity.RetryError: - # if proc.returncode is not None: - # process failed raise TimeoutError("Server failed to start, see stdout and stderr logs") - - yield client - + time.sleep(SERVICE_INTERVAL) + yield server_process, client try: client.get_ignored("/shutdown") except Exception: pass finally: - p.terminate() - p.wait() + server_process.terminate() + server_process.wait() -def test_basic(gunicorn_server): - r = gunicorn_server.get("/") - assert r.status_code == 200 - assert r.content == b"Hello, World!\n" +SETTINGS_GEVENT_APPIMPORT_PATCH_POSTWORKERSERVICE = _gunicorn_settings_factory( + worker_class="gevent", + use_ddtracerun=False, + import_sitecustomize_in_app=True, + patch_gevent=True, + start_service_in_hook_named="post_worker_init", +) +SETTINGS_GEVENT_POSTWORKERIMPORT_PATCH_POSTWORKERSERVICE = _gunicorn_settings_factory( + worker_class="gevent", + use_ddtracerun=False, + import_sitecustomize_in_postworkerinit=True, + patch_gevent=True, + start_service_in_hook_named="post_worker_init", +) -@pytest.mark.snapshot(ignores=["meta.result_class"]) @pytest.mark.parametrize( - "gunicorn_server_settings", [_gunicorn_settings_factory(app_path="tests.contrib.gunicorn.wsgi_mw_app:app")] + "gunicorn_server_settings", + [ + SETTINGS_GEVENT_APPIMPORT_PATCH_POSTWORKERSERVICE, + SETTINGS_GEVENT_POSTWORKERIMPORT_PATCH_POSTWORKERSERVICE, + ], ) -def test_traced_basic(gunicorn_server_settings, gunicorn_server): - # meta.result_class is listiterator vs list_iterator in PY2 vs PY3. - # Ignore this field to avoid having to create mostly duplicate snapshots in Python 2 and 3. - r = gunicorn_server.get("/") - assert r.status_code == 200 - assert r.content == b"Hello, World!\n" +def test_no_known_errors_occur(gunicorn_server_settings, tmp_path): + with gunicorn_server(gunicorn_server_settings, tmp_path) as context: + server_process, client = context + r = client.get("/") + assert_no_profiler_error(server_process) + assert_remoteconfig_started_successfully(r) diff --git a/tests/contrib/gunicorn/wsgi_mw_app.py b/tests/contrib/gunicorn/wsgi_mw_app.py index 87128dc9dd6..98898349836 100644 --- a/tests/contrib/gunicorn/wsgi_mw_app.py +++ b/tests/contrib/gunicorn/wsgi_mw_app.py @@ -1,20 +1,57 @@ +""" +This app exists to replicate and report on failures and degraded behavior that can arise when using ddtrace with +gunicorn +""" +import json +import os +import sys + from ddtrace import tracer from ddtrace.contrib.wsgi import DDWSGIMiddleware +from ddtrace.internal.remoteconfig import RemoteConfig +from ddtrace.profiling import bootstrap +import ddtrace.profiling.auto # noqa from tests.webclient import PingFilter +if os.getenv("_DD_TEST_IMPORT_SITECUSTOMIZE"): + import ddtrace.bootstrap.sitecustomize # noqa: F401 # isort: skip + tracer.configure( settings={ "FILTERS": [PingFilter()], } ) +if sys.version_info < (3, 11): + from ddtrace.debugging import DynamicInstrumentation + + +def aggressive_shutdown(): + RemoteConfig.disable() + if sys.version_info < (3, 11): + DynamicInstrumentation.disable() + tracer.shutdown(timeout=1) + if hasattr(bootstrap, "profiler"): + bootstrap.profiler._scheduler.stop() + bootstrap.profiler.stop() + def simple_app(environ, start_response): if environ["RAW_URI"] == "/shutdown": - tracer.shutdown() + aggressive_shutdown() + data = bytes("goodbye", encoding="utf-8") + else: + has_config_worker = hasattr(RemoteConfig._worker, "_worker") + payload = { + "remoteconfig": { + "worker_alive": has_config_worker and RemoteConfig._worker._worker.is_alive(), + "enabled_after_gevent_monkeypatch": RemoteConfig._was_enabled_after_gevent_monkeypatch, + }, + } + json_payload = json.dumps(payload) + data = bytes(json_payload, encoding="utf-8") - data = b"Hello, World!\n" start_response("200 OK", [("Content-Type", "text/plain"), ("Content-Length", str(len(data)))]) return iter([data])