From fe5f0c97ef71f2cd7a0657155a3e7d5ee7c65092 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sat, 7 Oct 2023 16:38:30 -0400 Subject: [PATCH] better cython debugging (#53821) --- .gitignore | 2 +- .../development/debugging_extensions.rst | 30 ++++++++++++++-- pandas/_libs/meson.build | 10 +++++- pandas/_libs/tslibs/meson.build | 11 +++++- setup.py | 3 ++ tooling/debug/Dockerfile.pandas-debug | 35 +++++++++++++++++++ tooling/debug/README | 19 ++++++++++ 7 files changed, 105 insertions(+), 5 deletions(-) create mode 100644 tooling/debug/Dockerfile.pandas-debug create mode 100644 tooling/debug/README diff --git a/.gitignore b/.gitignore index cd22c2bb8cb5b..051a3ec11b794 100644 --- a/.gitignore +++ b/.gitignore @@ -39,7 +39,7 @@ .mesonpy-native-file.ini MANIFEST compile_commands.json -debug +.debug # Python files # ################ diff --git a/doc/source/development/debugging_extensions.rst b/doc/source/development/debugging_extensions.rst index 27f812b65e261..63154369dfd88 100644 --- a/doc/source/development/debugging_extensions.rst +++ b/doc/source/development/debugging_extensions.rst @@ -14,8 +14,8 @@ For Python developers with limited or no C/C++ experience this can seem a daunti 2. `Fundamental Python Debugging Part 2 - Python Extensions `_ 3. `Fundamental Python Debugging Part 3 - Cython Extensions `_ -Generating debug builds ------------------------ +Debugging locally +----------------- By default building pandas from source will generate a release build. To generate a development build you can type:: @@ -27,6 +27,32 @@ By default building pandas from source will generate a release build. To generat By specifying ``builddir="debug"`` all of the targets will be built and placed in the debug directory relative to the project root. This helps to keep your debug and release artifacts separate; you are of course able to choose a different directory name or omit altogether if you do not care to separate build types. +Using Docker +------------ + +To simplify the debugging process, pandas has created a Docker image with a debug build of Python and the gdb/Cython debuggers pre-installed. You may either ``docker pull pandas/pandas-debug`` to get access to this image or build it from the ``tooling/debug`` folder locallly. + +You can then mount your pandas repository into this image via: + +.. code-block:: sh + + docker run --rm -it -w /data -v ${PWD}:/data pandas/pandas-debug + +Inside the image, you can use meson to build/install pandas and place the build artifacts into a ``debug`` folder using a command as follows: + +.. code-block:: sh + + python -m pip install -ve . --no-build-isolation --config-settings=builddir="debug" --config-settings=setup-args="-Dbuildtype=debug" + +If planning to use cygdb, the files required by that application are placed within the build folder. So you have to first ``cd`` to the build folder, then start that application. + +.. code-block:: sh + + cd debug + cygdb + +Within the debugger you can use `cygdb commands `_ to navigate cython extensions. + Editor support -------------- diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index fd632790546f6..b4662d6bf8dd2 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -101,12 +101,20 @@ libs_sources = { 'writers': {'sources': ['writers.pyx']} } +cython_args = [ + '--include-dir', + meson.current_build_dir(), + '-X always_allow_keywords=true' +] +if get_option('buildtype') == 'debug' + cython_args += ['--gdb'] +endif foreach ext_name, ext_dict : libs_sources py.extension_module( ext_name, ext_dict.get('sources'), - cython_args: ['--include-dir', meson.current_build_dir(), '-X always_allow_keywords=true'], + cython_args: cython_args, include_directories: [inc_np, inc_pd], dependencies: ext_dict.get('deps', ''), subdir: 'pandas/_libs', diff --git a/pandas/_libs/tslibs/meson.build b/pandas/_libs/tslibs/meson.build index a1b0c54d1f48c..85410f771233f 100644 --- a/pandas/_libs/tslibs/meson.build +++ b/pandas/_libs/tslibs/meson.build @@ -19,11 +19,20 @@ tslibs_sources = { 'vectorized': {'sources': ['vectorized.pyx']}, } +cython_args = [ + '--include-dir', + meson.current_build_dir(), + '-X always_allow_keywords=true' +] +if get_option('buildtype') == 'debug' + cython_args += ['--gdb'] +endif + foreach ext_name, ext_dict : tslibs_sources py.extension_module( ext_name, ext_dict.get('sources'), - cython_args: ['--include-dir', meson.current_build_dir(), '-X always_allow_keywords=true'], + cython_args: cython_args, include_directories: [inc_np, inc_pd], dependencies: ext_dict.get('deps', ''), subdir: 'pandas/_libs/tslibs', diff --git a/setup.py b/setup.py index 663bbd3952eab..db3717efb738d 100755 --- a/setup.py +++ b/setup.py @@ -418,6 +418,9 @@ def maybe_cythonize(extensions, *args, **kwargs): kwargs["nthreads"] = parsed.parallel build_ext.render_templates(_pxifiles) + if debugging_symbols_requested: + kwargs["gdb_debug"] = True + return cythonize(extensions, *args, **kwargs) diff --git a/tooling/debug/Dockerfile.pandas-debug b/tooling/debug/Dockerfile.pandas-debug new file mode 100644 index 0000000000000..00e10a85d7ab9 --- /dev/null +++ b/tooling/debug/Dockerfile.pandas-debug @@ -0,0 +1,35 @@ +FROM ubuntu:latest + +RUN apt-get update && apt-get upgrade -y +RUN apt-get install -y build-essential git valgrind + +# cpython dev install +RUN git clone -b 3.10 --depth 1 https://github.com/python/cpython.git /clones/cpython +RUN apt-get install -y libbz2-dev libffi-dev libssl-dev zlib1g-dev liblzma-dev libsqlite3-dev libreadline-dev +RUN cd /clones/cpython && ./configure --with-pydebug && CFLAGS="-g3" make -s -j$(nproc) && make install + +# gdb installation +RUN apt-get install -y wget libgmp-dev +RUN cd /tmp && wget http://mirrors.kernel.org/sourceware/gdb/releases/gdb-12.1.tar.gz && tar -zxf gdb-12.1.tar.gz +RUN cd /tmp/gdb-12.1 && ./configure --with-python=python3 && make -j$(nproc) && make install +RUN rm -r /tmp/gdb-12.1 + +# pandas dependencies +RUN python3 -m pip install \ + cython \ + hypothesis \ + ninja \ + numpy \ + meson \ + meson-python \ + pytest \ + pytest-asyncio \ + python-dateutil \ + pytz \ + versioneer[toml] + +# At the time this docker image was built, there was a bug/limitation +# with meson where only having a python3 executable and not python +# would cause the build to fail. This symlink could be removed if +# users stick to always calling python3 within the container +RUN ln -s /usr/local/bin/python3 /usr/local/bin/python diff --git a/tooling/debug/README b/tooling/debug/README new file mode 100644 index 0000000000000..111a958ff5ef5 --- /dev/null +++ b/tooling/debug/README @@ -0,0 +1,19 @@ +The Docker image here helps to set up an isolated environment containing a debug version of Python and a gdb installation which the Cython debugger can work with. + +If you have internet access, you can pull a pre-built image via + +```sh +docker pull pandas/pandas-debug +``` + +To build the image locally, you can do + +```sh +docker build . -t pandas/pandas-debug -f Dockerfile.pandas-debug +``` + +For pandas developers, you can push a new copy of the image to dockerhub via + +```sh +docker push pandas/pandas-debug +```