From 9922e7ffdcc41ea593593b75e711f06b2309741a Mon Sep 17 00:00:00 2001 From: TLouf Date: Sun, 16 May 2021 13:48:47 +0200 Subject: [PATCH 01/10] PERF: load plotting entrypoint only when necessary --- pandas/plotting/_core.py | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 27f8835968b54..5c912a8865f44 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -865,7 +865,7 @@ def _get_call_args(backend_name, data, args, kwargs): if args and isinstance(data, ABCSeries): positional_args = str(args)[1:-1] keyword_args = ", ".join( - f"{name}={repr(value)}" for (name, default), value in zip(arg_def, args) + f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args) ) msg = ( "`Series.plot()` should not be called with positional " @@ -876,7 +876,7 @@ def _get_call_args(backend_name, data, args, kwargs): ) raise TypeError(msg) - pos_args = {name: value for value, (name, _) in zip(args, arg_def)} + pos_args = {name: value for (name, _), value in zip(arg_def, args)} if backend_name == "pandas.plotting._matplotlib": kwargs = dict(arg_def, **pos_args, **kwargs) else: @@ -1729,7 +1729,7 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs): def _find_backend(backend: str): """ - Find a pandas plotting backend> + Find a pandas plotting backend. Parameters ---------- @@ -1749,11 +1749,8 @@ def _find_backend(backend: str): import pkg_resources # Delay import for performance. for entry_point in pkg_resources.iter_entry_points("pandas_plotting_backends"): - if entry_point.name == "matplotlib": - # matplotlib is an optional dependency. When - # missing, this would raise. - continue - _backends[entry_point.name] = entry_point.load() + if entry_point.name == backend: + _backends[entry_point.name] = entry_point.load() try: return _backends[backend] @@ -1778,21 +1775,26 @@ def _find_backend(backend: str): ) -def _get_plot_backend(backend=None): +def _get_plot_backend(backend: str | None = None): """ Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`). - The plotting system of pandas has been using matplotlib, but the idea here - is that it can also work with other third-party backends. In the future, - this function will return the backend from a pandas option, and all the - rest of the code in this file will use the backend specified there for the - plotting. + The plotting system of pandas uses matplotlib by default, but the idea here + is that it can also work with other third-party backends. This function + returns the module which provides a top-level `.plot` method that will + actually do the plotting. The backend is specified from a string, which + either comes from the keyword argument `backend`, or, if not specified, from + the option `pandas.options.plotting.backend`. All the rest of the code in + this file uses the backend specified there for the plotting. The backend is imported lazily, as matplotlib is a soft dependency, and pandas can be used without it being installed. """ backend = backend or get_option("plotting.backend") + if backend in _backends: + return _backends[backend] + if backend == "matplotlib": # Because matplotlib is an optional dependency and first-party backend, # we need to attempt an import here to raise an ImportError if needed. @@ -1805,10 +1807,7 @@ def _get_plot_backend(backend=None): ) from None _backends["matplotlib"] = module - - if backend in _backends: - return _backends[backend] + return module module = _find_backend(backend) - _backends[backend] = module return module From 69c9206ff4f7ee72f0399c5e1473146261f7c850 Mon Sep 17 00:00:00 2001 From: TLouf Date: Mon, 17 May 2021 10:50:03 +0200 Subject: [PATCH 02/10] Better separate backend loading. Introduces `_load_backend` instead of `_find_backend`, which has a better defined role, and can also handle backend='matplotlib' --- pandas/plotting/_core.py | 80 ++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 5c912a8865f44..a75126ffe4cef 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1727,15 +1727,15 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs): _backends = {} -def _find_backend(backend: str): +def _load_backend(backend: str): """ - Find a pandas plotting backend. + Load a pandas plotting backend. Parameters ---------- backend : str The identifier for the backend. Either an entrypoint item registered - with pkg_resources, or a module name. + with pkg_resources, "matplotlib", or a module name. Notes ----- @@ -1746,32 +1746,45 @@ def _find_backend(backend: str): types.ModuleType The imported backend. """ - import pkg_resources # Delay import for performance. - - for entry_point in pkg_resources.iter_entry_points("pandas_plotting_backends"): - if entry_point.name == backend: - _backends[entry_point.name] = entry_point.load() - - try: - return _backends[backend] - except KeyError: - # Fall back to unregistered, module name approach. + if backend == "matplotlib": + # Because matplotlib is an optional dependency and first-party backend, + # we need to attempt an import here to raise an ImportError if needed. try: - module = importlib.import_module(backend) + import pandas.plotting._matplotlib as module except ImportError: - # We re-raise later on. - pass - else: - if hasattr(module, "plot"): - # Validate that the interface is implemented when the option - # is set, rather than at plot time. - _backends[backend] = module - return module + raise ImportError( + "matplotlib is required for plotting when the " + 'default backend "matplotlib" is selected.' + ) from None + + else: + module = None + # Delay import for performance. + # TODO: replace with `importlib.metadata` when python_requires >= 3.8. + from pkg_resources import iter_entry_points + + for entry_point in iter_entry_points("pandas_plotting_backends"): + if entry_point.name == backend: + module = entry_point.load() + + if module is None: + # Fall back to unregistered, module name approach. + try: + module = importlib.import_module(backend) + except ImportError: + # We re-raise later on. + pass + + if hasattr(module, "plot"): + # Validate that the interface is implemented when the option is set, + # rather than at plot time. + _backends[backend] = module + return module raise ValueError( - f"Could not find plotting backend '{backend}'. Ensure that you've installed " - f"the package providing the '{backend}' entrypoint, or that the package has a " - "top-level `.plot` method." + f"Could not find plotting backend '{backend}'. Ensure that you've " + f"installed the package providing the '{backend}' entrypoint, or that " + "the package has a top-level `.plot` method." ) @@ -1795,19 +1808,4 @@ def _get_plot_backend(backend: str | None = None): if backend in _backends: return _backends[backend] - if backend == "matplotlib": - # Because matplotlib is an optional dependency and first-party backend, - # we need to attempt an import here to raise an ImportError if needed. - try: - import pandas.plotting._matplotlib as module - except ImportError: - raise ImportError( - "matplotlib is required for plotting when the " - 'default backend "matplotlib" is selected.' - ) from None - - _backends["matplotlib"] = module - return module - - module = _find_backend(backend) - return module + return _load_backend(backend) From 8315b388c0e07320915248936bf412675a55f257 Mon Sep 17 00:00:00 2001 From: TLouf Date: Mon, 17 May 2021 11:02:31 +0200 Subject: [PATCH 03/10] Update whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 409125b6d6691..a8ae36a1b17db 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -812,6 +812,7 @@ Performance improvements - Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`) - Performance improvement in :meth:`Series.nunique` with nan values (:issue:`40865`) - Performance improvement in :meth:`DataFrame.transpose`, :meth:`Series.unstack` with ``DatetimeTZDtype`` (:issue:`40149`) +- Performance improvement in :meth:`Series.plot` and :meth:`DataFrame.plot` with entry point lazy loading (:issue:`41492`) .. --------------------------------------------------------------------------- From 23d059e811c4881731ed82add8b93993e131c768 Mon Sep 17 00:00:00 2001 From: TLouf Date: Mon, 17 May 2021 15:59:03 +0200 Subject: [PATCH 04/10] Mutate _backends in_get_plot_backend import top --- pandas/plotting/_core.py | 43 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index a75126ffe4cef..d8fe99c2c97ac 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -6,6 +6,9 @@ Sequence, ) +# TODO: replace with `importlib.metadata` when python_requires >= 3.8. +import pkg_resources + from pandas._config import get_option from pandas._typing import IndexLabel @@ -1737,10 +1740,6 @@ def _load_backend(backend: str): The identifier for the backend. Either an entrypoint item registered with pkg_resources, "matplotlib", or a module name. - Notes - ----- - Modifies _backends with imported backends as a side effect. - Returns ------- types.ModuleType @@ -1756,29 +1755,25 @@ def _load_backend(backend: str): "matplotlib is required for plotting when the " 'default backend "matplotlib" is selected.' ) from None + return module - else: - module = None - # Delay import for performance. - # TODO: replace with `importlib.metadata` when python_requires >= 3.8. - from pkg_resources import iter_entry_points + module = None - for entry_point in iter_entry_points("pandas_plotting_backends"): - if entry_point.name == backend: - module = entry_point.load() + for entry_point in pkg_resources.iter_entry_points("pandas_plotting_backends"): + if entry_point.name == backend: + module = entry_point.load() - if module is None: - # Fall back to unregistered, module name approach. - try: - module = importlib.import_module(backend) - except ImportError: - # We re-raise later on. - pass + if module is None: + # Fall back to unregistered, module name approach. + try: + module = importlib.import_module(backend) + except ImportError: + # We re-raise later on. + pass if hasattr(module, "plot"): # Validate that the interface is implemented when the option is set, # rather than at plot time. - _backends[backend] = module return module raise ValueError( @@ -1802,10 +1797,16 @@ def _get_plot_backend(backend: str | None = None): The backend is imported lazily, as matplotlib is a soft dependency, and pandas can be used without it being installed. + + Notes + ----- + Modifies `_backends` with imported backend as a side effect. """ backend = backend or get_option("plotting.backend") if backend in _backends: return _backends[backend] - return _load_backend(backend) + module = _load_backend(backend) + _backends[backend] = module + return module From a9f98991e75e1e2091e708a3d863ceeab9e23ed2 Mon Sep 17 00:00:00 2001 From: TLouf Date: Tue, 18 May 2021 13:07:57 +0200 Subject: [PATCH 05/10] Fix type annotations --- pandas/plotting/_core.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index d8fe99c2c97ac..cc0a15b5f98d7 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,6 +1,7 @@ from __future__ import annotations import importlib +import types from typing import ( TYPE_CHECKING, Sequence, @@ -1727,7 +1728,7 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs): return self(kind="hexbin", x=x, y=y, C=C, **kwargs) -_backends = {} +_backends: dict[str, types.ModuleType] = {} def _load_backend(backend: str): @@ -1745,11 +1746,13 @@ def _load_backend(backend: str): types.ModuleType The imported backend. """ + module: types.ModuleType | None = None + if backend == "matplotlib": # Because matplotlib is an optional dependency and first-party backend, # we need to attempt an import here to raise an ImportError if needed. try: - import pandas.plotting._matplotlib as module + module = __import__("pandas.plotting._matplotlib") except ImportError: raise ImportError( "matplotlib is required for plotting when the " @@ -1757,8 +1760,6 @@ def _load_backend(backend: str): ) from None return module - module = None - for entry_point in pkg_resources.iter_entry_points("pandas_plotting_backends"): if entry_point.name == backend: module = entry_point.load() From 8808ecd714fc8a90adb49586ae1458c8e94952b7 Mon Sep 17 00:00:00 2001 From: TLouf Date: Tue, 18 May 2021 13:09:52 +0200 Subject: [PATCH 06/10] Add backend loading benchmark --- asv_bench/benchmarks/plotting.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py index 11e43401f9395..7653520213d73 100644 --- a/asv_bench/benchmarks/plotting.py +++ b/asv_bench/benchmarks/plotting.py @@ -1,10 +1,15 @@ +import importlib +import sys + import matplotlib import numpy as np +import pkg_resources from pandas import ( DataFrame, DatetimeIndex, Series, + __file__ as pd_file, date_range, ) @@ -13,6 +18,8 @@ except ImportError: from pandas.tools.plotting import andrews_curves +from pandas.plotting._core import _get_plot_backend + matplotlib.use("Agg") @@ -99,4 +106,29 @@ def time_plot_andrews_curves(self): andrews_curves(self.df, "Name") +class BackendLoading: + def setup(self): + dist = pkg_resources.get_distribution("pandas") + if dist.module_path not in pd_file: + # We are running from a non-installed pandas, and this benchmark is + # invalid + raise NotImplementedError("Testing a non-installed pandas") + + spec = importlib.machinery.ModuleSpec("my_backend", None) + mod = importlib.util.module_from_spec(spec) + mod.plot = lambda *args, **kwargs: 1 + + backends = pkg_resources.get_entry_map("pandas") + my_entrypoint = pkg_resources.EntryPoint( + "pandas_plotting_backend", mod.__name__, dist=dist + ) + backends["pandas_plotting_backends"][mod.__name__] = my_entrypoint + for i in range(1000): + backends["pandas_plotting_backends"][str(i)] = my_entrypoint + sys.modules["my_backend"] = mod + + def time_get_plot_backend(self): + _get_plot_backend("my_backend") + + from .pandas_vb_common import setup # noqa: F401 isort:skip From 00f79d3caf0117dfe9ca4e7dac15a609e61faa6f Mon Sep 17 00:00:00 2001 From: TLouf Date: Tue, 18 May 2021 14:21:42 +0200 Subject: [PATCH 07/10] Fix BackendLoading removing warmup --- asv_bench/benchmarks/plotting.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py index 7653520213d73..d81a4019a7577 100644 --- a/asv_bench/benchmarks/plotting.py +++ b/asv_bench/benchmarks/plotting.py @@ -107,6 +107,10 @@ def time_plot_andrews_curves(self): class BackendLoading: + repeat = 1 + number = 1 + warmup_time = 0 + def setup(self): dist = pkg_resources.get_distribution("pandas") if dist.module_path not in pd_file: From 25b1dced8f4c96fb6dab0b8987ec2b2eafbb1523 Mon Sep 17 00:00:00 2001 From: TLouf Date: Tue, 1 Jun 2021 18:29:02 +0200 Subject: [PATCH 08/10] Reduce number of dummy backends --- asv_bench/benchmarks/plotting.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py index d81a4019a7577..249a8f3f556a1 100644 --- a/asv_bench/benchmarks/plotting.py +++ b/asv_bench/benchmarks/plotting.py @@ -9,7 +9,6 @@ DataFrame, DatetimeIndex, Series, - __file__ as pd_file, date_range, ) @@ -113,11 +112,6 @@ class BackendLoading: def setup(self): dist = pkg_resources.get_distribution("pandas") - if dist.module_path not in pd_file: - # We are running from a non-installed pandas, and this benchmark is - # invalid - raise NotImplementedError("Testing a non-installed pandas") - spec = importlib.machinery.ModuleSpec("my_backend", None) mod = importlib.util.module_from_spec(spec) mod.plot = lambda *args, **kwargs: 1 @@ -127,7 +121,7 @@ def setup(self): "pandas_plotting_backend", mod.__name__, dist=dist ) backends["pandas_plotting_backends"][mod.__name__] = my_entrypoint - for i in range(1000): + for i in range(10): backends["pandas_plotting_backends"][str(i)] = my_entrypoint sys.modules["my_backend"] = mod From be6f4f363de1e21c54f6660fa77be3a6967362f4 Mon Sep 17 00:00:00 2001 From: TLouf Date: Tue, 1 Jun 2021 18:30:04 +0200 Subject: [PATCH 09/10] Return type in _load_backend, changes for mypy --- pandas/plotting/_core.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index cc0a15b5f98d7..a4db8018eb293 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -7,7 +7,6 @@ Sequence, ) -# TODO: replace with `importlib.metadata` when python_requires >= 3.8. import pkg_resources from pandas._config import get_option @@ -1731,7 +1730,7 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs): _backends: dict[str, types.ModuleType] = {} -def _load_backend(backend: str): +def _load_backend(backend: str) -> types.ModuleType: """ Load a pandas plotting backend. @@ -1746,13 +1745,11 @@ def _load_backend(backend: str): types.ModuleType The imported backend. """ - module: types.ModuleType | None = None - if backend == "matplotlib": # Because matplotlib is an optional dependency and first-party backend, # we need to attempt an import here to raise an ImportError if needed. try: - module = __import__("pandas.plotting._matplotlib") + module = importlib.import_module("pandas.plotting._matplotlib") except ImportError: raise ImportError( "matplotlib is required for plotting when the " @@ -1760,22 +1757,27 @@ def _load_backend(backend: str): ) from None return module + found_backend = False + for entry_point in pkg_resources.iter_entry_points("pandas_plotting_backends"): - if entry_point.name == backend: + found_backend = entry_point.name == backend + if found_backend: module = entry_point.load() - if module is None: + if not found_backend: # Fall back to unregistered, module name approach. try: module = importlib.import_module(backend) + found_backend = True except ImportError: # We re-raise later on. pass - if hasattr(module, "plot"): - # Validate that the interface is implemented when the option is set, - # rather than at plot time. - return module + if found_backend: + if hasattr(module, "plot"): + # Validate that the interface is implemented when the option is set, + # rather than at plot time. + return module raise ValueError( f"Could not find plotting backend '{backend}'. Ensure that you've " From 54a54cae745c43d28de18633b714fa317a593f83 Mon Sep 17 00:00:00 2001 From: TLouf Date: Thu, 3 Jun 2021 10:09:07 +0200 Subject: [PATCH 10/10] Break iteration if found entry point --- pandas/plotting/_core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index a4db8018eb293..5d3db13610845 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1763,6 +1763,7 @@ def _load_backend(backend: str) -> types.ModuleType: found_backend = entry_point.name == backend if found_backend: module = entry_point.load() + break if not found_backend: # Fall back to unregistered, module name approach.