Skip to content

Commit

Permalink
PERF: load plotting entrypoint only when necessary (#41503)
Browse files Browse the repository at this point in the history
  • Loading branch information
TLouf authored Jun 4, 2021
1 parent 963561b commit e602f7b
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 48 deletions.
30 changes: 30 additions & 0 deletions asv_bench/benchmarks/plotting.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import importlib
import sys

import matplotlib
import numpy as np
import pkg_resources

from pandas import (
DataFrame,
Expand All @@ -13,6 +17,8 @@
except ImportError:
from pandas.tools.plotting import andrews_curves

from pandas.plotting._core import _get_plot_backend

matplotlib.use("Agg")


Expand Down Expand Up @@ -99,4 +105,28 @@ def time_plot_andrews_curves(self):
andrews_curves(self.df, "Name")


class BackendLoading:
    """
    Time ``_get_plot_backend`` when extra plotting entry points are registered.

    ``setup`` creates an in-memory module named ``my_backend`` exposing a
    trivial ``plot`` callable, adds eleven entries to the
    ``pandas_plotting_backends`` entry-point group of the installed pandas
    distribution, and publishes the module in ``sys.modules``. The timed
    method then asks pandas to resolve ``"my_backend"``.
    """

    # Take a single, un-warmed measurement.
    # NOTE(review): presumably because pandas caches a backend once it has
    # been resolved, so only the first call exercises the loading path --
    # confirm against ``pandas.plotting._core``.
    repeat = 1
    number = 1
    warmup_time = 0

    def setup(self):
        """Register the dummy backend module and the fake entry points."""
        # ``import importlib`` alone does not guarantee that these submodules
        # are bound as attributes of the package, so import them explicitly.
        import importlib.machinery
        import importlib.util

        dist = pkg_resources.get_distribution("pandas")

        # Build an empty module object (no loader) and give it the one
        # attribute the timed call is expected to look for: ``plot``.
        spec = importlib.machinery.ModuleSpec("my_backend", None)
        mod = importlib.util.module_from_spec(spec)
        mod.plot = lambda *args, **kwargs: 1

        # NOTE(review): this relies on ``get_entry_map`` handing back the
        # mapping that entry-point iteration later reads, so that the
        # insertions below are visible to pandas -- confirm.
        backends = pkg_resources.get_entry_map("pandas")
        my_entrypoint = pkg_resources.EntryPoint(
            "pandas_plotting_backend", mod.__name__, dist=dist
        )
        backends["pandas_plotting_backends"][mod.__name__] = my_entrypoint
        # Ten more keys pointing at the same entry point, to simulate an
        # environment with several backends installed.
        for i in range(10):
            backends["pandas_plotting_backends"][str(i)] = my_entrypoint

        # Make ``my_backend`` importable by name.
        sys.modules["my_backend"] = mod

    def time_get_plot_backend(self):
        """Timed call: resolve the ``my_backend`` plotting backend."""
        _get_plot_backend("my_backend")


from .pandas_vb_common import setup # noqa: F401 isort:skip
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,7 @@ Performance improvements
- Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`)
- Performance improvement in :meth:`Series.nunique` with nan values (:issue:`40865`)
- Performance improvement in :meth:`DataFrame.transpose`, :meth:`Series.unstack` with ``DatetimeTZDtype`` (:issue:`40149`)
- Performance improvement in :meth:`Series.plot` and :meth:`DataFrame.plot` with entry point lazy loading (:issue:`41492`)

.. ---------------------------------------------------------------------------
Expand Down
98 changes: 50 additions & 48 deletions pandas/plotting/_core.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from __future__ import annotations

import importlib
import types
from typing import (
TYPE_CHECKING,
Sequence,
)

import pkg_resources

from pandas._config import get_option

from pandas._typing import IndexLabel
Expand Down Expand Up @@ -865,7 +868,7 @@ def _get_call_args(backend_name, data, args, kwargs):
if args and isinstance(data, ABCSeries):
positional_args = str(args)[1:-1]
keyword_args = ", ".join(
f"{name}={repr(value)}" for (name, default), value in zip(arg_def, args)
f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)
)
msg = (
"`Series.plot()` should not be called with positional "
Expand All @@ -876,7 +879,7 @@ def _get_call_args(backend_name, data, args, kwargs):
)
raise TypeError(msg)

pos_args = {name: value for value, (name, _) in zip(args, arg_def)}
pos_args = {name: value for (name, _), value in zip(arg_def, args)}
if backend_name == "pandas.plotting._matplotlib":
kwargs = dict(arg_def, **pos_args, **kwargs)
else:
Expand Down Expand Up @@ -1724,91 +1727,90 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs):
return self(kind="hexbin", x=x, y=y, C=C, **kwargs)


_backends = {}
_backends: dict[str, types.ModuleType] = {}


def _find_backend(backend: str):
def _load_backend(backend: str) -> types.ModuleType:
"""
Find a pandas plotting backend>
Load a pandas plotting backend.
Parameters
----------
backend : str
The identifier for the backend. Either an entrypoint item registered
with pkg_resources, or a module name.
Notes
-----
Modifies _backends with imported backends as a side effect.
with pkg_resources, "matplotlib", or a module name.
Returns
-------
types.ModuleType
The imported backend.
"""
import pkg_resources # Delay import for performance.
if backend == "matplotlib":
# Because matplotlib is an optional dependency and first-party backend,
# we need to attempt an import here to raise an ImportError if needed.
try:
module = importlib.import_module("pandas.plotting._matplotlib")
except ImportError:
raise ImportError(
"matplotlib is required for plotting when the "
'default backend "matplotlib" is selected.'
) from None
return module

found_backend = False

for entry_point in pkg_resources.iter_entry_points("pandas_plotting_backends"):
if entry_point.name == "matplotlib":
# matplotlib is an optional dependency. When
# missing, this would raise.
continue
_backends[entry_point.name] = entry_point.load()
found_backend = entry_point.name == backend
if found_backend:
module = entry_point.load()
break

try:
return _backends[backend]
except KeyError:
if not found_backend:
# Fall back to unregistered, module name approach.
try:
module = importlib.import_module(backend)
found_backend = True
except ImportError:
# We re-raise later on.
pass
else:
if hasattr(module, "plot"):
# Validate that the interface is implemented when the option
# is set, rather than at plot time.
_backends[backend] = module
return module

if found_backend:
if hasattr(module, "plot"):
# Validate that the interface is implemented when the option is set,
# rather than at plot time.
return module

raise ValueError(
f"Could not find plotting backend '{backend}'. Ensure that you've installed "
f"the package providing the '{backend}' entrypoint, or that the package has a "
"top-level `.plot` method."
f"Could not find plotting backend '{backend}'. Ensure that you've "
f"installed the package providing the '{backend}' entrypoint, or that "
"the package has a top-level `.plot` method."
)


def _get_plot_backend(backend=None):
def _get_plot_backend(backend: str | None = None):
"""
Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).
The plotting system of pandas has been using matplotlib, but the idea here
is that it can also work with other third-party backends. In the future,
this function will return the backend from a pandas option, and all the
rest of the code in this file will use the backend specified there for the
plotting.
The plotting system of pandas uses matplotlib by default, but the idea here
is that it can also work with other third-party backends. This function
returns the module which provides a top-level `.plot` method that will
actually do the plotting. The backend is specified from a string, which
either comes from the keyword argument `backend`, or, if not specified, from
the option `pandas.options.plotting.backend`. All the rest of the code in
this file uses the backend specified there for the plotting.
The backend is imported lazily, as matplotlib is a soft dependency, and
pandas can be used without it being installed.
Notes
-----
Modifies `_backends` with imported backend as a side effect.
"""
backend = backend or get_option("plotting.backend")

if backend == "matplotlib":
# Because matplotlib is an optional dependency and first-party backend,
# we need to attempt an import here to raise an ImportError if needed.
try:
import pandas.plotting._matplotlib as module
except ImportError:
raise ImportError(
"matplotlib is required for plotting when the "
'default backend "matplotlib" is selected.'
) from None

_backends["matplotlib"] = module

if backend in _backends:
return _backends[backend]

module = _find_backend(backend)
module = _load_backend(backend)
_backends[backend] = module
return module

0 comments on commit e602f7b

Please sign in to comment.