Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PERF: load plotting entrypoint only when necessary #41503

Merged
merged 10 commits into from
Jun 4, 2021
30 changes: 30 additions & 0 deletions asv_bench/benchmarks/plotting.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import importlib
import sys

import matplotlib
import numpy as np
import pkg_resources

from pandas import (
DataFrame,
Expand All @@ -13,6 +17,8 @@
except ImportError:
from pandas.tools.plotting import andrews_curves

from pandas.plotting._core import _get_plot_backend

matplotlib.use("Agg")


Expand Down Expand Up @@ -99,4 +105,28 @@ def time_plot_andrews_curves(self):
andrews_curves(self.df, "Name")


class BackendLoading:
    """
    Benchmark resolving a plotting backend by name via ``_get_plot_backend``.

    ``setup`` builds an in-memory module called ``my_backend`` that exposes a
    ``plot`` attribute, registers it in ``sys.modules`` and adds eleven entries
    to the ``pandas_plotting_backends`` entry-point group of the installed
    ``pandas`` distribution. ``teardown`` undoes those global mutations.
    """

    # Time exactly one un-warmed call per run.
    # NOTE(review): presumably because ``_get_plot_backend`` caches the
    # resolved module, so only the first call exercises the lookup - confirm.
    repeat = 1
    number = 1
    warmup_time = 0

    def setup(self):
        """Register the dummy backend module and the extra entry points."""
        # ``import importlib`` alone does not guarantee that the ``machinery``
        # and ``util`` submodules are bound as attributes of the package, so
        # import them explicitly instead of relying on another module having
        # done it already.
        import importlib.machinery
        import importlib.util

        dist = pkg_resources.get_distribution("pandas")
        spec = importlib.machinery.ModuleSpec("my_backend", None)
        mod = importlib.util.module_from_spec(spec)
        mod.plot = lambda *args, **kwargs: 1

        # NOTE(review): the entry point's own name is "pandas_plotting_backend"
        # while it is stored under other keys, so a lookup that compares
        # ``entry_point.name`` with "my_backend" would not match and would fall
        # through to importing the module by name - confirm this is the path
        # the benchmark is meant to time.
        my_entrypoint = pkg_resources.EntryPoint(
            "pandas_plotting_backend", mod.__name__, dist=dist
        )

        # ``setdefault`` avoids a KeyError when the group is not registered.
        group = pkg_resources.get_entry_map("pandas").setdefault(
            "pandas_plotting_backends", {}
        )
        names = [mod.__name__, *(str(i) for i in range(10))]

        # Remember what was there before so that teardown can restore it.
        previous_entries = {name: group[name] for name in names if name in group}
        previous_module = sys.modules.get(mod.__name__)
        self._restore = (group, names, previous_entries, mod.__name__, previous_module)

        group.update(dict.fromkeys(names, my_entrypoint))
        sys.modules[mod.__name__] = mod

    def teardown(self):
        """Undo the changes ``setup`` made to the entry map and ``sys.modules``."""
        state = getattr(self, "_restore", None)
        if state is None:
            # setup never ran (or never got far enough); nothing to undo.
            return
        group, names, previous_entries, module_name, previous_module = state
        del self._restore

        for name in names:
            if name in previous_entries:
                group[name] = previous_entries[name]
            else:
                group.pop(name, None)

        if previous_module is None:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous_module

    def time_get_plot_backend(self):
        """Time a single backend lookup for ``my_backend``."""
        _get_plot_backend("my_backend")


from .pandas_vb_common import setup # noqa: F401 isort:skip
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,7 @@ Performance improvements
- Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`)
- Performance improvement in :meth:`Series.nunique` with nan values (:issue:`40865`)
- Performance improvement in :meth:`DataFrame.transpose`, :meth:`Series.unstack` with ``DatetimeTZDtype`` (:issue:`40149`)
- Performance improvement in :meth:`Series.plot` and :meth:`DataFrame.plot` with entry point lazy loading (:issue:`41492`)

.. ---------------------------------------------------------------------------

Expand Down
98 changes: 50 additions & 48 deletions pandas/plotting/_core.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from __future__ import annotations

import importlib
import types
from typing import (
TYPE_CHECKING,
Sequence,
)

import pkg_resources

from pandas._config import get_option

from pandas._typing import IndexLabel
Expand Down Expand Up @@ -865,7 +868,7 @@ def _get_call_args(backend_name, data, args, kwargs):
if args and isinstance(data, ABCSeries):
positional_args = str(args)[1:-1]
keyword_args = ", ".join(
f"{name}={repr(value)}" for (name, default), value in zip(arg_def, args)
f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)
)
msg = (
"`Series.plot()` should not be called with positional "
Expand All @@ -876,7 +879,7 @@ def _get_call_args(backend_name, data, args, kwargs):
)
raise TypeError(msg)

pos_args = {name: value for value, (name, _) in zip(args, arg_def)}
pos_args = {name: value for (name, _), value in zip(arg_def, args)}
if backend_name == "pandas.plotting._matplotlib":
kwargs = dict(arg_def, **pos_args, **kwargs)
else:
Expand Down Expand Up @@ -1724,91 +1727,90 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs):
return self(kind="hexbin", x=x, y=y, C=C, **kwargs)


_backends = {}
_backends: dict[str, types.ModuleType] = {}


def _find_backend(backend: str):
def _load_backend(backend: str) -> types.ModuleType:
"""
Find a pandas plotting backend>
Load a pandas plotting backend.

Parameters
----------
backend : str
The identifier for the backend. Either an entrypoint item registered
with pkg_resources, or a module name.

Notes
-----
Modifies _backends with imported backends as a side effect.
with pkg_resources, "matplotlib", or a module name.

Returns
-------
types.ModuleType
The imported backend.
"""
import pkg_resources # Delay import for performance.
if backend == "matplotlib":
# Because matplotlib is an optional dependency and first-party backend,
# we need to attempt an import here to raise an ImportError if needed.
try:
module = importlib.import_module("pandas.plotting._matplotlib")
except ImportError:
raise ImportError(
"matplotlib is required for plotting when the "
'default backend "matplotlib" is selected.'
) from None
return module

found_backend = False

for entry_point in pkg_resources.iter_entry_points("pandas_plotting_backends"):
if entry_point.name == "matplotlib":
# matplotlib is an optional dependency. When
# missing, this would raise.
continue
_backends[entry_point.name] = entry_point.load()
found_backend = entry_point.name == backend
if found_backend:
module = entry_point.load()
jreback marked this conversation as resolved.
Show resolved Hide resolved
break

try:
return _backends[backend]
except KeyError:
if not found_backend:
# Fall back to unregistered, module name approach.
try:
module = importlib.import_module(backend)
found_backend = True
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what i meant is that you can move all of this code here to L1766 (e.g. just load and import there) right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so, I first have to iterate through all entry points before falling back to importing a module. That's because for instance, the hvPlot backend is an entry point whose name is "holoviews" (see https://github.com/holoviz/hvplot/blob/master/setup.py#L134). So if I move up the unregistered entry point approach, in that case I might import holoviews, which has no top level plot method, thus raising when the entry point is actually available.

except ImportError:
# We re-raise later on.
pass
else:
if hasattr(module, "plot"):
# Validate that the interface is implemented when the option
# is set, rather than at plot time.
_backends[backend] = module
return module

if found_backend:
if hasattr(module, "plot"):
# Validate that the interface is implemented when the option is set,
# rather than at plot time.
return module

raise ValueError(
f"Could not find plotting backend '{backend}'. Ensure that you've installed "
f"the package providing the '{backend}' entrypoint, or that the package has a "
"top-level `.plot` method."
f"Could not find plotting backend '{backend}'. Ensure that you've "
f"installed the package providing the '{backend}' entrypoint, or that "
"the package has a top-level `.plot` method."
)


def _get_plot_backend(backend=None):
def _get_plot_backend(backend: str | None = None):
"""
Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).

The plotting system of pandas has been using matplotlib, but the idea here
is that it can also work with other third-party backends. In the future,
this function will return the backend from a pandas option, and all the
rest of the code in this file will use the backend specified there for the
plotting.
The plotting system of pandas uses matplotlib by default, but the idea here
is that it can also work with other third-party backends. This function
returns the module which provides a top-level `.plot` method that will
actually do the plotting. The backend is specified from a string, which
either comes from the keyword argument `backend`, or, if not specified, from
the option `pandas.options.plotting.backend`. All the rest of the code in
this file uses the backend specified there for the plotting.

The backend is imported lazily, as matplotlib is a soft dependency, and
pandas can be used without it being installed.

Notes
-----
Modifies `_backends` with imported backend as a side effect.
"""
backend = backend or get_option("plotting.backend")

if backend == "matplotlib":
# Because matplotlib is an optional dependency and first-party backend,
# we need to attempt an import here to raise an ImportError if needed.
try:
import pandas.plotting._matplotlib as module
except ImportError:
raise ImportError(
"matplotlib is required for plotting when the "
'default backend "matplotlib" is selected.'
) from None

_backends["matplotlib"] = module

if backend in _backends:
return _backends[backend]

module = _find_backend(backend)
module = _load_backend(backend)
_backends[backend] = module
return module