Skip to content

Commit

Permalink
Lazy load click subcommands (#3883)
Browse files Browse the repository at this point in the history
* Lazy load subcommands

Signed-off-by: Ankita Katiyar <[email protected]>

* Test + lint

Signed-off-by: Ankita Katiyar <[email protected]>

* Make overwriting of commands work

Signed-off-by: Ankita Katiyar <[email protected]>

* Some lint fixes

Signed-off-by: Ankita Katiyar <[email protected]>

* Some lint fixes

Signed-off-by: Ankita Katiyar <[email protected]>

* Some lint fixes

Signed-off-by: Ankita Katiyar <[email protected]>

* Some lint fixes

Signed-off-by: Ankita Katiyar <[email protected]>

* Fix some tests

Signed-off-by: Ankita Katiyar <[email protected]>

* Some tests + lint

Signed-off-by: Ankita Katiyar <[email protected]>

* fix tests

Signed-off-by: Ankita Katiyar <[email protected]>

* Move lazy group

Signed-off-by: Ankita Katiyar <[email protected]>

* lint

Signed-off-by: Ankita Katiyar <[email protected]>

* lint

Signed-off-by: Ankita Katiyar <[email protected]>

* lint

Signed-off-by: Ankita Katiyar <[email protected]>

* Remove commented parts, add comments

Signed-off-by: Ankita Katiyar <[email protected]>

* test lazy commands + docs link

Signed-off-by: Ankita Katiyar <[email protected]>

* Release notes

Signed-off-by: Ankita Katiyar <[email protected]>

---------

Signed-off-by: Ankita Katiyar <[email protected]>
  • Loading branch information
ankatiyar authored Jul 10, 2024
1 parent bdaebb2 commit d82ddb8
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 95 deletions.
1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Upcoming Release 0.19.7

## Major features and improvements
* Kedro commands are now lazily loaded to add performance gains when running Kedro commands.

## Bug fixes and other changes
* Updated error message for invalid catalog entries.
Expand Down
6 changes: 5 additions & 1 deletion docs/source/kedro_project_setup/session.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,8 @@ This function reads `settings.py` and `pipeline_registry.py` and registers the c
#### ValueError: Package name not found
> ValueError: Package name not found. Make sure you have configured the project using 'bootstrap_project'. This should happen automatically if you are using Kedro command line interface.
If you are using `multiprocessing`, you need to be careful about this. Depending on your Operating System, you may have [different default](https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods). If the processes are `spawn`, Python will re-import all the modules in each process and thus you need to run `configure_project` again at the start of the new process. For example, this is how Kedro handle this in `ParallelRunner`(https://github.com/kedro-org/kedro/blob/9e883e6a0ba40e3db4497b234dcb3801258e8396/kedro/runner/parallel_runner.py#L84-L85)
If you are using `multiprocessing`, you need to be careful about this. Depending on your Operating System, you may have [different default](https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods). If the processes are `spawn`, Python will re-import all the modules in each process and thus you need to run `configure_project` again at the start of the new process. For example, this is how Kedro handles this in `ParallelRunner`:
```python
if multiprocessing.get_start_method() == "spawn" and package_name:
_bootstrap_subprocess(package_name, logging_config)
```
59 changes: 39 additions & 20 deletions kedro/framework/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,13 @@

from kedro import __version__ as version
from kedro.framework.cli import BRIGHT_BLACK, ORANGE
from kedro.framework.cli.catalog import catalog_cli
from kedro.framework.cli.hooks import get_cli_hook_manager
from kedro.framework.cli.jupyter import jupyter_cli
from kedro.framework.cli.micropkg import micropkg_cli
from kedro.framework.cli.pipeline import pipeline_cli
from kedro.framework.cli.project import project_group
from kedro.framework.cli.registry import registry_cli
from kedro.framework.cli.starters import create_cli
from kedro.framework.cli.utils import (
CONTEXT_SETTINGS,
ENTRY_POINT_GROUPS,
CommandCollection,
KedroCliError,
LazyGroup,
_get_entry_points,
load_entry_points,
)
Expand All @@ -51,6 +45,9 @@ def cli() -> None: # pragma: no cover
"""Kedro is a CLI for creating and using Kedro projects. For more
information, type ``kedro info``.
NOTE: If a command from a plugin conflicts with a built-in command from Kedro,
the command from the plugin will take precedence.
"""
pass

Expand Down Expand Up @@ -85,6 +82,38 @@ def info() -> None:
click.echo("No plugins installed")


@click.group(
context_settings=CONTEXT_SETTINGS,
cls=LazyGroup,
name="Kedro",
lazy_subcommands={
"registry": "kedro.framework.cli.registry.registry",
"catalog": "kedro.framework.cli.catalog.catalog",
"ipython": "kedro.framework.cli.project.ipython",
"run": "kedro.framework.cli.project.run",
"micropkg": "kedro.framework.cli.micropkg.micropkg",
"package": "kedro.framework.cli.project.package",
"jupyter": "kedro.framework.cli.jupyter.jupyter",
"pipeline": "kedro.framework.cli.pipeline.pipeline",
},
)
def project_commands() -> None:
pass # pragma: no cover


@click.group(
context_settings=CONTEXT_SETTINGS,
name="Kedro",
cls=LazyGroup,
lazy_subcommands={
"new": "kedro.framework.cli.starters.new",
"starter": "kedro.framework.cli.starters.starter",
},
)
def global_commands() -> None:
pass # pragma: no cover


def _init_plugins() -> None:
init_hooks = load_entry_points("init")
for init_hook in init_hooks:
Expand Down Expand Up @@ -125,7 +154,6 @@ def main(
self._cli_hook_manager.hook.before_command_run(
project_metadata=self._metadata, command_args=args
)

try:
super().main(
args=args,
Expand Down Expand Up @@ -178,7 +206,7 @@ def global_groups(self) -> Sequence[click.MultiCommand]:
combines them with the built-in ones (eventually overriding the
built-in ones if they are redefined by plugins).
"""
return [cli, create_cli, *load_entry_points("global")]
return [*load_entry_points("global"), cli, global_commands]

@property
def project_groups(self) -> Sequence[click.MultiCommand]:
Expand All @@ -192,15 +220,6 @@ def project_groups(self) -> Sequence[click.MultiCommand]:
if not self._metadata:
return []

built_in = [
catalog_cli,
jupyter_cli,
pipeline_cli,
micropkg_cli,
project_group,
registry_cli,
]

plugins = load_entry_points("project")

try:
Expand All @@ -209,7 +228,7 @@ def project_groups(self) -> Sequence[click.MultiCommand]:
except ModuleNotFoundError:
# return only built-in commands and commands from plugins
# (plugins can override built-in commands)
return [*built_in, *plugins]
return [*plugins, project_commands]

# fail badly if cli.py exists, but has no `cli` in it
if not hasattr(project_cli, "cli"):
Expand All @@ -219,7 +238,7 @@ def project_groups(self) -> Sequence[click.MultiCommand]:
user_defined = project_cli.cli
# return built-in commands, plugin commands and user defined commands
# (overriding happens as follows built-in < plugins < cli.py)
return [*built_in, *plugins, user_defined]
return [user_defined, *plugins, project_commands]


def main() -> None: # pragma: no cover
Expand Down
65 changes: 39 additions & 26 deletions kedro/framework/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,14 +120,12 @@ def __init__(self, *groups: tuple[str, Sequence[click.MultiCommand]]):
for title, cli_list in groups
]
sources = list(chain.from_iterable(cli_list for _, cli_list in self.groups))

help_texts = [
cli.help
for cli_collection in sources
for cli in cli_collection.sources
if cli.help
]
self._dedupe_commands(sources)
super().__init__(
sources=sources, # type: ignore[arg-type]
help="\n\n".join(help_texts),
Expand All @@ -136,29 +134,6 @@ def __init__(self, *groups: tuple[str, Sequence[click.MultiCommand]]):
self.params = sources[0].params
self.callback = sources[0].callback

@staticmethod
def _dedupe_commands(cli_collections: Sequence[click.CommandCollection]) -> None:
"""Deduplicate commands by keeping the ones from the last source
in the list.
"""
seen_names: set[str] = set()
for cli_collection in reversed(cli_collections):
for cmd_group in reversed(cli_collection.sources):
cmd_group.commands = { # type: ignore[attr-defined]
cmd_name: cmd
for cmd_name, cmd in cmd_group.commands.items() # type: ignore[attr-defined]
if cmd_name not in seen_names
}
seen_names |= cmd_group.commands.keys() # type: ignore[attr-defined]

# remove empty command groups
for cli_collection in cli_collections:
cli_collection.sources = [
cmd_group
for cmd_group in cli_collection.sources
if cmd_group.commands # type: ignore[attr-defined]
]

@staticmethod
def _merge_same_name_collections(
groups: Sequence[click.MultiCommand],
Expand All @@ -169,7 +144,6 @@ def _merge_same_name_collections(
named_groups[group.name].append(group) # type: ignore[index]
if group.help:
helps[group.name].append(group.help) # type: ignore[index]

return [
click.CommandCollection(
name=group_name,
Expand Down Expand Up @@ -504,3 +478,42 @@ def _split_load_versions(ctx: click.Context, param: Any, value: str) -> dict[str
load_versions_dict[load_version_list[0]] = load_version_list[1]

return load_versions_dict


class LazyGroup(click.Group):
"""A click Group that supports lazy loading of subcommands."""

def __init__(
self,
*args: Any,
lazy_subcommands: dict[str, str] | None = None,
**kwargs: Any,
):
super().__init__(*args, **kwargs)
# lazy_subcommands is a map of the form:
#
# {command-name} -> {module-name}.{command-object-name}
#
self.lazy_subcommands = lazy_subcommands or {}

def list_commands(self, ctx: click.Context) -> list[str]:
base = list(super().list_commands(ctx))
lazy = sorted(self.lazy_subcommands.keys())
return base + lazy

def get_command( # type: ignore[override]
self, ctx: click.Context, cmd_name: str
) -> click.BaseCommand | click.Command | None:
if cmd_name in self.lazy_subcommands:
return self._lazy_load(cmd_name)
return super().get_command(ctx, cmd_name)

def _lazy_load(self, cmd_name: str) -> click.BaseCommand:
# lazily loading a command, first get the module name and attribute name
import_path = self.lazy_subcommands[cmd_name]
modname, cmd_object_name = import_path.rsplit(".", 1)
# do the import
mod = import_module(modname)
# get the Command object from that module
cmd_object = getattr(mod, cmd_object_name)
return cmd_object # type: ignore[no-any-return]
68 changes: 29 additions & 39 deletions tests/framework/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@
from kedro import KedroDeprecationWarning
from kedro import __version__ as version
from kedro.framework.cli import load_entry_points
from kedro.framework.cli.catalog import catalog_cli
from kedro.framework.cli.cli import KedroCLI, _init_plugins, cli
from kedro.framework.cli.jupyter import jupyter_cli
from kedro.framework.cli.micropkg import micropkg_cli
from kedro.framework.cli.pipeline import pipeline_cli
from kedro.framework.cli.project import project_group
from kedro.framework.cli.registry import registry_cli
from kedro.framework.cli.starters import create_cli
from kedro.framework.cli.cli import (
KedroCLI,
_init_plugins,
cli,
global_commands,
project_commands,
)
from kedro.framework.cli.utils import (
CommandCollection,
KedroCliError,
Expand Down Expand Up @@ -335,15 +334,19 @@ def test_project_commands_no_clipy(self, mocker, fake_metadata):
side_effect=cycle([ModuleNotFoundError()]),
)
kedro_cli = KedroCLI(fake_metadata.project_path)
print(kedro_cli.project_groups)
assert len(kedro_cli.project_groups) == 6
assert kedro_cli.project_groups == [
catalog_cli,
jupyter_cli,
pipeline_cli,
micropkg_cli,
project_group,
registry_cli,
# There is only one `LazyGroup` for project commands
assert len(kedro_cli.project_groups) == 1
assert kedro_cli.project_groups == [project_commands]
# Assert that the lazy commands are listed properly
assert kedro_cli.project_groups[0].list_commands(None) == [
"catalog",
"ipython",
"jupyter",
"micropkg",
"package",
"pipeline",
"registry",
"run",
]

def test_project_commands_no_project(self, mocker, tmp_path):
Expand Down Expand Up @@ -374,22 +377,20 @@ def test_project_commands_valid_clipy(self, mocker, fake_metadata):
return_value=Module(cli=cli),
)
kedro_cli = KedroCLI(fake_metadata.project_path)
assert len(kedro_cli.project_groups) == 7
# The project group will now have two groups, the first from the project's cli.py and
# the second is the lazy project command group
assert len(kedro_cli.project_groups) == 2
assert kedro_cli.project_groups == [
catalog_cli,
jupyter_cli,
pipeline_cli,
micropkg_cli,
project_group,
registry_cli,
cli,
project_commands,
]

def test_kedro_cli_no_project(self, mocker, tmp_path):
mocker.patch("kedro.framework.cli.cli._is_project", return_value=False)
kedro_cli = KedroCLI(tmp_path)
assert len(kedro_cli.global_groups) == 2
assert kedro_cli.global_groups == [cli, create_cli]
# The global groups will be the cli(group for info command) and the global commands (starter and new)
assert kedro_cli.global_groups == [cli, global_commands]

result = CliRunner().invoke(kedro_cli, [])

Expand All @@ -413,28 +414,17 @@ def test_kedro_run_no_project(self, mocker, tmp_path):
)

def test_kedro_cli_with_project(self, mocker, fake_metadata):
Module = namedtuple("Module", ["cli"])
mocker.patch("kedro.framework.cli.cli._is_project", return_value=True)
mocker.patch(
"kedro.framework.cli.cli.bootstrap_project", return_value=fake_metadata
)
mocker.patch(
"kedro.framework.cli.cli.importlib.import_module",
return_value=Module(cli=cli),
)
kedro_cli = KedroCLI(fake_metadata.project_path)

assert len(kedro_cli.global_groups) == 2
assert kedro_cli.global_groups == [cli, create_cli]
assert len(kedro_cli.project_groups) == 7
assert kedro_cli.global_groups == [cli, global_commands]
assert len(kedro_cli.project_groups) == 1
assert kedro_cli.project_groups == [
catalog_cli,
jupyter_cli,
pipeline_cli,
micropkg_cli,
project_group,
registry_cli,
cli,
project_commands,
]

result = CliRunner().invoke(kedro_cli, [])
Expand Down
12 changes: 3 additions & 9 deletions tests/framework/cli/test_cli_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest
from click.testing import CliRunner

from kedro.framework.cli.cli import KedroCLI, cli
from kedro.framework.cli.cli import KedroCLI
from kedro.framework.cli.hooks import cli_hook_impl, get_cli_hook_manager, manager
from kedro.framework.startup import ProjectMetadata

Expand Down Expand Up @@ -83,7 +83,7 @@ def fake_plugin_distribution(mocker):
class TestKedroCLIHooks:
@pytest.mark.parametrize(
"command, exit_code",
[("-V", 0), ("info", 2), ("pipeline list", 2), ("starter", 0)],
[("-V", 0), ("info", 0), ("pipeline list", 2), ("starter", 0)],
)
def test_kedro_cli_should_invoke_cli_hooks_from_plugin(
self,
Expand All @@ -97,7 +97,6 @@ def test_kedro_cli_should_invoke_cli_hooks_from_plugin(
):
caplog.set_level(logging.DEBUG, logger="kedro")

Module = namedtuple("Module", ["cli"])
mocker.patch(
"kedro.framework.cli.cli._is_project",
return_value=True,
Expand All @@ -106,10 +105,6 @@ def test_kedro_cli_should_invoke_cli_hooks_from_plugin(
"kedro.framework.cli.cli.bootstrap_project",
return_value=fake_metadata,
)
mocker.patch(
"kedro.framework.cli.cli.importlib.import_module",
return_value=Module(cli=cli),
)
kedro_cli = KedroCLI(fake_metadata.project_path)
result = CliRunner().invoke(kedro_cli, [command])
assert (
Expand All @@ -121,8 +116,7 @@ def test_kedro_cli_should_invoke_cli_hooks_from_plugin(
f"Before command `{command}` run for project {fake_metadata}"
in result.output
)

# 'pipeline list' and 'info' aren't actually in the click structure and
# 'pipeline list' isn't actually in the click structure and
# return exit code 2 ('invalid usage of some shell built-in command')
assert (
f"After command `{command}` run for project {fake_metadata} (exit: {exit_code})"
Expand Down

0 comments on commit d82ddb8

Please sign in to comment.