Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lazy load click subcommands #3883

Merged
merged 21 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion docs/source/kedro_project_setup/session.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,8 @@ This function reads `settings.py` and `pipeline_registry.py` and registers the c
#### ValueError: Package name not found
> ValueError: Package name not found. Make sure you have configured the project using 'bootstrap_project'. This should happen automatically if you are using Kedro command line interface.

If you are using `multiprocessing`, you need to be careful about this. Depending on your Operating System, you may have [different default](https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods). If the processes are `spawn`, Python will re-import all the modules in each process and thus you need to run `configure_project` again at the start of the new process. For example, this is how Kedro handles this in [`ParallelRunner`](https://github.com/kedro-org/kedro/blob/9e883e6a0ba40e3db4497b234dcb3801258e8396/kedro/runner/parallel_runner.py#L84-L85).
If you are using `multiprocessing`, you need to be careful about this. Depending on your operating system, you may have a [different default start method](https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods). If the start method is `spawn`, Python re-imports all the modules in each new process, so you need to run `configure_project` again at the start of that process. For example, this is how Kedro handles this in `ParallelRunner`:
```python
if multiprocessing.get_start_method() == "spawn" and package_name:
    _bootstrap_subprocess(package_name, logging_config)
```
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The secret scan test was complaining about the link ^ I added the code snippet instead. I hope that's okay? @noklam

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had this problem locally as well but it's all good now and the link seems correct and working.

59 changes: 39 additions & 20 deletions kedro/framework/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,13 @@

from kedro import __version__ as version
from kedro.framework.cli import BRIGHT_BLACK, ORANGE
from kedro.framework.cli.catalog import catalog_cli
from kedro.framework.cli.hooks import get_cli_hook_manager
from kedro.framework.cli.jupyter import jupyter_cli
from kedro.framework.cli.micropkg import micropkg_cli
from kedro.framework.cli.pipeline import pipeline_cli
from kedro.framework.cli.project import project_group
from kedro.framework.cli.registry import registry_cli
from kedro.framework.cli.starters import create_cli
from kedro.framework.cli.utils import (
CONTEXT_SETTINGS,
ENTRY_POINT_GROUPS,
CommandCollection,
KedroCliError,
LazyGroup,
_get_entry_points,
load_entry_points,
)
Expand All @@ -51,6 +45,9 @@ def cli() -> None: # pragma: no cover
"""Kedro is a CLI for creating and using Kedro projects. For more
information, type ``kedro info``.

NOTE: If a command from a plugin conflicts with a built-in command from Kedro,
the command from the plugin will take precedence.

"""
pass

Expand Down Expand Up @@ -85,6 +82,38 @@ def info() -> None:
click.echo("No plugins installed")


# Built-in commands that `KedroCLI.project_groups` exposes when project
# metadata is available. Each `lazy_subcommands` entry maps a subcommand name
# to the dotted path "<module>.<attribute>" of its click command object;
# `LazyGroup` imports the module only when that subcommand is looked up.
# NOTE(review): click uses a group's docstring as its help text, so adding a
# docstring to this function would change the CLI help output.
@click.group(
    context_settings=CONTEXT_SETTINGS,
    cls=LazyGroup,
    name="Kedro",
    lazy_subcommands={
        "registry": "kedro.framework.cli.registry.registry",
        "catalog": "kedro.framework.cli.catalog.catalog",
        "ipython": "kedro.framework.cli.project.ipython",
        "run": "kedro.framework.cli.project.run",
        "micropkg": "kedro.framework.cli.micropkg.micropkg",
        "package": "kedro.framework.cli.project.package",
        "jupyter": "kedro.framework.cli.jupyter.jupyter",
        "pipeline": "kedro.framework.cli.pipeline.pipeline",
    },
)
def project_commands() -> None:
    # Intentionally empty: the group only acts as a container for the lazily
    # loaded subcommands declared in the decorator.
    pass  # pragma: no cover


# Built-in commands returned by `KedroCLI.global_groups` (`new` and
# `starter`). Each `lazy_subcommands` entry maps a subcommand name to the
# dotted path "<module>.<attribute>" of its click command object; `LazyGroup`
# imports the module only when that subcommand is looked up.
# NOTE(review): click uses a group's docstring as its help text, so adding a
# docstring to this function would change the CLI help output.
@click.group(
    context_settings=CONTEXT_SETTINGS,
    name="Kedro",
    cls=LazyGroup,
    lazy_subcommands={
        "new": "kedro.framework.cli.starters.new",
        "starter": "kedro.framework.cli.starters.starter",
    },
)
def global_commands() -> None:
    # Intentionally empty: the group only acts as a container for the lazily
    # loaded subcommands declared in the decorator.
    pass  # pragma: no cover


def _init_plugins() -> None:
init_hooks = load_entry_points("init")
for init_hook in init_hooks:
Expand Down Expand Up @@ -125,7 +154,6 @@ def main(
self._cli_hook_manager.hook.before_command_run(
project_metadata=self._metadata, command_args=args
)

try:
super().main(
args=args,
Expand Down Expand Up @@ -178,7 +206,7 @@ def global_groups(self) -> Sequence[click.MultiCommand]:
combines them with the built-in ones (eventually overriding the
built-in ones if they are redefined by plugins).
"""
return [cli, create_cli, *load_entry_points("global")]
return [*load_entry_points("global"), cli, global_commands]

@property
def project_groups(self) -> Sequence[click.MultiCommand]:
Expand All @@ -192,15 +220,6 @@ def project_groups(self) -> Sequence[click.MultiCommand]:
if not self._metadata:
return []

built_in = [
catalog_cli,
jupyter_cli,
pipeline_cli,
micropkg_cli,
project_group,
registry_cli,
]

plugins = load_entry_points("project")

try:
Expand All @@ -209,7 +228,7 @@ def project_groups(self) -> Sequence[click.MultiCommand]:
except ModuleNotFoundError:
# return only built-in commands and commands from plugins
# (plugins can override built-in commands)
return [*built_in, *plugins]
return [*plugins, project_commands]

# fail badly if cli.py exists, but has no `cli` in it
if not hasattr(project_cli, "cli"):
Expand All @@ -219,7 +238,7 @@ def project_groups(self) -> Sequence[click.MultiCommand]:
user_defined = project_cli.cli
# return built-in commands, plugin commands and user defined commands
# (overriding happens as follows built-in < plugins < cli.py)
return [*built_in, *plugins, user_defined]
return [user_defined, *plugins, project_commands]


def main() -> None: # pragma: no cover
Expand Down
65 changes: 39 additions & 26 deletions kedro/framework/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,14 +120,12 @@ def __init__(self, *groups: tuple[str, Sequence[click.MultiCommand]]):
for title, cli_list in groups
]
sources = list(chain.from_iterable(cli_list for _, cli_list in self.groups))

help_texts = [
cli.help
for cli_collection in sources
for cli in cli_collection.sources
if cli.help
]
self._dedupe_commands(sources)
super().__init__(
sources=sources, # type: ignore[arg-type]
help="\n\n".join(help_texts),
Expand All @@ -136,29 +134,6 @@ def __init__(self, *groups: tuple[str, Sequence[click.MultiCommand]]):
self.params = sources[0].params
self.callback = sources[0].callback

@staticmethod
def _dedupe_commands(cli_collections: Sequence[click.CommandCollection]) -> None:
    """Remove duplicated command names in place, letting later sources win.

    When the same command name is registered by several command groups, only
    the occurrence belonging to the last group of the last collection is
    kept. Groups that end up with no commands are dropped from their
    collection's ``sources``.

    Args:
        cli_collections: Collections whose ``sources`` (and the ``commands``
            mapping of every source) are rewritten in place.
    """
    claimed: set[str] = set()

    # Walk backwards so the first occurrence we meet is the one that survives.
    for collection in reversed(cli_collections):
        for group in reversed(collection.sources):
            survivors = {}
            for name, command in group.commands.items():  # type: ignore[attr-defined]
                if name in claimed:
                    continue
                survivors[name] = command
            group.commands = survivors  # type: ignore[attr-defined]
            claimed.update(survivors)

    # A group stripped of all its commands has nothing left to contribute.
    for collection in cli_collections:
        non_empty = []
        for group in collection.sources:
            if group.commands:  # type: ignore[attr-defined]
                non_empty.append(group)
        collection.sources = non_empty

@staticmethod
def _merge_same_name_collections(
groups: Sequence[click.MultiCommand],
Expand All @@ -169,7 +144,6 @@ def _merge_same_name_collections(
named_groups[group.name].append(group) # type: ignore[index]
if group.help:
helps[group.name].append(group.help) # type: ignore[index]

return [
click.CommandCollection(
name=group_name,
Expand Down Expand Up @@ -504,3 +478,42 @@ def _split_load_versions(ctx: click.Context, param: Any, value: str) -> dict[str
load_versions_dict[load_version_list[0]] = load_version_list[1]

return load_versions_dict


class LazyGroup(click.Group):
    """A click Group that supports lazy loading of subcommands.

    Subcommands are declared as a mapping from command name to the dotted
    import path of the command object. The module behind a path is imported
    only when that command is requested through ``get_command``, so listing
    the available commands does not import any of them.
    """

    def __init__(
        self,
        *args: Any,
        lazy_subcommands: dict[str, str] | None = None,
        **kwargs: Any,
    ):
        """Create the group.

        Args:
            *args: Positional arguments forwarded to ``click.Group``.
            lazy_subcommands: Mapping of the form
                ``{command-name} -> {module-name}.{command-object-name}``.
                ``None`` is treated as an empty mapping.
            **kwargs: Keyword arguments forwarded to ``click.Group``.
        """
        super().__init__(*args, **kwargs)
        self.lazy_subcommands = lazy_subcommands or {}

    def list_commands(self, ctx: click.Context) -> list[str]:
        """Return eagerly registered command names followed by the lazy ones.

        The lazy names are sorted alphabetically. A name registered both
        eagerly and lazily is listed once, among the lazy names, because
        ``get_command`` resolves such a name to the lazy command.
        """
        base = [
            name
            for name in super().list_commands(ctx)
            if name not in self.lazy_subcommands
        ]
        lazy = sorted(self.lazy_subcommands.keys())
        return base + lazy

    def get_command(  # type: ignore[override]
        self, ctx: click.Context, cmd_name: str
    ) -> click.BaseCommand | click.Command | None:
        """Return the command called ``cmd_name``, importing it if it is lazy.

        Lazy subcommands take precedence over eagerly registered commands of
        the same name.
        """
        if cmd_name in self.lazy_subcommands:
            return self._lazy_load(cmd_name)
        return super().get_command(ctx, cmd_name)

    def _lazy_load(self, cmd_name: str) -> click.BaseCommand:
        """Import and return the command object registered for ``cmd_name``.

        Raises:
            ValueError: If the import path does not resolve to a click
                command object.
        """
        # lazily loading a command, first get the module name and attribute name
        import_path = self.lazy_subcommands[cmd_name]
        modname, cmd_object_name = import_path.rsplit(".", 1)
        # do the import
        mod = import_module(modname)
        # get the Command object from that module
        cmd_object = getattr(mod, cmd_object_name)
        # Check the result here so that a wrong import path fails with a clear
        # message instead of an obscure error deep inside click.
        if not isinstance(cmd_object, click.Command):
            raise ValueError(
                f"Lazy loading of {import_path} failed by returning "
                "a non-command object"
            )
        return cmd_object
68 changes: 29 additions & 39 deletions tests/framework/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@
from kedro import KedroDeprecationWarning
from kedro import __version__ as version
from kedro.framework.cli import load_entry_points
from kedro.framework.cli.catalog import catalog_cli
from kedro.framework.cli.cli import KedroCLI, _init_plugins, cli
from kedro.framework.cli.jupyter import jupyter_cli
from kedro.framework.cli.micropkg import micropkg_cli
from kedro.framework.cli.pipeline import pipeline_cli
from kedro.framework.cli.project import project_group
from kedro.framework.cli.registry import registry_cli
from kedro.framework.cli.starters import create_cli
from kedro.framework.cli.cli import (
KedroCLI,
_init_plugins,
cli,
global_commands,
project_commands,
)
from kedro.framework.cli.utils import (
CommandCollection,
KedroCliError,
Expand Down Expand Up @@ -332,15 +331,19 @@ def test_project_commands_no_clipy(self, mocker, fake_metadata):
side_effect=cycle([ModuleNotFoundError()]),
)
kedro_cli = KedroCLI(fake_metadata.project_path)
print(kedro_cli.project_groups)
assert len(kedro_cli.project_groups) == 6
assert kedro_cli.project_groups == [
catalog_cli,
jupyter_cli,
pipeline_cli,
micropkg_cli,
project_group,
registry_cli,
# There is only one `LazyGroup` for project commands
assert len(kedro_cli.project_groups) == 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be helpful to update the test name or leave a comment explaining that the 1 command group loaded is everything that's not doing lazy loading.

assert kedro_cli.project_groups == [project_commands]
# Assert that the lazy commands are listed properly
assert kedro_cli.project_groups[0].list_commands(None) == [
"catalog",
"ipython",
"jupyter",
"micropkg",
"package",
"pipeline",
"registry",
"run",
]

def test_project_commands_no_project(self, mocker, tmp_path):
Expand Down Expand Up @@ -371,22 +374,20 @@ def test_project_commands_valid_clipy(self, mocker, fake_metadata):
return_value=Module(cli=cli),
)
kedro_cli = KedroCLI(fake_metadata.project_path)
assert len(kedro_cli.project_groups) == 7
# The project group will now have two groups, the first from the project's cli.py and
# the second is the lazy project command group
assert len(kedro_cli.project_groups) == 2
assert kedro_cli.project_groups == [
catalog_cli,
jupyter_cli,
pipeline_cli,
micropkg_cli,
project_group,
registry_cli,
cli,
project_commands,
]

def test_kedro_cli_no_project(self, mocker, tmp_path):
mocker.patch("kedro.framework.cli.cli._is_project", return_value=False)
kedro_cli = KedroCLI(tmp_path)
assert len(kedro_cli.global_groups) == 2
assert kedro_cli.global_groups == [cli, create_cli]
# The global groups will be the cli(group for info command) and the global commands (starter and new)
assert kedro_cli.global_groups == [cli, global_commands]

result = CliRunner().invoke(kedro_cli, [])

Expand All @@ -410,28 +411,17 @@ def test_kedro_run_no_project(self, mocker, tmp_path):
)

def test_kedro_cli_with_project(self, mocker, fake_metadata):
Module = namedtuple("Module", ["cli"])
mocker.patch("kedro.framework.cli.cli._is_project", return_value=True)
mocker.patch(
"kedro.framework.cli.cli.bootstrap_project", return_value=fake_metadata
)
mocker.patch(
"kedro.framework.cli.cli.importlib.import_module",
return_value=Module(cli=cli),
)
kedro_cli = KedroCLI(fake_metadata.project_path)

assert len(kedro_cli.global_groups) == 2
assert kedro_cli.global_groups == [cli, create_cli]
assert len(kedro_cli.project_groups) == 7
assert kedro_cli.global_groups == [cli, global_commands]
assert len(kedro_cli.project_groups) == 1
assert kedro_cli.project_groups == [
catalog_cli,
jupyter_cli,
pipeline_cli,
micropkg_cli,
project_group,
registry_cli,
cli,
project_commands,
]

result = CliRunner().invoke(kedro_cli, [])
Expand Down
12 changes: 3 additions & 9 deletions tests/framework/cli/test_cli_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest
from click.testing import CliRunner

from kedro.framework.cli.cli import KedroCLI, cli
from kedro.framework.cli.cli import KedroCLI
from kedro.framework.cli.hooks import cli_hook_impl, get_cli_hook_manager, manager
from kedro.framework.startup import ProjectMetadata

Expand Down Expand Up @@ -83,7 +83,7 @@ def fake_plugin_distribution(mocker):
class TestKedroCLIHooks:
@pytest.mark.parametrize(
"command, exit_code",
[("-V", 0), ("info", 2), ("pipeline list", 2), ("starter", 0)],
[("-V", 0), ("info", 0), ("pipeline list", 2), ("starter", 0)],
)
def test_kedro_cli_should_invoke_cli_hooks_from_plugin(
self,
Expand All @@ -97,7 +97,6 @@ def test_kedro_cli_should_invoke_cli_hooks_from_plugin(
):
caplog.set_level(logging.DEBUG, logger="kedro")

Module = namedtuple("Module", ["cli"])
mocker.patch(
"kedro.framework.cli.cli._is_project",
return_value=True,
Expand All @@ -106,10 +105,6 @@ def test_kedro_cli_should_invoke_cli_hooks_from_plugin(
"kedro.framework.cli.cli.bootstrap_project",
return_value=fake_metadata,
)
mocker.patch(
"kedro.framework.cli.cli.importlib.import_module",
return_value=Module(cli=cli),
)
kedro_cli = KedroCLI(fake_metadata.project_path)
result = CliRunner().invoke(kedro_cli, [command])
assert (
Expand All @@ -121,8 +116,7 @@ def test_kedro_cli_should_invoke_cli_hooks_from_plugin(
f"Before command `{command}` run for project {fake_metadata}"
in result.output
)

# 'pipeline list' and 'info' aren't actually in the click structure and
# 'pipeline list' isn't actually in the click structure and
# return exit code 2 ('invalid usage of some shell built-in command')
assert (
f"After command `{command}` run for project {fake_metadata} (exit: {exit_code})"
Expand Down
Loading