Skip to content

Commit

Permalink
✨ Make project CLI init, ui and modelify work inside subdirectory (#531
Browse files Browse the repository at this point in the history
…) (#539)

* ✨ Make project CLI init, ui and modelify work inside subdirectory (#531)

* add tests

* add changelog

* relax requirements

* fix typo in changelog
  • Loading branch information
Galileo-Galilei authored Apr 17, 2024
1 parent 70a9002 commit bda15bb
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 34 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
- :construction_worker: :package: Add build distribution (instead of source only distribution) to PyPI release to make install easier and faster ([#515](https://github.com/Galileo-Galilei/kedro-mlflow/issues/515))
- :memo: Document the ability to update configuration at runtime ([#395](https://github.com/Galileo-Galilei/kedro-mlflow/issues/395))

### Changed

- :sparkles: Project level CLI commands ``kedro mlflow init``, ``kedro mlflow ui`` and ``kedro mlflow modelify`` now work even inside a subdirectory and not at the root of the kedro project to be [consistent with ``kedro>0.19.4``](https://github.com/kedro-org/kedro/blob/main/RELEASE.md#major-features-and-improvements-1) ([#531](https://github.com/Galileo-Galilei/kedro-mlflow/issues/531))

### Fixed

- :bug: Proxy import of private kedro functions ``_is_project`` and ``_find_kedro_project`` to be resilient to changes ([#531](https://github.com/Galileo-Galilei/kedro-mlflow/issues/531))

## [0.12.1] - 2024-02-09

### Added
Expand Down
11 changes: 6 additions & 5 deletions kedro_mlflow/framework/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@
from kedro import __version__ as kedro_version
from kedro.framework.project import pipelines, settings
from kedro.framework.session import KedroSession
from kedro.framework.startup import _is_project, bootstrap_project
from kedro.framework.startup import bootstrap_project
from mlflow.models import infer_signature
from packaging import version

from kedro_mlflow.framework.cli.cli_utils import write_jinja_template
from kedro_mlflow.mlflow import KedroPipelineModel
from kedro_mlflow.utils import _find_kedro_project, _is_project

LOGGER = getLogger(__name__)
TEMPLATE_FOLDER_PATH = Path(__file__).parent.parent.parent / "template" / "project"
Expand All @@ -27,7 +28,7 @@ def reset_commands(self):
self.commands = {}

# add commands on the fly based on conditions
if _is_project(Path.cwd()):
if _is_project(_find_kedro_project(Path.cwd()) or Path.cwd()):
self.add_command(init)
self.add_command(ui)
self.add_command(modelify)
Expand Down Expand Up @@ -87,7 +88,7 @@ def init(env: str, force: bool, silent: bool):

# get constants
mlflow_yml = "mlflow.yml"
project_path = Path().cwd()
project_path = _find_kedro_project(Path.cwd()) or Path.cwd()
project_metadata = bootstrap_project(project_path)
mlflow_yml_path = project_path / settings.CONF_SOURCE / env / mlflow_yml

Expand Down Expand Up @@ -150,7 +151,7 @@ def ui(env: str, port: str, host: str):
enables to browse and compares runs.
"""

project_path = Path().cwd()
project_path = _find_kedro_project(Path.cwd()) or Path.cwd()
bootstrap_project(project_path)
with KedroSession.create(
project_path=project_path,
Expand Down Expand Up @@ -307,7 +308,7 @@ def modelify(

# if the command is available, we are necessarily at the root of a kedro project

project_path = Path.cwd()
project_path = _find_kedro_project(Path.cwd()) or Path.cwd()
bootstrap_project(project_path)
with KedroSession.create(project_path=project_path) as session:
# "pipeline" is the Pipeline object you want to convert to a mlflow model
Expand Down
22 changes: 21 additions & 1 deletion kedro_mlflow/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import List, Union
from typing import Any, List, Union


def _parse_requirements(path: Union[str, Path], encoding="utf-8") -> List:
Expand All @@ -8,3 +8,23 @@ def _parse_requirements(path: Union[str, Path], encoding="utf-8") -> List:
x.strip() for x in file_handler if x.strip() and not x.startswith("-r")
]
return requirements


def _is_project(project_path: Union[str, Path]) -> bool:
try:
# untested in the CI, for retrocompatiblity with kedro >=0.19.0,<0.19.3
from kedro.framework.startup import _is_project as _ip
except ImportError:
from kedro.utils import _is_project as _ip

return _ip(project_path)


def _find_kedro_project(current_dir: Path) -> Any:
try:
# untested in the CI, for retrocompatiblity with kedro >=0.19.0,<0.19.3
from kedro.framework.startup import _find_kedro_project as _fkp
except ImportError:
from kedro.utils import _find_kedro_project as _fkp

return _fkp(current_dir)
37 changes: 24 additions & 13 deletions tests/framework/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,26 +64,29 @@ def test_cli_global_discovered(monkeypatch, tmp_path):
# because discovery mechanisme is linked to setup.py


## This command is temporarlily deactivated beacuse of a bug in kedro==0.17.3, see: https://github.com/Galileo-Galilei/kedro-mlflow/issues/193
# def test_mlflow_commands_outside_kedro_project(monkeypatch, tmp_path):
# monkeypatch.chdir(tmp_path)
# cli_runner = CliRunner()
# result = cli_runner.invoke(cli_mlflow)
# assert {"new"} == set(extract_cmd_from_help(result.output))

@pytest.mark.parametrize("inside_subdirectory", (True, False))
def test_mlflow_commands_inside_kedro_project(
monkeypatch, kedro_project, inside_subdirectory
):
if inside_subdirectory is True:
monkeypatch.chdir(kedro_project / "src")
else:
monkeypatch.chdir(kedro_project)

def test_mlflow_commands_inside_kedro_project(monkeypatch, kedro_project):
monkeypatch.chdir(kedro_project)
# launch the command to initialize the project
cli_runner = CliRunner()
result = cli_runner.invoke(cli_mlflow)
assert {"init", "ui", "modelify"} == set(extract_cmd_from_help(result.output))
assert "You have not updated your template yet" not in result.output


def test_cli_init(monkeypatch, kedro_project):
@pytest.mark.parametrize("inside_subdirectory", (True, False))
def test_cli_init(monkeypatch, kedro_project, inside_subdirectory):
# "kedro_project" is a pytest.fixture declared in conftest
monkeypatch.chdir(kedro_project)
if inside_subdirectory is True:
monkeypatch.chdir(kedro_project / "src")
else:
monkeypatch.chdir(kedro_project)
cli_runner = CliRunner()
result = cli_runner.invoke(cli_init)

Expand Down Expand Up @@ -177,6 +180,7 @@ def test_cli_init_with_env(monkeypatch, kedro_project, env):
)
def test_cli_init_with_wrong_env(monkeypatch, kedro_project, env):
# "kedro_project" is a pytest.fixture declared in conftest

monkeypatch.chdir(kedro_project)
cli_runner = CliRunner()
result = cli_runner.invoke(cli_init, f"--env {env}")
Expand All @@ -189,8 +193,15 @@ def test_cli_init_with_wrong_env(monkeypatch, kedro_project, env):
# I tried mimicking mlflow_cli with mock but did not achieve desired result
# other solution is to use pytest-xprocess
# TODO: create an initlaized_kedro_project fixture with a global scope
def test_ui_is_up(monkeypatch, mocker, kedro_project_with_mlflow_conf):
monkeypatch.chdir(kedro_project_with_mlflow_conf)
@pytest.mark.parametrize("inside_subdirectory", (True, False))
def test_cli_ui_is_up(
monkeypatch, mocker, kedro_project_with_mlflow_conf, inside_subdirectory
):
if inside_subdirectory is True:
monkeypatch.chdir(kedro_project_with_mlflow_conf / "src")
else:
monkeypatch.chdir(kedro_project_with_mlflow_conf)

cli_runner = CliRunner()

# This does not test anything : the goal is to check whether it raises an error
Expand Down
39 changes: 24 additions & 15 deletions tests/framework/cli/test_cli_modelify.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,6 @@ def kp_for_modelify_persistent_input(kp_for_modelify):

@pytest.fixture
def kp_for_modelify_with_parameters(tmp_path):
# TODO: find a better way to inject dynamically
# the templated config loader without modifying the template

_FAKE_MODELIFY_PROJECT_NAME = r"kp_for_modelify_with_params"
config = {
# "output_dir": tmp_path,
Expand Down Expand Up @@ -203,43 +200,55 @@ def register_pipelines():
mlflow_tracking_uri: mlruns
"""

kp_for_modelify = tmp_path / config["repo_name"]
kp_for_modelify_with_parameters = tmp_path / config["repo_name"]

_write_file(
kp_for_modelify / "src" / config["python_package"] / "pipeline_registry.py",
kp_for_modelify_with_parameters
/ "src"
/ config["python_package"]
/ "pipeline_registry.py",
pipeline_registry_py,
)
_write_file(
kp_for_modelify / "conf" / "base" / "catalog.yml",
kp_for_modelify_with_parameters / "conf" / "base" / "catalog.yml",
catalog_yml,
)
_write_file(
kp_for_modelify / "conf" / "base" / "parameters.yml",
kp_for_modelify_with_parameters / "conf" / "base" / "parameters.yml",
parameters_yml,
)
_write_file(
kp_for_modelify / "conf" / "base" / "mlflow.yml",
kp_for_modelify_with_parameters / "conf" / "base" / "mlflow.yml",
mlflow_yml,
)

return kp_for_modelify
return kp_for_modelify_with_parameters


@pytest.mark.parametrize(
"example_repo,artifacts_list",
"example_repo,artifacts_list,inside_subdirectory",
[
(pytest.lazy_fixture("kp_for_modelify"), ["trained_model"]),
(pytest.lazy_fixture("kp_for_modelify"), ["trained_model"], False),
(pytest.lazy_fixture("kp_for_modelify"), ["trained_model"], True),
(
pytest.lazy_fixture("kp_for_modelify_with_parameters"),
["trained_model", "params:my_param"],
False,
),
],
)
def test_modelify_logs_in_mlflow(monkeypatch, example_repo, artifacts_list):
monkeypatch.chdir(example_repo)
def test_modelify_logs_in_mlflow_even_inside_subdirectory(
monkeypatch, example_repo, artifacts_list, inside_subdirectory
):
if inside_subdirectory is True:
project_cwd = example_repo / "src"
else:
project_cwd = example_repo

bootstrap_project(Path().cwd())
with KedroSession.create(project_path=Path().cwd()) as session:
monkeypatch.chdir(project_cwd)

bootstrap_project(example_repo)
with KedroSession.create(project_path=example_repo) as session:
context = session.load_context()
catalog = context.catalog
catalog.save("trained_model", 2)
Expand Down

0 comments on commit bda15bb

Please sign in to comment.