From 249f7086b1734ac31ce5f101291624fab9043e0f Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Thu, 5 Oct 2023 09:09:06 +0200 Subject: [PATCH 1/4] restructure-cli --- src/fondant/cli.py | 205 +++++++++++++++++++++++++-------------------- 1 file changed, 112 insertions(+), 93 deletions(-) diff --git a/src/fondant/cli.py b/src/fondant/cli.py index f40843094..2575662a6 100644 --- a/src/fondant/cli.py +++ b/src/fondant/cli.py @@ -80,20 +80,6 @@ def entrypoint(): args.func(args) -def set_default_output(args: argparse.Namespace): - """Set the default output path depending on the runner type.""" - if args.output_path is None: - if args.local: - args.output_path = "docker-compose.yml" - elif args.kubeflow: - args.output_path = "pipeline.yaml" - else: - msg = "One of the arguments --local --kubeflow is required" - raise ValueError(msg) - - return args - - def register_explore(parent_parser): parser = parent_parser.add_parser( "explore", @@ -202,6 +188,9 @@ def register_compile(parent_parser): """, ), ) + + compiler_subparser = parser.add_subparsers() + parser.add_argument( "ref", help="""Reference to the pipeline to run, can be a path to a spec file or @@ -209,47 +198,59 @@ def register_compile(parent_parser): """, action="store", ) - # add a mutually exclusive group for the mode - mode_group = parser.add_mutually_exclusive_group(required=True) - mode_group.add_argument("--local", action="store_true") - mode_group.add_argument("--kubeflow", action="store_true") - parser.add_argument( + local_parser = compiler_subparser.add_parser(name="local", help="Local compiler") + kubeflow_parser = compiler_subparser.add_parser( + name="kubeflow", + help="Kubeflow compiler", + ) + + # Local runner parser + local_parser.add_argument( "--output-path", "-o", - help="Output directory", - default=None, + help="Output path of compiled pipeline", + default="docker_compose.yml", ) - parser.add_argument( + local_parser.add_argument( "--extra-volumes", help="Extra volumes to mount in containers", nargs="+", ) - parser.add_argument( + local_parser.add_argument( "--build-arg", action="append", help="Build arguments to pass to `docker build`. Format {key}={value}.", default=[], ) - parser.set_defaults(func=compile) + # Kubeflow parser + kubeflow_parser.add_argument( + "--output-path", + "-o", + help="Output path of compiled pipeline", + default="pipeline.yaml", + ) + + local_parser.set_defaults(func=compile_local) + kubeflow_parser.set_defaults(func=compile_kfp) -def compile(args): - args = set_default_output(args) +def compile_local(args): pipeline = pipeline_from_module(args.ref) + compiler = DockerCompiler() + compiler.compile( + pipeline=pipeline, + extra_volumes=args.extra_volumes, + output_path=args.output_path, + build_args=args.build_arg, + ) - if args.local: - compiler = DockerCompiler() - compiler.compile( - pipeline=pipeline, - extra_volumes=args.extra_volumes, - output_path=args.output_path, - build_args=args.build_arg, - ) - elif args.kubeflow: - compiler = KubeFlowCompiler() - compiler.compile(pipeline=pipeline, output_path=args.output_path) + +def compile_kfp(args): + pipeline = pipeline_from_module(args.ref) + compiler = KubeFlowCompiler() + compiler.compile(pipeline=pipeline, output_path=args.output_path) def register_run(parent_parser): @@ -271,6 +272,9 @@ def register_run(parent_parser): """, ), ) + + runner_subparser = parser.add_subparsers() + # Define the "ref" argument once parser.add_argument( "ref", help="""Reference to the pipeline to run, can be a path to a spec file or @@ -278,72 +282,87 @@ def register_run(parent_parser): """, action="store", ) - # add a mutually exclusive group for the mode - mode_group = parser.add_mutually_exclusive_group(required=True) - mode_group.add_argument("--local", action="store_true") - mode_group.add_argument("--kubeflow", action="store_true") - parser.add_argument( + local_parser = runner_subparser.add_parser(name="local", help="Local runner") + kubeflow_parser = runner_subparser.add_parser( + name="kubeflow", + help="Kubeflow runner", + ) + + # Local runner parser + local_parser.add_argument( "--output-path", "-o", - help="Output directory", - default=None, + help="Output path of compiled pipeline", + default="docker_compose.yml", ) - parser.add_argument( + local_parser.add_argument( "--extra-volumes", - help="Extra volumes to mount in containers", nargs="+", + help="Extra volumes to mount in containers", ) - parser.add_argument( + local_parser.add_argument( "--build-arg", action="append", - help="Build arguments to pass to `docker build`. Format {key}={value}.", + help="Build arguments for `docker build`", ) - parser.add_argument("--host", help="KubeFlow pipeline host url", required=False) - parser.set_defaults(func=run) - - -def run(args): - args = set_default_output(args) - - if args.local: - try: - pipeline = pipeline_from_module(args.ref) - except ModuleNotFoundError: - spec_ref = args.ref - else: - spec_ref = args.output_path - logging.info( - "Found reference to un-compiled pipeline... compiling to {spec_ref}", - ) - compiler = DockerCompiler() - compiler.compile( - pipeline=pipeline, - extra_volumes=args.extra_volumes, - output_path=spec_ref, - build_args=args.build_arg, - ) - finally: - DockerRunner().run(spec_ref) - - elif args.kubeflow: - if not args.host: - msg = "--host argument is required for running on Kubeflow" - raise ValueError(msg) - try: - pipeline = pipeline_from_module(args.ref) - except ModuleNotFoundError: - spec_ref = args.ref - else: - spec_ref = args.output_path - logging.info( - f"Found reference to un-compiled pipeline... compiling to {spec_ref}", - ) - compiler = KubeFlowCompiler() - compiler.compile(pipeline=pipeline, output_path=spec_ref) - finally: - runner = KubeflowRunner(host=args.host) - runner.run(input_spec=spec_ref) + local_parser.set_defaults(func=run_local) + + # kubeflow runner parser + kubeflow_parser.add_argument( + "--output-path", + "-o", + help="Output path of compiled pipeline", + default="pipeline.yaml", + ) + kubeflow_parser.add_argument( + "--host", + help="KubeFlow pipeline host url", + required=True, + ) + + kubeflow_parser.set_defaults(func=run_kubeflow) + + +def run_local(args): + try: + pipeline = pipeline_from_module(args.ref) + except ModuleNotFoundError: + spec_ref = args.ref + else: + spec_ref = args.output_path + logging.info( + "Found reference to un-compiled pipeline... compiling to {spec_ref}", + ) + compiler = DockerCompiler() + compiler.compile( + pipeline=pipeline, + extra_volumes=args.extra_volumes, + output_path=spec_ref, + build_args=args.build_arg, + ) + finally: + DockerRunner().run(spec_ref) + + +def run_kubeflow(args): + if not args.host: + msg = "--host argument is required for running on Kubeflow" + raise ValueError(msg) + try: + pipeline = pipeline_from_module(args.ref) + except ModuleNotFoundError: + spec_ref = args.ref + else: + spec_ref = args.output_path + logging.info( + "Found reference to un-compiled pipeline... compiling to {spec_ref}", + ) + compiler = KubeFlowCompiler() + compiler.compile(pipeline=pipeline, output_path=spec_ref) + finally: + runner = KubeflowRunner(host=args.host) + runner.run(input_spec=spec_ref) def register_execute(parent_parser): From f70774b0e88beb198dd8dff56a2e74ab61d3191c Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Thu, 5 Oct 2023 16:05:12 +0200 Subject: [PATCH 2/4] modify tests --- src/fondant/cli.py | 4 ++-- tests/test_cli.py | 20 +++++++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/fondant/cli.py b/src/fondant/cli.py index 2575662a6..3f8e1509f 100644 --- a/src/fondant/cli.py +++ b/src/fondant/cli.py @@ -321,7 +321,7 @@ def register_run(parent_parser): required=True, ) - kubeflow_parser.set_defaults(func=run_kubeflow) + kubeflow_parser.set_defaults(func=run_kfp) def run_local(args): @@ -345,7 +345,7 @@ def run_local(args): DockerRunner().run(spec_ref) -def run_kubeflow(args): +def run_kfp(args): if not args.host: msg = "--host argument is required for running on Kubeflow" raise ValueError(msg) diff --git a/tests/test_cli.py b/tests/test_cli.py index 33b742763..b8326f148 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,12 +6,14 @@ from fondant.cli import ( ComponentImportError, PipelineImportError, - compile, + compile_kfp, + compile_local, component_from_module, execute, get_module, pipeline_from_module, - run, + run_kfp, + run_local, ) from fondant.component import DaskLoadComponent from fondant.executor import Executor, ExecutorFactory @@ -137,7 +139,7 @@ def test_local_logic(tmp_path_factory): extra_volumes=[], build_arg=[], ) - compile(args) + compile_local(args) def test_kfp_compile(tmp_path_factory): @@ -148,14 +150,14 @@ def test_kfp_compile(tmp_path_factory): local=False, output_path=str(fn / "kubeflow_pipelines.yml"), ) - compile(args) + compile_kfp(args) def test_local_run(tmp_path_factory): """Test that the run command works with different arguments.""" args = argparse.Namespace(local=True, ref="some/path", output_path=None) with patch("subprocess.call") as mock_call: - run(args) + run_local(args) mock_call.assert_called_once_with( [ "docker", @@ -178,7 +180,7 @@ def test_local_run(tmp_path_factory): extra_volumes=[], build_arg=[], ) - run(args1) + run_local(args1) mock_call.assert_called_once_with( [ "docker", @@ -207,7 +209,7 @@ def test_kfp_run(tmp_path_factory): ValueError, match="--host argument is required for running on Kubeflow", ): # no host - run(args) + run_kfp(args) with patch("fondant.cli.KubeflowRunner") as mock_runner: args = argparse.Namespace( kubeflow=True, @@ -216,7 +218,7 @@ def test_kfp_run(tmp_path_factory): host="localhost", ref="some/path", ) - run(args) + run_kfp(args) mock_runner.assert_called_once_with(host="localhost") with patch("fondant.cli.KubeflowRunner") as mock_runner, tmp_path_factory.mktemp( "temp", @@ -228,5 +230,5 @@ def test_kfp_run(tmp_path_factory): output_path=str(fn / "kubeflow_pipelines.yml"), ref=__name__, ) - run(args) + run_kfp(args) mock_runner.assert_called_once_with(host="localhost2") From 8cc592f5f9cc4b2ad58318575b1f54c34dfbc56c Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Thu, 5 Oct 2023 16:18:39 +0200 Subject: [PATCH 3/4] update documentation --- README.md | 2 +- docs/getting_started.md | 2 +- docs/guides/build_a_simple_pipeline.md | 2 +- docs/pipeline.md | 6 ++-- examples/pipelines/filter-cc-25m/README.md | 4 +-- src/fondant/cli.py | 32 +++++++++++----------- 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index c1e5ade3a..ccdc7a792 100644 --- a/README.md +++ b/README.md @@ -281,7 +281,7 @@ For more advanced use cases, you can use the `DaskTransformComponent` instead. Once you have a pipeline you can easily run (and compile) it by using the built-in CLI: ```bash -fondant run pipeline.py --local +fondant run local pipeline.py ``` To see all available arguments you can check the fondant CLI help pages diff --git a/docs/getting_started.md b/docs/getting_started.md index 9f17acd86..9b5b0891d 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -25,7 +25,7 @@ git clone https://github.com/ml6team/fondant.git ``` Make sure that Docker Compose is running, navigate to `fondant/examples/pipelines/filter-cc-25m`, and initiate the pipeline by executing: ``` -fondant run pipeline --local +fondant run local pipeline.py ``` Note: For local testing purposes, the pipeline will only download the first 10,000 images. If you want to download the full dataset, you will need to modify the component arguments in the pipeline.py file, specifically the following part: ```python diff --git a/docs/guides/build_a_simple_pipeline.md b/docs/guides/build_a_simple_pipeline.md index 3f2b9f255..5d23a4127 100644 --- a/docs/guides/build_a_simple_pipeline.md +++ b/docs/guides/build_a_simple_pipeline.md @@ -129,7 +129,7 @@ Two key actions are taking place here: To test the pipeline, you can execute the following command within the pipeline directory: ``` -fondant run pipeline --local +fondant run local pipeline.py ``` The pipeline execution will start, initiating the download of the dataset from HuggingFace. diff --git a/docs/pipeline.md b/docs/pipeline.md index e854d772d..5367fb544 100644 --- a/docs/pipeline.md +++ b/docs/pipeline.md @@ -139,7 +139,7 @@ The Kubeflow compiler will take your pipeline and compile it to a Kubeflow pipel - Using the CLI: ```bash -fondant compile --kubeflow --output +fondant compile kubeflow --output ``` - Using the compiler directly: @@ -163,7 +163,7 @@ There are 2 ways to run a Kubeflow compiled pipeline: - Using the CLI: ```bash -fondant run --kubeflow --host +fondant run kubeflow --host ``` NOTE: that the pipeline ref is the path to the compiled pipeline spec OR a reference to an fondant pipeline in which case the compiler will compile the pipeline first before running. @@ -251,7 +251,7 @@ docker compose up Or you can use the fondant cli to run the pipeline: ```bash -fondant run --local +fondant run local ``` NOTE: that the pipeline ref is the path to the compiled pipeline spec OR a reference to an fondant pipeline in which case the compiler will compile the pipeline first before running. diff --git a/examples/pipelines/filter-cc-25m/README.md b/examples/pipelines/filter-cc-25m/README.md index e8a592f3d..1c6ab89b0 100644 --- a/examples/pipelines/filter-cc-25m/README.md +++ b/examples/pipelines/filter-cc-25m/README.md @@ -57,7 +57,7 @@ Accordingly, the getting started documentation, we can run the pipeline by using as follow: ```bash -fondant run pipeline --local +fondant run local pipeline.py ``` > Note: The 'load_from_hub' component accepts an argument that defines the dataset size. @@ -68,7 +68,7 @@ fondant run pipeline --local If you wish to run the entire pipeline, including the filtering step, use the following command: ```bash -fondant run filter_pipeline --local +fondant run local filter_pipeline ``` After the pipeline is succeeded you can explore the data by using the fondant data explorer: diff --git a/src/fondant/cli.py b/src/fondant/cli.py index 3f8e1509f..7a1f44567 100644 --- a/src/fondant/cli.py +++ b/src/fondant/cli.py @@ -170,21 +170,17 @@ def register_compile(parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, description=textwrap.dedent( """ - Compile a fondant pipeline into either a docker-compose.yml(local) or kubeflow spec file. + Compile a fondant pipeline into pipeline specification file file. The pipeline argument is a formatstring. The compiler will try to import the pipeline from the module specified in the formatstring. (NOTE: path is patched to include the current working directory so you can do relative imports) - The --local or --kubeflow flag specifies the mode in which the pipeline will be compiled. - You can use the --extra-volumes flag to specify extra volumes to mount in the containers this can be used: + You can use different modes for fondant runners. Current existing modes are local and kubeflow. - - to mount data directories to be used by the pipeline (note that if your pipeline's base_path is local it will already be mounted for you). - - to mount cloud credentials (see examples)) - - Example: - fondant compile my_project.my_pipeline.py --local --extra-volumes $HOME/.aws/credentials:/root/.aws/credentials + Examples of compiling component: + fondant compile local --extra-volumes $HOME/.aws/credentials:/root/.aws/credentials my_project.my_pipeline.py - fondant compile my_project.my_pipeline.py --kubeflow --extra-volumes $HOME/.config/gcloud/application_default_credentials.json:/root/.config/gcloud/application_default_credentials.json + fondant compile kubeflow --extra-volumes $HOME/.config/gcloud/application_default_credentials.json:/root/.config/gcloud/application_default_credentials.json my_project.my_pipeline.py """, ), ) @@ -214,7 +210,9 @@ def register_compile(parent_parser): ) local_parser.add_argument( "--extra-volumes", - help="Extra volumes to mount in containers", + help="""Extra volumes to mount in containers. You can use the --extra-volumes flag to specify extra volumes to mount in the containers this can be used: + - to mount data directories to be used by the pipeline (note that if your pipeline's base_path is local it will already be mounted for you). + - to mount cloud credentials""", nargs="+", ) local_parser.add_argument( @@ -263,12 +261,12 @@ def register_run(parent_parser): pipeline (see fondant compile --help for more info) OR a path to a spec file in which case it will compile the pipeline first and then run it. - The --local or --kubeflow flag specifies the mode in which the pipeline will be ran. - You can use the --extra-volumes flag to specify extra volumes to mount in the containers this can be used: + You can use different modes for fondant runners. Current existing modes are `local` and `kubeflow`. + You can run `fondant --help` to find out more about the specific arguments for each mode. - Example: - fondant run my_project.my_pipeline.py --local --extra-volumes $HOME/.aws/credentials:/root/.aws/credentials - fondant run ./my_compiled_kubeflow_pipeline.tgz --kubeflow + Examples of running component: + fondant run local --extra-volumes $HOME/.aws/credentials:/root/.aws/credentials my_project.my_pipeline.py + fondant run kubeflow ./my_compiled_kubeflow_pipeline.tgz """, ), ) @@ -299,7 +297,9 @@ def register_run(parent_parser): local_parser.add_argument( "--extra-volumes", nargs="+", - help="Extra volumes to mount in containers", + help="""Extra volumes to mount in containers. You can use the --extra-volumes flag to specify extra volumes to mount in the containers this can be used: + - to mount data directories to be used by the pipeline (note that if your pipeline's base_path is local it will already be mounted for you). + - to mount cloud credentials""", ) local_parser.add_argument( "--build-arg", From cd4678096c21d4fc992a6a58d68b2ed05dd6f08f Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Fri, 6 Oct 2023 08:50:01 +0200 Subject: [PATCH 4/4] modify docker compose name --- src/fondant/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fondant/cli.py b/src/fondant/cli.py index 7a1f44567..9aa1c7f2d 100644 --- a/src/fondant/cli.py +++ b/src/fondant/cli.py @@ -206,7 +206,7 @@ def register_compile(parent_parser): "--output-path", "-o", help="Output path of compiled pipeline", - default="docker_compose.yml", + default="docker-compose.yml", ) local_parser.add_argument( "--extra-volumes", @@ -292,7 +292,7 @@ def register_run(parent_parser): "--output-path", "-o", help="Output path of compiled pipeline", - default="docker_compose.yml", + default="docker-compose.yml", ) local_parser.add_argument( "--extra-volumes",