From e270d8cd0007795ce1a05572413e18089b4d05ca Mon Sep 17 00:00:00 2001
From: Robbe Sneyders
Date: Tue, 17 Oct 2023 12:39:21 +0200
Subject: [PATCH] Add fondant build command

Add a `fondant build` command which builds the image of a component,
pushes it to the registry, and updates the image name in the
`fondant_component.yaml` of the component.

The command exits with a clear message when the build or push reports an
error, when a build argument is malformed, or when no tag is provided.
Image names containing a registry port (eg. `localhost:5000/image`) are
supported.

---
 pyproject.toml                |   2 +
 src/fondant/build.py          | 167 ++++++++++++++++++++++++++++++++++
 src/fondant/cli.py            |  73 ++++++++++++++-
 src/fondant/component_spec.py |   6 +-
 4 files changed, 246 insertions(+), 2 deletions(-)
 create mode 100644 src/fondant/build.py

diff --git a/pyproject.toml b/pyproject.toml
index a8175533a..003da00f5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,6 +51,7 @@ fsspec = { version = ">= 2023.4.0", optional = true}
 gcsfs = { version = ">= 2023.4.0", optional = true }
 s3fs = { version = ">= 2023.4.0", optional = true }
 adlfs = { version = ">= 2023.4.0", optional = true }
+docker = {version = ">= 6.1.3", optional = true }
 kfp = { version = "2.3.0", optional = true, extras =["kubernetes"] }
 pandas = { version = ">= 1.3.5", optional = true }
 google-cloud-aiplatform = { version = "1.34.0", optional = true}
@@ -61,6 +62,7 @@ azure = ["fsspec", "adlfs"]
 gcp = ["fsspec", "gcsfs"]
 kfp = ["kfp"]
 vertex = ["kfp", "google-cloud-aiplatform"]
+docker = ["docker"]
 
 [tool.poetry.group.test.dependencies]
 pre-commit = "^3.1.1"
diff --git a/src/fondant/build.py b/src/fondant/build.py
new file mode 100644
index 000000000..cd1ecfa73
--- /dev/null
+++ b/src/fondant/build.py
@@ -0,0 +1,167 @@
+"""Module holding implementation to build Fondant components, used by the `fondant build`
+command.
+"""
+import logging
+import re
+import typing as t
+from pathlib import Path
+
+from fondant.pipeline import ComponentOp
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+def _split_image_name(image: str) -> t.Tuple[str, t.Optional[str]]:
+    """Split an image reference into `(repository, tag)`.
+
+    A `:` that is followed by a `/` belongs to a registry port (eg. `localhost:5000/image`)
+    and does not separate a tag. The returned tag is `None` if the reference has no tag.
+    """
+    repository, separator, tag = image.rpartition(":")
+    if not separator or "/" in tag:
+        return image, None
+    return repository, tag
+
+
+def _raise_on_error(chunk: t.Dict[str, t.Any], *, action: str) -> None:
+    """Exit if a decoded log chunk streamed by the docker daemon reports an error.
+
+    The low-level docker API does not raise when a build or push fails, the failure is only
+    reported in the streamed logs.
+    """
+    if "error" in chunk:
+        msg = f"Failed to {action} image: {chunk['error']}"
+        raise SystemExit(msg)
+
+
+def build_component(  # ruff: noqa: PLR0912, PLR0915
+    component_dir: Path,
+    *,
+    tag: str,
+    build_args: t.List[str],
+    nocache: bool = False,
+    pull: bool = False,
+    target: t.Optional[str] = None,
+) -> None:
+    """Build the image of a component, push it, and update the image in its component spec.
+
+    Args:
+        component_dir: Directory containing the component spec and `Dockerfile`
+        tag: Tag for the built image. Used as the full image name if it contains a `:`,
+            otherwise combined with the image repository read from the component spec.
+        build_args: Build arguments to pass to the build, each formatted as `key=value`
+        nocache: Disable the cache during building
+        pull: Download any updates to the `FROM` image
+        target: Name of the build-stage to build in a multi-stage Dockerfile
+
+    Raises:
+        SystemExit: If `docker` is not installed, no `Dockerfile` is detected, a build
+            argument is malformed, the docker daemon can't be reached, or the build or push
+            reports an error.
+    """
+    try:
+        import docker
+    except ImportError:
+        msg = (
+            "You need to install `docker` to use the `fondant build` command, you can install "
+            "it with `pip install fondant[docker]`"
+        )
+        raise SystemExit(
+            msg,
+        )
+
+    component_op = ComponentOp(component_dir)
+    component_spec = component_op.component_spec
+
+    if component_op.dockerfile_path is None:
+        msg = (
+            f"Could not detect a `Dockerfile` in {component_dir}. Please make sure it is placed "
+            f"at the root of your component_dir and named `Dockerfile`."
+        )
+        raise SystemExit(msg)
+
+    if ":" in tag:
+        logger.info("Detected `:` in tag")
+        full_image_name = tag
+    else:
+        logger.info("Did not detect `:` in tag")
+        logger.info("Extracting image name from `component_spec.yaml`")
+        repository, _ = _split_image_name(component_spec.image)
+        full_image_name = f"{repository}:{tag}"
+
+    logger.info(f"Assuming full image name: {full_image_name}")
+
+    logger.info("Building image...")
+    # Convert build args from ["key=value", ...] to {"key": "value", ...}
+    build_kwargs = {}
+    for arg in build_args:
+        key, separator, value = arg.partition("=")
+        if not separator:
+            msg = f"Invalid build argument `{arg}`, expected format {{key}}={{value}}"
+            raise SystemExit(msg)
+        build_kwargs[key] = value
+
+    try:
+        docker_client = docker.from_env()
+    except docker.errors.DockerException:
+        for url in [
+            "/var/run/docker.sock",
+            Path.home() / ".docker/desktop/docker.sock",
+        ]:
+            base_url = f"unix://{url}"
+            try:
+                docker_client = docker.DockerClient(base_url=base_url)
+                break
+            except docker.errors.DockerException:
+                continue
+        else:
+            msg = "Could not connect to docker daemon, is it running?"
+            raise SystemExit(msg)
+
+    logs = docker_client.api.build(
+        path=str(component_dir),
+        tag=full_image_name,
+        buildargs=build_kwargs,
+        nocache=nocache,
+        pull=pull,
+        target=target,
+        decode=True,
+    )
+
+    for chunk in logs:
+        _raise_on_error(chunk, action="build")
+        if "stream" in chunk:
+            for line in chunk["stream"].splitlines():
+                logger.info(line)
+
+    logger.info("Pushing image...")
+    # The tag is `None` if the full image name does not contain one
+    repository, push_tag = _split_image_name(full_image_name)
+    logs = docker_client.api.push(repository, tag=push_tag, stream=True, decode=True)
+
+    for chunk in logs:
+        _raise_on_error(chunk, action="push")
+        message = chunk.get("status", "")
+        if "progress" in chunk:
+            message += " | " + chunk["progress"]
+        logger.info(message)
+
+    logger.info("Updating image name in component_spec")
+    # Read and write with `re.sub` to prevent reformatting of file with yaml
+    with open(component_dir / component_op.COMPONENT_SPEC_NAME, "r+") as f:
+        content = f.read()
+        f.seek(0)
+        # Only replace the first top-level `image` key, the replacement is passed as a function
+        # so the image name is never interpreted as a regex template
+        content = re.sub(
+            r"^image: [^\n]*",
+            lambda _: f"image: {full_image_name}",
+            content,
+            count=1,
+            flags=re.MULTILINE,
+        )
+        f.write(content)
+        f.truncate()
+
+    logger.info("Done")
diff --git a/src/fondant/cli.py b/src/fondant/cli.py
index 49ffa4a2b..c09ded51d 100644
--- a/src/fondant/cli.py
+++ b/src/fondant/cli.py
@@ -23,8 +23,10 @@
 import textwrap
 import typing as t
 from collections import defaultdict
+from pathlib import Path
 from types import ModuleType
 
+from fondant.build import build_component
 from fondant.compiler import DockerCompiler, KubeFlowCompiler, VertexCompiler
 from fondant.component import BaseComponent, Component
 from fondant.executor import ExecutorFactory
@@ -66,6 +68,7 @@ def entrypoint():
     )
     subparsers = parser.add_subparsers()
     register_explore(subparsers)
+    register_build(subparsers)
     register_execute(subparsers)
     register_compile(subparsers)
     register_run(subparsers)
@@ -164,6 +167,74 @@ def explore(args):
     )
 
 
+def register_build(parent_parser):
+    """Register the `build` subcommand and its arguments on the parent parser."""
+    parser = parent_parser.add_parser(
+        "build",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=textwrap.dedent(
+            """
+        Build a component and push it to the registry. The image name in the
+        `fondant_component.yaml` will automatically be updated to use the new image.
+
+        Example:
+
+        fondant build components/my-component --tag my-tag
+        """,
+        ),
+    )
+    parser.add_argument(
+        "component_dir",
+        type=Path,
+        help="""Path to the directory containing the component code, including a
+        `fondant_component.yaml` and `Dockerfile`.""",
+    )
+    parser.add_argument(
+        "--tag",
+        "-t",
+        type=str,
+        required=True,
+        help="Tag to add to built container. If the tag contains a `:`, it will be used as the "
+        "full name for the image. If it does not contain a `:`, the image base name will be "
+        "read from the `fondant_component.yaml` and combined into `base_name:tag`.",
+    )
+    parser.add_argument(
+        "--build-arg",
+        action="append",
+        help="Build arguments to pass to `docker build`. Format {key}={value}, can be repeated.",
+        default=[],
+    )
+    parser.add_argument(
+        "--nocache",
+        action="store_true",
+        help="Disable cache during building.",
+    )
+    parser.add_argument(
+        "--pull",
+        action="store_true",
+        help="Downloads any updates to the FROM image in Dockerfiles.",
+    )
+    parser.add_argument(
+        "--target",
+        type=str,
+        help="Name of the build-stage to build in a multi-stage Dockerfile.",
+    )
+
+    parser.set_defaults(func=build)
+
+
+def build(args):
+    """Run `build_component` with the parsed command line arguments."""
+    build_component(
+        args.component_dir,
+        tag=args.tag,
+        build_args=args.build_arg,
+        nocache=args.nocache,
+        pull=args.pull,
+        target=args.target,
+    )
+
+
 def register_compile(parent_parser):
     parser = parent_parser.add_parser(
         "compile",
@@ -221,7 +292,7 @@ def register_compile(parent_parser):
     local_parser.add_argument(
         "--build-arg",
         action="append",
-        help="Build arguments to pass to `docker build`. Format {key}={value}.",
+        help="Build arguments to pass to `docker build`. Format {key}={value}, can be repeated.",
         default=[],
     )
 
diff --git a/src/fondant/component_spec.py b/src/fondant/component_spec.py
index f6dcc967a..01445d27d 100644
--- a/src/fondant/component_spec.py
+++ b/src/fondant/component_spec.py
@@ -156,9 +156,13 @@ def description(self):
         return self._specification["description"]
 
     @property
-    def image(self):
+    def image(self) -> str:
         return self._specification["image"]
 
+    @image.setter
+    def image(self, value: str) -> None:
+        self._specification["image"] = value
+
     @property
     def index(self):
         return ComponentSubset({"fields": {}})