diff --git a/flytekit/clis/sdk_in_container/package.py b/flytekit/clis/sdk_in_container/package.py index 2a884e29da..1a849d0681 100644 --- a/flytekit/clis/sdk_in_container/package.py +++ b/flytekit/clis/sdk_in_container/package.py @@ -77,8 +77,16 @@ default="/root", help="Filesystem path to where the code is copied into within the Dockerfile. look for `COPY . /root` like command.", ) +@click.option( + "--deref-symlinks", + default=False, + is_flag=True, + help="Enables symlink dereferencing when packaging files in fast registration", +) @click.pass_context -def package(ctx, image_config, source, output, force, fast, in_container_source_path, python_interpreter): +def package( + ctx, image_config, source, output, force, fast, in_container_source_path, python_interpreter, deref_symlinks +): """ This command produces a Flyte backend registrable package of all entities in Flyte. For tasks, one pb file is produced for each task, representing one TaskTemplate object. @@ -103,6 +111,6 @@ def package(ctx, image_config, source, output, force, fast, in_container_source_ display_help_with_error(ctx, "No packages to scan for flyte entities. Aborting!") try: - serialize_and_package(pkgs, serialization_settings, source, output, fast) + serialize_and_package(pkgs, serialization_settings, source, output, fast, deref_symlinks) except NoSerializableEntitiesError: click.secho(f"No flyte objects found in packages {pkgs}", fg="yellow") diff --git a/flytekit/clis/sdk_in_container/register.py b/flytekit/clis/sdk_in_container/register.py index 03e00d7896..024b70edde 100644 --- a/flytekit/clis/sdk_in_container/register.py +++ b/flytekit/clis/sdk_in_container/register.py @@ -99,6 +99,12 @@ type=str, help="Version the package or module is registered with", ) +@click.option( + "--deref-symlinks", + default=False, + is_flag=True, + help="Enables symlink dereferencing when packaging files in fast registration", +) @click.argument("package-or-module", type=click.Path(exists=True, readable=True, resolve_path=True), nargs=-1) @click.pass_context def register( @@ -111,6 +117,7 @@ def register( service_account: str, raw_data_prefix: str, version: typing.Optional[str], + deref_symlinks: bool, package_or_module: typing.Tuple[str], ): """ @@ -142,7 +149,7 @@ def register( # Create a zip file containing all the entries. detected_root = find_common_root(package_or_module) cli_logger.debug(f"Using {detected_root} as root folder for project") - zip_file = fast_package(detected_root, output) + zip_file = fast_package(detected_root, output, deref_symlinks) # Upload zip file to Admin using FlyteRemote. md5_bytes, native_url = remote._upload_file(pathlib.Path(zip_file)) diff --git a/flytekit/clis/sdk_in_container/serialize.py b/flytekit/clis/sdk_in_container/serialize.py index 0b12d6b406..33c0b47940 100644 --- a/flytekit/clis/sdk_in_container/serialize.py +++ b/flytekit/clis/sdk_in_container/serialize.py @@ -155,16 +155,22 @@ def fast(ctx): @click.command("workflows") +@click.option( + "--deref-symlinks", + default=False, + is_flag=True, + help="Enables symlink dereferencing when packaging files in fast registration", +) @click.option("-f", "--folder", type=click.Path(exists=True)) @click.pass_context -def fast_workflows(ctx, folder=None): +def fast_workflows(ctx, folder=None, deref_symlinks=False): if folder: click.echo(f"Writing output to {folder}") source_dir = ctx.obj[CTX_LOCAL_SRC_ROOT] # Write using gzip - archive_fname = fast_package(source_dir, folder) + archive_fname = fast_package(source_dir, folder, deref_symlinks) click.echo(f"Wrote compressed archive to {archive_fname}") pkgs = ctx.obj[CTX_PACKAGES] diff --git a/flytekit/tools/fast_registration.py b/flytekit/tools/fast_registration.py index c4ac31a01a..34faadc58c 100644 --- a/flytekit/tools/fast_registration.py +++ b/flytekit/tools/fast_registration.py @@ -21,12 +21,13 @@ file_access = FlyteContextManager.current_context().file_access -def fast_package(source: os.PathLike, output_dir: os.PathLike) -> os.PathLike: +def fast_package(source: os.PathLike, output_dir: os.PathLike, deref_symlinks: bool = False) -> os.PathLike: """ Takes a source directory and packages everything not covered by common ignores into a tarball named after a hexdigest of the included files. :param os.PathLike source: :param os.PathLike output_dir: + :param bool deref_symlinks: Enables dereferencing symlinks when packaging directory :return os.PathLike: """ ignore = IgnoreGroup(source, [GitIgnore, DockerIgnore, StandardIgnore]) @@ -41,7 +42,7 @@ def fast_package(source: os.PathLike, output_dir: os.PathLike) -> os.PathLike: with tempfile.TemporaryDirectory() as tmp_dir: tar_path = os.path.join(tmp_dir, "tmp.tar") - with tarfile.open(tar_path, "w") as tar: + with tarfile.open(tar_path, "w", dereference=deref_symlinks) as tar: tar.add(source, arcname="", filter=lambda x: ignore.tar_filter(tar_strip_file_attributes(x))) with gzip.GzipFile(filename=archive_fname, mode="wb", mtime=0) as gzipped: with open(tar_path, "rb") as tar_file: diff --git a/flytekit/tools/repo.py b/flytekit/tools/repo.py index 167c772184..ceaee36435 100644 --- a/flytekit/tools/repo.py +++ b/flytekit/tools/repo.py @@ -75,6 +75,7 @@ def package( source: str = ".", output: str = "./flyte-package.tgz", fast: bool = False, + deref_symlinks: bool = False, ): """ Package the given entities and the source code (if fast is enabled) into a package with the given name in output @@ -82,6 +83,7 @@ def package( :param source: source folder :param output: output package name with suffix :param fast: fast enabled implies source code is bundled + :param deref_symlinks: if enabled then symlinks are dereferenced during packaging """ if not registrable_entities: raise NoSerializableEntitiesError("Nothing to package") @@ -95,7 +97,7 @@ def package( if os.path.abspath(output).startswith(os.path.abspath(source)) and os.path.exists(output): click.secho(f"{output} already exists within {source}, deleting and re-creating it", fg="yellow") os.remove(output) - archive_fname = fast_registration.fast_package(source, output_tmpdir) + archive_fname = fast_registration.fast_package(source, output_tmpdir, deref_symlinks) click.secho(f"Fast mode enabled: compressed archive {archive_fname}", dim=True) with tarfile.open(output, "w:gz") as tar: @@ -110,13 +112,14 @@ def serialize_and_package( source: str = ".", output: str = "./flyte-package.tgz", fast: bool = False, + deref_symlinks: bool = False, options: typing.Optional[Options] = None, ): """ Fist serialize and then package all entities """ registrable_entities = serialize(pkgs, settings, source, options=options) - package(registrable_entities, source, output, fast) + package(registrable_entities, source, output, fast, deref_symlinks) def register( diff --git a/tests/flytekit/unit/tools/test_fast_registration.py b/tests/flytekit/unit/tools/test_fast_registration.py index 0b50d6fdcf..aae3995bcb 100644 --- a/tests/flytekit/unit/tools/test_fast_registration.py +++ b/tests/flytekit/unit/tools/test_fast_registration.py @@ -23,7 +23,10 @@ def flyte_project(tmp_path): "workflows": { "__pycache__": {"some.pyc": ""}, "hello_world.py": "print('Hello World!')", - } + }, + }, + "utils": { + "util.py": "print('Hello from utils!')", }, ".venv": {"lots": "", "of": "", "packages": ""}, ".env": "supersecret", @@ -35,6 +38,7 @@ def flyte_project(tmp_path): } make_tree(tmp_path, tree) + os.symlink(str(tmp_path) + "/utils/util.py", str(tmp_path) + "/src/util") subprocess.run(["git", "init", str(tmp_path)]) return tmp_path @@ -48,9 +52,29 @@ def test_package(flyte_project, tmp_path): ".gitignore", "keep.foo", "src", + "src/util", "src/workflows", "src/workflows/hello_world.py", + "utils", + "utils/util.py", + ] + util = tar.getmember("src/util") + assert util.issym() + assert str(os.path.basename(archive_fname)).startswith(FAST_PREFIX) + assert str(archive_fname).endswith(FAST_FILEENDING) + + +def test_package_with_symlink(flyte_project, tmp_path): + archive_fname = fast_package(source=flyte_project / "src", output_dir=tmp_path, deref_symlinks=True) + with tarfile.open(archive_fname, dereference=True) as tar: + assert tar.getnames() == [ + "", # tar root, output removes leading '/' + "util", + "workflows", + "workflows/hello_world.py", ] + util = tar.getmember("util") + assert util.isfile() assert str(os.path.basename(archive_fname)).startswith(FAST_PREFIX) assert str(archive_fname).endswith(FAST_FILEENDING)