Skip to content

Commit

Permalink
Added symlink dereferencing in fast packaging and tests (#1151)
Browse files Browse the repository at this point in the history
* Added symlink dereferencing and tests

Signed-off-by: Vanshika Chowdhary <[email protected]>

* Added flag to register as well

Signed-off-by: Vanshika Chowdhary <[email protected]>

* More flag propagation

Signed-off-by: Vanshika Chowdhary <[email protected]>

Signed-off-by: Vanshika Chowdhary <[email protected]>
Co-authored-by: Vanshika Chowdhary <[email protected]>
  • Loading branch information
vchowdhary and Vanshika Chowdhary authored Sep 9, 2022
1 parent 4368e98 commit 1c75bd1
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 10 deletions.
12 changes: 10 additions & 2 deletions flytekit/clis/sdk_in_container/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,16 @@
default="/root",
help="Filesystem path to where the code is copied into within the Dockerfile. look for `COPY . /root` like command.",
)
@click.option(
"--deref-symlinks",
default=False,
is_flag=True,
help="Enables symlink dereferencing when packaging files in fast registration",
)
@click.pass_context
def package(ctx, image_config, source, output, force, fast, in_container_source_path, python_interpreter):
def package(
ctx, image_config, source, output, force, fast, in_container_source_path, python_interpreter, deref_symlinks
):
"""
This command produces a Flyte backend registrable package of all entities in Flyte.
For tasks, one pb file is produced for each task, representing one TaskTemplate object.
Expand All @@ -103,6 +111,6 @@ def package(ctx, image_config, source, output, force, fast, in_container_source_
display_help_with_error(ctx, "No packages to scan for flyte entities. Aborting!")

try:
serialize_and_package(pkgs, serialization_settings, source, output, fast)
serialize_and_package(pkgs, serialization_settings, source, output, fast, deref_symlinks)
except NoSerializableEntitiesError:
click.secho(f"No flyte objects found in packages {pkgs}", fg="yellow")
9 changes: 8 additions & 1 deletion flytekit/clis/sdk_in_container/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@
type=str,
help="Version the package or module is registered with",
)
@click.option(
"--deref-symlinks",
default=False,
is_flag=True,
help="Enables symlink dereferencing when packaging files in fast registration",
)
@click.argument("package-or-module", type=click.Path(exists=True, readable=True, resolve_path=True), nargs=-1)
@click.pass_context
def register(
Expand All @@ -111,6 +117,7 @@ def register(
service_account: str,
raw_data_prefix: str,
version: typing.Optional[str],
deref_symlinks: bool,
package_or_module: typing.Tuple[str],
):
"""
Expand Down Expand Up @@ -142,7 +149,7 @@ def register(
# Create a zip file containing all the entries.
detected_root = find_common_root(package_or_module)
cli_logger.debug(f"Using {detected_root} as root folder for project")
zip_file = fast_package(detected_root, output)
zip_file = fast_package(detected_root, output, deref_symlinks)

# Upload zip file to Admin using FlyteRemote.
md5_bytes, native_url = remote._upload_file(pathlib.Path(zip_file))
Expand Down
10 changes: 8 additions & 2 deletions flytekit/clis/sdk_in_container/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,16 +155,22 @@ def fast(ctx):


@click.command("workflows")
@click.option(
"--deref-symlinks",
default=False,
is_flag=True,
help="Enables symlink dereferencing when packaging files in fast registration",
)
@click.option("-f", "--folder", type=click.Path(exists=True))
@click.pass_context
def fast_workflows(ctx, folder=None):
def fast_workflows(ctx, folder=None, deref_symlinks=False):

if folder:
click.echo(f"Writing output to {folder}")

source_dir = ctx.obj[CTX_LOCAL_SRC_ROOT]
# Write using gzip
archive_fname = fast_package(source_dir, folder)
archive_fname = fast_package(source_dir, folder, deref_symlinks)
click.echo(f"Wrote compressed archive to {archive_fname}")

pkgs = ctx.obj[CTX_PACKAGES]
Expand Down
5 changes: 3 additions & 2 deletions flytekit/tools/fast_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@
file_access = FlyteContextManager.current_context().file_access


def fast_package(source: os.PathLike, output_dir: os.PathLike) -> os.PathLike:
def fast_package(source: os.PathLike, output_dir: os.PathLike, deref_symlinks: bool = False) -> os.PathLike:
"""
Takes a source directory and packages everything not covered by common ignores into a tarball
named after a hexdigest of the included files.
:param os.PathLike source:
:param os.PathLike output_dir:
:param bool deref_symlinks: Enables dereferencing symlinks when packaging directory
:return os.PathLike:
"""
ignore = IgnoreGroup(source, [GitIgnore, DockerIgnore, StandardIgnore])
Expand All @@ -41,7 +42,7 @@ def fast_package(source: os.PathLike, output_dir: os.PathLike) -> os.PathLike:

with tempfile.TemporaryDirectory() as tmp_dir:
tar_path = os.path.join(tmp_dir, "tmp.tar")
with tarfile.open(tar_path, "w") as tar:
with tarfile.open(tar_path, "w", dereference=deref_symlinks) as tar:
tar.add(source, arcname="", filter=lambda x: ignore.tar_filter(tar_strip_file_attributes(x)))
with gzip.GzipFile(filename=archive_fname, mode="wb", mtime=0) as gzipped:
with open(tar_path, "rb") as tar_file:
Expand Down
7 changes: 5 additions & 2 deletions flytekit/tools/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,15 @@ def package(
source: str = ".",
output: str = "./flyte-package.tgz",
fast: bool = False,
deref_symlinks: bool = False,
):
"""
Package the given entities and the source code (if fast is enabled) into a package with the given name in output
:param registrable_entities: Entities that can be serialized
:param source: source folder
:param output: output package name with suffix
:param fast: fast enabled implies source code is bundled
:param deref_symlinks: if enabled then symlinks are dereferenced during packaging
"""
if not registrable_entities:
raise NoSerializableEntitiesError("Nothing to package")
Expand All @@ -95,7 +97,7 @@ def package(
if os.path.abspath(output).startswith(os.path.abspath(source)) and os.path.exists(output):
click.secho(f"{output} already exists within {source}, deleting and re-creating it", fg="yellow")
os.remove(output)
archive_fname = fast_registration.fast_package(source, output_tmpdir)
archive_fname = fast_registration.fast_package(source, output_tmpdir, deref_symlinks)
click.secho(f"Fast mode enabled: compressed archive {archive_fname}", dim=True)

with tarfile.open(output, "w:gz") as tar:
Expand All @@ -110,13 +112,14 @@ def serialize_and_package(
source: str = ".",
output: str = "./flyte-package.tgz",
fast: bool = False,
deref_symlinks: bool = False,
options: typing.Optional[Options] = None,
):
"""
Fist serialize and then package all entities
"""
registrable_entities = serialize(pkgs, settings, source, options=options)
package(registrable_entities, source, output, fast)
package(registrable_entities, source, output, fast, deref_symlinks)


def register(
Expand Down
26 changes: 25 additions & 1 deletion tests/flytekit/unit/tools/test_fast_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ def flyte_project(tmp_path):
"workflows": {
"__pycache__": {"some.pyc": ""},
"hello_world.py": "print('Hello World!')",
}
},
},
"utils": {
"util.py": "print('Hello from utils!')",
},
".venv": {"lots": "", "of": "", "packages": ""},
".env": "supersecret",
Expand All @@ -35,6 +38,7 @@ def flyte_project(tmp_path):
}

make_tree(tmp_path, tree)
os.symlink(str(tmp_path) + "/utils/util.py", str(tmp_path) + "/src/util")
subprocess.run(["git", "init", str(tmp_path)])
return tmp_path

Expand All @@ -48,9 +52,29 @@ def test_package(flyte_project, tmp_path):
".gitignore",
"keep.foo",
"src",
"src/util",
"src/workflows",
"src/workflows/hello_world.py",
"utils",
"utils/util.py",
]
util = tar.getmember("src/util")
assert util.issym()
assert str(os.path.basename(archive_fname)).startswith(FAST_PREFIX)
assert str(archive_fname).endswith(FAST_FILEENDING)


def test_package_with_symlink(flyte_project, tmp_path):
archive_fname = fast_package(source=flyte_project / "src", output_dir=tmp_path, deref_symlinks=True)
with tarfile.open(archive_fname, dereference=True) as tar:
assert tar.getnames() == [
"", # tar root, output removes leading '/'
"util",
"workflows",
"workflows/hello_world.py",
]
util = tar.getmember("util")
assert util.isfile()
assert str(os.path.basename(archive_fname)).startswith(FAST_PREFIX)
assert str(archive_fname).endswith(FAST_FILEENDING)

Expand Down

0 comments on commit 1c75bd1

Please sign in to comment.