Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added symlink dereferencing in fast packaging and tests #1151

Merged
merged 3 commits into from
Sep 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions flytekit/clis/sdk_in_container/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,16 @@
default="/root",
help="Filesystem path to where the code is copied into within the Dockerfile. look for `COPY . /root` like command.",
)
@click.option(
"--deref-symlinks",
default=False,
is_flag=True,
help="Enables symlink dereferencing when packaging files in fast registration",
)
@click.pass_context
def package(ctx, image_config, source, output, force, fast, in_container_source_path, python_interpreter):
def package(
ctx, image_config, source, output, force, fast, in_container_source_path, python_interpreter, deref_symlinks
):
"""
This command produces a Flyte backend registrable package of all entities in Flyte.
For tasks, one pb file is produced for each task, representing one TaskTemplate object.
Expand All @@ -103,6 +111,6 @@ def package(ctx, image_config, source, output, force, fast, in_container_source_
display_help_with_error(ctx, "No packages to scan for flyte entities. Aborting!")

try:
serialize_and_package(pkgs, serialization_settings, source, output, fast)
serialize_and_package(pkgs, serialization_settings, source, output, fast, deref_symlinks)
except NoSerializableEntitiesError:
click.secho(f"No flyte objects found in packages {pkgs}", fg="yellow")
9 changes: 8 additions & 1 deletion flytekit/clis/sdk_in_container/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@
type=str,
help="Version the package or module is registered with",
)
@click.option(
"--deref-symlinks",
default=False,
is_flag=True,
help="Enables symlink dereferencing when packaging files in fast registration",
)
@click.argument("package-or-module", type=click.Path(exists=True, readable=True, resolve_path=True), nargs=-1)
@click.pass_context
def register(
Expand All @@ -111,6 +117,7 @@ def register(
service_account: str,
raw_data_prefix: str,
version: typing.Optional[str],
deref_symlinks: bool,
package_or_module: typing.Tuple[str],
):
"""
Expand Down Expand Up @@ -142,7 +149,7 @@ def register(
# Create a zip file containing all the entries.
detected_root = find_common_root(package_or_module)
cli_logger.debug(f"Using {detected_root} as root folder for project")
zip_file = fast_package(detected_root, output)
zip_file = fast_package(detected_root, output, deref_symlinks)

# Upload zip file to Admin using FlyteRemote.
md5_bytes, native_url = remote._upload_file(pathlib.Path(zip_file))
Expand Down
10 changes: 8 additions & 2 deletions flytekit/clis/sdk_in_container/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,16 +155,22 @@ def fast(ctx):


@click.command("workflows")
@click.option(
"--deref-symlinks",
default=False,
is_flag=True,
help="Enables symlink dereferencing when packaging files in fast registration",
)
@click.option("-f", "--folder", type=click.Path(exists=True))
@click.pass_context
def fast_workflows(ctx, folder=None):
def fast_workflows(ctx, folder=None, deref_symlinks=False):

if folder:
click.echo(f"Writing output to {folder}")

source_dir = ctx.obj[CTX_LOCAL_SRC_ROOT]
# Write using gzip
archive_fname = fast_package(source_dir, folder)
archive_fname = fast_package(source_dir, folder, deref_symlinks)
click.echo(f"Wrote compressed archive to {archive_fname}")

pkgs = ctx.obj[CTX_PACKAGES]
Expand Down
5 changes: 3 additions & 2 deletions flytekit/tools/fast_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@
file_access = FlyteContextManager.current_context().file_access


def fast_package(source: os.PathLike, output_dir: os.PathLike) -> os.PathLike:
def fast_package(source: os.PathLike, output_dir: os.PathLike, deref_symlinks: bool = False) -> os.PathLike:
"""
Takes a source directory and packages everything not covered by common ignores into a tarball
named after a hexdigest of the included files.
:param os.PathLike source:
:param os.PathLike output_dir:
:param bool deref_symlinks: Enables dereferencing symlinks when packaging directory
:return os.PathLike:
"""
ignore = IgnoreGroup(source, [GitIgnore, DockerIgnore, StandardIgnore])
Expand All @@ -41,7 +42,7 @@ def fast_package(source: os.PathLike, output_dir: os.PathLike) -> os.PathLike:

with tempfile.TemporaryDirectory() as tmp_dir:
tar_path = os.path.join(tmp_dir, "tmp.tar")
with tarfile.open(tar_path, "w") as tar:
with tarfile.open(tar_path, "w", dereference=deref_symlinks) as tar:
tar.add(source, arcname="", filter=lambda x: ignore.tar_filter(tar_strip_file_attributes(x)))
with gzip.GzipFile(filename=archive_fname, mode="wb", mtime=0) as gzipped:
with open(tar_path, "rb") as tar_file:
Expand Down
7 changes: 5 additions & 2 deletions flytekit/tools/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,15 @@ def package(
source: str = ".",
output: str = "./flyte-package.tgz",
fast: bool = False,
deref_symlinks: bool = False,
):
"""
Package the given entities and the source code (if fast is enabled) into a package with the given name in output
:param registrable_entities: Entities that can be serialized
:param source: source folder
:param output: output package name with suffix
:param fast: fast enabled implies source code is bundled
:param deref_symlinks: if enabled then symlinks are dereferenced during packaging
"""
if not registrable_entities:
raise NoSerializableEntitiesError("Nothing to package")
Expand All @@ -95,7 +97,7 @@ def package(
if os.path.abspath(output).startswith(os.path.abspath(source)) and os.path.exists(output):
click.secho(f"{output} already exists within {source}, deleting and re-creating it", fg="yellow")
os.remove(output)
archive_fname = fast_registration.fast_package(source, output_tmpdir)
archive_fname = fast_registration.fast_package(source, output_tmpdir, deref_symlinks)
click.secho(f"Fast mode enabled: compressed archive {archive_fname}", dim=True)

with tarfile.open(output, "w:gz") as tar:
Expand All @@ -110,13 +112,14 @@ def serialize_and_package(
source: str = ".",
output: str = "./flyte-package.tgz",
fast: bool = False,
deref_symlinks: bool = False,
options: typing.Optional[Options] = None,
):
"""
Fist serialize and then package all entities
"""
registrable_entities = serialize(pkgs, settings, source, options=options)
package(registrable_entities, source, output, fast)
package(registrable_entities, source, output, fast, deref_symlinks)


def register(
Expand Down
26 changes: 25 additions & 1 deletion tests/flytekit/unit/tools/test_fast_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ def flyte_project(tmp_path):
"workflows": {
"__pycache__": {"some.pyc": ""},
"hello_world.py": "print('Hello World!')",
}
},
},
"utils": {
"util.py": "print('Hello from utils!')",
},
".venv": {"lots": "", "of": "", "packages": ""},
".env": "supersecret",
Expand All @@ -35,6 +38,7 @@ def flyte_project(tmp_path):
}

make_tree(tmp_path, tree)
os.symlink(str(tmp_path) + "/utils/util.py", str(tmp_path) + "/src/util")
subprocess.run(["git", "init", str(tmp_path)])
return tmp_path

Expand All @@ -48,9 +52,29 @@ def test_package(flyte_project, tmp_path):
".gitignore",
"keep.foo",
"src",
"src/util",
"src/workflows",
"src/workflows/hello_world.py",
"utils",
"utils/util.py",
]
util = tar.getmember("src/util")
assert util.issym()
assert str(os.path.basename(archive_fname)).startswith(FAST_PREFIX)
assert str(archive_fname).endswith(FAST_FILEENDING)


def test_package_with_symlink(flyte_project, tmp_path):
archive_fname = fast_package(source=flyte_project / "src", output_dir=tmp_path, deref_symlinks=True)
with tarfile.open(archive_fname, dereference=True) as tar:
assert tar.getnames() == [
"", # tar root, output removes leading '/'
"util",
"workflows",
"workflows/hello_world.py",
]
util = tar.getmember("util")
assert util.isfile()
assert str(os.path.basename(archive_fname)).startswith(FAST_PREFIX)
assert str(archive_fname).endswith(FAST_FILEENDING)

Expand Down