Skip to content

Commit

Permalink
Fix cli creds (#669)
Browse files Browse the repository at this point in the history
PR that unifies the way extra volumes are passed across the
compiler/runner/explorer. `--credentials` argument was removed in favor
of `--extra-volumes` to make it more uniform.
  • Loading branch information
PhilippeMoussalli authored Nov 24, 2023
1 parent f9b4b81 commit 5d1df45
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 44 deletions.
2 changes: 1 addition & 1 deletion docs/data_explorer.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ You can setup the data explorer container with the `fondant explore` CLI command

Where the base path can be either a local or remote base path. Make sure to pass the proper mount credentials arguments when using a remote base path or a local base path
that references remote datasets. You can do that either with `--auth-gcp`, `--auth-aws` or `--auth-azure` to
mount your default local cloud credentials to the pipeline. Or You can also use the `--credentials` argument to mount custom credentials to the local container pipeline.
mount your default local cloud credentials to the pipeline. You can also use the `--extra-volumes` flag to specify credentials or local files you need to mount.

Example:

Expand Down
28 changes: 14 additions & 14 deletions src/fondant/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ def get_cloud_credentials(args) -> t.Optional[str]:
return CloudCredentialsMount.AWS.value
if args.auth_azure:
return CloudCredentialsMount.AZURE.value
if args.credentials:
return args.credentials

return None

Expand Down Expand Up @@ -103,7 +101,7 @@ def register_explore(parent_parser):
This will spin up a docker container that hosts a web application that allows you to explore the data produced by a fondant pipeline.
The default address is http://localhost:8501. You can choose both a local and remote base path to explore. If the data that you want to explore is stored remotely, you
should use the --credentials flag to specify the path to a file that contains the credentials to access remote data (for S3, GCS, etc).
should use the --extra-volumes flag to specify credentials or local files you need to mount.
Example:
Expand Down Expand Up @@ -165,17 +163,11 @@ def register_explore(parent_parser):
)

auth_group.add_argument(
"--credentials",
"-c",
type=str,
default=None,
help="""Path mapping of the source (local) and target (docker file system)
credential paths in the format of src:target
More info on
Google Cloud: https://cloud.google.com/docs/authentication/application-default-credentials
AWS: https://docs.aws.amazon.com/sdkref/latest/guide/file-location.html
Azure: https://stackoverflow.com/questions/69010943/how-does-az-login-store-credential-information
""",
"--extra-volumes",
help="""Extra volumes to mount in containers. You can use the --extra-volumes flag to specify extra volumes to mount in the containers this can be used:
- to mount data directories to be used by the pipeline (note that if your pipeline's base_path is local it will already be mounted for you).
- to mount cloud credentials""",
nargs="+",
)

parser.set_defaults(func=explore)
Expand All @@ -189,8 +181,16 @@ def explore(args):
"Docker runtime not found. Please install Docker and try again.",
)

extra_volumes = []

cloud_cred = get_cloud_credentials(args)

if cloud_cred:
extra_volumes.append(cloud_cred)

if args.extra_volumes:
extra_volumes.extend(args.extra_volumes)

run_explorer_app(
base_path=args.base_path,
container=args.container,
Expand Down
57 changes: 36 additions & 21 deletions src/fondant/explore.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,28 @@ def run_explorer_app( # type: ignore
port: int = 8501,
container: str = "fndnt/data_explorer",
tag: str = "latest",
credentials: t.Optional[str] = None,
extra_volumes: t.Union[t.Optional[list], t.Optional[str]] = None,
): # type: ignore
"""Run the data explorer container."""
"""
Run an Explorer App in a Docker container.
Args:
base_path: the base path where the Explorer App will be mounted.
port: The port number to expose the Explorer App. Default is 8501.
container: The Docker container name or image to use. Default is "fndnt/data_explorer".
tag: The tag/version of the Docker container. Default is "latest".
extra_volumes: Extra volumes to mount in containers. You can use the --extra-volumes flag
to specify extra volumes to mount in the containers this can be used:
- to mount data directories to be used by the pipeline (note that if your pipeline's
base_path is local it will already be mounted for you).
- to mount cloud credentials
"""
if extra_volumes is None:
extra_volumes = []

if isinstance(extra_volumes, str):
extra_volumes = [extra_volumes]

cmd = [
"docker",
"run",
Expand All @@ -30,34 +49,23 @@ def run_explorer_app( # type: ignore
f"{port}:8501",
]

# mount the credentials file to the container
if credentials:
# check if path is read only
if not credentials.endswith(":ro"):
credentials += ":ro"

cmd.extend(
[
"-v",
credentials,
],
)

fs, _ = fsspec.core.url_to_fs(base_path)
if isinstance(fs, LocalFileSystem):
logging.info(f"Using local base path: {base_path}")
logging.info(
"This directory will be mounted to /artifacts in the container.",
)
if not credentials:
logging.warning(
"You have not provided a credentials file. If you wish to access data "
"from a cloud provider, mount the credentials file with the --credentials flag.",
)
data_directory_path = Path(base_path).resolve()
host_machine_path = data_directory_path.as_posix()
container_path = os.path.join("/", data_directory_path.name)

# Mount extra volumes to the container
if extra_volumes:
for volume in extra_volumes:
cmd.extend(
["-v", volume],
)

# Mount the local base path to the container
cmd.extend(
["-v", f"{shlex.quote(host_machine_path)}:{shlex.quote(container_path)}"],
Expand All @@ -75,7 +83,7 @@ def run_explorer_app( # type: ignore
)

else:
if credentials is None:
if not extra_volumes:
raise RuntimeError(
None,
f"Specified base path `{base_path}`, Please provide valid credentials when using"
Expand All @@ -84,6 +92,13 @@ def run_explorer_app( # type: ignore

logging.info(f"Using remote base path: {base_path}")

# Mount extra volumes to the container
if extra_volumes:
for volume in extra_volumes:
cmd.extend(
["-v", volume],
)

# add the image name
cmd.extend(
[
Expand Down
16 changes: 8 additions & 8 deletions tests/test_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def remote_path() -> str:


@pytest.fixture()
def credentials() -> str:
def extra_volumes() -> str:
return (
"$HOME/.config/gcloud/application_default_credentials.json:/root/.config/"
"gcloud/application_default_credentials.json"
Expand All @@ -32,15 +32,15 @@ def container_path() -> str:
return "/source"


def test_run_data_explorer_local_base_path(host_path, container_path, credentials):
def test_run_data_explorer_local_base_path(host_path, container_path, extra_volumes):
"""Test that the data explorer can be run with a local base path."""
with patch("subprocess.call") as mock_call:
run_explorer_app(
base_path=host_path,
port=DEFAULT_PORT,
container=DEFAULT_CONTAINER,
tag=DEFAULT_TAG,
credentials=credentials,
extra_volumes=extra_volumes,
)
mock_call.assert_called_once_with(
[
Expand All @@ -54,7 +54,7 @@ def test_run_data_explorer_local_base_path(host_path, container_path, credential
"-p",
"8501:8501",
"-v",
f"{credentials}:ro",
f"{extra_volumes}",
"-v",
f"{Path(host_path).resolve()}:{container_path}",
f"{DEFAULT_CONTAINER}:{DEFAULT_TAG}",
Expand All @@ -65,15 +65,15 @@ def test_run_data_explorer_local_base_path(host_path, container_path, credential
)


def test_run_data_explorer_remote_base_path(remote_path, credentials):
def test_run_data_explorer_remote_base_path(remote_path, extra_volumes):
"""Test that the data explorer can be run with a remote base path."""
with patch("subprocess.call") as mock_call:
run_explorer_app(
base_path=remote_path,
port=DEFAULT_PORT,
container=DEFAULT_CONTAINER,
tag=DEFAULT_TAG,
credentials=credentials,
extra_volumes=extra_volumes,
)

mock_call.assert_called_once_with(
Expand All @@ -88,7 +88,7 @@ def test_run_data_explorer_remote_base_path(remote_path, credentials):
"-p",
"8501:8501",
"-v",
f"{credentials}:ro",
f"{extra_volumes}",
f"{DEFAULT_CONTAINER}:{DEFAULT_TAG}",
"--base_path",
f"{remote_path}",
Expand All @@ -111,5 +111,5 @@ def test_invalid_run_data_explorer_remote_base_path(remote_path):
port=DEFAULT_PORT,
container=DEFAULT_CONTAINER,
tag=DEFAULT_TAG,
credentials=None,
extra_volumes=None,
)

0 comments on commit 5d1df45

Please sign in to comment.