Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move cloud auth to sdk #779

Merged
merged 2 commits into from
Jan 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 6 additions & 28 deletions docs/runners/local.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,62 +53,40 @@ about this in the [installation](../guides/installation.md) guide.
=== "Python"

```python
from fondant.pipeline.compiler import DockerCompiler
from fondant.pipeline.runner import DockerRunner

EXTRA_VOLUMES = <str_or_list_of_optional_extra_volumes_to_mount>
compiler = DockerCompiler(extra_volumes=EXTRA_VOLUMES)
compiler.compile(pipeline=<pipeline_object>)

runner = DockerRunner()
runner.run(input_spec=<path_to_compiled_spec>)
runner.run(extra_volumes=<str_or_list_of_optional_extra_volumes_to_mount>)
```

If you want to use remote paths (GCS, S3, etc.) you can pass the default local cloud credentials to the pipeline.
If you want to use remote paths (GCS, S3, etc.) you can use the authentication arguments
when running your pipeline:

=== "GCP"

```python
from fondant.pipeline.compiler import DockerCompiler
from fondant.pipeline.runner import DockerRunner
from fondant.core.schema import CloudCredentialsMount

gcp_mount_dir = CloudCredentialsMount.GCP.value
compiler = DockerCompiler(extra_volumes=gcp_mount_dir)
compiler.compile(pipeline=<pipeline_object>)

runner = DockerRunner()
runner.run(input_spec=<path_to_compiled_spec>)
runner.run(auth_gcp=True)
```

=== "AWS"

```python
from fondant.pipeline.compiler import DockerCompiler
from fondant.pipeline.runner import DockerRunner
from fondant.core.schema import CloudCredentialsMount

aws_mount_dir = CloudCredentialsMount.AWS.value
compiler = DockerCompiler(extra_volumes=aws_mount_dir)
compiler.compile(pipeline=<pipeline_object>)

runner = DockerRunner()
runner.run(input_spec=<path_to_compiled_spec>)
runner.run(auth_aws=True)
```

=== "Azure"

```python
from fondant.pipeline.compiler import DockerCompiler
from fondant.pipeline.runner import DockerRunner
from fondant.core.schema import CloudCredentialsMount

azure_mount_dir = CloudCredentialsMount.AZURE.value
compiler = DockerCompiler(extra_volumes=azure_mount_dir)
compiler.compile(pipeline=<pipeline_object>)

runner = DockerRunner()
runner.run(input_spec=<path_to_compiled_spec>)
runner.run(auth_azure=True)
```

This will mount your default local cloud credentials to the pipeline. Make sure you are authenticated locally before running the pipeline and
Expand Down
33 changes: 9 additions & 24 deletions src/fondant/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,6 @@
logger = logging.getLogger(__name__)


# Map the CLI auth flags on `args` to a credentials volume-mount string.
# Flags are checked in the order GCP, AWS, Azure and the first truthy one wins,
# so setting several flags at once silently selects only the first match.
# Returns None when no auth flag is set.
def get_cloud_credentials(args) -> t.Optional[str]:
if args.auth_gcp:
return CloudCredentialsMount.GCP.value
if args.auth_aws:
return CloudCredentialsMount.AWS.value
if args.auth_azure:
return CloudCredentialsMount.AZURE.value

# No provider selected: callers check the result for truthiness before mounting.
return None


def entrypoint():
"""Entrypoint for the fondant CLI."""
parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -204,11 +193,6 @@ def start_explore(args):

extra_volumes = []

cloud_cred = get_cloud_credentials(args)

if cloud_cred:
extra_volumes.append(cloud_cred)

if args.extra_volumes:
extra_volumes.extend(args.extra_volumes)

Expand All @@ -218,6 +202,9 @@ def start_explore(args):
tag=args.tag,
port=args.port,
extra_volumes=extra_volumes,
auth_gcp=args.auth_gcp,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about combining these arguments into a single one, for instance auth_provider=AZURE,AWS,GCP? Might be easier to update the different auth options without changing the interface.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good suggestion! Implemented

auth_aws=args.auth_aws,
auth_azure=args.auth_azure,
)


Expand Down Expand Up @@ -449,21 +436,20 @@ def compile_local(args):
from fondant.pipeline.compiler import DockerCompiler

extra_volumes = []
cloud_cred = get_cloud_credentials(args)

if args.extra_volumes:
extra_volumes.extend(args.extra_volumes)

if cloud_cred:
extra_volumes.append(cloud_cred)

pipeline = pipeline_from_string(args.ref)
compiler = DockerCompiler()
compiler.compile(
pipeline=pipeline,
extra_volumes=extra_volumes,
output_path=args.output_path,
build_args=args.build_arg,
auth_gcp=args.auth_gcp,
auth_aws=args.auth_aws,
auth_azure=args.auth_azure,
)


Expand Down Expand Up @@ -661,14 +647,10 @@ def run_local(args):
from fondant.pipeline.runner import DockerRunner

extra_volumes = []
cloud_cred = get_cloud_credentials(args)

if args.extra_volumes:
extra_volumes.extend(args.extra_volumes)

if cloud_cred:
extra_volumes.append(cloud_cred)

try:
ref = pipeline_from_string(args.ref)
except ModuleNotFoundError:
Expand All @@ -679,6 +661,9 @@ def run_local(args):
input=ref,
extra_volumes=extra_volumes,
build_args=args.build_arg,
auth_gcp=args.auth_gcp,
auth_aws=args.auth_aws,
auth_azure=args.auth_azure,
)


Expand Down
60 changes: 51 additions & 9 deletions src/fondant/core/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
import re
import typing as t
from dataclasses import dataclass
from enum import Enum
from enum import Enum, auto

import pyarrow as pa

from fondant.core.exceptions import InvalidTypeSchema
from fondant.core.exceptions import InvalidPipelineDefinition, InvalidTypeSchema


@dataclass
Expand All @@ -30,13 +30,55 @@ class DockerVolume:


class CloudCredentialsMount(Enum):
    """Cloud providers whose default local credentials can be mounted as a volume.

    Members are plain identifiers; the actual ``<host path>:<container path>``
    volume string is computed on demand by :meth:`get_path`, so the home
    directory is resolved at call time rather than at import time.
    """

    AWS = auto()
    GCP = auto()
    AZURE = auto()

    def get_path(self) -> t.Optional[str]:
        """Return the ``<host path>:<container path>`` volume string for this provider.

        The host side is rooted at the current user's home directory; the
        container side is rooted at ``/root``.
        """
        home_dir = os.path.expanduser("~")

        if self == CloudCredentialsMount.AWS:
            # NOTE(review): host path is ~/credentials, not ~/.aws/credentials —
            # kept as-is; confirm this is intended.
            return f"{home_dir}/credentials:/root/.aws/credentials"

        if self == CloudCredentialsMount.GCP:
            return (
                f"{home_dir}/.config/gcloud/application_default_credentials.json:"
                f"/root/.config/gcloud/application_default_credentials.json"
            )

        if self == CloudCredentialsMount.AZURE:
            return f"{home_dir}/.azure:/root/.azure"

        return None

    @staticmethod
    def get_cloud_credentials(
        *,
        auth_gcp: t.Optional[bool] = None,
        auth_azure: t.Optional[bool] = None,
        auth_aws: t.Optional[bool] = None,
    ) -> t.Optional[str]:
        """Get the appropriate cloud credentials based on authentication flags.

        Args:
            auth_gcp: Flag to enable authentication with GCP
            auth_azure: Flag to enable authentication with Azure
            auth_aws: Flag to enable authentication with AWS

        Returns:
            The volume string of the single enabled provider, or ``None`` when
            no flag is enabled.

        Raises:
            InvalidPipelineDefinition: if more than one flag is enabled.
        """
        # Use the same truthiness rule for counting and for selecting, so a
        # truthy non-bool flag cannot slip past the mutual-exclusion check.
        enabled = [
            provider
            for provider, flag in (
                (CloudCredentialsMount.GCP, auth_gcp),
                (CloudCredentialsMount.AWS, auth_aws),
                (CloudCredentialsMount.AZURE, auth_azure),
            )
            if flag
        ]

        if len(enabled) > 1:
            msg = (
                "You can only provide one of the following authentication flags:"
                " auth_gcp, auth_aws, auth_azure"
            )
            raise InvalidPipelineDefinition(
                msg,
            )

        return enabled[0].get_path() if enabled else None


"""
Expand Down
22 changes: 21 additions & 1 deletion src/fondant/explore.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import yaml
from fsspec.implementations.local import LocalFileSystem

from fondant.core.schema import DockerVolume
from fondant.core.schema import CloudCredentialsMount, DockerVolume

CONTAINER = "fndnt/data_explorer"
PORT = 8501
Expand All @@ -29,17 +29,28 @@ def _generate_explorer_spec(
container: str = CONTAINER,
tag: t.Optional[str] = None,
extra_volumes: t.Union[t.Optional[list], t.Optional[str]] = None,
auth_gcp: t.Optional[bool] = None,
auth_aws: t.Optional[bool] = None,
auth_azure: t.Optional[bool] = None,
) -> t.Dict[str, t.Any]:
"""Generate a Docker Compose specification for the Explorer App."""
if tag is None:
tag = version("fondant") if version("fondant") != "0.1.dev0" else "latest"

cloud_creds = CloudCredentialsMount.get_cloud_credentials(
auth_gcp=auth_gcp,
auth_azure=auth_azure,
auth_aws=auth_aws,
)
if extra_volumes is None:
extra_volumes = []

if isinstance(extra_volumes, str):
extra_volumes = [extra_volumes]

if cloud_creds:
extra_volumes.append(cloud_creds)

# Mount extra volumes to the container
volumes: t.List[t.Union[str, dict]] = []

Expand Down Expand Up @@ -105,6 +116,9 @@ def run_explorer_app( # type: ignore # noqa: PLR0913
output_path: str = OUTPUT_PATH,
tag: t.Optional[str] = None,
extra_volumes: t.Union[t.Optional[list], t.Optional[str]] = None,
auth_gcp: t.Optional[bool] = None,
auth_aws: t.Optional[bool] = None,
auth_azure: t.Optional[bool] = None,
): # type: ignore
"""
Run an Explorer App in a Docker container.
Expand All @@ -121,6 +135,9 @@ def run_explorer_app( # type: ignore # noqa: PLR0913
- to mount data directories to be used by the pipeline (note that if your pipeline's
base_path is local it will already be mounted for you).
- to mount cloud credentials
auth_gcp: Flag to enable authentication with GCP
auth_aws: Flag to enable authentication with AWS
auth_azure: Flag to enable authentication with Azure
"""
os.makedirs(".fondant", exist_ok=True)

Expand All @@ -130,6 +147,9 @@ def run_explorer_app( # type: ignore # noqa: PLR0913
container=container,
tag=tag,
extra_volumes=extra_volumes,
auth_gcp=auth_gcp,
auth_aws=auth_aws,
auth_azure=auth_azure,
)

with open(output_path, "w") as outfile:
Expand Down
18 changes: 17 additions & 1 deletion src/fondant/pipeline/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from fondant.core.exceptions import InvalidPipelineDefinition
from fondant.core.manifest import Metadata
from fondant.core.schema import DockerVolume
from fondant.core.schema import CloudCredentialsMount, DockerVolume
from fondant.pipeline import (
VALID_ACCELERATOR_TYPES,
VALID_VERTEX_ACCELERATOR_TYPES,
Expand Down Expand Up @@ -56,6 +56,9 @@ def compile(
output_path: str = "docker-compose.yml",
extra_volumes: t.Union[t.Optional[list], t.Optional[str]] = None,
build_args: t.Optional[t.List[str]] = None,
auth_gcp: t.Optional[bool] = None,
auth_aws: t.Optional[bool] = None,
auth_azure: t.Optional[bool] = None,
) -> None:
"""Compile a pipeline to docker-compose spec and save it to a specified output path.

Expand All @@ -66,14 +69,27 @@ def compile(
https://docs.docker.com/compose/compose-file/05-services/#short-syntax-5)
to mount in the docker-compose spec.
build_args: List of build arguments to pass to docker
auth_gcp: Flag to enable authentication with GCP
auth_aws: Flag to enable authentication with AWS
auth_azure: Flag to enable authentication with Azure
"""
cloud_creds = CloudCredentialsMount.get_cloud_credentials(
auth_gcp=auth_gcp,
auth_azure=auth_azure,
auth_aws=auth_aws,
)

if extra_volumes is None:
extra_volumes = []

if isinstance(extra_volumes, str):
extra_volumes = [extra_volumes]

if cloud_creds:
extra_volumes.append(cloud_creds)

logger.info(f"Compiling {pipeline.name} to {output_path}")

spec = self._generate_spec(
pipeline,
extra_volumes=extra_volumes,
Expand Down
9 changes: 9 additions & 0 deletions src/fondant/pipeline/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ def run(
*,
extra_volumes: t.Union[t.Optional[list], t.Optional[str]] = None,
build_args: t.Optional[t.List[str]] = None,
auth_gcp: t.Optional[bool] = None,
auth_aws: t.Optional[bool] = None,
auth_azure: t.Optional[bool] = None,
) -> None:
"""Run a pipeline, either from a compiled docker-compose spec or from a fondant pipeline.

Expand All @@ -65,6 +68,9 @@ def run(
https://docs.docker.com/compose/compose-file/05-services/#short-syntax-5)
to mount in the docker-compose spec.
build_args: List of build arguments to pass to docker
auth_gcp: Flag to enable authentication with GCP
auth_aws: Flag to enable authentication with AWS
auth_azure: Flag to enable authentication with Azure
"""
self.check_docker_install()
self.check_docker_compose_install()
Expand All @@ -81,6 +87,9 @@ def run(
output_path=output_path,
extra_volumes=extra_volumes,
build_args=build_args,
auth_gcp=auth_gcp,
auth_aws=auth_aws,
auth_azure=auth_azure,
)
self._run(output_path)
else:
Expand Down
Loading
Loading