Skip to content

Commit

Permalink
Refactor docker operator attribute validations and docs (#35571)
Browse files Browse the repository at this point in the history
* Refactor docker operator attribute validations and docs

* Network mode: None -> 'none'
  • Loading branch information
Taragolis authored Nov 12, 2023
1 parent 4b63e36 commit 5a6dcfd
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 59 deletions.
9 changes: 1 addition & 8 deletions airflow/providers/docker/decorators/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,7 @@

from airflow.decorators.base import DecoratedOperator, task_decorator_factory
from airflow.providers.docker.operators.docker import DockerOperator

try:
from airflow.utils.decorators import remove_task_decorator

# This can be removed after we move to Airflow 2.4+
except ImportError:
from airflow.utils.python_virtualenv import remove_task_decorator

from airflow.utils.decorators import remove_task_decorator
from airflow.utils.python_virtualenv import write_python_script

if TYPE_CHECKING:
Expand Down
84 changes: 50 additions & 34 deletions airflow/providers/docker/operators/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from docker.errors import APIError
from docker.types import LogConfig, Mount, Ulimit
from dotenv import dotenv_values
from typing_extensions import Literal

from airflow.exceptions import AirflowProviderDeprecationWarning
from airflow.models import BaseOperator
Expand Down Expand Up @@ -96,22 +97,22 @@ class DockerOperator(BaseOperator):
:param environment: Environment variables to set in the container. (templated)
:param private_environment: Private environment variables to set in the container.
These are not templated, and hidden from the website.
:param env_file: Relative path to the .env file with environment variables to set in the container.
:param env_file: Relative path to the ``.env`` file with environment variables to set in the container.
Overridden by variables in the environment parameter. (templated)
:param force_pull: Pull the docker image on every run. Default is False.
:param mem_limit: Maximum amount of memory the container can use.
Either a float value, which represents the limit in bytes,
or a string like ``128m`` or ``1g``.
:param host_tmp_dir: Specify the location of the temporary directory on the host which will
be mapped to tmp_dir. If not provided defaults to using the standard system temp directory.
:param network_mode: Network mode for the container.
It can be one of the following:
bridge - Create new network stack for the container with default docker bridge network
None - No networking for this container
container:<name|id> - Use the network stack of another container specified via <name|id>
host - Use the host network stack. Incompatible with `port_bindings`
'<network-name>|<network-id>' - Connects the container to user created network
(using `docker network create` command)
:param network_mode: Network mode for the container. It can be one of the following:
- ``"bridge"``: Create new network stack for the container with default docker bridge network
- ``"none"``: No networking for this container
- ``"container:<name|id>"``: Use the network stack of another container specified via <name|id>
- ``"host"``: Use the host network stack. Incompatible with `port_bindings`
- ``"<network-name>|<network-id>"``: Connects the container to user created network
(using ``docker network create`` command)
:param tls_ca_cert: Path to a PEM-encoded certificate authority
to secure the docker connection.
:param tls_client_cert: Path to the PEM-encoded certificate
Expand All @@ -138,16 +139,20 @@ class DockerOperator(BaseOperator):
:param docker_conn_id: The :ref:`Docker connection id <howto/connection:docker>`
:param dns: Docker custom DNS servers
:param dns_search: Docker custom DNS search domain
:param auto_remove: Auto-removal of the container on daemon side when the
container's process exits.
The default is never.
:param auto_remove: Enable removal of the container when the container's process exits. Possible values:
- ``never``: (default) do not remove container
- ``success``: remove on success
- ``force``: always remove container
:param shm_size: Size of ``/dev/shm`` in bytes. The size must be
greater than 0. If omitted uses system default.
:param tty: Allocate pseudo-TTY to the container.
This needs to be set to see logs of the Docker container.
:param hostname: Optional hostname for the container.
:param privileged: Give extended privileges to this container.
:param cap_add: Include container capabilities
:param extra_hosts: Additional hostnames to resolve inside the container,
as a mapping of hostname to IP address.
:param retrieve_output: Should this docker image consistently attempt to pull from an output
file before manually shutting down the image. Useful for cases where users want a pickle serialized
output that is not posted to logs.
Expand All @@ -166,11 +171,15 @@ class DockerOperator(BaseOperator):
:param port_bindings: Publish a container's port(s) to the host. It is a
dictionary where the key indicates the port to open inside the container
and the value indicates the host port that binds to the container port.
Incompatible with ``host`` in ``network_mode``.
Incompatible with ``"host"`` in ``network_mode``.
:param ulimits: List of ulimit options to set for the container. Each item should
be a :py:class:`docker.types.Ulimit` instance.
"""

# !!! Changes in DockerOperator's arguments should be also reflected in !!!
# - docs/apache-airflow-providers-docker/decorators/docker.rst
# - airflow/decorators/__init__.pyi (by a separate PR)

template_fields: Sequence[str] = ("image", "command", "environment", "env_file", "container_name")
template_fields_renderers = {"env_file": "yaml"}
template_ext: Sequence[str] = (
Expand Down Expand Up @@ -211,7 +220,7 @@ def __init__(
docker_conn_id: str | None = None,
dns: list[str] | None = None,
dns_search: list[str] | None = None,
auto_remove: str = "never",
auto_remove: Literal["never", "success", "force"] = "never",
shm_size: int | None = None,
tty: bool = False,
hostname: str | None = None,
Expand All @@ -225,28 +234,43 @@ def __init__(
log_opts_max_size: str | None = None,
log_opts_max_file: str | None = None,
ipc_mode: str | None = None,
skip_exit_code: int | None = None,
skip_on_exit_code: int | Container[int] | None = None,
port_bindings: dict | None = None,
ulimits: list[Ulimit] | None = None,
**kwargs,
) -> None:
super().__init__(**kwargs)
self.api_version = api_version
if skip_exit_code := kwargs.pop("skip_exit_code", None):
warnings.warn(
"`skip_exit_code` is deprecated and will be removed in the future. "
"Please use `skip_on_exit_code` instead.",
AirflowProviderDeprecationWarning,
stacklevel=2,
)
if skip_on_exit_code is not None and skip_exit_code != skip_on_exit_code:
msg = (
f"Conflicting `skip_on_exit_code` provided, "
f"skip_on_exit_code={skip_on_exit_code!r}, skip_exit_code={skip_exit_code!r}."
)
raise ValueError(msg)
skip_on_exit_code = skip_exit_code
if isinstance(auto_remove, bool):
warnings.warn(
"bool value for auto_remove is deprecated, please use 'never', 'success', or 'force' instead",
"bool value for `auto_remove` is deprecated and will be removed in the future. "
"Please use 'never', 'success', or 'force' instead",
AirflowProviderDeprecationWarning,
stacklevel=2,
)
if str(auto_remove) == "False":
self.auto_remove = "never"
elif str(auto_remove) == "True":
self.auto_remove = "success"
elif str(auto_remove) in ("never", "success", "force"):
self.auto_remove = auto_remove
else:
raise ValueError("unsupported auto_remove option, use 'never', 'success', or 'force' instead")
auto_remove = "success" if auto_remove else "never"

super().__init__(**kwargs)
self.api_version = api_version
if not auto_remove or auto_remove not in ("never", "success", "force"):
msg = (
f"Invalid `auto_remove` value {auto_remove!r}, "
"expected one of 'never', 'success', or 'force'."
)
raise ValueError(msg)
self.auto_remove = auto_remove
self.command = command
self.container_name = container_name
self.cpus = cpus
Expand Down Expand Up @@ -291,14 +315,6 @@ def __init__(
self.log_opts_max_size = log_opts_max_size
self.log_opts_max_file = log_opts_max_file
self.ipc_mode = ipc_mode
if skip_exit_code is not None:
warnings.warn(
"skip_exit_code is deprecated. Please use skip_on_exit_code",
AirflowProviderDeprecationWarning,
stacklevel=2,
)
skip_on_exit_code = skip_exit_code

self.skip_on_exit_code = (
skip_on_exit_code
if isinstance(skip_on_exit_code, Container)
Expand Down
75 changes: 59 additions & 16 deletions docs/apache-airflow-providers-docker/decorators/docker.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ The following parameters are supported in Docker Task decorator.

multiple_outputs
If set, function return value will be unrolled to multiple XCom values.
Dict will unroll to XCom values with keys as XCom keys. Defaults to False.
Dict will unroll to XCom values with keys as XCom keys. Defaults to False.
use_dill
Whether to use dill or pickle for serialization
python_command
Expand All @@ -44,7 +44,7 @@ api_version
Remote API version. Set to ``auto`` to automatically detect the server's version.
container_name
Name of the container. Optional (templated)
cpus:
cpus
Number of CPUs to assign to the container. This value gets multiplied with 1024.
docker_url
URL of the host running the docker daemon.
Expand All @@ -54,6 +54,9 @@ environment
private_environment
Private environment variables to set in the container.
These are not templated, and hidden from the website.
env_file
Relative path to the ``.env`` file with environment variables to set in the container.
Overridden by variables in the environment parameter.
force_pull
Pull the docker image on every run. Default is False.
mem_limit
Expand All @@ -64,29 +67,27 @@ host_tmp_dir
Specify the location of the temporary directory on the host which will
be mapped to tmp_dir. If not provided defaults to using the standard system temp directory.
network_mode
Network mode for the container.
Network mode for the container. It can be one of the following:

It can be one of the following:
bridge
Create new network stack for the container with default docker bridge network
'None'
No networking for this container
container:<name> or <id>
Use the network stack of another container specified via <name> or <id>
host
Use the host network stack. Incompatible with `port_bindings`
'<network-name>' or '<network-id>'
Connects the container to user created network(using `docker network create` command)
- ``"bridge"``: Create new network stack for the container with default docker bridge network
- ``"none"``: No networking for this container
- ``"container:<name>"`` or ``"container:<id>"``: Use the network stack of another container specified via <name> or <id>
- ``"host"``: Use the host network stack. Incompatible with **port_bindings**
- ``"<network-name>"`` or ``"<network-id>"``: Connects the container to user created network (using ``docker network create`` command)
tls_ca_cert
Path to a PEM-encoded certificate authority to secure the docker connection.
tls_client_cert
Path to the PEM-encoded certificate used to authenticate docker client.
tls_client_key
Path to the PEM-encoded key used to authenticate docker client.
tls_verify
Set ``True`` to verify the validity of the provided certificate.
tls_hostname
Hostname to match against the docker server certificate or False to disable the check.
tls_ssl_version
Version of SSL to use when communicating with docker daemon.
mount_tmp_dir
Specify whether the temporary directory should be bind-mounted from the host to the container.
tmp_dir
Mount point inside the container to
a temporary directory created on the host by the operator.
Expand All @@ -99,6 +100,8 @@ mounts
``['/host/path:/container/path', '/host/path2:/container/path2:ro']``.
working_dir
Working directory to set on the container (equivalent to the -w switch of the docker client)
entrypoint
Overwrite the default ENTRYPOINT of the image
xcom_all
Push all the stdout or just the last line. The default is False (last line).
docker_conn_id
Expand All @@ -108,18 +111,58 @@ dns
dns_search
Docker custom DNS search domain
auto_remove
Auto-removal of the container on daemon side when the container's process exits.
The default is False.
Enable removal of the container when the container's process exits. Possible values:

- ``never``: (default) do not remove container
- ``success``: remove on success
- ``force``: always remove container
shm_size
Size of ``/dev/shm`` in bytes. The size must be greater than 0.
If omitted uses system default.
tty
Allocate pseudo-TTY to the container.
This needs to be set to see logs of the Docker container.
hostname
Optional hostname for the container.
privileged
Give extended privileges to this container.
cap_add
Include container capabilities
extra_hosts
Additional hostnames to resolve inside the container, as a mapping of hostname to IP address.
retrieve_output
Should this docker image consistently attempt to pull from an output
file before manually shutting down the image. Useful for cases where users want a pickle serialized
output that is not posted to logs.
retrieve_output_path
path for output file that will be retrieved and passed to xcom
timeout
Default timeout for API calls, in seconds.
device_requests
Expose host resources such as GPUs to the container.
log_opts_max_size
The maximum size of the log before it is rolled.
A positive integer plus a modifier representing the unit of measure (k, m, or g).
E.g. 10m or 1g. Defaults to -1 (unlimited).
log_opts_max_file
The maximum number of log files that can be present.
If rolling the logs creates excess files, the oldest file is removed.
Only effective when max-size is also set. A positive integer. Defaults to 1.
ipc_mode
Set the IPC mode for the container.
skip_on_exit_code
If task exits with this exit code, leave the task
in ``skipped`` state (default: None). If set to ``None``, any non-zero
exit code will be treated as a failure.
port_bindings
Publish a container's port(s) to the host. It is a
dictionary where the key indicates the port to open inside the container
and the value indicates the host port that binds to the container port.
Incompatible with ``"host"`` in ``network_mode``.
ulimits
List of ulimit options to set for the container.
Each item should be a ``docker.types.Ulimit`` instance.


Usage Example
-------------
Expand Down
2 changes: 2 additions & 0 deletions docs/spelling_wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,7 @@ intvl
Investorise
io
ip
ipc
iPython
irreproducible
IRSA
Expand Down Expand Up @@ -1627,6 +1628,7 @@ ui
uid
ukey
ulimit
ulimits
Umask
umask
Un
Expand Down
Loading

0 comments on commit 5a6dcfd

Please sign in to comment.