Skip to content

Commit

Permalink
support for UV pip install (#76)
Browse files Browse the repository at this point in the history
* Added uv pip install option and tests
  • Loading branch information
petteja authored May 21, 2024
1 parent ce84f1d commit 33fe61c
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 8 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on: push

env:
GIT_SYNC_IMAGE: europe-north1-docker.pkg.dev/knada-gcp/knada-north/git-sync
DATAVERK_AIRFLOW_IMAGE: europe-north1-docker.pkg.dev/knada-gcp/knada-north/dataverk-airflow
DATAVERK_AIRFLOW_IMAGE_BASE_URL: europe-north1-docker.pkg.dev/knada-gcp/knada-north/dataverk-airflow-python

permissions:
contents: read
Expand Down Expand Up @@ -71,8 +71,8 @@ jobs:
gitsync_tag=$(gcloud artifacts docker images list "$GIT_SYNC_IMAGE" --include-tags --sort-by=~Update_Time --limit=1 --format=json | jq -rc '.[0].tags')
echo "CLONE_REPO_IMAGE=ghcr.io/navikt/knada-git-sync/git-sync:$gitsync_tag" >> $GITHUB_ENV
dataverk_airflow_tag=$(gcloud artifacts docker images list "$DATAVERK_AIRFLOW_IMAGE" --include-tags --sort-by=~Update_Time --limit=1 --format=json | jq -rc '.[0].tags')
echo "KNADA_AIRFLOW_OPERATOR_IMAGE=ghcr.io/navikt/knada-images/dataverk-airflow:$dataverk_airflow_tag" >> $GITHUB_ENV
dataverk_airflow_tag=$(gcloud artifacts docker images list "$DATAVERK_AIRFLOW_IMAGE_BASE_URL-${{ matrix.version}}" --include-tags --sort-by=~Update_Time --limit=1 --format=json | jq -rc '.[0].tags')
echo "KNADA_AIRFLOW_OPERATOR_IMAGE=ghcr.io/navikt/knada-images/dataverk-airflow-python-${{ matrix.version }}:$dataverk_airflow_tag" >> $GITHUB_ENV
- name: Run tests Knada
env:
AIRFLOW_CONN_SQLITE_DEFAULT: sqlite://?mode=ro
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Alle våre operators lar deg klone et annet repo enn der DAGene er definert, bar
Vi har også støtte for å installere Python-pakker ved oppstart av Airflow-task. Spesifiser `requirements.txt`-filen din med `requirements_path="/path/to/requirements.txt"`.
Merk at hvis du kombinerer `repo` og `requirements_path`, må `requirements.txt` ligge i repoet nevnt i `repo`.

Ønsker du å benytte [UV](https://github.com/astral-sh/uv) for `pip install`, så kan du sette `use_uv_pip_install=True`. Dette har kun effekt når `requirements_path` også er satt.

### Quarto operator (datafortelling)

Denne kjører `quarto render` for deg, som lager en HTML-fil som kan lastes opp til Datamarkedsplassen.
Expand Down
10 changes: 8 additions & 2 deletions dataverk_airflow/kubernetes_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def kubernetes_operator(
container_uid: int = 50000,
on_success_callback: Callable = None,
working_dir: str = None,
use_uv_pip_install: bool = False,
):
"""Simplified operator for creating KubernetesPodOperator.
Expand Down Expand Up @@ -92,6 +93,7 @@ def kubernetes_operator(
objects to the task instance and is documented under the macros
section of the API.
:param working_dir: str: Path to working directory
:param use_uv_pip_install: bool: Use uv pip install, default False
:return: KubernetesPodOperator
"""
Expand Down Expand Up @@ -133,8 +135,12 @@ def on_failure(context):
working_dir = POD_WORKSPACE_DIR

if requirements_path:
cmds = [
f"pip install -r {POD_WORKSPACE_DIR}/{requirements_path} --user --no-cache-dir"] + cmds
if use_uv_pip_install:
cmds = [
f"uv venv .local && . .local/bin/activate && uv pip install -r {POD_WORKSPACE_DIR}/{requirements_path} --no-cache-dir"] + cmds
else:
cmds = [
f"pip install -r {POD_WORKSPACE_DIR}/{requirements_path} --user --no-cache-dir"] + cmds

allowlist.append("pypi.org")
allowlist.append("files.pythonhosted.org")
Expand Down
4 changes: 3 additions & 1 deletion dataverk_airflow/notebook_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def notebook_operator(
do_xcom_push: bool = False,
on_success_callback: Callable = None,
container_uid: int = 50000,
use_uv_pip_install: bool = False,
):
"""Operator for executing Jupyter notebooks.
Expand All @@ -55,6 +56,7 @@ def notebook_operator(
:param do_xcom_push: bool: Enable xcom push of content in file '/airflow/xcom/return.json', default False
:param container_uid: int: User ID for the container image. Root (id = 0) is not allowed, defaults to 50000 (standard uid for airflow).
:param on_success_callback: Callable
:param use_uv_pip_install: bool: Use uv pip install, default False
:return: KubernetesPodOperator
"""
Expand All @@ -78,7 +80,7 @@ def notebook_operator(
"slack_channel": slack_channel, "extra_envs": extra_envs, "allowlist": allowlist, "requirements_path": requirements_path,
"resources": resources, "startup_timeout_seconds": startup_timeout_seconds,
"retries": retries, "delete_on_finish": delete_on_finish, "retry_delay": retry_delay, "do_xcom_push": do_xcom_push,
"on_success_callback": on_success_callback, "working_dir": str(Path(nb_path).parent), "container_uid": container_uid,
"on_success_callback": on_success_callback, "working_dir": str(Path(nb_path).parent), "container_uid": container_uid, "use_uv_pip_install": use_uv_pip_install,
}
kwargs = {k: v for k, v in kwargs.items() if v is not None}

Expand Down
4 changes: 3 additions & 1 deletion dataverk_airflow/python_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def python_operator(
do_xcom_push: bool = False,
container_uid: int = 50000,
on_success_callback: Callable = None,
use_uv_pip_install: bool = False,
):
"""Operator for executing Python scripts.
Expand All @@ -53,6 +54,7 @@ def python_operator(
:param do_xcom_push: bool: Enable xcom push of content in file '/airflow/xcom/return.json', default False
:param container_uid: int: User ID for the container image. Root (id = 0) is not allowed, defaults to 50000 (standard uid for airflow).
:param on_success_callback: Callable
:param use_uv_pip_install: bool: Use uv pip install, default False
:return: KubernetesPodOperator
"""
Expand All @@ -74,7 +76,7 @@ def python_operator(
"slack_channel": slack_channel, "extra_envs": extra_envs, "allowlist": allowlist, "requirements_path": requirements_path,
"resources": resources, "startup_timeout_seconds": startup_timeout_seconds,
"retries": retries, "delete_on_finish": delete_on_finish, "retry_delay": retry_delay, "do_xcom_push": do_xcom_push,
"on_success_callback": on_success_callback, "working_dir": str(Path(script_path).parent), "container_uid": container_uid,
"on_success_callback": on_success_callback, "working_dir": str(Path(script_path).parent), "container_uid": container_uid, "use_uv_pip_install": use_uv_pip_install,
}
kwargs = {k: v for k, v in kwargs.items() if v is not None}

Expand Down
4 changes: 3 additions & 1 deletion dataverk_airflow/quarto_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def quarto_operator(
do_xcom_push: bool = False,
container_uid: int = 50000,
on_success_callback: Callable = None,
use_uv_pip_install: bool = False,
):
"""Operator for rendering Quarto.
Expand All @@ -55,6 +56,7 @@ def quarto_operator(
:param do_xcom_push: bool: Enable xcom push of content in file '/airflow/xcom/return.json', default False
:param container_uid: int: User ID for the container image. Root (id = 0) is not allowed, defaults to 50000 (standard uid for airflow).
:param on_success_callback: Callable
:param use_uv_pip_install: bool: Use uv pip install, default False
:return: KubernetesPodOperator
"""
Expand Down Expand Up @@ -97,7 +99,7 @@ def quarto_operator(
"slack_channel": slack_channel, "extra_envs": extra_envs, "allowlist": allowlist, "requirements_path": requirements_path,
"resources": resources, "startup_timeout_seconds": startup_timeout_seconds,
"retries": retries, "delete_on_finish": delete_on_finish, "retry_delay": retry_delay, "do_xcom_push": do_xcom_push,
"on_success_callback": on_success_callback, "working_dir": str(working_dir), "container_uid": container_uid,
"on_success_callback": on_success_callback, "working_dir": str(working_dir), "container_uid": container_uid, "use_uv_pip_install": use_uv_pip_install,
}

kwargs = {k: v for k, v in kwargs.items() if v is not None}
Expand Down
54 changes: 54 additions & 0 deletions tests-integration/composer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@
startup_timeout_seconds=60,
)

# Python-script task with use_uv_pip_install=True, so the requirements file is
# installed through the uv-based command instead of plain `pip install`.
py_op_uv = python_operator(
dag=dag,
name="python-op-uv",
script_path="tests-integration/notebooks/script.py",
requirements_path="tests-integration/notebooks/requirements.txt",
retries=0,
startup_timeout_seconds=60,
use_uv_pip_install=True,
)

nb_op = notebook_operator(
dag=dag,
name="nb-op",
Expand All @@ -23,6 +33,16 @@
startup_timeout_seconds=60,
)

# Notebook task with use_uv_pip_install=True, so the requirements file is
# installed through the uv-based command instead of plain `pip install`.
nb_op_uv = notebook_operator(
dag=dag,
name="nb-op-uv",
nb_path="tests-integration/notebooks/mynb.ipynb",
requirements_path="tests-integration/notebooks/requirements.txt",
retries=0,
startup_timeout_seconds=60,
use_uv_pip_install=True,
)

quarto_op = quarto_operator(
dag=dag,
name="quarto-op",
Expand All @@ -37,6 +57,21 @@
startup_timeout_seconds=60,
)

# Quarto task for a single notebook ("path") with use_uv_pip_install=True, so
# requirements are installed through uv. The token is read from the Airflow
# Variable "quarto_token" when this module is parsed.
quarto_op_uv = quarto_operator(
dag=dag,
name="quarto-op-uv",
quarto={
"path": "tests-integration/notebooks/quarto.ipynb",
"env": "dev",
"id": "bf48d8a4-05ca-47a5-a360-bc24171baf62",
"token": Variable.get("quarto_token"),
},
requirements_path="tests-integration/notebooks/requirements.txt",
retries=0,
startup_timeout_seconds=60,
use_uv_pip_install=True,
)

quarto_book_op = quarto_operator(
dag=dag,
name="quarto-book-op",
Expand All @@ -51,7 +86,26 @@
startup_timeout_seconds=60,
)

# Quarto task for a whole folder ("folder") with use_uv_pip_install=True, so
# requirements are installed through uv. The token is read from the Airflow
# Variable "quarto_token" when this module is parsed.
quarto_book_op_uv = quarto_operator(
dag=dag,
name="quarto-book-op-uv",
quarto={
"folder": "tests-integration/notebooks/quartobook",
"env": "dev",
"id": "757da08e-031e-4fac-a5f0-fffe6d2d96b6",
"token": Variable.get("quarto_token"),
},
requirements_path="tests-integration/notebooks/requirements.txt",
retries=0,
startup_timeout_seconds=60,
use_uv_pip_install=True,
)

py_op
py_op_uv
nb_op
nb_op_uv
quarto_op
quarto_op_uv
quarto_book_op
quarto_book_op_uv
57 changes: 57 additions & 0 deletions tests-integration/knada.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@
retries=0,
startup_timeout_seconds=60,
)
# Python-script task with an explicit `repo` and use_uv_pip_install=True, so
# the requirements file is installed through the uv-based command instead of
# plain `pip install`.
py_op_uv = python_operator(
dag=dag,
name="python-op-uv",
repo="navikt/dataverk-airflow",
script_path="tests-integration/notebooks/script.py",
requirements_path="tests-integration/notebooks/requirements.txt",
retries=0,
startup_timeout_seconds=60,
use_uv_pip_install=True,
)

nb_op = notebook_operator(
dag=dag,
Expand All @@ -25,6 +35,17 @@
startup_timeout_seconds=60,
)

# Notebook task with an explicit `repo` and use_uv_pip_install=True, so the
# requirements file is installed through the uv-based command instead of
# plain `pip install`.
nb_op_uv = notebook_operator(
dag=dag,
name="nb-op-uv",
repo="navikt/dataverk-airflow",
nb_path="tests-integration/notebooks/mynb.ipynb",
requirements_path="tests-integration/notebooks/requirements.txt",
retries=0,
startup_timeout_seconds=60,
use_uv_pip_install=True,
)

quarto_op = quarto_operator(
dag=dag,
name="quarto-op",
Expand All @@ -40,6 +61,22 @@
startup_timeout_seconds=60,
)

# Quarto task for a single notebook ("path") with an explicit `repo` and
# use_uv_pip_install=True, so requirements are installed through uv. The token
# is read from the Airflow Variable "quarto_token" when this module is parsed.
quarto_op_uv = quarto_operator(
dag=dag,
name="quarto-op-uv",
repo="navikt/dataverk-airflow",
quarto={
"path": "tests-integration/notebooks/quarto.ipynb",
"env": "dev",
"id": "bf48d8a4-05ca-47a5-a360-bc24171baf62",
"token": Variable.get("quarto_token"),
},
requirements_path="tests-integration/notebooks/requirements.txt",
retries=0,
startup_timeout_seconds=60,
use_uv_pip_install=True,
)

quarto_book_op = quarto_operator(
dag=dag,
name="quarto-book-op",
Expand All @@ -55,7 +92,27 @@
startup_timeout_seconds=60,
)

# Quarto task for a whole folder ("folder") with an explicit `repo` and
# use_uv_pip_install=True, so requirements are installed through uv. The token
# is read from the Airflow Variable "quarto_token" when this module is parsed.
quarto_book_op_uv = quarto_operator(
dag=dag,
name="quarto-book-op-uv",
repo="navikt/dataverk-airflow",
quarto={
"folder": "tests-integration/notebooks/quartobook",
"env": "dev",
"id": "757da08e-031e-4fac-a5f0-fffe6d2d96b6",
"token": Variable.get("quarto_token"),
},
requirements_path="tests-integration/notebooks/requirements.txt",
retries=0,
startup_timeout_seconds=60,
use_uv_pip_install=True,
)

py_op
py_op_uv
nb_op
nb_op_uv
quarto_op
quarto_op_uv
quarto_book_op
quarto_book_op_uv
9 changes: 9 additions & 0 deletions tests/test_kubernetes_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,12 @@ def test_that_dependency_install_is_prepended_to_container_cmds(self, dag):

assert container.arguments == [
"pip install -r /workspace/requirements.txt --user --no-cache-dir && python script.py"]

def test_that_uv_is_used_for_pip_install(self, dag):
    """Check that use_uv_pip_install=True prepends the uv install command.

    The requirements install (venv creation, activation and
    `uv pip install`) must come before the user command in the single
    container argument string.
    """
    container = kubernetes_operator(dag, "name", "repo", "image",
                                    cmds=["python script.py"],
                                    requirements_path="requirements.txt",
                                    use_uv_pip_install=True)

    # Plain string literal: the expected command has no placeholders, so an
    # f-string prefix would be misleading (flagged by pyflakes/ruff F541).
    assert container.arguments == [
        "uv venv .local && . .local/bin/activate && uv pip install -r /workspace/requirements.txt --no-cache-dir && python script.py"]

0 comments on commit 33fe61c

Please sign in to comment.