diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 469f45c7ce14c..a9ffa69a38348 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -856,13 +856,13 @@ arangodb, asana, async, atlas, atlassian.jira, aws, azure, cassandra, celery, cg cncf.kubernetes, cohere, common.io, common.sql, crypto, databricks, datadog, dbt.cloud, deprecated_api, devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, druid, elasticsearch, exasol, fab, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, -google_auth, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, -ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, -mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, -pandas, papermill, password, pgvector, pinecone, pinot, postgres, presto, rabbitmq, redis, s3, s3fs, -salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, -spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, weaviate, -webhdfs, winrm, yandex, zendesk +google_auth, graphviz, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, +kubernetes, ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, +mssql, mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, otel, +pagerduty, pandas, papermill, password, pgvector, pinecone, pinot, postgres, presto, rabbitmq, +redis, s3, s3fs, salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, +snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, +weaviate, webhdfs, winrm, yandex, zendesk .. END EXTRAS HERE Provider packages diff --git a/Dockerfile b/Dockerfile index c1ed35a61ebd8..5468bcff35854 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,7 +35,7 @@ # much smaller. 
# # Use the same builder frontend version for everyone -ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf.kubernetes,common.io,docker,elasticsearch,ftp,google,google_auth,grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,openlineage,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,virtualenv" +ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf.kubernetes,common.io,docker,elasticsearch,ftp,google,google_auth,graphviz,grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,openlineage,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,virtualenv" ARG ADDITIONAL_AIRFLOW_EXTRAS="" ARG ADDITIONAL_PYTHON_DEPS="" diff --git a/INSTALL b/INSTALL index 4b6da57ae840f..e0778619ddf61 100644 --- a/INSTALL +++ b/INSTALL @@ -101,13 +101,13 @@ arangodb, asana, async, atlas, atlassian.jira, aws, azure, cassandra, celery, cg cncf.kubernetes, cohere, common.io, common.sql, crypto, databricks, datadog, dbt.cloud, deprecated_api, devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, druid, elasticsearch, exasol, fab, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, -google_auth, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, -ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, -mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, -pandas, papermill, password, pgvector, pinecone, pinot, postgres, presto, rabbitmq, redis, s3, s3fs, -salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, -spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, weaviate, -webhdfs, winrm, yandex, zendesk +google_auth, graphviz, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, +kubernetes, ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, +mssql, mysql, neo4j, odbc, 
openai, openfaas, openlineage, opensearch, opsgenie, oracle, otel, +pagerduty, pandas, papermill, password, pgvector, pinecone, pinot, postgres, presto, rabbitmq, +redis, s3, s3fs, salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, +snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, +weaviate, webhdfs, winrm, yandex, zendesk # END EXTRAS HERE # For installing Airflow in development environments - see CONTRIBUTING.rst diff --git a/airflow/utils/dot_renderer.py b/airflow/utils/dot_renderer.py index 41281fbbb1610..4d44d1e2ecf14 100644 --- a/airflow/utils/dot_renderer.py +++ b/airflow/utils/dot_renderer.py @@ -19,9 +19,14 @@ """Renderer DAG (tasks and dependencies) to the graphviz object.""" from __future__ import annotations +import warnings from typing import TYPE_CHECKING, Any -import graphviz +try: + import graphviz +except ImportError: + warnings.warn("Could not import graphviz. Rendering graph to the graphical format will not be possible.") + graphviz = None from airflow.exceptions import AirflowException from airflow.models.baseoperator import BaseOperator @@ -151,6 +156,10 @@ def render_dag_dependencies(deps: dict[str, list[DagDependency]]) -> graphviz.Di :param deps: List of DAG dependencies :return: Graphviz object """ + if not graphviz: + raise AirflowException( + "Could not import graphviz. Install the graphviz python package to fix this error." + ) dot = graphviz.Digraph(graph_attr={"rankdir": "LR"}) for dag, dependencies in deps.items(): @@ -179,6 +188,10 @@ def render_dag(dag: DAG, tis: list[TaskInstance] | None = None) -> graphviz.Digr :param tis: List of task instances :return: Graphviz object """ + if not graphviz: + raise AirflowException( + "Could not import graphviz. Install the graphviz python package to fix this error." 
+ ) dot = graphviz.Digraph( dag.dag_id, graph_attr={ diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index 4f08a42fd44df..5de443d2ec862 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -432,6 +432,7 @@ def get_airflow_extras(): "ftp", "google", "google_auth", + "graphviz", "grpc", "hashicorp", "http", diff --git a/docs/apache-airflow/extra-packages-ref.rst b/docs/apache-airflow/extra-packages-ref.rst index 082595312edf3..5bab6fd7d9ce9 100644 --- a/docs/apache-airflow/extra-packages-ref.rst +++ b/docs/apache-airflow/extra-packages-ref.rst @@ -52,6 +52,8 @@ python dependencies for the provided package. +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ | google_auth | ``pip install 'apache-airflow[google_auth]'`` | Google auth backend | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ +| graphviz | ``pip install 'apache-airflow[graphviz]'`` | Graphviz renderer for converting DAG to graphical output | ++---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ | kerberos | ``pip install 'apache-airflow[kerberos]'`` | Kerberos integration for Kerberized services (Hadoop, Presto, Trino) | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ | ldap | ``pip install 'apache-airflow[ldap]'`` | LDAP authentication for users | diff --git a/docs/docker-stack/build-arg-ref.rst b/docs/docker-stack/build-arg-ref.rst index a07760558eed2..73c30a3892863 100644 --- a/docs/docker-stack/build-arg-ref.rst +++ b/docs/docker-stack/build-arg-ref.rst @@ 
-91,6 +91,7 @@ List of default extras in the production Dockerfile: * ftp * google * google_auth +* graphviz * grpc * hashicorp * http diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index c1fe295997040..191c787751028 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -687,6 +687,7 @@ googleapiclient GoogleDisplayVideo gpu gpus +graphviz greenlet Groupalia groupId diff --git a/images/breeze/output_prod-image_build.txt b/images/breeze/output_prod-image_build.txt index b5499f9c685f6..ceeb24a418b0b 100644 --- a/images/breeze/output_prod-image_build.txt +++ b/images/breeze/output_prod-image_build.txt @@ -1 +1 @@ -aa383b195f2991035d5333a6f37bebaa +f7a753d66923772bfb6250d5b87d1f51 diff --git a/newsfragments/36647.significant.rst b/newsfragments/36647.significant.rst new file mode 100644 index 0000000000000..dc3f0faad8a26 --- /dev/null +++ b/newsfragments/36647.significant.rst @@ -0,0 +1,23 @@ +The Graphviz dependency is now optional, not required. + +The ``graphviz`` dependency has been problematic as a required Airflow dependency - especially for +ARM-based installations. Graphviz packages require binary graphviz libraries - which is already a +limitation, but they also require the graphviz Python bindings to be built and installed. +This does not work for older Linux installations but - more importantly - when you try to install +Graphviz libraries for Python 3.8, 3.9 for ARM M1 MacBooks, the packages fail to install because +Python bindings compilation for M1 can only work for Python 3.10+. + +This is not a breaking change technically - the CLIs to render the DAGs are still there and if you +already have graphviz installed, it will continue working as it did before. The only case where it +does not work is when you do not have graphviz installed: it will then raise an error and inform you that you need it.
+ +Graphviz will remain installed for most users: + +* the Airflow Image will still contain the graphviz library, because + it is added there as an extra +* when a previous version of Airflow has been installed already, then + the graphviz library is already installed there and Airflow will + continue working as it did + +The only change will be for a new installation of a new version of Airflow from scratch, where graphviz will +need to be specified as an extra or installed separately in order to enable the DAG rendering option. diff --git a/setup.cfg b/setup.cfg index d10e0bb6d90fe..6d7bb58e6beec 100644 --- a/setup.cfg +++ b/setup.cfg @@ -107,7 +107,6 @@ install_requires = flask-wtf>=0.15 fsspec>=2023.10.0 google-re2>=1.0 - graphviz>=0.12 gunicorn>=20.1.0 httpx importlib_metadata>=1.7;python_version<"3.9" diff --git a/setup.py b/setup.py index 4b43a0add9934..a5f29d694ef81 100644 --- a/setup.py +++ b/setup.py @@ -318,12 +318,14 @@ def write_version(filename: str = str(AIRFLOW_SOURCES_ROOT / "airflow" / "git_ve ] doc_gen = [ "eralchemy2", + "graphviz>=0.12", ] flask_appbuilder_oauth = [ "authlib>=1.0.0", # The version here should be upgraded at the same time as flask-appbuilder in setup.cfg "flask-appbuilder[oauth]==4.3.10", ] +graphviz = ["graphviz>=0.12"] kerberos = [ "pykerberos>=1.1.13", "requests_kerberos>=0.10.0", @@ -593,6 +595,7 @@ def get_unique_dependency_list(req_list_iterable: Iterable[list[str]]): "deprecated_api": deprecated_api, "github_enterprise": flask_appbuilder_oauth, "google_auth": flask_appbuilder_oauth, + "graphviz": graphviz, "kerberos": kerberos, "ldap": ldap, "leveldb": leveldb,