diff --git a/.dockerignore b/.dockerignore index 803b75ea80acf..5240d438d706a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -49,7 +49,6 @@ !.dockerignore !RELEASE_NOTES.rst !LICENSE -!MANIFEST.in !NOTICE !.github !empty @@ -69,8 +68,6 @@ !.bash_completion.d # Setup/version configuration -!setup.cfg -!setup.py !pyproject.toml !manifests !generated diff --git a/.github/actions/build-prod-images/action.yml b/.github/actions/build-prod-images/action.yml index 14a5aa8de90c6..b58aa3a2baecf 100644 --- a/.github/actions/build-prod-images/action.yml +++ b/.github/actions/build-prod-images/action.yml @@ -41,7 +41,7 @@ runs: shell: bash run: > breeze release-management prepare-provider-packages - --package-list-file ./airflow/providers/installed_providers.txt + --package-list-file ./dev/prod_image_installed_providers.txt --package-format wheel --version-suffix-for-pypi dev0 if: ${{ inputs.build-provider-packages == 'true' }} - name: "Prepare chicken-eggs provider packages" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f321ec26191d2..6ab4213153a19 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -195,7 +195,7 @@ jobs: # Push early BuildX cache to GitHub Registry in Apache repository, This cache does not wait for all the # tests to complete - it is run very early in the build process for "main" merges in order to refresh - # cache using the current constraints. This will speed up cache refresh in cases when setup.py + # cache using the current constraints. This will speed up cache refresh in cases when pyproject.toml # changes or in case of Dockerfile changes. Failure in this step is not a problem (at most it will # delay cache refresh. It does not attempt to upgrade to newer dependencies. # We only push CI cache as PROD cache usually does not gain as much from fresh cache because @@ -489,7 +489,7 @@ jobs: # And when we prepare them from sources they will have apache-airflow>=X.Y.Z.dev0 shell: bash run: > - breeze release-management prepare-provider-packages + breeze release-management prepare-provider-packages --include-not-ready-providers --package-format wheel --version-suffix-for-pypi dev0 ${{ needs.build-info.outputs.chicken-egg-providers }} if: needs.build-info.outputs.chicken-egg-providers != '' @@ -681,9 +681,9 @@ jobs: id: cache-doc-inventories with: path: ./docs/_inventory_cache/ - key: docs-inventory-${{ hashFiles('setup.py','setup.cfg','pyproject.toml;') }} + key: docs-inventory-${{ hashFiles('pyproject.toml;') }} restore-keys: | - docs-inventory-${{ hashFiles('setup.py','setup.cfg','pyproject.toml;') }} + docs-inventory-${{ hashFiles('pyproject.toml;') }} docs-inventory- - name: "Build docs" run: > @@ -745,9 +745,9 @@ jobs: id: cache-doc-inventories with: path: ./docs/_inventory_cache/ - key: docs-inventory-${{ hashFiles('setup.py','setup.cfg','pyproject.toml;') }} + key: docs-inventory-${{ hashFiles('pyproject.toml;') }} restore-keys: | - docs-inventory-${{ hashFiles('setup.py','setup.cfg','pyproject.toml;') }} + docs-inventory-${{ hashFiles('pyproject.toml;') }} docs-inventory- - name: "Spellcheck docs" run: > @@ -776,11 +776,13 @@ jobs: run: rm -fv ./dist/* - name: "Prepare provider documentation" run: > - breeze release-management prepare-provider-documentation --non-interactive + breeze release-management prepare-provider-documentation --include-not-ready-providers + --non-interactive ${{ needs.build-info.outputs.affected-providers-list-as-string }} - name: "Prepare provider packages: wheel" run: > - breeze release-management 
prepare-provider-packages --version-suffix-for-pypi dev0 + breeze release-management prepare-provider-packages --include-not-ready-providers + --version-suffix-for-pypi dev0 --package-format wheel ${{ needs.build-info.outputs.affected-providers-list-as-string }} - name: "Prepare airflow package: wheel" run: breeze release-management prepare-airflow-package --version-suffix-for-pypi dev0 @@ -849,7 +851,7 @@ jobs: run: rm -fv ./dist/* - name: "Prepare provider packages: sdist" run: > - breeze release-management prepare-provider-packages + breeze release-management prepare-provider-packages --include-not-ready-providers --version-suffix-for-pypi dev0 --package-format sdist ${{ needs.build-info.outputs.affected-providers-list-as-string }} - name: "Prepare airflow package: sdist" @@ -916,7 +918,7 @@ jobs: run: rm -fv ./dist/* - name: "Prepare provider packages: wheel" run: > - breeze release-management prepare-provider-packages + breeze release-management prepare-provider-packages --include-not-ready-providers --package-format wheel ${{ needs.build-info.outputs.affected-providers-list-as-string }} - name: > Remove incompatible Airflow @@ -925,17 +927,9 @@ jobs: rm -vf ${{ matrix.remove-providers }} working-directory: ./dist if: matrix.remove-providers != '' - - name: "Checkout ${{matrix.airflow-version}} of Airflow" - uses: actions/checkout@v4 - with: - persist-credentials: false - ref: ${{matrix.airflow-version}} - path: old-airflow - - name: "Prepare airflow package: wheel" + - name: "Download airflow package: wheel" run: | - pip install pip==23.3.2 wheel==0.36.2 gitpython==3.1.40 - python setup.py egg_info --tag-build ".dev0" bdist_wheel -d ../dist - working-directory: ./old-airflow + pip download "apache-airflow==${{matrix.airflow-version}}" -d dist --no-deps - name: > Install and verify all provider packages and airflow on Airflow ${{matrix.airflow-version}}:Python ${{matrix.python-version}} @@ -2124,8 +2118,7 @@ jobs: path: ".build/.k8s-env" key: "\ k8s-env-${{steps.breeze.outputs.host-python-version}}-\ - ${{ hashFiles('scripts/ci/kubernetes/k8s_requirements.txt','setup.cfg',\ - 'setup.py','pyproject.toml','generated/provider_dependencies.json') }}" + ${{ hashFiles('scripts/ci/kubernetes/k8s_requirements.txt','pyproject.toml') }}" - name: Run complete K8S tests ${{needs.build-info.outputs.kubernetes-combos-list-as-string}} run: breeze k8s run-complete-tests --run-in-parallel --upgrade env: @@ -2256,7 +2249,7 @@ jobs: - name: "Prepare providers packages for PROD build" run: > breeze release-management prepare-provider-packages - --package-list-file ./airflow/providers/installed_providers.txt + --package-list-file ./dev/prod_image_installed_providers.txt --package-format wheel env: VERSION_SUFFIX_FOR_PYPI: "dev0" diff --git a/.gitignore b/.gitignore index 32e202a43ea15..cf1a07577849b 100644 --- a/.gitignore +++ b/.gitignore @@ -190,8 +190,6 @@ dmypy.json log.txt* # Provider-related ignores -/provider_packages/CHANGELOG.txt -/provider_packages/MANIFEST.in /airflow/providers/__init__.py # Docker context files @@ -219,7 +217,7 @@ pip-wheel-metadata /dev/Dockerfile.pmc # Generated UI licenses -licenses/LICENSES-ui.txt +3rd-party-licenses/LICENSES-ui.txt # Packaged breeze on Windows /breeze.exe @@ -240,3 +238,6 @@ licenses/LICENSES-ui.txt # Dask Executor tests generate this directory /tests/executors/dask-worker-space/ + +# airflow-build-dockerfile and correconding ignore file +airflow-build-dockerfile* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 
51aba2e09fe43..f50076ba614b6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -274,11 +274,6 @@ repos: - --ignore-words=docs/spelling_wordlist.txt - --skip=airflow/providers/*/*.rst,airflow/www/*.log,docs/*/commits.rst,docs/apache-airflow/tutorial/pipeline_example.csv,*.min.js,*.lock,INTHEWILD.md - --exclude-file=.codespellignorelines - - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.15 - hooks: - - id: validate-pyproject - name: Validate pyproject.toml - repo: local # Note that this is the 2nd "local" repo group in the .pre-commit-config.yaml file. This is because # we try to minimise the number of passes that must happen in order to apply some of the changes @@ -333,13 +328,6 @@ repos: files: Dockerfile.*$ pass_filenames: true require_serial: true - - id: check-setup-order - name: Check order of dependencies in setup.cfg and setup.py - language: python - files: ^setup\.cfg$|^setup\.py$ - pass_filenames: false - entry: ./scripts/ci/pre_commit/pre_commit_check_order_setup.py - additional_dependencies: ['rich>=12.4.4'] - id: check-airflow-k8s-not-used name: Check airflow.kubernetes imports are not used language: python @@ -363,14 +351,6 @@ repos: exclude: ^airflow/kubernetes/|^airflow/providers/ entry: ./scripts/ci/pre_commit/pre_commit_check_cncf_k8s_used_for_k8s_executor_only.py additional_dependencies: ['rich>=12.4.4'] - - id: check-extra-packages-references - name: Checks setup extra packages - description: Checks if all the libraries in setup.py are listed in extra-packages-ref.rst file - language: python - files: ^setup\.py$|^docs/apache-airflow/extra-packages-ref\.rst$|^airflow/providers/.*/provider\.yaml$ - pass_filenames: false - entry: ./scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py - additional_dependencies: ['rich>=12.4.4'] - id: check-airflow-provider-compatibility name: Check compatibility of Providers with Airflow entry: ./scripts/ci/pre_commit/pre_commit_check_provider_airflow_compatibility.py @@ -400,19 +380,34 @@ repos: files: ^airflow/providers/.*/hooks/.*\.py$ additional_dependencies: ['rich>=12.4.4', 'pyyaml', 'packaging'] - id: update-providers-dependencies - name: Update cross-dependencies for providers packages + name: Update dependencies for provider packages entry: ./scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py language: python - files: ^airflow/providers/.*\.py$|^airflow/providers/.*/provider\.yaml$|^tests/providers/.*\.py$|^tests/system/providers/.*\.py$ + files: ^airflow/providers/.*\.py$|^airflow/providers/.*/provider\.yaml$|^tests/providers/.*\.py$|^tests/system/providers/.*\.py$|^scripts/ci/pre_commit/pre_commit_update_providers_dependencies\.py$ pass_filenames: false - additional_dependencies: ['setuptools', 'rich>=12.4.4', 'pyyaml'] + additional_dependencies: ['setuptools', 'rich>=12.4.4', 'pyyaml', 'tomli'] + - id: check-extra-packages-references + name: Checks setup extra packages + description: Checks if all the extras defined in pyproject.toml are listed in extra-packages-ref.rst file + language: python + files: ^docs/apache-airflow/extra-packages-ref\.rst$|^pyproject.toml + pass_filenames: false + entry: ./scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py + additional_dependencies: ['rich>=12.4.4', 'tomli', 'tabulate'] + - id: check-pyproject-toml-order + name: Check order of dependencies in pyproject.toml + language: python + files: ^pyproject\.toml$ + pass_filenames: false + entry: ./scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py + 
additional_dependencies: ['rich>=12.4.4'] - id: update-extras name: Update extras in documentation entry: ./scripts/ci/pre_commit/pre_commit_insert_extras.py language: python files: ^setup\.py$|^CONTRIBUTING\.rst$|^INSTALL$|^airflow/providers/.*/provider\.yaml$ pass_filenames: false - additional_dependencies: ['rich>=12.4.4'] + additional_dependencies: ['rich>=12.4.4', 'tomli'] - id: check-extras-order name: Check order of extras in Dockerfile entry: ./scripts/ci/pre_commit/pre_commit_check_order_dockerfile_extras.py @@ -712,7 +707,7 @@ repos: name: Sort alphabetically and uniquify installed_providers.txt entry: ./scripts/ci/pre_commit/pre_commit_sort_installed_providers.py language: python - files: ^\.pre-commit-config\.yaml$|^airflow/providers/installed_providers\.txt$ + files: ^\.pre-commit-config\.yaml$|^dev/.*_installed_providers\.txt$ pass_filenames: false require_serial: true - id: update-spelling-wordlist-to-be-sorted diff --git a/.rat-excludes b/.rat-excludes index d881787de9085..ab2296b487436 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -43,7 +43,8 @@ venv files airflow.iml .gitmodules -installed_providers.txt +prod_image_installed_providers.txt +airflow_pre_installed_providers.txt # Generated doc files .*html @@ -61,7 +62,7 @@ spelling_wordlist.txt # it is compatible according to http://www.apache.org/legal/resolved.html#category-a kerberos_auth.py airflow_api_auth_backend_kerberos_auth_py.html -licenses/* +3rd-party-licenses/* parallel.js underscore.js jquery.dataTables.min.js diff --git a/licenses/LICENSE-bootstrap.txt b/3rd-party-licenses/LICENSE-bootstrap.txt similarity index 100% rename from licenses/LICENSE-bootstrap.txt rename to 3rd-party-licenses/LICENSE-bootstrap.txt diff --git a/licenses/LICENSE-bootstrap3-typeahead.txt b/3rd-party-licenses/LICENSE-bootstrap3-typeahead.txt similarity index 100% rename from licenses/LICENSE-bootstrap3-typeahead.txt rename to 3rd-party-licenses/LICENSE-bootstrap3-typeahead.txt diff --git a/licenses/LICENSE-d3-shape.txt b/3rd-party-licenses/LICENSE-d3-shape.txt similarity index 100% rename from licenses/LICENSE-d3-shape.txt rename to 3rd-party-licenses/LICENSE-d3-shape.txt diff --git a/licenses/LICENSE-d3-tip.txt b/3rd-party-licenses/LICENSE-d3-tip.txt similarity index 100% rename from licenses/LICENSE-d3-tip.txt rename to 3rd-party-licenses/LICENSE-d3-tip.txt diff --git a/licenses/LICENSE-d3js.txt b/3rd-party-licenses/LICENSE-d3js.txt similarity index 100% rename from licenses/LICENSE-d3js.txt rename to 3rd-party-licenses/LICENSE-d3js.txt diff --git a/licenses/LICENSE-dagre-d3.txt b/3rd-party-licenses/LICENSE-dagre-d3.txt similarity index 100% rename from licenses/LICENSE-dagre-d3.txt rename to 3rd-party-licenses/LICENSE-dagre-d3.txt diff --git a/licenses/LICENSE-datatables.txt b/3rd-party-licenses/LICENSE-datatables.txt similarity index 100% rename from licenses/LICENSE-datatables.txt rename to 3rd-party-licenses/LICENSE-datatables.txt diff --git a/licenses/LICENSE-elasticmock.txt b/3rd-party-licenses/LICENSE-elasticmock.txt similarity index 100% rename from licenses/LICENSE-elasticmock.txt rename to 3rd-party-licenses/LICENSE-elasticmock.txt diff --git a/licenses/LICENSE-eonasdan-bootstrap-datetimepicker.txt b/3rd-party-licenses/LICENSE-eonasdan-bootstrap-datetimepicker.txt similarity index 100% rename from licenses/LICENSE-eonasdan-bootstrap-datetimepicker.txt rename to 3rd-party-licenses/LICENSE-eonasdan-bootstrap-datetimepicker.txt diff --git a/licenses/LICENSE-flask-kerberos.txt b/3rd-party-licenses/LICENSE-flask-kerberos.txt 
similarity index 100% rename from licenses/LICENSE-flask-kerberos.txt rename to 3rd-party-licenses/LICENSE-flask-kerberos.txt diff --git a/licenses/LICENSE-hue.txt b/3rd-party-licenses/LICENSE-hue.txt similarity index 100% rename from licenses/LICENSE-hue.txt rename to 3rd-party-licenses/LICENSE-hue.txt diff --git a/licenses/LICENSE-jqclock.txt b/3rd-party-licenses/LICENSE-jqclock.txt similarity index 100% rename from licenses/LICENSE-jqclock.txt rename to 3rd-party-licenses/LICENSE-jqclock.txt diff --git a/licenses/LICENSE-jquery.txt b/3rd-party-licenses/LICENSE-jquery.txt similarity index 100% rename from licenses/LICENSE-jquery.txt rename to 3rd-party-licenses/LICENSE-jquery.txt diff --git a/licenses/LICENSE-moment.txt b/3rd-party-licenses/LICENSE-moment.txt similarity index 100% rename from licenses/LICENSE-moment.txt rename to 3rd-party-licenses/LICENSE-moment.txt diff --git a/licenses/LICENSE-normalize.txt b/3rd-party-licenses/LICENSE-normalize.txt similarity index 100% rename from licenses/LICENSE-normalize.txt rename to 3rd-party-licenses/LICENSE-normalize.txt diff --git a/licenses/LICENSE-pytest-capture-warnings.txt b/3rd-party-licenses/LICENSE-pytest-capture-warnings.txt similarity index 100% rename from licenses/LICENSE-pytest-capture-warnings.txt rename to 3rd-party-licenses/LICENSE-pytest-capture-warnings.txt diff --git a/licenses/LICENSE-unicodecsv.txt b/3rd-party-licenses/LICENSE-unicodecsv.txt similarity index 100% rename from licenses/LICENSE-unicodecsv.txt rename to 3rd-party-licenses/LICENSE-unicodecsv.txt diff --git a/BREEZE.rst b/BREEZE.rst index 85164fb456cd6..61b42c510f1e9 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -1569,7 +1569,7 @@ The CI image is built automatically as needed, however it can be rebuilt manuall Building the image first time pulls a pre-built version of images from the Docker Hub, which may take some time. But for subsequent source code changes, no wait time is expected. -However, changes to sensitive files like ``setup.py`` or ``Dockerfile.ci`` will trigger a rebuild +However, changes to sensitive files like ``pyproject.toml`` or ``Dockerfile.ci`` will trigger a rebuild that may take more time though it is highly optimized to only rebuild what is needed. Breeze has built in mechanism to check if your local image has not diverged too much from the @@ -2299,7 +2299,7 @@ These are all available flags of ``release-management add-back-references`` comm Generating constraints """""""""""""""""""""" -Whenever setup.py gets modified, the CI main job will re-generate constraint files. Those constraint +Whenever ``pyproject.toml`` gets modified, the CI main job will re-generate constraint files. Those constraint files are stored in separated orphan branches: ``constraints-main``, ``constraints-2-0``. Those are constraint files as described in detail in the @@ -2341,14 +2341,14 @@ These are all available flags of ``generate-constraints`` command: :width: 100% :alt: Breeze generate-constraints -In case someone modifies setup.py, the scheduled CI Tests automatically upgrades and +In case someone modifies ``pyproject.toml``, the scheduled CI Tests automatically upgrades and pushes changes to the constraint files, however you can also perform test run of this locally using the procedure described in the `Manually generating image cache and constraints `_ which utilises multiple processors on your local machine to generate such constraints faster. -This bumps the constraint files to latest versions and stores hash of setup.py. 
The generated constraint -and setup.py hash files are stored in the ``files`` folder and while generating the constraints diff +This bumps the constraint files to latest versions and stores hash of ``pyproject.toml``. The generated constraint +and ``pyproject.toml`` hash files are stored in the ``files`` folder and while generating the constraints diff of changes vs the previous constraint files is printed. Updating constraints @@ -2697,18 +2697,18 @@ disappear when you exit Breeze shell. When you want to add dependencies permanently, then it depends what kind of dependency you add. -If you want to add core dependency that should always be installed - you need to add it to ``setup.cfg`` -to ``install_requires`` section. If you want to add it to one of the optional core extras, you should -add it in the extra definition in ``setup.py`` (you need to find out where it is defined). If you want -to add it to one of the providers, you need to add it to the ``provider.yaml`` file in the provider +If you want to add core dependency that should always be installed - you need to add it to ``pyproject.toml`` +to ``dependencies`` section. If you want to add it to one of the optional core extras, you should +add it in the extra definition in ``pyproject.toml`` (you need to find out where it is defined). +If you want to add it to one of the providers, you need to add it to the ``provider.yaml`` file in the provider directory - but remember that this should be followed by running pre-commit that will automatically update -the ``generated/provider_dependencies.json`` directory with the new dependencies: +the ``pyproject.toml`` with the new dependencies as the ``provider.yaml`` files are not used directly, they +are used to update ``pyproject.toml`` file: .. code-block:: bash pre-commit run update-providers-dependencies --all-files - You can also run the pre-commit by ``breeze static-checks --type update-providers-dependencies --all-files`` command - which provides autocomplete. diff --git a/CI.rst b/CI.rst index e8d7200027c1b..af37ff2fd9102 100644 --- a/CI.rst +++ b/CI.rst @@ -604,7 +604,7 @@ those via corresponding command line flags passed to ``breeze shell`` command. | ``UPGRADE_TO_NEWER_DEPENDENCIES`` | false | false | false\* | Determines whether the build should | | | | | | attempt to upgrade Python base image and all | | | | | | PIP dependencies to latest ones matching | -| | | | | ``setup.py`` limits. This tries to replicate | +| | | | | ``pyproject.toml`` limits. Tries to replicate | | | | | | the situation of "fresh" user who just installs | | | | | | airflow and uses latest version of matching | | | | | | dependencies. By default we are using a | @@ -625,7 +625,7 @@ those via corresponding command line flags passed to ``breeze shell`` command. | | | | | | | | | | | Setting the value to random value is best way | | | | | | to assure that constraints are upgraded even if | -| | | | | there is no change to setup.py | +| | | | | there is no change to ``pyproject.toml`` | | | | | | | | | | | | This way our constraints are automatically | | | | | | tested and updated whenever new versions | diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index ba3ff9ec98f9b..a7bf23effe85e 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -660,29 +660,66 @@ Extras ------ There are a number of extras that can be specified when installing Airflow. Those -extras can be specified after the usual pip install - for example -``pip install -e .[ssh]``. 
For development purpose there is a ``devel`` extra that -installs all development dependencies. There is also ``devel_ci`` that installs -all dependencies needed in the CI environment. +extras can be specified after the usual pip install - for example ``pip install -e .[ssh]`` for editable +installation. Note that there are a few kinds of those extras - ``regular`` extras (used when you install +airflow as a user), but in ``editable`` mode you can also install ``devel`` extras that are necessary if +you want to run airflow locally for testing, and ``doc`` extras that install tools needed to build +the documentation. This is the full list of those extras: - .. START EXTRAS HERE -aiobotocore, airbyte, alibaba, all, all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, -apache.drill, apache.druid, apache.flink, apache.hdfs, apache.hive, apache.impala, apache.kafka, -apache.kylin, apache.livy, apache.pig, apache.pinot, apache.spark, apache.webhdfs, apprise, -arangodb, asana, async, atlas, atlassian.jira, aws, azure, cassandra, celery, cgroups, cloudant, -cncf.kubernetes, cohere, common.io, common.sql, crypto, databricks, datadog, dbt.cloud, -deprecated_api, devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, -druid, elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, -google_auth, graphviz, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, -kubernetes, ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, -mssql, mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, otel, -pagerduty, pandas, papermill, password, pgvector, pinecone, pinot, postgres, presto, rabbitmq, -redis, s3, s3fs, salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, -snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, -weaviate, webhdfs, winrm, yandex, zendesk - .. END EXTRAS HERE +Devel extras +............. + +The ``devel`` extras are not available in the released packages. They are only available when you install +Airflow from sources in ``editable`` installation - i.e. one that you are usually using to contribute to +Airflow. They provide tools such as ``pytest`` and ``mypy`` for general purpose development and testing. Also, +some providers have their own development-related extras that allow you to install tools necessary to run tests, +where the tools are specific to the provider. + + + .. START DEVEL EXTRAS HERE +devel, devel-all, devel-all-dbs, devel-ci, devel-debuggers, devel-devscripts, devel-duckdb, devel- +hadoop, devel-mypy, devel-sentry, devel-static-checks, devel-tests + .. END DEVEL EXTRAS HERE + +Doc extras +........... + +The ``doc`` extras are not available in the released packages. They are only available when you install +Airflow from sources in ``editable`` installation - i.e. one that you are usually using to contribute to +Airflow. They provide tools needed when you want to build Airflow documentation (note that you also need +``devel`` extras installed for airflow and providers in order to build documentation for airflow and +provider packages respectively). The ``doc`` package is enough to build regular documentation, where +``doc-gen`` is needed to generate the ER diagram describing our database. + + .. START DOC EXTRAS HERE +doc, doc-gen + .. END DOC EXTRAS HERE + + +Regular extras +..............
+ +Those extras are available as regular Airflow extras and are targeted to be used by Airflow users and +contributors to select features of Airflow they want to use They might install additional providers or +just install dependencies that are necessary to enable the feature. + + .. START REGULAR EXTRAS HERE +aiobotocore, airbyte, alibaba, all, all-core, all-dbs, amazon, apache-atlas, apache-beam, apache- +cassandra, apache-drill, apache-druid, apache-flink, apache-hdfs, apache-hive, apache-impala, +apache-kafka, apache-kylin, apache-livy, apache-pig, apache-pinot, apache-spark, apache-webhdfs, +apprise, arangodb, asana, async, atlas, atlassian-jira, aws, azure, cassandra, celery, cgroups, +cloudant, cncf-kubernetes, cohere, common-io, common-sql, crypto, databricks, datadog, dbt-cloud, +deprecated-api, dingding, discord, docker, druid, elasticsearch, exasol, facebook, ftp, gcp, +gcp_api, github, github-enterprise, google, google-auth, graphviz, grpc, hashicorp, hdfs, hive, +http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft-azure, +microsoft-mssql, microsoft-psrp, microsoft-winrm, mongo, mssql, mysql, neo4j, odbc, openai, +openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, +pgvector, pinecone, pinot, postgres, presto, rabbitmq, redis, s3, s3fs, salesforce, samba, saml, +segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, +tableau, tabular, telegram, trino, vertica, virtualenv, weaviate, webhdfs, winrm, yandex, zendesk + .. END REGULAR EXTRAS HERE Provider packages ----------------- @@ -704,29 +741,25 @@ of ``airflow\providers``. This file contains: * list of integrations, operators, hooks, sensors, transfers provided by the provider (useful for documentation generation) * list of connection types, extra-links, secret backends, auth backends, and logging handlers (useful to both register them as they are needed by Airflow and to include them in documentation automatically). +* and more ... If you want to add dependencies to the provider, you should add them to the corresponding ``provider.yaml`` and Airflow pre-commits and package generation commands will use them when preparing package information. In Airflow 1.10 all those providers were installed together within one single package and when you installed airflow locally, from sources, they were also installed. In Airflow 2.0, providers are separated out, -and not packaged together with the core, unless you set ``INSTALL_PROVIDERS_FROM_SOURCES`` environment -variable to ``true``. +and not packaged together with the core when you build "apache-airflow" package, however when you install +airflow project locally with ``pip install -e ".[devel]"`` they are available on the same +environment as Airflow. -In Breeze - which is a development environment, ``INSTALL_PROVIDERS_FROM_SOURCES`` variable is set to true, -but you can add ``--install-providers-from-sources=false`` flag to Breeze to install providers from PyPI instead of source files when -building the images. - -One watch-out - providers are still always installed (or rather available) if you install airflow from -sources using ``-e`` (or ``--editable``) flag. In such case airflow is read directly from the sources -without copying airflow packages to the usual installation location, and since 'providers' folder is -in this airflow folder - the providers package is importable. 
+You should only update dependencies for the provider in the corresponding ``provider.yaml`` which is the +source of truth for all information about the provider. Some of the packages have cross-dependencies with other providers packages. This typically happens for transfer operators where operators use hooks from the other providers in case they are transferring -data between the providers. The list of dependencies is maintained (automatically with pre-commits) -in the ``generated/provider_dependencies.json``. Pre-commits are also used to generate dependencies. -The dependency list is automatically used during PyPI packages generation. +data between the providers. The list of dependencies is maintained (automatically with the +``update-providers-dependencies`` pre-commit) in the ``generated/provider_dependencies.json``. +The same pre-commit also updates the generated dependencies in ``pyproject.toml``. Cross-dependencies between provider packages are converted into extras - if you need functionality from the other provider package you can install it adding [extra] after the @@ -735,8 +768,9 @@ the other provider package you can install it adding [extra] after the transfer operators from Amazon ECS. If you add a new dependency between different providers packages, it will be detected automatically during -and pre-commit will generate new entry in ``generated/provider_dependencies.json`` so that -the package extra dependencies are properly handled when package is installed. +and pre-commit will generate new entry in ``generated/provider_dependencies.json`` and update +``pyproject.toml`` so that the package extra dependencies are properly handled when the package +might be installed - when breeze is restarted, by your IDE, or by running ``pip install -e ".[devel]"``. Developing community managed provider packages ---------------------------------------------- @@ -746,27 +780,26 @@ They are part of the same repository as Apache Airflow (we use ``monorepo`` appr parts of the system are developed in the same repository but then they are packaged and released separately). All the community-managed providers are in 'airflow/providers' folder and they are all sub-packages of 'airflow.providers' package. All the providers are available as ``apache-airflow-providers-`` -packages. +packages when installed by users, but when you contribute to providers you can work on airflow main +and install provider dependencies via ``editable`` extras - without having to manage and install providers +separately. You can easily run tests for the providers and, when you run airflow from the ``main`` +sources, all community providers are automatically available for you. The capabilities of the community-managed providers are the same as the third-party ones. When the providers are installed from PyPI, they provide the entry-point containing the metadata as described in the previous chapter. However when they are locally developed, together with Airflow, the mechanism of discovery of the providers is based on ``provider.yaml`` file that is placed in the top-folder of -the provider. 
The ``provider.yaml`` is the single source of truth for the provider metadata and it is +there where you should add and remove dependencies for providers (followed by running +``update-providers-dependencies`` pre-commit to synchronize the dependencies with ``pyproject.toml`` +of Airflow). -* When Airflow is installed locally in editable mode (``pip install -e``) the provider packages installed - from PyPI are uninstalled and the provider discovery mechanism finds the providers in the Airflow - sources by searching for provider.yaml files. +The ``provider.yaml`` file is compliant with the schema that is available in +`json-schema specification `_. -* When you want to install Airflow from sources you can set ``INSTALL_PROVIDERS_FROM_SOURCES`` variable - to ``true`` and then the providers will not be installed from PyPI packages, but they will be installed - from local sources as part of the ``apache-airflow`` package, but additionally the ``provider.yaml`` files - are copied together with the sources, so that capabilities and names of the providers can be discovered. - This mode is especially useful when you are developing a new provider, that cannot be installed from - PyPI and you want to check if it installs cleanly. +Thanks to that mechanism, you can develop community managed providers in a seamless way directly from +Airflow sources, without preparing and releasing them as packages separately, which would be rather +complicated. Regardless if you plan to contribute your provider, when you are developing your own, custom providers, you can use the above functionality to make your development easier. You can add your provider @@ -774,6 +807,7 @@ as a sub-folder of the ``airflow.providers`` package, add the ``provider.yaml`` in development mode - then capabilities of your provider will be discovered by airflow and you will see the provider among other providers in ``airflow providers`` command output. + Documentation for the community managed providers ------------------------------------------------- @@ -799,12 +833,13 @@ You can see for example ``google`` provider which has very comprehensive documen * `Documentation `_ * `Example DAGs `_ -Part of the documentation are example dags. We are using the example dags for various purposes in -providers: +Part of the documentation are example dags (placed in the ``tests/system`` folder). The reason why +they are in ``tests/system`` is because we are using the example dags for various purposes: * showing real examples of how your provider classes (Operators/Sensors/Transfers) can be used * snippets of the examples are embedded in the documentation via ``exampleinclude::`` directive -* examples are executable as system tests +* examples are executable as system tests and some of our stakeholders run them regularly to + check if ``system`` level integration is still working, before releasing a new version of the provider. Testing the community managed providers --------------------------------------- @@ -841,8 +876,7 @@ be open to allow several different libraries with the same requirements to be in The problem is that Apache Airflow is a bit of both - application to install and library to be used when you are developing your own operators and DAGs. -This - seemingly unsolvable - puzzle is solved by having pinned constraints files. Those are available -as of airflow 1.10.10 and further improved with 1.10.12 (moved to separate orphan branches) +This - seemingly unsolvable - puzzle is solved by having pinned constraints files. 
Pinned constraint files ======================= @@ -906,7 +940,7 @@ requirements). .. code-block:: bash - pip install -e . \ + pip install -e ".[devel]" \ --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.8.txt" @@ -931,7 +965,7 @@ If you want to update just airflow dependencies, without paying attention to pro The ``constraints-.txt`` and ``constraints-no-providers-.txt`` -will be automatically regenerated by CI job every time after the ``setup.py`` is updated and pushed +will be automatically regenerated by CI job every time after the ``pyproject.toml`` is updated and pushed if the tests are successful. diff --git a/CONTRIBUTORS_QUICK_START.rst b/CONTRIBUTORS_QUICK_START.rst index e79c9cbe0232e..ffe30e753dd91 100644 --- a/CONTRIBUTORS_QUICK_START.rst +++ b/CONTRIBUTORS_QUICK_START.rst @@ -151,14 +151,13 @@ Pyenv and setting up virtual-env basic system-level dependencies on Debian/Ubuntu-like system. You will have to adapt it to install similar packages if your operating system is MacOS or another flavour of Linux - .. code-block:: bash sudo apt install openssl sqlite default-libmysqlclient-dev libmysqlclient-dev postgresql If you want to install all airflow providers, more system dependencies might be needed. For example on Debian/Ubuntu -like system, this command will install all necessary dependencies that should be installed when you use ``devel_all`` -extra while installing airflow. +like system, this command will install all necessary dependencies that should be installed when you use +``devel-all`` extra while installing airflow. .. code-block:: bash diff --git a/Dockerfile b/Dockerfile index e493d99ec0e58..47c904e56679f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,7 +35,7 @@ # much smaller. 
# # Use the same builder frontend version for everyone -ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf.kubernetes,common.io,docker,elasticsearch,ftp,google,google_auth,graphviz,grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,openlineage,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,virtualenv" +ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf-kubernetes,common-io,docker,elasticsearch,ftp,google,google-auth,graphviz,grpc,hashicorp,http,ldap,microsoft-azure,mysql,odbc,openlineage,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,virtualenv" ARG ADDITIONAL_AIRFLOW_EXTRAS="" ARG ADDITIONAL_PYTHON_DEPS="" @@ -760,10 +760,11 @@ function install_airflow() { "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} if [[ -n "${AIRFLOW_INSTALL_EDITABLE_FLAG}" ]]; then - # Remove airflow and reinstall it using editable flag + # Remove airflow and all providers and reinstall it using editable flag # We can only do it when we install airflow from sources set -x - pip uninstall apache-airflow --yes + pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes 2>/dev/null || true + pip uninstall apache-airflow --yes 2>/dev/null || true pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" @@ -785,7 +786,7 @@ function install_airflow() { "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" common::install_pip_version - # then upgrade if needed without using constraints to account for new limits in setup.py + # then upgrade if needed without using constraints to account for new limits in pyproject.toml pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ @@ -1288,17 +1289,13 @@ ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" # By default PIP has progress bar but you can disable it. ARG PIP_PROGRESS_BAR # By default we do not use pre-cached packages, but in CI/Breeze environment we override this to speed up -# builds in case setup.py/setup.cfg changed. This is pure optimisation of CI/Breeze builds. +# builds in case pyproject.toml changed. This is pure optimisation of CI/Breeze builds. ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="false" # This is airflow version that is put in the label of the image build ARG AIRFLOW_VERSION # By default latest released version of airflow is installed (when empty) but this value can be overridden # and we can install version according to specification (For example ==2.0.2 or <3.0.0). ARG AIRFLOW_VERSION_SPECIFICATION -# By default we install providers from PyPI but in case of Breeze build we want to install providers -# from local sources without the need of preparing provider packages upfront. This value is -# automatically overridden by Breeze scripts. -ARG INSTALL_PROVIDERS_FROM_SOURCES="false" # Determines the way airflow is installed. By default we install airflow from PyPI `apache-airflow` package # But it also can be `.` from local installation or GitHub URL pointing to specific branch or tag # Of Airflow. 
Note That for local source installation you need to have local sources of @@ -1327,7 +1324,6 @@ ARG ADDITIONAL_PIP_INSTALL_FLAGS="" ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \ - INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \ AIRFLOW_VERSION=${AIRFLOW_VERSION} \ AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} \ AIRFLOW_VERSION_SPECIFICATION=${AIRFLOW_VERSION_SPECIFICATION} \ @@ -1372,8 +1368,7 @@ ARG USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES="false" # In case of Production build image segment we want to pre-install main version of airflow # dependencies from GitHub so that we do not have to always reinstall it from the scratch. -# The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false") -# are uninstalled, only dependencies remain +# The Airflow and providers are uninstalled, only dependencies remain # the cache is only used when "upgrade to newer dependencies" is not set to automatically # account for removed dependencies (we do not install them in the first place) and in case # INSTALL_PACKAGES_FROM_CONTEXT is not set (because then caching it from main makes no sense). diff --git a/Dockerfile.ci b/Dockerfile.ci index 3364ee234fe2c..0f10757de59ed 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -598,10 +598,11 @@ function install_airflow() { "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} if [[ -n "${AIRFLOW_INSTALL_EDITABLE_FLAG}" ]]; then - # Remove airflow and reinstall it using editable flag + # Remove airflow and all providers and reinstall it using editable flag # We can only do it when we install airflow from sources set -x - pip uninstall apache-airflow --yes + pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes 2>/dev/null || true + pip uninstall apache-airflow --yes 2>/dev/null || true pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" @@ -623,7 +624,7 @@ function install_airflow() { "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" common::install_pip_version - # then upgrade if needed without using constraints to account for new limits in setup.py + # then upgrade if needed without using constraints to account for new limits in pyproject.toml pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ @@ -899,7 +900,7 @@ function check_download_sqlalchemy() { if [[ ${DOWNGRADE_SQLALCHEMY=} != "true" ]]; then return fi - min_sqlalchemy_version=$(grep "sqlalchemy>=" setup.cfg | sed "s/.*>=\([0-9\.]*\).*/\1/") + min_sqlalchemy_version=$(grep "\"sqlalchemy>=" pyproject.toml | sed "s/.*>=\([0-9\.]*\).*/\1/" | xargs) echo echo "${COLOR_BLUE}Downgrading sqlalchemy to minimum supported version: ${min_sqlalchemy_version}${COLOR_RESET}" echo @@ -1057,8 +1058,6 @@ ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" # It can also be overwritten manually by setting the AIRFLOW_CI_BUILD_EPOCH environment variable. 
ARG AIRFLOW_CI_BUILD_EPOCH="6" ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="true" -# By default in the image, we are installing all providers when installing from sources -ARG INSTALL_PROVIDERS_FROM_SOURCES="true" ARG AIRFLOW_PIP_VERSION=23.3.2 # Setup PIP # By default PIP install run without cache to make image smaller @@ -1088,7 +1087,6 @@ ENV AIRFLOW_REPO=${AIRFLOW_REPO}\ DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \ AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} \ AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \ - INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \ AIRFLOW_VERSION=${AIRFLOW_VERSION} \ AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ # In the CI image we always: @@ -1116,11 +1114,12 @@ RUN echo "Airflow version: ${AIRFLOW_VERSION}" # force them on the main Airflow package. Currently we need no extra limits as PIP 23.1+ has much better # dependency resolution and we do not need to limit the versions of the dependencies # -# Aiobotocore is limited for eager upgrade because it either causes a long backtracking or -# conflict when we do not limit it. It seems that `pip` has a hard time figuring the right -# combination of dependencies for aiobotocore, botocore, boto3 and s3fs together +# boto3 is limited to <1.34 because of aiobotocore that only works with 1.33 and we want to help +# `pip` to limit the versions it checks and limit backtracking, by explicitly specifying these limits +# when performing eager upgrade of dependencies - this way it won't even consider 1.34 versions of boto +# We should update it every time a new version of aiobotocore is released supporting 1.34 # -ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="aiobotocore>=2.5.4" +ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="boto3>=1.33,<1.34" ARG UPGRADE_TO_NEWER_DEPENDENCIES="false" ARG VERSION_SUFFIX_FOR_PYPI="" @@ -1138,8 +1137,7 @@ COPY --from=scripts install_pip_version.sh install_airflow_dependencies_from_bra # In case of CI builds we want to pre-install main version of airflow dependencies so that # We do not have to always reinstall it from the scratch. # And is automatically reinstalled from the scratch every time patch release of python gets released -# The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false") -# are uninstalled, only dependencies remain. +# The Airflow and providers are uninstalled, only dependencies remain. # the cache is only used when "upgrade to newer dependencies" is not set to automatically # account for removed dependencies (we do not install them in the first place) RUN bash /scripts/docker/install_pip_version.sh; \ @@ -1158,21 +1156,22 @@ COPY --from=scripts install_pipx_tools.sh /scripts/docker/ RUN bash /scripts/docker/install_pipx_tools.sh # Airflow sources change frequently but dependency configuration won't change that often -# We copy setup.py and other files needed to perform setup of dependencies -# So in case setup.py changes we can install latest dependencies required. -COPY setup.py ${AIRFLOW_SOURCES}/setup.py -COPY setup.cfg ${AIRFLOW_SOURCES}/setup.cfg +# We copy pyproject.toml and other files needed to perform setup of dependencies +# So in case pyproject.toml changes we can install latest dependencies required. 
+COPY pyproject.toml ${AIRFLOW_SOURCES}/pyproject.toml COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/ -COPY generated/provider_dependencies.json ${AIRFLOW_SOURCES}/generated/ +COPY generated/* ${AIRFLOW_SOURCES}/generated/ COPY constraints/* ${AIRFLOW_SOURCES}/constraints/ - +COPY LICENSE ${AIRFLOW_SOURCES}/LICENSE +COPY dev/airflow_pre_installed_providers.txt ${AIRFLOW_SOURCES}/dev/airflow_pre_installed_providers.txt +COPY dev/hatch_build.py ${AIRFLOW_SOURCES}/dev/hatch_build.py COPY --from=scripts install_airflow.sh /scripts/docker/ -# The goal of this line is to install the dependencies from the most current setup.py from sources +# The goal of this line is to install the dependencies from the most current pyproject.toml from sources # This will be usually incremental small set of packages in CI optimized build, so it will be very fast # In non-CI optimized build this will install all dependencies before installing sources. -# Usually we will install versions based on the dependencies in setup.py and upgraded only if needed. -# But in cron job we will install latest versions matching setup.py to see if there is no breaking change +# Usually we will install versions based on the dependencies in pyproject.toml and upgraded only if needed. +# But in cron job we will install latest versions matching pyproject.toml to see if there is no breaking change # and push the constraints if everything is successful RUN bash /scripts/docker/install_airflow.sh diff --git a/IMAGES.rst b/IMAGES.rst index 9cb6d94bb3750..809d33a3855e3 100644 --- a/IMAGES.rst +++ b/IMAGES.rst @@ -54,7 +54,7 @@ CI image The CI image is used by `Breeze `_ as the shell image but it is also used during CI tests. The image is single segment image that contains Airflow installation with "all" dependencies installed. It is optimised for rebuild speed. It installs PIP dependencies from the current branch first - -so that any changes in ``setup.py`` do not trigger reinstalling of all dependencies. +so that any changes in ``pyproject.toml`` do not trigger reinstalling of all dependencies. There is a second step of installation that re-installs the dependencies from the latest sources so that we are sure that latest dependencies are installed. diff --git a/INSTALL b/INSTALL index 832fd8603d7d3..8fdd933e90082 100644 --- a/INSTALL +++ b/INSTALL @@ -1,118 +1,304 @@ # INSTALL / BUILD instructions for Apache Airflow -This is a generic installation method that requires a number of dependencies to be installed. +## Basic installation of Airflow from sources and development environment setup + +This is a generic installation method that requires a minimum set of standard tools to develop airflow and +test it in a local virtual environment (using a standard CPython installation and `pip`). 
Depending on your system you might need different prerequisites, but the following systems/prerequisites are known to work: -Linux (Debian Bullseye, Bookworm and Linux Mint Debbie): +Linux (Debian Bookworm): + + sudo apt install -y --no-install-recommends apt-transport-https apt-utils ca-certificates \ + curl dumb-init freetds-bin gosu krb5-user libgeos-dev \ + ldap-utils libsasl2-2 libsasl2-modules libxmlsec1 locales libffi8 libldap-2.5-0 libssl3 netcat-openbsd \ + lsb-release openssh-client python3-selinux rsync sasl2-bin sqlite3 sudo unixodbc + +You might need to install MariaDB development headers to build some of the dependencies + + sudo apt-get install libmariadb-dev libmariadbclient-dev + +On MacOS (Mojave/Catalina) you might need to install XCode command line tools and brew and those packages: + + brew install sqlite mysql postgresql + +## Downloading and installing Airflow from sources + +While you can get Airflow sources in various ways (including cloning https://github.com/apache/airflow/), the +canonical way to download it is to fetch the tarball published at https://downloads.apache.org where you can +also verify checksums and signatures of the downloaded file. You can then un-tar the source and move into the +directory that was un-tarred. + +When you download source packages from https://downloads.apache.org, you download sources of Airflow and +all providers separately, however when you clone the GitHub repository at https://github.com/apache/airflow/ +you get all sources in one place. This is the most convenient way to develop Airflow and Providers together. +Otherwise you have to separately install Airflow and Providers from sources in the same environment, which +is not as convenient. + +## Creating virtualenv + +Airflow pulls in quite a lot of dependencies in order to connect to other services. You generally want to +test or run Airflow from a virtual env to make sure those dependencies are separated from your system +wide versions. Using the system-installed Python is strongly discouraged as the versions of Python +shipped with the operating system often have a number of limitations and are not up to date. It is recommended +to install Python using either https://www.python.org/downloads/ or other tools that use them. See later +for description of `Hatch` as one of the tools that is Airflow's tool of choice to build Airflow packages. + +Once you have a suitable Python version installed, you can create a virtualenv and activate it: + + python3 -m venv PATH_TO_YOUR_VENV + source PATH_TO_YOUR_VENV/bin/activate + +## Installing airflow locally + +Installing airflow locally can be done using pip - note that this will install the "development" version of +Airflow, where all providers are installed from local sources (if they are available), not from `pypi`. +It will also not include pre-installed providers installed from PyPI. In case you install from sources of +just Airflow, you need to install separately each provider that you want to develop. In case you install +from GitHub repository, all the current providers are available after installing Airflow. + + pip install . + +If you develop Airflow and iterate on it you should install it in editable mode (with the -e flag) and then +you do not need to re-install it after each change to sources. This is useful if you want to develop and +iterate on Airflow and Providers (together) when you install sources from the cloned GitHub repository. 
+ +Note that you might want to install the `devel` extra when you install airflow for development in an editable env +as this one contains the minimum set of tools and dependencies that are needed to run unit tests. + + + pip install -e ".[devel]" + + +You can also install optional packages that are needed to run certain tests. In case of local installation +for example you can install all prerequisites for the google provider, tests and +all hadoop providers with this command: + + pip install -e ".[google,devel-tests,devel-hadoop]" + + +or you can install all packages needed to run tests for core, providers and all extensions of airflow: + + pip install -e ".[devel-all]" + +You can see the list of all available extras below. + +# Using Hatch to manage your Python, virtualenvs and build packages + +Airflow uses [hatch](https://hatch.pypa.io/) as a build and development tool of choice. It is one of the popular +build tools and environment managers for Python, maintained by the Python Packaging Authority. +It is an optional tool that is only really needed when you want to build packages from sources, but +it is also very convenient to manage your Python versions and virtualenvs. + +The Airflow project contains some pre-defined virtualenv definitions in ``pyproject.toml`` that can be +easily used by hatch to create your local venvs. This is not necessary for you to develop and test +Airflow, but it is a convenient way to manage your local Python versions and virtualenvs. + +## Installing Hatch + +You can install hatch in various ways (including GUI installers). + +Example using `pipx`: + + pipx install hatch + +We recommend using `pipx` as you can manage installed Python apps easily and later use it +to upgrade `hatch` easily as needed with: -sudo apt install build-essential python3-dev libsqlite3-dev openssl \ - sqlite default-libmysqlclient-dev libmysqlclient-dev postgresql + pipx upgrade hatch -On Ubuntu 20.04 you may get an error of mariadb_config not found -and mysql_config not found. +## Using Hatch to manage your Python versions -Install MariaDB development headers: -sudo apt-get install libmariadb-dev libmariadbclient-dev +You can also use hatch to install and manage airflow virtualenvs and development +environments. For example, you can install Python 3.10 with this command: -MacOS (Mojave/Catalina): + hatch python install 3.10 -brew install sqlite mysql postgresql +or install all Python versions that are used in Airflow: -# [required] fetch the tarball and untar the source move into the directory that was untarred. + hatch python install all -# [optional] run Apache RAT (release audit tool) to validate license headers -# RAT docs here: https://creadur.apache.org/rat/. Requires Java and Apache Rat -java -jar apache-rat.jar -E ./.rat-excludes -d . +## Using Hatch to manage your virtualenvs -# [optional] Airflow pulls in quite a lot of dependencies in order -# to connect to other services. You might want to test or run Airflow -# from a virtual env to make sure those dependencies are separated -# from your system wide versions +Airflow has some pre-defined virtualenvs that you can use to develop and test airflow. +You can see the list of available envs with: -python3 -m venv PATH_TO_YOUR_VENV -source PATH_TO_YOUR_VENV/bin/activate + hatch show env -# [required] building and installing by pip (preferred) -pip install . 
+This is what it shows currently: -# or directly -python setup.py install +┏━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ Name ┃ Type ┃ Features ┃ Description ┃ +┡━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ default │ virtual │ devel │ Default environment with Python 3.8 for maximum compatibility │ +├─────────────┼─────────┼──────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-38 │ virtual │ │ Environment with Python 3.8. No devel installed. │ +├─────────────┼─────────┼──────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-39 │ virtual │ │ Environment with Python 3.9. No devel installed. │ +├─────────────┼─────────┼──────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-310 │ virtual │ │ Environment with Python 3.10. No devel installed. │ +├─────────────┼─────────┼──────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-311 │ virtual │ │ Environment with Python 3.11. No devel installed │ +└─────────────┴─────────┴──────────┴───────────────────────────────────────────────────────────────┘ -# You can also install recommended version of the dependencies by using -# constraint-python.txt files as constraint file. This is needed in case -# you have problems with installing the current requirements from PyPI. -# There are different constraint files for different python versions. For example" +The default env (if you have not used one explicitly) is `default` and it is a Python 3.8 +virtualenv for maximum compatibility with `devel` extra installed - this devel extra contains the minimum set +of dependencies and tools that should be used during unit testing of core Airflow and running all `airflow` +CLI commands - without support for providers or databases. -pip install . \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt" +The other environments are just bare-bones Python virtualenvs with Airflow core requirements only, +without any extras installed and without any tools. They are much faster to create than the default +environment, and you can manually install either appropriate extras or directly tools that you need for +testing or development. + hatch env create -By default `pip install` in Airflow 2.0 installs only the provider packages that are needed by the extras and -install them as packages from PyPI rather than from local sources: +You can create specific environment by using them in create command: -pip install .[google,amazon] \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt" + hatch env create airflow-310 +You can install extras in the environment by running pip command: -You can upgrade just airflow, without paying attention to provider's dependencies by using 'constraints-no-providers' -constraint files. This allows you to keep installed provider packages. + hatch -e airflow-310 run -- pip install -e ".[devel,google]" -pip install . --upgrade \ +And you can enter the environment with running a shell of your choice (for example zsh) where you +can run any commands + + hatch -e airflow-310 shell + +Once you are in the environment (indicated usually by updated prompt), you can just install +extra dependencies you need: + + [~/airflow] [airflow-310] pip install -e ".[devel,google]" + +You can exit the environment by just exiting the shell. 
+
+You can also see where hatch created the virtualenvs and use it in your IDE or activate it manually:
+
+    hatch env find airflow-310
+
+You will get a path similar to:
+
+    /Users/jarek/Library/Application Support/hatch/env/virtual/apache-airflow/TReRdyYt/apache-airflow
+
+Then you will find the `python` binary and `activate` script in the `bin` sub-folder of this directory and
+you can configure your IDE to use this python virtualenv if you want to use that environment in your IDE.
+
+You can also set the default environment name via the HATCH_ENV environment variable.
+
+You can clean the env by running:
+
+    hatch env prune
+
+More information about hatch can be found at https://hatch.pypa.io/1.9/environment/
+
+## Using Hatch to build your packages
+
+You can use hatch to build an installable package from the airflow sources. Such a package will
+include all the metadata that is configured in `pyproject.toml` and will be installable with pip.
+
+The packages will have pre-installed dependencies for providers that are always
+installed when Airflow is installed from PyPI. By default both `wheel` and `sdist` packages are built.
+
+    hatch build
+
+You can also build only `wheel` or `sdist` packages:
+
+    hatch build -t wheel
+    hatch build -t sdist
+
+## Installing recommended version of dependencies
+
+Whatever virtualenv solution you use, when you want to make sure you are using the same
+version of dependencies as in main, you can install the recommended version of the dependencies by using
+the constraint-python.txt files as a `constraint` file. This might be useful
+to avoid "works-for-me" syndrome, where you use a different version of dependencies than the ones
+that are used in main, CI tests and by other contributors.
+
+There are different constraint files for different python versions. For example, this command will install
+all basic devel requirements and requirements of the google provider as last successfully tested for Python 3.8:
+
+    pip install -e ".[devel,google]" \
+      --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt"
+
+You can upgrade just airflow, without paying attention to the provider's dependencies, by using
+the 'constraints-no-providers' constraint files. This allows you to keep the installed provider dependencies
+and upgrade the core dependencies to the latest versions supported by airflow core.
+
+pip install -e ".[devel]" \
   --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.8.txt"
+## All airflow extras
+
+Airflow has a number of extras that you can install to get additional dependencies. They sometimes install
+providers, sometimes enable other features where packages are not installed by default.
+
+You can read more about those extras in the extras reference:
+https://airflow.apache.org/docs/apache-airflow/stable/extra-packages-ref.html
+
+The list of available extras is below.
+
+Regular extras that are available for users in the Airflow package.
+
+# START REGULAR EXTRAS HERE
+aiobotocore, airbyte, alibaba, all, all-core, all-dbs, amazon, apache-atlas, apache-beam, apache-
+cassandra, apache-drill, apache-druid, apache-flink, apache-hdfs, apache-hive, apache-impala,
+apache-kafka, apache-kylin, apache-livy, apache-pig, apache-pinot, apache-spark, apache-webhdfs,
+apprise, arangodb, asana, async, atlas, atlassian-jira, aws, azure, cassandra, celery, cgroups,
+cloudant, cncf-kubernetes, cohere, common-io, common-sql, crypto, databricks, datadog, dbt-cloud,
+deprecated-api, dingding, discord, docker, druid, elasticsearch, exasol, facebook, ftp, gcp,
+gcp_api, github, github-enterprise, google, google-auth, graphviz, grpc, hashicorp, hdfs, hive,
+http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft-azure,
+microsoft-mssql, microsoft-psrp, microsoft-winrm, mongo, mssql, mysql, neo4j, odbc, openai,
+openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas, papermill, password,
+pgvector, pinecone, pinot, postgres, presto, rabbitmq, redis, s3, s3fs, salesforce, samba, saml,
+segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd,
+tableau, tabular, telegram, trino, vertica, virtualenv, weaviate, webhdfs, winrm, yandex, zendesk
+# END REGULAR EXTRAS HERE
+
+Devel extras - used to install development-related tools. Only available during editable install.
+
+# START DEVEL EXTRAS HERE
+devel, devel-all, devel-all-dbs, devel-ci, devel-debuggers, devel-devscripts, devel-duckdb, devel-
+hadoop, devel-mypy, devel-sentry, devel-static-checks, devel-tests
+# END DEVEL EXTRAS HERE
+
+Doc extras - used to install dependencies that are needed to build documentation. Only available during
+editable install.
+
+# START DOC EXTRAS HERE
+doc, doc-gen
+# END DOC EXTRAS HERE
-You can also install airflow in "editable mode" (with -e) flag and then provider packages are
-available directly from the sources (and the provider packages installed from PyPI are UNINSTALLED in
-order to avoid having providers in two places. And `provider.yaml` files are used to discover capabilities
-of the providers which are part of the airflow source code.
+## Compiling front end assets
-You can read more about `provider.yaml` and community-managed providers in
-https://airflow.apache.org/docs/apache-airflow-providers/index.html for developing custom providers
-and in ``CONTRIBUTING.rst`` for developing community maintained providers.
+Sometimes you can see that front-end assets are missing and the website looks broken. This is because
+you need to compile front-end assets. This is done automatically when you create a virtualenv
+with hatch, but if you want to do it manually, you can do it after installing node and yarn and running:
-This is useful if you want to develop providers:
+    yarn install --frozen-lockfile
+    yarn run build
-pip install -e . \
-  --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt"
+Currently we are running the yarn that comes with node 18.6.0, but you should check the version in
+our `.pre-commit-config.yaml` file (node version).
-You can also skip installing provider packages from PyPI by setting INSTALL_PROVIDERS_FROM_SOURCE to "true".
-In this case Airflow will be installed in non-editable mode with all providers installed from the sources.
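+
+Note that the `yarn` commands above have to be executed where the UI sources and `package.json` live -
+assuming that is the `airflow/www` folder of the source tree, a manual build could look like this:
+
+    cd airflow/www
+    yarn install --frozen-lockfile
+    yarn run build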
-Additionally `provider.yaml` files will also be copied to providers folders which will make the providers -discoverable by Airflow even if they are not installed from packages in this case. +Installing yarn is described in https://classic.yarnpkg.com/en/docs/install -INSTALL_PROVIDERS_FROM_SOURCES="true" pip install . \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt" +Also - in case you use `breeze` or have `pre-commit` installed you can build the assets with: -Airflow can be installed with extras to install some additional features (for example 'async' or 'doc' or -to install automatically providers and all dependencies needed by that provider: + pre-commit run --hook-stage manual compile-www-assets --all-files -pip install .[async,google,amazon] \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt" +or -The list of available extras: + breeze compile-www-assets -# START EXTRAS HERE -aiobotocore, airbyte, alibaba, all, all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, -apache.drill, apache.druid, apache.flink, apache.hdfs, apache.hive, apache.impala, apache.kafka, -apache.kylin, apache.livy, apache.pig, apache.pinot, apache.spark, apache.webhdfs, apprise, -arangodb, asana, async, atlas, atlassian.jira, aws, azure, cassandra, celery, cgroups, cloudant, -cncf.kubernetes, cohere, common.io, common.sql, crypto, databricks, datadog, dbt.cloud, -deprecated_api, devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, -druid, elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, -google_auth, graphviz, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, -kubernetes, ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, -mssql, mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, otel, -pagerduty, pandas, papermill, password, pgvector, pinecone, pinot, postgres, presto, rabbitmq, -redis, s3, s3fs, salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, -snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, -weaviate, webhdfs, winrm, yandex, zendesk -# END EXTRAS HERE +Both commands will install node and yarn if needed to a dedicated pre-commit node environment and +then build the assets. -# For installing Airflow in development environments - see CONTRIBUTING.rst +Finally you can also clean and recompile assets with ``custom`` build target when running hatch build -# COMPILING FRONT-END ASSETS (in case you see "Please make sure to build the frontend in static/ directory and then restart the server") -# Optional : Installing yarn - https://classic.yarnpkg.com/en/docs/install + hatch build -t custom -t wheel -t sdist -python setup.py compile_assets +This will also update `git_version` file in airflow package that should contain the git commit hash of the +build. This is used to display the commit hash in the UI. diff --git a/LOCAL_VIRTUALENV.rst b/LOCAL_VIRTUALENV.rst index 753f3a9f29e0d..e7dc4d4ee423c 100644 --- a/LOCAL_VIRTUALENV.rst +++ b/LOCAL_VIRTUALENV.rst @@ -21,26 +21,29 @@ Local Virtual Environment (virtualenv) ====================================== -Use the local virtualenv development option in combination with the `Breeze -`_ development environment. 
This option helps
-you benefit from the infrastructure provided
-by your IDE (for example, IntelliJ PyCharm/IntelliJ Idea) and work in the
-environment where all necessary dependencies and tests are available and set up
-within Docker images.
+The easiest way to run tests for Airflow is to use a local virtualenv. While Breeze is the recommended
+way to run tests - because it provides a reproducible environment and is easy to set up - it is not
+always the best option, as you need to run your tests inside a docker container. This might make it
+harder to debug the tests and to use your IDE to run them.
-But you can also use the local virtualenv as a standalone development option if you
-develop Airflow functionality that does not incur large external dependencies and
-CI test coverage.
+That's why we recommend using the local virtualenv for development and testing.
-These are examples of the development options available with the local virtualenv in your IDE:
+The simplest way to install Airflow in a local virtualenv is to use ``pip``:
-* local debugging;
-* Airflow source view;
-* auto-completion;
-* documentation support;
-* unit tests.
+.. code:: bash
-This document describes minimum requirements and instructions for using a standalone version of the local virtualenv.
+    pip install -e ".[devel,]"  # for example: pip install -e ".[devel,google,postgres]"
+
+This will install Airflow in 'editable' mode - where sources of Airflow are taken directly from the source
+code rather than moved to the installation directory. You need to run this command in the virtualenv you
+want to install Airflow in - and you need to have the virtualenv activated.
+
+While you can use any virtualenv manager, we recommend using `Hatch <https://hatch.pypa.io/>`__
+as your development environment front-end, and we already use the Hatch backend ``hatchling`` for Airflow.
+
+Hatchling is automatically installed when you build Airflow, but since the airflow build system uses
+a PEP-compliant ``pyproject.toml`` file, you can use any front-end build system that supports
+``PEP 517`` and ``PEP 518``. You can also use ``pip`` to install Airflow in editable mode.
 Prerequisites
 =============
@@ -88,7 +91,7 @@ Extra Packages
 You can also install extra packages (like ``[ssh]``, etc) via
-``pip install -e [EXTRA1,EXTRA2 ...]``. However, some of them may
+``pip install -e [devel,EXTRA1,EXTRA2 ...]``. However, some of them may
 have additional install and setup requirements for your local system.
 For example, if you have a trouble installing the mysql client on macOS and get
@@ -108,129 +111,165 @@ You are STRONGLY encouraged to also install and use `pre-commit hooks
 `_.
+The full list of extras is available in ``_ and can be easily retrieved using hatch via
-Creating a Local virtualenv
-===========================
-To use your IDE for Airflow development and testing, you need to configure a virtual
-environment. Ideally you should set up virtualenv for all Python versions that Airflow
-supports (3.8, 3.9, 3.10, 3.11).
+Using Hatch
+===========
-To create and initialize the local virtualenv:
+Airflow uses [hatch](https://hatch.pypa.io/) as a build and development tool of choice. It is one of the popular
+build tools and environment managers for Python, maintained by the Python Packaging Authority.
+It is an optional tool that is only really needed when you want to build packages from sources, but
+it is also very convenient to manage your Python versions and virtualenvs.
-1. Create an environment with one of the two options:
+The Airflow project contains some pre-defined virtualenv definitions in ``pyproject.toml`` that can be
+easily used by hatch to create your local venvs. This is not necessary for you to develop and test
+Airflow, but it is a convenient way to manage your local Python versions and virtualenvs.
-   - Option 1: consider using one of the following utilities to create virtual environments and easily switch between them with the ``workon`` command:
+Installing Hatch
+----------------
-     - `pyenv `_
-     - `pyenv-virtualenv `_
-     - `virtualenvwrapper `_
+You can install hatch in various ways (including GUI installers).
-     ``mkvirtualenv --python=python``
+Example using ``pipx``:
-   - Option 2: create a local virtualenv with Conda
+.. code:: bash
-     - install `miniconda3 `_
+    pipx install hatch
-     .. code-block:: bash
+We recommend using ``pipx`` as you can manage installed Python apps easily and later use it
+to upgrade ``hatch`` easily as needed with:
-        conda create -n airflow python=3.8  # or 3.9, 3.10, 3.11
-        conda activate airflow
+.. code:: bash
-2. Install Python PIP requirements:
+    pipx upgrade hatch
-.. note::
+Using Hatch to manage your Python versions
+------------------------------------------
-   Only ``pip`` installation is currently officially supported.
+You can also use hatch to install and manage airflow virtualenvs and development
+environments. For example, you can install Python 3.10 with this command:
-   While they are some successes with using other tools like `poetry `_ or
-   `pip-tools `_, they do not share the same workflow as
-   ``pip`` - especially when it comes to constraint vs. requirements management.
-   Installing via ``Poetry`` or ``pip-tools`` is not currently supported.
+.. code:: bash
-   There are known issues with ``bazel`` that might lead to circular dependencies when using it to install
-   Airflow. Please switch to ``pip`` if you encounter such problems. ``Bazel`` community works on fixing
-   the problem in `this PR `_ so it might be that
-   newer versions of ``bazel`` will handle it.
+    hatch python install 3.10
-   If you wish to install airflow using those tools you should use the constraint files and convert
-   them to appropriate format and workflow that your tool requires.
+or install all Python versions that are used in Airflow:
+.. code:: bash
-   .. code-block:: bash
+    hatch python install all
-      pip install --upgrade -e ".[devel,]" # for example: pip install --upgrade -e ".[devel,google,postgres]"
+Manage your virtualenvs with Hatch
+----------------------------------
-In case you have problems with installing airflow because of some requirements are not installable, you can
-try to install it with the set of working constraints (note that there are different constraint files
-for different python versions). For development on current main source:
+Airflow has some pre-defined virtualenvs that you can use to develop and test airflow.
+You can see the list of available envs with:
- ..
code:: bash - # use the same version of python as you are working with, 3.8, 3.9, 3.10 or 3.11 - pip install -e ".[devel,]" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.8.txt" + hatch show env + +This is what it shows currently: + ++-------------+---------+----------+---------------------------------------------------------------+ +| Name | Type | Features | Description | ++=============+=========+==========+===============================================================+ +| default | virtual | devel | Default environment with Python 3.8 for maximum compatibility | ++-------------+---------+----------+---------------------------------------------------------------+ +| airflow-38 | virtual | devel | Environment with Python 3.8 | ++-------------+---------+----------+---------------------------------------------------------------+ +| airflow-39 | virtual | devel | Environment with Python 3.9 | ++-------------+---------+----------+---------------------------------------------------------------+ +| airflow-310 | virtual | devel | Environment with Python 3.10 | ++-------------+---------+----------+---------------------------------------------------------------+ +| airflow-311 | virtual | devel | Environment with Python 3.11 | ++-------------+---------+----------+---------------------------------------------------------------+ + +The default env (if you have not used one explicitly) is ``default`` and it is a Python 3.8 +virtualenv for maximum compatibility with ``devel`` extra installed - this devel extra contains the minimum set +of dependencies and tools that should be used during unit testing of core Airflow and running all ``airflow`` +CLI commands - without support for providers or databases. + +The other environments are just bare-bones Python virtualenvs with Airflow core requirements only, +without any extras installed and without any tools. They are much faster to create than the default +environment, and you can manually install either appropriate extras or directly tools that you need for +testing or development. -This will install Airflow in 'editable' mode - where sources of Airflow are taken directly from the source -code rather than moved to the installation directory. During the installation airflow will install - but then -automatically remove all provider packages installed from PyPI - instead it will automatically use the -provider packages available in your local sources. +.. code:: bash + + hatch env create + +You can create specific environment by using them in create command: + +.. code:: bash + + hatch env create airflow-310 + +You can install extras in the environment by running pip command: + +.. code:: bash + + hatch -e airflow-310 run -- pip install -e ".[devel,google]" + +And you can enter the environment with running a shell of your choice (for example zsh) where you +can run any commands + +.. code:: bash + + hatch -e airflow-310 shell + + +Once you are in the environment (indicated usually by updated prompt), you can just install +extra dependencies you need: -You can also install Airflow in non-editable mode: +.. code:: bash + + [~/airflow] [airflow-310] pip install -e ".[devel,google]" - .. 
code-block:: bash - # use the same version of python as you are working with, 3.8, 3.9, 3.10 or 3.11 - pip install ".[devel,]" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.8.txt" +You can also see where hatch created the virtualenvs and use it in your IDE or activate it manually: -This will copy the sources to directory where usually python packages are installed. You can see the list -of directories via ``python -m site`` command. In this case the providers are installed from PyPI, not from -sources, unless you set ``INSTALL_PROVIDERS_FROM_SOURCES`` environment variable to ``true`` +.. code:: bash - .. code-block:: bash + hatch env find airflow-310 - # use the same version of python as you are working with, 3.8, 3.9, 3.10 or 3.11 - INSTALL_PROVIDERS_FROM_SOURCES="true" pip install ".[devel,]" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.8.txt" +You will get path similar to: +.. code:: -Note: when you first initialize database (the next step), you may encounter some problems. -This is because airflow by default will try to load in example dags where some of them requires dependencies ``google`` and ``postgres``. -You can solve the problem by: + /Users/jarek/Library/Application Support/hatch/env/virtual/apache-airflow/TReRdyYt/apache-airflow -- installing the extras i.e. ``[devel,google,postgres]`` or -- disable the example dags with environment variable: ``export AIRFLOW__CORE__LOAD_EXAMPLES=False`` or -- simply ignore the error messages and proceed +Then you will find ``python`` binary and ``activate`` script in the ``bin`` sub-folder of this directory and +you can configure your IDE to use this python virtualenv if you want to use that environment in your IDE. -*In addition to above, you may also encounter problems during database migration.* -*This is a known issue and please see the progress here:* `AIRFLOW-6265 `_ +You can also set default environment name by HATCH_ENV environment variable. -3. Create the Airflow sqlite database: +You can clean the env by running: - .. code-block:: bash +.. code:: bash - # if necessary, start with a clean AIRFLOW_HOME, e.g. - # rm -rf ~/airflow - airflow db migrate + hatch env prune -4. Select the virtualenv you created as the project's default virtualenv in your IDE. +More information about hatch can be found in https://hatch.pypa.io/1.9/environment/ -Note that if you have the Breeze development environment installed, the ``breeze`` -script can automate initializing the created virtualenv (steps 2 and 3). -Activate your virtualenv, e.g. by using ``workon``, and once you are in it, run: +## Using Hatch to build your packages -.. code-block:: bash +You can use hatch to build installable package from the airflow sources. Such package will +include all metadata that is configured in ``pyproject.toml`` and will be installable with pip. - ./scripts/tools/initialize_virtualenv.py +The packages will have pre-installed dependencies for providers that are always +installed when Airflow is installed from PyPI. By default both ``wheel`` and ``sdist`` packages are built. -By default Breeze installs the ``devel`` extra only. You can optionally control which extras are -Adding extra dependencies as parameter. +.. code:: bash -.. code-block:: bash + hatch build - ./scripts/tools/initialize_virtualenv.py devel,google,postgres +You can also build only ``wheel`` or ``sdist`` packages: + +.. 
code:: bash
+
+    hatch build -t wheel
+    hatch build -t sdist
 Developing Providers
@@ -240,22 +279,84 @@ In Airflow 2.0 we introduced split of Apache Airflow into separate packages - th
 apache-airflow package with core of Airflow and 70+ packages for all providers (external services
 and software Airflow can communicate with).
-Developing providers is part of Airflow development, but when you install airflow as editable in your local
-development environment, the corresponding provider packages will be also installed from PyPI. However, the
-providers will also be present in your "airflow/providers" folder. This might lead to confusion,
-which sources of providers are imported during development. It will depend on your
-environment's PYTHONPATH setting in general.
+When you install airflow from sources using an editable install, you can develop both the main version
+of Airflow and the providers together, which is pretty convenient, because you can use the same environment for both.
+
+
+Running ``pip install -e .`` will install Airflow in editable mode, but all provider code will also be
+available in the same environment. However, most providers need some additional dependencies.
+
+You can install the dependencies of the provider you want to develop by installing airflow in editable
+mode with the provider id as an extra. You can see the list of provider's extras in the
+`extras reference <./docs/apache-airflow/extra-packages-ref.rst>`_.
+
+For example, if you want to develop the Google provider, you can install it with:
+
+.. code:: bash
+
+    pip install -e ".[devel,google]"
+
+In case a provider name is composed of several segments, you can use ``-`` to separate them. You can also
+install multiple extra dependencies at a time:
+
+.. code:: bash
+
+    pip install -e ".[devel,apache-beam,dbt-cloud]"
+
+The dependencies for providers are configured in the ``airflow/providers/PROVIDERS_FOLDER/provider.yaml`` file -
+separately for each provider. You can find there two types of ``dependencies`` - production runtime
+dependencies, and sometimes ``devel-dependencies`` which are needed to run tests. While the ``provider.yaml``
+file is the single source of truth for the dependencies, eventually they need to find their way to Airflow's
+``pyproject.toml``. This is done by running:
+
+.. code:: bash
+
+    pre-commit run update-providers-dependencies --all-files
+
+This will update ``pyproject.toml`` with the dependencies from ``provider.yaml`` files and from there
+it will be used automatically when you install Airflow in editable mode.
+
+If you want to add another dependency to a provider, you should add it to the corresponding ``provider.yaml``,
+run the command above and commit the changes to ``pyproject.toml``. Then running
+``pip install -e .[devel,PROVIDER_EXTRA]`` will install the new dependencies. Tools like ``hatch`` can also
+install the dependencies automatically when you create or switch to a development environment.
+
+
+Installing recommended version of dependencies
+==============================================
+
+Whatever virtualenv solution you use, when you want to make sure you are using the same
+version of dependencies as in main, you can install the recommended version of the dependencies by using
+the constraint-python.txt files as a ``constraint`` file. This might be useful
+to avoid "works-for-me" syndrome, where you use a different version of dependencies than the ones
+that are used in main, CI tests and by other contributors.
+
+There are different constraint files for different python versions. For example, this command will install
+all basic devel requirements and requirements of the google provider as last successfully tested for Python 3.8:
+
+.. code:: bash
+
+    pip install -e ".[devel,google]" \
+      --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt"
+
+You can upgrade just airflow, without paying attention to the provider's dependencies, by using
+the 'constraints-no-providers' constraint files. This allows you to keep the installed provider dependencies
+and upgrade the core dependencies to the latest versions supported by airflow core.
-In order to avoid the confusion, you can set ``INSTALL_PROVIDERS_FROM_SOURCES`` environment to ``true``
-before running ``pip install`` command:
+.. code:: bash
-.. code-block:: bash
+    pip install -e ".[devel]" \
+      --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.8.txt"
-   INSTALL_PROVIDERS_FROM_SOURCES="true" pip install -U -e ".[devel,]" \
-      --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt"
+These are examples of the development options available with the local virtualenv in your IDE:
-This way no providers packages will be installed and they will always be imported from the "airflow/providers"
-folder.
+* local debugging;
+* Airflow source view;
+* auto-completion;
+* documentation support;
+* unit tests.
+
+This document describes minimum requirements and instructions for using a standalone version of the local virtualenv.
 Running Tests
@@ -263,23 +364,23 @@ Running Tests
 Running tests is described in `TESTING.rst `_.
-While most of the tests are typical unit tests that do not
-require external components, there are a number of Integration tests. You can technically use local
-virtualenv to run those tests, but it requires to set up a number of
-external components (databases/queues/kubernetes and the like). So, it is
-much easier to use the `Breeze `__ development environment
-for Integration tests.
+While most of the tests are typical unit tests that do not require external components, there are a number
+of Integration tests. You can technically use a local virtualenv to run those tests, but it requires you to
+set up all necessary dependencies for all the providers you are going to test and also to set up
+databases - and sometimes other external components (for integration tests).
+
+So, generally it should be easier to use the `Breeze `__ development environment (especially
+for Integration tests).
-Note: Soon we will separate the integration and system tests out via pytest
-so that you can clearly know which tests are unit tests and can be run in
-the local virtualenv and which should be run using Breeze.
 Connecting to database
 ----------------------
 When analyzing the situation, it is helpful to be able to directly query the database. You can do it using
-the built-in Airflow command:
+the built-in Airflow command (however you need a CLI client tool for each database to be installed):
 .. code:: bash
     airflow db shell
+
+The command will explain what CLI tool is needed for the database you have configured.
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 5a636212b877a..0000000000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -include NOTICE -include LICENSE -include RELEASE_NOTES.rst -include README.md -graft licenses -graft airflow/www -graft airflow/www/static -graft airflow/www/templates -graft scripts/systemd -graft airflow/config_templates -recursive-exclude airflow/www/node_modules * -global-exclude __pycache__ *.pyc -exclude airflow/www/yarn.lock -exclude airflow/www/*.sh -include airflow/alembic.ini -include airflow/api_connexion/openapi/v1.yaml -include airflow/auth/managers/fab/openapi/v1.yaml -include airflow/git_version -include airflow/provider_info.schema.json -include airflow/customized_form_field_behaviours.schema.json -include airflow/serialization/schema.json -include airflow/utils/python_virtualenv_script.jinja2 -include airflow/utils/context.pyi -include airflow/example_dags/sql/sample.sql -include generated diff --git a/README.md b/README.md index d55d4e88143d6..b992bd77ea293 100644 --- a/README.md +++ b/README.md @@ -152,7 +152,7 @@ Documentation for dependent projects like provider packages, Docker image, Helm We publish Apache Airflow as `apache-airflow` package in PyPI. Installing it however might be sometimes tricky because Airflow is a bit of both a library and application. Libraries usually keep their dependencies open, and applications usually pin them, but we should do neither and both simultaneously. We decided to keep -our dependencies as open as possible (in `setup.py`) so users can install different versions of libraries +our dependencies as open as possible (in `pyproject.toml`) so users can install different versions of libraries if needed. This means that `pip install apache-airflow` will not work from time to time or will produce unusable Airflow installation. @@ -384,7 +384,7 @@ binding. ### Approach for dependencies for Airflow Core -Those `extras` and `providers` dependencies are maintained in `setup.cfg`. +Those dependencies are maintained in ``pyproject.toml``. There are few dependencies that we decided are important enough to upper-bound them by default, as they are known to follow predictable versioning scheme, and we know that new versions of those are very likely to diff --git a/STATIC_CODE_CHECKS.rst b/STATIC_CODE_CHECKS.rst index 81911d7375eb6..68a0aef451ce6 100644 --- a/STATIC_CODE_CHECKS.rst +++ b/STATIC_CODE_CHECKS.rst @@ -262,12 +262,12 @@ require Breeze Docker image to be built locally. 
+-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-pydevd-left-in-code | Check for pydevd debug statements accidentally left | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ +| check-pyproject-toml-order | Check order of dependencies in pyproject.toml | | ++-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-revision-heads-map | Check that the REVISION_HEADS_MAP is up-to-date | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-safe-filter-usage-in-html | Don't use safe in templates | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ -| check-setup-order | Check order of dependencies in setup.cfg and setup.py | | -+-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-start-date-not-used-in-defaults | start_date not to be defined in default_args in example_dags | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-system-tests-present | Check if system tests have required segments of code | | @@ -392,7 +392,7 @@ require Breeze Docker image to be built locally. +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | update-migration-references | Update migration ref doc | * | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ -| update-providers-dependencies | Update cross-dependencies for providers packages | | +| update-providers-dependencies | Update dependencies for provider packages | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | update-spelling-wordlist-to-be-sorted | Sort alphabetically and uniquify spelling_wordlist.txt | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ @@ -402,8 +402,6 @@ require Breeze Docker image to be built locally. +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | update-version | Update version to the latest version in the documentation | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ -| validate-pyproject | Validate pyproject.toml | | -+-----------------------------------------------------------+--------------------------------------------------------------+---------+ | yamllint | Check YAML files with yamllint | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ diff --git a/airflow/_vendor/README.md b/airflow/_vendor/README.md index e76d8beea360c..9d26a0257f753 100644 --- a/airflow/_vendor/README.md +++ b/airflow/_vendor/README.md @@ -30,7 +30,7 @@ Way to vendor a library or update a version: 3. 
Replace them with new files (only replace relevant python packages:move LICENSE ) * move license files to [licenses](../../licenses) folder * remove README and any other supporting files (they can be found in PyPI) - * make sure to add requirements from setup.py to airflow's setup.py with appropriate comment stating + * make sure to add requirements to airflow's ``pyproject.toml`` with appropriate comment stating why the requirements are added and when they should be removed 4. If you replace previous version, re-apply historical fixes from the "package" folder by cherry-picking them. diff --git a/airflow/cli/cli_parser.py b/airflow/cli/cli_parser.py index e98e8ce52cb23..a295bd21ba20c 100644 --- a/airflow/cli/cli_parser.py +++ b/airflow/cli/cli_parser.py @@ -25,6 +25,7 @@ import argparse import logging +import sys from argparse import Action from collections import Counter from functools import lru_cache @@ -72,9 +73,13 @@ try: auth_mgr = get_auth_manager_cls() airflow_commands.extend(auth_mgr.get_cli_commands()) -except Exception: - log.exception("cannot load CLI commands from auth manager") +except Exception as e: + log.warning("cannot load CLI commands from auth manager: %s", e) + log.warning("Authentication manager is not configured and webserver will not be able to start.") # do not re-raise for the same reason as above + if len(sys.argv) > 1 and sys.argv[1] == "webserver": + log.exception(e) + sys.exit(1) ALL_COMMANDS_DICT: dict[str, CLICommand] = {sp.name: sp for sp in airflow_commands} diff --git a/airflow/configuration.py b/airflow/configuration.py index 7dca8a44d1dfa..e4e3dab4b3167 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -1191,7 +1191,7 @@ def getimport(self, section: str, key: str, **kwargs) -> Any: try: return import_string(full_qualified_path) except ImportError as e: - log.error(e) + log.warning(e) raise AirflowConfigException( f'The object could not be loaded. Please check "{key}" key in "{section}" section. ' f'Current value: "{full_qualified_path}".' diff --git a/airflow/provider.yaml.schema.json b/airflow/provider.yaml.schema.json index 2d439b7fda2c7..faf66d8565a0c 100644 --- a/airflow/provider.yaml.schema.json +++ b/airflow/provider.yaml.schema.json @@ -38,6 +38,13 @@ "type": "string" } }, + "devel-dependencies": { + "description": "Dependencies that should be added to development requirements of the provider", + "type": "array", + "items": { + "type": "string" + } + }, "excluded-python-versions": { "description": "List of python versions excluded for that provider", "type": "array", diff --git a/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst b/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst index 9170786aff42f..0e4aa1308160f 100644 --- a/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst +++ b/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst @@ -195,11 +195,16 @@ Documentation An important part of building a new provider is the documentation. Some steps for documentation occurs automatically by ``pre-commit`` see `Installing pre-commit guide `_ +Those are important files in the airflow source tree that affect providers. The ``pyproject.toml`` in root +Airflow folder is automatically generated based on content of ``provider.yaml`` file in each provider +when ``pre-commit`` is run. Files such as ``extra-packages-ref.rst`` should be manually updated because +they are manually formatted for better layout and ``pre-commit`` will just verify if the information +about provider is updated there. 
Files like ``commit.rst`` and ``CHANGELOG`` are automatically updated +by ``breeze release-management`` command by release manager when providers are released. + .. code-block:: bash - ├── INSTALL - ├── CONTRIBUTING.rst - ├── setup.py + ├── pyproject.toml ├── airflow/ │ └── providers/ │ └── / @@ -207,7 +212,6 @@ Some steps for documentation occurs automatically by ``pre-commit`` see `Install │ └── CHANGELOG.rst │ └── docs/ - ├── spelling_wordlist.txt ├── apache-airflow/ │ └── extra-packages-ref.rst ├── integration-logos// @@ -220,36 +224,8 @@ Some steps for documentation occurs automatically by ``pre-commit`` see `Install └── .rst -Files automatically updated by pre-commit: - -- ``INSTALL`` in provider - -Files automatically created when the provider is released: - -- ``docs/apache-airflow-providers-/commits.rst`` -- ``/airflow/providers//CHANGELOG`` - There is a chance that your provider's name is not a common English word. -In this case is necessary to add it to the file ``docs/spelling_wordlist.txt``. This file begin with capitalized words and -lowercase in the second block. - - .. code-block:: bash - - Namespace - Neo4j - Nextdoor - (new line) - Nones - NotFound - Nullable - ... - neo4j - neq - networkUri - (new line) - nginx - nobr - nodash +In this case is necessary to add it to the file ``docs/spelling_wordlist.txt``. Add your provider dependencies into ``provider.yaml`` under ``dependencies`` key.. If your provider doesn't have any dependency add a empty list. @@ -258,9 +234,9 @@ In the ``docs/apache-airflow-providers-/connections.rst``: - add information how to configure connection for your provider. -In the ``docs/apache-airflow-providers-/operators/.rst``: - -- add information how to use the Operator. It's important to add examples and additional information if your Operator has extra-parameters. +In the ``docs/apache-airflow-providers-/operators/.rst`` add information +how to use the Operator. It's important to add examples and additional information if your +Operator has extra-parameters. .. code-block:: RST @@ -284,7 +260,7 @@ In the ``docs/apache-airflow-providers-/operators/.r :end-before: [END howto_operator_] -Copy from another, similar provider the docs: ``docs/apache-airflow-providers-new_provider/*.rst``: +Copy from another, similar provider the docs: ``docs/apache-airflow-providers-/*.rst``: At least those docs should be present @@ -336,20 +312,6 @@ In the ``airflow/providers//provider.yaml`` add information of you - hook-class-name: airflow.providers..hooks..NewProviderHook - connection-type: provider-connection-type - hook-class-names: # deprecated in Airflow 2.2.0 - - airflow.providers..hooks..NewProviderHook - -.. note:: Defining your own connection types - - You only need to add ``connection-types`` in case you have some hooks that have customized UI behavior. However, - it is only supported for Airflow 2.2.0. If your providers are also targeting Airflow below 2.2.0 you should - provide the deprecated ``hook-class-names`` array. The ``connection-types`` array allows for optimization - of importing of individual connections and while Airflow 2.2.0 is able to handle both definition, the - ``connection-types`` is recommended. - - For more information see `Custom connection types `_ - - After changing and creating these files you can build the documentation locally. The two commands below will serve to accomplish this. The first will build your provider's documentation. 
The second will ensure that the main Airflow documentation that involves some steps with the providers is also working. @@ -460,6 +422,19 @@ the compatibility checks should be updated when min airflow version is updated. Details on how this should be done are described in `Provider policies `_ +Releasing pre-installed providers for the first time +==================================================== + +When releasing providers for the first time, you need to release them in state ``not-ready``. +This will make it available for release management commands, but it will not be added to airflow's +preinstalled providers list - allowing airflow in main ``CI`` builds to be built without expecting the +provider to be available in PyPI. + +You need to add ``--include-not-ready-providers`` if you want to add them to the list of providers +considered by the release management commands. + +As soon as the provider is released, you should update the provider to ``state: ready``. + Suspending providers ==================== @@ -488,14 +463,13 @@ to do. * You will have to run ``breeze setup regenerate-command-images`` to regenerate breeze help files * you will need to update ``extra-packages-ref.rst`` and in some cases - when mentioned there explicitly - - ``setup.py`` to remove the provider from list of dependencies. + ``pyproject.toml`` to remove the provider from list of dependencies. -What happens under-the-hood as a result, is that ``generated/providers.json`` file is updated with +What happens under-the-hood as a result, is that ``pyproject.toml`` file is updated with the information about available providers and their dependencies and it is used by our tooling to exclude suspended providers from all relevant parts of the build and CI system (such as building CI image with dependencies, building documentation, running tests, etc.) - Resuming providers ================== @@ -503,7 +477,6 @@ Resuming providers is done by reverting the original change that suspended it. I needed to fix problems in the reverted provider, our CI will detect them and you will have to fix them as part of the PR reverting the suspension. - Removing providers ================== diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index f9687dcbfa5eb..8fe5b1496eadd 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -92,6 +92,36 @@ dependencies: - sqlalchemy_redshift>=0.8.6 - asgiref +additional-extras: + - name: pandas + dependencies: + - pandas>=1.2.5 + # There is conflict between boto3 and aiobotocore dependency botocore. 
+ # TODO: We can remove it once boto3 and aiobotocore both have compatible botocore version or + # boto3 have native async support and we move away from aio aiobotocore + - name: aiobotocore + dependencies: + - aiobotocore[boto3]>=2.5.3 + - name: cncf.kubernetes + dependencies: + - apache-airflow-providers-cncf-kubernetes>=7.2.0 + - name: s3fs + dependencies: + - s3fs>=2023.10.0 + - name: python3-saml + dependencies: + - python3-saml>=1.16.0 + +devel-dependencies: + - aiobotocore>=2.7.0 + - aws_xray_sdk>=2.12.0 + - moto[cloudformation,glue]>=4.2.12 + - mypy-boto3-appflow>=1.33.0 + - mypy-boto3-rds>=1.33.0 + - mypy-boto3-redshift-data>=1.33.0 + - mypy-boto3-s3>=1.33.0 + - s3fs>=2023.10.0 + integrations: - integration-name: Amazon Athena external-doc-url: https://aws.amazon.com/athena/ @@ -735,26 +765,6 @@ logging: - airflow.providers.amazon.aws.log.s3_task_handler.S3TaskHandler - airflow.providers.amazon.aws.log.cloudwatch_task_handler.CloudwatchTaskHandler -additional-extras: - - name: pandas - dependencies: - - pandas>=1.2.5 - # There is conflict between boto3 and aiobotocore dependency botocore. - # TODO: We can remove it once boto3 and aiobotocore both have compatible botocore version or - # boto3 have native async support and we move away from aio aiobotocore - - name: aiobotocore - dependencies: - - aiobotocore[boto3]>=2.5.3 - - name: cncf.kubernetes - dependencies: - - apache-airflow-providers-cncf-kubernetes>=7.2.0 - - name: s3fs - dependencies: - - s3fs>=2023.10.0 - - name: python3-saml - dependencies: - - python3-saml>=1.16.0 - config: aws: description: This section contains settings for Amazon Web Services (AWS) integration. diff --git a/airflow/providers/apache/beam/provider.yaml b/airflow/providers/apache/beam/provider.yaml index ddf8100edeb0a..339f0c23e5d18 100644 --- a/airflow/providers/apache/beam/provider.yaml +++ b/airflow/providers/apache/beam/provider.yaml @@ -56,6 +56,11 @@ dependencies: - apache-beam>=2.53.0 - pyarrow>=14.0.1 +additional-extras: + - name: google + dependencies: + - apache-beam[gcp] + integrations: - integration-name: Apache Beam external-doc-url: https://beam.apache.org/ @@ -77,8 +82,3 @@ triggers: - integration-name: Apache Beam python-modules: - airflow.providers.apache.beam.triggers.beam - -additional-extras: - - name: google - dependencies: - - apache-beam[gcp] diff --git a/airflow/providers/apache/hive/provider.yaml b/airflow/providers/apache/hive/provider.yaml index a3aefe854ce84..51114871a4007 100644 --- a/airflow/providers/apache/hive/provider.yaml +++ b/airflow/providers/apache/hive/provider.yaml @@ -66,7 +66,7 @@ dependencies: - apache-airflow-providers-common-sql>=1.3.1 - hmsclient>=0.1.0 - pandas>=1.2.5 - - pyhive[hive_pure_sasl]>=0.7.0 + - pyhive[hive-pure-sasl]>=0.7.0 - thrift>=0.9.2 integrations: diff --git a/airflow/providers/apache/impala/provider.yaml b/airflow/providers/apache/impala/provider.yaml index b6cc9870883c5..53cc6806690f3 100644 --- a/airflow/providers/apache/impala/provider.yaml +++ b/airflow/providers/apache/impala/provider.yaml @@ -37,6 +37,12 @@ dependencies: - impyla>=0.18.0,<1.0 - apache-airflow>=2.6.0 +additional-extras: + - name: kerberos + dependencies: + - kerberos>=1.3.0 + + integrations: - integration-name: Apache Impala external-doc-url: https://impala.apache.org @@ -50,8 +56,3 @@ hooks: connection-types: - hook-class-name: airflow.providers.apache.impala.hooks.impala.ImpalaHook connection-type: impala - -additional-extras: - - name: kerberos - dependencies: - - kerberos>=1.3.0 diff --git 
a/airflow/providers/apache/spark/provider.yaml b/airflow/providers/apache/spark/provider.yaml index 992deffaddd33..a8062c4734072 100644 --- a/airflow/providers/apache/spark/provider.yaml +++ b/airflow/providers/apache/spark/provider.yaml @@ -56,6 +56,11 @@ dependencies: - pyspark - grpcio-status>=1.59.0 +additional-extras: + - name: cncf.kubernetes + dependencies: + - apache-airflow-providers-cncf-kubernetes>=7.4.0 + integrations: - integration-name: Apache Spark external-doc-url: https://spark.apache.org/ @@ -94,8 +99,3 @@ connection-types: task-decorators: - class-name: airflow.providers.apache.spark.decorators.pyspark.pyspark_task name: pyspark - -additional-extras: - - name: cncf.kubernetes - dependencies: - - apache-airflow-providers-cncf-kubernetes>=7.4.0 diff --git a/airflow/providers/celery/provider.yaml b/airflow/providers/celery/provider.yaml index 962fe57c9d987..ad32e9f7d6dd1 100644 --- a/airflow/providers/celery/provider.yaml +++ b/airflow/providers/celery/provider.yaml @@ -57,6 +57,12 @@ dependencies: - flower>=1.0.0 - google-re2>=1.0 +additional-extras: + - name: cncf.kubernetes + dependencies: + - apache-airflow-providers-cncf-kubernetes>=7.4.0 + + integrations: - integration-name: Celery external-doc-url: https://docs.celeryq.dev/en/stable/ @@ -324,8 +330,3 @@ config: sensitive: true example: '{"password": "password_for_redis_server"}' default: ~ - -additional-extras: - - name: cncf.kubernetes - dependencies: - - apache-airflow-providers-cncf-kubernetes>=7.4.0 diff --git a/airflow/providers/databricks/provider.yaml b/airflow/providers/databricks/provider.yaml index de7829f1cf03a..a8c65d76a492c 100644 --- a/airflow/providers/databricks/provider.yaml +++ b/airflow/providers/databricks/provider.yaml @@ -67,6 +67,16 @@ dependencies: - databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0 - aiohttp>=3.6.3, <4 +additional-extras: + # pip install apache-airflow-providers-databricks[sdk] + - name: sdk + description: Install Databricks SDK + dependencies: + - databricks-sdk==0.10.0 + +devel-dependencies: + - deltalake>=0.12.0 + integrations: - integration-name: Databricks external-doc-url: https://databricks.com/ @@ -129,10 +139,3 @@ connection-types: extra-links: - airflow.providers.databricks.operators.databricks.DatabricksJobRunLink - -additional-extras: - # pip install apache-airflow-providers-databricks[sdk] - - name: sdk - description: Install Databricks SDK - dependencies: - - databricks-sdk==0.10.0 diff --git a/airflow/providers/google/provider.yaml b/airflow/providers/google/provider.yaml index aefc4d49977d6..eb8a728211c2e 100644 --- a/airflow/providers/google/provider.yaml +++ b/airflow/providers/google/provider.yaml @@ -145,6 +145,26 @@ dependencies: - sqlalchemy-bigquery>=1.2.1 - sqlalchemy-spanner>=1.6.2 +additional-extras: + - name: apache.beam + dependencies: + - apache-beam[gcp] + - name: cncf.kubernetes + dependencies: + - apache-airflow-providers-cncf-kubernetes>=7.2.0 + - name: leveldb + dependencies: + - plyvel + - name: oracle + dependencies: + - apache-airflow-providers-oracle>=3.1.0 + - name: facebook + dependencies: + - apache-airflow-providers-facebook>=2.2.0 + - name: amazon + dependencies: + - apache-airflow-providers-amazon>=2.6.0 + integrations: - integration-name: Google Analytics (GA4) external-doc-url: https://analytics.google.com/ @@ -1218,26 +1238,6 @@ extra-links: - airflow.providers.google.common.links.storage.FileDetailsLink - airflow.providers.google.marketing_platform.links.analytics_admin.GoogleAnalyticsPropertyLink -additional-extras: - - name: 
apache.beam - dependencies: - - apache-beam[gcp] - - name: cncf.kubernetes - dependencies: - - apache-airflow-providers-cncf-kubernetes>=7.2.0 - - name: leveldb - dependencies: - - plyvel - - name: oracle - dependencies: - - apache-airflow-providers-oracle>=3.1.0 - - name: facebook - dependencies: - - apache-airflow-providers-facebook>=2.2.0 - - name: amazon - dependencies: - - apache-airflow-providers-amazon>=2.6.0 - secrets-backends: - airflow.providers.google.cloud.secrets.secret_manager.CloudSecretManagerBackend diff --git a/airflow/providers/microsoft/azure/provider.yaml b/airflow/providers/microsoft/azure/provider.yaml index fc3aefaf9c360..ffe397024d838 100644 --- a/airflow/providers/microsoft/azure/provider.yaml +++ b/airflow/providers/microsoft/azure/provider.yaml @@ -95,6 +95,9 @@ dependencies: - azure-mgmt-containerregistry>=8.0.0 - azure-mgmt-containerinstance>=9.0.0 +devel-dependencies: + - pywinrm + integrations: - integration-name: Microsoft Azure Batch external-doc-url: https://azure.microsoft.com/en-us/services/batch/ diff --git a/airflow/providers/mongo/provider.yaml b/airflow/providers/mongo/provider.yaml index 9e2bea685989c..f22d7035de80c 100644 --- a/airflow/providers/mongo/provider.yaml +++ b/airflow/providers/mongo/provider.yaml @@ -48,6 +48,9 @@ dependencies: - dnspython>=1.13.0 - pymongo>=3.6.0 +devel-dependencies: + - mongomock + integrations: - integration-name: MongoDB external-doc-url: https://www.mongodb.com/ diff --git a/airflow/providers/mysql/provider.yaml b/airflow/providers/mysql/provider.yaml index 22d1f364ef9d8..48c74bcd7c929 100644 --- a/airflow/providers/mysql/provider.yaml +++ b/airflow/providers/mysql/provider.yaml @@ -60,6 +60,11 @@ dependencies: - mysqlclient>=1.3.6 - mysql-connector-python>=8.0.29 +additional-extras: + # only needed for backwards compatibility + - name: mysql-connector-python + dependencies: [] + integrations: - integration-name: MySQL external-doc-url: https://www.mysql.com/ @@ -97,8 +102,3 @@ transfers: connection-types: - hook-class-name: airflow.providers.mysql.hooks.mysql.MySqlHook connection-type: mysql - -additional-extras: - # only needed for backwards compatibility - - name: mysql-connector-python - dependencies: [] diff --git a/airflow/providers/oracle/provider.yaml b/airflow/providers/oracle/provider.yaml index 89b6480d0ea85..59bc4b2ab571c 100644 --- a/airflow/providers/oracle/provider.yaml +++ b/airflow/providers/oracle/provider.yaml @@ -56,17 +56,18 @@ dependencies: - apache-airflow-providers-common-sql>=1.3.1 - oracledb>=1.0.0 +additional-extras: + - name: numpy + dependencies: + - numpy + + integrations: - integration-name: Oracle external-doc-url: https://www.oracle.com/en/database/ logo: /integration-logos/oracle/Oracle.png tags: [software] -additional-extras: - - name: numpy - dependencies: - - numpy - operators: - integration-name: Oracle python-modules: diff --git a/airflow/providers/postgres/provider.yaml b/airflow/providers/postgres/provider.yaml index c6e4338978fb7..5fcb7068d4b3b 100644 --- a/airflow/providers/postgres/provider.yaml +++ b/airflow/providers/postgres/provider.yaml @@ -60,6 +60,11 @@ dependencies: - apache-airflow-providers-common-sql>=1.3.1 - psycopg2-binary>=2.8.0 +additional-extras: + - name: amazon + dependencies: + - apache-airflow-providers-amazon>=2.6.0 + integrations: - integration-name: PostgreSQL external-doc-url: https://www.postgresql.org/ @@ -81,8 +86,3 @@ hooks: connection-types: - hook-class-name: airflow.providers.postgres.hooks.postgres.PostgresHook connection-type: postgres - 
-additional-extras: - - name: amazon - dependencies: - - apache-airflow-providers-amazon>=2.6.0 diff --git a/airflow/providers/tabular/provider.yaml b/airflow/providers/tabular/provider.yaml index 7b161a41d2e63..47bd18547db46 100644 --- a/airflow/providers/tabular/provider.yaml +++ b/airflow/providers/tabular/provider.yaml @@ -36,6 +36,9 @@ versions: dependencies: - apache-airflow>=2.6.0 +devel-dependencies: + - pyiceberg>=0.5.0 + integrations: - integration-name: Tabular external-doc-url: https://tabular.io/docs/ diff --git a/airflow/providers_manager.py b/airflow/providers_manager.py index 5ca1b6740e32f..01bc9bc39a35d 100644 --- a/airflow/providers_manager.py +++ b/airflow/providers_manager.py @@ -959,6 +959,14 @@ def _import_hook( field_behaviours = hook_class.get_ui_field_behaviour() if field_behaviours: self._add_customized_fields(package_name, hook_class, field_behaviours) + except ImportError as e: + if "No module named 'flask_appbuilder'" in e.msg: + log.warning( + "The hook_class '%s' is not fully initialized (UI widgets will be missing), because " + "the 'flask_appbuilder' package is not installed, however it is not required for " + "Airflow components to work", + hook_class_name, + ) except Exception as e: log.warning( "Exception when importing '%s' from '%s' package: %s", diff --git a/airflow/www/webpack.config.js b/airflow/www/webpack.config.js index 6ac1f3a208890..4e3de410bc8d0 100644 --- a/airflow/www/webpack.config.js +++ b/airflow/www/webpack.config.js @@ -275,7 +275,7 @@ const config = { }), new LicensePlugin({ additionalFiles: { - "../../../../licenses/LICENSES-ui.txt": formatLicenses, + "../../../../3rd-party-licenses/LICENSES-ui.txt": formatLicenses, }, unacceptableLicenseTest: (licenseIdentifier) => [ diff --git a/clients/gen/common.sh b/clients/gen/common.sh index cd78d6d039948..0437028351589 100755 --- a/clients/gen/common.sh +++ b/clients/gen/common.sh @@ -59,8 +59,7 @@ function validate_input { git_push.sh .gitlab-ci.yml requirements.txt -setup.cfg -setup.py +pyproject.toml test-requirements.txt tox.ini EOF diff --git a/dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md b/dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md index 29b25f5f1175f..d74514d31ec3c 100644 --- a/dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md +++ b/dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md @@ -424,9 +424,10 @@ The slight risk is that if there is a constraint problem that impacts regular PR make all PRs "red" until the constraint is fixed. However, if this is the case then usually we should fix the problem by fixing the tests or dependencies and the automated CI process should be able to self-heal. The main build does not use constraints and it will attempt to upgrade (or downgrade) the dependencies to -the latest version matching the dependency specification we have in setup.cfg/setup.py/provider.yaml files. -Also the constraints are pushed without `--force` so there is no risk of destroying anything. -The history is kept in Git, so you can always revert to the previous version if needed. +the latest version matching the dependency specification we have in `pyproject.toml` files (note that provider +dependencies in `pyproject.toml` are generated from provider.yaml files being the single source of truth for +provider dependencies). Also, the constraints are pushed without `--force` so there is no risk of destroying +anything. The history is kept in Git, so you can always revert to the previous version if needed. 
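+
+For example, if a freshly pushed set of constraints turns out to be broken, rolling it back is a plain git
+operation - a sketch, assuming the `apache` remote and the `constraints-main` branch are the ones to fix:
+
+```bash
+git checkout constraints-main
+git revert --no-edit HEAD
+git push apache constraints-main
+```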
# Manually updating already tagged constraint files diff --git a/dev/README.md b/dev/README.md index 5ed5929233d04..9ffd0798ccf89 100644 --- a/dev/README.md +++ b/dev/README.md @@ -23,6 +23,7 @@ - [Apache Airflow source releases](#apache-airflow-source-releases) - [Apache Airflow Package](#apache-airflow-package) - [Provider packages](#provider-packages) +- [Preinstalled providers](#preinstalled-providers) - [Prerequisites for the release manager preparing the release](#prerequisites-for-the-release-manager-preparing-the-release) - [Upload Public keys to id.apache.org and GitHub](#upload-public-keys-to-idapacheorg-and-github) - [Configure PyPI uploads](#configure-pypi-uploads) @@ -95,7 +96,6 @@ The full provider's list can be found here: There are also convenience packages released as "apache-airflow-providers"separately in PyPI. [PyPI query for providers](https://pypi.org/search/?q=apache-airflow-providers) - And available in PyPI: [PyPI query for backport providers](https://pypi.org/search/?q=apache-airflow-backport-providers). @@ -105,6 +105,15 @@ of Backport Providers was done on March 17, 2021. Detailed instruction of releasing Provider Packages can be found in the [README_RELEASE_PROVIDER_PACKAGES.md](README_RELEASE_PROVIDER_PACKAGES.md) +# Preinstalled providers + +The `dev/airflow_pre_installed_providers.txt` file contains the list of provider ids that are pre-installed. +Those providers are dynamically added to the generated standard wheel packages that are released in PyPI. +Those packages are not present in pyproject.toml as dependencies, and +they are not installed when you install Airflow in editable mode for development. +This way, when you develop Airflow you can work on Airflow and Providers together from the same +source tree - without polluting your editable installation with installed provider packages. + # Prerequisites for the release manager preparing the release The person acting as release manager has to fulfill certain pre-requisites. More details and FAQs are diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md index 5b1ad59a40a9c..dabc02ea2ddf6 100644 --- a/dev/README_RELEASE_AIRFLOW.md +++ b/dev/README_RELEASE_AIRFLOW.md @@ -76,7 +76,7 @@ For obvious reasons, you can't cherry-pick every change from `main` into the rel some are incompatible without a large set of other changes, some are brand-new features, and some just don't need to be in a release. In general only security fixes, data-loss bugs and regression fixes are essential to bring into a patch release; -also changes in dependencies (setup.py, setup.cfg) resulting from releasing newer versions of packages that Airflow depends on. +also changes in dependencies (pyproject.toml) resulting from releasing newer versions of packages that Airflow depends on. Other bugfixes can be added on a best-effort basis, but if something is going to be very difficult to backport (maybe it has a lot of conflicts, or heavily depends on a new feature or API that's not being backported), it's OK to leave it out of the release at your sole discretion as the release manager - @@ -772,7 +772,7 @@ that have min-airflow version set to X.Y.0 * NOTE! WE MIGHT WANT TO AUTOMATE THAT STEP IN THE FUTURE -1. Checkout the constraints-2-* branch and update the ``constraints-3*.txt`` file with the new provider +1. Checkout the ``constraints-2-*`` branch and update the ``constraints-3*.txt`` file with the new provider version. Find the place where the provider should be added, add it with the latest provider version.
``` @@ -793,7 +793,7 @@ git push -f apache constraints-X.Y.Z that have >= ``X.Y.0`` in the corresponding provider.yaml file. -3. In case the provider should also be installed in the image (it is part of ``airflow/providers/installed_providers.txt``) +3. In case the provider should also be installed in the image (it is part of ``dev/prod_image_installed_providers.txt``) it should also be added at this moment to ``Dockerfile`` to the list of default extras in the line with ``AIRFLOW_EXTRAS``: ```Dockerfile diff --git a/dev/README_RELEASE_PROVIDER_PACKAGES.md b/dev/README_RELEASE_PROVIDER_PACKAGES.md index ba7c68b103e34..b97e920685a62 100644 --- a/dev/README_RELEASE_PROVIDER_PACKAGES.md +++ b/dev/README_RELEASE_PROVIDER_PACKAGES.md @@ -127,29 +127,27 @@ Details about maintaining the SEMVER version are going to be discussed and imple The provider packages can be in one of several states. -* The `ready` state the provider package is released as part of the regular release cycle (including the - documentation, package building and publishing). This is the default state for all providers. -* The `not-ready` state is when the provider has `not-ready` field set to `true` in the `provider.yaml` file. - This is usually used when the provider has some in-progress changes (usually API changes) that we do not - want to release yet as part of the regular release cycle. Providers in this state are excluded from being - released as part of the regular release cycle (including documentation building). You can build and prepare - such provider when you explicitly specify it as argument of a release command or by passing +* The `not-ready` state is used when the provider has some in-progress changes (usually API changes) that + we do not want to release yet as part of the regular release cycle. Providers in this state are excluded + from being released as part of the regular release cycle (including documentation building). You can build + and prepare such provider when you explicitly specify it as argument of a release command or by passing `--include-not-ready-providers` flag in corresponding command. The `not-ready` providers are treated as regular providers when it comes to running tests and preparing and releasing packages in `CI` - as we want to make sure they are properly releasable any time and we want them to contribute to dependencies and we - want to test them. -* The `suspended` state is when the provider has `suspended` field set to `true` in the `provider.yaml` file. - This is used when we have a good reason to suspend such provider, following the devlist discussion and - vote or "lazy consensus". The process of suspension is described in [Provider's docs](../PROVIDERS.rst). + want to test them. Also in case of preinstalled providers, the `not-ready` providers are contributing + their dependencies rather than the provider package to requirements of Airflow. +* The `ready` state is the usual state of the provider that is released in the regular release cycle + (including the documentation, package building and publishing). This is the state most providers are in. +* The `suspended` state is used when we have a good reason to suspend such provider, following the devlist + discussion and vote or "lazy consensus". The process of suspension is described in [Provider's docs](../PROVIDERS.rst). 
The `suspended` providers are excluded from being released as part of the regular release cycle (including documentation building) but also they do not contribute dependencies to the CI image and their tests are not run in CI process. You can build and prepare such provider when you explicitly specify it as argument of a release command or by passing `--include-suspended-providers` flag in corresponding command (but it might or might not work at any time as the provider release commands are not regularly run on CI for the suspended providers). The `suspended` providers are not released as part of the regular release cycle. -* The `removed` state is when the provider is marked as `removed` - usually after some period of time being - `suspended`. This is a temporary state after the provider has been voted (or agreed in "lazy consensus") to - be removed and it is only used for exactly one release cycle - in order to produce the final version of +* The `removed` state is a temporary state after the provider has been voted (or agreed in "lazy consensus") + to be removed and it is only used for exactly one release cycle - in order to produce the final version of the package - identical to the previous version with the exception of the removal notice. The process of removal is described in [Provider's docs](../PROVIDERS.rst). The `removed` providers are included in the regular release cycle (including documentation building) because the `--include-removed-providers` @@ -222,6 +220,9 @@ In case you prepare provider documentation for just a few selected providers, yo breeze release-management prepare-provider-documentation [packages] ``` +In case you want to also release a pre-installed provider that is in ``not-ready`` state (i.e. when +you want to release it before you switch their state to ``ready``), you need to pass +``--include-not-ready-providers`` flag to the command above. This command will not only prepare documentation but will also help the release manager to review changes implemented in all providers, and determine which of the providers should be released. For each @@ -251,6 +252,10 @@ In case you prepare provider documentation for just a few selected providers, yo breeze release-management prepare-provider-documentation --answer yes [packages] ``` +In case you want to also release a pre-installed provider that is in ``not-ready`` state (i.e. when +you want to release it before you switch their state to ``ready``), you need to pass +``--include-not-ready-providers`` flag to the command above. + NOTE!! In case you prepare provider's documentation in a branch different than main, you need to manually specify the base branch via `--base-branch` parameter. For example if you try to build a `cncf.kubernetes` provider that is build from `provider-cncf-kubernetes/v4-4` @@ -319,6 +324,11 @@ breeze release-management prepare-provider-packages --include-removed-providers --package-format both PACKAGE PACKAGE .... ``` +In case you want to also release a pre-installed provider that is in ``not-ready`` state (i.e. when +you want to release it before you switch their state to ``ready``), you need to pass +``--include-not-ready-providers`` flag to the command above. 
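For illustration, here is a rough sketch of the state filtering that these flags imply, loosely modeled on `get_packages_list_to_act_on` further down in this diff (names and defaults are illustrative, not the exact breeze implementation):

```python
# Illustrative sketch of how a package-list file is filtered by provider state
# (loosely modeled on get_packages_list_to_act_on shown later in this diff).
from __future__ import annotations


def select_packages(
    lines: list[str],
    removed_ids: set[str],
    not_ready_ids: set[str],
    include_removed: bool = False,
    include_not_ready: bool = False,
) -> list[str]:
    selected = []
    for line in lines:
        package = line.strip()
        if not package or package.startswith("#"):
            continue  # skip blank lines and comments such as the new file headers
        if package in removed_ids and not include_removed:
            continue
        if package in not_ready_ids and not include_not_ready:
            continue
        selected.append(package)
    return selected


print(select_packages(["# header", "ftp", "new.provider"], set(), {"new.provider"}))
# ['ftp'] - pass include_not_ready=True to also get 'new.provider'
```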
+ + * Sign all your packages ```shell script diff --git a/dev/airflow_pre_installed_providers.txt b/dev/airflow_pre_installed_providers.txt new file mode 100644 index 0000000000000..878c309d76efc --- /dev/null +++ b/dev/airflow_pre_installed_providers.txt @@ -0,0 +1,7 @@ +# List of all the providers that are pre-installed when you run `pip install apache-airflow` without extras +common.io +common.sql +ftp +http +imap +sqlite diff --git a/dev/breeze/README.md b/dev/breeze/README.md index 7452be51ae188..e64de13a5632d 100644 --- a/dev/breeze/README.md +++ b/dev/breeze/README.md @@ -66,6 +66,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT. --------------------------------------------------------------------------------------------------------- -Package config hash: ac1e65234e1a780d1f21ac28e27451c763e308cc901ab8f72acc6414db8da14c8f841e4757b134a4cc6f3c76186af340db0610cd3fa155ed5fcf1abca45381e8 +Package config hash: 5253de328868bc7e725254e1efe2daa57e7b1faee1b0be68aa8609569e47d08a1e0be4760bf889f422c4ce5c2884577c1300cd2f2292f2cb4677b1d1d160916c --------------------------------------------------------------------------------------------------------- diff --git a/dev/breeze/SELECTIVE_CHECKS.md b/dev/breeze/SELECTIVE_CHECKS.md index 2f7d9620417c7..627fa5a365360 100644 --- a/dev/breeze/SELECTIVE_CHECKS.md +++ b/dev/breeze/SELECTIVE_CHECKS.md @@ -44,7 +44,10 @@ We have the following Groups of files for CI that determine which tests are run: Open API specification and determine that we should run dedicated API tests. * `Helm files` - change in those files impacts helm "rendering" tests - `chart` folder and `helm_tests` folder. * `Setup files` - change in the setup files indicates that we should run `upgrade to newer dependencies` - - setup.* files, pyproject.toml, generated dependencies files in `generated` folder + pyproject.toml and generated dependencies files in `generated` folder. The dependency files and part of + the pyproject.toml are automatically generated from the provider.yaml files in provider by + the `update-providers-dependencies` pre-commit. The provider.yaml is a single source of truth for each + provider. * `DOC files` - change in those files indicate that we should run documentation builds (both airflow sources and airflow documentation) * `WWW files` - those are files for the WWW part of our UI (useful to determine if UI tests should run) @@ -74,9 +77,10 @@ together using `pytest-xdist` (pytest-xdist distributes the tests among parallel ## Selective check decision rules * `Full tests` case is enabled when the event is PUSH, or SCHEDULE or we miss commit info or any of the - important environment files (setup.py, setup.cfg, provider.yaml, Dockerfile, build scripts) changed or - when `full tests needed` label is set. That enables all matrix combinations of variables (representative) - and all possible test type. No further checks are performed. + important environment files (`pyproject.toml`, `Dockerfile`, `scripts`, + `generated/provider_dependencies.json` etc.) changed or when `full tests needed` label is set. + That enables all matrix combinations of variables (representative) and all possible test type. No further + checks are performed. See also [1] note below. * Python, Kubernetes, Backend, Kind, Helm versions are limited to "defaults" only unless `Full tests` mode is enabled. 
* `Python scans`, `Javascript scans`, `API tests/codegen`, `UI`, `WWW`, `Kubernetes` tests and `DOC builds` @@ -199,3 +203,20 @@ Github Actions to pass the list of parameters to a command to execute | skip-provider-tests | When provider tests should be skipped (on non-main branch or when no provider changes detected) | true | | | sqlite-exclude | Which versions of Sqlite to exclude for tests as JSON array | [] | | | upgrade-to-newer-dependencies | Whether the image build should attempt to upgrade all dependencies (might be true/false or commit hash) | false | | + + +[1] Note on how the `full tests needed` decision relates to provider.yaml files. + +When we decide whether to run `full tests` we do not check (directly) if provider.yaml files changed, +even though they are the single source of truth for provider dependencies, and adding a dependency there +changes the environment, which would generally warrant full tests. + +This is because a provider.yaml change will automatically trigger (via the `update-providers-dependencies` pre-commit) +regeneration of `generated/provider_dependencies.json`, and `pyproject.toml` gets updated as well. This is a far +better indication of whether we need to run full tests than just checking if provider.yaml files changed, because +provider.yaml files contain more information than just dependencies - they are the single source of truth +for a lot of information for each provider, and sometimes (for example when we update provider documentation +or when a new Hook class is added) we do not need to run full tests. + +That's why we do not base our `full tests needed` decision on changes in dependency files that are generated +from the `provider.yaml` files. diff --git a/dev/breeze/doc/adr/0003-bootstrapping-virtual-environment.md b/dev/breeze/doc/adr/0003-bootstrapping-virtual-environment.md index 5cf11fe327929..a2788b57c4f0f 100644 --- a/dev/breeze/doc/adr/0003-bootstrapping-virtual-environment.md +++ b/dev/breeze/doc/adr/0003-bootstrapping-virtual-environment.md @@ -101,7 +101,7 @@ version of Breeze will remain as part of the Airflow's source code. The decision is to implement Breeze in a subfolder (`dev/breeze2/`) of Apache Airflow as a Python project following the standard setuptools enabled project. The project contains setup.py and dependencies described -in setup.cfg and contains both source code and tests for Breeze code. +in pyproject.toml and contains both source code and tests for Breeze code.
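Stepping back to the selective-checks note above: a simplified sketch of how a changed-file list is matched against the `SETUP_FILES` patterns. The real logic lives in `dev/breeze/src/airflow_breeze/utils/selective_checks.py`, patched later in this diff; this is an illustration, not the actual implementation.

```python
# Simplified sketch of the "setup files changed, so upgrade dependencies / run full tests" check.
# The patterns below mirror the SETUP_FILES group in selective_checks.py (see the hunk later in
# this diff); everything else here is illustrative.
from __future__ import annotations

import re

SETUP_FILE_PATTERNS = [
    r"^pyproject.toml",
    r"^generated/provider_dependencies.json$",
]


def setup_files_changed(changed_files: list[str]) -> bool:
    return any(
        re.match(pattern, changed_file)
        for pattern in SETUP_FILE_PATTERNS
        for changed_file in changed_files
    )


# A provider.yaml edit shows up here indirectly, via the regenerated json file:
print(setup_files_changed(["airflow/providers/mongo/provider.yaml"]))  # False
print(setup_files_changed(["generated/provider_dependencies.json"]))   # True
```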
The sub-project could be used in the future to produce a PyPI package (we reserved such package in PyPI), however its main purpose is @@ -116,7 +116,7 @@ of Airflow) performs the following tasks: * when run for the first time it creates `.build/breeze2/venv` virtual environment (Python3.6+ based) - with locally installed `dev` - project in editable mode (`pip install -e .`) - this makes sure + project in editable mode (`pip install -e ".[devel]"`) - this makes sure that the users of Breeze will use the latest version of Breeze available in their version of the repository * when run subsequently, it will check if setup files changed for diff --git a/dev/breeze/pyproject.toml b/dev/breeze/pyproject.toml index 9cae9052e28bd..438719faa7e2d 100644 --- a/dev/breeze/pyproject.toml +++ b/dev/breeze/pyproject.toml @@ -50,6 +50,7 @@ dependencies = [ "filelock>=3.13.0", "flit>=3.5.0", "gitpython>=3.1.40", + "hatch==1.9.1", "inputimeout>=1.0.4", "jinja2>=3.1.0", "jsonschema>=4.19.1", @@ -61,7 +62,7 @@ dependencies = [ "pytest-xdist>=3.3.1", "pytest>=7.4.0", "pyyaml>=6.0.1", - "requests>=2.30.0", + "requests>=2.31.0", "rich-click>=1.7.1", "rich>=13.6.0", "semver>=3.0.2", diff --git a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py index 981124540fe32..0682728ff4127 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py @@ -46,7 +46,6 @@ option_image_tag_for_pulling, option_image_tag_for_verifying, option_install_mysql_client_type, - option_install_providers_from_sources, option_platform_multiple, option_prepare_buildx_cache, option_pull, @@ -304,7 +303,6 @@ def get_exitcode(status: int) -> int: @option_install_mysql_client_type @option_image_tag_for_building @option_include_success_outputs -@option_install_providers_from_sources @option_parallelism @option_platform_multiple @option_prepare_buildx_cache @@ -345,7 +343,6 @@ def build( image_tag: str, include_success_outputs, install_mysql_client_type: str, - install_providers_from_sources: bool, parallelism: int, platform: str | None, prepare_buildx_cache: bool, @@ -417,7 +414,6 @@ def run_build(ci_image_params: BuildCiParams) -> None: github_token=github_token, image_tag=image_tag, install_mysql_client_type=install_mysql_client_type, - install_providers_from_sources=install_providers_from_sources, prepare_buildx_cache=prepare_buildx_cache, push=push, python=python, diff --git a/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py b/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py index d790ae33a172d..97f5f2353887c 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py @@ -58,7 +58,6 @@ "--commit-sha", "--debian-version", "--install-mysql-client-type", - "--install-providers-from-sources", "--python-image", ], }, diff --git a/dev/breeze/src/airflow_breeze/commands/common_image_options.py b/dev/breeze/src/airflow_breeze/commands/common_image_options.py index dd94ffd499a25..1b8f9460949e3 100644 --- a/dev/breeze/src/airflow_breeze/commands/common_image_options.py +++ b/dev/breeze/src/airflow_breeze/commands/common_image_options.py @@ -146,12 +146,6 @@ default=ALLOWED_INSTALL_MYSQL_CLIENT_TYPES[0], envvar="INSTALL_MYSQL_CLIENT_TYPE", ) -option_install_providers_from_sources = click.option( - "--install-providers-from-sources", - help="Install providers from sources when 
installing.", - is_flag=True, - envvar="INSTALL_PROVIDERS_FROM_SOURCES", -) option_platform_multiple = click.option( "--platform", help="Platform for Airflow image.", diff --git a/dev/breeze/src/airflow_breeze/commands/developer_commands.py b/dev/breeze/src/airflow_breeze/commands/developer_commands.py index fcc9bd57ce9a6..ede6dfd933d4d 100644 --- a/dev/breeze/src/airflow_breeze/commands/developer_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/developer_commands.py @@ -423,7 +423,7 @@ def shell( @main.command(name="start-airflow") @click.option( - "--skip-asset-compilation", + "--skip-assets-compilation", help="Skips compilation of assets when starting airflow even if the content of www changed " "(mutually exclusive with --dev-mode).", is_flag=True, @@ -431,7 +431,7 @@ def shell( @click.option( "--dev-mode", help="Starts webserver in dev mode (assets are always recompiled in this case when starting) " - "(mutually exclusive with --skip-asset-compilation).", + "(mutually exclusive with --skip-assets-compilation).", is_flag=True, ) @click.argument("extra-args", nargs=-1, type=click.UNPROCESSED) @@ -512,7 +512,7 @@ def start_airflow( providers_skip_constraints: bool, python: str, restart: bool, - skip_asset_compilation: bool, + skip_assets_compilation: bool, standalone_dag_processor: bool, use_airflow_version: str | None, use_packages_from_dist: bool, @@ -521,12 +521,12 @@ def start_airflow( Enter breeze environment and starts all Airflow components in the tmux session. Compile assets if contents of www directory changed. """ - if dev_mode and skip_asset_compilation: + if dev_mode and skip_assets_compilation: get_console().print( "[warning]You cannot skip asset compilation in dev mode! Assets will be compiled!" ) - skip_asset_compilation = True - if use_airflow_version is None and not skip_asset_compilation: + skip_assets_compilation = True + if use_airflow_version is None and not skip_assets_compilation: run_compile_www_assets(dev=dev_mode, run_in_background=True, force_clean=False) airflow_constraints_reference = _determine_constraint_branch_used( airflow_constraints_reference, use_airflow_version diff --git a/dev/breeze/src/airflow_breeze/commands/developer_commands_config.py b/dev/breeze/src/airflow_breeze/commands/developer_commands_config.py index 7fba31b8b083b..88b734f513c95 100644 --- a/dev/breeze/src/airflow_breeze/commands/developer_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/developer_commands_config.py @@ -228,7 +228,7 @@ { "name": "Asset compilation options", "options": [ - "--skip-asset-compilation", + "--skip-assets-compilation", "--dev-mode", ], }, diff --git a/dev/breeze/src/airflow_breeze/commands/production_image_commands.py b/dev/breeze/src/airflow_breeze/commands/production_image_commands.py index 9c2cb3a57c3a5..1266085c79a5a 100644 --- a/dev/breeze/src/airflow_breeze/commands/production_image_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/production_image_commands.py @@ -41,7 +41,6 @@ option_image_tag_for_pulling, option_image_tag_for_verifying, option_install_mysql_client_type, - option_install_providers_from_sources, option_platform_multiple, option_prepare_buildx_cache, option_pull, @@ -233,7 +232,6 @@ def prod_image(): @option_image_tag_for_building @option_include_success_outputs @option_install_mysql_client_type -@option_install_providers_from_sources @option_parallelism @option_platform_multiple @option_prepare_buildx_cache @@ -284,7 +282,6 @@ def build( install_airflow_version: str | None, install_mysql_client_type: str, 
install_packages_from_context: bool, - install_providers_from_sources: bool, installation_method: str, parallelism: int, platform: str | None, @@ -348,7 +345,6 @@ def run_build(prod_image_params: BuildProdParams) -> None: install_airflow_version=install_airflow_version, install_mysql_client_type=install_mysql_client_type, install_packages_from_context=install_packages_from_context, - install_providers_from_sources=install_providers_from_sources, installation_method=installation_method, prepare_buildx_cache=prepare_buildx_cache, push=push, diff --git a/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py b/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py index 69c7d23c969ac..78b4b9e00d675 100644 --- a/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py @@ -57,7 +57,6 @@ "--python-image", "--commit-sha", "--additional-pip-install-flags", - "--install-providers-from-sources", ], }, { diff --git a/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py b/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py index f88388a22a9f4..09cc67c226370 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py +++ b/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py @@ -90,7 +90,7 @@ def tarball_release(version, version_without_rc): def create_artifacts_with_sdist(): - run_command(["python3", "setup.py", "compile_assets", "sdist", "bdist_wheel"], check=True) + run_command(["hatch", "build", "-t", "sdist", "-t", "wheel"], check=True) console_print("Artifacts created") @@ -100,7 +100,6 @@ def create_artifacts_with_breeze(): "breeze", "release-management", "prepare-airflow-package", - "--use-container-for-assets-compilation", "--package-format", "both", ], @@ -356,7 +355,7 @@ def publish_release_candidate(version, previous_version, github_token): # Create the artifacts if confirm_action("Use breeze to create artifacts?"): create_artifacts_with_breeze() - elif confirm_action("Use setup.py to create artifacts?"): + elif confirm_action("Use hatch to create artifacts?"): create_artifacts_with_sdist() # Sign the release sign_the_release(airflow_repo_root) diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index f943025205230..39211c965892a 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -19,6 +19,7 @@ import glob import operator import os +import random import re import shutil import sys @@ -129,7 +130,6 @@ ) from airflow_breeze.utils.path_utils import ( AIRFLOW_SOURCES_ROOT, - AIRFLOW_WWW_DIR, CONSTRAINTS_CACHE_DIR, DIST_DIR, GENERATED_PROVIDER_PACKAGES_DIR, @@ -143,9 +143,7 @@ ) from airflow_breeze.utils.python_versions import get_python_version_list from airflow_breeze.utils.run_utils import ( - clean_www_assets, run_command, - run_compile_www_assets, ) from airflow_breeze.utils.shared_options import get_dry_run, get_verbose from airflow_breeze.utils.versions import is_pre_release @@ -209,50 +207,74 @@ class VersionedFile(NamedTuple): WHEEL_VERSION = "0.36.2" GITPYTHON_VERSION = "3.1.40" RICH_VERSION = "13.7.0" +NODE_VERSION = "21.2.0" +PRE_COMMIT_VERSION = "3.5.0" AIRFLOW_BUILD_DOCKERFILE = f""" FROM python:{DEFAULT_PYTHON_MAJOR_MINOR_VERSION}-slim-{ALLOWED_DEBIAN_VERSIONS[0]} RUN 
apt-get update && apt-get install -y --no-install-recommends git -RUN pip install pip=={AIRFLOW_PIP_VERSION} wheel=={WHEEL_VERSION} \\ - gitpython=={GITPYTHON_VERSION} rich=={RICH_VERSION} +RUN pip install pip=={AIRFLOW_PIP_VERSION} hatch==1.9.1 \ + gitpython=={GITPYTHON_VERSION} rich=={RICH_VERSION} pre-commit=={PRE_COMMIT_VERSION} +COPY . /opt/airflow """ -AIRFLOW_BUILD_IMAGE_TAG = "apache/airflow:local-build-image" -NODE_BUILD_IMAGE_TAG = "node:21.2.0-bookworm-slim" - +AIRFLOW_BUILD_DOCKERIGNORE = """ +# Git version is dynamically generated +airflow/git_version +# Exclude mode_modules pulled by "yarn" for compilation of www files generated by NPM +airflow/www/node_modules + +# Exclude link to docs +airflow/www/static/docs + +# Exclude python generated files +**/__pycache__/ +**/*.py[cod] +**/*$py.class +**/.pytest_cache/ +**/env/ +**/build/ +**/develop-eggs/ +/dist/ +**/downloads/ +**/eggs/ +**/.eggs/ +**/lib/ +**/lib64/ +**/parts/ +**/sdist/ +**/var/ +**/wheels/ +**/*.egg-info/ +**/.installed.cfg +**/*.egg + +# Exclude temporary vi files +**/*~ + +# Exclude output files +**/*.out +**/hive_scratch_dir/ + +# Exclude auto-generated Finder files on Mac OS +**/.DS_Store +**/Thumbs.db + +# Exclude docs generated files +docs/_build/ +docs/_api/ +docs/_doctrees/ + +# files generated by memray +*.py.*.html +*.py.*.bin +""" -def _compile_assets_in_docker(): - clean_www_assets() - get_console().print("[info]Compiling assets in docker container\n") - result = run_command( - [ - "docker", - "run", - "-t", - "-v", - f"{AIRFLOW_WWW_DIR}:/opt/airflow/airflow/www/", - "-e", - "FORCE_COLOR=true", - NODE_BUILD_IMAGE_TAG, - "bash", - "-c", - "cd /opt/airflow/airflow/www && yarn install --frozen-lockfile && yarn run build", - ], - text=True, - capture_output=not get_verbose(), - check=False, - ) - if result.returncode != 0: - get_console().print("[error]Error compiling assets[/]") - get_console().print(result.stdout) - get_console().print(result.stderr) - fix_ownership_using_docker() - sys.exit(result.returncode) +AIRFLOW_BUILD_IMAGE_TAG = "apache/airflow:local-build-image" +NODE_BUILD_IMAGE_TAG = f"node:{NODE_VERSION}-bookworm-slim" - get_console().print("[success]compiled assets in docker container\n") - get_console().print("[info]Fixing ownership of compiled assets\n") - fix_ownership_using_docker() - get_console().print("[success]Fixing ownership of compiled assets\n") +AIRFLOW_BUILD_DOCKERFILE_PATH = AIRFLOW_SOURCES_ROOT / "airflow-build-dockerfile" +AIRFLOW_BUILD_DOCKERFILE_IGNORE_PATH = AIRFLOW_SOURCES_ROOT / "airflow-build-dockerfile.dockerignore" @release_management.command( @@ -260,63 +282,74 @@ def _compile_assets_in_docker(): help="Prepare sdist/whl package of Airflow.", ) @option_package_format -@click.option( - "--use-container-for-assets-compilation", - is_flag=True, - help="If set, the assets are compiled in docker container. On MacOS, asset compilation in containers " - "is slower, due to slow mounted filesystem and number of node_module files so by default asset " - "compilation is done locally. 
This option is useful for officially building packages by release " - "manager on MacOS to make sure it is a reproducible build.", -) @option_version_suffix_for_pypi @option_verbose @option_dry_run def prepare_airflow_packages( package_format: str, version_suffix_for_pypi: str, - use_container_for_assets_compilation: bool, ): perform_environment_checks() fix_ownership_using_docker() cleanup_python_generated_files() - get_console().print("[info]Compiling assets\n") - from sys import platform - - if platform == "darwin" and not use_container_for_assets_compilation: - run_compile_www_assets(dev=False, run_in_background=False, force_clean=True) - else: - _compile_assets_in_docker() - get_console().print("[success]Assets compiled successfully[/]") + # This is a security feature. + # + # The image needed to build the airflow package (including the .git directory) is built from + # scratch, in an isolated environment, so that the code running inside docker cannot override + # the local code. The full Airflow repository is added to the image, but locally built node_modules + # are not added to the context of that image. + AIRFLOW_BUILD_DOCKERFILE_PATH.write_text(AIRFLOW_BUILD_DOCKERFILE.strip()) + AIRFLOW_BUILD_DOCKERFILE_IGNORE_PATH.write_text(AIRFLOW_BUILD_DOCKERIGNORE.strip()) run_command( - ["docker", "build", "--tag", AIRFLOW_BUILD_IMAGE_TAG, "-"], - input=AIRFLOW_BUILD_DOCKERFILE, + [ + "docker", + "buildx", + "build", + ".", + "-f", + "airflow-build-dockerfile", + "--tag", + AIRFLOW_BUILD_IMAGE_TAG, + ], text=True, check=True, + cwd=AIRFLOW_SOURCES_ROOT, env={"DOCKER_CLI_HINTS": "false"}, ) - run_command( + container_id = f"airflow-build-{random.getrandbits(64):08x}" + result = run_command( cmd=[ "docker", "run", + "--name", + container_id, "-t", - "-v", - f"{AIRFLOW_SOURCES_ROOT}:/opt/airflow:cached", "-e", f"VERSION_SUFFIX_FOR_PYPI={version_suffix_for_pypi}", "-e", + "HOME=/opt/airflow/files/home", + "-e", "GITHUB_ACTIONS", "-e", f"PACKAGE_FORMAT={package_format}", + "-w", + "/opt/airflow", AIRFLOW_BUILD_IMAGE_TAG, "python", "/opt/airflow/scripts/in_container/run_prepare_airflow_packages.py", ], - check=True, + check=False, ) + if result.returncode != 0: + get_console().print("[error]Error preparing Airflow package[/]") + fix_ownership_using_docker() + sys.exit(result.returncode) + DIST_DIR.mkdir(parents=True, exist_ok=True) + # Copy all files in the dist directory in container to the host dist directory (note '/.' 
in SRC) + run_command(["docker", "cp", f"{container_id}:/opt/airflow/dist/.", "./dist"], check=True) + run_command(["docker", "rm", "--force", container_id], check=True) get_console().print("[success]Successfully prepared Airflow package!\n\n") - get_console().print("\n[info]Cleaning ownership of generated files\n") - fix_ownership_using_docker() - get_console().print("\n[success]Cleaned ownership of generated files\n") def provider_action_summary(description: str, message_type: MessageType, packages: list[str]): @@ -699,7 +732,7 @@ def run_generate_constraints_in_parallel( @release_management.command( name="generate-constraints", - help="Generates pinned constraint files with all extras from setup.py in parallel.", + help="Generates pinned constraint files with all extras from pyproject.toml in parallel.", ) @option_python @option_run_in_parallel diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py index 77fc83650c67f..e6ddec7e161e6 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py @@ -55,7 +55,6 @@ "name": "Package flags", "options": [ "--package-format", - "--use-container-for-assets-compilation", "--version-suffix-for-pypi", ], } diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index 15e6bbec94ef8..ded03520b8841 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -387,8 +387,7 @@ def get_airflow_extras(): # Initialize files for rebuild check FILES_FOR_REBUILD_CHECK = [ - "setup.py", - "setup.cfg", + "pyproject.toml", "Dockerfile.ci", ".dockerignore", "generated/provider_dependencies.json", @@ -430,19 +429,19 @@ def get_airflow_extras(): "amazon", "async", "celery", - "cncf.kubernetes", - "common.io", + "cncf-kubernetes", + "common-io", "docker", "elasticsearch", "ftp", "google", - "google_auth", + "google-auth", "graphviz", "grpc", "hashicorp", "http", "ldap", - "microsoft.azure", + "microsoft-azure", "mysql", "odbc", "openlineage", diff --git a/dev/breeze/src/airflow_breeze/params/build_ci_params.py b/dev/breeze/src/airflow_breeze/params/build_ci_params.py index 954f405f9c584..2883eded8ce13 100644 --- a/dev/breeze/src/airflow_breeze/params/build_ci_params.py +++ b/dev/breeze/src/airflow_breeze/params/build_ci_params.py @@ -33,7 +33,7 @@ class BuildCiParams(CommonBuildParams): airflow_constraints_mode: str = "constraints-source-providers" airflow_constraints_reference: str = DEFAULT_AIRFLOW_CONSTRAINTS_BRANCH - airflow_extras: str = "devel_ci" + airflow_extras: str = "devel-ci" airflow_pre_cached_pip_packages: bool = True force_build: bool = False upgrade_to_newer_dependencies: bool = False diff --git a/dev/breeze/src/airflow_breeze/params/build_prod_params.py b/dev/breeze/src/airflow_breeze/params/build_prod_params.py index ed0cec9a8cb66..334edaab48503 100644 --- a/dev/breeze/src/airflow_breeze/params/build_prod_params.py +++ b/dev/breeze/src/airflow_breeze/params/build_prod_params.py @@ -212,7 +212,6 @@ def prepare_arguments_for_docker_build_command(self) -> list[str]: self._req_arg("DOCKER_CONTEXT_FILES", self.docker_context_files) self._req_arg("INSTALL_PACKAGES_FROM_CONTEXT", self.install_packages_from_context) self._req_arg("INSTALL_POSTGRES_CLIENT", self.install_postgres_client) - 
self._req_arg("INSTALL_PROVIDERS_FROM_SOURCES", self.install_providers_from_sources) self._req_arg("PYTHON_BASE_IMAGE", self.python_base_image) # optional build args self._opt_arg("AIRFLOW_CONSTRAINTS_LOCATION", self.airflow_constraints_location) diff --git a/dev/breeze/src/airflow_breeze/params/common_build_params.py b/dev/breeze/src/airflow_breeze/params/common_build_params.py index e1c77990e9f78..421bcc7c3a947 100644 --- a/dev/breeze/src/airflow_breeze/params/common_build_params.py +++ b/dev/breeze/src/airflow_breeze/params/common_build_params.py @@ -63,7 +63,6 @@ class CommonBuildParams: github_repository: str = APACHE_AIRFLOW_GITHUB_REPOSITORY github_token: str = os.environ.get("GITHUB_TOKEN", "") image_tag: str | None = None - install_providers_from_sources: bool = False install_mysql_client_type: str = ALLOWED_INSTALL_MYSQL_CLIENT_TYPES[0] platform: str = DOCKER_DEFAULT_PLATFORM prepare_buildx_cache: bool = False diff --git a/dev/breeze/src/airflow_breeze/params/shell_params.py b/dev/breeze/src/airflow_breeze/params/shell_params.py index e90d3480464e2..85fbecd6ca8e8 100644 --- a/dev/breeze/src/airflow_breeze/params/shell_params.py +++ b/dev/breeze/src/airflow_breeze/params/shell_params.py @@ -162,7 +162,6 @@ class ShellParams: image_tag: str | None = None include_mypy_volume: bool = False install_airflow_version: str = "" - install_providers_from_sources: bool = True install_selected_providers: str | None = None integration: tuple[str, ...] = () issue_id: str = "" @@ -526,7 +525,6 @@ def env_variables_for_docker_commands(self) -> dict[str, str]: _set_var(_env, "HOST_USER_ID", self.host_user_id) _set_var(_env, "INIT_SCRIPT_FILE", None, "init.sh") _set_var(_env, "INSTALL_AIRFLOW_VERSION", self.install_airflow_version) - _set_var(_env, "INSTALL_PROVIDERS_FROM_SOURCES", self.install_providers_from_sources) _set_var(_env, "INSTALL_SELECTED_PROVIDERS", self.install_selected_providers) _set_var(_env, "ISSUE_ID", self.issue_id) _set_var(_env, "LOAD_DEFAULT_CONNECTIONS", self.load_default_connections) diff --git a/dev/breeze/src/airflow_breeze/pre_commit_ids.py b/dev/breeze/src/airflow_breeze/pre_commit_ids.py index 1b35b2c5bcaae..e5ad08afa1ccd 100644 --- a/dev/breeze/src/airflow_breeze/pre_commit_ids.py +++ b/dev/breeze/src/airflow_breeze/pre_commit_ids.py @@ -71,9 +71,9 @@ "check-providers-init-file-missing", "check-providers-subpackages-init-file-exist", "check-pydevd-left-in-code", + "check-pyproject-toml-order", "check-revision-heads-map", "check-safe-filter-usage-in-html", - "check-setup-order", "check-start-date-not-used-in-defaults", "check-system-tests-present", "check-system-tests-tocs", @@ -133,6 +133,5 @@ "update-supported-versions", "update-vendored-in-k8s-json-schema", "update-version", - "validate-pyproject", "yamllint", ] diff --git a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py index 391edb6056bd0..00bb2203a0832 100644 --- a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py +++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py @@ -656,7 +656,7 @@ def update_release_notes( ) -> tuple[bool, bool]: """Updates generated files. - This includes the readme, changes, and/or setup.cfg/setup.py/manifest.in/provider_info. + This includes the readme, changes, and provider.yaml files. 
:param provider_package_id: id of the package :param reapply_templates_only: regenerate already released documentation only - without updating versions diff --git a/dev/breeze/src/airflow_breeze/prepare_providers/provider_packages.py b/dev/breeze/src/airflow_breeze/prepare_providers/provider_packages.py index a158334f7db92..fa3a1b68451ba 100644 --- a/dev/breeze/src/airflow_breeze/prepare_providers/provider_packages.py +++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_packages.py @@ -260,7 +260,8 @@ def get_packages_list_to_act_on( return [ package.strip() for package in package_list_file.readlines() - if (package.strip() not in removed_provider_ids or include_removed) + if not package.strip().startswith("#") + and (package.strip() not in removed_provider_ids or include_removed) and (package.strip() not in not_ready_provider_ids or include_not_ready) ] elif provider_packages: diff --git a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py index 5c8e98056d0ae..4e35cabe3be84 100644 --- a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py @@ -65,7 +65,7 @@ # Those are volumes that are mounted when MOUNT_SELECTED is chosen (which is the default when # entering Breeze. MOUNT_SELECTED prevents to mount the files that you can have accidentally added -# in your sources (or they were added automatically by setup.py etc.) to be mounted to container. +# in your sources (or they were added automatically by pyproject.toml) to be mounted to container. # This is important to get a "clean" environment for different python versions and to avoid # unnecessary slow-downs when you are mounting files on MacOS (which has very slow filesystem) # Any time you add a top-level folder in airflow that should also be added to container you should @@ -80,7 +80,6 @@ (".rat-excludes", "/opt/airflow/.rat-excludes"), ("BREEZE.rst", "/opt/airflow/BREEZE.rst"), ("LICENSE", "/opt/airflow/LICENSE"), - ("MANIFEST.in", "/opt/airflow/MANIFEST.in"), ("NOTICE", "/opt/airflow/NOTICE"), ("RELEASE_NOTES.rst", "/opt/airflow/RELEASE_NOTES.rst"), ("airflow", "/opt/airflow/airflow"), @@ -95,8 +94,6 @@ ("pyproject.toml", "/opt/airflow/pyproject.toml"), ("scripts", "/opt/airflow/scripts"), ("scripts/docker/entrypoint_ci.sh", "/entrypoint"), - ("setup.cfg", "/opt/airflow/setup.cfg"), - ("setup.py", "/opt/airflow/setup.py"), ("tests", "/opt/airflow/tests"), ("helm_tests", "/opt/airflow/helm_tests"), ("kubernetes_tests", "/opt/airflow/kubernetes_tests"), diff --git a/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py b/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py index 8a8db025d5902..dcbb585db1b83 100644 --- a/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py @@ -299,7 +299,6 @@ def _install_packages_in_k8s_virtualenv(): str(K8S_REQUIREMENTS.resolve()), ] env = os.environ.copy() - env["INSTALL_PROVIDERS_FROM_SOURCES"] = "true" capture_output = True if get_verbose(): capture_output = False diff --git a/dev/breeze/src/airflow_breeze/utils/packages.py b/dev/breeze/src/airflow_breeze/utils/packages.py index 49626a4668efe..a6eb386148551 100644 --- a/dev/breeze/src/airflow_breeze/utils/packages.py +++ b/dev/breeze/src/airflow_breeze/utils/packages.py @@ -53,16 +53,6 @@ LONG_PROVIDERS_PREFIX = "apache-airflow-providers-" -# TODO: use single source of truth for those -# for now we need to keep them in sync with 
the ones in setup.py -PREINSTALLED_PROVIDERS = [ - "common.sql", - "ftp", - "http", - "imap", - "sqlite", -] - class EntityType(Enum): Operators = "Operators" @@ -259,7 +249,7 @@ def get_available_packages( if include_not_ready: valid_states.add("not-ready") if include_regular: - valid_states.add("ready") + valid_states.update({"ready", "pre-release"}) if include_suspended: valid_states.add("suspended") if include_removed: diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py b/dev/breeze/src/airflow_breeze/utils/selective_checks.py index 4db2972356628..3d236473922c5 100644 --- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py +++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py @@ -113,13 +113,12 @@ def __hash__(self): r"^dev/.*\.py$", r"^Dockerfile", r"^scripts", - r"^setup.py", - r"^setup.cfg", + r"^pyproject.toml", r"^generated/provider_dependencies.json$", ], FileGroupForCi.PYTHON_PRODUCTION_FILES: [ r"^airflow/.*\.py", - r"^setup.py", + r"^pyproject.toml", ], FileGroupForCi.JAVASCRIPT_PRODUCTION_FILES: [ r"^airflow/.*\.[jt]sx?", @@ -141,8 +140,6 @@ def __hash__(self): ], FileGroupForCi.SETUP_FILES: [ r"^pyproject.toml", - r"^setup.cfg", - r"^setup.py", r"^generated/provider_dependencies.json$", ], FileGroupForCi.DOC_FILES: [ diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index 56e219062814c..2db8f657aec9c 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -426,7 +426,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ), ( pytest.param( - ("setup.py",), + ("pyproject.toml",), { "affected-providers-list-as-string": ALL_PROVIDERS_AFFECTED, "all-python-versions": "['3.8', '3.9', '3.10', '3.11']", @@ -447,7 +447,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): "mypy-packages": "['airflow', 'airflow/providers', 'docs', 'dev']", }, id="Everything should run - including all providers and upgrading to " - "newer requirements as setup.py changed and all Python versions", + "newer requirements as pyproject.toml changed and all Python versions", ) ), ( @@ -1168,20 +1168,12 @@ def test_no_commit_provided_trigger_full_build_for_any_event_type(github_event): id="Regular source changed", ), pytest.param( - ("setup.py",), + ("pyproject.toml",), { "upgrade-to-newer-dependencies": "true", }, (), - id="Setup.py changed", - ), - pytest.param( - ("setup.cfg",), - { - "upgrade-to-newer-dependencies": "true", - }, - (), - id="Setup.cfg changed", + id="pyproject.toml changed", ), pytest.param( ("airflow/providers/microsoft/azure/provider.yaml",), diff --git a/dev/hatch_build.py b/dev/hatch_build.py new file mode 100644 index 0000000000000..afbf35df63a03 --- /dev/null +++ b/dev/hatch_build.py @@ -0,0 +1,176 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import json +import logging +import os +from pathlib import Path +from subprocess import run +from typing import Any, Callable, Iterable + +from hatchling.builders.config import BuilderConfig +from hatchling.builders.hooks.plugin.interface import BuildHookInterface +from hatchling.builders.plugin.interface import BuilderInterface +from hatchling.plugin.manager import PluginManager + +log = logging.getLogger(__name__) +log_level = logging.getLevelName(os.getenv("CUSTOM_AIRFLOW_BUILD_LOG_LEVEL", "INFO")) +log.setLevel(log_level) + +AIRFLOW_ROOT_PATH = Path(__file__).parent.parent.resolve() +GENERATED_PROVIDERS_DEPENDENCIES_FILE = AIRFLOW_ROOT_PATH / "generated" / "provider_dependencies.json" +DEV_DIR_PATH = AIRFLOW_ROOT_PATH / "dev" +PREINSTALLED_PROVIDERS_FILE = DEV_DIR_PATH / "airflow_pre_installed_providers.txt" +DEPENDENCIES = json.loads(GENERATED_PROVIDERS_DEPENDENCIES_FILE.read_text()) +PREINSTALLED_PROVIDER_IDS = [ + package.strip() + for package in PREINSTALLED_PROVIDERS_FILE.read_text().splitlines() + if not package.strip().startswith("#") +] + +# if providers are ready, we can preinstall them +PREINSTALLED_PROVIDERS = [ + f"apache-airflow-providers-{provider_id.replace('.','-')}" + for provider_id in PREINSTALLED_PROVIDER_IDS + if DEPENDENCIES[provider_id]["state"] == "ready" +] +# if provider is in not-ready or pre-release, we need to install its dependencies +# however we need to skip apache-airflow itself and potentially any providers that are +PREINSTALLED_NOT_READY_DEPS = [] +for provider_id in PREINSTALLED_PROVIDER_IDS: + if DEPENDENCIES[provider_id]["state"] not in ["ready", "suspended", "removed"]: + for dependency in DEPENDENCIES[provider_id]["deps"]: + if dependency.startswith("apache-airflow-providers"): + raise Exception( + f"The provider {provider_id} is pre-installed and it has as dependency " + f"to another provider {dependency}. This is not allowed. Pre-installed" + f"providers should only have 'apache-airflow' and regular dependencies." + ) + if not dependency.startswith("apache-airflow"): + PREINSTALLED_NOT_READY_DEPS.append(dependency) + + +class CustomBuild(BuilderInterface[BuilderConfig, PluginManager]): + """Custom build class for Airflow assets and git version.""" + + # Note that this name of the plugin MUST be `custom` - as long as we use it from custom + # hatch_build.py file and not from external plugin. 
See note in the: + # https://hatch.pypa.io/latest/plugins/build-hook/custom/#example + # + PLUGIN_NAME = "custom" + + def clean(self, directory: str, versions: Iterable[str]) -> None: + work_dir = Path(self.root) + commands = [ + ["rm -rf airflow/www/static/dist"], + ["rm -rf airflow/www/node_modules"], + ] + for cmd in commands: + run(cmd, cwd=work_dir.as_posix(), check=True, shell=True) + + def get_version_api(self) -> dict[str, Callable[..., str]]: + """Custom build target for standard package preparation.""" + return {"standard": self.build_standard} + + def build_standard(self, directory: str, artifacts: Any, **build_data: Any) -> str: + self.write_git_version() + work_dir = Path(self.root) + commands = [ + ["pre-commit run --hook-stage manual compile-www-assets --all-files"], + ] + for cmd in commands: + run(cmd, cwd=work_dir.as_posix(), check=True, shell=True) + dist_path = work_dir / "airflow" / "www" / "static" / "dist" + return dist_path.resolve().as_posix() + + def get_git_version(self) -> str: + """ + Return a version to identify the state of the underlying git repo. + + The version will indicate whether the head of the current git-backed working directory + is tied to a release tag or not. It will indicate the former with a 'release:{version}' + prefix and the latter with a '.dev0' suffix. Following the prefix will be a sha of the + current branch head. Finally, a "dirty" suffix is appended to indicate that uncommitted + changes are present. + + Example pre-release version: ".dev0+2f635dc265e78db6708f59f68e8009abb92c1e65". + Example release version: ".release+2f635dc265e78db6708f59f68e8009abb92c1e65". + Example modified release version: ".release+2f635dc265e78db6708f59f68e8009abb92c1e65".dirty + + :return: Found Airflow version in Git repo. + """ + try: + import git + + try: + repo = git.Repo(str(Path(self.root) / ".git")) + except git.NoSuchPathError: + log.warning(".git directory not found: Cannot compute the git version") + return "" + except git.InvalidGitRepositoryError: + log.warning("Invalid .git directory not found: Cannot compute the git version") + return "" + except ImportError: + log.warning("gitpython not found: Cannot compute the git version.") + return "" + if repo: + sha = repo.head.commit.hexsha + if repo.is_dirty(): + return f".dev0+{sha}.dirty" + # commit is clean + return f".release:{sha}" + return "no_git_version" + + def write_git_version(self) -> None: + """Write git version to git_version file.""" + version = self.get_git_version() + git_version_file = Path(self.root) / "airflow" / "git_version" + self.app.display(f"Writing version {version} to {git_version_file}") + git_version_file.write_text(version) + + +class CustomBuildHook(BuildHookInterface[BuilderConfig]): + """Custom build hook for Airflow - remove devel extras and adds preinstalled providers.""" + + def initialize(self, version: str, build_data: dict[str, Any]) -> None: + """ + This occurs immediately before each build. + + Any modifications to the build data will be seen by the build target. 
+ """ + if version == "standard": + # remove devel dependencies from optional dependencies for standard packages + self.metadata.core._optional_dependencies = { + key: value + for (key, value) in self.metadata.core.optional_dependencies.items() + if not key.startswith("devel") and key not in ["doc", "doc-gen"] + } + # Replace editable dependencies with provider dependencies for provider packages + for dependency_id in DEPENDENCIES.keys(): + if DEPENDENCIES[dependency_id]["state"] != "ready": + continue + normalized_dependency_id = dependency_id.replace(".", "-") + self.metadata.core._optional_dependencies[normalized_dependency_id] = [ + f"apache-airflow-providers-{normalized_dependency_id}" + ] + # Inject preinstalled providers into the dependencies for standard packages + if self.metadata.core._dependencies: + for provider in PREINSTALLED_PROVIDERS: + self.metadata.core._dependencies.append(provider) + for dependency in PREINSTALLED_NOT_READY_DEPS: + self.metadata.core._dependencies.append(dependency) diff --git a/airflow/providers/installed_providers.txt b/dev/prod_image_installed_providers.txt similarity index 74% rename from airflow/providers/installed_providers.txt rename to dev/prod_image_installed_providers.txt index d416a8a47b68e..3aadc9d0b1f94 100644 --- a/airflow/providers/installed_providers.txt +++ b/dev/prod_image_installed_providers.txt @@ -1,3 +1,4 @@ +# List of all the providers installed in regular airflow PROD image amazon celery cncf.kubernetes diff --git a/dev/refresh_images.sh b/dev/refresh_images.sh index 2b11229aa06bc..a6d4e9a06fb99 100755 --- a/dev/refresh_images.sh +++ b/dev/refresh_images.sh @@ -36,7 +36,7 @@ breeze ci-image build \ rm -fv ./dist/* ./docker-context-files/* breeze release-management prepare-provider-packages \ - --package-list-file ./airflow/providers/installed_providers.txt \ + --package-list-file ./dev/prod_image_installed_providers.txt \ --package-format wheel \ --version-suffix-for-pypi dev0 diff --git a/dev/sign.sh b/dev/sign.sh index 178ba3ccb7c80..0d4c2861a5c76 100755 --- a/dev/sign.sh +++ b/dev/sign.sh @@ -17,9 +17,7 @@ # under the License. set -euo pipefail -# Use this to sign the tar balls generated from -# python setup.py sdist --formats=gztar -# ie. sign.sh +# Use this to sign the tar balls generated via hatch # you will still be required to type in your signing key password # or it needs to be available in your keychain diff --git a/docker_tests/docker_tests_utils.py b/docker_tests/docker_tests_utils.py index 8d95805f6459c..7eea98e9bd40b 100644 --- a/docker_tests/docker_tests_utils.py +++ b/docker_tests/docker_tests_utils.py @@ -73,7 +73,8 @@ def display_dependency_conflict_message(): It can mean one of those: 1) The main is currently broken (other PRs will fail with the same error) -2) You changed some dependencies in setup.py or setup.cfg and they are conflicting. +2) You changed some dependencies in pyproject.toml (either manually or automatically by pre-commit) + and they are conflicting. 
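A hedged aside on the conflict message above: outside the image tests, the same kind of check can be reproduced locally with plain `pip check`, which verifies that installed packages have compatible requirements. The sketch below is illustrative only and is not the helper used by `docker_tests`.

```python
# Illustrative sketch: detect conflicting installed dependencies with `pip check`.
import subprocess
import sys

result = subprocess.run(
    [sys.executable, "-m", "pip", "check"],  # non-zero exit code means broken requirements
    capture_output=True,
    text=True,
)
if result.returncode != 0:
    print("Dependency conflicts detected:")
    print(result.stdout)
else:
    print("No broken requirements found.")
```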
diff --git a/docker_tests/test_prod_image.py b/docker_tests/test_prod_image.py index e54de129d873f..4f6a93de42117 100644 --- a/docker_tests/test_prod_image.py +++ b/docker_tests/test_prod_image.py @@ -20,10 +20,10 @@ import os import subprocess from importlib.util import find_spec +from pathlib import Path import pytest -# isort:off (needed to workaround isort bug) from docker_tests.command_utils import run_command from docker_tests.constants import SOURCE_ROOT from docker_tests.docker_tests_utils import ( @@ -33,10 +33,20 @@ run_python_in_docker, ) -# isort:on (needed to workaround isort bug) -from setup import PREINSTALLED_PROVIDERS - -INSTALLED_PROVIDER_PATH = SOURCE_ROOT / "airflow" / "providers" / "installed_providers.txt" +DEV_DIR_PATH = SOURCE_ROOT / "dev" +AIRFLOW_PRE_INSTALLED_PROVIDERS_FILE_PATH = DEV_DIR_PATH / "airflow_pre_installed_providers.txt" +PROD_IMAGE_PROVIDERS_FILE_PATH = DEV_DIR_PATH / "prod_image_installed_providers.txt" +AIRFLOW_ROOT_PATH = Path(__file__).parents[2].resolve() +SLIM_IMAGE_PROVIDERS = [ + f"apache-airflow-providers-{provider_id.replace('.','-')}" + for provider_id in AIRFLOW_PRE_INSTALLED_PROVIDERS_FILE_PATH.read_text().splitlines() + if not provider_id.startswith("#") +] +REGULAR_IMAGE_PROVIDERS = [ + f"apache-airflow-providers-{provider_id.replace('.','-')}" + for provider_id in PROD_IMAGE_PROVIDERS_FILE_PATH.read_text().splitlines() + if not provider_id.startswith("#") +] class TestCommands: @@ -80,23 +90,22 @@ def test_bash_version(self): class TestPythonPackages: def test_required_providers_are_installed(self): if os.environ.get("TEST_SLIM_IMAGE"): - lines = PREINSTALLED_PROVIDERS + packages_to_install = set(SLIM_IMAGE_PROVIDERS) + package_file = AIRFLOW_PRE_INSTALLED_PROVIDERS_FILE_PATH else: - lines = (d.strip() for d in INSTALLED_PROVIDER_PATH.read_text().splitlines()) - packages_to_install = {f"apache-airflow-providers-{d.replace('.', '-')}" for d in lines} + packages_to_install = set(REGULAR_IMAGE_PROVIDERS) + package_file = PROD_IMAGE_PROVIDERS_FILE_PATH assert len(packages_to_install) != 0 - output = run_bash_in_docker( "airflow providers list --output json", stderr=subprocess.DEVNULL, return_output=True ) providers = json.loads(output) - packages_installed = {d["package_name"] for d in providers} + packages_installed = set(d["package_name"] for d in providers) assert len(packages_installed) != 0 - assert packages_to_install == packages_installed, ( - f"List of expected installed packages and image content mismatch. " - f"Check {INSTALLED_PROVIDER_PATH} file." - ) + assert ( + packages_to_install == packages_installed + ), f"List of expected installed packages and image content mismatch. Check {package_file} file." 
def test_pip_dependencies_conflict(self): try: diff --git a/docs/apache-airflow-providers-apache-hive/index.rst b/docs/apache-airflow-providers-apache-hive/index.rst index 63d3096112d54..8aca48be9801b 100644 --- a/docs/apache-airflow-providers-apache-hive/index.rst +++ b/docs/apache-airflow-providers-apache-hive/index.rst @@ -106,7 +106,7 @@ PIP package Version required ``apache-airflow-providers-common-sql`` ``>=1.3.1`` ``hmsclient`` ``>=0.1.0`` ``pandas`` ``>=0.17.1`` -``pyhive[hive_pure_sasl]`` ``>=0.7.0`` +``pyhive[hive-pure-sasl]`` ``>=0.7.0`` ``thrift`` ``>=0.9.2`` ======================================= ================== diff --git a/docs/apache-airflow-providers/howto/create-custom-providers.rst b/docs/apache-airflow-providers/howto/create-custom-providers.rst index 7b2a2fcf4c1dd..0e868ac253bd2 100644 --- a/docs/apache-airflow-providers/howto/create-custom-providers.rst +++ b/docs/apache-airflow-providers/howto/create-custom-providers.rst @@ -196,8 +196,8 @@ names, so preferably choose package that is in your "domain". You need to do the following to turn an existing Python package into a provider (see below for examples): -* Add the ``apache_airflow_provider`` entry point in the ``setup.cfg`` - this tells airflow where to get - the required provider metadata +* Add the ``apache_airflow_provider`` entry point in the ``pyproject.toml`` file - this tells airflow + where to get the required provider metadata * Create the function that you refer to in the first step as part of your package: this functions returns a dictionary that contains all meta-data about your provider package * If you want Airflow to link to documentation of your Provider in the providers page, make sure @@ -211,29 +211,27 @@ You need to do the following to turn an existing Python package into a provider .. exampleinclude:: /../../airflow/provider_info.schema.json :language: json -Example ``setup.cfg``: +Example ``pyproject.toml``: -.. code-block:: cfg +.. code-block:: toml - [options.entry_points] - # the function get_provider_info is defined in myproviderpackage.somemodule - apache_airflow_provider= - provider_info=myproviderpackage.somemodule:get_provider_info + [project.entry-points."apache_airflow_provider"] + provider_info = "airflow.providers.myproviderpackage.get_provider_info:get_provider_info" -Example ``myproviderpackage/somemodule.py``: -.. code-block:: Python +Example ``myproviderpackage/get_provider_info.py``: - def get_provider_info(): - return { - "package-name": "my-package-name", - "name": "name", - "description": "a description", - "hook-class-names": [ - "myproviderpackage.hooks.source.SourceHook", - ], - } +.. code-block:: Python + def get_provider_info(): + return { + "package-name": "my-package-name", + "name": "name", + "description": "a description", + "hook-class-names": [ + "myproviderpackage.hooks.source.SourceHook", + ], + } **Is there a convention for a connection id and type?** diff --git a/docs/apache-airflow/administration-and-deployment/modules_management.rst b/docs/apache-airflow/administration-and-deployment/modules_management.rst index 44b360fbc83ef..a619d2a06ee24 100644 --- a/docs/apache-airflow/administration-and-deployment/modules_management.rst +++ b/docs/apache-airflow/administration-and-deployment/modules_management.rst @@ -331,20 +331,12 @@ packages, so learning how to build your package is handy. Here is how to create your package: -1. Before starting, install the following packages: +1. 
Before starting, choose and install the build/packaging tool that you will use; ideally it should be +PEP-621 compliant so that you can easily switch to a different tool later. +The popular choices are setuptools, poetry, hatch and flit. -``setuptools``: setuptools is a package development process library designed -for creating and distributing Python packages. - -``wheel``: The wheel package provides a bdist_wheel command for setuptools. It -creates .whl file which is directly installable through the ``pip install`` -command. We can then upload the same file to `PyPI `_. - -.. code-block:: bash - - pip install --upgrade pip setuptools wheel - -2. Create the package directory - in our case, we will call it ``airflow_operators``. +2. Decide where you want to create your package, and create the package directory - in our case, + we will call it ``airflow_operators``. .. code-block:: bash @@ -358,42 +350,16 @@ command. We can then upload the same file to `PyPI `_. When we import this package, it should print the above message. -4. Create ``setup.py``: - -.. code-block:: python - - import setuptools - - setuptools.setup( - name="airflow_operators", - packages=setuptools.find_packages(), - ) - -5. Build the wheel: - -.. code-block:: bash - - python setup.py bdist_wheel +4. Create ``pyproject.toml`` and fill it with the build tool configuration of your choice. +See `The pyproject.toml specification `__ -This will create a few directories in the project and the overall structure will -look like following: +5. Build your project using the tool of your choice. For example, for hatch it can be: .. code-block:: bash - . - ├── airflow_operators - │ ├── __init__.py - ├── airflow_operators.egg-info - │ ├── PKG-INFO - │ ├── SOURCES.txt - │ ├── dependency_links.txt - │ └── top_level.txt - ├── build - │ └── bdist.macosx-10.15-x86_64 - ├── dist - │ └── airflow_operators-0.0.0-py3-none-any.whl - └── setup.py + hatch build -t wheel +This will create a ``.whl`` file in your ``dist`` folder. 6. Install the .whl file using pip: diff --git a/docs/apache-airflow/extra-packages-ref.rst b/docs/apache-airflow/extra-packages-ref.rst index 0b6a51fd32aa4..ffbe9225246ca 100644 --- a/docs/apache-airflow/extra-packages-ref.rst +++ b/docs/apache-airflow/extra-packages-ref.rst @@ -18,17 +18,22 @@ Reference for package extras '''''''''''''''''''''''''''' -Here's the list of all the extra dependencies of Apache Airflow. +Airflow has a number of optional "extras" that you can use to add features to your installation when you +are installing Airflow. Those extras are a good way for users to manage their installation, and they are +also useful for contributors to Airflow who want to contribute some of the features - including +optional integrations of Airflow - via providers. -The entries with ``*`` in the ``Preinstalled`` column indicate that those extras (providers) are always -pre-installed when Airflow is installed. +.. warning:: + + Traditionally in Airflow some of the extras used `.` and `_` to separate the parts of the extra name. + These were not PEP-685 normalized names, and we opted to change them to `-` for all our extras. Expecting that + PEP-685 will eventually be implemented in full by `pip` and other tools, we changed all our extras to use `-` as + the separator, even if in some cases this introduces warnings (the warnings are harmless). This is a + future-proof approach. It is also fully backwards-compatible: you can still use `_` or `.` in your extras, but we + recommend using `-` as the separator going forward (see the normalization sketch after the extras tables below). -.. 
note:: - You can disable automated installation of the providers with extras when installing Airflow. You need to - have ``INSTALL_PROVIDERS_FROM_SOURCES`` environment variable to ``true`` before running ``pip install`` - command. Contributors need to set it, if they are installing Airflow locally, and want to develop - providers directly via Airflow sources. This variable is automatically set in ``Breeze`` - development environment. Setting this variable is not needed in editable mode (``pip install -e``). + +Here's the list of all the extra dependencies of Apache Airflow. Core Airflow extras ------------------- @@ -46,11 +51,13 @@ python dependencies for the provided package. +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ | cgroups | ``pip install 'apache-airflow[cgroups]'`` | Needed To use CgroupTaskRunner | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ -| deprecated_api | ``pip install 'apache-airflow[deprecated_api]'`` | Deprecated, experimental API that is replaced with the new REST API | +| deprecated-api | ``pip install 'apache-airflow[deprecated-api]'`` | Deprecated, experimental API that is replaced with the new REST API | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ -| github_enterprise | ``pip install 'apache-airflow[github_enterprise]'`` | GitHub Enterprise auth backend | +| github-enterprise | ``pip install 'apache-airflow[github-enterprise]'`` | GitHub Enterprise auth backend | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ -| google_auth | ``pip install 'apache-airflow[google_auth]'`` | Google auth backend | +| google-auth | ``pip install 'apache-airflow[google-auth]'`` | Google auth backend | ++---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ +| graphviz | ``pip install 'apache-airflow[graphvis]'`` | Enables exporting DAGs to .dot graphical output | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ | graphviz | ``pip install 'apache-airflow[graphviz]'`` | Graphviz renderer for converting DAG to graphical output | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ @@ -99,7 +106,7 @@ with a consistent set of dependencies based on constraint files provided by Airf .. code-block:: bash :substitutions: - pip install apache-airflow[google,amazon,apache.spark]==|version| \ + pip install apache-airflow[google,amazon,apache-spark]==|version| \ --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-|version|/constraints-3.8.txt" Note, that this will install providers in the versions that were released at the time of Airflow |version| release. You can later @@ -116,40 +123,39 @@ custom bash/python providers). 
+---------------------+-----------------------------------------------------+------------------------------------------------+ | extra | install command | enables | +=====================+=====================================================+================================================+ -| apache.atlas | ``pip install 'apache-airflow[apache.atlas]'`` | Apache Atlas | +| apache-atlas | ``pip install 'apache-airflow[apache-atlas]'`` | Apache Atlas | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.beam | ``pip install 'apache-airflow[apache.beam]'`` | Apache Beam operators & hooks | +| apache-beam | ``pip install 'apache-airflow[apache-beam]'`` | Apache Beam operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.cassandra | ``pip install 'apache-airflow[apache.cassandra]'`` | Cassandra related operators & hooks | +| apache-cassandra | ``pip install 'apache-airflow[apache-cassandra]'`` | Cassandra related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.drill | ``pip install 'apache-airflow[apache.drill]'`` | Drill related operators & hooks | +| apache-drill | ``pip install 'apache-airflow[apache-drill]'`` | Drill related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.druid | ``pip install 'apache-airflow[apache.druid]'`` | Druid related operators & hooks | +| apache-druid | ``pip install 'apache-airflow[apache-druid]'`` | Druid related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.flink | ``pip install 'apache-airflow[apache.flink]'`` | Flink related operators & hooks | +| apache-flink | ``pip install 'apache-airflow[apache-flink]'`` | Flink related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.hdfs | ``pip install 'apache-airflow[apache.hdfs]'`` | HDFS hooks and operators | +| apache-hdfs | ``pip install 'apache-airflow[apache-hdfs]'`` | HDFS hooks and operators | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.hive | ``pip install 'apache-airflow[apache.hive]'`` | All Hive related operators | +| apache-hive | ``pip install 'apache-airflow[apache-hive]'`` | All Hive related operators | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.impala | ``pip install 'apache-airflow[apache.impala]'`` | All Impala related operators & hooks | +| apache-impala | ``pip install 'apache-airflow[apache-impala]'`` | All Impala related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.kafka | ``pip install 'apache-airflow[apache.kafka]'`` | All Kafka related operators & hooks | +| apache-kafka | ``pip install 'apache-airflow[apache-kafka]'`` | All Kafka related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| 
apache.kylin | ``pip install 'apache-airflow[apache.kylin]'`` | All Kylin related operators & hooks | +| apache-kylin | ``pip install 'apache-airflow[apache-kylin]'`` | All Kylin related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.livy | ``pip install 'apache-airflow[apache.livy]'`` | All Livy related operators, hooks & sensors | +| apache-livy | ``pip install 'apache-airflow[apache-livy]'`` | All Livy related operators, hooks & sensors | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.pig | ``pip install 'apache-airflow[apache.pig]'`` | All Pig related operators & hooks | +| apache-pig | ``pip install 'apache-airflow[apache-pig]'`` | All Pig related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.pinot | ``pip install 'apache-airflow[apache.pinot]'`` | All Pinot related hooks | +| apache-pinot | ``pip install 'apache-airflow[apache-pinot]'`` | All Pinot related hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.spark | ``pip install 'apache-airflow[apache.spark]'`` | All Spark related operators & hooks | +| apache-spark | ``pip install 'apache-airflow[apache-spark]'`` | All Spark related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.webhdfs | ``pip install 'apache-airflow[apache.webhdfs]'`` | HDFS hooks and operators | +| apache-webhdfs | ``pip install 'apache-airflow[apache-webhdfs]'`` | HDFS hooks and operators | +---------------------+-----------------------------------------------------+------------------------------------------------+ - External Services extras ======================== @@ -168,9 +174,9 @@ These are extras that add dependencies needed for integration with external serv +---------------------+-----------------------------------------------------+-----------------------------------------------------+ | asana | ``pip install 'apache-airflow[asana]'`` | Asana hooks and operators | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ -| atlassian.jira | ``pip install 'apache-airflow[atlassian.jira]'`` | Jira hooks and operators | +| atlassian-jira | ``pip install 'apache-airflow[atlassian-jira]'`` | Jira hooks and operators | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ -| azure | ``pip install 'apache-airflow[microsoft.azure]'`` | Microsoft Azure | +| microsoft-azure | ``pip install 'apache-airflow[microsoft-azure]'`` | Microsoft Azure | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ | cloudant | ``pip install 'apache-airflow[cloudant]'`` | Cloudant hook | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ @@ -180,7 +186,7 @@ These are extras that add dependencies needed for integration with external serv +---------------------+-----------------------------------------------------+-----------------------------------------------------+ | 
datadog | ``pip install 'apache-airflow[datadog]'`` | Datadog hooks and sensors | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ -| dbt.cloud | ``pip install 'apache-airflow[dbt.cloud]'`` | dbt Cloud hooks and operators | +| dbt-cloud | ``pip install 'apache-airflow[dbt-cloud]'`` | dbt Cloud hooks and operators | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ | dingding | ``pip install 'apache-airflow[dingding]'`` | Dingding hooks and sensors | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ @@ -243,7 +249,7 @@ Some of those enable Airflow to use executors to run tasks with them - other tha +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ | celery | ``pip install 'apache-airflow[celery]'`` | Celery dependencies and sensor | CeleryExecutor, CeleryKubernetesExecutor | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ -| cncf.kubernetes | ``pip install 'apache-airflow[cncf.kubernetes]'`` | Kubernetes client libraries, KubernetesPodOperator & friends | KubernetesExecutor, LocalKubernetesExecutor | +| cncf-kubernetes | ``pip install 'apache-airflow[cncf-kubernetes]'`` | Kubernetes client libraries, KubernetesPodOperator & friends | KubernetesExecutor, LocalKubernetesExecutor | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ | docker | ``pip install 'apache-airflow[docker]'`` | Docker hooks and operators | | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ @@ -259,7 +265,7 @@ Some of those enable Airflow to use executors to run tasks with them - other tha +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ | mongo | ``pip install 'apache-airflow[mongo]'`` | Mongo hooks and operators | | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ -| microsoft.mssql | ``pip install 'apache-airflow[microsoft.mssql]'`` | Microsoft SQL Server operators and hook. | | +| microsoft-mssql | ``pip install 'apache-airflow[microsoft-mssql]'`` | Microsoft SQL Server operators and hook. 
| | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ | mysql | ``pip install 'apache-airflow[mysql]'`` | MySQL operators and hook | | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ @@ -290,12 +296,16 @@ Other extras These are extras that provide support for integration with external systems via some - usually - standard protocols. +The entries with ``*`` in the ``Preinstalled`` column indicate that those extras (providers) are always +pre-installed when Airflow is installed. + + +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | extra | install command | enables | Preinstalled | +=====================+=====================================================+======================================+==============+ -| common.io | ``pip install 'apache-airflow[common.io]'`` | Core IO Operators | | +| common-io | ``pip install 'apache-airflow[common-io]'`` | Core IO Operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ -| common.sql | ``pip install 'apache-airflow[common.sql]'`` | Core SQL Operators | * | +| common-sql | ``pip install 'apache-airflow[common-sql]'`` | Core SQL Operators | * | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | ftp | ``pip install 'apache-airflow[ftp]'`` | FTP hooks and operators | * | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ @@ -307,9 +317,9 @@ These are extras that provide support for integration with external systems via +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | jdbc | ``pip install 'apache-airflow[jdbc]'`` | JDBC hooks and operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ -| microsoft.psrp | ``pip install 'apache-airflow[microsoft.psrp]'`` | PSRP hooks and operators | | +| microsoft-psrp | ``pip install 'apache-airflow[microsoft-psrp]'`` | PSRP hooks and operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ -| microsoft.winrm | ``pip install 'apache-airflow[microsoft.winrm]'`` | WinRM hooks and operators | | +| microsoft-winrm | ``pip install 'apache-airflow[microsoft-winrm]'`` | WinRM hooks and operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | openlineage | ``pip install 'apache-airflow[openlineage]'`` | Sending OpenLineage events | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ @@ -326,44 +336,90 @@ These are extras that provide support for integration with external systems via | ssh | ``pip install 'apache-airflow[ssh]'`` | SSH hooks and operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ 
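The renamed extras in the tables above (and the warning at the top of this file) rely on the fact that PEP-685 normalizes extra names with the same rule as distribution names, so the old dotted and underscored spellings resolve to the same canonical extra as the new dashed ones. A quick way to see that normalization, using the ``packaging`` library purely as an illustration (an assumption, not something the docs or the PR use):

.. code-block:: python

    # PEP-503/PEP-685 normalization: runs of ".", "_" and "-" collapse to a single "-".
    from packaging.utils import canonicalize_name

    for spelling in ("apache.spark", "apache_spark", "apache-spark"):
        print(f"{spelling!r} -> {canonicalize_name(spelling)!r}")
    # all three spellings normalize to 'apache-spark'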
-Bundle extras -------------- +Production Bundle extras +------------------------- -These are extras that install one or more extras as a bundle. Note that these extras should only be used for "development" version -of Airflow - i.e. when Airflow is installed from sources. Because of the way how bundle extras are constructed they might not -work when airflow is installed from 'PyPI`. - -If you want to install Airflow from PyPI with "all" extras (which should basically be never needed - you almost never need all extras from Airflow), -you need to list explicitly all the non-bundle extras that you want to install. +These are extras that install one or more extras as a bundle. +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ | extra | install command | enables | +=====================+=====================================================+========================================================================+ | all | ``pip install 'apache-airflow[all]'`` | All Airflow user facing features (no devel and doc requirements) | +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ -| all_dbs | ``pip install 'apache-airflow[all_dbs]'`` | All database integrations | +| all-core | ``pip install 'apache-airflow[all-core]'`` | All core airflow features that do not require installing providers | +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ -| devel | ``pip install 'apache-airflow[devel]'`` | Minimum development dependencies (without Hadoop, Kerberos, providers) | +| all-dbs | ``pip install 'apache-airflow[all-dbs]'`` | All database integrations | +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ -| devel_hadoop | ``pip install 'apache-airflow[devel_hadoop]'`` | Adds Hadoop stack libraries to ``devel`` dependencies | + +Development extras +------------------ + +The ``devel`` extras only make sense in editable mode. Users of Airflow should not be using them, unless they +start contributing back and install airflow from sources. Those extras are only available in Airflow when +it is installed in editable mode from sources (``pip install -e .[devel,EXTRAS]``). + +Devel extras +============ + +The devel extras do not install dependencies for features of Airflow, but add functionality that is needed to +develop Airflow, such as running tests, static checks. 
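Because the hatch build hook shown earlier strips the ``devel*`` and ``doc`` extras from the standard package, a wheel-based installation should not advertise them at all - only an editable install from sources does. A hedged sketch of that check against whatever ``apache-airflow`` is installed locally (illustrative only, not part of the PR):

.. code-block:: python

    # Sketch: list any devel/doc extras exposed by the installed apache-airflow
    # distribution; for a regular (non-editable) wheel install this should be empty.
    from importlib.metadata import metadata

    extras = metadata("apache-airflow").get_all("Provides-Extra") or []
    devel_like = sorted(e for e in extras if e.startswith("devel") or e in ("doc", "doc-gen"))
    print("devel/doc extras exposed:", devel_like or "none")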
+ ++---------------------+-----------------------------------------+------------------------------------------------------+ +| extra | install command | enables | ++=====================+=========================================+======================================================+ +| devel-debuggers | pip install -e '.[devel-debuggers]' | Adds all test libraries needed to test debuggers | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel-devscripts | pip install -e '.[devel-devscripts]' | Adds all test libraries needed to test devel scripts | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel-duckdb | pip install -e '.[devel-duckdb]' | Adds all test libraries needed to test duckdb | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel-iceberg | pip install -e '.[devel-iceberg]' | Adds all test libraries needed to test iceberg | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel-mypy | pip install -e '.[devel-mypy]' | Adds all test libraries needed to test mypy | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel-sentry | pip install -e '.[devel-sentry]' | Adds all test libraries needed to test sentry | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel-static-checks | pip install -e '.[devel-static-checks]' | Adds all test libraries needed to test static_checks | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel-tests | pip install -e '.[devel-tests]' | Adds all test libraries needed to test tests | ++---------------------+-----------------------------------------+------------------------------------------------------+ + +Bundle devel extras +=================== + +Those are extras that bundle devel, editable and doc extras together to make it easy to install them together in a single installation. Some of the +extras are more difficult to install on certain systems (such as ARM MacBooks) because they require system level dependencies to be installed. + +Note that ``pip install -e ".[devel]"`` should be run at least once, the first time you initialize the editable environment in order +to get minimal, complete test environment with usual tools and dependencies needed for unit testing. 
+ ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ +| extra | install command | enables | ++=====================+=====================================================+========================================================================+ +| devel | ``pip install -e '.[devel]'`` | Minimum development dependencies - minimal, complete test environment | +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ -| devel_all | ``pip install 'apache-airflow[devel_all]'`` | Everything needed for development including Hadoop and providers | +| devel-hadoop | ``pip install -e '.[devel-hadoop]'`` | Adds Hadoop stack libraries ``devel`` dependencies | +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ -| devel_ci | ``pip install 'apache-airflow[devel_ci]'`` | All dependencies required for CI tests (same as ``devel_all``) | +| devel-all-dbs | ``pip install -e '.[devel-all-dbs]'`` | Adds all libraries needed to test database providers | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ +| devel-all | ``pip install -e '.[devel-all]'`` | Everything needed for development including Hadoop, all devel extras, | +| | | all doc extras. Generally: all possible dependencies | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ +| devel-ci | ``pip install -e '.[devel-ci]'`` | All dependencies required for CI tests (same as ``devel-all``) | +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ Doc extras ----------- +========== Those are the extras that are needed to generated documentation for Airflow. 
This is used for development time only -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| extra | install command | enables | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| doc | ``pip install 'apache-airflow[doc]'`` | Packages needed to build docs (included in ``devel``) | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| doc_gen | ``pip install 'apache-airflow[doc_gen]'`` | Packages needed to generate er diagrams (included in ``devel_all``) | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ +| extra | install command | enables | ++=====================+=====================================================+========================================================================+ +| doc | ``pip install -e '.[doc]'`` | Packages needed to build docs (included in ``devel``) | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ +| doc-gen | ``pip install -e '.[doc-gen]'`` | Packages needed to generate er diagrams (included in ``devel-all``) | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ Deprecated 1.10 extras @@ -378,37 +434,37 @@ so there is no replacement for ``crypto`` extra. 
+---------------------+-----------------------------+ | Deprecated extra | Extra to be used instead | +=====================+=============================+ -| atlas | apache.atlas | +| atlas | apache-atlas | +---------------------+-----------------------------+ | aws | amazon | +---------------------+-----------------------------+ -| azure | microsoft.azure | +| azure | microsoft-azure | +---------------------+-----------------------------+ -| cassandra | apache.cassandra | +| cassandra | apache-cassandra | +---------------------+-----------------------------+ | crypto | | +---------------------+-----------------------------+ -| druid | apache.druid | +| druid | apache-druid | +---------------------+-----------------------------+ | gcp | google | +---------------------+-----------------------------+ | gcp_api | google | +---------------------+-----------------------------+ -| hdfs | apache.hdfs | +| hdfs | apache-hdfs | +---------------------+-----------------------------+ -| hive | apache.hive | +| hive | apache-hive | +---------------------+-----------------------------+ -| kubernetes | cncf.kubernetes | +| kubernetes | cncf-kubernetes | +---------------------+-----------------------------+ -| mssql | microsoft.mssql | +| mssql | microsoft-mssql | +---------------------+-----------------------------+ -| pinot | apache.pinot | +| pinot | apache-pinot | +---------------------+-----------------------------+ | s3 | amazon | +---------------------+-----------------------------+ -| spark | apache.spark | +| spark | apache-spark | +---------------------+-----------------------------+ -| webhdfs | apache.webhdfs | +| webhdfs | apache-webhdfs | +---------------------+-----------------------------+ -| winrm | microsoft.winrm | +| winrm | microsoft-winrm | +---------------------+-----------------------------+ diff --git a/docs/apache-airflow/installation/installing-from-pypi.rst b/docs/apache-airflow/installation/installing-from-pypi.rst index ed345f0cc1f38..6c379f6639d38 100644 --- a/docs/apache-airflow/installation/installing-from-pypi.rst +++ b/docs/apache-airflow/installation/installing-from-pypi.rst @@ -84,9 +84,9 @@ Airflow™ installation can be tricky because Airflow is both a library and an a Libraries usually keep their dependencies open and applications usually pin them, but we should do neither and both at the same time. We decided to keep our dependencies as open as possible -(in ``setup.cfg`` and ``setup.py``) so users can install different -version of libraries if needed. This means that from time to time plain ``pip install apache-airflow`` will -not work or will produce an unusable Airflow installation. +(in ``pyproject.toml``) so users can install different version of libraries if needed. This means that +from time to time plain ``pip install apache-airflow`` will not work or will produce an unusable +Airflow installation. 
Reproducible Airflow installation ================================= diff --git a/docs/docker-stack/build-arg-ref.rst b/docs/docker-stack/build-arg-ref.rst index 73c30a3892863..4ea605a6dec3c 100644 --- a/docs/docker-stack/build-arg-ref.rst +++ b/docs/docker-stack/build-arg-ref.rst @@ -84,19 +84,19 @@ List of default extras in the production Dockerfile: * amazon * async * celery -* cncf.kubernetes -* common.io +* cncf-kubernetes +* common-io * docker * elasticsearch * ftp * google -* google_auth +* google-auth * graphviz * grpc * hashicorp * http * ldap -* microsoft.azure +* microsoft-azure * mysql * odbc * openlineage @@ -225,12 +225,6 @@ You can see some examples of those in: | | | for Airflow version installation - for | | | | example ``<2.0.2`` for automated builds. | +------------------------------------+------------------------------------------+------------------------------------------+ -| ``INSTALL_PROVIDERS_FROM_SOURCES`` | ``false`` | If set to ``true`` and image is built | -| | | from sources, all provider packages are | -| | | installed from sources rather than from | -| | | packages. It has no effect when | -| | | installing from PyPI or GitHub repo. | -+------------------------------------+------------------------------------------+------------------------------------------+ | ``AIRFLOW_CONSTRAINTS_LOCATION`` | | If not empty, it will override the | | | | source of the constraints with the | | | | specified URL or file. Note that the | @@ -260,7 +254,7 @@ Pre-caching PIP dependencies ............................ When image is build from PIP, by default pre-caching of PIP dependencies is used. This is in order to speed-up incremental -builds during development. When pre-cached PIP dependencies are used and ``setup.py`` or ``setup.cfg`` changes, the +builds during development. When pre-cached PIP dependencies are used and ``pyproject.toml`` changes, the PIP dependencies are already pre-installed, thus resulting in much faster image rebuild. This is purely an optimization of time needed to build the images and should be disabled if you want to install Airflow from Docker context files. diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index e7b4b509b93aa..ef260e0d2b317 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -458,6 +458,7 @@ dest dev devel DevOps +devscripts devtools df dicts @@ -514,6 +515,7 @@ dsn dttm dtypes du +duckdb durations dylib Dynamodb diff --git a/generated/PYPI_README.md b/generated/PYPI_README.md index d11eb11cc57db..30ad2e2f41d90 100644 --- a/generated/PYPI_README.md +++ b/generated/PYPI_README.md @@ -105,7 +105,7 @@ Documentation for dependent projects like provider packages, Docker image, Helm We publish Apache Airflow as `apache-airflow` package in PyPI. Installing it however might be sometimes tricky because Airflow is a bit of both a library and application. Libraries usually keep their dependencies open, and applications usually pin them, but we should do neither and both simultaneously. We decided to keep -our dependencies as open as possible (in `setup.py`) so users can install different versions of libraries +our dependencies as open as possible (in `pyproject.toml`) so users can install different versions of libraries if needed. This means that `pip install apache-airflow` will not work from time to time or will produce unusable Airflow installation. 
diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index d42ae80ee407f..e62c8521dafc2 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -4,6 +4,7 @@ "apache-airflow-providers-http", "apache-airflow>=2.6.0" ], + "devel-deps": [], "cross-providers-deps": [ "http" ], @@ -17,6 +18,7 @@ "apache-airflow>=2.6.0", "oss2>=2.14.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -35,6 +37,16 @@ "sqlalchemy_redshift>=0.8.6", "watchtower>=2.0.1,<4" ], + "devel-deps": [ + "aiobotocore>=2.7.0", + "aws_xray_sdk>=2.12.0", + "moto[cloudformation,glue]>=4.2.12", + "mypy-boto3-appflow>=1.33.0", + "mypy-boto3-rds>=1.33.0", + "mypy-boto3-redshift-data>=1.33.0", + "mypy-boto3-s3>=1.33.0", + "s3fs>=2023.10.0" + ], "cross-providers-deps": [ "apache.hive", "cncf.kubernetes", @@ -59,6 +71,7 @@ "apache-beam>=2.53.0", "pyarrow>=14.0.1" ], + "devel-deps": [], "cross-providers-deps": [ "google" ], @@ -70,6 +83,7 @@ "apache-airflow>=2.6.0", "cassandra-driver>=3.13.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -80,6 +94,7 @@ "apache-airflow>=2.6.0", "sqlalchemy-drill>=1.1.0" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -92,6 +107,7 @@ "apache-airflow>=2.6.0", "pydruid>=0.4.1" ], + "devel-deps": [], "cross-providers-deps": [ "apache.hive", "common.sql" @@ -105,6 +121,7 @@ "apache-airflow>=2.6.0", "cryptography>=2.0.0" ], + "devel-deps": [], "cross-providers-deps": [ "cncf.kubernetes" ], @@ -116,6 +133,7 @@ "apache-airflow>=2.6.0", "hdfs[avro,dataframe,kerberos]>=2.0.4" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -126,9 +144,10 @@ "apache-airflow>=2.6.0", "hmsclient>=0.1.0", "pandas>=1.2.5", - "pyhive[hive_pure_sasl]>=0.7.0", + "pyhive[hive-pure-sasl]>=0.7.0", "thrift>=0.9.2" ], + "devel-deps": [], "cross-providers-deps": [ "amazon", "common.sql", @@ -146,6 +165,7 @@ "apache-airflow>=2.6.0", "impyla>=0.18.0,<1.0" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -158,6 +178,7 @@ "asgiref", "confluent-kafka>=1.8.2" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -167,6 +188,7 @@ "apache-airflow>=2.6.0", "kylinpy>=2.6" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -178,6 +200,7 @@ "apache-airflow>=2.6.0", "asgiref" ], + "devel-deps": [], "cross-providers-deps": [ "http" ], @@ -188,6 +211,7 @@ "deps": [ "apache-airflow>=2.6.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -198,6 +222,7 @@ "apache-airflow>=2.6.0", "pinotdb>0.4.7" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -210,6 +235,7 @@ "grpcio-status>=1.59.0", "pyspark" ], + "devel-deps": [], "cross-providers-deps": [ "cncf.kubernetes" ], @@ -220,6 +246,7 @@ "deps": [ "apache-airflow>=2.6.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "suspended" @@ -229,6 +256,7 @@ "apache-airflow>=2.6.0", "apprise" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -238,6 +266,7 @@ "apache-airflow>=2.6.0", "python-arango>=7.3.2" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -247,6 +276,7 @@ "apache-airflow>=2.6.0", "asana>=0.10,<4.0.0" ], + 
"devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -256,6 +286,7 @@ "apache-airflow>=2.6.0", "atlassian-python-api>=1.14.2" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -267,6 +298,7 @@ "flower>=1.0.0", "google-re2>=1.0" ], + "devel-deps": [], "cross-providers-deps": [ "cncf.kubernetes" ], @@ -278,6 +310,7 @@ "apache-airflow>=2.6.0", "cloudant>=2.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -292,6 +325,7 @@ "kubernetes>=21.7.0,<24", "kubernetes_asyncio>=18.20.1,<25" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -301,6 +335,7 @@ "apache-airflow>=2.6.0", "cohere>=4.37" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -309,6 +344,7 @@ "deps": [ "apache-airflow>=2.8.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -318,6 +354,7 @@ "apache-airflow>=2.6.0", "sqlparse>=0.4.2" ], + "devel-deps": [], "cross-providers-deps": [ "openlineage" ], @@ -331,6 +368,7 @@ "dask>=2.9.0,!=2022.10.1,!=2023.5.0", "distributed>=2.11.1,!=2023.5.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "suspended" @@ -343,6 +381,9 @@ "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0", "requests>=2.27,<3" ], + "devel-deps": [ + "deltalake>=0.12.0" + ], "cross-providers-deps": [ "common.sql" ], @@ -354,6 +395,7 @@ "apache-airflow>=2.6.0", "datadog>=0.14.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -365,6 +407,7 @@ "apache-airflow>=2.6.0", "asgiref" ], + "devel-deps": [], "cross-providers-deps": [ "http", "openlineage" @@ -377,6 +420,7 @@ "apache-airflow-providers-http", "apache-airflow>=2.6.0" ], + "devel-deps": [], "cross-providers-deps": [ "http" ], @@ -388,6 +432,7 @@ "apache-airflow-providers-http", "apache-airflow>=2.6.0" ], + "devel-deps": [], "cross-providers-deps": [ "http" ], @@ -400,6 +445,7 @@ "docker>=5.0.3", "python-dotenv>=0.21.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -410,6 +456,7 @@ "apache-airflow>=2.6.0", "elasticsearch>=8.10,<9" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -423,6 +470,7 @@ "pandas>=1.2.5", "pyexasol>=0.5.1" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -434,6 +482,7 @@ "apache-airflow>=2.6.0", "facebook-business>=6.0.2" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -442,6 +491,7 @@ "deps": [ "apache-airflow>=2.6.0" ], + "devel-deps": [], "cross-providers-deps": [ "openlineage" ], @@ -453,6 +503,7 @@ "PyGithub!=1.58", "apache-airflow>=2.6.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -519,6 +570,7 @@ "sqlalchemy-bigquery>=1.2.1", "sqlalchemy-spanner>=1.6.2" ], + "devel-deps": [], "cross-providers-deps": [ "amazon", "apache.beam", @@ -548,6 +600,7 @@ "google-auth>=1.0.0, <3.0.0", "grpcio>=1.15.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -557,6 +610,7 @@ "apache-airflow>=2.6.0", "hvac>=1.1.0" ], + "devel-deps": [], "cross-providers-deps": [ "google" ], @@ -571,6 +625,7 @@ "requests>=2.26.0", "requests_toolbelt" ], + "devel-deps": [], "cross-providers-deps": [], 
"excluded-python-versions": [], "state": "ready" @@ -579,6 +634,7 @@ "deps": [ "apache-airflow>=2.6.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -589,6 +645,7 @@ "influxdb-client>=1.19.0", "requests>=2.26.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -599,6 +656,7 @@ "apache-airflow>=2.6.0", "jaydebeapi>=1.1.1" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -610,6 +668,7 @@ "apache-airflow>=2.6.0", "python-jenkins>=1.0.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -639,6 +698,9 @@ "azure-synapse-artifacts>=0.17.0", "azure-synapse-spark" ], + "devel-deps": [ + "pywinrm" + ], "cross-providers-deps": [ "google", "oracle", @@ -653,6 +715,7 @@ "apache-airflow>=2.6.0", "pymssql>=2.1.8" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -664,6 +727,7 @@ "apache-airflow>=2.6.0", "pypsrp>=0.8.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -673,6 +737,7 @@ "apache-airflow>=2.6.0", "pywinrm>=0.4" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -683,6 +748,9 @@ "dnspython>=1.13.0", "pymongo>=3.6.0" ], + "devel-deps": [ + "mongomock" + ], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -694,6 +762,7 @@ "mysql-connector-python>=8.0.29", "mysqlclient>=1.3.6" ], + "devel-deps": [], "cross-providers-deps": [ "amazon", "common.sql", @@ -710,6 +779,7 @@ "apache-airflow>=2.6.0", "neo4j>=4.2.1" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -720,6 +790,7 @@ "apache-airflow>=2.6.0", "pyodbc" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -731,6 +802,7 @@ "apache-airflow>=2.6.0", "openai[datalib]>=1.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -739,6 +811,7 @@ "deps": [ "apache-airflow>=2.6.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -751,6 +824,7 @@ "openlineage-integration-common>=0.28.0", "openlineage-python>=0.28.0" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -762,6 +836,7 @@ "apache-airflow>=2.6.0", "opensearch-py>=2.2.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -771,6 +846,7 @@ "apache-airflow>=2.6.0", "opsgenie-sdk>=2.1.5" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -781,6 +857,7 @@ "apache-airflow>=2.6.0", "oracledb>=1.0.0" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -792,6 +869,7 @@ "apache-airflow>=2.6.0", "pdpyras>=4.1.2" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -803,6 +881,7 @@ "papermill[all]>=2.4.0", "scrapbook[all]" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -813,6 +892,7 @@ "apache-airflow>=2.6.0", "pgvector>=0.2.3" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql", "postgres" @@ -825,6 +905,7 @@ "apache-airflow>=2.6.0", "pinecone-client>=2.2.4" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -834,6 +915,7 @@ "apache-airflow>=2.6.0", "arrow>=0.16.0" ], + "devel-deps": [], 
"cross-providers-deps": [], "excluded-python-versions": [], "state": "suspended" @@ -844,6 +926,7 @@ "apache-airflow>=2.6.0", "psycopg2-binary>=2.8.0" ], + "devel-deps": [], "cross-providers-deps": [ "amazon", "common.sql", @@ -859,6 +942,7 @@ "pandas>=1.2.5", "presto-python-client>=0.8.4" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql", "google" @@ -871,6 +955,7 @@ "apache-airflow>=2.6.0", "redis>=4.5.2,<5.0.0,!=4.5.5" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -881,6 +966,7 @@ "pandas>=1.2.5", "simple-salesforce>=1.0.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -890,6 +976,7 @@ "apache-airflow>=2.6.0", "smbprotocol>=1.5.0" ], + "devel-deps": [], "cross-providers-deps": [ "google" ], @@ -901,6 +988,7 @@ "analytics-python>=1.2.9", "apache-airflow>=2.6.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -910,6 +998,7 @@ "apache-airflow>=2.6.0", "sendgrid>=6.0.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -920,6 +1009,7 @@ "apache-airflow>=2.6.0", "paramiko>=2.8.0" ], + "devel-deps": [], "cross-providers-deps": [ "openlineage", "ssh" @@ -932,6 +1022,7 @@ "apache-airflow>=2.6.0", "spython>=0.0.56" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -942,6 +1033,7 @@ "apache-airflow>=2.6.0", "slack_sdk>=3.0.0" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -952,6 +1044,7 @@ "deps": [ "apache-airflow>=2.6.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -963,6 +1056,7 @@ "snowflake-connector-python>=2.7.8", "snowflake-sqlalchemy>=1.1.0" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql", "openlineage" @@ -975,6 +1069,7 @@ "apache-airflow-providers-common-sql>=1.3.1", "apache-airflow>=2.6.0" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -987,6 +1082,7 @@ "paramiko>=2.6.0", "sshtunnel>=0.3.2" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -996,6 +1092,7 @@ "apache-airflow>=2.6.0", "tableauserverclient" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -1004,6 +1101,9 @@ "deps": [ "apache-airflow>=2.6.0" ], + "devel-deps": [ + "pyiceberg>=0.5.0" + ], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -1013,6 +1113,7 @@ "apache-airflow>=2.6.0", "python-telegram-bot>=20.2" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -1024,6 +1125,7 @@ "pandas>=1.2.5", "trino>=0.318.0" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql", "google", @@ -1038,6 +1140,7 @@ "apache-airflow>=2.6.0", "vertica-python>=0.5.1" ], + "devel-deps": [], "cross-providers-deps": [ "common.sql" ], @@ -1050,6 +1153,7 @@ "pandas>=1.2.5", "weaviate-client>=3.24.2" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -1059,6 +1163,7 @@ "apache-airflow>=2.6.0", "yandexcloud>=0.228.0" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" @@ -1068,6 +1173,7 @@ "apache-airflow>=2.6.0", "zenpy>=2.0.24" ], + "devel-deps": [], "cross-providers-deps": [], "excluded-python-versions": [], "state": "ready" diff --git 
a/images/breeze/output_ci-image_build.svg b/images/breeze/output_ci-image_build.svg index 70960aa673af1..b6d63b0465846 100644 --- a/images/breeze/output_ci-image_build.svg +++ b/images/breeze/output_ci-image_build.svg @@ -1,4 +1,4 @@ - +
+# Docutils 0.17.0 converts generated <div class="section"> into <section> and breaks our doc formatting
+# By adding a lot of whitespace separation. This limit can be lifted when we update our doc to handle
+# <section>
tags for sections + "docutils<0.17,>=0.16", + "sphinx-airflow-theme>=0.0.12", + "sphinx-argparse>=0.4.0", + # sphinx-autoapi fails with astroid 3.0, see: https://github.com/readthedocs/sphinx-autoapi/issues/407 + # This was fixed in sphinx-autoapi 3.0, however it has requirement sphinx>=6.1, but we stuck on 5.x + "sphinx-autoapi>=2.1.1", + "sphinx-copybutton>=0.5.2", + "sphinx-design>=0.5.0", + "sphinx-jinja>=2.0.2", + "sphinx-rtd-theme>=2.0.0", + # Currently we are using sphinx 5 but we need to migrate to Sphinx 7 + "sphinx>=5.3.0,<6.0.0", + "sphinxcontrib-applehelp>=1.0.4", + "sphinxcontrib-devhelp>=1.0.2", + "sphinxcontrib-htmlhelp>=2.0.1", + "sphinxcontrib-httpdomain>=1.8.1", + "sphinxcontrib-jquery>=4.1", + "sphinxcontrib-jsmath>=1.0.1", + "sphinxcontrib-qthelp>=1.0.3", + "sphinxcontrib-redoc>=1.6.0", + "sphinxcontrib-serializinghtml==1.1.5", + "sphinxcontrib-spelling>=8.0.0", +] +doc-gen = [ + "apache-airflow[doc]", + "eralchemy2>=1.3.8", +] +# END OF doc extras +# START OF bundle extras +all-dbs = [ + "apache-airflow[apache-cassandra]", + "apache-airflow[apache-drill]", + "apache-airflow[apache-druid]", + "apache-airflow[apache-hdfs]", + "apache-airflow[apache-hive]", + "apache-airflow[apache-impala]", + "apache-airflow[apache-pinot]", + "apache-airflow[arangodb]", + "apache-airflow[cloudant]", + "apache-airflow[databricks]", + "apache-airflow[exasol]", + "apache-airflow[influxdb]", + "apache-airflow[microsoft-mssql]", + "apache-airflow[mongo]", + "apache-airflow[mysql]", + "apache-airflow[neo4j]", + "apache-airflow[postgres]", + "apache-airflow[presto]", + "apache-airflow[trino]", + "apache-airflow[vertica]", +] +devel = [ + "apache-airflow[common.io]", + "apache-airflow[common.sql]", + "apache-airflow[devel-debuggers]", + "apache-airflow[devel-devscripts]", + "apache-airflow[devel-duckdb]", + "apache-airflow[devel-mypy]", + "apache-airflow[devel-sentry]", + "apache-airflow[devel-static-checks]", + "apache-airflow[devel-tests]", + "apache-airflow[fab]", + "apache-airflow[ftp]", + "apache-airflow[http]", + "apache-airflow[imap]", + "apache-airflow[sqlite]", +] +devel-all-dbs = [ + "apache-airflow[apache-cassandra]", + "apache-airflow[apache-drill]", + "apache-airflow[apache-druid]", + "apache-airflow[apache-hdfs]", + "apache-airflow[apache-hive]", + "apache-airflow[apache-impala]", + "apache-airflow[apache-pinot]", + "apache-airflow[arangodb]", + "apache-airflow[cloudant]", + "apache-airflow[databricks]", + "apache-airflow[exasol]", + "apache-airflow[influxdb]", + "apache-airflow[microsoft-mssql]", + "apache-airflow[mongo]", + "apache-airflow[mysql]", + "apache-airflow[neo4j]", + "apache-airflow[postgres]", + "apache-airflow[presto]", + "apache-airflow[trino]", + "apache-airflow[vertica]", +] +devel-ci = [ + "apache-airflow[devel-all]", +] +devel-hadoop = [ + "apache-airflow[apache-hdfs]", + "apache-airflow[apache-hive]", + "apache-airflow[apache-impala]", + "apache-airflow[devel]", + "apache-airflow[hdfs]", + "apache-airflow[kerberos]", + "apache-airflow[presto]", +] +# END OF bundle extras +############################################################################################################# +# The whole section can be removed in Airflow 3.0 as those old aliases are deprecated in 2.* series +############################################################################################################# +# START OF deprecated extras +atlas = [ + "apache-airflow[apache-atlas]", +] +aws = [ + "apache-airflow[amazon]", +] +azure = [ + "apache-airflow[microsoft-azure]", +] 
+cassandra = [ + "apache-airflow[apache-cassandra]", +] +# Empty alias extra just for backward compatibility with Airflow 1.10 +crypto = [ +] +druid = [ + "pache-airflow[apache-druid]", +] +gcp = [ + "apache-airflow[google]", +] +gcp_api = [ + "apache-airflow[google]", +] +hdfs = [ + "apache-airflow[apache-hdfs]", +] +hive = [ + "apache-airflow[apache-hive]", +] +kubernetes = [ + "apache-airflow[cncf-kubernetes]", +] +mssql = [ + "apache-airflow[microsoft-mssql]", +] +pinot = [ + "apache-airflow[apache-pinot]", +] +s3 = [ + "apache-airflow[amazon]", +] +spark = [ + "apache-airflow[apache-spark]", +] +webhdfs = [ + "apache-airflow[apache-webhdfs]", +] +winrm = [ + "apache-airflow[microsoft-winrm]", +] +# END OF deprecated extras +############################################################################################################# +# The whole section below is automatically generated by `update-providers-dependencies` pre-commit based +# on `provider.yaml` files present in the `providers` subdirectories. The `provider.yaml` files are +# A single source of truth for provider dependencies, +# +# PLEASE DO NOT MODIFY THIS SECTION MANUALLY. IT WILL BE OVERWRITTEN BY PRE-COMMIT !! +# If you want to modify these - modify the corresponding provider.yaml instead. +############################################################################################################# +# START OF GENERATED DEPENDENCIES +airbyte = [ + "apache-airflow[http]", +] +alibaba = [ + "alibabacloud_adb20211201>=1.0.0", + "alibabacloud_tea_openapi>=0.3.7", + "oss2>=2.14.0", +] +amazon = [ + "apache-airflow[common_sql]", + "apache-airflow[http]", + "asgiref", + "boto3>=1.33.0", + "botocore>=1.33.0", + "inflection>=0.5.1", + "jsonpath_ng>=1.5.3", + "redshift_connector>=2.0.918", + "sqlalchemy_redshift>=0.8.6", + "watchtower>=2.0.1,<4", + # Devel dependencies for the amazon provider + "aiobotocore>=2.7.0", + "aws_xray_sdk>=2.12.0", + "moto[cloudformation,glue]>=4.2.12", + "mypy-boto3-appflow>=1.33.0", + "mypy-boto3-rds>=1.33.0", + "mypy-boto3-redshift-data>=1.33.0", + "mypy-boto3-s3>=1.33.0", + "s3fs>=2023.10.0", +] +apache-beam = [ + "apache-beam>=2.53.0", + "pyarrow>=14.0.1", +] +apache-cassandra = [ + "cassandra-driver>=3.13.0", +] +apache-drill = [ + "apache-airflow[common_sql]", + "sqlalchemy-drill>=1.1.0", +] +apache-druid = [ + "apache-airflow[common_sql]", + "pydruid>=0.4.1", +] +apache-flink = [ + "apache-airflow[cncf_kubernetes]", + "cryptography>=2.0.0", +] +apache-hdfs = [ + "hdfs[avro,dataframe,kerberos]>=2.0.4", +] +apache-hive = [ + "apache-airflow[common_sql]", + "hmsclient>=0.1.0", + "pandas>=1.2.5", + "pyhive[hive-pure-sasl]>=0.7.0", + "thrift>=0.9.2", +] +apache-impala = [ + "impyla>=0.18.0,<1.0", +] +apache-kafka = [ + "asgiref", + "confluent-kafka>=1.8.2", +] +apache-kylin = [ + "kylinpy>=2.6", +] +apache-livy = [ + "aiohttp", + "apache-airflow[http]", + "asgiref", +] +apache-pig = [ +] +apache-pinot = [ + "apache-airflow[common_sql]", + "pinotdb>0.4.7", +] +apache-spark = [ + "grpcio-status>=1.59.0", + "pyspark", +] +apprise = [ + "apprise", +] +arangodb = [ + "python-arango>=7.3.2", +] +asana = [ + "asana>=0.10,<4.0.0", +] +atlassian-jira = [ + "atlassian-python-api>=1.14.2", +] +celery = [ + "celery>=5.3.0,<6,!=5.3.3,!=5.3.2", + "flower>=1.0.0", + "google-re2>=1.0", +] +cloudant = [ + "cloudant>=2.0", +] +cncf-kubernetes = [ + "aiofiles>=23.2.0", + "asgiref>=3.5.2", + "cryptography>=2.0.0", + "google-re2>=1.0", + "kubernetes>=21.7.0,<24", + "kubernetes_asyncio>=18.20.1,<25", +] +cohere = [ + 
"cohere>=4.37", +] +common-io = [ +] +common-sql = [ + "sqlparse>=0.4.2", +] +databricks = [ + "aiohttp>=3.6.3, <4", + "apache-airflow[common_sql]", + "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0", + "requests>=2.27,<3", + # Devel dependencies for the databricks provider + "deltalake>=0.12.0", +] +datadog = [ + "datadog>=0.14.0", +] +dbt-cloud = [ + "aiohttp", + "apache-airflow[http]", + "asgiref", +] +dingding = [ + "apache-airflow[http]", +] +discord = [ + "apache-airflow[http]", +] +docker = [ + "docker>=5.0.3", + "python-dotenv>=0.21.0", +] +elasticsearch = [ + "apache-airflow[common_sql]", + "elasticsearch>=8.10,<9", +] +exasol = [ + "apache-airflow[common_sql]", + "pandas>=1.2.5", + "pyexasol>=0.5.1", +] +facebook = [ + "facebook-business>=6.0.2", +] +ftp = [ +] +github = [ + "PyGithub!=1.58", +] +google = [ + "PyOpenSSL", + "apache-airflow[common_sql]", + "asgiref>=3.5.2", + "gcloud-aio-auth>=4.0.0,<5.0.0", + "gcloud-aio-bigquery>=6.1.2", + "gcloud-aio-storage>=9.0.0", + "gcsfs>=2023.10.0", + "google-ads>=22.1.0", + "google-analytics-admin", + "google-api-core>=2.11.0", + "google-api-python-client>=1.6.0", + "google-auth-httplib2>=0.0.1", + "google-auth>=1.0.0", + "google-cloud-aiplatform>=1.22.1", + "google-cloud-automl>=2.12.0", + "google-cloud-batch>=0.13.0", + "google-cloud-bigquery-datatransfer>=3.13.0", + "google-cloud-bigtable>=2.17.0", + "google-cloud-build>=3.22.0", + "google-cloud-compute>=1.10.0", + "google-cloud-container>=2.17.4", + "google-cloud-datacatalog>=3.11.1", + "google-cloud-dataflow-client>=0.8.6", + "google-cloud-dataform>=0.5.0", + "google-cloud-dataplex>=1.10.0", + "google-cloud-dataproc-metastore>=1.12.0", + "google-cloud-dataproc>=5.8.0", + "google-cloud-dlp>=3.12.0", + "google-cloud-kms>=2.15.0", + "google-cloud-language>=2.9.0", + "google-cloud-logging>=3.5.0", + "google-cloud-memcache>=1.7.0", + "google-cloud-monitoring>=2.18.0", + "google-cloud-orchestration-airflow>=1.10.0", + "google-cloud-os-login>=2.9.1", + "google-cloud-pubsub>=2.19.0", + "google-cloud-redis>=2.12.0", + "google-cloud-run>=0.9.0", + "google-cloud-secret-manager>=2.16.0", + "google-cloud-spanner>=3.11.1", + "google-cloud-speech>=2.18.0", + "google-cloud-storage-transfer>=1.4.1", + "google-cloud-storage>=2.7.0", + "google-cloud-tasks>=2.13.0", + "google-cloud-texttospeech>=2.14.1", + "google-cloud-translate>=3.11.0", + "google-cloud-videointelligence>=2.11.0", + "google-cloud-vision>=3.4.0", + "google-cloud-workflows>=1.10.0", + "grpcio-gcp>=0.2.2", + "httpx", + "json-merge-patch>=0.2", + "looker-sdk>=22.2.0", + "pandas-gbq", + "pandas>=1.2.5", + "proto-plus>=1.19.6", + "sqlalchemy-bigquery>=1.2.1", + "sqlalchemy-spanner>=1.6.2", +] +grpc = [ + "google-auth-httplib2>=0.0.1", + "google-auth>=1.0.0, <3.0.0", + "grpcio>=1.15.0", +] +hashicorp = [ + "hvac>=1.1.0", +] +http = [ + "aiohttp", + "asgiref", + "requests>=2.26.0", + "requests_toolbelt", +] +imap = [ +] +influxdb = [ + "influxdb-client>=1.19.0", + "requests>=2.26.0", +] +jdbc = [ + "apache-airflow[common_sql]", + "jaydebeapi>=1.1.1", +] +jenkins = [ + "python-jenkins>=1.0.0", +] +microsoft-azure = [ + "adal>=1.2.7", + "adlfs>=2023.10.0", + "azure-batch>=8.0.0", + "azure-cosmos>=4.0.0", + "azure-datalake-store>=0.0.45", + "azure-identity>=1.3.1", + "azure-keyvault-secrets>=4.1.0", + "azure-kusto-data>=4.1.0", + "azure-mgmt-containerinstance>=9.0.0", + "azure-mgmt-containerregistry>=8.0.0", + "azure-mgmt-cosmosdb", + "azure-mgmt-datafactory>=2.0.0", + "azure-mgmt-datalake-store>=0.5.0", + "azure-mgmt-resource>=2.2.0", + 
"azure-mgmt-storage>=16.0.0", + "azure-servicebus>=7.6.1", + "azure-storage-blob>=12.14.0", + "azure-storage-file-datalake>=12.9.1", + "azure-storage-file-share", + "azure-synapse-artifacts>=0.17.0", + "azure-synapse-spark", + # Devel dependencies for the microsoft.azure provider + "pywinrm", +] +microsoft-mssql = [ + "apache-airflow[common_sql]", + "pymssql>=2.1.8", +] +microsoft-psrp = [ + "pypsrp>=0.8.0", +] +microsoft-winrm = [ + "pywinrm>=0.4", +] +mongo = [ + "dnspython>=1.13.0", + "pymongo>=3.6.0", + # Devel dependencies for the mongo provider + "mongomock", +] +mysql = [ + "apache-airflow[common_sql]", + "mysql-connector-python>=8.0.29", + "mysqlclient>=1.3.6", +] +neo4j = [ + "neo4j>=4.2.1", +] +odbc = [ + "apache-airflow[common_sql]", + "pyodbc", +] +openai = [ + "openai[datalib]>=1.0", +] +openfaas = [ +] +openlineage = [ + "apache-airflow[common_sql]", + "attrs>=22.2", + "openlineage-integration-common>=0.28.0", + "openlineage-python>=0.28.0", +] +opensearch = [ + "opensearch-py>=2.2.0", +] +opsgenie = [ + "opsgenie-sdk>=2.1.5", +] +oracle = [ + "apache-airflow[common_sql]", + "oracledb>=1.0.0", +] +pagerduty = [ + "pdpyras>=4.1.2", +] +papermill = [ + "ipykernel", + "papermill[all]>=2.4.0", + "scrapbook[all]", +] +pgvector = [ + "apache-airflow[postgres]", + "pgvector>=0.2.3", +] +pinecone = [ + "pinecone-client>=2.2.4", +] +postgres = [ + "apache-airflow[common_sql]", + "psycopg2-binary>=2.8.0", +] +presto = [ + "apache-airflow[common_sql]", + "pandas>=1.2.5", + "presto-python-client>=0.8.4", +] +redis = [ + "redis>=4.5.2,<5.0.0,!=4.5.5", +] +salesforce = [ + "pandas>=1.2.5", + "simple-salesforce>=1.0.0", +] +samba = [ + "smbprotocol>=1.5.0", +] +segment = [ + "analytics-python>=1.2.9", +] +sendgrid = [ + "sendgrid>=6.0.0", +] +sftp = [ + "apache-airflow[ssh]", + "paramiko>=2.8.0", +] +singularity = [ + "spython>=0.0.56", +] +slack = [ + "apache-airflow[common_sql]", + "slack_sdk>=3.0.0", +] +smtp = [ +] +snowflake = [ + "apache-airflow[common_sql]", + "snowflake-connector-python>=2.7.8", + "snowflake-sqlalchemy>=1.1.0", +] +sqlite = [ + "apache-airflow[common_sql]", +] +ssh = [ + "paramiko>=2.6.0", + "sshtunnel>=0.3.2", +] +tableau = [ + "tableauserverclient", +] +tabular = [ + # Devel dependencies for the tabular provider + "pyiceberg>=0.5.0", +] +telegram = [ + "python-telegram-bot>=20.2", +] +trino = [ + "apache-airflow[common_sql]", + "pandas>=1.2.5", + "trino>=0.318.0", +] +vertica = [ + "apache-airflow[common_sql]", + "vertica-python>=0.5.1", +] +weaviate = [ + "pandas>=1.2.5", + "weaviate-client>=3.24.2", +] +yandex = [ + "yandexcloud>=0.228.0", +] +zendesk = [ + "zenpy>=2.0.24", +] +all = [ + # core extras + "apache-airflow[aiobotocore]", + "apache-airflow[async]", + "apache-airflow[cgroups]", + "apache-airflow[deprecated-api]", + "apache-airflow[github-enterprise]", + "apache-airflow[google-auth]", + "apache-airflow[graphviz]", + "apache-airflow[kerberos]", + "apache-airflow[ldap]", + "apache-airflow[leveldb]", + "apache-airflow[otel]", + "apache-airflow[pandas]", + "apache-airflow[password]", + "apache-airflow[rabbitmq]", + "apache-airflow[s3fs]", + "apache-airflow[saml]", + "apache-airflow[sentry]", + "apache-airflow[statsd]", + "apache-airflow[virtualenv]", + # Apache no provider extras + "apache-airflow[apache-atlas]", + "apache-airflow[apache-webhdfs]", + "apache-airflow[all-core]", + # Provider extras + "apache-airflow[airbyte]", + "apache-airflow[alibaba]", + "apache-airflow[amazon]", + "apache-airflow[apache-beam]", + "apache-airflow[apache-cassandra]", + 
"apache-airflow[apache-drill]", + "apache-airflow[apache-druid]", + "apache-airflow[apache-flink]", + "apache-airflow[apache-hdfs]", + "apache-airflow[apache-hive]", + "apache-airflow[apache-impala]", + "apache-airflow[apache-kafka]", + "apache-airflow[apache-kylin]", + "apache-airflow[apache-livy]", + "apache-airflow[apache-pig]", + "apache-airflow[apache-pinot]", + "apache-airflow[apache-spark]", + "apache-airflow[apache-sqoop]", + "apache-airflow[apprise]", + "apache-airflow[arangodb]", + "apache-airflow[asana]", + "apache-airflow[atlassian-jira]", + "apache-airflow[celery]", + "apache-airflow[cloudant]", + "apache-airflow[cncf-kubernetes]", + "apache-airflow[cohere]", + "apache-airflow[common-io]", + "apache-airflow[common-sql]", + "apache-airflow[daskexecutor]", + "apache-airflow[databricks]", + "apache-airflow[datadog]", + "apache-airflow[dbt-cloud]", + "apache-airflow[dingding]", + "apache-airflow[discord]", + "apache-airflow[docker]", + "apache-airflow[elasticsearch]", + "apache-airflow[exasol]", + "apache-airflow[facebook]", + "apache-airflow[ftp]", + "apache-airflow[github]", + "apache-airflow[google]", + "apache-airflow[grpc]", + "apache-airflow[hashicorp]", + "apache-airflow[http]", + "apache-airflow[imap]", + "apache-airflow[influxdb]", + "apache-airflow[jdbc]", + "apache-airflow[jenkins]", + "apache-airflow[microsoft-azure]", + "apache-airflow[microsoft-mssql]", + "apache-airflow[microsoft-psrp]", + "apache-airflow[microsoft-winrm]", + "apache-airflow[mongo]", + "apache-airflow[mysql]", + "apache-airflow[neo4j]", + "apache-airflow[odbc]", + "apache-airflow[openai]", + "apache-airflow[openfaas]", + "apache-airflow[openlineage]", + "apache-airflow[opensearch]", + "apache-airflow[opsgenie]", + "apache-airflow[oracle]", + "apache-airflow[pagerduty]", + "apache-airflow[papermill]", + "apache-airflow[pgvector]", + "apache-airflow[pinecone]", + "apache-airflow[plexus]", + "apache-airflow[postgres]", + "apache-airflow[presto]", + "apache-airflow[redis]", + "apache-airflow[salesforce]", + "apache-airflow[samba]", + "apache-airflow[segment]", + "apache-airflow[sendgrid]", + "apache-airflow[sftp]", + "apache-airflow[singularity]", + "apache-airflow[slack]", + "apache-airflow[smtp]", + "apache-airflow[snowflake]", + "apache-airflow[sqlite]", + "apache-airflow[ssh]", + "apache-airflow[tableau]", + "apache-airflow[tabular]", + "apache-airflow[telegram]", + "apache-airflow[trino]", + "apache-airflow[vertica]", + "apache-airflow[weaviate]", + "apache-airflow[yandex]", + "apache-airflow[zendesk]", +] +devel-all = [ + "apache-airflow[all]", + "apache-airflow[devel]", + "apache-airflow[doc]", + "apache-airflow[doc-gen]", + "apache-airflow[saml]", + # Apache no provider extras + "apache-airflow[apache-atlas]", + "apache-airflow[apache-webhdfs]", + "apache-airflow[all-core]", + # Include all provider deps + "apache-airflow[airbyte]", + "apache-airflow[alibaba]", + "apache-airflow[amazon]", + "apache-airflow[apache-beam]", + "apache-airflow[apache-cassandra]", + "apache-airflow[apache-drill]", + "apache-airflow[apache-druid]", + "apache-airflow[apache-flink]", + "apache-airflow[apache-hdfs]", + "apache-airflow[apache-hive]", + "apache-airflow[apache-impala]", + "apache-airflow[apache-kafka]", + "apache-airflow[apache-kylin]", + "apache-airflow[apache-livy]", + "apache-airflow[apache-pig]", + "apache-airflow[apache-pinot]", + "apache-airflow[apache-spark]", + "apache-airflow[apache-sqoop]", + "apache-airflow[apprise]", + "apache-airflow[arangodb]", + "apache-airflow[asana]", + 
"apache-airflow[atlassian-jira]", + "apache-airflow[celery]", + "apache-airflow[cloudant]", + "apache-airflow[cncf-kubernetes]", + "apache-airflow[cohere]", + "apache-airflow[common-io]", + "apache-airflow[common-sql]", + "apache-airflow[daskexecutor]", + "apache-airflow[databricks]", + "apache-airflow[datadog]", + "apache-airflow[dbt-cloud]", + "apache-airflow[dingding]", + "apache-airflow[discord]", + "apache-airflow[docker]", + "apache-airflow[elasticsearch]", + "apache-airflow[exasol]", + "apache-airflow[facebook]", + "apache-airflow[ftp]", + "apache-airflow[github]", + "apache-airflow[google]", + "apache-airflow[grpc]", + "apache-airflow[hashicorp]", + "apache-airflow[http]", + "apache-airflow[imap]", + "apache-airflow[influxdb]", + "apache-airflow[jdbc]", + "apache-airflow[jenkins]", + "apache-airflow[microsoft-azure]", + "apache-airflow[microsoft-mssql]", + "apache-airflow[microsoft-psrp]", + "apache-airflow[microsoft-winrm]", + "apache-airflow[mongo]", + "apache-airflow[mysql]", + "apache-airflow[neo4j]", + "apache-airflow[odbc]", + "apache-airflow[openai]", + "apache-airflow[openfaas]", + "apache-airflow[openlineage]", + "apache-airflow[opensearch]", + "apache-airflow[opsgenie]", + "apache-airflow[oracle]", + "apache-airflow[pagerduty]", + "apache-airflow[papermill]", + "apache-airflow[pgvector]", + "apache-airflow[pinecone]", + "apache-airflow[plexus]", + "apache-airflow[postgres]", + "apache-airflow[presto]", + "apache-airflow[redis]", + "apache-airflow[salesforce]", + "apache-airflow[samba]", + "apache-airflow[segment]", + "apache-airflow[sendgrid]", + "apache-airflow[sftp]", + "apache-airflow[singularity]", + "apache-airflow[slack]", + "apache-airflow[smtp]", + "apache-airflow[snowflake]", + "apache-airflow[sqlite]", + "apache-airflow[ssh]", + "apache-airflow[tableau]", + "apache-airflow[tabular]", + "apache-airflow[telegram]", + "apache-airflow[trino]", + "apache-airflow[vertica]", + "apache-airflow[weaviate]", + "apache-airflow[yandex]", + "apache-airflow[zendesk]", +] +# END OF GENERATED DEPENDENCIES +############################################################################################################# +# The rest of the pyproject.toml file should be manually maintained +############################################################################################################# +[project.scripts] +airflow = "airflow.__main__:main" +[project.urls] +"Bug Tracker" = "https://github.com/apache/airflow/issues" +Documentation = "https://airflow.apache.org/docs/" +Downloads = "https://archive.apache.org/dist/airflow/" +Homepage = "https://airflow.apache.org/" +"Release Notes" = "https://airflow.apache.org/docs/apache-airflow/stable/release_notes.html" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Source Code" = "https://github.com/apache/airflow" +Twitter = "https://twitter.com/ApacheAirflow" +YouTube = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[tool.hatch.envs.default] +python = "3.8" +platforms = ["linux", "macos"] +description = "Default environment with Python 3.8 for maximum compatibility" +features = ["devel"] + +[tool.hatch.envs.airflow-38] +python = "3.8" +platforms = ["linux", "macos"] +description = "Environment with Python 3.8. No devel installed." +features = [] + +[tool.hatch.envs.airflow-39] +python = "3.9" +platforms = ["linux", "macos"] +description = "Environment with Python 3.9. No devel installed." 
+features = [] + +[tool.hatch.envs.airflow-310] +python = "3.10" +platforms = ["linux", "macos"] +description = "Environment with Python 3.10. No devel installed." +features = [] + +[tool.hatch.envs.airflow-311] +python = "3.11" +platforms = ["linux", "macos"] +description = "Environment with Python 3.11. No devel installed" +features = [] + +[tool.hatch.version] +path = "airflow/__init__.py" + +[tool.hatch.build.targets.wheel.hooks.custom] +path = "./dev/hatch_build.py" + +[tool.hatch.build.hooks.custom] +path = "./dev/hatch_build.py" + +[tool.hatch.build.targets.custom] +path = "./dev/hatch_build.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/airflow", + "/airflow/git_version" +] +exclude = [ + "/airflow/providers/", + "/airflow/www/node_modules/" +] +artifacts = [ + "/airflow/www/static/dist/", + "/airflow/git_version", + "/generated/" +] + + +[tool.hatch.build.targets.wheel] +include = [ + "/airflow", +] +exclude = [ + "/airflow/providers/", +] +artifacts = [ + "/airflow/www/static/dist/", + "/airflow/git_version" +] + + [tool.black] line-length = 110 target-version = ['py38', 'py39', 'py310', 'py311'] -# Editable installs are currently broken using setuptools 64.0.0 and above. The problem is tracked in -# https://github.com/pypa/setuptools/issues/3548. We're also discussing how we could potentially fix -# this problem on our end in issue https://github.com/apache/airflow/issues/30764. Until then we need -# to use one of the following workarounds locally for editable installs: -# 1) Pin setuptools <= 63.4.3 below in the [build-system] section. -# 2) Include your airflow source code directory in PYTHONPATH. -[build-system] -requires = ['setuptools==67.2.0'] -build-backend = "setuptools.build_meta" - [tool.ruff] target-version = "py38" typing-modules = ["airflow.typing_compat"] @@ -47,7 +1272,6 @@ extend-select = [ "I", # Missing required import (auto-fixable) "UP", # Pyupgrade "RUF100", # Unused noqa (auto-fixable) - # We ignore more pydocstyle than we enable, so be more selective at what we enable "D101", "D106", @@ -188,3 +1412,45 @@ exclude_also = [ "@(typing(_extensions)?\\.)?overload", "if (typing(_extensions)?\\.)?TYPE_CHECKING:" ] + +[tool.mypy] +ignore_missing_imports = true +no_implicit_optional = true +warn_redundant_casts = true +warn_unused_ignores = false +plugins = [ + "dev/mypy/plugin/decorators.py", + "dev/mypy/plugin/outputs.py", +] +pretty = true +show_error_codes = true +disable_error_code = [ + "annotation-unchecked", +] + +[[tool.mypy.overrides]] +module="airflow.config_templates.default_webserver_config" +disable_error_code = [ + "var-annotated", +] + +[[tool.mypy.overrides]] +module="airflow.migrations.*" +ignore_errors = true + +[[tool.mypy.overrides]] +module= [ + "google.cloud.*", + "azure.*", +] +no_implicit_optional = false + +[[tool.mypy.overrides]] +module=[ + "referencing.*", + # Beam has some old type annotations, and they introduced an error recently with bad signature of + # a function. This is captured in https://github.com/apache/beam/issues/29927 + # and we should remove this exclusion when it is fixed. 
+ "apache_beam.*" +] +ignore_errors = true diff --git a/scripts/ci/docker-compose/devcontainer.env b/scripts/ci/docker-compose/devcontainer.env index 465e7ab13f6d2..4ef3fd045972d 100644 --- a/scripts/ci/docker-compose/devcontainer.env +++ b/scripts/ci/docker-compose/devcontainer.env @@ -46,7 +46,6 @@ HOST_OS="linux" INIT_SCRIPT_FILE="init.sh" INSTALL_AIRFLOW_VERSION= AIRFLOW_CONSTRAINTS_MODE= -INSTALL_PROVIDERS_FROM_SOURCES= INSTALL_SELECTED_PROVIDERS= USE_AIRFLOW_VERSION= USE_PACKAGES_FROM_DIST= diff --git a/scripts/ci/docker-compose/local.yml b/scripts/ci/docker-compose/local.yml index 1ee86fc7739ec..9258b3b504607 100644 --- a/scripts/ci/docker-compose/local.yml +++ b/scripts/ci/docker-compose/local.yml @@ -55,9 +55,6 @@ services: - type: bind source: ../../../LICENSE target: /opt/airflow/LICENSE - - type: bind - source: ../../../MANIFEST.in - target: /opt/airflow/MANIFEST.in - type: bind source: ../../../NOTICE target: /opt/airflow/NOTICE @@ -100,12 +97,6 @@ services: - type: bind source: ../../../scripts/docker/entrypoint_ci.sh target: /entrypoint - - type: bind - source: ../../../setup.cfg - target: /opt/airflow/setup.cfg - - type: bind - source: ../../../setup.py - target: /opt/airflow/setup.py - type: bind source: ../../../tests target: /opt/airflow/tests diff --git a/scripts/ci/kubernetes/k8s_requirements.txt b/scripts/ci/kubernetes/k8s_requirements.txt index 0f6cc2fd38081..bba6871b5196e 100644 --- a/scripts/ci/kubernetes/k8s_requirements.txt +++ b/scripts/ci/kubernetes/k8s_requirements.txt @@ -1,4 +1,4 @@ --e .[kubernetes] +-e .[cncf.kubernetes] pytest pytest-cov pytest-instafail diff --git a/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py b/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py new file mode 100755 index 0000000000000..1dbd2d9fbe8a6 --- /dev/null +++ b/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+""" +Checks if all the libraries in setup.py are listed in installation.rst file +""" +from __future__ import annotations + +import re +import sys +from pathlib import Path + +from tabulate import tabulate + +# tomllib is available in Python 3.11+ and before that tomli offers same interface for parsing TOML files +try: + import tomllib +except ImportError: + import tomli as tomllib + + +AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() +EXTRA_PACKAGES_REF_FILE = AIRFLOW_ROOT_PATH / "docs" / "apache-airflow" / "extra-packages-ref.rst" +PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml" + +sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported + +from common_precommit_utils import console + +pyproject_toml_content = tomllib.loads(PYPROJECT_TOML_FILE_PATH.read_text()) + +optional_dependencies: dict[str, list[str]] = pyproject_toml_content["project"]["optional-dependencies"] +doc_ref_content = EXTRA_PACKAGES_REF_FILE.read_text() + +errors: list[str] = [] +regular_suggestions: list[str] = [] +devel_suggestions: list[str] = [] +suggestions: list[tuple] = [] +suggestions_devel: list[tuple] = [] +suggestions_providers: list[tuple] = [] + +for dependency in optional_dependencies: + console.print(f"[bright_blue]Checking if {dependency} is mentioned in refs[/]") + find_matching = re.search(rf"^\| {dependency} *\|", doc_ref_content, flags=re.MULTILINE) + if not find_matching: + errors.append(f"[red]ERROR: {dependency} is not listed in {EXTRA_PACKAGES_REF_FILE}[/]") + is_devel_dep = dependency.startswith("devel") or dependency in ["doc", "doc-gen"] + short_dep = dependency.replace("devel-", "") + if is_devel_dep: + suggestions_devel.append( + ( + dependency, + f"pip install -e '.[{dependency}]'", + f"Adds all test libraries needed to test {short_dep}", + ) + ) + else: + suggestions.append( + ( + dependency, + f"pip install apache-airflow[{dependency}]", + f"{dependency.capitalize()} hooks and operators", + ) + ) + +HEADERS = ["extra", "install command", "enables"] +if errors: + console.print("\n".join(errors)) + console.print() + console.print("[bright_blue]Suggested tables to add to references::[/]") + if suggestions: + console.print("[bright_blue]Regular dependencies[/]") + console.print(tabulate(suggestions, headers=HEADERS, tablefmt="grid"), markup=False) + if suggestions_devel: + console.print("[bright_blue]Devel dependencies[/]") + console.print(tabulate(suggestions_devel, headers=HEADERS, tablefmt="grid"), markup=False) + if suggestions_providers: + console.print("[bright_blue]Devel dependencies[/]") + console.print(tabulate(suggestions_providers, headers=HEADERS, tablefmt="grid"), markup=False) + sys.exit(1) +else: + console.print(f"[green]Checked: {len(optional_dependencies)} dependencies are mentioned[/]") diff --git a/scripts/ci/pre_commit/pre_commit_check_order_dockerfile_extras.py b/scripts/ci/pre_commit/pre_commit_check_order_dockerfile_extras.py index 31c8c88eae061..a57eee9a89818 100755 --- a/scripts/ci/pre_commit/pre_commit_check_order_dockerfile_extras.py +++ b/scripts/ci/pre_commit/pre_commit_check_order_dockerfile_extras.py @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. """ -Test for an order of dependencies in setup.py +Check if extras in Dockerfile are reflected in docker build-arg-ref.rst and global constants. 
""" from __future__ import annotations diff --git a/scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py b/scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py new file mode 100755 index 0000000000000..0fbdf357f3aaf --- /dev/null +++ b/scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Test for an order of dependencies in setup.py +""" +from __future__ import annotations + +import re +import sys +from pathlib import Path + +from rich import print + +errors: list[str] = [] + +AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() +PYPROJECT_TOML_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml" + +sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported +from common_precommit_utils import check_list_sorted + + +def check_extras(type: str, extra: str, extras: list[str]) -> None: + r""" + Test for an order of dependencies in extra defined + `^dependent_group_name = [.*?]\n` in setup.py + """ + print(f"[info]Checking {type}:{extra}[/]") + extras = [extra.replace("[", "\\[") for extra in extras] + check_list_sorted(extras, f"Order of extra: {type}:{extra}", errors) + + +def extract_deps(content: str, extra: str) -> list[str]: + deps: list[str] = [] + extracting = False + for line in content.splitlines(): + line = line.strip() + if line.startswith("#"): + continue + if not extracting and line == f"{extra} = [": + extracting = True + elif extracting and line == "]": + break + elif extracting: + deps.append(line.strip().strip(",").strip('"')) + return deps + + +def check_type(pyproject_toml_contents: str, type: str) -> None: + """ + Test for an order of dependencies groups between mark + '# Start dependencies group' and '# End dependencies group' in setup.py + """ + print(f"[info]Checking {type}[/]") + pattern_type = re.compile(f"# START OF {type}\n(.*)# END OF {type}", re.DOTALL) + parsed_type_content = pattern_type.findall(pyproject_toml_contents)[0] + # strip comments + parsed_type_content = ( + "\n".join([line for line in parsed_type_content.splitlines() if not line.startswith("#")]) + "\n" + ) + pattern_extra_name = re.compile(r" = \[.*?]\n", re.DOTALL) + type_content = pattern_extra_name.sub(",", parsed_type_content) + + list_extra_names = type_content.strip(",").split(",") + check_list_sorted(list_extra_names, "Order of dependencies", errors) + for extra in list_extra_names: + deps_list = extract_deps(parsed_type_content, extra) + check_extras(type, extra, deps_list) + + +if __name__ == "__main__": + file_contents = PYPROJECT_TOML_PATH.read_text() + check_type(file_contents, "core extras") + check_type(file_contents, "Apache no provider extras") + check_type(file_contents, "devel extras") 
+ check_type(file_contents, "doc extras") + check_type(file_contents, "bundle extras") + check_type(file_contents, "deprecated extras") + + print() + for error in errors: + print(error) + + print() + + if errors: + sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_check_order_setup.py b/scripts/ci/pre_commit/pre_commit_check_order_setup.py deleted file mode 100755 index 95bc8e59ee256..0000000000000 --- a/scripts/ci/pre_commit/pre_commit_check_order_setup.py +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Test for an order of dependencies in setup.py -""" -from __future__ import annotations - -import os -import re -import sys -from pathlib import Path - -from rich import print - -errors: list[str] = [] - -SOURCE_DIR_PATH = Path(__file__).parents[3].resolve() -sys.path.insert(0, os.fspath(SOURCE_DIR_PATH)) -sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported -from common_precommit_utils import check_list_sorted - - -def check_main_dependent_group(setup_contents: str) -> None: - """ - Test for an order of dependencies groups between mark - '# Start dependencies group' and '# End dependencies group' in setup.py - """ - print("[info]Checking main dependency group[/]") - pattern_main_dependent_group = re.compile( - "# Start dependencies group\n(.*)# End dependencies group", re.DOTALL - ) - main_dependent_group = pattern_main_dependent_group.findall(setup_contents)[0] - - pattern_sub_dependent = re.compile(r" = \[.*?]\n", re.DOTALL) - main_dependent = pattern_sub_dependent.sub(",", main_dependent_group) - - src = main_dependent.strip(",").split(",") - check_list_sorted(src, "Order of dependencies", errors) - - for group in src: - check_sub_dependent_group(group) - - -def check_sub_dependent_group(group_name: str) -> None: - r""" - Test for an order of each dependencies groups declare like - `^dependent_group_name = [.*?]\n` in setup.py - """ - print(f"[info]Checking dependency group {group_name}[/]") - check_list_sorted(getattr(setup, group_name), f"Order of dependency group: {group_name}", errors) - - -def check_alias_dependent_group(setup_context: str) -> None: - """ - Test for an order of each dependencies groups declare like - `alias_dependent_group = dependent_group_1 + ... 
+ dependent_group_n` in setup.py - """ - pattern = re.compile("^\\w+ = (\\w+ \\+.*)", re.MULTILINE) - dependents = pattern.findall(setup_context) - - for dependent in dependents: - print(f"[info]Checking alias-dependent group {dependent}[/]") - src = dependent.split(" + ") - check_list_sorted(src, f"Order of alias dependencies group: {dependent}", errors) - - -def check_variable_order(var_name: str) -> None: - print(f"[info]Checking {var_name}[/]") - - var = getattr(setup, var_name) - - if isinstance(var, dict): - check_list_sorted(list(var.keys()), f"Order of dependencies in: {var_name}", errors) - else: - check_list_sorted(var, f"Order of dependencies in: {var_name}", errors) - - -def check_install_and_setup_requires() -> None: - """ - Test for an order of dependencies in function do_setup section - install_requires and setup_requires in setup.cfg - """ - - from setuptools.config import read_configuration - - path = os.fspath(SOURCE_DIR_PATH / "setup.cfg") - config = read_configuration(path) - - pattern_dependent_version = re.compile("[~|><=;].*") - - for key in ("install_requires", "setup_requires"): - print(f"[info]Checking setup.cfg group {key}[/]") - deps = config["options"][key] - dists = [pattern_dependent_version.sub("", p) for p in deps] - check_list_sorted(dists, f"Order of dependencies in do_setup section: {key}", errors) - - -if __name__ == "__main__": - import setup - - with open(setup.__file__) as setup_file: - file_contents = setup_file.read() - check_main_dependent_group(file_contents) - check_alias_dependent_group(file_contents) - check_variable_order("CORE_EXTRAS_DEPENDENCIES") - check_variable_order("ADDITIONAL_EXTRAS_DEPENDENCIES") - check_variable_order("EXTRAS_DEPRECATED_ALIASES") - check_variable_order("PREINSTALLED_PROVIDERS") - check_install_and_setup_requires() - - print() - print() - for error in errors: - print(error) - - print() - - if errors: - sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py b/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py deleted file mode 100755 index 0997fc008de9e..0000000000000 --- a/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py +++ /dev/null @@ -1,260 +0,0 @@ -#!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-""" -Checks if all the libraries in setup.py are listed in installation.rst file -""" -from __future__ import annotations - -import os -import re -import sys -from pathlib import Path - -from rich import print -from rich.console import Console -from rich.table import Table - -AIRFLOW_SOURCES_DIR = Path(__file__).parents[3].resolve() -SETUP_PY_FILE = "setup.py" -DOCS_FILE = os.path.join("docs", "apache-airflow", "extra-packages-ref.rst") -PY_IDENTIFIER = r"[a-zA-Z_][a-zA-Z0-9_\.]*" - -sys.path.insert(0, os.fspath(AIRFLOW_SOURCES_DIR)) - -os.environ["_SKIP_PYTHON_VERSION_CHECK"] = "true" - -from setup import ( - EXTRAS_DEPENDENCIES, - EXTRAS_DEPRECATED_ALIASES, - EXTRAS_DEPRECATED_ALIASES_IGNORED_FROM_REF_DOCS, - PREINSTALLED_PROVIDERS, - add_all_provider_packages, -) - - -def get_file_content(*path_elements: str) -> str: - file_path = AIRFLOW_SOURCES_DIR.joinpath(*path_elements) - return file_path.read_text() - - -def get_extras_from_setup() -> set[str]: - """Returns a set of regular (non-deprecated) extras from setup.""" - return ( - set(EXTRAS_DEPENDENCIES.keys()) - - set(EXTRAS_DEPRECATED_ALIASES.keys()) - - set(EXTRAS_DEPRECATED_ALIASES_IGNORED_FROM_REF_DOCS) - ) - - -def get_extras_from_docs() -> set[str]: - """ - Returns a list of extras from airflow.docs. - """ - docs_content = get_file_content(DOCS_FILE) - extras_section_regex = re.compile( - rf"\|[^|]+\|.*pip install .apache-airflow\[({PY_IDENTIFIER})][^|]+\|[^|]+\|", - re.MULTILINE, - ) - doc_extra_set: set[str] = set() - for doc_extra in extras_section_regex.findall(docs_content): - doc_extra_set.add(doc_extra) - return doc_extra_set - - -def get_preinstalled_providers_from_docs() -> list[str]: - """ - Returns list of pre-installed providers from the doc. - """ - docs_content = get_file_content(DOCS_FILE) - preinstalled_section_regex = re.compile( - rf"\|\s*({PY_IDENTIFIER})\s*\|[^|]+pip install[^|]+\|[^|]+\|\s+\*\s+\|$", - re.MULTILINE, - ) - return preinstalled_section_regex.findall(docs_content) - - -def get_deprecated_extras_from_docs() -> dict[str, str]: - """ - Returns dict of deprecated extras from airflow.docs (alias -> target extra) - """ - deprecated_extras = {} - docs_content = get_file_content(DOCS_FILE) - - deprecated_extras_section_regex = re.compile( - r"\| Deprecated extra \| Extra to be used instead \|\n(.*)\n", re.DOTALL - ) - deprecated_extras_content = deprecated_extras_section_regex.findall(docs_content)[0] - - deprecated_extras_regexp = re.compile(r"\|\s(\S+)\s+\|\s(\S*)\s+\|$", re.MULTILINE) - for extras in deprecated_extras_regexp.findall(deprecated_extras_content): - deprecated_extras[extras[0]] = extras[1] - return deprecated_extras - - -def check_extras(console: Console) -> bool: - """ - Checks if non-deprecated extras match setup vs. doc. 
- :param console: print table there in case of errors - :return: True if all ok, False otherwise - """ - extras_table = Table() - extras_table.add_column("NAME", justify="right", style="cyan") - extras_table.add_column("SETUP", justify="center", style="magenta") - extras_table.add_column("DOCS", justify="center", style="yellow") - non_deprecated_setup_extras = get_extras_from_setup() - non_deprecated_docs_extras = get_extras_from_docs() - for extra in non_deprecated_setup_extras: - if extra not in non_deprecated_docs_extras: - extras_table.add_row(extra, "V", "") - for extra in non_deprecated_docs_extras: - if extra not in non_deprecated_setup_extras: - extras_table.add_row(extra, "", "V") - if extras_table.row_count != 0: - print( - f"""\ -[red bold]ERROR!![/red bold] - -The "[bold]CORE_EXTRAS_DEPENDENCIES[/bold]" -sections in the setup file: [bold yellow]{SETUP_PY_FILE}[/bold yellow] -should be synchronized with the "Extra Packages Reference" -in the documentation file: [bold yellow]{DOCS_FILE}[/bold yellow]. - -Below is the list of extras that: - - * are used but are not documented, - * are documented but not used, - -[bold]Please synchronize setup/documentation files![/bold] - -""" - ) - console.print(extras_table) - return False - return True - - -def check_deprecated_extras(console: Console) -> bool: - """ - Checks if deprecated extras match setup vs. doc. - :param console: print table there in case of errors - :return: True if all ok, False otherwise - """ - deprecated_setup_extras = EXTRAS_DEPRECATED_ALIASES - deprecated_docs_extras = get_deprecated_extras_from_docs() - - deprecated_extras_table = Table() - deprecated_extras_table.add_column("DEPRECATED_IN_SETUP", justify="right", style="cyan") - deprecated_extras_table.add_column("TARGET_IN_SETUP", justify="center", style="magenta") - deprecated_extras_table.add_column("DEPRECATED_IN_DOCS", justify="right", style="cyan") - deprecated_extras_table.add_column("TARGET_IN_DOCS", justify="center", style="magenta") - - for extra in deprecated_setup_extras.keys(): - if extra not in deprecated_docs_extras: - deprecated_extras_table.add_row(extra, deprecated_setup_extras[extra], "", "") - elif deprecated_docs_extras[extra] != deprecated_setup_extras[extra]: - deprecated_extras_table.add_row( - extra, deprecated_setup_extras[extra], extra, deprecated_docs_extras[extra] - ) - - for extra in deprecated_docs_extras.keys(): - if extra not in deprecated_setup_extras: - deprecated_extras_table.add_row("", "", extra, deprecated_docs_extras[extra]) - - if deprecated_extras_table.row_count != 0: - print( - f"""\ -[red bold]ERROR!![/red bold] - -The "[bold]EXTRAS_DEPRECATED_ALIASES[/bold]" section in the setup file:\ -[bold yellow]{SETUP_PY_FILE}[/bold yellow] -should be synchronized with the "Extra Packages Reference" -in the documentation file: [bold yellow]{DOCS_FILE}[/bold yellow]. - -Below is the list of deprecated extras that: - - * are used but are not documented, - * are documented but not used, - * or have different target extra specified in the documentation or setup. - -[bold]Please synchronize setup/documentation files![/bold] - -""" - ) - console.print(deprecated_extras_table) - return False - return True - - -def check_preinstalled_extras(console: Console) -> bool: - """ - Checks if preinstalled extras match setup vs. doc. 
- :param console: print table there in case of errors - :return: True if all ok, False otherwise - """ - preinstalled_providers_from_docs = get_preinstalled_providers_from_docs() - preinstalled_providers_from_setup = [provider.split(">=")[0] for provider in PREINSTALLED_PROVIDERS] - - preinstalled_providers_table = Table() - preinstalled_providers_table.add_column("PREINSTALLED_IN_SETUP", justify="right", style="cyan") - preinstalled_providers_table.add_column("PREINSTALLED_IN_DOCS", justify="center", style="magenta") - - for provider in preinstalled_providers_from_setup: - if provider not in preinstalled_providers_from_docs: - preinstalled_providers_table.add_row(provider, "") - - for provider in preinstalled_providers_from_docs: - if provider not in preinstalled_providers_from_setup: - preinstalled_providers_table.add_row("", provider) - - if preinstalled_providers_table.row_count != 0: - print( - f"""\ -[red bold]ERROR!![/red bold] - -The "[bold]PREINSTALLED_PROVIDERS[/bold]" section in the setup file:\ -[bold yellow]{SETUP_PY_FILE}[/bold yellow] -should be synchronized with the "Extra Packages Reference" -in the documentation file: [bold yellow]{DOCS_FILE}[/bold yellow]. - -Below is the list of preinstalled providers that: - * are used but are not documented, - * or are documented but not used. - -[bold]Please synchronize setup/documentation files![/bold] - -""" - ) - console.print(preinstalled_providers_table) - return False - return True - - -if __name__ == "__main__": - status: list[bool] = [] - # force adding all provider package dependencies, to check providers status - add_all_provider_packages() - main_console = Console() - status.append(check_extras(main_console)) - status.append(check_deprecated_extras(main_console)) - status.append(check_preinstalled_extras(main_console)) - - if all(status): - print("All extras are synchronized: [green]OK[/]") - sys.exit(0) - sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_compile_www_assets.py b/scripts/ci/pre_commit/pre_commit_compile_www_assets.py index 3365d1fb872ec..d3529f1b13653 100755 --- a/scripts/ci/pre_commit/pre_commit_compile_www_assets.py +++ b/scripts/ci/pre_commit/pre_commit_compile_www_assets.py @@ -20,6 +20,7 @@ import hashlib import os import re +import shutil import subprocess import sys from pathlib import Path @@ -53,14 +54,21 @@ def get_directory_hash(directory: Path, skip_path_regexp: str | None = None) -> if __name__ == "__main__": www_directory = AIRFLOW_SOURCES_PATH / "airflow" / "www" + node_modules_directory = www_directory / "node_modules" + dist_directory = www_directory / "static" / "dist" WWW_HASH_FILE.parent.mkdir(exist_ok=True) - old_hash = WWW_HASH_FILE.read_text() if WWW_HASH_FILE.exists() else "" - new_hash = get_directory_hash(www_directory, skip_path_regexp=r".*node_modules.*") - if new_hash == old_hash: - print("The WWW directory has not changed! Skip regeneration.") - sys.exit(0) + if node_modules_directory.exists() and dist_directory.exists(): + old_hash = WWW_HASH_FILE.read_text() if WWW_HASH_FILE.exists() else "" + new_hash = get_directory_hash(www_directory, skip_path_regexp=r".*node_modules.*") + if new_hash == old_hash: + print("The WWW directory has not changed! 
Skip regeneration.") + sys.exit(0) + else: + shutil.rmtree(node_modules_directory, ignore_errors=True) + shutil.rmtree(dist_directory, ignore_errors=True) env = os.environ.copy() env["FORCE_COLOR"] = "true" subprocess.check_call(["yarn", "install", "--frozen-lockfile"], cwd=os.fspath(www_directory)) subprocess.check_call(["yarn", "run", "build"], cwd=os.fspath(www_directory), env=env) + new_hash = get_directory_hash(www_directory, skip_path_regexp=r".*node_modules.*") WWW_HASH_FILE.write_text(new_hash) diff --git a/scripts/ci/pre_commit/pre_commit_insert_extras.py b/scripts/ci/pre_commit/pre_commit_insert_extras.py index 103cec230bfde..4933032e3020e 100755 --- a/scripts/ci/pre_commit/pre_commit_insert_extras.py +++ b/scripts/ci/pre_commit/pre_commit_insert_extras.py @@ -17,33 +17,85 @@ # under the License. from __future__ import annotations -import os import sys import textwrap +from enum import Enum from pathlib import Path -AIRFLOW_SOURCES_DIR = Path(__file__).parents[3].resolve() +# tomllib is available in Python 3.11+ and before that tomli offers same interface for parsing TOML files +try: + import tomllib +except ImportError: + import tomli as tomllib + +AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() +PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml" sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported -sys.path.insert(0, str(AIRFLOW_SOURCES_DIR)) # make sure setup is imported from Airflow -# flake8: noqa: F401 +from common_precommit_utils import insert_documentation -os.environ["_SKIP_PYTHON_VERSION_CHECK"] = "true" -from common_precommit_utils import insert_documentation +class ExtraType(Enum): + DEVEL = "DEVEL" + DOC = "DOC" + REGULAR = "REGULAR" + + +def get_header_and_footer(extra_type: ExtraType, file_format: str) -> tuple[str, str]: + if file_format == "rst": + return f" .. START {extra_type.value} EXTRAS HERE", f" .. END {extra_type.value} EXTRAS HERE" + elif file_format == "txt": + return f"# START {extra_type.value} EXTRAS HERE", f"# END {extra_type.value} EXTRAS HERE" + else: + raise Exception(f"Bad format {format} passed. Only rst and txt are supported") + + +def get_wrapped_list(extras_set: set[str]) -> list[str]: + return [line + "\n" for line in textwrap.wrap(", ".join(sorted(extras_set)), 100)] + + +def get_extra_types_dict(extras: dict[str, list[str]]) -> dict[ExtraType, tuple[set[str], list[str]]]: + """ + Split extras into four types. + + :return: dictionary of extra types with tuple of two set,list - set of extras and text-wrapped list + """ + extra_type_dict: dict[ExtraType, tuple[set[str], list[str]]] = {} + + for extra_type in ExtraType: + extra_type_dict[extra_type] = (set(), []) + + for key, value in extras.items(): + if key.startswith("devel"): + extra_type_dict[ExtraType.DEVEL][0].add(key) + elif key in ["doc", "doc-gen"]: + extra_type_dict[ExtraType.DOC][0].add(key) + else: + extra_type_dict[ExtraType.REGULAR][0].add(key) + + for extra_type in ExtraType: + extra_type_dict[extra_type][1].extend(get_wrapped_list(extra_type_dict[extra_type][0])) + + return extra_type_dict + + +def get_extras_from_pyproject_toml() -> dict[str, list[str]]: + pyproject_toml_content = tomllib.loads(PYPROJECT_TOML_FILE_PATH.read_text()) + return pyproject_toml_content["project"]["optional-dependencies"] -from setup import EXTRAS_DEPENDENCIES -sys.path.append(str(AIRFLOW_SOURCES_DIR)) +FILES_TO_UPDATE = [(AIRFLOW_ROOT_PATH / "INSTALL", "txt"), (AIRFLOW_ROOT_PATH / "CONTRIBUTING.rst", "rst")] -RST_HEADER = " .. 
START EXTRAS HERE" -RST_FOOTER = " .. END EXTRAS HERE" -INSTALL_HEADER = "# START EXTRAS HERE" -INSTALL_FOOTER = "# END EXTRAS HERE" +def process_documentation_files(): + extra_type_dict = get_extra_types_dict(get_extras_from_pyproject_toml()) + for file, file_format in FILES_TO_UPDATE: + if not file.exists(): + raise Exception(f"File {file} does not exist") + for extra_type in ExtraType: + header, footer = get_header_and_footer(extra_type, file_format) + insert_documentation(file, extra_type_dict[extra_type][1], header, footer) -CONSTANTS_HEADER = "# START EXTRAS HERE" -CONSTANTS_FOOTER = "# END EXTRAS HERE" DEFAULT_EXTRAS = ( "amazon,async,celery,cncf.kubernetes,daskexecutor,docker,elasticsearch,ftp,google," @@ -53,13 +105,4 @@ if __name__ == "__main__": - install_file_path = AIRFLOW_SOURCES_DIR / "INSTALL" - contributing_file_path = AIRFLOW_SOURCES_DIR / "CONTRIBUTING.rst" - global_constants_file_path = ( - AIRFLOW_SOURCES_DIR / "dev" / "breeze" / "src" / "airflow_breeze" / "global_constants.py" - ) - extras_list = textwrap.wrap(", ".join(EXTRAS_DEPENDENCIES.keys()), 100) - extras_list = [line + "\n" for line in extras_list] - extras_code = [f" {extra}\n" for extra in EXTRAS_DEPENDENCIES.keys()] - insert_documentation(install_file_path, extras_list, INSTALL_HEADER, INSTALL_FOOTER) - insert_documentation(contributing_file_path, extras_list, RST_HEADER, RST_FOOTER) + process_documentation_files() diff --git a/scripts/ci/pre_commit/pre_commit_sort_installed_providers.py b/scripts/ci/pre_commit/pre_commit_sort_installed_providers.py index df53dea94dc3e..fb735569a31f5 100755 --- a/scripts/ci/pre_commit/pre_commit_sort_installed_providers.py +++ b/scripts/ci/pre_commit/pre_commit_sort_installed_providers.py @@ -27,6 +27,7 @@ AIRFLOW_SOURCES = Path(__file__).parents[3].resolve() +DEV_DIR_PATH = AIRFLOW_SOURCES / "dev" def stable_sort(x): @@ -37,8 +38,14 @@ def sort_uniq(sequence): return sorted(set(sequence), key=stable_sort) -if __name__ == "__main__": - installed_providers_path = Path(AIRFLOW_SOURCES) / "airflow" / "providers" / "installed_providers.txt" - content = installed_providers_path.read_text().splitlines(keepends=True) +def sort_file(path: Path): + content = path.read_text().splitlines(keepends=True) sorted_content = sort_uniq(content) - installed_providers_path.write_text("".join(sorted_content)) + path.write_text("".join(sorted_content)) + + +if __name__ == "__main__": + prod_image_installed_providers_path = DEV_DIR_PATH / "prod_image_installed_providers.txt" + airflow_pre_installed_providers_path = DEV_DIR_PATH / "airflow_pre_installed_providers.txt" + sort_file(prod_image_installed_providers_path) + sort_file(airflow_pre_installed_providers_path) diff --git a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py index 5fa638505fece..50cebee3e5c19 100755 --- a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py +++ b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py @@ -17,11 +17,13 @@ # under the License. 
from __future__ import annotations +import hashlib import json import os import sys from ast import Import, ImportFrom, NodeVisitor, parse from collections import defaultdict +from enum import Enum from pathlib import Path from typing import Any, List @@ -40,6 +42,12 @@ DEPENDENCIES_JSON_FILE_PATH = AIRFLOW_SOURCES_ROOT / "generated" / "provider_dependencies.json" +PYPROJECT_TOML_FILE_PATH = AIRFLOW_SOURCES_ROOT / "pyproject.toml" + +MY_FILE = Path(__file__).resolve() +MY_MD5SUM_FILE = MY_FILE.parent / MY_FILE.name.replace(".py", ".py.md5sum") + + sys.path.insert(0, str(AIRFLOW_SOURCES_ROOT)) # make sure setup is imported from Airflow warnings: list[str] = [] @@ -109,8 +117,6 @@ def get_provider_id_from_relative_import_or_file(relative_path_or_file: str) -> provider_candidate = relative_path_or_file.replace(os.sep, ".").split(".") while provider_candidate: candidate_provider_id = ".".join(provider_candidate) - if "google_vendor" in candidate_provider_id: - candidate_provider_id = candidate_provider_id.replace("google_vendor", "google") if candidate_provider_id in ALL_PROVIDERS: return candidate_provider_id provider_candidate = provider_candidate[:-1] @@ -175,6 +181,149 @@ def check_if_different_provider_used(file_path: Path) -> None: STATES: dict[str, str] = {} +FOUND_EXTRAS: dict[str, list[str]] = defaultdict(list) + + +class ParsedDependencyTypes(Enum): + CORE_EXTRAS = "core extras" + APACHE_NO_PROVIDER_EXTRAS = "Apache no provider extras" + DEVEL_EXTRAS = "devel extras" + DOC_EXTRAS = "doc extras" + BUNDLE_EXTRAS = "bundle extras" + DEPRECATED_EXTRAS = "deprecated extras" + MANUAL_EXTRAS = "manual extras" + + +GENERATED_DEPENDENCIES_START = "# START OF GENERATED DEPENDENCIES" +GENERATED_DEPENDENCIES_END = "# END OF GENERATED DEPENDENCIES" + + +def normalize_extra(dependency: str) -> str: + return dependency.replace(".", "-").replace("_", "-") + + +def normalize_package_name(dependency: str) -> str: + return f"apache-airflow-providers-{dependency.replace('.', '-').replace('_', '-')}" + + +def convert_to_extra_dependency(dependency: str) -> str: + # if there is version in dependency - remove it as we do not need it in extra specification + # for editable installation + if ">=" in dependency: + dependency = dependency.split(">=")[0] + extra = dependency.replace("apache-airflow-providers-", "").replace("-", "_").replace(".", "_") + return f"apache-airflow[{extra}]" + + +def generate_dependencies( + result_content: list[str], + dependencies: dict[str, dict[str, list[str] | str]], +): + def generate_parsed_extras(type: ParsedDependencyTypes): + result_content.append(f" # {type.value}") + for extra in FOUND_EXTRAS[type.value]: + result_content.append(f' "apache-airflow[{extra}]",') + + def get_python_exclusion(dependency_info: dict[str, list[str] | str]): + excluded_python_versions = dependency_info.get("excluded-python-versions") + exclusion = "" + if excluded_python_versions: + separator = ";" + for version in excluded_python_versions: + exclusion += f'{separator}python_version != \\"{version}\\"' + separator = " and " + return exclusion + + for dependency, dependency_info in dependencies.items(): + if dependency_info["state"] in ["suspended", "removed"]: + continue + result_content.append(f"{normalize_extra(dependency)} = [") + deps = dependency_info["deps"] + if not isinstance(deps, list): + raise TypeError(f"Wrong type of 'deps' {deps} for {dependency} in {DEPENDENCIES_JSON_FILE_PATH}") + for dep in deps: + if dep.startswith("apache-airflow-providers-"): + dep = 
convert_to_extra_dependency(dep) + elif dep.startswith("apache-airflow>="): + continue + result_content.append(f' "{dep}{get_python_exclusion(dependency_info)}",') + devel_deps = dependency_info.get("devel-deps") + if devel_deps: + result_content.append(f" # Devel dependencies for the {dependency} provider") + for dep in devel_deps: + result_content.append(f' "{dep}{get_python_exclusion(dependency_info)}",') + result_content.append("]") + result_content.append("all = [") + generate_parsed_extras(ParsedDependencyTypes.CORE_EXTRAS) + generate_parsed_extras(ParsedDependencyTypes.APACHE_NO_PROVIDER_EXTRAS) + result_content.append(" # Provider extras") + for dependency, dependency_info in dependencies.items(): + result_content.append(f' "apache-airflow[{normalize_extra(dependency)}]",') + result_content.append("]") + result_content.append("devel-all = [") + result_content.append(' "apache-airflow[all]",') + result_content.append(' "apache-airflow[devel]",') + result_content.append(' "apache-airflow[doc]",') + result_content.append(' "apache-airflow[doc-gen]",') + result_content.append(' "apache-airflow[saml]",') + generate_parsed_extras(ParsedDependencyTypes.APACHE_NO_PROVIDER_EXTRAS) + result_content.append(" # Include all provider deps") + for dependency, dependency_info in dependencies.items(): + result_content.append(f' "apache-airflow[{normalize_extra(dependency)}]",') + result_content.append("]") + + +def get_dependency_type(dependency_type: str) -> ParsedDependencyTypes | None: + for dep_type in ParsedDependencyTypes: + if dep_type.value == dependency_type: + return dep_type + return None + + +def update_pyproject_toml(dependencies: dict[str, dict[str, list[str] | str]]): + file_content = PYPROJECT_TOML_FILE_PATH.read_text() + result_content: list[str] = [] + copying = True + current_type: str | None = None + line_count: int = 0 + for line in file_content.splitlines(): + if copying: + result_content.append(line) + if line.strip().startswith(GENERATED_DEPENDENCIES_START): + copying = False + generate_dependencies(result_content, dependencies) + elif line.strip().startswith(GENERATED_DEPENDENCIES_END): + copying = True + result_content.append(line) + elif line.strip().startswith("# START OF "): + current_type = line.strip().replace("# START OF ", "") + type_enum = get_dependency_type(current_type) + if type_enum is None: + console.print( + f"[red]Wrong start of section '{current_type}' in {PYPROJECT_TOML_FILE_PATH} " + f"at line {line_count}: Unknown section type" + ) + sys.exit(1) + elif line.strip().startswith("# END OF "): + end_type = line.strip().replace("# END OF ", "") + if end_type != current_type: + console.print( + f"[red]Wrong end of section {end_type} in {PYPROJECT_TOML_FILE_PATH} at line {line_count}" + ) + sys.exit(1) + if current_type: + if line.strip().endswith(" = ["): + FOUND_EXTRAS[current_type].append(line.split(" = [")[0].strip()) + line_count += 1 + PYPROJECT_TOML_FILE_PATH.write_text("\n".join(result_content) + "\n") + + +def calculate_my_hash(): + my_file = MY_FILE.resolve() + hash_md5 = hashlib.md5() + hash_md5.update(my_file.read_bytes()) + return hash_md5.hexdigest() + if __name__ == "__main__": find_all_providers_and_provider_files() @@ -185,6 +334,7 @@ def check_if_different_provider_used(file_path: Path) -> None: check_if_different_provider_used(file) for provider, provider_yaml_content in ALL_PROVIDERS.items(): ALL_DEPENDENCIES[provider]["deps"].extend(provider_yaml_content["dependencies"]) + 
ALL_DEPENDENCIES[provider]["devel-deps"].extend(provider_yaml_content.get("devel-dependencies") or []) STATES[provider] = provider_yaml_content["state"] if warnings: console.print("[yellow]Warnings!\n") @@ -199,6 +349,7 @@ def check_if_different_provider_used(file_path: Path) -> None: unique_sorted_dependencies: dict[str, dict[str, list[str] | str]] = defaultdict(dict) for key in sorted(ALL_DEPENDENCIES.keys()): unique_sorted_dependencies[key]["deps"] = sorted(ALL_DEPENDENCIES[key]["deps"]) + unique_sorted_dependencies[key]["devel-deps"] = ALL_DEPENDENCIES[key].get("devel-deps") or [] unique_sorted_dependencies[key]["cross-providers-deps"] = sorted( set(ALL_DEPENDENCIES[key]["cross-providers-deps"]) ) @@ -210,13 +361,17 @@ def check_if_different_provider_used(file_path: Path) -> None: console.print("[red]Errors found during verification. Exiting!") console.print() sys.exit(1) - old_dependencies = DEPENDENCIES_JSON_FILE_PATH.read_text() + old_dependencies = ( + DEPENDENCIES_JSON_FILE_PATH.read_text() if DEPENDENCIES_JSON_FILE_PATH.exists() else "{}" + ) new_dependencies = json.dumps(unique_sorted_dependencies, indent=2) + "\n" - if new_dependencies != old_dependencies: + old_md5sum = MY_MD5SUM_FILE.read_text().strip() if MY_MD5SUM_FILE.exists() else "" + new_md5sum = calculate_my_hash() + if new_dependencies != old_dependencies or new_md5sum != old_md5sum: DEPENDENCIES_JSON_FILE_PATH.write_text(json.dumps(unique_sorted_dependencies, indent=2) + "\n") if os.environ.get("CI"): console.print() - console.print(f"[info]There is a need to regenerate {DEPENDENCIES_JSON_FILE_PATH}") + console.print(f"There is a need to regenerate {DEPENDENCIES_JSON_FILE_PATH}") console.print( f"[red]You need to run the following command locally and commit generated " f"{DEPENDENCIES_JSON_FILE_PATH.relative_to(AIRFLOW_SOURCES_ROOT)} file:\n" @@ -229,8 +384,12 @@ def check_if_different_provider_used(file_path: Path) -> None: f"[yellow]Regenerated new dependencies. 
Please commit " f"{DEPENDENCIES_JSON_FILE_PATH.relative_to(AIRFLOW_SOURCES_ROOT)}!\n" ) - console.print(f"[info]Written {DEPENDENCIES_JSON_FILE_PATH}") + console.print(f"Written {DEPENDENCIES_JSON_FILE_PATH}") + console.print() + update_pyproject_toml(unique_sorted_dependencies) + console.print(f"Written {PYPROJECT_TOML_FILE_PATH}") console.print() + MY_MD5SUM_FILE.write_text(new_md5sum + "\n") sys.exit(1) else: console.print( diff --git a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py.md5sum b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py.md5sum new file mode 100644 index 0000000000000..610f5562c8a5f --- /dev/null +++ b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py.md5sum @@ -0,0 +1 @@ +ed25c4f6b220c14b40bbf370fee9388e diff --git a/scripts/docker/entrypoint_ci.sh b/scripts/docker/entrypoint_ci.sh index cf0ee85bab2e4..52d79f415473e 100755 --- a/scripts/docker/entrypoint_ci.sh +++ b/scripts/docker/entrypoint_ci.sh @@ -231,7 +231,7 @@ function check_download_sqlalchemy() { if [[ ${DOWNGRADE_SQLALCHEMY=} != "true" ]]; then return fi - min_sqlalchemy_version=$(grep "sqlalchemy>=" setup.cfg | sed "s/.*>=\([0-9\.]*\).*/\1/") + min_sqlalchemy_version=$(grep "\"sqlalchemy>=" pyproject.toml | sed "s/.*>=\([0-9\.]*\).*/\1/" | xargs) echo echo "${COLOR_BLUE}Downgrading sqlalchemy to minimum supported version: ${min_sqlalchemy_version}${COLOR_RESET}" echo diff --git a/scripts/docker/install_airflow.sh b/scripts/docker/install_airflow.sh index 56fec404074e4..959b4befda4ee 100644 --- a/scripts/docker/install_airflow.sh +++ b/scripts/docker/install_airflow.sh @@ -60,10 +60,11 @@ function install_airflow() { "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} if [[ -n "${AIRFLOW_INSTALL_EDITABLE_FLAG}" ]]; then - # Remove airflow and reinstall it using editable flag + # Remove airflow and all providers and reinstall it using editable flag # We can only do it when we install airflow from sources set -x - pip uninstall apache-airflow --yes + pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes 2>/dev/null || true + pip uninstall apache-airflow --yes 2>/dev/null || true pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" @@ -85,7 +86,7 @@ function install_airflow() { "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" common::install_pip_version - # then upgrade if needed without using constraints to account for new limits in setup.py + # then upgrade if needed without using constraints to account for new limits in pyproject.toml pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ diff --git a/scripts/docker/install_airflow_dependencies_from_branch_tip.sh b/scripts/docker/install_airflow_dependencies_from_branch_tip.sh index 26279c4bc85fb..9c809039c7df5 100644 --- a/scripts/docker/install_airflow_dependencies_from_branch_tip.sh +++ b/scripts/docker/install_airflow_dependencies_from_branch_tip.sh @@ -18,11 +18,11 @@ # shellcheck shell=bash disable=SC2086 # Installs Airflow from $AIRFLOW_BRANCH tip. This is pure optimisation. It is done because we do not want -# to reinstall all dependencies from scratch when setup.py changes. 
Problem with Docker caching is that +# to reinstall all dependencies from scratch when pyproject.toml changes. Problem with Docker caching is that # when a file is changed, when added to docker context, it invalidates the cache and it causes Docker # build to reinstall all dependencies from scratch. This can take a loooooot of time. Therefore we install # the dependencies first from main (and uninstall airflow right after) so that we can start installing -# deps from those pre-installed dependencies. It saves few minutes of build time when setup.py changes. +# deps from those pre-installed dependencies. It saves few minutes of build time when pyproject.toml changes. # # If INSTALL_MYSQL_CLIENT is set to false, mysql extra is removed # If INSTALL_POSTGRES_CLIENT is set to false, postgres extra is removed diff --git a/scripts/in_container/run_generate_constraints.py b/scripts/in_container/run_generate_constraints.py index e03eff5126783..99080fa3eb02d 100755 --- a/scripts/in_container/run_generate_constraints.py +++ b/scripts/in_container/run_generate_constraints.py @@ -17,6 +17,7 @@ # under the License. from __future__ import annotations +import json import os import sys from dataclasses import dataclass @@ -32,6 +33,9 @@ AIRFLOW_SOURCES = Path(__file__).resolve().parents[2] DEFAULT_BRANCH = os.environ.get("DEFAULT_BRANCH", "main") PYTHON_VERSION = os.environ.get("PYTHON_MAJOR_MINOR_VERSION", "3.8") +GENERATED_PROVIDER_DEPENDENCIES_FILE = AIRFLOW_SOURCES / "generated" / "provider_dependencies.json" + +ALL_PROVIDER_DEPENDENCIES = json.loads(GENERATED_PROVIDER_DEPENDENCIES_FILE.read_text()) now = datetime.now().isoformat() @@ -125,7 +129,7 @@ def current_constraints_file(self) -> Path: def install_local_airflow_with_eager_upgrade( - config_params: ConfigParams, eager_upgrade_additional_requirements: str, extras: list[str] + config_params: ConfigParams, eager_upgrade_additional_requirements: str ) -> None: run_command( [ @@ -133,7 +137,8 @@ def install_local_airflow_with_eager_upgrade( "install", "--root-user-action", "ignore", - f".[{','.join(extras)}]", + "-e", + ".[all-core]", *eager_upgrade_additional_requirements.split(" "), "--upgrade", "--upgrade-strategy", @@ -254,16 +259,12 @@ def uninstall_all_packages(config_params: ConfigParams): ) -def get_core_airflow_dependencies() -> list[str]: - import setup - - return list(setup.CORE_EXTRAS_DEPENDENCIES.keys()) - - -def get_all_provider_packages() -> list[str]: - import setup - - return setup.get_all_provider_packages().split(" ") +def get_all_active_provider_packages() -> list[str]: + return [ + f"apache-airflow-providers-{provider.replace('.','-')}" + for provider in ALL_PROVIDER_DEPENDENCIES.keys() + if ALL_PROVIDER_DEPENDENCIES[provider]["state"] == "ready" + ] def generate_constraints_source_providers(config_params: ConfigParams) -> None: @@ -288,8 +289,7 @@ def generate_constraints_pypi_providers(config_params: ConfigParams) -> None: :return: """ dist_dir = Path("/dist") - core_dependencies = get_core_airflow_dependencies() - all_provider_packages = get_all_provider_packages() + all_provider_packages = get_all_active_provider_packages() chicken_egg_prefixes = [] packages_to_install = [] console.print("[bright_blue]Installing Airflow with PyPI providers with eager upgrade") @@ -332,7 +332,7 @@ def generate_constraints_pypi_providers(config_params: ConfigParams) -> None: "install", "--root-user-action", "ignore", - f".[{','.join(core_dependencies)}]", + ".[all-core]", *packages_to_install, 
*config_params.eager_upgrade_additional_requirements.split(" "), "--upgrade", @@ -355,15 +355,15 @@ def generate_constraints_no_providers(config_params: ConfigParams) -> None: Generates constraints without any provider dependencies. This is used mostly to generate SBOM files - where we generate list of dependencies for Airflow without any provider installed. """ - core_dependencies = get_core_airflow_dependencies() uninstall_all_packages(config_params) console.print( - f"[bright_blue]Installing airflow with [{core_dependencies}] extras only " f"with eager upgrade." + "[bright_blue]Installing airflow with [all-core] extras only with eager upgrade in " + "installable mode." ) install_local_airflow_with_eager_upgrade( - config_params, config_params.eager_upgrade_additional_requirements, core_dependencies + config_params, config_params.eager_upgrade_additional_requirements ) - console.print(f"[success]Installed airflow with [{core_dependencies}] extras only with eager upgrade.") + console.print("[success]Installed airflow with [all-core] extras only with eager upgrade.") with config_params.current_constraints_file.open("w") as constraints_file: constraints_file.write(NO_PROVIDERS_CONSTRAINTS_PREFIX) freeze_packages_to_file(config_params, constraints_file) diff --git a/scripts/in_container/run_prepare_airflow_packages.py b/scripts/in_container/run_prepare_airflow_packages.py index 4b319a722c825..2f6f1912c2f99 100755 --- a/scripts/in_container/run_prepare_airflow_packages.py +++ b/scripts/in_container/run_prepare_airflow_packages.py @@ -22,95 +22,121 @@ import re import subprocess import sys +from contextlib import contextmanager from pathlib import Path from shutil import rmtree -import rich - - -def process_summary(success_message: str, error_message: str, completed_process: subprocess.CompletedProcess): - if completed_process.returncode != 0: - if os.environ.get("GITHUB_ACTIONS", "") != "": - print("::endgroup::") - print(f"::error::{error_message}") - rich.print(f"[red]{error_message}") - rich.print(completed_process.stdout) - rich.print(completed_process.stderr) - sys.exit(completed_process.returncode) - else: - rich.print(f"[green]{success_message}") +from rich.console import Console +console = Console(color_system="standard", width=200) AIRFLOW_SOURCES_ROOT = Path(__file__).parents[2].resolve() +AIRFLOW_INIT_FILE = AIRFLOW_SOURCES_ROOT / "airflow" / "__init__.py" WWW_DIRECTORY = AIRFLOW_SOURCES_ROOT / "airflow" / "www" +VERSION_SUFFIX = os.environ.get("VERSION_SUFFIX_FOR_PYPI", "") +PACKAGE_FORMAT = os.environ.get("PACKAGE_FORMAT", "wheel") + + +def clean_build_directory(): + console.print("[bright_blue]Cleaning build directories\n") + for egg_info_file in AIRFLOW_SOURCES_ROOT.glob("*egg-info*"): + rmtree(egg_info_file, ignore_errors=True) + rmtree(AIRFLOW_SOURCES_ROOT / "build", ignore_errors=True) + console.print("[green]Cleaned build directories\n\n") + + +def mark_git_directory_as_safe(): + console.print(f"[bright_blue]Marking {AIRFLOW_SOURCES_ROOT} as safe directory for git commands.\n") + subprocess.run( + ["git", "config", "--global", "--unset-all", "safe.directory"], + cwd=AIRFLOW_SOURCES_ROOT, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + subprocess.run( + ["git", "config", "--global", "--add", "safe.directory", AIRFLOW_SOURCES_ROOT], + cwd=AIRFLOW_SOURCES_ROOT, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + console.print(f"[green]Marked {AIRFLOW_SOURCES_ROOT} as safe directory for git commands.\n") + + +def 
get_current_airflow_version() -> str: + console.print("[bright_blue]Checking airflow version\n") + airflow_version = subprocess.check_output( + [sys.executable, "-m", "hatch", "version"], text=True, cwd=AIRFLOW_SOURCES_ROOT + ).strip() + console.print(f"[green]Airflow version: {airflow_version}\n") + return airflow_version + + +def build_airflow_packages(package_format: str): + build_command = [sys.executable, "-m", "hatch", "build", "-t", "custom"] + + if package_format in ["both", "wheel"]: + build_command.extend(["-t", "wheel"]) + if package_format in ["both", "sdist"]: + build_command.extend(["-t", "sdist"]) + + console.print(f"[bright_blue]Building packages: {package_format}\n") + build_process = subprocess.run(build_command, capture_output=False, cwd=AIRFLOW_SOURCES_ROOT) + + if build_process.returncode != 0: + console.print("[red]Error building Airflow packages") + sys.exit(build_process.returncode) + else: + console.print("[green]Airflow packages built successfully") + + +def set_package_version(version: str) -> None: + console.print(f"\n[yellow]Setting {version} for Airflow package\n") + # replace __version__ with the version passed as argument in python + init_content = AIRFLOW_INIT_FILE.read_text() + init_content = re.sub(r'__version__ = "[^"]+"', f'__version__ = "{version}"', init_content) + AIRFLOW_INIT_FILE.write_text(init_content) + + +@contextmanager +def package_version(version_suffix: str): + release_version_matcher = re.compile(r"^\d+\.\d+\.\d+$") + airflow_version = get_current_airflow_version() + + update_version = False + if version_suffix: + if airflow_version.endswith(f".{version_suffix}"): + console.print( + f"[bright_blue]The {airflow_version} already has suffix {version_suffix}. Not updating it.\n" + ) + elif not release_version_matcher.match(airflow_version): + console.print( + f"[red]You should only pass version suffix if {airflow_version}" + f"does not have suffix in code. The version in code is: {airflow_version}.\n" + ) + console.print( + "[yellow]Make sure that you remove the suffix before using `--version-suffix-for-pypi`!" 
+ ) + sys.exit(1) + else: + update_version = True + if update_version: + set_package_version(f"{airflow_version}.{version_suffix}") + try: + yield + finally: + # Set the version back to the original version + if update_version: + set_package_version(airflow_version) + + +clean_build_directory() +mark_git_directory_as_safe() + +with package_version(VERSION_SUFFIX): + build_airflow_packages(PACKAGE_FORMAT) -rich.print("[bright_blue]Cleaning build directories\n") - -for egg_info_file in AIRFLOW_SOURCES_ROOT.glob("*egg-info*"): - rmtree(egg_info_file, ignore_errors=True) - -rmtree(AIRFLOW_SOURCES_ROOT / "build", ignore_errors=True) - -rich.print("[green]Cleaned build directories\n\n") - -version_suffix = os.environ.get("VERSION_SUFFIX_FOR_PYPI", "") -package_format = os.environ.get("PACKAGE_FORMAT", "wheel") - -rich.print(f"[bright_blue]Marking {AIRFLOW_SOURCES_ROOT} as safe directory for git commands.\n") - -subprocess.run( - ["git", "config", "--global", "--unset-all", "safe.directory"], - cwd=AIRFLOW_SOURCES_ROOT, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - check=False, -) - -subprocess.run( - ["git", "config", "--global", "--add", "safe.directory", AIRFLOW_SOURCES_ROOT], - cwd=AIRFLOW_SOURCES_ROOT, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - check=True, -) - -rich.print(f"[green]Marked {AIRFLOW_SOURCES_ROOT} as safe directory for git commands.\n") - -rich.print("[bright_blue]Checking airflow version\n") - -airflow_version = subprocess.check_output( - [sys.executable, "setup.py", "--version"], text=True, cwd=AIRFLOW_SOURCES_ROOT -).strip() - -rich.print(f"[green]Airflow version: {airflow_version}\n") - -RELEASED_VERSION_MATCHER = re.compile(r"^\d+\.\d+\.\d+$") - -command = [sys.executable, "setup.py"] - -if version_suffix: - if RELEASED_VERSION_MATCHER.match(airflow_version): - rich.print(f"[warning]Adding {version_suffix} suffix to the {airflow_version}") - command.extend(["egg_info", "--tag-build", version_suffix]) - elif not airflow_version.endswith(version_suffix): - rich.print(f"[red]Version {airflow_version} does not end with {version_suffix}. 
Using !") - sys.exit(1) - -if package_format in ["both", "wheel"]: - command.append("bdist_wheel") -if package_format in ["both", "sdist"]: - command.append("sdist") - -rich.print(f"[bright_blue]Building packages: {package_format}\n") - -process = subprocess.run(command, capture_output=True, text=True, cwd=AIRFLOW_SOURCES_ROOT) - -process_summary("Airflow packages built successfully", "Error building Airflow packages", process) - -if os.environ.get("GITHUB_ACTIONS", "") != "": - print("::endgroup::") - -rich.print("[green]Packages built successfully:\n") for file in (AIRFLOW_SOURCES_ROOT / "dist").glob("apache*"): - rich.print(file.name) -rich.print() + console.print(file.name) +console.print() diff --git a/scripts/in_container/verify_providers.py b/scripts/in_container/verify_providers.py index 769f85d520a68..e0491963dbf79 100755 --- a/scripts/in_container/verify_providers.py +++ b/scripts/in_container/verify_providers.py @@ -18,6 +18,7 @@ from __future__ import annotations import importlib +import json import logging import os import pkgutil @@ -40,6 +41,8 @@ AIRFLOW_SOURCES_ROOT = Path(__file__).parents[2].resolve() PROVIDERS_PATH = AIRFLOW_SOURCES_ROOT / "airflow" / "providers" +GENERATED_PROVIDERS_DEPENDENCIES_FILE = AIRFLOW_SOURCES_ROOT / "generated" / "provider_dependencies.json" +ALL_DEPENDENCIES = json.loads(GENERATED_PROVIDERS_DEPENDENCIES_FILE.read_text()) USE_AIRFLOW_VERSION = os.environ.get("USE_AIRFLOW_VERSION") or "" IS_AIRFLOW_VERSION_PROVIDED = re.match("^(\d+)\.(\d+)\.(\d+)\S*$", USE_AIRFLOW_VERSION) @@ -129,13 +132,7 @@ class ProviderPackageDetails(NamedTuple): def get_all_providers() -> list[str]: - """Returns all providers for regular packages. - - :return: list of providers that are considered for provider packages - """ - from setup import ALL_PROVIDERS - - return list(ALL_PROVIDERS) + return list(ALL_DEPENDENCIES.keys()) def import_all_classes( diff --git a/scripts/tools/initialize_virtualenv.py b/scripts/tools/initialize_virtualenv.py index e5e5633fdf22f..1058a21af2d0a 100755 --- a/scripts/tools/initialize_virtualenv.py +++ b/scripts/tools/initialize_virtualenv.py @@ -74,10 +74,13 @@ def pip_install_requirements() -> int: You can specify extras as single coma-separated parameter to install. For example -* google,amazon,microsoft.azure -* devel_all +* devel - to have all development dependencies required to test core. +* devel-* - to selectively install tools that we use to run scripts, tests, static checks etc. +* google,amazon,microsoft_azure - to install dependencies needed at runtime by specified providers +* devel-all-dbs - to have all development dependencies required for all DB providers +* devel-all - to have all development dependencies required for all providers -Note that "devel_all" installs all possible dependencies and we have > 600 of them, +Note that "devel-all" installs all possible dependencies and we have > 600 of them, which might not be possible to install cleanly on your host because of lack of system packages. It's easier to install extras one-by-one as needed. diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index ab7b34511cf40..0000000000000 --- a/setup.cfg +++ /dev/null @@ -1,244 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[metadata] -name = apache-airflow -summary = Programmatically author, schedule and monitor data pipelines -author = Apache Software Foundation -author_email = dev@airflow.apache.org -url = https://airflow.apache.org/ -version = attr: airflow.__version__ -long_description = file: generated/PYPI_README.md -long_description_content_type = text/markdown -license = Apache License 2.0 -license_files = - LICENSE - NOTICE - licenses/*.txt -classifiers = - Development Status :: 5 - Production/Stable - Environment :: Console - Environment :: Web Environment - Intended Audience :: Developers - Intended Audience :: System Administrators - License :: OSI Approved :: Apache Software License - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: 3.11 - Topic :: System :: Monitoring - Framework :: Apache Airflow -project_urls = - Bug Tracker=https://github.com/apache/airflow/issues - Documentation=https://airflow.apache.org/docs/ - Downloads=https://archive.apache.org/dist/airflow/ - Release Notes=https://airflow.apache.org/docs/apache-airflow/stable/release_notes.html - Slack Chat=https://s.apache.org/airflow-slack - Source Code=https://github.com/apache/airflow - Twitter=https://twitter.com/ApacheAirflow - YouTube=https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/ - -[options] -zip_safe = False -include_package_data = True -# Mainly because of distutils deprecation and some packages not being compatible with it, we should -# Limit airflow to < 3.12 until those dependencies are ready and until we can support Python 3.12 -python_requires = ~=3.8,<3.12 -packages = find: -setup_requires = - gitpython - wheel -##################################################################################################### -# IMPORTANT NOTE!!!!!!!!!!!!!!! -# IF you are removing dependencies from this list, please make sure that you also increase -# DEPENDENCIES_EPOCH_NUMBER in the Dockerfile.ci -##################################################################################################### -install_requires = - # Alembic is important to handle our migrations in predictable and performant way. It is developed - # together with SQLAlchemy. 
Our experience with Alembic is that it very stable in minor version - alembic>=1.6.3, <2.0 - argcomplete>=1.10 - asgiref - attrs>=22.1.0 - blinker - # Colorlog 6.x merges TTYColoredFormatter into ColoredFormatter, breaking backwards compatibility with 4.x - # Update CustomTTYColoredFormatter to remove - colorlog>=4.0.2, <5.0 - configupdater>=3.1.1 - # `airflow/www/extensions/init_views` imports `connexion.decorators.validation.RequestBodyValidator` - # connexion v3 has refactored the entire module to middleware, see: /spec-first/connexion/issues/1525 - # Specifically, RequestBodyValidator was removed in: /spec-first/connexion/pull/1595 - # The usage was added in #30596, seemingly only to override and improve the default error message. - # Either revert that change or find another way, preferably without using connexion internals. - # This limit can be removed after https://github.com/apache/airflow/issues/35234 is fixed - connexion[flask]>=2.10.0,<3.0 - cron-descriptor>=1.2.24 - croniter>=0.3.17 - cryptography>=0.9.3 - deprecated>=1.2.13 - dill>=0.2.2 - # Flask 2.3 is scheduled to introduce a number of deprecation removals - some of them might be breaking - # for our dependencies - notably `_app_ctx_stack` and `_request_ctx_stack` removals. - # We should remove the limitation after 2.3 is released and our dependencies are updated to handle it - flask>=2.2,<2.3 - # We are tightly coupled with FAB version because we vendored in part of FAB code related to security manager - # This is done as part of preparation to removing FAB as dependency, but we are not ready for it yet - # Every time we update FAB version here, please make sure that you review the classes and models in - # `airflow/www/fab_security` with their upstream counterparts. In particular, make sure any breaking changes, - # for example any new methods, are accounted for. - # NOTE! When you change the value here, you also have to update flask-appbuilder[oauth] in setup.py - flask-appbuilder==4.3.10 - flask-caching>=1.5.0 - flask-login>=0.6.2 - flask-session>=0.4.0 - flask-wtf>=0.15 - fsspec>=2023.10.0 - google-re2>=1.0 - gunicorn>=20.1.0 - httpx - importlib_metadata>=1.7;python_version<"3.9" - importlib_resources>=5.2;python_version<"3.9" - itsdangerous>=2.0 - jinja2>=3.0.0 - jsonschema>=4.18.0 - lazy-object-proxy - linkify-it-py>=2.0.0 - lockfile>=0.12.2 - markdown>=3.0 - markdown-it-py>=2.1.0 - markupsafe>=1.1.1 - marshmallow-oneofschema>=2.0.1 - mdit-py-plugins>=0.3.0 - opentelemetry-api>=1.15.0 - opentelemetry-exporter-otlp - packaging>=14.0 - pathspec>=0.9.0 - # When (if) pendulum 3 released it would introduce changes in module/objects imports, - # since we are tightly coupled with pendulum library internally it will breaks Airflow functionality. - pendulum>=2.0,<3.0 - pluggy>=1.0 - psutil>=4.2.0 - pydantic>=2.3.0 - pygments>=2.0.1 - pyjwt>=2.0.0 - python-daemon>=3.0.0 - python-dateutil>=2.3 - python-nvd3>=0.15.0 - python-slugify>=5.0 - rfc3339_validator>=0.1.4 - rich>=12.4.4 - rich-argparse>=1.0.0 - setproctitle>=1.1.8 - # We use some deprecated features of sqlalchemy 2.0 and we should replace them before we can upgrade - # See https://sqlalche.me/e/b8d9 for details of deprecated features - # you can set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. 
- # The issue tracking it is https://github.com/apache/airflow/issues/28723 - sqlalchemy>=1.4.28,<2.0 - sqlalchemy_jsonfield>=1.0 - tabulate>=0.7.5 - tenacity>=6.2.0,!=8.2.0 - termcolor>=1.1.0 - typing-extensions>=4.0.0 - # We should remove this dependency when Providers are limited to Airflow 2.7+ - # as we replaced the usage of unicodecsv with csv in Airflow 2.7 - # See https://github.com/apache/airflow/pull/31693 - # We should also remove "licenses/LICENSE-unicodecsv.txt" file when we remove this dependency - unicodecsv>=0.14.1 - universal_pathlib>=0.1.4 - # Werkzug 3 breaks Flask-Login 0.6.2 - # we should remove this limitation when FAB supports Flask 2.3 - werkzeug>=2.0,<3 - -[options.packages.find] -include = - airflow* - -[options.package_data] -airflow= - py.typed - alembic.ini - git_version - customized_form_field_behaviours.schema.json - provider_info.schema.json - -airflow.api_connexion.openapi=*.yaml -airflow.auth.managers.fab.openapi=*.yaml -airflow.serialization=*.json -airflow.utils= - context.pyi - -[options.data_files] -generated= - generated/provider_dependencies.json - -[options.entry_points] -console_scripts= - airflow=airflow.__main__:main - -[bdist_wheel] -python-tag=py3 - - -[files] -packages = airflow - -[easy_install] - -[mypy] -ignore_missing_imports = True -no_implicit_optional = True -warn_redundant_casts = True -warn_unused_ignores = False -plugins = - dev/mypy/plugin/decorators.py, - dev/mypy/plugin/outputs.py -pretty = True -show_error_codes = True -# Mypy since 0.991 warns about type annotations being present in an untyped -# function since they are not checked and technically meaningless. Ideally we -# should make them meaningful by setting check_untyped_defs = True, but there -# are too many issues in those currently unchecked functions to make it viable -# in the short term, so this error is simply ignored for now. -disable_error_code = annotation-unchecked - -[mypy-airflow.config_templates.default_webserver_config] -# This file gets written to user installs, so lets not litter it with type comments -disable_error_code = var-annotated - -[mypy-airflow.migrations.*] -ignore_errors = True - -# Let's assume all google.cloud packages have implicit optionals -# Most of them don't but even if they do, it does not matter -[mypy-google.cloud.*] -no_implicit_optional = False - -# Let's assume all azure packages have implicit optionals -[mypy-azure.*] -no_implicit_optional = False - - -[mypy-referencing.*] -# Referencing has some old type annotations that are not compatible with new versions of mypy -ignore_errors = True - - -[mypy-apache_beam.*] -# Beam has some old type annotations and they introduced an error recently with bad signature of -# a function. This is captured in https://github.com/apache/beam/issues/29927 -# and we should remove this exclusion when it is fixed. -ignore_errors = True diff --git a/setup.py b/setup.py deleted file mode 100644 index 1c831fb98dac7..0000000000000 --- a/setup.py +++ /dev/null @@ -1,1084 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Setup.py for the Airflow project.""" -# To make sure the CI build is using "upgrade to newer dependencies", which is useful when you want to check -# if the dependencies are still compatible with the latest versions as they seem to break some unrelated -# tests in main, you can modify this file. The modification can be simply modifying this particular comment. -# e.g. you can modify the following number "00001" to something else to trigger it. -from __future__ import annotations - -import glob -import json -import logging -import os -import subprocess -import sys -import textwrap -import unittest -from copy import deepcopy -from pathlib import Path -from typing import Iterable - -from setuptools import Command, Distribution, find_namespace_packages, setup -from setuptools.command.develop import develop as develop_orig -from setuptools.command.install import install as install_orig - -# Setuptools patches this import to point to a vendored copy instead of the -# stdlib, which is deprecated in Python 3.10 and will be removed in 3.12. -from distutils import log # isort: skip - - -# Controls whether providers are installed from packages or directly from sources -# It is turned on by default in case of development environments such as Breeze -# And it is particularly useful when you add a new provider and there is no -# PyPI version to install the provider package from -INSTALL_PROVIDERS_FROM_SOURCES = "INSTALL_PROVIDERS_FROM_SOURCES" -PY39 = sys.version_info >= (3, 9) - -logger = logging.getLogger(__name__) - -AIRFLOW_SOURCES_ROOT = Path(__file__).parent.resolve() -PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "airflow" / "providers" - -CROSS_PROVIDERS_DEPS = "cross-providers-deps" -DEPS = "deps" -CURRENT_PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}" - - -def apply_pypi_suffix_to_airflow_packages(dependencies: list[str]) -> None: - """ - Apply version suffix to dependencies that do not have one. - - Looks through the list of dependencies, finds which one are airflow or airflow providers packages - and applies the version suffix to those of them that do not have the suffix applied yet. - - :param dependencies: list of dependencies to add suffix to - """ - for i in range(len(dependencies)): - dependency = dependencies[i] - if dependency.startswith("apache-airflow"): - # in case we want to depend on other airflow package, the chance is the package - # has not yet been released to PyPI and we only see it as a local package that is - # being installed with .dev0 suffix in CI. Unfortunately, there is no way in standard - # PEP-440 compliant way to specify version that would be both - releasable, and - # testable to install on CI with .dev0 or .rc suffixes. We could add `--pre` flag to - # enable it, but `--pre` flag is not selective and will work for all packages so - # we would automatically install all "pre-release" packages for all packages that - # we install from PyPI - and this is definitely not what we want. 
So in order to - # install only airflow packages that are available in sources in .dev0 or .rc version - # we need to dynamically modify the dependencies here. - if ">=" in dependency: - package, version = dependency.split(">=") - version_spec = f">={version}" - version_suffix = os.environ.get("VERSION_SUFFIX_FOR_PYPI") - if version_suffix and version_suffix not in version_spec: - version_spec += version_suffix - dependencies[i] = f"{package}{version_spec}" - - -# NOTE! IN Airflow 2.4.+ dependencies for providers are maintained in `provider.yaml` files for each -# provider separately. They are loaded here and if you want to modify them, you need to modify -# corresponding provider.yaml file. -# -def fill_provider_dependencies() -> dict[str, dict[str, list[str]]]: - # in case we are loading setup from pre-commits, we want to skip the check for python version - # because if someone uses a version of Python where providers are excluded, the setup will fail - # to see the extras for those providers - skip_python_version_check = os.environ.get("_SKIP_PYTHON_VERSION_CHECK") - try: - with AIRFLOW_SOURCES_ROOT.joinpath("generated", "provider_dependencies.json").open() as f: - dependencies = json.load(f) - provider_dict = {} - for key, value in dependencies.items(): - if value["state"] in ["suspended", "removed"]: - continue - if value.get(DEPS): - apply_pypi_suffix_to_airflow_packages(value[DEPS]) - if CURRENT_PYTHON_VERSION not in value["excluded-python-versions"] or skip_python_version_check: - provider_dict[key] = value - return provider_dict - except Exception as e: - print(f"Exception while loading provider dependencies {e}") - # we can ignore loading dependencies when they are missing - they are only used to generate - # correct extras when packages are build and when we install airflow from sources - # (in both cases the provider_dependencies should be present). - return {} - - -PROVIDER_DEPENDENCIES = fill_provider_dependencies() - - -def airflow_test_suite() -> unittest.TestSuite: - """Test suite for Airflow tests.""" - test_loader = unittest.TestLoader() - test_suite = test_loader.discover(str(AIRFLOW_SOURCES_ROOT / "tests"), pattern="test_*.py") - return test_suite - - -class CleanCommand(Command): - """ - Command to tidy up the project root. - - Registered as cmdclass in setup() so it can be called with ``python setup.py extra_clean``. - """ - - description = "Tidy up the project root" - user_options: list[str] = [] - - def initialize_options(self) -> None: - """Set default values for options.""" - - def finalize_options(self) -> None: - """Set final values for options.""" - - @staticmethod - def rm_all_files(files: list[str]) -> None: - """Remove all files from the list.""" - for file in files: - try: - os.remove(file) - except Exception as e: - logger.warning("Error when removing %s: %s", file, e) - - def run(self) -> None: - """Remove temporary files and directories.""" - os.chdir(str(AIRFLOW_SOURCES_ROOT)) - self.rm_all_files(glob.glob("./build/*")) - self.rm_all_files(glob.glob("./**/__pycache__/*", recursive=True)) - self.rm_all_files(glob.glob("./**/*.pyc", recursive=True)) - self.rm_all_files(glob.glob("./dist/*")) - self.rm_all_files(glob.glob("./*.egg-info")) - self.rm_all_files(glob.glob("./docker-context-files/*.whl")) - self.rm_all_files(glob.glob("./docker-context-files/*.tgz")) - - -class CompileAssets(Command): - """ - Compile and build the frontend assets using yarn and webpack. 
- - Registered as cmdclass in setup() so it can be called with ``python setup.py compile_assets``. - """ - - description = "Compile and build the frontend assets" - user_options: list[str] = [] - - def initialize_options(self) -> None: - """Set default values for options.""" - - def finalize_options(self) -> None: - """Set final values for options.""" - - def run(self) -> None: - """Run a command to compile and build assets.""" - www_dir = AIRFLOW_SOURCES_ROOT / "airflow" / "www" - subprocess.check_call(["yarn", "install", "--frozen-lockfile"], cwd=str(www_dir)) - subprocess.check_call(["yarn", "run", "build"], cwd=str(www_dir)) - - -class ListExtras(Command): - """ - List all available extras. - - Registered as cmdclass in setup() so it can be called with ``python setup.py list_extras``. - """ - - description = "List available extras" - user_options: list[str] = [] - - def initialize_options(self) -> None: - """Set default values for options.""" - - def finalize_options(self) -> None: - """Set final values for options.""" - - def run(self) -> None: - """List extras.""" - print("\n".join(textwrap.wrap(", ".join(EXTRAS_DEPENDENCIES.keys()), 100))) - - -def git_version() -> str: - """ - Return a version to identify the state of the underlying git repo. - - The version will indicate whether the head of the current git-backed working directory - is tied to a release tag or not : it will indicate the former with a 'release:{version}' - prefix and the latter with a '.dev0' suffix. Following the prefix will be a sha of the - current branch head. Finally, a "dirty" suffix is appended to indicate that uncommitted - changes are present. - - :return: Found Airflow version in Git repo - """ - try: - import git - - try: - repo = git.Repo(str(AIRFLOW_SOURCES_ROOT / ".git")) - except git.NoSuchPathError: - logger.warning(".git directory not found: Cannot compute the git version") - return "" - except git.InvalidGitRepositoryError: - logger.warning("Invalid .git directory not found: Cannot compute the git version") - return "" - except ImportError: - logger.warning("gitpython not found: Cannot compute the git version.") - return "" - if repo: - sha = repo.head.commit.hexsha - if repo.is_dirty(): - return f".dev0+{sha}.dirty" - # commit is clean - return f".release:{sha}" - return "no_git_version" - - -def write_version(filename: str = str(AIRFLOW_SOURCES_ROOT / "airflow" / "git_version")) -> None: - """ - Write the Semver version + git hash to file, e.g. ".dev0+2f635dc265e78db6708f59f68e8009abb92c1e65". - - :param str filename: Destination file to write. - """ - text = git_version() - with open(filename, "w") as file: - file.write(text) - - -# -# NOTE! IN Airflow 2.4.+ dependencies for providers are maintained in `provider.yaml` files for each -# provider separately. Before, the provider dependencies were kept here. THEY ARE NOT HERE ANYMORE. -# -# 'Start dependencies group' and 'End dependencies group' are marks for ./scripts/ci/check_order_setup.py -# If you change these marks you should also change ./scripts/ci/check_order_setup.py -# Start dependencies group -async_packages = [ - "eventlet>=0.33.3", - "gevent>=0.13", - "greenlet>=0.4.9", -] -atlas = [ - "atlasclient>=0.1.2", -] -celery = [ - # The Celery is known to introduce problems when upgraded to a MAJOR version. Airflow Core - # Uses Celery for CeleryExecutor, and we also know that Kubernetes Python client follows SemVer - # (https://docs.celeryq.dev/en/stable/contributing.html?highlight=semver#versions). 
- # This is a crucial component of Airflow, so we should limit it to the next MAJOR version and only - # deliberately bump the version when we tested it, and we know it can be bumped. - # Bumping this version should also be connected with - # limiting minimum airflow version supported in celery provider due to the - # potential breaking changes in Airflow Core as well (celery is added as extra, so Airflow - # core is not hard-limited via install-requires, only by extra). - "celery>=5.3.0,<6" -] -cgroups = [ - # Cgroupspy 0.2.2 added Python 3.10 compatibility - "cgroupspy>=0.2.2", -] -deprecated_api = [ - "requests>=2.26.0", -] -doc = [ - # sphinx-autoapi fails with astroid 3.0, see: https://github.com/readthedocs/sphinx-autoapi/issues/407 - # This was fixed in sphinx-autoapi 3.0, however it has requirement sphinx>=6.1, but we stuck on 5.x - "astroid>=2.12.3, <3.0", - "checksumdir", - # click 8.1.4 and 8.1.5 generate mypy errors due to typing issue in the upstream package: - # https://github.com/pallets/click/issues/2558 - "click>=8.0,!=8.1.4,!=8.1.5", - # Docutils 0.17.0 converts generated
<div class="section"> into <section>
and breaks our doc formatting - # By adding a lot of whitespace separation. This limit can be lifted when we update our doc to handle - #
<section>
tags for sections - "docutils<0.17.0", - "eralchemy2", - "sphinx-airflow-theme", - "sphinx-argparse>=0.1.13", - "sphinx-autoapi>=2.0.0", - "sphinx-copybutton", - "sphinx-design>=0.5.0", - "sphinx-jinja>=2.0", - "sphinx-rtd-theme>=0.1.6", - "sphinx>=5.2.0", - "sphinxcontrib-httpdomain>=1.7.0", - "sphinxcontrib-redoc>=1.6.0", - "sphinxcontrib-spelling>=7.3", -] -doc_gen = [ - "eralchemy2", - "graphviz>=0.12", -] -flask_appbuilder_oauth = [ - "authlib>=1.0.0", - # The version here should be upgraded at the same time as flask-appbuilder in setup.cfg - "flask-appbuilder[oauth]==4.3.10", -] -graphviz = ["graphviz>=0.12"] -kerberos = [ - "pykerberos>=1.1.13", - "requests_kerberos>=0.10.0", - "thrift_sasl>=0.2.0", -] -kubernetes = [ - # The Kubernetes API is known to introduce problems when upgraded to a MAJOR version. Airflow Core - # Uses Kubernetes for Kubernetes executor, and we also know that Kubernetes Python client follows SemVer - # (https://github.com/kubernetes-client/python#compatibility). This is a crucial component of Airflow - # So we should limit it to the next MAJOR version and only deliberately bump the version when we - # tested it, and we know it can be bumped. Bumping this version should also be connected with - # limiting minimum airflow version supported in cncf.kubernetes provider, due to the - # potential breaking changes in Airflow Core as well (kubernetes is added as extra, so Airflow - # core is not hard-limited via install-requires, only by extra). - "cryptography>=2.0.0", - "kubernetes>=21.7.0,<24", -] -ldap = [ - "ldap3>=2.5.1", - "python-ldap", -] -leveldb = ["plyvel"] -otel = ["opentelemetry-exporter-prometheus"] -pandas = [ - "pandas>=1.2.5", -] -password = [ - "bcrypt>=2.0.0", - "flask-bcrypt>=0.7.1", -] -rabbitmq = [ - "amqp", -] -sentry = [ - "blinker>=1.1", - # Sentry SDK 1.33 is broken when greenlets are installed and fails to import - # See https://github.com/getsentry/sentry-python/issues/2473 - "sentry-sdk>=1.32.0,!=1.33.0", -] -statsd = [ - "statsd>=3.3.0", -] -virtualenv = [ - "virtualenv", -] -webhdfs = [ - "hdfs[avro,dataframe,kerberos]>=2.0.4", -] -# End dependencies group - -# Mypy 0.900 and above ships only with stubs from stdlib so if we need other stubs, we need to install them -# manually as `types-*`. See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -# for details. We want to install them explicitly because we want to eventually move to -# mypyd which does not support installing the types dynamically with --install-types -mypy_dependencies = [ - # TODO: upgrade to newer versions of MyPy continuously as they are released - # Make sure to upgrade the mypy version in update-common-sql-api-stubs in .pre-commit-config.yaml - # when you upgrade it here !!!! 
- "mypy==1.2.0", - "types-aiofiles", - "types-certifi", - "types-croniter", - "types-Deprecated", - "types-docutils", - "types-paramiko", - "types-protobuf", - "types-python-dateutil", - "types-python-slugify", - "types-pytz", - "types-redis", - "types-requests", - "types-setuptools", - "types-termcolor", - "types-tabulate", - "types-toml", - "types-Markdown", - "types-PyMySQL", - "types-PyYAML", -] - -# make sure to update providers/amazon/provider.yaml botocore min version when you update it here -_MIN_BOTO3_VERSION = "1.33.0" - -_devel_only_amazon = [ - "aws_xray_sdk>=2.12.0", - "moto[cloudformation,glue]>=4.2.12", - f"mypy-boto3-appflow>={_MIN_BOTO3_VERSION}", - f"mypy-boto3-rds>={_MIN_BOTO3_VERSION}", - f"mypy-boto3-redshift-data>={_MIN_BOTO3_VERSION}", - f"mypy-boto3-s3>={_MIN_BOTO3_VERSION}", -] - -_devel_only_azure = [ - "pywinrm", -] - -_devel_only_breeze = [ - "filelock", -] - -_devel_only_debuggers = [ - "ipdb", -] - -_devel_only_deltalake = [ - "deltalake>=0.12.0", -] - -_devel_only_devscripts = [ - "click>=8.0", - "gitpython", - "pipdeptree", - "pygithub", - "rich-click>=1.7.0", - "restructuredtext-lint", - "semver", - "towncrier", - "twine", - "wheel", -] - -_devel_only_duckdb = [ - "duckdb>=0.9.0", -] - -_devel_only_mongo = [ - "mongomock", -] - -_devel_only_iceberg = [ - "pyiceberg>=0.5.0", -] - -_devel_only_sentry = [ - "blinker", -] - -_devel_only_static_checks = [ - "pre-commit", - "black", - "ruff==0.1.11", - "yamllint", -] - -_devel_only_tests = [ - "aioresponses", - "backports.zoneinfo>=0.2.1;python_version<'3.9'", - "beautifulsoup4>=4.7.1", - "coverage>=7.2", - "pytest>=7.1", - # Pytest-asyncio 0.23.0 and 0.23.1 break test collection - # See https://github.com/pytest-dev/pytest-asyncio/issues/703 for details. - "pytest-asyncio!=0.23.0,!=0.23.1", - "pytest-cov", - "pytest-httpx", - "pytest-icdiff", - "pytest-instafail", - "pytest-mock", - "pytest-rerunfailures", - "pytest-timeouts", - "pytest-xdist", - "requests_mock", - "time-machine", -] - -# Dependencies needed for development only -devel_only = [ - *_devel_only_amazon, - *_devel_only_azure, - *_devel_only_breeze, - *_devel_only_debuggers, - *_devel_only_deltalake, - *_devel_only_devscripts, - *_devel_only_duckdb, - *_devel_only_mongo, - *_devel_only_iceberg, - *_devel_only_sentry, - *_devel_only_static_checks, - *_devel_only_tests, -] - -aiobotocore = [ - # This required for AWS deferrable operators. - # There is conflict between boto3 and aiobotocore dependency botocore. - # TODO: We can remove it once boto3 and aiobotocore both have compatible botocore version or - # boto3 have native aync support and we move away from aio aiobotocore - "aiobotocore>=2.1.1", -] - -s3fs = [ - # This is required for support of S3 file system which uses aiobotocore - # which can have a conflict with boto3 as mentioned above - "s3fs>=2023.10.0", -] - -saml = [ - # This is required for support of SAML which might be used by some providers (e.g. 
Amazon) - "python3-saml>=1.16.0", -] - - -def get_provider_dependencies(provider_name: str) -> list[str]: - if provider_name not in PROVIDER_DEPENDENCIES: - return [] - return PROVIDER_DEPENDENCIES[provider_name][DEPS] - - -def get_unique_dependency_list(req_list_iterable: Iterable[list[str]]): - _all_reqs: set[str] = set() - for req_list in req_list_iterable: - for req in req_list: - _all_reqs.add(req) - return list(_all_reqs) - - -devel = get_unique_dependency_list( - [ - aiobotocore, - cgroups, - devel_only, - doc, - kubernetes, - mypy_dependencies, - get_provider_dependencies("mysql"), - pandas, - password, - s3fs, - saml, - ] -) - -devel_hadoop = get_unique_dependency_list( - [ - devel, - get_provider_dependencies("apache.hdfs"), - get_provider_dependencies("apache.hive"), - get_provider_dependencies("apache.hdfs"), - get_provider_dependencies("apache.hive"), - get_provider_dependencies("apache.impala"), - kerberos, - get_provider_dependencies("presto"), - webhdfs, - ] -) - -# Those are all additional extras which do not have their own 'providers' -# The 'apache.atlas' and 'apache.webhdfs' are extras that provide additional libraries -# but they do not have separate providers (yet?), they are merely there to add extra libraries -# That can be used in custom python/bash operators. -ADDITIONAL_EXTRAS_DEPENDENCIES: dict[str, list[str]] = { - "apache.atlas": atlas, - "apache.webhdfs": webhdfs, -} - -# Those are extras that are extensions of the 'core' Airflow. They provide additional features -# To airflow core. They do not have separate providers because they do not have any operators/hooks etc. -CORE_EXTRAS_DEPENDENCIES: dict[str, list[str]] = { - "aiobotocore": aiobotocore, - "async": async_packages, - "celery": celery, # TODO: remove and move to a regular provider package in a separate PR - "cgroups": cgroups, - "cncf.kubernetes": kubernetes, # TODO: remove and move to a regular provider package in a separate PR - "deprecated_api": deprecated_api, - "github_enterprise": flask_appbuilder_oauth, - "google_auth": flask_appbuilder_oauth, - "graphviz": graphviz, - "kerberos": kerberos, - "ldap": ldap, - "leveldb": leveldb, - "otel": otel, - "pandas": pandas, - "password": password, - "rabbitmq": rabbitmq, - "s3fs": s3fs, - "saml": saml, - "sentry": sentry, - "statsd": statsd, - "virtualenv": virtualenv, -} - - -def filter_out_excluded_extras() -> Iterable[tuple[str, list[str]]]: - for key, value in CORE_EXTRAS_DEPENDENCIES.items(): - if value: - yield key, value - else: - print(f"Removing extra {key} as it has been excluded") - - -CORE_EXTRAS_DEPENDENCIES = dict(filter_out_excluded_extras()) - -EXTRAS_DEPENDENCIES: dict[str, list[str]] = deepcopy(CORE_EXTRAS_DEPENDENCIES) - - -def add_extras_for_all_providers() -> None: - for provider_name, provider_dict in PROVIDER_DEPENDENCIES.items(): - EXTRAS_DEPENDENCIES[provider_name] = provider_dict[DEPS] - - -def add_additional_extras() -> None: - for extra_name, extra_dependencies in ADDITIONAL_EXTRAS_DEPENDENCIES.items(): - EXTRAS_DEPENDENCIES[extra_name] = extra_dependencies - - -add_extras_for_all_providers() -add_additional_extras() - -############################################################################################################# -# The whole section can be removed in Airflow 3.0 as those old aliases are deprecated in 2.* series -############################################################################################################# - -# Dictionary of aliases from 1.10 - deprecated in Airflow 2.* -EXTRAS_DEPRECATED_ALIASES: 
dict[str, str] = { - "atlas": "apache.atlas", - "aws": "amazon", - "azure": "microsoft.azure", - "cassandra": "apache.cassandra", - "crypto": "", # this is legacy extra - all dependencies are already "install-requires" - "druid": "apache.druid", - "gcp": "google", - "gcp_api": "google", - "hdfs": "apache.hdfs", - "hive": "apache.hive", - "kubernetes": "cncf.kubernetes", - "mssql": "microsoft.mssql", - "pinot": "apache.pinot", - "s3": "amazon", - "spark": "apache.spark", - "webhdfs": "apache.webhdfs", - "winrm": "microsoft.winrm", -} - -EXTRAS_DEPRECATED_ALIASES_NOT_PROVIDERS: list[str] = [ - "crypto", - "webhdfs", -] - -EXTRAS_DEPRECATED_ALIASES_IGNORED_FROM_REF_DOCS: list[str] = [ - "jira", -] - - -def add_extras_for_all_deprecated_aliases() -> None: - """ - Add extras for all deprecated aliases. - - Requirements for those deprecated aliases are the same as the extras they are replaced with. - The dependencies are not copies - those are the same lists as for the new extras. This is intended. - Thanks to that if the original extras are later extended with providers, aliases are extended as well. - """ - for alias, extra in EXTRAS_DEPRECATED_ALIASES.items(): - dependencies = EXTRAS_DEPENDENCIES.get(extra) if extra != "" else [] - if dependencies is not None: - EXTRAS_DEPENDENCIES[alias] = dependencies - - -def add_all_deprecated_provider_packages() -> None: - """ - For deprecated aliases that are providers, swap the providers dependencies to be the provider itself. - - e.g. {"kubernetes": ["kubernetes>=3.0.0, <12.0.0", ...]} becomes - {"kubernetes": ["apache-airflow-provider-cncf-kubernetes"]} - """ - for alias, provider in EXTRAS_DEPRECATED_ALIASES.items(): - if alias not in EXTRAS_DEPRECATED_ALIASES_NOT_PROVIDERS: - replace_extra_dependencies_with_provider_packages(alias, [provider]) - - -add_extras_for_all_deprecated_aliases() - -############################################################################################################# -# End of deprecated section -############################################################################################################# - -# This is list of all providers. It's a shortcut for anyone who would like to easily get list of -# All providers. It is used by pre-commits. -ALL_PROVIDERS = list(PROVIDER_DEPENDENCIES.keys()) - -ALL_DB_PROVIDERS = [ - "apache.cassandra", - "apache.drill", - "apache.druid", - "apache.hdfs", - "apache.hive", - "apache.impala", - "apache.pinot", - "arangodb", - "cloudant", - "databricks", - "exasol", - "influxdb", - "microsoft.mssql", - "mongo", - "mysql", - "neo4j", - "postgres", - "presto", - "trino", - "vertica", -] - - -def get_all_db_dependencies() -> list[str]: - _all_db_reqs: set[str] = set() - for provider in ALL_DB_PROVIDERS: - if provider in PROVIDER_DEPENDENCIES: - for req in PROVIDER_DEPENDENCIES[provider][DEPS]: - _all_db_reqs.add(req) - return list(_all_db_reqs) - - -# Special dependencies for all database-related providers. They are de-duplicated. -all_dbs = get_all_db_dependencies() - -# All db user extras here -EXTRAS_DEPENDENCIES["all_dbs"] = all_dbs - -# Requirements for all "user" extras (no devel). They are de-duplicated. 
Note that we do not need -# to separately add providers dependencies - they have been already added as 'providers' extras above -_all_dependencies = get_unique_dependency_list(EXTRAS_DEPENDENCIES.values()) - -_all_dependencies_without_airflow_providers = [k for k in _all_dependencies if "apache-airflow-" not in k] - -# All user extras here -# all is purely development extra and it should contain only direct dependencies of Airflow -# It should contain all dependencies of airflow and dependencies of all community providers, -# but not the providers themselves -EXTRAS_DEPENDENCIES["all"] = _all_dependencies_without_airflow_providers - -# This can be simplified to devel_hadoop + _all_dependencies due to inclusions -# but we keep it for explicit sake. We are de-duplicating it anyway. -devel_all = get_unique_dependency_list( - [_all_dependencies_without_airflow_providers, doc, doc_gen, devel, devel_hadoop] -) - -# Those are packages excluded for "all" dependencies -PACKAGES_EXCLUDED_FOR_ALL: list[str] = [] - - -def is_package_excluded(package: str, exclusion_list: list[str]) -> bool: - """ - Check if package should be excluded. - - :param package: package name (beginning of it) - :param exclusion_list: list of excluded packages - :return: true if package should be excluded - """ - return package.startswith(tuple(exclusion_list)) - - -def remove_provider_limits(package: str) -> str: - """ - Remove the limit for providers in devel_all to account for pre-release and development packages. - - :param package: package name (beginning of it) - :return: true if package should be excluded - """ - return ( - package.split(">=")[0] - if package.startswith("apache-airflow-providers") and ">=" in package - else package - ) - - -devel = [remove_provider_limits(package) for package in devel] -devel_all = [ - remove_provider_limits(package) - for package in devel_all - if not is_package_excluded(package=package, exclusion_list=PACKAGES_EXCLUDED_FOR_ALL) -] -devel_hadoop = [remove_provider_limits(package) for package in devel_hadoop] -devel_ci = devel_all - - -# Those are extras that we have to add for development purposes -# They can be use to install some predefined set of dependencies. -EXTRAS_DEPENDENCIES["doc"] = doc -EXTRAS_DEPENDENCIES["doc_gen"] = doc_gen -EXTRAS_DEPENDENCIES["devel"] = devel # devel already includes doc -EXTRAS_DEPENDENCIES["devel_hadoop"] = devel_hadoop # devel_hadoop already includes devel -EXTRAS_DEPENDENCIES["devel_all"] = devel_all -EXTRAS_DEPENDENCIES["devel_ci"] = devel_ci - - -def sort_extras_dependencies() -> dict[str, list[str]]: - """ - Sort dependencies; the dictionary order remains when keys() are retrieved. - - Sort both: extras and list of dependencies to make it easier to analyse problems - external packages will be first, then if providers are added they are added at the end of the lists. - """ - sorted_dependencies: dict[str, list[str]] = {} - sorted_extra_ids = sorted(EXTRAS_DEPENDENCIES.keys()) - for extra_id in sorted_extra_ids: - sorted_dependencies[extra_id] = sorted(EXTRAS_DEPENDENCIES[extra_id]) - return sorted_dependencies - - -EXTRAS_DEPENDENCIES = sort_extras_dependencies() - -# Those providers are pre-installed always when airflow is installed. 
-# TODO: Sync them with the ones in dev/breeze/src/airflow_breeze/util/packages.py
-PREINSTALLED_PROVIDERS = [
-    # Until we cut off the 2.8.0 branch and bump current airflow version to 2.9.0, we should
-    # Keep common.io commented out in order ot be able to generate PyPI constraints because
-    # The version from PyPI has requirement of apache-airflow>=2.8.0
-    # "common.io",
-    "common.sql",
-    "ftp",
-    "http",
-    "imap",
-    "sqlite",
-]
-
-
-def get_provider_package_name_from_package_id(package_id: str) -> str:
-    """
-    Build the name of provider package out of the package id provided.
-
-    :param package_id: id of the package (like amazon or microsoft.azure)
-    :return: full name of package in PyPI
-    """
-    version_spec = ""
-    if ">=" in package_id:
-        package, version = package_id.split(">=")
-        version_spec = f">={version}"
-        version_suffix = os.environ.get("VERSION_SUFFIX_FOR_PYPI")
-        if version_suffix:
-            version_spec += version_suffix
-    else:
-        package = package_id
-    package_suffix = package.replace(".", "-")
-    return f"apache-airflow-providers-{package_suffix}{version_spec}"
-
-
-def get_excluded_providers() -> list[str]:
-    """Return packages excluded for the current python version."""
-    return []
-
-
-def get_all_provider_packages() -> str:
-    """Return all provider packages configured in setup.py."""
-    excluded_providers = get_excluded_providers()
-    return " ".join(
-        get_provider_package_name_from_package_id(package)
-        for package in ALL_PROVIDERS
-        if package not in excluded_providers
-    )
-
-
-class AirflowDistribution(Distribution):
-    """The setuptools.Distribution subclass with Airflow specific behaviour."""
-
-    def __init__(self, attrs=None):
-        super().__init__(attrs)
-        self.install_requires = None
-
-    def parse_config_files(self, *args, **kwargs) -> None:
-        """
-        When asked to install providers from sources, ensure we don't *also* try to install from PyPI.
-
-        Also we should make sure that in this case we copy provider.yaml files so that
-        Providers manager can find package information.
-        """
-        super().parse_config_files(*args, **kwargs)
-        if os.getenv(INSTALL_PROVIDERS_FROM_SOURCES) == "true":
-            self.install_requires = [
-                req for req in self.install_requires if not req.startswith("apache-airflow-providers-")
-            ]
-            provider_yaml_files = glob.glob("airflow/providers/**/provider.yaml", recursive=True)
-            for provider_yaml_file in provider_yaml_files:
-                provider_relative_path = os.path.relpath(
-                    provider_yaml_file, str(AIRFLOW_SOURCES_ROOT / "airflow")
-                )
-                self.package_data["airflow"].append(provider_relative_path)
-            # Add python_kubernetes_script.jinja2 to package data
-            self.package_data["airflow"].append("providers/cncf/kubernetes/python_kubernetes_script.jinja2")
-            # Add default email template to package data
-            self.package_data["airflow"].append("providers/smtp/notifications/templates/email.html")
-        else:
-            self.install_requires.extend(
-                [
-                    get_provider_package_name_from_package_id(package_id)
-                    for package_id in PREINSTALLED_PROVIDERS
-                ]
-            )
-
-
-def replace_extra_dependencies_with_provider_packages(extra: str, providers: list[str]) -> None:
-    """
-    Replace extra dependencies with provider package.
-
-    The intention here is that when the provider is added as dependency of extra, there is no
-    need to add the dependencies separately. This is not needed and even harmful, because in
-    case of future versions of the provider, the dependencies might change, so hard-coding
-    dependencies from the version that was available at the release time might cause dependency
-    conflicts in the future.
-
-    Say for example that you have salesforce provider with those deps:
-
-    { 'salesforce': ['simple-salesforce>=1.0.0', 'tableauserverclient'] }
-
-    Initially ['salesforce'] extra has those dependencies, and it works like that when you install
-    it when INSTALL_PROVIDERS_FROM_SOURCES is set to `true` (during the development). However, when
-    the production installation is used, The dependencies are changed:
-
-    { 'salesforce': ['apache-airflow-providers-salesforce'] }
-
-    And then, 'apache-airflow-providers-salesforce' package has those 'install_requires' dependencies:
-    ['simple-salesforce>=1.0.0', 'tableauserverclient']
-
-    So transitively 'salesforce' extra has all the dependencies it needs and in case the provider
-    changes its dependencies, they will transitively change as well.
-
-    In the constraint mechanism we save both - provider versions and its dependencies
-    version, which means that installation using constraints is repeatable.
-
-    For K8s and Celery which are both "Core executors" and "Providers" we have to
-    add the base dependencies to core as well, in order to mitigate problems where
-    newer version of provider will have less strict limits. This should be done for both
-    extras and their deprecated aliases. This is not a full protection however, the way
-    extras work, this will not add "hard" limits for Airflow and the user who does not use
-    constraints.
-
-    :param extra: Name of the extra to add providers to
-    :param providers: list of provider ids
-    """
-    if extra in ["cncf.kubernetes", "kubernetes", "celery"]:
-        EXTRAS_DEPENDENCIES[extra].extend(
-            [get_provider_package_name_from_package_id(package_name) for package_name in providers]
-        )
-    elif extra == "apache.hive":
-        # We moved the hive macros to the hive provider, and they are available in hive provider only as of
-        # 5.1.0 version only, so we have to make sure minimum version is used
-        EXTRAS_DEPENDENCIES[extra] = ["apache-airflow-providers-apache-hive>=5.1.0"]
-    else:
-        EXTRAS_DEPENDENCIES[extra] = [
-            get_provider_package_name_from_package_id(package_name) for package_name in providers
-        ]
-
-
-def add_provider_packages_to_extra_dependencies(extra: str, providers: list[str]) -> None:
-    """
-    Add provider packages as dependencies to extra.
-
-    This is used to add provider packages as dependencies to the "bulk" kind of extras.
-    Those bulk extras do not have the detailed 'extra' dependencies as initial values,
-    so instead of replacing them (see previous function) we can extend them.
-
-    :param extra: Name of the extra to add providers to
-    :param providers: list of provider ids
-    """
-    EXTRAS_DEPENDENCIES[extra].extend(
-        [get_provider_package_name_from_package_id(package_name) for package_name in providers]
-    )
-
-
-def add_all_provider_packages() -> None:
-    """
-    Add extra dependencies when providers are installed from packages.
-
-    In case of regular installation (providers installed from packages), we should add extra dependencies to
-    Airflow - to get the providers automatically installed when those extras are installed.
-
-    For providers installed from sources we skip that step. That helps to test and install airflow with
-    all packages in CI - for example when new providers are added, otherwise the installation would fail
-    as the new provider is not yet in PyPI.
-
-    """
-    for provider_id in ALL_PROVIDERS:
-        replace_extra_dependencies_with_provider_packages(provider_id, [provider_id])
-    add_provider_packages_to_extra_dependencies("all", ALL_PROVIDERS)
-    add_provider_packages_to_extra_dependencies("devel_ci", ALL_PROVIDERS)
-    add_provider_packages_to_extra_dependencies("devel_all", ALL_PROVIDERS)
-    add_provider_packages_to_extra_dependencies("all_dbs", ALL_DB_PROVIDERS)
-    add_provider_packages_to_extra_dependencies(
-        "devel_hadoop", ["apache.hdfs", "apache.hive", "presto", "trino"]
-    )
-    add_all_deprecated_provider_packages()
-
-
-class Develop(develop_orig):
-    """Forces removal of providers in editable mode."""
-
-    def run(self) -> None:  # type: ignore
-        self.announce("Installing in editable mode. Uninstalling provider packages!", level=log.INFO)
-        # We need to run "python3 -m pip" because it might be that older PIP binary is in the path
-        # And it results with an error when running pip directly (cannot import pip module)
-        # also PIP does not have a stable API so we have to run subprocesses ¯\_(ツ)_/¯
-        try:
-            installed_packages = (
-                subprocess.check_output(["python3", "-m", "pip", "freeze"]).decode().splitlines()
-            )
-            airflow_provider_packages = [
-                package_line.split("=")[0]
-                for package_line in installed_packages
-                if package_line.startswith("apache-airflow-providers")
-            ]
-            self.announce(f"Uninstalling ${airflow_provider_packages}!", level=log.INFO)
-            subprocess.check_call(["python3", "-m", "pip", "uninstall", "--yes", *airflow_provider_packages])
-        except subprocess.CalledProcessError as e:
-            self.announce(f"Error when uninstalling airflow provider packages: {e}!", level=log.WARN)
-        super().run()
-
-
-class Install(install_orig):
-    """Forces installation of providers from sources in editable mode."""
-
-    def run(self) -> None:
-        self.announce("Standard installation. Providers are installed from packages", level=log.INFO)
-        super().run()
-
-
-def do_setup() -> None:
-    """
-    Perform the Airflow package setup.
-
-    Most values come from setup.cfg, only the dynamically calculated ones are passed to setup
-    function call. See https://setuptools.readthedocs.io/en/latest/userguide/declarative_config.html
-    """
-    setup_kwargs = {}
-
-    def include_provider_namespace_packages_when_installing_from_sources() -> None:
-        """
-        When installing providers from sources we install all namespace packages found below airflow.
-
-        Includes airflow and provider packages, otherwise defaults from setup.cfg control this.
-        The kwargs in setup() call override those that are specified in setup.cfg.
-        """
-        if os.getenv(INSTALL_PROVIDERS_FROM_SOURCES) == "true":
-            setup_kwargs["packages"] = find_namespace_packages(include=["airflow*"])
-
-    include_provider_namespace_packages_when_installing_from_sources()
-    if os.getenv(INSTALL_PROVIDERS_FROM_SOURCES) == "true":
-        print("Installing providers from sources. Skip adding providers as dependencies")
-    else:
-        add_all_provider_packages()
-
-    write_version()
-    setup(
-        distclass=AirflowDistribution,
-        extras_require=EXTRAS_DEPENDENCIES,
-        cmdclass={
-            "extra_clean": CleanCommand,
-            "compile_assets": CompileAssets,
-            "list_extras": ListExtras,
-            "install": Install,  # type: ignore
-            "develop": Develop,
-        },
-        test_suite="setup.airflow_test_suite",
-        **setup_kwargs,  # type: ignore
-    )
-
-
-if __name__ == "__main__":
-    do_setup()  # comment to trigger upgrade to newer dependencies when setup.py is changed
diff --git a/tests/cli/commands/test_task_command.py b/tests/cli/commands/test_task_command.py
index 0e86f20f2e400..00edf54e83cda 100644
--- a/tests/cli/commands/test_task_command.py
+++ b/tests/cli/commands/test_task_command.py
@@ -49,7 +49,6 @@
 from airflow.utils.session import create_session
 from airflow.utils.state import State
 from airflow.utils.types import DagRunType
-from setup import AIRFLOW_SOURCES_ROOT
 from tests.test_utils.config import conf_vars
 from tests.test_utils.db import clear_db_pools, clear_db_runs
 
@@ -60,9 +59,7 @@
     from airflow.models.dag import DAG
 
 DEFAULT_DATE = timezone.datetime(2022, 1, 1)
-ROOT_FOLDER = os.path.realpath(
-    os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir)
-)
+ROOT_FOLDER = Path(__file__).parents[3].resolve()
 
 
 def reset(dag_id):
@@ -755,7 +752,7 @@ def test_logging_with_run_task_stdout_k8s_executor_pod(self, is_k8s, is_containe
             "os.environ",
             AIRFLOW_IS_K8S_EXECUTOR_POD=is_k8s,
             AIRFLOW_IS_EXECUTOR_CONTAINER=is_container_exec,
-            PYTHONPATH=os.fspath(AIRFLOW_SOURCES_ROOT),
+            PYTHONPATH=os.fspath(ROOT_FOLDER),
         ):
             with subprocess.Popen(
                 args=[sys.executable, "-m", "airflow", *self.task_args, "-S", self.dag_path],
@@ -763,9 +760,12 @@ def test_logging_with_run_task_stdout_k8s_executor_pod(self, is_k8s, is_containe
                 stderr=subprocess.PIPE,
             ) as process:
                 output, err = process.communicate()
+                if err:
+                    print(err.decode("utf-8"))
                 lines = []
                 found_start = False
                 for line_ in output.splitlines():
+                    print(line_.decode("utf-8"))
                     line = line_.decode("utf-8")
                     if "Running